00001 /* 00002 * distribution.h 00003 * 00004 * Copyright (C) 2007 Thomas A. Vaughan 00005 * All rights reserved. 00006 * 00007 * 00008 * Redistribution and use in source and binary forms, with or without 00009 * modification, are permitted provided that the following conditions are met: 00010 * * Redistributions of source code must retain the above copyright 00011 * notice, this list of conditions and the following disclaimer. 00012 * * Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in the 00014 * documentation and/or other materials provided with the distribution. 00015 * * Neither the name of the <organization> nor the 00016 * names of its contributors may be used to endorse or promote products 00017 * derived from this software without specific prior written permission. 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY THOMAS A. VAUGHAN ''AS IS'' AND ANY 00020 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00022 * DISCLAIMED. IN NO EVENT SHALL THOMAS A. VAUGHAN BE LIABLE FOR ANY 00023 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00026 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 * 00030 * 00031 * Distribution collection. 00032 */ 00033 00034 #ifndef WAVEPACKET_UTIL_DISTRIBUTION_H__ 00035 #define WAVEPACKET_UTIL_DISTRIBUTION_H__ 00036 00037 00038 /// \ingroup util 00039 /// Basic distribution template. 00040 /// You can add samples, then query to find percentiles etc. 00041 template <class T> 00042 class distribution_t { 00043 public: 00044 distribution_t(void) throw() { this->clear(); } 00045 00046 void clear(void) throw() { 00047 m_count = 0; 00048 m_map.clear(); 00049 } 00050 00051 void addSample(IN T x) { 00052 m_count++; 00053 typename map_t::iterator i = m_map.find(x); 00054 if (m_map.end() == i) { 00055 m_map[x] = 1; 00056 return; 00057 } 00058 // m_map[x] = m_map[x] + 1; 00059 i->second = i->second + 1; 00060 } 00061 00062 long size(void) const throw() { return m_count; } 00063 00064 /// note that 0 < p < 1 00065 T getValueAtPercentile(IN float p) const throw() { 00066 ASSERT(p > 0.0 && p < 1.0, "Bad p: %f", p); 00067 00068 // This is the most important line in the file! 00069 // (idx = p * m_count) 00070 // There is a temptation to worry about rounding, and 00071 // add additional fudge factors like 0.00001 or 0.5 00072 // Don't! 00073 // 00074 // Imagine a distribution with only 3 entries. 00075 // The 33% value is the first entry! 00076 // The 34-66% value is the second 00077 // The 67+% value is the third 00078 long idx = (long) (p * m_count); 00079 00080 return getValueAtIndex(idx); 00081 } 00082 00083 /// given a sample index, return its value. Samples are ordered by 00084 /// size, so the smallest will have idx = 0. 00085 T getValueAtIndex(IN int idx) const throw() { 00086 ASSERT(idx >= 0 && idx < m_count, 00087 "Bad index: %d", idx); 00088 00089 long j = 0; 00090 for (typename map_t::const_iterator i = m_map.begin(); 00091 i != m_map.end(); ++i) { 00092 j += i->second; 00093 if (j > idx) { 00094 // DPRINTF(" Using sample %ld", j); 00095 return i->first; 00096 } 00097 } 00098 ASSERT(false, "should never get here!"); 00099 return 0; 00100 } 00101 00102 float getPercentileAtValue(IN T x) const throw() { 00103 long j = 0; 00104 for (typename map_t::const_iterator i = m_map.begin(); 00105 i != m_map.end(); ++i) { 00106 if (i->first >= x) { 00107 return ((1.0 * j) / (1.0 * m_count)); 00108 } 00109 j += i->second; 00110 } 00111 return 1.0; // must be past end 00112 } 00113 00114 private: 00115 typedef std::map<T, int> map_t; 00116 00117 map_t m_map; // value --> count 00118 long m_count; // total number of samples in distribut 00119 }; 00120 00121 00122 #endif // WAVEPACKET_UTIL_DISTRIBUTION_H__ 00123