distribution.h

Go to the documentation of this file.
00001 /*
00002  * distribution.h
00003  *
00004  * Copyright (C) 2007   Thomas A. Vaughan
00005  * All rights reserved.
00006  *
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions are met:
00010  *     * Redistributions of source code must retain the above copyright
00011  *       notice, this list of conditions and the following disclaimer.
00012  *     * Redistributions in binary form must reproduce the above copyright
00013  *       notice, this list of conditions and the following disclaimer in the
00014  *       documentation and/or other materials provided with the distribution.
00015  *     * Neither the name of the <organization> nor the
00016  *       names of its contributors may be used to endorse or promote products
00017  *       derived from this software without specific prior written permission.
00018  *
00019  * THIS SOFTWARE IS PROVIDED BY THOMAS A. VAUGHAN ''AS IS'' AND ANY
00020  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00021  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00022  * DISCLAIMED. IN NO EVENT SHALL THOMAS A. VAUGHAN BE LIABLE FOR ANY
00023  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00024  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00025  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00026  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00028  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029  *
00030  *
00031  * Distribution collection.
00032  */
00033 
00034 #ifndef WAVEPACKET_UTIL_DISTRIBUTION_H__
00035 #define WAVEPACKET_UTIL_DISTRIBUTION_H__
00036 
00037 
00038 /// \ingroup util
00039 /// Basic distribution template.
00040 /// You can add samples, then query to find percentiles etc.
00041 template <class T>
00042 class distribution_t {
00043 public:
00044         distribution_t(void) throw() { this->clear(); }
00045 
00046         void clear(void) throw() {
00047                         m_count = 0;
00048                         m_map.clear();
00049                 }
00050 
00051         void addSample(IN T x) {
00052                         m_count++;
00053                         typename map_t::iterator i = m_map.find(x);
00054                         if (m_map.end() == i) {
00055                                 m_map[x] = 1;
00056                                 return;
00057                         }
00058                         // m_map[x] = m_map[x] + 1;
00059                         i->second = i->second + 1;
00060                 }
00061 
00062         long size(void) const throw() { return m_count; }
00063 
00064         /// note that 0 < p < 1
00065         T getValueAtPercentile(IN float p) const throw() {
00066                         ASSERT(p > 0.0 && p < 1.0, "Bad p: %f", p);
00067 
00068                         // This is the most important line in the file!
00069                         //      (idx = p * m_count)
00070                         // There is a temptation to worry about rounding, and
00071                         //   add additional fudge factors like 0.00001 or 0.5
00072                         // Don't!
00073                         //
00074                         // Imagine a distribution with only 3 entries.
00075                         // The 33% value is the first entry!
00076                         // The 34-66% value is the second
00077                         // The 67+% value is the third
00078                         long idx = (long) (p * m_count);
00079 
00080                         return getValueAtIndex(idx);
00081                 }
00082 
00083         /// given a sample index, return its value.  Samples are ordered by
00084         ///     size, so the smallest will have idx = 0.
00085         T getValueAtIndex(IN int idx) const throw() {
00086                         ASSERT(idx >= 0 && idx < m_count,
00087                             "Bad index: %d", idx);
00088 
00089                         long j = 0;
00090                         for (typename map_t::const_iterator i = m_map.begin();
00091                              i != m_map.end(); ++i) {
00092                                 j += i->second;
00093                                 if (j > idx) {
00094 //                                      DPRINTF("    Using sample %ld", j);
00095                                         return i->first;
00096                                 }
00097                         }
00098                         ASSERT(false, "should never get here!");
00099                         return 0;
00100                 }
00101 
00102         float getPercentileAtValue(IN T x) const throw() {
00103                         long j = 0;
00104                         for (typename map_t::const_iterator i = m_map.begin();
00105                              i != m_map.end(); ++i) {
00106                                 if (i->first >= x) {
00107                                         return ((1.0 * j) / (1.0 * m_count));
00108                                 }
00109                                 j += i->second;
00110                         }
00111                         return 1.0;     // must be past end
00112                 }
00113 
00114 private:
00115         typedef std::map<T, int> map_t;
00116 
00117         map_t           m_map;          // value --> count
00118         long            m_count;        // total number of samples in distribut
00119 };
00120 
00121 
00122 #endif  // WAVEPACKET_UTIL_DISTRIBUTION_H__
00123