OpenWalnut  1.4.0
WValueSetHistogram.cpp
1 //---------------------------------------------------------------------------
2 //
3 // Project: OpenWalnut ( http://www.openwalnut.org )
4 //
5 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
6 // For more information see http://www.openwalnut.org/copying
7 //
8 // This file is part of OpenWalnut.
9 //
10 // OpenWalnut is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // OpenWalnut is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
22 //
23 //---------------------------------------------------------------------------
24 
25 #include <algorithm>
26 #include <cstring> // memset()
27 #include <numeric>
28 #include <string>
29 #include <utility>
30 
31 #include "../../common/WAssert.h"
32 #include "../../common/WLimits.h"
33 #include "../../common/exceptions/WOutOfBounds.h"
34 #include "../WDataHandlerEnums.h"
35 
36 #include "WValueSetHistogram.h"
37 
38 WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, size_t buckets ):
39  WHistogram( valueSet->getMinimumValue(), valueSet->getMaximumValue(), buckets )
40 {
41  buildHistogram( *valueSet );
42 }
43 
44 WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, size_t buckets ):
45  WHistogram( valueSet.getMinimumValue(), valueSet.getMaximumValue(), buckets )
46 {
47  buildHistogram( valueSet );
48 }
49 
50 WValueSetHistogram::WValueSetHistogram( boost::shared_ptr< WValueSetBase > valueSet, double min, double max, size_t buckets ):
51  WHistogram( min, max, buckets )
52 {
53  buildHistogram( *valueSet );
54 }
55 
56 WValueSetHistogram::WValueSetHistogram( const WValueSetBase& valueSet, double min, double max, size_t buckets ):
57  WHistogram( min, max, buckets )
58 {
59  buildHistogram( valueSet );
60 }
61 
63 {
65 
66  // create base histogram
67  WAssert( m_nbBuckets > 1, "WValueSetHistogram::buildHistogram : number of buckets needs to be larger than 1." );
69  m_initialBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nInitialBuckets );
70  WAssert( m_initialBucketSize > 0.0, "WValueSetHistogram::buildHistogram() : m_initialBucketSize to small." );
71 
72  // NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the
73  // calculation of interval sizes, the value must not be incremented
75 
76  // create and initialize array to zero which finally contains the counts
77  size_t* initialBuckets = new size_t[ m_nInitialBuckets ];
78  memset( initialBuckets, 0, m_nInitialBuckets * sizeof( size_t ) );
79  // *initialBuckets = { 0 }; // this should works with C++0x (instead memset), TEST IT!
80 
81  // the array can be shared among several instances of WValueSetHistogram.
82  m_initialBuckets = boost::shared_array< size_t >( initialBuckets );
83 
84  // no mapping applied yet. Initial and mapped are equal
88 
89  // and finally create the histogram
90  for( size_t i = 0; i < valueSet.size(); ++i )
91  {
92  double tmp = valueSet.getScalarDouble( i );
93  insert( tmp );
94  }
95 
96  m_nbTotalElements = valueSet.size();
97 }
98 
99 WValueSetHistogram::WValueSetHistogram( const WValueSetHistogram& histogram, size_t buckets ):
100  WHistogram( histogram ),
101  m_initialBucketSize( histogram.m_initialBucketSize ),
102  m_initialBuckets( histogram.m_initialBuckets ),
103  m_nInitialBuckets( histogram.m_nInitialBuckets ),
104  m_mappedBuckets( histogram.m_mappedBuckets ),
105  m_nMappedBuckets( histogram.m_nMappedBuckets ),
106  m_mappedBucketSize( histogram.m_mappedBucketSize ),
107  m_nbTotalElements( histogram.m_nbTotalElements )
108 {
109  // apply modification of the histogram bucket size?
110  if( ( buckets == 0 ) || ( buckets == m_nMappedBuckets ) )
111  {
112  return;
113  }
114 
115  WAssert( buckets > 1, "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be larger than 1." );
116  WAssert( buckets < m_nInitialBuckets,
117  "WValueSetHistogram::WValueSetHistogram : number of buckets needs to be smaller than the initial bucket count." );
118 
119  // number of elements in the new mapped histogram = division + (round up)
120  m_nMappedBuckets = buckets - 1;
121  m_mappedBucketSize = ( m_maximum - m_minimum ) / static_cast< double >( m_nMappedBuckets );
122 
123  // NOTE: as all the intervals are right-open, we need an additional slot in our array for the last interval [m_maximum,\infinity). For the
124  // calculation of interval sizes, the value must not be incremented
126 
127  size_t ratio = static_cast<size_t>( m_nInitialBuckets / m_nMappedBuckets );
128 
129  m_mappedBuckets.reset();
130 
131  size_t* mappedBuckets = new size_t[ m_nMappedBuckets ];
132  memset( mappedBuckets, 0, m_nMappedBuckets * sizeof( size_t ) );
133  // *mappedBuckets = { 0 }; // works with C++0x
134  m_mappedBuckets = boost::shared_array< size_t >( mappedBuckets );
135 
136  // map it
137  size_t index = 0;
138  for( size_t i = 0; i < m_nInitialBuckets; ++i )
139  {
140  if( ( i % ratio == 0 ) && ( i != 0 ) && ( i != m_nInitialBuckets - 1 ) )
141  {
142  index++;
143  }
144  m_mappedBuckets[ index ] += m_initialBuckets[i];
145  }
146 }
147 
149 {
150  if( this != &other ) // protect against invalid self-assignment
151  {
152  m_minimum = other.m_minimum;
153  m_maximum = other.m_maximum;
158 
159  // copy the initial/mapped buckets reference, no deep copy here!
162  }
163 
164  return *this;
165 }
166 
168 {
169 }
170 
171 boost::shared_array< size_t > WValueSetHistogram::getInitialBuckets() const
172 {
173  return m_initialBuckets;
174 }
175 
177 {
178  return m_nInitialBuckets;
179 }
180 
182 {
183  return m_initialBucketSize;
184 }
185 
186 double WValueSetHistogram::getBucketSize( size_t /* index */ ) const
187 {
188  // ignore index as each bucket has the same width
189  return m_mappedBucketSize;
190 }
191 
192 void WValueSetHistogram::insert( double value )
193 {
194  m_mappedBuckets[ getIndexForValue( value ) ]++;
195 }
196 
197 size_t WValueSetHistogram::operator[]( size_t index ) const
198 {
199  // if no mapping has been applied, mappedBuckets points to the initial bucket
200  return m_mappedBuckets[ index ];
201 }
202 
203 size_t WValueSetHistogram::at( size_t index ) const
204 {
205  WAssert( index < m_nMappedBuckets, "WValueSetHistogram::at() : index out of range." );
206 
207  // if no mapping has been applied, mappedBuckets points to the initial bucket
208  return m_mappedBuckets[ index ];
209 }
210 
212 {
213  return m_nMappedBuckets; // overwrite the WHistogram::size here as we have our own size.
214 }
215 
217 {
218  return m_nbTotalElements;
219 }
220 
221 std::pair< double, double > WValueSetHistogram::getIntervalForIndex( size_t index ) const
222 {
223  double first = m_minimum + m_mappedBucketSize * index;
224  double second = m_minimum + m_mappedBucketSize * ( index + 1 );
225  return std::make_pair( first, second );
226 }
227 
228 size_t WValueSetHistogram::accumulate( size_t startIndex, size_t endIndex ) const
229 {
230  if( startIndex > endIndex )
231  {
232  std::swap( startIndex, endIndex );
233  }
234 
235  // valid index?
236  if( endIndex > size() ) // as endIndex is exclusive, it is allowed to equal size()
237  {
238  throw WOutOfBounds( std::string( "The specified endIndex is out of bounds." ) );
239  }
240 
241  // unfortunately, shared_array can't be used for std::accumulate
242  size_t acc = 0;
243  while( startIndex != endIndex )
244  {
245  acc += m_mappedBuckets[ startIndex++ ];
246  }
247 
248  return acc;
249 }
250 
251 std::ostream& operator<<( std::ostream& out, const WValueSetHistogram& h )
252 {
253  for( size_t i = 0; i < h.size() - 1; ++i )
254  {
255  std::pair< double, double > interval = h.getIntervalForIndex( i );
256  // NOTE: the notation for open intervals is [a,b) or alternatively [a,b[.
257  //out << i << " = [" << interval.first << ", " << interval.second << ") = " << h[ i ] << std::endl;
258  out << interval.first << " " << interval.second << " " << std::min( h[ i ], size_t( 3000 ) ) << std::endl;
259  }
260  // the last interval is handled special
261  //out << h.size() - 1 << " = [" << h.getIntervalForIndex( h.size() - 1 ).first << ", inf) = " << h[ h.size() - 1 ] << std::endl;
262  out << h.getIntervalForIndex( h.size() - 1 ).first << " inf " << std::min( h[ h.size() - 1 ], size_t( 3000 ) ) << std::endl;
263 
264  return out;
265 }
266 
Used to find the occurrence frequencies of values in a value set.
virtual size_t getTotalElementCount() const
This returns the number of value set entries added to the histogram.
size_t m_nInitialBuckets
Number of buckets in the initial histogram.
size_t getNInitialBuckets() const
Return the number of initial buckets.
~WValueSetHistogram()
Destructor.
virtual std::pair< double, double > getIntervalForIndex(size_t index) const
Returns the actual interval associated with the given index.
double m_mappedBucketSize
Size of one bucket in the mapped histogram.
double m_minimum
The smallest value.
Definition: WHistogram.h:123
double m_nbBuckets
The number of buckets.
Definition: WHistogram.h:133
virtual size_t size() const
Returns the number of buckets in the histogram with the actual mapping.
WValueSetHistogram & operator=(const WValueSetHistogram &other)
Copy assignment.
boost::shared_array< size_t > m_initialBuckets
Pointer to all initial buckets of the histogram.
virtual double getScalarDouble(size_t i) const =0
Indicates invalid element access of a container.
Definition: WOutOfBounds.h:36
Container which associate values with (uniform width) bins (aka intervals or buckets).
Definition: WHistogram.h:36
size_t m_nMappedBuckets
Tracks the number of a buckets in the mapped histogram.
virtual void insert(double value)
increment the value by one, contains the logic to find the element place in the array.
double m_initialBucketSize
Size of one bucket in the initial histogram.
double getInitialBucketSize() const
Return the size of one initial bucket.
virtual size_t operator[](size_t index) const
Get the count of the bucket.
boost::shared_array< size_t > m_mappedBuckets
Pointer to all initial buckets of the histogram.
WValueSetHistogram(boost::shared_ptr< WValueSetBase > valueSet, size_t buckets=1000)
Constructor.
void buildHistogram(const WValueSetBase &valueSet)
Actually builds the histogram.
virtual double getBucketSize(size_t index=0) const
Return the size of one bucket.
size_t m_nbTotalElements
The number of elements distributed in the buckets.
Abstract base class to all WValueSets.
Definition: WValueSetBase.h:63
virtual size_t at(size_t index) const
Get the count of the bucket.
virtual size_t getIndexForValue(double value) const
Returns the right index to the bucket containing the given value.
boost::shared_array< size_t > getInitialBuckets() const
Return the initial buckets.
double m_maximum
The biggest value.
Definition: WHistogram.h:128
virtual size_t size() const =0
virtual size_t accumulate(size_t startIndex, size_t endIndex) const
Sums up the buckets in the specified interval.