CluE  1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
frequencydistribution.cpp
Go to the documentation of this file.
1 #include "../frequency/frequencydistribution.h"
2 
3 #include <vector>
4 #include <iostream>
5 #include <math.h>
6 #include <float.h>
7 #include <algorithm>
8 #include <numeric>
9 #include <limits>
10 
11 using namespace CluE;
12 
16 FrequencyDistribution::FrequencyDistribution(std::vector<unsigned long> const& f)
17 {
18  size_t fSize = f.size();
19  unsigned long fSum = accumulate(f.begin(), f.end(), 0);
20  this->frequencies = f;
21  for(size_t i = 0; i < fSize; i++)
22  probabilities.push_back(double(f[i]) / fSum);
23  this->frequencysum = fSum;
24 }
25 
29 FrequencyDistribution::FrequencyDistribution(std::vector<FrequencyDistribution*> const& distributions)
30 {
31  this->frequencysum = 0;
32 
33  unsigned int num = distributions.size();
34  unsigned int length = 0;
35  for (unsigned int i=0; i<num; i++)
36  {
37  unsigned int l = distributions[i]->size();
38  if (l>length)
39  length=l;
40  }
41  this->frequencies.assign(length, 0);
42 
43  for (unsigned int i=0; i<num; i++)
44  {
45  unsigned int l = distributions[i]->size();
46  for (unsigned int j=0; j<l; j++)
47  {
48  this->frequencies[j] += distributions[i]->frequencies[j];
49  }
50  }
51  this->frequencysum = accumulate(this->frequencies.begin(), this->frequencies.end(), 0);
52 
53  size_t fSize = this->frequencies.size();
54  for(size_t i = 0; i < fSize; i++)
55  probabilities.push_back(double(this->frequencies[i]) / this->frequencysum);
56 
57  if (num==0)
58  std::clog << "FrequencyDistribution::FrequencyDistribution - WARNING: returning empty frequency distribution" << std::endl;
59 }
60 
61 unsigned int FrequencyDistribution::size() const
62 {
63  return this->frequencies.size();
64 }
65 
66 unsigned long FrequencyDistribution::frequency(unsigned int n) const
67 {
68  if (n<this->frequencies.size())
69  return this->frequencies[n];
70  return 0;
71 }
72 
73 unsigned long FrequencyDistribution::total() const
74 {
75  return this->frequencysum;
76 }
77 
78 double FrequencyDistribution::probability(unsigned int i) const
79 {
80  if (i<this->frequencies.size())
81  return probability(i);
82 
83  std::clog << "FrequencyDistribution::probability - WARNING: returning zero, index out of bound" << std::endl;
84  return .0;
85 }
86 
88 {
89  if (this->frequencysum==0)
90  return .0;
91 
92  std::vector<double> temp;
93  for(unsigned int i=0;i < this->frequencies.size(); i++)
94  {
95  if (this->frequencies[i]>0)
96  {
97  double p = probability(i);
98  if (p>.0)
99  temp.push_back(p*log(p));
100  }
101  }
102 
103  std::sort(temp.begin(), temp.end());
104  return -accumulate(temp.begin(), temp.end(), .0)/log(2);
105 }
106 
108 {
109  unsigned int length = this->frequencies.size();
110  unsigned int fdlength = fd.frequencies.size();
111  if (fdlength!=length)
112  {
113  std::cerr << "FrequencyDistribution::kullbackleibler - ERROR: frequency distributions have different lengths! ("
114  << fdlength << "!=" << length << ")" << std::endl;
115  return std::numeric_limits<double>::quiet_NaN();
116  }
117 
118  if (this->frequencysum==0)
119  return .0;
120 
121  if (fd.frequencysum==0)
122  return std::numeric_limits<double>::infinity();
123 
124  std::vector<double> temp;
125  for(unsigned int i=0; i<length; i++)
126  {
127  if (this->frequencies[i]>0)
128  {
129  if (fd.frequencies[i]==0)
130  return std::numeric_limits<double>::infinity();
131  double p = probability(i);
132  if (p>.0)
133  {
134  double q = fd.probability(i);
135  if (q>.0)
136  {
137  double r = p/q;
138  if (r>.0)
139  temp.push_back(p*log(r));
140  } else return std::numeric_limits<double>::infinity();
141  }
142  }
143  }
144  std::sort(temp.begin(), temp.end());
145  return accumulate(temp.begin(), temp.end(), .0)/log(2);
146  }
147 
148  std::ostream& operator << (std::ostream& os, FrequencyDistribution& fd)
149  {
150  unsigned int dimension = fd.size();
151  os << "FrequencyDistribution(";
152  for (unsigned int i=0;i<dimension;i++)
153  {
154  os << fd.frequency(i) << ", ";
155  }
156  os << "sum=" << fd.total() << ")";
157  return os;
158  }
virtual double probability(unsigned int) const
virtual unsigned long frequency(unsigned int) const
virtual double entropy() const
Probability distribution on non-negative integers.
std::vector< unsigned long > frequencies
virtual unsigned int size() const
virtual double kullbackleibler(FrequencyDistribution const &) const
FrequencyDistribution(std::vector< unsigned long > const &f)
constructs the centroid of the given vector of FrequencyDistribution objects.
std::vector< double > probabilities
virtual unsigned long total() const
std::ostream & operator<<(std::ostream &, FrequencyDistribution &)