CluE  1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
dasguptalong.h
Go to the documentation of this file.
1 #ifndef DASGUPTALONG_H
2 #define DASGUPTALONG_H
3 
4 #include <cmath>
5 
6 #include "../base/algorithm.h"
7 #include "../base/inputsetter.h"
8 #include "../base/measuresetter.h"
9 #include "../datastructure/partitionsolution.h"
10 #include "../sampling/farthestfirsttraversal.h"
11 #include "../misc/dynamicbitset.h"
12 
13 #include "../exception/invalidruntimeconfigurationexception.h"
14 #include "../exception/invalidargumentexception.h"
15 
16 namespace CluE
17 {
18 
27 template<typename T> class DasguptaLong : public Algorithm, public InputSetter<T>, public MeasureSetter<T>
28 {
29 public:
38  DasguptaLong(DissimilarityMeasure<T> const *measure = 0, std::vector<T*> const* input = 0, unsigned int firstPoint = 0);
39 
42  virtual ~DasguptaLong();
43 
48  virtual SolutionProvider* compute();
49 
50  virtual void setInput(std::vector<T*> const*);
51 
52  virtual void setMeasure(DissimilarityMeasure<T> const *measure);
53 
57  void setFirstPoint(unsigned int index);
58 
65  void setGranularity(double alpha, double beta);
66 
72 
73 private:
75  std::vector<T*> const *input;
76  unsigned int first_point;
77  double alpha;
78  double beta;
79 };
80 
81 template<typename T> DasguptaLong<T>::DasguptaLong(DissimilarityMeasure<T> const *measure, std::vector<T*> const* data, unsigned int fp) :
82  measure(measure==0 ? 0 : measure->clone()),
83  input(data),
84  first_point(fp),
85  alpha(1),
86  beta(2)
87 {
88 }
89 
90 template<typename T> DasguptaLong<T>::DasguptaLong(const DasguptaLong<T>& dl) :
91  measure(dl.measure == 0 ? 0 : dl.measure->clone()),
92  input(dl.input),
93  first_point(first_point),
94  alpha(dl.alpha),
95  beta(dl.beta)
96 {
97 }
98 
100 {
101  if(measure != 0)
102  delete measure;
103 
104  Algorithm::operator= (dl);
105 
106  measure = dl.measure == 0 ? 0 : dl.measure->clone();
107  input = dl.input;
108  first_point = first_point;
109  alpha = dl.alpha;
110  beta = dl.beta;
111 
112  return *this;
113 }
114 
115 template<typename T> DasguptaLong<T>::~DasguptaLong()
116 {
117  if(measure != 0)
118  delete measure;
119 }
120 
122 {
123  PartitionSolution<T>* solution = new PartitionSolution<T>();
124  unsigned int numOfPoints = this->input->size();
125 
126  if(this->input == 0)
127  throw InvalidRuntimeConfigurationException(0, "Input is NULL.");
128  if(this->measure == 0)
129  throw InvalidRuntimeConfigurationException(1, "Dissimilarity measure is NULL.");
130 
131 
132  time_t start, end;
133  start = time(0);
134 
135  FarthestFirstTraversal<T> fft(this->measure, this->input, numOfPoints, first_point);
136  DiscreteProxySolution<T> *dps = dynamic_cast<DiscreteProxySolution<T>*>(fft.compute());
137  std::vector<T*> *numberedPoints = &dps->proxysets[0];
138 
139  //Build R
140  std::vector<double> rValues;
141  std::vector<unsigned int> rPoints;
142  rValues.push_back(0); //dummy value
143  for(unsigned int i = 1; i < numOfPoints; i++)
144  {
145  double minDist = measure->dissimilarity(* (*numberedPoints)[i], * (*numberedPoints)[0]);
146  for(unsigned int j = 1; j < i; j++)
147  {
148  double candDist = measure->dissimilarity(* (*numberedPoints)[i], * (*numberedPoints)[j]);
149  if(candDist < minDist)
150  minDist = candDist;
151  }
152  rValues.push_back(minDist);
153  }
154  double r = alpha * rValues[1];
155 
156  //Build L
157  DynamicBitset<> isLeveled(numOfPoints);
158  isLeveled.set(first_point, true);
159 
160  std::vector<std::vector<unsigned int> > lValues;
161  lValues.push_back(std::vector<unsigned int>());
162  lValues[0].push_back(first_point);
163  for(unsigned int i = 1; isLeveled.count() < numOfPoints; i++)
164  {
165  lValues.push_back(std::vector<unsigned int>());
166 
167  double lowerBound = r / std::pow(beta, (double) i);
168  double upperBound = r / std::pow(beta, (double)(i-1));
169  for(unsigned int j = 0; j < numOfPoints; j++)
170  {
171  if(isLeveled[j])
172  continue;
173 
174  if(lowerBound < rValues[j] && rValues[j] <= upperBound)
175  {
176  isLeveled.set(j, true);
177  lValues[i].push_back(j);
178  }
179  }
180  }
181 
182  //Pi
183  unsigned int pi[numOfPoints];
184  pi[first_point] = 0;
185  unsigned int levels = lValues.size();
186  for(unsigned int level = 1; level < levels; level++)
187  {
188  //Iterate over all levels...
189  unsigned int levelSize = lValues[level].size();
190  for(unsigned int levelIndex = 0; levelIndex < levelSize; levelIndex++)
191  {
192  //...to assign to each point...
193  T* currentPoint = (*numberedPoints)[lValues[level][levelIndex]];
194  double minDist = measure->dissimilarity(*currentPoint, * (*numberedPoints)[first_point]);
195  unsigned int minPoint = first_point;
196  for(unsigned int lowerLevel = 1; lowerLevel < level; lowerLevel++)
197  {
198  //...from a lower level...
199  unsigned int lowerLevelSize = lValues[lowerLevel].size();
200  for(unsigned int lowerLevelIndex = 0; lowerLevelIndex < lowerLevelSize; lowerLevelIndex++)
201  {
202  //...the nearest point.
203  double candDist = measure->dissimilarity(*currentPoint, * (*numberedPoints)[lValues[lowerLevel][lowerLevelIndex]]);
204  if(candDist < minDist)
205  {
206  minDist = candDist;
207  minPoint = lValues[lowerLevel][lowerLevelIndex];
208  }
209  }
210  }
211  pi[lValues[level][levelIndex]] = minPoint;
212  }
213  }
214 
215  /*
216  std::cout << "Pi:" << std::endl;
217  int n = lValues.size();
218  for(int i = 0; i < n; i++)
219  {
220  int m = lValues[i].size();
221  for(int j = 0; j < m; j++)
222  {
223  std::cout << lValues[i][j] << " -> " << pi[lValues[i][j]] << std::endl;
224  }
225  }
226  */
227 
228  //Build clusterings
229  std::vector<std::vector<std::vector<T*> > >& sets = solution->partitions;
230  for(unsigned int i = 0; i < levels; i++)
231  {
232  sets.push_back(std::vector<std::vector<T*> >());
233  std::vector<std::vector<T*> >& partitions = sets.back();
234 
235  unsigned int recursivePi[numOfPoints];
236  //In-granularity-level points
237  for(unsigned int j = 0; j <= i; j++)
238  {
239  unsigned int size = lValues[j].size();
240  for(unsigned int k = 0; k < size; k++)
241  {
242  partitions.push_back(std::vector<T*>());
243  recursivePi[lValues[j][k]] = partitions.size()-1;
244  }
245  }
246  //Out-of-granularity-level points
247  for(unsigned int j = i+1; j < levels; j++)
248  {
249  unsigned int size = lValues[j].size();
250  for(unsigned int k = 0; k < size; k++)
251  {
252  unsigned int index = lValues[j][k];
253  recursivePi[index] = recursivePi[pi[index]];
254  }
255  }
256 
257  //Assign points to clusters
258  for(unsigned int i = 0; i < numOfPoints; i++)
259  {
260  partitions[recursivePi[i]].push_back((*numberedPoints)[i]);
261  }
262  }
263 
264  delete dps;
265 
266  end = time(0);
267  solution->seconds = end-start;
268 
269  return solution;
270 }
271 
272 template<typename T> void DasguptaLong<T>::setFirstPoint(unsigned int s)
273 {
274  this->first_point = s;
275 }
276 
277 template<typename T> void DasguptaLong<T>::setGranularity(double a, double b)
278 {
279  if(b > 1)
280  {
281  if(a >= 1 && a < b)
282  {
283  this->alpha = a;
284  this->beta = b;
285  }
286  else
287  {
288  throw InvalidArgumentException(0, "The given value of alpha is invalid. No changes were made.", "alpha");
289  }
290  }
291  else
292  {
293  throw InvalidArgumentException(1, "The given value of beta is invalid. No changes were made.", "beta");
294  }
295 }
296 
297 template<typename T> void DasguptaLong<T>::setInput(std::vector<T*> const* data)
298 {
299  this->input = data;
300 }
301 
302 template<typename T> void DasguptaLong<T>::setMeasure(DissimilarityMeasure<T> const *measure)
303 {
304  if(this->measure != 0)
305  delete this->measure;
306 
307  if(measure == 0)
308  this->measure = 0;
309  else
310  this->measure = measure->clone();
311 }
312 
314 {
315  return dynamic_cast<DasguptaLong<T>*>(s);
316 }
317 
318 }
319 #endif
std::vector< std::vector< std::vector< T * > > > partitions
DasguptaLong algorithm.
Definition: dasguptalong.h:27
DasguptaLong(DissimilarityMeasure< T > const *measure=0, std::vector< T * > const *input=0, unsigned int firstPoint=0)
Constructor for general usage.
Definition: dasguptalong.h:81
virtual ~DasguptaLong()
Definition: dasguptalong.h:115
DissimilarityMeasure< T > * measure
Definition: dasguptalong.h:74
unsigned int first_point
Definition: dasguptalong.h:76
Data structure for discrete proxies.
unsigned long long count()
Number of set bits.
Definition: dynamicbitset.h:78
virtual void setMeasure(DissimilarityMeasure< T > const *measure)
Definition: dasguptalong.h:302
std::vector< std::vector< T * > > proxysets
Dynamic bitset similiar to boost::dynamic_bitset.
Definition: dynamicbitset.h:15
std::vector< T * > const * input
Definition: dasguptalong.h:75
virtual DissimilarityMeasure< T > * clone() const =0
void setGranularity(double alpha, double beta)
Sets the granularity.
Definition: dasguptalong.h:277
Abstract base class for algorithms.
Definition: algorithm.h:17
virtual void setInput(std::vector< T * > const *)
Definition: dasguptalong.h:297
static DasguptaLong< T > * toDasguptaLong(Algorithm *s)
does a dynamic cast of the given Algorithm to DasguptaLong
Definition: dasguptalong.h:313
Interface to propagate the ability to set a DissimilarityMeasure.
Definition: measuresetter.h:13
void set(size_t position, bool value)
Write access to the (position+1)-th bit.
Definition: dynamicbitset.h:67
Indicates invalid values of arguments.
Farthest first traversal algorithm.
Abstract base class for dissimilarity measurement.
virtual SolutionProvider * compute()
Definition: dasguptalong.h:121
Indicates that a computation entered an invalid configuration state.
virtual SolutionProvider * compute()
Returns an ordered DiscretesProxySolution. The first point is the starting point. ...
DasguptaLong< T > & operator=(const DasguptaLong< T > &)
Definition: dasguptalong.h:99
Abstract base class for algorithm solutions.
void setFirstPoint(unsigned int index)
Sets the starting point for farthest first traversal.
Definition: dasguptalong.h:272
Data structure for partitions.
Interface to propagate the ability to set input data.
Definition: inputsetter.h:13