CluE  1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
agglomerative.h
Go to the documentation of this file.
1 #ifndef AGGLOMERATIVE_H
2 #define AGGLOMERATIVE_H
3 
4 #include "../base/algorithm.h"
5 #include "../base/clusterdissimilaritymeasure.h"
6 #include "../base/solutionprovider.h"
7 #include "../base/inputsetter.h"
8 #include "../base/clustermeasuresetter.h"
9 #include "../datastructure/partitionsolution.h"
10 #include "../exception/invalidruntimeconfigurationexception.h"
11 
12 #include <vector>
13 #include <limits>
14 
15 namespace CluE
16 {
17 
23 template<typename T> class Agglomerative : public Algorithm, public InputSetter<T>, public ClusterMeasureSetter<T>
24 {
25 public:
33  enum
34  {
35  OnDemand, // slow, needs no extra memory
36  Precompute // fast, needs more memory
37  };
38 
39  Agglomerative(std::vector<T*> const* data, ClusterDissimilarityMeasure<T>* measure = NULL,
40  int calculationTime = OnDemand);
41 
44  virtual ~Agglomerative();
45 
50  virtual PartitionSolution<T>* compute();
51 
52  virtual void setInput(std::vector<T*> const*);
53  virtual void setMeasure(ClusterDissimilarityMeasure<T> const*);
54  void setMode(int);
55 
61 
62 private:
63  std::vector<T*> const* input;
65  int mode;
66 };
67 
68 template<typename T> Agglomerative<T>::Agglomerative(std::vector<T*> const* data,
69  ClusterDissimilarityMeasure<T>* m, int cm) : input(data),
70  measure(m==NULL?NULL:m->clone()), mode(cm)
71 {
72 }
73 
74 template<typename T> Agglomerative<T>::Agglomerative(const Agglomerative<T>& rhs) : Algorithm(rhs),
75  input(rhs.input), measure(rhs.measure==NULL?NULL:rhs.measure->clone()), mode(rhs.mode)
76 {
77 }
78 
80  const Agglomerative& rhs)
81 {
82  Algorithm::operator=(rhs);
83  DissimilarityMeasure<T>* dm = rhs.measure==NULL?NULL:rhs.measure->clone();
84  delete this->measure;
85  this->measure = dm;
86  this->input = rhs.input;
87  this->mode = rhs.mode;
88  return *this;
89 }
90 
91 template<typename T> Agglomerative<T>::~Agglomerative()
92 {
93  delete this->measure;
94 }
95 
97 {
98  if (this->input==NULL)
99  throw InvalidRuntimeConfigurationException(0, "Input is NULL.");
100  if (this->measure==NULL)
101  throw InvalidRuntimeConfigurationException(1, "Dissimilarity measure is NULL");
102 
103  clock_t start, end;
104  start = clock();
105 
106  PartitionSolution<T>* solution = new PartitionSolution<T>();
107 
108  unsigned int N = this->input->size();
109 
110  // create a first partition (partitions[0]) with one cluster for input data object
111  std::clog << "CluE::Agglomerative<T>::compute() - starting with " << N << " singleton clusters..." << std::endl;
112  std::vector<std::vector<T*> > partitionzero;
113  for (unsigned int i=0; i<N; i++)
114  {
115  std::vector<T*> singleton;
116  singleton.push_back((*this->input)[i]);
117  partitionzero.push_back(singleton);
118  }
119  solution->partitions.push_back(partitionzero);
120 
121  // precompute dissimilarities
122  double* dis = NULL;
123 
124  if (this->mode==Precompute)
125  {
126  std::clog << "CluE::Agglomerative<T>::compute() - precomputing dissimilarities..." << std::endl;
127  dis = new double[N*N];
128 
129  for (unsigned int i=0; i<N; i++)
130  for (unsigned int j=0; j<N; j++)
131  dis[N*i+j] = measure->dissimilarity(solution->partitions[0][i], solution->partitions[0][j]);
132  }
133 
134  for (unsigned int n=1; n<N; n++)
135  {
136  std::clog << "CluE::Agglomerative<T>::compute() - currently computing round " << n << std::endl;
137  // find the two clusters with minimum dissimilarity
138  int first = -1;
139  int second = -1;
140  double min = std::numeric_limits<double>::infinity();
141  for (unsigned int i=0; i<N-n+1; i++)
142  for (unsigned int j=0; j<N-n+1; j++)
143  {
144  //cout << "i=" << i << ", j=" << j << std::endl;
145  if (i!=j)
146  {
147  double d;
148  if (this->mode==Precompute)
149  d = dis[N*i+j];
150  else
151  d = measure->dissimilarity(solution->partitions[n-1][i], solution->partitions[n-1][j]);
152 
153  if (d < min)
154  {
155  min = d;
156  first = i;
157  second = j;
158  }
159  }
160  }
161 
162  if (second<first)
163  {
164  int i = first;
165  first = second;
166  second = i;
167  }
168  // cout << "merging cluster " << first << " and " << second << std::endl;
169 
170  // merge found clusters at next granularity (merged cluster at index first and last cluster moved to index second)
171  solution->partitions.push_back(solution->partitions[n-1]);
172  while (!solution->partitions[n][second].empty())
173  {
174  solution->partitions[n][first].push_back(solution->partitions[n][second].back());
175  solution->partitions[n][second].pop_back();
176  }
177  solution->partitions[n][second] = solution->partitions[n].back();
178  solution->partitions[n].pop_back();
179 
180  if (this->mode==Precompute)
181  {
182  // update dissimilarities
183  for (unsigned int i=0; i<N-n; i++)
184  {
185  dis[N*second+i] = dis[N*(N-n)+i];
186  dis[N*i+second] = dis[N*i+(N-n)];
187  }
188 
189  for (unsigned int i=0; i<N-n; i++)
190  {
191  dis[N*first+i] = this->measure->dissimilarity(solution->partitions[n][first], solution->partitions[n][i]);
192  dis[N*i+first] = this->measure->dissimilarity(solution->partitions[n][i], solution->partitions[n][first]);
193  }
194  }
195  }
196 
197  end = clock();
198  solution->seconds=double(end-start)/CLOCKS_PER_SEC;
199  std::clog << "CluE::Agglomerative<T>::compute() - finished" << std::endl;
200  return solution;
201 }
202 
203 template<typename T> void Agglomerative<T>::setInput(std::vector<T*> const* data)
204 {
205  this->input = data;
206 }
207 
209 {
210  this->measure = m==NULL?NULL:m->clone();
211 }
212 
213 template<typename T> void Agglomerative<T>::setMode(int m)
214 {
215  this->mode = m;
216 }
217 
219 {
220  return dynamic_cast<Agglomerative<T>*>(s);
221 }
222 
223 }
224 
225 #endif
std::vector< std::vector< std::vector< T * > > > partitions
Agglomerative< T > & operator=(const Agglomerative< T > &)
Definition: agglomerative.h:79
Abstract base class for cluster dissimilarity measurement.
virtual ~Agglomerative()
Definition: agglomerative.h:91
std::vector< T * > const * input
Definition: agglomerative.h:63
ClusterDissimilarityMeasure< T > * measure
Definition: agglomerative.h:64
Agglomerative clustering algorithm.
Definition: agglomerative.h:23
virtual void setInput(std::vector< T * > const *)
static Agglomerative< T > * toAgglomerative(Algorithm *s)
does a dynamic cast of the given Algorithm to Agglomerative
Agglomerative(std::vector< T * > const *data, ClusterDissimilarityMeasure< T > *measure=NULL, int calculationTime=OnDemand)
Definition: agglomerative.h:68
Abstract base class for algorithms.
Definition: algorithm.h:17
Abstract base class for dissimilarity measurement.
virtual ClusterDissimilarityMeasure< T > * clone() const =0
virtual void setMeasure(ClusterDissimilarityMeasure< T > const *)
virtual PartitionSolution< T > * compute()
Definition: agglomerative.h:96
Interface to propagate the ability to set a ClusterDissimilarityMeasure.
Indicates that a computation entered an invalid configuration state.
Data structure for partitions.
Interface to propagate the ability to set input data.
Definition: inputsetter.h:13