CluE  1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
lloydtypecf.h
Go to the documentation of this file.
1 #ifndef LloydtypeCFCF_H
2 #define LloydtypeCFCF_H
3 
4 #include "../base/algorithm.h"
5 #include "../base/inputsetter.h"
6 #include "../base/measuresetter.h"
7 #include "../base/proxygenerator.h"
8 #include "../base/dissimilaritymeasure.h"
9 #include "../base/solutionprovider.h"
10 #include "../datastructure/doublesolution.h"
11 #include "../exception/invalidruntimeconfigurationexception.h"
12 #include "../base/euclideanspaceprovider.h"
13 
14 #include "cfrentry.h"
15 
16 #include <set>
17 #include <ctime>
18 #include <vector>
19 
20 namespace CluE
21 {
22 
26 template<typename T>
28 {
29 public:
31  DoubleSolution<T>(),
32  costs(0)
33  {
34 
35  }
36 
37  double costs;
38 };
39 
45 template<typename T>
46 class LloydtypeCF : public Algorithm, public InputSetter<T>, public MeasureSetter<T>
47 {
48 public:
53  LloydtypeCF(std::vector<T*> const* data = NULL, EuclideanSpaceProvider<T> const * s = NULL,
54  DissimilarityMeasure<T>* m = NULL, unsigned int maxIterations = 0);
55 
60  LloydtypeCF(std::vector<T> const& seeds, std::vector<T*> const* data = NULL,
61  EuclideanSpaceProvider<T> const * s = NULL, DissimilarityMeasure<T>* m = NULL, unsigned int maxIterations = 0);
62 
66  LloydtypeCF(std::vector<T*> const& seeds, std::vector<T*> const* data = NULL,
67  EuclideanSpaceProvider<T> const * s = NULL, DissimilarityMeasure<T>* m = NULL, unsigned int i = 0);
68 
71  virtual ~LloydtypeCF();
72 
80 
81  virtual void setInput(std::vector<T*> const*);
82  void setSeeding(std::vector<T> const&);
83  void setSeeding(std::vector<T*> const&);
85  virtual void setMeasure(DissimilarityMeasure<T> const*);
86 
92 
93 private:
94  std::vector<T*> const* input;
95  std::vector<T> seeding;
98  unsigned int maxiterations;
99 };
100 
101 template<typename T> LloydtypeCF<T>::LloydtypeCF(std::vector<T*> const* data,
102  EuclideanSpaceProvider<T> const * s, DissimilarityMeasure<T>* m, unsigned int i) : input(data),
103  espace(s==NULL?NULL:s->clone()), measure(m==NULL?NULL:m->clone()), maxiterations(i)
104 {
105 }
106 
107 template<typename T> LloydtypeCF<T>::LloydtypeCF(std::vector<T> const& seeds,
108  std::vector<T*> const* data, EuclideanSpaceProvider<T> const * s, DissimilarityMeasure<T>* m,
109  unsigned int i) : seeding(seeds), input(data), espace(s==NULL?NULL:s->clone()),
110  measure(m==NULL?NULL:m->clone()), maxiterations(i)
111 {
112 }
113 
114 template<typename T> LloydtypeCF<T>::LloydtypeCF(std::vector<T*> const& seeds,
115  std::vector<T*> const* data, EuclideanSpaceProvider<T> const * s, DissimilarityMeasure<T>* m,
116  unsigned int i) : input(data), espace(s==NULL?NULL:s->clone()),
117  measure(m==NULL?NULL:m->clone()), maxiterations(i)
118 {
119  this->setSeeding(seeds);
120 }
121 
122 template<typename T> LloydtypeCF<T>::LloydtypeCF(const LloydtypeCF<T>& rhs) :
123  Algorithm(rhs), input(rhs.input), seeding(rhs.seeding),
124  espace(rhs.espace==NULL?NULL:rhs.espace->clone()),
125  measure(rhs.measure==NULL?NULL:rhs.measure->clone()), maxiterations(rhs.maxiterations)
126 {
127 }
128 
129 template<typename T> LloydtypeCF<T>& LloydtypeCF<T>::operator=(const LloydtypeCF<T>& rhs)
130 {
131  Algorithm::operator=(rhs);
132  ProxyGenerator<T>* pg = rhs.generator==NULL?NULL:rhs.generator->clone();
133  DissimilarityMeasure<T>* dm = rhs.measure==NULL?NULL:rhs.measure->clone();
134  delete this->generator;
135  this->generator = pg;
136  delete this->measure;
137  this->measure = dm;
138  this->input = rhs.input;
139  this->seeding = rhs.seeding;
140  this->maxiterations = rhs.maxiterations;
141  return *this;
142 }
143 
144 template<typename T> LloydtypeCF<T>::~LloydtypeCF()
145 {
146  delete this->measure;
147  delete this->espace;
148 }
149 
151 {
152  if (this->input==NULL)
153  throw InvalidRuntimeConfigurationException(0, "Input is NULL.");
154  if (this->measure==NULL)
155  throw InvalidRuntimeConfigurationException(1, "Dissimilarity measure is NULL.");
156  if (this->espace==NULL)
157  throw InvalidRuntimeConfigurationException(2, "Euclidean space provider is NULL.");
158 
159  time_t start, end;
160  start = time(0);
161 
162  unsigned int k = this->seeding.size();
163  if (k==0)
164  throw InvalidRuntimeConfigurationException(3, "Empty seeding.");
165 
166  // Create CFREntry for each cluster
167  T nullVector = espace->nullVector();
168  double nullVectorSq = nullVector * nullVector;
169  std::vector<CFREntry<T>> clusters(k, CFREntry<T>(0, nullVector, nullVectorSq, nullVector));
170  for(size_t i = 0; i < k; ++i)
171  clusters[i].representative = seeding[i];
172 
173  bool in_motion;
174  unsigned int iterations = 0;
175  std::vector<size_t> assignment(input->size(), -1);
176  do
177  {
178  iterations++;
179  in_motion = false;
180 
181  // Assign nearest center (from last iteration) to each input point
182  for(size_t i = 0; i < input->size(); ++i)
183  {
184  double minDist = std::numeric_limits<double>::infinity();
185  size_t minIndex = 0;
186  for(size_t j = 0; j < k; ++j)
187  {
188  double tmpDist = measure->dissimilarity(*(*input)[i], clusters[j].representative);
189  if(tmpDist < minDist)
190  {
191  minDist = tmpDist;
192  minIndex = j;
193  }
194  }
195 
196  // Check for change in assignment
197  if(assignment[i] != minIndex)
198  {
199  if(assignment[i] != -1)
200  clusters[assignment[i]].remove(*(*input)[i]);
201  clusters[minIndex].insert(*(*input)[i]);
202  assignment[i] = minIndex;
203  in_motion = true;
204  }
205  }
206 
207 
208  // Break after reassignment of all points to the centers of the maxiterations. iteration
209  if (this->maxiterations > 0 && iterations > this->maxiterations)
210  {
211  // Current iteration is maxiterations+1
212 
213  in_motion = false;
214 
215  //std::clog << "CluE::LloydtypeCFe<T>::compute() - maximum number of iterations "
216  // << altered_clusters.size() << " is reached" << std::endl;
217  }
218 
219  // Don't calculate new centers when not in motion any more
220  if(!in_motion)
221  break;
222 
223  // Calculate new centers
224  for(size_t i = 0; i < k; ++i)
225  clusters[i].representative = clusters[i].cog();
226  }
227  while (in_motion);
228  end = time(0);
229 
231  // Proxies
232  std::vector<T> proxies;
233  proxies.resize(k);
234  for(size_t i = 0; i < k; ++i)
235  proxies[i] = clusters[i].representative;
236  solution->proxysets.push_back(proxies);
237  // Partitions
238  std::vector<std::vector<T*>> partitions(k, std::vector<T*>());
239  for(size_t i = 0; i < input->size(); ++i)
240  partitions[assignment[i]].push_back((*input)[i]);
241  solution->partitions.push_back(partitions);
242  // Costs
243  for(size_t i = 0; i < k; ++i)
244  solution->costs += clusters[i].kMeansCost(clusters[i].cog());
245  // Time
246  solution->seconds=end-start;
247  //std::clog << "CluE::LloydtypeCF<T>::compute() - finished after " << iterations << " iterations" << std::endl;
248  return solution;
249 }
250 
251 template<typename T> void LloydtypeCF<T>::setInput(std::vector<T*> const* data)
252 {
253  this->input = data;
254 }
255 
256 template<typename T> void LloydtypeCF<T>::setSeeding(std::vector<T> const& seeds)
257 {
258  this->seeding = seeds;
259 }
260 
261 template<typename T> void LloydtypeCF<T>::setSeeding(std::vector<T*> const& seeds)
262 {
263  this->seeding.clear();
264  unsigned int size = seeds.size();
265  for (unsigned int i=0; i<size; i++)
266  this->seeding.push_back(*seeds[i]);
267 }
268 
269 template<typename T> void LloydtypeCF<T>::setMeasure(DissimilarityMeasure<T> const* m)
270 {
271  this->measure = m==NULL?NULL:m->clone();
272 }
273 
275 {
276  this->espace = m==NULL?NULL:m->clone();
277 }
278 
280 {
281  return dynamic_cast<LloydtypeCF<T>*>(a);
282 }
283 
284 }
285 
286 #endif
Data structure for partitions and proxies.
LloydtypeCF specific DoubleSolution.
Definition: lloydtypecf.h:27
virtual ~LloydtypeCF()
Definition: lloydtypecf.h:144
void setEuclideanSpaceProvider(EuclideanSpaceProvider< T > const *)
Definition: lloydtypecf.h:274
DissimilarityMeasure< T > * measure
Definition: lloydtypecf.h:97
virtual void setInput(std::vector< T * > const *)
Definition: lloydtypecf.h:251
LloydtypeCF< T > & operator=(const LloydtypeCF< T > &)
Definition: lloydtypecf.h:129
void setSeeding(std::vector< T > const &)
Definition: lloydtypecf.h:256
LloydtypeCF(std::vector< T * > const *data=NULL, EuclideanSpaceProvider< T > const *s=NULL, DissimilarityMeasure< T > *m=NULL, unsigned int maxIterations=0)
Constructor initializing input, seeding the centers and configuring the algorithm.
Definition: lloydtypecf.h:101
std::vector< T > seeding
Definition: lloydtypecf.h:95
Abstract base class for mechanisms that compute a proxy or representative object for a given set of o...
virtual DissimilarityMeasure< T > * clone() const =0
Clustering feature with representation point.
Definition: cfrentry.h:17
virtual void setMeasure(DissimilarityMeasure< T > const *)
Definition: lloydtypecf.h:269
Abstract base class for algorithms.
Definition: algorithm.h:17
virtual LloydtypeCFSolution< T > * compute()
Definition: lloydtypecf.h:150
Interface to propagate the ability to set a DissimilarityMeasure.
Definition: measuresetter.h:13
unsigned int maxiterations
Definition: lloydtypecf.h:98
static LloydtypeCF< T > * toLloydtypeCF(Algorithm *a)
does a dynamic cast of the given Algorithm to LloydtypeCF
Definition: lloydtypecf.h:279
std::vector< std::vector< T > > proxysets
Abstract base class for dissimilarity measurement.
EuclideanSpaceProvider< T > * espace
Definition: lloydtypecf.h:96
std::vector< T * > const * input
Definition: lloydtypecf.h:94
Indicates that a computation entered an invalid configuration state.
Lloyd type algorithm using clustering features.
Definition: lloydtypecf.h:46
virtual EuclideanSpaceProvider< V > * clone() const =0
std::vector< std::vector< std::vector< T * > > > partitions
Interface to propagate the ability to set input data.
Definition: inputsetter.h:13