CluE  1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
proxybasedagglo.h
Go to the documentation of this file.
1 #ifndef PROXYBASEDAGGLO_H
2 #define PROXYBASEDAGGLO_H
3 
4 #include "../base/algorithm.h"
5 #include "../base/inputsetter.h"
6 #include "../base/measuresetter.h"
7 #include "../base/dissimilaritymeasure.h"
8 #include "../base/proxygenerator.h"
9 #include "../base/solutionprovider.h"
10 #include "../datastructure/doublesolution.h"
11 
12 #include <vector>
13 
14 namespace CluE
15 {
16 
25 template<typename T> class ProxyBasedAgglo : public Algorithm, public InputSetter<T>, public MeasureSetter<T>
26 {
27 public:
28 
37  {
40  };
41 
42  ProxyBasedAgglo(std::vector<T*> const* = NULL, ProxyGenerator<T>* = NULL,
44 
47  virtual ~ProxyBasedAgglo();
48 
54  virtual DoubleSolution<T>* compute();
55 
56  virtual void setInput(std::vector<T*> const*);
58  virtual void setMeasure(DissimilarityMeasure<T>*);
59  void setMode(int);
60 
66 
67 private:
68  std::vector<T*> const* input;
71  int mode;
72 };
73 
74 template<typename T> ProxyBasedAgglo<T>::ProxyBasedAgglo(std::vector<T*> const* data,
76  generator(g==NULL?NULL:g->clone()), measure(m==NULL?NULL:m->clone()), mode(cm)
77 {
78 }
79 
80 template<typename T> ProxyBasedAgglo<T>::ProxyBasedAgglo(const ProxyBasedAgglo<T>& rhs) :
81  Algorithm(rhs), input(rhs.input), generator(rhs.generator==NULL?NULL:rhs.generator->clone()),
82  measure(rhs.measure==NULL?NULL:rhs.measure->clone()), mode(rhs.mode)
83 {
84 }
85 
87  const ProxyBasedAgglo<T>& rhs)
88 {
89  Algorithm::operator=(rhs);
90  ProxyGenerator<T>* pg = rhs.generator==NULL?NULL:rhs.generator->clone();
91  DissimilarityMeasure<T>* dm = rhs.measure==NULL?NULL:rhs.measure->clone();
92  delete this->generator;
93  this->generator = pg;
94  delete this->measure;
95  this->measure = dm;
96  this->input = rhs.input;
97  this->mode = rhs.mode;
98  return *this;
99 }
100 
102 {
103  delete this->generator;
104  delete this->measure;
105 }
106 
108 {
109  if (this->input==NULL)
110  throw InvalidRuntimeConfigurationException(0, "Input is NULL.");
111  if (this->generator==NULL)
112  throw InvalidRuntimeConfigurationException(2, "ProxyGenerator is NULL.");
113  if (this->measure==NULL)
114  throw InvalidRuntimeConfigurationException(1, "Dissimilarity measure is NULL.");
115 
116  clock_t start, end;
117  start = clock();
118 
119  DoubleSolution<T>* solution = new DoubleSolution<T>();
120 
121  unsigned int N = this->input->size();
122 
123  // create a first partition (partitions[0]) with one cluster for input data object
124  std::clog << "CluE::ProxyBasedAgglo<T>::compute() - starting with " << N << " singleton clusters..." << std::endl;
125  std::vector<std::vector<T*> > partitionzero;
126  for (unsigned int i=0; i<N; i++)
127  {
128  std::vector<T*> singleton;
129  singleton.push_back((*this->input)[i]);
130  partitionzero.push_back(singleton);
131  }
132  solution->partitions.push_back(partitionzero);
133 
134  // compute a proxy for each cluster of the initial partition
135  std::vector<T> zeroproxies;
136  for (unsigned int i=0; i<N; i++)
137  zeroproxies.push_back(generator->generate(solution->partitions[0][i]));
138  solution->proxysets.push_back(zeroproxies);
139 
140  // precompute dissimilarities
141  double* dis = NULL;
142 
143  if (this->mode==Precompute)
144  {
145  std::clog << "CluE::ProxyBasedAgglo<T>::compute() - precomputing dissimilarities..." << std::endl;
146  dis = new double[N*N];
147 
148  for (unsigned int i=0; i<N; i++)
149  for (unsigned int j=0; j<N; j++)
150  {
151  dis[N*i+j] = this->measure->dissimilarity(solution->proxysets[0][i], solution->proxysets[0][j]);
152  }
153  }
154 
155  for (unsigned int n=1; n<N; n++)
156  {
157  std::clog << "CluE::ProxyBasedAgglo<T>::compute() - currently computing round " << n << std::endl;
158  // find the two clusterproxies with minimum dissimilarity
159  int first = -1;
160  int second = -1;
161  double min = std::numeric_limits<double>::infinity();
162  for (unsigned int i=0; i<N-n+1; i++)
163  for (unsigned int j=0; j<N-n+1; j++)
164  {
165  //cout << "i=" << i << ", j=" << j << std::endl;
166  if (i!=j)
167  {
168  double d;
169  if (this->mode==Precompute)
170  d = dis[N*i+j];
171  else
172  d = this->measure->dissimilarity(solution->proxysets[n-1][i], solution->proxysets[n-1][j]);
173 
174  if (d < min)
175  {
176  min = d;
177  first = i;
178  second = j;
179  }
180  }
181  }
182 
183  if (second<first)
184  {
185  int i = first;
186  first = second;
187  second = i;
188  }
189  //cout << "merging cluster " << first << " and " << second << " (min. dissimilarity of "
190  // << min << " between " << solution->proxysets[n-1][first] << " and "
191  // << solution->proxysets[n-1][second] << std::endl;
192 
193  // merge found clusters at next granularity (merged cluster at index first and last cluster moved to index second)
194  solution->partitions.push_back(solution->partitions[n-1]);
195  while (!solution->partitions[n][second].empty())
196  {
197  solution->partitions[n][first].push_back(solution->partitions[n][second].back());
198  solution->partitions[n][second].pop_back();
199  }
200  solution->partitions[n][second] = solution->partitions[n].back();
201  solution->partitions[n].pop_back();
202 
203  // compute proxies for merged cluster at next granularity
204  solution->proxysets.push_back(solution->proxysets[n-1]);
205  solution->proxysets[n][first] = generator->generate(solution->partitions[n][first]);
206  solution->proxysets[n][second] = solution->proxysets[n].back();
207  solution->proxysets[n].pop_back();
208 
209  if (this->mode==Precompute)
210  {
211  // update dissimilarities
212  for (unsigned int i=0; i<N-n; i++)
213  {
214  dis[N*second+i] = dis[N*(N-n)+i];
215  dis[N*i+second] = dis[N*i+(N-n)];
216  }
217 
218  for (unsigned int i=0; i<N-n; i++)
219  {
220  dis[N*first+i] = this->measure->dissimilarity(solution->proxysets[n][first], solution->proxysets[n][i]);
221  dis[N*i+first] = this->measure->dissimilarity(solution->proxysets[n][i], solution->proxysets[n][first]);
222  }
223  }
224  }
225 
226  end = clock();
227  solution->seconds=double(end-start)/CLOCKS_PER_SEC;
228  std::clog << "CluE::ProxyBasedAgglo<T>::compute() - finished" << std::endl;
229  return solution;
230 }
231 
232 template<typename T> void ProxyBasedAgglo<T>::setInput(std::vector<T*> const* data)
233 {
234  this->input = data;
235 }
236 
238 {
239  this->generator = g==NULL?NULL:g->clone();
240 }
241 
243 {
244  this->measure = m==NULL?NULL:m->clone();
245 }
246 
247 template<typename T> void ProxyBasedAgglo<T>::setMode(int m)
248 {
249  this->mode = m;
250 }
251 
253 {
254  return dynamic_cast<ProxyBasedAgglo<T>*>(s);
255 }
256 
257 }
258 
259 #endif
Data structure for partitions and proxies.
DissimilarityMeasure< T > * measure
virtual void setMeasure(DissimilarityMeasure< T > *)
ProxyBasedAgglo< T > & operator=(const ProxyBasedAgglo< T > &)
virtual ProxyGenerator< T > * clone() const =0
ComputationMode
On demand computation / pre-computation.
ProxyGenerator< T > * generator
static ProxyBasedAgglo< T > * toProxyBasedAgglo(Algorithm *s)
Does a dynamic cast of the given Algorithm to ProxyBasedAgglo.
Abstract base class for mechanisms that compute a proxy or representative object for a given set of o...
std::vector< T * > const * input
virtual DoubleSolution< T > * compute()
virtual DissimilarityMeasure< T > * clone() const =0
Abstract base class for algorithms.
Definition: algorithm.h:17
Interface to propagate the ability to set a DissimilarityMeasure.
Definition: measuresetter.h:13
slow, needs no extra memory
void setGenerator(ProxyGenerator< T > *)
std::vector< std::vector< T > > proxysets
Abstract base class for dissimilarity measurement.
Indicates that a computation entered an invalid configuration state.
virtual void setInput(std::vector< T * > const *)
Agglomerative proxy-based clustering algorithm.
ProxyBasedAgglo(std::vector< T * > const *=NULL, ProxyGenerator< T > *=NULL, DissimilarityMeasure< T > *=NULL, ComputationMode=OnDemand)
std::vector< std::vector< std::vector< T * > > > partitions
Interface to propagate the ability to set input data.
Definition: inputsetter.h:13