1 #ifndef KUMARSABHARWALSEN_H
2 #define KUMARSABHARWALSEN_H
7 #include "../base/algorithm.h"
8 #include "../base/inputsetter.h"
9 #include "../base/measuresetter.h"
10 #include "../datastructure/doublesolution.h"
11 #include "../base/proxygenerator.h"
12 #include "../base/dissimilaritymeasure.h"
13 #include "../sampling/uniformsampling.h"
14 #include "../misc/fixedsizesubsetiterator.h"
15 #include "../measure/pointsetdistance.h"
17 #include "../exception/invalidargumentexception.h"
18 #include "../exception/invalidruntimeconfigurationexception.h"
57 virtual void setInput(std::vector<T*>
const*);
94 cost(std::numeric_limits<double>::max())
101 Tupel irredKMeans(std::set<T*> Q,
size_t m,
size_t k, std::map<size_t, T> C,
double Sum);
113 measure(measure==0 ? 0 : measure->clone()),
114 centroidGenerator(centGen==0 ? 0 : centGen->clone()),
117 numberOfClusters(numberOfClusters),
121 input = std::set<T*>(data->begin(), data->end());
125 measure(kss.measure == 0 ? 0 : kss.measure->clone()),
127 centroidGenerator(kss.centroidGenerator==0 ? 0 : kss.centroidGenerator->clone()),
128 superSetSize(kss.superSetSize),
129 subsetSize(kss.subsetSize),
130 numberOfClusters(kss.numberOfClusters),
137 if(this->measure != 0)
138 delete this->measure;
139 if(this->centroidGenerator != 0)
140 delete this->centroidGenerator;
142 Algorithm::operator= (kss);
145 this->input = kss.
input;
157 if(this->measure != 0)
158 delete this->measure;
159 if(this->centroidGenerator != 0)
160 delete this->centroidGenerator;
165 if(numberOfClusters == 0)
167 if(numberOfClusters > input.size())
169 if(subsetSize == 0 || superSetSize == 0)
172 std::map<size_t, T> empty;
174 for(
size_t i = 1; i <= numberOfClusters; i++)
176 Tupel candidate(irredKMeans(input, i, i, empty, 0));
180 lastCost = result.
cost;
193 step1ds->
partitions.push_back(std::vector<std::vector<T*> >());
194 step1ds->
proxysets.push_back(std::vector<T>());
196 typename std::map<size_t, T>::iterator endC = C.end();
197 for(
typename std::map<size_t, T>::iterator itC = C.begin(); itC != endC; itC++)
199 step1ds->
partitions[0].push_back(std::vector<T*>());
200 step1ds->
proxysets[0].push_back((*itC).second);
204 typename std::set<T*>::iterator endQ = Q.end();
205 for(
typename std::set<T*>::iterator itQ = Q.begin(); itQ != endQ; itQ++)
207 double minDist = measure->dissimilarity(**itQ, C.begin()->second);
209 for(
typename std::map<size_t, T>::iterator itC = C.begin()++; itC != endC; itC++)
211 double tmpDist = measure->dissimilarity(**itQ, itC->second);
212 if(tmpDist < minDist)
215 minIndex = itC->first;
219 step1ds->
partitions[0][minIndex].push_back(*itQ);
222 Tupel step1result(step1ds, Sum);
227 size_t actualSuperSetSize = superSetSize;
228 if(actualSuperSetSize > Q.size())
229 actualSuperSetSize = Q.size();
231 std::vector<T*> qVector(Q.begin(), Q.end());
235 size_t actualSubsetSize = subsetSize;
236 if(actualSubsetSize > supersetSolution->
proxysets[0].size())
237 actualSubsetSize = supersetSolution->
proxysets[0].size();
243 std::vector<T*> currentSubset = subsetIterator.
vector();
244 T centroid = centroidGenerator->generate(currentSubset);
245 std::map<size_t, T> cCentroid(C);
246 cCentroid.insert(std::pair<size_t, T>(C.size(), centroid));
247 Tupel candidate(irredKMeans(Q, m-1, k, cCentroid, Sum));
248 if(candidate.
cost < step2result.
cost)
250 delete step2result.
ds;
251 step2result = candidate;
257 subsetIterator.
next();
263 if(C.size() > 0 && Q.size() > 1)
269 for(
typename std::map<size_t, T>::iterator it = C.begin(); it != C.end(); it++)
270 setC.insert(&(it->second));
272 std::vector<T*> U(Q.begin(), Q.end());
273 typename std::vector<T*>::iterator U_half = U.begin() + U.size()/2;
274 std::nth_element(U.begin(), U_half, U.end(), dts);
275 U.erase(U_half++, U.end());
278 std::set<T*> setU(U.begin(), U.end());
279 Tupel assignedU(irredKMeans(setU, 0, C.size(), C, Sum));
282 for(
typename std::vector<T*>::iterator it = U.begin(); it != U.end(); it++)
284 Tupel step3result(irredKMeans(Q, m, k, C, Sum));
287 step3result.
cost += assignedU.cost;
288 for(
size_t cluster = 0; cluster < assignedU.ds->partitions[0].size(); cluster++)
290 for(
size_t point = 0; point < assignedU.ds->partitions[0][cluster].size(); point++)
291 step3result.
ds->partitions[0][cluster].push_back(assignedU.ds->partitions[0][cluster][point]);
292 step3result.
ds->proxysets[0].push_back(assignedU.ds->proxysets[0][cluster]);
297 if(step2result.
cost < step3result.
cost)
299 delete step3result.
ds;
304 delete step2result.
ds;
323 input = std::set<T*>(data->begin(), data->end());
328 if(this->measure == 0)
329 delete this->measure;
331 this->measure = measure==0 ? 0 : measure->
clone();
336 if(this->centroidGenerator == 0)
337 delete this->centroidGenerator;
339 this->centroidGenerator = centroidGenerator==0 ? 0 : centroidGenerator->
clone();
344 if(subsetSize >= superSetSize)
347 this->superSetSize = superSetSize;
348 this->subsetSize = subsetSize;
Data structure for partitions and proxies.
virtual bool hasMore() const
Returns if the Graycode sequence is incomplete yet or not.
Iterates over all fixed-size subsets of a given superset.
static KumarSabharwalSen< T > * toKumarSabharwalSen(Algorithm *s)
does a dynamic cast of the given Algorithm to FarthestFirstTraversal
unsigned int numberOfClusters
Calculates the (minimum) distance between a point and a set of points.
virtual void setInput(std::vector< T * > const *)
Data structure for discrete proxies.
double getLastCost()
Returns the cost of the last computation.
void setSetSizes(unsigned int superSetSize, unsigned int subsetSize)
Set the superset and subset sizes used for guessing clusters.
virtual void setMeasure(DissimilarityMeasure< T > const *measure)
Used to return a DoubleSolution and its cost.
virtual DoubleSolution< T > * compute()
virtual ProxyGenerator< T > * clone() const =0
std::vector< std::vector< T * > > proxysets
Abstract base class for mechanisms that compute a proxy or representative object for a given set of o...
virtual DissimilarityMeasure< T > * clone() const =0
Tupel(DoubleSolution< T > *ds, double cost)
KumarSabharwalSen(DissimilarityMeasure< T > const *measure=0, ProxyGenerator< T > const *centroidGenerator=0, unsigned int numberOfClusters=2, std::vector< T * > const *input=0)
Constructor for general usage.
virtual std::vector< T * > vector() const
Returns the current subset as a vector.
Abstract base class for algorithms.
Interface to propagate the ability to set a DissimilarityMeasure.
KumarSabharwalSen< T > & operator=(const KumarSabharwalSen< T > &)
Indicates invalid values of arguments.
ProxyGenerator< T > * centroidGenerator
DissimilarityMeasure< T > * measure
KumarSabharwalSen algorithm.
std::vector< std::vector< T > > proxysets
virtual ~KumarSabharwalSen()
Tupel irredKMeans(std::set< T * > Q, size_t m, size_t k, std::map< size_t, T > C, double Sum)
Abstract base class for dissimilarity measurement.
virtual void next()
Generates the next subset.
void setCentroidGenerator(ProxyGenerator< T > const *centroidGenerator)
Set the centroid generator used for cluster center approximation.
Indicates that a computation entered an invalid configuration state.
std::vector< std::vector< std::vector< T * > > > partitions
unsigned int superSetSize