CluE  1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cftree.h
Go to the documentation of this file.
1 #ifndef CFTREE_H
2 #define CFTREE_H
3 
4 #include <iostream>
5 #include <stack>
6 #include <utility>
7 
8 #include <lemon/list_graph.h>
9 
10 #include "cfentry.h"
11 #include "../base/dissimilaritymeasure.h"
12 #include "../base/attributecalculator.h"
13 #include "../base/euclideanspaceprovider.h"
14 #include "../exception/invalidargumentexception.h"
15 #include "../exception/invalidruntimeconfigurationexception.h"
16 
17 namespace CluE
18 {
19 
34 template<typename T> class CFTree
35 {
36 public:
37  class ThresholdCalculator;
38 
53  int innerBranching, int leafBranching, double threshold, int maxSize);
54 
55  CFTree(const CFTree<T>&);
56 
58 
59  virtual ~CFTree();
60 
66  void insert(T point);
67 
73  void insert(CFEntry<T> feature);
74 
79  void rebuild(double newThreshold);
80 
84  std::vector<CFEntry<T> > leafEntries();
85 
89  int size() const;
90 
94  size_t numberOfPoints() const;
95 
97  {
101  };
102 
106  struct ThresholdCalculator : public std::unary_function<double, double>
107  {
108  virtual double operator()(double oldThreshold) = 0;
109  };
110 
111 #ifdef CLUE_GRAPH
112 
115  void print(std::ostream& os);
116 #endif
117 
118 private:
122  lemon::ListDigraph::Node addNode();
123 
127  void eraseNode(lemon::ListDigraph::Node node);
128 
132  void rebuildTraverse(lemon::ListDigraph::Node currentOldNode, CFTree<T>& newTree, lemon::ListDigraph::Node currentNewNode);
133 
137  void leafEntriesTraverse(lemon::ListDigraph::Node currentNode, std::vector<CFEntry<T> >* vec);
138 
142  void clean(lemon::ListDigraph::Node node);
143 
150  std::pair<lemon::ListDigraph::Node, lemon::ListDigraph::Node> split(lemon::ListDigraph::Node node, CFEntry<T>* keepTrack = 0, bool fillUp = false);
151 
158  lemon::ListDigraph::Node merge(lemon::ListDigraph::Node node1, lemon::ListDigraph::Node node2);
159 
160 #ifdef CLUE_GRAPH
161  void print(std::ostream& os, lemon::ListDigraph::Node node);
162 #endif
163 
166  double threshold;
167  int maxSize;
168 
173 
174  lemon::ListDigraph tree;
175  lemon::ListDigraph::ArcMap<CFEntry<T>*> innerFeatures;
176  lemon::ListDigraph::NodeMap<std::vector<CFEntry<T>*>*> leafFeatures;
177  lemon::ListDigraph::Node root;
178 };
179 
180 //TODO Require valid EuclideanSpaceProvider etc.
181 template<typename T> CFTree<T>::CFTree(EuclideanSpaceProvider<T> * euclidianProvider,
182  DissimilarityMeasure<CFEntry<T> > *clusterDistanceMeasure,
183  AttributeCalculator<CFEntry<T> > *thresholdAttribute,
184  ThresholdCalculator *thresholdFunctor,
185  int innerBranching, int leafBranching, double threshold, int maxSize) :
186  euclidianProvider(euclidianProvider->clone()),
187  clusterDistanceMeasure(clusterDistanceMeasure->clone()),
188  thresholdAttribute(thresholdAttribute->clone()),
189  thresholdFunctor(thresholdFunctor),
190  innerBranching(innerBranching),
191  leafBranching(leafBranching),
192  threshold(threshold),
193  maxSize(maxSize),
194  tree(),
195  innerFeatures(tree),
196  leafFeatures(tree),
197  root(addNode())
198 {
199 }
200 
201 template<typename T> CFTree<T>::CFTree(const CFTree<T>& x) :
202  euclidianProvider(x.euclidianProvider->clone()),
203  clusterDistanceMeasure(x.clusterDistanceMeasure->clone()),
204  thresholdAttribute(x.thresholdAttribute->clone()),
205  thresholdFunctor(x.thresholdFunctor),
206  innerBranching(x.innerBranching),
207  leafBranching(x.leafBranching),
208  maxSize(x.maxSize),
209  threshold(x.threshold),
210  tree(),
211  innerFeatures(tree),
212  leafFeatures(tree)
213 {
214  using namespace lemon;
215 
216  DigraphCopy<ListDigraph, ListDigraph> cg(x.tree, tree);
217  cg.arcMap(x.innerFeatures, innerFeatures);
218  cg.nodeMap(x.leafFeatures, leafFeatures);
219  cg.node(x.root, root);
220  cg.run();
221 
222  for (ListDigraph::ArcIt a(tree); a != INVALID; ++a)
223  {
224  if(innerFeatures[a] != 0)
225  innerFeatures[a] = new CFEntry<T>(*innerFeatures[a]);
226  }
227  for (ListDigraph::NodeIt n(tree); n != INVALID; ++n)
228  {
229  if(leafFeatures[n] != 0)
230  {
231  leafFeatures[n] = new std::vector<CFEntry<T>*>(*leafFeatures[n]);
232  for(typename std::vector<CFEntry<T>*>::iterator it = leafFeatures[n]->begin(); it != leafFeatures[n]->end(); ++it)
233  *it = new CFEntry<T>(**it);
234  }
235  }
236 }
237 
238 template<typename T> CFTree<T>& CFTree<T>::operator= (const CFTree<T>& x)
239 {
240  using namespace lemon;
241 
242  if(this == &x)
243  return *this;
244 
245  // Delete...
246  for (ListDigraph::ArcIt a(tree); a != INVALID; ++a)
247  {
248  if(innerFeatures[a] != 0)
249  {
250  delete innerFeatures[a];
251  innerFeatures[a] = 0;
252  }
253  }
254  for (ListDigraph::NodeIt n(tree); n != INVALID; ++n)
255  {
256  if(leafFeatures[n] != 0)
257  {
258  for(typename std::vector<CFEntry<T>*>::iterator it = leafFeatures[n]->begin(); it != leafFeatures[n]->end(); ++it)
259  delete *it;
260  delete leafFeatures[n];
261  leafFeatures[n] = 0;
262  }
263  }
264  delete euclidianProvider;
265  delete clusterDistanceMeasure;
266  delete thresholdAttribute;
267 
268  // Copy...
275  maxSize = x.maxSize;
276  threshold = x.threshold;
277 
278  DigraphCopy<ListDigraph, ListDigraph> cg(x.tree, tree);
279  cg.arcMap(x.innerFeatures, innerFeatures);
280  cg.nodeMap(x.leafFeatures, leafFeatures);
281  cg.node(x.root, root);
282  cg.run();
283 
284  for (ListDigraph::ArcIt a(tree); a != INVALID; ++a)
285  {
286  if(innerFeatures[a] != 0)
287  innerFeatures[a] = new CFEntry<T>(*innerFeatures[a]);
288  }
289  for (ListDigraph::NodeIt n(tree); n != INVALID; ++n)
290  {
291  if(leafFeatures[n] != 0)
292  {
293  leafFeatures[n] = new std::vector<CFEntry<T>*>(*leafFeatures[n]);
294  for(typename std::vector<CFEntry<T>*>::iterator it = leafFeatures[n]->begin(); it != leafFeatures[n]->end(); ++it)
295  *it = new CFEntry<T>(**it);
296  }
297  }
298 }
299 
300 template<typename T> CFTree<T>::~CFTree()
301 {
302  using namespace lemon;
303 
304  for (ListDigraph::ArcIt a(tree); a != INVALID; ++a)
305  {
306  if(innerFeatures[a] != 0)
307  {
308  delete innerFeatures[a];
309  innerFeatures[a] = 0;
310  }
311  }
312  for (ListDigraph::NodeIt n(tree); n != INVALID; ++n)
313  {
314  if(leafFeatures[n] != 0)
315  {
316  for(typename std::vector<CFEntry<T>*>::iterator it = leafFeatures[n]->begin(); it != leafFeatures[n]->end(); ++it)
317  delete *it;
318  delete leafFeatures[n];
319  leafFeatures[n] = 0;
320  }
321  }
322  delete euclidianProvider;
323  delete clusterDistanceMeasure;
324  delete thresholdAttribute;
325 }
326 
327 template<typename T> void CFTree<T>::insert(T point)
328 {
329  CFEntry<T> pointFeature(1, point, point*point);
330  insert(pointFeature);
331 }
332 
333 template<typename T> void CFTree<T>::insert(CFEntry<T> pointFeature)
334 {
335  using namespace lemon;
336 
337  // 1. Identify matching leaf
338  ListDigraph::Node currentNode = root;
339  while(countOutArcs(tree, currentNode) > 0)
340  {
341  ListDigraph::Node minNode = INVALID;
342  ListDigraph::Arc minArc = INVALID;
343  double minDist = -1;
344  for (ListDigraph::OutArcIt arc(tree, currentNode); arc != INVALID; ++arc)
345  {
346  double dist = clusterDistanceMeasure->dissimilarity(*innerFeatures[arc], pointFeature);
347  if(dist < minDist || minNode == INVALID)
348  {
349  minNode = tree.target(arc);
350  minArc = arc;
351  minDist = dist;
352  }
353  }
354  currentNode = minNode;
355  }
356 
357  // 2. Modify leaf
358  std::vector<CFEntry<T>*>& leafEntries = *leafFeatures[currentNode];
359  CFEntry<T>* minEntry = 0;
360  double minDist = -1;
361  for (typename std::vector<CFEntry<T>*>::iterator it = leafEntries.begin(); it!=leafEntries.end(); it++)
362  {
363  double dist = clusterDistanceMeasure->dissimilarity(**it, pointFeature);
364  if(dist < minDist || minEntry == 0)
365  {
366  minEntry = *it;
367  minDist = dist;
368  }
369  }
370 
371  bool lastSplitAtLeaf = true;
372  bool splitted = false;
373  std::pair<lemon::ListDigraph::Node, lemon::ListDigraph::Node> lastSplitPair;
374  if(minEntry != 0 && thresholdAttribute->calculate(*minEntry + pointFeature) <= threshold)
375  {
376  // Update CFEntry
377  *minEntry += pointFeature;
378  }
379  else
380  {
381  // Add new CFEntry
382  CFEntry<T>* newEntry = new CFEntry<T>(pointFeature);
383  leafEntries.push_back(newEntry);
384  if(leafEntries.size() > leafBranching)
385  {
386  lastSplitPair = split(currentNode, newEntry);
387  currentNode = lastSplitPair.first;
388  splitted = true;
389  }
390  }
391 
392  // 3. Modify path
393  while(countInArcs(tree, currentNode) > 0)
394  {
395  ListDigraph::InArcIt arc(tree, currentNode);
396 
397  currentNode = tree.source(arc);
398  if(splitted && countOutArcs(tree, currentNode) > innerBranching)
399  {
400  lastSplitPair = split(currentNode, innerFeatures[arc]);
401  currentNode = lastSplitPair.first;
402  }
403  else if(countOutArcs(tree, currentNode) > innerBranching)
404  {
405  // DEBUG
406  throw 0;
407  }
408  else if(splitted)
409  {
410  splitted = false;
411 
412  if(countOutArcs(tree, currentNode) > 2 && !lastSplitAtLeaf)
413  {
414  ListDigraph::Arc minArc1 = INVALID;
415  ListDigraph::Arc minArc2 = INVALID;
416  double minDist = -1;
417  for (ListDigraph::OutArcIt arc1(tree, currentNode); arc1 != INVALID; ++arc1)
418  {
419  for (ListDigraph::OutArcIt arc2(arc1); arc2 != INVALID; ++arc2)
420  {
421  if(arc1 == arc2)
422  continue;
423  double dist = clusterDistanceMeasure->dissimilarity(*innerFeatures[arc1], *innerFeatures[arc2]);
424  if(dist < minDist || minArc1 == INVALID)
425  {
426  minArc1 = arc1;
427  minArc2 = arc2;
428  minDist = dist;
429  }
430  }
431  }
432  if(!(tree.target(minArc1) == lastSplitPair.first && tree.target(minArc2) == lastSplitPair.second) &&
433  !(tree.target(minArc1) == lastSplitPair.second && tree.target(minArc2) == lastSplitPair.first))
434  {
435  ListDigraph::Node mergedNode = merge(tree.target(minArc1), tree.target(minArc2));
436  if(countOutArcs(tree, mergedNode) > innerBranching)
437  lastSplitPair = split(mergedNode, 0, true);
438  }
439  }
440  }
441  else
442  {
443  // !splitted
444  *innerFeatures[arc] += pointFeature;
445  }
446  lastSplitAtLeaf = false;
447  }
448 
449  //Rebuild tree if needed
450  while(this->size() > maxSize)
451  {
452  double th = (*thresholdFunctor)(threshold);
453  rebuild(th);
454  }
455 }
456 
457 template<typename T> void CFTree<T>::rebuild(double newThreshold)
458 {
459  threshold = newThreshold;
460  if(countNodes(tree) > 1)
461  {
463  rebuildTraverse(root, newTree, newTree.root);
464  lemon::ListDigraph::Node oldRoot = root;
465  *this = newTree;
466  int s = size();
467  clean(oldRoot);
468  int t = size();
469  int a = 0;
470  }
471 }
472 
473 template<typename T> void CFTree<T>::rebuildTraverse(lemon::ListDigraph::Node oldNode, CFTree<T>& newCFTree, lemon::ListDigraph::Node newNode)
474 {
475  using namespace lemon;
476 
477  if(countOutArcs(tree, oldNode) > 0)
478  {
479  // Inner node
480  for (ListDigraph::OutArcIt arc(tree, oldNode); arc != INVALID; ++arc)
481  {
482  ListDigraph::Node oldChild = tree.target(arc);
483  ListDigraph::Node newChild = newCFTree.addNode();
484  ListDigraph::Arc newArc = newCFTree.tree.addArc(newNode, newChild);
485  newCFTree.innerFeatures[newArc] = new CFEntry<T>(0, newCFTree.euclidianProvider->nullVector(), 0);
486  rebuildTraverse(oldChild, newCFTree, newChild);
487  }
488  }
489  else
490  {
491  // Leaf
492  for (typename std::vector<CFEntry<T>*>::iterator currentEntryIt = leafFeatures[oldNode]->begin(); currentEntryIt!=leafFeatures[oldNode]->end(); currentEntryIt++)
493  {
494  // 1. Identify NewClosestPath leaf
495  ListDigraph::Node newTravNode = newCFTree.root;
496  while(countOutArcs(newCFTree.tree, newTravNode) > 0)
497  {
498  ListDigraph::Node minNode = INVALID;
499  ListDigraph::Arc minArc = INVALID;
500  double minClosestDist = -1;
501  for (ListDigraph::OutArcIt arc(newCFTree.tree, newTravNode); arc != INVALID; ++arc)
502  {
503  double dist = clusterDistanceMeasure->dissimilarity(*newCFTree.innerFeatures[arc], **currentEntryIt);
504  if((dist < minClosestDist || minNode == INVALID) && (*newCFTree.innerFeatures[arc]).number != 0) //number != 0 --> don't add to empty NewCurrentPath
505  {
506  minNode = newCFTree.tree.target(arc);
507  minArc = arc;
508  minClosestDist = dist;
509  }
510  }
511  if(minNode != INVALID)
512  newTravNode = minNode;
513  else
514  break;
515  }
516 
517  // 2. Identify best CFEntrys
518  std::vector<CFEntry<T>*>& leafClosestEntries = *newCFTree.leafFeatures[newTravNode];
519  CFEntry<T>* minClosestEntry = 0;
520  double minClosestDist = -1;
521  for (typename std::vector<CFEntry<T>*>::iterator newEntryCandidateIt = leafClosestEntries.begin(); newEntryCandidateIt!=leafClosestEntries.end(); newEntryCandidateIt++)
522  {
523  double dist = clusterDistanceMeasure->dissimilarity(**newEntryCandidateIt, **currentEntryIt);
524  if(dist < minClosestDist || minClosestEntry == 0)
525  {
526  minClosestEntry = *newEntryCandidateIt;
527  minClosestDist = dist;
528  }
529  }
530  std::vector<CFEntry<T>*>& leafCurrentEntries = *newCFTree.leafFeatures[newNode];
531  CFEntry<T>* minCurrentEntry = 0;
532  double minCurrrentDist = -1;
533  for (typename std::vector<CFEntry<T>*>::iterator newEntryCandidateIt = leafCurrentEntries.begin(); newEntryCandidateIt!=leafCurrentEntries.end(); newEntryCandidateIt++)
534  {
535  double dist = clusterDistanceMeasure->dissimilarity(**newEntryCandidateIt, **currentEntryIt);
536  if(dist < minCurrrentDist || minCurrentEntry == 0)
537  {
538  minCurrentEntry = *newEntryCandidateIt;
539  minCurrrentDist = dist;
540  }
541  }
542 
543  // 3. Decide where to add the OldCurrentPath's entry
544  if(minClosestEntry != 0 && newTravNode != newNode && minClosestEntry != 0 && newCFTree.thresholdAttribute->calculate(*minClosestEntry + **currentEntryIt) <= threshold)
545  {
546  // Update CFEntry at new position (NewClosestPath)
547  *minClosestEntry += **currentEntryIt;
548  }
549  else if(minClosestEntry != 0 && newTravNode != newNode && leafClosestEntries.size() < newCFTree.leafBranching)
550  {
551  // Add new CFEntry at new position (NewClosestPath)
552  leafClosestEntries.push_back(new CFEntry<T>(**currentEntryIt));
553  }
554  else if(minCurrentEntry != 0 && newCFTree.thresholdAttribute->calculate(*minCurrentEntry + **currentEntryIt) <= threshold)
555  {
556  // Update CFEntry at old position (NewCurrentPath)
557  *minCurrentEntry += **currentEntryIt;
558  newTravNode = newNode;
559  }
560  else
561  {
562  // Add new CFEntry at old position (NewCurrentPath)
563  newCFTree.leafFeatures[newNode]->push_back(new CFEntry<T>(**currentEntryIt));
564  newTravNode = newNode;
565  }
566 
567  // 4. Update parent path
568  while(newTravNode != newCFTree.root)
569  {
570  ListDigraph::InArcIt parentArc(newCFTree.tree, newTravNode);
571  *newCFTree.innerFeatures[parentArc] += **currentEntryIt;
572  newTravNode = newCFTree.tree.source(parentArc);
573  }
574  }
575  }
576 }
577 
578 template<typename T> std::vector<CFEntry<T> > CFTree<T>::leafEntries()
579 {
580  std::vector<CFEntry<T> > ret;
581  leafEntriesTraverse(root, &ret);
582  return ret;
583 }
584 
585 template<typename T> void CFTree<T>::leafEntriesTraverse(lemon::ListDigraph::Node currentNode, std::vector<CFEntry<T> >* vec)
586 {
587  using namespace lemon;
588 
589  if(countOutArcs(tree, currentNode) > 0)
590  {
591  // Inner node
592  for (ListDigraph::OutArcIt arc(tree, currentNode); arc != INVALID; ++arc)
593  leafEntriesTraverse(tree.target(arc), vec);
594  }
595  else
596  {
597  // Leaf
598  std::vector<CFEntry<T>*>& leafEntries = *leafFeatures[currentNode];
599  for (typename std::vector<CFEntry<T>*>::iterator it = leafEntries.begin(); it!=leafEntries.end(); it++)
600  {
601  CFEntry<T> ins(**it);
602  vec->push_back(ins);
603  }
604  }
605 }
606 
607 template<typename T> void CFTree<T>::clean(lemon::ListDigraph::Node node)
608 {
609  using namespace lemon;
610 
611  if(countOutArcs(tree, node) > 0)
612  {
613  // Inner node
614  for (ListDigraph::OutArcIt arc(tree, node); arc != INVALID; ++arc)
615  clean(tree.target(arc));
616  if(countOutArcs(tree, node) == 0)
617  {
618  ListDigraph::InArcIt parentArc(tree, node);
619  delete innerFeatures[parentArc];
620  innerFeatures[parentArc] = 0;
621  tree.erase(parentArc);
622  eraseNode(node);
623  }
624  }
625  else
626  {
627  // Leaf
628  if(leafFeatures[node]->size() == 0)
629  {
630  ListDigraph::InArcIt parentArc(tree, node);
631  tree.erase(parentArc);
632  eraseNode(node);
633  }
634  }
635 }
636 
637 template<typename T> int CFTree<T>::size() const
638 {
639  return lemon::countNodes(tree);
640 }
641 
642 template<typename T> size_t CFTree<T>::numberOfPoints() const
643 {
644  size_t num = 0;
645  for (lemon::ListDigraph::OutArcIt arc(tree, root); arc != lemon::INVALID; ++arc)
646  num += innerFeatures[arc]->number;
647  return num;
648 }
649 
650 template<typename T> std::pair<lemon::ListDigraph::Node, lemon::ListDigraph::Node> CFTree<T>::split(lemon::ListDigraph::Node node, CFEntry<T>* keepTrack, bool fillUp)
651 {
652  using namespace lemon;
653 
654  std::pair<lemon::ListDigraph::Node, lemon::ListDigraph::Node> ret;
655 
656  // Split of inner node or leaf?
657  if(countOutArcs(tree, node) > 0)
658  {
659  // Inner node
660  // Identify farthest pair
661  ListDigraph::Arc maxArc1 = INVALID;
662  ListDigraph::Arc maxArc2 = INVALID;
663  double maxDist = -1;
664  for (ListDigraph::OutArcIt arc1(tree, node); arc1 != INVALID; ++arc1)
665  {
666  for (ListDigraph::OutArcIt arc2(arc1); arc2 != INVALID; ++arc2)
667  {
668  if(arc1 == arc2)
669  continue;
670  double dist = clusterDistanceMeasure->dissimilarity(*innerFeatures[arc1], *innerFeatures[arc2]);
671  if(dist > maxDist)
672  {
673  maxArc1 = arc1;
674  maxArc2 = arc2;
675  maxDist = dist;
676  }
677  }
678  }
679 
680  // Decide to which of the two new child nodes the clustering features have to be assigned
681  ListDigraph::Node child1 = addNode();
682  ListDigraph::Node child2 = addNode();
683  T nullVector = euclidianProvider->nullVector();
684  CFEntry<T> ent1(0, nullVector, 0);
685  CFEntry<T> ent2(0, nullVector, 0);
686  std::vector<ListDigraph::Arc> to1;
687  std::vector<ListDigraph::Arc> to2;
688  // Default pair / check for tracking maxArcs
689  bool trackChild1 = innerFeatures[maxArc1] == keepTrack ? true : false;
690  // Insert seeds
691  to1.push_back(maxArc1);
692  to2.push_back(maxArc2);
693  ent1 += *innerFeatures[maxArc1];
694  ent2 += *innerFeatures[maxArc2];
695  for (ListDigraph::OutArcIt arc(tree, node); arc != INVALID; ++arc)
696  {
697  if(arc == maxArc1 || arc == maxArc2)
698  continue;
700  bool hasTo1Space = !fillUp || to1.size() < innerBranching;
701  bool hasTo2Space = !fillUp || to2.size() < innerBranching;
702  if((isNearerToMaxArc1 && hasTo1Space) || (!hasTo2Space))
703  {
704  to1.push_back(arc);
705  ent1 += *innerFeatures[arc];
706  if(innerFeatures[arc] == keepTrack)
707  trackChild1 = true;
708  }
709  else
710  {
711  to2.push_back(arc);
712  ent2 += *innerFeatures[arc];
713  if(innerFeatures[arc] == keepTrack)
714  trackChild1 = false;
715  }
716  }
717  //Prepare return pair
718  if(trackChild1)
719  {
720  ret.first = child1;
721  ret.second = child2;
722  }
723  else
724  {
725  ret.first = child2;
726  ret.second = child1;
727  }
728 
729  // Change sources from old to new nodes
730  for (typename std::vector<ListDigraph::Arc>::iterator it = to1.begin(); it != to1.end(); it++)
731  tree.changeSource(*it, child1);
732  for (typename std::vector<ListDigraph::Arc>::iterator it = to2.begin(); it != to2.end(); it++)
733  tree.changeSource(*it, child2);
734 
735  // DEBUG Consistency check
736  if(countOutArcs(tree, node) != 0)
737  throw 0;
738 
739  // Update tree
740  ListDigraph::Node parent = INVALID;
741  if(node == root)
742  {
743  // Root
744  parent = addNode();
745  this->root = parent;
746  eraseNode(node);
747  }
748  else
749  {
750  // Not root
751  ListDigraph::InArcIt arc(tree, node);
752  parent = tree.source(arc);
753  tree.erase(arc);
754  eraseNode(node);
755  }
756 
757  ListDigraph::Arc childArc1 = tree.addArc(parent, child1);
758  ListDigraph::Arc childArc2 = tree.addArc(parent, child2);
759  innerFeatures[childArc1] = new CFEntry<T>(ent1);
760  innerFeatures[childArc2] = new CFEntry<T>(ent2);
761  }
762  else
763  {
764  // Leaf
765  // Identify farthest pair
766  CFEntry<T>* maxEnt1 = 0;
767  CFEntry<T>* maxEnt2 = 0;
768  double maxDist = -1;
769  std::vector<CFEntry<T>*>& entries = *leafFeatures[node];
770  for (typename std::vector<CFEntry<T>*>::iterator ent1 = entries.begin(); ent1 != entries.end(); ++ent1)
771  {
772  for (typename std::vector<CFEntry<T>*>::iterator ent2(ent1); ent2 != entries.end(); ++ent2)
773  {
774  if(*ent1 == *ent2)
775  continue;
776  double dist = clusterDistanceMeasure->dissimilarity(**ent1, **ent2);
777  if(dist > maxDist)
778  {
779  maxEnt1 = *ent1;
780  maxEnt2 = *ent2;
781  maxDist = dist;
782  }
783  }
784  }
785 
786  // Assign clustering features to the two new child nodes
787  ListDigraph::Node child1 = addNode();
788  ListDigraph::Node child2 = addNode();
789  T nullVector = euclidianProvider->nullVector();
790  CFEntry<T> ent1(0, nullVector, 0);
791  CFEntry<T> ent2(0, nullVector, 0);
792  std::vector<CFEntry<T>*>& to1 = * new std::vector<CFEntry<T>*>();
793  std::vector<CFEntry<T>*>& to2 = * new std::vector<CFEntry<T>*>();
794  for (typename std::vector<CFEntry<T>*>::iterator ent = entries.begin(); ent != entries.end(); ++ent)
795  {
796  bool isMaxEnt1 = *ent == maxEnt1;
797  bool isNotMaxEnt2 = *ent != maxEnt2;
798  bool isNearerToMaxEnt1 = clusterDistanceMeasure->dissimilarity(**ent, *maxEnt1) < clusterDistanceMeasure->dissimilarity(**ent, *maxEnt2);
799  bool isTo1Full = to1.size() >= innerBranching;
800  bool isTo2Full = to2.size() >= innerBranching;
801  if((isMaxEnt1 || (isNotMaxEnt2 && isNearerToMaxEnt1)) && (!fillUp || !isTo1Full || isTo2Full))
802  {
803  to1.push_back(*ent);
804  ent1 += **ent;
805  if(*ent == keepTrack)
806  {
807  ret.first = child1;
808  ret.second = child2;
809  }
810  }
811  else
812  {
813  to2.push_back(*ent);
814  ent2 += **ent;
815  if(*ent == keepTrack)
816  {
817  ret.first = child2;
818  ret.second = child1;
819  }
820  }
821  }
822  entries.clear();
823 
824  // Update tree
825  ListDigraph::Node parent = INVALID;
826  if(node == root)
827  {
828  // Root
829  parent = addNode();
830  this->root = parent;
831  eraseNode(node);
832  }
833  else
834  {
835  // Not root
836  ListDigraph::InArcIt arc(tree, node);
837  parent = tree.source(arc);
838  tree.erase(arc);
839  eraseNode(node);
840  }
841 
842  leafFeatures[child1] = &to1;
843  leafFeatures[child2] = &to2;
844  ListDigraph::Arc childArc1 = tree.addArc(parent, child1);
845  ListDigraph::Arc childArc2 = tree.addArc(parent, child2);
846  innerFeatures[childArc1] = new CFEntry<T>(ent1);
847  innerFeatures[childArc2] = new CFEntry<T>(ent2);
848  }
849 
850  return ret;
851 }
852 
853 template<typename T> lemon::ListDigraph::Node CFTree<T>::merge(lemon::ListDigraph::Node node1, lemon::ListDigraph::Node node2)
854 {
855  using namespace lemon;
856 
857  //Same node?
858  if(node1 == node2)
859  throw InvalidArgumentException(CANNOT_MERGE_NODE_ITSELF, "Cannot merge node with itself.", "node1/node2");
860  //Same parent?
861  if(tree.source(ListDigraph::InArcIt(tree, node1)) != tree.source(ListDigraph::InArcIt(tree, node2)))
862  throw InvalidArgumentException(CANNOT_MERGE_NODE_DIFFERENT_PARENTS, "Cannot merge nodes with different parents.", "node1/node2");
863 
864  // Mergingth of inner node or leaf?
865  if(countOutArcs(tree, node1) > 0)
866  {
867  // Inner node
868  ListDigraph::Node child = addNode();
869  T nullVector = euclidianProvider->nullVector();
870  CFEntry<T> ent(0, nullVector, 0);
871  ent += *innerFeatures[ListDigraph::InArcIt(tree, node1)];
872  ent += *innerFeatures[ListDigraph::InArcIt(tree, node2)];
873  std::vector<ListDigraph::Arc> toChange;
874  for (ListDigraph::OutArcIt arc(tree, node1); arc != INVALID; ++arc)
875  toChange.push_back(arc);
876  for (ListDigraph::OutArcIt arc(tree, node2); arc != INVALID; ++arc)
877  toChange.push_back(arc);
878  for (typename std::vector<ListDigraph::Arc>::iterator it = toChange.begin(); it != toChange.end(); it++)
879  tree.changeSource(*it, child);
880 
881  // Update tree
882  ListDigraph::InArcIt arc1(tree, node1);
883  ListDigraph::InArcIt arc2(tree, node2);
884  ListDigraph::Node parent = tree.source(arc1);
885  tree.erase(arc1);
886  tree.erase(arc2);
887  eraseNode(node1);
888  eraseNode(node2);
889  ListDigraph::Arc childArc = tree.addArc(parent, child);
890  innerFeatures[childArc] = new CFEntry<T>(ent);
891 
892  return child;
893  }
894  else
895  {
896  // Leaf
897  throw InvalidRuntimeConfigurationException(CANNOT_MERGE_LEAF_NODES, "Cannot merge leaf entries.");
898  }
899 }
900 
901 template<typename T> lemon::ListDigraph::Node CFTree<T>::addNode()
902 {
903  lemon::ListDigraph::Node node = tree.addNode();
904  leafFeatures[node] = new std::vector<CFEntry<T>*>();
905  return node;
906 }
907 
908 template<typename T> void CFTree<T>::eraseNode(lemon::ListDigraph::Node node)
909 {
910  if(leafFeatures[node] != 0)
911  {
912  for (typename std::vector<CFEntry<T>*>::iterator ent = leafFeatures[node]->begin(); ent != leafFeatures[node]->end(); ++ent)
913  delete *ent;
914  delete leafFeatures[node];
915  leafFeatures[node] = 0;
916  }
917  tree.erase(node);
918 }
919 
920 #ifdef CLUE_GRAPH
921 template<typename T> void CFTree<T>::print(std::ostream& os)
922 {
923  os << "digraph G{\n";
924  os << "node [shape=record];\n";
925  print(os, root);
926  os << "}\n";
927 }
928 
929 template<typename T> void CFTree<T>::print(std::ostream& os, lemon::ListDigraph::Node node)
930 {
931  using namespace lemon;
932 
933  int id = tree.id(node);
934  int outArcs = countOutArcs(tree, node);
935 
936  os << "node" << id << "[label=\"";
937  if(outArcs > 0)
938  {
939  // Inner node
940  int fvalue = 0;
941  for (ListDigraph::OutArcIt arc(tree, node); arc != INVALID; ++arc)
942  {
943  if(fvalue > 0)
944  os << "|";
945  os << "<f" << fvalue << "> " << innerFeatures[arc]->number << "," << innerFeatures[arc]->LS << "," << innerFeatures[arc]->SS;
946  fvalue++;
947  }
948  }
949  else
950  {
951  // Leaf
952  std::vector<CFEntry<T>*>& entries = *leafFeatures[node];
953  int fvalue = 0;
954  for (typename std::vector<CFEntry<T>*>::iterator ent = entries.begin(); ent != entries.end(); ++ent)
955  {
956  if(fvalue > 0)
957  os << "|";
958  os << "<f" << fvalue << "> " << (*ent)->number << "," << (*ent)->LS << "," << (*ent)->SS;
959  fvalue++;
960  }
961  }
962  os << "\"];\n";
963 
964  if(outArcs > 0)
965  {
966  // Inner node
967  int fvalue = 0;
968  for (ListDigraph::OutArcIt arc(tree, node); arc != INVALID; ++arc)
969  {
970  print(os, tree.target(arc));
971  os << "node" << id << ":f" << fvalue << " -> node" << tree.id(tree.target(arc)) << ";\n";
972  fvalue++;
973  }
974  }
975  else
976  {
977  // Leaf
978  }
979 }
980 #endif
981 
982 }
983 
984 #endif
void leafEntriesTraverse(lemon::ListDigraph::Node currentNode, std::vector< CFEntry< T > > *vec)
Definition: cftree.h:585
lemon::ListDigraph::Node root
Definition: cftree.h:177
CFTree(EuclideanSpaceProvider< T > *euclidianProvider, DissimilarityMeasure< CFEntry< T > > *clusterDistanceMeasure, AttributeCalculator< CFEntry< T > > *thresholdAttribute, ThresholdCalculator *thresholdFunctor, int innerBranching, int leafBranching, double threshold, int maxSize)
Constructs a CFTree for a given vector space.
Definition: cftree.h:181
Abstract base class for attribute calculation (e.g. diameter).
DissimilarityMeasure< CFEntry< T > > * clusterDistanceMeasure
Definition: cftree.h:170
int size() const
Counts the number of nodes.
Definition: cftree.h:637
void insert(T point)
Inserts a point.
Definition: cftree.h:327
void rebuild(double newThreshold)
Rebuild the tree.
Definition: cftree.h:457
int innerBranching
Definition: cftree.h:164
virtual double operator()(double oldThreshold)=0
lemon::ListDigraph::Node addNode()
Wrapper to maintain the graph maps.
Definition: cftree.h:901
void eraseNode(lemon::ListDigraph::Node node)
Wrapper to maintain the graph maps.
Definition: cftree.h:908
virtual V nullVector() const =0
virtual ~CFTree()
Definition: cftree.h:300
lemon::ListDigraph::Node merge(lemon::ListDigraph::Node node1, lemon::ListDigraph::Node node2)
Merges two node entries.
Definition: cftree.h:853
size_t numberOfPoints() const
Counts the number of contained points.
Definition: cftree.h:642
lemon::ListDigraph::ArcMap< CFEntry< T > * > innerFeatures
Definition: cftree.h:175
void clean(lemon::ListDigraph::Node node)
Remove empty clustering features.
Definition: cftree.h:607
CFTree< T > & operator=(const CFTree< T > &)
Definition: cftree.h:238
ThresholdCalculator * thresholdFunctor
Definition: cftree.h:172
void rebuildTraverse(lemon::ListDigraph::Node currentOldNode, CFTree< T > &newTree, lemon::ListDigraph::Node currentNewNode)
Used by rebuild()
Definition: cftree.h:473
int leafBranching
Definition: cftree.h:165
double threshold
Definition: cftree.h:166
std::vector< CFEntry< T > > leafEntries()
Returns the leaf entries.
Definition: cftree.h:578
virtual DissimilarityMeasure< T > * clone() const =0
std::pair< lemon::ListDigraph::Node, lemon::ListDigraph::Node > split(lemon::ListDigraph::Node node, CFEntry< T > *keepTrack=0, bool fillUp=false)
Splits a node.
Definition: cftree.h:650
lemon::ListDigraph tree
Definition: cftree.h:174
int maxSize
Definition: cftree.h:167
EuclideanSpaceProvider< T > * euclidianProvider
Definition: cftree.h:169
virtual double calculate(T const &) const =0
Computes a characteristic attribute of a given object.
virtual AttributeCalculator< T > * clone() const =0
Indicates invalid values of arguments.
AttributeCalculator< CFEntry< T > > * thresholdAttribute
Definition: cftree.h:171
Abstract base class for dissimilarity measurement.
Clustering feature tree entry.
Definition: cfentry.h:22
Threshold calculation functor class.
Definition: cftree.h:106
Indicates that a computation entered an invalid configuration state.
Clustering feature tree.
Definition: cftree.h:34
lemon::ListDigraph::NodeMap< std::vector< CFEntry< T > * > * > leafFeatures
Definition: cftree.h:176
virtual double dissimilarity(T const &, T const &) const =0
Computes the dissimilarity between the two given objects.
virtual EuclideanSpaceProvider< V > * clone() const =0