8 #include <lemon/list_graph.h>
11 #include "../base/dissimilaritymeasure.h"
12 #include "../base/attributecalculator.h"
13 #include "../base/euclideanspaceprovider.h"
14 #include "../exception/invalidargumentexception.h"
15 #include "../exception/invalidruntimeconfigurationexception.h"
79 void rebuild(
double newThreshold);
108 virtual double operator()(
double oldThreshold) = 0;
115 void print(std::ostream& os);
122 lemon::ListDigraph::Node
addNode();
127 void eraseNode(lemon::ListDigraph::Node node);
132 void rebuildTraverse(lemon::ListDigraph::Node currentOldNode,
CFTree<T>& newTree, lemon::ListDigraph::Node currentNewNode);
142 void clean(lemon::ListDigraph::Node node);
150 std::pair<lemon::ListDigraph::Node, lemon::ListDigraph::Node>
split(lemon::ListDigraph::Node node,
CFEntry<T>* keepTrack = 0,
bool fillUp =
false);
158 lemon::ListDigraph::Node
merge(lemon::ListDigraph::Node node1, lemon::ListDigraph::Node node2);
161 void print(std::ostream& os, lemon::ListDigraph::Node node);
185 int innerBranching,
int leafBranching,
double threshold,
int maxSize) :
186 euclidianProvider(euclidianProvider->clone()),
187 clusterDistanceMeasure(clusterDistanceMeasure->clone()),
188 thresholdAttribute(thresholdAttribute->clone()),
189 thresholdFunctor(thresholdFunctor),
190 innerBranching(innerBranching),
191 leafBranching(leafBranching),
192 threshold(threshold),
202 euclidianProvider(x.euclidianProvider->clone()),
203 clusterDistanceMeasure(x.clusterDistanceMeasure->clone()),
204 thresholdAttribute(x.thresholdAttribute->clone()),
205 thresholdFunctor(x.thresholdFunctor),
206 innerBranching(x.innerBranching),
207 leafBranching(x.leafBranching),
209 threshold(x.threshold),
214 using namespace lemon;
216 DigraphCopy<ListDigraph, ListDigraph> cg(x.
tree,
tree);
222 for (ListDigraph::ArcIt a(
tree); a != INVALID; ++a)
227 for (ListDigraph::NodeIt n(
tree); n != INVALID; ++n)
240 using namespace lemon;
246 for (ListDigraph::ArcIt a(
tree); a != INVALID; ++a)
254 for (ListDigraph::NodeIt n(
tree); n != INVALID; ++n)
278 DigraphCopy<ListDigraph, ListDigraph> cg(x.
tree,
tree);
284 for (ListDigraph::ArcIt a(
tree); a != INVALID; ++a)
289 for (ListDigraph::NodeIt n(
tree); n != INVALID; ++n)
302 using namespace lemon;
304 for (ListDigraph::ArcIt a(
tree); a != INVALID; ++a)
312 for (ListDigraph::NodeIt n(
tree); n != INVALID; ++n)
329 CFEntry<T> pointFeature(1, point, point*point);
335 using namespace lemon;
338 ListDigraph::Node currentNode =
root;
339 while(countOutArcs(
tree, currentNode) > 0)
341 ListDigraph::Node minNode = INVALID;
342 ListDigraph::Arc minArc = INVALID;
344 for (ListDigraph::OutArcIt arc(
tree, currentNode); arc != INVALID; ++arc)
347 if(dist < minDist || minNode == INVALID)
349 minNode =
tree.target(arc);
354 currentNode = minNode;
364 if(dist < minDist || minEntry == 0)
371 bool lastSplitAtLeaf =
true;
372 bool splitted =
false;
373 std::pair<lemon::ListDigraph::Node, lemon::ListDigraph::Node> lastSplitPair;
377 *minEntry += pointFeature;
386 lastSplitPair =
split(currentNode, newEntry);
387 currentNode = lastSplitPair.first;
393 while(countInArcs(
tree, currentNode) > 0)
395 ListDigraph::InArcIt arc(
tree, currentNode);
397 currentNode =
tree.source(arc);
401 currentNode = lastSplitPair.first;
412 if(countOutArcs(
tree, currentNode) > 2 && !lastSplitAtLeaf)
414 ListDigraph::Arc minArc1 = INVALID;
415 ListDigraph::Arc minArc2 = INVALID;
417 for (ListDigraph::OutArcIt arc1(
tree, currentNode); arc1 != INVALID; ++arc1)
419 for (ListDigraph::OutArcIt arc2(arc1); arc2 != INVALID; ++arc2)
424 if(dist < minDist || minArc1 == INVALID)
432 if(!(
tree.target(minArc1) == lastSplitPair.first &&
tree.target(minArc2) == lastSplitPair.second) &&
433 !(
tree.target(minArc1) == lastSplitPair.second &&
tree.target(minArc2) == lastSplitPair.first))
435 ListDigraph::Node mergedNode =
merge(
tree.target(minArc1),
tree.target(minArc2));
437 lastSplitPair =
split(mergedNode, 0,
true);
446 lastSplitAtLeaf =
false;
452 double th = (*thresholdFunctor)(
threshold);
460 if(countNodes(
tree) > 1)
464 lemon::ListDigraph::Node oldRoot =
root;
475 using namespace lemon;
477 if(countOutArcs(
tree, oldNode) > 0)
480 for (ListDigraph::OutArcIt arc(
tree, oldNode); arc != INVALID; ++arc)
482 ListDigraph::Node oldChild =
tree.target(arc);
483 ListDigraph::Node newChild = newCFTree.
addNode();
484 ListDigraph::Arc newArc = newCFTree.
tree.addArc(newNode, newChild);
495 ListDigraph::Node newTravNode = newCFTree.
root;
496 while(countOutArcs(newCFTree.
tree, newTravNode) > 0)
498 ListDigraph::Node minNode = INVALID;
499 ListDigraph::Arc minArc = INVALID;
500 double minClosestDist = -1;
501 for (ListDigraph::OutArcIt arc(newCFTree.
tree, newTravNode); arc != INVALID; ++arc)
504 if((dist < minClosestDist || minNode == INVALID) && (*newCFTree.
innerFeatures[arc]).number != 0)
506 minNode = newCFTree.
tree.target(arc);
508 minClosestDist = dist;
511 if(minNode != INVALID)
512 newTravNode = minNode;
518 std::vector<CFEntry<T>*>& leafClosestEntries = *newCFTree.
leafFeatures[newTravNode];
520 double minClosestDist = -1;
521 for (
typename std::vector<
CFEntry<T>*>::iterator newEntryCandidateIt = leafClosestEntries.begin(); newEntryCandidateIt!=leafClosestEntries.end(); newEntryCandidateIt++)
524 if(dist < minClosestDist || minClosestEntry == 0)
526 minClosestEntry = *newEntryCandidateIt;
527 minClosestDist = dist;
530 std::vector<CFEntry<T>*>& leafCurrentEntries = *newCFTree.
leafFeatures[newNode];
532 double minCurrrentDist = -1;
533 for (
typename std::vector<
CFEntry<T>*>::iterator newEntryCandidateIt = leafCurrentEntries.begin(); newEntryCandidateIt!=leafCurrentEntries.end(); newEntryCandidateIt++)
536 if(dist < minCurrrentDist || minCurrentEntry == 0)
538 minCurrentEntry = *newEntryCandidateIt;
539 minCurrrentDist = dist;
547 *minClosestEntry += **currentEntryIt;
549 else if(minClosestEntry != 0 && newTravNode != newNode && leafClosestEntries.size() < newCFTree.
leafBranching)
552 leafClosestEntries.push_back(
new CFEntry<T>(**currentEntryIt));
557 *minCurrentEntry += **currentEntryIt;
558 newTravNode = newNode;
564 newTravNode = newNode;
568 while(newTravNode != newCFTree.
root)
570 ListDigraph::InArcIt parentArc(newCFTree.
tree, newTravNode);
572 newTravNode = newCFTree.
tree.source(parentArc);
580 std::vector<CFEntry<T> > ret;
587 using namespace lemon;
589 if(countOutArcs(
tree, currentNode) > 0)
592 for (ListDigraph::OutArcIt arc(
tree, currentNode); arc != INVALID; ++arc)
599 for (
typename std::vector<
CFEntry<T>*>::iterator it = leafEntries.begin(); it!=leafEntries.end(); it++)
609 using namespace lemon;
611 if(countOutArcs(
tree, node) > 0)
614 for (ListDigraph::OutArcIt arc(
tree, node); arc != INVALID; ++arc)
616 if(countOutArcs(
tree, node) == 0)
618 ListDigraph::InArcIt parentArc(
tree, node);
621 tree.erase(parentArc);
630 ListDigraph::InArcIt parentArc(
tree, node);
631 tree.erase(parentArc);
639 return lemon::countNodes(
tree);
645 for (lemon::ListDigraph::OutArcIt arc(
tree,
root); arc != lemon::INVALID; ++arc)
650 template<
typename T> std::pair<lemon::ListDigraph::Node, lemon::ListDigraph::Node>
CFTree<T>::split(lemon::ListDigraph::Node node,
CFEntry<T>* keepTrack,
bool fillUp)
652 using namespace lemon;
654 std::pair<lemon::ListDigraph::Node, lemon::ListDigraph::Node> ret;
657 if(countOutArcs(
tree, node) > 0)
661 ListDigraph::Arc maxArc1 = INVALID;
662 ListDigraph::Arc maxArc2 = INVALID;
664 for (ListDigraph::OutArcIt arc1(
tree, node); arc1 != INVALID; ++arc1)
666 for (ListDigraph::OutArcIt arc2(arc1); arc2 != INVALID; ++arc2)
681 ListDigraph::Node child1 =
addNode();
682 ListDigraph::Node child2 =
addNode();
686 std::vector<ListDigraph::Arc> to1;
687 std::vector<ListDigraph::Arc> to2;
689 bool trackChild1 =
innerFeatures[maxArc1] == keepTrack ?
true :
false;
691 to1.push_back(maxArc1);
692 to2.push_back(maxArc2);
695 for (ListDigraph::OutArcIt arc(
tree, node); arc != INVALID; ++arc)
697 if(arc == maxArc1 || arc == maxArc2)
702 if((isNearerToMaxArc1 && hasTo1Space) || (!hasTo2Space))
730 for (
typename std::vector<ListDigraph::Arc>::iterator it = to1.begin(); it != to1.end(); it++)
731 tree.changeSource(*it, child1);
732 for (
typename std::vector<ListDigraph::Arc>::iterator it = to2.begin(); it != to2.end(); it++)
733 tree.changeSource(*it, child2);
736 if(countOutArcs(
tree, node) != 0)
740 ListDigraph::Node parent = INVALID;
751 ListDigraph::InArcIt arc(
tree, node);
752 parent =
tree.source(arc);
757 ListDigraph::Arc childArc1 =
tree.addArc(parent, child1);
758 ListDigraph::Arc childArc2 =
tree.addArc(parent, child2);
769 std::vector<CFEntry<T>*>& entries = *
leafFeatures[node];
770 for (
typename std::vector<
CFEntry<T>*>::iterator ent1 = entries.begin(); ent1 != entries.end(); ++ent1)
772 for (
typename std::vector<
CFEntry<T>*>::iterator ent2(ent1); ent2 != entries.end(); ++ent2)
787 ListDigraph::Node child1 =
addNode();
788 ListDigraph::Node child2 =
addNode();
792 std::vector<CFEntry<T>*>& to1 = *
new std::vector<CFEntry<T>*>();
793 std::vector<CFEntry<T>*>& to2 = *
new std::vector<CFEntry<T>*>();
794 for (
typename std::vector<
CFEntry<T>*>::iterator ent = entries.begin(); ent != entries.end(); ++ent)
796 bool isMaxEnt1 = *ent == maxEnt1;
797 bool isNotMaxEnt2 = *ent != maxEnt2;
801 if((isMaxEnt1 || (isNotMaxEnt2 && isNearerToMaxEnt1)) && (!fillUp || !isTo1Full || isTo2Full))
805 if(*ent == keepTrack)
815 if(*ent == keepTrack)
825 ListDigraph::Node parent = INVALID;
836 ListDigraph::InArcIt arc(
tree, node);
837 parent =
tree.source(arc);
844 ListDigraph::Arc childArc1 =
tree.addArc(parent, child1);
845 ListDigraph::Arc childArc2 =
tree.addArc(parent, child2);
853 template<
typename T> lemon::ListDigraph::Node
CFTree<T>::merge(lemon::ListDigraph::Node node1, lemon::ListDigraph::Node node2)
855 using namespace lemon;
861 if(
tree.source(ListDigraph::InArcIt(
tree, node1)) !=
tree.source(ListDigraph::InArcIt(
tree, node2)))
865 if(countOutArcs(
tree, node1) > 0)
868 ListDigraph::Node child =
addNode();
873 std::vector<ListDigraph::Arc> toChange;
874 for (ListDigraph::OutArcIt arc(
tree, node1); arc != INVALID; ++arc)
875 toChange.push_back(arc);
876 for (ListDigraph::OutArcIt arc(
tree, node2); arc != INVALID; ++arc)
877 toChange.push_back(arc);
878 for (
typename std::vector<ListDigraph::Arc>::iterator it = toChange.begin(); it != toChange.end(); it++)
879 tree.changeSource(*it, child);
882 ListDigraph::InArcIt arc1(
tree, node1);
883 ListDigraph::InArcIt arc2(
tree, node2);
884 ListDigraph::Node parent =
tree.source(arc1);
889 ListDigraph::Arc childArc =
tree.addArc(parent, child);
903 lemon::ListDigraph::Node node =
tree.addNode();
923 os <<
"digraph G{\n";
924 os <<
"node [shape=record];\n";
929 template<
typename T>
void CFTree<T>::print(std::ostream& os, lemon::ListDigraph::Node node)
931 using namespace lemon;
933 int id =
tree.id(node);
934 int outArcs = countOutArcs(
tree, node);
936 os <<
"node" <<
id <<
"[label=\"";
941 for (ListDigraph::OutArcIt arc(
tree, node); arc != INVALID; ++arc)
952 std::vector<CFEntry<T>*>& entries = *
leafFeatures[node];
954 for (
typename std::vector<CFEntry<T>*>::iterator ent = entries.begin(); ent != entries.end(); ++ent)
958 os <<
"<f" << fvalue <<
"> " << (*ent)->number <<
"," << (*ent)->LS <<
"," << (*ent)->SS;
968 for (ListDigraph::OutArcIt arc(
tree, node); arc != INVALID; ++arc)
970 print(os,
tree.target(arc));
971 os <<
"node" <<
id <<
":f" << fvalue <<
" -> node" <<
tree.id(
tree.target(arc)) <<
";\n";
void leafEntriesTraverse(lemon::ListDigraph::Node currentNode, std::vector< CFEntry< T > > *vec)
lemon::ListDigraph::Node root
CFTree(EuclideanSpaceProvider< T > *euclidianProvider, DissimilarityMeasure< CFEntry< T > > *clusterDistanceMeasure, AttributeCalculator< CFEntry< T > > *thresholdAttribute, ThresholdCalculator *thresholdFunctor, int innerBranching, int leafBranching, double threshold, int maxSize)
Constructs a CFTree for a given vector space.
Abstract base class for attribute calculation (e.g. diameter).
DissimilarityMeasure< CFEntry< T > > * clusterDistanceMeasure
int size() const
Counts the number of nodes.
void insert(T point)
Inserts a point.
void rebuild(double newThreshold)
Rebuild the tree.
virtual double operator()(double oldThreshold)=0
lemon::ListDigraph::Node addNode()
Wrapper to maintain the graph maps.
void eraseNode(lemon::ListDigraph::Node node)
Wrapper to maintain the graph maps.
virtual V nullVector() const =0
lemon::ListDigraph::Node merge(lemon::ListDigraph::Node node1, lemon::ListDigraph::Node node2)
Merges two node entries.
size_t numberOfPoints() const
Counts the number of contained points.
lemon::ListDigraph::ArcMap< CFEntry< T > * > innerFeatures
void clean(lemon::ListDigraph::Node node)
Remove empty clustering features.
CFTree< T > & operator=(const CFTree< T > &)
ThresholdCalculator * thresholdFunctor
void rebuildTraverse(lemon::ListDigraph::Node currentOldNode, CFTree< T > &newTree, lemon::ListDigraph::Node currentNewNode)
Used by rebuild()
std::vector< CFEntry< T > > leafEntries()
Returns the leaf entries.
virtual DissimilarityMeasure< T > * clone() const =0
std::pair< lemon::ListDigraph::Node, lemon::ListDigraph::Node > split(lemon::ListDigraph::Node node, CFEntry< T > *keepTrack=0, bool fillUp=false)
Splits a node.
EuclideanSpaceProvider< T > * euclidianProvider
virtual double calculate(T const &) const =0
Computes a characteristic attribute of a given object.
virtual AttributeCalculator< T > * clone() const =0
Indicates invalid values of arguments.
AttributeCalculator< CFEntry< T > > * thresholdAttribute
Abstract base class for dissimilarity measurement.
Clustering feature tree entry.
Threshold calculation functor class.
Indicates that a computation entered an invalid configuration state.
lemon::ListDigraph::NodeMap< std::vector< CFEntry< T > * > * > leafFeatures
virtual double dissimilarity(T const &, T const &) const =0
Computes the dissimilarity between the two given objects.
virtual EuclideanSpaceProvider< V > * clone() const =0