CluE  1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
adaptivesampling.h
Go to the documentation of this file.
1 #ifndef ADAPTIVESAMPLING_H
2 #define ADAPTIVESAMPLING_H
3 
4 #include "../base/inputsetter.h"
5 #include "../base/measuresetter.h"
6 #include "../base/algorithm.h"
7 #include "../base/dissimilaritymeasure.h"
8 #include "../base/solutionprovider.h"
9 #include "../datastructure/discreteproxysolution.h"
10 #include "../misc/randomness.h"
11 #include "../point/pointweightmodifier.h"
12 #include "../exception/invalidruntimeconfigurationexception.h"
13 
14 #include <set>
15 #include <ctime>
16 #include <vector>
17 #include <limits>
18 
19 namespace CluE
20 {
21 
30 template<typename T> class AdaptiveSampling : public Algorithm, public InputSetter<T>, public MeasureSetter<T>
31 {
32 public:
33  AdaptiveSampling(const std::vector<T*>* data = NULL, DissimilarityMeasure<T>* measure = NULL,
34  unsigned int numberOfSamples = 0);
35 
38  virtual ~AdaptiveSampling();
39 
50 
51  virtual void setInput(std::vector<T*> const*);
52  virtual void setMeasure(DissimilarityMeasure<T> const*);
53  void setNumberOfSamples(unsigned int);
55 
61 
62 private:
63  std::vector<T*> const* input;
65  unsigned int number_of_samples;
67 };
68 
69 template<typename T> AdaptiveSampling<T>::AdaptiveSampling(std::vector<T*> const* data,
70  DissimilarityMeasure<T>* m, unsigned int n) : input(data), measure(m==NULL?NULL:m->clone()),
71  number_of_samples(n),
72  weightModifier(0)
73 {
74 }
75 
77  Algorithm(rhs), input(rhs.input), measure(rhs.measure==NULL?NULL:rhs.measure->clone()),
78  number_of_samples(rhs.number_of_samples),
79  weightModifier(0)
80 {
81 }
82 
84  const AdaptiveSampling<T>& rhs)
85 {
86  Algorithm::operator=(rhs);
87  DissimilarityMeasure<T>* dm = rhs.measure==NULL?NULL:rhs.measure->clone();
88  delete this->measure;
89  this->measure = dm;
90  this->input = rhs.input;
91  this->number_of_samples = rhs.number_of_samples;
92  this->weightModifier = rhs.weightModifier == 0 ? 0 : rhs.weightModifier->clone();
93  return *this;
94 }
95 
97 {
98  delete this->measure;
99 }
100 
// Body of AdaptiveSampling<T>::compute(): draws number_of_samples elements
// from the input and returns them as the first proxy set of `solution`.
//
// Throws InvalidRuntimeConfigurationException with code
//   0 if input is NULL, 1 if measure is NULL, 2 if the input is empty,
//   3 if number_of_samples is 0, 4 if number_of_samples exceeds the input size.
//
// NOTE(review): `solution` and `rg` are used below but their declarations are
// not among the visible lines of this listing (presumably on the omitted
// lines 111 and 125) — confirm against the original header.
102 {
103  if(this->input==NULL)
104  throw InvalidRuntimeConfigurationException(0, "Input is NULL.");
105  if(this->measure==NULL)
106  throw InvalidRuntimeConfigurationException(1, "Dissimilarity measure is NULL.");
107 
// Wall-clock timing via time(0); the elapsed time is stored in solution->seconds.
108  time_t start, end;
109  start = time(0);
110 
112 
113  unsigned int N = this->input->size();
114  if(N==0)
115  throw InvalidRuntimeConfigurationException(2, "Empty input set.");
116  unsigned int samplenum = this->number_of_samples;
117  if(samplenum==0)
118  throw InvalidRuntimeConfigurationException(3, "Desired number of samples is 0.");
119  if(samplenum>N)
120  throw InvalidRuntimeConfigurationException(4, "Desired number of samples is larger than size of input.");
121 
122  std::vector<T*> unchosen = *this->input; // working copy of the input; chosen elements are removed from it
123  solution->proxysets.push_back(std::vector<T*>());
124 
// First element: uniform over all N input positions.
126  std::uniform_int_distribution<int> dis(0, N-1);
127  int index = dis(rg);
128 
129  solution->proxysets[0].push_back(unchosen[index]); // choose first center at random from all elements
130  unchosen.erase(unchosen.begin()+index); // remove the first center from the working copy
131 
// weights[j] is the running minimum of the (optionally modified) dissimilarity
// computed at position j across rounds; it starts at +infinity.
132  std::vector<double> weights(N-1, std::numeric_limits<double>::infinity());
133  for(unsigned int i=1; i<samplenum; i++)
134  {
135  // update each weight with the dissimilarity to the most recently chosen center
136  double total = .0;
137  for(unsigned int j=0; j<N-i; j++)
138  {
139  double w = this->measure->dissimilarity(*unchosen[j], *solution->proxysets[0][i-1]);
140  if(weightModifier != 0)
141  w *= weightModifier->getWeight(*unchosen[j]);
142  if(w<weights[j])
143  weights[j] = w;
144  total += weights[j];
145  }
146 
// Draw a position in [0, total) and walk the weights until it is used up, so
// position j is picked with probability weights[j] / total.
// This `dis` shadows the integer distribution declared before the loop.
148  std::uniform_real_distribution<> dis(0, total);
149  double pos = dis(rg);
150 
151  index = -1;
152  for(unsigned int j=0; j<N-i&&index<0; j++)
153  if(pos<weights[j])
154  index = j;
155  else
156  pos -= weights[j];
// Fallback when the walk selected nothing (e.g. all weights are 0): take position 0.
157  if(index<0)
158  index = 0;
159 
160  solution->proxysets[0].push_back(unchosen[index]);
// NOTE(review): only `unchosen` is erased here; `weights` keeps its entry at
// `index`. From the next round on, unchosen[j] for j >= index is the element
// formerly at j+1 while weights[j] still holds the value computed for the
// element formerly at j, so the running minima no longer line up with the
// elements. A matching weights.erase(weights.begin()+index) looks intended —
// confirm.
161  unchosen.erase(unchosen.begin()+index);
162  }
163 
164  end = time(0);
165  solution->seconds=end-start;
166  //std::clog << "CluE::AdaptiveSampling<T>::compute() - finished" << std::endl;
167  return solution;
168 }
169 
170 template<typename T> void AdaptiveSampling<T>::setInput(std::vector<T*> const* data)
171 {
172  this->input = data;
173 }
174 
175 template<typename T> void AdaptiveSampling<T>::setMeasure(DissimilarityMeasure<T> const* m)
176 {
177  this->measure = m==NULL?NULL:m->clone();
178 }
179 
180 template<typename T> void AdaptiveSampling<T>::setNumberOfSamples(unsigned int n)
181 {
182  this->number_of_samples = n;
183 }
184 
186 {
187  if(wm != 0)
188  weightModifier = wm->clone();
189  else
190  wm = 0;
191 }
192 
194 {
195  return dynamic_cast<AdaptiveSampling<T>*>(s);
196 }
197 
198 }
199 
200 #endif
Encapsulates an STL random generator.
AdaptiveSampling< T > & operator=(const AdaptiveSampling< T > &)
virtual WeightModifier< T > * clone() const =0
Makes an exact copy of this object. The clone method creates a copy of this object and returns a pointer…
static AdaptiveSampling< T > * toAdaptiveSampling(Algorithm *s)
Does a dynamic cast of the given Algorithm to AdaptiveSampling
Data structure for discrete proxies.
virtual void setMeasure(DissimilarityMeasure< T > const *)
void setWeightModifier(WeightModifier< T > *wm)
virtual DiscreteProxySolution< T > * compute()
Computes the sample set.
std::vector< std::vector< T * > > proxysets
unsigned int number_of_samples
std::vector< T * > const * input
Abstract base class to modify the weight of weighted objects.
static RandomGenerator getRandomGenerator()
Definition: randomness.h:23
k-means++ sampling method
virtual DissimilarityMeasure< T > * clone() const =0
Abstract base class for algorithms.
Definition: algorithm.h:17
WeightModifier< T > * weightModifier
Interface to propagate the ability to set a DissimilarityMeasure.
Definition: measuresetter.h:13
virtual void setInput(std::vector< T * > const *)
DissimilarityMeasure< T > * measure
Abstract base class for dissimilarity measurement.
Indicates that a computation entered an invalid configuration state.
AdaptiveSampling(const std::vector< T * > *data=NULL, DissimilarityMeasure< T > *measure=NULL, unsigned int numberOfSamples=0)
void setNumberOfSamples(unsigned int)
Interface to propagate the ability to set input data.
Definition: inputsetter.h:13