CluE  1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
distinctelements.h
Go to the documentation of this file.
1 #ifndef DISTINCTELEMENTS_H
2 #define DISTINCTELEMENTS_H
3 
4 #include <set>
5 #include <functional>
6 
7 #include "../hashing/hashfunction.h"
8 
9 namespace CluE
10 {
11 
17 template<typename U, typename H> class DistinctElements
18 {
19 
20 public:
21  DistinctElements(unsigned long long sizeOfUniverse, double eps, std::function<HashFunction<U,H>*(unsigned long long sizeOfHashingSpace)> hashfunctionCreator) :
22  sizeOfUniverse(sizeOfUniverse),
23  sizeOfUniverse3(sizeOfUniverse*sizeOfUniverse*sizeOfUniverse),
24  rank(ceil(96/(eps*eps))),
25  eps(eps),
26  hashfunction(hashfunctionCreator(sizeOfUniverse3)),
27  elements()
28  {
29  }
30 
32  {
33  delete hashfunction;
34  }
35 
36  DistinctElements<U,H>& operator<<(U const & element);
37 
38  unsigned long long numberOfDistinctElements();
39 
40 private:
41  unsigned long long sizeOfUniverse;
42  unsigned long long sizeOfUniverse3;
43  unsigned long long rank;
44  double eps;
46  std::set<U> elements;
47 };
48 
49 template<typename U, typename H> DistinctElements<U,H>& DistinctElements<U,H>::operator<<(U const & element)
50 {
51  H hashval = (*hashfunction)(element);
52  if(elements.count(hashval) == 0)
53  {
54  if(elements.size() < rank)
55  elements.insert(hashval);
56  else if(*(--elements.cend()) > hashval)
57  {
58  elements.erase(--elements.end());
59  elements.insert(hashval);
60  }
61  }
62  return *this;
63 }
64 
65 template<typename U, typename H> unsigned long long DistinctElements<U,H>::numberOfDistinctElements()
66 {
67  if(elements.size() == 0)
68  return 0;
69  else
70  return (sizeOfUniverse3 * rank) / *(--elements.cend());
71 }
72 
73 }
74 
75 #endif
Base class template for any hash function mapping an element from universe U to hashing space H...
Definition: hashfunction.h:10
unsigned long long numberOfDistinctElements()
DistinctElements(unsigned long long sizeOfUniverse, double eps, std::function< HashFunction< U, H > *(unsigned long long sizeOfHashingSpace)> hashfunctionCreator)
HashFunction< U, H > * hashfunction
unsigned long long rank
Count distinct elements in a data stream.
unsigned long long sizeOfUniverse3
DistinctElements< U, H > & operator<<(U const &element)
unsigned long long sizeOfUniverse