Skip to content

Commit ab01a9b

Browse files
author
craig
committed
Re-added my timer code, and implemented a bloom filter to experiment with C++11 features.
1 parent 6592f79 commit ab01a9b

File tree

7 files changed

+880
-0
lines changed

7 files changed

+880
-0
lines changed

playtime/bloomfilter/Makefile

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
2+
# Builds the bloom filter test/benchmark binary `bf`.
CC=g++
# C++0x mode, optimised build. Append -g to CFLAGS for debug symbols.
CFLAGS=-std=c++0x -O2
IFLAGS=-I../../utility

# `all` does not name a file, so mark it phony; otherwise a stray file
# called `all` in this directory would turn the build into a no-op.
.PHONY: all
all:
	${CC} ${CFLAGS} ${IFLAGS} bloomfilter_test.cxx spooky.cpp -o bf

playtime/bloomfilter/bloomfilter.h

Whitespace-only changes.
+187
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
2+
// Local
3+
#include "bloomfilter.h"
4+
#include "multihash.h"
5+
6+
// STL
7+
#include <iostream>
8+
#include <vector>
9+
#include <set>
10+
#include <random>
11+
#include <ctime>
12+
13+
// AOS Utility
14+
#include <timer.h>
15+
16+
// Calls cont.insert(gen()) exactly numvalues times.
// The generator is taken by reference, so any state it advances is
// visible to the caller afterwards.
template<class Container, class Generator>
void insert(Container& cont, unsigned int numvalues, Generator& gen)
{
  unsigned int remaining = numvalues;
  while ( remaining-- > 0 )
  {
    cont.insert(gen());
  }
}
24+
25+
// Calls cont.find(gen()) exactly numvalues times and discards each result.
// The generator is taken by value, so the caller's generator object is
// not advanced by this function.
template<class Container, class Generator>
void find(Container& cont, unsigned int numvalues, Generator gen)
{
  unsigned int remaining = numvalues;
  while ( remaining-- > 0 )
  {
    cont.find(gen());
  }
}
33+
34+
// void performanceHelper(BloomFilter<int, MultiHash>& bf,
35+
// std::set<int>& theset,
36+
// {
37+
// }
38+
39+
void performanceTest(unsigned int numvalues, unsigned int numbits, MultiHash mh)
40+
{
41+
42+
{
43+
BloomFilter<int, MultiHash> bf(numbits, mh);
44+
std::set<int> theset;
45+
{
46+
std::mt19937 bfgen;
47+
std::mt19937 setgen;
48+
clock_t bftime = timeFunction(insert<BloomFilter<int, MultiHash>, std::mt19937>, bf, numvalues, bfgen);
49+
clock_t settime = timeFunction(insert<std::set<int>, std::mt19937>, theset, numvalues, setgen);
50+
std::cerr << "Random Insert - BF Time: " << bftime << " Set Time: " << settime << "\n";
51+
}
52+
53+
{
54+
std::mt19937 bfgen;
55+
std::mt19937 setgen;
56+
clock_t bftime = timeFunction(find<BloomFilter<int, MultiHash>, std::mt19937>, bf, 2*numvalues, bfgen);
57+
clock_t settime = timeFunction(find<std::set<int>, std::mt19937>, theset, 2*numvalues, setgen);
58+
std::cerr << "Same Random Find - BF Time: " << bftime << " Set Time: " << settime << "\n";
59+
}
60+
}
61+
62+
63+
{
64+
BloomFilter<int, MultiHash> bf(numbits, mh);
65+
std::set<int> theset;
66+
{
67+
unsigned int i = 0;
68+
auto lf = [&i](){return ++i;};
69+
clock_t bftime = timeFunction(insert<BloomFilter<int, MultiHash>, decltype(lf)>,
70+
bf,
71+
numvalues,
72+
lf);
73+
74+
i = 0;
75+
clock_t settime = timeFunction(insert<std::set<int>, decltype(lf)>,
76+
theset,
77+
numvalues,
78+
lf);
79+
80+
std::cerr << "Sequential Insert - BF Time: " << bftime << " Set Time: " << settime << "\n";
81+
}
82+
83+
{
84+
unsigned int i = 0;
85+
auto lf = [&i](){return ++i;};
86+
clock_t bftime = timeFunction(find<BloomFilter<int, MultiHash>, decltype(lf)>,
87+
bf,
88+
numvalues,
89+
lf);
90+
91+
i = 0;
92+
clock_t settime = timeFunction(find<std::set<int>, decltype(lf)>,
93+
theset,
94+
numvalues,
95+
lf);
96+
97+
std::cerr << "Sequential Find - BF Time: " << bftime << " Set Time: " << settime << "\n";
98+
}
99+
}
100+
}
101+
102+
void probTest(unsigned int numvalues, unsigned int numbits, MultiHash mh)
103+
{
104+
BloomFilter<int, MultiHash> bf(numbits, mh);
105+
std::set<int> theset;
106+
std::mt19937 gen1;
107+
std::mt19937 gen2;
108+
insert(bf, numvalues, gen1);
109+
insert(theset, numvalues, gen2);
110+
111+
112+
std::mt19937 gen3;
113+
gen3.seed(time(0));
114+
const int totalChecked = 10000000;
115+
int numfalsepos = 0;
116+
int numfound = 0;
117+
int numnotfound = 0;
118+
119+
for ( int i = 0; i < totalChecked; ++i )
120+
{
121+
int n = gen3();
122+
bool setfound = theset.find(n) != theset.end();
123+
bool bffound = bf.find(n);
124+
125+
if ( setfound && bffound ) ++numfound;
126+
else if ( setfound && !bffound ) std::cerr << "BADNESS.\n";
127+
else if ( !setfound && bffound ) ++numfalsepos;
128+
else ++numnotfound;
129+
}
130+
131+
std::cerr << "PROB 1\n";
132+
std::cerr << "Found: " << numfound << ", Not Found: " << numnotfound << ", False Pos: " << numfalsepos << "\n";
133+
std::cerr << "False Pos Ratio: " << double(numfalsepos)/double(numnotfound) << "\n";
134+
}
135+
136+
void probTest2(unsigned int numvalues, unsigned int numbits, MultiHash mh)
137+
{
138+
BloomFilter<int, MultiHash> bf(numbits, mh);
139+
std::set<int> theset;
140+
141+
int i = 0;
142+
auto gen = [&i](){return i++;};
143+
insert(bf, numvalues, gen);
144+
i = 0;
145+
insert(theset, numvalues, gen);
146+
147+
const int totalChecked = 10000000;
148+
int numfalsepos = 0;
149+
int numfound = 0;
150+
int numnotfound = 0;
151+
152+
// All should be negative.
153+
for ( int i = numbits; i < totalChecked+numbits; ++i )
154+
{
155+
int n = i;
156+
bool setfound = theset.find(n) != theset.end();
157+
bool bffound = bf.find(n);
158+
159+
if ( setfound && bffound ) ++numfound;
160+
else if ( setfound && !bffound ) std::cerr << "BADNESS.\n";
161+
else if ( !setfound && bffound ) ++numfalsepos;
162+
else ++numnotfound;
163+
}
164+
165+
std::cerr << "PROB 2\n";
166+
std::cerr << "Found: " << numfound << ", Not Found: " << numnotfound << ", False Pos: " << numfalsepos << "\n";
167+
std::cerr << "False Pos Ratio: " << double(numfalsepos)/double(numnotfound) << "\n";
168+
}
169+
170+
171+
int main()
172+
{
173+
std::mt19937 seedGen;
174+
MultiHash mh(10, seedGen);
175+
176+
const unsigned int numvalues = 1000000;
177+
const unsigned int numbits = 9585058; // -(numvalues * ln (probability of false positive) / ln(2)^2)
178+
// using 1% for prob. of false positive.
179+
180+
performanceTest(numvalues, numbits, mh);
181+
probTest(numvalues, numbits, mh);
182+
probTest2(numvalues, numbits, mh);
183+
184+
185+
186+
}
187+

playtime/bloomfilter/multihash.h

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#include "spooky.h"
2+
#include <vector>
3+
#include <boost/bind.hpp>
4+
5+
class MultiHash
6+
{
7+
public:
8+
9+
//! Construct a hash functor capable of returning k independent hash values
10+
//! for a given input.
11+
template<class SeedGenerator>
12+
MultiHash(unsigned int k, SeedGenerator gen)
13+
{
14+
seeds_.reserve(k);
15+
std::generate_n(std::back_inserter(seeds_), k, gen);
16+
}
17+
18+
//! Hash the input.
19+
std::vector<uint64_t> operator()(uint64_t input) const
20+
{
21+
std::vector<uint64_t> out;
22+
out.reserve(seeds_.size());
23+
// See http://www.burtleburtle.net/bob/hash/spooky.html
24+
auto f = boost::bind(SpookyHash::Hash64, &input, sizeof(uint64_t), _1);
25+
std::transform(seeds_.begin(), seeds_.end(), std::back_inserter(out), f);
26+
return out;
27+
}
28+
private:
29+
std::vector<uint64_t> seeds_;
30+
};
31+
32+

0 commit comments

Comments
 (0)