/*
 * bloom.c - bloom filter built on a bitmap of 32-bit words, probed with
 * several hash functions per item.
 */
#include <stdlib.h>
#include "bloom.h"
#include "../hash.h"
#include "../crc/xxhash.h"
#include "../crc/murmur3.h"
#include "../crc/crc32c.h"
#include "../crc/fnv.h"
/*
 * Bloom filter state: a bitmap plus the number of bits it addresses.
 */
struct bloom {
/* Number of addressable bits; hash values are reduced modulo this. */
uint64_t nentries;
/* Bitmap storage, 32 bits per word, heap-allocated by bloom_new(). */
uint32_t *map;
};
/* Number of bits held by one uint32_t word of the bitmap (assumes 8-bit bytes). */
#define BITS_PER_INDEX (sizeof(uint32_t) * 8)
/*
 * Mask extracting the bit position inside a word from a bit number; relies on
 * BITS_PER_INDEX being a power of two.
 */
#define BITS_INDEX_MASK (BITS_PER_INDEX - 1)
/*
 * One entry of the hash table used by the filter: a hash function and the
 * seed passed to it.
 */
struct bloom_hash {
/* Value passed as the third argument of fn. */
unsigned int seed;
/* Hash callback, invoked as fn(buffer, length in bytes, seed). */
uint32_t (*fn)(const void *, uint32_t, uint32_t);
};
/*
 * Adapter giving fio_crc32c() the three-argument signature expected by
 * struct bloom_hash. fio_crc32c() takes no seed, so 'seed' is ignored.
 */
static uint32_t bloom_crc32c(const void *buf, uint32_t len, uint32_t seed)
{
	(void) seed;

	return fio_crc32c(buf, len);
}
/*
 * Adapter giving fnv() the signature expected by struct bloom_hash; the
 * result of fnv() is narrowed to 32 bits on return.
 */
static uint32_t bloom_fnv(const void *buf, uint32_t len, uint32_t seed)
{
	return (uint32_t) fnv(buf, len, seed);
}
/* Seed handed to every hash function in the table below. */
#define BLOOM_SEED	0x8989

/*
 * Hash functions applied to every item. Each entry contributes one bit
 * position per item. The table is read-only at runtime.
 */
static const struct bloom_hash hashes[] = {
	{
		.seed = BLOOM_SEED,
		.fn = jhash,
	},
	{
		.seed = BLOOM_SEED,
		.fn = XXH32,
	},
	{
		.seed = BLOOM_SEED,
		.fn = murmurhash3,
	},
	{
		.seed = BLOOM_SEED,
		.fn = bloom_crc32c,
	},
	{
		.seed = BLOOM_SEED,
		.fn = bloom_fnv,
	},
};

/*
 * Number of entries in hashes[]. Derived from the table itself so the count
 * cannot drift from its contents; cast to int because it is compared against
 * int loop counters. Still an integer constant expression, so it can size
 * arrays.
 */
#define N_HASHES	((int) (sizeof(hashes) / sizeof(hashes[0])))
/*
 * Allocate a bloom filter addressing 'entries' bits, all initially clear.
 *
 * Returns the new filter, or NULL when 'entries' is zero, when the bitmap
 * size cannot be represented in size_t, or when memory allocation fails.
 * The result must be released with bloom_free().
 */
struct bloom *bloom_new(uint64_t entries)
{
	struct bloom *b;
	uint64_t no_uints;

	/*
	 * Run the crc32c probes unconditionally, as before, so they have been
	 * executed before the crc32c-based hash is first used.
	 */
	crc32c_arm64_probe();
	crc32c_intel_probe();

	/* A filter with zero bits would make "hash % nentries" divide by zero. */
	if (!entries)
		return NULL;

	/*
	 * Round up to whole words without forming entries + 31, which wraps
	 * around for values close to UINT64_MAX and would undersize the map.
	 */
	no_uints = entries / BITS_PER_INDEX + (entries % BITS_PER_INDEX != 0);

	/* Reject word counts that would be truncated when passed to calloc(). */
	if ((uint64_t) (size_t) no_uints != no_uints)
		return NULL;

	b = malloc(sizeof(*b));
	if (!b)
		return NULL;

	b->nentries = entries;
	b->map = calloc((size_t) no_uints, sizeof(uint32_t));
	if (!b->map) {
		free(b);
		return NULL;
	}

	return b;
}
/*
 * Release a filter obtained from bloom_new(), including its bitmap.
 * Passing NULL is a no-op, mirroring free().
 */
void bloom_free(struct bloom *b)
{
	if (!b)
		return;

	free(b->map);
	free(b);
}
/*
 * Probe the filter with 'len' bytes at 'data', optionally inserting them.
 *
 * Every hash in hashes[] selects one bit (hash value modulo nentries).
 * With 'set' true, each selected bit that is clear gets set. With 'set'
 * false, the filter is not modified and probing stops at the first clear
 * bit, so the remaining hashes are never computed.
 *
 * Returns true only if every selected bit was already set when examined,
 * meaning the data may have been inserted before. Returns false if at least
 * one bit was clear, or if the filter addresses zero bits.
 */
static bool __bloom_check(struct bloom *b, const void *data, unsigned int len,
			  bool set)
{
	const uint64_t nbits = b->nentries;
	int i, was_set = 0;

	/* Nothing can be a member of a zero-bit filter, and "% 0" is undefined. */
	if (!nbits)
		return false;

	for (i = 0; i < N_HASHES; i++) {
		const uint32_t hash = hashes[i].fn(data, len, hashes[i].seed) % nbits;
		const unsigned int index = hash / BITS_PER_INDEX;
		const uint32_t mask = 1U << (hash & BITS_INDEX_MASK);

		if (b->map[index] & mask)
			was_set++;
		else if (set)
			b->map[index] |= mask;
		else
			break;
	}

	return was_set == N_HASHES;
}
/*
 * Insert 'nwords' 32-bit words starting at 'data' into the filter.
 *
 * Returns the result of __bloom_check() with set == true: true if all bits
 * selected for the data were already set, false otherwise.
 */
bool bloom_set(struct bloom *b, uint32_t *data, unsigned int nwords)
{
	/* Length in bytes, narrowed to the unsigned int the checker takes. */
	const unsigned int nbytes = nwords * sizeof(uint32_t);

	return __bloom_check(b, data, nbytes, true);
}
/*
 * Probe the filter with 'len' bytes of 'data'; when 'set' is true the data
 * is inserted as well.
 *
 * Returns whatever __bloom_check() reports for the same arguments.
 */
bool bloom_string(struct bloom *b, const char *data, unsigned int len,
		  bool set)
{
	const bool already_present = __bloom_check(b, data, len, set);

	return already_present;
}