Skip to content

Commit bf42131

Browse files
authored
Merge pull request #95 from ssb-ngi-pointer/prefix-map
Optimize prefixes for queries
2 parents d29a75b + 0559868 commit bf42131

File tree

7 files changed

+353
-19
lines changed

7 files changed

+353
-19
lines changed

README.md

+6
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,12 @@ _Rule of thumb_: use prefix indexes in an EQUAL operation only when
384384
the target `value` of your EQUAL can dynamically assume many (more
385385
than a dozen) possible values.
386386

387+
An additional option `useMap` can be provided that will store the
388+
prefix as a map instead of an array. The map can be seen as an
389+
inverted index that allows for faster queries at the cost of extra
390+
space. Maps don't store empty values, meaning they are also a good fit
391+
for sparse indexes such as vote links.
392+
387393
## Low-level API
388394

389395
First some terminology: offset refers to the byte position in the log

benchmark/index.js

+97-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ const {
1818
toPullStream,
1919
paginate,
2020
} = require('../operators')
21-
const { seekType, seekAuthor } = require('../test/helpers')
21+
const { seekType, seekAuthor, seekVoteLink } = require('../test/helpers')
2222
const copy = require('../copy-json-to-bipf-async')
2323

2424
const dir = '/tmp/jitdb-benchmark'
@@ -213,3 +213,99 @@ test('paginate one huge index', (t) => {
213213
)
214214
})
215215
})
216+
217+
// Benchmark: measures how long a paginated EQUAL query over the vote-link
// prefix map takes on its first run and appends the duration to the report.
test('query a prefix map (first run)', (t) => {
  db.onReady(() => {
    // Fetch a single message; its key becomes the vote link target below.
    query(
      fromDB(db),
      paginate(1),
      toCallback((err, answer) => {
        // Do not destructure `answer` in the parameter list: if the callback is
        // invoked with an error and no second argument, that would throw a
        // TypeError before the real error could be reported.
        if (err) {
          t.fail(err)
          t.end()
          return
        }
        const rootKey = answer.results[0].key

        db.onReady(() => {
          const start = Date.now()
          let i = 0 // number of pages delivered by the pull-stream
          pull(
            query(
              fromDB(db),
              and(
                equal(seekVoteLink, rootKey, {
                  indexType: 'value_content_vote_link',
                  useMap: true,
                  prefix: 32,
                })
              ),
              paginate(5),
              toPullStream()
            ),
            pull.drain(
              () => {
                i++
              },
              (err) => {
                if (err) t.fail(err)
                const duration = Date.now() - start
                if (i !== 92) t.fail('wrong number of pages read: ' + i)
                t.pass(`duration: ${duration}ms`)
                fs.appendFileSync(
                  reportPath,
                  `| Query a prefix map (1st run) | ${duration}ms |\n`
                )
                t.end()
              }
            )
          )
        })
      })
    )
  })
})
264+
265+
// Benchmark: repeats the paginated EQUAL query over the vote-link prefix map
// (second run) and appends the measured duration to the report.
test('query a prefix map (second run)', (t) => {
  db.onReady(() => {
    // Fetch a single message; its key becomes the vote link target below.
    query(
      fromDB(db),
      paginate(1),
      toCallback((err, answer) => {
        // Do not destructure `answer` in the parameter list: if the callback is
        // invoked with an error and no second argument, that would throw a
        // TypeError before the real error could be reported.
        if (err) {
          t.fail(err)
          t.end()
          return
        }
        const rootKey = answer.results[0].key

        db.onReady(() => {
          const start = Date.now()
          let i = 0 // number of pages delivered by the pull-stream
          pull(
            query(
              fromDB(db),
              and(
                equal(seekVoteLink, rootKey, {
                  indexType: 'value_content_vote_link',
                  useMap: true,
                  prefix: 32,
                })
              ),
              paginate(5),
              toPullStream()
            ),
            pull.drain(
              () => {
                i++
              },
              (err) => {
                if (err) t.fail(err)
                const duration = Date.now() - start
                if (i !== 92) t.fail('wrong number of pages read: ' + i)
                t.pass(`duration: ${duration}ms`)
                fs.appendFileSync(
                  reportPath,
                  `| Query a prefix map (2nd run) | ${duration}ms |\n`
                )
                t.end()
              }
            )
          )
        })
      })
    )
  })
})

files.js

+39
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,43 @@ function loadTypedArrayFile(filename, Type, cb) {
6767
.catch(cb)
6868
}
6969

70+
/**
 * Serializes a prefix map index and writes it to `filename`.
 *
 * Layout: a header of `4 * FIELD_SIZE` bytes followed by the map encoded as
 * UTF-8 JSON. `version`, `offset` and `count` are written as little-endian
 * uint32 values at byte offsets 0, FIELD_SIZE and 2 * FIELD_SIZE; the rest of
 * the header stays zero-filled.
 *
 * @param {string} filename - destination handed to `writeFile`
 * @param {number} version - stored in the first header field
 * @param {number} offset - stored in the second header field
 * @param {number} count - stored in the third header field
 * @param {Object} map - JSON-serializable map to store after the header
 * @param {Function} [cb] - called without arguments on success, or with the
 *   error on failure; when omitted, failures are logged via console.error
 */
function savePrefixMapFile(filename, version, offset, count, map, cb) {
  const done =
    cb ||
    ((err) => {
      if (err) console.error(err)
    })

  // Size the buffer from the encoded bytes, not from the string: a string's
  // `.length` counts UTF-16 code units, which under-allocates (and silently
  // truncates the JSON) as soon as the map contains non-ASCII text.
  const body = Buffer.from(JSON.stringify(map))
  const headerSize = 4 * FIELD_SIZE
  const b = Buffer.alloc(headerSize + body.length)
  b.writeUInt32LE(version, 0)
  b.writeUInt32LE(offset, FIELD_SIZE)
  b.writeUInt32LE(count, 2 * FIELD_SIZE)
  body.copy(b, headerSize)

  writeFile(filename, b)
    .then(() => done())
    .catch(done)
}
87+
88+
/**
 * Reads a prefix map index file and hands its decoded contents to `cb`.
 *
 * The three leading little-endian uint32 header fields (at byte offsets 0,
 * FIELD_SIZE and 2 * FIELD_SIZE) are returned as `version`, `offset` and
 * `count`; everything from byte `4 * FIELD_SIZE` onwards is parsed as JSON
 * and returned as `map`.
 *
 * @param {string} filename - path handed to `readFile`
 * @param {Function} cb - called as `cb(null, { version, offset, count, map })`
 *   on success, or `cb(err)` when reading or decoding fails
 */
function loadPrefixMapFile(filename, cb) {
  // Turns the raw file contents into the object passed to the callback.
  const decode = (buf) => ({
    version: buf.readUInt32LE(0),
    offset: buf.readUInt32LE(FIELD_SIZE),
    count: buf.readUInt32LE(2 * FIELD_SIZE),
    map: JSON.parse(buf.toString('utf8', 4 * FIELD_SIZE)),
  })

  readFile(filename)
    .then((buf) => {
      cb(null, decode(buf))
    })
    .catch(cb)
}
106+
70107
function saveBitsetFile(filename, version, offset, bitset, cb) {
71108
bitset.trim()
72109
const count = bitset.words.length
@@ -120,6 +157,8 @@ function safeFilename(filename) {
120157
module.exports = {
121158
saveTypedArrayFile,
122159
loadTypedArrayFile,
160+
savePrefixMapFile,
161+
loadPrefixMapFile,
123162
saveBitsetFile,
124163
loadBitsetFile,
125164
listFilesIDB,

index.js

+104-12
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ const debug = require('debug')('jitdb')
1111
const {
1212
saveTypedArrayFile,
1313
loadTypedArrayFile,
14+
savePrefixMapFile,
15+
loadPrefixMapFile,
1416
saveBitsetFile,
1517
loadBitsetFile,
1618
safeFilename,
@@ -113,6 +115,17 @@ module.exports = function (log, indexesPath) {
113115
filepath: path.join(indexesPath, file),
114116
}
115117
cb()
118+
} else if (file.endsWith('.32prefixmap')) {
119+
// Don't load it yet, just tag it `lazy`
120+
indexes[indexName] = {
121+
offset: -1,
122+
count: 0,
123+
map: {},
124+
lazy: true,
125+
prefix: 32,
126+
filepath: path.join(indexesPath, file),
127+
}
128+
cb()
116129
} else if (file.endsWith('.index')) {
117130
// Don't load it yet, just tag it `lazy`
118131
indexes[indexName] = {
@@ -181,6 +194,21 @@ module.exports = function (log, indexesPath) {
181194
)
182195
}
183196

197+
/**
 * Persists a prefix map index under `indexesPath` as
 * `<name>.<prefix>prefixmap`.
 *
 * Nothing is written (and `cb` is not invoked) when the index offset is
 * negative. A missing/falsy `version` on the index is stored as 1.
 *
 * @param {string} name - index name, used as the file's base name
 * @param {Object} prefixIndex - index carrying `offset`, `prefix`, `map` and
 *   optionally `version`
 * @param {number} count - record count forwarded to `savePrefixMapFile`
 * @param {Function} [cb] - forwarded as-is to `savePrefixMapFile`
 */
function savePrefixMapIndex(name, prefixIndex, count, cb) {
  const { offset, prefix, version, map } = prefixIndex
  if (offset < 0) return

  debug('saving prefix map index: %s', name)

  const suffix = `.${prefix}prefixmap`
  const filename = path.join(indexesPath, name + suffix)
  savePrefixMapFile(filename, version || 1, offset, count, map, cb)
}
211+
184212
function growTarrIndex(index, Type) {
185213
debug('growing index')
186214
const newArray = new Type(index.tarr.length * 2)
@@ -284,12 +312,32 @@ module.exports = function (log, indexesPath) {
284312
}
285313
}
286314

315+
/**
 * Appends `seq` to the list stored in `map` under the key `value`, creating
 * the list on first use. A `value` of exactly 0 is not stored at all.
 *
 * @param {Object} map - prefix map, mutated in place
 * @param {number} seq - sequence number to record
 * @param {number} value - prefix used as the map key
 */
function addToPrefixMap(map, seq, value) {
  if (value !== 0) {
    if (!map[value]) map[value] = []
    map[value].push(seq)
  }
}
321+
322+
/**
 * Feeds one log record into a prefix map index.
 *
 * Records whose `seq` is not beyond `index.count - 1` are ignored. Otherwise
 * the field located by `opData.seek` is sliced out of `buffer`, reduced to a
 * uint32 prefix (0 for an empty slice) and registered through
 * `addToPrefixMap`; the index's `offset` and `count` are then advanced
 * whether or not a field was found.
 *
 * @param {Object} opData - operation data providing `seek(buffer)`
 * @param {Object} index - prefix map index (`map`, `count`, `offset`), mutated
 * @param {Buffer} buffer - encoded record
 * @param {number} seq - sequence number of the record
 * @param {number} offset - log offset of the record
 */
function updatePrefixMapIndex(opData, index, buffer, seq, offset) {
  const isNewRecord = seq > index.count - 1
  if (!isNewRecord) return

  const fieldStart = opData.seek(buffer)
  // `~x` is 0 only for x === -1, so this skips records where seek returned -1
  // (presumably "field not present" — confirm against the seek helpers).
  if (~fieldStart) {
    const field = bipf.slice(buffer, fieldStart)
    const prefix = field.length ? safeReadUint32(field) : 0
    addToPrefixMap(index.map, seq, prefix)
  }

  index.offset = offset
  index.count = seq + 1
}
334+
287335
function updatePrefixIndex(opData, index, buffer, seq, offset) {
288336
if (seq > index.count - 1) {
289337
if (seq > index.tarr.length - 1) growTarrIndex(index, Uint32Array)
290338

291339
const fieldStart = opData.seek(buffer)
292-
if (fieldStart) {
340+
if (~fieldStart) {
293341
const buf = bipf.slice(buffer, fieldStart)
294342
index.tarr[seq] = buf.length ? safeReadUint32(buf) : 0
295343
} else {
@@ -368,7 +416,9 @@ module.exports = function (log, indexesPath) {
368416
updatedSequenceIndex = true
369417

370418
if (indexNeedsUpdate) {
371-
if (op.data.prefix)
419+
if (op.data.prefix && op.data.useMap)
420+
updatePrefixMapIndex(op.data, index, buffer, seq, offset)
421+
else if (op.data.prefix)
372422
updatePrefixIndex(op.data, index, buffer, seq, offset)
373423
else updateIndexValue(op, index, buffer, seq)
374424
}
@@ -389,7 +439,10 @@ module.exports = function (log, indexesPath) {
389439

390440
index.offset = indexes['seq'].offset
391441
if (indexNeedsUpdate) {
392-
if (index.prefix) savePrefixIndex(op.data.indexName, index, count)
442+
if (index.prefix && index.map)
443+
savePrefixMapIndex(op.data.indexName, index, count)
444+
else if (index.prefix)
445+
savePrefixIndex(op.data.indexName, index, count)
393446
else saveIndex(op.data.indexName, index)
394447
}
395448

@@ -401,7 +454,14 @@ module.exports = function (log, indexesPath) {
401454
function createIndexes(opsMissingIndexes, cb) {
402455
const newIndexes = {}
403456
opsMissingIndexes.forEach((op) => {
404-
if (op.data.prefix)
457+
if (op.data.prefix && op.data.useMap) {
458+
newIndexes[op.data.indexName] = {
459+
offset: 0,
460+
count: 0,
461+
map: {},
462+
prefix: typeof op.data.prefix === 'number' ? op.data.prefix : 32,
463+
}
464+
} else if (op.data.prefix)
405465
newIndexes[op.data.indexName] = {
406466
offset: 0,
407467
count: 0,
@@ -443,6 +503,14 @@ module.exports = function (log, indexesPath) {
443503
updatedSequenceIndex = true
444504

445505
opsMissingIndexes.forEach((op) => {
506+
if (op.data.prefix && op.data.useMap)
507+
updatePrefixMapIndex(
508+
op.data,
509+
newIndexes[op.data.indexName],
510+
buffer,
511+
seq,
512+
offset
513+
)
446514
if (op.data.prefix)
447515
updatePrefixIndex(
448516
op.data,
@@ -473,7 +541,9 @@ module.exports = function (log, indexesPath) {
473541
for (var indexName in newIndexes) {
474542
const index = (indexes[indexName] = newIndexes[indexName])
475543
index.offset = indexes['seq'].offset
476-
if (index.prefix) savePrefixIndex(indexName, index, count)
544+
if (index.prefix && index.map)
545+
savePrefixMapIndex(indexName, index, count)
546+
else if (index.prefix) savePrefixIndex(indexName, index, count)
477547
else saveIndex(indexName, index)
478548
}
479549

@@ -485,7 +555,18 @@ module.exports = function (log, indexesPath) {
485555
function loadLazyIndex(indexName, cb) {
486556
debug('lazy loading %s', indexName)
487557
let index = indexes[indexName]
488-
if (index.prefix) {
558+
if (index.prefix && index.map) {
559+
loadPrefixMapFile(index.filepath, (err, data) => {
560+
if (err) return cb(err)
561+
const { version, offset, count, map } = data
562+
index.version = version
563+
index.offset = offset
564+
index.count = count
565+
index.map = map
566+
index.lazy = false
567+
cb()
568+
})
569+
} else if (index.prefix) {
489570
loadTypedArrayFile(index.filepath, Uint32Array, (err, data) => {
490571
if (err) return cb(err)
491572
const { version, offset, count, tarr } = data
@@ -576,16 +657,27 @@ module.exports = function (log, indexesPath) {
576657
function matchAgainstPrefix(op, prefixIndex, cb) {
577658
const target = op.data.value
578659
const targetPrefix = target ? safeReadUint32(target) : 0
579-
const count = prefixIndex.count
580-
const tarr = prefixIndex.tarr
581660
const bitset = new TypedFastBitSet()
582661
const done = multicb({ pluck: 1 })
583-
for (let seq = 0; seq < count; ++seq) {
584-
if (tarr[seq] === targetPrefix) {
585-
bitset.add(seq)
586-
getRecord(seq, done())
662+
663+
if (prefixIndex.map) {
664+
if (prefixIndex.map[targetPrefix]) {
665+
prefixIndex.map[targetPrefix].forEach((seq) => {
666+
bitset.add(seq)
667+
getRecord(seq, done())
668+
})
669+
}
670+
} else {
671+
const count = prefixIndex.count
672+
const tarr = prefixIndex.tarr
673+
for (let seq = 0; seq < count; ++seq) {
674+
if (tarr[seq] === targetPrefix) {
675+
bitset.add(seq)
676+
getRecord(seq, done())
677+
}
587678
}
588679
}
680+
589681
done((err, recs) => {
590682
// FIXME: handle error better, this cb() should support 2 args
591683
if (err) return console.error(err)

0 commit comments

Comments
 (0)