Skip to content

Commit 8157be9

Browse files
authored
[common] Using a faster deserialization method in RoaringBitmap32 (apache#4765)
1 parent ba45766 commit 8157be9

File tree

5 files changed: +144 -33 lines changed

5 files changed

+144
-33
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.benchmark.bitmap;
20+
21+
import org.apache.paimon.benchmark.Benchmark;
22+
import org.apache.paimon.fs.local.LocalFileIO;
23+
24+
import org.junit.jupiter.api.Test;
25+
import org.junit.jupiter.api.io.TempDir;
26+
import org.roaringbitmap.RoaringBitmap;
27+
28+
import java.io.DataInputStream;
29+
import java.io.DataOutputStream;
30+
import java.io.File;
31+
import java.io.FileOutputStream;
32+
import java.io.IOException;
33+
import java.nio.ByteBuffer;
34+
import java.nio.file.Path;
35+
import java.util.Random;
36+
37+
import static org.assertj.core.api.Assertions.assertThat;
38+
39+
/** Benchmark for {@link RoaringBitmap}. */
40+
public class RoaringBitmapBenchmark {
41+
42+
public static final int ROW_COUNT = 10000000;
43+
44+
@TempDir Path tempDir;
45+
46+
@Test
47+
public void testDeserialize() throws Exception {
48+
Random random = new Random();
49+
RoaringBitmap bitmap = new RoaringBitmap();
50+
for (int i = 0; i < ROW_COUNT; i++) {
51+
if (random.nextBoolean()) {
52+
bitmap.add(i);
53+
}
54+
}
55+
56+
File file = new File(tempDir.toFile(), "bitmap32-deserialize-benchmark");
57+
assertThat(file.createNewFile()).isTrue();
58+
try (FileOutputStream output = new FileOutputStream(file);
59+
DataOutputStream dos = new DataOutputStream(output)) {
60+
bitmap.serialize(dos);
61+
}
62+
63+
Benchmark benchmark =
64+
new Benchmark("bitmap32-deserialize-benchmark", 100)
65+
.setNumWarmupIters(1)
66+
.setOutputPerIteration(true);
67+
68+
benchmark.addCase(
69+
"deserialize(DataInput)",
70+
10,
71+
() -> {
72+
try (LocalFileIO.LocalSeekableInputStream seekableStream =
73+
new LocalFileIO.LocalSeekableInputStream(file);
74+
DataInputStream input = new DataInputStream(seekableStream)) {
75+
new RoaringBitmap().deserialize(input);
76+
} catch (IOException e) {
77+
throw new RuntimeException(e);
78+
}
79+
});
80+
81+
benchmark.addCase(
82+
"deserialize(DataInput, byte[])",
83+
10,
84+
() -> {
85+
try (LocalFileIO.LocalSeekableInputStream seekableStream =
86+
new LocalFileIO.LocalSeekableInputStream(file);
87+
DataInputStream input = new DataInputStream(seekableStream)) {
88+
new RoaringBitmap().deserialize(input, null);
89+
} catch (IOException e) {
90+
throw new RuntimeException(e);
91+
}
92+
});
93+
94+
benchmark.addCase(
95+
"deserialize(ByteBuffer)",
96+
10,
97+
() -> {
98+
try (LocalFileIO.LocalSeekableInputStream seekableStream =
99+
new LocalFileIO.LocalSeekableInputStream(file);
100+
DataInputStream input = new DataInputStream(seekableStream)) {
101+
byte[] bytes = new byte[(int) file.length()];
102+
input.readFully(bytes);
103+
new RoaringBitmap().deserialize(ByteBuffer.wrap(bytes));
104+
} catch (IOException e) {
105+
throw new RuntimeException(e);
106+
}
107+
});
108+
109+
benchmark.run();
110+
}
111+
}

paimon-common/src/main/java/org/apache/paimon/utils/RoaringBitmap32.java

+12 -12
Original file line number | Diff line number | Diff line change
@@ -85,8 +85,19 @@ public void serialize(DataOutput out) throws IOException {
8585
roaringBitmap.serialize(out);
8686
}
8787

88+
public byte[] serialize() {
89+
roaringBitmap.runOptimize();
90+
ByteBuffer buffer = ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes());
91+
roaringBitmap.serialize(buffer);
92+
return buffer.array();
93+
}
94+
8895
public void deserialize(DataInput in) throws IOException {
89-
roaringBitmap.deserialize(in);
96+
roaringBitmap.deserialize(in, null);
97+
}
98+
99+
public void deserialize(ByteBuffer buffer) throws IOException {
100+
roaringBitmap.deserialize(buffer);
90101
}
91102

92103
@Override
@@ -105,17 +116,6 @@ public void clear() {
105116
roaringBitmap.clear();
106117
}
107118

108-
public byte[] serialize() {
109-
roaringBitmap.runOptimize();
110-
ByteBuffer buffer = ByteBuffer.allocate(roaringBitmap.serializedSizeInBytes());
111-
roaringBitmap.serialize(buffer);
112-
return buffer.array();
113-
}
114-
115-
public void deserialize(byte[] rbmBytes) throws IOException {
116-
roaringBitmap.deserialize(ByteBuffer.wrap(rbmBytes));
117-
}
118-
119119
public void flip(final long rangeStart, final long rangeEnd) {
120120
roaringBitmap.flip(rangeStart, rangeEnd);
121121
}

paimon-core/src/main/java/org/apache/paimon/deletionvectors/BitmapDeletionVector.java

+5 -5
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,9 @@
2121
import org.apache.paimon.utils.RoaringBitmap32;
2222

2323
import java.io.ByteArrayOutputStream;
24-
import java.io.DataInput;
2524
import java.io.DataOutputStream;
2625
import java.io.IOException;
26+
import java.nio.ByteBuffer;
2727
import java.util.Objects;
2828

2929
/**
@@ -93,10 +93,10 @@ public byte[] serializeToBytes() {
9393
}
9494
}
9595

96-
public static DeletionVector deserializeFromDataInput(DataInput bis) throws IOException {
97-
RoaringBitmap32 roaringBitmap = new RoaringBitmap32();
98-
roaringBitmap.deserialize(bis);
99-
return new BitmapDeletionVector(roaringBitmap);
96+
public static DeletionVector deserializeFromByteBuffer(ByteBuffer buffer) throws IOException {
97+
RoaringBitmap32 bitmap = new RoaringBitmap32();
98+
bitmap.deserialize(buffer);
99+
return new BitmapDeletionVector(bitmap);
100100
}
101101

102102
private void checkPosition(long position) {

paimon-core/src/main/java/org/apache/paimon/deletionvectors/DeletionVector.java

+13 -14
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,9 @@
2626

2727
import javax.annotation.Nullable;
2828

29-
import java.io.ByteArrayInputStream;
3029
import java.io.DataInputStream;
3130
import java.io.IOException;
31+
import java.nio.ByteBuffer;
3232
import java.util.List;
3333
import java.util.Optional;
3434

@@ -99,11 +99,11 @@ default boolean checkedDelete(long position) {
9999
* @return A DeletionVector instance that represents the deserialized data.
100100
*/
101101
static DeletionVector deserializeFromBytes(byte[] bytes) {
102-
try (ByteArrayInputStream bis = new ByteArrayInputStream(bytes);
103-
DataInputStream dis = new DataInputStream(bis)) {
104-
int magicNum = dis.readInt();
102+
try {
103+
ByteBuffer buffer = ByteBuffer.wrap(bytes);
104+
int magicNum = buffer.getInt();
105105
if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
106-
return BitmapDeletionVector.deserializeFromDataInput(dis);
106+
return BitmapDeletionVector.deserializeFromByteBuffer(buffer);
107107
} else {
108108
throw new RuntimeException("Invalid magic number: " + magicNum);
109109
}
@@ -117,22 +117,21 @@ static DeletionVector read(FileIO fileIO, DeletionFile deletionFile) throws IOEx
117117
try (SeekableInputStream input = fileIO.newInputStream(path)) {
118118
input.seek(deletionFile.offset());
119119
DataInputStream dis = new DataInputStream(input);
120-
int actualLength = dis.readInt();
121-
if (actualLength != deletionFile.length()) {
120+
int actualSize = dis.readInt();
121+
if (actualSize != deletionFile.length()) {
122122
throw new RuntimeException(
123123
"Size not match, actual size: "
124-
+ actualLength
124+
+ actualSize
125125
+ ", expert size: "
126126
+ deletionFile.length()
127127
+ ", file path: "
128128
+ path);
129129
}
130-
int magicNum = dis.readInt();
131-
if (magicNum == BitmapDeletionVector.MAGIC_NUMBER) {
132-
return BitmapDeletionVector.deserializeFromDataInput(dis);
133-
} else {
134-
throw new RuntimeException("Invalid magic number: " + magicNum);
135-
}
130+
131+
// read DeletionVector bytes
132+
byte[] bytes = new byte[actualSize];
133+
dis.readFully(bytes);
134+
return deserializeFromBytes(bytes);
136135
}
137136
}
138137

paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldRoaringBitmap32Agg.java

+3 -2
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import org.apache.paimon.utils.RoaringBitmap32;
2323

2424
import java.io.IOException;
25+
import java.nio.ByteBuffer;
2526

2627
/** roaring bitmap aggregate a field of a row. */
2728
public class FieldRoaringBitmap32Agg extends FieldAggregator {
@@ -43,8 +44,8 @@ public Object agg(Object accumulator, Object inputField) {
4344
}
4445

4546
try {
46-
roaringBitmapAcc.deserialize((byte[]) accumulator);
47-
roaringBitmapInput.deserialize((byte[]) inputField);
47+
roaringBitmapAcc.deserialize(ByteBuffer.wrap((byte[]) accumulator));
48+
roaringBitmapInput.deserialize(ByteBuffer.wrap((byte[]) inputField));
4849
roaringBitmapAcc.or(roaringBitmapInput);
4950
return roaringBitmapAcc.serialize();
5051
} catch (IOException e) {

0 commit comments

Comments
 (0)