Skip to content

Commit

Permalink
DictionaryEncodedColumnPartSerde: Read values using smooshReader.
Browse files Browse the repository at this point in the history
The write side uses a FileSmoosher for valueWriter, which means that the
value component may be split over multiple files. In this case, the
read side needs a SmooshedFileMapper.

It is plausible for multi-valued columns to have value sections in excess
of 2 GB: imagine a segment with 10 million rows and an average of 50 values
per row. The value section would have 500 million ints, which is 2 GB.
  • Loading branch information
gianm committed Jan 30, 2025
1 parent 09fd96e commit 5e96f3f
Showing 1 changed file with 16 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.druid.io.Channels;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.segment.column.BaseColumn;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnConfig;
Expand Down Expand Up @@ -332,10 +333,10 @@ public void read(
final WritableSupplier<ColumnarMultiInts> rMultiValuedColumn;

if (hasMultipleValues) {
rMultiValuedColumn = readMultiValuedColumn(rVersion, buffer, rFlags);
rMultiValuedColumn = readMultiValuedColumn(rVersion, buffer, rFlags, builder.getFileMapper());
rSingleValuedColumn = null;
} else {
rSingleValuedColumn = readSingleValuedColumn(rVersion, buffer);
rSingleValuedColumn = readSingleValuedColumn(rVersion, buffer, builder.getFileMapper());
rMultiValuedColumn = null;
}

Expand Down Expand Up @@ -381,20 +382,29 @@ public void read(
}
}

private WritableSupplier<ColumnarInts> readSingleValuedColumn(VERSION version, ByteBuffer buffer)
private WritableSupplier<ColumnarInts> readSingleValuedColumn(
VERSION version,
ByteBuffer buffer,
SmooshedFileMapper smooshReader
)
{
switch (version) {
case UNCOMPRESSED_SINGLE_VALUE:
case UNCOMPRESSED_WITH_FLAGS:
return VSizeColumnarInts.readFromByteBuffer(buffer);
case COMPRESSED:
return CompressedVSizeColumnarIntsSupplier.fromByteBuffer(buffer, byteOrder);
return CompressedVSizeColumnarIntsSupplier.fromByteBuffer(buffer, byteOrder, smooshReader);
default:
throw new IAE("Unsupported single-value version[%s]", version);
}
}

private WritableSupplier<ColumnarMultiInts> readMultiValuedColumn(VERSION version, ByteBuffer buffer, int flags)
private WritableSupplier<ColumnarMultiInts> readMultiValuedColumn(
VERSION version,
ByteBuffer buffer,
int flags,
SmooshedFileMapper smooshReader
)
{
switch (version) {
case UNCOMPRESSED_MULTI_VALUE: {
Expand All @@ -411,7 +421,7 @@ private WritableSupplier<ColumnarMultiInts> readMultiValuedColumn(VERSION versio
if (Feature.MULTI_VALUE.isSet(flags)) {
return CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
} else if (Feature.MULTI_VALUE_V3.isSet(flags)) {
return V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder);
return V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(buffer, byteOrder, smooshReader);
} else {
throw new IAE("Unrecognized multi-value flag[%d] for version[%s]", flags, version);
}
Expand Down

0 comments on commit 5e96f3f

Please sign in to comment.