From 6867430140427bcc84755d80670cfa517eb8eecd Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 17 Dec 2024 09:39:49 +0000 Subject: [PATCH 1/4] Revert "Introduces IndexInput#updateReadAdvice to change the ReadAdvice while merging vectors (#13985)" This reverts commit 46204f6b53c5bcbba89cc6acdd27f6cbe283f027. --- .../lucene/codecs/KnnVectorsReader.java | 7 --- .../lucene/codecs/KnnVectorsWriter.java | 9 ---- .../lucene/codecs/hnsw/FlatVectorsReader.java | 11 ---- .../lucene99/Lucene99FlatVectorsReader.java | 19 ------- .../lucene99/Lucene99HnswVectorsReader.java | 21 +------- .../perfield/PerFieldKnnVectorsFormat.java | 21 -------- .../org/apache/lucene/store/IndexInput.java | 8 --- .../lucene/store/MemorySegmentIndexInput.java | 14 ------ .../asserting/AssertingKnnVectorsFormat.java | 50 ------------------- .../tests/store/BaseDirectoryTestCase.java | 38 +------------- .../tests/store/MockIndexInputWrapper.java | 10 +--- 11 files changed, 4 insertions(+), 204 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java index 54d070fa5995..e054ebeb2bb1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java @@ -123,11 +123,4 @@ public abstract void search( public KnnVectorsReader getMergeInstance() { return this; } - - /** - * Optional: reset or close merge resources used in the reader - * - *

The default implementation is empty - */ - public void finishMerge() throws IOException {} } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java index 4bd10215c25d..50af32a7e162 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java @@ -109,18 +109,9 @@ public final void merge(MergeState mergeState) throws IOException { } } } - finishMerge(mergeState); finish(); } - private void finishMerge(MergeState mergeState) throws IOException { - for (KnnVectorsReader reader : mergeState.knnVectorsReaders) { - if (reader != null) { - reader.finishMerge(); - } - } - } - /** Tracks state of one sub-reader that we are merging */ private static class FloatVectorValuesSub extends DocIDMerger.Sub { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java index b03e42cfbb41..9d776567883e 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java @@ -88,15 +88,4 @@ public abstract RandomVectorScorer getRandomVectorScorer(String field, float[] t */ public abstract RandomVectorScorer getRandomVectorScorer(String field, byte[] target) throws IOException; - - /** - * Returns an instance optimized for merging. This instance may only be consumed in the thread - * that called {@link #getMergeInstance()}. - * - *

The default implementation returns {@code this} - */ - @Override - public FlatVectorsReader getMergeInstance() { - return this; - } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java index 52839000137c..9b42ddd0f267 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java @@ -21,7 +21,6 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; import java.io.IOException; -import java.io.UncheckedIOException; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; @@ -171,17 +170,6 @@ public void checkIntegrity() throws IOException { CodecUtil.checksumEntireFile(vectorData); } - @Override - public FlatVectorsReader getMergeInstance() { - try { - // Update the read advice since vectors are guaranteed to be accessed sequentially for merge - this.vectorData.updateReadAdvice(ReadAdvice.SEQUENTIAL); - return this; - } catch (IOException exception) { - throw new UncheckedIOException(exception); - } - } - private FieldEntry getFieldEntry(String field, VectorEncoding expectedEncoding) { final FieldInfo info = fieldInfos.fieldInfo(field); final FieldEntry fieldEntry; @@ -262,13 +250,6 @@ public RandomVectorScorer getRandomVectorScorer(String field, byte[] target) thr target); } - @Override - public void finishMerge() throws IOException { - // This makes sure that the access pattern hint is reverted back since HNSW implementation - // needs it - this.vectorData.updateReadAdvice(ReadAdvice.RANDOM); - } - @Override public void close() throws IOException { IOUtils.close(vectorData); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java index ed6388b53cb7..2a3088527f5f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java @@ -69,12 +69,11 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader private final FlatVectorsReader flatVectorsReader; private final FieldInfos fieldInfos; - private final IntObjectHashMap fields; + private final IntObjectHashMap fields = new IntObjectHashMap<>(); private final IndexInput vectorIndex; public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader) throws IOException { - this.fields = new IntObjectHashMap<>(); this.flatVectorsReader = flatVectorsReader; boolean success = false; this.fieldInfos = state.fieldInfos; @@ -114,24 +113,6 @@ public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatV } } - private Lucene99HnswVectorsReader( - Lucene99HnswVectorsReader reader, FlatVectorsReader flatVectorsReader) { - this.flatVectorsReader = flatVectorsReader; - this.fieldInfos = reader.fieldInfos; - this.fields = reader.fields; - this.vectorIndex = reader.vectorIndex; - } - - @Override - public KnnVectorsReader getMergeInstance() { - return new Lucene99HnswVectorsReader(this, this.flatVectorsReader.getMergeInstance()); - } - - @Override - public void finishMerge() throws IOException { - flatVectorsReader.finishMerge(); - } - private static IndexInput openDataInput( SegmentReadState state, int versionMeta, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java index bc18b231e74f..63bad6d48dad 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java @@ -239,27 +239,6 @@ public FieldsReader(final SegmentReadState readState) throws IOException { } } - private FieldsReader(final FieldsReader fieldsReader) { - this.fieldInfos = fieldsReader.fieldInfos; - for (FieldInfo fi : this.fieldInfos) { - if (fi.hasVectorValues() && fieldsReader.fields.containsKey(fi.number)) { - this.fields.put(fi.number, fieldsReader.fields.get(fi.number).getMergeInstance()); - } - } - } - - @Override - public KnnVectorsReader getMergeInstance() { - return new FieldsReader(this); - } - - @Override - public void finishMerge() throws IOException { - for (ObjectCursor knnVectorReader : fields.values()) { - knnVectorReader.value.finishMerge(); - } - } - /** * Return the underlying VectorReader for the given field * diff --git a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java index b649ace8c4db..4a42ef3139d4 100644 --- a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java @@ -228,14 +228,6 @@ public String toString() { */ public void prefetch(long offset, long length) throws IOException {} - /** - * Optional method: Give a hint to this input about the change in read access pattern. IndexInput - * implementations may take advantage of this hint to optimize reads from storage. - * - *

The default implementation is a no-op. - */ - public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {} - /** * Returns a hint whether all the contents of this input are resident in physical memory. It's a * hint because the operating system may have paged out some of the data by the time this method diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java index 2424b53645bd..8e72fa225858 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java +++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java @@ -360,20 +360,6 @@ public void prefetch(long offset, long length) throws IOException { }); } - @Override - public void updateReadAdvice(ReadAdvice readAdvice) throws IOException { - if (NATIVE_ACCESS.isEmpty()) { - return; - } - final NativeAccess nativeAccess = NATIVE_ACCESS.get(); - - long offset = 0; - for (MemorySegment seg : segments) { - advise(offset, seg.byteSize(), segment -> nativeAccess.madvise(segment, readAdvice)); - offset += seg.byteSize(); - } - } - void advise(long offset, long length, IOConsumer advice) throws IOException { if (NATIVE_ACCESS.isEmpty()) { return; diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java index bd2911fc50a8..21c62090a698 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java @@ -18,7 +18,6 @@ package org.apache.lucene.tests.codecs.asserting; import java.io.IOException; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; @@ -106,19 +105,11 @@ public long ramBytesUsed() { static class AssertingKnnVectorsReader extends KnnVectorsReader implements HnswGraphProvider { final KnnVectorsReader delegate; final FieldInfos fis; - final boolean mergeInstance; - AtomicInteger mergeInstanceCount = new AtomicInteger(); - AtomicInteger finishMergeCount = new AtomicInteger(); AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis) { - this(delegate, fis, false); - } - - AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis, boolean mergeInstance) { assert delegate != null; this.delegate = delegate; this.fis = fis; - this.mergeInstance = mergeInstance; } @Override @@ -157,7 +148,6 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { @Override public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - assert !mergeInstance; FieldInfo fi = fis.fieldInfo(field); assert fi != null && fi.getVectorDimension() > 0 @@ -168,7 +158,6 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits @Override public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - assert !mergeInstance; FieldInfo fi = fis.fieldInfo(field); assert fi != null && fi.getVectorDimension() > 0 @@ -176,49 +165,10 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits delegate.search(field, target, knnCollector, acceptDocs); } - @Override - public KnnVectorsReader getMergeInstance() { - assert !mergeInstance; - var mergeVectorsReader = delegate.getMergeInstance(); - assert mergeVectorsReader != null; - mergeInstanceCount.incrementAndGet(); - - final var parent = this; - return new AssertingKnnVectorsReader( - mergeVectorsReader, AssertingKnnVectorsReader.this.fis, true) { - @Override - public KnnVectorsReader getMergeInstance() { - assert false; // merging from a merge instance it not allowed - return null; - } - - @Override - public void finishMerge() throws IOException { - assert mergeInstance; - delegate.finishMerge(); - parent.finishMergeCount.incrementAndGet(); - } - - @Override - public void close() { - assert false; // closing the merge instance it not allowed - } - }; - } - - @Override - public void finishMerge() throws IOException { - assert mergeInstance; - delegate.finishMerge(); - finishMergeCount.incrementAndGet(); - } - @Override public void close() throws IOException { - assert !mergeInstance; delegate.close(); delegate.close(); - assert finishMergeCount.get() <= 0 || mergeInstanceCount.get() == finishMergeCount.get(); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java index 6defa5eb8c7a..35ecbddb2b8b 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java @@ -16,11 +16,10 @@ */ package org.apache.lucene.tests.store; -import static com.carrotsearch.randomizedtesting.generators.RandomPicks.randomFrom; - import com.carrotsearch.randomizedtesting.RandomizedTest; import com.carrotsearch.randomizedtesting.generators.RandomBytes; import com.carrotsearch.randomizedtesting.generators.RandomNumbers; +import com.carrotsearch.randomizedtesting.generators.RandomPicks; import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; @@ -57,7 +56,6 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.store.RandomAccessInput; -import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.tests.mockfile.ExtrasFS; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; @@ -644,7 +642,7 @@ public void testThreadSafetyInListAll() throws Exception { if (files.length > 0) { do { - String file = randomFrom(rnd, files); + String file = RandomPicks.randomFrom(rnd, files); try (IndexInput input = dir.openInput(file, newIOContext(random()))) { // Just open, nothing else. assert input != null; @@ -1559,38 +1557,6 @@ public void testPrefetchOnSlice() throws IOException { doTestPrefetch(TestUtil.nextInt(random(), 1, 1024)); } - public void testUpdateReadAdvice() throws IOException { - try (Directory dir = getDirectory(createTempDir("testUpdateReadAdvice"))) { - final int totalLength = TestUtil.nextInt(random(), 16384, 65536); - byte[] arr = new byte[totalLength]; - random().nextBytes(arr); - try (IndexOutput out = dir.createOutput("temp.bin", IOContext.DEFAULT)) { - out.writeBytes(arr, arr.length); - } - - try (IndexInput orig = dir.openInput("temp.bin", IOContext.DEFAULT)) { - IndexInput in = random().nextBoolean() ? orig.clone() : orig; - // Read advice updated at start - in.updateReadAdvice(randomFrom(random(), ReadAdvice.values())); - for (int i = 0; i < totalLength; i++) { - int offset = TestUtil.nextInt(random(), 0, (int) in.length() - 1); - in.seek(offset); - assertEquals(arr[offset], in.readByte()); - } - - // Updating readAdvice in the middle - for (int i = 0; i < 10_000; ++i) { - int offset = TestUtil.nextInt(random(), 0, (int) in.length() - 1); - in.seek(offset); - assertEquals(arr[offset], in.readByte()); - if (random().nextBoolean()) { - in.updateReadAdvice(randomFrom(random(), ReadAdvice.values())); - } - } - } - } - } - private void doTestPrefetch(int startOffset) throws IOException { try (Directory dir = getDirectory(createTempDir())) { final int totalLength = startOffset + TestUtil.nextInt(random(), 16384, 65536); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java index 1dc5456d64ef..3ee61189fc95 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java @@ -41,9 +41,9 @@ public class MockIndexInputWrapper extends FilterIndexInput { // Which MockIndexInputWrapper we were cloned from, or null if we are not a clone: private final MockIndexInputWrapper parent; + private final ReadAdvice readAdvice; private final boolean confined; private final Thread thread; - private ReadAdvice readAdvice; /** Sole constructor */ public MockIndexInputWrapper( @@ -192,14 +192,6 @@ public Optional isLoaded() { return in.isLoaded(); } - @Override - public void updateReadAdvice(ReadAdvice readAdvice) throws IOException { - ensureOpen(); - ensureAccessible(); - this.readAdvice = readAdvice; - in.updateReadAdvice(readAdvice); - } - @Override public long length() { ensureOpen(); From 9649461e3574f0499327f2697bf0da8df6b409b9 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 17 Dec 2024 13:34:02 +0000 Subject: [PATCH 2/4] Seal random or sequential access for the knn codec depending on their access pattern --- ...RWHnswScalarQuantizationVectorsFormat.java | 6 +++-- .../bitvectors/HnswBitVectorsFormat.java | 3 ++- .../lucene99/Lucene99FlatVectorsFormat.java | 9 +++++--- .../lucene99/Lucene99FlatVectorsReader.java | 6 ++--- .../lucene99/Lucene99FlatVectorsWriter.java | 7 ++++-- ...ene99HnswScalarQuantizedVectorsFormat.java | 9 +++++++- .../lucene99/Lucene99HnswVectorsFormat.java | 9 ++++++-- .../Lucene99ScalarQuantizedVectorsFormat.java | 23 +++++++++++++++---- .../Lucene99ScalarQuantizedVectorsReader.java | 8 +++---- .../Lucene99ScalarQuantizedVectorsWriter.java | 20 +++++++++++----- .../lucene/search/AbstractKnnVectorQuery.java | 1 - ...tLucene99ScalarQuantizedVectorsFormat.java | 13 ++++++----- 12 files changed, 77 insertions(+), 37 deletions(-) diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/Lucene99RWHnswScalarQuantizationVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/Lucene99RWHnswScalarQuantizationVectorsFormat.java index 8eac95dd9ef4..54fc4d38b4a0 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/Lucene99RWHnswScalarQuantizationVectorsFormat.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene99/Lucene99RWHnswScalarQuantizationVectorsFormat.java @@ -30,6 +30,7 @@ import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat; import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsWriter; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.ReadAdvice; class Lucene99RWHnswScalarQuantizationVectorsFormat extends Lucene99HnswScalarQuantizedVectorsFormat { @@ -54,7 +55,7 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException static class Lucene99RWScalarQuantizedFormat extends Lucene99ScalarQuantizedVectorsFormat { private static final FlatVectorsFormat rawVectorFormat = - new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()); + new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer(), ReadAdvice.RANDOM); @Override public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { @@ -62,7 +63,8 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio state, null, rawVectorFormat.fieldsWriter(state), - new ScalarQuantizedVectorScorer(new DefaultFlatVectorScorer())); + new ScalarQuantizedVectorScorer(new DefaultFlatVectorScorer()), + ReadAdvice.RANDOM); } } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/HnswBitVectorsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/HnswBitVectorsFormat.java index f5888c0a03c1..6c03009528b3 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/HnswBitVectorsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/HnswBitVectorsFormat.java @@ -41,6 +41,7 @@ import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.search.TaskExecutor; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.hnsw.HnswGraph; /** @@ -128,7 +129,7 @@ public HnswBitVectorsFormat( } else { this.mergeExec = null; } - this.flatVectorsFormat = new Lucene99FlatVectorsFormat(new FlatBitVectorsScorer()); + this.flatVectorsFormat = new Lucene99FlatVectorsFormat(new FlatBitVectorsScorer(), ReadAdvice.RANDOM); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java index c8ef2709db66..95d8ba6b1939 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsFormat.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.ReadAdvice; /** * Lucene 9.9 flat vector format, which encodes numeric vector values @@ -78,21 +79,23 @@ public final class Lucene99FlatVectorsFormat extends FlatVectorsFormat { static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; private final FlatVectorsScorer vectorsScorer; + private final ReadAdvice readAdvice; /** Constructs a format */ - public Lucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer) { + public Lucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer, ReadAdvice readAdvice) { super(NAME); this.vectorsScorer = vectorsScorer; + this.readAdvice = readAdvice; } @Override public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new Lucene99FlatVectorsWriter(state, vectorsScorer); + return new Lucene99FlatVectorsWriter(state, vectorsScorer, readAdvice); } @Override public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return new Lucene99FlatVectorsReader(state, vectorsScorer); + return new Lucene99FlatVectorsReader(state, vectorsScorer, readAdvice); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java index 9b42ddd0f267..92a6cd5f5f0c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java @@ -59,7 +59,7 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader { private final IndexInput vectorData; private final FieldInfos fieldInfos; - public Lucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer scorer) + public Lucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer scorer, ReadAdvice readAdvice) throws IOException { super(scorer); int versionMeta = readMetadata(state); @@ -72,9 +72,7 @@ public Lucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer score versionMeta, Lucene99FlatVectorsFormat.VECTOR_DATA_EXTENSION, Lucene99FlatVectorsFormat.VECTOR_DATA_CODEC_NAME, - // Flat formats are used to randomly access vectors from their node ID that is stored - // in the HNSW graph. - state.context.withReadAdvice(ReadAdvice.RANDOM)); + state.context.withReadAdvice(readAdvice)); success = true; } finally { if (success == false) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java index b731e758b7a8..0634b48d20db 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java @@ -66,15 +66,18 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter { RamUsageEstimator.shallowSizeOfInstance(Lucene99FlatVectorsWriter.class); private final SegmentWriteState segmentWriteState; + private final ReadAdvice readAdvice; private final IndexOutput meta, vectorData; private final List> fields = new ArrayList<>(); + private boolean finished; - public Lucene99FlatVectorsWriter(SegmentWriteState state, FlatVectorsScorer scorer) + public Lucene99FlatVectorsWriter(SegmentWriteState state, FlatVectorsScorer scorer, ReadAdvice readAdvice) throws IOException { super(scorer); segmentWriteState = state; + this.readAdvice = readAdvice; String metaFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, Lucene99FlatVectorsFormat.META_EXTENSION); @@ -282,7 +285,7 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex( // to perform random reads. vectorDataInput = segmentWriteState.directory.openInput( - tempVectorData.getName(), IOContext.DEFAULT.withReadAdvice(ReadAdvice.RANDOM)); + tempVectorData.getName(), IOContext.DEFAULT.withReadAdvice(readAdvice)); // copy the temporary file vectors to the actual data file vectorData.copyBytes(vectorDataInput, vectorDataInput.length() - CodecUtil.footerLength()); CodecUtil.retrieveChecksum(vectorDataInput); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java index 1966ed21d654..501daa56c782 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.search.TaskExecutor; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.hnsw.HnswGraph; /** @@ -134,8 +135,14 @@ public Lucene99HnswScalarQuantizedVectorsFormat( } else { this.mergeExec = null; } + + /** + * Defines the format used for storing, reading, and merging vectors on disk. + * Flat formats enable random access to vectors based on their node ID, as recorded in the HNSW graph. + * To ensure consistent access, the {@link ReadAdvice#RANDOM} read advice is used. + */ this.flatVectorsFormat = - new Lucene99ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress); + new Lucene99ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress, ReadAdvice.RANDOM); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java index 04ad32d8aa9c..b4baa4be1f3d 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.search.TaskExecutor; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.hnsw.HnswGraph; import org.apache.lucene.util.hnsw.HnswGraphBuilder; @@ -130,9 +131,13 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat { */ private final int beamWidth; - /** The format for storing, reading, and merging vectors on disk. */ + /** + * Defines the format used for storing, reading, and merging vectors on disk. + * Flat formats enable random access to vectors based on their node ID, as recorded in the HNSW graph. + * To ensure consistent access, the {@link ReadAdvice#RANDOM} read advice is used. + */ private static final FlatVectorsFormat flatVectorsFormat = - new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); + new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer(), ReadAdvice.RANDOM); private final int numMergeWorkers; private final TaskExecutor mergeExec; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java index 0b3c6d19af83..b8942e5f1471 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java @@ -25,6 +25,7 @@ import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.ReadAdvice; /** * Format supporting vector quantization, storage, and retrieval @@ -50,8 +51,13 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat { static final String META_EXTENSION = "vemq"; static final String VECTOR_DATA_EXTENSION = "veq"; + /** + * Defines the format used for storing, reading, and merging raw vectors on disk. + * For this format, the {@link ReadAdvice#SEQUENTIAL} read advice is employed, + * as nearest neighbors are retrieved exclusively using a brute-force approach. + */ private static final FlatVectorsFormat rawVectorFormat = - new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); + new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer(), ReadAdvice.SEQUENTIAL); /** The minimum confidence interval */ private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f; @@ -71,10 +77,15 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat { final byte bits; final boolean compress; final Lucene99ScalarQuantizedVectorScorer flatVectorScorer; + final ReadAdvice readAdvice; /** Constructs a format using default graph construction parameters */ public Lucene99ScalarQuantizedVectorsFormat() { - this(null, 7, false); + /** + * For this format, the {@link ReadAdvice#SEQUENTIAL} read advice is employed, + * as nearest neighbors are retrieved exclusively using a brute-force approach. + */ + this(null, 7, false, ReadAdvice.SEQUENTIAL); } /** @@ -91,7 +102,7 @@ public Lucene99ScalarQuantizedVectorsFormat() { * during searching, at some decode speed penalty. */ public Lucene99ScalarQuantizedVectorsFormat( - Float confidenceInterval, int bits, boolean compress) { + Float confidenceInterval, int bits, boolean compress, ReadAdvice readAdvice) { super(NAME); if (confidenceInterval != null && confidenceInterval != DYNAMIC_CONFIDENCE_INTERVAL @@ -119,6 +130,7 @@ public Lucene99ScalarQuantizedVectorsFormat( this.compress = compress; this.flatVectorScorer = new Lucene99ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE); + this.readAdvice = readAdvice; } public static float calculateDefaultConfidenceInterval(int vectorDimension) { @@ -151,12 +163,13 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio bits, compress, rawVectorFormat.fieldsWriter(state), - flatVectorScorer); + flatVectorScorer, + readAdvice); } @Override public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { return new Lucene99ScalarQuantizedVectorsReader( - state, rawVectorFormat.fieldsReader(state), flatVectorScorer); + state, rawVectorFormat.fieldsReader(state), flatVectorScorer, readAdvice); } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java index 712e9b91f9d2..cdb3987d4799 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java @@ -62,14 +62,16 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade private final IndexInput quantizedVectorData; private final FlatVectorsReader rawVectorsReader; private final FieldInfos fieldInfos; + private final ReadAdvice readAdvice; public Lucene99ScalarQuantizedVectorsReader( - SegmentReadState state, FlatVectorsReader rawVectorsReader, FlatVectorsScorer scorer) + SegmentReadState state, FlatVectorsReader rawVectorsReader, FlatVectorsScorer scorer, ReadAdvice readAdvice) throws IOException { super(scorer); this.rawVectorsReader = rawVectorsReader; this.fieldInfos = state.fieldInfos; int versionMeta = -1; + this.readAdvice = readAdvice; String metaFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, @@ -99,9 +101,7 @@ public Lucene99ScalarQuantizedVectorsReader( versionMeta, Lucene99ScalarQuantizedVectorsFormat.VECTOR_DATA_EXTENSION, Lucene99ScalarQuantizedVectorsFormat.VECTOR_DATA_CODEC_NAME, - // Quantized vectors are accessed randomly from their node ID stored in the HNSW - // graph. - state.context.withReadAdvice(ReadAdvice.RANDOM)); + state.context.withReadAdvice(readAdvice)); success = true; } finally { if (success == false) { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java index 1a30b5271cd7..50fb202df686 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java @@ -54,6 +54,7 @@ import org.apache.lucene.search.VectorScorer; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.VectorUtil; @@ -101,13 +102,15 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite private final byte bits; private final boolean compress; private final int version; + private final ReadAdvice readAdvice; private boolean finished; public Lucene99ScalarQuantizedVectorsWriter( SegmentWriteState state, Float confidenceInterval, FlatVectorsWriter rawVectorDelegate, - FlatVectorsScorer scorer) + FlatVectorsScorer scorer, + ReadAdvice readAdvice) throws IOException { this( state, @@ -116,7 +119,8 @@ public Lucene99ScalarQuantizedVectorsWriter( (byte) 7, false, rawVectorDelegate, - scorer); + scorer, + readAdvice); if (confidenceInterval != null && confidenceInterval == 0) { throw new IllegalArgumentException("confidenceInterval cannot be set to zero"); } @@ -128,7 +132,8 @@ public Lucene99ScalarQuantizedVectorsWriter( byte bits, boolean compress, FlatVectorsWriter rawVectorDelegate, - FlatVectorsScorer scorer) + FlatVectorsScorer scorer, + ReadAdvice readAdvice) throws IOException { this( state, @@ -137,7 +142,8 @@ public Lucene99ScalarQuantizedVectorsWriter( bits, compress, rawVectorDelegate, - scorer); + scorer, + readAdvice); } private Lucene99ScalarQuantizedVectorsWriter( @@ -147,7 +153,8 @@ private Lucene99ScalarQuantizedVectorsWriter( byte bits, boolean compress, FlatVectorsWriter rawVectorDelegate, - FlatVectorsScorer scorer) + FlatVectorsScorer scorer, + ReadAdvice readAdvice) throws IOException { super(scorer); this.confidenceInterval = confidenceInterval; @@ -167,6 +174,7 @@ private Lucene99ScalarQuantizedVectorsWriter( state.segmentSuffix, Lucene99ScalarQuantizedVectorsFormat.VECTOR_DATA_EXTENSION); this.rawVectorDelegate = rawVectorDelegate; + this.readAdvice = readAdvice; boolean success = false; try { meta = state.directory.createOutput(metaFileName, state.context); @@ -491,7 +499,7 @@ private ScalarQuantizedCloseableRandomVectorScorerSupplier mergeOneFieldToIndex( IOUtils.close(tempQuantizedVectorData); quantizationDataInput = segmentWriteState.directory.openInput( - tempQuantizedVectorData.getName(), segmentWriteState.context); + tempQuantizedVectorData.getName(), segmentWriteState.context.withReadAdvice(readAdvice)); quantizedVectorData.copyBytes( quantizationDataInput, quantizationDataInput.length() - CodecUtil.footerLength()); long vectorDataLength = quantizedVectorData.getFilePointer() - vectorDataOffset; diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java b/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java index e9246a8b5756..39d2a2dc1d4b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java @@ -132,7 +132,6 @@ private TopDocs getLeafResults( if (scorer == null) { return NO_RESULTS; } - BitSet acceptDocs = createBitSet(scorer.iterator(), liveDocs, reader.maxDoc()); final int cost = acceptDocs.cardinality(); QueryTimeout queryTimeout = timeLimitingKnnCollectorManager.getQueryTimeout(); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java index cf1436f21092..bccbff2d8427 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java @@ -41,6 +41,7 @@ import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.VectorUtil; @@ -64,7 +65,7 @@ public void setUp() throws Exception { } format = new Lucene99ScalarQuantizedVectorsFormat( - confidenceInterval, bits, bits == 4 ? random().nextBoolean() : false); + confidenceInterval, bits, bits == 4 ? random().nextBoolean() : false, random().nextBoolean() ? ReadAdvice.RANDOM : ReadAdvice.SEQUENTIAL); super.setUp(); } @@ -198,7 +199,7 @@ public void testToString() { new FilterCodec("foo", Codec.getDefault()) { @Override public KnnVectorsFormat knnVectorsFormat() { - return new Lucene99ScalarQuantizedVectorsFormat(0.9f, (byte) 4, false); + return new Lucene99ScalarQuantizedVectorsFormat(0.9f, (byte) 4, false, ReadAdvice.RANDOM); } }; String expectedPattern = @@ -212,16 +213,16 @@ public KnnVectorsFormat knnVectorsFormat() { public void testLimits() { expectThrows( IllegalArgumentException.class, - () -> new Lucene99ScalarQuantizedVectorsFormat(1.1f, 7, false)); + () -> new Lucene99ScalarQuantizedVectorsFormat(1.1f, 7, false, ReadAdvice.RANDOM)); expectThrows( IllegalArgumentException.class, - () -> new Lucene99ScalarQuantizedVectorsFormat(null, -1, false)); + () -> new Lucene99ScalarQuantizedVectorsFormat(null, -1, false, ReadAdvice.RANDOM)); expectThrows( IllegalArgumentException.class, - () -> new Lucene99ScalarQuantizedVectorsFormat(null, 5, false)); + () -> new Lucene99ScalarQuantizedVectorsFormat(null, 5, false, ReadAdvice.RANDOM)); expectThrows( IllegalArgumentException.class, - () -> new Lucene99ScalarQuantizedVectorsFormat(null, 9, false)); + () -> new Lucene99ScalarQuantizedVectorsFormat(null, 9, false, ReadAdvice.RANDOM)); } @Override From 0d4a38ac163a3b919547afb2e081a16be28c763a Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 17 Dec 2024 14:06:07 +0000 Subject: [PATCH 3/4] tidy --- .../codecs/bitvectors/HnswBitVectorsFormat.java | 3 ++- .../codecs/lucene99/Lucene99FlatVectorsReader.java | 4 ++-- .../codecs/lucene99/Lucene99FlatVectorsWriter.java | 4 ++-- .../Lucene99HnswScalarQuantizedVectorsFormat.java | 9 +++++---- .../codecs/lucene99/Lucene99HnswVectorsFormat.java | 9 +++++---- .../Lucene99ScalarQuantizedVectorsFormat.java | 13 +++++++------ .../Lucene99ScalarQuantizedVectorsReader.java | 5 ++++- .../Lucene99ScalarQuantizedVectorsWriter.java | 3 ++- .../TestLucene99ScalarQuantizedVectorsFormat.java | 8 ++++++-- 9 files changed, 35 insertions(+), 23 deletions(-) diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/HnswBitVectorsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/HnswBitVectorsFormat.java index 6c03009528b3..6caa443d2b68 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/HnswBitVectorsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/HnswBitVectorsFormat.java @@ -129,7 +129,8 @@ public HnswBitVectorsFormat( } else { this.mergeExec = null; } - this.flatVectorsFormat = new Lucene99FlatVectorsFormat(new FlatBitVectorsScorer(), ReadAdvice.RANDOM); + this.flatVectorsFormat = + new Lucene99FlatVectorsFormat(new FlatBitVectorsScorer(), ReadAdvice.RANDOM); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java index 92a6cd5f5f0c..440dd4e3a17f 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java @@ -59,8 +59,8 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader { private final IndexInput vectorData; private final FieldInfos fieldInfos; - public Lucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer scorer, ReadAdvice readAdvice) - throws IOException { + public Lucene99FlatVectorsReader( + SegmentReadState state, FlatVectorsScorer scorer, ReadAdvice readAdvice) throws IOException { super(scorer); int versionMeta = readMetadata(state); this.fieldInfos = state.fieldInfos; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java index 0634b48d20db..e48705994194 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java @@ -73,8 +73,8 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter { private boolean finished; - public Lucene99FlatVectorsWriter(SegmentWriteState state, FlatVectorsScorer scorer, ReadAdvice readAdvice) - throws IOException { + public Lucene99FlatVectorsWriter( + SegmentWriteState state, FlatVectorsScorer scorer, ReadAdvice readAdvice) throws IOException { super(scorer); segmentWriteState = state; this.readAdvice = readAdvice; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java index 501daa56c782..8e6c80619edd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java @@ -137,12 +137,13 @@ public Lucene99HnswScalarQuantizedVectorsFormat( } /** - * Defines the format used for storing, reading, and merging vectors on disk. - * Flat formats enable random access to vectors based on their node ID, as recorded in the HNSW graph. - * To ensure consistent access, the {@link ReadAdvice#RANDOM} read advice is used. + * Defines the format used for storing, reading, and merging vectors on disk. Flat formats + * enable random access to vectors based on their node ID, as recorded in the HNSW graph. To + * ensure consistent access, the {@link ReadAdvice#RANDOM} read advice is used. */ this.flatVectorsFormat = - new Lucene99ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress, ReadAdvice.RANDOM); + new Lucene99ScalarQuantizedVectorsFormat( + confidenceInterval, bits, compress, ReadAdvice.RANDOM); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java index b4baa4be1f3d..b988e2b96543 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsFormat.java @@ -132,12 +132,13 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat { private final int beamWidth; /** - * Defines the format used for storing, reading, and merging vectors on disk. - * Flat formats enable random access to vectors based on their node ID, as recorded in the HNSW graph. - * To ensure consistent access, the {@link ReadAdvice#RANDOM} read advice is used. + * Defines the format used for storing, reading, and merging vectors on disk. Flat formats enable + * random access to vectors based on their node ID, as recorded in the HNSW graph. To ensure + * consistent access, the {@link ReadAdvice#RANDOM} read advice is used. */ private static final FlatVectorsFormat flatVectorsFormat = - new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer(), ReadAdvice.RANDOM); + new Lucene99FlatVectorsFormat( + FlatVectorScorerUtil.getLucene99FlatVectorsScorer(), ReadAdvice.RANDOM); private final int numMergeWorkers; private final TaskExecutor mergeExec; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java index b8942e5f1471..1eb542ee8848 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java @@ -52,12 +52,13 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat { static final String VECTOR_DATA_EXTENSION = "veq"; /** - * Defines the format used for storing, reading, and merging raw vectors on disk. - * For this format, the {@link ReadAdvice#SEQUENTIAL} read advice is employed, - * as nearest neighbors are retrieved exclusively using a brute-force approach. + * Defines the format used for storing, reading, and merging raw vectors on disk. For this format, + * the {@link ReadAdvice#SEQUENTIAL} read advice is employed, as nearest neighbors are retrieved + * exclusively using a brute-force approach. */ private static final FlatVectorsFormat rawVectorFormat = - new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer(), ReadAdvice.SEQUENTIAL); + new Lucene99FlatVectorsFormat( + FlatVectorScorerUtil.getLucene99FlatVectorsScorer(), ReadAdvice.SEQUENTIAL); /** The minimum confidence interval */ private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f; @@ -82,8 +83,8 @@ public class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsFormat { /** Constructs a format using default graph construction parameters */ public Lucene99ScalarQuantizedVectorsFormat() { /** - * For this format, the {@link ReadAdvice#SEQUENTIAL} read advice is employed, - * as nearest neighbors are retrieved exclusively using a brute-force approach. + * For this format, the {@link ReadAdvice#SEQUENTIAL} read advice is employed, as nearest + * neighbors are retrieved exclusively using a brute-force approach. */ this(null, 7, false, ReadAdvice.SEQUENTIAL); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java index cdb3987d4799..f5dd2f24f2b8 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java @@ -65,7 +65,10 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade private final ReadAdvice readAdvice; public Lucene99ScalarQuantizedVectorsReader( - SegmentReadState state, FlatVectorsReader rawVectorsReader, FlatVectorsScorer scorer, ReadAdvice readAdvice) + SegmentReadState state, + FlatVectorsReader rawVectorsReader, + FlatVectorsScorer scorer, + ReadAdvice readAdvice) throws IOException { super(scorer); this.rawVectorsReader = rawVectorsReader; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java index 50fb202df686..50d3984210b3 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java @@ -499,7 +499,8 @@ private ScalarQuantizedCloseableRandomVectorScorerSupplier mergeOneFieldToIndex( IOUtils.close(tempQuantizedVectorData); quantizationDataInput = segmentWriteState.directory.openInput( - tempQuantizedVectorData.getName(), segmentWriteState.context.withReadAdvice(readAdvice)); + tempQuantizedVectorData.getName(), + segmentWriteState.context.withReadAdvice(readAdvice)); quantizedVectorData.copyBytes( quantizationDataInput, quantizationDataInput.length() - CodecUtil.footerLength()); long vectorDataLength = quantizedVectorData.getFilePointer() - vectorDataOffset; diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java index bccbff2d8427..48b070bbb843 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java @@ -65,7 +65,10 @@ public void setUp() throws Exception { } format = new Lucene99ScalarQuantizedVectorsFormat( - confidenceInterval, bits, bits == 4 ? random().nextBoolean() : false, random().nextBoolean() ? ReadAdvice.RANDOM : ReadAdvice.SEQUENTIAL); + confidenceInterval, + bits, + bits == 4 ? random().nextBoolean() : false, + random().nextBoolean() ? ReadAdvice.RANDOM : ReadAdvice.SEQUENTIAL); super.setUp(); } @@ -199,7 +202,8 @@ public void testToString() { new FilterCodec("foo", Codec.getDefault()) { @Override public KnnVectorsFormat knnVectorsFormat() { - return new Lucene99ScalarQuantizedVectorsFormat(0.9f, (byte) 4, false, ReadAdvice.RANDOM); + return new Lucene99ScalarQuantizedVectorsFormat( + 0.9f, (byte) 4, false, ReadAdvice.RANDOM); } }; String expectedPattern = From f4391c987b8164b560f208b7589e27d4116e2486 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 17 Dec 2024 14:23:24 +0000 Subject: [PATCH 4/4] remove unused code --- .../codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java index f5dd2f24f2b8..fe11173a3cd2 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java @@ -62,7 +62,6 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade private final IndexInput quantizedVectorData; private final FlatVectorsReader rawVectorsReader; private final FieldInfos fieldInfos; - private final ReadAdvice readAdvice; public Lucene99ScalarQuantizedVectorsReader( SegmentReadState state, @@ -74,7 +73,6 @@ public Lucene99ScalarQuantizedVectorsReader( this.rawVectorsReader = rawVectorsReader; this.fieldInfos = state.fieldInfos; int versionMeta = -1; - this.readAdvice = readAdvice; String metaFileName = IndexFileNames.segmentFileName( state.segmentInfo.name,