diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
index 8a0d3b65aea..49a35b75f15 100644
--- a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
@@ -23,6 +23,7 @@
 
 import java.io.IOException;
 import java.util.HashSet;
+import java.util.Random;
 import java.util.Set;
 import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.document.Document;
@@ -40,7 +41,9 @@
 import org.apache.lucene.index.KnnVectorValues;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.QueryTimeout;
+import org.apache.lucene.index.SerialMergeScheduler;
 import org.apache.lucene.index.StoredFields;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.VectorEncoding;
@@ -481,6 +484,75 @@ public void testSkewedIndex() throws IOException {
     }
   }
 
+  /** Checks that repeating one KNN query yields the same hits (multiThreaded=false). */
+  public void testRandomConsistencySingleThreaded() throws IOException {
+    assertRandomConsistency(false);
+  }
+
+  /**
+   * Same consistency check as the single-threaded variant, but with multiThreaded=true. Marked
+   * with AwaitsFix pending the issue referenced in the annotation.
+   */
+  @AwaitsFix(bugUrl = "https://github.com/apache/lucene/issues/14180")
+  public void testRandomConsistencyMultiThreaded() throws IOException {
+    assertRandomConsistency(true);
+  }
+
+  /**
+   * Indexes a fixed number of documents with a plain {@link IndexWriter}, runs one random KNN
+   * query to obtain baseline results, then repeats the identical query and asserts that the total
+   * hit count, the doc ids and the scores (within EPSILON) match the baseline on every iteration.
+   *
+   * @param multiThreaded forwarded as the last argument of {@code newSearcher}
+   */
+  private void assertRandomConsistency(boolean multiThreaded) throws IOException {
+    int numDocs = 100;
+    int dimension = 4;
+    int numIters = 10;
+    boolean everyDocHasAVector = random().nextBoolean();
+    Random r = random();
+    try (Directory d = newDirectoryForTest()) {
+      // To ensure consistency between seeded runs, remove some randomness
+      IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+      iwc.setMergeScheduler(new SerialMergeScheduler());
+      iwc.setMergePolicy(NoMergePolicy.INSTANCE);
+      iwc.setMaxBufferedDocs(numDocs);
+      iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+      try (IndexWriter w = new IndexWriter(d, iwc)) {
+        for (int i = 0; i < numDocs; i++) {
+          Document doc = new Document();
+          // unless every doc gets a vector, about one document in ten is left without one
+          if (everyDocHasAVector || random().nextInt(10) != 2) {
+            doc.add(getKnnVectorField("field", randomVector(dimension)));
+          }
+          w.addDocument(doc);
+          // only i = 0 and i = 50 qualify, so at most two explicit flushes, each a coin toss
+          if (r.nextBoolean() && i % 50 == 0) {
+            w.flush();
+          }
+        }
+      }
+      try (IndexReader reader = DirectoryReader.open(d)) {
+        IndexSearcher searcher = newSearcher(reader, true, true, multiThreaded);
+        // first get the initial set of docs, and we expect all future queries to be exactly the
+        // same
+        int k = random().nextInt(80) + 1;
+        AbstractKnnVectorQuery query = getKnnVectorQuery("field", randomVector(dimension), k);
+        int n = random().nextInt(100) + 1;
+        TopDocs expectedResults = searcher.search(query, n);
+        for (int i = 0; i < numIters; i++) {
+          TopDocs results = searcher.search(query, n);
+          assertEquals(expectedResults.totalHits.value(), results.totalHits.value());
+          assertEquals(expectedResults.scoreDocs.length, results.scoreDocs.length);
+          for (int j = 0; j < results.scoreDocs.length; j++) {
+            assertEquals(expectedResults.scoreDocs[j].doc, results.scoreDocs[j].doc);
+            assertEquals(expectedResults.scoreDocs[j].score, results.scoreDocs[j].score, EPSILON);
+          }
+        }
+      }
+    }
+  }
+
   /** Tests with random vectors, number of documents, etc. Uses RandomIndexWriter. */
   public void testRandom() throws IOException {
     int numDocs = atLeast(100);