Skip to content

Commit a3a00f3

Browse files
authored
Remove LeafSimScorer abstraction. (#13957)
`LeafSimScorer` is a specialization of a `SimScorer` for a given segment. It adds little value, and benchmarks suggest that it adds measurable overhead to queries sorted by score.
1 parent b12ee52 commit a3a00f3

23 files changed

+204
-205
lines changed

lucene/CHANGES.txt

+3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ API Changes
99
---------------------
1010
* GITHUB#11023: Removing deprecated parameters from CheckIndex. (Jakub Slowinski)
1111

12+
* GITHUB#13957: Removed the LeafSimScorer class to avoid its overhead. Scorers now
13+
compute scores directly from a SimScorer, postings and norms. (Adrien Grand)
14+
1215
New Features
1316
---------------------
1417
(No changes)

lucene/core/src/java/org/apache/lucene/document/FeatureQuery.java

+1-5
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
import org.apache.lucene.index.TermsEnum;
2828
import org.apache.lucene.search.Explanation;
2929
import org.apache.lucene.search.IndexSearcher;
30-
import org.apache.lucene.search.LeafSimScorer;
3130
import org.apache.lucene.search.Query;
3231
import org.apache.lucene.search.QueryVisitor;
3332
import org.apache.lucene.search.ScoreMode;
@@ -120,7 +119,6 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
120119

121120
@Override
122121
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
123-
final Weight thisWeight = this;
124122
Terms terms = Terms.getTerms(context.reader(), fieldName);
125123
TermsEnum termsEnum = terms.iterator();
126124
if (termsEnum.seekExact(new BytesRef(featureName)) == false) {
@@ -135,10 +133,8 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
135133
@Override
136134
public Scorer get(long leadCost) throws IOException {
137135
final SimScorer scorer = function.scorer(boost);
138-
final LeafSimScorer simScorer =
139-
new LeafSimScorer(scorer, context.reader(), fieldName, false);
140136
final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
141-
return new TermScorer(thisWeight, impacts, simScorer, topLevelScoringClause);
137+
return new TermScorer(impacts, scorer, null, topLevelScoringClause);
142138
}
143139

144140
@Override

lucene/core/src/java/org/apache/lucene/search/LeafSimScorer.java

-72
This file was deleted.

lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java

+17-4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
package org.apache.lucene.search;
1919

2020
import java.io.IOException;
21+
import org.apache.lucene.index.NumericDocValues;
22+
import org.apache.lucene.search.similarities.Similarity.SimScorer;
2123

2224
class PhraseScorer extends Scorer {
2325

@@ -26,16 +28,19 @@ class PhraseScorer extends Scorer {
2628
final MaxScoreCache maxScoreCache;
2729
final PhraseMatcher matcher;
2830
final ScoreMode scoreMode;
29-
private final LeafSimScorer simScorer;
31+
private final SimScorer simScorer;
32+
private final NumericDocValues norms;
3033
final float matchCost;
3134

3235
private float minCompetitiveScore = 0;
3336
private float freq = 0;
3437

35-
PhraseScorer(PhraseMatcher matcher, ScoreMode scoreMode, LeafSimScorer simScorer) {
38+
PhraseScorer(
39+
PhraseMatcher matcher, ScoreMode scoreMode, SimScorer simScorer, NumericDocValues norms) {
3640
this.matcher = matcher;
3741
this.scoreMode = scoreMode;
3842
this.simScorer = simScorer;
43+
this.norms = norms;
3944
this.matchCost = matcher.getMatchCost();
4045
this.approximation = matcher.approximation();
4146
this.impactsApproximation = matcher.impactsApproximation();
@@ -50,7 +55,11 @@ public boolean matches() throws IOException {
5055
matcher.reset();
5156
if (scoreMode == ScoreMode.TOP_SCORES && minCompetitiveScore > 0) {
5257
float maxFreq = matcher.maxFreq();
53-
if (simScorer.score(docID(), maxFreq) < minCompetitiveScore) {
58+
long norm = 1L;
59+
if (norms != null && norms.advanceExact(docID())) {
60+
norm = norms.longValue();
61+
}
62+
if (simScorer.score(maxFreq, norm) < minCompetitiveScore) {
5463
// The maximum score we could get is less than the min competitive score
5564
return false;
5665
}
@@ -79,7 +88,11 @@ public float score() throws IOException {
7988
freq += matcher.sloppyWeight();
8089
}
8190
}
82-
return simScorer.score(docID(), freq);
91+
long norm = 1L;
92+
if (norms != null && norms.advanceExact(docID())) {
93+
norm = norms.longValue();
94+
}
95+
return simScorer.score(freq, norm);
8396
}
8497

8598
@Override

lucene/core/src/java/org/apache/lucene/search/PhraseWeight.java

+9-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.io.IOException;
2121
import org.apache.lucene.index.LeafReaderContext;
22+
import org.apache.lucene.index.NumericDocValues;
2223
import org.apache.lucene.search.similarities.Similarity;
2324
import org.apache.lucene.search.similarities.Similarity.SimScorer;
2425

@@ -63,9 +64,8 @@ protected abstract PhraseMatcher getPhraseMatcher(
6364
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
6465
PhraseMatcher matcher = getPhraseMatcher(context, stats, false);
6566
if (matcher == null) return null;
66-
LeafSimScorer simScorer =
67-
new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
68-
final var scorer = new PhraseScorer(matcher, scoreMode, simScorer);
67+
NumericDocValues norms = scoreMode.needsScores() ? context.reader().getNormValues(field) : null;
68+
final var scorer = new PhraseScorer(matcher, scoreMode, stats, norms);
6969
return new DefaultScorerSupplier(scorer);
7070
}
7171

@@ -83,10 +83,13 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
8383
while (matcher.nextMatch()) {
8484
freq += matcher.sloppyWeight();
8585
}
86-
LeafSimScorer docScorer =
87-
new LeafSimScorer(stats, context.reader(), field, scoreMode.needsScores());
8886
Explanation freqExplanation = Explanation.match(freq, "phraseFreq=" + freq);
89-
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
87+
NumericDocValues norms = scoreMode.needsScores() ? context.reader().getNormValues(field) : null;
88+
long norm = 1L;
89+
if (norms != null && norms.advanceExact(doc)) {
90+
norm = norms.longValue();
91+
}
92+
Explanation scoreExplanation = stats.explain(freqExplanation, norm);
9093
return Explanation.match(
9194
scoreExplanation.getValue(),
9295
"weight("

lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java

+37-19
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.apache.lucene.index.ImpactsEnum;
3131
import org.apache.lucene.index.ImpactsSource;
3232
import org.apache.lucene.index.LeafReaderContext;
33+
import org.apache.lucene.index.NumericDocValues;
3334
import org.apache.lucene.index.PostingsEnum;
3435
import org.apache.lucene.index.SlowImpactsEnum;
3536
import org.apache.lucene.index.Term;
@@ -38,6 +39,7 @@
3839
import org.apache.lucene.index.Terms;
3940
import org.apache.lucene.index.TermsEnum;
4041
import org.apache.lucene.search.similarities.Similarity;
42+
import org.apache.lucene.search.similarities.Similarity.SimScorer;
4143
import org.apache.lucene.util.BytesRef;
4244
import org.apache.lucene.util.IOSupplier;
4345
import org.apache.lucene.util.PriorityQueue;
@@ -259,9 +261,13 @@ public Explanation explain(LeafReaderContext context, int doc) throws IOExceptio
259261
assert scorer instanceof TermScorer;
260262
freq = ((TermScorer) scorer).freq();
261263
}
262-
LeafSimScorer docScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
263264
Explanation freqExplanation = Explanation.match(freq, "termFreq=" + freq);
264-
Explanation scoreExplanation = docScorer.explain(doc, freqExplanation);
265+
NumericDocValues norms = context.reader().getNormValues(field);
266+
long norm = 1L;
267+
if (norms != null && norms.advanceExact(doc)) {
268+
norm = norms.longValue();
269+
}
270+
Explanation scoreExplanation = simWeight.explain(freqExplanation, norm);
265271
return Explanation.match(
266272
scoreExplanation.getValue(),
267273
"weight("
@@ -334,27 +340,27 @@ public Scorer get(long leadCost) throws IOException {
334340
return new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
335341
}
336342

337-
LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), field, true);
343+
NumericDocValues norms = context.reader().getNormValues(field);
338344

339345
// we must optimize this case (term not in segment), disjunctions require >= 2 subs
340346
if (iterators.size() == 1) {
341347
final TermScorer scorer;
342348
if (scoreMode == ScoreMode.TOP_SCORES) {
343-
scorer = new TermScorer(impacts.get(0), simScorer);
349+
scorer = new TermScorer(impacts.get(0), simWeight, norms);
344350
} else {
345-
scorer = new TermScorer(iterators.get(0), simScorer);
351+
scorer = new TermScorer(iterators.get(0), simWeight, norms);
346352
}
347353
float boost = termBoosts.get(0);
348354
return scoreMode == ScoreMode.COMPLETE_NO_SCORES || boost == 1f
349355
? scorer
350-
: new FreqBoostTermScorer(boost, scorer, simScorer);
356+
: new FreqBoostTermScorer(boost, scorer, simWeight, norms);
351357
} else {
352358

353359
// we use termscorers + disjunction as an impl detail
354360
DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
355361
for (int i = 0; i < iterators.size(); i++) {
356362
PostingsEnum postings = iterators.get(i);
357-
final TermScorer termScorer = new TermScorer(postings, simScorer);
363+
final TermScorer termScorer = new TermScorer(postings, simWeight, norms);
358364
float boost = termBoosts.get(i);
359365
final DisiWrapperFreq wrapper = new DisiWrapperFreq(termScorer, boost);
360366
queue.add(wrapper);
@@ -368,8 +374,7 @@ public Scorer get(long leadCost) throws IOException {
368374
boosts[i] = termBoosts.get(i);
369375
}
370376
ImpactsSource impactsSource = mergeImpacts(impacts.toArray(new ImpactsEnum[0]), boosts);
371-
MaxScoreCache maxScoreCache =
372-
new MaxScoreCache(impactsSource, simScorer.getSimScorer());
377+
MaxScoreCache maxScoreCache = new MaxScoreCache(impactsSource, simWeight);
373378
ImpactsDISI impactsDisi = new ImpactsDISI(iterator, maxScoreCache);
374379

375380
if (scoreMode == ScoreMode.TOP_SCORES) {
@@ -379,7 +384,7 @@ public Scorer get(long leadCost) throws IOException {
379384
iterator = impactsDisi;
380385
}
381386

382-
return new SynonymScorer(queue, iterator, impactsDisi, simScorer);
387+
return new SynonymScorer(queue, iterator, impactsDisi, simWeight, norms);
383388
}
384389
}
385390

@@ -575,18 +580,21 @@ private static class SynonymScorer extends Scorer {
575580
private final DocIdSetIterator iterator;
576581
private final MaxScoreCache maxScoreCache;
577582
private final ImpactsDISI impactsDisi;
578-
private final LeafSimScorer simScorer;
583+
private final SimScorer scorer;
584+
private final NumericDocValues norms;
579585

580586
SynonymScorer(
581587
DisiPriorityQueue queue,
582588
DocIdSetIterator iterator,
583589
ImpactsDISI impactsDisi,
584-
LeafSimScorer simScorer) {
590+
SimScorer scorer,
591+
NumericDocValues norms) {
585592
this.queue = queue;
586593
this.iterator = iterator;
587594
this.maxScoreCache = impactsDisi.getMaxScoreCache();
588595
this.impactsDisi = impactsDisi;
589-
this.simScorer = simScorer;
596+
this.scorer = scorer;
597+
this.norms = norms;
590598
}
591599

592600
@Override
@@ -605,7 +613,11 @@ float freq() throws IOException {
605613

606614
@Override
607615
public float score() throws IOException {
608-
return simScorer.score(iterator.docID(), freq());
616+
long norm = 1L;
617+
if (norms != null && norms.advanceExact(iterator.docID())) {
618+
norm = norms.longValue();
619+
}
620+
return scorer.score(freq(), norm);
609621
}
610622

611623
@Override
@@ -647,17 +659,20 @@ float freq() throws IOException {
647659
private static class FreqBoostTermScorer extends FilterScorer {
648660
final float boost;
649661
final TermScorer in;
650-
final LeafSimScorer docScorer;
662+
final SimScorer scorer;
663+
final NumericDocValues norms;
651664

652-
public FreqBoostTermScorer(float boost, TermScorer in, LeafSimScorer docScorer) {
665+
public FreqBoostTermScorer(
666+
float boost, TermScorer in, SimScorer scorer, NumericDocValues norms) {
653667
super(in);
654668
if (Float.isNaN(boost) || Float.compare(boost, 0f) < 0 || Float.compare(boost, 1f) > 0) {
655669
throw new IllegalArgumentException(
656670
"boost must be a positive float between 0 (exclusive) and 1 (inclusive)");
657671
}
658672
this.boost = boost;
659673
this.in = in;
660-
this.docScorer = docScorer;
674+
this.scorer = scorer;
675+
this.norms = norms;
661676
}
662677

663678
float freq() throws IOException {
@@ -666,8 +681,11 @@ float freq() throws IOException {
666681

667682
@Override
668683
public float score() throws IOException {
669-
assert docID() != DocIdSetIterator.NO_MORE_DOCS;
670-
return docScorer.score(in.docID(), freq());
684+
long norm = 1L;
685+
if (norms != null && norms.advanceExact(in.docID())) {
686+
norm = norms.longValue();
687+
}
688+
return scorer.score(freq(), norm);
671689
}
672690

673691
@Override

0 commit comments

Comments
 (0)