Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UnwrappingReuseStrategy for AnalyzerWrapper #14154

Merged
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ Improvements
BulkAdder#add(IntsRef) method. They should provide better performance due to less virtual method calls and
more efficient bulk processing. (Ignacio Vera)

* GITHUB#14154: Add UnwrappingReuseStrategy for AnalyzerWrapper that consults the wrapped analyzer's strategy
to decide if components can be reused or need to be updated. (Mayya Sharipova)

Optimizations
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public QueryAutoStopWordAnalyzer(
public QueryAutoStopWordAnalyzer(
Analyzer delegate, IndexReader indexReader, Collection<String> fields, int maxDocFreq)
throws IOException {
super(delegate.getReuseStrategy());
super(PER_FIELD_REUSE_STRATEGY);
mayya-sharipova marked this conversation as resolved.
Show resolved Hide resolved
this.delegate = delegate;

for (String field : fields) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
package org.apache.lucene.analysis;

import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.util.AttributeFactory;

/**
Expand All @@ -41,6 +43,7 @@
* @since 4.0.0
*/
public abstract class AnalyzerWrapper extends Analyzer {
// Components created by the wrapped analyzer, keyed by field name: written by createComponents
// and read back through getWrappedComponents.
// NOTE(review): this is a plain HashMap with no synchronization visible here; if one wrapper
// instance can be used from several threads, confirm that this is safe.
final Map<String, TokenStreamComponents> wrappedComponentsPerField = new HashMap<>();

/**
* Creates a new AnalyzerWrapper with the given reuse strategy.
Expand All @@ -53,7 +56,10 @@ public abstract class AnalyzerWrapper extends Analyzer {
* @see #getReuseStrategy()
*/
protected AnalyzerWrapper(ReuseStrategy reuseStrategy) {
super(reuseStrategy);
// NOTE(review): two super(...) calls are shown here; the single-argument call above looks like
// the pre-change line of this diff hunk and the call below its replacement. Confirm.
// A DelegatingReuseStrategy is handed to the superclass as-is; any other strategy is first
// wrapped in an UnwrappingReuseStrategy.
super(
reuseStrategy instanceof DelegatingAnalyzerWrapper.DelegatingReuseStrategy
? reuseStrategy
: new UnwrappingReuseStrategy(reuseStrategy));
}

/**
Expand Down Expand Up @@ -117,7 +123,10 @@ protected Reader wrapReaderForNormalization(String fieldName, Reader reader) {

@Override
protected final TokenStreamComponents createComponents(String fieldName) {
return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName));
// NOTE(review): the return statement above looks like the pre-change line of this diff hunk;
// the statements below appear to be its replacement. Confirm.
// Create the wrapped analyzer's components, record them under the field name so that
// getWrappedComponents can return them later, then wrap them.
TokenStreamComponents wrappedComponents =
getWrappedAnalyzer(fieldName).createComponents(fieldName);
wrappedComponentsPerField.put(fieldName, wrappedComponents);
return wrapComponents(fieldName, wrappedComponents);
}

@Override
Expand Down Expand Up @@ -151,4 +160,46 @@ protected final Reader initReaderForNormalization(String fieldName, Reader reade
protected final AttributeFactory attributeFactory(String fieldName) {
  // Use the attribute factory of whichever analyzer is wrapped for this field.
  final Analyzer wrapped = getWrappedAnalyzer(fieldName);
  return wrapped.attributeFactory(fieldName);
}

/**
 * Returns the wrapped analyzer's components that were last recorded for the given field by
 * {@link #createComponents(String)}.
 *
 * @param fieldName name of the field whose recorded components are requested
 * @return the recorded components, or {@code null} if nothing is recorded for the field
 */
public TokenStreamComponents getWrappedComponents(String fieldName) {
  final TokenStreamComponents recorded = wrappedComponentsPerField.get(fieldName);
  return recorded;
}

/**
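* A {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy} that first consults the wrapped
* analyzer's strategy for reusability. If the wrapped analyzer's strategy returns {@code null}
* for a field, this strategy also returns {@code null} so that the components are re-created;
* otherwise it defers to the strategy it was constructed with.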
*/
public static final class UnwrappingReuseStrategy extends ReuseStrategy {
private final ReuseStrategy reuseStrategy;

/**
 * Creates a strategy that decorates the given one.
 *
 * @param reuseStrategy the strategy used to store and look up the components of the analyzer
 *     that this strategy is invoked for
 */
public UnwrappingReuseStrategy(ReuseStrategy reuseStrategy) {
this.reuseStrategy = reuseStrategy;
}

@Override
public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
  // For a wrapper, ask the wrapped analyzer's own strategy first: when it has nothing to reuse,
  // the wrapper's components must not be reused either.
  if (analyzer instanceof AnalyzerWrapper wrapper) {
    final Analyzer inner = wrapper.getWrappedAnalyzer(fieldName);
    final ReuseStrategy innerStrategy = inner.getReuseStrategy();
    final TokenStreamComponents innerComponents =
        innerStrategy.getReusableComponents(inner, fieldName);
    if (innerComponents == null) {
      return null;
    }
  }
  // Otherwise defer to the decorated strategy for the analyzer itself.
  return reuseStrategy.getReusableComponents(analyzer, fieldName);
}

@Override
public void setReusableComponents(
    Analyzer analyzer, String fieldName, TokenStreamComponents components) {
  // Store the analyzer's own components through the decorated strategy first.
  reuseStrategy.setReusableComponents(analyzer, fieldName, components);
  if (!(analyzer instanceof AnalyzerWrapper wrapper)) {
    return;
  }
  // For a wrapper, also store the wrapped analyzer's components through that analyzer's
  // strategy, using the components the wrapper recorded for this field.
  final Analyzer inner = wrapper.getWrappedAnalyzer(fieldName);
  final ReuseStrategy innerStrategy = inner.getReuseStrategy();
  final TokenStreamComponents innerComponents = wrapper.getWrappedComponents(fieldName);
  innerStrategy.setReusableComponents(inner, fieldName, innerComponents);
}
mayya-sharipova marked this conversation as resolved.
Show resolved Hide resolved
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,11 @@ protected final Reader wrapReaderForNormalization(String fieldName, Reader reade
return super.wrapReaderForNormalization(fieldName, reader);
}

private static final class DelegatingReuseStrategy extends ReuseStrategy {
/**
* A {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy} that delegates to the wrapped
* analyzer's strategy for reusability of components.
*/
static final class DelegatingReuseStrategy extends ReuseStrategy {
DelegatingAnalyzerWrapper wrapper;
private final ReuseStrategy fallbackStrategy;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@

package org.apache.lucene.analysis;

import static org.apache.lucene.analysis.Analyzer.PER_FIELD_REUSE_STRATEGY;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.Analyzer.ReuseStrategy;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.util.LuceneTestCase;

Expand All @@ -40,7 +44,7 @@ protected TokenStreamComponents createComponents(String fieldName) {
}
};

Analyzer wrapped =
Analyzer wrapper =
new AnalyzerWrapper(analyzer.getReuseStrategy()) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
Expand All @@ -55,9 +59,73 @@ protected TokenStreamComponents wrapComponents(
}
};

try (TokenStream ts = wrapped.tokenStream("", "text")) {
try (TokenStream ts = wrapper.tokenStream("", "text")) {
assert ts != null;
assertTrue(sourceCalled.get());
}
}

/**
* Tests that {@link AnalyzerWrapper.UnwrappingReuseStrategy} consults the wrapped analyzer's
* reuse strategy to decide whether components can be reused or need to be re-created.
*/
public void testUnwrappingReuseStrategy() {
// Toggle read by the wrapped analyzer's strategy below: while it is false, that strategy
// reports that nothing can be reused.
AtomicBoolean reuse = new AtomicBoolean(true);

// Strategy of the wrapped analyzer: always stores the components it is given, but only hands
// them back while the reuse flag is true.
final ReuseStrategy wrappedAnalyzerStrategy =
new ReuseStrategy() {
@Override
public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
if (reuse.get() == false) {
return null;
} else {
return (TokenStreamComponents) getStoredValue(analyzer);
}
}

@Override
public void setReusableComponents(
Analyzer analyzer, String fieldName, TokenStreamComponents components) {
setStoredValue(analyzer, components);
}
};
// Wrapped analyzer: creates fresh components on every createComponents call.
Analyzer wrappedAnalyzer =
new Analyzer(wrappedAnalyzerStrategy) {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(r -> {}, new CannedTokenStream());
}
};

// Wrapper under test: keeps the wrapped components' source and puts a LowerCaseFilter on top
// of their token stream.
AnalyzerWrapper wrapperAnalyzer =
new AnalyzerWrapper(PER_FIELD_REUSE_STRATEGY) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return wrappedAnalyzer;
}

@Override
protected TokenStreamComponents wrapComponents(
String fieldName, TokenStreamComponents components) {
return new TokenStreamComponents(
components.getSource(), new LowerCaseFilter(components.getTokenStream()));
}
};

// Reuse enabled: consecutive calls for the same field are expected to return equal streams.
TokenStream ts = wrapperAnalyzer.tokenStream("", "text");
TokenStream ts2 = wrapperAnalyzer.tokenStream("", "text");
assertEquals(ts2, ts);

// Reuse disabled in the wrapped strategy: every call is expected to return a different stream
// instance than the previous call.
reuse.set(false);
TokenStream ts3 = wrapperAnalyzer.tokenStream("", "text");
assertNotSame(ts3, ts2);
TokenStream ts4 = wrapperAnalyzer.tokenStream("", "text");
assertNotSame(ts4, ts3);

// Reuse enabled again: the stream returned by the last call made while reuse was disabled is
// expected to be returned from now on.
reuse.set(true);
TokenStream ts5 = wrapperAnalyzer.tokenStream("", "text");
assertEquals(ts5, ts4);
TokenStream ts6 = wrapperAnalyzer.tokenStream("", "text");
assertEquals(ts6, ts5);
}
}
Loading