Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UnwrappingReuseStrategy for AnalyzerWrapper #14154

Merged
4 changes: 4 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ Improvements
mergeFactor segments together when the merge is below the min merge size.
(Adrien Grand)

* GITHUB#14154: Add UnwrappingReuseStrategy for AnalyzerWrapper that consults
the wrapped analyzer's strategy to decide if components can be reused or need
to be updated. (Mayya Sharipova)

Optimizations
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ public void close() {
* TokenFilter} which also serves as the {@link TokenStream} returned by {@link
* Analyzer#tokenStream(String, Reader)}.
*/
public static final class TokenStreamComponents {
public static class TokenStreamComponents {
/** Original source of the tokens. */
protected final Consumer<Reader> source;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
* @since 4.0.0
*/
public abstract class AnalyzerWrapper extends Analyzer {

/**
* Creates a new AnalyzerWrapper with the given reuse strategy.
*
Expand All @@ -53,7 +52,10 @@ public abstract class AnalyzerWrapper extends Analyzer {
* @see #getReuseStrategy()
*/
protected AnalyzerWrapper(ReuseStrategy reuseStrategy) {
// NOTE(review): this span is a diff rendering without +/- markers. The one-line super(...) call
// directly below looks like the pre-change form and the multi-line call after it like its
// replacement — confirm against the merged file.
super(reuseStrategy);
// A DelegatingReuseStrategy is handed to Analyzer unchanged; every other strategy is first
// wrapped in an UnwrappingReuseStrategy.
super(
reuseStrategy instanceof DelegatingAnalyzerWrapper.DelegatingReuseStrategy
? reuseStrategy
: new UnwrappingReuseStrategy(reuseStrategy));
}

/**
Expand Down Expand Up @@ -117,7 +119,10 @@ protected Reader wrapReaderForNormalization(String fieldName, Reader reader) {

@Override
protected final TokenStreamComponents createComponents(String fieldName) {
// NOTE(review): this span is a diff rendering without +/- markers. The single return statement
// directly below looks like the pre-change body and the four lines after it like its
// replacement — confirm against the merged file.
return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName));
// Create the wrapped analyzer's components, wrap them via wrapComponents, and return both
// bundled in a TokenStreamComponentsWrapper so the wrapped components stay reachable from the
// returned object.
TokenStreamComponents wrappedComponents =
getWrappedAnalyzer(fieldName).createComponents(fieldName);
TokenStreamComponents wrapperComponents = wrapComponents(fieldName, wrappedComponents);
return new TokenStreamComponentsWrapper(wrapperComponents, wrappedComponents);
}

@Override
Expand Down Expand Up @@ -151,4 +156,63 @@ protected final Reader initReaderForNormalization(String fieldName, Reader reade
protected final AttributeFactory attributeFactory(String fieldName) {
  // The attribute factory always comes from the analyzer wrapped for this field.
  final Analyzer delegate = getWrappedAnalyzer(fieldName);
  return delegate.attributeFactory(fieldName);
}

/**
* A {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy} that checks the wrapped analyzer's
* strategy for reusability. If the wrapped analyzer's strategy returns null, components need to
* be re-created.
*/
public static final class UnwrappingReuseStrategy extends ReuseStrategy {
private final ReuseStrategy reuseStrategy;

public UnwrappingReuseStrategy(ReuseStrategy reuseStrategy) {
this.reuseStrategy = reuseStrategy;
}

@Override
public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
if (analyzer instanceof AnalyzerWrapper wrapper) {
final Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
if (wrappedAnalyzer.getReuseStrategy().getReusableComponents(wrappedAnalyzer, fieldName)
== null) {
return null;
}
}
return reuseStrategy.getReusableComponents(analyzer, fieldName);
}

@Override
public void setReusableComponents(
Analyzer analyzer, String fieldName, TokenStreamComponents components) {
reuseStrategy.setReusableComponents(analyzer, fieldName, components);

if (analyzer instanceof AnalyzerWrapper wrapper) {
assert components instanceof TokenStreamComponentsWrapper;
final TokenStreamComponentsWrapper wrapperComponents =
mayya-sharipova marked this conversation as resolved.
Show resolved Hide resolved
(TokenStreamComponentsWrapper) components;
final Analyzer wrappedAnalyzer = wrapper.getWrappedAnalyzer(fieldName);
wrappedAnalyzer
.getReuseStrategy()
.setReusableComponents(
wrappedAnalyzer, fieldName, wrapperComponents.getWrappedComponents());
}
}
}

/**
 * {@link Analyzer.TokenStreamComponents} that exposes the source and token stream of the
 * wrapper's components while also keeping a handle on the wrapped components.
 */
static final class TokenStreamComponentsWrapper extends TokenStreamComponents {
  /** The wrapped components supplied at construction. */
  private final TokenStreamComponents wrappedComponents;

  /**
   * @param wrapper components whose source and token stream this instance takes over
   * @param wrapped components handed back by {@link #getWrappedComponents()}
   */
  TokenStreamComponentsWrapper(TokenStreamComponents wrapper, TokenStreamComponents wrapped) {
    super(wrapper.getSource(), wrapper.getTokenStream());
    wrappedComponents = wrapped;
  }

  /** Returns the components passed as {@code wrapped} to the constructor. */
  TokenStreamComponents getWrappedComponents() {
    return wrappedComponents;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,11 @@ protected final Reader wrapReaderForNormalization(String fieldName, Reader reade
return super.wrapReaderForNormalization(fieldName, reader);
}

private static final class DelegatingReuseStrategy extends ReuseStrategy {
/**
* A {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy} that delegates to the wrapped
* analyzer's strategy for reusability of components.
*/
static final class DelegatingReuseStrategy extends ReuseStrategy {
DelegatingAnalyzerWrapper wrapper;
private final ReuseStrategy fallbackStrategy;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,12 @@

package org.apache.lucene.analysis;

import static org.apache.lucene.analysis.Analyzer.PER_FIELD_REUSE_STRATEGY;

import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.Analyzer.ReuseStrategy;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.util.LuceneTestCase;

Expand All @@ -40,7 +44,7 @@ protected TokenStreamComponents createComponents(String fieldName) {
}
};

Analyzer wrapped =
Analyzer wrapper =
new AnalyzerWrapper(analyzer.getReuseStrategy()) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
Expand All @@ -55,9 +59,73 @@ protected TokenStreamComponents wrapComponents(
}
};

try (TokenStream ts = wrapped.tokenStream("", "text")) {
try (TokenStream ts = wrapper.tokenStream("", "text")) {
assert ts != null;
assertTrue(sourceCalled.get());
}
}

/**
 * Test that {@link AnalyzerWrapper.UnwrappingReuseStrategy} consults the wrapped analyzer's reuse
 * strategy to decide whether components can be reused or need to be re-created.
 */
public void testUnwrappingReuseStrategy() {
  // Switch that makes the wrapped analyzer's strategy report "nothing reusable" on demand.
  AtomicBoolean reuse = new AtomicBoolean(true);

  final ReuseStrategy wrappedAnalyzerStrategy =
      new ReuseStrategy() {
        @Override
        public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) {
          if (reuse.get() == false) {
            return null;
          } else {
            return (TokenStreamComponents) getStoredValue(analyzer);
          }
        }

        @Override
        public void setReusableComponents(
            Analyzer analyzer, String fieldName, TokenStreamComponents components) {
          setStoredValue(analyzer, components);
        }
      };
  Analyzer wrappedAnalyzer =
      new Analyzer(wrappedAnalyzerStrategy) {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          return new TokenStreamComponents(r -> {}, new CannedTokenStream());
        }
      };

  AnalyzerWrapper wrapperAnalyzer =
      new AnalyzerWrapper(PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
          return wrappedAnalyzer;
        }

        @Override
        protected TokenStreamComponents wrapComponents(
            String fieldName, TokenStreamComponents components) {
          return new TokenStreamComponents(
              components.getSource(), new LowerCaseFilter(components.getTokenStream()));
        }
      };

  // Reuse is an identity property, so the positive checks use assertSame rather than
  // assertEquals (which would go through equals()), mirroring the assertNotSame checks below.
  TokenStream ts = wrapperAnalyzer.tokenStream("", "text");
  TokenStream ts2 = wrapperAnalyzer.tokenStream("", "text");
  assertSame(ts, ts2);

  // While the wrapped strategy refuses reuse, every call must build fresh components.
  reuse.set(false);
  TokenStream ts3 = wrapperAnalyzer.tokenStream("", "text");
  assertNotSame(ts2, ts3);
  TokenStream ts4 = wrapperAnalyzer.tokenStream("", "text");
  assertNotSame(ts3, ts4);

  // Once reuse is allowed again, the most recently created components are handed back.
  reuse.set(true);
  TokenStream ts5 = wrapperAnalyzer.tokenStream("", "text");
  assertSame(ts4, ts5);
  TokenStream ts6 = wrapperAnalyzer.tokenStream("", "text");
  assertSame(ts5, ts6);
}
}