-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add UnwrappingReuseStrategy for AnalyzerWrapper #14154
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
|
||
import java.io.Reader; | ||
import org.apache.lucene.util.AttributeFactory; | ||
import org.apache.lucene.util.CloseableThreadLocal; | ||
|
||
/** | ||
* Extension to {@link Analyzer} suitable for Analyzers which wrap other Analyzers. | ||
|
@@ -151,4 +152,57 @@ protected final Reader initReaderForNormalization(String fieldName, Reader reade | |
protected final AttributeFactory attributeFactory(String fieldName) { | ||
return getWrappedAnalyzer(fieldName).attributeFactory(fieldName); | ||
} | ||
|
||
/** | ||
* A {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy} that checks the wrapped analyzer's | ||
* strategy for reusability. If the wrapped analyzer's strategy returns null, components need to | ||
* be re-created. During component creation, the wrapper must store the wrapped analyzer's | ||
* components in the {@code wrappedComponents} thread-local variable. | ||
*/ | ||
public static final class WrappingReuseStrategy extends ReuseStrategy { | ||
// The wrapper this strategy is bound to; compared by identity against the analyzer passed in.
// Assigned by setUp, not by the constructor.
private AnalyzerWrapper wrapper; | ||
// The analyzer wrapped by {@code wrapper}; its own reuse strategy is consulted first.
private Analyzer wrappedAnalyzer; | ||
// Holder from which the wrapped analyzer's components are read in setReusableComponents.
private CloseableThreadLocal<TokenStreamComponents> wrappedComponents; | ||
// Strategy delegated to for every analyzer that is not {@code wrapper}.
private final ReuseStrategy fallbackStrategy; | ||
|
||
/**
 * Creates a strategy that is not yet bound to a wrapper; {@code setUp} supplies the wrapper, the
 * wrapped analyzer and the components holder afterwards.
 *
 * @param fallbackStrategy strategy that handles every analyzer other than the bound wrapper
 */
public WrappingReuseStrategy(ReuseStrategy fallbackStrategy) { | ||
this.fallbackStrategy = fallbackStrategy; | ||
} | ||
|
||
/**
 * Binds this strategy to a wrapper after construction. The three fields assigned here have no
 * initializer, so they stay unset until this method runs; calling it again overwrites the
 * previous binding.
 *
 * @param wrapper the wrapping analyzer whose components this strategy manages
 * @param wrappedAnalyzer the analyzer wrapped by {@code wrapper}
 * @param wrappedComponents holder from which the wrapped analyzer's components are read when the
 *     wrapper's components are stored
 */
public void setUp( | ||
AnalyzerWrapper wrapper, | ||
Analyzer wrappedAnalyzer, | ||
CloseableThreadLocal<TokenStreamComponents> wrappedComponents) { | ||
this.wrapper = wrapper; | ||
this.wrappedAnalyzer = wrappedAnalyzer; | ||
this.wrappedComponents = wrappedComponents; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This bugs me. We allow a reuse strategy to be mutated after construction, which seems likely to cause a bug. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, I see that "wrappedComponents" is key here. It's tricky, as the analyzer provides access directly to [inline code lost in extraction]. That seems like a bad API already, but we don't want to change all these long-lived interfaces unnecessarily. |
||
|
||
/**
 * Returns the reusable components for {@code analyzer}.
 *
 * <p>For the bound wrapper, the wrapped analyzer's own reuse strategy is asked first: if it has
 * nothing reusable for {@code fieldName}, this returns null; otherwise it returns the value
 * stored for the wrapper. Any other analyzer is handed to the fallback strategy.
 *
 * @param analyzer the analyzer requesting components
 * @param fieldName the field the components are requested for
 * @return the components to reuse, or null when the wrapped analyzer's strategy returns null
 */
@Override | ||
public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) { | ||
// Identity comparison: only the exact wrapper instance passed to setUp takes this branch.
if (analyzer == wrapper) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we really need to know the instance, or can we do |
||
// The wrapped analyzer's strategy decides whether anything may be reused at all.
if (wrappedAnalyzer.getReuseStrategy().getReusableComponents(wrappedAnalyzer, fieldName) | ||
== null) { | ||
return null; | ||
} else { | ||
return (TokenStreamComponents) getStoredValue(analyzer); | ||
} | ||
} else { | ||
return fallbackStrategy.getReusableComponents(analyzer, fieldName); | ||
} | ||
} | ||
|
||
/**
 * Stores reusable components for {@code analyzer}.
 *
 * <p>For the bound wrapper, {@code components} is saved as the wrapper's stored value, and the
 * value currently held in {@code wrappedComponents} is registered with the wrapped analyzer's
 * own reuse strategy. Any other analyzer is handed to the fallback strategy.
 *
 * @param analyzer the analyzer the components belong to
 * @param fieldName the field the components were created for
 * @param components the components to store for {@code analyzer}
 */
@Override | ||
public void setReusableComponents( | ||
Analyzer analyzer, String fieldName, TokenStreamComponents components) { | ||
if (analyzer == wrapper) { | ||
setStoredValue(analyzer, components); | ||
// Also register the wrapped analyzer's components with its own strategy.
// NOTE(review): this relies on wrappedComponents having been set before this call - confirm.
wrappedAnalyzer | ||
.getReuseStrategy() | ||
.setReusableComponents(wrappedAnalyzer, fieldName, wrappedComponents.get()); | ||
} else { | ||
fallbackStrategy.setReusableComponents(analyzer, fieldName, components); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we really need to know the instance, or can we do |
||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ | |
import org.apache.lucene.analysis.AnalyzerWrapper; | ||
import org.apache.lucene.analysis.TokenStreamToAutomaton; | ||
import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter; | ||
import org.apache.lucene.util.CloseableThreadLocal; | ||
|
||
/** | ||
* Wraps an {@link org.apache.lucene.analysis.Analyzer} to provide additional completion-only tuning | ||
|
@@ -66,6 +67,9 @@ public final class CompletionAnalyzer extends AnalyzerWrapper { | |
*/ | ||
private final int maxGraphExpansions; | ||
|
||
// Holds the wrapped analyzer's components: written in wrapComponents, handed to the
// WrappingReuseStrategy through setUp, and closed in close().
// NOTE(review): never reassigned in the visible code, so it could probably be final - confirm.
private CloseableThreadLocal<TokenStreamComponents> wrappedComponents = | ||
new CloseableThreadLocal<>(); | ||
|
||
/** | ||
* Wraps an analyzer to convert its output token stream to an automaton | ||
* | ||
|
@@ -112,6 +116,25 @@ public CompletionAnalyzer( | |
ConcatenateGraphFilter.DEFAULT_MAX_GRAPH_EXPANSIONS); | ||
} | ||
|
||
/** | ||
* Creates a CompletionAnalyzer around the given analyzer that uses a {@link | ||
* org.apache.lucene.analysis.AnalyzerWrapper.WrappingReuseStrategy} as its reuse strategy. | ||
* The maximum number of graph expansions is set to | ||
* {@link ConcatenateGraphFilter#DEFAULT_MAX_GRAPH_EXPANSIONS}. | ||
* | ||
* @param analyzer the analyzer to wrap | ||
* @param preserveSep value stored as this analyzer's {@code preserveSep} setting | ||
* @param preservePositionIncrements value stored as this analyzer's | ||
*     {@code preservePositionIncrements} setting | ||
* @param fallbackStrategy strategy the WrappingReuseStrategy delegates to for other analyzers | ||
*/ | ||
public CompletionAnalyzer( | ||
Analyzer analyzer, | ||
boolean preserveSep, | ||
boolean preservePositionIncrements, | ||
ReuseStrategy fallbackStrategy) { | ||
super(new WrappingReuseStrategy(fallbackStrategy)); | ||
// Workaround: super() cannot be given a reference to "this", so the strategy is bound here: | ||
((WrappingReuseStrategy) getReuseStrategy()).setUp(this, analyzer, wrappedComponents); | ||
this.analyzer = analyzer; | ||
this.preserveSep = preserveSep; | ||
this.preservePositionIncrements = preservePositionIncrements; | ||
this.maxGraphExpansions = ConcatenateGraphFilter.DEFAULT_MAX_GRAPH_EXPANSIONS; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
What if we made the constructor private, removed the setUp hack, and did that wiring inside a public static factory method instead? |
||
|
||
/** | ||
* Calls {@link #CompletionAnalyzer(org.apache.lucene.analysis.Analyzer, boolean, boolean, int)} | ||
* preserving token separation and position increments | ||
|
@@ -145,9 +168,16 @@ protected Analyzer getWrappedAnalyzer(String fieldName) { | |
return analyzer; | ||
} | ||
|
||
/**
 * Closes this analyzer via {@code super.close()} and then closes the {@code wrappedComponents}
 * holder owned by this instance.
 */
@Override | ||
public void close() { | ||
super.close(); | ||
wrappedComponents.close(); | ||
} | ||
|
||
@Override | ||
protected TokenStreamComponents wrapComponents( | ||
String fieldName, TokenStreamComponents components) { | ||
wrappedComponents.set(components); | ||
CompletionTokenStream tokenStream = | ||
new CompletionTokenStream( | ||
components.getTokenStream(), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems to me that a better name is
UnWrapping,
since if we find that the analyzer is a wrapped analyzer, we unwrap it and use the underlying analyzer.