Skip to content

Commit

Permalink
Added more scoring methods; book_author and editor; and constant() me…
Browse files Browse the repository at this point in the history
…thod
  • Loading branch information
romanchyla committed May 18, 2018
1 parent 6820d0e commit 64b0e4e
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.SecondOrderCollector;
import org.apache.lucene.search.SecondOrderCollector.FinalValueType;
import org.apache.lucene.search.SecondOrderCollectorAdsClassicScoringFormula;
Expand Down Expand Up @@ -640,11 +641,11 @@ public Query parse(FunctionQParser fp) throws SyntaxError {
Query innerQuery = fp.parseNestedQuery();

@SuppressWarnings("unchecked")
SolrCacheWrapper<CitationCache<Object, Integer>> referencesWrapper = new SolrCacheWrapper.ReferencesCache(
SolrCacheWrapper<CitationCache<Object, Integer>> referencesWrapper = new SolrCacheWrapper.ReferencesCache(
(CitationCache<Object, Integer>) fp.getReq().getSearcher().getCache("citations-cache"));


return new SecondOrderQuery(innerQuery,
return new SecondOrderQuery(innerQuery,
new SecondOrderCollectorCitesRAM(referencesWrapper), false);
}
});
Expand Down Expand Up @@ -1017,7 +1018,7 @@ public Query parse(FunctionQParser fp) throws SyntaxError {
// so we have to do it ourselves
parsers.put("warm_cache", new AqpSubqueryParserFull() {
@SuppressWarnings("unchecked")
public Query parse(FunctionQParser fp) throws SyntaxError {
public Query parse(FunctionQParser fp) throws SyntaxError {

final SolrQueryRequest req = fp.getReq();
@SuppressWarnings("rawtypes")
Expand All @@ -1028,6 +1029,30 @@ public Query parse(FunctionQParser fp) throws SyntaxError {
return new MatchNoDocsQuery();
}
});

/* @api.doc
*
* def constant(query):
* """
* Applies constant score (that can be set by boost factor)
*
* Example:
*
* ```constant(title:hubble^2)```
*
*
* @since 63.1.0.24
* """
* return "constant(%s)" % (query,)
*
*/
parsers.put("constant", new AqpSubqueryParserFull() {
  // Handler for the constant(...) function: parses the nested query passed
  // as the function argument and wraps it in a ConstantScoreQuery.
  //
  // NOTE(review): Lucene's ConstantScoreQuery is documented to ignore the
  // scores of the query it wraps, so a boost written inside the parentheses
  // (e.g. constant(title:foo^2)) may have no effect on the final score --
  // confirm whether the boost is meant to be applied outside the wrapper.
  public Query parse(FunctionQParser fp) throws SyntaxError {
    return new ConstantScoreQuery(fp.parseNestedQuery());
  }
});

};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ else if ("constant_boolean".equals(method)) {
else if ("boolean".equals(method)) {
node.setTag(MultiTermRewriteMethodProcessor.TAG_ID, MultiTermQuery.SCORING_BOOLEAN_REWRITE);
}
else if ("topterms_blended".equals(method)) {
node.setTag(MultiTermRewriteMethodProcessor.TAG_ID, new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(1024));
}
else if ("topterms".equals(method)) {
node.setTag(MultiTermRewriteMethodProcessor.TAG_ID, new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(1024));
}
else if ("topterms_boosted".equals(method)) {
node.setTag(MultiTermRewriteMethodProcessor.TAG_ID, new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(1024));
}
else {
throw new QueryNodeException(new MessageImpl(
QueryParserMessages.PARAMETER_VALUE_NOT_SUPPORTED, "Unknown rewrite method: \"" + method + "\""));
Expand Down
38 changes: 27 additions & 11 deletions contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ public void test() throws Exception {
*/
", \"bibstem\": [\"JNuM\", \"JNuM..455\"]" +
", \"body\": \"Some fulltext hashimoto added\"" +
", \"book_author\": [\"book, author\", \"book, fauthor\"]" +
", \"caption\": [\"caption1 captionFoo\", \"caption2\"]" +
", \"citation\": [\"2014JNuM..455...10C\", \"2014JNuM..455...10D\"]" +
", \"cite_read_boost\": 0.52" +
Expand All @@ -164,18 +165,19 @@ public void test() throws Exception {
", \"comment\": [\"comment1 commentFoo\", \"comment2\"]" +
", \"database\": [\"ASTRONOMY\", \"PHYSICS\"]" +

", \"data\": [\"NED:15\", \"CDS:5\"]" +
", \"data_count\": 20" +
", \"data\": [\"NED:15\", \"CDS:5\"]" +
", \"data_count\": 20" +
// it is solr format for the pubdate, must be in the right format
// we need to add 30 minutes to every day; this allows us to search
// for ranges effectively; thus:
// 2013-08-5 -> 2013-08-05T00:30:00Z
// 2013-08 -> 2013-08-01T00:30:00Z
// 2013 -> 2013-01-01T00:30:00Z
", \"date\": \"2013-08-05T00:30:00Z\"" +
", \"date\": \"2013-08-05T00:30:00Z\"" +
", \"doctype\": \"article\"" +
", \"doctype_facet_hier\": [\"0/Article\", \"1/Article/Book chapter\"]" +
", \"doi\": \"doi:ŽŠČŘĎŤŇ:123456789\"" +
", \"editor\": [\"t' Hooft, van X\"]" +
", \"eid\": \"00001\"" +
", \"email\": [\"-\", \"[email protected]\", \"-\"]" +
// entry_date --> see below
Expand Down Expand Up @@ -345,14 +347,14 @@ public void test() throws Exception {


/*
* caption
*/
assertQ(req("q", "caption:caption1"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']");
assertQ(req("q", "caption:captionfoo"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']");
* caption
*/
assertQ(req("q", "caption:caption1"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']");
assertQ(req("q", "caption:captionfoo"),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']");


/*
Expand Down Expand Up @@ -399,6 +401,13 @@ public void test() throws Exception {
"<str>Anders, John Michael</str>" +
"<str>Einstein, A</str></arr>");

/*
* book_author
*/
assertQ(req("q", "book_author:\"book, fauthor\""),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']");

/*
* author_count
*/
Expand Down Expand Up @@ -477,6 +486,13 @@ public void test() throws Exception {
);


/*
* editor
*/
assertQ(req("q", "editor:\"'t hooft, v x\""),
"//*[@numFound='1']",
"//doc/int[@name='recid'][.='100']");


/*
* email
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.apache.lucene.queryparser.flexible.aqp.TestAqpAdsabs;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
Expand Down Expand Up @@ -319,6 +320,12 @@ public void testUnfieldedSearch() throws Exception {

public void testSpecialCases() throws Exception {

// constant() score
assertQueryEquals(req("defType", "aqp", "q", "constant(title:foo)"),
"ConstantScore(title:foo)", ConstantScoreQuery.class);
assertQueryEquals(req("defType", "aqp", "q", "constant(title:foo^2)"),
"ConstantScore((title:foo)^2.0)", ConstantScoreQuery.class);

// allow to set scoring method for a given field
BooleanQuery q = (BooleanQuery) assertQueryEquals(req("defType", "aqp",
"q", "author:riess", "aqp.qprefix.scoring.author", "boolean"),
Expand All @@ -338,6 +345,12 @@ public void testSpecialCases() throws Exception {
BooleanQuery.class);
assertEquals(((PrefixQuery) q.clauses().get(1).getQuery()).getRewriteMethod(), MultiTermQuery.CONSTANT_SCORE_REWRITE);


q = (BooleanQuery) assertQueryEquals(req("defType", "aqp",
"q", "author:riess", "aqp.qprefix.scoring.author", "topterms"),
"author:riess, author:riess,*",
BooleanQuery.class);
assertEquals(((PrefixQuery) q.clauses().get(1).getQuery()).getRewriteMethod().getClass(), MultiTermQuery.TopTermsScoringBooleanQueryRewrite.class);

//verification for https://github.com/romanchyla/montysolr/issues/45
// expansion of synonyms inside a virtual field together with nested boolean query
Expand Down Expand Up @@ -831,7 +844,8 @@ public void testSpecialCases() throws Exception {
"SecondOrderQuery(title:foo, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.5, adsPart=0.5))", SecondOrderQuery.class);
assertQueryEquals(req("defType", "aqp", "q", "cr(title:foo, 0.4)"),
"SecondOrderQuery(title:foo, collector=SecondOrderCollectorAdsClassicScoringFormula(cache=citations-cache, boost=float[] cite_read_boost, lucene=0.4, adsPart=0.6))", SecondOrderQuery.class);



}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,13 @@

<field name="first_author_norm" type="normalized_text_ascii_notokenization"
indexed="true" stored="true" />


<field name="book_author" type="author" indexed="true" stored="true"
multiValued="true" omitNorms="true" useDocValuesAsStored="false"/>

<field name="editor" type="author" indexed="true" stored="true"
multiValued="true" omitNorms="true" useDocValuesAsStored="false"/>

<field name="aff" type="affiliation_text" indexed="true" stored="true"
multiValued="true" omitNorms="true"/>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -696,9 +696,9 @@
<str name="aqp.timestampFields">indexstamp,update_timestamp,entry_date,metadata_ctime,metadata_mtime,fulltext_ctime,fulltext_mtime,nonbib_ctime,nonbib_mtime,metrics_ctime,metrics_mtime,orcid_ctime,orcid_mtime</str>
<str name="aqp.floatFields">cite_read_boost,citation_count_norm</str>
<str name="aqp.intFields">recid,pubdate_sort,citation_count,classic_factor,simbid,read_count,author_count,page_count,data_count</str>
<str name="aqp.authorFields">author,first_author</str>
<str name="aqp.authorFields">author,first_author,book_author,editor</str>
<str name="aqp.humanized.dates">pubdate:date,entdate:entry_date</str>
<str name="aqp.force.fuzzy.phrases">author,first_author</str>
<str name="aqp.force.fuzzy.phrases">author,first_author,book_author,editor</str>
</lst>
</queryParser>

Expand Down Expand Up @@ -726,9 +726,9 @@
<str name="aqp.timestampFields">indexstamp,update_timestamp,entry_date,metadata_ctime,metadata_mtime,fulltext_ctime,fulltext_mtime,nonbib_ctime,nonbib_mtime,metrics_ctime,metrics_mtime,orcid_ctime,orcid_mtime</str>
<str name="aqp.floatFields">cite_read_boost,citation_count_norm</str>
<str name="aqp.intFields">recid,pubdate_sort,citation_count,classic_factor,simbid,read_count,author_count,page_count,data_count</str>
<str name="aqp.authorFields">author,first_author</str>
<str name="aqp.authorFields">author,first_author,book_author,editor</str>
<str name="aqp.humanized.dates">pubdate:date,entdate:entry_date:timestamp</str>
<str name="aqp.force.fuzzy.phrases">author,first_author</str>
<str name="aqp.force.fuzzy.phrases">author,first_author,book_author,editor</str>
</lst>
</queryParser>

Expand Down

0 comments on commit 64b0e4e

Please sign in to comment.