From 8667bf6d678d64bdcf8dfe94dd0678c37bc83746 Mon Sep 17 00:00:00 2001 From: Roman Chyla Date: Wed, 28 Aug 2019 19:17:21 -0400 Subject: [PATCH] Made the query params that support better scoring default --- .../builders/AqpAdsabsSubQueryProvider.java | 18 ++++++++++++------ ...AqpAdsabsFieldNodePreAnalysisProcessor.java | 2 +- .../src/test/org/adsabs/TestAdsAllFields.java | 5 ++++- .../TestAdsabsTypeFulltextParsing.java | 8 ++++++-- .../solr/search/TestAqpAdsabsSolrSearch.java | 4 ++++ .../solr/collection1/conf/solrconfig.xml | 16 ++++++++++++++-- 6 files changed, 41 insertions(+), 12 deletions(-) diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/builders/AqpAdsabsSubQueryProvider.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/builders/AqpAdsabsSubQueryProvider.java index 606b8eff3..168b938f0 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/builders/AqpAdsabsSubQueryProvider.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/builders/AqpAdsabsSubQueryProvider.java @@ -764,12 +764,15 @@ public Query parse(FunctionQParser fp) throws SyntaxError { LuceneCacheWrapper boostWrapper = getLuceneCache(fp, "cite_read_boost"); - SecondOrderQuery outerQuery = new SecondOrderQuery( // references + SecondOrderQuery outerQuery = + new SecondOrderQuery( // references new SecondOrderQuery( // topn - new SecondOrderQuery(innerQuery, // classic_relevance - new SecondOrderCollectorAdsClassicScoringFormula(referencesWrapper, boostWrapper)), + //new SecondOrderQuery(innerQuery, // classic_relevance + // new SecondOrderCollectorAdsClassicScoringFormula(referencesWrapper, boostWrapper)), + innerQuery, new SecondOrderCollectorTopN(200)), new SecondOrderCollectorCitesRAM(referencesWrapper)); + outerQuery.getcollector().setFinalValueType(FinalValueType.ABS_COUNT_NORM); return outerQuery; }; @@ -841,12 +844,15 @@ public Query parse(FunctionQParser fp) throws SyntaxError { LuceneCacheWrapper boostWrapper = getLuceneCache(fp, "cite_read_boost"); - SecondOrderQuery outerQuery = new SecondOrderQuery( // citations + SecondOrderQuery outerQuery = + new SecondOrderQuery( // citations new SecondOrderQuery( // topn - new SecondOrderQuery(innerQuery, // classic_relevance - new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostWrapper)), + innerQuery, + //new SecondOrderQuery(innerQuery, // classic_relevance + // new SecondOrderCollectorAdsClassicScoringFormula(citationsWrapper, boostWrapper)), new SecondOrderCollectorTopN(200)), new SecondOrderCollectorCitedBy(citationsWrapper)); + outerQuery.getcollector().setFinalValueType(FinalValueType.ABS_COUNT); return outerQuery; }; diff --git a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsFieldNodePreAnalysisProcessor.java b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsFieldNodePreAnalysisProcessor.java index f65acfc97..fd23bb749 100644 --- a/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsFieldNodePreAnalysisProcessor.java +++ b/contrib/adsabs/src/java/org/apache/lucene/queryparser/flexible/aqp/processors/AqpAdsabsFieldNodePreAnalysisProcessor.java @@ -116,7 +116,7 @@ protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException { } Map statFields = getStaticFields(); - if (statFields.containsKey(field)) { + if (statFields.containsKey(field) && node.getParent() != null && !(node.getParent() instanceof TermRangeQueryNode)) { node = new AqpConstantQueryNode(node); Float boost = statFields.get(field); if (boost != null && boost != 1.0f) { diff --git a/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java b/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java index 247c5edb4..f7ef8f494 100644 --- a/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java +++ b/contrib/adsabs/src/test/org/adsabs/TestAdsAllFields.java @@ -410,19 +410,22 @@ public void test() throws Exception { */ assertQ(req("q", "author:\"Einstein, A\"", "aqp.constant_scoring", "author^13 title^12", + "aqp.classic_scoring.modifier", "0.48", "fl", "recid,score"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']", - "//doc/float[@name='score'][.='13.0']" + "//doc/float[@name='score'][.='13.0']" // 13.00 * (cite_read_boost + aqp.classic_scoring.modifier) ); assertQ(req("q", "author:\"Einstein, A\" AND author:\"Anders\"", "aqp.constant_scoring", "author^13", + "aqp.classic_scoring.modifier", "0.48", "fl", "recid,score"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']", "//doc/float[@name='score'][.='26.0']"); assertQ(req("q", "author:\"Einstein, A\" OR author:\"Anders\"", "aqp.constant_scoring", "author^13", + "aqp.classic_scoring.modifier", "0.48", "fl", "recid,score"), "//*[@numFound='1']", "//doc/int[@name='recid'][.='100']", diff --git a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java index fb0e5172a..b56757728 100644 --- a/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java +++ b/contrib/adsabs/src/test/org/apache/solr/analysis/TestAdsabsTypeFulltextParsing.java @@ -1222,12 +1222,16 @@ public void testOtherCases() throws Exception { assertQ(req("q", "title:\"GBT Survey of 50 Faint Fermi\"~2"), "//*[@numFound>='4']"); - assertQ(req("q", "title:\"A 350-MHz GBT Survey of 50 Faint Fermi γ-ray Sources for Radio Millisecond Pulsars\""), + + assertQ(req("q", "title:\"A 350-MHz GBT Survey of 50 Faint Fermi γ-ray Sources for Radio Millisecond Pulsars\"", + "indent", "true", + "debugQuery", "true"), "//*[@numFound='4']", "//doc/str[@name='id'][.='400']", "//doc/str[@name='id'][.='401']", "//doc/str[@name='id'][.='402']", - "//doc/str[@name='id'][.='403']"); + "//doc/str[@name='id'][.='403']" + ); assertQ(req("q", "title:\"A 350-MHz GBT Survey of 50 Faint Fermi γ ray Sources for Radio Millisecond Pulsars\""), "//*[@numFound='4']", "//doc/str[@name='id'][.='400']", diff --git a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java index 943602d72..3fffbc39c 100644 --- a/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java +++ b/contrib/adsabs/src/test/org/apache/solr/search/TestAqpAdsabsSolrSearch.java @@ -138,6 +138,7 @@ public void tearDown() throws Exception { public void testUnfieldedSearch() throws Exception { + // have constant scoring work even for unfielded searches assertQueryEquals(req("defType", "aqp", "q", "foo bar", "qf", "bibcode^5 title^10", @@ -375,6 +376,9 @@ public void testSpecialCases() throws Exception { "title", "title bitle")); assertU(commit("waitSearcher", "true")); + assertQ(req("q", "similar(foo bar baz title bitle, input abstract title, 100, 100, 1, 1)"), + "//*[@numFound='1']", + "//doc/str[@name='id'][.='2']"); // similar() assertQueryEquals(req("defType", "aqp", "q", "similar(foo bar baz, input)"), diff --git a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml index 751ecf64c..f6e23a488 100644 --- a/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml +++ b/contrib/examples/adsabs/server/solr/collection1/conf/solrconfig.xml @@ -319,14 +319,23 @@ Modified qf original: first_author^3.0 author^2 title^1.4 abstract^1.3 keyword^1.4 keyword_norm^1.4 all body^0.1 year new: first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2 + + August 28, 2019 + Modified qf: + old: first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2 + new: first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8 --> - first_author^5 author^2 title^1.5 abstract^1.3 identifier^1 bibstem^1 year^2 + first_author^0.9 author^0.85 year^0.8 title^0.8 abstract^0.7 identifier^0.8 bibstem^0.8 keyword^0.8 + aqp disjuncts simple entdate pubdate year edismax_combined_aqp true + 0.5 + first_author^14 author^13 year^10 bibstem^10 + SYNONYM explicit @@ -349,6 +358,9 @@ entdate pubdate year edismax_combined_aqp true + 0.5 + first_author^14 author^13 year^10 bibstem^10 + SYNONYM explicit @@ -697,7 +709,7 @@ cite_read_boost,citation_count_norm recid,pubdate_sort,citation_count,classic_factor,simbid,read_count,author_count,page_count,data_count author,first_author,book_author,editor - pubdate:date,entdate:entry_date + pubdate:date,entdate:entry_date:timestamp author,first_author,book_author,editor