diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9b834927b04..02a0654c3f8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -77,6 +77,10 @@ API Changes * LUCENE-8956: QueryRescorer now only sorts the first topN hits instead of all initial hits. (Paul Sanwald via Adrien Grand) +* LUCENE-8921: IndexSearcher.termStatistics() no longer takes a TermStates; it takes the docFreq and totalTermFreq. + It must not be called when docFreq <= 0. The previous implementation survives as deprecated and final. It's removed in 9.0. + (Bruno Roustant, David Smiley, Alan Woodward) + New Features * LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida) diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java index 2cea74eea67..7130059c950 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java @@ -41,7 +41,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity; @@ -865,19 +864,20 @@ public class IndexSearcher { } /** - * Returns {@link TermStatistics} for a term, or {@code null} if - * the term does not exist. + * Returns {@link TermStatistics} for a term. * * This can be overridden for example, to return a term's statistics * across a distributed collection. + * + * @param docFreq The document frequency of the term. It must be greater than or equal to 1. + * @param totalTermFreq The total term frequency. + * @return A {@link TermStatistics} (never null). 
+ * * @lucene.experimental */ - public TermStatistics termStatistics(Term term, TermStates context) throws IOException { - if (context.docFreq() == 0) { - return null; - } else { - return new TermStatistics(term.bytes(), context.docFreq(), context.totalTermFreq()); - } + public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException { + // This constructor will throw an exception if docFreq <= 0. + return new TermStatistics(term.bytes(), docFreq, totalTermFreq); } /** diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 5f3b2342835..64c855d9eee 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -233,11 +233,8 @@ public class MultiPhraseQuery extends Query { ts = TermStates.build(context, term, scoreMode.needsScores()); termStates.put(term, ts); } - if (scoreMode.needsScores()) { - TermStatistics termStatistics = searcher.termStatistics(term, ts); - if (termStatistics != null) { - allTermStats.add(termStatistics); - } + if (scoreMode.needsScores() && ts.docFreq() > 0) { + allTermStats.add(searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq())); } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index fcb8e1c5812..3cf9159fa3e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -428,9 +428,9 @@ public class PhraseQuery extends Query { final Term term = terms[i]; states[i] = TermStates.build(context, term, scoreMode.needsScores()); if (scoreMode.needsScores()) { - TermStatistics termStatistics = searcher.termStatistics(term, states[i]); - if (termStatistics != null) { - termStats[termUpTo++] = termStatistics; + TermStates ts = 
states[i]; + if (ts.docFreq() > 0) { + termStats[termUpTo++] = searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()); } } } diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index e45287ab859..b2328330026 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -205,9 +205,10 @@ public final class SynonymQuery extends Query { long totalTermFreq = 0; termStates = new TermStates[terms.length]; for (int i = 0; i < termStates.length; i++) { - termStates[i] = TermStates.build(searcher.getTopReaderContext(), terms[i].term, true); - TermStatistics termStats = searcher.termStatistics(terms[i].term, termStates[i]); - if (termStats != null) { + TermStates ts = TermStates.build(searcher.getTopReaderContext(), terms[i].term, true); + termStates[i] = ts; + if (ts.docFreq() > 0) { + TermStatistics termStats = searcher.termStatistics(terms[i].term, ts.docFreq(), ts.totalTermFreq()); docFreq = Math.max(termStats.docFreq(), docFreq); totalTermFreq += termStats.totalTermFreq(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 945c1b37f9b..00196eb71d4 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -60,7 +60,7 @@ public class TermQuery extends Query { final TermStatistics termStats; if (scoreMode.needsScores()) { collectionStats = searcher.collectionStatistics(term.field()); - termStats = searcher.termStatistics(term, termStates); + termStats = termStates.docFreq() > 0 ? 
searcher.termStatistics(term, termStates.docFreq(), termStates.totalTermFreq()) : null; } else { // we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1 collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java index c193201613f..d70d2773e05 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -103,9 +103,9 @@ public abstract class SpanWeight extends Weight { TermStatistics[] termStats = new TermStatistics[termStates.size()]; int termUpTo = 0; for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) { - TermStatistics termStatistics = searcher.termStatistics(entry.getKey(), entry.getValue()); - if (termStatistics != null) { - termStats[termUpTo++] = termStatistics; + TermStates ts = entry.getValue(); + if (ts.docFreq() > 0) { + termStats[termUpTo++] = searcher.termStatistics(entry.getKey(), ts.docFreq(), ts.totalTermFreq()); } } CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField()); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java index ea414e7d25f..e6debf20f0c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java @@ -329,10 +329,10 @@ public class TestMinShouldMatch2 extends LuceneTestCase { if (ord >= 0) { boolean success = ords.add(ord); assert success; // no dups - TermStates context = TermStates.build(reader.getContext(), term, true); + TermStates ts = TermStates.build(reader.getContext(), term, true); SimScorer w = weight.similarity.scorer(1f, searcher.collectionStatistics("field"), - searcher.termStatistics(term, context)); + 
searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq())); sims[(int)ord] = new LeafSimScorer(w, reader, "field", true); } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java index 3d0a0639bfa..48832492fdd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/BM25FQuery.java @@ -281,9 +281,10 @@ public final class BM25FQuery extends Query implements Accountable { termStates = new TermStates[fieldTerms.length]; for (int i = 0; i < termStates.length; i++) { FieldAndWeight field = fieldAndWeights.get(fieldTerms[i].field()); - termStates[i] = TermStates.build(searcher.getTopReaderContext(), fieldTerms[i], true); - TermStatistics termStats = searcher.termStatistics(fieldTerms[i], termStates[i]); - if (termStats != null) { + TermStates ts = TermStates.build(searcher.getTopReaderContext(), fieldTerms[i], true); + termStates[i] = ts; + if (ts.docFreq() > 0) { + TermStatistics termStats = searcher.termStatistics(fieldTerms[i], ts.docFreq(), ts.totalTermFreq()); docFreq = Math.max(termStats.docFreq(), docFreq); totalTermFreq += (double) field.weight * termStats.totalTermFreq(); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java index ad324696fc7..79ed166b4e0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java @@ -360,9 +360,9 @@ public class TermAutomatonQuery extends Query implements Accountable { for(Map.Entry ent : idToTerm.entrySet()) { Integer termID = ent.getKey(); if (ent.getValue() != null) { - TermStatistics stats = searcher.termStatistics(new Term(field, ent.getValue()), termStates.get(termID)); - if (stats != null) { - allTermStats.add(stats); + TermStates ts = 
termStates.get(termID); + if (ts.docFreq() > 0) { + allTermStats.add(searcher.termStatistics(new Term(field, ent.getValue()), ts.docFreq(), ts.totalTermFreq())); } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java index adf881889df..4f01cf77067 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java @@ -186,8 +186,10 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase { } try { for(Term term : terms) { - final TermStates termStates = TermStates.build(s.getIndexReader().getContext(), term, true); - stats.put(term, s.termStatistics(term, termStates)); + final TermStates ts = TermStates.build(s.getIndexReader().getContext(), term, true); + if (ts.docFreq() > 0) { + stats.put(term, s.termStatistics(term, ts.docFreq(), ts.totalTermFreq())); + } } } finally { node.searchers.release(s); @@ -262,36 +264,31 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase { } @Override - public TermStatistics termStatistics(Term term, TermStates context) throws IOException { + public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException { assert term != null; - long docFreq = 0; - long totalTermFreq = 0; + long distributedDocFreq = 0; + long distributedTotalTermFreq = 0; for(int nodeID=0;nodeID 0; + return new TermStatistics(term.bytes(), distributedDocFreq, distributedTotalTermFreq); } @Override diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index deb6dc10eba..192adb12d91 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -48,7 +48,6 @@ import 
org.apache.lucene.index.MultiPostingsEnum; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermStates; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.*; @@ -324,15 +323,15 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI * Override these two methods to provide a way to use global collection stats. */ @Override - public TermStatistics termStatistics(Term term, TermStates context) throws IOException { + public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException { final SolrRequestInfo reqInfo = SolrRequestInfo.getRequestInfo(); if (reqInfo != null) { final StatsSource statsSrc = (StatsSource) reqInfo.getReq().getContext().get(STATS_SOURCE); if (statsSrc != null) { - return statsSrc.termStatistics(this, term, context); + return statsSrc.termStatistics(this, term, docFreq, totalTermFreq); } } - return localTermStatistics(term, context); + return localTermStatistics(term, docFreq, totalTermFreq); } @Override @@ -347,8 +346,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI return localCollectionStatistics(field); } - public TermStatistics localTermStatistics(Term term, TermStates context) throws IOException { - return super.termStatistics(term, context); + public TermStatistics localTermStatistics(Term term, int docFreq, long totalTermFreq) throws IOException { + return super.termStatistics(term, docFreq, totalTermFreq); } public CollectionStatistics localCollectionStatistics(String field) throws IOException { diff --git a/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java b/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java index fe315d2114a..002b19011b8 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java +++ 
b/solr/core/src/java/org/apache/solr/search/stats/ExactStatsCache.java @@ -28,7 +28,6 @@ import java.util.Set; import com.google.common.collect.Lists; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; @@ -170,11 +169,8 @@ public class ExactStatsCache extends StatsCache { } @Override - public TermStatistics termStatistics(Term term, TermStates context) throws IOException { - TermStatistics ts = super.termStatistics(term, context); - if (ts == null) { - return null; - } + public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException { + TermStatistics ts = super.termStatistics(term, docFreq, totalTermFreq); terms.add(term); statsMap.put(term.toString(), new TermStats(term.field(), ts)); return ts; @@ -328,7 +324,7 @@ public class ExactStatsCache extends StatsCache { this.colStatsCache = colStatsCache; } - public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context) + public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq) throws IOException { TermStats termStats = termStatsCache.get(term.toString()); // TermStats == null is also true if term has no docFreq anyway, @@ -336,7 +332,7 @@ public class ExactStatsCache extends StatsCache { // Not sure we need a warning here if (termStats == null) { log.debug("Missing global termStats info for term={}, using local stats", term); - return localSearcher.localTermStatistics(term, context); + return localSearcher.localTermStatistics(term, docFreq, totalTermFreq); } else { return termStats.toTermStatistics(); } diff --git a/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java b/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java index c94695acf7f..c49f5e9c165 100644 --- 
a/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java +++ b/solr/core/src/java/org/apache/solr/search/stats/LRUStatsCache.java @@ -24,7 +24,6 @@ import java.util.Map.Entry; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; import org.apache.solr.core.PluginInfo; @@ -132,12 +131,12 @@ public class LRUStatsCache extends ExactStatsCache { this.colStatsCache = colStatsCache; } @Override - public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context) + public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq) throws IOException { TermStats termStats = termStatsCache.get(term.toString()); if (termStats == null) { log.debug("## Missing global termStats info: {}, using local", term); - return localSearcher.localTermStatistics(term, context); + return localSearcher.localTermStatistics(term, docFreq, totalTermFreq); } else { return termStats.toTermStatistics(); } diff --git a/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java b/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java index 3a08a610151..6b331083858 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java +++ b/solr/core/src/java/org/apache/solr/search/stats/LocalStatsSource.java @@ -19,7 +19,6 @@ package org.apache.solr.search.stats; import java.io.IOException; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; import org.apache.solr.search.SolrIndexSearcher; @@ -34,9 +33,9 @@ public final class LocalStatsSource extends StatsSource { } @Override - public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates 
context) + public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq) throws IOException { - return localSearcher.localTermStatistics(term, context); + return localSearcher.localTermStatistics(term, docFreq, totalTermFreq); } @Override diff --git a/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java b/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java index c187fef16fc..735e22d0528 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java +++ b/solr/core/src/java/org/apache/solr/search/stats/StatsSource.java @@ -19,7 +19,6 @@ package org.apache.solr.search.stats; import java.io.IOException; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermStatistics; @@ -34,7 +33,7 @@ import org.apache.solr.search.SolrIndexSearcher; */ public abstract class StatsSource { - public abstract TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context) + public abstract TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq) throws IOException; public abstract CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)