Родитель
93d3e5d666
Коммит
fd0c8b9e81
|
@ -77,6 +77,10 @@ API Changes
|
|||
* LUCENE-8956: QueryRescorer now only sorts the first topN hits instead of all
|
||||
initial hits. (Paul Sanwald via Adrien Grand)
|
||||
|
||||
* LUCENE-8921: IndexSearcher.termStatistics() no longer takes a TermStates; it takes the docFreq and totalTermFreq.
|
||||
Do not call it if docFreq <= 0. The previous implementation survives as deprecated and final; it is removed in 9.0.
|
||||
(Bruno Roustant, David Smiley, Alan Woodward)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
|
||||
|
|
|
@ -41,7 +41,6 @@ import org.apache.lucene.index.LeafReaderContext;
|
|||
import org.apache.lucene.index.ReaderUtil;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
@ -865,19 +864,20 @@ public class IndexSearcher {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns {@link TermStatistics} for a term, or {@code null} if
|
||||
* the term does not exist.
|
||||
* Returns {@link TermStatistics} for a term.
|
||||
*
|
||||
* This can be overridden, for example, to return a term's statistics
|
||||
* across a distributed collection.
|
||||
*
|
||||
* @param docFreq The document frequency of the term. It must be greater than or equal to 1.
|
||||
* @param totalTermFreq The total term frequency.
|
||||
* @return A {@link TermStatistics} (never null).
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public TermStatistics termStatistics(Term term, TermStates context) throws IOException {
|
||||
if (context.docFreq() == 0) {
|
||||
return null;
|
||||
} else {
|
||||
return new TermStatistics(term.bytes(), context.docFreq(), context.totalTermFreq());
|
||||
}
|
||||
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
|
||||
// This constructor will throw an exception if docFreq <= 0.
|
||||
return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -233,11 +233,8 @@ public class MultiPhraseQuery extends Query {
|
|||
ts = TermStates.build(context, term, scoreMode.needsScores());
|
||||
termStates.put(term, ts);
|
||||
}
|
||||
if (scoreMode.needsScores()) {
|
||||
TermStatistics termStatistics = searcher.termStatistics(term, ts);
|
||||
if (termStatistics != null) {
|
||||
allTermStats.add(termStatistics);
|
||||
}
|
||||
if (scoreMode.needsScores() && ts.docFreq() > 0) {
|
||||
allTermStats.add(searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -428,9 +428,9 @@ public class PhraseQuery extends Query {
|
|||
final Term term = terms[i];
|
||||
states[i] = TermStates.build(context, term, scoreMode.needsScores());
|
||||
if (scoreMode.needsScores()) {
|
||||
TermStatistics termStatistics = searcher.termStatistics(term, states[i]);
|
||||
if (termStatistics != null) {
|
||||
termStats[termUpTo++] = termStatistics;
|
||||
TermStates ts = states[i];
|
||||
if (ts.docFreq() > 0) {
|
||||
termStats[termUpTo++] = searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -205,9 +205,10 @@ public final class SynonymQuery extends Query {
|
|||
long totalTermFreq = 0;
|
||||
termStates = new TermStates[terms.length];
|
||||
for (int i = 0; i < termStates.length; i++) {
|
||||
termStates[i] = TermStates.build(searcher.getTopReaderContext(), terms[i].term, true);
|
||||
TermStatistics termStats = searcher.termStatistics(terms[i].term, termStates[i]);
|
||||
if (termStats != null) {
|
||||
TermStates ts = TermStates.build(searcher.getTopReaderContext(), terms[i].term, true);
|
||||
termStates[i] = ts;
|
||||
if (ts.docFreq() > 0) {
|
||||
TermStatistics termStats = searcher.termStatistics(terms[i].term, ts.docFreq(), ts.totalTermFreq());
|
||||
docFreq = Math.max(termStats.docFreq(), docFreq);
|
||||
totalTermFreq += termStats.totalTermFreq();
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ public class TermQuery extends Query {
|
|||
final TermStatistics termStats;
|
||||
if (scoreMode.needsScores()) {
|
||||
collectionStats = searcher.collectionStatistics(term.field());
|
||||
termStats = searcher.termStatistics(term, termStates);
|
||||
termStats = termStates.docFreq() > 0 ? searcher.termStatistics(term, termStates.docFreq(), termStates.totalTermFreq()) : null;
|
||||
} else {
|
||||
// we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1
|
||||
collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1);
|
||||
|
|
|
@ -103,9 +103,9 @@ public abstract class SpanWeight extends Weight {
|
|||
TermStatistics[] termStats = new TermStatistics[termStates.size()];
|
||||
int termUpTo = 0;
|
||||
for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) {
|
||||
TermStatistics termStatistics = searcher.termStatistics(entry.getKey(), entry.getValue());
|
||||
if (termStatistics != null) {
|
||||
termStats[termUpTo++] = termStatistics;
|
||||
TermStates ts = entry.getValue();
|
||||
if (ts.docFreq() > 0) {
|
||||
termStats[termUpTo++] = searcher.termStatistics(entry.getKey(), ts.docFreq(), ts.totalTermFreq());
|
||||
}
|
||||
}
|
||||
CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
|
||||
|
|
|
@ -329,10 +329,10 @@ public class TestMinShouldMatch2 extends LuceneTestCase {
|
|||
if (ord >= 0) {
|
||||
boolean success = ords.add(ord);
|
||||
assert success; // no dups
|
||||
TermStates context = TermStates.build(reader.getContext(), term, true);
|
||||
TermStates ts = TermStates.build(reader.getContext(), term, true);
|
||||
SimScorer w = weight.similarity.scorer(1f,
|
||||
searcher.collectionStatistics("field"),
|
||||
searcher.termStatistics(term, context));
|
||||
searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
|
||||
sims[(int)ord] = new LeafSimScorer(w, reader, "field", true);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -281,9 +281,10 @@ public final class BM25FQuery extends Query implements Accountable {
|
|||
termStates = new TermStates[fieldTerms.length];
|
||||
for (int i = 0; i < termStates.length; i++) {
|
||||
FieldAndWeight field = fieldAndWeights.get(fieldTerms[i].field());
|
||||
termStates[i] = TermStates.build(searcher.getTopReaderContext(), fieldTerms[i], true);
|
||||
TermStatistics termStats = searcher.termStatistics(fieldTerms[i], termStates[i]);
|
||||
if (termStats != null) {
|
||||
TermStates ts = TermStates.build(searcher.getTopReaderContext(), fieldTerms[i], true);
|
||||
termStates[i] = ts;
|
||||
if (ts.docFreq() > 0) {
|
||||
TermStatistics termStats = searcher.termStatistics(fieldTerms[i], ts.docFreq(), ts.totalTermFreq());
|
||||
docFreq = Math.max(termStats.docFreq(), docFreq);
|
||||
totalTermFreq += (double) field.weight * termStats.totalTermFreq();
|
||||
}
|
||||
|
|
|
@ -360,9 +360,9 @@ public class TermAutomatonQuery extends Query implements Accountable {
|
|||
for(Map.Entry<Integer,BytesRef> ent : idToTerm.entrySet()) {
|
||||
Integer termID = ent.getKey();
|
||||
if (ent.getValue() != null) {
|
||||
TermStatistics stats = searcher.termStatistics(new Term(field, ent.getValue()), termStates.get(termID));
|
||||
if (stats != null) {
|
||||
allTermStats.add(stats);
|
||||
TermStates ts = termStates.get(termID);
|
||||
if (ts.docFreq() > 0) {
|
||||
allTermStats.add(searcher.termStatistics(new Term(field, ent.getValue()), ts.docFreq(), ts.totalTermFreq()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -186,8 +186,10 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
|
|||
}
|
||||
try {
|
||||
for(Term term : terms) {
|
||||
final TermStates termStates = TermStates.build(s.getIndexReader().getContext(), term, true);
|
||||
stats.put(term, s.termStatistics(term, termStates));
|
||||
final TermStates ts = TermStates.build(s.getIndexReader().getContext(), term, true);
|
||||
if (ts.docFreq() > 0) {
|
||||
stats.put(term, s.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
node.searchers.release(s);
|
||||
|
@ -262,36 +264,31 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TermStatistics termStatistics(Term term, TermStates context) throws IOException {
|
||||
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
|
||||
assert term != null;
|
||||
long docFreq = 0;
|
||||
long totalTermFreq = 0;
|
||||
long distributedDocFreq = 0;
|
||||
long distributedTotalTermFreq = 0;
|
||||
for(int nodeID=0;nodeID<nodeVersions.length;nodeID++) {
|
||||
|
||||
final TermStatistics subStats;
|
||||
if (nodeID == myNodeID) {
|
||||
subStats = super.termStatistics(term, context);
|
||||
subStats = super.termStatistics(term, docFreq, totalTermFreq);
|
||||
} else {
|
||||
final TermAndShardVersion key = new TermAndShardVersion(nodeID, nodeVersions[nodeID], term);
|
||||
subStats = termStatsCache.get(key);
|
||||
if (subStats == null) {
|
||||
continue; // term not found
|
||||
}
|
||||
}
|
||||
|
||||
if (subStats == null) {
|
||||
continue; // term not found
|
||||
}
|
||||
|
||||
|
||||
long nodeDocFreq = subStats.docFreq();
|
||||
docFreq += nodeDocFreq;
|
||||
distributedDocFreq += nodeDocFreq;
|
||||
|
||||
long nodeTotalTermFreq = subStats.totalTermFreq();
|
||||
totalTermFreq += nodeTotalTermFreq;
|
||||
}
|
||||
|
||||
if (docFreq == 0) {
|
||||
return null; // term not found in any node whatsoever
|
||||
} else {
|
||||
return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
|
||||
distributedTotalTermFreq += nodeTotalTermFreq;
|
||||
}
|
||||
assert distributedDocFreq > 0;
|
||||
return new TermStatistics(term.bytes(), distributedDocFreq, distributedTotalTermFreq);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -48,7 +48,6 @@ import org.apache.lucene.index.MultiPostingsEnum;
|
|||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.*;
|
||||
|
@ -324,15 +323,15 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
* Override these two methods to provide a way to use global collection stats.
|
||||
*/
|
||||
@Override
|
||||
public TermStatistics termStatistics(Term term, TermStates context) throws IOException {
|
||||
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
|
||||
final SolrRequestInfo reqInfo = SolrRequestInfo.getRequestInfo();
|
||||
if (reqInfo != null) {
|
||||
final StatsSource statsSrc = (StatsSource) reqInfo.getReq().getContext().get(STATS_SOURCE);
|
||||
if (statsSrc != null) {
|
||||
return statsSrc.termStatistics(this, term, context);
|
||||
return statsSrc.termStatistics(this, term, docFreq, totalTermFreq);
|
||||
}
|
||||
}
|
||||
return localTermStatistics(term, context);
|
||||
return localTermStatistics(term, docFreq, totalTermFreq);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -347,8 +346,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
return localCollectionStatistics(field);
|
||||
}
|
||||
|
||||
public TermStatistics localTermStatistics(Term term, TermStates context) throws IOException {
|
||||
return super.termStatistics(term, context);
|
||||
public TermStatistics localTermStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
|
||||
return super.termStatistics(term, docFreq, totalTermFreq);
|
||||
}
|
||||
|
||||
public CollectionStatistics localCollectionStatistics(String field) throws IOException {
|
||||
|
|
|
@ -28,7 +28,6 @@ import java.util.Set;
|
|||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -170,11 +169,8 @@ public class ExactStatsCache extends StatsCache {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TermStatistics termStatistics(Term term, TermStates context) throws IOException {
|
||||
TermStatistics ts = super.termStatistics(term, context);
|
||||
if (ts == null) {
|
||||
return null;
|
||||
}
|
||||
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
|
||||
TermStatistics ts = super.termStatistics(term, docFreq, totalTermFreq);
|
||||
terms.add(term);
|
||||
statsMap.put(term.toString(), new TermStats(term.field(), ts));
|
||||
return ts;
|
||||
|
@ -328,7 +324,7 @@ public class ExactStatsCache extends StatsCache {
|
|||
this.colStatsCache = colStatsCache;
|
||||
}
|
||||
|
||||
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context)
|
||||
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
|
||||
throws IOException {
|
||||
TermStats termStats = termStatsCache.get(term.toString());
|
||||
// TermStats == null is also true if term has no docFreq anyway,
|
||||
|
@ -336,7 +332,7 @@ public class ExactStatsCache extends StatsCache {
|
|||
// Not sure we need a warning here
|
||||
if (termStats == null) {
|
||||
log.debug("Missing global termStats info for term={}, using local stats", term);
|
||||
return localSearcher.localTermStatistics(term, context);
|
||||
return localSearcher.localTermStatistics(term, docFreq, totalTermFreq);
|
||||
} else {
|
||||
return termStats.toTermStatistics();
|
||||
}
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.Map.Entry;
|
|||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
|
@ -132,12 +131,12 @@ public class LRUStatsCache extends ExactStatsCache {
|
|||
this.colStatsCache = colStatsCache;
|
||||
}
|
||||
@Override
|
||||
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context)
|
||||
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
|
||||
throws IOException {
|
||||
TermStats termStats = termStatsCache.get(term.toString());
|
||||
if (termStats == null) {
|
||||
log.debug("## Missing global termStats info: {}, using local", term);
|
||||
return localSearcher.localTermStatistics(term, context);
|
||||
return localSearcher.localTermStatistics(term, docFreq, totalTermFreq);
|
||||
} else {
|
||||
return termStats.toTermStatistics();
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.solr.search.stats;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
@ -34,9 +33,9 @@ public final class LocalStatsSource extends StatsSource {
|
|||
}
|
||||
|
||||
@Override
|
||||
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context)
|
||||
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
|
||||
throws IOException {
|
||||
return localSearcher.localTermStatistics(term, context);
|
||||
return localSearcher.localTermStatistics(term, docFreq, totalTermFreq);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.solr.search.stats;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermStates;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
|
@ -34,7 +33,7 @@ import org.apache.solr.search.SolrIndexSearcher;
|
|||
*/
|
||||
public abstract class StatsSource {
|
||||
|
||||
public abstract TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, TermStates context)
|
||||
public abstract TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
|
||||
throws IOException;
|
||||
|
||||
public abstract CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)
|
||||
|
|
Загрузка…
Ссылка в новой задаче