diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 173ae250268..522671cb49a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -126,6 +126,8 @@ New Features * SOLR-10786: Add DBSCAN clustering Streaming Evaluator (Joel Bernstein) +* SOLR-13911: Add 'missing' aggregation in JSON FacetModule (hossman, Munendra S N) + Improvements --------------------- diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java index 8d84642695c..5f6fc3f77d7 100644 --- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java +++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java @@ -59,6 +59,7 @@ import org.apache.solr.search.facet.AvgAgg; import org.apache.solr.search.facet.CountAgg; import org.apache.solr.search.facet.HLLAgg; import org.apache.solr.search.facet.MinMaxAgg; +import org.apache.solr.search.facet.MissingAgg; import org.apache.solr.search.facet.PercentileAgg; import org.apache.solr.search.facet.StddevAgg; import org.apache.solr.search.facet.SumAgg; @@ -1014,6 +1015,13 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin { return new StddevAgg(fp.parseValueSource()); } }); + + addParser("agg_missing", new ValueSourceParser() { + @Override + public ValueSource parse(FunctionQParser fp) throws SyntaxError { + return new MissingAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE)); + } + }); /*** addParser("agg_multistat", new ValueSourceParser() { diff --git a/solr/core/src/java/org/apache/solr/search/facet/MissingAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MissingAgg.java new file mode 100644 index 00000000000..2bf25134369 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/facet/MissingAgg.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search.facet; + +import java.io.IOException; +import java.util.function.IntFunction; + +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.queries.function.valuesource.QueryValueSource; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.function.FieldNameValueSource; + +/** + * {@link AggValueSource} to compute missing counts for given {@link ValueSource} + */ +public class MissingAgg extends SimpleAggValueSource { + + public MissingAgg(ValueSource vs) { + super("missing", vs); + } + + @Override + public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException { + ValueSource vs = getArg(); + + if (vs instanceof FieldNameValueSource) { + String field = ((FieldNameValueSource)vs).getFieldName(); + SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field); + + if (sf.multiValued() || sf.getType().multiValuedFieldCache()) { + Query query = null; + if (sf.hasDocValues()) { + query = new DocValuesFieldExistsQuery(sf.getName()); + } else { + query = sf.getType().getRangeQuery(null, sf, null, null, false, false); + } + vs = new QueryValueSource(query, 0.0f); 
+ } else { + vs = sf.getType().getValueSource(sf, null); + } + } + return new MissingSlotAcc(vs, fcontext, numSlots); + } + + @Override + public FacetMerger createFacetMerger(Object prototype) { + return new FacetLongMerger(); + } + + class MissingSlotAcc extends LongFuncSlotAcc { + + public MissingSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) { + super(values, fcontext, numSlots, 0); + } + + @Override + public void collect(int doc, int slot, IntFunction slotContext) throws IOException { + if (!values.exists(doc)) { + result[slot]++; + } + } + } + +} diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index a27ad2da0ba..dbd4f1a6675 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -1191,6 +1191,9 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { assertFuncEquals("agg_percentile(foo_i,50)", "agg_percentile(foo_i,50)"); assertFuncEquals("agg_variance(foo_i)", "agg_variance(foo_i)"); assertFuncEquals("agg_stddev(foo_i)", "agg_stddev(foo_i)"); + assertFuncEquals("agg_missing(foo_i)", "agg_missing(foo_i)"); + assertFuncEquals("agg(missing(foo_i))", "agg(missing(foo_i))"); + assertFuncEquals("agg_missing(field(foo_i))", "agg_missing(field(foo_i))"); // assertFuncEquals("agg_multistat(foo_i)", "agg_multistat(foo_i)"); } diff --git a/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java b/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java index ea3b5ef54e1..14e461dcd83 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java +++ b/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java @@ -42,9 +42,9 @@ import org.junit.Test; */ public class 
DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase { - // TODO: SOLR-11695: need "num_values" and "missing"... + // TODO: SOLR-11695: need "num_values" // TODO: add hll & variance - update all assertions to test their values (right after any mention of 'stddev') - private static List ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique"); + private static List ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique", "missing"); private String STAT_FIELD = "stat_i1"; private String ALL_STATS_JSON = ""; @@ -81,7 +81,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute final SolrClient shard0 = clients.get(0); final SolrClient shard1 = clients.get(1); final SolrClient shard2 = clients.get(2); - + // the 5 top foo_s terms have 100 docs each on every shard for (int i = 0; i < 100; i++) { for (int j = 0; j < 5; j++) { @@ -91,7 +91,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute } } - // 20 foo_s terms that come in "second" with 50 docs each + // 20 foo_s terms that come in "second" with 50 docs each // on both shard0 & shard1 ("bbb_") for (int i = 0; i < 50; i++) { for (int j = 0; j < 20; j++) { @@ -141,7 +141,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute shardFooBuckets[i] = (List) ((NamedList)clients.get(i).query( req ).getResponse().get("facets")).get("foo").get("buckets"); } - + // top 5 same on all shards for (int i = 0; i < 3; i++) { assertEquals(10, shardFooBuckets[i].size()); @@ -187,7 +187,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute private void checkRefinementAndOverrequesting() throws Exception { // // distributed queries // // - + { // w/o refinement, the default overrequest isn't enough to find the long 'tail' *OR* the correct count for 'bbb0'... 
List foo_buckets = (List) ((NamedList) @@ -200,13 +200,13 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("aaa")); assertEquals(bucket.toString(), 300L, bucket.get("count")); } - + // this will be short the "+1" fo the doc added to shard2... NamedList bucket = foo_buckets.get(5); assertTrue(bucket.toString(), bucket.get("val").equals("bbb0")); // 'tail' is missed assertEquals(bucket.toString(), 100L, bucket.get("count")); // will not include the "+1" for the doc added to shard2 } - + // even if we enable refinement, we still won't find the long 'tail' ... // regardless of wether we use either the default overrequest, or disable overrequesting... for (String over : Arrays.asList( "", "overrequest:0,")) { @@ -231,7 +231,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(-2L, bucket.get("min")); // this min only exists on shard2 assertEquals(1L, bucket.get("max")); // assertEquals(101L, bucket.get("num_values")); // TODO: SOLR-11695 - // assertEquals(0L, bucket.get("missing")); // TODO: SOLR-11695 + assertEquals(0L, bucket.get("missing")); assertEquals(48.0D, bucket.get("sum")); assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7); assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7); @@ -244,7 +244,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute // this is because of how the "simple" refinement process works: the "top buckets" are determined based // on the info available in the first pass request. // - // Even though 'tail' is returned in the top6 for shard2, the cummulative total for 'bbb0' from shard0 and shard1 is + // Even though 'tail' is returned in the top6 for shard2, the cumulative total for 'bbb0' from shard0 and shard1 is // high enough that the simple facet refinement ignores 'tail' because it assumes 'bbb0's final total will be greater. 
// // Meanwhile, for the sub-facet on 'bar', a limit==6 means we should correctly find 'tailB' as the top sub-term of 'tail', @@ -261,14 +261,14 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute "{ foo: { type:terms, limit:6, overrequest:20, refine:simple, field:foo_s, facet:{ " + " bar: { type:terms, limit:6, " + bar_opts + " field:bar_s }}}}" ) ).getResponse().get("facets")).get("foo").get("buckets"); - + assertEquals(6, buckets.size()); for (int i = 0; i < 5; i++) { NamedList bucket = buckets.get(i); assertTrue(bucket.toString(), bucket.get("val").toString().startsWith("aaa")); assertEquals(bucket.toString(), 300L, bucket.get("count")); } - + NamedList bucket = buckets.get(5); assertEquals(bucket.toString(), "tail", bucket.get("val")); assertEquals(bucket.toString(), 135L, bucket.get("count")); @@ -320,8 +320,8 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(bucket.toString(), 14L, bucket.get("count")); } } - - // however: with a lower sub-facet limit==5, and overrequesting disabled, + + // however: with a lower sub-facet limit==5, and overrequesting disabled, // we're going to miss out on tailB even if we have refinement for (String bar_opts : Arrays.asList( "refine:none, overrequest:0,", "refine:simple, overrequest:0," )) { @@ -351,7 +351,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(bucket.toString(), 14L, bucket.get("count")); } } - + } private void checkSubFacetStats() throws Exception { @@ -390,7 +390,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(-99L, aaa0_Bucket.get("min")); assertEquals(693L, aaa0_Bucket.get("max")); // assertEquals(300L, aaa0_Bucket.get("num_values")); // TODO: SOLR-11695 - // assertEquals(0L, aaa0_Bucket.get("missing")); // TODO: SOLR-11695 + assertEquals(0L, aaa0_Bucket.get("missing")); assertEquals(34650.0D, aaa0_Bucket.get("sum")); 
assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7); assertEquals(1.674585E7D, (double) aaa0_Bucket.get("sumsq"), 0.1E-7); @@ -404,7 +404,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(0L, tail_Bucket.get("min")); assertEquals(44L, tail_Bucket.get("max")); // assertEquals(90L, tail_Bucket.get("num_values")); // TODO: SOLR-11695 - // assertEquals(45L, tail_Bucket.get("missing")); // TODO: SOLR-11695 + assertEquals(45L, tail_Bucket.get("missing")); assertEquals(1980.0D, tail_Bucket.get("sum")); assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7); assertEquals(58740.0D, (double) tail_Bucket.get("sumsq"), 0.1E-7); @@ -420,7 +420,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute assertEquals(35L, tailB_Bucket.get("min")); assertEquals(40L, tailB_Bucket.get("max")); // assertEquals(12L, tailB_Bucket.get("num_values")); // TODO: SOLR-11695 - // assertEquals(5L, tailB_Bucket.get("missing")); // TODO: SOLR-11695 + assertEquals(5L, tailB_Bucket.get("missing")); assertEquals(450.0D, tailB_Bucket.get("sum")); assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7); assertEquals(16910.0D, (double) tailB_Bucket.get("sumsq"), 0.1E-7); diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java index 1e438224aee..52b4fe4d9a3 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java @@ -1125,8 +1125,6 @@ public class TestJsonFacets extends SolrTestCaseHS { client.commit(); - - // test for presence of debugging info ModifiableSolrParams debugP = params(p); debugP.set("debugQuery","true"); @@ -1256,12 +1254,13 @@ public class TestJsonFacets extends SolrTestCaseHS { // test sorting by other stats client.testJQ(params(p, "q", "*:*" - , "json.facet", "{f1:{${terms} type:terms, 
field:'${cat_s}', sort:'x desc', facet:{x:'min(${num_d})'} }" + - " , f2:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'max(${num_d})'} } " + - " , f3:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'unique(${where_s})'} } " + - " , f4:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'hll(${where_s})'} } " + - " , f5:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'variance(${num_d})'} } " + - " , f6:{type:terms, field:${num_d}, limit:1, sort:'x desc', facet:{x:'hll(${num_i})'} } " + // facet on a field that will cause hashing and exercise hll.resize on numeric field + , "json.facet", "{f1:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'min(${num_d})'} }" + + " , f2:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'max(${num_d})'} } " + + " , f3:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'unique(${where_s})'} } " + + " , f4:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'hll(${where_s})'} } " + + " , f5:{${terms} type:terms, field:'${cat_s}', sort:'x desc', facet:{x:'variance(${num_d})'} } " + + " , f6:{type:terms, field:${num_d}, limit:1, sort:'x desc', facet:{x:'hll(${num_i})'} } " + // facet on a field that will cause hashing and exercise hll.resize on numeric field + " , f7:{type:terms, field:${cat_s}, limit:2, sort:'x desc', facet:{x:'missing(${sparse_num_d})'} } " + "}" ) , "facets=={ 'count':6, " + @@ -1271,6 +1270,7 @@ public class TestJsonFacets extends SolrTestCaseHS { ", f4:{ 'buckets':[{ val:'A', count:2, x:2 }, { val:'B', count:3, x:2 }]} " + ", f5:{ 'buckets':[{ val:'B', count:3, x:74.6666666666666 }, { val:'A', count:2, x:1.0 }]} " + ", f6:{ buckets:[{ val:-9.0, count:1, x:1 }]} " + + ", f7:{ buckets:[{ val:B, count:3, x:3 },{ val:A, count:2, x:0 }]} " + "}" ); @@ -1284,6 +1284,50 @@ public class TestJsonFacets extends SolrTestCaseHS { ", f2:{ 'buckets':[{ val:'B', count:3, n1:-2.0}, { val:'A', count:2, n1:6.0 }]} }" 
); + // test sorting by missing stat with function + client.testJQ(params(p, "q", "*:*" + , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'missing(field(${sparse_num_d}))'} }}" + + " , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'missing(field(${sparse_num_d}))'} }} }" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'B', count:3, n1:3 }, { val:'A', count:2, n1:0}]}" + + ", f2:{ 'buckets':[{ val:'A', count:2, n1:0}, { val:'B', count:3, n1:3 }]} }" + ); + + // test sorting by missing stat with domain query + client.testJQ(params(p, "q", "-id:*" + , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'missing(field(${sparse_num_d}))'} }}" + + " , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'missing(field(${sparse_num_d}))'} }} }" + ) + , "facets=={ 'count':0, " + + " f1:{ 'buckets':[{ val:'B', count:3, n1:3 }, { val:'A', count:2, n1:0}]}" + + ", f2:{ 'buckets':[{ val:'A', count:2, n1:0}, { val:'B', count:3, n1:3 }]} }" + ); + + // test with sub-facet aggregation with stat on field + client.testJQ(params(p, "q", "*:*" + , "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " + + "facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " + + "facet:{n1:'missing(${sparse_num_d})'}}}}}}}" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:2},{val:'NY', count:1, n1:1}]} }," + + " { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:0},{val:'NY', count:1, n1:0}]}}]}" + + "}" + ); + + // test with sub-facet aggregation with stat on func + client.testJQ(params(p, "q", "*:*" + , "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " + + "facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " + + "facet:{n1:'missing(field(${sparse_num_d}))'}}}}}}}" + ) + , "facets=={ 'count':6, " + + " f1:{ 'buckets':[{ val:'B', count:3, 
f2:{'buckets':[{val:'NJ', count:2, n1:2},{val:'NY', count:1, n1:1}]} }," + + " { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:0},{val:'NY', count:1, n1:0}]}}]}" + + "}" + ); + // facet on numbers to test resize from hashing (may need to be sorting by the metric to test that) client.testJQ(params(p, "q", "*:*" , "json.facet", "{" + @@ -1697,7 +1741,7 @@ public class TestJsonFacets extends SolrTestCaseHS { ", numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})'" + ", where_hll:'hll(${where_s})', hll_num_i:'hll(${num_i})', hll_num_d:'hll(${num_d})', hll_date:'hll(${date})'" + ", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)', variance:'variance(${num_d})', stddev:'stddev(${num_d})'" + - ", mini:'min(${num_i})', maxi:'max(${num_i})'" + + ", mini:'min(${num_i})', maxi:'max(${num_i})', missing:'missing(${sparse_num_d})'" + " }" ) , "facets=={ 'count':6, " + @@ -1705,7 +1749,7 @@ public class TestJsonFacets extends SolrTestCaseHS { ", numwhere:2, unique_num_i:4, unique_num_d:5, unique_date:5" + ", where_hll:2, hll_num_i:4, hll_num_d:5, hll_date:5" + ", med:2.0, perc:[-9.0,2.0,11.0], variance:49.04, stddev:7.002856560004639" + - ", mini:-5, maxi:7" + + ", mini:-5, maxi:7, missing:4" + "}" ); @@ -1762,16 +1806,19 @@ public class TestJsonFacets extends SolrTestCaseHS { // test unique on multi-valued field client.testJQ(params(p, "q", "*:*" - , "json.facet", "{" + - "x:'unique(${multi_ss})'" + - ",y:{query:{q:'id:2', facet:{x:'unique(${multi_ss})'} }} " + - ",x2:'hll(${multi_ss})'" + - ",y2:{query:{q:'id:2', facet:{x:'hll(${multi_ss})'} }} " + - - " }" + , "json.facet", "{" + + "x:'unique(${multi_ss})'" + + ",z:'missing(${multi_ss})'" + + ",z1:'missing(${num_is})'" + + ",y:{query:{q:'id:2', facet:{x:'unique(${multi_ss})'} }} " + + ",x2:'hll(${multi_ss})'" + + ",y2:{query:{q:'id:2', facet:{x:'hll(${multi_ss})'} }} " + + " }" ) , "facets=={count:6 " + ",x:2" + + ",z:2" + 
+ ",z1:1" + ",y:{count:1, x:2}" + // single document should yield 2 unique values ",x2:2" + ",y2:{count:1, x:2}" + // single document should yield 2 unique values @@ -2049,12 +2096,12 @@ public class TestJsonFacets extends SolrTestCaseHS { // multi-valued integer client.testJQ(params(p, "q", "*:*" , "json.facet", "{ " + - " c1:'unique(${num_is})', c2:'hll(${num_is})'" + + " c1:'unique(${num_is})', c2:'hll(${num_is})', c3:'missing(${num_is})'" + ",f1:{${terms} type:terms, field:${num_is} } " + "}" ) , "facets=={ count:6 " + - ", c1:5, c2:5" + + ", c1:5, c2:5, c3:1" + ", f1:{ buckets:[ {val:-1,count:2},{val:0,count:2},{val:3,count:2},{val:-5,count:1},{val:2,count:1} ] } " + "} " ); @@ -2062,12 +2109,12 @@ public class TestJsonFacets extends SolrTestCaseHS { // multi-valued float client.testJQ(params(p, "q", "*:*" , "json.facet", "{ " + - " c1:'unique(${num_fs})', c2:'hll(${num_fs})'" + + " c1:'unique(${num_fs})', c2:'hll(${num_fs})', c3:'missing(${num_fs})', c4:'agg(missing(${num_fs}))'" + ",f1:{${terms} type:terms, field:${num_fs} } " + "}" ) , "facets=={ count:6 " + - ", c1:5, c2:5" + + ", c1:5, c2:5, c3:1, c4:1" + ", f1:{ buckets:[ {val:-1.5,count:2},{val:0.0,count:2},{val:3.0,count:2},{val:-5.0,count:1},{val:2.0,count:1} ] } " + "} " ); @@ -2119,11 +2166,11 @@ public class TestJsonFacets extends SolrTestCaseHS { // currently non-sorting stats. 
client.testJQ(params(p, "q", "*:*" , "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})' , u:'unique(${where_s})', mind:'min(${num_d})', maxd:'max(${num_d})', mini:'min(${num_i})', maxi:'max(${num_i})'" + - ", sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})' } }}" + ", sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})', missing:'missing(${multi_ss})'} }}" ) , "facets=={ 'count':6, " + - "'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, mini:-5, maxi:7, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148}," + - " {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, mini:2, maxi:3, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0}] } } " + "'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, mini:-5, maxi:7, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148, missing:0}," + " {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, mini:2, maxi:3, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0, missing:1}] } } " ); diff --git a/solr/solr-ref-guide/src/json-facet-api.adoc b/solr/solr-ref-guide/src/json-facet-api.adoc index 7f667f41524..f7682ebd77f 100644 --- a/solr/solr-ref-guide/src/json-facet-api.adoc +++ b/solr/solr-ref-guide/src/json-facet-api.adoc @@ -573,6 +573,7 @@ Unlike all the facets discussed so far, Aggregation functions (also called *face |avg |`avg(popularity)` |average of numeric values |min |`min(salary)` |minimum value |max |`max(mul(price,popularity))` |maximum value +|missing |`missing(author)` |number of documents which do not have a value for the given field or function |unique |`unique(author)` |number of unique values of the given field. Beyond 100 values it yields not exact estimate |uniqueBlock |`uniqueBlock(\_root_)` |same as above with smaller footprint strictly for <>. 
The given field must be unique across blocks, and only singlevalued string fields are supported, docValues are recommended. |hll |`hll(author)` |distributed cardinality estimate via hyper-log-log algorithm