Data Skipping Index Part 3-1: Utils (#491)

This commit is contained in:
Chungmin Lee 2021-08-24 05:10:25 +09:00 коммит произвёл GitHub
Родитель c2f4f04f9a
Коммит b06cbddb18
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
14 изменённых файлов: 1804 добавлений и 3 удалений

Просмотреть файл

@ -25,13 +25,16 @@ object Dependencies {
"org.apache.spark" %% "spark-sql" % sv % "provided" withSources (),
// Test dependencies
"org.mockito" %% "mockito-scala" % "0.4.0" % "test",
"org.scalacheck" %% "scalacheck" % "1.14.2" % "test",
"org.apache.spark" %% "spark-catalyst" % sv % "test" classifier "tests",
"org.apache.spark" %% "spark-core" % sv % "test" classifier "tests",
"org.apache.spark" %% "spark-sql" % sv % "test" classifier "tests") ++
(if (sparkVersion < Version(3, 1, 0))
Seq("org.scalatest" %% "scalatest" % "3.0.5" % "test")
Seq("org.scalatest" %% "scalatest" % "3.0.8" % "test")
else
Seq("org.scalatest" %% "scalatest" % "3.2.7" % "test")) ++
Seq(
"org.scalatest" %% "scalatest" % "3.2.3" % "test",
"org.scalatestplus" %% "scalatestplus-scalacheck" % "3.1.0.0-RC2" % "test")) ++
(if (sparkVersion < Version(3, 0, 0))
Seq(
"io.delta" %% "delta-core" % "0.6.1" % "provided" withSources (),

Просмотреть файл

@ -0,0 +1,41 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.types._
object ArrayUtils {

  /**
   * Builds an array out of the given values, choosing the element
   * representation from the given data type.
   *
   * For BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType
   * and DoubleType every value is cast to the matching Scala primitive and a
   * primitive array is returned. For any other data type the values are
   * returned in an Array[Any] without conversion.
   *
   * @param values Values to put into the array, in order
   * @param dataType Data type describing the values
   * @return The resulting array, typed as Any because its element type
   *         depends on the data type
   */
  def toArray(values: Seq[Any], dataType: DataType): Any = {
    val elements = values.iterator
    dataType match {
      case BooleanType => elements.map(_.asInstanceOf[Boolean]).toArray[Boolean]
      case ByteType => elements.map(_.asInstanceOf[Byte]).toArray[Byte]
      case ShortType => elements.map(_.asInstanceOf[Short]).toArray[Short]
      case IntegerType => elements.map(_.asInstanceOf[Int]).toArray[Int]
      case LongType => elements.map(_.asInstanceOf[Long]).toArray[Long]
      case FloatType => elements.map(_.asInstanceOf[Float]).toArray[Float]
      case DoubleType => elements.map(_.asInstanceOf[Double]).toArray[Double]
      case _ => elements.toArray[Any]
    }
  }
}

Просмотреть файл

@ -0,0 +1,28 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId}
/**
* A predicate over expressions: decides whether a given expression is the one
* this matcher was created for. How expressions are compared is left to the
* implementation.
*/
trait ExprMatcher {
/**
* Returns true if the given expression matches the expression this matcher
* is associated with.
*
* @param e Expression to test
* @return true if the expression matches, false otherwise
*/
def apply(e: Expression): Boolean
}

Просмотреть файл

@ -16,12 +16,14 @@
package com.microsoft.hyperspace.index.dataskipping.util
import java.util.UUID
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Project, Window}
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LogicalPlan, Project, Window}
import org.apache.spark.sql.types.DataType
import com.microsoft.hyperspace.HyperspaceException
@ -30,6 +32,8 @@ import com.microsoft.hyperspace.index.dataskipping.sketch.Sketch
object ExpressionUtils {
// Fixed ExprId (id 0, all-zero UUID) that normalize assigns to every attribute reference,
// replacing the attribute's own ExprId.
val nullExprId = ExprId(0, new UUID(0, 0))
/**
* Returns copies of the given sketches with the indexed columns replaced by
* resolved column names and data types.
@ -108,4 +112,43 @@ object ExpressionUtils {
override def dataType: DataType = throw new NotImplementedError
// $COVERAGE-ON$
}
/**
 * Rewrites the expression into a canonical form so that an indexed
 * expression can be compared with an expression taken from a filter
 * condition. Expressions in a filter condition can carry different ExprIds
 * on every execution, whereas the indexed expression is fixed.
 *
 * The following is applied bottom-up to the whole expression tree:
 * - every AttributeReference gets nullExprId as its ExprId and an empty
 *   qualifier;
 * - every GetStructField loses its optional field name, keeping only the
 *   child and the ordinal.
 *
 * @param expr Expression to normalize
 * @return The normalized expression
 */
def normalize(expr: Expression): Expression = {
  expr.transformUp {
    case attr: AttributeReference =>
      val withFixedId = attr.withExprId(nullExprId)
      withFixedId.withQualifier(Nil)
    case field: GetStructField =>
      field.copy(name = None)
  }
}
// The number of constructor arguments of ScalaUDF depends on the Spark version, so the
// case class pattern cannot be used directly. This extractor exposes only the result data
// type and the child expressions of the UDF.
private[dataskipping] object ExtractScalaUDF {
  def unapply(e: ScalaUDF): Option[(DataType, Seq[Expression])] = {
    val resultType = e.dataType
    val arguments = e.children
    Some(resultType -> arguments)
  }
}
/**
 * Matches a predicate consisting solely of IsNull checks combined with Or,
 * and extracts the checked expressions in left-to-right order.
 *
 * A single IsNull(arg) yields Seq(arg). Or nodes may be nested in any way:
 * both the right-nested form Or(IsNull(a), Or(IsNull(b), IsNull(c))) and the
 * left-nested form Or(Or(IsNull(a), IsNull(b)), IsNull(c)), which is the
 * shape obtained by combining predicates with reduceLeft(Or), yield
 * Seq(a, b, c). Any other predicate does not match.
 */
private[dataskipping] object ExtractIsNullDisjunction {
  def unapply(pred: Expression): Option[Seq[Expression]] =
    pred match {
      case IsNull(arg) => Some(Seq(arg))
      // Both sides must themselves be IsNull disjunctions; concatenating the two argument
      // lists keeps the left-to-right order of the original predicate.
      case Or(ExtractIsNullDisjunction(leftArgs), ExtractIsNullDisjunction(rightArgs)) =>
        Some(leftArgs ++ rightArgs)
      case _ => None
    }
}
/**
 * Matches a sequence of expressions in which every element is a
 * KnownNotNull, and extracts the wrapped child expressions in the same
 * order. An empty sequence matches and yields an empty sequence. If at least
 * one element is not a KnownNotNull, nothing is matched.
 */
private[dataskipping] object ExtractKnownNotNullArgs {
  def unapply(args: Seq[Expression]): Option[Seq[Expression]] = {
    val unwrapped = args.collect { case KnownNotNull(child) => child }
    // collect drops every element that is not a KnownNotNull, so the lengths agree only
    // when all of the elements were wrapped.
    if (unwrapped.length == args.length) Some(unwrapped) else None
  }
}
}

Просмотреть файл

@ -0,0 +1,30 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ExprId}
/**
 * Matcher that compares expressions with a fixed expression after renaming
 * and normalizing them.
 *
 * @param expr Expression to compare with; it is compared as is, without
 *             being normalized here
 * @param nameMap Names to give to attribute references, keyed by ExprId;
 *                looking up an ExprId which is not in the map fails with
 *                the exception thrown by Map.apply
 */
case class NormalizedExprMatcher(expr: Expression, nameMap: Map[ExprId, String])
    extends ExprMatcher {

  /**
   * Returns true if the given expression is equal to expr once its attribute
   * references are renamed according to nameMap and the result is passed
   * through ExpressionUtils.normalize.
   */
  def apply(e: Expression): Boolean = {
    val withMappedNames = e.transformUp {
      case attr: AttributeReference =>
        val mappedName = nameMap(attr.exprId)
        attr.withName(mappedName)
    }
    expr == ExpressionUtils.normalize(withMappedNames)
  }
}

Просмотреть файл

@ -0,0 +1,97 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.util.{ArrayData, TypeUtils}
import org.apache.spark.sql.types.IntegerType
/**
 * Finds the position of the first element of a sorted array (left) which is
 * greater than or equal to the given value (right).
 *
 * The position is one-based: 1 denotes the first element of the array. The
 * result is null if the array is empty or if every element is less than the
 * value.
 *
 * Preconditions (unchecked):
 * - The array must not be null.
 * - Elements in the array must be in ascending order.
 * - The array must not contain null elements.
 * - The array must not contain duplicate elements.
 * - The value must not be null.
 */
private[dataskipping] case class SortedArrayLowerBound(left: Expression, right: Expression)
    extends BinaryExpression {

  override def prettyName: String = "sorted_array_lower_bound"

  override def dataType: IntegerType = IntegerType

  // The result is null when no element is greater than or equal to the value.
  override def nullable: Boolean = true

  override def eval(input: InternalRow): Any = {
    val sortedValues = left.eval(input).asInstanceOf[ArrayData]
    val target = right.eval(input)
    val elementType = right.dataType
    val size = sortedValues.numElements()
    if (size <= 0) {
      null
    } else if (ordering.lteq(target, sortedValues.get(0, elementType))) {
      // The value does not exceed the first element, so the first element is the answer.
      1
    } else if (ordering.lteq(target, sortedValues.get(size - 1, elementType))) {
      // The value lies within the range of the array, so the search result is either the
      // position of the value itself or that of the first greater element.
      val (_, index) =
        SortedArrayUtils.binarySearch(sortedValues, elementType, ordering, 0, size, target)
      index + 1
    } else {
      // Every element is less than the value.
      null
    }
  }

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val arrayCode = left.genCode(ctx)
    val valueCode = right.genCode(ctx)
    val arr = arrayCode.value
    val value = valueCode.value
    val elementType = right.dataType
    val size = ctx.freshName("n")
    val firstElement = CodeGenerator.getValue(arr, elementType, "0")
    val lastElement = CodeGenerator.getValue(arr, elementType, s"$size - 1")
    val search = SortedArrayUtils.binarySearchCodeGen(ctx, elementType)
    // The comparisons are generated in the same order in which they appear in the code below.
    val valueAboveFirst = ctx.genGreater(elementType, value, firstElement)
    val valueAboveLast = ctx.genGreater(elementType, value, lastElement)
    val lookupCode =
      s"""
         |int $size = $arr.numElements();
         |if ($size > 0) {
         | if (!($valueAboveFirst)) {
         | ${ev.isNull} = false;
         | ${ev.value} = 1;
         | } else if (!($valueAboveLast)) {
         | ${ev.isNull} = false;
         | ${ev.value} = $search($arr, 0, $size, $value).index() + 1;
         | }
         |}
       """.stripMargin
    // The result starts out as null and is overwritten only when a position is found.
    ev.copy(code = code"""
      ${arrayCode.code}
      ${valueCode.code}
      boolean ${ev.isNull} = true;
      int ${ev.value} = 0;
      $lookupCode""")
  }

  // Ordering of the values for the interpreted path; built lazily and not serialized.
  @transient private lazy val ordering: Ordering[Any] =
    TypeUtils.getInterpretedOrdering(right.dataType)
}

Просмотреть файл

@ -0,0 +1,88 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator}
import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.types.DataType
private[dataskipping] object SortedArrayUtils {

  /**
   * Searches the slice [start, end) of a sorted array for the given value.
   *
   * If the value is found, true and the index of the matching element are
   * returned. Otherwise, false and the index of the first element which is
   * greater than the value are returned; that index equals end if no element
   * in the slice is greater than the value.
   *
   * Preconditions (unchecked):
   * - The array must not contain nulls and duplicate elements.
   * - The value to compare the elements to must not be null.
   *
   * @param arr Sorted array to search
   * @param dataType Data type used to read elements from the array
   * @param ordering Ordering used to compare the value with the elements
   * @param start Index of the first element of the slice (inclusive)
   * @param end Index following the last element of the slice (exclusive)
   * @param value Value to look for
   * @return Whether the value was found, and the index as described above
   */
  def binarySearch(
      arr: ArrayData,
      dataType: DataType,
      ordering: Ordering[Any],
      start: Int,
      end: Int,
      value: Any): (Boolean, Int) = {
    var lb = start
    var ub = end
    while (lb < ub) {
      // lb + (ub - lb) / 2 instead of (lb + ub) / 2: the sum of two large bounds could
      // overflow Int and produce a negative index.
      val i = lb + (ub - lb) / 2
      val u = arr.get(i, dataType)
      val cmp = ordering.compare(value, u)
      if (cmp == 0) {
        return (true, i)
      } else if (cmp < 0) {
        ub = i
      } else {
        lb = i + 1
      }
    }
    (false, lb)
  }

  /**
   * Adds a Java function performing the same search as binarySearch to the
   * given codegen context, specialized for the given element type.
   *
   * The generated function takes (ArrayData arr, int start, int end, value)
   * and returns a BinarySearchResult.
   *
   * @param ctx Codegen context to add the function to
   * @param dataType Data type of the array elements and of the value
   * @return The string returned by ctx.addNewFunction, to be used for
   *         calling the function from generated code
   */
  def binarySearchCodeGen(ctx: CodegenContext, dataType: DataType): String = {
    val javaType = CodeGenerator.javaType(dataType)
    // Class name of the companion object without the trailing "$", i.e. the name of the
    // BinarySearchResult class itself as it can be written in Java source.
    val resultType = BinarySearchResult.getClass.getCanonicalName.stripSuffix("$")
    val funcName = ctx.freshName("binarySearch")
    val funcDef =
      s"""
         |private $resultType $funcName(ArrayData arr, int start, int end, $javaType value) {
         | int lb = start;
         | int ub = end;
         | while (lb < ub) {
         | int i = lb + (ub - lb) / 2;
         | $javaType u = ${CodeGenerator.getValue("arr", dataType, "i")};
         | int cmp = ${ctx.genComp(dataType, "value", "u")};
         | if (cmp == 0) {
         | return new $resultType(true, i);
         | } else if (cmp < 0) {
         | ub = i;
         | } else {
         | lb = i + 1;
         | }
         | }
         | return new $resultType(false, lb);
         |}
       """.stripMargin
    ctx.addNewFunction(funcName, funcDef)
  }

  /**
   * Result of the generated binary search function.
   *
   * @param found Whether the value was found
   * @param index Index of the value if found, otherwise the index of the
   *              first element which is greater than the value
   */
  case class BinarySearchResult(found: Boolean, index: Int)
}

Просмотреть файл

@ -0,0 +1,199 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.{BooleanType, DataType}
// Value extractors returning Option[Literal] check if the value is not null
// because we're only interested in non-null values.
//
// Also note that we don't go overboard to match every pattern because
// we assume that Catalyst optimizer will give us an optimized predicate in NNF.
// It means in general we don't have to deal with Not, or worry about
// foldable expressions because they will be optimized to literals.
//
// There are some differences between Spark versions and that's why we include
// some patterns that are no longer needed in newer Spark versions.
/**
 * Extracts the non-null literal value in the predicate if it's equivalent to
 * <expr> = <literal>. Both <expr> = <literal> and <expr> <=> <literal> are
 * accepted, with the operands in either order, provided that the matcher
 * accepts <expr>.
 *
 * For the purpose of data skipping, the value is not extracted if it's null.
 * If the literal is not null, then the only way to make either predicate
 * true is when the expression is not null and its value is equal to the
 * literal value.
 */
case class EqualToExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Option[Literal] =
    p match {
      case EqualTo(lhs, rhs) => literalFor(lhs, rhs).orElse(literalFor(rhs, lhs))
      case EqualNullSafe(lhs, rhs) => literalFor(lhs, rhs).orElse(literalFor(rhs, lhs))
      case _ => None
    }

  // Returns the candidate if it is a non-null literal and the matcher accepts the operand.
  private def literalFor(operand: Expression, candidate: Expression): Option[Literal] =
    candidate match {
      case lit: Literal if lit.value != null && matcher(operand) => Some(lit)
      case _ => None
    }
}
/**
 * Extracts the non-null literal value in the predicate if it's equivalent to
 * <expr> < <literal>, that is, if the predicate is either
 * LessThan(<expr>, <literal>) or GreaterThan(<literal>, <expr>) and the
 * matcher accepts <expr>.
 */
case class LessThanExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Option[Literal] = {
    // Bring both accepted shapes into the form (expression, bound) first.
    val operands = p match {
      case LessThan(e, bound: Literal) => Some((e, bound))
      case GreaterThan(bound: Literal, e) => Some((e, bound))
      case _ => None
    }
    operands.collect { case (e, bound) if bound.value != null && matcher(e) => bound }
  }
}
/**
 * Extracts the non-null literal value in the predicate if it's equivalent to
 * <expr> <= <literal>, that is, if the predicate is either
 * LessThanOrEqual(<expr>, <literal>) or GreaterThanOrEqual(<literal>, <expr>)
 * and the matcher accepts <expr>.
 */
case class LessThanOrEqualToExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Option[Literal] = {
    // Bring both accepted shapes into the form (expression, bound) first.
    val operands = p match {
      case LessThanOrEqual(e, bound: Literal) => Some((e, bound))
      case GreaterThanOrEqual(bound: Literal, e) => Some((e, bound))
      case _ => None
    }
    operands.collect { case (e, bound) if bound.value != null && matcher(e) => bound }
  }
}
/**
 * Extracts the non-null literal value in the predicate if it's equivalent to
 * <expr> > <literal>, that is, if the predicate is either
 * GreaterThan(<expr>, <literal>) or LessThan(<literal>, <expr>) and the
 * matcher accepts <expr>.
 */
case class GreaterThanExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Option[Literal] = {
    // Bring both accepted shapes into the form (expression, bound) first.
    val operands = p match {
      case GreaterThan(e, bound: Literal) => Some((e, bound))
      case LessThan(bound: Literal, e) => Some((e, bound))
      case _ => None
    }
    operands.collect { case (e, bound) if bound.value != null && matcher(e) => bound }
  }
}
/**
 * Extracts the non-null literal value in the predicate if it's equivalent to
 * <expr> >= <literal>, that is, if the predicate is either
 * GreaterThanOrEqual(<expr>, <literal>) or LessThanOrEqual(<literal>, <expr>)
 * and the matcher accepts <expr>.
 */
case class GreaterThanOrEqualToExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Option[Literal] = {
    // Bring both accepted shapes into the form (expression, bound) first.
    val operands = p match {
      case GreaterThanOrEqual(e, bound: Literal) => Some((e, bound))
      case LessThanOrEqual(bound: Literal, e) => Some((e, bound))
      case _ => None
    }
    operands.collect { case (e, bound) if bound.value != null && matcher(e) => bound }
  }
}
/**
 * Matches the predicate if it's equivalent to <expr> IS NULL.
 *
 * Besides IsNull(<expr>), a null-safe comparison of <expr> with a null
 * literal (<expr> <=> null, operands in either order) is matched, provided
 * that the matcher accepts <expr>.
 */
case class IsNullExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Boolean =
    p match {
      case IsNull(e) => matcher(e)
      case EqualNullSafe(lhs, rhs) =>
        (isNullLiteral(rhs) && matcher(lhs)) || (isNullLiteral(lhs) && matcher(rhs))
      case _ => false
    }

  // True only for a literal whose value is null.
  private def isNullLiteral(e: Expression): Boolean =
    e match {
      case lit: Literal => lit.value == null
      case _ => false
    }
}
/**
 * Matches the predicate if it's equivalent to <expr> IS NOT NULL.
 *
 * Not(IsNull(<expr>)) is also matched because it can be in the predicate in
 * Spark 2.4. Since Spark 3.0, this is optimized to IsNotNull(<expr>).
 * The negation of a null-safe comparison of <expr> with a null literal
 * (operands in either order) is matched as well.
 */
case class IsNotNullExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Boolean =
    p match {
      case IsNotNull(e) => matcher(e)
      case Not(IsNull(e)) => matcher(e) // for Spark 2.4
      case Not(EqualNullSafe(lhs, rhs)) =>
        (isNullLiteral(rhs) && matcher(lhs)) || (isNullLiteral(lhs) && matcher(rhs))
      case _ => false
    }

  // True only for a literal whose value is null.
  private def isNullLiteral(e: Expression): Boolean =
    e match {
      case lit: Literal => lit.value == null
      case _ => false
    }
}
/**
 * Matches the predicate if it's equivalent to <expr> = true.
 *
 * A boolean expression can be a predicate on its own, without being compared
 * with true, so the predicate is matched if it is either an equality with
 * the literal true (as recognized by EqualToExtractor) or an expression
 * accepted by the matcher itself.
 *
 * To avoid false matches, the type of the predicate is required to be
 * BooleanType, although it's not strictly necessary because our predicate
 * conversion does not go down the predicate tree unless it's And/Or and
 * Spark has already checked the expression is Boolean if it's a direct child
 * of And/Or.
 */
case class IsTrueExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Boolean = {
    if (p.dataType != BooleanType) {
      false
    } else {
      EqualToExtractor(matcher).unapply(p) match {
        case Some(Literal(true, BooleanType)) => true
        case _ => matcher(p)
      }
    }
  }
}
/**
 * Matches the predicate if it's equivalent to <expr> = false.
 *
 * The predicate must be of BooleanType and be either an equality with the
 * literal false (as recognized by EqualToExtractor) or Not(<expr>) where
 * <expr> is accepted by the matcher.
 */
case class IsFalseExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Boolean = {
    if (p.dataType != BooleanType) {
      false
    } else {
      EqualToExtractor(matcher).unapply(p) match {
        case Some(Literal(false, BooleanType)) => true
        case _ =>
          p match {
            case Not(e) => matcher(e)
            case _ => false
          }
      }
    }
  }
}
/**
 * Extracts non-null values in the predicate if it's equivalent to
 * <expr> IN (<lit>*).
 *
 * The predicate is matched only if the list is not empty, every element of
 * the list is a literal, and the matcher accepts <expr>. Literals with a
 * null value are left out of the result, so the result can be empty.
 *
 * In Spark, In is created if the number of values in the list does not exceed
 * spark.sql.optimizer.inSetConversionThreshold.
 */
case class InExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Option[(Seq[Literal])] =
    p match {
      case In(e, vs) =>
        val literals = vs.collect { case lit: Literal => lit }
        // Equal lengths mean that no element of the list was dropped by collect, i.e. that
        // all of them are literals.
        val allLiterals = literals.length == vs.length
        if (vs.nonEmpty && allLiterals && matcher(e)) {
          Some(literals.filter(_.value != null))
        } else {
          None
        }
      case _ => None
    }
}
/**
 * Extracts non-null values in the predicate if it's equivalent to
 * <expr> IN (<lit>*).
 *
 * The predicate is matched if it is an InSet whose child expression is
 * accepted by the matcher. A null contained in the set is left out of the
 * result, in the same way as InExtractor leaves out null literals, so the
 * result can be empty.
 *
 * InSet is created instead of In if the list size is larger than
 * spark.sql.optimizer.inSetConversionThreshold.
 */
case class InSetExtractor(matcher: ExprMatcher) {
  def unapply(p: Expression): Option[(Set[Any])] =
    p match {
      // Drop null so that only non-null values are handed out, as documented.
      case InSet(e, vs) if matcher(e) => Some(vs.filter(_ != null))
      case _ => None
    }
}

Просмотреть файл

@ -0,0 +1,26 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
import org.apache.spark.sql.types.{ArrayType, DataType}
trait ArrayTestUtils {

  /**
   * Creates an array literal from the given values.
   *
   * @param values Elements of the array
   * @param dataType Element type of the array; the array type is declared as
   *                 not containing nulls
   * @return Literal expression of the array type
   */
  def createArray(values: Seq[Any], dataType: DataType): Expression = {
    val arrayType = ArrayType(dataType, containsNull = false)
    Literal.create(values, arrayType)
  }
}

Просмотреть файл

@ -0,0 +1,88 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import com.microsoft.hyperspace.index.HyperspaceSuite
/**
 * Tests for ArrayUtils.toArray: every test converts a small sequence and
 * checks both the runtime type of the returned array (through the cast) and
 * its elements.
 */
class ArrayUtilsTest extends HyperspaceSuite {

  test("toArray returns Array[Boolean].") {
    val input = Seq(true, false)
    val result = ArrayUtils.toArray(input, BooleanType).asInstanceOf[Array[Boolean]]
    assert(result.sameElements(Array(true, false)))
  }

  test("toArray returns Array[Byte].") {
    val input = Seq[Byte](0, 1, 2, 10)
    val result = ArrayUtils.toArray(input, ByteType).asInstanceOf[Array[Byte]]
    assert(result.sameElements(Array[Byte](0, 1, 2, 10)))
  }

  test("toArray returns Array[Short].") {
    val input = Seq[Short](0, 1, 2, 10)
    val result = ArrayUtils.toArray(input, ShortType).asInstanceOf[Array[Short]]
    assert(result.sameElements(Array[Short](0, 1, 2, 10)))
  }

  test("toArray returns Array[Int].") {
    val input = Seq(0, 1, 2, 10)
    val result = ArrayUtils.toArray(input, IntegerType).asInstanceOf[Array[Int]]
    assert(result.sameElements(Array(0, 1, 2, 10)))
  }

  test("toArray returns Array[Long].") {
    val input = Seq(0L, 1L, 2L, 10L)
    val result = ArrayUtils.toArray(input, LongType).asInstanceOf[Array[Long]]
    assert(result.sameElements(Array(0L, 1L, 2L, 10L)))
  }

  test("toArray returns Array[Float].") {
    val input = Seq(0.0f, 1.0f, 2.0f, 10.0f)
    val result = ArrayUtils.toArray(input, FloatType).asInstanceOf[Array[Float]]
    assert(result.sameElements(Array(0.0f, 1.0f, 2.0f, 10.0f)))
  }

  test("toArray returns Array[Double].") {
    val input = Seq(0.0, 1.0, 2.0, 10.0)
    val result = ArrayUtils.toArray(input, DoubleType).asInstanceOf[Array[Double]]
    assert(result.sameElements(Array(0.0, 1.0, 2.0, 10.0)))
  }

  test("toArray returns Array[Any] for non-primitive types.") {
    val foo = UTF8String.fromString("foo")
    val bar = UTF8String.fromString("bar")
    val result = ArrayUtils.toArray(Seq(foo, bar), StringType).asInstanceOf[Array[Any]]
    assert(result.sameElements(Array(UTF8String.fromString("foo"), UTF8String.fromString("bar"))))
  }
}

Просмотреть файл

@ -0,0 +1,89 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._
import com.microsoft.hyperspace.index.HyperspaceSuite
/**
 * Tests for ExpressionUtils.normalize and for the extractors
 * ExtractIsNullDisjunction and ExtractKnownNotNullArgs.
 */
class ExpressionUtilsTest extends HyperspaceSuite {
  import ExpressionUtils._

  test("normalize removes ExprId and qualifiers for AttributeReference.") {
    val qualified = AttributeReference("A", IntegerType)(ExprId(42), Seq("t"))
    val normalized = ExpressionUtils.normalize(qualified)
    assert(normalized === AttributeReference("A", IntegerType)(ExpressionUtils.nullExprId, Nil))
  }

  test("normalize removes name for GetStructField.") {
    val structType = StructType(StructField("b", IntegerType) :: Nil)
    val struct = Literal(null, structType)
    val normalized = ExpressionUtils.normalize(GetStructField(struct, 0, Some("b")))
    assert(normalized === GetStructField(Literal(null, structType), 0))
  }

  test("ExtractIsNullDisjunction matches IsNull.") {
    // A pattern definition fails with a MatchError if the extractor does not match.
    val ExtractIsNullDisjunction(args) = IsNull(Literal(null))
    assert(args === Seq(Literal(null)))
  }

  test("ExtractIsNullDisjunction matches Or(IsNull, IsNull).") {
    val ExtractIsNullDisjunction(args) = Or(IsNull(Literal(null)), IsNull(Literal(42)))
    assert(args === Seq(Literal(null), Literal(42)))
  }

  test("ExtractIsNullDisjunction matches Or(IsNull, Or(IsNull, IsNull)).") {
    val inner = Or(IsNull(Literal(42)), IsNull(Literal(23)))
    val ExtractIsNullDisjunction(args) = Or(IsNull(Literal(null)), inner)
    assert(args === Seq(Literal(null), Literal(42), Literal(23)))
  }

  test("ExtractIsNullDisjunction does not match other expressions.") {
    val expr = IsNotNull(Literal(null))
    val args = ExtractIsNullDisjunction.unapply(expr).getOrElse(Nil)
    assert(args === Nil)
  }

  test("ExtractKnownNotNullArgs matches Seq(KnownNotNull*).") {
    val exprs = Seq(KnownNotNull(Literal(1)), KnownNotNull(Literal(42)))
    val ExtractKnownNotNullArgs(args) = exprs
    assert(args === Seq(Literal(1), Literal(42)))
  }

  test("ExtractKnownNotNullArgs does not match other expressions.") {
    val exprs = Seq(KnownNotNull(Literal(1)), Literal(42))
    val args = ExtractKnownNotNullArgs.unapply(exprs).getOrElse(Nil)
    assert(args === Nil)
  }
}

Просмотреть файл

@ -0,0 +1,946 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.{BooleanType, IntegerType}
import org.mockito.Mockito.{mock, when}
import com.microsoft.hyperspace.index.HyperspaceSuite
class ExtractorsTest extends HyperspaceSuite {
val a = AttributeReference("A", IntegerType)()
val b = AttributeReference("B", BooleanType)()
val zero = Literal(0, IntegerType)
val one = Literal(1, IntegerType)
val two = Literal(2, IntegerType)
val trueLit = Literal(true, BooleanType)
val falseLit = Literal(false, BooleanType)
val nullInt = Literal(null, IntegerType)
val nullBool = Literal(null, BooleanType)
val aMatcher = {
val matcher = mock(classOf[ExprMatcher])
when(matcher.apply(a)).thenReturn(true)
matcher
}
val bMatcher = {
val matcher = mock(classOf[ExprMatcher])
when(matcher.apply(b)).thenReturn(true)
matcher
}
val nonMatcher = mock(classOf[ExprMatcher])
// EqualToExtractor
{
val AEqualTo = EqualToExtractor(aMatcher)
val NoneEqualTo = EqualToExtractor(nonMatcher)
test("EqualToExtractor matches EqualTo(<expr>, <lit>) if matcher(<expr>).") {
val value = EqualTo(a, zero) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === Some(zero))
}
test("EqualToExtractor does not match EqualTo(<expr>, null).") {
val value = EqualTo(a, nullInt) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor does not match EqualTo(<expr>, <expr>).") {
val value = EqualTo(a, a) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor does not match EqualTo(<expr>, <lit>) if !matcher(<expr>).") {
val value = EqualTo(a, zero) match {
case NoneEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor matches EqualTo(<lit>, <expr>) if matcher(<expr>).") {
val value = EqualTo(zero, a) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === Some(zero))
}
test("EqualToExtractor does not match EqualTo(null, <expr>).") {
val value = EqualTo(nullInt, a) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor does not match EqualTo(<lit>, <expr>) if !matcher(<expr>).") {
val value = EqualTo(zero, a) match {
case NoneEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor matches EqualNullSafe(<expr>, <lit>) if matcher(<expr>).") {
val value = EqualNullSafe(a, zero) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === Some(zero))
}
test("EqualToExtractor does not match EqualNullSafe(<expr>, null).") {
val value = EqualNullSafe(a, nullInt) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor does not match EqualNullSafe(<expr>, <lit>) if !matcher(<expr>).") {
val value = EqualNullSafe(a, zero) match {
case NoneEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor matches EqualNullSafe(<lit>, <expr>) if matcher(<expr>).") {
val value = EqualNullSafe(zero, a) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === Some(zero))
}
test("EqualToExtractor does not match EqualNullSafe(null, <expr>).") {
val value = EqualNullSafe(nullInt, a) match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor does not match EqualNullSafe(<lit>, <expr>) if !matcher(<expr>).") {
val value = EqualNullSafe(zero, a) match {
case NoneEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("EqualToExtractor does not match expressions other than EqualTo/EqualNullSafe.") {
val value = zero match {
case AEqualTo(value) => Some(value)
case _ => None
}
assert(value === None)
}
}
// LessThanExtractor
{
val ALessThan = LessThanExtractor(aMatcher)
val NoneLessThan = LessThanExtractor(nonMatcher)
test("LessThanExtractor matches LessThan(<expr>, <lit>) if matcher(<expr>).") {
val value = LessThan(a, zero) match {
case ALessThan(value) => Some(value)
case _ => None
}
assert(value === Some(zero))
}
test("LessThanExtractor does not match LessThan(<expr>, null).") {
val value = LessThan(a, nullInt) match {
case ALessThan(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("LessThanExtractor does not match LessThan(<expr>, <expr>).") {
val value = LessThan(a, a) match {
case ALessThan(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("LessThanExtractor does not match LessThan(<expr>, <lit>) if !matcher(<expr>).") {
val value = LessThan(a, zero) match {
case NoneLessThan(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("LessThanExtractor does not match LessThan(<lit>, <expr>).") {
val value = LessThan(zero, a) match {
case ALessThan(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("LessThanExtractor matches GreaterThan(<lit>, <expr>) if matcher(<expr>).") {
val value = GreaterThan(zero, a) match {
case ALessThan(value) => Some(value)
case _ => None
}
assert(value === Some(zero))
}
test("LessThanExtractor does not match GreaterThan(null, <expr>).") {
val value = GreaterThan(nullInt, a) match {
case ALessThan(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("LessThanExtractor does not match GreaterThan(<lit>, <expr>) if !matcher(<expr>).") {
val value = GreaterThan(zero, a) match {
case NoneLessThan(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("LessThanExtractor does not match GreaterThan(<expr>, <lit>).") {
val value = GreaterThan(a, zero) match {
case ALessThan(value) => Some(value)
case _ => None
}
assert(value === None)
}
test("LessThanExtractor does not match expressions other than LessThan.") {
val value = zero match {
case ALessThan(value) => Some(value)
case _ => None
}
assert(value === None)
}
}
// LessThanOrEqualToExtractor
// Each test applies the extractor through PartialFunction.condOpt: Some(extracted value)
// on a match, None otherwise.
{
  val ALessThanOrEqualTo = LessThanOrEqualToExtractor(aMatcher)
  val NoneLessThanOrEqualTo = LessThanOrEqualToExtractor(nonMatcher)
  test(
    "LessThanOrEqualToExtractor matches LessThanOrEqual(<expr>, <lit>) if matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(a, zero)) {
      case ALessThanOrEqualTo(v) => v
    }
    assert(extracted === Some(zero))
  }
  test("LessThanOrEqualToExtractor does not match LessThanOrEqual(<expr>, null).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(a, nullInt)) {
      case ALessThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test("LessThanOrEqualToExtractor does not match LessThanOrEqual(<expr>, <expr>).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(a, a)) {
      case ALessThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test(
    "LessThanOrEqualToExtractor does not match LessThanOrEqual(<expr>, <lit>) " +
      "if !matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(a, zero)) {
      case NoneLessThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test("LessThanOrEqualToExtractor does not match LessThanOrEqual(<lit>, <expr>).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(zero, a)) {
      case ALessThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  // The mirrored comparison GreaterThanOrEqual(<lit>, <expr>) is covered as well.
  test(
    "LessThanOrEqualToExtractor matches GreaterThanOrEqual(<lit>, <expr>) if matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(zero, a)) {
      case ALessThanOrEqualTo(v) => v
    }
    assert(extracted === Some(zero))
  }
  test("LessThanOrEqualToExtractor does not match GreaterThanOrEqual(null, <expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(nullInt, a)) {
      case ALessThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test(
    "LessThanOrEqualToExtractor does not match GreaterThanOrEqual(<lit>, <expr>) " +
      "if !matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(zero, a)) {
      case NoneLessThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test("LessThanOrEqualToExtractor does not match GreaterThanOrEqual(<expr>, <lit>).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(a, zero)) {
      case ALessThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test("LessThanOrEqualToExtractor does not match expressions other than LessThanOrEqual.") {
    val extracted = PartialFunction.condOpt(zero) {
      case ALessThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
}
// GreaterThanExtractor
// Each test applies the extractor through PartialFunction.condOpt: Some(extracted value)
// on a match, None otherwise.
{
  val AGreaterThan = GreaterThanExtractor(aMatcher)
  val NoneGreaterThan = GreaterThanExtractor(nonMatcher)
  test("GreaterThanExtractor matches GreaterThan(<expr>, <lit>) if matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThan(a, zero)) {
      case AGreaterThan(v) => v
    }
    assert(extracted === Some(zero))
  }
  test("GreaterThanExtractor does not match GreaterThan(<expr>, null).") {
    val extracted = PartialFunction.condOpt(GreaterThan(a, nullInt)) {
      case AGreaterThan(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanExtractor does not match GreaterThan(<expr>, <expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThan(a, a)) {
      case AGreaterThan(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanExtractor does not match GreaterThan(<expr>, <lit>) if !matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThan(a, zero)) {
      case NoneGreaterThan(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanExtractor does not match GreaterThan(<lit>, <expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThan(zero, a)) {
      case AGreaterThan(v) => v
    }
    assert(extracted === None)
  }
  // The mirrored comparison LessThan(<lit>, <expr>) is covered as well.
  test("GreaterThanExtractor matches LessThan(<lit>, <expr>) if matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(LessThan(zero, a)) {
      case AGreaterThan(v) => v
    }
    assert(extracted === Some(zero))
  }
  test("GreaterThanExtractor does not match LessThan(null, <expr>).") {
    val extracted = PartialFunction.condOpt(LessThan(nullInt, a)) {
      case AGreaterThan(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanExtractor does not match LessThan(<lit>, <expr>) if !matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(LessThan(zero, a)) {
      case NoneGreaterThan(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanExtractor does not match LessThan(<expr>, <lit>).") {
    val extracted = PartialFunction.condOpt(LessThan(a, zero)) {
      case AGreaterThan(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanExtractor does not match expressions other than GreaterThan.") {
    val extracted = PartialFunction.condOpt(zero) {
      case AGreaterThan(v) => v
    }
    assert(extracted === None)
  }
}
// GreaterThanOrEqualToExtractor
// Each test applies the extractor through PartialFunction.condOpt: Some(extracted value)
// on a match, None otherwise.
{
  val AGreaterThanOrEqualTo = GreaterThanOrEqualToExtractor(aMatcher)
  val NoneGreaterThanOrEqualTo = GreaterThanOrEqualToExtractor(nonMatcher)
  test(
    "GreaterThanOrEqualToExtractor matches GreaterThanOrEqual(<expr>, <lit>) " +
      "if matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(a, zero)) {
      case AGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === Some(zero))
  }
  test("GreaterThanOrEqualToExtractor does not match GreaterThanOrEqual(<expr>, null).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(a, nullInt)) {
      case AGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanOrEqualToExtractor does not match GreaterThanOrEqual(<expr>, <expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(a, a)) {
      case AGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test(
    "GreaterThanOrEqualToExtractor does not match GreaterThanOrEqual(<expr>, <lit>) " +
      "if !matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(a, zero)) {
      case NoneGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanOrEqualToExtractor does not match GreaterThanOrEqual(<lit>, <expr>).") {
    val extracted = PartialFunction.condOpt(GreaterThanOrEqual(zero, a)) {
      case AGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  // The mirrored comparison LessThanOrEqual(<lit>, <expr>) is covered as well.
  test(
    "GreaterThanOrEqualToExtractor matches LessThanOrEqual(<lit>, <expr>) if matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(zero, a)) {
      case AGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === Some(zero))
  }
  test("GreaterThanOrEqualToExtractor does not match LessThanOrEqual(null, <expr>).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(nullInt, a)) {
      case AGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test(
    "GreaterThanOrEqualToExtractor does not match LessThanOrEqual(<lit>, <expr>) " +
      "if !matcher(<expr>).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(zero, a)) {
      case NoneGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test("GreaterThanOrEqualToExtractor does not match LessThanOrEqual(<expr>, <lit>).") {
    val extracted = PartialFunction.condOpt(LessThanOrEqual(a, zero)) {
      case AGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
  test(
    "GreaterThanOrEqualToExtractor does not match expressions other than GreaterThanOrEqual.") {
    val extracted = PartialFunction.condOpt(zero) {
      case AGreaterThanOrEqualTo(v) => v
    }
    assert(extracted === None)
  }
}
// IsNullExtractor
// Each test applies the extractor through PartialFunction.cond, which is true only when
// the pattern matches the expression.
{
  val AIsNull = IsNullExtractor(aMatcher)
  val NoneIsNull = IsNullExtractor(nonMatcher)
  test("IsNullExtractor matches IsNull(<expr>) if matcher(<expr>).") {
    val matched = PartialFunction.cond(IsNull(a)) {
      case AIsNull() => true
    }
    assert(matched)
  }
  test("IsNullExtractor does not match IsNull(<expr>) if !matcher(<expr>).") {
    val matched = PartialFunction.cond(IsNull(a)) {
      case NoneIsNull() => true
    }
    assert(!matched)
  }
  test("IsNullExtractor matches EqualNullSafe(<expr>, null).") {
    val matched = PartialFunction.cond(EqualNullSafe(a, nullInt)) {
      case AIsNull() => true
    }
    assert(matched)
  }
  test("IsNullExtractor does not match EqualNullSafe(<expr>, <lit>).") {
    val matched = PartialFunction.cond(EqualNullSafe(a, zero)) {
      case AIsNull() => true
    }
    assert(!matched)
  }
  test("IsNullExtractor matches EqualNullSafe(null, <expr>).") {
    val matched = PartialFunction.cond(EqualNullSafe(nullInt, a)) {
      case AIsNull() => true
    }
    assert(matched)
  }
  test("IsNullExtractor does not match EqualNullSafe(<lit>, <expr>).") {
    val matched = PartialFunction.cond(EqualNullSafe(zero, a)) {
      case AIsNull() => true
    }
    assert(!matched)
  }
  test("IsNullExtractor does not match expressions other than IsNull/EqualNullSafe.") {
    val matched = PartialFunction.cond(zero) {
      case AIsNull() => true
    }
    assert(!matched)
  }
}
// IsNotNullExtractor
// Covers the accepted forms IsNotNull(<expr>), Not(IsNull(<expr>)) and
// Not(EqualNullSafe(<expr>, null)) in either operand order, plus the negative cases.
{
  val AIsNotNull = IsNotNullExtractor(aMatcher)
  val NoneIsNotNull = IsNotNullExtractor(nonMatcher)
  test("IsNotNullExtractor matches IsNotNull(<expr>) if matcher(<expr>).") {
    val value = IsNotNull(a) match {
      case AIsNotNull() => true
      case _ => false
    }
    assert(value)
  }
  test("IsNotNullExtractor does not match IsNotNull(<expr>) if !matcher(<expr>).") {
    val value = IsNotNull(a) match {
      case NoneIsNotNull() => true
      case _ => false
    }
    assert(!value)
  }
  test("IsNotNullExtractor matches Not(IsNull(<expr>)) if matcher(<expr>).") {
    val value = Not(IsNull(a)) match {
      case AIsNotNull() => true
      case _ => false
    }
    assert(value)
  }
  // The name previously said "matches" although the assertion expects no match.
  test("IsNotNullExtractor does not match Not(IsNull(<expr>)) if !matcher(<expr>).") {
    val value = Not(IsNull(a)) match {
      case NoneIsNotNull() => true
      case _ => false
    }
    assert(!value)
  }
  test("IsNotNullExtractor matches Not(EqualNullSafe(<expr>, null)).") {
    val value = Not(EqualNullSafe(a, nullInt)) match {
      case AIsNotNull() => true
      case _ => false
    }
    assert(value)
  }
  test("IsNotNullExtractor does not match Not(EqualNullSafe(<expr>, <lit>)).") {
    val value = Not(EqualNullSafe(a, zero)) match {
      case AIsNotNull() => true
      case _ => false
    }
    assert(!value)
  }
  test("IsNotNullExtractor matches Not(EqualNullSafe(null, <expr>)).") {
    val value = Not(EqualNullSafe(nullInt, a)) match {
      case AIsNotNull() => true
      case _ => false
    }
    assert(value)
  }
  test("IsNotNullExtractor does not match Not(EqualNullSafe(<lit>, <expr>)).") {
    val value = Not(EqualNullSafe(zero, a)) match {
      case AIsNotNull() => true
      case _ => false
    }
    assert(!value)
  }
  test(
    "IsNotNullExtractor does not match expressions other than IsNotNull/Not(EqualNullSafe).") {
    val value = zero match {
      case AIsNotNull() => true
      case _ => false
    }
    assert(!value)
  }
}
// IsTrueExtractor
// Each test applies the extractor through PartialFunction.cond, which is true only when
// the pattern matches the expression.
{
  val AIsTrue = IsTrueExtractor(aMatcher)
  val BIsTrue = IsTrueExtractor(bMatcher)
  val NoneIsTrue = IsTrueExtractor(nonMatcher)
  test("IsTrueExtractor matches <expr> if matcher(<expr>).") {
    val matched = PartialFunction.cond(b) {
      case BIsTrue() => true
    }
    assert(matched)
  }
  test("IsTrueExtractor does not match <expr> if !matcher(<expr>).") {
    val matched = PartialFunction.cond(b) {
      case NoneIsTrue() => true
    }
    assert(!matched)
  }
  test("IsTrueExtractor does not match <expr> if type is not boolean.") {
    val matched = PartialFunction.cond(a) {
      case AIsTrue() => true
    }
    assert(!matched)
  }
  test("IsTrueExtractor matches EqualTo(<expr>, true) if matcher(<expr>).") {
    val matched = PartialFunction.cond(EqualTo(b, trueLit)) {
      case BIsTrue() => true
    }
    assert(matched)
  }
  test("IsTrueExtractor does not match EqualTo(<expr>, false).") {
    val matched = PartialFunction.cond(EqualTo(b, falseLit)) {
      case BIsTrue() => true
    }
    assert(!matched)
  }
  test("IsTrueExtractor matches EqualTo(true, <expr>) if matcher(<expr>).") {
    val matched = PartialFunction.cond(EqualTo(trueLit, b)) {
      case BIsTrue() => true
    }
    assert(matched)
  }
  test("IsTrueExtractor does not match EqualTo(false, <expr>).") {
    val matched = PartialFunction.cond(EqualTo(falseLit, b)) {
      case BIsTrue() => true
    }
    assert(!matched)
  }
  test("IsTrueExtractor matches EqualNullSafe(<expr>, true) if matcher(<expr>).") {
    val matched = PartialFunction.cond(EqualNullSafe(b, trueLit)) {
      case BIsTrue() => true
    }
    assert(matched)
  }
  test("IsTrueExtractor does not match EqualNullSafe(<expr>, false).") {
    val matched = PartialFunction.cond(EqualNullSafe(b, falseLit)) {
      case BIsTrue() => true
    }
    assert(!matched)
  }
  test("IsTrueExtractor matches EqualNullSafe(true, <expr>) if matcher(<expr>).") {
    val matched = PartialFunction.cond(EqualNullSafe(trueLit, b)) {
      case BIsTrue() => true
    }
    assert(matched)
  }
  test("IsTrueExtractor does not match EqualNullSafe(false, <expr>).") {
    val matched = PartialFunction.cond(EqualNullSafe(falseLit, b)) {
      case BIsTrue() => true
    }
    assert(!matched)
  }
  test("IsTrueExtractor does not match <lit>.") {
    val matched = PartialFunction.cond(zero) {
      case BIsTrue() => true
    }
    assert(!matched)
  }
}
// IsFalseExtractor
// Each test applies the extractor through PartialFunction.cond, which is true only when
// the pattern matches the expression.
{
  val BIsFalse = IsFalseExtractor(bMatcher)
  val NoneIsFalse = IsFalseExtractor(nonMatcher)
  test("IsFalseExtractor matches Not(<expr>) if matcher(<expr>).") {
    val matched = PartialFunction.cond(Not(b)) {
      case BIsFalse() => true
    }
    assert(matched)
  }
  test("IsFalseExtractor does not match Not(<expr>) if !matcher(<expr>).") {
    val matched = PartialFunction.cond(Not(b)) {
      case NoneIsFalse() => true
    }
    assert(!matched)
  }
  test("IsFalseExtractor matches EqualTo(<expr>, false) if matcher(<expr>).") {
    val matched = PartialFunction.cond(EqualTo(b, falseLit)) {
      case BIsFalse() => true
    }
    assert(matched)
  }
  test("IsFalseExtractor does not match EqualTo(<expr>, true).") {
    val matched = PartialFunction.cond(EqualTo(b, trueLit)) {
      case BIsFalse() => true
    }
    assert(!matched)
  }
  test("IsFalseExtractor matches EqualTo(false, <expr>) if matcher(<expr>).") {
    val matched = PartialFunction.cond(EqualTo(falseLit, b)) {
      case BIsFalse() => true
    }
    assert(matched)
  }
  test("IsFalseExtractor does not match EqualTo(true, <expr>).") {
    val matched = PartialFunction.cond(EqualTo(trueLit, b)) {
      case BIsFalse() => true
    }
    assert(!matched)
  }
  test("IsFalseExtractor matches EqualNullSafe(<expr>, false) if matcher(<expr>).") {
    val matched = PartialFunction.cond(EqualNullSafe(b, falseLit)) {
      case BIsFalse() => true
    }
    assert(matched)
  }
  test("IsFalseExtractor does not match EqualNullSafe(<expr>, true).") {
    val matched = PartialFunction.cond(EqualNullSafe(b, trueLit)) {
      case BIsFalse() => true
    }
    assert(!matched)
  }
  test("IsFalseExtractor matches EqualNullSafe(false, <expr>) if matcher(<expr>).") {
    val matched = PartialFunction.cond(EqualNullSafe(falseLit, b)) {
      case BIsFalse() => true
    }
    assert(matched)
  }
  test("IsFalseExtractor does not match EqualNullSafe(true, <expr>).") {
    val matched = PartialFunction.cond(EqualNullSafe(trueLit, b)) {
      case BIsFalse() => true
    }
    assert(!matched)
  }
  test("IsFalseExtractor does not match <lit>.") {
    val matched = PartialFunction.cond(zero) {
      case BIsFalse() => true
    }
    assert(!matched)
  }
}
// InExtractor
// Covers In(<expr>, <lit>*): the extracted value list, dropping of null literals,
// and the negative cases (non-matching expression, empty list, non-literal items).
{
  val AIn = InExtractor(aMatcher)
  val NoneIn = InExtractor(nonMatcher)
  test("InExtractor matches In(<expr>, <lit>*) if matcher(<expr>).") {
    val value = In(a, Seq(zero, one, two)) match {
      case AIn(values) => Some(values)
      case _ => None
    }
    assert(value === Some(Seq(zero, one, two)))
  }
  // The name previously said "matches" although the assertion expects no match.
  test("InExtractor does not match In(<expr>, <lit>*) if !matcher(<expr>).") {
    val value = In(a, Seq(zero, one, two)) match {
      case NoneIn(values) => Some(values)
      case _ => None
    }
    assert(value === None)
  }
  test("InExtractor does not match In(<expr>, <lit>*) if <lit>* is empty.") {
    val value = In(a, Nil) match {
      case AIn(values) => Some(values)
      case _ => None
    }
    assert(value === None)
  }
  // Null literals are expected to be removed from the extracted list.
  test("InExtractor matches In(<expr>, <lit>*) if some <lit>s are null.") {
    val value = In(a, Seq(zero, nullInt, nullInt, two)) match {
      case AIn(values) => Some(values)
      case _ => None
    }
    assert(value === Some(Seq(zero, two)))
  }
  test("InExtractor does not match In(<expr>, <expr>*).") {
    val value = In(a, Seq(zero, two, a)) match {
      case AIn(values) => Some(values)
      case _ => None
    }
    assert(value === None)
  }
  test("InExtractor does not match other than In.") {
    val value = a match {
      case AIn(values) => Some(values)
      case _ => None
    }
    assert(value === None)
  }
}
// InSetExtractor
// Covers InSet(<expr>, <set>): the extracted set on a match, and the negative cases.
{
  val AInSet = InSetExtractor(aMatcher)
  val NoneInSet = InSetExtractor(nonMatcher)
  test("InSetExtractor matches InSet(<expr>, <set>) if matcher(<expr>).") {
    val value = InSet(a, Set(0, 1, 2)) match {
      case AInSet(values) => Some(values)
      case _ => None
    }
    assert(value === Some(Set(0, 1, 2)))
  }
  // The name previously said "matches ... <lit>*" although the assertion expects no match
  // and the second argument is a set.
  test("InSetExtractor does not match InSet(<expr>, <set>) if !matcher(<expr>).") {
    val value = InSet(a, Set(0, 1, 2)) match {
      case NoneInSet(values) => Some(values)
      case _ => None
    }
    assert(value === None)
  }
  test("InSetExtractor does not match other than InSet.") {
    val value = a match {
      case AInSet(values) => Some(values)
      case _ => None
    }
    assert(value === None)
  }
}
}

Просмотреть файл

@ -0,0 +1,36 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._
import com.microsoft.hyperspace.index.HyperspaceSuite
/**
 * Tests for `NormalizedExprMatcher.apply`, using a matcher built from an attribute named "A"
 * and a mapping from `ExprId(42)` to "A".
 */
class NormalizedExprMatcherTest extends HyperspaceSuite {
  val matcher = {
    val normalizedAttr =
      AttributeReference("A", IntegerType)(ExpressionUtils.nullExprId, Nil)
    val exprIdToName = Map(ExprId(42) -> "A")
    NormalizedExprMatcher(normalizedAttr, exprIdToName)
  }

  test("apply returns true if the expression matches.") {
    // The attribute carries ExprId(42), which the matcher's map associates with "A".
    val attr = AttributeReference("a", IntegerType)(ExprId(42), Nil)
    assert(matcher(attr) === true)
  }

  test("apply returns false if the expression does not match") {
    val literal = Literal(42)
    assert(matcher(literal) === false)
  }
}

Просмотреть файл

@ -0,0 +1,87 @@
/*
* Copyright (2021) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.index.dataskipping.util
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.types._
import com.microsoft.hyperspace.index.HyperspaceSuite
import com.microsoft.hyperspace.index.dataskipping.ArrayTestUtils
/**
 * Tests for the `SortedArrayLowerBound` expression.
 *
 * As pinned by the expectations below, the expression evaluates to the 1-based position of
 * the first array element that is not less than the searched value, and to null when the
 * array is empty or every element is less than the value.
 */
class SortedArrayLowerBoundTest
    extends HyperspaceSuite
    with ArrayTestUtils
    with ExpressionEvalHelper {
  // Minimal instance used only for the metadata checks (prettyName, dataType, nullable).
  val sortedArrayLowerBound =
    SortedArrayLowerBound(createArray(Seq(0), IntegerType), Literal.create(0, IntegerType))

  test("prettyName returns \"sorted_array_lower_bound\"") {
    assert(sortedArrayLowerBound.prettyName === "sorted_array_lower_bound")
  }

  test("dataType returns IntegerType") {
    assert(sortedArrayLowerBound.dataType === IntegerType)
  }

  test("nullable returns true") {
    assert(sortedArrayLowerBound.nullable === true)
  }

  /**
   * Builds a `SortedArrayLowerBound` over `arr` searching for `value`.
   *
   * @param dataType element type, used both for the array and for the searched literal
   * @param arr array contents passed to `createArray` (the tests always pass ascending values)
   * @param value value to search for
   * @return the expression under test
   */
  def expr(dataType: DataType, arr: Seq[Any], value: Any): SortedArrayLowerBound = {
    SortedArrayLowerBound(createArray(arr, dataType), Literal.create(value, dataType))
  }

  test("SortedArrayLowerBound returns null if the array is empty.") {
    checkEvaluation(expr(IntegerType, Nil, 0), null)
  }

  test("SortedArrayLowerBound returns the index if the value is in the array.") {
    checkEvaluation(expr(IntegerType, Seq(1), 1), 1)
    checkEvaluation(expr(IntegerType, Seq(2), 2), 1)
    checkEvaluation(expr(IntegerType, Seq(1, 3), 1), 1)
    checkEvaluation(expr(IntegerType, Seq(1, 3), 3), 2)
    checkEvaluation(expr(IntegerType, Seq(1, 3, 5), 1), 1)
    checkEvaluation(expr(IntegerType, Seq(1, 3, 5), 3), 2)
    checkEvaluation(expr(IntegerType, Seq(1, 3, 5), 5), 3)
    checkEvaluation(expr(DoubleType, Seq(1.5, 3.0, 4.5), 1.5), 1)
    checkEvaluation(expr(DoubleType, Seq(1.5, 3.0, 4.5), 3.0), 2)
    checkEvaluation(expr(DoubleType, Seq(1.5, 3.0, 4.5), 4.5), 3)
    checkEvaluation(expr(StringType, Seq("foo"), "foo"), 1)
    checkEvaluation(expr(StringType, Seq("bar", "foo"), "bar"), 1)
    checkEvaluation(expr(StringType, Seq("bar", "foo"), "foo"), 2)
  }

  // The name previously read "returns the index if the first value ..."; "of" was intended.
  test(
    "SortedArrayLowerBound returns the index of the first value in the array " +
      "which is not less than the value.") {
    checkEvaluation(expr(IntegerType, Seq(1), 0), 1)
    checkEvaluation(expr(IntegerType, Seq(1, 3), 0), 1)
    checkEvaluation(expr(IntegerType, Seq(1, 3), 2), 2)
    checkEvaluation(expr(IntegerType, Seq(1, 3, 5), 0), 1)
    checkEvaluation(expr(IntegerType, Seq(1, 3, 5), 2), 2)
    checkEvaluation(expr(IntegerType, Seq(1, 3, 5), 4), 3)
  }

  test(
    "SortedArrayLowerBound returns null if the value is greater than " +
      "the last value in the array.") {
    checkEvaluation(expr(IntegerType, Seq(1), 2), null)
    checkEvaluation(expr(IntegerType, Seq(1, 3), 4), null)
    checkEvaluation(expr(IntegerType, Seq(1, 3, 5), 6), null)
  }
}