Introduce scalastyle check derived from Apache Spark (#13)

Terry Kim 2020-06-19 15:58:18 -07:00 committed by GitHub
Parent 7b5d35cccb
Commit 4a805dc920
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
47 changed files with 619 additions and 109 deletions

View File

@@ -46,9 +46,24 @@ scalacOptions ++= Seq(
javaOptions += "-Xmx1024m"
/********************************
/*****************************
* ScalaStyle configurations *
*****************************/
scalastyleConfig := baseDirectory.value / "scalastyle-config.xml"
// Run as part of compile task.
lazy val compileScalastyle = taskKey[Unit]("compileScalastyle")
compileScalastyle := scalastyle.in(Compile).toTask("").value
(compile in Compile) := ((compile in Compile) dependsOn compileScalastyle).value
// Run as part of test task.
lazy val testScalastyle = taskKey[Unit]("testScalastyle")
testScalastyle := scalastyle.in(Test).toTask("").value
(test in Test) := ((test in Test) dependsOn testScalastyle).value
/***********************
* Test configurations *
********************************/
***********************/
// Tests cannot be run in parallel since multiple Spark contexts cannot run in the same JVM.
parallelExecution in Test := false
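The wiring above uses sbt 0.13-style operators (scalastyle.in(Compile), compile in Compile), matching the plugin added to project/plugins.sbt below. As a hedged aside, the same hookup under sbt 1.x slash syntax would look roughly like the following sketch; it assumes a later migration of this build file and is not part of this commit:

lazy val compileScalastyle = taskKey[Unit]("compileScalastyle")
// Run Scalastyle over main sources whenever the project compiles.
compileScalastyle := (Compile / scalastyle).toTask("").value
Compile / compile := ((Compile / compile) dependsOn compileScalastyle).value

lazy val testScalastyle = taskKey[Unit]("testScalastyle")
// Run Scalastyle over test sources before the test task.
testScalastyle := (Test / scalastyle).toTask("").value
Test / test := ((Test / test) dependsOn testScalastyle).value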

View File

@@ -17,3 +17,5 @@
addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.13")
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.1")
addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0")

scalastyle-config.xml Normal file (427 lines)
View File

@@ -0,0 +1,427 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one or more
~ contributor license agreements. See the NOTICE file distributed with
~ this work for additional information regarding copyright ownership.
~ The ASF licenses this file to You under the Apache License, Version 2.0
~ (the "License"); you may not use this file except in compliance with
~ the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--
~ This file contains code from the Apache Spark project (original license above).
~ It contains modifications, which are licensed as follows:
-->
<!--
~ Copyright (2020) The Hyperspace Project Authors.
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
<!--
If you wish to turn off checking for a section of code, you can put a comment in the source
before and after the section, with the following syntax:
// scalastyle:off
... // stuff that breaks the styles
// scalastyle:on
You can also disable only one rule, by specifying its rule id, as specified in:
http://www.scalastyle.org/rules-0.7.0.html
// scalastyle:off no.finalize
override def finalize(): Unit = ...
// scalastyle:on no.finalize
This file is divided into 3 sections:
(1) rules that we enforce.
(2) rules that we would like to enforce, but haven't cleaned up the codebase to turn on yet
(or we need to make the scalastyle rule more configurable).
(3) rules that we don't want to enforce.
-->
<scalastyle>
<name>Scalastyle standard configuration</name>
<!-- ================================================================================ -->
<!-- rules we enforce -->
<!-- ================================================================================ -->
<check level="error" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
<parameters>
<parameter name="regex">true</parameter>
<parameter name="header"><![CDATA[(?:\Q/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This file contains code from the Apache Spark project (original license above).
* It contains modifications, which are licensed as follows:
*/
\E)?\Q/*
* Copyright (2020) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/\E]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
<check customId="filelinelengthchecker" level="error" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
<parameters>
<parameter name="maxLineLength"><![CDATA[100]]></parameter>
<parameter name="tabSize"><![CDATA[2]]></parameter>
<parameter name="ignoreImports">true</parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
<parameters><parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter></parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
<parameters><parameter name="regex"><![CDATA[(config|[A-Z][A-Za-z]*)]]></parameter></parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
<parameters><parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter></parameters>
</check>
<check customId="argcount" level="error" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
<parameters><parameter name="maxParameters"><![CDATA[10]]></parameter></parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
<parameters>
<parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
<parameter name="doubleLineAllowed"><![CDATA[true]]></parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
<check customId="nonascii" level="error" class="org.scalastyle.scalariform.NonASCIICharacterChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.SpaceAfterCommentStartChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceBeforeTokenChecker" enabled="true">
<parameters>
<parameter name="tokens">ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
</parameters>
</check>
<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
<parameters>
<parameter name="tokens">ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW</parameter>
</parameters>
</check>
<!-- ??? usually shouldn't be checked into the code base. -->
<check level="error" class="org.scalastyle.scalariform.NotImplementedErrorUsage" enabled="true"></check>
<!-- As of SPARK-7558, all tests in Spark should extend o.a.s.SparkFunSuite instead of FunSuite directly -->
<check customId="funsuite" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
<parameters><parameter name="regex">^FunSuite[A-Za-z]*$</parameter></parameters>
<customMessage>Tests must extend org.apache.spark.SparkFunSuite instead.</customMessage>
</check>
<!-- As of SPARK-7977 all printlns need to be wrapped in '// scalastyle:off/on println' -->
<check customId="println" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
<parameters><parameter name="regex">^println$</parameter></parameters>
<customMessage><![CDATA[Are you sure you want to println? If yes, wrap the code block with
// scalastyle:off println
println(...)
// scalastyle:on println]]></customMessage>
</check>
<check customId="hadoopconfiguration" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">spark(.sqlContext)?.sparkContext.hadoopConfiguration</parameter></parameters>
<customMessage><![CDATA[
Are you sure that you want to use sparkContext.hadoopConfiguration? In most cases, you should use
spark.sessionState.newHadoopConf() instead, so that the hadoop configurations specified in Spark session
configuration will come into effect.
If you must use sparkContext.hadoopConfiguration, wrap the code block with
// scalastyle:off hadoopconfiguration
spark.sparkContext.hadoopConfiguration...
// scalastyle:on hadoopconfiguration
]]></customMessage>
</check>
<check customId="visiblefortesting" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">@VisibleForTesting</parameter></parameters>
<customMessage><![CDATA[
@VisibleForTesting causes classpath issues. Please note this in the java doc instead (SPARK-11615).
]]></customMessage>
</check>
<check customId="runtimeaddshutdownhook" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">Runtime\.getRuntime\.addShutdownHook</parameter></parameters>
<customMessage><![CDATA[
Are you sure that you want to use Runtime.getRuntime.addShutdownHook? In most cases, you should use
ShutdownHookManager.addShutdownHook instead.
If you must use Runtime.getRuntime.addShutdownHook, wrap the code block with
// scalastyle:off runtimeaddshutdownhook
Runtime.getRuntime.addShutdownHook(...)
// scalastyle:on runtimeaddshutdownhook
]]></customMessage>
</check>
<check customId="mutablesynchronizedbuffer" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">mutable\.SynchronizedBuffer</parameter></parameters>
<customMessage><![CDATA[
Are you sure that you want to use mutable.SynchronizedBuffer? In most cases, you should use
java.util.concurrent.ConcurrentLinkedQueue instead.
If you must use mutable.SynchronizedBuffer, wrap the code block with
// scalastyle:off mutablesynchronizedbuffer
mutable.SynchronizedBuffer[...]
// scalastyle:on mutablesynchronizedbuffer
]]></customMessage>
</check>
<check customId="classforname" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">Class\.forName</parameter></parameters>
<customMessage><![CDATA[
Are you sure that you want to use Class.forName? In most cases, you should use Utils.classForName instead.
If you must use Class.forName, wrap the code block with
// scalastyle:off classforname
Class.forName(...)
// scalastyle:on classforname
]]></customMessage>
</check>
<check customId="awaitresult" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">Await\.result</parameter></parameters>
<customMessage><![CDATA[
Are you sure that you want to use Await.result? In most cases, you should use ThreadUtils.awaitResult instead.
If you must use Await.result, wrap the code block with
// scalastyle:off awaitresult
Await.result(...)
// scalastyle:on awaitresult
]]></customMessage>
</check>
<check customId="awaitready" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">Await\.ready</parameter></parameters>
<customMessage><![CDATA[
Are you sure that you want to use Await.ready? In most cases, you should use ThreadUtils.awaitReady instead.
If you must use Await.ready, wrap the code block with
// scalastyle:off awaitready
Await.ready(...)
// scalastyle:on awaitready
]]></customMessage>
</check>
<check level="error" class="org.scalastyle.scalariform.ImportOrderChecker" enabled="true">
<parameters>
<parameter name="groups">java,scala,3rdParty,hyperspace</parameter>
<parameter name="group.java">javax?\..*</parameter>
<parameter name="group.scala">scala\..*</parameter>
<parameter name="group.3rdParty">(?!com\.microsoft\.hyperspace\.).*</parameter>
<parameter name="group.hyperspace">com\.microsoft\.hyperspace\..*</parameter>
</parameters>
</check>
<check customId="caselocale" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">(\.toUpperCase|\.toLowerCase)(?!(\(|\(Locale.ROOT\)))</parameter></parameters>
<customMessage><![CDATA[
Are you sure that you want to use toUpperCase or toLowerCase without the root locale? In most cases, you
should use toUpperCase(Locale.ROOT) or toLowerCase(Locale.ROOT) instead.
If you must use toUpperCase or toLowerCase without the root locale, wrap the code block with
// scalastyle:off caselocale
.toUpperCase
.toLowerCase
// scalastyle:on caselocale
]]></customMessage>
</check>
<check customId="throwerror" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">throw new \w+Error\(</parameter></parameters>
<customMessage><![CDATA[
Are you sure that you want to throw Error? In most cases, you should use appropriate Exception instead.
If you must throw Error, wrap the code block with
// scalastyle:off throwerror
throw new XXXError(...)
// scalastyle:on throwerror
]]></customMessage>
</check>
<!-- As of SPARK-9613 JavaConversions should be replaced with JavaConverters -->
<check customId="javaconversions" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
<parameters><parameter name="regex">JavaConversions</parameter></parameters>
<customMessage>Instead of importing implicits in scala.collection.JavaConversions._, import
scala.collection.JavaConverters._ and use .asScala / .asJava methods</customMessage>
</check>
<check customId="commonslang2" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
<parameters><parameter name="regex">org\.apache\.commons\.lang\.</parameter></parameters>
<customMessage>Use Commons Lang 3 classes (package org.apache.commons.lang3.*) instead
of Commons Lang 2 (package org.apache.commons.lang.*)</customMessage>
</check>
<check customId="extractopt" level="error" class="org.scalastyle.scalariform.TokenChecker" enabled="true">
<parameters><parameter name="regex">extractOpt</parameter></parameters>
<customMessage>Use jsonOption(x).map(.extract[T]) instead of .extractOpt[T], as the latter
is slower. </customMessage>
</check>
<check level="error" class="org.scalastyle.scalariform.DisallowSpaceBeforeTokenChecker" enabled="true">
<parameters>
<parameter name="tokens">COMMA</parameter>
</parameters>
</check>
<!-- SPARK-3854: Single Space between ')' and '{' -->
<check customId="SingleSpaceBetweenRParenAndLCurlyBrace" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">\)\{</parameter></parameters>
<customMessage><![CDATA[
Single Space between ')' and `{`.
]]></customMessage>
</check>
<check customId="NoScalaDoc" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">(?m)^(\s*)/[*][*].*$(\r|)\n^\1 [*]</parameter></parameters>
<customMessage>Use Javadoc style indentation for multiline comments</customMessage>
</check>
<check customId="OmitBracesInCase" level="error" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters><parameter name="regex">case[^\n>]*=>\s*\{</parameter></parameters>
<customMessage>Omit braces in case clauses.</customMessage>
</check>
<!-- SPARK-16877: Avoid Java annotations -->
<check level="error" class="org.scalastyle.scalariform.OverrideJavaChecker" enabled="true"></check>
<check level="error" class="org.scalastyle.scalariform.DeprecatedJavaChecker" enabled="true"></check>
<!-- ================================================================================ -->
<!-- rules we'd like to enforce, but haven't cleaned up the codebase yet -->
<!-- ================================================================================ -->
<!-- We cannot turn the following two on, because it'd fail a lot of string interpolation use cases. -->
<!-- Ideally the following two rules should be configurable to rule out string interpolation. -->
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="false"></check>
<check level="error" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="false"></check>
<!-- This breaks symbolic method names so we don't turn it on. -->
<!-- Maybe we should update it to allow basic symbolic names, and then we are good to go. -->
<check level="error" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="false">
<parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
</parameters>
</check>
<!-- Should turn this on, but we have a few places that need to be fixed first -->
<check level="error" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
<!-- ================================================================================ -->
<!-- rules we don't want -->
<!-- ================================================================================ -->
<check level="error" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="false">
<parameters><parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter></parameters>
</check>
<!-- We want the opposite of this: NewLineAtEofChecker -->
<check level="error" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
<!-- This one complains about all kinds of random things. Disable. -->
<check level="error" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="false"></check>
<!-- We use return quite a bit for control flows and guards -->
<check level="error" class="org.scalastyle.scalariform.ReturnChecker" enabled="false"></check>
<!-- We use null a lot in low level code and to interface with 3rd party code -->
<check level="error" class="org.scalastyle.scalariform.NullChecker" enabled="false"></check>
<!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.scalariform.NoCloneChecker" enabled="false"></check>
<!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.file.FileLengthChecker" enabled="false">
<parameters><parameter name="maxFileLength">800></parameter></parameters>
</check>
<!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="false">
<parameters><parameter name="maxTypes">30</parameter></parameters>
</check>
<!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="false">
<parameters><parameter name="maximum">10</parameter></parameters>
</check>
<!-- Doesn't seem super big deal here ... -->
<check level="error" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="false">
<parameters><parameter name="maxLength">50</parameter></parameters>
</check>
<!-- Not exactly feasible to enforce this right now. -->
<!-- It is also infrequent that somebody introduces a new class with a lot of methods. -->
<check level="error" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="false">
<parameters><parameter name="maxMethods"><![CDATA[30]]></parameter></parameters>
</check>
<!-- Doesn't seem super big deal here, and we have a lot of magic numbers ... -->
<check level="error" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="false">
<parameters><parameter name="ignore">-1,0,1,2,3</parameter></parameters>
</check>
</scalastyle>
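As described in the comment block at the top of this configuration, individual rules can be suppressed locally with // scalastyle:off and // scalastyle:on. A minimal Scala sketch against the println rule defined above; the enclosing method is illustrative only:

// Locally disable the "println" check configured in this file.
def debugDump(msg: String): Unit = {
  // scalastyle:off println
  println(msg) // permitted only between the off/on markers
  // scalastyle:on println
}

The ImportOrderChecker above is also what drives the import reordering visible in the Scala diffs of this commit. A sketch of an import block in the enforced group order (java, scala, third-party, then com.microsoft.hyperspace); the specific imports are illustrative:

import java.util.Locale

import scala.collection.mutable

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession

import com.microsoft.hyperspace.index.IndexConfig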

View File

@@ -16,8 +16,8 @@
package com.microsoft.hyperspace.actions
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.types.StructType
import com.microsoft.hyperspace.HyperspaceException
import com.microsoft.hyperspace.actions.Constants.States.{ACTIVE, CREATING, DOESNOTEXIST}

View File

@@ -17,11 +17,11 @@
package com.microsoft.hyperspace.actions
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex}
import com.microsoft.hyperspace.index.DataFrameWriterExtensions.Bucketizer
import com.microsoft.hyperspace.index._
import com.microsoft.hyperspace.index.DataFrameWriterExtensions.Bucketizer
import com.microsoft.hyperspace.index.serde.LogicalPlanSerDeUtils
/**

View File

@@ -16,8 +16,8 @@
package com.microsoft.hyperspace.actions
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import com.microsoft.hyperspace.HyperspaceException
import com.microsoft.hyperspace.actions.Constants.States.{ACTIVE, REFRESHING}

View File

@@ -17,8 +17,8 @@
package com.microsoft.hyperspace.index
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.internal.SQLConf
import com.microsoft.hyperspace.HyperspaceException
import com.microsoft.hyperspace.actions._

View File

@@ -16,6 +16,8 @@
package com.microsoft.hyperspace.index
import java.util.Locale
/**
* IndexConfig specifies the configuration of an index.
*
@@ -71,7 +73,7 @@ case class IndexConfig(
s"includedColumns: $includedColumnNames]"
}
private def toLowerCase(seq: Seq[String]): Seq[String] = seq.map(_.toLowerCase)
private def toLowerCase(seq: Seq[String]): Seq[String] = seq.map(_.toLowerCase(Locale.ROOT))
}
/**

View File

@@ -51,7 +51,7 @@ class IndexDataManagerImpl(indexPath: Path) extends IndexDataManager {
* This method relies on the naming convention that directory name will be similar to hive
* partitioning scheme, i.e. "root/v__=value/f1.parquet" etc. Here the value represents the
* version id of the data.
**/
*/
override def getLatestVersionId(): Option[Int] = {
if (!fs.exists(indexPath)) {
return None
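The doc comment above spells out the naming convention "root/v__=value/f1.parquet", where value is the version id. Purely as an illustration of that convention, a hedged sketch of pulling the id out of a directory name; the helper name and prefix handling are assumptions, not the actual implementation:

// Illustrative only: extract the version id from a directory name such as "v__=3".
def parseVersionId(dirName: String): Option[Int] = {
  val prefix = "v__="
  if (dirName.startsWith(prefix)) scala.util.Try(dirName.stripPrefix(prefix).toInt).toOption
  else None
}
// parseVersionId("v__=3") == Some(3); parseVersionId("f1.parquet") == None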

View File

@@ -19,6 +19,7 @@ package com.microsoft.hyperspace.index
import scala.util.{Success, Try}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.util.hyperspace.Utils
/**
* This trait contains the interface that provides the signature of logical plan.
@@ -52,7 +53,7 @@ object LogicalPlanSignatureProvider {
* @return signature provider.
*/
def create(name: String): LogicalPlanSignatureProvider = {
Try(Class.forName(name).newInstance) match {
Try(Utils.classForName(name).newInstance) match {
case Success(provider: LogicalPlanSignatureProvider) => provider
case _ =>
throw new IllegalArgumentException(

View File

@@ -16,7 +16,7 @@
package com.microsoft.hyperspace.index
import java.util.NoSuchElementException
import java.util.{Locale, NoSuchElementException}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, Path}
@@ -45,7 +45,10 @@ private[hyperspace] class PathResolver(conf: SQLConf) {
val indexNames = fs.listStatus(root)
indexNames
.collectFirst {
case s: FileStatus if s.getPath.getName.toLowerCase.equals(name.toLowerCase) =>
case s: FileStatus
if s.getPath.getName
.toLowerCase(Locale.ROOT)
.equals(name.toLowerCase(Locale.ROOT)) =>
s.getPath
}
.getOrElse(new Path(root, name))

View File

@@ -19,14 +19,14 @@ package com.microsoft.hyperspace.index.plananalysis
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.execution._
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InMemoryFileIndex}
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.hyperspace.utils.DataFrameUtils
import org.apache.spark.sql.{DataFrame, SparkSession}
import com.microsoft.hyperspace.index.IndexConstants
import com.microsoft.hyperspace.{HyperspaceException, Implicits}
import com.microsoft.hyperspace.index.IndexConstants
/**
* Provides helper methods for explain API.

View File

@@ -23,11 +23,11 @@ import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.CleanupAliases
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, AttributeSet, EqualTo, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, EqualTo, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InMemoryFileIndex, LogicalRelation}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.types.StructType
import com.microsoft.hyperspace.Hyperspace

View File

@@ -21,7 +21,7 @@ import java.util.Base64
import org.apache.hadoop.fs.Path
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.expressions.{Exists, InSubquery, ListQuery, ScalaUDF, ScalarSubquery}
import org.apache.spark.sql.catalyst.expressions.{Exists, InSubquery, ListQuery, ScalarSubquery, ScalaUDF}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.datasources._

View File

@@ -19,7 +19,7 @@ package com.microsoft.hyperspace.index
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, ExprId, Expression, Predicate, Unevaluable}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, ExprId, Predicate, Unevaluable}
import org.apache.spark.sql.catalyst.plans.logical.{BinaryNode, LeafNode, LogicalPlan}
import org.apache.spark.sql.execution.FileRelation
import org.apache.spark.sql.execution.datasources.{FileFormat, FileIndex, PartitionDirectory}

View File

@@ -1,3 +1,19 @@
/*
* Copyright (2020) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.hyperspace.util
import org.apache.spark.sql.SparkSession

View File

@@ -0,0 +1,26 @@
/*
* Copyright (2020) The Hyperspace Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.util.hyperspace
/**
* This class is used to expose package private methods from org.apache.spark.util.Utils.
*/
object Utils {
def classForName(className: String): Class[_] = {
org.apache.spark.util.Utils.classForName(className)
}
}
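A brief usage sketch for the wrapper above, mirroring how LogicalPlanSignatureProvider.create uses it elsewhere in this commit; the class name passed in is only an example:

import org.apache.spark.util.hyperspace.Utils

// Example only: load a class reflectively through the Spark helper exposed above.
val providerClass: Class[_] =
  Utils.classForName("com.microsoft.hyperspace.index.FileBasedSignatureProvider")
val providerInstance = providerClass.newInstance()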

View File

@@ -17,11 +17,12 @@
package com.microsoft.hyperspace
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite}
import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll}
trait SparkInvolvedSuite extends BeforeAndAfterAll with BeforeAndAfter {
self: FunSuite =>
self: SparkFunSuite =>
override def suiteName: String = getClass.getSimpleName

View File

@@ -16,14 +16,15 @@
package com.microsoft.hyperspace.actions
import org.apache.spark.SparkFunSuite
import org.mockito.ArgumentMatchers._
import org.mockito.Mockito._
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.BeforeAndAfter
import com.microsoft.hyperspace.SparkInvolvedSuite
import com.microsoft.hyperspace.index._
class ActionTest extends FunSuite with SparkInvolvedSuite with BeforeAndAfter {
class ActionTest extends SparkFunSuite with SparkInvolvedSuite with BeforeAndAfter {
var mockLogManager: IndexLogManager = _
var testObject: Action = _
val testLogEntry: LogEntry = TestLogEntry(Constants.States.DOESNOTEXIST)

View File

@@ -16,15 +16,15 @@
package com.microsoft.hyperspace.actions
import org.apache.spark.SparkFunSuite
import org.mockito.ArgumentMatchers.anyInt
import org.mockito.Mockito.{mock, when}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.SparkInvolvedSuite
import com.microsoft.hyperspace.actions.Constants.States._
import com.microsoft.hyperspace.index._
class CancelActionTest extends FunSuite with SparkInvolvedSuite {
class CancelActionTest extends SparkFunSuite with SparkInvolvedSuite {
private val mockLogManager: IndexLogManager = mock(classOf[IndexLogManager])
override def beforeAll(): Unit = {

View File

@@ -17,16 +17,16 @@
package com.microsoft.hyperspace.actions
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.DataFrame
import org.mockito.Mockito._
import org.scalatest.FunSuite
import com.microsoft.hyperspace.{HyperspaceException, SampleData, SparkInvolvedSuite}
import com.microsoft.hyperspace.actions.Constants.States._
import com.microsoft.hyperspace.index._
import com.microsoft.hyperspace.util.FileUtils
import com.microsoft.hyperspace.{HyperspaceException, SampleData, SparkInvolvedSuite}
class CreateActionTest extends FunSuite with SparkInvolvedSuite {
class CreateActionTest extends SparkFunSuite with SparkInvolvedSuite {
private val indexSystemPath = "src/test/resources/indexLocation"
private val sampleData = SampleData.testData
private val sampleParquetDataLocation = "src/test/resources/sampleparquet"

View File

@@ -16,15 +16,15 @@
package com.microsoft.hyperspace.actions
import org.apache.spark.SparkFunSuite
import org.mockito.ArgumentMatchers._
import org.mockito.Mockito.{mock, when}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.{HyperspaceException, SparkInvolvedSuite}
import com.microsoft.hyperspace.actions.Constants.States._
import com.microsoft.hyperspace.index._
import com.microsoft.hyperspace.{HyperspaceException, SparkInvolvedSuite}
class DeleteActionTest extends FunSuite with SparkInvolvedSuite {
class DeleteActionTest extends SparkFunSuite with SparkInvolvedSuite {
private val mockLogManager: IndexLogManager = mock(classOf[IndexLogManager])
override def beforeAll(): Unit = {

View File

@@ -18,17 +18,17 @@ package com.microsoft.hyperspace.actions
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.DataFrame
import org.mockito.ArgumentMatchers.anyInt
import org.mockito.Mockito.{mock, when}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.{HyperspaceException, SampleData, SparkInvolvedSuite}
import com.microsoft.hyperspace.actions.Constants.States.{ACTIVE, CREATING}
import com.microsoft.hyperspace.index._
import com.microsoft.hyperspace.index.serde.LogicalPlanSerDeUtils
import com.microsoft.hyperspace.{HyperspaceException, SampleData, SparkInvolvedSuite}
class RefreshActionTest extends FunSuite with SparkInvolvedSuite {
class RefreshActionTest extends SparkFunSuite with SparkInvolvedSuite {
private val sampleParquetDataLocation = "src/test/resources/sampleparquet"
private val fileSystem = new Path(sampleParquetDataLocation).getFileSystem(new Configuration)
private val mockLogManager: IndexLogManager = mock(classOf[IndexLogManager])

View File

@@ -16,15 +16,15 @@
package com.microsoft.hyperspace.actions
import org.apache.spark.SparkFunSuite
import org.mockito.ArgumentMatchers._
import org.mockito.Mockito.{mock, when}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.{HyperspaceException, SparkInvolvedSuite}
import com.microsoft.hyperspace.actions.Constants.States._
import com.microsoft.hyperspace.index._
import com.microsoft.hyperspace.{HyperspaceException, SparkInvolvedSuite}
class RestoreActionTest extends FunSuite with SparkInvolvedSuite {
class RestoreActionTest extends SparkFunSuite with SparkInvolvedSuite {
private val mockLogManager: IndexLogManager = mock(classOf[IndexLogManager])
override def beforeAll(): Unit = {

View File

@@ -16,16 +16,16 @@
package com.microsoft.hyperspace.actions
import org.apache.spark.SparkFunSuite
import org.mockito.ArgumentMatchers._
import org.mockito.Mockito.{mock, verify, when}
import org.mockito.internal.verification.Times
import org.scalatest.FunSuite
import com.microsoft.hyperspace.{HyperspaceException, SparkInvolvedSuite}
import com.microsoft.hyperspace.actions.Constants.States._
import com.microsoft.hyperspace.index._
import com.microsoft.hyperspace.{HyperspaceException, SparkInvolvedSuite}
class VacuumActionTest extends FunSuite with SparkInvolvedSuite {
class VacuumActionTest extends SparkFunSuite with SparkInvolvedSuite {
private val mockLogManager: IndexLogManager = mock(classOf[IndexLogManager])
private val mockDataManager: IndexDataManager = mock(classOf[IndexDataManager])

View File

@@ -17,13 +17,13 @@
package com.microsoft.hyperspace.index
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.DataFrame
import org.scalatest.FunSuite
import com.microsoft.hyperspace.util.FileUtils
import com.microsoft.hyperspace.{Hyperspace, HyperspaceException, SampleData, SparkInvolvedSuite}
import com.microsoft.hyperspace.util.FileUtils
class CreateIndexTests extends FunSuite with SparkInvolvedSuite {
class CreateIndexTests extends SparkFunSuite with SparkInvolvedSuite {
private val sampleData = SampleData.testData
private val sampleParquetDataLocation = "src/test/resources/sampleparquet"

View File

@@ -21,18 +21,18 @@ import java.io.File
import scala.collection.mutable.ListBuffer
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning
import org.apache.spark.sql.execution.datasources.BucketingUtils
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, Row}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.{SampleData, SparkInvolvedSuite}
import com.microsoft.hyperspace.index.DataFrameWriterExtensions.Bucketizer
import com.microsoft.hyperspace.util.FileUtils
import com.microsoft.hyperspace.{SampleData, SparkInvolvedSuite}
class DataFrameWriterExtensionsTests extends FunSuite with SparkInvolvedSuite {
class DataFrameWriterExtensionsTests extends SparkFunSuite with SparkInvolvedSuite {
private val sampleData = SampleData.testData
private val sampleParquetDataLocation = "src/test/resources/sampleparquet"

View File

@@ -18,12 +18,12 @@ package com.microsoft.hyperspace.index
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InMemoryFileIndex, LogicalRelation}
import org.apache.spark.sql.{DataFrame, Row}
import com.microsoft.hyperspace.index.rules.{FilterIndexRule, JoinIndexRule}
import com.microsoft.hyperspace.{Hyperspace, Implicits, SampleData}
import com.microsoft.hyperspace.index.rules.{FilterIndexRule, JoinIndexRule}
class E2EHyperspaceRulesTests extends HyperspaceSuite {
@@ -66,8 +66,7 @@ class E2EHyperspaceRulesTests extends HyperspaceSuite {
spark.disableHyperspace()
}
test(
"enableHyperspace() plug in optimization rules, disableHyperspace() plug out optimization rules.") {
test("verify enableHyperspace()/disableHyperspace() plug in/out optimization rules.") {
val expectedOptimizationRuleBatch = Seq(JoinIndexRule, FilterIndexRule)
assert(

View File

@@ -19,16 +19,16 @@ package com.microsoft.hyperspace.index
import scala.collection.mutable
import org.apache.hadoop.fs.{FileStatus, Path}
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex, PartitionSpec}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitionSpec, PartitioningAwareFileIndex}
import org.apache.spark.sql.types.StructType
import org.scalatest.FunSuite
import com.microsoft.hyperspace.SparkInvolvedSuite
class FileBasedSignatureProviderTests extends FunSuite with SparkInvolvedSuite {
class FileBasedSignatureProviderTests extends SparkFunSuite with SparkInvolvedSuite {
private val fileLength = 100
private val fileModificationTime = 10000
private val filePath = new Path("filePath")
@@ -140,10 +140,12 @@ class FileBasedSignatureProviderTests extends FunSuite with SparkInvolvedSuite {
private class MockPartitioningAwareFileIndex(sparkSession: SparkSession, files: Seq[FileStatus])
extends PartitioningAwareFileIndex(sparkSession, Map.empty, None) {
override def partitionSpec(): PartitionSpec = PartitionSpec(StructType(Seq()), Seq())
override protected def leafFiles: mutable.LinkedHashMap[Path, FileStatus] = ???
override protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]] = ???
override def rootPaths: Seq[Path] = ???
override def refresh(): Unit = ???
override protected def leafFiles: mutable.LinkedHashMap[Path, FileStatus] =
throw new NotImplementedError
override protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]] =
throw new NotImplementedError
override def rootPaths: Seq[Path] = throw new NotImplementedError
override def refresh(): Unit = throw new NotImplementedError
override def allFiles: Seq[FileStatus] = files
}
}

View File

@@ -16,11 +16,11 @@
package com.microsoft.hyperspace.index
import org.scalatest.FunSuite
import org.apache.spark.SparkFunSuite
import com.microsoft.hyperspace.{Hyperspace, SparkInvolvedSuite}
trait HyperspaceSuite extends FunSuite with SparkInvolvedSuite {
trait HyperspaceSuite extends SparkFunSuite with SparkInvolvedSuite {
override def beforeAll(): Unit = {
super.beforeAll()
clearCache()

View File

@@ -17,15 +17,15 @@
package com.microsoft.hyperspace.index
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.{Hyperspace, HyperspaceException, SampleData, SparkInvolvedSuite}
import com.microsoft.hyperspace.actions.Constants
import com.microsoft.hyperspace.util.FileUtils
import com.microsoft.hyperspace.{Hyperspace, HyperspaceException, SampleData, SparkInvolvedSuite}
class IndexCacheTest extends FunSuite with SparkInvolvedSuite {
class IndexCacheTest extends SparkFunSuite with SparkInvolvedSuite {
val sampleParquetDataLocation = "src/test/resources/sampleparquet"
val indexSystemPath = "src/test/resources/indexLocation"
val indexConfig1 = IndexConfig("index1", Seq("RGUID"), Seq("Date"))

View File

@@ -17,24 +17,24 @@
package com.microsoft.hyperspace.index
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
import org.apache.spark.SparkFunSuite
import org.mockito.ArgumentMatchers.any
import org.mockito.Mockito.{mock, when}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.actions.Constants
import com.microsoft.hyperspace.{HyperspaceException, SparkInvolvedSuite}
import com.microsoft.hyperspace.actions.Constants
class IndexCollectionManagerTest extends FunSuite with SparkInvolvedSuite {
class IndexCollectionManagerTest extends SparkFunSuite with SparkInvolvedSuite {
private val indexSystemPath = "src/test/resources/indexLocation"
private val testLogManagerFactory: IndexLogManagerFactory = new IndexLogManagerFactory {
override def create(indexPath: Path): IndexLogManager =
new IndexLogManager {
override def getLog(id: Int): Option[LogEntry] = Some(testLogEntry)
override def getLatestId(): Option[Int] = Some(0)
override def getLatestStableLog(): Option[LogEntry] = ???
override def createLatestStableLog(id: Int): Boolean = ???
override def deleteLatestStableLog(): Boolean = ???
override def writeLog(id: Int, log: LogEntry): Boolean = ???
override def getLatestStableLog(): Option[LogEntry] = throw new NotImplementedError
override def createLatestStableLog(id: Int): Boolean = throw new NotImplementedError
override def deleteLatestStableLog(): Boolean = throw new NotImplementedError
override def writeLog(id: Int, log: LogEntry): Boolean = throw new NotImplementedError
override def getLatestLog(): Option[LogEntry] = Some(testLogEntry)
private val testLogEntry: IndexLogEntry = {

View File

@@ -16,9 +16,9 @@
package com.microsoft.hyperspace.index
import org.scalatest.FunSuite
import org.apache.spark.SparkFunSuite
class IndexConfigTests extends FunSuite {
class IndexConfigTests extends SparkFunSuite {
test("Empty index name is not allowed.") {
intercept[IllegalArgumentException](IndexConfig("", Seq("c1"), Seq("c2")))
intercept[IllegalArgumentException](IndexConfig.builder.indexBy("c1").include("c2").create)

View File

@@ -16,12 +16,12 @@
package com.microsoft.hyperspace.index
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.util.JsonUtils
class IndexLogEntryTest extends FunSuite {
class IndexLogEntryTest extends SparkFunSuite {
test("IndexLogEntry spec example") {
val schemaString =
"""{\"type\":\"struct\",

View File

@@ -20,13 +20,14 @@ import java.util.UUID
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.scalatest.{BeforeAndAfterAll, FunSuite}
import org.apache.spark.SparkFunSuite
import org.scalatest.BeforeAndAfterAll
import com.microsoft.hyperspace.{SparkInvolvedSuite, TestUtils}
import com.microsoft.hyperspace.index.IndexConstants.HYPERSPACE_LOG
import com.microsoft.hyperspace.util.{FileUtils, JsonUtils}
import com.microsoft.hyperspace.{SparkInvolvedSuite, TestUtils}
class IndexLogManagerImplTest extends FunSuite with SparkInvolvedSuite with BeforeAndAfterAll {
class IndexLogManagerImplTest extends SparkFunSuite with SparkInvolvedSuite with BeforeAndAfterAll {
val testRoot = "src/test/resources/indexLogManagerTests"
val sampleIndexLogEntry: IndexLogEntry = IndexLogEntry(
"entityName",

View File

@@ -17,18 +17,18 @@
package com.microsoft.hyperspace.index
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningAwareFileIndex}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.{Hyperspace, SampleData, SparkInvolvedSuite}
import com.microsoft.hyperspace.TestUtils.copyWithState
import com.microsoft.hyperspace.actions.Constants
import com.microsoft.hyperspace.index.serde.LogicalPlanSerDeUtils
import com.microsoft.hyperspace.util.FileUtils
import com.microsoft.hyperspace.{Hyperspace, SampleData, SparkInvolvedSuite}
class IndexManagerTests extends FunSuite with SparkInvolvedSuite {
class IndexManagerTests extends SparkFunSuite with SparkInvolvedSuite {
private val sampleParquetDataLocation = "src/test/resources/sampleparquet"
private val indexStorageLocation = "src/test/resources/indexLocation"
private val indexConfig1 = IndexConfig("index1", Seq("RGUID"), Seq("Date"))

View File

@@ -16,12 +16,12 @@
package com.microsoft.hyperspace.index
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.actions.Constants
class IndexTests extends FunSuite {
class IndexTests extends SparkFunSuite {
val indexConfig1 = IndexConfig("myIndex1", Array("id"), Seq("name"))
val indexConfig2 = IndexConfig("myIndex2", Array("id"), Seq("school"))

View File

@@ -17,13 +17,13 @@
package com.microsoft.hyperspace.index
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, EqualTo, Exists, InSubquery, IsNotNull, ListQuery, Literal, NamedExpression, ScalaUDF, ScalarSubquery}
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeReference, EqualTo, Exists, InSubquery, IsNotNull, ListQuery, Literal, NamedExpression, ScalarSubquery, ScalaUDF}
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.SparkInvolvedSuite
import com.microsoft.hyperspace.index.serde.LogicalPlanSerDeUtils
@@ -32,7 +32,7 @@ import com.microsoft.hyperspace.index.serde.LogicalPlanSerDeUtils
* Some tests are adapted from examples in ExpressionParserSuite.scala, PlanParserSuite.scala,
* and QueryPlanSuite.scala.
*/
class LogicalPlanSerDeTests extends FunSuite with SparkInvolvedSuite {
class LogicalPlanSerDeTests extends SparkFunSuite with SparkInvolvedSuite {
val c1: AttributeReference = AttributeReference("c1", StringType)()
val c2: AttributeReference = AttributeReference("c2", StringType)()
val c3: AttributeReference = AttributeReference("c3", StringType)()

View File

@@ -16,9 +16,9 @@
package com.microsoft.hyperspace.index.plananalysis
import org.scalatest.FunSuite
import org.apache.spark.SparkFunSuite
class BufferStreamTest extends FunSuite {
class BufferStreamTest extends SparkFunSuite {
test("Testing buffer stream highlight with spaces.") {
val s = " Please highlight me. "
val testDisplayMode = new TestDisplayMode

View File

@@ -16,11 +16,11 @@
package com.microsoft.hyperspace.index.plananalysis
import org.scalatest.FunSuite
import org.apache.spark.SparkFunSuite
import com.microsoft.hyperspace.index.IndexConstants
class DisplayModeTest extends FunSuite {
class DisplayModeTest extends SparkFunSuite {
test("Testing default tags in Display Mode.") {
val htmlMode = new HTMLMode(Map.empty)
assert(htmlMode.beginEndTag.open.equals("<pre>"))

View File

@@ -19,13 +19,13 @@ package com.microsoft.hyperspace.index.plananalysis
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.DataFrame
import org.scalatest.FunSuite
import com.microsoft.hyperspace.index.{HyperspaceSuite, IndexConfig, IndexConstants}
import com.microsoft.hyperspace.{Hyperspace, Implicits}
import com.microsoft.hyperspace.index.{HyperspaceSuite, IndexConfig, IndexConstants}
class ExplainTest extends FunSuite with HyperspaceSuite {
class ExplainTest extends SparkFunSuite with HyperspaceSuite {
private val sampleParquetDataLocation = "src/test/resources/sampleparquet"
private val indexStorageLocation = "src/test/resources/indexLocation"
private val fileSystem = new Path(sampleParquetDataLocation).getFileSystem(new Configuration)
@@ -72,9 +72,9 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
// Constructing expected output for given query from explain API
val expectedOutput = new StringBuilder
//The format of the explain output looks as follows:
// Expected output looks like below for this case.
// The format of the explain output looks as follows:
// scalastyle:off filelinelengthchecker
/**
*=============================================================
*Plan with indexes:
@@ -121,7 +121,11 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
* | SortMergeJoin| 1| 1| 0|
* +------------------+-------------------+------------------+----------+
*/
// scalastyle:on filelinelengthchecker
val joinIndexPath = getIndexFilesPath("joinIndex")
// scalastyle:off filelinelengthchecker
expectedOutput
.append("=============================================================")
.append(defaultDisplayMode.newLine)
@@ -215,6 +219,7 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
.append("+------------------+-------------------+------------------+----------+")
.append(defaultDisplayMode.newLine)
.append(defaultDisplayMode.newLine)
// scalastyle:on filelinelengthchecker
val selfJoinDf = df.join(df, df("Col1") === df("Col1"))
verifyExplainOutput(selfJoinDf, expectedOutput.toString(), verbose = true) { df =>
@@ -229,7 +234,9 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
val displayMode = new PlainTextMode(getHighlightConf("<----", "---->"))
// Constructing expected output for given query from explain API
val expectedOutput = new StringBuilder
//The format of the explain output looks as follows:
// The format of the explain output looks as follows:
// scalastyle:off filelinelengthchecker
/**
* =============================================================
* Plan with indexes:
@@ -278,6 +285,9 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
* |WholeStageCodegen| 1| 1| 0|
* +-----------------+-------------------+------------------+----------+
*/
// scalastyle:on filelinelengthchecker
// scalastyle:off filelinelengthchecker
expectedOutput
.append("=============================================================")
.append(displayMode.newLine)
@@ -390,6 +400,7 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
.append("+-----------------+-------------------+------------------+----------+")
.append(displayMode.newLine)
.append(displayMode.newLine)
// scalastyle:on filelinelengthchecker
df.createOrReplaceTempView("query")
hyperspace.createIndex(df, indexConfig)
@@ -428,6 +439,7 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
val indexConfig = IndexConfig("filterIndex", Seq("Col2"), Seq("Col1"))
hyperspace.createIndex(df, indexConfig)
// scalastyle:off filelinelengthchecker
/**
* Expected output with displayMode-specific strings substituted (not shown below):
*
@@ -451,6 +463,7 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
* filterIndex:src/test/resources/indexLocation/indexes/filterIndex
*
*/
// scalastyle:on filelinelengthchecker
val expectedOutput = new StringBuilder
expectedOutput
.append(displayMode.beginEndTag.open)
@@ -543,9 +556,10 @@ class ExplainTest extends FunSuite with HyperspaceSuite {
/**
* Helper method to truncate long string.
* Note: This method truncates long InMemoryFileIndex string to get the similar explainString for comparing
* with Hyperspace's explain API output. For reference, the similar truncation logic for InMemoryFileIndex string
* is in spark code base in DataSourceScanExec.scala in simpleString method.
* Note: This method truncates long InMemoryFileIndex string to get the similar explainString for
* comparing with Hyperspace's explain API output. For reference, the similar truncation logic for
* InMemoryFileIndex string is in spark code base in DataSourceScanExec.scala in simpleString
* method.
*
* @param s long string.
* @return truncated string.

View File

@@ -16,16 +16,16 @@
package com.microsoft.hyperspace.index.plananalysis
import org.apache.spark.SparkFunSuite
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
import org.apache.spark.sql.catalyst.plans.physical.{Distribution, Partitioning, UnknownPartitioning}
import org.apache.spark.sql.execution.SparkPlan
import org.scalatest.FunSuite
import com.microsoft.hyperspace.SparkInvolvedSuite
class PhysicalOperatorAnalyzerTest extends FunSuite with SparkInvolvedSuite {
class PhysicalOperatorAnalyzerTest extends SparkFunSuite with SparkInvolvedSuite {
test("Two plans are the same") {
val plan = DummySparkPlan("plan1", Seq(DummySparkPlan("plan1"), DummySparkPlan("plan2")))
runPhysicalOperatorComparisonTest(

View File

@@ -16,14 +16,14 @@
package com.microsoft.hyperspace.index.rankers
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.actions.Constants
import com.microsoft.hyperspace.index._
class JoinIndexRankerTest extends FunSuite {
class JoinIndexRankerTest extends SparkFunSuite {
val t1c1 = AttributeReference("t1c1", IntegerType)()
val t1c2 = AttributeReference("t1c2", StringType)()

View File

@@ -17,13 +17,13 @@
package com.microsoft.hyperspace.index.rules
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.JoinType
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SparkSession}
import com.microsoft.hyperspace.actions.Constants
import com.microsoft.hyperspace.index._

View File

@@ -18,9 +18,9 @@ package com.microsoft.hyperspace.util
import java.util.UUID
import org.scalatest.FunSuite
import org.apache.spark.SparkFunSuite
class HashingUtilsTests extends FunSuite {
class HashingUtilsTests extends SparkFunSuite {
test("For md5Hashing(), same input has the same output hash code.") {
val randomUUID = UUID.randomUUID.toString
val hashCode1 = HashingUtils.md5Hex(randomUUID)

View File

@@ -16,9 +16,9 @@
package com.microsoft.hyperspace.util
import org.scalatest.FunSuite
import org.apache.spark.SparkFunSuite
class IndexNameUtilsTests extends FunSuite {
class IndexNameUtilsTests extends SparkFunSuite {
test("Test normalizeIndexName() function.") {
val indexName = " my index 1 "
val expectedIndexNameAfterNorm = "my_index_1"

View File

@@ -16,13 +16,13 @@
package com.microsoft.hyperspace.util
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
import com.microsoft.hyperspace.actions.Constants
import com.microsoft.hyperspace.index._
class JsonUtilsTests extends FunSuite {
class JsonUtilsTests extends SparkFunSuite {
test("Test for JsonUtils.") {
val schema = StructType(
Seq(