Add Integration Tests with Docker
Run these tests with `sbt dockerComposeTest`. You can also start the Docker containers separately with `sbt dockerComposeRun`. This patch involves plenty of Spark weirdness; the potential Spark bugs are labeled as such.
Parent: c9b7a77871
Commit: ca8de72394
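For context on how the split between unit and integration tests works: the Docker-backed tests are marked with a ScalaTest tag and filtered out of the plain `test` task. The snippet below is a minimal illustrative sketch of that mechanism, not code from this commit; `ExampleSpec` is hypothetical, while the tag name and the `-l` exclusion mirror the `DockerComposeTag` object and `testOptions` entry added in the diff.

import org.scalatest.{FlatSpec, Matchers, Tag}

// Same tag name that TestErrorAggregator registers below.
object DockerComposeTag extends Tag("DockerComposeTag")

class ExampleSpec extends FlatSpec with Matchers {
  // Runs under a plain `sbt test`.
  "a unit test" should "always run" in {
    1 + 1 should be (2)
  }

  // Skipped by `sbt test` because build.sbt adds Tests.Argument("-l", "DockerComposeTag"),
  // which tells ScalaTest to exclude that tag; `sbt dockerComposeTest` is the task that is
  // expected to run it against the Kafka/Zookeeper containers.
  "an integration test" should "talk to kafka" taggedAs(DockerComposeTag) in {
    1 + 1 should be (2)
  }
}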
.travis.yml | 11
@@ -1,6 +1,11 @@
+sudo: required
 language: scala
 jdk:
 - oraclejdk8
+addons:
+  apt:
+    packages: # Required for docker build
+      - net-tools
 # These directories are cached to S3 at the end of the build
 cache:
   directories:
@@ -10,9 +15,13 @@ before_cache:
 # Tricks to avoid unnecessary cache updates
 - find $HOME/.ivy2 -name "ivydata-*.properties" -delete
 - find $HOME/.sbt -name "*.lock" -delete
+services:
+  - docker
 scala:
 - 2.11.8
+env:
+  - DOCKER_DIR="../docker/"
 script:
 - sbt ci
 after_success:
 - bash <(curl -s https://codecov.io/bash)
build.sbt | 27
@@ -15,22 +15,41 @@ organization := "com.mozilla"

 scalaVersion in ThisBuild := "2.11.8"

-val sparkVersion = "2.1.1"
+val sparkVersion = "2.2.0"

 lazy val root = (project in file(".")).
   settings(
     libraryDependencies += "com.mozilla.telemetry" %% "moztelemetry" % "1.0-SNAPSHOT",
     libraryDependencies += "com.mozilla.telemetry" %% "spark-hyperloglog" % "2.0.0-SNAPSHOT",
-    libraryDependencies += "org.scalatest" %% "scalatest" % "2.2.1" % "test",
+    libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.1" % "test",
     libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion % "provided",
     libraryDependencies += "org.apache.spark" %% "spark-streaming" % sparkVersion % "provided",
     libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
     libraryDependencies += "org.apache.spark" %% "spark-sql-kafka-0-10" % sparkVersion,
     libraryDependencies += "org.rogach" %% "scallop" % "1.0.2",
     libraryDependencies += "com.google.protobuf" % "protobuf-java" % "2.5.0",
-    libraryDependencies += "joda-time" % "joda-time" % "2.9.2"
+    libraryDependencies += "joda-time" % "joda-time" % "2.9.2",
+    libraryDependencies += "org.apache.kafka" % "kafka_2.11" % "0.10.0.1"
   )

+// Setup docker task
+enablePlugins(DockerComposePlugin, DockerPlugin)
+dockerImageCreationTask := docker.value
+composeFile := sys.props.getOrElse("DOCKER_DIR", default = "docker/") + "docker-compose.yml"
+variablesForSubstitutionTask := {
+  val dockerKafkaHost: String = "./docker_setup.sh" !!;
+  Map("DOCKER_KAFKA_HOST" -> dockerKafkaHost)
+}
+
+// Only run docker tasks on `sbt dockerComposeTest`
+testOptions in Test += Tests.Argument("-l", "DockerComposeTag")
+
+dockerfile in docker := {
+  new Dockerfile {
+    from("java")
+  }
+}
+
 // make run command include the provided dependencies
 run in Compile <<= Defaults.runTask(fullClasspath in Compile, mainClass in (Compile, run), runner in (Compile, run))

@@ -52,4 +71,4 @@ assemblyMergeStrategy in assembly := {
     oldStrategy(x)
 }

-addCommandAlias("ci", ";clean ;compile ;scalastyle ;coverage ;test ;coverageReport")
+addCommandAlias("ci", ";clean ;compile ;scalastyle ;coverage ;dockerComposeTest ;coverageReport")
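A note on the `"./docker_setup.sh" !!` line above: that is the `scala.sys.process` API being used inside build.sbt to capture the script's stdout. A minimal standalone sketch of the same call; the explicit `.!!` method form and the `.trim` are assumptions added here for clarity, not part of the commit.

import scala.sys.process._

// Run the helper script and capture its stdout as a String.
// `!!` throws an exception if the process exits with a non-zero status.
val dockerKafkaHost: String = Seq("./docker_setup.sh").!!.trim
println(s"DOCKER_KAFKA_HOST=$dockerKafkaHost")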
docker/docker-compose.yml (new file)
@@ -0,0 +1,17 @@
+version: '2'
+services:
+  zookeeper:
+    image: wurstmeister/zookeeper
+    ports:
+      - "2181:2181"
+  kafka:
+    image: wurstmeister/kafka
+    ports:
+      - "9092:9092"
+    environment:
+      KAFKA_ADVERTISED_HOST_NAME: ${DOCKER_KAFKA_HOST}
+      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    depends_on:
+      - zookeeper
docker_setup.sh (new file)
@@ -0,0 +1,11 @@
+rm -rf /tmp/checkpoint /tmp/parquet
+EN=$(ifconfig en0 | grep "inet " | grep -oE '([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)' | head -n 1)
+ETH=$(ifconfig eth0 | grep "inet " | grep -oE '([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)' | head -n 1)
+
+if test -z "$EN"; then
+  DOCKER_KAFKA_HOST=$ETH
+else
+  DOCKER_KAFKA_HOST=$EN
+fi
+
+echo $DOCKER_KAFKA_HOST
project/plugins.sbt
@@ -8,3 +8,6 @@ resolvers += "Artima Maven Repository" at "http://repo.artima.com/releases"

 addSbtPlugin("com.artima.supersafe" % "sbtplugin" % "1.1.0")

+addSbtPlugin("com.tapad" % "sbt-docker-compose" % "1.0.27")
+
+addSbtPlugin("se.marcuslonnberg" % "sbt-docker" % "1.4.1")
ErrorAggregator.scala
@@ -21,9 +21,12 @@ import org.joda.time.DateTime

 object ErrorAggregator {

+  val kafkaTopic = "telemetry"
+  val outputPrefix = "error_aggregates/v2"
+  val queryName = "error_aggregates"
+
   private val allowedDocTypes = List("main", "crash")
   private val allowedAppNames = List("Firefox")
-  private val outputPrefix = "error_aggregates/v2"
   private val kafkaCacheMaxCapacity = 1000

   // This is the number of files output per submission_date
@@ -311,7 +314,7 @@ object ErrorAggregator {
       .option("failOnDataLoss", opts.failOnDataLoss())
       .option("kafka.max.partition.fetch.bytes", 8 * 1024 * 1024) // 8MB
       .option("spark.streaming.kafka.consumer.cache.maxCapacity", kafkaCacheMaxCapacity)
-      .option("subscribe", "telemetry")
+      .option("subscribe", kafkaTopic)
       .option("startingOffsets", opts.startingOffsets())
       .load()

@@ -319,6 +322,7 @@ object ErrorAggregator {

     aggregate(pings.select("value"), raiseOnError = opts.raiseOnError())
       .writeStream
+      .queryName(queryName)
       .format("parquet")
       .option("path", s"${outputPath}/${outputPrefix}")
       .option("checkpointLocation", opts.checkpointPath())
@@ -370,11 +374,11 @@ object ErrorAggregator {

     val spark = SparkSession.builder()
       .appName("Error Aggregates")
+      .config("spark.streaming.stopGracefullyOnShutdown", "true")
       .getOrCreate()

     spark.udf.register("HllCreate", hllCreate _)

-
     opts.kafkaBroker.get match {
       case Some(_) => writeStreamingAggregates(spark, opts)
       case None => writeBatchAggregates(spark, opts)
TestUtils.scala
@@ -22,7 +22,7 @@ object TestUtils {
   val today = new DateTime(testTimestampMillis)
   val todayDays = new Duration(new DateTime(0), today).getStandardDays().toInt

-  def generateCrashMessages(size: Int, fieldsOverride: Option[Map[String, Any]]=None): Seq[Message] = {
+  def generateCrashMessages(size: Int, fieldsOverride: Option[Map[String, Any]]=None, timestamp: Option[Long]=None): Seq[Message] = {
     val defaultMap = Map(
       "clientId" -> "client1",
       "docType" -> "crash",
@@ -80,11 +80,12 @@ object TestUtils {
         | },
         | "application": ${applicationJson}
         |}""".stripMargin),
-      timestamp=testTimestampNano
+      timestamp=timestamp.getOrElse(testTimestampNano)
      )
    }
  }
-  def generateMainMessages(size: Int, fieldsOverride: Option[Map[String, Any]]=None): Seq[Message] = {
+
+  def generateMainMessages(size: Int, fieldsOverride: Option[Map[String, Any]]=None, timestamp: Option[Long]=None): Seq[Message] = {
     val defaultMap = Map(
       "clientId" -> "client1",
       "docType" -> "main",
@@ -182,7 +183,7 @@ object TestUtils {
         | }
         | }
         |}""".stripMargin),
-      timestamp=testTimestampNano
+      timestamp=timestamp.getOrElse(testTimestampNano)
      )
    }
  }
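The new optional `timestamp` parameter is what lets the Kafka integration test below place messages in different event-time windows. A small usage sketch, assuming `TestUtils` is on the classpath; the message counts here are arbitrary, and the two timestamps are the ones defined later in this commit.

// Messages stamped roughly 33 minutes apart, so they fall into different watermark windows.
val earlier = TestUtils.generateCrashMessages(3, timestamp = Some(1459994516000000000L))
val later   = TestUtils.generateMainMessages(1, timestamp = Some(1459996516000000000L))

// Omitting the parameter keeps the old behaviour and uses testTimestampNano.
val defaulted = TestUtils.generateMainMessages(3)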
TestErrorAggregator.scala
@@ -4,22 +4,46 @@
 package com.mozilla.telemetry.streaming

 import java.sql.Timestamp
+import java.util.Properties
+
+import kafka.admin.AdminUtils
+import kafka.utils.ZkUtils

 import com.mozilla.spark.sql.hyperloglog.functions.{hllCreate, hllCardinality}
 import com.mozilla.telemetry.streaming.TestUtils.todayDays
+import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
 import org.apache.spark.sql.SparkSession
-import org.joda.time.{DateTime, Duration}
+import org.apache.spark.sql.streaming.StreamingQueryListener
+import org.joda.time.{Duration, DateTime}
 import org.json4s.DefaultFormats
-import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}
+import org.scalatest.{BeforeAndAfterAll, AsyncFlatSpec, Matchers, Tag}
+
+import scala.collection.JavaConversions._
+import scala.concurrent.ExecutionContext.Implicits.global
+import scala.concurrent.Future
+import scala.sys.process._

-class TestErrorAggregator extends FlatSpec with Matchers with BeforeAndAfterAll {
+class TestErrorAggregator extends AsyncFlatSpec with Matchers with BeforeAndAfterAll {
+
+  object DockerComposeTag extends Tag("DockerComposeTag")
+
+  val zkHostInfo = "localhost:2181"
+  val kafkaTopicPartitions = 1
+  val kafkaBrokers = "localhost:9092"

   implicit val formats = DefaultFormats
   val k = TestUtils.scalarValue
   val app = TestUtils.application
+
+  // 2016-04-07T02:01:56.000Z
+  val earlierTimestamp = 1459994516000000000L
+
+  // 2016-04-07T02:35:16.000Z
+  val laterTimestamp = 1459996516000000000L
+
   val spark = SparkSession.builder()
     .appName("Error Aggregates")
+    .config("spark.streaming.stopGracefullyOnShutdown", "true")
     .master("local[1]")
     .getOrCreate()

@@ -30,6 +54,27 @@ class TestErrorAggregator extends FlatSpec with Matchers with BeforeAndAfterAll
     spark.stop()
   }

+  def topicExists(zkUtils: ZkUtils, topic: String): Boolean = {
+    // taken from
+    // https://github.com/apache/spark/blob/master/external/kafka-0-10-sql +
+    // src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala#L350
+    return zkUtils.getAllTopics().contains(topic)
+  }
+
+  def createTopic(topic: String, numPartitions: Int) = {
+    val timeoutMs = 10000
+    val isSecureKafkaCluster = false
+    val replicationFactor = 1
+    val topicConfig = new Properties
+    val zkUtils = ZkUtils.apply(zkHostInfo, timeoutMs, timeoutMs, isSecureKafkaCluster)
+
+    if(!topicExists(zkUtils, topic)) {
+      AdminUtils.createTopic(zkUtils, topic, numPartitions, replicationFactor, topicConfig)
+    }
+
+    zkUtils.close()
+  }
+
   "The aggregator" should "sum metrics over a set of dimensions" in {
     import spark.implicits._
     val messages =
@@ -160,7 +205,7 @@ class TestErrorAggregator extends FlatSpec with Matchers with BeforeAndAfterAll
     val messages = (crashMessage ++ mainMessage).map(_.toByteArray).seq
     val df = ErrorAggregator.aggregate(spark.sqlContext.createDataset(messages).toDF, raiseOnError = true, online = false)

-    //one count for each experiment-branch, and one for null-null
+    // one count for each experiment-branch, and one for null-null
     df.count() should be (3)

     val inspectedFields = List(
@@ -264,7 +309,91 @@
     val df = ErrorAggregator.aggregate(spark.sqlContext.createDataset(messages).toDF, raiseOnError = false, online = false)

     df.where("application <> 'Firefox'").count() should be (0)
+  }
+
+  "the aggregator" should "correctly read from kafka" taggedAs(DockerComposeTag) in {
+    spark.sparkContext.setLogLevel("WARN")
+
+    val conf = new Properties()
+    conf.put("bootstrap.servers", kafkaBrokers)
+    conf.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
+    conf.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer")
+
+    val kafkaProducer = new KafkaProducer[String, Array[Byte]](conf)
+    createTopic(ErrorAggregator.kafkaTopic, kafkaTopicPartitions)
+
+    def send(rs: Seq[Array[Byte]]): Unit = {
+      rs.foreach{ v =>
+        val record = new ProducerRecord[String, Array[Byte]](ErrorAggregator.kafkaTopic, v)
+        kafkaProducer.send(record)
+        kafkaProducer.flush()
+      }
+    }
+
+    val earlier = (TestUtils.generateMainMessages(k, timestamp=Some(earlierTimestamp)) ++
+      TestUtils.generateCrashMessages(k, timestamp=Some(earlierTimestamp))).map(_.toByteArray)
+
+    val later = TestUtils.generateMainMessages(1, timestamp=Some(laterTimestamp)).map(_.toByteArray)
+
+    val expectedTotalMsgs = 2 * k
+
+    val listener = new StreamingQueryListener {
+      val DefaultWatermark = "1970-01-01T00:00:00.000Z"
+
+      var messagesSeen = 0L
+      var sentMessages = false
+      var watermarks: Set[String] = Set(DefaultWatermark)
+
+      override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = {}
+
+      override def onQueryProgress(event: StreamingQueryListener.QueryProgressEvent): Unit = {
+        messagesSeen += event.progress.numInputRows
+
+        if(!sentMessages){
+          send(earlier)
+          sentMessages = true
+        }
+
+        // If we only send this message once (i.e. set a flag that we've sent it), Spark will receive
+        // it and process the new rows (should be 3: 1 per experiment), and will update the eventTime["max"]
+        // to be this message's time -- but it will not update the watermark, and thus will not write
+        // the old rows (from earlier) to disk. You can follow this by reading the QueryProgress log events.
+        // If we send more than one, however, it eventually updates the value.
+        if(messagesSeen >= expectedTotalMsgs){
+          send(later)
+        }
+
+        val watermark = event.progress.eventTime.getOrDefault("watermark", DefaultWatermark)
+        watermarks = watermarks | Set(watermark)
+
+        // We're done when we've gone through 3 watermarks -- the default, the earlier, and the later.
+        // When we're on the later watermark, the data from the earlier window is written to disk.
+        if(watermarks.size == 3){
+          spark.streams.active.foreach(_.processAllAvailable)
+          spark.streams.active.foreach(_.stop)
+        }
+      }
+
+      override def onQueryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = {
+        if(messagesSeen < expectedTotalMsgs){
+          println(s"Terminated Early: Expected $expectedTotalMsgs messages, saw $messagesSeen")
+        }
+      }
+    }
+
+    spark.streams.addListener(listener)
+
+    val outputPath = "/tmp/parquet"
+    val args = "--kafkaBroker" :: kafkaBrokers ::
+      "--outputPath" :: outputPath ::
+      "--startingOffsets" :: "latest" ::
+      "--raiseOnError" :: Nil
+
+    val mainRes: Future[Unit] = Future {
+      ErrorAggregator.main(args.toArray)
+    }
+
+    mainRes map {_ => assert(spark.read.parquet(s"$outputPath/${ErrorAggregator.outputPrefix}").count() == 3)}
   }

   "The resulting schema" should "not have fields belonging to the tempSchema" in {
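One more note on the test harness: the suite now extends `AsyncFlatSpec`, so the Kafka test can end with `mainRes map { _ => assert(...) }` and let ScalaTest wait on the `Future`. A minimal, illustrative sketch of that pattern (`AsyncExampleSpec` is hypothetical, not project code):

import org.scalatest.AsyncFlatSpec
import scala.concurrent.Future

class AsyncExampleSpec extends AsyncFlatSpec {
  // AsyncFlatSpec supplies an implicit ExecutionContext; the test body returns a
  // Future[Assertion] and the suite completes the test when that future does.
  "an async test" should "complete when its Future completes" in {
    Future { 21 * 2 } map { result => assert(result == 42) }
  }
}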