diff --git a/artifacts/spark-cdm-connector-assembly-0.19.1.jar b/artifacts/spark-cdm-connector-assembly-0.19.1.jar
deleted file mode 100644
index 8430d47..0000000
Binary files a/artifacts/spark-cdm-connector-assembly-0.19.1.jar and /dev/null differ
diff --git a/artifacts/spark-cdm-connector-assembly-synapse-spark3-1.19.2.jar b/artifacts/spark-cdm-connector-assembly-synapse-spark3-1.19.2.jar
deleted file mode 100644
index 101bef0..0000000
Binary files a/artifacts/spark-cdm-connector-assembly-synapse-spark3-1.19.2.jar and /dev/null differ
diff --git a/artifacts/spark-cdm-connector-assembly-synapse-spark3.2-1.19.4.jar b/artifacts/spark-cdm-connector-assembly-synapse-spark3.3-1.19.5.jar
similarity index 69%
rename from artifacts/spark-cdm-connector-assembly-synapse-spark3.2-1.19.4.jar
rename to artifacts/spark-cdm-connector-assembly-synapse-spark3.3-1.19.5.jar
index 34ae23f..1209092 100644
Binary files a/artifacts/spark-cdm-connector-assembly-synapse-spark3.2-1.19.4.jar and b/artifacts/spark-cdm-connector-assembly-synapse-spark3.3-1.19.5.jar differ
diff --git a/build.sbt b/build.sbt
index 13b1b31..7a97229 100644
--- a/build.sbt
+++ b/build.sbt
@@ -17,34 +17,30 @@ pomPostProcess := { (node: XmlNode) =>
 }).transform(node).head
 }
 
-version := "spark3.2-1.19.4"
+version := "spark3.3-1.19.5"
 
 crossPaths := false
 
 ThisBuild / scalaVersion := "2.12.15"
 
 libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.8" % "test"
-libraryDependencies += "com.fasterxml.jackson.datatype" % "jackson-datatype-jdk8" % "2.13.3"
-libraryDependencies += "com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % "2.13.3"
+libraryDependencies += "com.fasterxml.jackson.datatype" % "jackson-datatype-jdk8" % "2.13.4"
+libraryDependencies += "com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % "2.13.4"
 
 //these libraries already exist in spark HDI 2.4.0 - don't include them building the uber jar
-dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.3"
-libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.13.3"
-libraryDependencies += "com.fasterxml.jackson.core" % "jackson-core" % "2.13.3"
-libraryDependencies += "com.fasterxml.jackson.core" % "jackson-annotations" % "2.13.3"
-libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.5" % "provided"
-libraryDependencies += "log4j" % "log4j" % "1.2.17" % "provided"
-libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.2.1" % "provided"
-libraryDependencies += "org.apache.spark" %% "spark-core" % "3.2.1" % "provided"
-libraryDependencies += "org.apache.httpcomponents" % "httpclient" % "4.5.6" % "provided"
+dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.4.1"
+libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.13.4"
+libraryDependencies += "com.fasterxml.jackson.core" % "jackson-core" % "2.13.4"
+libraryDependencies += "com.fasterxml.jackson.core" % "jackson-annotations" % "2.13.4"
+libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.12.0" % "provided"
+libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.3.0" % "provided"
+libraryDependencies += "org.apache.spark" %% "spark-core" % "3.3.0" % "provided"
+libraryDependencies += "org.apache.httpcomponents" % "httpclient" % "4.5.13" % "provided"
 libraryDependencies += "com.google.guava" % "guava" % "14.0.1" % "provided"
-libraryDependencies += "commons-io" % "commons-io" % "2.4" % "provided"
-libraryDependencies += "com.microsoft.azure" % "adal4j" % "1.6.3"
+libraryDependencies += "commons-io" % "commons-io" % "2.11.0" % "provided"
 libraryDependencies += "com.microsoft.azure" % "msal4j" % "1.10.1"
 libraryDependencies += "org.apache.hadoop" % "hadoop-azure" % "3.3.1" % "provided"
 libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.1" % "provided"
 
-libraryDependencies += "org.wildfly.openssl" % "wildfly-openssl" % "1.0.7.Final" % "provided"
-
 resolvers += "Maven Twitter Releases" at "https://maven.twttr.com/"
 libraryDependencies += "com.hadoop.gplcompression" % "hadoop-lzo" % "0.4.20"
@@ -67,10 +63,10 @@ lazy val grandchild = Project("DatabricksADTokenMock", file("DatabricksTokenProv
 
 //assembly
 assemblyShadeRules in assembly := Seq(
+  ShadeRule.rename("com.microsoft.aad.msal4j.**" -> "shadeiomsal4j.@1").inAll,
   ShadeRule.rename("com.fasterxml.jackson.**" -> "shadeio.@1").inAll,
   ShadeRule.rename("com.nimbusds.**" -> "shadeionimbusds.@1").inAll,
-  ShadeRule.rename("com.microsoft.aad.adal4j.**" -> "shadeioadal4j.@1").inAll,
-  ShadeRule.rename("com.microsoft.aad.msal4j.**" -> "shadeiomsal4j.@1").inAll
+  ShadeRule.rename("net.minidev.**" -> "shadeiominidev.@1").inAll
 )
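Note on the build.sbt changes above: the adal4j dependency and its shade rule are removed (msal4j was already present and its rule moves to the top of the list), and the assembly now relocates Jackson, MSAL4J, Nimbus, and json-smart (net.minidev) so the uber jar cannot clash with the copies Synapse ships. A rough way to confirm the relocation took effect — a sketch only, assuming the assembled jar is on the classpath — is to resolve a shaded class reflectively:

    // Under ShadeRule.rename("com.fasterxml.jackson.**" -> "shadeio.@1"),
    // com.fasterxml.jackson.databind.ObjectMapper is rewritten to
    // shadeio.databind.ObjectMapper inside the uber jar.
    val shaded = Class.forName("shadeio.databind.ObjectMapper")
      .getDeclaredConstructor()
      .newInstance()
    println(shaded.getClass.getName) // prints: shadeio.databind.ObjectMapper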
"1.6.3" +libraryDependencies += "commons-io" % "commons-io" % "2.11.0" % "provided" libraryDependencies += "com.microsoft.azure" % "msal4j" % "1.10.1" libraryDependencies += "org.apache.hadoop" % "hadoop-azure" % "3.3.1" % "provided" libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.1" % "provided" -libraryDependencies += "org.wildfly.openssl" % "wildfly-openssl" % "1.0.7.Final" % "provided" - resolvers += "Maven Twitter Releases" at "https://maven.twttr.com/" libraryDependencies += "com.hadoop.gplcompression" % "hadoop-lzo" % "0.4.20" @@ -67,10 +63,10 @@ lazy val grandchild = Project("DatabricksADTokenMock", file("DatabricksTokenProv //assembly assemblyShadeRules in assembly := Seq( + ShadeRule.rename("com.microsoft.aad.msal4j.**" -> "shadeiomsal4j.@1").inAll, ShadeRule.rename("com.fasterxml.jackson.**" -> "shadeio.@1").inAll, ShadeRule.rename("com.nimbusds.**" -> "shadeionimbusds.@1").inAll, - ShadeRule.rename("com.microsoft.aad.adal4j.**" -> "shadeioadal4j.@1").inAll, - ShadeRule.rename("com.microsoft.aad.msal4j.**" -> "shadeiomsal4j.@1").inAll + ShadeRule.rename("net.minidev.**" -> "shadeiominidev.@1").inAll ) diff --git a/project/plugins.sbt b/project/plugins.sbt index 13fe6ad..385b155 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,4 +1,5 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6") addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.4") addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.0-M2") -addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0") \ No newline at end of file +addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0") +addDependencyTreePlugin diff --git a/src/main/scala/com/microsoft/cdm/CDMCatalog.scala b/src/main/scala/com/microsoft/cdm/CDMCatalog.scala index 0d448c8..4cb9fdd 100644 --- a/src/main/scala/com/microsoft/cdm/CDMCatalog.scala +++ b/src/main/scala/com/microsoft/cdm/CDMCatalog.scala @@ -13,22 +13,25 @@ import org.slf4j.LoggerFactory import org.slf4j.event.Level class CDMCatalog extends CatalogPlugin with TableCatalog with SupportsNamespaces { - val logger = LoggerFactory.getLogger(classOf[CDMCatalog]) - var cdmOptions: CDMOptions = null; - var tables: HadoopTables = null; + var cdmOptions: CDMOptions = _ + var tables: HadoopTables = _ + private var options: CaseInsensitiveStringMap = _ override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = { logger.info("Initializing CDM Catalog...") this.tables = new HadoopTables() } + def setupOptions(options: CaseInsensitiveStringMap): Unit = { + this.options = options + } + @throws(classOf[NoSuchTableException]) override def loadTable(ident: Identifier): SparkTable = { try { - cdmOptions = ident.asInstanceOf[CDMIdentifier].cdmOptions - val cdmEntity = tables.load(cdmOptions) - new SparkTable(cdmEntity.schema, ident.asInstanceOf[CDMIdentifier].optionsAsHashMap) + val cdmEntity = tables.load(new CDMOptions(options)) + new SparkTable(cdmEntity.schema, options) } catch { case e: EntityNotFoundException => throw new NoSuchTableException(e.getMessage) case e: ManifestNotFoundException => throw new NoSuchTableException(e.getMessage) @@ -37,7 +40,7 @@ class CDMCatalog extends CatalogPlugin with TableCatalog with SupportsNamespaces @throws(classOf[TableAlreadyExistsException]) override def createTable(ident: Identifier, schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): Table = { - new SparkTable(schema, ident.asInstanceOf[CDMIdentifier].optionsAsHashMap) //make it write options + new SparkTable(schema, options) 
diff --git a/src/main/scala/com/microsoft/cdm/CDMCatalog.scala b/src/main/scala/com/microsoft/cdm/CDMCatalog.scala
index 0d448c8..4cb9fdd 100644
--- a/src/main/scala/com/microsoft/cdm/CDMCatalog.scala
+++ b/src/main/scala/com/microsoft/cdm/CDMCatalog.scala
@@ -13,22 +13,25 @@ import org.slf4j.LoggerFactory
 import org.slf4j.event.Level
 
 class CDMCatalog extends CatalogPlugin with TableCatalog with SupportsNamespaces {
-
   val logger = LoggerFactory.getLogger(classOf[CDMCatalog])
-  var cdmOptions: CDMOptions = null;
-  var tables: HadoopTables = null;
+  var cdmOptions: CDMOptions = _
+  var tables: HadoopTables = _
+  private var options: CaseInsensitiveStringMap = _
 
   override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
     logger.info("Initializing CDM Catalog...")
     this.tables = new HadoopTables()
   }
 
+  def setupOptions(options: CaseInsensitiveStringMap): Unit = {
+    this.options = options
+  }
+
   @throws(classOf[NoSuchTableException])
   override def loadTable(ident: Identifier): SparkTable = {
     try {
-      cdmOptions = ident.asInstanceOf[CDMIdentifier].cdmOptions
-      val cdmEntity = tables.load(cdmOptions)
-      new SparkTable(cdmEntity.schema, ident.asInstanceOf[CDMIdentifier].optionsAsHashMap)
+      val cdmEntity = tables.load(new CDMOptions(options))
+      new SparkTable(cdmEntity.schema, options)
     } catch {
       case e: EntityNotFoundException => throw new NoSuchTableException(e.getMessage)
       case e: ManifestNotFoundException => throw new NoSuchTableException(e.getMessage)
@@ -37,7 +40,7 @@ class CDMCatalog extends CatalogPlugin with TableCatalog with SupportsNamespaces
 
   @throws(classOf[TableAlreadyExistsException])
   override def createTable(ident: Identifier, schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): Table = {
-    new SparkTable(schema, ident.asInstanceOf[CDMIdentifier].optionsAsHashMap) //make it write options
+    new SparkTable(schema, options) //make it write options
   }
 
   override def alterTable(ident: Identifier, changes: TableChange*): Table = {
@@ -58,14 +61,13 @@ class CDMCatalog extends CatalogPlugin with TableCatalog with SupportsNamespaces
   override def alterNamespace(namespace: Array[String], changes: NamespaceChange*): Unit = throw new UnsupportedOperationException("Not supported")
 
-  override def dropNamespace(namespace: Array[String]): Boolean = throw new UnsupportedOperationException("Not supported")
+  override def dropNamespace(namespace: Array[String], cascade: Boolean): Boolean = throw new UnsupportedOperationException("Not supported")
 
   override def listTables(namespace: Array[String]): Array[Identifier] = throw new UnsupportedOperationException("Not supported")
 
   override def toString = s"${this.getClass.getCanonicalName}($name)"
 
-  override def name(): String = "CDM"
-
+  override def name(): String = "cdm"
 
   private def getRequiredArgument(options: CaseInsensitiveStringMap, arg: String): String = {
     val result = if (options.containsKey(arg)) options.get(arg)
     else {
diff --git a/src/main/scala/com/microsoft/cdm/CDMIdentifier.scala b/src/main/scala/com/microsoft/cdm/CDMIdentifier.scala
index e8cdc64..6714eac 100644
--- a/src/main/scala/com/microsoft/cdm/CDMIdentifier.scala
+++ b/src/main/scala/com/microsoft/cdm/CDMIdentifier.scala
@@ -6,12 +6,9 @@ import org.apache.spark.sql.connector.catalog.Identifier
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 
 class CDMIdentifier(options: CaseInsensitiveStringMap) extends Identifier{
-
-  val cdmOptions = new CDMOptions(options)
+  private val cdmOptions = new CDMOptions(options) // used to do option validation
 
   override def namespace(): Array[String] = Array(cdmOptions.storage, cdmOptions.container, cdmOptions.manifestFileName)
 
   override def name(): String = cdmOptions.entity
-
-  val optionsAsHashMap = options
 }
diff --git a/src/main/scala/com/microsoft/cdm/DefaultSource.scala b/src/main/scala/com/microsoft/cdm/DefaultSource.scala
index 8525938..ce7c17e 100644
--- a/src/main/scala/com/microsoft/cdm/DefaultSource.scala
+++ b/src/main/scala/com/microsoft/cdm/DefaultSource.scala
@@ -34,25 +34,15 @@ class DefaultSource extends SupportsCatalogOptions{
 
   override def supportsExternalMetadata(): Boolean = true
 
-  def setupDefaultSparkCatalog(spark: SparkSession, options: CaseInsensitiveStringMap) = {
-    spark.conf.set("spark.sql.catalog.cdm", "com.microsoft.cdm.CDMCatalog")
-//    spark.conf.set("spark.sql.catalog.cdm.appId", options.get("appId"))
-//    spark.conf.set("spark.sql.catalog.cdm.appKey", options.get("appKey"))
-//    spark.conf.set("spark.sql.catalog.cdm.tenantId", options.get("tenantId"))
-//    spark.conf.set("spark.sql.catalog.cdm.storage", options.get("storage"))
-//    spark.conf.set("spark.sql.catalog.cdm.container", options.get("container"))
-    spark.sessionState.catalogManager.catalog("cdm")
-  }
-
   override def extractIdentifier(options: CaseInsensitiveStringMap): Identifier = {
-
     val spark = SparkSession.active;
-    setupDefaultSparkCatalog(spark, options);
+    spark.conf.set("spark.sql.catalog.cdm", "com.microsoft.cdm.CDMCatalog")
+    val cdmcatalog = spark.sessionState.catalogManager.catalog("cdm")
+    cdmcatalog.asInstanceOf[CDMCatalog].setupOptions(options)
     new CDMIdentifier(options)
   }
 
   override def extractCatalog(options: CaseInsensitiveStringMap): String = {
     "cdm"
   }
-
-}
\ No newline at end of file
+}
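End-to-end, the SupportsCatalogOptions flow is now: Spark calls extractCatalog (always "cdm"), extractIdentifier registers CDMCatalog against the active session and hands the read options to it via setupOptions, and loadTable builds the SparkTable from those options rather than from a CDMIdentifier cast. A minimal read that exercises this path — a sketch assuming the option names from the connector's documentation (storage, manifestPath, entity); the account, container, and entity values are placeholders:

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().getOrCreate()

    // Placeholder values; point these at a real ADLS gen2 account and manifest.
    val df = spark.read.format("com.microsoft.cdm")
      .option("storage", "<account>.dfs.core.windows.net")
      .option("manifestPath", "<container>/default.manifest.cdm.json")
      .option("entity", "Person")
      .load()

    df.printSchema()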