This commit is contained in:
kecheung 2023-03-14 14:05:43 -07:00 committed by GitHub
Parent 32517aa73b
Commit 2fe91231af
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 33 additions and 47 deletions

Binary file not shown.

Binary file not shown.

View file

@@ -17,34 +17,30 @@ pomPostProcess := { (node: XmlNode) =>
}).transform(node).head
}
version := "spark3.2-1.19.4"
version := "spark3.3-1.19.5"
crossPaths := false
ThisBuild / scalaVersion := "2.12.15"
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.8" % "test"
libraryDependencies += "com.fasterxml.jackson.datatype" % "jackson-datatype-jdk8" % "2.13.3"
libraryDependencies += "com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % "2.13.3"
libraryDependencies += "com.fasterxml.jackson.datatype" % "jackson-datatype-jdk8" % "2.13.4"
libraryDependencies += "com.fasterxml.jackson.datatype" % "jackson-datatype-jsr310" % "2.13.4"
//these libraries already exist in spark HDI 2.4.0 - don't include them building the uber jar
dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.3"
libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.13.3"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-core" % "2.13.3"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-annotations" % "2.13.3"
libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.5" % "provided"
libraryDependencies += "log4j" % "log4j" % "1.2.17" % "provided"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.2.1" % "provided"
libraryDependencies += "org.apache.spark" %% "spark-core" % "3.2.1" % "provided"
libraryDependencies += "org.apache.httpcomponents" % "httpclient" % "4.5.6" % "provided"
dependencyOverrides += "com.fasterxml.jackson.core" % "jackson-databind" % "2.13.4.1"
libraryDependencies += "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.13.4"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-core" % "2.13.4"
libraryDependencies += "com.fasterxml.jackson.core" % "jackson-annotations" % "2.13.4"
libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.12.0" % "provided"
libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.3.0" % "provided"
libraryDependencies += "org.apache.spark" %% "spark-core" % "3.3.0" % "provided"
libraryDependencies += "org.apache.httpcomponents" % "httpclient" % "4.5.13" % "provided"
libraryDependencies += "com.google.guava" % "guava" % "14.0.1" % "provided"
libraryDependencies += "commons-io" % "commons-io" % "2.4" % "provided"
libraryDependencies += "com.microsoft.azure" % "adal4j" % "1.6.3"
libraryDependencies += "commons-io" % "commons-io" % "2.11.0" % "provided"
libraryDependencies += "com.microsoft.azure" % "msal4j" % "1.10.1"
libraryDependencies += "org.apache.hadoop" % "hadoop-azure" % "3.3.1" % "provided"
libraryDependencies += "org.apache.hadoop" % "hadoop-common" % "3.3.1" % "provided"
libraryDependencies += "org.wildfly.openssl" % "wildfly-openssl" % "1.0.7.Final" % "provided"
resolvers += "Maven Twitter Releases" at "https://maven.twttr.com/"
libraryDependencies += "com.hadoop.gplcompression" % "hadoop-lzo" % "0.4.20"
@@ -67,10 +63,10 @@ lazy val grandchild = Project("DatabricksADTokenMock", file("DatabricksTokenProv
//assembly
assemblyShadeRules in assembly := Seq(
ShadeRule.rename("com.microsoft.aad.msal4j.**" -> "shadeiomsal4j.@1").inAll,
ShadeRule.rename("com.fasterxml.jackson.**" -> "shadeio.@1").inAll,
ShadeRule.rename("com.nimbusds.**" -> "shadeionimbusds.@1").inAll,
ShadeRule.rename("com.microsoft.aad.adal4j.**" -> "shadeioadal4j.@1").inAll,
ShadeRule.rename("com.microsoft.aad.msal4j.**" -> "shadeiomsal4j.@1").inAll
ShadeRule.rename("net.minidev.**" -> "shadeiominidev.@1").inAll
)
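
For context: sbt-assembly's ShadeRule.rename relocates matching classes inside the uber jar, and "@1" expands to whatever the "**" wildcard matched. A minimal sketch of the effect of the Jackson rule above, with class names chosen for illustration:

// ShadeRule.rename("com.fasterxml.jackson.**" -> "shadeio.@1").inAll
// rewrites the bundled copies, e.g.
//   com.fasterxml.jackson.core.JsonFactory      -> shadeio.core.JsonFactory
//   com.fasterxml.jackson.databind.ObjectMapper -> shadeio.databind.ObjectMapper
// so the connector's Jackson 2.13.4 classes cannot clash with whatever
// Jackson version is already on Spark's classpath; the msal4j, nimbusds,
// and minidev rules do the same for the new MSAL dependency chain.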

View file

@@ -2,3 +2,4 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.6")
addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.4")
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "2.0.0-M2")
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0")
addDependencyTreePlugin
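
addDependencyTreePlugin is built into sbt 1.4+ and enables the dependencyTree task. One way to confirm that the dependencyOverrides pin on jackson-databind 2.13.4.1 actually wins resolution, as a sketch (output shape illustrative):

// in the sbt shell:
//   > dependencyTree
// then check that com.fasterxml.jackson.core:jackson-databind resolves
// to 2.13.4.1 in the printed graph.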

View file

@@ -13,22 +13,25 @@ import org.slf4j.LoggerFactory
import org.slf4j.event.Level
class CDMCatalog extends CatalogPlugin with TableCatalog with SupportsNamespaces {
val logger = LoggerFactory.getLogger(classOf[CDMCatalog])
var cdmOptions: CDMOptions = null;
var tables: HadoopTables = null;
var cdmOptions: CDMOptions = _
var tables: HadoopTables = _
private var options: CaseInsensitiveStringMap = _
override def initialize(name: String, options: CaseInsensitiveStringMap): Unit = {
logger.info("Initializing CDM Catalog...")
this.tables = new HadoopTables()
}
def setupOptions(options: CaseInsensitiveStringMap): Unit = {
this.options = options
}
@throws(classOf[NoSuchTableException])
override def loadTable(ident: Identifier): SparkTable = {
try {
cdmOptions = ident.asInstanceOf[CDMIdentifier].cdmOptions
val cdmEntity = tables.load(cdmOptions)
new SparkTable(cdmEntity.schema, ident.asInstanceOf[CDMIdentifier].optionsAsHashMap)
val cdmEntity = tables.load(new CDMOptions(options))
new SparkTable(cdmEntity.schema, options)
} catch {
case e: EntityNotFoundException => throw new NoSuchTableException(e.getMessage)
case e: ManifestNotFoundException => throw new NoSuchTableException(e.getMessage)
@@ -37,7 +40,7 @@ class CDMCatalog extends CatalogPlugin with TableCatalog with SupportsNamespaces
@throws(classOf[TableAlreadyExistsException])
override def createTable(ident: Identifier, schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): Table = {
new SparkTable(schema, ident.asInstanceOf[CDMIdentifier].optionsAsHashMap) //make it write options
new SparkTable(schema, options) //make it write options
}
override def alterTable(ident: Identifier, changes: TableChange*): Table = {
@@ -58,14 +61,13 @@ class CDMCatalog extends CatalogPlugin with TableCatalog with SupportsNamespaces
override def alterNamespace(namespace: Array[String], changes: NamespaceChange*): Unit = throw new UnsupportedOperationException("Not supported")
override def dropNamespace(namespace: Array[String]): Boolean = throw new UnsupportedOperationException("Not supported")
override def dropNamespace(namespace: Array[String], cascade: Boolean): Boolean = throw new UnsupportedOperationException("Not supported")
override def listTables(namespace: Array[String]): Array[Identifier] = throw new UnsupportedOperationException("Not supported")
override def toString = s"${this.getClass.getCanonicalName}($name)"
override def name(): String = "CDM"
override def name(): String = "cdm"
private def getRequiredArgument(options: CaseInsensitiveStringMap, arg: String): String = {
val result = if (options.containsKey(arg)) options.get(arg) else {
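
The net effect of the CDMCatalog changes: the per-read options now live on the catalog itself rather than being carried by the identifier, so loadTable builds its CDMOptions from the map stored by setupOptions. A minimal sketch of the resulting call order (options is a CaseInsensitiveStringMap, ident a CDMIdentifier):

val catalog = new CDMCatalog()
catalog.initialize("cdm", options)   // creates the HadoopTables helper
catalog.setupOptions(options)        // stores the read options on the catalog
val table = catalog.loadTable(ident) // builds CDMOptions from the stored
                                     // options, not from the identifier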

View file

@@ -6,12 +6,9 @@ import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.util.CaseInsensitiveStringMap
class CDMIdentifier(options: CaseInsensitiveStringMap) extends Identifier{
val cdmOptions = new CDMOptions(options)
private val cdmOptions = new CDMOptions(options) // used to do option validation
override def namespace(): Array[String] = Array(cdmOptions.storage, cdmOptions.container, cdmOptions.manifestFileName)
override def name(): String = cdmOptions.entity
val optionsAsHashMap = options
}
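
After this change the CDMOptions instance is private and exists only to validate the incoming options; the catalog no longer reads cdmOptions or optionsAsHashMap from the identifier. What the identifier still exposes to Spark, as a sketch with placeholder values:

// val ident = new CDMIdentifier(options)
// ident.namespace() == Array(<storage>, <container>, <manifestFileName>)
// ident.name()      == <entity>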

View file

@@ -34,25 +34,15 @@ class DefaultSource extends SupportsCatalogOptions{
override def supportsExternalMetadata(): Boolean = true
def setupDefaultSparkCatalog(spark: SparkSession, options: CaseInsensitiveStringMap) = {
spark.conf.set("spark.sql.catalog.cdm", "com.microsoft.cdm.CDMCatalog")
// spark.conf.set("spark.sql.catalog.cdm.appId", options.get("appId"))
// spark.conf.set("spark.sql.catalog.cdm.appKey", options.get("appKey"))
// spark.conf.set("spark.sql.catalog.cdm.tenantId", options.get("tenantId"))
// spark.conf.set("spark.sql.catalog.cdm.storage", options.get("storage"))
// spark.conf.set("spark.sql.catalog.cdm.container", options.get("container"))
spark.sessionState.catalogManager.catalog("cdm")
}
override def extractIdentifier(options: CaseInsensitiveStringMap): Identifier = {
val spark = SparkSession.active;
setupDefaultSparkCatalog(spark, options);
spark.conf.set("spark.sql.catalog.cdm", "com.microsoft.cdm.CDMCatalog")
val cdmcatalog = spark.sessionState.catalogManager.catalog("cdm")
cdmcatalog.asInstanceOf[CDMCatalog].setupOptions(options)
new CDMIdentifier(options)
}
override def extractCatalog(options: CaseInsensitiveStringMap): String = {
"cdm"
}
}
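
End to end, SupportsCatalogOptions is what routes a plain format(...) read through the catalog: Spark calls extractCatalog (always "cdm" here), then extractIdentifier, which registers CDMCatalog under spark.sql.catalog.cdm, hands it the options via setupOptions, and returns a CDMIdentifier. A minimal usage sketch, assuming the short format name "com.microsoft.cdm" (which Spark resolves to this package's DefaultSource) and illustrative option keys and values:

val df = SparkSession.active.read
  .format("com.microsoft.cdm")
  .option("storage", "myaccount.dfs.core.windows.net")           // illustrative
  .option("manifestPath", "container/default.manifest.cdm.json") // illustrative
  .option("entity", "Person")                                    // illustrative
  .load()
// Spark: extractCatalog -> "cdm", extractIdentifier -> CDMIdentifier,
// then CDMCatalog.loadTable produces the SparkTable.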