зеркало из
1
0
Форкнуть 0
This commit is contained in:
Marvin Buss 2020-09-10 16:16:36 +02:00
Родитель eb32da4806
Коммит d1b0ec1640
5 изменённых файлов: 545 добавлений и 0 удалений

48
.gitattributes поставляемый Normal file
Просмотреть файл

@ -0,0 +1,48 @@
###############################################################################
# Set default behavior to automatically normalize line endings.
###############################################################################
* text=auto
###############################################################################
# Set line-ending and diff behavior for scripts and configuration files
###############################################################################
*.sh text eol=lf
*.py text eol=lf diff=python
*.json text
*.yaml text
*.yml text
Dockerfile text
###############################################################################
# behavior for image files
#
# image files are treated as binary by default.
###############################################################################
*.jpg binary
*.png binary
*.gif binary
###############################################################################
# diff behavior for common document formats
#
# Convert binary document formats to text before diffing them. This feature
# is only available from the command line. The entries below are enabled;
# comment them out to turn it off.
###############################################################################
*.doc diff=astextplain
*.DOC diff=astextplain
*.docx diff=astextplain
*.DOCX diff=astextplain
*.dot diff=astextplain
*.DOT diff=astextplain
*.pdf diff=astextplain
*.PDF diff=astextplain
*.rtf diff=astextplain
*.RTF diff=astextplain
*.md text
###############################################################################
# Exclude files from exporting
###############################################################################
.gitattributes export-ignore
.gitignore export-ignore

351
.gitignore поставляемый Normal file
Просмотреть файл

@ -0,0 +1,351 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
.vscode/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/

Просмотреть файл

@ -1,3 +1,9 @@
# Notes on Service Principal
**Access required:**
* Access to resource group
# Contributing

Просмотреть файл

@ -0,0 +1,42 @@
# Databricks Hive Metastore configuration
Set the following spark configuration:
```bash
spark.hadoop.javax.jdo.option.ConnectionURL jdbc:sqlserver://<your-sql-server-name>.database.windows.net:1433;database=<your-sql-database-name>;user=<your-sql-server-username>@<your-sql-server-name>;password=<your-sql-server-password>;encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;
spark.hadoop.javax.jdo.option.ConnectionUserName <your-sql-server-username>
spark.hadoop.javax.jdo.option.ConnectionPassword <your-sql-server-password>
spark.hadoop.javax.jdo.option.ConnectionDriverName com.microsoft.sqlserver.jdbc.SQLServerDriver
spark.sql.hive.metastore.version 1.2.1
spark.sql.hive.metastore.jars builtin
datanucleus.autoCreateSchema true
datanucleus.fixedDatastore false
```
Test the metastore by creating a table and then listing the tables:
```sql
%sql
CREATE TABLE Persons (
PersonID int,
LastName varchar(255),
FirstName varchar(255),
Address varchar(255),
City varchar(255)
);
```
```sql
%sql
show tables;
```
Databricks runtime versions working with Hive Metastore version 1.2.1:
* Databricks Runtime Version 5.5 LTS
* Databricks Runtime Version 6.6 (includes Apache Spark 2.4.5, Scala 2.11)
Newer Databricks runtime versions (7.X) don't work with any Hive Metastore version if `spark.sql.hive.metastore.jars` is set to `builtin`.
Also, none of the Databricks versions work with a Hive Metastore version higher than 1.2.1 if `spark.sql.hive.metastore.jars` is set to `builtin`.
What we would like to achieve is to avoid locking the user into a specific Databricks runtime version, while also automatically attaching all Databricks clusters to the external Hive metastore. This could potentially be achieved by executing an init script in each cluster through cluster policies and pulling the correct jars based on the selected Databricks runtime version.

Просмотреть файл

@ -0,0 +1,98 @@
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"privateEndpointName": {
"type": "string",
"metadata": {
"description": "Specifies the name of your private endpoint."
}
},
"privateEndpointLocation": {
"type": "string",
"metadata": {
"description": "Specifies the location of your private endpoint."
}
},
"privateLinkServiceResourceId": {
"type": "string",
"metadata": {
"description": "Specifies the id of the resource to enable private endpoint for."
}
},
"privateLinkServiceGroupId": {
"type": "string",
"metadata": {
"description": "Specifies the id of the sub service to create a link for (e.g. sqlServer, blob, table)."
}
},
"subnetResourceId": {
"type": "string",
"metadata": {
"description": "Specifies the resource id of the subnet that should be used for the private endpoint."
}
},
"privateDnsZoneId": {
"type": "string",
"metadata": {
"description": "Specifies the resource id of the private DNS zone that should be used for the private endpoint."
}
}
},
"functions": [],
"variables": {
"privateEndpointName": "[parameters('privateEndpointName')]",
"privateEndpointLocation": "[parameters('privateEndpointLocation')]",
"privateLinkServiceResourceId": "[parameters('privateLinkServiceResourceId')]",
"privateLinkServiceGroupId": "[parameters('privateLinkServiceGroupId')]",
"subnetResourceId": "[parameters('subnetResourceId')]",
"privateDnsZoneId": "[parameters('privateDnsZoneId')]"
},
"resources": [
{
"type": "Microsoft.Network/privateEndpoints",
"apiVersion": "2020-05-01",
"name": "[variables('privateEndpointName')]",
"location": "[variables('privateEndpointLocation')]",
"properties": {
"privateLinkServiceConnections": [
{
"name": "[variables('privateEndpointName')]",
"properties": {
"privateLinkServiceId": "[variables('privateLinkServiceResourceId')]",
"groupIds": [
"[variables('privateLinkServiceGroupId')]"
]
}
}
],
"manualPrivateLinkServiceConnections": [
],
"subnet": {
"id": "[variables('subnetResourceId')]"
}
}
},
{
"name": "[concat(variables('privateEndpointName'), '/aRecord')]",
"type": "Microsoft.Network/privateEndpoints/privateDnsZoneGroups",
"apiVersion": "2020-05-01",
"dependsOn": [
"[resourceId('Microsoft.Network/privateEndpoints', variables('privateEndpointName'))]"
],
"location": "[variables('privateEndpointLocation')]",
"properties": {
"privateDnsZoneConfigs": [
{
"name": "[concat(variables('privateEndpointName'), '-aRecord')]",
"properties": {
"privateDnsZoneId": "[variables('privateDnsZoneId')]"
}
}
]
}
}
],
"outputs": {}
}