From d1b0ec1640b5971d0d6efc6cafbd1d10cd182b55 Mon Sep 17 00:00:00 2001 From: Marvin Buss Date: Thu, 10 Sep 2020 16:16:36 +0200 Subject: [PATCH] initial push --- .gitattributes | 48 +++ .gitignore | 351 ++++++++++++++++++ README.md | 6 + infra/Databricks/README.md | 42 +++ .../deploy.privateEndpoint.json | 98 +++++ 5 files changed, 545 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 infra/Databricks/README.md create mode 100644 infra/PrivateEndpoint/deploy.privateEndpoint.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b62e19c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,48 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto + +############################################################################### +# Set the merge driver for project and solution files +############################################################################### +*.sh text eol=lf +*.py text eol=lf diff=python +*.json text +*.yaml text +*.yml text +Dockerfile text + +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. +############################################################################### +*.jpg binary +*.png binary +*.gif binary + +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. Turn it on by uncommenting the +# entries below. 
+############################################################################### +*.doc diff=astextplain +*.DOC diff=astextplain +*.docx diff=astextplain +*.DOCX diff=astextplain +*.dot diff=astextplain +*.DOT diff=astextplain +*.pdf diff=astextplain +*.PDF diff=astextplain +*.rtf diff=astextplain +*.RTF diff=astextplain +*.md text + +############################################################################### +# Exclude files from exporting +############################################################################### +.gitattributes export-ignore +.gitignore export-ignore \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c8aa4a0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,351 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +.vscode/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb 
+*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ diff --git a/README.md b/README.md index 8eeee9c..0be1593 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,9 @@ +# Notes on Service Principal + +**Access required:** +* Access to resource group + + # Contributing diff --git a/infra/Databricks/README.md b/infra/Databricks/README.md new file mode 100644 index 0000000..29f6b65 --- /dev/null +++ b/infra/Databricks/README.md @@ -0,0 +1,42 @@ +# Databricks Hive Metastore configuration + +Set the following spark configuration: + +```bash +spark.hadoop.javax.jdo.option.ConnectionURL jdbc:sqlserver://.database.windows.net:1433;database=;user=@;password=;encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30; +spark.hadoop.javax.jdo.option.ConnectionUserName 
+spark.hadoop.javax.jdo.option.ConnectionPassword +spark.hadoop.javax.jdo.option.ConnectionDriverName com.microsoft.sqlserver.jdbc.SQLServerDriver +spark.sql.hive.metastore.version 1.2.1 +spark.sql.hive.metastore.jars builtin +datanucleus.autoCreateSchema true +datanucleus.fixedDatastore false +``` + +Test the metastore + +```sql +%sql + +CREATE TABLE Persons ( + PersonID int, + LastName varchar(255), + FirstName varchar(255), + Address varchar(255), + City varchar(255) +); +``` + +```sql +%sql + +show tables; +``` + +Databricks runtime versions working with Hive Metastore version 1.2.1: +* Databricks Runtime Version 5.5 LTS +* Databricks Runtime Version 6.6 (includes Apache Spark 2.4.5, Scala 2.11) + +Newer Databricks runtime versions (7.X) don't work with any Hive Metastore version if `spark.sql.hive.metastore.jars` is set to `builtin`. +Also, none of the Databricks versions work with a Hive Metastore version higher than 1.2.1 if `spark.sql.hive.metastore.jars` is set to `builtin`. +What we would like to achieve is that we don't have to lock the user into using a specific Databricks runtime version, while also automatically attaching all Databricks clusters to the external Hive metastore. This could potentially be achieved if we execute an init script in each of the clusters through cluster policies and pull the correct jars based on the selected Databricks runtime version. diff --git a/infra/PrivateEndpoint/deploy.privateEndpoint.json b/infra/PrivateEndpoint/deploy.privateEndpoint.json new file mode 100644 index 0000000..342b620 --- /dev/null +++ b/infra/PrivateEndpoint/deploy.privateEndpoint.json @@ -0,0 +1,98 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "privateEndpointName": { + "type": "string", + "metadata": { + "description": "Specifies the name of your private endpoint." 
+ } + }, + "privateEndpointLocation": { + "type": "string", + "metadata": { + "description": "Specifies the location of your private endpoint." + } + }, + "privateLinkServiceResourceId": { + "type": "string", + "metadata": { + "description": "Specifies the id of the resource to enable private endpoint for." + } + }, + "privateLinkServiceGroupId": { + "type": "string", + "metadata": { + "description": "Specifies the id of the sub service to create a link for (e.g. sqlServer, blob, table)." + } + }, + "subnetResourceId": { + "type": "string", + "metadata": { + "description": "Specifies the resource id of the subnet that should be used for the private endpoint." + } + }, + "privateDnsZoneId": { + "type": "string", + "metadata": { + "description": "Specifies the resource id of the private DNS zone that should be used for the private endpoint." + } + } + }, + "functions": [], + "variables": { + "privateEndpointName": "[parameters('privateEndpointName')]", + "privateEndpointLocation": "[parameters('privateEndpointLocation')]", + "privateLinkServiceResourceId": "[parameters('privateLinkServiceResourceId')]", + "privateLinkServiceGroupId": "[parameters('privateLinkServiceGroupId')]", + "subnetResourceId": "[parameters('subnetResourceId')]", + "privateDnsZoneId": "[parameters('privateDnsZoneId')]" + }, + "resources": [ + { + "type": "Microsoft.Network/privateEndpoints", + "apiVersion": "2020-05-01", + "name": "[variables('privateEndpointName')]", + "location": "[variables('privateEndpointLocation')]", + "properties": { + "privateLinkServiceConnections": [ + { + "name": "[variables('privateEndpointName')]", + "properties": { + "privateLinkServiceId": "[variables('privateLinkServiceResourceId')]", + "groupIds": [ + "[variables('privateLinkServiceGroupId')]" + ] + } + } + ], + "manualPrivateLinkServiceConnections": [ + ], + "subnet": { + "id": "[variables('subnetResourceId')]" + } + } + }, + { + "name": "[concat(variables('privateEndpointName'), '/aRecord')]", + "type": 
"Microsoft.Network/privateEndpoints/privateDnsZoneGroups", + "apiVersion": "2020-05-01", + "dependsOn": [ + "[resourceId('Microsoft.Network/privateEndpoints', variables('privateEndpointName'))]" + ], + "location": "[variables('privateEndpointLocation')]", + "properties": { + "privateDnsZoneConfigs": [ + { + "name": "[concat(variables('privateEndpointName'), '-aRecord')]", + "properties": { + "privateDnsZoneId": "[variables('privateDnsZoneId')]" + } + } + ] + } + } + + ], + "outputs": {} +} \ No newline at end of file