Merge pull request #8 from riv/master
[NEW FEATURE] MSGHT Non-Incremental Pipeline Setup
This commit is contained in:
Коммит
8c275a4a1f
|
@ -251,3 +251,4 @@ paket-files/
|
||||||
.idea/
|
.idea/
|
||||||
*.sln.iml
|
*.sln.iml
|
||||||
|
|
||||||
|
/ghinsights/DataFactory/ProductionEnvironment.json
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-08-01/Microsoft.DataFactory.Table.json",
|
||||||
|
"name": "MSEventDetail",
|
||||||
|
"properties": {
|
||||||
|
"type": "AzureBlob",
|
||||||
|
"linkedServiceName": "MSPrestagedEventsAzureStorage",
|
||||||
|
"typeProperties": {
|
||||||
|
"folderPath": "raw/{EventName}/v1/{Year}/{Month}",
|
||||||
|
"fileName": "{EventName}_{Year}_{Month}_{Day}.json.gz",
|
||||||
|
"partitionedBy": [
|
||||||
|
{
|
||||||
|
"name": "Year",
|
||||||
|
"value": {
|
||||||
|
"type": "DateTime",
|
||||||
|
"date": "SliceStart",
|
||||||
|
"format": "yyyy"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Month",
|
||||||
|
"value": {
|
||||||
|
"type": "DateTime",
|
||||||
|
"date": "SliceStart",
|
||||||
|
"format": "MM"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Day",
|
||||||
|
"value": {
|
||||||
|
"type": "DateTime",
|
||||||
|
"date": "SliceStart",
|
||||||
|
"format": "dd"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"external": false,
|
||||||
|
"availability": {
|
||||||
|
"frequency": "Day",
|
||||||
|
"interval": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,39 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-08-01/Microsoft.DataFactory.Pipeline.json",
|
||||||
|
"name": "MSGHTEventDetailPipeline",
|
||||||
|
"properties": {
|
||||||
|
"description": "Custom DotNetActivity pipeline to gather microsoft github mongodb backups and unpack the seperate events into a new container.",
|
||||||
|
"activities": [
|
||||||
|
{
|
||||||
|
"name": "MongoDbDumpTransformActivity",
|
||||||
|
"type": "DotNetActivity",
|
||||||
|
"inputs": [ { "name": "MSMongoDbDump" } ],
|
||||||
|
"outputs": [ { "name": "MSEventDetail" } ],
|
||||||
|
"typeProperties": {
|
||||||
|
"assemblyName": "GHInsights.DataFactory.dll",
|
||||||
|
"entryPoint": "GHInsights.DataFactory.MongoDbDumpTransformActivity",
|
||||||
|
"packageLinkedService": "GHInsightsAzureStorage",
|
||||||
|
"packageFile": "datafactory/GHInsights.DataFactory.zip",
|
||||||
|
"extendedProperties": {
|
||||||
|
"Year": "$$Text.Format('{0:yyyy}',SliceStart)",
|
||||||
|
"Month": "$$Text.Format('{0:MM}',SliceStart)",
|
||||||
|
"Day": "$$Text.Format('{0:dd}',SliceStart)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"linkedServiceName": "BatchProcessor",
|
||||||
|
"policy": {
|
||||||
|
"concurrency": 6,
|
||||||
|
"executionPriorityOrder": "NewestFirst",
|
||||||
|
"retry": 0,
|
||||||
|
"timeout": "04:00:00"
|
||||||
|
},
|
||||||
|
"scheduler": {
|
||||||
|
"frequency": "Day",
|
||||||
|
"interval": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"start": "2016-06-13T00:00:00Z",
|
||||||
|
"end": "9999-09-09T00:00:00Z"
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,11 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-10-01/Microsoft.DataFactory.LinkedService.json",
|
||||||
|
|
||||||
|
"name": "MSGHTorrentAzureStorage",
|
||||||
|
"properties": {
|
||||||
|
"type": "AzureStorageSas",
|
||||||
|
"typeProperties": {
|
||||||
|
"sasUri": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,61 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-08-01/Microsoft.DataFactory.Table.json",
|
||||||
|
"name": "MSMongoDbDump",
|
||||||
|
"properties": {
|
||||||
|
"type": "AzureBlob",
|
||||||
|
"linkedServiceName": "MSGHTorrentAzureStorage",
|
||||||
|
// "structure": [],
|
||||||
|
"typeProperties": {
|
||||||
|
"folderPath": "msght-azure-storage/downloads/",
|
||||||
|
"fileName": "mongo-dump-{Year}-{Month}-{Day}.tar.gz",
|
||||||
|
"partitionedBy": [
|
||||||
|
{
|
||||||
|
"name": "Year",
|
||||||
|
"value": {
|
||||||
|
"type": "DateTime",
|
||||||
|
"date": "SliceStart",
|
||||||
|
"format": "yyyy"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Month",
|
||||||
|
"value": {
|
||||||
|
"type": "DateTime",
|
||||||
|
"date": "SliceStart",
|
||||||
|
"format": "MM"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Day",
|
||||||
|
"value": {
|
||||||
|
"type": "DateTime",
|
||||||
|
"date": "SliceStart",
|
||||||
|
"format": "dd"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
//,"format": {
|
||||||
|
// "type": "<Two formats are supported: TextFormat, AvroFormat. If the type is 'TextFormat', you can specify the following properties. The type 'Avro' does not require any additional properties>",
|
||||||
|
// "columnDelimiter": "<The character used as a column separator in a file. This property is optional. The default value is comma (,)>",
|
||||||
|
// "rowDelimiter": "<The character used as a row separator in a file. This property is optional. The default value is any of the following: (” \n”)>",
|
||||||
|
// "EscapeChar": "<The character used to escape any special character in the blob content. This property is optional. No default value>",
|
||||||
|
// "NullValue": "<The character used to represent null value in the blob content. This property is optional. The default value is ” \n”>"
|
||||||
|
// }
|
||||||
|
},
|
||||||
|
"external": true,
|
||||||
|
"availability": {
|
||||||
|
"frequency": "Day",
|
||||||
|
"interval": 1
|
||||||
|
},
|
||||||
|
"policy": {
|
||||||
|
"validation": {
|
||||||
|
"minimumSizeMB": 0.1
|
||||||
|
},
|
||||||
|
"externalData": {
|
||||||
|
"retryInterval": "01:00:00",
|
||||||
|
"retryTimeout": "00:10:00",
|
||||||
|
"maximumRetry": 10
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,10 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-08-01/Microsoft.DataFactory.LinkedService.json",
|
||||||
|
"name": "MSPrestagedEventsAzureStorage",
|
||||||
|
"properties": {
|
||||||
|
"type": "AzureStorage",
|
||||||
|
"typeProperties": {
|
||||||
|
"connectionString": ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -54,7 +54,7 @@
|
||||||
"externalData": {
|
"externalData": {
|
||||||
"retryInterval": "01:00:00",
|
"retryInterval": "01:00:00",
|
||||||
"retryTimeout": "00:10:00",
|
"retryTimeout": "00:10:00",
|
||||||
"maximumRetry": 24
|
"maximumRetry": 10
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,7 +55,7 @@ SELECT GHInsights.USql.Utility.GetString(Data, "repo") AS Repo
|
||||||
,GHInsights.USql.Utility.GetString(Data, "base.user.login") AS BaseUserLogin
|
,GHInsights.USql.Utility.GetString(Data, "base.user.login") AS BaseUserLogin
|
||||||
,GHInsights.USql.Utility.GetBoolean(Data, "base.user.site_admin") AS BaseUserSiteAdmin
|
,GHInsights.USql.Utility.GetBoolean(Data, "base.user.site_admin") AS BaseUserSiteAdmin
|
||||||
,GHInsights.USql.Utility.GetString(Data, "base.user.type") AS BaseUserType
|
,GHInsights.USql.Utility.GetString(Data, "base.user.type") AS BaseUserType
|
||||||
,GHInsights.USql.Utility.GetUsqlString(Data, "body") AS Body
|
,GHInsights.USql.Utility.GetUSqlString(Data, "body") AS Body
|
||||||
,GHInsights.USql.Utility.GetInteger(Data, "changed_files") AS ChangedFiles
|
,GHInsights.USql.Utility.GetInteger(Data, "changed_files") AS ChangedFiles
|
||||||
,GHInsights.USql.Utility.GetDateTime(Data, "closed_at") AS ClosedAt
|
,GHInsights.USql.Utility.GetDateTime(Data, "closed_at") AS ClosedAt
|
||||||
,GHInsights.USql.Utility.GetInteger(Data, "comments") AS Comments
|
,GHInsights.USql.Utility.GetInteger(Data, "comments") AS Comments
|
||||||
|
|
Загрузка…
Ссылка в новой задаче