Merge pull request #8 from riv/master
[NEW FEATURE] MSGHT Non-Incremental Pipeline Setup
This commit is contained in:
Коммит
8c275a4a1f
|
@ -251,3 +251,4 @@ paket-files/
|
|||
.idea/
|
||||
*.sln.iml
|
||||
|
||||
/ghinsights/DataFactory/ProductionEnvironment.json
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
{
|
||||
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-08-01/Microsoft.DataFactory.Table.json",
|
||||
"name": "MSEventDetail",
|
||||
"properties": {
|
||||
"type": "AzureBlob",
|
||||
"linkedServiceName": "MSPrestagedEventsAzureStorage",
|
||||
"typeProperties": {
|
||||
"folderPath": "raw/{EventName}/v1/{Year}/{Month}",
|
||||
"fileName": "{EventName}_{Year}_{Month}_{Day}.json.gz",
|
||||
"partitionedBy": [
|
||||
{
|
||||
"name": "Year",
|
||||
"value": {
|
||||
"type": "DateTime",
|
||||
"date": "SliceStart",
|
||||
"format": "yyyy"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Month",
|
||||
"value": {
|
||||
"type": "DateTime",
|
||||
"date": "SliceStart",
|
||||
"format": "MM"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Day",
|
||||
"value": {
|
||||
"type": "DateTime",
|
||||
"date": "SliceStart",
|
||||
"format": "dd"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"external": false,
|
||||
"availability": {
|
||||
"frequency": "Day",
|
||||
"interval": 1
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
{
|
||||
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-08-01/Microsoft.DataFactory.Pipeline.json",
|
||||
"name": "MSGHTEventDetailPipeline",
|
||||
"properties": {
|
||||
"description": "Custom DotNetActivity pipeline to gather microsoft github mongodb backups and unpack the seperate events into a new container.",
|
||||
"activities": [
|
||||
{
|
||||
"name": "MongoDbDumpTransformActivity",
|
||||
"type": "DotNetActivity",
|
||||
"inputs": [ { "name": "MSMongoDbDump" } ],
|
||||
"outputs": [ { "name": "MSEventDetail" } ],
|
||||
"typeProperties": {
|
||||
"assemblyName": "GHInsights.DataFactory.dll",
|
||||
"entryPoint": "GHInsights.DataFactory.MongoDbDumpTransformActivity",
|
||||
"packageLinkedService": "GHInsightsAzureStorage",
|
||||
"packageFile": "datafactory/GHInsights.DataFactory.zip",
|
||||
"extendedProperties": {
|
||||
"Year": "$$Text.Format('{0:yyyy}',SliceStart)",
|
||||
"Month": "$$Text.Format('{0:MM}',SliceStart)",
|
||||
"Day": "$$Text.Format('{0:dd}',SliceStart)"
|
||||
}
|
||||
},
|
||||
"linkedServiceName": "BatchProcessor",
|
||||
"policy": {
|
||||
"concurrency": 6,
|
||||
"executionPriorityOrder": "NewestFirst",
|
||||
"retry": 0,
|
||||
"timeout": "04:00:00"
|
||||
},
|
||||
"scheduler": {
|
||||
"frequency": "Day",
|
||||
"interval": 1
|
||||
}
|
||||
}
|
||||
],
|
||||
"start": "2016-06-13T00:00:00Z",
|
||||
"end": "9999-09-09T00:00:00Z"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-10-01/Microsoft.DataFactory.LinkedService.json",
|
||||
|
||||
"name": "MSGHTorrentAzureStorage",
|
||||
"properties": {
|
||||
"type": "AzureStorageSas",
|
||||
"typeProperties": {
|
||||
"sasUri": ""
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
{
|
||||
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-08-01/Microsoft.DataFactory.Table.json",
|
||||
"name": "MSMongoDbDump",
|
||||
"properties": {
|
||||
"type": "AzureBlob",
|
||||
"linkedServiceName": "MSGHTorrentAzureStorage",
|
||||
// "structure": [],
|
||||
"typeProperties": {
|
||||
"folderPath": "msght-azure-storage/downloads/",
|
||||
"fileName": "mongo-dump-{Year}-{Month}-{Day}.tar.gz",
|
||||
"partitionedBy": [
|
||||
{
|
||||
"name": "Year",
|
||||
"value": {
|
||||
"type": "DateTime",
|
||||
"date": "SliceStart",
|
||||
"format": "yyyy"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Month",
|
||||
"value": {
|
||||
"type": "DateTime",
|
||||
"date": "SliceStart",
|
||||
"format": "MM"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Day",
|
||||
"value": {
|
||||
"type": "DateTime",
|
||||
"date": "SliceStart",
|
||||
"format": "dd"
|
||||
}
|
||||
}
|
||||
]
|
||||
//,"format": {
|
||||
// "type": "<Two formats are supported: TextFormat, AvroFormat. If the type is 'TextFormat', you can specify the following properties. The type 'Avro' does not require any additional properties>",
|
||||
// "columnDelimiter": "<The character used as a column separator in a file. This property is optional. The default value is comma (,)>",
|
||||
// "rowDelimiter": "<The character used as a row separator in a file. This property is optional. The default value is any of the following: (” \n”)>",
|
||||
// "EscapeChar": "<The character used to escape any special character in the blob content. This property is optional. No default value>",
|
||||
// "NullValue": "<The character used to represent null value in the blob content. This property is optional. The default value is ” \n”>"
|
||||
// }
|
||||
},
|
||||
"external": true,
|
||||
"availability": {
|
||||
"frequency": "Day",
|
||||
"interval": 1
|
||||
},
|
||||
"policy": {
|
||||
"validation": {
|
||||
"minimumSizeMB": 0.1
|
||||
},
|
||||
"externalData": {
|
||||
"retryInterval": "01:00:00",
|
||||
"retryTimeout": "00:10:00",
|
||||
"maximumRetry": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"$schema": "http://datafactories.schema.management.azure.com/schemas/2015-08-01/Microsoft.DataFactory.LinkedService.json",
|
||||
"name": "MSPrestagedEventsAzureStorage",
|
||||
"properties": {
|
||||
"type": "AzureStorage",
|
||||
"typeProperties": {
|
||||
"connectionString": ""
|
||||
}
|
||||
}
|
||||
}
|
|
@ -54,7 +54,7 @@
|
|||
"externalData": {
|
||||
"retryInterval": "01:00:00",
|
||||
"retryTimeout": "00:10:00",
|
||||
"maximumRetry": 24
|
||||
"maximumRetry": 10
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ SELECT GHInsights.USql.Utility.GetString(Data, "repo") AS Repo
|
|||
,GHInsights.USql.Utility.GetString(Data, "base.user.login") AS BaseUserLogin
|
||||
,GHInsights.USql.Utility.GetBoolean(Data, "base.user.site_admin") AS BaseUserSiteAdmin
|
||||
,GHInsights.USql.Utility.GetString(Data, "base.user.type") AS BaseUserType
|
||||
,GHInsights.USql.Utility.GetUsqlString(Data, "body") AS Body
|
||||
,GHInsights.USql.Utility.GetUSqlString(Data, "body") AS Body
|
||||
,GHInsights.USql.Utility.GetInteger(Data, "changed_files") AS ChangedFiles
|
||||
,GHInsights.USql.Utility.GetDateTime(Data, "closed_at") AS ClosedAt
|
||||
,GHInsights.USql.Utility.GetInteger(Data, "comments") AS Comments
|
||||
|
|
Загрузка…
Ссылка в новой задаче