Merge pull request #1414 from ewiseblatt/install_monitoring

Install scripts for monitoring support.
This commit is contained in:
Eric Wiseblatt 2017-02-13 14:21:15 -05:00 коммит произвёл GitHub
Родитель ffc1c67943 33bbea849e
Коммит 0e478caa87
21 изменённых файлов: 4708 добавлений и 1075 удалений

Просмотреть файл

@ -231,7 +231,7 @@ services:
spectator: spectator:
webEndpoint: webEndpoint:
enabled: false enabled: true
stackdriver: stackdriver:
enabled: false enabled: false

Просмотреть файл

@ -238,7 +238,7 @@ services:
spectator: spectator:
webEndpoint: webEndpoint:
enabled: false enabled: true
stackdriver: stackdriver:
enabled: ${SPINNAKER_STACKDRIVER_ENABLED:false} enabled: ${SPINNAKER_STACKDRIVER_ENABLED:false}

Просмотреть файл

@ -309,6 +309,36 @@
}, },
"title": "Avg Clouddriver Controller Invocation Time (ms per minute)" "title": "Avg Clouddriver Controller Invocation Time (ms per minute)"
}, },
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Successful Operations (per minute)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.operations_count{success:false} by {operationtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Failed Operations (per minute)"
},
{ {
"definition": { "definition": {
"viz": "timeseries", "viz": "timeseries",
@ -422,28 +452,99 @@
"viz": "timeseries", "viz": "timeseries",
"requests": [ "requests": [
{ {
"q": "sum:orca.threadpool.activeCount{*}", "q": "diff(sum:orca.task.invocations{executiontype:orchestration,status:running} by {taskname})",
"aggregator": "avg", "aggregator": "avg",
"conditional_formats": [], "conditional_formats": [],
"type": "line" "type": "bars"
} }
] ],
"autoscale": true
}, },
"title": "Active Orca Threads (per minute)" "title": "Active Orchestrations (orca)"
}, },
{ {
"definition": { "definition": {
"viz": "timeseries", "viz": "timeseries",
"requests": [ "requests": [
{ {
"q": "per_minute(sum:igor.controller.invocations_totalTime{*} by {method}) / 1000000 / per_minute(avg:igor.controller.invocations_count{*} by {method})", "q": "diff(sum:orca.task.invocations{executiontype:orchestration,status:succeeded} by {taskname})",
"aggregator": "avg", "aggregator": "avg",
"conditional_formats": [], "conditional_formats": [],
"type": "line" "type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": "- diff(sum:orca.task.invocations{executiontype:orchestration,status:terminal} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "warm"
}
} }
] ],
"autoscale": true
}, },
"title": "Igor Controller Invocation Time (ms per minute)" "title": "Completed Orchestrations (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{status:running,executiontype:pipeline} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Active Pipelines (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{status:succeeded,executiontype:pipeline} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": "- diff(sum:orca.task.invocations{status:terminal,executiontype:pipeline} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "warm"
}
}
],
"autoscale": true
},
"title": "Completed Pipelines (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "sum:orca.threadpool.activeCount{*} by {id}",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Active Orca Threads (per minute)"
}, },
{ {
"definition": { "definition": {
@ -460,6 +561,20 @@
}, },
"title": "Last known Orca Active Threads" "title": "Last known Orca Active Threads"
}, },
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "per_minute(sum:igor.controller.invocations_totalTime{*} by {method}) / 1000000 / per_minute(avg:igor.controller.invocations_count{*} by {method})",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
}
]
},
"title": "Igor Controller Invocation Time (ms per minute)"
},
{ {
"definition": { "definition": {
"viz": "timeseries", "viz": "timeseries",
@ -545,24 +660,31 @@
"viz": "timeseries", "viz": "timeseries",
"requests": [ "requests": [
{ {
"q": "per_minute(sum:rosco.bakes{*} by {success})", "q": "diff(sum:rosco.bakesActive{*})",
"aggregator": "avg", "aggregator": "avg",
"style": { "conditional_formats": [],
"palette": "cool" "type": "line"
},
"type": "bars",
"conditional_formats": []
}, },
{ {
"q": "per_minute(sum:rosco.bakes.local{*} by {active})", "q": "diff(sum:rosco.bakesRequested{*} by {flavor})",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": "diff(sum:rosco.bakesCompleted_count{success:false} by {region})",
"conditional_formats": [],
"type": "bars",
"style": { "style": {
"palette": "warm" "palette": "warm"
}, }
"type": "bars"
} }
] ],
"autoscale": true
}, },
"title": "Rosco Bakes (local warm, per minute)" "title": "Rosco Bake Activity"
}, },
{ {
"definition": { "definition": {
@ -817,7 +939,7 @@
"viz": "timeseries", "viz": "timeseries",
"requests": [ "requests": [
{ {
"q": " - diff(sum:clouddriver.google.api_count{success:false} by {api,scope}.as_rate()), diff(sum:clouddriver.google.api_count{success:true} by {api,scope}.as_rate())", "q": "- diff(sum:clouddriver.google.api_count{success:false} by {api,scope}.as_rate()), diff(sum:clouddriver.google.api_count{success:true} by {api,scope}.as_rate())",
"aggregator": "avg", "aggregator": "avg",
"style": { "style": {
"palette": "dog_classic" "palette": "dog_classic"
@ -828,7 +950,7 @@
], ],
"autoscale": true "autoscale": true
}, },
"title": "Google API Call Rate (per minute, lines by scope, bars by api)" "title": "Google API Call Rate"
}, },
{ {
"definition": { "definition": {
@ -856,7 +978,7 @@
"q": "per_minute(sum:clouddriver.google.api_totalTime{*} by {api,scope}) / 1000000 / per_minute(sum:clouddriver.google.api_count{*} by {api,scope}.as_count())", "q": "per_minute(sum:clouddriver.google.api_totalTime{*} by {api,scope}) / 1000000 / per_minute(sum:clouddriver.google.api_count{*} by {api,scope}.as_count())",
"aggregator": "avg", "aggregator": "avg",
"conditional_formats": [], "conditional_formats": [],
"type": "bars" "type": "line"
} }
], ],
"autoscale": true "autoscale": true
@ -868,20 +990,58 @@
"viz": "timeseries", "viz": "timeseries",
"requests": [ "requests": [
{ {
"q": "diff(sum:clouddriver.google.operationWaits_count{*} by {basephase})", "q": "per_minute(sum:clouddriver.google.batchExecute_count{*} by {context})",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
}
],
"autoscale": true
},
"title": "Google Batch Count (per minute)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "per_minute(sum:clouddriver.google.batchExecute_totalTime{*} by {context}) / 1000000 / per_minute(sum:clouddriver.google.batchExecute_count{*} by {context})",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
}
],
"autoscale": true
},
"title": "Batch Call Latency (ms per call minute)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "per_minute(sum:clouddriver.google.batchSize{*} by {context})",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
}
],
"autoscale": true
},
"title": "Batch Call Size (per minute)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.google.operationWaits_count{status:done} by {basephase,scope})",
"aggregator": "avg", "aggregator": "avg",
"conditional_formats": [], "conditional_formats": [],
"type": "bars", "type": "bars",
"style": { "style": {
"palette": "cool" "palette": "dog_classic"
} }
},
{
"q": "diff(sum:clouddriver.google.operationWaits_count{*} by {scope})",
"style": {
"palette": "warm"
},
"type": "line"
} }
], ],
"autoscale": true, "autoscale": true,
@ -891,7 +1051,53 @@
} }
} }
}, },
"title": "Google Waiting Operations (diff, bars by phase, lines by scope)" "title": "Successful Google Operations (clouddriver)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.google.operationWaits_count{!status:done} by {basephase,scope})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "dog_classic"
}
}
],
"autoscale": true,
"yaxis": {
"filter": {
"below": 0.01
}
}
},
"title": "Failed Google Operations (clouddriver)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.google.operationWaitRequests{*} by {basephase,scope})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "dog_classic"
}
}
],
"autoscale": true,
"yaxis": {
"filter": {
"below": 0.01
}
}
},
"title": "Google Operations Started (clouddriver)"
}, },
{ {
"definition": { "definition": {
@ -928,36 +1134,6 @@
}, },
"title": "Google Operation Waits by Phase" "title": "Google Operation Waits by Phase"
}, },
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "per_minute(sum:clouddriver.onDemand_total_totalTime{*} by {ondemandtype}) / 1000000000 / per_minute(sum:clouddriver.onDemand_total_count{*} by {ondemandtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Clouddriver OnDemand Invocation Time (ms per call per minute)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.operations_count{*} by {operationtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Clouddriver Operations (type per minute)"
},
{ {
"definition": { "definition": {
"viz": "timeseries", "viz": "timeseries",
@ -972,23 +1148,8 @@
"autoscale": true "autoscale": true
}, },
"title": "Spectator Time Series Streams" "title": "Spectator Time Series Streams"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.google.batchExecute_count{*} by {context})",
"aggregator": "avg",
"conditional_formats": [],
"type": "area"
}
],
"autoscale": true
},
"title": "Google Batch Execution Count"
} }
], ],
"description": "Contains graphs of various metrics within Spinnaker to illustrate what is available.", "description": "Contains graphs of various metrics within Spinnaker to illustrate what is available.",
"title": "FullSpinnaker" "title": "Spinnaker Kitchen Sink"
} }

Просмотреть файл

@ -0,0 +1,299 @@
{
"read_only": false,
"graphs": [
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:gate.hystrix.rollingCountShortCircuited{*} by {metricgroup})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
},
{
"q": "diff(sum:igor.hystrix.rollingCountShortCircuited{*} by {metricgroup})",
"type": "bars"
},
{
"q": "diff(sum:front50.hystrix.rollingCountShortCircuited{*} by {metricgroup})",
"type": "bars"
}
],
"autoscale": true
},
"title": "Hystrix Short Circuited (global)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:front50.hystrix.countExceptionsThrown{*})",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
},
{
"q": "diff(sum:igor.hystrix.countExceptionsThrown{*}.as_count())",
"type": "bars"
},
{
"q": "diff(sum:gate.hystrix.countExceptionsThrown{*})",
"type": "line"
}
],
"autoscale": true
},
"title": "Hystrix Exceptions (global)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{executiontype:orchestration,status:running} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Active Orchestrations (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{executiontype:orchestration,status:succeeded} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": "- diff(sum:orca.task.invocations{executiontype:orchestration,status:terminal} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "warm"
}
}
],
"autoscale": true
},
"title": "Completed Orchestrations (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{status:running,executiontype:pipeline} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Active Pipelines (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{status:succeeded,executiontype:pipeline} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": " - diff(sum:orca.task.invocations{status:terminal,executiontype:pipeline} by {taskname})",
"style": {
"palette": "warm"
},
"type": "bars"
}
],
"autoscale": true
},
"title": "Completed Pipelines (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype}), - diff(sum:clouddriver.operations_count{!success:true} by {operationtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Active Threads (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:echo.pipelines.triggered{*} by {application})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Pipelines Triggered (echo)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:rosco.bakesActive{*})",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
},
{
"q": "diff(sum:rosco.bakesRequested{*} by {flavor})",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": "diff(sum:rosco.bakesCompleted_count{success:false} by {region})",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "warm"
}
}
],
"autoscale": true
},
"title": "Bake Activity (rosco)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "sum:front50.storageServiceSupport.cacheSize{*} by {objecttype}",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Item Cache Size (front50)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Successful Operations (clouddriver)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.operations_count{success:false} by {operationtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Failed Operations (clouddriver)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "avg:system.load.1{*}",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
},
{
"q": "avg:system.mem.used{*} / avg:system.mem.total{*}",
"type": "line"
}
],
"autoscale": true
},
"title": "System Load and Pct Memory Used"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "sum:front50.jvm.memory.used{*}",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
},
{
"q": "sum:clouddriver.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:orca.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:gate.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:igor.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:rosco.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:echo.jvm.memory.used{*}",
"type": "line"
}
],
"autoscale": true
},
"title": "Microservice JVM Memory Used"
}
],
"description": "A bare-bones dashboard for monitoring a Spinnaker deployment",
"title": "Minimal Spinnaker"
}

Просмотреть файл

@ -1,6 +1,139 @@
{ {
"read_only": false, "read_only": false,
"graphs": [ "graphs": [
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{$SourceApplication,executiontype:orchestration,status:running} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "$SourceApplication Active Orchestrations (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{$SourceApplication,executiontype:orchestration,status:succeeded} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": "- diff(sum:orca.task.invocations{$SourceApplication,executiontype:orchestration,status:terminal} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "warm"
}
}
],
"autoscale": true
},
"title": "$SourceApplication Completed Orchestrations (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{$SourceApplication,status:running,executiontype:pipeline} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "$SourceApplication Active Pipelines (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:orca.task.invocations{$SourceApplication,status:succeeded,executiontype:pipeline} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": "- diff(sum:orca.task.invocations{$SourceApplication,status:terminal,executiontype:pipeline} by {taskname})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "warm"
}
}
],
"autoscale": true
},
"title": "$SourceApplication Completed Pipelines (orca)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:echo.pipelines.triggered{$Application} by {name})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "$Application Pipelines Triggered (echo)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:rosco.bakesActive{*})",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
},
{
"q": "diff(sum:rosco.bakesRequested{*} by {flavor})",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "cool"
}
},
{
"q": "diff(sum:rosco.bakesCompleted_count{success:false} by {region})",
"conditional_formats": [],
"type": "bars",
"style": {
"palette": "warm"
}
}
],
"autoscale": true
},
"title": "Global Bake Activity (rosco)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "per_minute(sum:rosco.bakesCompleted_totalTime{*} by {region}) / 1000000000 / per_minute(sum:rosco.bakesCompleted_count{*} by {region}) / 60",
"aggregator": "avg",
"conditional_formats": [],
"type": "area"
}
],
"autoscale": true
},
"title": "Global Bake Completion Time Minutes (rosco)"
},
{ {
"definition": { "definition": {
"viz": "timeseries", "viz": "timeseries",
@ -22,7 +155,7 @@
], ],
"autoscale": true "autoscale": true
}, },
"title": "Hystrix Short Circuited" "title": "Hystrix Short Circuited (global)"
}, },
{ {
"definition": { "definition": {
@ -35,8 +168,8 @@
"type": "line" "type": "line"
}, },
{ {
"q": "diff(sum:igor.hystrix.countExceptionsThrown{*})", "q": "diff(sum:igor.hystrix.countExceptionsThrown{*}.as_count())",
"type": "line" "type": "bars"
}, },
{ {
"q": "diff(sum:gate.hystrix.countExceptionsThrown{*})", "q": "diff(sum:gate.hystrix.countExceptionsThrown{*})",
@ -45,156 +178,9 @@
], ],
"autoscale": true "autoscale": true
}, },
"title": "Hystrix Exceptions" "title": "Hystrix Exceptions Thrown (global)"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype}), - diff(sum:clouddriver.operations_count{!success:true} by {operationtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Clouddriver Operations"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "avg:system.load.1{*}",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
}
],
"autoscale": true
},
"title": "This Slot Is Reserved For Future Use"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype}), - diff(sum:clouddriver.operations_count{!success:true} by {operationtype})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Active Orca Threads"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "diff(sum:echo.pipelines.triggered{*} by {application})",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Pipelines Triggered"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "sum:rosco.bakesActive{*}",
"conditional_formats": [],
"type": "line"
}
],
"autoscale": true
},
"title": "Bakes in Progress"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "sum:front50.storageServiceSupport.cacheSize{*} by {objecttype}",
"aggregator": "avg",
"conditional_formats": [],
"type": "bars"
}
],
"autoscale": true
},
"title": "Front50 Item Cache Sizes"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "avg:system.load.1{*}",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
},
{
"q": "avg:system.mem.used{*} / avg:system.mem.total{*}",
"type": "line"
}
],
"autoscale": true
},
"title": "System Load and Pct Memory Used"
},
{
"definition": {
"viz": "timeseries",
"requests": [
{
"q": "sum:front50.jvm.memory.used{*}",
"aggregator": "avg",
"conditional_formats": [],
"type": "line"
},
{
"q": "sum:clouddriver.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:orca.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:gate.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:igor.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:rosco.jvm.memory.used{*}",
"type": "line"
},
{
"q": "sum:echo.jvm.memory.used{*}",
"type": "line"
}
],
"autoscale": true
},
"title": "Microservice JVM Memory Used"
} }
], ],
"description": "A bare-bones dashboard for monitoring a Spinnaker deployment", "description": "Templated dashboard to show details for a specific Application",
"title": "MinimalSpinnaker" "title": "Specific Spinnaker Application"
} }

Просмотреть файл

@ -0,0 +1,52 @@
#!/bin/bash
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Installs the Datadog agent and uploads the bundled *Timeboard.json
# dashboards to the Datadog account identified by DATADOG_API_KEY /
# DATADOG_APP_KEY (prompted for interactively if not already set in the
# environment). The keys are persisted into the "environ" file two
# directories above this script.

SOURCE_DIR=$(dirname "$0")

# Prompt (repeatedly) for the named environment variable until it is
# non-empty. $1 is the variable NAME; ${!name} reads it indirectly.
function prompt_if_unset() {
  local name=$1
  local tmp
  while [[ "${!name}" == "" ]]; do
    read -e -p "ENTER $name: " tmp
    # printf -v assigns safely even if the value contains spaces or
    # shell metacharacters (the original 'eval' was fragile here).
    printf -v "$name" '%s' "$tmp"
  done
}

prompt_if_unset DATADOG_API_KEY
prompt_if_unset DATADOG_APP_KEY

environ_file=$(readlink -f "${SOURCE_DIR}/../../environ")
echo "Storing keys into $environ_file"
if [[ ! -f "${environ_file}" ]]; then
  sudo touch "${environ_file}"
fi
# Keys are secrets; restrict the file before writing them.
sudo chmod 600 "${environ_file}"
# NOTE: 'sudo cat >> file' does not work as intended -- the '>>'
# redirection is performed by the *unprivileged* calling shell, not by
# sudo. 'sudo tee -a' performs the append with elevated privileges.
sudo tee -a "$environ_file" > /dev/null <<EOF
DATADOG_API_KEY=$DATADOG_API_KEY
DATADOG_APP_KEY=$DATADOG_APP_KEY
EOF

echo "Installing Datadog Agent"
DD_API_KEY=$DATADOG_API_KEY bash -c "$(curl -L https://raw.githubusercontent.com/DataDog/dd-agent/master/packaging/datadog-agent/source/install_agent.sh)"

# Upload each bundled dashboard definition via the Datadog timeboard API.
for dashboard in "${SOURCE_DIR}"/*Timeboard.json; do
  echo "Installing $(basename "$dashboard")"
  # Fixed: '${dasboard}' typo (undefined variable -> empty payload) and
  # the missing line continuation, which left curl without a URL and made
  # the shell try to execute the URL as a command.
  curl -X POST -H "Content-type: application/json" \
      -d "@${dashboard}" \
      "https://app.datadoghq.com/api/v1/dash?api_key=${DATADOG_API_KEY}&application_key=${DATADOG_APP_KEY}"
done

Просмотреть файл

@ -0,0 +1,585 @@
{
"__inputs": [
{
"name": "DS_SPINNAKER",
"label": "Spinnaker",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "4.1.1"
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [],
"rows": [
{
"collapse": false,
"height": 254,
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_SPINNAKER}",
"fill": 1,
"id": 1,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "cpu0",
"yaxis": 1
},
{
"alias": "All",
"yaxis": 2
}
],
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(node_cpu{mode!=\"idle\"}[$SamplePeriod])) / sum(rate(node_cpu[$SamplePeriod])) ",
"hide": false,
"intervalFactor": 2,
"legendFormat": "All",
"metric": "",
"refId": "A",
"step": 30
},
{
"expr": "sum(rate(node_cpu{mode!=\"idle\"}[$SamplePeriod])) by (cpu)",
"hide": false,
"intervalFactor": 2,
"legendFormat": "{{cpu}}",
"metric": "node_cpu",
"refId": "B",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "CPU Utilization",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_SPINNAKER}",
"fill": 1,
"id": 3,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "Dirty",
"yaxis": 2
}
],
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "node_memory_MemFree ",
"hide": false,
"intervalFactor": 2,
"legendFormat": "Free",
"metric": "",
"refId": "B",
"step": 30
},
{
"expr": "(node_memory_MemTotal - node_memory_Committed_AS) ",
"hide": false,
"intervalFactor": 2,
"legendFormat": "Uncommitted",
"metric": "node_memory_Committed_AS",
"refId": "G",
"step": 30
},
{
"expr": "node_memory_Dirty",
"hide": false,
"intervalFactor": 2,
"legendFormat": "Dirty",
"metric": "",
"refId": "F",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "System Memory Available",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
},
{
"collapse": false,
"height": 255,
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_SPINNAKER}",
"fill": 1,
"id": 4,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "cpu0",
"yaxis": 1
}
],
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_netstat_Ip_InReceives[$SamplePeriod]) ",
"hide": false,
"intervalFactor": 2,
"legendFormat": "In Packets",
"metric": "",
"refId": "A",
"step": 120
},
{
"expr": "rate(node_netstat_Ip_OutRequests[$SamplePeriod])",
"hide": false,
"intervalFactor": 2,
"legendFormat": "Out Packets",
"metric": "",
"refId": "B",
"step": 120
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Networking",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_SPINNAKER}",
"fill": 1,
"id": 2,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "READ",
"yaxis": 2
}
],
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "delta(node_disk_bytes_written[$SamplePeriod])",
"intervalFactor": 2,
"legendFormat": "WRITE",
"metric": "node_disk_bytes_written",
"refId": "A",
"step": 60
},
{
"expr": "delta(node_disk_bytes_read[$SamplePeriod])",
"intervalFactor": 2,
"legendFormat": "READ",
"metric": "node_disk_bytes_read",
"refId": "B",
"step": 60
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Disk IO",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_SPINNAKER}",
"fill": 1,
"id": 5,
"legend": {
"avg": false,
"current": false,
"max": false,
"min": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [
{
"alias": "READ",
"yaxis": 2
}
],
"span": 4,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "node_filesystem_free{mountpoint!~\"/run.*\"}",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{mountpoint}}",
"metric": "",
"refId": "B",
"step": 60
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "Disk Available",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"auto": false,
"auto_count": 30,
"auto_min": "10s",
"current": {
"text": "1m",
"value": "1m"
},
"hide": 0,
"label": "Sample Period",
"name": "SamplePeriod",
"options": [
{
"selected": true,
"text": "1m",
"value": "1m"
},
{
"selected": false,
"text": "5m",
"value": "5m"
},
{
"selected": false,
"text": "10m",
"value": "10m"
},
{
"selected": false,
"text": "15m",
"value": "15m"
},
{
"selected": false,
"text": "30m",
"value": "30m"
}
],
"query": "1m,5m,10m,15m,30m",
"refresh": 2,
"type": "interval"
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Machine Stats",
"version": 1
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,80 @@
#!/bin/bash
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Installs Prometheus (plus node_exporter) and Grafana on the local machine,
# registers them as upstart services, starts them, then points Grafana at
# the local Prometheus and uploads the bundled *Dashboard.json dashboards.

PROMETHEUS_VERSION=prometheus-1.5.0.linux-amd64
NODE_EXPORTER_VERSION=node_exporter-0.13.0.linux-amd64
GRAFANA_DEB=grafana_4.1.1-1484211277_amd64.deb
PROMETHEUS_PORT=9090
GRAFANA_PORT=3000

# Directory holding this script and its config/dashboard files.
CONFIG_DIR=$(readlink -f `dirname $0`)

cd /opt

# Install Prometheus.
# NOTE: the version string was previously hard-coded in every path below even
# though PROMETHEUS_VERSION was defined; derive all paths from the variables
# so a version bump is a one-line change.
curl -L -o /tmp/prometheus.gz \
  https://github.com/prometheus/prometheus/releases/download/v1.5.0/${PROMETHEUS_VERSION}.tar.gz
sudo tar xzf /tmp/prometheus.gz -C /opt
rm /tmp/prometheus.gz

# Install node_exporter inside the Prometheus tree and expose it on PATH.
curl -L -o /tmp/node_exporter.gz \
  https://github.com/prometheus/node_exporter/releases/download/v0.13.0/${NODE_EXPORTER_VERSION}.tar.gz
sudo tar xzf /tmp/node_exporter.gz -C /opt/$PROMETHEUS_VERSION
sudo ln -s /opt/$PROMETHEUS_VERSION/$NODE_EXPORTER_VERSION/node_exporter \
  /usr/bin/node_exporter
rm /tmp/node_exporter.gz

# Install the Spinnaker scrape config and the upstart jobs.
# Use an absolute destination rather than relying on the current directory.
sudo cp $CONFIG_DIR/spinnaker-prometheus.yml /opt/$PROMETHEUS_VERSION
sudo cp $CONFIG_DIR/prometheus.conf /etc/init/prometheus.conf
sudo cp $CONFIG_DIR/node_exporter.conf /etc/init/node_exporter.conf

# Install Grafana from the upstream .deb.
cd /tmp
wget https://grafanarel.s3.amazonaws.com/builds/$GRAFANA_DEB
sudo apt-get install -y adduser libfontconfig
sudo dpkg -i $GRAFANA_DEB
sudo update-rc.d grafana-server defaults
rm $GRAFANA_DEB

# Startup
echo "Starting Prometheus"
sudo service node_exporter start
sudo service prometheus start
sudo service grafana-server start

# Wait (up to ~6 seconds) for Grafana's HTTP API to accept connections
# before trying to configure it.
TRIES=0
until nc -z localhost $GRAFANA_PORT || [[ $TRIES -gt 5 ]]; do
  sleep 1
  let TRIES+=1
done

# Register the local Prometheus as Grafana's default datasource.
# The payload is written with single quotes and converted to double quotes
# on the curl line to avoid a thicket of backslash escaping.
echo "Adding datasource"
PAYLOAD="{'name':'Spinnaker','type':'prometheus','url':'http://localhost:${PROMETHEUS_PORT}','access':'direct','isDefault':true}"
curl -u admin:admin http://localhost:${GRAFANA_PORT}/api/datasources \
  -H "Content-Type: application/json" \
  -X POST \
  -d "${PAYLOAD//\'/\"}"

# Upload each bundled dashboard, stripping the Grafana export-only
# "__inputs"/"__requires" sections and binding the datasource placeholder
# to the "Spinnaker" datasource created above.
for dashboard in ${CONFIG_DIR}/*Dashboard.json; do
  echo "Installing $(basename $dashboard)"
  x=$(sed -e "/\"__inputs\"/,/],/d" \
    -e "/\"__requires\"/,/],/d" \
    -e "s/\${DS_SPINNAKER\}/Spinnaker/g" < "$dashboard")
  temp_file=$(mktemp)
  echo "{ \"dashboard\": $x }" > $temp_file
  curl -u admin:admin http://localhost:${GRAFANA_PORT}/api/dashboards/import \
    -H "Content-Type: application/json" \
    -X POST \
    -d @${temp_file}
  rm -f $temp_file
done

Просмотреть файл

@ -0,0 +1,2 @@
# Upstart job: run the Prometheus node_exporter host-metrics daemon.
# The binary is symlinked to /usr/bin/node_exporter by the installer.
start on filesystem or runlevel [2345]
exec /usr/bin/node_exporter

Просмотреть файл

@ -0,0 +1,6 @@
# Upstart job: run the Prometheus server with the bundled Spinnaker scrape
# configuration; stdout/stderr go to /var/log/prometheus.log.
start on filesystem or runlevel [2345]
exec /opt/prometheus-1.5.0.linux-amd64/prometheus \
-config.file /opt/prometheus-1.5.0.linux-amd64/spinnaker-prometheus.yml \
-storage.local.path /opt/prometheus-1.5.0.linux-amd64/data \
> /var/log/prometheus.log 2>&1

Просмотреть файл

@ -0,0 +1,33 @@
# Prometheus configuration for scraping a local Spinnaker deployment.
global:
  scrape_interval: 15s       # How often to scrape targets by default.
  evaluation_interval: 15s   # How often to evaluate rules.
  # scrape_timeout is set to the global default (10s).

  # Attach these labels to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'codelab-monitor'

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first.rules"
  # - "second.rules"

# Scrape configurations.
scrape_configs:
  # Metrics republished by the Spinnaker monitoring daemon
  # (presumably its default port 8008 — confirm against the installer flags).
  - job_name: 'spinnaker'
    static_configs:
      - targets: ['localhost:8008']
    metrics_path: '/prometheus_metrics'
    # Keep the labels reported by the daemon rather than overwriting them
    # with Prometheus' own target labels.
    honor_labels: true

  # Host-level metrics from node_exporter.
  - job_name: 'node'
    static_configs:
      - targets: ['localhost:9100']

  # - job_name: 'prometheus'
  #   static_configs:
  #     - targets: ['localhost:9090']

Просмотреть файл

@ -0,0 +1,324 @@
{
"displayName": "Minimal Spinnaker Dashboard",
"version": 14,
"root": {
"gridLayout": {
"widgets": [
{
"title": "Hystrix Short Circuited (global)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/front50/hystrix.countShortCircuited\"",
"perSeriesAligner": "ALIGN_DELTA"
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/gate/hystrix.countShortCircuited\"",
"perSeriesAligner": "ALIGN_DELTA"
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/igor/hystrix.countShortCircuited\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Hystrix Exceptions Thrown (global)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/front50/hystrix.countExceptionsThrown\"",
"perSeriesAligner": "ALIGN_DELTA"
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/gate/hystrix.countExceptionsThrown\"",
"perSeriesAligner": "ALIGN_DELTA"
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/igor/hystrix.countExceptionsThrown\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Active Orchestrations (orca)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.isComplete=\"false\" AND metric.label.executionType=\"Orchestration\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Active Pipelines (orca)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.isComplete=\"false\" AND metric.label.executionType=\"Pipeline\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Completed Orchestrations (orca)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.status=\"SUCCEEDED\" AND metric.label.executionType=\"Orchestration\"",
"perSeriesAligner": "ALIGN_DELTA"
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.status!=\"SUCCEEDED\" AND metric.label.executionType=\"Orchestration\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Completed Pipelines (orca)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.status=\"SUCCEEDED\" AND metric.label.executionType=\"Pipeline\"",
"perSeriesAligner": "ALIGN_DELTA"
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.status!=\"SUCCEEDED\" AND metric.label.executionType=\"Pipeline\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Active Threads (orca)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/threadpool.activeCount\""
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Pipelines Triggered (echo)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/echo/pipelines.triggered\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Bake Activity (rosco)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/rosco/bakesActive\""
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Cached Items (front50)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/front50/storageServiceSupport.cacheSize\""
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Successful Operations (clouddriver)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/clouddriver/operations__count\" AND metric.label.success=\"true\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "Failed Operations (clouddriver)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/clouddriver/operations__count\" AND metric.label.success=\"false\"",
"perSeriesAligner": "ALIGN_DELTA"
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
},
{
"title": "JVM Memory (global)",
"xyChart": {
"dataSets": [
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/clouddriver/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/echo/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/fiat/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/front50/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/gate/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/igor/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
}
},
{
"timeSeriesFilter": {
"filter": "metric.type=\"custom.googleapis.com/spinnaker/rosco/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
}
}
],
"constantLines": [
{}
],
"options": {},
"y1Axis": {},
"xAxis": {}
}
}
]
}
}
}

Просмотреть файл

@ -0,0 +1,31 @@
#!/bin/bash
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Uploads each bundled *Dashboard.json to Stackdriver via the metric tool.
# Extra arguments (e.g. --credentials_path=...) are forwarded verbatim.

echo "See https://cloud.google.com/monitoring/agent/install-agent"
echo "The agent is optional (and only available on GCP and AWS)"

if [[ -z $STACKDRIVER_API_KEY ]]; then
  # Remove this once API is no longer whitelisted.
  echo "You need a STACKDRIVER_API_KEY to use this installer."
  exit 1  # was "exit -1"; exit codes are 0-255, and -1 is not portable.
fi

for dashboard in *Dashboard.json; do
  google/stackdriver_monitoring/spinnaker_metric_tool.sh \
    upload_stackdriver_dashboard --dashboard ${dashboard} \
    "$@"
done

# Example flags for invoking the metric tool directly (kept as a comment;
# previously this line was bare and would have been executed as a command):
#   --credentials_path=$HOME/.spinnaker/google-credentials.json --dashboard xyz --update

Просмотреть файл

@ -30,37 +30,36 @@ function make_spinnaker_monitor_zip() {
local zip_file="$TEMP_DIR/monitor_spinnaker.zip" local zip_file="$TEMP_DIR/monitor_spinnaker.zip"
cd "$SOURCE_DIR" cd "$SOURCE_DIR"
zip -r "$zip_file" `ls *.py | grep -v _test.py` zip -qr "$zip_file" `ls *.py | grep -v _test.py`
cp spinnaker_metric_tool.py $TEMP_DIR/__main__.py cp spinnaker_metric_tool.py $TEMP_DIR/__main__.py
cd $TEMP_DIR cd $TEMP_DIR
zip $zip_file __main__.py zip -q $zip_file __main__.py
rm -f __main__.py rm -f __main__.py
cd "$BUILD_DIR/spinnaker" cd "$BUILD_DIR/spinnaker"
zip -r $zip_file pylib zip -qr $zip_file pylib
cd "$BUILD_DIR/citest"
zip -r $zip_file citest
} }
function make_install_tar() { function make_install_tar() {
local tar_file="$TEMP_DIR/install.tz" local tar_file="$1"
local staging_dir="$TEMP_DIR/monitor_spinnaker" local staging_dir="$TEMP_DIR/monitor_spinnaker"
mkdir $staging_dir mkdir $staging_dir
cd "$SOURCE_DIR" cd "$SOURCE_DIR"
cp *.json README.md $TEMP_DIR/monitor_spinnaker.zip $staging_dir cp -pr install_monitoring.sh config README.md $TEMP_DIR/monitor_spinnaker.zip $staging_dir
cat requirements.txt | grep -v mock > $staging_dir/requirements.txt cat requirements.txt | grep -v mock > $staging_dir/requirements.txt
cd $TEMP_DIR cd $TEMP_DIR
tar czf $tar_file monitor_spinnaker if [[ "$tar_file" == *.tz || "$tar_file" == *.tar.gz ]]; then
tar czf $tar_file monitor_spinnaker
else
tar cf $tar_file monitor_spinnaker
fi
} }
make_spinnaker_monitor_zip make_spinnaker_monitor_zip
make_install_tar make_install_tar "$TARGET_PATH"
cp "$TEMP_DIR/install.tz" "$TARGET_PATH"
rm -rf $TEMP_DIR
echo "WROTE $TARGET_PATH" echo "WROTE $TARGET_PATH"

Просмотреть файл

@ -0,0 +1,204 @@
#!/bin/bash
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Directory containing this script (and its bundled config/ and zip payload).
SOURCE_DIR=$(readlink -f `dirname $0`)
# Preserve the original argv so it can be re-scanned and forwarded later.
COMMAND_LINE_FLAGS=("$@")
# Provider selection; set by process_args from --datadog/--prometheus/--stackdriver.
USE_DATADOG=false
USE_PROMETHEUS=false
USE_STACKDRIVER=false
# Accumulated provider switches, forwarded to the monitoring daemon.
PROVIDERS=""
EXTRA_ARGS=""
# Print command-line help for this installer to stdout.
# (Fixes the "dfeault" typo in the user-facing help text.)
function print_usage() {
  cat <<-EOF
`basename $0`: <provider_switch>+ \
<monitor_options>* \
<service_options>* \
<provider_options>*

<provider_switch> is one or more of:
--datadog
Install and configure a Datadog agent.
Spinnaker's metric monitoring tool will publish metrics to Datadog.
You will be prompted for your API and APP keys unless you define
environment variables DATADOG_APP_KEY and DATADOG_API_KEY.
--prometheus
Install and configure Prometheus and Grafana Dashboard.
Spinnaker's metric monitoring tool will publish metrics to Prometheus.
--stackdriver
Spinnaker's metric monitoring tool will publish metrics to Stackdriver.
You may also need --credentials_path=<path>

<monitor_options> zero or more of:
--port=8008
The port number to use for the embedded HTTP server within the monitor.
--period=60
Number of seconds between pollings of microservices.
--service_hosts <ip>+
A comma-delimited list of hostnames (or IPs) to poll by default.
The default is localhost.
Make this empty "" to not poll any services by default.

<service_options> are in the form --<service>=<netloc>* where:
<service> is one of clouddriver, echo, fiat, front50, gate, igor, rosco
<netloc> is a comma-delimited list of either a <host> or <host>:<port>
If only a <host> is provided, then the default port will be used.
An empty <netloc> list will disable polling on the service entirely.
A value of "*" refers to all the --service_hosts.
The default <netloc> for each of the services is "*".

<provider_options> are zero or more of:
--credentials_path=<path>
If using --stackdriver, the path for the Google Credentials to use.
The default will be the application default credentials.
EOF
}
# Scan the command line for provider switches.
# Sets USE_DATADOG / USE_PROMETHEUS / USE_STACKDRIVER and appends the
# corresponding switch to PROVIDERS. --help/-h prints usage and exits.
# All other arguments are deliberately ignored (they are forwarded to the
# monitoring daemon elsewhere).
function process_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --datadog)
        USE_DATADOG=true
        PROVIDERS="$PROVIDERS --datadog"
        ;;
      --prometheus)
        USE_PROMETHEUS=true
        PROVIDERS="$PROVIDERS --prometheus"
        ;;
      --stackdriver)
        USE_STACKDRIVER=true
        PROVIDERS="$PROVIDERS --stackdriver"
        ;;
      --help|-h)
        print_usage
        exit 1
        ;;
      *)
        # ignore everything else
        ;;
    esac
  done
}
# Install the Python runtime dependencies needed by the monitoring daemon
# (pip requirements listed alongside this script).
function install_dependencies() {
  apt-get update
  apt-get install python-pip python-dev -y
  pip install -r $SOURCE_DIR/requirements.txt
}
# Run the provider-specific installer for each metric service selected
# on the command line (see process_args).
function install_metric_services() {
  if [[ "$USE_DATADOG" == "true" ]]; then
    $SOURCE_DIR/config/datadog/install.sh
  fi
  if [[ "$USE_PROMETHEUS" == "true" ]]; then
    $SOURCE_DIR/config/prometheus/install.sh
  fi
  if [[ "$USE_STACKDRIVER" == "true" ]]; then
    # Forward --credentials_path=<path> (if given) to the stackdriver installer.
    local credentials=""
    for arg in ${COMMAND_LINE_FLAGS[@]}; do
      if [[ $arg = --credentials_path=* ]]; then
        credentials=$arg
      fi
    done
    $SOURCE_DIR/config/stackdriver/install.sh $credentials
  fi
}
# Generate $SOURCE_DIR/monitor_spinnaker.sh, the launcher for the monitoring
# daemon. The install-time flags ($@ of this function, expanded NOW) are baked
# into the generated command line, while the escaped "\$@" survives into the
# generated script so it can also accept additional runtime arguments.
function write_startup_script() {
  cat <<-EOF > "$SOURCE_DIR/monitor_spinnaker.sh"
#!/bin/bash
set -o allexport
if [[ -f /etc/default/spinnaker ]]; then
source /etc/default/spinnaker
fi
if [[ -f "$SOURCE_DIR/environ" ]]; then
source "$SOURCE_DIR/environ"
fi
set +o allexport
PYTHONWARNINGS=once \
python "$SOURCE_DIR/monitor_spinnaker.zip" \
monitor $@ "\$@"
EOF
  # Make the generated launcher executable.
  chmod 755 "$SOURCE_DIR/monitor_spinnaker.sh"
}
# Register the monitoring daemon as an upstart service so it starts on boot;
# its output is redirected to /var/log/monitor_spinnaker.log.
function write_upstart_script() {
  cat <<-EOF > /etc/init/monitor_spinnaker.conf
start on filesystem or runlevel [2345]
exec $SOURCE_DIR/monitor_spinnaker.sh > /var/log/monitor_spinnaker.log 2>&1
EOF
  chmod 644 /etc/init/monitor_spinnaker.conf
}
# ---- main ----
process_args "${COMMAND_LINE_FLAGS[@]}"

# At least one provider switch is required.
if [[ "$PROVIDERS" == "" ]]; then
  print_usage
  echo ""
  echo "ERROR: No <provider_switch> options were provided."
  exit 1  # was "exit -1"; exit codes are 0-255, and -1 is not portable.
fi

# Installing packages and writing /etc/init requires root.
if [[ `/usr/bin/id -u` -ne 0 ]]; then
  echo "$0 must be executed with root permissions; exiting"
  exit 1
fi

install_dependencies
install_metric_services
write_startup_script "${COMMAND_LINE_FLAGS[@]}"
write_upstart_script

echo "Starting to monitor Spinnaker services..."
service monitor_spinnaker start

cat <<EOF
Be sure that your spinnaker-local.yml has services.spectator.webEndpoint.enabled=true
For more information, see:
http://www.spinnaker.io/docs/monitoring-a-spinnaker-deployment
EOF

Просмотреть файл

@ -15,6 +15,7 @@
import cgi import cgi
import httplib import httplib
import json import json
import os
import logging import logging
from command_processor import CommandHandler from command_processor import CommandHandler
@ -469,6 +470,107 @@ class UpsertCustomDescriptorsHandler(BaseStackdriverCommandHandler):
project, upsert_descriptors, type_map, self.output) project, upsert_descriptors, type_map, self.output)
class ListDashboardsHandler(BaseStackdriverCommandHandler):
"""Administrative handler to list all dashboards (not just spinnaker)."""
def process_commandline_request(self, options):
"""Implements CommandHandler."""
stackdriver = stackdriver_service.make_service(options)
parent = 'projects/{0}'.format(stackdriver.project)
dashboards = stackdriver.stub.projects().dashboards()
request = dashboards.list(parent=parent)
all_dashboards = []
while request:
response = request.execute()
all_dashboards.extend(response.get('dashboards', []))
request = dashboards.list_next(request, response)
found = {elem['name']: elem['displayName'] for elem in all_dashboards}
self.output(options, str(found))
def lookup_dashboard(stackdriver, display_name):
"""Find the dashboard definition with the given display_name."""
parent = 'projects/{0}'.format(stackdriver.project)
dashboards = stackdriver.stub.projects().dashboards()
request = dashboards.list(parent=parent)
while request:
response = request.execute()
for elem in response.get('dashboards', []):
if elem['displayName'] == display_name:
return elem
request = dashboards.list_next(request, response)
return None
class GetDashboardHandler(BaseStackdriverCommandHandler):
"""Administrative handler to get a dashboard from its name."""
def add_argparser(self, subparsers):
"""Implements CommandHandler."""
parser = super(GetDashboardHandler, self).add_argparser(subparsers)
parser.add_argument(
'--name', required=True,
help='The name of the dashboard to get.')
return parser
def process_commandline_request(self, options):
"""Implements CommandHandler."""
display_name = options.get('name', None)
if not display_name:
raise ValueError('No name provided.')
stackdriver = stackdriver_service.make_service(options)
found = lookup_dashboard(stackdriver, display_name)
if found is None:
raise ValueError('"{0}" not found.'.format(display_name))
json_text = json.JSONEncoder(indent=2).encode(found)
self.output(options, json_text)
class UploadDashboardHandler(BaseStackdriverCommandHandler):
"""Administrative handler to upload a dashboard."""
def add_argparser(self, subparsers):
"""Implements CommandHandler."""
parser = super(UploadDashboardHandler, self).add_argparser(subparsers)
parser.add_argument('--dashboard', required=True,
help='The path to the json dashboard file.')
parser.add_argument(
'--update', default=False, action='store_true',
help='Update an existing dashboard rather than create a new one.')
return parser
def process_commandline_request(self, options):
"""Implements CommandHandler."""
path = options.get('dashboard', None)
if not path:
raise ValueError('No dashboard provided.')
with open(path, 'r') as infile:
specification = json.JSONDecoder().decode(infile.read())
stackdriver = stackdriver_service.make_service(options)
dashboards = stackdriver.stub.projects().dashboards()
parent = 'projects/{0}'.format(stackdriver.project)
if options.get('update', False):
display_name = specification['displayName']
found = lookup_dashboard(stackdriver, display_name)
if found is None:
raise ValueError('"{0}" not found.'.format(display_name))
response = dashboards.update(
name=found['name'], body=specification).execute()
action = 'Updated'
else:
response = dashboards.create(parent=parent, body=specification).execute()
action = 'Created'
self.output(options, '{action} "{title}" with name {name}'.format(
action=action, title=response['displayName'], name=response['name']))
def add_handlers(handler_list, subparsers): def add_handlers(handler_list, subparsers):
"""Registers CommandHandlers for interacting with Stackdriver.""" """Registers CommandHandlers for interacting with Stackdriver."""
command_handlers = [ command_handlers = [
@ -487,6 +589,19 @@ def add_handlers(handler_list, subparsers):
' update the existing ones and add the new ones.' ' update the existing ones and add the new ones.'
' WARNING: Historic time-series data may be lost on update.') ' WARNING: Historic time-series data may be lost on update.')
] ]
if os.environ.get('STACKDRIVER_API_KEY'):
command_handlers.extend([
ListDashboardsHandler('/stackdriver/list_dashboards',
'list_stackdriver_dashboards',
'List the available Stackdriver Dashboards'),
GetDashboardHandler(None,
'get_stackdriver_dashboard',
'Get a specific dashboard by display name'),
UploadDashboardHandler(None,
'upload_stackdriver_dashboard',
'Create or update specific dashboard')
])
for handler in command_handlers: for handler in command_handlers:
handler.add_argparser(subparsers) handler.add_argparser(subparsers)
handler_list.append(handler) handler_list.append(handler)

Просмотреть файл

@ -73,6 +73,11 @@ class StackdriverMetricsService(object):
def millis_to_time(millis): def millis_to_time(millis):
return datetime.fromtimestamp(millis / 1000).isoformat('T') + 'Z' return datetime.fromtimestamp(millis / 1000).isoformat('T') + 'Z'
@property
def project(self):
"""Returns the stackdriver project being used."""
return self.__project
@property @property
def stub(self): def stub(self):
"""Returns the stackdriver client stub.""" """Returns the stackdriver client stub."""
@ -385,7 +390,15 @@ def make_service(options):
credentials = GoogleCredentials.get_application_default() credentials = GoogleCredentials.get_application_default()
http = credentials.authorize(http) http = credentials.authorize(http)
return apiclient.discovery.build('monitoring', 'v3', http=http) developerKey = os.environ.get('STACKDRIVER_API_KEY')
if developerKey:
url='https://monitoring.googleapis.com/$discovery/rest?labels=DASHBOARD_TRUSTED_TESTER&key='+developerKey
return apiclient.discovery.build(
'monitoring', 'v3', http=http,
discoveryServiceUrl=url)
else:
return apiclient.discovery.build('monitoring', 'v3', http=http)
return StackdriverMetricsService(make_stub, options) return StackdriverMetricsService(make_stub, options)

Просмотреть файл

@ -320,6 +320,18 @@ function extract_spinnaker_gcr_credentials() {
fi fi
} }
function do_experimental_startup() {
local monitor_config=$(get_instance_metadata_attribute "monitor_spinnaker")
if [[ ! -z $monitor_config && \
-f /opt/spinnaker/install/install_monitor_spinnaker.tz ]]; then
echo "$STATUS_PREFIX Install Monitoring with flags '$monitor_config' "
tar xzf /opt/spinnaker/install/install_monitor_spinnaker.tz \
-C /opt --no-same-owner
/opt/monitor_spinnaker/install_monitoring.sh $monitor_config
clear_instance_metadata "monitor_spinnaker"
fi
}
function process_args() { function process_args() {
while [[ $# > 0 ]] while [[ $# > 0 ]]
do do
@ -382,6 +394,8 @@ extract_spinnaker_credentials
echo "$STATUS_PREFIX Configuring Spinnaker" echo "$STATUS_PREFIX Configuring Spinnaker"
$SPINNAKER_INSTALL_DIR/scripts/reconfigure_spinnaker.sh $SPINNAKER_INSTALL_DIR/scripts/reconfigure_spinnaker.sh
do_experimental_startup
# Replace this first time boot with the normal startup script # Replace this first time boot with the normal startup script
# that just starts spinnaker (and its dependencies) without configuring anymore. # that just starts spinnaker (and its dependencies) without configuring anymore.