зеркало из https://github.com/microsoft/spinnaker.git
Merge pull request #1414 from ewiseblatt/install_monitoring
Install scripts for monitoring support.
This commit is contained in:
Коммит
0e478caa87
|
@ -231,7 +231,7 @@ services:
|
|||
|
||||
spectator:
|
||||
webEndpoint:
|
||||
enabled: false
|
||||
enabled: true
|
||||
|
||||
stackdriver:
|
||||
enabled: false
|
||||
|
|
|
@ -238,7 +238,7 @@ services:
|
|||
|
||||
spectator:
|
||||
webEndpoint:
|
||||
enabled: false
|
||||
enabled: true
|
||||
|
||||
stackdriver:
|
||||
enabled: ${SPINNAKER_STACKDRIVER_ENABLED:false}
|
||||
|
|
|
@ -309,6 +309,36 @@
|
|||
},
|
||||
"title": "Avg Clouddriver Controller Invocation Time (ms per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Successful Operations (per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.operations_count{success:false} by {operationtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Failed Operations (per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
|
@ -422,28 +452,99 @@
|
|||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "sum:orca.threadpool.activeCount{*}",
|
||||
"q": "diff(sum:orca.task.invocations{executiontype:orchestration,status:running} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
"type": "bars"
|
||||
}
|
||||
]
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Active Orca Threads (per minute)"
|
||||
"title": "Active Orchestrations (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "per_minute(sum:igor.controller.invocations_totalTime{*} by {method}) / 1000000 / per_minute(avg:igor.controller.invocations_count{*} by {method})",
|
||||
"q": "diff(sum:orca.task.invocations{executiontype:orchestration,status:succeeded} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": "- diff(sum:orca.task.invocations{executiontype:orchestration,status:terminal} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
}
|
||||
}
|
||||
]
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Igor Controller Invocation Time (ms per minute)"
|
||||
"title": "Completed Orchestrations (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{status:running,executiontype:pipeline} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Active Pipelines (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{status:succeeded,executiontype:pipeline} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": "- diff(sum:orca.task.invocations{status:terminal,executiontype:pipeline} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
}
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Completed Pipelines (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "sum:orca.threadpool.activeCount{*} by {id}",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Active Orca Threads (per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
|
@ -460,6 +561,20 @@
|
|||
},
|
||||
"title": "Last known Orca Active Threads"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "per_minute(sum:igor.controller.invocations_totalTime{*} by {method}) / 1000000 / per_minute(avg:igor.controller.invocations_count{*} by {method})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
}
|
||||
]
|
||||
},
|
||||
"title": "Igor Controller Invocation Time (ms per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
|
@ -545,24 +660,31 @@
|
|||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "per_minute(sum:rosco.bakes{*} by {success})",
|
||||
"q": "diff(sum:rosco.bakesActive{*})",
|
||||
"aggregator": "avg",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
},
|
||||
"type": "bars",
|
||||
"conditional_formats": []
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "per_minute(sum:rosco.bakes.local{*} by {active})",
|
||||
"q": "diff(sum:rosco.bakesRequested{*} by {flavor})",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:rosco.bakesCompleted_count{success:false} by {region})",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
},
|
||||
"type": "bars"
|
||||
}
|
||||
}
|
||||
]
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Rosco Bakes (local warm, per minute)"
|
||||
"title": "Rosco Bake Activity"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
|
@ -817,7 +939,7 @@
|
|||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": " - diff(sum:clouddriver.google.api_count{success:false} by {api,scope}.as_rate()), diff(sum:clouddriver.google.api_count{success:true} by {api,scope}.as_rate())",
|
||||
"q": "- diff(sum:clouddriver.google.api_count{success:false} by {api,scope}.as_rate()), diff(sum:clouddriver.google.api_count{success:true} by {api,scope}.as_rate())",
|
||||
"aggregator": "avg",
|
||||
"style": {
|
||||
"palette": "dog_classic"
|
||||
|
@ -828,7 +950,7 @@
|
|||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Google API Call Rate (per minute, lines by scope, bars by api)"
|
||||
"title": "Google API Call Rate"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
|
@ -856,7 +978,7 @@
|
|||
"q": "per_minute(sum:clouddriver.google.api_totalTime{*} by {api,scope}) / 1000000 / per_minute(sum:clouddriver.google.api_count{*} by {api,scope}.as_count())",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
|
@ -868,20 +990,58 @@
|
|||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.google.operationWaits_count{*} by {basephase})",
|
||||
"q": "per_minute(sum:clouddriver.google.batchExecute_count{*} by {context})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Google Batch Count (per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "per_minute(sum:clouddriver.google.batchExecute_totalTime{*} by {context}) / 1000000 / per_minute(sum:clouddriver.google.batchExecute_count{*} by {context})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Batch Call Latency (ms per call per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "per_minute(sum:clouddriver.google.batchSize{*} by {context})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Batch Call Size (per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.google.operationWaits_count{status:done} by {basephase,scope})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
"palette": "dog_classic"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:clouddriver.google.operationWaits_count{*} by {scope})",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
},
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true,
|
||||
|
@ -891,7 +1051,53 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"title": "Google Waiting Operations (diff, bars by phase, lines by scope)"
|
||||
"title": "Successful Google Operations (clouddriver)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.google.operationWaits_count{!status:done} by {basephase,scope})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "dog_classic"
|
||||
}
|
||||
}
|
||||
],
|
||||
"autoscale": true,
|
||||
"yaxis": {
|
||||
"filter": {
|
||||
"below": 0.01
|
||||
}
|
||||
}
|
||||
},
|
||||
"title": "Failed Google Operations (clouddriver)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.google.operationWaitRequests{*} by {basephase,scope})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "dog_classic"
|
||||
}
|
||||
}
|
||||
],
|
||||
"autoscale": true,
|
||||
"yaxis": {
|
||||
"filter": {
|
||||
"below": 0.01
|
||||
}
|
||||
}
|
||||
},
|
||||
"title": "Google Operations Started (clouddriver)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
|
@ -928,36 +1134,6 @@
|
|||
},
|
||||
"title": "Google Operation Waits by Phase"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "per_minute(sum:clouddriver.onDemand_total_totalTime{*} by {ondemandtype}) / 1000000000 / per_minute(sum:clouddriver.onDemand_total_count{*} by {ondemandtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Clouddriver OnDemand Invocation Time (ms per call per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.operations_count{*} by {operationtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Clouddriver Operations (type per minute)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
|
@ -972,23 +1148,8 @@
|
|||
"autoscale": true
|
||||
},
|
||||
"title": "Spectator Time Series Streams"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.google.batchExecute_count{*} by {context})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "area"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Google Batch Execution Count"
|
||||
}
|
||||
],
|
||||
"description": "Contains graphs of various metrics within Spinnaker to illustrate what is available.",
|
||||
"title": "FullSpinnaker"
|
||||
"title": "Spinnaker Kitchen Sink"
|
||||
}
|
|
@ -0,0 +1,299 @@
|
|||
{
|
||||
"read_only": false,
|
||||
"graphs": [
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:gate.hystrix.rollingCountShortCircuited{*} by {metricgroup})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:igor.hystrix.rollingCountShortCircuited{*} by {metricgroup})",
|
||||
"type": "bars"
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:front50.hystrix.rollingCountShortCircuited{*} by {metricgroup})",
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Hystrix Short Circuited (global)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:front50.hystrix.countExceptionsThrown{*})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:igor.hystrix.countExceptionsThrown{*}.as_count())",
|
||||
"type": "bars"
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:gate.hystrix.countExceptionsThrown{*})",
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Hystrix Exceptions (global)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{executiontype:orchestration,status:running} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Active Orchestrations (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{executiontype:orchestration,status:succeeded} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": "- diff(sum:orca.task.invocations{executiontype:orchestration,status:terminal} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
}
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Completed Orchestrations (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{status:running,executiontype:pipeline} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Active Pipelines (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{status:succeeded,executiontype:pipeline} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": " - diff(sum:orca.task.invocations{status:terminal,executiontype:pipeline} by {taskname})",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
},
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Completed Pipelines (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype}), - diff(sum:clouddriver.operations_count{!success:true} by {operationtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Active Threads (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:echo.pipelines.triggered{*} by {application})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Pipelines Triggered (echo)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:rosco.bakesActive{*})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:rosco.bakesRequested{*} by {flavor})",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:rosco.bakesCompleted_count{success:false} by {region})",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
}
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Bake Activity (rosco)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "sum:front50.storageServiceSupport.cacheSize{*} by {objecttype}",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Item Cache Size (front50)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Successful Operations (clouddriver)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.operations_count{success:false} by {operationtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Failed Operations (clouddriver)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "avg:system.load.1{*}",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "avg:system.mem.used{*} / avg:system.mem.total{*}",
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "System Load and Pct Memory Used"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "sum:front50.jvm.memory.used{*}",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:clouddriver.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:orca.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:gate.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:igor.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:rosco.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:echo.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Microservice JVM Memory Used"
|
||||
}
|
||||
],
|
||||
"description": "A bare-bones dashboard for monitoring a Spinnaker deployment",
|
||||
"title": "Minimal Spinnaker"
|
||||
}
|
|
@ -1,6 +1,139 @@
|
|||
{
|
||||
"read_only": false,
|
||||
"graphs": [
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{$SourceApplication,executiontype:orchestration,status:running} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "$SourceApplication Active Orchestrations (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{$SourceApplication,executiontype:orchestration,status:running} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "$SourceApplication Active Orchestrations (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{$SourceApplication,status:running,executiontype:pipeline} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "$SourceApplication Active Pipelines (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:orca.task.invocations{$SourceApplication,status:succeeded,executiontype:pipeline} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": "- diff(sum:orca.task.invocations{$SourceApplication,status:terminal,executiontype:pipeline} by {taskname})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
}
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "$SourceApplication Completed Pipelines (orca)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:echo.pipelines.triggered{$Application} by {name})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "$Application Pipelines Triggered (echo)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:rosco.bakesActive{*})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:rosco.bakesRequested{*} by {flavor})",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "cool"
|
||||
}
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:rosco.bakesCompleted_count{success:false} by {region})",
|
||||
"conditional_formats": [],
|
||||
"type": "bars",
|
||||
"style": {
|
||||
"palette": "warm"
|
||||
}
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Global Bake Activity (rosco)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "per_minute(sum:rosco.bakesCompleted_totalTime{*} by {region}) / 1000000000 / per_minute(sum:rosco.bakesCompleted_count{*} by {region}) / 60",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "area"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Global Bake Completion Time Minutes (rosco)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
|
@ -22,7 +155,7 @@
|
|||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Hystrix Short Circuited"
|
||||
"title": "Hystrix Short Circuited (global)"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
|
@ -35,8 +168,8 @@
|
|||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:igor.hystrix.countExceptionsThrown{*})",
|
||||
"type": "line"
|
||||
"q": "diff(sum:igor.hystrix.countExceptionsThrown{*}.as_count())",
|
||||
"type": "bars"
|
||||
},
|
||||
{
|
||||
"q": "diff(sum:gate.hystrix.countExceptionsThrown{*})",
|
||||
|
@ -45,156 +178,9 @@
|
|||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Hystrix Exceptions"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype}), - diff(sum:clouddriver.operations_count{!success:true} by {operationtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Clouddriver Operations"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "avg:system.load.1{*}",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "This Slot Is Reserved For Future Use"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:clouddriver.operations_count{success:true} by {operationtype}), - diff(sum:clouddriver.operations_count{!success:true} by {operationtype})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Active Orca Threads"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "diff(sum:echo.pipelines.triggered{*} by {application})",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Pipelines Triggered"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "sum:rosco.bakesActive{*}",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Bakes in Progress"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "sum:front50.storageServiceSupport.cacheSize{*} by {objecttype}",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "bars"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Front50 Item Cache Sizes"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "avg:system.load.1{*}",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "avg:system.mem.used{*} / avg:system.mem.total{*}",
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "System Load and Pct Memory Used"
|
||||
},
|
||||
{
|
||||
"definition": {
|
||||
"viz": "timeseries",
|
||||
"requests": [
|
||||
{
|
||||
"q": "sum:front50.jvm.memory.used{*}",
|
||||
"aggregator": "avg",
|
||||
"conditional_formats": [],
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:clouddriver.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:orca.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:gate.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:igor.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:rosco.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
},
|
||||
{
|
||||
"q": "sum:echo.jvm.memory.used{*}",
|
||||
"type": "line"
|
||||
}
|
||||
],
|
||||
"autoscale": true
|
||||
},
|
||||
"title": "Microservice JVM Memory Used"
|
||||
"title": "Hystrix Exceptions Thrown (global)"
|
||||
}
|
||||
],
|
||||
"description": "A bare-bones dashboard for monitoring a Spinnaker deployment",
|
||||
"title": "MinimalSpinnaker"
|
||||
}
|
||||
"description": "Templated dashboard to show details for a specific Application",
|
||||
"title": "Specific Spinnaker Application"
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2017 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Directory containing this script; used below to locate the environ file
# and the *Timeboard.json dashboards. "$0" is quoted so that a script path
# containing whitespace is not word-split.
SOURCE_DIR=$(dirname "$0")
# NOTE(review): HAVE_KEYS is not referenced anywhere in the visible script;
# kept in case it is consumed by something that sources this file.
HAVE_KEYS=0
|
||||
|
||||
# Prompt on stdin until the variable whose NAME is given in $1 is non-empty.
#
# Arguments:
#   $1 - name of the (global) variable to check and, if empty, populate.
# Returns:
#   0 once the variable holds a non-empty value;
#   1 if stdin reaches end-of-file before a value was supplied.
function prompt_if_unset() {
  local name=$1
  local tmp
  while [[ "${!name}" == "" ]]; do
    # -r keeps backslashes literal. A failed read means EOF, so bail out
    # instead of looping forever on an exhausted stdin.
    if ! read -e -r -p "ENTER $name: " tmp; then
      echo "ERROR: no value supplied for $name" >&2
      return 1
    fi
    # printf -v assigns the input verbatim; unlike eval it never executes
    # the typed text and keeps whitespace / shell metacharacters intact.
    printf -v "$name" '%s' "$tmp"
  done
}
|
||||
|
||||
# Make sure both Datadog credentials are available, prompting if needed.
prompt_if_unset DATADOG_API_KEY
prompt_if_unset DATADOG_APP_KEY

# Persist the keys in the shared environ file two directories above this
# script, creating it if necessary and restricting it to its owner.
environ_file=$(readlink -f "${SOURCE_DIR}/../../environ")
echo "Storing keys into $environ_file"
if [[ ! -f "${environ_file}" ]]; then
  sudo touch "${environ_file}"
fi
sudo chmod 600 "${environ_file}"
# The append must happen inside the sudo'd process: with "sudo cat >> file"
# the redirection is opened by the calling (unprivileged) shell and fails on
# a root-owned mode-600 file. tee -a appends as root; its echo of the keys
# to stdout is discarded.
sudo tee -a "$environ_file" > /dev/null <<EOF
DATADOG_API_KEY=$DATADOG_API_KEY
DATADOG_APP_KEY=$DATADOG_APP_KEY
EOF
|
||||
|
||||
|
||||
# Download and run the Datadog agent installer, passing the API key through
# the DD_API_KEY environment variable.
echo "Installing Datadog Agent"
DD_API_KEY="$DATADOG_API_KEY" bash -c "$(curl -L https://raw.githubusercontent.com/DataDog/dd-agent/master/packaging/datadog-agent/source/install_agent.sh)"

# Upload every *Timeboard.json file that sits next to this script to the
# Datadog dashboard API.
for dashboard in "${SOURCE_DIR}"/*Timeboard.json; do
  # If the glob matched nothing, the literal pattern is iterated; skip it.
  [[ -e "$dashboard" ]] || continue
  echo "Installing $(basename "$dashboard")"
  # Each continuation line needs a trailing backslash so that the payload
  # and the URL are part of the same curl invocation.
  curl -X POST -H "Content-type: application/json" \
    -d "@${dashboard}" \
    "https://app.datadoghq.com/api/v1/dash?api_key=${DATADOG_API_KEY}&application_key=${DATADOG_APP_KEY}"
done
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,585 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_SPINNAKER",
|
||||
"label": "Spinnaker",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "prometheus",
|
||||
"pluginName": "Prometheus"
|
||||
}
|
||||
],
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "4.1.1"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "graph",
|
||||
"name": "Graph",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "prometheus",
|
||||
"name": "Prometheus",
|
||||
"version": "1.0.0"
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"hideControls": false,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"rows": [
|
||||
{
|
||||
"collapse": false,
|
||||
"height": 254,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_SPINNAKER}",
|
||||
"fill": 1,
|
||||
"id": 1,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "cpu0",
|
||||
"yaxis": 1
|
||||
},
|
||||
{
|
||||
"alias": "All",
|
||||
"yaxis": 2
|
||||
}
|
||||
],
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(rate(node_cpu{mode!=\"idle\"}[$SamplePeriod])) / sum(rate(node_cpu[$SamplePeriod])) ",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "All",
|
||||
"metric": "",
|
||||
"refId": "A",
|
||||
"step": 30
|
||||
},
|
||||
{
|
||||
"expr": "sum(rate(node_cpu{mode!=\"idle\"}[$SamplePeriod])) by (cpu)",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{cpu}}",
|
||||
"metric": "node_cpu",
|
||||
"refId": "B",
|
||||
"step": 30
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "CPU Utilization",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "percentunit",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_SPINNAKER}",
|
||||
"fill": 1,
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "Dirty",
|
||||
"yaxis": 2
|
||||
}
|
||||
],
|
||||
"span": 6,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "node_memory_MemFree ",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Free",
|
||||
"metric": "",
|
||||
"refId": "B",
|
||||
"step": 30
|
||||
},
|
||||
{
|
||||
"expr": "(node_memory_MemTotal - node_memory_Committed_AS) ",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Uncommitted",
|
||||
"metric": "node_memory_Committed_AS",
|
||||
"refId": "G",
|
||||
"step": 30
|
||||
},
|
||||
{
|
||||
"expr": "node_memory_Dirty",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Dirty",
|
||||
"metric": "",
|
||||
"refId": "F",
|
||||
"step": 30
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "System Memory Available",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "decbytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "decbytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6"
|
||||
},
|
||||
{
|
||||
"collapse": false,
|
||||
"height": 255,
|
||||
"panels": [
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_SPINNAKER}",
|
||||
"fill": 1,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "cpu0",
|
||||
"yaxis": 1
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(node_netstat_Ip_InReceives[$SamplePeriod]) ",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "In Packets",
|
||||
"metric": "",
|
||||
"refId": "A",
|
||||
"step": 120
|
||||
},
|
||||
{
|
||||
"expr": "rate(node_netstat_Ip_OutRequests[$SamplePeriod])",
|
||||
"hide": false,
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "Out Packets",
|
||||
"metric": "",
|
||||
"refId": "B",
|
||||
"step": 120
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Networking",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_SPINNAKER}",
|
||||
"fill": 1,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "READ",
|
||||
"yaxis": 2
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "delta(node_disk_bytes_written[$SamplePeriod])",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "WRITE",
|
||||
"metric": "node_disk_bytes_written",
|
||||
"refId": "A",
|
||||
"step": 60
|
||||
},
|
||||
{
|
||||
"expr": "delta(node_disk_bytes_read[$SamplePeriod])",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "READ",
|
||||
"metric": "node_disk_bytes_read",
|
||||
"refId": "B",
|
||||
"step": 60
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Disk IO",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "decbytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "decbytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"datasource": "${DS_SPINNAKER}",
|
||||
"fill": 1,
|
||||
"id": 5,
|
||||
"legend": {
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": false,
|
||||
"min": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": false
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"percentage": false,
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [
|
||||
{
|
||||
"alias": "READ",
|
||||
"yaxis": 2
|
||||
}
|
||||
],
|
||||
"span": 4,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "node_filesystem_free{mountpoint!~\"/run.*\"}",
|
||||
"interval": "",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{mountpoint}}",
|
||||
"metric": "",
|
||||
"refId": "B",
|
||||
"step": 60
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeShift": null,
|
||||
"title": "Disk Available",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 0,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "decbytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "decbytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"repeat": null,
|
||||
"repeatIteration": null,
|
||||
"repeatRowId": null,
|
||||
"showTitle": false,
|
||||
"title": "Dashboard Row",
|
||||
"titleSize": "h6"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 14,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"auto": false,
|
||||
"auto_count": 30,
|
||||
"auto_min": "10s",
|
||||
"current": {
|
||||
"text": "1m",
|
||||
"value": "1m"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Sample Period",
|
||||
"name": "SamplePeriod",
|
||||
"options": [
|
||||
{
|
||||
"selected": true,
|
||||
"text": "1m",
|
||||
"value": "1m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "5m",
|
||||
"value": "5m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "10m",
|
||||
"value": "10m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "15m",
|
||||
"value": "15m"
|
||||
},
|
||||
{
|
||||
"selected": false,
|
||||
"text": "30m",
|
||||
"value": "30m"
|
||||
}
|
||||
],
|
||||
"query": "1m,5m,10m,15m,30m",
|
||||
"refresh": 2,
|
||||
"type": "interval"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"title": "Machine Stats",
|
||||
"version": 1
|
||||
}
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,80 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2017 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Release artifact names. The paths and download URLs below are derived from
# these instead of repeating the literal names.
# NOTE: prometheus.conf also hard-codes /opt/prometheus-1.5.0.linux-amd64, and
# the release tags (v1.5.0, v0.13.0) in the URLs must be kept in sync by hand.
PROMETHEUS_VERSION=prometheus-1.5.0.linux-amd64
NODE_EXPORTER_VERSION=node_exporter-0.13.0.linux-amd64
GRAFANA_DEB=grafana_4.1.1-1484211277_amd64.deb
PROMETHEUS_DIR="/opt/${PROMETHEUS_VERSION}"
PROMETHEUS_PORT=9090
GRAFANA_PORT=3000
# Absolute directory of this script; config files and dashboards live here.
CONFIG_DIR=$(readlink -f "$(dirname "$0")")
cd /opt

# Install Prometheus
curl -L -o /tmp/prometheus.gz \
   "https://github.com/prometheus/prometheus/releases/download/v1.5.0/${PROMETHEUS_VERSION}.tar.gz"
sudo tar xzf /tmp/prometheus.gz -C /opt
rm /tmp/prometheus.gz

curl -L -o /tmp/node_exporter.gz \
   "https://github.com/prometheus/node_exporter/releases/download/v0.13.0/${NODE_EXPORTER_VERSION}.tar.gz"
sudo tar xzf /tmp/node_exporter.gz -C "$PROMETHEUS_DIR"
# -f: re-running the installer replaces an existing link instead of failing.
sudo ln -sf "${PROMETHEUS_DIR}/${NODE_EXPORTER_VERSION}/node_exporter" /usr/bin/node_exporter
rm /tmp/node_exporter.gz

# Absolute destination so the copy does not depend on the working directory.
sudo cp "$CONFIG_DIR/spinnaker-prometheus.yml" "$PROMETHEUS_DIR"
sudo cp "$CONFIG_DIR/prometheus.conf" /etc/init/prometheus.conf
sudo cp "$CONFIG_DIR/node_exporter.conf" /etc/init/node_exporter.conf


# Install Grafana
cd /tmp
wget "https://grafanarel.s3.amazonaws.com/builds/${GRAFANA_DEB}"
sudo apt-get install -y adduser libfontconfig
sudo dpkg -i "$GRAFANA_DEB"
sudo update-rc.d grafana-server defaults
rm "$GRAFANA_DEB"


# Startup
echo "Starting Prometheus"
sudo service node_exporter start
sudo service prometheus start
sudo service grafana-server start

# Wait a few seconds for Grafana to start listening before calling its API.
TRIES=0
until nc -z localhost "$GRAFANA_PORT" || [[ $TRIES -gt 5 ]]; do
  sleep 1
  let TRIES+=1
done
if ! nc -z localhost "$GRAFANA_PORT"; then
  # Keep going (as before), but say why the API calls below may fail.
  echo "WARNING: Grafana is not listening on port $GRAFANA_PORT yet" >&2
fi

echo "Adding datasource"
# Written with single quotes for readability; converted to JSON double quotes
# when passed to curl.
PAYLOAD="{'name':'Spinnaker','type':'prometheus','url':'http://localhost:${PROMETHEUS_PORT}','access':'direct','isDefault':true}"
curl -u admin:admin "http://localhost:${GRAFANA_PORT}/api/datasources" \
     -H "Content-Type: application/json" \
     -X POST \
     -d "${PAYLOAD//\'/\"}"

for dashboard in "${CONFIG_DIR}"/*Dashboard.json; do
  # An unmatched glob yields the literal pattern; skip it.
  [[ -e "$dashboard" ]] || continue
  echo "Installing $(basename "$dashboard")"
  # Drop the export-only "__inputs"/"__requires" sections and bind the
  # ${DS_SPINNAKER} placeholder to the datasource created above.
  dashboard_json=$(sed -e "/\"__inputs\"/,/],/d" \
                       -e "/\"__requires\"/,/],/d" \
                       -e "s/\${DS_SPINNAKER\}/Spinnaker/g" < "$dashboard")
  temp_file=$(mktemp)
  echo "{ \"dashboard\": $dashboard_json }" > "$temp_file"
  curl -u admin:admin "http://localhost:${GRAFANA_PORT}/api/dashboards/import" \
       -H "Content-Type: application/json" \
       -X POST \
       -d "@${temp_file}"
  rm -f "$temp_file"
done
|
|
@ -0,0 +1,2 @@
|
|||
start on filesystem or runlevel [2345]
|
||||
exec /usr/bin/node_exporter
|
|
@ -0,0 +1,6 @@
|
|||
start on filesystem or runlevel [2345]
|
||||
|
||||
exec /opt/prometheus-1.5.0.linux-amd64/prometheus \
|
||||
-config.file /opt/prometheus-1.5.0.linux-amd64/spinnaker-prometheus.yml \
|
||||
-storage.local.path /opt/prometheus-1.5.0.linux-amd64/data \
|
||||
> /var/log/prometheus.log 2>&1
|
|
@ -0,0 +1,33 @@
|
|||
# my global config
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
# scrape_timeout is set to the global default (10s).
|
||||
|
||||
# Attach these labels to any time series or alerts when communicating with
|
||||
# external systems (federation, remote storage, Alertmanager).
|
||||
external_labels:
|
||||
monitor: 'codelab-monitor'
|
||||
|
||||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
|
||||
rule_files:
|
||||
# - "first.rules"
|
||||
# - "second.rules"
|
||||
|
||||
# Scrape configurations: the 'spinnaker' job (localhost:8008) and the 'node'
|
||||
# job (localhost:9100). Scraping Prometheus itself is left commented out below.
|
||||
scrape_configs:
|
||||
- job_name: 'spinnaker'
|
||||
static_configs:
|
||||
- targets: ['localhost:8008']
|
||||
metrics_path: '/prometheus_metrics'
|
||||
honor_labels: true
|
||||
|
||||
- job_name: 'node'
|
||||
static_configs:
|
||||
- targets: ['localhost:9100']
|
||||
|
||||
# - job_name: 'prometheus'
|
||||
# static_configs:
|
||||
# - targets: ['localhost:9090']
|
||||
|
|
@ -0,0 +1,324 @@
|
|||
{
|
||||
"displayName": "Minimal Spinnaker Dashboard",
|
||||
"version": 14,
|
||||
"root": {
|
||||
"gridLayout": {
|
||||
"widgets": [
|
||||
{
|
||||
"title": "Hystrix Short Circuited (global)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/front50/hystrix.countShortCircuited\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/gate/hystrix.countShortCircuited\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/igor/hystrix.countShortCircuited\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Hystrix Exceptions Thrown (global)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/front50/hystrix.countExceptionsThrown\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/gate/hystrix.countExceptionsThrown\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/igor/hystrix.countExceptionsThrown\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Active Orchestrations (orca)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.isComplete=\"false\" AND metric.label.executionType=\"Orchestration\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Active Pipelines (orca)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.isComplete=\"false\" AND metric.label.executionType=\"Pipeline\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Completed Orchestrations (orca)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.status=\"SUCCEEDED\" AND metric.label.executionType=\"Orchestration\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.status!=\"SUCCEEDED\" AND metric.label.executionType=\"Orchestration\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Completed Pipelines (orca)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.status=\"SUCCEEDED\" AND metric.label.executionType=\"Pipeline\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/task.invocations\" AND metric.label.status!=\"SUCCEEDED\" AND metric.label.executionType=\"Pipeline\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Active Threads (orca)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/threadpool.activeCount\""
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Pipelines Triggered (echo)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/echo/pipelines.triggered\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Bake Activity (rosco)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/rosco/bakesActive\""
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Cached Items (front50)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/front50/storageServiceSupport.cacheSize\""
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Successful Operations (clouddriver)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/clouddriver/operations__count\" AND metric.label.success=\"true\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Failed Operations (clouddriver)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/clouddriver/operations__count\" AND metric.label.success=\"false\"",
|
||||
"perSeriesAligner": "ALIGN_DELTA"
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "JVM Memory (global)",
|
||||
"xyChart": {
|
||||
"dataSets": [
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/clouddriver/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/echo/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/fiat/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/front50/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/gate/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/igor/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/orca/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
|
||||
}
|
||||
},
|
||||
{
|
||||
"timeSeriesFilter": {
|
||||
"filter": "metric.type=\"custom.googleapis.com/spinnaker/rosco/jvm.memory.used\" AND metric.label.memtype=\"HEAP\""
|
||||
}
|
||||
}
|
||||
],
|
||||
"constantLines": [
|
||||
{}
|
||||
],
|
||||
"options": {},
|
||||
"y1Axis": {},
|
||||
"xAxis": {}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2017 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
echo "See https://cloud.google.com/monitoring/agent/install-agent"
echo "The agent is optional (and only available on GCP and AWS)"

if [[ -z "$STACKDRIVER_API_KEY" ]]; then
  # Remove this once API is no longer whitelisted.
  echo "You need a STACKDRIVER_API_KEY to use this installer."
  # Exit statuses are 0-255; use a plain failure code rather than -1.
  exit 1
fi

# Upload each dashboard, forwarding this script's own arguments to the tool.
# NOTE(review): both the dashboard glob and the tool path are relative to the
# current working directory -- confirm callers run this from the right place.
for dashboard in *Dashboard.json; do
  google/stackdriver_monitoring/spinnaker_metric_tool.sh \
      upload_stackdriver_dashboard --dashboard "${dashboard}" \
      "$@"
done

# The following line used to sit here uncommented, where it was executed as a
# (nonexistent) command after the loop. It is kept as a note only:
#   --credentials_path=$HOME/.spinnaker/google-credentials.json --dashboard xyz --update
|
|
@ -30,37 +30,36 @@ function make_spinnaker_monitor_zip() {
|
|||
local zip_file="$TEMP_DIR/monitor_spinnaker.zip"
|
||||
|
||||
cd "$SOURCE_DIR"
|
||||
zip -r "$zip_file" `ls *.py | grep -v _test.py`
|
||||
zip -qr "$zip_file" `ls *.py | grep -v _test.py`
|
||||
|
||||
cp spinnaker_metric_tool.py $TEMP_DIR/__main__.py
|
||||
cd $TEMP_DIR
|
||||
zip $zip_file __main__.py
|
||||
zip -q $zip_file __main__.py
|
||||
rm -f __main__.py
|
||||
|
||||
cd "$BUILD_DIR/spinnaker"
|
||||
zip -r $zip_file pylib
|
||||
|
||||
cd "$BUILD_DIR/citest"
|
||||
zip -r $zip_file citest
|
||||
zip -qr $zip_file pylib
|
||||
}
|
||||
|
||||
function make_install_tar() {
|
||||
local tar_file="$TEMP_DIR/install.tz"
|
||||
local tar_file="$1"
|
||||
local staging_dir="$TEMP_DIR/monitor_spinnaker"
|
||||
mkdir $staging_dir
|
||||
|
||||
cd "$SOURCE_DIR"
|
||||
cp *.json README.md $TEMP_DIR/monitor_spinnaker.zip $staging_dir
|
||||
cp -pr install_monitoring.sh config README.md $TEMP_DIR/monitor_spinnaker.zip $staging_dir
|
||||
cat requirements.txt | grep -v mock > $staging_dir/requirements.txt
|
||||
|
||||
cd $TEMP_DIR
|
||||
tar czf $tar_file monitor_spinnaker
|
||||
if [[ "$tar_file" == *.tz || "$tar_file" == *.tar.gz ]]; then
|
||||
tar czf $tar_file monitor_spinnaker
|
||||
else
|
||||
tar cf $tar_file monitor_spinnaker
|
||||
fi
|
||||
}
|
||||
|
||||
make_spinnaker_monitor_zip
|
||||
make_install_tar
|
||||
make_install_tar "$TARGET_PATH"
|
||||
|
||||
cp "$TEMP_DIR/install.tz" "$TARGET_PATH"
|
||||
rm -rf $TEMP_DIR
|
||||
|
||||
echo "WROTE $TARGET_PATH"
|
||||
|
|
|
@ -0,0 +1,204 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2017 Google Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
# Absolute directory of this script. "$0" is quoted so that a path containing
# whitespace survives the dirname/readlink round trip.
SOURCE_DIR=$(readlink -f "$(dirname "$0")")
# Verbatim copy of the command line, consulted later for provider options.
COMMAND_LINE_FLAGS=("$@")
# Provider switches, flipped to "true" by process_args.
USE_DATADOG=false
USE_PROMETHEUS=false
USE_STACKDRIVER=false
# Accumulated provider flags (e.g. " --datadog --prometheus").
PROVIDERS=""
# NOTE(review): EXTRA_ARGS is not assigned anywhere else in the visible part
# of this script -- confirm how it is used further down.
EXTRA_ARGS=""
|
||||
|
||||
# Print the command-line help text to stdout.
#
# The here-document is unquoted on purpose: the script name is substituted in,
# and the trailing backslashes join the synopsis into a single output line.
# NOTE(review): the <service> list below does not mention orca -- confirm
# whether that omission is intentional.
function print_usage() {
  cat <<EOF
$(basename "$0"): <provider_switch>+ \
    <monitor_options>* \
    <service_options>* \
    <provider_options>*

<provider_switch> is one or more of:
    --datadog
        Install and configure a Datadog agent.
        Spinnaker's metric monitoring tool will publish metrics to Datadog.
        You will be prompted for your API and APP keys unless you define
        environment variables DATADOG_APP_KEY and DATADOG_API_KEY.

    --prometheus
        Install and configure Prometheus and Grafana Dashboard.
        Spinnaker's metric monitoring tool will publish metrics to Prometheus.

    --stackdriver
        Spinnaker's metric monitoring tool will publish metrics to Stackdriver.
        You may also need --credentials_path=<path>


<monitor_options> zero or more of:
    --port=8008
        The port number to use for the embedded HTTP server within the monitor.

    --period=60
        Number of seconds between pollings of microservices.

    --service_hosts <ip>+
        A comma-delimited list of hostnames (or IPs) to poll by default.
        The default is localhost.
        Make this empty "" to not poll any services by default.


<service_options> are in the form --<service>=<netloc>* where:
    <service> is one of clouddriver, echo, fiat, front50, gate, igor, rosco
    <netloc> is a comma-delimited list of either a <host> or <host>:<port>
        If only a <host> is provided, then the default port will be used.

    An empty <netloc> list will disable polling on the service entirely.
    A value of "*" refers to all the --service_hosts.
    The default <netloc> for each of the services is "*".


<provider_options> are zero or more of:
    --credentials_path=<path>
        If using --stackdriver, the path for the Google Credentials to use.
        The default will be the application default credentials.
EOF
}
|
||||
|
||||
|
||||
# Scan the given arguments for provider switches.
#
# For each of --datadog, --prometheus and --stackdriver this sets the
# matching USE_<PROVIDER> global to "true" and appends the switch to the
# PROVIDERS string. --help (or -h) prints the usage text and exits with
# status 1. Any other argument is ignored by this function.
function process_args() {
  # Use a numeric test; ">" inside [[ ]] compares strings lexicographically.
  while [[ $# -gt 0 ]]
  do
    local key="$1"
    shift
    case "$key" in
      --datadog)
        USE_DATADOG=true
        PROVIDERS="$PROVIDERS --datadog"
        ;;

      --prometheus)
        USE_PROMETHEUS=true
        PROVIDERS="$PROVIDERS --prometheus"
        ;;

      --stackdriver)
        USE_STACKDRIVER=true
        PROVIDERS="$PROVIDERS --stackdriver"
        ;;

      --help|-h)
        print_usage
        exit 1
        ;;

      *)
        ;;  # ignore

    esac
  done
}
|
||||
|
||||
|
||||
# Install the packages the monitor needs: refreshes the apt index, installs
# python-pip and python-dev, then pip-installs the requirements file that
# lives in $SOURCE_DIR.
function install_dependencies() {
  apt-get update
  apt-get install python-pip python-dev -y
  # Quoted so an install directory containing spaces is passed as one path.
  pip install -r "$SOURCE_DIR/requirements.txt"
}
|
||||
|
||||
|
||||
# Run the per-provider install script for every provider that was selected
# (USE_DATADOG / USE_PROMETHEUS / USE_STACKDRIVER equal to "true").
#
# For Stackdriver, the last --credentials_path=... entry found in
# COMMAND_LINE_FLAGS (if any) is forwarded to its install script.
function install_metric_services() {
  if [[ "$USE_DATADOG" == "true" ]]; then
    "$SOURCE_DIR/config/datadog/install.sh"
  fi
  if [[ "$USE_PROMETHEUS" == "true" ]]; then
    "$SOURCE_DIR/config/prometheus/install.sh"
  fi
  if [[ "$USE_STACKDRIVER" == "true" ]]; then
    local credentials=""
    local arg  # keep the loop variable out of the global scope
    for arg in "${COMMAND_LINE_FLAGS[@]}"; do
      if [[ "$arg" == --credentials_path=* ]]; then
        credentials="$arg"
      fi
    done
    # Pass the flag as a single word when present, and no argument at all
    # (rather than an empty one) when it is absent.
    "$SOURCE_DIR/config/stackdriver/install.sh" ${credentials:+"$credentials"}
  fi
}
|
||||
|
||||
|
||||
# Generate $SOURCE_DIR/monitor_spinnaker.sh, the launcher for the monitor.
#
# Arguments: the flags to bake into the generated script. They are placed
# ahead of whatever arguments the launcher itself receives at run time.
#
# The generated script exports the variables defined by
# /etc/default/spinnaker and $SOURCE_DIR/environ (when those files exist)
# and then runs the "monitor" command of monitor_spinnaker.zip.
function write_startup_script() {
  # Shell-quote each flag so values containing whitespace or metacharacters
  # survive being written into the generated script. Skipped when there are
  # no flags because printf would otherwise emit an empty '' argument.
  local baked_flags=""
  if [[ $# -gt 0 ]]; then
    baked_flags=$(printf '%q ' "$@")
  fi

  # $SOURCE_DIR and $baked_flags are expanded now; \$@ is left for the
  # generated script to expand when it runs.
  cat <<-EOF > "$SOURCE_DIR/monitor_spinnaker.sh"
#!/bin/bash

set -o allexport
if [[ -f /etc/default/spinnaker ]]; then
source /etc/default/spinnaker
fi
if [[ -f "$SOURCE_DIR/environ" ]]; then
source "$SOURCE_DIR/environ"
fi
set +o allexport

PYTHONWARNINGS=once \
python "$SOURCE_DIR/monitor_spinnaker.zip" \
monitor $baked_flags"\$@"
EOF
  chmod 755 "$SOURCE_DIR/monitor_spinnaker.sh"
}
|
||||
|
||||
|
||||
# Write the upstart job definition that launches the generated
# monitor_spinnaker.sh and sends its output to /var/log/monitor_spinnaker.log.
function write_upstart_script() {
  local upstart_conf=/etc/init/monitor_spinnaker.conf

  # $SOURCE_DIR is expanded while writing, so the job file holds the
  # resolved path to the launcher.
  cat > "$upstart_conf" <<-EOF
start on filesystem or runlevel [2345]

exec $SOURCE_DIR/monitor_spinnaker.sh > /var/log/monitor_spinnaker.log 2>&1
EOF
  chmod 644 "$upstart_conf"
}
|
||||
|
||||
|
||||
# Main sequence: validate the flags and privileges, install everything,
# then start the monitor service and print a closing reminder.
process_args "${COMMAND_LINE_FLAGS[@]}"
if [[ -z "$PROVIDERS" ]]; then
  print_usage
  echo ""
  # Diagnostics belong on stderr so they are not mixed into captured output.
  echo "ERROR: No <provider_switch> options were provided." >&2
  # 255 is the status bash reports for "exit -1"; spelled out explicitly.
  exit 255
fi

if [[ $(/usr/bin/id -u) -ne 0 ]]; then
  echo "$0 must be executed with root permissions; exiting" >&2
  exit 1
fi

install_dependencies
install_metric_services
write_startup_script "${COMMAND_LINE_FLAGS[@]}"
write_upstart_script

echo "Starting to monitor Spinnaker services..."
service monitor_spinnaker start

cat <<EOF

Be sure that your spinnaker-local.yml has services.spectator.webEndpoint.enabled=true
For more information, see:
http://www.spinnaker.io/docs/monitoring-a-spinnaker-deployment
EOF
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -15,6 +15,7 @@
|
|||
import cgi
|
||||
import httplib
|
||||
import json
|
||||
import os
|
||||
import logging
|
||||
|
||||
from command_processor import CommandHandler
|
||||
|
@ -469,6 +470,107 @@ class UpsertCustomDescriptorsHandler(BaseStackdriverCommandHandler):
|
|||
project, upsert_descriptors, type_map, self.output)
|
||||
|
||||
|
||||
class ListDashboardsHandler(BaseStackdriverCommandHandler):
  """Administrative command reporting every dashboard in the project.

  The listing is not limited to Spinnaker's own dashboards.
  """

  def process_commandline_request(self, options):
    """Implements CommandHandler.

    Walks all result pages of the dashboards listing and outputs the string
    form of a dict mapping each dashboard's 'name' to its 'displayName'.
    """
    service = stackdriver_service.make_service(options)

    project_path = 'projects/{0}'.format(service.project)
    api = service.stub.projects().dashboards()

    collected = []
    pending = api.list(parent=project_path)
    while pending:
      page = pending.execute()
      collected += page.get('dashboards', [])
      # list_next yields a falsy value once there are no more pages.
      pending = api.list_next(pending, page)

    summary = dict(
        (entry['name'], entry['displayName']) for entry in collected)
    self.output(options, str(summary))
|
||||
|
||||
|
||||
def lookup_dashboard(stackdriver, display_name):
  """Return the first dashboard whose displayName equals display_name.

  Args:
    stackdriver: Service object exposing 'project' and 'stub'.
    display_name: The display name to search for.

  Returns:
    The matching dashboard entry from the listing, or None if no page of
    results contains a match.
  """
  project_path = 'projects/{0}'.format(stackdriver.project)
  api = stackdriver.stub.projects().dashboards()

  pending = api.list(parent=project_path)
  while pending:
    page = pending.execute()
    # Lazy scan so entries after the first match are never inspected.
    hit = next((entry for entry in page.get('dashboards', [])
                if entry['displayName'] == display_name),
               None)
    if hit is not None:
      return hit
    pending = api.list_next(pending, page)
  return None
|
||||
|
||||
|
||||
class GetDashboardHandler(BaseStackdriverCommandHandler):
  """Administrative command that outputs one dashboard's definition as JSON."""

  def add_argparser(self, subparsers):
    """Implements CommandHandler, adding the required --name option."""
    parser = super(GetDashboardHandler, self).add_argparser(subparsers)
    parser.add_argument('--name', required=True,
                        help='The name of the dashboard to get.')
    return parser

  def process_commandline_request(self, options):
    """Implements CommandHandler.

    Looks the dashboard up by the display name given in options['name'] and
    outputs it as JSON indented by two spaces.

    Raises:
      ValueError: If no name was supplied, or no dashboard has that
        display name.
    """
    wanted = options.get('name')
    if not wanted:
      raise ValueError('No name provided.')

    service = stackdriver_service.make_service(options)
    dashboard = lookup_dashboard(service, wanted)
    if dashboard is None:
      raise ValueError('"{0}" not found.'.format(wanted))

    self.output(options, json.dumps(dashboard, indent=2))
|
||||
|
||||
|
||||
class UploadDashboardHandler(BaseStackdriverCommandHandler):
  """Administrative command that creates or updates a dashboard from a file."""

  def add_argparser(self, subparsers):
    """Implements CommandHandler, adding --dashboard and --update."""
    parser = super(UploadDashboardHandler, self).add_argparser(subparsers)
    parser.add_argument(
        '--dashboard', required=True,
        help='The path to the json dashboard file.')
    parser.add_argument(
        '--update', default=False, action='store_true',
        help='Update an existing dashboard rather than create a new one.')
    return parser

  def process_commandline_request(self, options):
    """Implements CommandHandler.

    Reads the JSON specification at options['dashboard'] and either creates
    a new dashboard from it or, when options['update'] is set, replaces the
    existing dashboard that has the same displayName.

    Raises:
      ValueError: If no path was supplied, or if updating and no existing
        dashboard has the specification's display name.
    """
    spec_path = options.get('dashboard')
    if not spec_path:
      raise ValueError('No dashboard provided.')
    with open(spec_path, 'r') as infile:
      specification = json.loads(infile.read())

    service = stackdriver_service.make_service(options)
    api = service.stub.projects().dashboards()
    project_path = 'projects/{0}'.format(service.project)

    if not options.get('update', False):
      response = api.create(parent=project_path, body=specification).execute()
      action = 'Created'
    else:
      # Updates are addressed by resource name, which is found by searching
      # for the display name in the uploaded specification.
      title = specification['displayName']
      existing = lookup_dashboard(service, title)
      if existing is None:
        raise ValueError('"{0}" not found.'.format(title))
      response = api.update(
          name=existing['name'], body=specification).execute()
      action = 'Updated'

    summary = '{action} "{title}" with name {name}'.format(
        action=action, title=response['displayName'], name=response['name'])
    self.output(options, summary)
|
||||
|
||||
|
||||
def add_handlers(handler_list, subparsers):
|
||||
"""Registers CommandHandlers for interacting with Stackdriver."""
|
||||
command_handlers = [
|
||||
|
@ -487,6 +589,19 @@ def add_handlers(handler_list, subparsers):
|
|||
' update the existing ones and add the new ones.'
|
||||
' WARNING: Historic time-series data may be lost on update.')
|
||||
]
|
||||
if os.environ.get('STACKDRIVER_API_KEY'):
|
||||
command_handlers.extend([
|
||||
ListDashboardsHandler('/stackdriver/list_dashboards',
|
||||
'list_stackdriver_dashboards',
|
||||
'List the available Stackdriver Dashboards'),
|
||||
GetDashboardHandler(None,
|
||||
'get_stackdriver_dashboard',
|
||||
'Get a specific dashboard by display name'),
|
||||
UploadDashboardHandler(None,
|
||||
'upload_stackdriver_dashboard',
|
||||
'Create or update specific dashboard')
|
||||
])
|
||||
|
||||
for handler in command_handlers:
|
||||
handler.add_argparser(subparsers)
|
||||
handler_list.append(handler)
|
||||
|
|
|
@ -73,6 +73,11 @@ class StackdriverMetricsService(object):
|
|||
def millis_to_time(millis):
|
||||
return datetime.fromtimestamp(millis / 1000).isoformat('T') + 'Z'
|
||||
|
||||
@property
def project(self):
  """The stackdriver project this service is using (read-only)."""
  return self.__project
|
||||
|
||||
@property
|
||||
def stub(self):
|
||||
"""Returns the stackdriver client stub."""
|
||||
|
@ -385,7 +390,15 @@ def make_service(options):
|
|||
credentials = GoogleCredentials.get_application_default()
|
||||
|
||||
http = credentials.authorize(http)
|
||||
return apiclient.discovery.build('monitoring', 'v3', http=http)
|
||||
developerKey = os.environ.get('STACKDRIVER_API_KEY')
|
||||
if developerKey:
|
||||
url='https://monitoring.googleapis.com/$discovery/rest?labels=DASHBOARD_TRUSTED_TESTER&key='+developerKey
|
||||
return apiclient.discovery.build(
|
||||
'monitoring', 'v3', http=http,
|
||||
discoveryServiceUrl=url)
|
||||
else:
|
||||
return apiclient.discovery.build('monitoring', 'v3', http=http)
|
||||
|
||||
|
||||
return StackdriverMetricsService(make_stub, options)
|
||||
|
||||
|
|
|
@ -320,6 +320,18 @@ function extract_spinnaker_gcr_credentials() {
|
|||
fi
|
||||
}
|
||||
|
||||
# Optionally install monitoring on first boot.
#
# Does nothing unless the "monitor_spinnaker" instance metadata attribute is
# non-empty and the monitoring installer archive is present. Otherwise it
# unpacks the archive under /opt, runs the installer with the attribute's
# value as its flags, and then clears the attribute.
function do_experimental_startup() {
  local monitor_config=$(get_instance_metadata_attribute "monitor_spinnaker")

  if [[ -z $monitor_config \
        || ! -f /opt/spinnaker/install/install_monitor_spinnaker.tz ]]; then
    return 0
  fi

  echo "$STATUS_PREFIX Install Monitoring with flags '$monitor_config' "
  tar xzf /opt/spinnaker/install/install_monitor_spinnaker.tz \
      -C /opt --no-same-owner
  # Left unquoted on purpose: the attribute value is split into flags.
  /opt/monitor_spinnaker/install_monitoring.sh $monitor_config
  clear_instance_metadata "monitor_spinnaker"
}
|
||||
|
||||
function process_args() {
|
||||
while [[ $# > 0 ]]
|
||||
do
|
||||
|
@ -382,6 +394,8 @@ extract_spinnaker_credentials
|
|||
echo "$STATUS_PREFIX Configuring Spinnaker"
|
||||
$SPINNAKER_INSTALL_DIR/scripts/reconfigure_spinnaker.sh
|
||||
|
||||
do_experimental_startup
|
||||
|
||||
|
||||
# Replace this first time boot with the normal startup script
|
||||
# that just starts spinnaker (and its dependencies) without configuring anymore.
|
||||
|
|
Загрузка…
Ссылка в новой задаче