Bug 1465974 Add option for installing datadog-agent

Jeff Klukas 2018-08-23 10:14:41 -04:00
Parent 18d2b2c09d
Commit ad2dced0be
3 changed files with 74 additions and 0 deletions

View file

@@ -13,6 +13,17 @@ then
    IS_MASTER=$(jq .isMaster /mnt/var/lib/info/instance.json)
fi

# Provide a query interface for describe-cluster results.
describe_cluster() {
    local query=$1
    jq -r "$query" /mnt/describe-cluster.json
}

# Pull information about the cluster.
CLUSTER_ID=$(jq -r .jobFlowId /mnt/var/lib/info/job-flow.json)
aws emr describe-cluster --region us-west-2 --cluster-id "$CLUSTER_ID" > /mnt/describe-cluster.json
CLUSTER_NAME=$(describe_cluster '.Cluster.Name')
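The describe_cluster helper accepts any jq filter over the cached describe-cluster output. A further illustration (this particular query is not part of the commit, though .Cluster.Status.State is a standard field in the describe-cluster response):

    CLUSTER_STATE=$(describe_cluster '.Cluster.Status.State')  # e.g. "BOOTSTRAPPING"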

# Parse arguments
while [ $# -gt 0 ]; do
    case "$1" in
@@ -32,6 +43,10 @@ while [ $# -gt 0 ]; do
            shift
            EFS_DNS=$1
            ;;
        --metrics-provider)
            shift
            METRICS_PROVIDER=$1
            ;;
        -*)
            # do not exit out, just note failure
            echo 1>&2 "unrecognized option: $1"
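For context, the new flag arrives as a bootstrap-action argument. A hypothetical launch wiring it through (the bucket, script path, and sizing options are placeholders, not values from this repository):

    aws emr create-cluster \
        --release-label emr-5.13.0 \
        --applications Name=Spark \
        --instance-type m4.xlarge --instance-count 3 \
        --use-default-roles \
        --bootstrap-actions "Path=s3://example-bucket/bootstrap/telemetry.sh,Args=[--metrics-provider,datadog]"

emr-5.13.0 is used in this sketch because, per the version check added below, it is the first release line whose Spark ships StatsdSink.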
@@ -209,6 +224,62 @@ rm RRO-3.2.1-el6.x86_64.tar.gz
cd RRO-3.2.1; sudo ./install.sh; cd $HOME
$ANACONDA_PATH/bin/pip install rpy2

# Set up metrics reporting
SOPS_RPM=sops-3.0.4-1.x86_64.rpm
SPARK_METRICS_CONF=/mnt/spark-metrics.properties
touch $SPARK_METRICS_CONF
if [ "$METRICS_PROVIDER" = "datadog" ]; then
    # Pull down and decrypt the Datadog API key; see https://github.com/mozilla/emr-bootstrap-spark/pull/485
    DD_DIR=$(mktemp -d)
    aws s3 cp $TELEMETRY_CONF_BUCKET/packages/$SOPS_RPM $DD_DIR/
    aws s3 cp $TELEMETRY_CONF_BUCKET/credentials/emr.yaml $DD_DIR/
    sudo yum -y localinstall $DD_DIR/$SOPS_RPM
    export DD_API_KEY=$(/usr/local/bin/sops --decrypt --extract '["datadog_agent::api_key"]' $DD_DIR/emr.yaml)

    # Install datadog-agent; see https://app.datadoghq.com/account/settings#agent/aws
    bash -c "$(curl -sLS https://raw.githubusercontent.com/DataDog/datadog-agent/master/cmd/agent/install_script.sh)"
    rm -r "$DD_DIR"

    # We opt not to use https://docs.datadoghq.com/integrations/spark/
    # Instead, we standardize on statsd metrics using Spark's StatsdSink
    # and configure tags in the agent that will be applied by dogstatsd.
    # This should make switching to a new metrics provider relatively straightforward.
    MIN_SPARK_VERSION=2.3.0
    SPARK_VERSION=$(describe_cluster '.Cluster.Applications[] | select(.Name=="Spark") | .Version')
    spark_has_statsdsink() {
        # Drop into python to take advantage of pkg_resources.parse_version,
        # which provides more robust comparison than pure bash.
        python <<EOF
from sys import exit
from pkg_resources import parse_version
if parse_version('${SPARK_VERSION}') < parse_version('${MIN_SPARK_VERSION}'):
    exit(1)
EOF
    }
    if ! spark_has_statsdsink; then
        echo "ERROR: Incompatible options. StatsdSink is required to support --metrics-provider=datadog; it was introduced in Spark $MIN_SPARK_VERSION, which is newer than the $SPARK_VERSION installed here. Use emr-5.13.0 or later if you need to enable metrics. Terminating..." 1>&2
        # Cause the cluster to terminate with "Bootstrap failure"
        exit 1
    fi
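The point of parse_version is that it compares release segments numerically rather than lexicographically, so 2.10.0 correctly sorts after 2.3.0 where a bash string comparison would get it backwards. An illustrative check (not part of the commit):

    python -c "from pkg_resources import parse_version as v; assert v('2.2.1') < v('2.3.0'); assert v('2.10.0') > v('2.3.0')"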
echo "*.sink.statsd.class=org.apache.spark.metrics.sink.StatsdSink" >> $SPARK_METRICS_CONF
    # We add tags to the Datadog agent configuration so the DogStatsd provider
    # will attach them to the messages it sends.
    # We include all the AWS tags applied to the cluster, so the list here will
    # likely include App, Application, Environment, etc.
    sudo tee -a /etc/datadog-agent/datadog.yaml <<EOF
tags:
- cluster_name:${CLUSTER_NAME}
- cluster_id:${CLUSTER_ID}
# Following metric tags correspond to EMR cluster tags
$(describe_cluster '.Cluster.Tags[] | "- \(.Key):\(.Value)"')
EOF
    sudo restart datadog-agent
elif [ -n "$METRICS_PROVIDER" ]; then
    echo 1>&2 "unrecognized value for --metrics-provider: $METRICS_PROVIDER"
fi
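A quick way to confirm the restarted agent is listening (illustrative, not part of the commit): bash can write straight to dogstatsd's UDP socket, and the made-up counter below should then show up in Datadog within a few minutes.

    echo -n "bootstrap.smoke_test:1|c" > /dev/udp/127.0.0.1/8125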

# Set up global bash file
# .sh files in /etc/profile.d/ run whenever a user logs in
GLOBAL_BASHRC=/etc/profile.d/atmo.sh

View file

@@ -411,6 +411,7 @@
            "Effect": "Allow",
            "Action": [
                "elasticmapreduce:Describe*",
                "elasticmapreduce:List*",
                "ec2:Describe*",
                "rds:Describe*"
            ],
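The elasticmapreduce:List* addition is presumably needed because the aws emr describe-cluster CLI command calls List* operations (e.g. ListBootstrapActions) alongside DescribeCluster to assemble its output. A sketch of a sanity check from a node running under this policy (region mirrors the us-west-2 hard-coded in the bootstrap script; both calls must succeed for the metrics setup and tag harvesting to work):

    aws emr describe-cluster --region us-west-2 --cluster-id "$CLUSTER_ID" > /dev/null
    aws emr list-clusters --region us-west-2 --max-items 1 > /dev/null && echo "EMR read access OK"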

View file

@@ -23,6 +23,8 @@
    }, {
        "Classification": "spark-defaults",
        "Properties": {
            "spark.metrics.conf": "/mnt/spark-metrics.properties",
            "spark.metrics.namespace": "spark",
            "spark.ui.killEnabled": "true",
            "spark.local.dir": "/mnt,/mnt1",
            "spark.driver.maxResultSize": "4g",