Bug 1309622 - Remove Ansible in favor of dockerflow patterns.

Jannis Leidel 2016-12-15 22:04:23 +01:00, committed by whd
Parent e676825b5d
Commit 76abcec3a9
20 changed files with 333 additions and 683 deletions

View file

@@ -1,23 +1,67 @@
# AUTHOR: Roberto Vitillo
# DESCRIPTION: Mozilla's Airflow container
# BUILD: docker build --rm -t mozdata/telemetry-airflow .
# SOURCE: https://github.com/mozilla/telemetry-airflow
FROM python:2-slim
MAINTAINER Mozilla Telemetry
FROM puckel/docker-airflow:1.7.1.3
MAINTAINER vitillo
ENV PYTHONUNBUFFERED 1
USER root
RUN apt-get update -yqq && \
apt-get install -yqq python-pip python-mysqldb && \
pip install boto3 && \
pip install airflow[async] && \
pip install airflow[password] && \
pip install retrying
ENV AWS_REGION us-west-2
# ENV AWS_ACCESS_KEY_ID
# ENV AWS_SECRET_ACCESS_KEY
ADD ansible/files/airflow/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg
ADD ansible/files/airflow/entrypoint.sh ${AIRFLOW_HOME}/entrypoint.sh
ADD ansible/files/airflow/replace_env.py ${AIRFLOW_HOME}/replace_env.py
RUN chown airflow:airflow ${AIRFLOW_HOME}/airflow.cfg
ENV SPARK_BUCKET telemetry-spark-emr-2
ENV AIRFLOW_BUCKET telemetry-test-bucket
ENV PRIVATE_OUTPUT_BUCKET telemetry-test-bucket
ENV PUBLIC_OUTPUT_BUCKET telemetry-test-bucket
USER airflow
ADD dags/ /usr/local/airflow/dags/
ENV EMR_KEY_NAME mozilla_vitillo
ENV EMR_FLOW_ROLE telemetry-spark-cloudformation-TelemetrySparkInstanceProfile-1SATUBVEXG7E3
ENV EMR_SERVICE_ROLE EMR_DefaultRole
ENV EMR_INSTANCE_TYPE c3.4xlarge
ENV AIRFLOW_HOME /app
ENV AIRFLOW_AUTHENTICATE False
ENV AIRFLOW_BROKER_URL redis://redis:6379/0
ENV AIRFLOW_RESULT_URL ${AIRFLOW_BROKER_URL}
ENV AIRFLOW_FLOWER_PORT 5555
ENV AIRFLOW_DATABASE_URL postgres://postgres@db/postgres
ENV AIRFLOW_FERNET_KEY "VDRN7HAYDw36BTFbLPibEgJ7q2Dzn-dVGwtbu8iKwUg"
ENV AIRFLOW_SECRET_KEY "3Pxs_qg7J6TvRuAIIpu4E2EK_8sHlOYsxZbB-o82mcg"
# ENV AIRFLOW_SMTP_HOST
# ENV AIRFLOW_SMTP_USER
# ENV AIRFLOW_SMTP_PASSWORD
ENV AIRFLOW_SMTP_FROM telemetry-alerts@airflow.dev.mozaws.net
ENV PORT 8000
EXPOSE 8000
# add a non-privileged user for installing and running the application
RUN mkdir /app && \
chown 10001:10001 /app && \
chmod g+w /app && \
groupadd --gid 10001 app && \
useradd --uid 10001 --gid 10001 --home /app app
RUN apt-get update && \
apt-get install -y --no-install-recommends \
apt-transport-https build-essential curl git libpq-dev \
postgresql-client gettext sqlite3 libffi-dev libsasl2-dev && \
apt-get autoremove -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Switch to /tmp to install dependencies outside home dir
WORKDIR /tmp
# Install Python dependencies
COPY requirements.txt /tmp/
RUN pip install --upgrade --no-cache-dir -r requirements.txt
# Switch back to home directory
WORKDIR /app
COPY . /app
RUN chown -R 10001:10001 /app && \
chmod -R g+w /app
USER 10001

Makefile (new file, 43 lines)

@@ -0,0 +1,43 @@
.PHONY: build clean migrate redis-cli run secret shell stop up
help:
@echo "Welcome to the Telemetry Airflow\n"
@echo "The list of commands for local development:\n"
@echo " build Builds the docker images for the docker-compose setup"
@echo " clean Stops and removes all docker containers"
@echo " migrate Runs the Django database migrations"
@echo " redis-cli Opens a Redis CLI"
@echo " run Run a airflow command"
@echo " secret Create a secret to be used for a config variable"
@echo " shell Opens a Bash shell"
@echo " up Runs the whole stack, served under http://localhost:8000/\n"
@echo " stop Stops the docker containers"
build:
docker-compose build
clean: stop
docker-compose rm -f
migrate:
docker-compose run web airflow initdb
docker-compose run web airflow upgradedb
shell:
docker-compose run web bash
redis-cli:
docker-compose run redis redis-cli -h redis
run:
docker-compose run web airflow $(COMMAND)
secret:
@docker-compose run web python -c \
"from cryptography.fernet import Fernet; print Fernet.generate_key().decode()"
stop:
docker-compose stop
up:
docker-compose up
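
For orientation, a typical local first run with the targets above might look like the sketch below (assuming docker and docker-compose are installed and the daemon has enough memory, as noted in the README further down):

```bash
make build    # build the docker-compose images
make migrate  # initialize and upgrade the Airflow metadata database
make up       # start db, redis, web, worker, scheduler and flower

# in another terminal: run a one-off task test, as described in the README
make run COMMAND="test example spark 20160101"
```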

View file

@@ -1,25 +1,28 @@
# telemetry-airflow
Airflow is a platform to programmatically author, schedule and monitor workflows.
When workflows are defined as code, they become more maintainable, versionable,
testable, and collaborative.
Use Airflow to author workflows as directed acyclic graphs (DAGs) of tasks.
The Airflow scheduler executes your tasks on an array of workers while following
the specified dependencies. Rich command line utilities make performing complex
surgeries on DAGs a snap. The rich user interface makes it easy to visualize
pipelines running in production, monitor progress, and troubleshoot issues when
needed.
### Prerequisites
This app is built and deployed with [docker](https://docs.docker.com/engine/installation/) and [ansible](http://docs.ansible.com/ansible/intro_installation.html).
This app is built and deployed with
[docker](https://docs.docker.com/) and
[docker-compose](https://docs.docker.com/compose/).
### Build Container
An Airflow container can be built with
```bash
docker build -t mozdata/telemetry-airflow .
```
and pushed to Docker hub with
```bash
docker push mozdata/telemetry-airflow
make build
```
### Testing
@@ -27,58 +30,67 @@ docker push mozdata/telemetry-airflow
A single task, e.g. `spark`, of an Airflow dag, e.g. `example`, can be run with an execution date, e.g. `2016-01-01`, in the `dev` environment with:
```bash
AWS_SECRET_ACCESS_KEY=... AWS_ACCESS_KEY_ID=... \
ansible-playbook ansible/deploy_local.yml -e '@ansible/envs/dev.yml' -e "command='test example spark 20160101'"
make run COMMAND="test example spark 20160101"
```
The container will run the desired task to completion (or failure).
Note that if the container is stopped during the execution of a task,
the task will be aborted. In the example's case, the Spark job will be
terminated.
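
Other one-off Airflow commands can be run through the same make target; for example (a sketch based on the Airflow 1.7 CLI, reusing the README's `example` DAG):

```bash
# list the DAGs the scheduler knows about
make run COMMAND="list_dags"

# run a whole DAG for one day instead of a single task
make run COMMAND="backfill example -s 2016-01-01 -e 2016-01-01"
```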
The logs of the task can be inspected in real-time with:
```bash
docker logs -f files_scheduler_1
docker logs -f telemetryairflow_scheduler_1
```
### Local Deployment
Assuming you are on OS X and using docker toolbox, first create a docker machine with a sufficient amount of memory with e.g.:
```bash
docker-machine create -d virtualbox --virtualbox-memory 4096 default
```
If you're using OS X and the new Docker for OS X, start the docker service, click the docker icon in the tray, click on preferences and change the available memory to 4GB.
Assuming you're using macOS and Docker for macOS, start the docker service,
click the docker icon in the menu bar, click on preferences and change the
available memory to 4GB.
To deploy the Airflow container on the docker engine, with its required dependencies, run:
```bash
ansible-playbook ansible/deploy_local.yml -e '@ansible/envs/dev.yml'
echo "Airflow web console should now be running locally at http://$(docker-machine ip default):8080"
make up
```
If you get a message saying "Couldn't connect to Docker daemon - you might need to run `docker-machine start default`.", try the following:
```bash
docker-machine start default
eval "$(docker-machine env default)"
```
You can now connect to your local Airflow web console at
`http://localhost:8000/`.
You can now connect to your local Airflow web console with a URL like `http://192.168.99.100:8080` (see above for how to identify the exact IP address).
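
To check that the individual services came up, standard docker-compose commands work; for example (a sketch, using the service names defined in docker-compose.yml):

```bash
docker-compose ps                 # state of db, redis, web, worker, scheduler, flower
docker-compose logs -f scheduler  # tail the scheduler logs
curl -I http://localhost:8000/    # the web console should respond here
```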
### Production Setup
### Remote Deployment
When deploying to production make sure to set up the following environment
variables:
In order to deploy Airflow to e.g. the `stage` environment, an ECS cluster has to be created first with at least one container instance:
```bash
ansible-playbook ansible/provision_aws.yml -e '@ansible/envs/stage.yml'
```
- `AWS_ACCESS_KEY_ID` -- The AWS access key ID to spin up the Spark clusters
- `AWS_SECRET_ACCESS_KEY` -- The AWS secret access key
- `SPARK_BUCKET` -- The AWS S3 bucket where Spark related files are stored,
e.g. `telemetry-spark-emr-2`
- `AIRFLOW_BUCKET` -- The AWS S3 bucket where airflow specific files are stored,
e.g. `telemetry-airflow`
- `PUBLIC_OUTPUT_BUCKET` -- The AWS S3 bucket where public job results are
stored in, e.g. `telemetry-public-analysis-2`
- `PRIVATE_OUTPUT_BUCKET` -- The AWS S3 bucket where private job results are
stored in, e.g. `telemetry-parquet`
- `AIRFLOW_DATABASE_URL` -- The connection URI for the Airflow database, e.g.
`postgres://username:password@hostname:port/database`
- `AIRFLOW_BROKER_URL` -- The connection URI for the Airflow worker queue, e.g.
`redis://hostname:6379/0`
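
How these reach the container depends on the deployment; as a plain-environment illustration (placeholder values only, the bucket names are the examples from the list above):

```bash
export AWS_ACCESS_KEY_ID="..."         # placeholder, never commit real keys
export AWS_SECRET_ACCESS_KEY="..."     # placeholder
export SPARK_BUCKET="telemetry-spark-emr-2"
export AIRFLOW_BUCKET="telemetry-airflow"
export PUBLIC_OUTPUT_BUCKET="telemetry-public-analysis-2"
export PRIVATE_OUTPUT_BUCKET="telemetry-parquet"
export AIRFLOW_DATABASE_URL="postgres://user:password@db-host:5432/airflow"
export AIRFLOW_BROKER_URL="redis://redis-host:6379/0"
```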
Once the ECS cluster is up and running, Airflow can be (re)deployed with:
```bash
ansible-playbook ansible/deploy_aws.yml -e '@ansible/envs/stage.yml'
```
Also, please set
- `AIRFLOW_SECRET_KEY` -- A secret key for Airflow's Flask based webserver
- `AIRFLOW_FERNET_KEY` -- A secret key to save connection passwords in the db
Both values should be set by using the cryptography module's Fernet tool that
we've wrapped in a docker-compose call: `make secret`.
Run this for each key config variable, and **don't use the same for both!**
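
For example (`make secret` just wraps the Fernet one-liner shown in the Makefile above):

```bash
make secret   # copy the output into AIRFLOW_FERNET_KEY
make secret   # run it again and copy the new output into AIRFLOW_SECRET_KEY
```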
### Debugging
If you do a remote deploy and the changes are not reflected in the [live site](http://workflow.telemetry.mozilla.org), you may have to remove ECS task definitions from the AWS Console and try again. To do this, go to the ECS page, click Task Definitions, click ecscompose-telemetry-airflow, then remove all the listed task definitions. After that, re-deploy and things should work as expected.
If you get an error about `ecs-cli` missing, follow [these steps](http://docs.aws.amazon.com/AmazonECS/latest/developerguide/ECS_CLI_installation.html) to install it.
Some useful docker tricks for development and debugging:
```bash

View file

@@ -1,13 +1,13 @@
[core]
# The home folder for airflow, default is ~/airflow
airflow_home = /usr/local/airflow
airflow_home = $AIRFLOW_HOME
# The folder where your airflow pipelines live, most likely a
# subfolder in a code repository
dags_folder = /usr/local/airflow/dags
dags_folder = $AIRFLOW_HOME/dags
# The folder where airflow should store its log files. This location
base_log_folder = /usr/local/airflow/logs
base_log_folder = $AIRFLOW_HOME/logs
# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor
@@ -16,7 +16,7 @@ executor = CeleryExecutor
# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engines; more information on
# their website
sql_alchemy_conn = mysql://${DB_USER}:${DB_PASSWORD}@${DB_URI}/airflow
sql_alchemy_conn = $AIRFLOW_DATABASE_URL
# The SqlAlchemy pool size is the maximum number of database connections
# in the pool.
@@ -47,10 +47,10 @@ max_active_runs_per_dag = 5
load_examples = False
# Where your Airflow plugins are stored
plugins_folder = /usr/local/airflow/plugins
plugins_folder = $AIRFLOW_HOME/plugins
# Secret key to save connection passwords in the db
fernet_key = secret
fernet_key = $AIRFLOW_FERNET_KEY
# Whether to disable pickling dags
donot_pickle = False
@@ -62,16 +62,16 @@ dagbag_import_timeout = 30
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is used in automated emails that
# airflow sends to point links to the right web server
base_url = https://workflow.telemetry.mozilla.org
base_url = $URL
# The ip specified when starting the web server
web_server_host = 0.0.0.0
# The port on which to run the web server
web_server_port = 8080
web_server_port = $PORT
# Secret key used to run your flask app
secret_key = temporary_key
secret_key = $AIRFLOW_SECRET_KEY
# Number of workers to run the Gunicorn web server
workers = 4
@@ -84,15 +84,12 @@ worker_class = gevent
expose_config = true
# Set to true to turn on authentication : http://pythonhosted.org/airflow/installation.html#web-authentication
authenticate = False
authenticate = $AIRFLOW_AUTHENTICATE
auth_backend = airflow.contrib.auth.backends.password_auth
# Filter the list of dags by owner name (requires authentication to be enabled)
filter_by_owner = False
# Authentication
authenticate = ${AIRFLOW_ENABLE_AUTH}
auth_backend = airflow.contrib.auth.backends.password_auth
[email]
email_backend = airflow.utils.email.send_email_smtp
@@ -102,11 +99,11 @@ email_backend = airflow.utils.email.send_email_smtp
# server here
smtp_starttls = True
smtp_ssl = False
smtp_host = ${SMTP_HOST}
smtp_host = $AIRFLOW_SMTP_HOST
smtp_port = 587
smtp_user = ${SMTP_USER}
smtp_password = ${SMTP_PASSWORD}
smtp_mail_from = telemetry-alerts@airflow.dev.mozaws.net
smtp_user = $AIRFLOW_SMTP_USER
smtp_password = $AIRFLOW_SMTP_PASSWORD
smtp_mail_from = $AIRFLOW_SMTP_FROM
[celery]
# This section only applies if you are using the CeleryExecutor in
@@ -131,14 +128,14 @@ worker_log_server_port = 8793
# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
broker_url = amqp://airflow:airflow@rabbitmq:5672/airflow
broker_url = $AIRFLOW_BROKER_URL
# Another key Celery setting
celery_result_backend = amqp://airflow:airflow@rabbitmq:5672/airflow
celery_result_backend = $AIRFLOW_RESULT_URL
# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the port that Celery Flower runs on
flower_port = 5555
flower_port = $AIRFLOW_FLOWER_PORT
# Default queue that tasks get assigned to and that workers listen on.
default_queue = default

View file

@@ -1,3 +1,4 @@
# to be copied to $AIRFLOW_BUCKET/steps/airflow.sh
set -o verbose
HOME=/home/hadoop

View file

@@ -1,85 +0,0 @@
- name: provision aws resources
hosts: localhost
vars:
elb_name: telemetry-ecs
command: scheduler
tasks:
- name: create bucket
s3: bucket={{airflow_bucket}} region={{region}} mode=create
- name: copy EMR bootstrap script
s3: bucket={{airflow_bucket}} region={{region}} object=steps/airflow.sh src={{ playbook_dir }}/files/spark/airflow.sh mode=put
- name: create load balancer
ec2_elb_lb:
name: "{{ elb_name }}"
region: "{{ region }}"
state: present
security_group_names: "{{ elb_sg_name }}"
subnets:
- "{{ ecs_vpc_subnet_id }}"
listeners:
- protocol: https
load_balancer_port: 443
instance_protocol: http
instance_port: 8080
ssl_certificate_id: "{{ ssl_cert_id }}"
- protocol: http
load_balancer_port: 80
instance_port: 8080
health_check:
ping_protocol: tcp
ping_port: 8080
response_timeout: 50 # seconds
interval: 60 # seconds
unhealthy_threshold: 2
healthy_threshold: 2
- name: fetch SMTP credentials
s3: bucket={{metadata_bucket}} region={{region}} object=smtp_ses/credentials.json mode=getstr
register: smtp_credentials_json
- set_fact:
smtp_credentials: "{{ smtp_credentials_json.contents|from_json }}"
- name: create task definition
shell: "{{ item }}"
with_items:
- ecs-cli configure --cluster {{ ecs_cluster_name }}
- ecs-cli compose --project-name telemetry-airflow --file {{ playbook_dir }}/files/docker-compose.yml create
environment:
AWS_REGION: "{{ region }}"
EMR_KEY_NAME: "{{ emr_key_name }}"
EMR_FLOW_ROLE: "{{ emr_flow_role }}"
EMR_SERVICE_ROLE: "{{ emr_service_role }}"
EMR_INSTANCE_TYPE: "{{ emr_instance_type }}"
SPARK_BUCKET: "{{ spark_bucket }}"
DB_URI: "{{ db_uri }}"
DB_USER: "{{ db_user }}"
DB_PASSWORD: "{{ db_password }}"
AIRFLOW_BUCKET: "{{ airflow_bucket }}"
AIRFLOW_ENABLE_AUTH: True
PRIVATE_OUTPUT_BUCKET: "{{ private_output_bucket }}"
PUBLIC_OUTPUT_BUCKET: "{{ public_output_bucket }}"
SMTP_HOST: "{{ smtp_credentials.host }}"
SMTP_USER: "{{ smtp_credentials.user }}"
SMTP_PASSWORD: "{{ smtp_credentials.password }}"
# Bug 1286825: Tell the scheduler to exit after 5 runs.
COMMAND: scheduler -n 5 # https://github.com/aws/amazon-ecs-cli/issues/28
# TODO: create a new module capable of updating the service with the new definition or ensure that the
# revision of a new task definition is incremental (for some reason it's not always the case...)
- name: update service
ecs_service:
region: "{{ region }}"
name: telemetry-airflow
cluster: "{{ ecs_cluster_name }}"
desired_count: 1
state: present
role: "{{ ecs_role }}"
load_balancers:
- loadBalancerName: "{{ elb_name }}"
containerName: webserver
containerPort: 8080
task_definition: ecscompose-telemetry-airflow

View file

@@ -1,34 +0,0 @@
- name: deploy Airflow containers locally
hosts: localhost
vars:
command: scheduler -n 5 # See bug 1286825
compose_conf:
- "{{ playbook_dir }}/files/docker-compose.yml"
- "{{ playbook_dir }}/files/docker-compose-local.yml"
tasks:
- name: launch Airflow containers
shell: "{{ item }}"
with_items:
- docker-compose -f {{ compose_conf[0] }} -f {{ compose_conf[1] }} down -v --remove-orphans
- docker-compose -f {{ compose_conf[0] }} -f {{ compose_conf[1] }} up -d
environment:
AWS_REGION: "{{ region }}"
EMR_KEY_NAME: "{{ emr_key_name }}"
EMR_FLOW_ROLE: "{{ emr_flow_role }}"
EMR_SERVICE_ROLE: "{{ emr_service_role }}"
EMR_INSTANCE_TYPE: "{{ emr_instance_type }}"
SPARK_BUCKET: "{{ spark_bucket }}"
AWS_ACCESS_KEY_ID: "{{ lookup('env', 'AWS_ACCESS_KEY_ID') }}"
AWS_SECRET_ACCESS_KEY: "{{ lookup('env', 'AWS_SECRET_ACCESS_KEY') }}"
DB_URI: "{{ db_uri }}"
DB_USER: "{{ db_user }}"
DB_PASSWORD: "{{ db_password }}"
AIRFLOW_BUCKET: "{{ airflow_bucket }}"
AIRFLOW_ENABLE_AUTH: False
PRIVATE_OUTPUT_BUCKET: "{{ private_output_bucket }}"
PUBLIC_OUTPUT_BUCKET: "{{ public_output_bucket }}"
SMTP_HOST: "localhost"
SMTP_USER: "dummy"
SMTP_PASSWORD: "dummy_pass"
COMMAND: "{{ command }}"

View file

@@ -1,16 +0,0 @@
region: us-west-2
spark_bucket: telemetry-spark-emr-2
airflow_bucket: telemetry-test-bucket
public_output_bucket: telemetry-test-bucket
private_output_bucket: telemetry-test-bucket
emr_key_name: mozilla_vitillo
emr_flow_role: telemetry-spark-cloudformation-TelemetrySparkInstanceProfile-1SATUBVEXG7E3
emr_service_role: EMR_DefaultRole
emr_instance_type: c3.4xlarge
db_uri: mysql
db_user: airflow
db_password: airflow
airflow_enable_auth: false

View file

@@ -1 +0,0 @@
# TODO: configure production environment

View file

@@ -1,30 +0,0 @@
region: us-west-2
spark_bucket: telemetry-spark-emr-2
airflow_bucket: telemetry-airflow
public_output_bucket: telemetry-public-analysis-2
private_output_bucket: telemetry-parquet
emr_key_name: mozilla_vitillo
emr_flow_role: telemetry-spark-cloudformation-TelemetrySparkInstanceProfile-1SATUBVEXG7E3
emr_service_role: EMR_DefaultRole
emr_instance_type: c3.4xlarge
ecs_sg_name: telemetry-ecs
ecs_key_name: mozilla_vitillo
ecs_vpc_id: vpc-4283d227
ecs_vpc_subnet_id: subnet-125a7c77
ecs_cluster_name: telemetry
ecs_instance_type: c4.2xlarge
ecs_role: arn:aws:iam::927034868273:role/telemetry-ecs-cloudformation-TelemetryECSRole-1UAYK6XNB6DG0
ecs_asg_lc_name: telemetry-ecs-lc
ecs_asg_asg_name: telemetry-ecs-asg
elb_name: telemetry-ecs
elb_sg_name: telemetry-workflow-elb
ssl_cert_id: arn:aws:iam::927034868273:server-certificate/cloudfront/exp20180829_cf_star_telemetry_mozilla_org
db_uri: telemetry-ecs-airflow.cppmil15uwvg.us-west-2.rds.amazonaws.com
db_user: airflow
db_password: telemetry-airflow # DB is not accessible from the outside world
metadata_bucket: net-mozaws-prod-us-west-2-pipeline-metadata

View file

@@ -1,58 +0,0 @@
#!/usr/bin/env bash
CMD="airflow"
TRY_LOOP="10"
MYSQL_HOST=${DB_URI:-mysql}
MYSQL_PORT="3306"
RABBITMQ_HOST="rabbitmq"
RABBITMQ_CREDS="airflow:airflow"
# Wait for RabbitMQ
j=0
while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do
j=`expr $j + 1`
if [ $j -ge $TRY_LOOP ]; then
echo "$(date) - $RABBITMQ_HOST still not reachable, giving up"
exit 1
fi
echo "$(date) - waiting for RabbitMQ... $j/$TRY_LOOP"
sleep 5
done
# Generate Fernet key for replacement below
export FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY")
# Replace environment vars in airflow config file.
python $AIRFLOW_HOME/replace_env.py $AIRFLOW_HOME/airflow.cfg
i=0
while ! nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do
i=`expr $i + 1`
if [ $i -ge $TRY_LOOP ]; then
echo "$(date) - ${MYSQL_HOST}:${MYSQL_PORT} still not reachable, giving up"
exit 1
fi
echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$TRY_LOOP"
sleep 5
done
if [ "$1" = "webserver" ]; then
echo "Initialize database..."
$CMD initdb
$CMD upgradedb
fi
sleep 5
if [[ "$COMMAND" == "scheduler"* ]]; then
# Work around scheduler hangs, see bug 1286825.
# Run the scheduler inside a retry loop.
while echo "Running"; do
eval $CMD "${@:-$COMMAND}"
echo "Scheduler exited with code $?. Respawning.." >&2
date >> /tmp/airflow_scheduler_errors.txt
sleep 1
done
else
eval $CMD "${@:-$COMMAND}"
fi

View file

@@ -1,24 +0,0 @@
#!/usr/bin/env python
# Replace environment variables in the given file with their values.
# Usage: python replace_env.py input_file [ output_file ]
import os
import sys
nargs = len(sys.argv)
if nargs < 2 or nargs > 3:
print "Usage: python {} input_file [ output_file ]".format(sys.argv[0])
sys.exit(1)
input_file = sys.argv[1]
if nargs > 2:
output_file = sys.argv[2]
else:
output_file = input_file
with open(input_file, "r") as f:
data = f.read()
expanded = os.path.expandvars(data)
with open(output_file, "w") as f:
f.write(expanded)

View file

@@ -1,94 +0,0 @@
{
"AWSTemplateFormatVersion": "2010-09-09",
"Description": "Cloudformation template for Telemetry's ECS",
"Resources": {
"TelemetryECSRole": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
{
"Sid": "",
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
},
{
"Effect": "Allow",
"Principal": {
"Service": [
"ecs.amazonaws.com"
]
},
"Action": [
"sts:AssumeRole"
]
}
]
},
"Policies": [
{
"PolicyName": "telemetry-ecs",
"PolicyDocument": {
"Version": "2012-10-17",
"Statement": [
{
"Sid": "Stmt1430246788000",
"Effect": "Allow",
"Action": [
"elasticmapreduce:Describe*",
"elasticmapreduce:RunJobFlow",
"elasticmapreduce:TerminateJobFlows",
"ecs:CreateCluster",
"ecs:DeregisterContainerInstance",
"ecs:DiscoverPollEndpoint",
"ecs:Poll",
"ecs:RegisterContainerInstance",
"ecs:StartTelemetrySession",
"ecs:Submit*",
"ecr:GetAuthorizationToken",
"ecr:BatchCheckLayerAvailability",
"ecr:GetDownloadUrlForLayer",
"ecr:BatchGetImage",
"iam:PassRole",
"logs:CreateLogStream",
"logs:PutLogEvents",
"elasticloadbalancing:Describe*",
"elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
"elasticloadbalancing:RegisterInstancesWithLoadBalancer",
"ec2:Describe*",
"ec2:AuthorizeSecurityGroupIngress"
],
"Resource": [
"*"
]
}
]
}
}
]
}
},
"TelemetryECSInstanceProfile": {
"Type": "AWS::IAM::InstanceProfile",
"Properties": {
"Path": "/",
"Roles": [
{
"Ref": "TelemetryECSRole"
}
]
}
}
},
"Outputs": {
"InstanceProfile": {
"Description": "ECS instance profile",
"Value": {
"Ref": "TelemetryECSInstanceProfile"
}
}
}
}

View file

@@ -1,27 +0,0 @@
mysql:
image: mysql
restart: always
mem_limit: 2147483648
ports:
- "3306:3306"
environment:
- MYSQL_RANDOM_ROOT_PASSWORD=true
- MYSQL_USER=airflow
- MYSQL_PASSWORD=airflow
- MYSQL_DATABASE=airflow
webserver:
links:
- mysql:mysql
scheduler:
links:
- mysql:mysql
worker:
links:
- mysql:mysql
flower:
links:
- mysql:mysql

View file

@@ -1,130 +0,0 @@
rabbitmq:
image: rabbitmq:3-management
restart: always
ports:
- "15672:15672"
- "5672:5672"
environment:
- RABBITMQ_DEFAULT_USER=airflow
- RABBITMQ_DEFAULT_PASS=airflow
- RABBITMQ_DEFAULT_VHOST=airflow
webserver:
image: mozdata/telemetry-airflow
restart: always
environment:
- AIRFLOW_HOME=/usr/local/airflow
- AIRFLOW_ENABLE_AUTH
- AWS_REGION
- EMR_KEY_NAME
- EMR_FLOW_ROLE
- EMR_SERVICE_ROLE
- EMR_INSTANCE_TYPE
- SPARK_BUCKET
- AIRFLOW_BUCKET
- PRIVATE_OUTPUT_BUCKET
- PUBLIC_OUTPUT_BUCKET
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- DB_URI
- DB_USER
- DB_PASSWORD
- SMTP_HOST
- SMTP_USER
- SMTP_PASSWORD
ports:
- "8080:8080"
links:
- rabbitmq:rabbitmq
- worker:worker
- scheduler:scheduler
command: webserver
flower:
image: mozdata/telemetry-airflow
restart: always
environment:
- AIRFLOW_HOME=/usr/local/airflow
- AIRFLOW_ENABLE_AUTH
- AWS_REGION
- EMR_KEY_NAME
- EMR_FLOW_ROLE
- EMR_SERVICE_ROLE
- EMR_INSTANCE_TYPE
- SPARK_BUCKET
- AIRFLOW_BUCKET
- PRIVATE_OUTPUT_BUCKET
- PUBLIC_OUTPUT_BUCKET
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- DB_URI
- DB_USER
- DB_PASSWORD
- SMTP_HOST
- SMTP_USER
- SMTP_PASSWORD
ports:
- "5555:5555"
links:
- rabbitmq:rabbitmq
command: flower
scheduler:
image: mozdata/telemetry-airflow
restart: always
mem_limit: 4294967296
environment:
- AIRFLOW_HOME=/usr/local/airflow
- AIRFLOW_ENABLE_AUTH
- AWS_REGION
- EMR_KEY_NAME
- EMR_FLOW_ROLE
- EMR_SERVICE_ROLE
- EMR_INSTANCE_TYPE
- SPARK_BUCKET
- AIRFLOW_BUCKET
- PRIVATE_OUTPUT_BUCKET
- PUBLIC_OUTPUT_BUCKET
- COMMAND
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- DB_URI
- DB_USER
- DB_PASSWORD
- SMTP_HOST
- SMTP_USER
- SMTP_PASSWORD
links:
- rabbitmq:rabbitmq
worker:
image: mozdata/telemetry-airflow
restart: always
mem_limit: 4294967296
environment:
- AIRFLOW_HOME=/usr/local/airflow
ports:
- "8793:8793"
environment:
- AIRFLOW_HOME=/usr/local/airflow
- AIRFLOW_ENABLE_AUTH
- AWS_REGION
- EMR_KEY_NAME
- EMR_FLOW_ROLE
- EMR_SERVICE_ROLE
- EMR_INSTANCE_TYPE
- SPARK_BUCKET
- AIRFLOW_BUCKET
- PRIVATE_OUTPUT_BUCKET
- PUBLIC_OUTPUT_BUCKET
- AWS_ACCESS_KEY_ID
- AWS_SECRET_ACCESS_KEY
- DB_URI
- DB_USER
- DB_PASSWORD
- SMTP_HOST
- SMTP_USER
- SMTP_PASSWORD
links:
- rabbitmq:rabbitmq
command: worker

View file

@@ -1,103 +0,0 @@
- name: provision ECS cluster
hosts: localhost
vars:
ecs_cluster_size: 2 # need at least 2 to update a running service
tasks:
- name: create ECS cluster for telemetry
ecs_cluster:
region: "{{ region }}"
name: "{{ ecs_cluster_name }}"
state: present
- name: create role for container instances
cloudformation:
stack_name: "telemetry-ecs-cloudformation"
state: "present"
region: "{{ region }}"
disable_rollback: true
template: "{{ playbook_dir }}/files/cloudformation.json"
tags:
type: "telemetry"
application: "ecs"
register: cloudformation
- name: create security group for load balancer
ec2_group:
name: "{{ elb_sg_name }}"
description: Telemetry Workflow ELB
region: "{{ region }}"
rules:
- proto: tcp
from_port: 80
to_port: 80
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 443
to_port: 443
cidr_ip: 0.0.0.0/0
rules_egress:
- proto: all
from_port: all
to_port: all
cidr_ip: 0.0.0.0/0
vpc_id: "{{ ecs_vpc_id }}"
register: elb_sg
- name: create security group for container instances
ec2_group:
name: "{{ ecs_sg_name }}"
description: telemetry ECS
region: "{{ region }}"
rules:
- proto: tcp
from_port: 22
to_port: 22
cidr_ip: 0.0.0.0/0
- proto: tcp
from_port: 8080
to_port: 8080
group_id: "{{ elb_sg.group_id }}"
rules_egress:
- proto: all
from_port: all
to_port: all
cidr_ip: 0.0.0.0/0
vpc_id: "{{ ecs_vpc_id }}"
register: ecs_sg
- name: create launch configuration for the cluster ASG
ec2_lc:
name: "{{ ecs_asg_lc_name }}"
image_id: ami-a28476c2
region: "{{ region }}"
instance_type: "{{ ecs_instance_type }}"
key_name: "{{ ecs_key_name }}"
assign_public_ip: yes
security_groups:
- "{{ ecs_sg.group_id }}"
instance_profile_name: "{{ cloudformation.stack_outputs.InstanceProfile }}"
user_data: |
#!/bin/bash
echo ECS_CLUSTER={{ ecs_cluster_name }} >> /etc/ecs/ecs.config
register: ec2_lc
- name: create cluster ASG
ec2_asg:
name: "{{ ecs_asg_asg_name }}"
region: "{{ region }}"
launch_config_name: "{{ ec2_lc.name }}"
health_check_period: 60
health_check_type: EC2
replace_all_instances: yes
min_size: 2
max_size: 2
desired_capacity: 2
vpc_zone_identifier:
- "{{ ecs_vpc_subnet_id }}"
tags:
- App: pipeline
- Name: telemetry-ecs-instance
- Type: telemetry-ecs-instance
- Owner: telemetry@mozilla.com
- REAPER_SPARE_ME: true

bin/run (new executable file, 38 lines)

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -eo pipefail

# default variables
: "${PORT:=8000}"

usage() {
  echo "usage: bin/run flower|web|worker|scheduler"
  exit 1
}

[ $# -lt 1 ] && usage

case $1 in
  flower)
    exec newrelic-admin run-program airflow flower
    ;;
  web)
    newrelic-admin run-program airflow initdb
    newrelic-admin run-program airflow upgradedb
    exec newrelic-admin run-program airflow webserver -p ${PORT} --workers 4
    ;;
  worker)
    exec newrelic-admin run-program airflow worker
    ;;
  scheduler)
    # Bug 1286825: the scheduler runs with -n 5, so it exits after five runs.
    # Because of the `exec` below, the loop body runs at most once; when the
    # process exits, the container stops and is restarted by the
    # `restart: always` policy inherited from the `app` service in
    # docker-compose.yml.
    while echo "Running"; do
      exec newrelic-admin run-program airflow scheduler -n 5
      echo "Scheduler exited with code $?. Respawning.." >&2
      date >> /tmp/airflow_scheduler_errors.txt
      sleep 1
    done
    ;;
  *)
    usage
    ;;
esac
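
Each service in docker-compose.yml below starts this same script with a different role (`command: bin/run <role>`); a role can also be exercised by hand in a one-off container, for example (a sketch):

```bash
docker-compose run --rm --service-ports web bin/run web
```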

circle.yml (new file, 58 lines)

@@ -0,0 +1,58 @@
# These environment variables must be set in CircleCI UI
#
# DOCKERHUB_REPO - docker hub repo, format: <username>/<repo>
# DOCKER_EMAIL   - login info for docker hub
# DOCKER_USER
# DOCKER_PASS
#
machine:
  pre:
    - curl -sSL https://s3.amazonaws.com/circle-downloads/install-circleci-docker.sh | bash -s -- 1.10.0
  services:
    - docker
  python:
    version: 2.7.12

dependencies:
  pre:
    - sudo apt-get update
    - sudo apt-get install python-dev
    - sudo pip install -U docker-compose
  override:
    - docker info
    - docker --version
    # create a version.json
    - >
      printf '{"commit":"%s","version":"%s","source":"https://github.com/%s/%s","build":"%s"}\n'
      "$CIRCLE_SHA1"
      "$CIRCLE_TAG"
      "$CIRCLE_PROJECT_USERNAME"
      "$CIRCLE_PROJECT_REPONAME"
      "$CIRCLE_BUILD_URL"
      > version.json
    - cp version.json $CIRCLE_ARTIFACTS
    - docker build -t app:build .

# appropriately tag and push the container to dockerhub
deployment:
  hub_latest:
    branch: "master"
    commands:
      - "[ ! -z $DOCKERHUB_REPO ]"
      - docker login -e $DOCKER_EMAIL -u $DOCKER_USER -p $DOCKER_PASS
      - "docker tag app:build ${DOCKERHUB_REPO}:latest"
      - "docker push ${DOCKERHUB_REPO}:latest"
  hub_releases:
    # push all tags
    tag: /.*/
    commands:
      - "[ ! -z $DOCKERHUB_REPO ]"
      - docker login -e $DOCKER_EMAIL -u $DOCKER_USER -p $DOCKER_PASS
      - "echo ${DOCKERHUB_REPO}:${CIRCLE_TAG}"
      - "docker tag app:build ${DOCKERHUB_REPO}:${CIRCLE_TAG}"
      - "docker images"
      - "docker push ${DOCKERHUB_REPO}:${CIRCLE_TAG}"

docker-compose.yml (new file, 53 lines)

@@ -0,0 +1,53 @@
version: '2'
services:
  db:
    image: postgres:9.4

  redis:
    image: redis:3.2

  app:
    build: .
    restart: always

  web:
    extends:
      service: app
    ports:
      - "8000:8000"
    depends_on:
      - app
    links:
      - db
      - redis
    command: bin/run web

  worker:
    extends:
      service: app
    mem_limit: 4294967296
    ports:
      - "8793:8793"
    links:
      - db
      - redis
    command: bin/run worker

  scheduler:
    mem_limit: 4294967296
    extends:
      service: app
    links:
      - db
      - redis
    command: bin/run scheduler

  flower:
    extends:
      service: app
    ports:
      - "5555:5555"
    links:
      - redis
    command: bin/run flower

requirements.txt (new file, 6 lines)

@@ -0,0 +1,6 @@
boto3
airflow[celery,postgres,hive,hdfs,jdbc,async,password,crypto]
retrying
newrelic
redis
hiredis