Bug 1309622 - Remove Ansible in favor of dockerflow patterns.
Parent: e676825b5d
Commit: 76abcec3a9
Dockerfile | 82
@@ -1,23 +1,67 @@
# AUTHOR: Roberto Vitillo
# DESCRIPTION: Mozilla's Airflow container
# BUILD: docker build --rm -t mozdata/telemetry-airflow
# SOURCE: https://github.com/mozilla/telemetry-airflow
FROM python:2-slim
MAINTAINER Mozilla Telemetry

FROM puckel/docker-airflow:1.7.1.3
MAINTAINER vitillo
ENV PYTHONUNBUFFERED 1

USER root
RUN apt-get update -yqq && \
    apt-get install -yqq python-pip python-mysqldb && \
    pip install boto3 && \
    pip install airflow[async] && \
    pip install airflow[password] && \
    pip install retrying
ENV AWS_REGION us-west-2
# ENV AWS_ACCESS_KEY_ID
# ENV AWS_SECRET_ACCESS_KEY

ADD ansible/files/airflow/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg
ADD ansible/files/airflow/entrypoint.sh ${AIRFLOW_HOME}/entrypoint.sh
ADD ansible/files/airflow/replace_env.py ${AIRFLOW_HOME}/replace_env.py
RUN chown airflow:airflow ${AIRFLOW_HOME}/airflow.cfg
ENV SPARK_BUCKET telemetry-spark-emr-2
ENV AIRFLOW_BUCKET telemetry-test-bucket
ENV PRIVATE_OUTPUT_BUCKET telemetry-test-bucket
ENV PUBLIC_OUTPUT_BUCKET telemetry-test-bucket

USER airflow
ADD dags/ /usr/local/airflow/dags/
ENV EMR_KEY_NAME mozilla_vitillo
ENV EMR_FLOW_ROLE telemetry-spark-cloudformation-TelemetrySparkInstanceProfile-1SATUBVEXG7E3
ENV EMR_SERVICE_ROLE EMR_DefaultRole
ENV EMR_INSTANCE_TYPE c3.4xlarge

ENV AIRFLOW_HOME /app
ENV AIRFLOW_AUTHENTICATE False
ENV AIRFLOW_BROKER_URL redis://redis:6379/0
ENV AIRFLOW_RESULT_URL ${AIRFLOW_BROKER_URL}
ENV AIRFLOW_FLOWER_PORT 5555
ENV AIRFLOW_DATABASE_URL postgres://postgres@db/postgres
ENV AIRFLOW_FERNET_KEY "VDRN7HAYDw36BTFbLPibEgJ7q2Dzn-dVGwtbu8iKwUg"
ENV AIRFLOW_SECRET_KEY "3Pxs_qg7J6TvRuAIIpu4E2EK_8sHlOYsxZbB-o82mcg"
# ENV AIRFLOW_SMTP_HOST
# ENV AIRFLOW_SMTP_USER
# ENV AIRFLOW_SMTP_PASSWORD
ENV AIRFLOW_SMTP_FROM telemetry-alerts@airflow.dev.mozaws.net

ENV PORT 8000

EXPOSE 8000

# add a non-privileged user for installing and running the application
RUN mkdir /app && \
    chown 10001:10001 /app && \
    chmod g+w /app && \
    groupadd --gid 10001 app && \
    useradd --uid 10001 --gid 10001 --home /app app

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        apt-transport-https build-essential curl git libpq-dev \
        postgresql-client gettext sqlite3 libffi-dev libsasl2-dev && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Switch to /tmp to install dependencies outside home dir
WORKDIR /tmp

# Install Python dependencies
COPY requirements.txt /tmp/
RUN pip install --upgrade --no-cache-dir -r requirements.txt

# Switch back to home directory
WORKDIR /app

COPY . /app

RUN chown -R 10001:10001 /app && \
    chmod -R g+w /app

USER 10001
@@ -0,0 +1,43 @@
.PHONY: build clean migrate redis-cli run secret shell stop up

help:
	@echo "Welcome to the Telemetry Airflow\n"
	@echo "The list of commands for local development:\n"
	@echo "  build      Builds the docker images for the docker-compose setup"
	@echo "  clean      Stops and removes all docker containers"
	@echo "  migrate    Runs the Django database migrations"
	@echo "  redis-cli  Opens a Redis CLI"
	@echo "  run        Run an airflow command"
	@echo "  secret     Create a secret to be used for a config variable"
	@echo "  shell      Opens a Bash shell"
	@echo "  up         Runs the whole stack, served under http://localhost:8000/\n"
	@echo "  stop       Stops the docker containers"

build:
	docker-compose build

clean: stop
	docker-compose rm -f

migrate:
	docker-compose run web airflow initdb
	docker-compose run web airflow updatedb

shell:
	docker-compose run web bash

redis-cli:
	docker-compose run redis redis-cli -h redis

run:
	docker-compose run web airflow $(COMMAND)

secret:
	@docker-compose run web python -c \
	"from cryptography.fernet import Fernet; print Fernet.generate_key().decode()"

stop:
	docker-compose stop

up:
	docker-compose up
README.md | 94
@@ -1,25 +1,28 @@
# telemetry-airflow
Airflow is a platform to programmatically author, schedule and monitor workflows.

When workflows are defined as code, they become more maintainable, versionable, testable, and collaborative.
When workflows are defined as code, they become more maintainable, versionable,
testable, and collaborative.

Use Airflow to author workflows as directed acyclic graphs (DAGs) of tasks. The Airflow scheduler executes your tasks on an array of workers while following the specified dependencies. Rich command line utilities make performing complex surgeries on DAGs a snap. The rich user interface makes it easy to visualize pipelines running in production, monitor progress, and troubleshoot issues when needed.
Use Airflow to author workflows as directed acyclic graphs (DAGs) of tasks.
The Airflow scheduler executes your tasks on an array of workers while following
the specified dependencies. Rich command line utilities make performing complex
surgeries on DAGs a snap. The rich user interface makes it easy to visualize
pipelines running in production, monitor progress, and troubleshoot issues when
needed.

### Prerequisites

This app is built and deployed with [docker](https://docs.docker.com/engine/installation/) and [ansible](http://docs.ansible.com/ansible/intro_installation.html).
This app is built and deployed with
[docker](https://docs.docker.com/) and
[docker-compose](https://docs.docker.com/compose/).

### Build Container

An Airflow container can be built with
An Airflow container can be built with

```bash
docker build -t mozdata/telemetry-airflow .
```

and pushed to Docker hub with
```bash
docker push mozdata/telemetry-airflow
make build
```

### Testing

@@ -27,58 +30,67 @@ docker push mozdata/telemetry-airflow
A single task, e.g. `spark`, of an Airflow dag, e.g. `example`, can be run with an execution date, e.g. `2016-01-01`, in the `dev` environment with:
```bash
AWS_SECRET_ACCESS_KEY=... AWS_ACCESS_KEY_ID=... \
ansible-playbook ansible/deploy_local.yml -e '@ansible/envs/dev.yml' -e "command='test example spark 20160101'"
make run COMMAND="test example spark 20160101"
```

The container will run the desired task to completion (or failure). Note that if the container is stopped during the execution of a task, the task will
be aborted. In the example's case, the Spark job will be terminated.
The container will run the desired task to completion (or failure).
Note that if the container is stopped during the execution of a task,
the task will be aborted. In the example's case, the Spark job will be
terminated.

The logs of the task can be inspected in real-time with:
```bash
docker logs -f files_scheduler_1
docker logs -f telemetryairflow_scheduler_1
```

### Local Deployment

Assuming you are on OS X and using docker toolbox, first create a docker machine with a sufficient amount of memory with e.g.:
```bash
docker-machine create -d virtualbox --virtualbox-memory 4096 default
```

If you're using OS X and the new Docker for OS X, start the docker service, click the docker icon in the tray, click on preferences and change the available memory to 4GB.
Assuming you're using macOS and Docker for macOS, start the docker service,
click the docker icon in the menu bar, click on preferences and change the
available memory to 4GB.

To deploy the Airflow container on the docker engine, with its required dependencies, run:
```bash
ansible-playbook ansible/deploy_local.yml -e '@ansible/envs/dev.yml'
echo "Airflow web console should now be running locally at http://$(docker-machine ip default):8080"
make up
```

If you get a message saying "Couldn't connect to Docker daemon - you might need to run `docker-machine start default`.", try the following:
```bash
docker-machine start default
eval "$(docker-machine env default)"
```
You can now connect to your local Airflow web console at
`http://localhost:8000/`.

You can now connect to your local Airflow web console with a URL like `http://192.168.99.100:8080` (see above for how to identify the exact IP address).
### Production Setup

### Remote Deployment
When deploying to production make sure to set up the following environment
variables:

In order to deploy Airflow to e.g. the `stage` environment, an ECS cluster has to be created first with at least one container instance:
```bash
ansible-playbook ansible/provision_aws.yml -e '@ansible/envs/stage.yml'
```
- `AWS_ACCESS_KEY_ID` -- The AWS access key ID to spin up the Spark clusters
- `AWS_SECRET_ACCESS_KEY` -- The AWS secret access key
- `SPARK_BUCKET` -- The AWS S3 bucket where Spark related files are stored,
  e.g. `telemetry-spark-emr-2`
- `AIRFLOW_BUCKET` -- The AWS S3 bucket where airflow specific files are stored,
  e.g. `telemetry-airflow`
- `PUBLIC_OUTPUT_BUCKET` -- The AWS S3 bucket where public job results are
  stored in, e.g. `telemetry-public-analysis-2`
- `PRIVATE_OUTPUT_BUCKET` -- The AWS S3 bucket where private job results are
  stored in, e.g. `telemetry-parquet`
- `AIRFLOW_DATABASE_URL` -- The connection URI for the Airflow database, e.g.
  `postgres://username:password@hostname:port/password`
- `AIRFLOW_BROKER_URL` -- The connection URI for the Airflow worker queue, e.g.
  `redis://hostname:6379/0`

Once the ECS cluster is up and running, Airflow can be (re)deployed with:
```bash
ansible-playbook ansible/deploy_aws.yml -e '@ansible/envs/stage.yml'
```
Also, please set

- `AIRFLOW_SECRET_KEY` -- A secret key for Airflow's Flask based webserver
- `AIRFLOW_FERNET_KEY` -- A secret key to save connection passwords in the db

Both values should be set by using the cryptography module's fernet tool that
we've wrapped in a docker-compose call:

    make secret

Run this for each key config variable, and **don't use the same for both!**
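For reference, a minimal sketch (not part of the repo) of the key generation that `make secret` wraps, assuming the `cryptography` package is available locally; note the in-container target still uses the Python 2 `print` statement:

```python
# Sketch: generate two independent keys, one per config variable.
# Assumes the `cryptography` package is installed.
from cryptography.fernet import Fernet

print(Fernet.generate_key().decode())  # use for AIRFLOW_FERNET_KEY
print(Fernet.generate_key().decode())  # use for AIRFLOW_SECRET_KEY
```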
### Debugging

If you do a remote deploy and the changes are not reflected in the [live site](http://workflow.telemetry.mozilla.org), you may have to remove ECS task definitions from the AWS Console and try again. To do this, go to the ECS page, click Task Definitions, click ecscompose-telemetry-airflow, then remove all the listed task definitions. After that, re-deploy and things should work as expected.

If you get an error about `ecs-cli` missing, follow [these steps](http://docs.aws.amazon.com/AmazonECS/latest/developerguide/ECS_CLI_installation.html) to install it.

Some useful docker tricks for development and debugging:

```bash
@@ -1,13 +1,13 @@
[core]
# The home folder for airflow, default is ~/airflow
airflow_home = /usr/local/airflow
airflow_home = $AIRFLOW_HOME

# The folder where your airflow pipelines live, most likely a
# subfolder in a code repository
dags_folder = /usr/local/airflow/dags
dags_folder = $AIRFLOW_HOME/dags

# The folder where airflow should store its log files. This location
base_log_folder = /usr/local/airflow/logs
base_log_folder = $AIRFLOW_HOME/logs

# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor
@@ -16,7 +16,7 @@ executor = CeleryExecutor
# The SqlAlchemy connection string to the metadata database.
# SqlAlchemy supports many different database engine, more information
# their website
sql_alchemy_conn = mysql://${DB_USER}:${DB_PASSWORD}@${DB_URI}/airflow
sql_alchemy_conn = $AIRFLOW_DATABASE_URL

# The SqlAlchemy pool size is the maximum number of database connections
# in the pool.
@@ -47,10 +47,10 @@ max_active_runs_per_dag = 5
load_examples = False

# Where your Airflow plugins are stored
plugins_folder = /usr/local/airflow/plugins
plugins_folder = $AIRFLOW_HOME/plugins

# Secret key to save connection passwords in the db
fernet_key = secret
fernet_key = $AIRFLOW_FERNET_KEY

# Whether to disable pickling dags
donot_pickle = False
@@ -62,16 +62,16 @@ dagbag_import_timeout = 30
# The base url of your website as airflow cannot guess what domain or
# cname you are using. This is use in automated emails that
# airflow sends to point links to the right web server
base_url = https://workflow.telemetry.mozilla.org
base_url = $URL

# The ip specified when starting the web server
web_server_host = 0.0.0.0

# The port on which to run the web server
web_server_port = 8080
web_server_port = $PORT

# Secret key used to run your flask app
secret_key = temporary_key
secret_key = $AIRFLOW_SECRET_KEY

# Number of workers to run the Gunicorn web server
workers = 4
@@ -84,15 +84,12 @@ worker_class = gevent
expose_config = true

# Set to true to turn on authentication : http://pythonhosted.org/airflow/installation.html#web-authentication
authenticate = False
authenticate = $AIRFLOW_AUTHENTICATE
auth_backend = airflow.contrib.auth.backends.password_auth

# Filter the list of dags by owner name (requires authentication to be enabled)
filter_by_owner = False

# Authentication
authenticate = ${AIRFLOW_ENABLE_AUTH}
auth_backend = airflow.contrib.auth.backends.password_auth

[email]
email_backend = airflow.utils.email.send_email_smtp

@@ -102,11 +99,11 @@ email_backend = airflow.utils.email.send_email_smtp
# server here
smtp_starttls = True
smtp_ssl = False
smtp_host = ${SMTP_HOST}
smtp_host = $AIRFLOW_SMTP_HOST
smtp_port = 587
smtp_user = ${SMTP_USER}
smtp_password = ${SMTP_PASSWORD}
smtp_mail_from = telemetry-alerts@airflow.dev.mozaws.net
smtp_user = $AIRFLOW_SMTP_USER
smtp_password = $AIRFLOW_SMTP_PASSWORD
smtp_mail_from = $AIRFLOW_SMTP_FROM

[celery]
# This section only applies if you are using the CeleryExecutor in
@@ -131,14 +128,14 @@ worker_log_server_port = 8793
# The Celery broker URL. Celery supports RabbitMQ, Redis and experimentally
# a sqlalchemy database. Refer to the Celery documentation for more
# information.
broker_url = amqp://airflow:airflow@rabbitmq:5672/airflow
broker_url = $AIRFLOW_BROKER_URL

# Another key Celery setting
celery_result_backend = amqp://airflow:airflow@rabbitmq:5672/airflow
celery_result_backend = $AIRFLOW_RESULT_URL

# Celery Flower is a sweet UI for Celery. Airflow has a shortcut to start
# it `airflow flower`. This defines the port that Celery Flower runs on
flower_port = 5555
flower_port = $AIRFLOW_FLOWER_PORT

# Default queue that tasks get assigned to and that worker listen on.
default_queue = default
@@ -1,3 +1,4 @@
# to be copied to $AIRFLOW_BUCKET/steps/airflow.sh
set -o verbose

HOME=/home/hadoop
@@ -1,85 +0,0 @@
- name: provision aws resources
  hosts: localhost
  vars:
    elb_name: telemetry-ecs
    command: scheduler

  tasks:
    - name: create bucket
      s3: bucket={{airflow_bucket}} region={{region}} mode=create

    - name: copy EMR bootstrap script
      s3: bucket={{airflow_bucket}} region={{region}} object=steps/airflow.sh src={{ playbook_dir }}/files/spark/airflow.sh mode=put

    - name: create load balancer
      ec2_elb_lb:
        name: "{{ elb_name }}"
        region: "{{ region }}"
        state: present
        security_group_names: "{{ elb_sg_name }}"
        subnets:
          - "{{ ecs_vpc_subnet_id }}"
        listeners:
          - protocol: https
            load_balancer_port: 443
            instance_protocol: http
            instance_port: 8080
            ssl_certificate_id: "{{ ssl_cert_id }}"
          - protocol: http
            load_balancer_port: 80
            instance_port: 8080
        health_check:
          ping_protocol: tcp
          ping_port: 8080
          response_timeout: 50 # seconds
          interval: 60 # seconds
          unhealthy_threshold: 2
          healthy_threshold: 2

    - name: fetch SMTP credentials
      s3: bucket={{metadata_bucket}} region={{region}} object=smtp_ses/credentials.json mode=getstr
      register: smtp_credentials_json

    - set_fact:
        smtp_credentials: "{{ smtp_credentials_json.contents|from_json }}"

    - name: create task definition
      shell: "{{ item }}"
      with_items:
        - ecs-cli configure --cluster {{ ecs_cluster_name }}
        - ecs-cli compose --project-name telemetry-airflow --file {{ playbook_dir }}/files/docker-compose.yml create
      environment:
        AWS_REGION: "{{ region }}"
        EMR_KEY_NAME: "{{ emr_key_name }}"
        EMR_FLOW_ROLE: "{{ emr_flow_role }}"
        EMR_SERVICE_ROLE: "{{ emr_service_role }}"
        EMR_INSTANCE_TYPE: "{{ emr_instance_type }}"
        SPARK_BUCKET: "{{ spark_bucket }}"
        DB_URI: "{{ db_uri }}"
        DB_USER: "{{ db_user }}"
        DB_PASSWORD: "{{ db_password }}"
        AIRFLOW_BUCKET: "{{ airflow_bucket }}"
        AIRFLOW_ENABLE_AUTH: True
        PRIVATE_OUTPUT_BUCKET: "{{ private_output_bucket }}"
        PUBLIC_OUTPUT_BUCKET: "{{ public_output_bucket }}"
        SMTP_HOST: "{{ smtp_credentials.host }}"
        SMTP_USER: "{{ smtp_credentials.user }}"
        SMTP_PASSWORD: "{{ smtp_credentials.password }}"
        # Bug 1286825: Tell the scheduler to exit after 5 runs.
        COMMAND: scheduler -n 5 # https://github.com/aws/amazon-ecs-cli/issues/28

    # TODO: create a new module capable of updating the service with the new definition or ensure that the
    # revision of a new task definition is incremental (for some reason it's not always the case...)
    - name: update service
      ecs_service:
        region: "{{ region }}"
        name: telemetry-airflow
        cluster: "{{ ecs_cluster_name }}"
        desired_count: 1
        state: present
        role: "{{ ecs_role }}"
        load_balancers:
          - loadBalancerName: "{{ elb_name }}"
            containerName: webserver
            containerPort: 8080
        task_definition: ecscompose-telemetry-airflow
@@ -1,34 +0,0 @@
- name: deploy Airflow containers locally
  hosts: localhost
  vars:
    command: scheduler -n 5 # See bug 1286825
    compose_conf:
      - "{{ playbook_dir }}/files/docker-compose.yml"
      - "{{ playbook_dir }}/files/docker-compose-local.yml"

  tasks:
    - name: launch Airflow containers
      shell: "{{ item }}"
      with_items:
        - docker-compose -f {{ compose_conf[0] }} -f {{ compose_conf[1] }} down -v --remove-orphans
        - docker-compose -f {{ compose_conf[0] }} -f {{ compose_conf[1] }} up -d
      environment:
        AWS_REGION: "{{ region }}"
        EMR_KEY_NAME: "{{ emr_key_name }}"
        EMR_FLOW_ROLE: "{{ emr_flow_role }}"
        EMR_SERVICE_ROLE: "{{ emr_service_role }}"
        EMR_INSTANCE_TYPE: "{{ emr_instance_type }}"
        SPARK_BUCKET: "{{ spark_bucket }}"
        AWS_ACCESS_KEY_ID: "{{ lookup('env', 'AWS_ACCESS_KEY_ID') }}"
        AWS_SECRET_ACCESS_KEY: "{{ lookup('env', 'AWS_SECRET_ACCESS_KEY') }}"
        DB_URI: "{{ db_uri }}"
        DB_USER: "{{ db_user }}"
        DB_PASSWORD: "{{ db_password }}"
        AIRFLOW_BUCKET: "{{ airflow_bucket }}"
        AIRFLOW_ENABLE_AUTH: False
        PRIVATE_OUTPUT_BUCKET: "{{ private_output_bucket }}"
        PUBLIC_OUTPUT_BUCKET: "{{ public_output_bucket }}"
        SMTP_HOST: "localhost"
        SMTP_USER: "dummy"
        SMTP_PASSWORD: "dummy_pass"
        COMMAND: "{{ command }}"
@@ -1,16 +0,0 @@
region: us-west-2
spark_bucket: telemetry-spark-emr-2
airflow_bucket: telemetry-test-bucket
public_output_bucket: telemetry-test-bucket
private_output_bucket: telemetry-test-bucket

emr_key_name: mozilla_vitillo
emr_flow_role: telemetry-spark-cloudformation-TelemetrySparkInstanceProfile-1SATUBVEXG7E3
emr_service_role: EMR_DefaultRole
emr_instance_type: c3.4xlarge

db_uri: mysql
db_user: airflow
db_password: airflow

airflow_enable_auth: false
@@ -1 +0,0 @@
# TODO: configure production environment
@@ -1,30 +0,0 @@
region: us-west-2
spark_bucket: telemetry-spark-emr-2
airflow_bucket: telemetry-airflow
public_output_bucket: telemetry-public-analysis-2
private_output_bucket: telemetry-parquet

emr_key_name: mozilla_vitillo
emr_flow_role: telemetry-spark-cloudformation-TelemetrySparkInstanceProfile-1SATUBVEXG7E3
emr_service_role: EMR_DefaultRole
emr_instance_type: c3.4xlarge

ecs_sg_name: telemetry-ecs
ecs_key_name: mozilla_vitillo
ecs_vpc_id: vpc-4283d227
ecs_vpc_subnet_id: subnet-125a7c77
ecs_cluster_name: telemetry
ecs_instance_type: c4.2xlarge
ecs_role: arn:aws:iam::927034868273:role/telemetry-ecs-cloudformation-TelemetryECSRole-1UAYK6XNB6DG0
ecs_asg_lc_name: telemetry-ecs-lc
ecs_asg_asg_name: telemetry-ecs-asg

elb_name: telemetry-ecs
elb_sg_name: telemetry-workflow-elb
ssl_cert_id: arn:aws:iam::927034868273:server-certificate/cloudfront/exp20180829_cf_star_telemetry_mozilla_org

db_uri: telemetry-ecs-airflow.cppmil15uwvg.us-west-2.rds.amazonaws.com
db_user: airflow
db_password: telemetry-airflow # DB is not accessible from the outside world

metadata_bucket: net-mozaws-prod-us-west-2-pipeline-metadata
@@ -1,58 +0,0 @@
#!/usr/bin/env bash

CMD="airflow"
TRY_LOOP="10"
MYSQL_HOST=${DB_URI:-mysql}
MYSQL_PORT="3306"
RABBITMQ_HOST="rabbitmq"
RABBITMQ_CREDS="airflow:airflow"

# Wait for RabbitMQ
j=0
while ! curl -sI -u $RABBITMQ_CREDS http://$RABBITMQ_HOST:15672/api/whoami |grep '200 OK'; do
  j=`expr $j + 1`
  if [ $j -ge $TRY_LOOP ]; then
    echo "$(date) - $RABBITMQ_HOST still not reachable, giving up"
    exit 1
  fi
  echo "$(date) - waiting for RabbitMQ... $j/$TRY_LOOP"
  sleep 5
done

# Generate Fernet key for replacement below
export FERNET_KEY=$(python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print FERNET_KEY")

# Replace environment vars in airflow config file.
python $AIRFLOW_HOME/replace_env.py $AIRFLOW_HOME/airflow.cfg

i=0
while ! nc $MYSQL_HOST $MYSQL_PORT >/dev/null 2>&1 < /dev/null; do
  i=`expr $i + 1`
  if [ $i -ge $TRY_LOOP ]; then
    echo "$(date) - ${MYSQL_HOST}:${MYSQL_PORT} still not reachable, giving up"
    exit 1
  fi
  echo "$(date) - waiting for ${MYSQL_HOST}:${MYSQL_PORT}... $i/$TRY_LOOP"
  sleep 5
done

if [ "$1" = "webserver" ]; then
  echo "Initialize database..."
  $CMD initdb
  $CMD upgradedb
fi

sleep 5

if [[ "$COMMAND" == "scheduler"* ]]; then
  # Work around scheduler hangs, see bug 1286825.
  # Run the scheduler inside a retry loop.
  while echo "Running"; do
    eval $CMD "${@:-$COMMAND}"
    echo "Scheduler exited with code $?. Respawning.." >&2
    date >> /tmp/airflow_scheduler_errors.txt
    sleep 1
  done
else
  eval $CMD "${@:-$COMMAND}"
fi
@@ -1,24 +0,0 @@
#!/usr/bin/env python
# Replace environment variables in the given file with their values.
# Usage: python replace_env.py input_file [ output_file ]
import os
import sys

nargs = len(sys.argv)
if nargs < 2 or nargs > 3:
    print "Usage: python {} input_file [ output_file ]".format(sys.argv[0])
    sys.exit(1)

input_file = sys.argv[1]
if nargs > 2:
    output_file = sys.argv[2]
else:
    output_file = input_file

with open(input_file, "r") as f:
    data = f.read()

expanded = os.path.expandvars(data)

with open(output_file, "w") as f:
    f.write(expanded)
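The removed helper above leaned on `os.path.expandvars`; a small illustration (not from the repo) of the substitution it performed, using a hypothetical config line and value:

```python
# Illustration of the env-var substitution the removed script performed.
# The variable name and value below are hypothetical examples.
import os

os.environ["AIRFLOW_DATABASE_URL"] = "postgres://postgres@db/postgres"
line = "sql_alchemy_conn = $AIRFLOW_DATABASE_URL"
print(os.path.expandvars(line))
# -> sql_alchemy_conn = postgres://postgres@db/postgres
```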
@@ -1,94 +0,0 @@
{
  "AWSTemplateFormatVersion": "2010-09-09",
  "Description": "Cloudformation template for Telemetry's ECS",
  "Resources": {
    "TelemetryECSRole": {
      "Type": "AWS::IAM::Role",
      "Properties": {
        "AssumeRolePolicyDocument": {
          "Statement": [
            {
              "Sid": "",
              "Effect": "Allow",
              "Principal": {
                "Service": "ec2.amazonaws.com"
              },
              "Action": "sts:AssumeRole"
            },
            {
              "Effect": "Allow",
              "Principal": {
                "Service": [
                  "ecs.amazonaws.com"
                ]
              },
              "Action": [
                "sts:AssumeRole"
              ]
            }
          ]
        },
        "Policies": [
          {
            "PolicyName": "telemetry-ecs",
            "PolicyDocument": {
              "Version": "2012-10-17",
              "Statement": [
                {
                  "Sid": "Stmt1430246788000",
                  "Effect": "Allow",
                  "Action": [
                    "elasticmapreduce:Describe*",
                    "elasticmapreduce:RunJobFlow",
                    "elasticmapreduce:TerminateJobFlows",
                    "ecs:CreateCluster",
                    "ecs:DeregisterContainerInstance",
                    "ecs:DiscoverPollEndpoint",
                    "ecs:Poll",
                    "ecs:RegisterContainerInstance",
                    "ecs:StartTelemetrySession",
                    "ecs:Submit*",
                    "ecr:GetAuthorizationToken",
                    "ecr:BatchCheckLayerAvailability",
                    "ecr:GetDownloadUrlForLayer",
                    "ecr:BatchGetImage",
                    "iam:PassRole",
                    "logs:CreateLogStream",
                    "logs:PutLogEvents",
                    "elasticloadbalancing:Describe*",
                    "elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
                    "elasticloadbalancing:RegisterInstancesWithLoadBalancer",
                    "ec2:Describe*",
                    "ec2:AuthorizeSecurityGroupIngress"
                  ],
                  "Resource": [
                    "*"
                  ]
                }
              ]
            }
          }
        ]
      }
    },
    "TelemetryECSInstanceProfile": {
      "Type": "AWS::IAM::InstanceProfile",
      "Properties": {
        "Path": "/",
        "Roles": [
          {
            "Ref": "TelemetryECSRole"
          }
        ]
      }
    }
  },
  "Outputs": {
    "InstanceProfile": {
      "Description": "ECS instance profile",
      "Value": {
        "Ref": "TelemetryECSInstanceProfile"
      }
    }
  }
}
@@ -1,27 +0,0 @@
mysql:
  image: mysql
  restart: always
  mem_limit: 2147483648
  ports:
    - "3306:3306"
  environment:
    - MYSQL_RANDOM_ROOT_PASSWORD=true
    - MYSQL_USER=airflow
    - MYSQL_PASSWORD=airflow
    - MYSQL_DATABASE=airflow

webserver:
  links:
    - mysql:mysql

scheduler:
  links:
    - mysql:mysql

worker:
  links:
    - mysql:mysql

flower:
  links:
    - mysql:mysql
@@ -1,130 +0,0 @@
rabbitmq:
  image: rabbitmq:3-management
  restart: always
  ports:
    - "15672:15672"
    - "5672:5672"
  environment:
    - RABBITMQ_DEFAULT_USER=airflow
    - RABBITMQ_DEFAULT_PASS=airflow
    - RABBITMQ_DEFAULT_VHOST=airflow

webserver:
  image: mozdata/telemetry-airflow
  restart: always
  environment:
    - AIRFLOW_HOME=/usr/local/airflow
    - AIRFLOW_ENABLE_AUTH
    - AWS_REGION
    - EMR_KEY_NAME
    - EMR_FLOW_ROLE
    - EMR_SERVICE_ROLE
    - EMR_INSTANCE_TYPE
    - SPARK_BUCKET
    - AIRFLOW_BUCKET
    - PRIVATE_OUTPUT_BUCKET
    - PUBLIC_OUTPUT_BUCKET
    - AWS_ACCESS_KEY_ID
    - AWS_SECRET_ACCESS_KEY
    - DB_URI
    - DB_USER
    - DB_PASSWORD
    - SMTP_HOST
    - SMTP_USER
    - SMTP_PASSWORD
  ports:
    - "8080:8080"
  links:
    - rabbitmq:rabbitmq
    - worker:worker
    - scheduler:scheduler
  command: webserver

flower:
  image: mozdata/telemetry-airflow
  restart: always
  environment:
    - AIRFLOW_HOME=/usr/local/airflow
    - AIRFLOW_ENABLE_AUTH
    - AWS_REGION
    - EMR_KEY_NAME
    - EMR_FLOW_ROLE
    - EMR_SERVICE_ROLE
    - EMR_INSTANCE_TYPE
    - SPARK_BUCKET
    - AIRFLOW_BUCKET
    - PRIVATE_OUTPUT_BUCKET
    - PUBLIC_OUTPUT_BUCKET
    - AWS_ACCESS_KEY_ID
    - AWS_SECRET_ACCESS_KEY
    - DB_URI
    - DB_USER
    - DB_PASSWORD
    - SMTP_HOST
    - SMTP_USER
    - SMTP_PASSWORD
  ports:
    - "5555:5555"
  links:
    - rabbitmq:rabbitmq
  command: flower

scheduler:
  image: mozdata/telemetry-airflow
  restart: always
  mem_limit: 4294967296
  environment:
    - AIRFLOW_HOME=/usr/local/airflow
    - AIRFLOW_ENABLE_AUTH
    - AWS_REGION
    - EMR_KEY_NAME
    - EMR_FLOW_ROLE
    - EMR_SERVICE_ROLE
    - EMR_INSTANCE_TYPE
    - SPARK_BUCKET
    - AIRFLOW_BUCKET
    - PRIVATE_OUTPUT_BUCKET
    - PUBLIC_OUTPUT_BUCKET
    - COMMAND
    - AWS_ACCESS_KEY_ID
    - AWS_SECRET_ACCESS_KEY
    - DB_URI
    - DB_USER
    - DB_PASSWORD
    - SMTP_HOST
    - SMTP_USER
    - SMTP_PASSWORD
  links:
    - rabbitmq:rabbitmq

worker:
  image: mozdata/telemetry-airflow
  restart: always
  mem_limit: 4294967296
  environment:
    - AIRFLOW_HOME=/usr/local/airflow
  ports:
    - "8793:8793"
  environment:
    - AIRFLOW_HOME=/usr/local/airflow
    - AIRFLOW_ENABLE_AUTH
    - AWS_REGION
    - EMR_KEY_NAME
    - EMR_FLOW_ROLE
    - EMR_SERVICE_ROLE
    - EMR_INSTANCE_TYPE
    - SPARK_BUCKET
    - AIRFLOW_BUCKET
    - PRIVATE_OUTPUT_BUCKET
    - PUBLIC_OUTPUT_BUCKET
    - AWS_ACCESS_KEY_ID
    - AWS_SECRET_ACCESS_KEY
    - DB_URI
    - DB_USER
    - DB_PASSWORD
    - SMTP_HOST
    - SMTP_USER
    - SMTP_PASSWORD
  links:
    - rabbitmq:rabbitmq
  command: worker
@@ -1,103 +0,0 @@
- name: provision ECS cluster
  hosts: localhost
  vars:
    ecs_cluster_size: 2 # need at least 2 to update a running service

  tasks:
    - name: create ECS cluster for telemetry
      ecs_cluster:
        region: "{{ region }}"
        name: "{{ ecs_cluster_name }}"
        state: present

    - name: create role for container instances
      cloudformation:
        stack_name: "telemetry-ecs-cloudformation"
        state: "present"
        region: "{{ region }}"
        disable_rollback: true
        template: "{{ playbook_dir }}/files/cloudformation.json"
        tags:
          type: "telemetry"
          application: "ecs"
      register: cloudformation

    - name: create security group for load balancer
      ec2_group:
        name: "{{ elb_sg_name }}"
        description: Telemetry Workflow ELB
        region: "{{ region }}"
        rules:
          - proto: tcp
            from_port: 80
            to_port: 80
            cidr_ip: 0.0.0.0/0
          - proto: tcp
            from_port: 443
            to_port: 443
            cidr_ip: 0.0.0.0/0
        rules_egress:
          - proto: all
            from_port: all
            to_port: all
            cidr_ip: 0.0.0.0/0
        vpc_id: "{{ ecs_vpc_id }}"
      register: elb_sg

    - name: create security group for container instances
      ec2_group:
        name: "{{ ecs_sg_name }}"
        description: telemetry ECS
        region: "{{ region }}"
        rules:
          - proto: tcp
            from_port: 22
            to_port: 22
            cidr_ip: 0.0.0.0/0
          - proto: tcp
            from_port: 8080
            to_port: 8080
            group_id: "{{ elb_sg.group_id }}"
        rules_egress:
          - proto: all
            from_port: all
            to_port: all
            cidr_ip: 0.0.0.0/0
        vpc_id: "{{ ecs_vpc_id }}"
      register: ecs_sg

    - name: create launch configuration for the cluster ASG
      ec2_lc:
        name: "{{ ecs_asg_lc_name }}"
        image_id: ami-a28476c2
        region: "{{ region }}"
        instance_type: "{{ ecs_instance_type }}"
        key_name: "{{ ecs_key_name }}"
        assign_public_ip: yes
        security_groups:
          - "{{ ecs_sg.group_id }}"
        instance_profile_name: "{{ cloudformation.stack_outputs.InstanceProfile }}"
        user_data: |
          #!/bin/bash
          echo ECS_CLUSTER={{ ecs_cluster_name }} >> /etc/ecs/ecs.config
      register: ec2_lc

    - name: create cluster ASG
      ec2_asg:
        name: "{{ ecs_asg_asg_name }}"
        region: "{{ region }}"
        launch_config_name: "{{ ec2_lc.name }}"
        health_check_period: 60
        health_check_type: EC2
        replace_all_instances: yes
        min_size: 2
        max_size: 2
        desired_capacity: 2
        vpc_zone_identifier:
          - "{{ ecs_vpc_subnet_id }}"
        tags:
          - App: pipeline
          - Name: telemetry-ecs-instance
          - Type: telemetry-ecs-instance
          - Owner: telemetry@mozilla.com
          - REAPER_SPARE_ME: true
@@ -0,0 +1,38 @@
#!/usr/bin/env bash

set -eo pipefail

# default variables
: "${PORT:=8000}"

usage() {
  echo "usage: bin/run flower|web|worker|scheduler"
  exit 1
}

[ $# -lt 1 ] && usage

case $1 in
  flower)
    exec newrelic-admin run-program airflow flower
    ;;
  web)
    newrelic-admin run-program airflow initdb
    newrelic-admin run-program airflow upgradedb
    exec newrelic-admin run-program airflow webserver -p ${PORT} --workers 4
    ;;
  worker)
    exec newrelic-admin run-program airflow worker
    ;;
  scheduler)
    while echo "Running"; do
      exec newrelic-admin run-program airflow scheduler -n 5
      echo "Scheduler exited with code $?. Respawning.." >&2
      date >> /tmp/airflow_scheduler_errors.txt
      sleep 1
    done
    ;;
  *)
    usage
    ;;
esac
@@ -0,0 +1,58 @@
# These environment variables must be set in CircleCI UI
#
# DOCKERHUB_REPO - docker hub repo, format: <username>/<repo>
# DOCKER_EMAIL - login info for docker hub
# DOCKER_USER
# DOCKER_PASS
#
machine:
  pre:
    - curl -sSL https://s3.amazonaws.com/circle-downloads/install-circleci-docker.sh | bash -s -- 1.10.0
  services:
    - docker
  python:
    version: 2.7.12

dependencies:
  pre:
    - sudo apt-get update
    - sudo apt-get install python-dev
    - sudo pip install -U docker-compose

  override:
    - docker info
    - docker --version

    # create a version.json
    - >
      printf '{"commit":"%s","version":"%s","source":"https://github.com/%s/%s","build":"%s"}\n'
      "$CIRCLE_SHA1"
      "$CIRCLE_TAG"
      "$CIRCLE_PROJECT_USERNAME"
      "$CIRCLE_PROJECT_REPONAME"
      "$CIRCLE_BUILD_URL"
      > version.json
    - cp version.json $CIRCLE_ARTIFACTS

    - docker build -t app:build .

# appropriately tag and push the container to dockerhub
deployment:
  hub_latest:
    branch: "master"
    commands:
      - "[ ! -z $DOCKERHUB_REPO ]"
      - docker login -e $DOCKER_EMAIL -u $DOCKER_USER -p $DOCKER_PASS
      - "docker tag app:build ${DOCKERHUB_REPO}:latest"
      - "docker push ${DOCKERHUB_REPO}:latest"

  hub_releases:
    # push all tags
    tag: /.*/
    commands:
      - "[ ! -z $DOCKERHUB_REPO ]"
      - docker login -e $DOCKER_EMAIL -u $DOCKER_USER -p $DOCKER_PASS
      - "echo ${DOCKERHUB_REPO}:${CIRCLE_TAG}"
      - "docker tag app:build ${DOCKERHUB_REPO}:${CIRCLE_TAG}"
      - "docker images"
      - "docker push ${DOCKERHUB_REPO}:${CIRCLE_TAG}"
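The `printf` step above emits a Dockerflow-style `version.json`; a rough sketch (not from the repo) of the same document built in Python, with placeholder values standing in for the CircleCI variables:

```python
# Sketch of the Dockerflow-style version.json the CI step writes.
# The values below are hypothetical placeholders for the CIRCLE_* variables.
import json

version = {
    "commit": "abc123",                                          # $CIRCLE_SHA1
    "version": "v1.0",                                           # $CIRCLE_TAG
    "source": "https://github.com/mozilla/telemetry-airflow",    # username/repo
    "build": "https://circleci.com/gh/mozilla/telemetry-airflow/42",  # $CIRCLE_BUILD_URL
}
print(json.dumps(version))
```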
@@ -0,0 +1,53 @@
version: '2'

services:
  db:
    image: postgres:9.4

  redis:
    image: redis:3.2

  app:
    build: .
    restart: always

  web:
    extends:
      service: app
    ports:
      - "8000:8000"
    depends_on:
      - app
    links:
      - db
      - redis
    command: bin/run web

  worker:
    extends:
      service: app
    mem_limit: 4294967296
    ports:
      - "8793:8793"
    links:
      - db
      - redis
    command: bin/run worker

  scheduler:
    mem_limit: 4294967296
    extends:
      service: app
    links:
      - db
      - redis
    command: bin/run scheduler

  flower:
    extends:
      service: app
    ports:
      - "5555:5555"
    links:
      - redis
    command: bin/run flower
@@ -0,0 +1,6 @@
boto3
airflow[celery,postgres,hive,hdfs,jdbc,async,password,crypto]
retrying
newrelic
redis
hiredis