bigquery-etl/.circleci/config.yml

version: 2
jobs:
  build:
    docker: &docker
      - image: python:3.8
    steps:
      - checkout
      - restore_cache:
          keys:
            # when lock files change, use increasingly general patterns to restore cache
            - &cache_key
              python-packages-v1-{{ .Branch }}-{{ checksum "requirements.in" }}-{{ checksum "requirements.txt" }}
            - python-packages-v1-{{ .Branch }}-{{ checksum "requirements.in" }}-
            - python-packages-v1-{{ .Branch }}-
            - python-packages-v1-
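      # the &build anchor is reused below (as *build) by jobs that need the same venv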
      - &build
        run:
          name: Build
          command: |
            python3.8 -m venv venv/
            venv/bin/pip install pip-tools
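            # pip-sync installs the exact pinned set from requirements.txt and removes anything else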
            venv/bin/pip-sync
      - run:
          name: PyTest with linters
          command: PATH="venv/bin:$PATH" script/entrypoint
      - save_cache:
          paths:
            - venv/
          key: *cache_key
  verify-format-sql:
    docker: *docker
    steps:
      - checkout
      - run:
          name: Verify that SQL is correctly formatted
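          # git ls-tree -d HEAD --name-only lists the top-level directories tracked in HEAD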
          command: script/format_sql --check $(git ls-tree -d HEAD --name-only)
  verify-requirements:
    docker: *docker
    steps:
      - checkout
      - run:
          name: Verify that requirements.txt contains the right dependencies for this python version
          command: |
            pip install pip-tools
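            # recompile the lock file and fail if it differs from the committed one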
            pip-compile --quiet --generate-hashes requirements.in
            git diff --exit-code requirements.txt
  dry-run-sql:
    docker: *docker
    steps:
      - checkout
      - run:
          name: Verify that BigQuery validates each query
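          # a BigQuery dry run checks syntax and referenced tables without executing the query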
          command: script/dryrun
  validate-metadata:
    docker: *docker
    steps:
      - checkout
      - *build
      - run:
          name: Verify that metadata files are valid
          command: PATH="venv/bin:$PATH" script/validate_metadata
  integration:
    docker: *docker
    steps:
      - checkout
      - &skip_forked_pr
        run:
          name: Early return if this build is from a forked PR
          command: |
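            # CIRCLE_PR_NUMBER is only set for builds of forked pull requests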
            if [ -n "$CIRCLE_PR_NUMBER" ]; then
              echo "Cannot pass creds to forked PRs, so marking this step successful"
              circleci step halt
            fi
      - *build
      - &pytest_integration_test
        run:
          name: PyTest Integration Test
          # Google's client libraries will check for GOOGLE_APPLICATION_CREDENTIALS
          # and use a file in that location for credentials if present;
          # See https://cloud.google.com/docs/authentication/production
          command: |
            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
            PATH="venv/bin:$PATH" script/entrypoint -m integration
  validate-dags:
    # based on https://github.com/mozilla/telemetry-airflow/blob/master/.circleci/config.yml
    machine:
      image: ubuntu-1604:201903-01
      docker_layer_caching: true
    steps:
      - checkout
      - run:
          name: Pull telemetry-airflow
          command: |
            git clone https://github.com/mozilla/telemetry-airflow.git
            cp -a dags/. telemetry-airflow/dags/
      - run:
          name: Build the telemetry-airflow containers
          command: |
            cd telemetry-airflow
            docker-compose pull
            docker-compose build
            # now take ownership of the folder
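            # (10001 is the app UID/GID convention used by Mozilla's Dockerflow images)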
            sudo chown -R 10001:10001 .
      - run:
          name: Test if dag scripts can be parsed
          command: |
            cd telemetry-airflow
            bash bin/test-parse
  deploy:
    docker:
      # bash required for step: Determine docker image name
      - image: relud/docker-bash
    steps:
      - checkout
      - setup_remote_docker:
          docker_layer_caching: true
      - run:
          name: Determine docker image name
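          # writes e.g. IMAGE=mozilla/bigquery-etl:latest to $BASH_ENV, which bash
          # sources at the start of each later step; falls back to bigquery-etl:latest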
          command: echo 'IMAGE="${CIRCLE_PROJECT_USERNAME+$CIRCLE_PROJECT_USERNAME/}${CIRCLE_PROJECT_REPONAME:-bigquery-etl}:${CIRCLE_TAG:-latest}"' > $BASH_ENV
      - run:
          name: Build docker image
          command: docker build . --pull --tag "$IMAGE"
      - run:
          name: Deploy to Dockerhub
          command: |
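            # ${VAR:?} makes the step fail fast if the variable is unset or empty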
echo "${DOCKER_PASS:?}" | docker login -u "${DOCKER_USER:?}" --password-stdin
docker push "$IMAGE"
workflows:
version: 2
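  # Note: by default jobs run for every branch but are never triggered by tag pushes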
  build:
    jobs:
      - build:
          context: data-eng-circleci-tests
          # deploy requires build, so build also needs a tags filter for tagged deploys
          filters:
            tags:
              only: /.*/
      - verify-format-sql
      - verify-requirements
      - dry-run-sql
      - validate-metadata
      - integration
      - validate-dags
      - deploy:
          context: data-eng-bigquery-etl-dockerhub
          requires:
            # can't run in parallel because CIRCLE_BUILD_NUM is same
            - build
          filters:
            branches:
              only: master
            tags:
              only: /.*/