# Source-viewer header (not part of the configuration): 732 lines, 25 KiB, YAML
---
# CircleCI configuration schema version; 2.1 is the version under which the
# `orbs`, `parameters` and `executors` keys used in this file are declared.
version: 2.1

orbs:
  # Provides the gcp-gcr/gcr-auth, gcp-gcr/build-image and
  # gcp-gcr/push-image steps used by the deploy-to-private-gcr job.
  gcp-gcr: circleci/gcp-gcr@0.13.0
  # Provides the docker/check, docker/build and docker/push steps used by
  # the deploy job.
  docker: circleci/docker@1.5
  # Provides the python/default executor used by the validate-dags job.
  python: circleci/python@2.1.1

parameters:
  # Python version interpolated into the docker image tag and into the venv
  # cache keys via << pipeline.parameters.python-version >>.
  python-version:
    type: string
    # Quoted so the value stays the string '3.10' instead of the float 3.1.
    default: '3.10'

executors:
  # Machine executor referenced by the deploy and deploy-to-private-gcr jobs.
  ubuntu-machine-executor:
    machine:
      image: ubuntu-2004:202111-02

jobs:
  # Creates the virtualenv, lints YAML and runs pytest with linters for
  # everything that is not marked routine, sql or integration. Also defines
  # the shared anchors: docker, restore_venv_cache, python_cache_key, build.
  build:
    docker: &docker
      - image: python:<< pipeline.parameters.python-version >>
    steps:
      - checkout
      - &restore_venv_cache
        restore_cache:
          keys:
            # when lock files change, use increasingly general
            # patterns to restore cache
            - &python_cache_key
              # yamllint disable-line rule:line-length
              python-<< pipeline.parameters.python-version >>-packages-v1-{{ .Branch }}-{{ checksum "requirements.in" }}-{{ checksum "requirements.txt" }}
            # yamllint disable-line rule:line-length
            - python-<< pipeline.parameters.python-version >>-packages-v1-{{ .Branch }}-{{ checksum "requirements.in" }}-
            # yamllint disable-line rule:line-length
            - python-<< pipeline.parameters.python-version >>-packages-v1-{{ .Branch }}-
            - python-<< pipeline.parameters.python-version >>-packages-v1-main-
      - &build
        run:
          name: Build
          command: |
            python3 -m venv venv/
            venv/bin/pip install pip-tools --constraint requirements.in
            venv/bin/pip-sync --pip-args=--no-deps
      - run:
          name: Yamllint Test
          command: PATH="venv/bin:$PATH" yamllint -c .yamllint.yaml .
      - run:
          name: PyTest with linters
          # integration tests are run in a separate `integration` step;
          # SQL and routine tests are split out into a separate `test-sql` test
          # since those tests take the longest to run and running those tests
          # in parallel speeds up CI
          command: |
            PATH="venv/bin:$PATH" script/entrypoint --black --flake8 \
              --isort --mypy-ignore-missing-imports --pydocstyle \
              -m "not (routine or sql or integration)" \
              -n 8
      - save_cache:
          paths:
            - venv/
          key: *python_cache_key
  # Checks formatting of the generated SQL taken from the workspace. Also
  # defines the attach_generated_sql and copy_generated_sql step anchors.
  verify-format-sql:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - &attach_generated_sql
        attach_workspace:
          at: /tmp/workspace
      - &copy_generated_sql
        run:
          name: Move generated-sql into place
          command: |
            rm -rf sql/
            cp -r /tmp/workspace/generated-sql/sql sql
      - run:
          name: Verify that SQL is correctly formatted
          command: |
            PATH="venv/bin:$PATH" script/bqetl format --check \
              $(git ls-tree -d HEAD --name-only)
  # Recompiles requirements.txt with pip-compile and fails when the
  # non-comment lines differ from the committed file.
  verify-requirements:
    docker: *docker
    steps:
      - checkout
      - run:
          name: >-
            Verify that requirements.txt contains the right dependencies for
            this python version
          # use `--constraint` with `requirements.in` not `requirements.txt`
          # because for pip>=20.3 "Constraints are only allowed to take the form
          # of a package name and a version specifier"
          command: |
            pip install pip-tools --constraint requirements.in
            pip-compile --allow-unsafe --generate-hashes --quiet
            git diff --exit-code -G '^ *[^# ]' -- requirements.txt
  # Runs tests marked routine or sql against the staged SQL; the tests are
  # skipped when CIRCLE_PR_NUMBER is set. Also defines the copy_staged_sql
  # step anchor.
  test-sql:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - &copy_staged_sql
        run:
          name: Move sql deployed on stage into place
          command: |
            rm -rf sql/
            cp -r /tmp/workspace/staged-generated-sql/sql sql
            rm -rf tests/
            cp -r /tmp/workspace/staged-generated-sql/tests tests
      - run:
          name: Run SQL tests
          command: |
            if [ -n "$CIRCLE_PR_NUMBER" ]; then
              echo "Cannot pass creds to forked PRs," \
                "so skipping routine and SQL tests"
            else
              PATH="venv/bin:$PATH" script/entrypoint -m "routine or sql" -n 8
            fi
  # Dry runs queries with schema validation: all of sql/ on main or when the
  # commit message contains [run-tests], otherwise only the stage folder.
  dry-run-sql:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - *copy_staged_sql
      - run:
          name: Dry run queries
          # yamllint disable rule:line-length
          # Dry runs on PRs are executed on sql/bigquery-etl-integration-test
          # Artifacts (queries, views, UDFs) that are changed will be moved to the
          # bigquery-etl-integration-test folder and deployed to the corresponding
          # project. This ensures that dry runs can be executed before changes
          # have been deployed to prod. (bigquery-etl-integration-test is treated
          # as a stage environment)
          command: |
            if [ "$CIRCLE_BRANCH" = main ]; then
              echo "Check dry run for all queries because branch is" \
                "$CIRCLE_BRANCH"
              PATHS=sql
            elif git log --format=%B --no-merges -n 1 |
                grep -qF '[run-tests]'; then
              echo "Check dry run for all queries because [run-tests] in" \
                "commit message"
              PATHS=sql
            else
              PATHS="sql/bigquery-etl-integration-test"
            fi
            echo $PATHS
            PATH="venv/bin:$PATH" script/bqetl dryrun --validate-schemas $PATHS
          # yamllint enable rule:line-length
  # Validates backfill.yaml files in the staged SQL.
  validate-backfills:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - *copy_staged_sql
      - run:
          name: Verify that backfill.yaml files are valid
          command: |
            PATH="venv/bin:$PATH" script/bqetl backfill validate
  # Validates query metadata files in the staged SQL.
  validate-metadata:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - *copy_staged_sql
      - run:
          name: Verify that metadata files are valid
          command: |
            # TODO: Add check here to make sure all queries have metadata.yaml
            PATH="venv/bin:$PATH" script/bqetl query validate \
              --respect-dryrun-skip
  # Runs tests marked integration; halts early when CIRCLE_PR_NUMBER is set.
  # Also defines the skip_forked_pr step anchor.
  integration:
    docker: *docker
    steps:
      - checkout
      - &skip_forked_pr
        run:
          name: Early return if this build is from a forked PR
          command: |
            if [ -n "$CIRCLE_PR_NUMBER" ]; then
              echo "Cannot pass creds to forked PRs," \
                "so marking this step successful"
              circleci-agent step halt
            fi
      - *restore_venv_cache
      - *build
      - run:
          name: PyTest Integration Test
          # yamllint disable rule:line-length
          command: |
            PATH="venv/bin:$PATH" script/entrypoint -m 'integration' -n 8
  # Generates DAGs, checks that the committed dags/ directory is up to date,
  # and adds the DAGs to the generated-sql workspace directory.
  generate-dags:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - *copy_generated_sql
      - run:
          name: Generate DAGs
          command: |
            PATH="venv/bin:$PATH" script/bqetl dag generate
            cp -R dags /tmp/workspace/generated-sql
      - run:
          name: Verify that DAGs were correctly generated and are up-to-date
          command: |
            git diff --exit-code dags/
            diff <(git ls-files dags/*.py) <(ls dags/*.py)
      # this task is overwriting the content produced by generate-sql;
      # the behaviour here is additive, generated DAGs are just added to
      # the generated-sql output
      - persist_to_workspace:
          root: /tmp/workspace
          paths:
            - generated-sql
  # Copies this repository's DAGs into a telemetry-airflow clone and runs
  # that project's DAG validity test. Halts early unless on main, the commit
  # message contains [run-tests], or .circleci/ or dags/ changed.
  validate-dags:
    executor:
      name: python/default
      tag: 3.10.12
    steps:
      - checkout
      - run:
          name: Early return when job not modified
          command: |
            if [ "$CIRCLE_BRANCH" = main ]; then
              echo "Run job because branch is $CIRCLE_BRANCH"
            elif git log --format=%B --no-merges -n 1 |
                grep -qF '[run-tests]'; then
              echo "Run job because [run-tests] in commit message"
            elif ! git diff --quiet origin/main... \
                -- "$(git rev-parse --show-toplevel)"/{.circleci,dags}; then
              echo "Run job because .circleci/ and/or dags/ were modified" \
                "since branching off main"
            else
              echo "Skipping job because .circleci/ and dags/ were not modified"
              circleci-agent step halt
            fi
      - run:
          name: Pull telemetry-airflow
          command: |
            git clone https://github.com/mozilla/telemetry-airflow.git ~/telemetry-airflow
      - run:
          name: Replace telemetry-airflow DAGs with BigQuery ETL DAGs
          command: |
            rm ~/telemetry-airflow/dags/* -f || true
            cp -a dags/. ~/telemetry-airflow/dags/
      - *attach_generated_sql
      - *copy_generated_sql
      - run:
          name: Install telemetry-airflow dependencies
          command: |
            cd ~/telemetry-airflow
            virtualenv .venv
            source .venv/bin/activate
            pip install -r requirements.txt
      - run:
          name: 🧪 Test valid DAGs
          command: |
            cd ~/telemetry-airflow
            source .venv/bin/activate
            python -m pytest tests/dags/test_dag_validity.py --junitxml=~/telemetry-airflow/test-results/junit.xml
      - store_test_results:
          path: ~/telemetry-airflow/test-results/junit.xml
  # Validates routine documentation examples against the generated SQL.
  validate-docs:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - *copy_generated_sql
      - run:
          name: Validate doc examples
          command: |
            PATH="venv/bin:$PATH" script/bqetl routine validate --docs-only
  # Validates views in the staged SQL.
  validate-views:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - *copy_staged_sql
      - run:
          name: Validate views
          command: PATH="venv/bin:$PATH" script/bqetl view validate
  # Generates the docs from the generated SQL and deploys them with
  # `mkdocs gh-deploy`.
  docs:
    docker: *docker
    steps:
      - checkout
      - *skip_forked_pr
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - add_ssh_keys:
          fingerprints: "22:b9:3c:1b:82:ab:3f:e4:b5:79:70:d1:7b:b9:28:d2"
      - run:
          name: Build and deploy docs
          command: |
            rm -r sql/ && cp -r /tmp/workspace/generated-sql/sql sql/
            PATH="venv/bin:$PATH" script/bqetl docs generate \
              --output_dir=generated_docs/
            cd generated_docs/
            PATH="../venv/bin:$PATH" mkdocs gh-deploy \
              -m "[ci skip] Deployed {sha} with MkDocs version: {version}"
  # Copies sql/ into the workspace, generates and renders SQL there, records
  # dependencies, and persists the result as generated-sql.
  generate-sql:
    docker: *docker
    steps:
      - checkout
      - *restore_venv_cache
      - *build
      - run:
          name: Generate SQL content
          command: |
            mkdir -p /tmp/workspace/generated-sql
            cp -r sql/ /tmp/workspace/generated-sql/sql
            # Don't depend on dry run for PRs
            PATH="venv/bin:$PATH" script/bqetl generate all \
              --output-dir /tmp/workspace/generated-sql/sql/ \
              --target-project moz-fx-data-shared-prod
            PATH="venv/bin:$PATH" script/bqetl query render \
              --sql-dir /tmp/workspace/generated-sql/sql/ \
              --output-dir /tmp/workspace/generated-sql/sql/ \
              /tmp/workspace/generated-sql/sql/
            PATH="venv/bin:$PATH" script/bqetl dependency record \
              --skip-existing \
              "/tmp/workspace/generated-sql/sql/"
      - persist_to_workspace:
          root: /tmp/workspace
          paths:
            - generated-sql
  # Deploys the files that differ from the generated-sql branch to stage and
  # persists the resulting sql/ and tests/ as staged-generated-sql.
  deploy-changes-to-stage:
    docker: *docker
    steps:
      - checkout
      - *skip_forked_pr
      - *restore_venv_cache
      - *build
      - *attach_generated_sql
      - *copy_generated_sql
      - add_ssh_keys:
          fingerprints: "22:b9:3c:1b:82:ab:3f:e4:b5:79:70:d1:7b:b9:28:d2"
      - run:
          name: Pull in generated-sql branch from remote
          command: |
            ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
            git clone --single-branch --branch generated-sql \
              git@github.com:mozilla/bigquery-etl \
              generated-sql
      - run:
          name: Deploy changes to stage
          command: |
            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"

            PATHS="$(git diff --no-index --name-only --diff-filter=d generated-sql/sql sql)" || true
            echo $PATHS
            PATH="venv/bin:$PATH" script/bqetl stage deploy \
              --dataset-suffix=$CIRCLE_SHA1 \
              --remove-updated-artifacts \
              $PATHS
      - run:
          name: Copy generated SQL to temporary stage directory
          command: |
            mkdir -p /tmp/workspace/staged-generated-sql
            cp -r sql/ /tmp/workspace/staged-generated-sql/sql
            cp -r tests/ /tmp/workspace/staged-generated-sql/tests
      - persist_to_workspace:
          root: /tmp/workspace
          paths:
            - staged-generated-sql
  # Replaces sql/ and dags/ on the generated-sql branch with the workspace
  # content and pushes when there is something to commit.
  push-generated-sql:
    docker: *docker
    steps:
      - *attach_generated_sql
      - add_ssh_keys:
          fingerprints: "22:b9:3c:1b:82:ab:3f:e4:b5:79:70:d1:7b:b9:28:d2"
      - run:
          name: Push to generated-sql branch
          command: |
            ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
            git config --global user.name "CircleCI generate-sql job"
            git config --global user.email "dataops+generated-sql@mozilla.com"
            git clone --single-branch --branch generated-sql \
              git@github.com:mozilla/bigquery-etl \
              generated-sql
            cd generated-sql/
            rm -rf sql/
            cp -r /tmp/workspace/generated-sql/sql sql
            rm -rf dags/
            cp -r /tmp/workspace/generated-sql/dags dags
            git add .
            git commit -m "Auto-push due to change on main branch [ci skip]" \
              && git push \
              || echo "Skipping push since it looks like there were no changes"
  # Builds and pushes the public docker image using the docker orb.
  deploy:
    executor: ubuntu-machine-executor
    steps:
      - checkout
      - *attach_generated_sql
      - *copy_generated_sql
      - docker/check:
          docker-password: DOCKER_PASS
          docker-username: DOCKER_USER
      - docker/build: &public-image
          image: ${CIRCLE_PROJECT_USERNAME+$CIRCLE_PROJECT_USERNAME/}${CIRCLE_PROJECT_REPONAME:-bigquery-etl}
          tag: ${CIRCLE_TAG:-latest}
      - docker/push: *public-image
  # Same generation as generate-sql, but with sql/ from the
  # private-bigquery-etl repository synced in first; persisted as
  # private-generated-sql.
  private-generate-sql:
    docker: *docker
    steps:
      - checkout
      - *skip_forked_pr
      - *restore_venv_cache
      - *build
      - add_ssh_keys:
          # deploy key to private-bigquery-etl
          fingerprints: "9d:1e:af:52:78:2c:e8:ec:33:4c:db:cd:5a:ff:70:0a"
      - run:
          name: Install rsync
          command: |
            apt update
            apt install -y rsync
      - run:
          name: Pull down private SQL content
          command: |
            ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
            git clone --single-branch --branch main \
              git@github.com:mozilla/private-bigquery-etl.git \
              ~/private-bigquery-etl
            rsync --archive ~/private-bigquery-etl/sql/ sql/
      - run:
          name: Generate SQL content
          command: |
            mkdir -p /tmp/workspace/private-generated-sql
            cp -r sql/ /tmp/workspace/private-generated-sql/sql
            # Don't depend on dry run for PRs
            PATH="venv/bin:$PATH" script/bqetl generate all \
              --output-dir /tmp/workspace/private-generated-sql/sql/ \
              --target-project moz-fx-data-shared-prod
            PATH="venv/bin:$PATH" script/bqetl dependency record \
              --skip-existing \
              "/tmp/workspace/private-generated-sql/sql/"
      - persist_to_workspace:
          root: /tmp/workspace
          paths:
            - private-generated-sql
  # Replaces sql/ on the private-generated-sql branch of
  # private-bigquery-etl and pushes when there is something to commit.
  push-private-generated-sql:
    docker: *docker
    steps:
      - *attach_generated_sql
      - add_ssh_keys:
          fingerprints: "9d:1e:af:52:78:2c:e8:ec:33:4c:db:cd:5a:ff:70:0a"
      - run:
          name: Push to private-generated-sql branch
          # yamllint disable rule:line-length
          command: |
            ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
            git config --global user.name "CircleCI private-generate-sql job"
            git config --global user.email "dataops+private-generated-sql@mozilla.com"
            git clone --single-branch --branch private-generated-sql \
              git@github.com:mozilla/private-bigquery-etl \
              private-generated-sql
            cd private-generated-sql/
            rm -rf sql/
            cp -r /tmp/workspace/private-generated-sql/sql sql
            git add .
            git commit -m "Auto-push due to change on main branch [ci skip]" \
              && git push \
              || echo "Skipping push since it looks like there were no changes"
          # yamllint enable rule:line-length
  # Builds and pushes the image containing the private generated SQL using
  # the gcp-gcr orb.
  deploy-to-private-gcr:
    executor: ubuntu-machine-executor
    steps:
      - checkout
      - *attach_generated_sql
      - run:
          name: Move generated-sql into place
          command: |
            rm -rf sql/
            cp -r /tmp/workspace/private-generated-sql/sql sql
      - gcp-gcr/gcr-auth
      - gcp-gcr/build-image: &private-image
          image: bigquery-etl
          tag: ${CIRCLE_TAG:-latest}
      - gcp-gcr/push-image: *private-image
  # Checks out mozilla/main, generates SQL and DAGs from it, and persists
  # them as main-generated-sql.
  main-generate-sql-and-dags:
    docker: *docker
    steps:
      - checkout
      - run:
          name: Switch to main branch
          command: |
            git remote add mozilla git@github.com:mozilla/bigquery-etl
            git fetch mozilla main
            git checkout mozilla/main
      - attach_workspace:
          at: /tmp/workspace
      - *restore_venv_cache
      - *build
      - run:
          name: Generate SQL content
          command: |
            export PATH="venv/bin:$PATH"
            ./script/bqetl generate all \
              --target-project moz-fx-data-shared-prod
            ./script/bqetl dependency record \
              --skip-existing \
              "sql/"
            ./script/bqetl dag generate

            mkdir -p /tmp/workspace/main-generated-sql
            cp -r sql/ /tmp/workspace/main-generated-sql/sql
            cp -r dags/ /tmp/workspace/main-generated-sql/dags
      - persist_to_workspace:
          root: /tmp/workspace
          paths:
            - main-generated-sql
  # Diffs main-generated-sql against generated-sql (sql/ then dags/) into
  # generated-sql/sql.diff; diff exit codes are ignored via `|| true`.
  generate-diff:
    docker: *docker
    steps:
      - attach_workspace:
          at: /tmp/workspace
      - run:
          name: Generate diff
          command: |
            diff -bur --no-dereference \
              /tmp/workspace/main-generated-sql/sql/ /tmp/workspace/generated-sql/sql/ \
              > /tmp/workspace/generated-sql/sql.diff || true
            diff -bur --no-dereference \
              /tmp/workspace/main-generated-sql/dags/ /tmp/workspace/generated-sql/dags/ \
              >> /tmp/workspace/generated-sql/sql.diff || true
      - persist_to_workspace:
          root: /tmp/workspace
          paths:
            - generated-sql
  # Installs circle-github-bot and runs .circleci/post-diff.js.
  post-diff:
    docker:
      - image: circleci/node:8.10.0
    steps:
      - checkout
      - attach_workspace:
          at: /tmp/workspace
      - run: npm i circle-github-bot
      - run: .circleci/post-diff.js
      - store_artifacts:
          path: /tmp/integration
          destination: /app/integration
  # Runs `bqetl stage clean` for the datasets suffixed with this commit's
  # SHA, with --delete-expired.
  reset-stage-env:
    docker: *docker
    steps:
      - checkout
      - *skip_forked_pr
      - *build
      - run:
          name: "Delete stage datasets"
          command: |
            export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
            echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"

            PATH="venv/bin:$PATH" script/bqetl stage clean --dataset-suffix=$CIRCLE_SHA1 --delete-expired
  # Halts when CIRCLE_PR_NUMBER is unset; otherwise prints instructions for
  # triggering the integration tests manually and exits with status 1.
  manual-trigger-required-for-fork:
    docker: *docker
    steps:
      - &skip_upstream
        run:
          name: Early return if this build is running on upstream
          command: |
            if [ -n "$CIRCLE_PR_NUMBER" ]; then
              echo "Build on fork"
            else
              echo "Build on upstream"
              circleci-agent step halt
            fi
      - checkout
      - run:
          name: Manually trigger integration tests for fork
          # yamllint disable rule:line-length
          command: |
            apt update
            apt install jq -y

            CIRCLE_PR_BRANCH=`curl -s https://api.github.com/repos/${CIRCLE_PROJECT_USERNAME}/${CIRCLE_PROJECT_REPONAME}/pulls/${CIRCLE_PR_NUMBER} | jq -r '.head.label'`

            echo "Integration tests for this fork need to be triggered manually"
            echo "Users with write access to the repository can trigger" \
              "integration tests by following these steps: "
            echo " Open the following page:"
            echo " https://github.com/mozilla/bigquery-etl/actions/workflows/push-to-upstream.yml"
            echo " Choose the 'Run workflow' dropdown and provide '$CIRCLE_PR_BRANCH' as parameter."

            exit 1
          # yamllint enable rule:line-length

workflows:
  version: 2
  # Main workflow; its job list is anchored as build_jobs so the nightly
  # workflow can reuse exactly the same jobs and dependencies.
  build:
    jobs: &build_jobs
      - manual-trigger-required-for-fork
      - build:
          context: data-eng-circleci-tests
      - verify-format-sql:
          requires:
            - generate-sql
      - deploy-changes-to-stage:
          requires:
            - generate-sql
      - verify-requirements
      - test-sql:
          context: data-eng-circleci-tests
          requires:
            - deploy-changes-to-stage
      - dry-run-sql:
          requires:
            - deploy-changes-to-stage
      - validate-metadata:
          requires:
            - deploy-changes-to-stage
      - integration
      - validate-backfills:
          requires:
            - deploy-changes-to-stage
      - validate-dags:
          requires:
            - generate-dags
      - validate-docs:
          requires:
            - generate-sql
      - validate-views:
          requires:
            - deploy-changes-to-stage
      - generate-sql
      - main-generate-sql-and-dags:
          filters:
            branches:
              ignore: main
      - generate-diff:
          requires:
            - generate-dags
            - main-generate-sql-and-dags
          filters:
            branches:
              ignore: main
      - post-diff:
          requires:
            - generate-diff
          filters:
            branches:
              ignore: main
      - generate-dags:
          requires:
            - generate-sql
      - docs:
          requires:
            - generate-sql
          filters:
            branches:
              only: main
      - push-generated-sql:
          requires:
            - validate-dags
          filters:
            branches:
              only:
                - main
      - reset-stage-env:
          requires:
            - push-generated-sql
            - test-sql
            - validate-views
            - validate-docs
            - validate-metadata
            - dry-run-sql
      - deploy:
          context: data-eng-bigquery-etl-dockerhub
          requires:
            - generate-sql
            # Public image must be pushed after the private one because of
            # webhooks used in Ops logic. For details, see:
            # https://bugzilla.mozilla.org/show_bug.cgi?id=1715628#c0
            - deploy-to-private-gcr
          filters:
            branches:
              only:
                - main
      # The following "private" jobs are basically clones of the public jobs
      # for generate-sql, deploy, and push-generated-sql, except that they pull
      # in some additional content from an internal Mozilla repository for
      # cases where ETL code cannot be public. Although the CI logic is
      # consolidated in this public repository, note that we are both pulling
      # from the internal repository and pushing generated results back to
      # a branch on that internal repository, which may be initially
      # surprising.
      - private-generate-sql
      - push-private-generated-sql:
          requires:
            - private-generate-sql
          filters:
            branches:
              only:
                - main
      - deploy-to-private-gcr:
          context: data-eng-airflow-gcr
          requires:
            - private-generate-sql
            # can't run in parallel because CIRCLE_BUILD_NUM is same
            - build
            - generate-sql
          filters:
            branches:
              only:
                - main
  nightly:
    # Run after schema-generator to ensure we are up-to-date
    triggers:
      - schedule:
          cron: "0 5 * * *"
          filters:
            branches:
              only:
                - main
    jobs: *build_jobs