Require authentication for dry run function and run gcloud auth when … (#5171)
* Require authentication for dry run function and run gcloud auth when not logged in
* authenticate step in CI, remove interactive gcloud auth
* Skip dryrun for ltv_state_values_v2
* Refactor skip_fork in CI, clarify login requirements
This commit is contained in:
Parent: b2997d932e
Commit: 70a355a0dd
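The Python changes below all apply the same up-front guard: CLI commands that touch BigQuery call `is_authenticated()` and exit with a pointer to `gcloud auth login --update-adc` instead of failing mid-run. A condensed sketch of that pattern as a reusable helper (the name `require_gcp_auth` is illustrative, not part of this commit):

```python
import sys

import click

from bigquery_etl.cli.utils import is_authenticated


def require_gcp_auth() -> None:
    """Exit early with a helpful message when GCP credentials are missing."""
    if not is_authenticated():
        click.echo(
            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
            "and check that the project is set correctly."
        )
        sys.exit(1)
```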
@@ -119,9 +119,25 @@ jobs:
               - << pipeline.parameters.validate-bqetl >>
               - << pipeline.parameters.deploy >>
           steps:
+            - &skip_forked_pr
+              run:
+                name: Early return if this build is from a forked PR
+                command: |
+                  if [ -n "$CIRCLE_PR_NUMBER" ]; then
+                    echo "Cannot pass creds to forked PRs," \
+                      "so marking this step successful"
+                    circleci-agent step halt
+                  fi
             - checkout
             - *restore_venv_cache
             - *build
+            - &authenticate
+              run:
+                name: Authenticate to GCP
+                command: |
+                  export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
+                  echo 'export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"' >> "$BASH_ENV"
+                  echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
             - run:
                 name: PyTest with linters
                 # integration tests are run in a separate `integration` step;
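The new `&authenticate` step writes the service-account key to disk and exports `GOOGLE_APPLICATION_CREDENTIALS` through `$BASH_ENV`, so every later step in the job inherits it. Downstream Python then needs no interactive login; a minimal sketch of how google-auth picks the key up (the environment setup mirrors the CI step above and is shown only for illustration):

```python
import os

import google.auth
from google.cloud import bigquery

# In CI this is set by the `Authenticate to GCP` step.
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "/tmp/gcp.json")

# google-auth resolves the key file pointed to by GOOGLE_APPLICATION_CREDENTIALS.
credentials, project = google.auth.default()
client = bigquery.Client(credentials=credentials, project=project)
```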
@@ -143,6 +159,7 @@ jobs:
       - when:
           condition: *validate-sql-or-routines
           steps:
+            - *skip_forked_pr
             - checkout
             - *restore_venv_cache
             - *build
@@ -160,12 +177,7 @@ jobs:
             - run:
                 name: Run SQL tests
                 command: |
-                  if [ -n "$CIRCLE_PR_NUMBER" ]; then
-                    echo "Cannot pass creds to forked PRs," \
-                      "so skipping routine and SQL tests"
-                  else
                   PATH="venv/bin:$PATH" script/entrypoint -m sql -n 8
-                  fi
       - unless:
           condition: *validate-sql-or-routines
           steps:
@@ -176,11 +188,13 @@ jobs:
       - when:
           condition: *validate-sql-or-routines
           steps:
+            - *skip_forked_pr
             - checkout
             - *restore_venv_cache
             - *build
             - *attach_generated_sql
             - *copy_staged_sql
+            - *authenticate
             - run:
                 name: Dry run queries
                 # yamllint disable rule:line-length
@@ -238,11 +252,13 @@ jobs:
       - when:
           condition: *validate-sql
           steps:
+            - *skip_forked_pr
             - checkout
             - *restore_venv_cache
             - *build
             - *attach_generated_sql
             - *copy_staged_sql
+            - *authenticate
             - run:
                 name: Verify that metadata files are valid
                 command: |
@@ -259,16 +275,8 @@ jobs:
       - when:
           condition: *validate-bqetl
           steps:
+            - *skip_forked_pr
             - checkout
-            - &skip_forked_pr
-              run:
-                name: Early return if this build is from a forked PR
-                command: |
-                  if [ -n "$CIRCLE_PR_NUMBER" ]; then
-                    echo "Cannot pass creds to forked PRs," \
-                      "so marking this step successful"
-                    circleci-agent step halt
-                  fi
             - *restore_venv_cache
             - *build
             - run:
@@ -288,6 +296,7 @@ jobs:
       - when:
           condition: *validate-sql
           steps:
+            - *skip_forked_pr
             - checkout
             - *restore_venv_cache
             - *build
@@ -298,6 +307,7 @@ jobs:
                 command: |
                   rm -rf sql/
                   cp -r /tmp/workspace/generated-sql/sql sql
+            - *authenticate
             - run:
                 name: Generate DAGs
                 command: |
@@ -378,6 +388,7 @@ jobs:
               - << pipeline.parameters.validate-routines >>
               - << pipeline.parameters.deploy >>
           steps:
+            - *skip_forked_pr
             - checkout
             - *restore_venv_cache
             - *build
@@ -386,12 +397,7 @@ jobs:
             - run:
                 name: Run routine tests
                 command: |
-                  if [ -n "$CIRCLE_PR_NUMBER" ]; then
-                    echo "Cannot pass creds to forked PRs," \
-                      "so skipping routine tests"
-                  else
-                    PATH="venv/bin:$PATH" script/entrypoint -m routine -n 8
-                  fi
+                  PATH="venv/bin:$PATH" script/entrypoint -m routine -n 8
             - run:
                 name: Validate doc examples
                 command: |
@@ -406,14 +412,17 @@ jobs:
       - when:
           condition: *validate-sql-or-routines
           steps:
+            - *skip_forked_pr
             - checkout
             - *restore_venv_cache
             - *build
             - *attach_generated_sql
             - *copy_staged_sql
+            - *authenticate
             - run:
                 name: Validate views
-                command: PATH="venv/bin:$PATH" script/bqetl view validate
+                command: |
+                  PATH="venv/bin:$PATH" script/bqetl view validate
       - unless:
           condition: *validate-sql-or-routines
           steps:
@@ -424,8 +433,8 @@ jobs:
       - when:
           condition: *validate-sql-or-routines
           steps:
-            - checkout
             - *skip_forked_pr
+            - checkout
             - *restore_venv_cache
             - *build
             - *attach_generated_sql
@@ -452,9 +461,11 @@ jobs:
       - when:
           condition: *validate-sql-or-routines
           steps:
+            - *skip_forked_pr
             - checkout
             - *restore_venv_cache
             - *build
+            - *authenticate
             - run:
                 name: Generate SQL content
                 command: |
@@ -546,8 +557,8 @@ jobs:
       - when:
           condition: *validate-sql-or-routines
           steps:
-            - checkout
             - *skip_forked_pr
+            - checkout
             - *restore_venv_cache
             - *build
             - *attach_generated_sql
@@ -562,13 +573,11 @@ jobs:
                   git clone --single-branch --branch generated-sql \
                     git@github.com:mozilla/bigquery-etl \
                     generated-sql
+            - *authenticate
             - run:
                 name: Deploy changes to stage
                 command: |
                   if [ "<< pipeline.parameters.skip-stage-deploys >>" = "false" ]; then
-                    export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
-                    echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
-
                     PATHS="$(git diff --no-index --name-only --diff-filter=d generated-sql/sql sql)" || true
                     echo $PATHS
                     PATH="venv/bin:$PATH" script/bqetl stage deploy \
@@ -701,10 +710,11 @@ jobs:
       - when:
           condition: *deploy
           steps:
-            - checkout
             - *skip_forked_pr
+            - checkout
             - *restore_venv_cache
             - *build
+            - *authenticate
             - add_ssh_keys:
                 # deploy key to private-bigquery-etl
                 fingerprints:
@@ -823,6 +833,7 @@ jobs:
       - when:
           condition: *validate-sql-or-routines
           steps:
+            - *skip_forked_pr
             - checkout
             - run:
                 name: Switch to main branch
@@ -834,6 +845,7 @@ jobs:
                 at: /tmp/workspace
             - *restore_venv_cache
             - *build
+            - *authenticate
             - run:
                 name: Generate SQL content
                 command: |
@@ -973,15 +985,13 @@ jobs:
       - when:
           condition: *validate-sql-or-routines
           steps:
-            - checkout
             - *skip_forked_pr
+            - checkout
             - *build
+            - *authenticate
             - run:
                 name: "Delete stage datasets"
                 command: |
-                  export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
-                  echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
-
                   PATH="venv/bin:$PATH" script/bqetl stage clean --dataset-suffix=$CIRCLE_SHA1 --delete-expired
       - unless:
           condition: *validate-sql-or-routines
@@ -19,7 +19,7 @@ For more information, see [https://mozilla.github.io/bigquery-etl/](https://mozilla.github.io/bigquery-etl/)
 
 ### GCP CLI tools
 
-- **For Mozilla Employees or Contributors (not in Data Engineering)** - Set up GCP command line tools, [as described on docs.telemetry.mozilla.org](https://docs.telemetry.mozilla.org/cookbooks/bigquery/access.html#using-the-bq-command-line-tool). Note that some functionality (e.g. writing UDFs or backfilling queries) may not be allowed.
+- **For Mozilla Employees (not in Data Engineering)** - Set up GCP command line tools, [as described on docs.telemetry.mozilla.org](https://docs.telemetry.mozilla.org/cookbooks/bigquery/access.html#using-the-bq-command-line-tool). Note that some functionality (e.g. writing UDFs or backfilling queries) may not be allowed. Run `gcloud auth login --update-adc` to authenticate against GCP.
 - **For Data Engineering** - In addition to setting up the command line tools, you will want to log in to `shared-prod` if making changes to production systems. Run `gcloud auth login --update-adc --project=moz-fx-data-shared-prod` (if you have not run it previously).
 
 ### Installing bqetl
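A quick way to confirm that `gcloud auth login --update-adc` worked is to ask google-auth for Application Default Credentials directly (a standalone check, not part of the repository):

```python
import google.auth
from google.auth.exceptions import DefaultCredentialsError

try:
    # Resolves credentials from the ADC file that `--update-adc` wrote.
    credentials, project = google.auth.default()
    print(f"Authenticated; default project: {project}")
except DefaultCredentialsError:
    print("No credentials found; run `gcloud auth login --update-adc` first.")
```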
@@ -189,7 +189,7 @@ def get_backfill_entries_to_initiate(
         bigquery.Client(project="")
     except DefaultCredentialsError:
         click.echo(
-            "Authentication to GCP required. Run `gcloud auth login` "
+            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
             "and check that the project is set correctly."
         )
         sys.exit(1)
@@ -424,7 +424,7 @@ def complete(ctx, qualified_table_name, sql_dir, project_id):
     """Complete backfill entry in backfill.yaml file(s)."""
     if not is_authenticated():
         click.echo(
-            "Authentication to GCP required. Run `gcloud auth login` "
+            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
             "and check that the project is set correctly."
         )
         sys.exit(1)
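`is_authenticated()` itself is not shown in this diff; judging from the `DefaultCredentialsError` handling in the first hunk above, it plausibly boils down to attempting to construct a client from Application Default Credentials. A hedged sketch (assumed, not the repository's actual implementation):

```python
from google.auth.exceptions import DefaultCredentialsError
from google.cloud import bigquery


def is_authenticated() -> bool:
    """Return True if Application Default Credentials can be resolved."""
    try:
        # Instantiating a client forces credential resolution without
        # running a query; mirrors the try/except pattern in the diff above.
        bigquery.Client(project="")
        return True
    except DefaultCredentialsError:
        return False
```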
@@ -199,7 +199,7 @@ def run(ctx, dataset, project_id, sql_dir, marker, dry_run):
     """Run a check."""
     if not is_authenticated():
         click.echo(
-            "Authentication to GCP required. Run `gcloud auth login` "
+            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
             "and check that the project is set correctly."
         )
         sys.exit(1)
@@ -97,7 +97,9 @@ def dryrun(
         sys.exit(0)
 
     if not use_cloud_function and not is_authenticated():
-        click.echo("Not authenticated to GCP. Run `gcloud auth login` to login.")
+        click.echo(
+            "Not authenticated to GCP. Run `gcloud auth login --update-adc` to login."
+        )
         sys.exit(1)
 
     sql_file_valid = partial(
@@ -731,7 +731,7 @@ def backfill(
     """Run a backfill."""
     if not is_authenticated():
         click.echo(
-            "Authentication to GCP required. Run `gcloud auth login` "
+            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
             "and check that the project is set correctly."
         )
         sys.exit(1)
@@ -906,7 +906,7 @@ def run(
     """Run a query."""
     if not is_authenticated():
         click.echo(
-            "Authentication to GCP required. Run `gcloud auth login` "
+            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
             "and check that the project is set correctly."
        )
         sys.exit(1)
@@ -1667,7 +1667,7 @@ def update(
     """CLI command for generating the query schema."""
     if not is_authenticated():
         click.echo(
-            "Authentication to GCP required. Run `gcloud auth login` "
+            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
             "and check that the project is set correctly."
         )
         sys.exit(1)
@@ -1752,7 +1752,7 @@ def _update_query_schema_with_downstream(
     if not is_authenticated():
         click.echo(
             "Cannot update downstream dependencies."
-            "Authentication to GCP required. Run `gcloud auth login` "
+            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
             "and check that the project is set correctly."
         )
         sys.exit(1)
@@ -2052,7 +2052,7 @@ def deploy(
     """CLI command for deploying destination table schemas."""
     if not is_authenticated():
         click.echo(
-            "Authentication to GCP required. Run `gcloud auth login` "
+            "Authentication to GCP required. Run `gcloud auth login --update-adc` "
             "and check that the project is set correctly."
         )
         sys.exit(1)
@@ -14,6 +14,7 @@ proxy the queries through the dry run service endpoint.
 import glob
+import json
 import re
 import sys
 from enum import Enum
 from os.path import basename, dirname, exists
 from pathlib import Path
@@ -21,7 +22,10 @@ from typing import Optional, Set
 from urllib.request import Request, urlopen
 
 import click
+import google.auth
+from google.auth.transport.requests import Request as GoogleAuthRequest
 from google.cloud import bigquery
+from google.oauth2.id_token import fetch_id_token
 
 from .config import ConfigLoader
 from .metadata.parse_metadata import Metadata
@@ -69,6 +73,15 @@ class DryRun:
         except FileNotFoundError:
             self.metadata = None
 
+        from bigquery_etl.cli.utils import is_authenticated
+
+        if not is_authenticated():
+            print(
+                "Authentication to GCP required. Run `gcloud auth login --update-adc` "
+                "and check that the project is set correctly."
+            )
+            sys.exit(1)
+
     @staticmethod
     def skipped_files(sql_dir=ConfigLoader.get("default", "sql_dir")) -> Set[str]:
         """Return files skipped by dry run."""
@@ -160,10 +173,26 @@ class DryRun:
         dataset = basename(dirname(dirname(self.sqlfile)))
         try:
             if self.use_cloud_function:
+                auth_req = GoogleAuthRequest()
+                creds, _ = google.auth.default(
+                    scopes=["https://www.googleapis.com/auth/cloud-platform"]
+                )
+                creds.refresh(auth_req)
+                if hasattr(creds, "id_token"):
+                    # Get token from default credentials for the current environment created via Cloud SDK run
+                    id_token = creds.id_token
+                else:
+                    # If the environment variable GOOGLE_APPLICATION_CREDENTIALS is set to service account JSON file,
+                    # then ID token is acquired using this service account credentials.
+                    id_token = fetch_id_token(auth_req, self.dry_run_url)
+
                 r = urlopen(
                     Request(
                         self.dry_run_url,
-                        headers={"Content-Type": "application/json"},
+                        headers={
+                            "Content-Type": "application/json",
+                            "Authorization": f"Bearer {id_token}",
+                        },
                         data=json.dumps(
                             {
                                 "dataset": dataset,
@@ -197,6 +197,8 @@ dry_run:
     - sql/moz-fx-data-shared-prod/org_mozilla_tiktokreporter/**/*.sql
     - sql/moz-fx-data-shared-prod/org_mozilla_ios_tiktok_reporter_tiktok_reportershare/**/*.sql
     - sql/moz-fx-data-shared-prod/org_mozilla_ios_tiktok_reporter/**/*.sql
+    - sql/moz-fx-data-shared-prod/fenix_derived/ltv_state_values_v1/query.sql
+    - sql/moz-fx-data-shared-prod/fenix_derived/ltv_state_values_v2/query.sql
     # Materialized views
     - sql/moz-fx-data-shared-prod/telemetry_derived/experiment_search_events_live_v1/init.sql
     - sql/moz-fx-data-shared-prod/telemetry_derived/experiment_events_live_v1/init.sql