Require authentication for dry run function and run gcloud auth when … (#5171)

* Require authentication for dry run function and run gcloud auth when not logged in

* authenticate step in CI, remove interactive gcloud auth

* Skip dryrun for ltv_state_values_v2

* Refactor skip_fork in CI, clarify login requirements
This commit is contained in:
Anna Scholtz 2024-03-06 15:06:29 -08:00 committed by GitHub
Parent b2997d932e
Commit 70a355a0dd
No known key found matching this signature
GPG key ID: B5690EEEBB952194
9 changed files with 85 additions and 42 deletions

View file

@ -119,9 +119,25 @@ jobs:
- << pipeline.parameters.validate-bqetl >>
- << pipeline.parameters.deploy >>
steps:
- &skip_forked_pr
run:
name: Early return if this build is from a forked PR
command: |
if [ -n "$CIRCLE_PR_NUMBER" ]; then
echo "Cannot pass creds to forked PRs," \
"so marking this step successful"
circleci-agent step halt
fi
- checkout
- *restore_venv_cache
- *build
- &authenticate
run:
name: Authenticate to GCP
command: |
export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
echo 'export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"' >> "$BASH_ENV"
echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
- run:
name: PyTest with linters
# integration tests are run in a separate `integration` step;
@ -143,6 +159,7 @@ jobs:
- when:
condition: *validate-sql-or-routines
steps:
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
@ -160,12 +177,7 @@ jobs:
- run:
name: Run SQL tests
command: |
if [ -n "$CIRCLE_PR_NUMBER" ]; then
echo "Cannot pass creds to forked PRs," \
"so skipping routine and SQL tests"
else
PATH="venv/bin:$PATH" script/entrypoint -m sql -n 8
fi
- unless:
condition: *validate-sql-or-routines
steps:
@ -176,11 +188,13 @@ jobs:
- when:
condition: *validate-sql-or-routines
steps:
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
- *attach_generated_sql
- *copy_staged_sql
- *authenticate
- run:
name: Dry run queries
# yamllint disable rule:line-length
@ -238,11 +252,13 @@ jobs:
- when:
condition: *validate-sql
steps:
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
- *attach_generated_sql
- *copy_staged_sql
- *authenticate
- run:
name: Verify that metadata files are valid
command: |
@ -259,16 +275,8 @@ jobs:
- when:
condition: *validate-bqetl
steps:
- *skip_forked_pr
- checkout
- &skip_forked_pr
run:
name: Early return if this build is from a forked PR
command: |
if [ -n "$CIRCLE_PR_NUMBER" ]; then
echo "Cannot pass creds to forked PRs," \
"so marking this step successful"
circleci-agent step halt
fi
- *restore_venv_cache
- *build
- run:
@ -288,6 +296,7 @@ jobs:
- when:
condition: *validate-sql
steps:
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
@ -298,6 +307,7 @@ jobs:
command: |
rm -rf sql/
cp -r /tmp/workspace/generated-sql/sql sql
- *authenticate
- run:
name: Generate DAGs
command: |
@ -378,6 +388,7 @@ jobs:
- << pipeline.parameters.validate-routines >>
- << pipeline.parameters.deploy >>
steps:
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
@ -386,12 +397,7 @@ jobs:
- run:
name: Run routine tests
command: |
if [ -n "$CIRCLE_PR_NUMBER" ]; then
echo "Cannot pass creds to forked PRs," \
"so skipping routine tests"
else
PATH="venv/bin:$PATH" script/entrypoint -m routine -n 8
fi
PATH="venv/bin:$PATH" script/entrypoint -m routine -n 8
- run:
name: Validate doc examples
command: |
@ -406,14 +412,17 @@ jobs:
- when:
condition: *validate-sql-or-routines
steps:
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
- *attach_generated_sql
- *copy_staged_sql
- *authenticate
- run:
name: Validate views
command: PATH="venv/bin:$PATH" script/bqetl view validate
command: |
PATH="venv/bin:$PATH" script/bqetl view validate
- unless:
condition: *validate-sql-or-routines
steps:
@ -424,8 +433,8 @@ jobs:
- when:
condition: *validate-sql-or-routines
steps:
- checkout
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
- *attach_generated_sql
@ -452,9 +461,11 @@ jobs:
- when:
condition: *validate-sql-or-routines
steps:
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
- *authenticate
- run:
name: Generate SQL content
command: |
@ -546,8 +557,8 @@ jobs:
- when:
condition: *validate-sql-or-routines
steps:
- checkout
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
- *attach_generated_sql
@ -562,13 +573,11 @@ jobs:
git clone --single-branch --branch generated-sql \
git@github.com:mozilla/bigquery-etl \
generated-sql
- *authenticate
- run:
name: Deploy changes to stage
command: |
if [ "<< pipeline.parameters.skip-stage-deploys >>" = "false" ]; then
export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
PATHS="$(git diff --no-index --name-only --diff-filter=d generated-sql/sql sql)" || true
echo $PATHS
PATH="venv/bin:$PATH" script/bqetl stage deploy \
@ -701,10 +710,11 @@ jobs:
- when:
condition: *deploy
steps:
- checkout
- *skip_forked_pr
- checkout
- *restore_venv_cache
- *build
- *authenticate
- add_ssh_keys:
# deploy key to private-bigquery-etl
fingerprints:
@ -823,6 +833,7 @@ jobs:
- when:
condition: *validate-sql-or-routines
steps:
- *skip_forked_pr
- checkout
- run:
name: Switch to main branch
@ -834,6 +845,7 @@ jobs:
at: /tmp/workspace
- *restore_venv_cache
- *build
- *authenticate
- run:
name: Generate SQL content
command: |
@ -973,15 +985,13 @@ jobs:
- when:
condition: *validate-sql-or-routines
steps:
- checkout
- *skip_forked_pr
- checkout
- *build
- *authenticate
- run:
name: "Delete stage datasets"
command: |
export GOOGLE_APPLICATION_CREDENTIALS="/tmp/gcp.json"
echo "$GCLOUD_SERVICE_KEY" > "$GOOGLE_APPLICATION_CREDENTIALS"
PATH="venv/bin:$PATH" script/bqetl stage clean --dataset-suffix=$CIRCLE_SHA1 --delete-expired
- unless:
condition: *validate-sql-or-routines

View file

@ -19,7 +19,7 @@ For more information, see [https://mozilla.github.io/bigquery-etl/](https://mozi
### GCP CLI tools
- **For Mozilla Employees or Contributors (not in Data Engineering)** - Set up GCP command line tools, [as described on docs.telemetry.mozilla.org](https://docs.telemetry.mozilla.org/cookbooks/bigquery/access.html#using-the-bq-command-line-tool). Note that some functionality (e.g. writing UDFs or backfilling queries) may not be allowed.
- **For Mozilla Employees (not in Data Engineering)** - Set up GCP command line tools, [as described on docs.telemetry.mozilla.org](https://docs.telemetry.mozilla.org/cookbooks/bigquery/access.html#using-the-bq-command-line-tool). Note that some functionality (e.g. writing UDFs or backfilling queries) may not be allowed. Run `gcloud auth login --update-adc` to authenticate against GCP.
- **For Data Engineering** - In addition to setting up the command line tools, you will want to log in to `shared-prod` if making changes to production systems. Run `gcloud auth login --update-adc --project=moz-fx-data-shared-prod` (if you have not run it previously).
### Installing bqetl

View file

@ -189,7 +189,7 @@ def get_backfill_entries_to_initiate(
bigquery.Client(project="")
except DefaultCredentialsError:
click.echo(
"Authentication to GCP required. Run `gcloud auth login` "
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)

View file

@ -424,7 +424,7 @@ def complete(ctx, qualified_table_name, sql_dir, project_id):
"""Complete backfill entry in backfill.yaml file(s)."""
if not is_authenticated():
click.echo(
"Authentication to GCP required. Run `gcloud auth login` "
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)

View file

@ -199,7 +199,7 @@ def run(ctx, dataset, project_id, sql_dir, marker, dry_run):
"""Run a check."""
if not is_authenticated():
click.echo(
"Authentication to GCP required. Run `gcloud auth login` "
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)

View file

@ -97,7 +97,9 @@ def dryrun(
sys.exit(0)
if not use_cloud_function and not is_authenticated():
click.echo("Not authenticated to GCP. Run `gcloud auth login` to login.")
click.echo(
"Not authenticated to GCP. Run `gcloud auth login --update-adc` to login."
)
sys.exit(1)
sql_file_valid = partial(

View file

@ -731,7 +731,7 @@ def backfill(
"""Run a backfill."""
if not is_authenticated():
click.echo(
"Authentication to GCP required. Run `gcloud auth login` "
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)
@ -906,7 +906,7 @@ def run(
"""Run a query."""
if not is_authenticated():
click.echo(
"Authentication to GCP required. Run `gcloud auth login` "
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)
@ -1667,7 +1667,7 @@ def update(
"""CLI command for generating the query schema."""
if not is_authenticated():
click.echo(
"Authentication to GCP required. Run `gcloud auth login` "
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)
@ -1752,7 +1752,7 @@ def _update_query_schema_with_downstream(
if not is_authenticated():
click.echo(
"Cannot update downstream dependencies."
"Authentication to GCP required. Run `gcloud auth login` "
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)
@ -2052,7 +2052,7 @@ def deploy(
"""CLI command for deploying destination table schemas."""
if not is_authenticated():
click.echo(
"Authentication to GCP required. Run `gcloud auth login` "
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)

View file

@ -14,6 +14,7 @@ proxy the queries through the dry run service endpoint.
import glob
import json
import re
import sys
from enum import Enum
from os.path import basename, dirname, exists
from pathlib import Path
@ -21,7 +22,10 @@ from typing import Optional, Set
from urllib.request import Request, urlopen
import click
import google.auth
from google.auth.transport.requests import Request as GoogleAuthRequest
from google.cloud import bigquery
from google.oauth2.id_token import fetch_id_token
from .config import ConfigLoader
from .metadata.parse_metadata import Metadata
@ -69,6 +73,15 @@ class DryRun:
except FileNotFoundError:
self.metadata = None
from bigquery_etl.cli.utils import is_authenticated
if not is_authenticated():
print(
"Authentication to GCP required. Run `gcloud auth login --update-adc` "
"and check that the project is set correctly."
)
sys.exit(1)
@staticmethod
def skipped_files(sql_dir=ConfigLoader.get("default", "sql_dir")) -> Set[str]:
"""Return files skipped by dry run."""
@ -160,10 +173,26 @@ class DryRun:
dataset = basename(dirname(dirname(self.sqlfile)))
try:
if self.use_cloud_function:
auth_req = GoogleAuthRequest()
creds, _ = google.auth.default(
scopes=["https://www.googleapis.com/auth/cloud-platform"]
)
creds.refresh(auth_req)
if hasattr(creds, "id_token"):
# Get token from default credentials for the current environment created via Cloud SDK run
id_token = creds.id_token
else:
# If the environment variable GOOGLE_APPLICATION_CREDENTIALS is set to service account JSON file,
# then ID token is acquired using this service account credentials.
id_token = fetch_id_token(auth_req, self.dry_run_url)
r = urlopen(
Request(
self.dry_run_url,
headers={"Content-Type": "application/json"},
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {id_token}",
},
data=json.dumps(
{
"dataset": dataset,

View file

@ -197,6 +197,8 @@ dry_run:
- sql/moz-fx-data-shared-prod/org_mozilla_tiktokreporter/**/*.sql
- sql/moz-fx-data-shared-prod/org_mozilla_ios_tiktok_reporter_tiktok_reportershare/**/*.sql
- sql/moz-fx-data-shared-prod/org_mozilla_ios_tiktok_reporter/**/*.sql
- sql/moz-fx-data-shared-prod/fenix_derived/ltv_state_values_v1/query.sql
- sql/moz-fx-data-shared-prod/fenix_derived/ltv_state_values_v2/query.sql
# Materialized views
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_search_events_live_v1/init.sql
- sql/moz-fx-data-shared-prod/telemetry_derived/experiment_events_live_v1/init.sql