#!/bin/bash

set -ex

function write_scalars {
    local product=$1
    local dataset=$2
    local table=$3
    local dst_project=$4
    local sql_dir=$5
    local directory="${sql_dir}/${dst_project}/glam_etl/${product}__clients_daily_scalar_aggregates_${table}"
    mkdir -p "$directory"
    if ! python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates \
            --source-table "$dataset.$table" \
            --product "$product" \
            > "$directory/query.sql"; then
        echo "skipping $directory/query.sql: no probes found"
        rm -r "$directory"
    else
        echo "generated $directory/query.sql"
    fi
}

function write_histograms {
    local product=$1
    local dataset=$2
    local table=$3
    local dst_project=$4
    local sql_dir=$5
    local directory="${sql_dir}/${dst_project}/glam_etl/${product}__clients_daily_histogram_aggregates_${table}"
    mkdir -p "$directory"
    if ! python3 -m bigquery_etl.glam.clients_daily_histogram_aggregates \
            --source-table "$dataset.$table" \
            --product "$product" \
            > "$directory/query.sql"; then
        echo "skipping $directory/query.sql: no probes found"
        rm -r "$directory"
    else
        echo "generated $directory/query.sql"
    fi
}

function write_clients_daily_aggregates {
    local product=$1
    local src_project=$2
    local dst_project=$3
    local sql_dir=$4
    local dataset="${product}_stable"
    local qualified="$src_project:$dataset"

    # Relying on set -e alone would make these checks fail silently, so
    # validate the inputs explicitly and report what went wrong.
    if ! bq ls "$qualified" &> /dev/null; then
        echo "could not list $qualified"
        exit 1
    fi
    if ! bq show "$qualified.baseline_v1" &> /dev/null; then
        echo "could not find $qualified.baseline_v1"
        exit 1
    fi

    # e.g. baseline_v1
    local tables
    # GLAM only supports tables with a glean_ping_* schema. The use_counters
    # tables are also excluded because they are not supported and their
    # generated query is too large to run.
    tables=$(bq ls --format=json "$qualified" | \
        jq -r '.[]
            | select(.labels.schema_id | test("^glean_ping_[0-9]+"))
            | select(.type == "TABLE")
            | select(.tableReference.tableId | test("^use_counters.*") | not)
            | "\(.tableReference.tableId)"')

    # generate all of the queries in parallel
    for table in $tables; do
        write_scalars "$product" "$dataset" "$table" "$dst_project" "$sql_dir" &
        write_histograms "$product" "$dataset" "$table" "$dst_project" "$sql_dir" &
    done
    # wait for all of the background processes before continuing
    wait
}

cd "$(dirname "$0")/../.."

error="STAGE must be one of (daily, incremental, all)"
# The source project containing the stable tables used to generate the
# clients daily aggregate queries.
src_project=${SRC_PROJECT:-moz-fx-data-shared-prod}
# PROJECT may also be set as the destination project for backwards
# compatibility.
dst_project=${DST_PROJECT:-${PROJECT:-glam-fenix-dev}}
sql_dir=${SQL_DIR:-sql}
# remove any trailing slash
sql_dir=${sql_dir%/}
product=${PRODUCT?PRODUCT must be defined}
stage=${STAGE?$error}

# ensure the sql directory exists
mkdir -p "$sql_dir/$dst_project/glam_etl"

if [[ $stage == "daily" ]]; then
    write_clients_daily_aggregates "$product" "$src_project" "$dst_project" "$sql_dir"
    python3 -m bigquery_etl.glam.generate \
        --sql-root "$sql_dir" \
        --project "$dst_project" \
        --prefix "${product}" \
        --daily-view-only
elif [[ $stage == "incremental" ]]; then
    python3 -m bigquery_etl.glam.generate \
        --sql-root "$sql_dir" \
        --project "$dst_project" \
        --prefix "${product}"
elif [[ $stage == "all" ]]; then
    write_clients_daily_aggregates "$product" "$src_project" "$dst_project" "$sql_dir"
    python3 -m bigquery_etl.glam.generate \
        --sql-root "$sql_dir" \
        --project "$dst_project" \
        --prefix "${product}"
else
    echo "$error"
    exit 1
fi
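
# Example invocations (a sketch only: the script path and the product value
# are assumptions, not taken from this file; PRODUCT must match a Glean
# *_stable dataset in the source project).
#
#   # generate the daily aggregate queries and daily-only views:
#   PRODUCT=org_mozilla_fenix STAGE=daily ./script/glam/generate_glean_sql
#
#   # generate only the incremental GLAM ETL queries into a custom project:
#   PRODUCT=org_mozilla_fenix STAGE=incremental \
#       DST_PROJECT=glam-fenix-dev SQL_DIR=sql ./script/glam/generate_glean_sql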