#!/bin/bash
set -ex
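
# Example invocation (values are illustrative; PRODUCT and STAGE are the only
# required variables, the rest have defaults set below):
#
#   PRODUCT=org_mozilla_fenix STAGE=daily ./<path-to-this-script>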

function write_scalars {
    local product=$1
    local dataset=$2
    local table=$3
    local dst_project=$4
    local sql_dir=$5
    local directory="${sql_dir}/${dst_project}/glam_etl/${product}__clients_daily_scalar_aggregates_${table}"

    mkdir -p "$directory"
    if ! python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates \
        --source-table "$dataset.$table" \
        --product "$product" \
        > "$directory/query.sql"; then
        echo "skipping $directory/query.sql: no probes found"
        rm -r "$directory"
    else
        echo "generated $directory/query.sql"
    fi
}
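
# For illustration: with product=org_mozilla_fenix and table=baseline_v1
# (hypothetical values), write_scalars produces
#   <sql_dir>/<dst_project>/glam_etl/org_mozilla_fenix__clients_daily_scalar_aggregates_baseline_v1/query.sql
# write_histograms below follows the same pattern for histogram aggregates.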

function write_histograms {
    local product=$1
    local dataset=$2
    local table=$3
    local dst_project=$4
    local sql_dir=$5
    local directory="${sql_dir}/${dst_project}/glam_etl/${product}__clients_daily_histogram_aggregates_${table}"

    mkdir -p "$directory"
    if ! python3 -m bigquery_etl.glam.clients_daily_histogram_aggregates \
        --source-table "$dataset.$table" \
        --product "$product" \
        > "$directory/query.sql"; then
        echo "skipping $directory/query.sql: no probes found"
        rm -r "$directory"
    else
        echo "generated $directory/query.sql"
    fi
}

function write_clients_daily_aggregates {
    local product=$1
    local src_project=$2
    local dst_project=$3
    local sql_dir=$4

    local dataset="${product}_stable"
    local qualified="$src_project:$dataset"
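    # qualified is e.g. (illustrative) "moz-fx-data-shared-prod:org_mozilla_fenix_stable"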

    # Validate the inputs up front; bq output is discarded, so a failure is
    # reported only through the echo messages below.
    if ! bq ls "$qualified" &> /dev/null; then
        echo "could not list $qualified"
        exit 1
    fi
    if ! bq show "$qualified.baseline_v1" &> /dev/null; then
        echo "could not find $qualified.baseline_v1"
        exit 1
    fi

    # e.g. baseline_v1
    local tables
    # GLAM only supports tables with a glean_ping_* schema. use_counters
    # tables are also excluded: they are unsupported, and the query generated
    # for them is too large to run.
    tables=$(bq ls --format=json "$qualified" | \
        jq -r '.[] |
            select(.labels.schema_id | test("^glean_ping_[0-9]+")) |
            select(.type == "TABLE") |
            select(.tableReference.tableId | test("^use_counters.*") | not) |
            "\(.tableReference.tableId)"')

    # generate all of the schemas in parallel
    for table in $tables; do
        write_scalars "$product" "$dataset" "$table" "$dst_project" "$sql_dir" &
        write_histograms "$product" "$dataset" "$table" "$dst_project" "$sql_dir" &
    done

    # wait for all of the processes before continuing
    wait
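    # Note: a bare `wait` always returns 0, so a failing background job will
    # not trip `set -e`; failures show up only in the job output above.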
}

cd "$(dirname "$0")/../.."

error="STAGE must be one of (daily, incremental, all)"

# The project for generating the clients daily tables.
src_project=${SRC_PROJECT:-moz-fx-data-shared-prod}
# PROJECT may also be set as the destination project for backwards
# compatibility.
dst_project=${DST_PROJECT:-${PROJECT:-glam-fenix-dev}}
sql_dir=${SQL_DIR:-sql}
# Remove any trailing slash.
sql_dir=${sql_dir%/}
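# (${VAR%/} strips a single trailing "/", e.g. SQL_DIR="sql/" becomes "sql".)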

product=${PRODUCT?PRODUCT must be defined}
stage=${STAGE?$error}
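# If either variable is unset, the ${VAR?word} expansion above aborts the
# script with a message like "STAGE: STAGE must be one of (daily, incremental, all)".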

# ensure the sql directory exists
mkdir -p "$sql_dir/$dst_project/glam_etl"

if [[ $stage == "daily" ]]; then
    write_clients_daily_aggregates "$product" "$src_project" "$dst_project" "$sql_dir"
    python3 -m bigquery_etl.glam.generate \
        --sql-root "$sql_dir" \
        --project "$dst_project" \
        --prefix "${product}" \
        --daily-view-only
elif [[ $stage == "incremental" ]]; then
    python3 -m bigquery_etl.glam.generate \
        --sql-root "$sql_dir" \
        --project "$dst_project" \
        --prefix "${product}"
elif [[ $stage == "all" ]]; then
    write_clients_daily_aggregates "$product" "$src_project" "$dst_project" "$sql_dir"
    python3 -m bigquery_etl.glam.generate \
        --sql-root "$sql_dir" \
        --project "$dst_project" \
        --prefix "${product}"
else
    echo "$error"
    exit 1
fi