From b31fbe34977c39dd6c4b6cf340b10819269d58a5 Mon Sep 17 00:00:00 2001 From: Anna Scholtz Date: Fri, 17 Jan 2020 15:46:47 -0800 Subject: [PATCH] Metadata publish improvements and update clients_daily_v6 metadata --- script/generate_sql | 9 ----- script/publish_metadata | 36 +++++++++++++++---- .../clients_daily_v6/metadata.yaml | 11 ------ .../clients_daily_v6/metadata.yaml | 8 ++--- 4 files changed, 32 insertions(+), 32 deletions(-) delete mode 100644 sql/telemetry_derived/clients_daily_v6/metadata.yaml diff --git a/script/generate_sql b/script/generate_sql index e5d1262926..89f7bd658c 100755 --- a/script/generate_sql +++ b/script/generate_sql @@ -24,9 +24,6 @@ from bigquery_etl.parse_udf import ( ) # noqa E402 -METADATA_FILE = "metadata.yaml" - - parser = ArgumentParser(description=__doc__) parser.add_argument( "--destination", @@ -69,12 +66,6 @@ def main(): with open(os.path.join(basename, filename), "a+") as output_file: output_file.write(full_text) - if METADATA_FILE in files: - shutil.copyfile( - os.path.join(root, METADATA_FILE), - os.path.join(basename, METADATA_FILE), - ) - if os.path.exists(args.destination): shutil.rmtree(args.destination, ignore_errors=True) shutil.copytree(d, args.destination) diff --git a/script/publish_metadata b/script/publish_metadata index c7a528d16b..a415deef7c 100755 --- a/script/publish_metadata +++ b/script/publish_metadata @@ -1,11 +1,12 @@ #!/usr/bin/env python3 -"""Update metadata of BigQuery tables.""" +"""Update metadata of BigQuery tables and views.""" from argparse import ArgumentParser import logging import os import yaml +import re from google.cloud import bigquery @@ -13,6 +14,15 @@ from google.cloud import bigquery METADATA_FILE = "metadata.yaml" +def is_valid_label(label): + """ + Check if a label has the right format: + Only hyphens (-), underscores (_), lowercase characters, and numbers are allowed. + International characters are allowed. + """ + return re.match(r"[\w\d_-]+", label) + + def publish_metadata(client, dataset, table, metadata_file): with open(metadata_file, "r") as yaml_stream: try: @@ -27,10 +37,21 @@ def publish_metadata(client, dataset, table, metadata_file): table.description = metadata["description"] if "labels" in metadata: - # only string type and lowercased labels are allowed - table.labels = { - k: str(v).lower() for k, v in metadata["labels"].items() - } + table.labels = {} + + for key, label in metadata["labels"].items(): + if is_valid_label(str(key)) and is_valid_label(str(label)): + table.labels[str(key)] = str(label) + else: + print( + """ + Invalid label format: {}: {}. Only hyphens (-), + underscores (_), lowercase characters, and numbers + are allowed. International characters are allowed. + """.format( + key, label + ) + ) client.update_table(table, ["friendly_name", "description", "labels"]) except yaml.YAMLError as e: @@ -59,8 +80,9 @@ def main(): if os.path.isdir(target): for root, dirs, files in os.walk(target): if METADATA_FILE in files: - dataset = root.split("/")[-2] - table = root.split("/")[-1] + path = os.path.normpath(root) + dataset = path.split(os.sep)[-2] + table = path.split(os.sep)[-1] metadata_file = os.path.join(root, METADATA_FILE) publish_metadata(client, dataset, table, metadata_file) diff --git a/sql/telemetry_derived/clients_daily_v6/metadata.yaml b/sql/telemetry_derived/clients_daily_v6/metadata.yaml deleted file mode 100644 index f406fc97b3..0000000000 --- a/sql/telemetry_derived/clients_daily_v6/metadata.yaml +++ /dev/null @@ -1,11 +0,0 @@ -friendly_name: Clients Daily -description: > - A daily aggregate of each Firefox client, - partitioned by day -owners: - - relud@mozilla.com -labels: - application: firefox - refresh: daily - version: 6 - incremental: true diff --git a/templates/telemetry_derived/clients_daily_v6/metadata.yaml b/templates/telemetry_derived/clients_daily_v6/metadata.yaml index f406fc97b3..c5835b42a1 100644 --- a/templates/telemetry_derived/clients_daily_v6/metadata.yaml +++ b/templates/telemetry_derived/clients_daily_v6/metadata.yaml @@ -1,11 +1,9 @@ friendly_name: Clients Daily description: > - A daily aggregate of each Firefox client, + A daily aggregate of main pings from each Firefox desktop client, partitioned by day owners: - - relud@mozilla.com + - dthorn@mozilla.com labels: application: firefox - refresh: daily - version: 6 - incremental: true + schedule: daily