Metadata publish improvements and update clients_daily_v6 metadata

This commit is contained in:
Anna Scholtz 2020-01-17 15:46:47 -08:00
Родитель 165fe50cc8
Коммит b31fbe3497
4 изменённых файлов: 32 добавлений и 32 удалений

Просмотреть файл

@ -24,9 +24,6 @@ from bigquery_etl.parse_udf import (
) # noqa E402
METADATA_FILE = "metadata.yaml"
parser = ArgumentParser(description=__doc__)
parser.add_argument(
"--destination",
@ -69,12 +66,6 @@ def main():
with open(os.path.join(basename, filename), "a+") as output_file:
output_file.write(full_text)
if METADATA_FILE in files:
shutil.copyfile(
os.path.join(root, METADATA_FILE),
os.path.join(basename, METADATA_FILE),
)
if os.path.exists(args.destination):
shutil.rmtree(args.destination, ignore_errors=True)
shutil.copytree(d, args.destination)

Просмотреть файл

@ -1,11 +1,12 @@
#!/usr/bin/env python3
"""Update metadata of BigQuery tables."""
"""Update metadata of BigQuery tables and views."""
from argparse import ArgumentParser
import logging
import os
import yaml
import re
from google.cloud import bigquery
@ -13,6 +14,15 @@ from google.cloud import bigquery
METADATA_FILE = "metadata.yaml"
def is_valid_label(label):
    """
    Check whether a string has the right format for a label key or value.

    Only hyphens (-), underscores (_), lowercase characters, and numbers are
    allowed. International characters are allowed. The empty string is
    rejected.

    :param label: the label key or value to check, as a string
    :return: True if the whole label consists of allowed characters,
        otherwise False
    """
    # fullmatch instead of match: re.match only anchors at the start, so it
    # would accept any string that merely begins with a valid character
    # (e.g. "bad label" or "daily!").
    if re.fullmatch(r"[\w-]+", label) is None:
        return False
    # \w also matches uppercase letters, which are not allowed; comparing
    # with the lowercased form rejects them, including non-ASCII ones.
    return label == label.lower()
def publish_metadata(client, dataset, table, metadata_file):
with open(metadata_file, "r") as yaml_stream:
try:
@ -27,10 +37,21 @@ def publish_metadata(client, dataset, table, metadata_file):
table.description = metadata["description"]
if "labels" in metadata:
# only string type and lowercased labels are allowed
table.labels = {
k: str(v).lower() for k, v in metadata["labels"].items()
}
table.labels = {}
for key, label in metadata["labels"].items():
if is_valid_label(str(key)) and is_valid_label(str(label)):
table.labels[str(key)] = str(label)
else:
print(
"""
Invalid label format: {}: {}. Only hyphens (-),
underscores (_), lowercase characters, and numbers
are allowed. International characters are allowed.
""".format(
key, label
)
)
client.update_table(table, ["friendly_name", "description", "labels"])
except yaml.YAMLError as e:
@ -59,8 +80,9 @@ def main():
if os.path.isdir(target):
for root, dirs, files in os.walk(target):
if METADATA_FILE in files:
dataset = root.split("/")[-2]
table = root.split("/")[-1]
path = os.path.normpath(root)
dataset = path.split(os.sep)[-2]
table = path.split(os.sep)[-1]
metadata_file = os.path.join(root, METADATA_FILE)
publish_metadata(client, dataset, table, metadata_file)

Просмотреть файл

@ -1,11 +0,0 @@
friendly_name: Clients Daily
description: >
A daily aggregate of each Firefox client,
partitioned by day
owners:
- relud@mozilla.com
labels:
application: firefox
refresh: daily
version: 6
incremental: true

Просмотреть файл

@ -1,11 +1,9 @@
friendly_name: Clients Daily
description: >
A daily aggregate of each Firefox client,
A daily aggregate of main pings from each Firefox desktop client,
partitioned by day
owners:
- relud@mozilla.com
- dthorn@mozilla.com
labels:
application: firefox
refresh: daily
version: 6
incremental: true
schedule: daily