Metadata publish improvements and update clients_daily_v6 metadata
This commit is contained in:
Родитель
165fe50cc8
Коммит
b31fbe3497
|
@ -24,9 +24,6 @@ from bigquery_etl.parse_udf import (
|
|||
) # noqa E402
|
||||
|
||||
|
||||
METADATA_FILE = "metadata.yaml"
|
||||
|
||||
|
||||
parser = ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
"--destination",
|
||||
|
@ -69,12 +66,6 @@ def main():
|
|||
with open(os.path.join(basename, filename), "a+") as output_file:
|
||||
output_file.write(full_text)
|
||||
|
||||
if METADATA_FILE in files:
|
||||
shutil.copyfile(
|
||||
os.path.join(root, METADATA_FILE),
|
||||
os.path.join(basename, METADATA_FILE),
|
||||
)
|
||||
|
||||
if os.path.exists(args.destination):
|
||||
shutil.rmtree(args.destination, ignore_errors=True)
|
||||
shutil.copytree(d, args.destination)
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
"""Update metadata of BigQuery tables."""
|
||||
"""Update metadata of BigQuery tables and views."""
|
||||
|
||||
from argparse import ArgumentParser
|
||||
import logging
|
||||
import os
|
||||
import yaml
|
||||
import re
|
||||
|
||||
from google.cloud import bigquery
|
||||
|
||||
|
@ -13,6 +14,15 @@ from google.cloud import bigquery
|
|||
METADATA_FILE = "metadata.yaml"
|
||||
|
||||
|
||||
def is_valid_label(label):
    """
    Check whether a label key or value has the right format.

    Only hyphens (-), underscores (_), lowercase characters, and numbers are
    allowed. International characters are allowed.

    :param label: label key or value, as a string
    :return: True if the whole string is non-empty and consists solely of
        permitted characters, False otherwise
    """
    # fullmatch validates the entire string; re.match only checked that the
    # label *started* with a permitted character, so "daily!" was accepted.
    if re.fullmatch(r"[\w-]+", label) is None:
        return False

    # \w also matches uppercase letters, which are not permitted in labels.
    return not any(char.isupper() for char in label)
|
||||
|
||||
|
||||
def publish_metadata(client, dataset, table, metadata_file):
|
||||
with open(metadata_file, "r") as yaml_stream:
|
||||
try:
|
||||
|
@ -27,10 +37,21 @@ def publish_metadata(client, dataset, table, metadata_file):
|
|||
table.description = metadata["description"]
|
||||
|
||||
if "labels" in metadata:
|
||||
# only string type and lowercased labels are allowed
|
||||
table.labels = {
|
||||
k: str(v).lower() for k, v in metadata["labels"].items()
|
||||
}
|
||||
table.labels = {}
|
||||
|
||||
for key, label in metadata["labels"].items():
|
||||
if is_valid_label(str(key)) and is_valid_label(str(label)):
|
||||
table.labels[str(key)] = str(label)
|
||||
else:
|
||||
print(
|
||||
"""
|
||||
Invalid label format: {}: {}. Only hyphens (-),
|
||||
underscores (_), lowercase characters, and numbers
|
||||
are allowed. International characters are allowed.
|
||||
""".format(
|
||||
key, label
|
||||
)
|
||||
)
|
||||
|
||||
client.update_table(table, ["friendly_name", "description", "labels"])
|
||||
except yaml.YAMLError as e:
|
||||
|
@ -59,8 +80,9 @@ def main():
|
|||
if os.path.isdir(target):
|
||||
for root, dirs, files in os.walk(target):
|
||||
if METADATA_FILE in files:
|
||||
dataset = root.split("/")[-2]
|
||||
table = root.split("/")[-1]
|
||||
path = os.path.normpath(root)
|
||||
dataset = path.split(os.sep)[-2]
|
||||
table = path.split(os.sep)[-1]
|
||||
metadata_file = os.path.join(root, METADATA_FILE)
|
||||
|
||||
publish_metadata(client, dataset, table, metadata_file)
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
friendly_name: Clients Daily
|
||||
description: >
|
||||
A daily aggregate of each Firefox client,
|
||||
partitioned by day
|
||||
owners:
|
||||
- relud@mozilla.com
|
||||
labels:
|
||||
application: firefox
|
||||
refresh: daily
|
||||
version: 6
|
||||
incremental: true
|
|
@ -1,11 +1,9 @@
|
|||
friendly_name: Clients Daily
|
||||
description: >
|
||||
A daily aggregate of each Firefox client,
|
||||
A daily aggregate of main pings from each Firefox desktop client,
|
||||
partitioned by day
|
||||
owners:
|
||||
- relud@mozilla.com
|
||||
- dthorn@mozilla.com
|
||||
labels:
|
||||
application: firefox
|
||||
refresh: daily
|
||||
version: 6
|
||||
incremental: true
|
||||
schedule: daily
|
||||
|
|
Загрузка…
Ссылка в новой задаче