Script for updating metadata of table

This commit is contained in:
Anna Scholtz 2020-01-16 16:06:24 -08:00
Родитель 47f77b7c62
Коммит 165fe50cc8
2 изменённых файлов: 83 добавлений и 1 удалений

Просмотреть файл

@ -70,7 +70,10 @@ def main():
output_file.write(full_text)
if METADATA_FILE in files:
shutil.copyfile(os.path.join(root, METADATA_FILE), os.path.join(basename, METADATA_FILE))
shutil.copyfile(
os.path.join(root, METADATA_FILE),
os.path.join(basename, METADATA_FILE),
)
if os.path.exists(args.destination):
shutil.rmtree(args.destination, ignore_errors=True)

79
script/publish_metadata Executable file
Просмотреть файл

@ -0,0 +1,79 @@
#!/usr/bin/env python3
"""Update metadata of BigQuery tables."""
from argparse import ArgumentParser
import logging
import os
import yaml
from google.cloud import bigquery
# File name that marks a table directory; main() looks for it while walking targets.
METADATA_FILE = "metadata.yaml"
def publish_metadata(client, dataset, table, metadata_file):
    """Apply friendly name, description and labels from a YAML file to a table.

    Args:
        client: BigQuery client used to fetch and update the table.
        dataset: ID of the dataset that contains the table.
        table: ID of the table whose metadata is updated.
        metadata_file: Path of the YAML file holding the metadata.

    A file that cannot be parsed as YAML, or whose top level is not a
    mapping (for example an empty file), is reported through ``logging``
    and skipped without contacting BigQuery.
    """
    # Keep the try body limited to the parse: it is the only step that can
    # raise YAMLError, and the file can be closed before talking to BigQuery.
    with open(metadata_file, "r") as yaml_stream:
        try:
            metadata = yaml.safe_load(yaml_stream)
        except yaml.YAMLError as e:
            logging.error("Cannot parse %s: %s", metadata_file, e)
            return

    # safe_load returns None for an empty document and may return a scalar
    # or a list; the membership tests below only make sense for a mapping.
    if not isinstance(metadata, dict):
        logging.error(
            "Skipping %s: expected a mapping at the top level, got %s",
            metadata_file,
            type(metadata).__name__,
        )
        return

    table_ref = client.dataset(dataset).table(table)
    bq_table = client.get_table(table_ref)

    if "friendly_name" in metadata:
        bq_table.friendly_name = metadata["friendly_name"]

    if "description" in metadata:
        bq_table.description = metadata["description"]

    if "labels" in metadata:
        # A bare "labels:" key parses to None; treat it like an empty mapping.
        labels = metadata["labels"] or {}
        # only string type and lowercased labels are allowed
        bq_table.labels = {k: str(v).lower() for k, v in labels.items()}

    client.update_table(bq_table, ["friendly_name", "description", "labels"])
def main():
    """Publish metadata for every table directory found under the targets.

    Each ``--target`` is walked recursively. Every directory containing a
    ``METADATA_FILE`` is treated as ``<dataset>/<table>``: the directory
    name is the table ID and its parent directory name is the dataset ID.
    Targets that are not directories are reported and skipped.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument("--project-id", help="Default project")
    parser.add_argument(
        "--target",
        nargs="+",
        # Without a target there is nothing to iterate over; fail with a
        # usage error instead of a TypeError further down.
        required=True,
        help="File or directory containing metadata files",
    )
    parser.add_argument("--log-level", default="INFO", help="Defaults to INFO")
    args = parser.parse_args()

    # set log level
    try:
        logging.basicConfig(level=args.log_level, format="%(levelname)s %(message)s")
    except ValueError as e:
        parser.error(f"argument --log-level: {e}")

    # Create the client only after the arguments have been validated.
    client = bigquery.Client(args.project_id)

    for target in args.target:
        if not os.path.isdir(target):
            # Report the offending target, not the whole list of targets.
            print(
                "\nInvalid target: {}, target must be a directory with\n"
                "structure /<dataset>/<table>/metadata.yaml.\n".format(target)
            )
            continue

        for root, _dirs, files in os.walk(target):
            if METADATA_FILE not in files:
                continue

            # Resolve the path first so trailing slashes and short relative
            # paths (e.g. "table" or ".") still yield both components.
            table_dir = os.path.abspath(root)
            table = os.path.basename(table_dir)
            dataset = os.path.basename(os.path.dirname(table_dir))
            if not dataset or not table:
                logging.warning(
                    "Skipping %s: cannot derive <dataset>/<table> from path", root
                )
                continue

            metadata_file = os.path.join(root, METADATA_FILE)
            publish_metadata(client, dataset, table, metadata_file)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()