Support dataset READMEs and metadata, cosmetic changes (#2579)
This commit is contained in:
Родитель
2a20483099
Коммит
73d6256799
|
@ -1,14 +1,17 @@
|
||||||
"""Generate documentation for derived datasets."""
|
"""Generate documentation for derived datasets."""
|
||||||
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import yaml
|
|
||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
|
||||||
from bigquery_etl.dependency import extract_table_references
|
from bigquery_etl.dependency import extract_table_references
|
||||||
|
from bigquery_etl.metadata.parse_metadata import DatasetMetadata, Metadata
|
||||||
|
|
||||||
|
logging.basicConfig(format="%(levelname)s (%(filename)s:%(lineno)d) - %(message)s")
|
||||||
|
|
||||||
VIEW_FILE = "view.sql"
|
VIEW_FILE = "view.sql"
|
||||||
METADATA_FILE = "metadata.yaml"
|
METADATA_FILE = "metadata.yaml"
|
||||||
|
DATASET_METADATA_FILE = "dataset_metadata.yaml"
|
||||||
README_FILE = "README.md"
|
README_FILE = "README.md"
|
||||||
NON_USER_FACING_DATASET_SUFFIXES = (
|
NON_USER_FACING_DATASET_SUFFIXES = (
|
||||||
"_derived",
|
"_derived",
|
||||||
|
@ -19,20 +22,23 @@ NON_USER_FACING_DATASET_SUFFIXES = (
|
||||||
SOURCE_URL = "https://github.com/mozilla/bigquery-etl/blob/generated-sql"
|
SOURCE_URL = "https://github.com/mozilla/bigquery-etl/blob/generated-sql"
|
||||||
|
|
||||||
|
|
||||||
def _get_metadata(table_path):
|
def _get_metadata(path, metadata_filename=METADATA_FILE):
|
||||||
metadata = {}
|
metadata_path = path / metadata_filename
|
||||||
metadata_file = table_path / METADATA_FILE
|
try:
|
||||||
if metadata_file.exists():
|
if metadata_filename == METADATA_FILE:
|
||||||
with open(metadata_file) as stream:
|
metadata = Metadata.from_file(metadata_path)
|
||||||
try:
|
return metadata
|
||||||
metadata = yaml.safe_load(stream)
|
elif metadata_filename == DATASET_METADATA_FILE:
|
||||||
except yaml.YAMLError as error:
|
metadata = DatasetMetadata.from_file(metadata_path)
|
||||||
print(error)
|
return metadata
|
||||||
return metadata
|
else:
|
||||||
|
raise Exception(f"Invalid metadata filename provided - {metadata_filename}")
|
||||||
|
except FileNotFoundError:
|
||||||
|
logging.warning(f"Metadata not found at {str(metadata_path)}")
|
||||||
|
|
||||||
|
|
||||||
def _get_readme_content(table_path):
|
def _get_readme_content(path):
|
||||||
readme_file = table_path / README_FILE
|
readme_file = path / README_FILE
|
||||||
if readme_file.exists():
|
if readme_file.exists():
|
||||||
return readme_file.read_text()
|
return readme_file.read_text()
|
||||||
|
|
||||||
|
@ -84,6 +90,7 @@ def _iter_table_markdown(table_paths, template):
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
readme_content=readme_content,
|
readme_content=readme_content,
|
||||||
table_name=table_path.name,
|
table_name=table_path.name,
|
||||||
|
qualified_table_name=f"{table_path.parent.name}.{table_path.name}",
|
||||||
source_urls=source_urls,
|
source_urls=source_urls,
|
||||||
referenced_tables=referenced_tables,
|
referenced_tables=referenced_tables,
|
||||||
project_url=f"{SOURCE_URL}/sql",
|
project_url=f"{SOURCE_URL}/sql",
|
||||||
|
@ -115,12 +122,28 @@ def generate_derived_dataset_docs(out_dir, project_dir):
|
||||||
|
|
||||||
file_loader = FileSystemLoader("bigquery_etl/docs/derived_datasets/templates")
|
file_loader = FileSystemLoader("bigquery_etl/docs/derived_datasets/templates")
|
||||||
env = Environment(loader=file_loader)
|
env = Environment(loader=file_loader)
|
||||||
template = env.get_template("table.md")
|
table_template = env.get_template("table.md")
|
||||||
|
dataset_header_template = env.get_template("dataset_header.md")
|
||||||
|
|
||||||
|
dataset_metadata = _get_metadata(
|
||||||
|
dataset_path, metadata_filename=DATASET_METADATA_FILE
|
||||||
|
)
|
||||||
|
dataset_readme_content = _get_readme_content(dataset_path)
|
||||||
|
|
||||||
with open(output_path / f"{dataset_path.name}.md", "w") as dataset_doc:
|
with open(output_path / f"{dataset_path.name}.md", "w") as dataset_doc:
|
||||||
# Manually set title to prevent Mkdocs from removing
|
# In the template, we manually set title to prevent Mkdocs from removing
|
||||||
# underscores and capitalizing file names
|
# underscores and capitalizing file names
|
||||||
# https://github.com/mkdocs/mkdocs/issues/1915#issuecomment-561311801
|
# https://github.com/mkdocs/mkdocs/issues/1915#issuecomment-561311801
|
||||||
dataset_doc.write(f"---\ntitle: {dataset_path.name}\n---\n\n")
|
dataset_header = dataset_header_template.render(
|
||||||
|
title=dataset_metadata.friendly_name
|
||||||
|
if dataset_metadata
|
||||||
|
else dataset_path.name,
|
||||||
|
description=dataset_metadata.description if dataset_metadata else None,
|
||||||
|
readme_content=dataset_readme_content,
|
||||||
|
source_url=f"{SOURCE_URL}/{str(dataset_path)}",
|
||||||
|
)
|
||||||
|
|
||||||
dataset_doc.write("".join(_iter_table_markdown(table_paths, template)))
|
dataset_doc.write(dataset_header)
|
||||||
|
dataset_doc.write(
|
||||||
|
"".join(_iter_table_markdown(table_paths, table_template))
|
||||||
|
)
|
||||||
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
---
|
||||||
|
title: {{ title }}
|
||||||
|
---
|
||||||
|
{{ description or ""}}
|
||||||
|
{{ readme_content or ""}}
|
||||||
|
|
||||||
|
[Source Directory]({{ source_url }})
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
## [{{ table_name }}](#{{ table_name }})
|
## [{{ table_name }}](#{{ table_name }})
|
||||||
|
|
||||||
{{ metadata.description | e }}
|
|
||||||
|
|
||||||
{% if metadata.friendly_name -%}
|
{% if metadata.friendly_name -%}
|
||||||
* Friendly name: {{metadata.friendly_name}}
|
**{{ metadata.friendly_name }}**
|
||||||
|
|
||||||
{% endif -%}
|
{% endif -%}
|
||||||
|
|
||||||
|
`{{ qualified_table_name }}`
|
||||||
|
|
||||||
|
{{ metadata.description | e }}
|
||||||
|
|
||||||
{% if metadata.labels -%}
|
{% if metadata.labels -%}
|
||||||
{% if metadata.labels.schedule -%}
|
{% if metadata.labels.schedule -%}
|
||||||
* Schedule: {{metadata.labels.schedule}}
|
* Schedule: {{metadata.labels.schedule}}
|
||||||
|
@ -21,11 +24,8 @@
|
||||||
{%- endfor %}
|
{%- endfor %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% if readme_content -%}
|
{{ readme_content or "" }}
|
||||||
|
|
||||||
{{ readme_content }}
|
|
||||||
|
|
||||||
{% endif %}
|
|
||||||
{% if referenced_tables -%}
|
{% if referenced_tables -%}
|
||||||
<table>
|
<table>
|
||||||
<caption>Referenced Tables</caption>
|
<caption>Referenced Tables</caption>
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
Legacy UDFs
|
## Legacy UDFs
|
||||||
===
|
|
||||||
|
|
||||||
This directory contains compatibility functions for query migrations from Athena/Presto, and is named `udf_legacy` to discourage their ongoing use.
|
This directory contains compatibility functions for query migrations from Athena/Presto, and is named `udf_legacy` to discourage their ongoing use.
|
||||||
|
|
Загрузка…
Ссылка в новой задаче