Support shared-prod UDFs (#4708)
This commit is contained in:
Родитель
067d016fe1
Коммит
463dc15bf1
|
@ -63,7 +63,7 @@ log_level_option = click.option(
|
|||
def generate(project_dirs, docs_dir, output_dir, log_level):
|
||||
"""Generate documentation for project."""
|
||||
from bigquery_etl.docs.bqetl.generate_bqetl_docs import generate_bqetl_docs
|
||||
from bigquery_etl.docs.mozfun.generate_mozfun_docs import generate_mozfun_docs
|
||||
from bigquery_etl.docs.mozfun.generate_mozfun_docs import generate_udf_docs
|
||||
|
||||
out_dir = os.path.join(output_dir, "docs")
|
||||
|
||||
|
@ -86,14 +86,10 @@ def generate(project_dirs, docs_dir, output_dir, log_level):
|
|||
# generate bqetl command docs
|
||||
generate_bqetl_docs(Path(out_dir) / "bqetl.md")
|
||||
|
||||
# move files to docs/
|
||||
# generate docs
|
||||
for project_dir in project_dirs:
|
||||
if not os.path.isdir(project_dir):
|
||||
continue
|
||||
|
||||
if "mozfun" in project_dir:
|
||||
generate_mozfun_docs(out_dir, project_dir)
|
||||
else:
|
||||
generate_udf_docs(out_dir, project_dir)
|
||||
if "mozfun" not in project_dir:
|
||||
generate_derived_dataset_docs(out_dir, project_dir)
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
"""Generate documentation for mozfun."""
|
||||
import itertools
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
@ -42,82 +43,61 @@ def add_source_and_edit(source_url, edit_url):
|
|||
return f"[Source]({source_url}) | [Edit]({edit_url})"
|
||||
|
||||
|
||||
def generate_mozfun_docs(out_dir, project_dir):
|
||||
"""Generate documentation for mozfun."""
|
||||
for root, _dirs, files in os.walk(project_dir):
|
||||
if DOCS_FILE in files:
|
||||
# copy doc file to output and replace example references
|
||||
src = os.path.join(root, DOCS_FILE)
|
||||
# remove empty strings from path parts
|
||||
path_parts = list(filter(None, root.split(os.sep)))
|
||||
name = path_parts[-1]
|
||||
path = Path(os.sep.join(path_parts[1:-1]))
|
||||
if os.path.split(root)[1] == "":
|
||||
# project level-doc file
|
||||
project_doc_dir = out_dir / path / name
|
||||
project_doc_dir.mkdir(parents=True, exist_ok=True)
|
||||
dest = project_doc_dir / "about.md"
|
||||
dest.write_text(load_with_examples(src))
|
||||
else:
|
||||
description = None
|
||||
if METADATA_FILE in files:
|
||||
source_link = f"{ConfigLoader.get('docs', 'source_url')}/{root}"
|
||||
edit_link = (
|
||||
f"{ConfigLoader.get('docs', 'edit_url')}/{root}/{METADATA_FILE}"
|
||||
)
|
||||
def _gen_udf_content(udf_file_path: Path) -> str:
|
||||
"""Generate markdown documentation content for a udf.sql file."""
|
||||
udf_path = udf_file_path.parent
|
||||
routine_type = "Stored Procedure" if udf_file_path.name == PROCEDURE_FILE else "UDF"
|
||||
udf_content = f"## {udf_path.name} ({routine_type})\n\n"
|
||||
|
||||
with open(os.path.join(root, METADATA_FILE)) as stream:
|
||||
try:
|
||||
description = yaml.safe_load(stream).get(
|
||||
"description", None
|
||||
)
|
||||
except yaml.YAMLError:
|
||||
pass
|
||||
# dataset or UDF level doc file
|
||||
if UDF_FILE in files or PROCEDURE_FILE in files:
|
||||
# UDF-level doc; append to dataset doc
|
||||
dataset_name = os.path.basename(path)
|
||||
dataset_doc = out_dir / path.parent / f"{dataset_name}.md"
|
||||
docfile_content = load_with_examples(src)
|
||||
with open(dataset_doc, "a") as dataset_doc_file:
|
||||
dataset_doc_file.write("\n\n")
|
||||
# Inject a level-2 header with the UDF name & type
|
||||
is_udf = UDF_FILE in files
|
||||
routine_type = "UDF" if is_udf else "Stored Procedure"
|
||||
dataset_doc_file.write(f"## {name} ({routine_type})\n\n")
|
||||
# Inject the "description" from metadata.yaml
|
||||
if description:
|
||||
formated = format_url(description)
|
||||
dataset_doc_file.write(f"{formated}\n\n")
|
||||
# Inject the contents of the README.md
|
||||
dataset_doc_file.write(docfile_content)
|
||||
# Inject input and output parameters from sql
|
||||
if is_udf:
|
||||
with open(os.path.join(root, UDF_FILE), "r") as udf_file:
|
||||
input_str, output_str = get_mozfun_parameters(
|
||||
udf_file.read()
|
||||
)
|
||||
else:
|
||||
with open(
|
||||
os.path.join(root, PROCEDURE_FILE), "r"
|
||||
) as procedure_file:
|
||||
input_str, output_str = get_mozfun_parameters(
|
||||
procedure_file.read()
|
||||
)
|
||||
metadata, source_link, edit_link = None, None, None
|
||||
if (metadata_file := udf_path / "metadata.yaml").exists():
|
||||
metadata = yaml.safe_load(metadata_file.read_text())
|
||||
|
||||
if input_str or output_str:
|
||||
dataset_doc_file.write("\n### Parameters\n\n")
|
||||
if input_str:
|
||||
dataset_doc_file.write("\n**INPUTS**\n\n")
|
||||
dataset_doc_file.write(f"```\n{input_str}\n```\n\n")
|
||||
if output_str:
|
||||
dataset_doc_file.write("\n**OUTPUTS**\n\n")
|
||||
dataset_doc_file.write(f"```\n{output_str}\n```\n\n")
|
||||
if metadata is not None:
|
||||
if (description := metadata.get("description")) is not None:
|
||||
udf_content += f"{format_url(description)}\n\n"
|
||||
source_link = f"{ConfigLoader.get('docs', 'source_url')}/{udf_path}"
|
||||
edit_link = f"{ConfigLoader.get('docs', 'edit_url')}/{udf_path}/{METADATA_FILE}"
|
||||
|
||||
# Add links to source and edit
|
||||
sourced = add_source_and_edit(source_link, edit_link)
|
||||
dataset_doc_file.write(f"{sourced}\n\n")
|
||||
else:
|
||||
# dataset-level doc; create a new doc file
|
||||
dest = out_dir / path / f"{name}.md"
|
||||
dest.write_text(load_with_examples(src))
|
||||
if (readme := udf_path / DOCS_FILE).exists():
|
||||
udf_content += f"{load_with_examples(readme)}\n\n"
|
||||
|
||||
input_str, output_str = get_mozfun_parameters(udf_file_path.read_text())
|
||||
if input_str or output_str:
|
||||
udf_content += "\n### Parameters\n\n"
|
||||
if input_str:
|
||||
udf_content += f"\n**INPUTS**\n\n```\n{input_str}\n```\n\n"
|
||||
if output_str:
|
||||
udf_content += f"\n**OUTPUTS**\n\n```\n{output_str}\n```\n\n"
|
||||
|
||||
if source_link is not None and edit_link is not None:
|
||||
udf_content += f"{add_source_and_edit(source_link, edit_link)}\n\n"
|
||||
|
||||
return udf_content
|
||||
|
||||
|
||||
def generate_udf_docs(out_dir: str, project_dir: str) -> None:
    """Generate one markdown file per dataset documenting its routines.

    Output is written to ``<out_dir>/<name of project_dir>/``:

    * ``about.md`` is a copy of the project-level ``DOCS_FILE``, when present.
    * ``<dataset>.md`` contains the dataset-level ``DOCS_FILE`` (when present)
      followed by the section generated by ``_gen_udf_content`` for every
      ``UDF_FILE`` / ``PROCEDURE_FILE`` found in that dataset.

    Only routines located exactly at ``<project_dir>/<dataset>/<routine>/``
    are picked up; datasets without any routine produce no file.

    :param out_dir: root directory the documentation is written to.
    :param project_dir: project directory containing the dataset directories.
    """
    project_path = Path(project_dir)
    target_path = Path(out_dir) / project_path.name
    target_path.mkdir(parents=True, exist_ok=True)

    if (project_docs := project_path / DOCS_FILE).exists():
        (target_path / "about.md").write_text(project_docs.read_text())

    # Sorting by full path keeps all routines of a dataset adjacent, which is
    # what itertools.groupby requires to emit exactly one group per dataset.
    routine_paths = sorted(
        list(project_path.glob(f"*/*/{UDF_FILE}"))
        + list(project_path.glob(f"*/*/{PROCEDURE_FILE}"))
    )

    # Group by dataset to generate one markdown file per BQ dataset with its UDFs:
    for dataset_path, udf_paths in itertools.groupby(
        routine_paths,
        lambda path: path.parent.parent,  # path is project/dataset/udf/udf.sql
    ):
        # Build the sections eagerly. groupby never yields an empty group, so
        # no emptiness guard is needed (a guard on a generator expression would
        # be dead code anyway, because generator objects are always truthy).
        udfs_content = [_gen_udf_content(path) for path in udf_paths]

        file_content = ""
        if (dataset_docs := dataset_path / DOCS_FILE).exists():
            file_content += f"{dataset_docs.read_text()}\n"
        file_content += "\n".join(udfs_content)
        (target_path / f"{dataset_path.name}.md").write_text(file_content)
|
||||
|
|
|
@ -36,6 +36,7 @@ nav:
|
|||
- ... | mozdata/**.md
|
||||
- UDFs:
|
||||
- ... | mozfun/**.md
|
||||
- ... | moz-fx-data-shared-prod/**.md
|
||||
- Cookbooks:
|
||||
- Common workflows: cookbooks/common_workflows.md
|
||||
- Creating a derived dataset: cookbooks/creating_a_derived_dataset.md
|
||||
|
|
Загрузка…
Ссылка в новой задаче