Alexander 2023-12-14 13:45:13 -05:00 committed by GitHub
Parent 067d016fe1
Commit 463dc15bf1
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 84 deletions

View file

@@ -63,7 +63,7 @@ log_level_option = click.option(
def generate(project_dirs, docs_dir, output_dir, log_level):
"""Generate documentation for project."""
from bigquery_etl.docs.bqetl.generate_bqetl_docs import generate_bqetl_docs
from bigquery_etl.docs.mozfun.generate_mozfun_docs import generate_mozfun_docs
from bigquery_etl.docs.mozfun.generate_mozfun_docs import generate_udf_docs
out_dir = os.path.join(output_dir, "docs")
@@ -86,14 +86,10 @@ def generate(project_dirs, docs_dir, output_dir, log_level):
# generate bqetl command docs
generate_bqetl_docs(Path(out_dir) / "bqetl.md")
# move files to docs/
# generate docs
for project_dir in project_dirs:
if not os.path.isdir(project_dir):
continue
if "mozfun" in project_dir:
generate_mozfun_docs(out_dir, project_dir)
else:
generate_udf_docs(out_dir, project_dir)
if "mozfun" not in project_dir:
generate_derived_dataset_docs(out_dir, project_dir)
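
With this change every project directory gets routine (UDF and stored procedure) docs, while only non-mozfun projects additionally get derived-dataset docs. A minimal sketch of the resulting control flow, with a hypothetical project list (the real values come from the CLI options above):

    # Hypothetical inputs for illustration only.
    project_dirs = ["sql/mozfun", "sql/moz-fx-data-shared-prod"]
    out_dir = "generated-docs/docs"

    for project_dir in project_dirs:
        # UDF/procedure pages are now generated for every project
        generate_udf_docs(out_dir, project_dir)
        if "mozfun" not in project_dir:
            # derived-dataset (table) docs are still skipped for mozfun
            generate_derived_dataset_docs(out_dir, project_dir)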

View file

@@ -1,4 +1,5 @@
"""Generate documentation for mozfun."""
import itertools
import os
import re
from pathlib import Path
@@ -42,82 +43,61 @@ def add_source_and_edit(source_url, edit_url):
return f"[Source]({source_url}) | [Edit]({edit_url})"
def generate_mozfun_docs(out_dir, project_dir):
"""Generate documentation for mozfun."""
for root, _dirs, files in os.walk(project_dir):
if DOCS_FILE in files:
# copy doc file to output and replace example references
src = os.path.join(root, DOCS_FILE)
# remove empty strings from path parts
path_parts = list(filter(None, root.split(os.sep)))
name = path_parts[-1]
path = Path(os.sep.join(path_parts[1:-1]))
if os.path.split(root)[1] == "":
# project level-doc file
project_doc_dir = out_dir / path / name
project_doc_dir.mkdir(parents=True, exist_ok=True)
dest = project_doc_dir / "about.md"
dest.write_text(load_with_examples(src))
else:
description = None
if METADATA_FILE in files:
source_link = f"{ConfigLoader.get('docs', 'source_url')}/{root}"
edit_link = (
f"{ConfigLoader.get('docs', 'edit_url')}/{root}/{METADATA_FILE}"
)
def _gen_udf_content(udf_file_path: Path) -> str:
"""Generate markdown documentation content for a udf.sql file."""
udf_path = udf_file_path.parent
routine_type = "Stored Procedure" if udf_file_path.name == PROCEDURE_FILE else "UDF"
udf_content = f"## {udf_path.name} ({routine_type})\n\n"
with open(os.path.join(root, METADATA_FILE)) as stream:
try:
description = yaml.safe_load(stream).get(
"description", None
)
except yaml.YAMLError:
pass
# dataset or UDF level doc file
if UDF_FILE in files or PROCEDURE_FILE in files:
# UDF-level doc; append to dataset doc
dataset_name = os.path.basename(path)
dataset_doc = out_dir / path.parent / f"{dataset_name}.md"
docfile_content = load_with_examples(src)
with open(dataset_doc, "a") as dataset_doc_file:
dataset_doc_file.write("\n\n")
# Inject a level-2 header with the UDF name & type
is_udf = UDF_FILE in files
routine_type = "UDF" if is_udf else "Stored Procedure"
dataset_doc_file.write(f"## {name} ({routine_type})\n\n")
# Inject the "description" from metadata.yaml
if description:
formated = format_url(description)
dataset_doc_file.write(f"{formated}\n\n")
# Inject the contents of the README.md
dataset_doc_file.write(docfile_content)
# Inject input and output parameters from sql
if is_udf:
with open(os.path.join(root, UDF_FILE), "r") as udf_file:
input_str, output_str = get_mozfun_parameters(
udf_file.read()
)
else:
with open(
os.path.join(root, PROCEDURE_FILE), "r"
) as procedure_file:
input_str, output_str = get_mozfun_parameters(
procedure_file.read()
)
metadata, source_link, edit_link = None, None, None
if (metadata_file := udf_path / "metadata.yaml").exists():
metadata = yaml.safe_load(metadata_file.read_text())
if metadata is not None:
if (description := metadata.get("description")) is not None:
udf_content += f"{format_url(description)}\n\n"
source_link = f"{ConfigLoader.get('docs', 'source_url')}/{udf_path}"
edit_link = f"{ConfigLoader.get('docs', 'edit_url')}/{udf_path}/{METADATA_FILE}"
if (readme := udf_path / DOCS_FILE).exists():
udf_content += f"{load_with_examples(readme)}\n\n"
input_str, output_str = get_mozfun_parameters(udf_file_path.read_text())
if input_str or output_str:
dataset_doc_file.write("\n### Parameters\n\n")
udf_content += "\n### Parameters\n\n"
if input_str:
dataset_doc_file.write("\n**INPUTS**\n\n")
dataset_doc_file.write(f"```\n{input_str}\n```\n\n")
udf_content += f"\n**INPUTS**\n\n```\n{input_str}\n```\n\n"
if output_str:
dataset_doc_file.write("\n**OUTPUTS**\n\n")
dataset_doc_file.write(f"```\n{output_str}\n```\n\n")
udf_content += f"\n**OUTPUTS**\n\n```\n{output_str}\n```\n\n"
# Add links to source and edit
sourced = add_source_and_edit(source_link, edit_link)
dataset_doc_file.write(f"{sourced}\n\n")
else:
# dataset-level doc; create a new doc file
dest = out_dir / path / f"{name}.md"
dest.write_text(load_with_examples(src))
if source_link is not None and edit_link is not None:
udf_content += f"{add_source_and_edit(source_link, edit_link)}\n\n"
return udf_content
def generate_udf_docs(out_dir: str, project_dir: str) -> None:
"""Generate documentation for UDFs."""
project_path = Path(project_dir)
target_path = Path(out_dir) / project_path.name
target_path.mkdir(parents=True, exist_ok=True)
if (project_docs := project_path / DOCS_FILE).exists():
(target_path / "about.md").write_text(project_docs.read_text())
# Group by dataset to generate one markdown file per BQ dataset with its UDFs:
for dataset_path, udf_paths in itertools.groupby(
sorted(
list(project_path.glob(f"*/*/{UDF_FILE}"))
+ list(project_path.glob(f"*/*/{PROCEDURE_FILE}"))
),
lambda path: path.parent.parent, # path is project/dataset/udf/udf.sql
):
if not (udfs_content := (_gen_udf_content(path) for path in udf_paths)):
continue
file_content = ""
if (dataset_docs := dataset_path / DOCS_FILE).exists():
file_content += f"{dataset_docs.read_text()}\n"
file_content += "\n".join(udfs_content)
(target_path / f"{dataset_path.name}.md").write_text(file_content)

View file

@@ -36,6 +36,7 @@ nav:
- ... | mozdata/**.md
- UDFs:
- ... | mozfun/**.md
- ... | moz-fx-data-shared-prod/**.md
- Cookbooks:
- Common workflows: cookbooks/common_workflows.md
- Creating a derived dataset: cookbooks/creating_a_derived_dataset.md
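
The added nav glob is what surfaces the newly generated moz-fx-data-shared-prod UDF pages: generate_udf_docs now runs for every project, and the "... | <glob>" pattern (the rest-glob syntax of the awesome-pages plugin, which this nav appears to use) picks up the per-dataset markdown files written under that directory.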