Extract mozfun docs generation and modify call order (#2689)

This commit is contained in:
Alexander Nicholson 2022-01-28 11:44:17 -05:00 коммит произвёл GitHub
Родитель 10ec0a4510
Коммит c1925039de
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
3 изменённых файлов: 106 добавлений и 98 удалений

Просмотреть файл

@ -1,27 +1,20 @@
"""Generates documentations for provided projects.""" """Generates documentations for provided projects."""
import os import os
import re
import shutil import shutil
from argparse import ArgumentParser from argparse import ArgumentParser
from pathlib import Path from pathlib import Path
import yaml
from bigquery_etl.docs.bqetl.generate_bqetl_docs import generate_bqetl_docs from bigquery_etl.docs.bqetl.generate_bqetl_docs import generate_bqetl_docs
from bigquery_etl.docs.derived_datasets import generate_derived_dataset_docs from bigquery_etl.docs.derived_datasets.generate_derived_dataset_docs import (
generate_derived_dataset_docs,
)
from bigquery_etl.docs.mozfun.generate_mozfun_docs import generate_mozfun_docs
from bigquery_etl.util import standard_args from bigquery_etl.util import standard_args
DEFAULT_PROJECTS_DIRS = ["sql/mozfun/", "sql/moz-fx-data-shared-prod/"] DEFAULT_PROJECTS_DIRS = ["sql/mozfun/", "sql/moz-fx-data-shared-prod/"]
DOCS_FILE = "README.md"
UDF_FILE = "udf.sql"
PROCEDURE_FILE = "stored_procedure.sql"
METADATA_FILE = "metadata.yaml"
DOCS_DIR = "docs/" DOCS_DIR = "docs/"
INDEX_MD = "index.md" INDEX_MD = "index.md"
SQL_REF_RE = r"@sql\((.+)\)"
SOURCE_URL = "https://github.com/mozilla/bigquery-etl/blob/generated-sql"
EDIT_URL = "https://github.com/mozilla/bigquery-etl/edit/generated-sql"
parser = ArgumentParser(description=__doc__) parser = ArgumentParser(description=__doc__)
parser.add_argument( parser.add_argument(
@ -46,33 +39,6 @@ parser.add_argument(
standard_args.add_log_level(parser) standard_args.add_log_level(parser)
def format_url(doc):
"""Create links for urls in documentation."""
doc = re.sub(r"(?<!\()(https?://[^\s]+)(?!\))", r"<\1>", doc)
return doc
def load_with_examples(file):
"""Load doc file and replace SQL references with examples."""
with open(file) as doc_file:
file_content = doc_file.read()
path, _ = os.path.split(file)
for sql_ref in re.findall(SQL_REF_RE, file_content):
sql_example_file = path / Path(sql_ref)
with open(sql_example_file) as example_sql:
md_sql = f"```sql\n{example_sql.read().strip()}\n```"
file_content = file_content.replace(f"@sql({sql_ref})", md_sql)
return file_content
def add_source_and_edit(source_url, edit_url):
"""Add links to the function directory and metadata.yaml editor."""
return f"[Source]({source_url}) | [Edit]({edit_url})"
def main(): def main():
"""Generate documentation for project.""" """Generate documentation for project."""
args = parser.parse_args() args = parser.parse_args()
@ -102,65 +68,10 @@ def main():
if not os.path.isdir(project_dir): if not os.path.isdir(project_dir):
continue continue
for root, _dirs, files in os.walk(project_dir): if "mozfun" in project_dir:
if DOCS_FILE in files: generate_mozfun_docs(out_dir, project_dir)
# copy doc file to output and replace example references else:
src = os.path.join(root, DOCS_FILE) generate_derived_dataset_docs(out_dir, project_dir)
# remove empty strings from path parts
path_parts = list(filter(None, root.split(os.sep)))
name = path_parts[-1]
path = Path(os.sep.join(path_parts[1:-1]))
if "mozfun" in project_dir:
if os.path.split(root)[1] == "":
# project level-doc file
project_doc_dir = out_dir / path / name
project_doc_dir.mkdir(parents=True, exist_ok=True)
dest = project_doc_dir / "about.md"
dest.write_text(load_with_examples(src))
else:
description = None
if METADATA_FILE in files:
source_link = f"{SOURCE_URL}/{root}"
edit_link = f"{EDIT_URL}/{root}/{METADATA_FILE}"
with open(os.path.join(root, METADATA_FILE)) as stream:
try:
description = yaml.safe_load(stream).get(
"description", None
)
except yaml.YAMLError:
pass
# dataset or UDF level doc file
if UDF_FILE in files or PROCEDURE_FILE in files:
# UDF-level doc; append to dataset doc
dataset_name = os.path.basename(path)
dataset_doc = out_dir / path.parent / f"{dataset_name}.md"
docfile_content = load_with_examples(src)
with open(dataset_doc, "a") as dataset_doc_file:
dataset_doc_file.write("\n\n")
# Inject a level-2 header with the UDF name & type
is_udf = UDF_FILE in files
routine_type = "UDF" if is_udf else "Stored Procedure"
dataset_doc_file.write(
f"## {name} ({routine_type})\n\n"
)
# Inject the "description" from metadata.yaml
if description:
formated = format_url(description)
dataset_doc_file.write(f"{formated}\n\n")
# Inject the contents of the README.md
dataset_doc_file.write(docfile_content)
# Add links to source and edit
sourced = add_source_and_edit(source_link, edit_link)
dataset_doc_file.write(f"{sourced}\n\n")
else:
# dataset-level doc; create a new doc file
dest = out_dir / path / f"{name}.md"
dest.write_text(load_with_examples(src))
else:
generate_derived_dataset_docs.generate_derived_dataset_docs(
out_dir, project_dir
)
if __name__ == "__main__": if __name__ == "__main__":

Просмотреть файл

@ -0,0 +1,97 @@
"""Generate documentation for mozfun."""
import os
import re
from pathlib import Path
import yaml
DOCS_FILE = "README.md"
METADATA_FILE = "metadata.yaml"
SOURCE_URL = "https://github.com/mozilla/bigquery-etl/blob/generated-sql"
EDIT_URL = "https://github.com/mozilla/bigquery-etl/edit/generated-sql"
UDF_FILE = "udf.sql"
PROCEDURE_FILE = "stored_procedure.sql"
SQL_REF_RE = r"@sql\((.+)\)"
def format_url(doc):
"""Create links for urls in documentation."""
doc = re.sub(r"(?<!\()(https?://[^\s]+)(?!\))", r"<\1>", doc)
return doc
def load_with_examples(file):
"""Load doc file and replace SQL references with examples."""
with open(file) as doc_file:
file_content = doc_file.read()
path, _ = os.path.split(file)
for sql_ref in re.findall(SQL_REF_RE, file_content):
sql_example_file = path / Path(sql_ref)
with open(sql_example_file) as example_sql:
md_sql = f"```sql\n{example_sql.read().strip()}\n```"
file_content = file_content.replace(f"@sql({sql_ref})", md_sql)
return file_content
def add_source_and_edit(source_url, edit_url):
"""Add links to the function directory and metadata.yaml editor."""
return f"[Source]({source_url}) | [Edit]({edit_url})"
def generate_mozfun_docs(out_dir, project_dir):
"""Generate documentation for mozfun."""
for root, _dirs, files in os.walk(project_dir):
if DOCS_FILE in files:
# copy doc file to output and replace example references
src = os.path.join(root, DOCS_FILE)
# remove empty strings from path parts
path_parts = list(filter(None, root.split(os.sep)))
name = path_parts[-1]
path = Path(os.sep.join(path_parts[1:-1]))
if os.path.split(root)[1] == "":
# project level-doc file
project_doc_dir = out_dir / path / name
project_doc_dir.mkdir(parents=True, exist_ok=True)
dest = project_doc_dir / "about.md"
dest.write_text(load_with_examples(src))
else:
description = None
if METADATA_FILE in files:
source_link = f"{SOURCE_URL}/{root}"
edit_link = f"{EDIT_URL}/{root}/{METADATA_FILE}"
with open(os.path.join(root, METADATA_FILE)) as stream:
try:
description = yaml.safe_load(stream).get(
"description", None
)
except yaml.YAMLError:
pass
# dataset or UDF level doc file
if UDF_FILE in files or PROCEDURE_FILE in files:
# UDF-level doc; append to dataset doc
dataset_name = os.path.basename(path)
dataset_doc = out_dir / path.parent / f"{dataset_name}.md"
docfile_content = load_with_examples(src)
with open(dataset_doc, "a") as dataset_doc_file:
dataset_doc_file.write("\n\n")
# Inject a level-2 header with the UDF name & type
is_udf = UDF_FILE in files
routine_type = "UDF" if is_udf else "Stored Procedure"
dataset_doc_file.write(f"## {name} ({routine_type})\n\n")
# Inject the "description" from metadata.yaml
if description:
formated = format_url(description)
dataset_doc_file.write(f"{formated}\n\n")
# Inject the contents of the README.md
dataset_doc_file.write(docfile_content)
# Add links to source and edit
sourced = add_source_and_edit(source_link, edit_link)
dataset_doc_file.write(f"{sourced}\n\n")
else:
# dataset-level doc; create a new doc file
dest = out_dir / path / f"{name}.md"
dest.write_text(load_with_examples(src))

Просмотреть файл

@ -3,7 +3,7 @@ from pathlib import Path
import pytest import pytest
from bigquery_etl.docs.generate_docs import load_with_examples from bigquery_etl.docs.mozfun.generate_mozfun_docs import load_with_examples
TEST_DIR = Path(__file__).parent.parent TEST_DIR = Path(__file__).parent.parent