From c1925039de0e3bf11923ad32652e98b51368e4bc Mon Sep 17 00:00:00 2001 From: Alexander Nicholson Date: Fri, 28 Jan 2022 11:44:17 -0500 Subject: [PATCH] Extract mozfun docs generation and modify call order (#2689) --- bigquery_etl/docs/generate_docs.py | 105 ++---------------- .../docs/mozfun/generate_mozfun_docs.py | 97 ++++++++++++++++ tests/docs/test_generate_docs.py | 2 +- 3 files changed, 106 insertions(+), 98 deletions(-) create mode 100644 bigquery_etl/docs/mozfun/generate_mozfun_docs.py diff --git a/bigquery_etl/docs/generate_docs.py b/bigquery_etl/docs/generate_docs.py index cea74a43b7..f4b6b05769 100644 --- a/bigquery_etl/docs/generate_docs.py +++ b/bigquery_etl/docs/generate_docs.py @@ -1,27 +1,20 @@ """Generates documentations for provided projects.""" import os -import re import shutil from argparse import ArgumentParser from pathlib import Path -import yaml - from bigquery_etl.docs.bqetl.generate_bqetl_docs import generate_bqetl_docs -from bigquery_etl.docs.derived_datasets import generate_derived_dataset_docs +from bigquery_etl.docs.derived_datasets.generate_derived_dataset_docs import ( + generate_derived_dataset_docs, +) +from bigquery_etl.docs.mozfun.generate_mozfun_docs import generate_mozfun_docs from bigquery_etl.util import standard_args DEFAULT_PROJECTS_DIRS = ["sql/mozfun/", "sql/moz-fx-data-shared-prod/"] -DOCS_FILE = "README.md" -UDF_FILE = "udf.sql" -PROCEDURE_FILE = "stored_procedure.sql" -METADATA_FILE = "metadata.yaml" DOCS_DIR = "docs/" INDEX_MD = "index.md" -SQL_REF_RE = r"@sql\((.+)\)" -SOURCE_URL = "https://github.com/mozilla/bigquery-etl/blob/generated-sql" -EDIT_URL = "https://github.com/mozilla/bigquery-etl/edit/generated-sql" parser = ArgumentParser(description=__doc__) parser.add_argument( @@ -46,33 +39,6 @@ parser.add_argument( standard_args.add_log_level(parser) -def format_url(doc): - """Create links for urls in documentation.""" - doc = re.sub(r"(?<!\()(https?://[^\s]+)", r"<\1>", doc) - return doc - - -def load_with_examples(file): - """Load doc file 
and replace SQL references with examples.""" - with open(file) as doc_file: - file_content = doc_file.read() - - path, _ = os.path.split(file) - - for sql_ref in re.findall(SQL_REF_RE, file_content): - sql_example_file = path / Path(sql_ref) - with open(sql_example_file) as example_sql: - md_sql = f"```sql\n{example_sql.read().strip()}\n```" - file_content = file_content.replace(f"@sql({sql_ref})", md_sql) - - return file_content - - -def add_source_and_edit(source_url, edit_url): - """Add links to the function directory and metadata.yaml editor.""" - return f"[Source]({source_url}) | [Edit]({edit_url})" - - def main(): """Generate documentation for project.""" args = parser.parse_args() @@ -102,65 +68,10 @@ def main(): if not os.path.isdir(project_dir): continue - for root, _dirs, files in os.walk(project_dir): - if DOCS_FILE in files: - # copy doc file to output and replace example references - src = os.path.join(root, DOCS_FILE) - # remove empty strings from path parts - path_parts = list(filter(None, root.split(os.sep))) - name = path_parts[-1] - path = Path(os.sep.join(path_parts[1:-1])) - if "mozfun" in project_dir: - if os.path.split(root)[1] == "": - # project level-doc file - project_doc_dir = out_dir / path / name - project_doc_dir.mkdir(parents=True, exist_ok=True) - dest = project_doc_dir / "about.md" - dest.write_text(load_with_examples(src)) - else: - description = None - if METADATA_FILE in files: - source_link = f"{SOURCE_URL}/{root}" - edit_link = f"{EDIT_URL}/{root}/{METADATA_FILE}" - - with open(os.path.join(root, METADATA_FILE)) as stream: - try: - description = yaml.safe_load(stream).get( - "description", None - ) - except yaml.YAMLError: - pass - # dataset or UDF level doc file - if UDF_FILE in files or PROCEDURE_FILE in files: - # UDF-level doc; append to dataset doc - dataset_name = os.path.basename(path) - dataset_doc = out_dir / path.parent / f"{dataset_name}.md" - docfile_content = load_with_examples(src) - with open(dataset_doc, "a") as 
dataset_doc_file: - dataset_doc_file.write("\n\n") - # Inject a level-2 header with the UDF name & type - is_udf = UDF_FILE in files - routine_type = "UDF" if is_udf else "Stored Procedure" - dataset_doc_file.write( - f"## {name} ({routine_type})\n\n" - ) - # Inject the "description" from metadata.yaml - if description: - formated = format_url(description) - dataset_doc_file.write(f"{formated}\n\n") - # Inject the contents of the README.md - dataset_doc_file.write(docfile_content) - # Add links to source and edit - sourced = add_source_and_edit(source_link, edit_link) - dataset_doc_file.write(f"{sourced}\n\n") - else: - # dataset-level doc; create a new doc file - dest = out_dir / path / f"{name}.md" - dest.write_text(load_with_examples(src)) - else: - generate_derived_dataset_docs.generate_derived_dataset_docs( - out_dir, project_dir - ) + if "mozfun" in project_dir: + generate_mozfun_docs(out_dir, project_dir) + else: + generate_derived_dataset_docs(out_dir, project_dir) if __name__ == "__main__": diff --git a/bigquery_etl/docs/mozfun/generate_mozfun_docs.py b/bigquery_etl/docs/mozfun/generate_mozfun_docs.py new file mode 100644 index 0000000000..b886d1ab22 --- /dev/null +++ b/bigquery_etl/docs/mozfun/generate_mozfun_docs.py @@ -0,0 +1,97 @@ +"""Generate documentation for mozfun.""" +import os +import re +from pathlib import Path + +import yaml + +DOCS_FILE = "README.md" +METADATA_FILE = "metadata.yaml" +SOURCE_URL = "https://github.com/mozilla/bigquery-etl/blob/generated-sql" +EDIT_URL = "https://github.com/mozilla/bigquery-etl/edit/generated-sql" +UDF_FILE = "udf.sql" +PROCEDURE_FILE = "stored_procedure.sql" +SQL_REF_RE = r"@sql\((.+)\)" + + +def format_url(doc): + """Create links for urls in documentation.""" + doc = re.sub(r"(?<!\()(https?://[^\s]+)", r"<\1>", doc) + return doc + + +def load_with_examples(file): + """Load doc file and replace SQL references with examples.""" + with open(file) as doc_file: + file_content = doc_file.read() + + path, _ = os.path.split(file) + + for sql_ref in 
re.findall(SQL_REF_RE, file_content): + sql_example_file = path / Path(sql_ref) + with open(sql_example_file) as example_sql: + md_sql = f"```sql\n{example_sql.read().strip()}\n```" + file_content = file_content.replace(f"@sql({sql_ref})", md_sql) + + return file_content + + +def add_source_and_edit(source_url, edit_url): + """Add links to the function directory and metadata.yaml editor.""" + return f"[Source]({source_url}) | [Edit]({edit_url})" + + +def generate_mozfun_docs(out_dir, project_dir): + """Generate documentation for mozfun.""" + for root, _dirs, files in os.walk(project_dir): + if DOCS_FILE in files: + # copy doc file to output and replace example references + src = os.path.join(root, DOCS_FILE) + # remove empty strings from path parts + path_parts = list(filter(None, root.split(os.sep))) + name = path_parts[-1] + path = Path(os.sep.join(path_parts[1:-1])) + if os.path.split(root)[1] == "": + # project level-doc file + project_doc_dir = out_dir / path / name + project_doc_dir.mkdir(parents=True, exist_ok=True) + dest = project_doc_dir / "about.md" + dest.write_text(load_with_examples(src)) + else: + description = None + if METADATA_FILE in files: + source_link = f"{SOURCE_URL}/{root}" + edit_link = f"{EDIT_URL}/{root}/{METADATA_FILE}" + + with open(os.path.join(root, METADATA_FILE)) as stream: + try: + description = yaml.safe_load(stream).get( + "description", None + ) + except yaml.YAMLError: + pass + # dataset or UDF level doc file + if UDF_FILE in files or PROCEDURE_FILE in files: + # UDF-level doc; append to dataset doc + dataset_name = os.path.basename(path) + dataset_doc = out_dir / path.parent / f"{dataset_name}.md" + docfile_content = load_with_examples(src) + with open(dataset_doc, "a") as dataset_doc_file: + dataset_doc_file.write("\n\n") + # Inject a level-2 header with the UDF name & type + is_udf = UDF_FILE in files + routine_type = "UDF" if is_udf else "Stored Procedure" + dataset_doc_file.write(f"## {name} ({routine_type})\n\n") + # 
Inject the "description" from metadata.yaml + if description: + formated = format_url(description) + dataset_doc_file.write(f"{formated}\n\n") + # Inject the contents of the README.md + dataset_doc_file.write(docfile_content) + # Add links to source and edit + sourced = add_source_and_edit(source_link, edit_link) + dataset_doc_file.write(f"{sourced}\n\n") + else: + # dataset-level doc; create a new doc file + dest = out_dir / path / f"{name}.md" + dest.write_text(load_with_examples(src)) diff --git a/tests/docs/test_generate_docs.py b/tests/docs/test_generate_docs.py index 5f50a8631d..713af887c6 100644 --- a/tests/docs/test_generate_docs.py +++ b/tests/docs/test_generate_docs.py @@ -3,7 +3,7 @@ from pathlib import Path import pytest -from bigquery_etl.docs.generate_docs import load_with_examples +from bigquery_etl.docs.mozfun.generate_mozfun_docs import load_with_examples TEST_DIR = Path(__file__).parent.parent