Extract mozfun docs generation and modify call order (#2689)
This commit is contained in:
Родитель
10ec0a4510
Коммит
c1925039de
|
@ -1,27 +1,20 @@
|
||||||
"""Generates documentations for provided projects."""
|
"""Generates documentations for provided projects."""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import shutil
|
import shutil
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
from bigquery_etl.docs.bqetl.generate_bqetl_docs import generate_bqetl_docs
|
from bigquery_etl.docs.bqetl.generate_bqetl_docs import generate_bqetl_docs
|
||||||
from bigquery_etl.docs.derived_datasets import generate_derived_dataset_docs
|
from bigquery_etl.docs.derived_datasets.generate_derived_dataset_docs import (
|
||||||
|
generate_derived_dataset_docs,
|
||||||
|
)
|
||||||
|
from bigquery_etl.docs.mozfun.generate_mozfun_docs import generate_mozfun_docs
|
||||||
from bigquery_etl.util import standard_args
|
from bigquery_etl.util import standard_args
|
||||||
|
|
||||||
DEFAULT_PROJECTS_DIRS = ["sql/mozfun/", "sql/moz-fx-data-shared-prod/"]
|
DEFAULT_PROJECTS_DIRS = ["sql/mozfun/", "sql/moz-fx-data-shared-prod/"]
|
||||||
DOCS_FILE = "README.md"
|
|
||||||
UDF_FILE = "udf.sql"
|
|
||||||
PROCEDURE_FILE = "stored_procedure.sql"
|
|
||||||
METADATA_FILE = "metadata.yaml"
|
|
||||||
DOCS_DIR = "docs/"
|
DOCS_DIR = "docs/"
|
||||||
INDEX_MD = "index.md"
|
INDEX_MD = "index.md"
|
||||||
SQL_REF_RE = r"@sql\((.+)\)"
|
|
||||||
SOURCE_URL = "https://github.com/mozilla/bigquery-etl/blob/generated-sql"
|
|
||||||
EDIT_URL = "https://github.com/mozilla/bigquery-etl/edit/generated-sql"
|
|
||||||
|
|
||||||
parser = ArgumentParser(description=__doc__)
|
parser = ArgumentParser(description=__doc__)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@ -46,33 +39,6 @@ parser.add_argument(
|
||||||
standard_args.add_log_level(parser)
|
standard_args.add_log_level(parser)
|
||||||
|
|
||||||
|
|
||||||
def format_url(doc):
    """Turn bare URLs in ``doc`` into Markdown autolinks (``<url>``).

    URLs directly preceded by ``(`` (the target of a Markdown inline link) or
    by ``<`` (already an autolink) are left untouched. Trailing sentence
    punctuation and an unbalanced closing parenthesis stay outside the
    generated link, so ``See https://example.com.`` becomes
    ``See <https://example.com>.``.

    Args:
        doc: Documentation text to process.

    Returns:
        The text with every bare ``http://`` / ``https://`` URL wrapped in
        angle brackets.
    """

    def _wrap(match):
        # Find where the actual URL ends: peel off punctuation that belongs
        # to the surrounding sentence rather than to the link target.
        url = match.group(1)
        cut = len(url)
        while cut > 0:
            last = url[cut - 1]
            if last in ".,;:!?":
                cut -= 1
            elif last == ")" and url.count("(", 0, cut) < url.count(")", 0, cut):
                # A ")" without a matching "(" inside the URL closes a
                # parenthesis opened in the surrounding text.
                cut -= 1
            else:
                break
        return f"<{url[:cut]}>{url[cut:]}"

    # "<" and ">" are never part of the URL, which keeps the function from
    # re-wrapping text that has already been processed.
    return re.sub(r"(?<![(<])(https?://[^\s<>]+)", _wrap, doc)
|
|
||||||
|
|
||||||
|
|
||||||
def load_with_examples(file):
    """Load a doc file and inline the SQL examples it references.

    Every ``@sql(<path>)`` marker found via ``SQL_REF_RE`` is replaced with a
    fenced ``sql`` code block holding the stripped content of the referenced
    file. Referenced paths are resolved relative to the directory of ``file``.

    Args:
        file: Path (``str`` or path-like) of the documentation file to load.

    Returns:
        The file content with all SQL references expanded.

    Raises:
        OSError: If ``file`` or a referenced SQL file cannot be read.
    """
    doc_path = Path(file)
    # Read as UTF-8 explicitly instead of depending on the platform's locale
    # encoding, so the result is the same on every machine.
    file_content = doc_path.read_text(encoding="utf-8")

    for sql_ref in re.findall(SQL_REF_RE, file_content):
        example_sql = (doc_path.parent / sql_ref).read_text(encoding="utf-8")
        md_sql = f"```sql\n{example_sql.strip()}\n```"
        file_content = file_content.replace(f"@sql({sql_ref})", md_sql)

    return file_content
|
|
||||||
|
|
||||||
|
|
||||||
def add_source_and_edit(source_url, edit_url):
    """Build the Markdown footer linking to a routine's source and editor.

    Args:
        source_url: URL of the routine's source directory.
        edit_url: URL that opens the routine's metadata.yaml for editing.

    Returns:
        A single line of the form ``[Source](...) | [Edit](...)``.
    """
    source_md = f"[Source]({source_url})"
    edit_md = f"[Edit]({edit_url})"
    return f"{source_md} | {edit_md}"
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Generate documentation for project."""
|
"""Generate documentation for project."""
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
@ -102,65 +68,10 @@ def main():
|
||||||
if not os.path.isdir(project_dir):
|
if not os.path.isdir(project_dir):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for root, _dirs, files in os.walk(project_dir):
|
if "mozfun" in project_dir:
|
||||||
if DOCS_FILE in files:
|
generate_mozfun_docs(out_dir, project_dir)
|
||||||
# copy doc file to output and replace example references
|
else:
|
||||||
src = os.path.join(root, DOCS_FILE)
|
generate_derived_dataset_docs(out_dir, project_dir)
|
||||||
# remove empty strings from path parts
|
|
||||||
path_parts = list(filter(None, root.split(os.sep)))
|
|
||||||
name = path_parts[-1]
|
|
||||||
path = Path(os.sep.join(path_parts[1:-1]))
|
|
||||||
if "mozfun" in project_dir:
|
|
||||||
if os.path.split(root)[1] == "":
|
|
||||||
# project level-doc file
|
|
||||||
project_doc_dir = out_dir / path / name
|
|
||||||
project_doc_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
dest = project_doc_dir / "about.md"
|
|
||||||
dest.write_text(load_with_examples(src))
|
|
||||||
else:
|
|
||||||
description = None
|
|
||||||
if METADATA_FILE in files:
|
|
||||||
source_link = f"{SOURCE_URL}/{root}"
|
|
||||||
edit_link = f"{EDIT_URL}/{root}/{METADATA_FILE}"
|
|
||||||
|
|
||||||
with open(os.path.join(root, METADATA_FILE)) as stream:
|
|
||||||
try:
|
|
||||||
description = yaml.safe_load(stream).get(
|
|
||||||
"description", None
|
|
||||||
)
|
|
||||||
except yaml.YAMLError:
|
|
||||||
pass
|
|
||||||
# dataset or UDF level doc file
|
|
||||||
if UDF_FILE in files or PROCEDURE_FILE in files:
|
|
||||||
# UDF-level doc; append to dataset doc
|
|
||||||
dataset_name = os.path.basename(path)
|
|
||||||
dataset_doc = out_dir / path.parent / f"{dataset_name}.md"
|
|
||||||
docfile_content = load_with_examples(src)
|
|
||||||
with open(dataset_doc, "a") as dataset_doc_file:
|
|
||||||
dataset_doc_file.write("\n\n")
|
|
||||||
# Inject a level-2 header with the UDF name & type
|
|
||||||
is_udf = UDF_FILE in files
|
|
||||||
routine_type = "UDF" if is_udf else "Stored Procedure"
|
|
||||||
dataset_doc_file.write(
|
|
||||||
f"## {name} ({routine_type})\n\n"
|
|
||||||
)
|
|
||||||
# Inject the "description" from metadata.yaml
|
|
||||||
if description:
|
|
||||||
formated = format_url(description)
|
|
||||||
dataset_doc_file.write(f"{formated}\n\n")
|
|
||||||
# Inject the contents of the README.md
|
|
||||||
dataset_doc_file.write(docfile_content)
|
|
||||||
# Add links to source and edit
|
|
||||||
sourced = add_source_and_edit(source_link, edit_link)
|
|
||||||
dataset_doc_file.write(f"{sourced}\n\n")
|
|
||||||
else:
|
|
||||||
# dataset-level doc; create a new doc file
|
|
||||||
dest = out_dir / path / f"{name}.md"
|
|
||||||
dest.write_text(load_with_examples(src))
|
|
||||||
else:
|
|
||||||
generate_derived_dataset_docs.generate_derived_dataset_docs(
|
|
||||||
out_dir, project_dir
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -0,0 +1,97 @@
|
||||||
|
"""Generate documentation for mozfun."""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
DOCS_FILE = "README.md"
|
||||||
|
METADATA_FILE = "metadata.yaml"
|
||||||
|
SOURCE_URL = "https://github.com/mozilla/bigquery-etl/blob/generated-sql"
|
||||||
|
EDIT_URL = "https://github.com/mozilla/bigquery-etl/edit/generated-sql"
|
||||||
|
UDF_FILE = "udf.sql"
|
||||||
|
PROCEDURE_FILE = "stored_procedure.sql"
|
||||||
|
SQL_REF_RE = r"@sql\((.+)\)"
|
||||||
|
|
||||||
|
|
||||||
|
def format_url(doc):
    """Turn bare URLs in ``doc`` into Markdown autolinks (``<url>``).

    URLs directly preceded by ``(`` (the target of a Markdown inline link) or
    by ``<`` (already an autolink) are left untouched. Trailing sentence
    punctuation and an unbalanced closing parenthesis stay outside the
    generated link, so ``See https://example.com.`` becomes
    ``See <https://example.com>.``.

    Args:
        doc: Documentation text to process.

    Returns:
        The text with every bare ``http://`` / ``https://`` URL wrapped in
        angle brackets.
    """

    def _wrap(match):
        # Find where the actual URL ends: peel off punctuation that belongs
        # to the surrounding sentence rather than to the link target.
        url = match.group(1)
        cut = len(url)
        while cut > 0:
            last = url[cut - 1]
            if last in ".,;:!?":
                cut -= 1
            elif last == ")" and url.count("(", 0, cut) < url.count(")", 0, cut):
                # A ")" without a matching "(" inside the URL closes a
                # parenthesis opened in the surrounding text.
                cut -= 1
            else:
                break
        return f"<{url[:cut]}>{url[cut:]}"

    # "<" and ">" are never part of the URL, which keeps the function from
    # re-wrapping text that has already been processed.
    return re.sub(r"(?<![(<])(https?://[^\s<>]+)", _wrap, doc)
|
||||||
|
|
||||||
|
|
||||||
|
def load_with_examples(file):
    """Load a doc file and inline the SQL examples it references.

    Every ``@sql(<path>)`` marker found via ``SQL_REF_RE`` is replaced with a
    fenced ``sql`` code block holding the stripped content of the referenced
    file. Referenced paths are resolved relative to the directory of ``file``.

    Args:
        file: Path (``str`` or path-like) of the documentation file to load.

    Returns:
        The file content with all SQL references expanded.

    Raises:
        OSError: If ``file`` or a referenced SQL file cannot be read.
    """
    doc_path = Path(file)
    # Read as UTF-8 explicitly instead of depending on the platform's locale
    # encoding, so the result is the same on every machine.
    file_content = doc_path.read_text(encoding="utf-8")

    for sql_ref in re.findall(SQL_REF_RE, file_content):
        example_sql = (doc_path.parent / sql_ref).read_text(encoding="utf-8")
        md_sql = f"```sql\n{example_sql.strip()}\n```"
        file_content = file_content.replace(f"@sql({sql_ref})", md_sql)

    return file_content
|
||||||
|
|
||||||
|
|
||||||
|
def add_source_and_edit(source_url, edit_url):
    """Build the Markdown footer linking to a routine's source and editor.

    Args:
        source_url: URL of the routine's source directory.
        edit_url: URL that opens the routine's metadata.yaml for editing.

    Returns:
        A single line of the form ``[Source](...) | [Edit](...)``.
    """
    source_md = f"[Source]({source_url})"
    edit_md = f"[Edit]({edit_url})"
    return f"{source_md} | {edit_md}"
|
||||||
|
|
||||||
|
|
||||||
|
def _read_description(metadata_path):
    """Return the ``description`` entry of a metadata file, or ``None``.

    ``None`` is returned when the file is not valid YAML, is empty, does not
    contain a mapping at the top level, or has no ``description`` key.
    """
    with open(metadata_path, encoding="utf-8") as stream:
        try:
            metadata = yaml.safe_load(stream)
        except yaml.YAMLError:
            # Best effort: a broken metadata file must not abort docs
            # generation, the routine is simply documented without it.
            return None
    if not isinstance(metadata, dict):
        # safe_load yields None for an empty document.
        return None
    return metadata.get("description")


def generate_mozfun_docs(out_dir, project_dir):
    """Generate documentation for mozfun.

    Walks ``project_dir`` and handles every directory that contains a
    ``README.md``. The output location is derived from the directory path
    with its first component dropped:

    * the walked root itself, when its path ends with a separator, is written
      to ``<out_dir>/<path>/<name>/about.md``;
    * a directory holding ``udf.sql`` or ``stored_procedure.sql`` is appended
      as a level-2 section to the Markdown file of its parent dataset,
      together with the metadata description (if any) and source/edit links;
    * any other directory is written to a new ``<out_dir>/<path>/<name>.md``.

    Args:
        out_dir: Base output directory; combined with ``pathlib.Path``
            objects via the ``/`` operator.
        project_dir: Directory tree to scan for documentation files.
    """
    for root, _dirs, files in os.walk(project_dir):
        if DOCS_FILE not in files:
            continue

        src = os.path.join(root, DOCS_FILE)
        # remove empty strings from path parts
        path_parts = list(filter(None, root.split(os.sep)))
        name = path_parts[-1]
        path = Path(os.sep.join(path_parts[1:-1]))

        if os.path.split(root)[1] == "":
            # project-level doc file
            project_doc_dir = out_dir / path / name
            project_doc_dir.mkdir(parents=True, exist_ok=True)
            dest = project_doc_dir / "about.md"
            dest.write_text(load_with_examples(src), encoding="utf-8")
            continue

        if UDF_FILE not in files and PROCEDURE_FILE not in files:
            # dataset-level doc; create a new doc file
            dest = out_dir / path / f"{name}.md"
            dest.write_text(load_with_examples(src), encoding="utf-8")
            continue

        # Routine-level doc; append to the doc file of the parent dataset.
        has_metadata = METADATA_FILE in files
        description = None
        if has_metadata:
            description = _read_description(os.path.join(root, METADATA_FILE))

        # Links are rebuilt for every directory so that a routine never
        # inherits the links of a previously visited one.
        source_link = f"{SOURCE_URL}/{root}"
        if has_metadata:
            edit_link = f"{EDIT_URL}/{root}/{METADATA_FILE}"
            links = add_source_and_edit(source_link, edit_link)
        else:
            # Without metadata.yaml there is nothing for the edit link to open.
            links = f"[Source]({source_link})"

        dataset_doc = out_dir / path.parent / f"{path.name}.md"
        docfile_content = load_with_examples(src)
        routine_type = "UDF" if UDF_FILE in files else "Stored Procedure"

        with open(dataset_doc, "a", encoding="utf-8") as dataset_doc_file:
            dataset_doc_file.write("\n\n")
            # Inject a level-2 header with the routine name & type
            dataset_doc_file.write(f"## {name} ({routine_type})\n\n")
            # Inject the "description" from metadata.yaml
            if description:
                dataset_doc_file.write(f"{format_url(description)}\n\n")
            # Inject the contents of the README.md
            dataset_doc_file.write(docfile_content)
            # Add links to source and edit
            dataset_doc_file.write(f"{links}\n\n")
|
|
@ -3,7 +3,7 @@ from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from bigquery_etl.docs.generate_docs import load_with_examples
|
from bigquery_etl.docs.mozfun.generate_mozfun_docs import load_with_examples
|
||||||
|
|
||||||
TEST_DIR = Path(__file__).parent.parent
|
TEST_DIR = Path(__file__).parent.parent
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче