# experimenter/schemas/generate_json_schema.py
#
# 280 lines
# 8.5 KiB
# Python

"""
Heavily inspired by pydantic-to-typescript2 (itself a fork of pydantic-to-typescript):
https://github.com/Darius-Labs/pydantic-to-typescript2/blob/main/pydantic2ts/cli/script.py
"""
import json
import re
import shutil
import subprocess
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Iterable
import click
from polyfactory.factories.pydantic_factory import ModelFactory
from pydantic import BaseModel, create_model
from mozilla_nimbus_schemas import experiments, jetstream
# Matches a run of one or more newline characters. It is used to collapse
# multi-line schema descriptions onto a single line.
NEWLINES_RE = re.compile("\n+")
def clean_output_file(ts_path: Path) -> None:
    """Clean up the file the typescript definitions were written to.

    Two things happen:

    1. The 'top model' is removed.
       This is a faux pydantic model with references to all the *actual* models
       necessary for generating clean typescript definitions without any
       duplicates. We don't actually want it in the output, so its interface is
       cut out of the generated typescript file.
    2. A banner comment is prepended with clear instructions for how to
       regenerate the typescript definitions.

    If no complete ``_TopModel_`` interface is found (the opening line or its
    closing ``}`` is missing), nothing is cut and only the banner is added.

    Args:
        ts_path: Path of the generated typescript file. It is rewritten in place.
    """
    # The generated typescript is UTF-8; do not depend on the locale encoding.
    with ts_path.open("r", encoding="utf-8") as f:
        lines = f.readlines()

    # Locate the first line of the top model and the first "}" line after it.
    start, end = None, None
    for i, line in enumerate(lines):
        stripped = line.rstrip("\r\n")
        if stripped == "export interface _TopModel_ {":
            start = i
        elif start is not None and stripped == "}":
            end = i
            break

    banner_comment_lines = [
        "/* tslint:disable */\n",
        "/* eslint-disable */\n",
        "/**\n",
        "/* This file was automatically generated from pydantic models.\n",
        "/* Do not modify by hand - update the pydantic models and re-run\n",
        " * make schemas_build\n",
        " */\n\n",
    ]

    # Only cut when a complete interface was located. Slicing with ``None``
    # bounds would raise a TypeError (or silently drop unrelated lines).
    if start is None or end is None:
        kept_lines = lines
    else:
        kept_lines = lines[:start] + lines[end + 1 :]

    with ts_path.open("w", encoding="utf-8") as f:
        f.writelines(banner_comment_lines + kept_lines)
def clean_schema(schema: dict[str, Any]) -> None:
    """Tidy one JSON schema definition in place.

    1. Titles are dropped from every entry under ``properties``.
       If we don't do this, each property will have its own interface in the
       resulting typescript file (which is a LOT of unnecessary noise).
       Properties that are a ``$ref`` also lose their description.
    2. The useless "An enumeration." description applied to Enums which don't
       have a docstring is removed.
    3. ``additionalProperties`` is set to ``False`` unless the schema already
       carries a truthy value for it.
    """
    properties = schema.get("properties", {})
    for field_schema in properties.values():
        field_schema.pop("title", None)
        # json-schema-to-typescript will generate a redundant type.
        if "$ref" in field_schema:
            field_schema.pop("description", None)

    is_undocumented_enum = (
        "enum" in schema and schema.get("description") == "An enumeration."
    )
    if is_undocumented_enum:
        schema.pop("description")

    # this prevents json2ts from adding `[k: string]: unknown;` to every interface
    allows_extra = schema.get("additionalProperties")
    if not allows_extra:
        schema["additionalProperties"] = False
def iterate_models() -> dict[str, Any]:
    """Build one combined JSON schema from everything the packages export.

    Every name in ``experiments.__all__`` and ``jetstream.__all__`` is resolved
    (a name listed by ``experiments`` is always read from ``experiments``).
    ``ModelFactory`` subclasses are skipped and the remaining objects become
    required fields of a synthetic ``_TopModel_``. Each entry under ``$defs`` of
    that model's serialization-mode schema is passed through ``clean_schema``.

    Returns:
        The JSON schema generated for ``_TopModel_``.
    """
    experiment_names = experiments.__all__

    def _resolve(name: str) -> Any:
        owner = experiments if name in experiment_names else jetstream
        return getattr(owner, name)

    exported_names = [*experiment_names, *jetstream.__all__]
    # ``map`` is lazy, so each name is resolved right before it is checked.
    models = [
        candidate
        for candidate in map(_resolve, exported_names)
        if not issubclass(candidate, ModelFactory)
    ]

    fields = {model.__name__: (model, ...) for model in models}
    top_model = create_model("_TopModel_", **fields)
    schema = top_model.model_json_schema(mode="serialization")

    for definition in schema.get("$defs", {}).values():
        clean_schema(definition)

    return schema
def prettify_json_schema(schema: dict[str, Any]) -> dict[str, Any]:
# Add a $schema field.
pretty_schema = {
"$schema": "https://json-schema.org/draft/2019-09/schema",
}
# Re-order the properties in the dict so that they are in a sensible order
# for humans consuming these schemas.
# Use this order for top-level keys.
key_order = [
"title",
"description",
"type",
"properties",
"required",
"additionalProperties",
"if",
"then",
"$defs",
]
# If there are any other keys not listed above, splice them in before $defs.
key_order = [
*key_order[:-1],
*(set(schema.keys()) - set(key_order)),
key_order[-1],
]
pretty_schema.update({key: schema[key] for key in key_order if key in schema})
# Assert that the schemas have not structurally changed.
#
# We have to add the $schema field back to the original schema for comparison.
schema["$schema"] = pretty_schema["$schema"]
assert schema == pretty_schema
# Next, lets walk the schema and remove attributes we don't care about.
def _walk_objects(objs: Iterable[dict[str, Any]]):
for obj in objs:
_walk_object(obj)
def _walk_object(obj: dict[str, Any], top_level: bool = False):
# All but the top-level title will be auto-generated base on field names. They are
# not useful.
if not top_level:
obj.pop("title", None)
# We don't support defaults.
obj.pop("default", None)
# This is an OpenAPI extension and it leads to incorrect code generation in our
# case (due to using a boolean discriminator).
obj.pop("discriminator", None)
# Strip newlines from descriptions.
if description := obj.get("description"):
obj["description"] = NEWLINES_RE.sub(" ", description)
# Remove redundant enum entries for constants.
if obj.get("const") is not None:
obj.pop("enum", None)
match obj.get("type"):
case "object":
if properties := obj.get("properties"):
_walk_objects(properties.values())
case "array":
if items := obj.get("items"):
_walk_object(items)
for group_key in ("allOf", "anyOf", "oneOf"):
if group := obj.get(group_key):
_walk_objects(group)
_walk_object(pretty_schema, top_level=True)
if defs := pretty_schema.get("$defs"):
_walk_objects(defs.values())
return pretty_schema
def write_json_schemas(json_schemas_path: Path, python_package_dir: Path):
    """Write one prettified JSON schema file per exported experiments model.

    For every name in ``experiments.__all__`` that is a ``BaseModel`` subclass,
    ``<name>.schema.json`` is written into ``json_schemas_path``. Any other
    entry in that directory is deleted. The directory is then copied to
    ``python_package_dir / "schemas"``, replacing whatever was there.

    Args:
        json_schemas_path: Directory that receives the schema files. It is
            created if missing.
        python_package_dir: Package directory that gets a ``schemas`` copy.
    """
    json_schemas_path.mkdir(exist_ok=True)

    exported = ((name, getattr(experiments, name)) for name in experiments.__all__)
    models = {name: obj for name, obj in exported if issubclass(obj, BaseModel)}

    generated = set()
    for name, model in models.items():
        target = json_schemas_path / f"{name}.schema.json"
        generated.add(target)

        pretty = prettify_json_schema(model.model_json_schema())
        with target.open("w") as out:
            json.dump(pretty, out, indent=2)
            out.write("\n")

    # Ensure we don't include any files in schemas/ that we did not generate (e.g., if a
    # model gets removed). Snapshot the listing before deleting anything.
    stale = [entry for entry in json_schemas_path.iterdir() if entry not in generated]
    for entry in stale:
        entry.unlink()

    # Copy schemas into the python package.
    dist_dir = python_package_dir / "schemas"
    if dist_dir.exists():
        shutil.rmtree(dist_dir)
    shutil.copytree(json_schemas_path, dist_dir)
# Command-line entry point: generates the typescript definitions with json2ts
# and then writes the per-model JSON schemas.
# NOTE: this is deliberately a comment and not a docstring. A docstring on a
# click command is shown as its --help text.
@click.command()
@click.option(
    "--output",
    "ts_output_path",
    type=Path,
    default=Path("index.d.ts"),
    help="Output typescript file.",
)
@click.option(
    "--json-schemas",
    "json_schemas_path",
    type=Path,
    default=Path("schemas"),
    help="Output JSON Schemas to this directory.",
)
@click.option(
    "--python-package-dir",
    "python_package_dir",
    type=Path,
    default=Path("mozilla_nimbus_schemas"),
    help=(
        "The directory to the mozilla-nimbus-schemas python package.\n"
        "\n"
        "Schemas will be installed inside this package at the schemas dir."
    ),
)
def main(*, ts_output_path: Path, json_schemas_path: Path, python_package_dir: Path):
    combined_schema = iterate_models()

    with TemporaryDirectory() as scratch:
        schema_file = Path(scratch) / "schema.json"

        # Close (and thereby flush) the schema file before json2ts reads it.
        with schema_file.open("w") as handle:
            json.dump(combined_schema, handle)

        json2ts_command = [
            "yarn",
            "json2ts",
            "-i",
            str(schema_file),
            "-o",
            str(ts_output_path),
            "--bannerComment",
            "",
        ]
        subprocess.run(json2ts_command, check=True)

    # Drop the synthetic top model from the output and add our own banner.
    clean_output_file(ts_output_path)

    write_json_schemas(json_schemas_path, python_package_dir)
if __name__ == "__main__":
    # Run as a script: click supplies the keyword arguments from the command line.
    main()