Swift: document and partially simplify codegen

This adds:
* a base `README.md` file to `codegen`
* module docstrings for the modules in `generators`
* help strings on all command line flags

Moreover, some unneeded command line flags (`--cpp-namespace`,
`--cpp-include-dir` and `--trap-affix`) have been dropped.
This commit is contained in:
Paolo Tranquilli 2022-06-02 13:00:40 +02:00
Родитель 2187bf5dde
Коммит ede6bd8ffe
13 изменённых файлов: 158 добавлений и 84 удалений

37
swift/codegen/README.md Normal file
Просмотреть файл

@ -0,0 +1,37 @@
# Code generation suite
This directory contains the code generation suite used by the Swift extractor and the QL library. The suite uses
the abstract class specification in [`schema.yml`](schema.yml) to generate:
* [the `dbscheme` file](../ql/lib/swift.dbscheme) (see [`dbschemegen.py`](generators/dbschemegen.py))
* [the QL generated code](../ql/lib/codeql/swift/generated) and, when
appropriate, [the corresponding stubs](../ql/lib/codeql/swift/elements) (see [`qlgen.py`](generators/qlgen.py))
* C++ tags and trap entries (see [`trapgen.py`](generators/trapgen.py))
* C++ structured classes (see [`cppgen.py`](generators/cppgen.py))
## Usage
By default `bazel run //swift/codegen` will update all checked-in generated files (`dbscheme` and QL sources). You can
append `--` followed by other options to tweak the behaviour; this is mainly intended for debugging.
See `bazel run //swift/codegen -- --help` for a list of all options. In particular, `--generate` can be used with a
comma-separated list to select what to generate (choosing among `dbscheme`, `ql`, `trap` and `cpp`).
C++ code is generated during build (see [`swift/extractor/trap/BUILD.bazel`](../extractor/trap/BUILD.bazel)). After a
build you can browse the generated code in `bazel-bin/swift/extractor/trap/generated`.
## Implementation notes
The suite uses [mustache templating](https://mustache.github.io/) for generation. Templates are
in [the `templates` directory](templates), prefixed with the generation target they are used for.
Rather than passing dictionaries to the templating engine, the suite uses Python dataclasses defined
in [the `lib` directory](lib). For each of the four generation targets, the entry point of the implementation is
the `generate` function of the corresponding module in [the `generators` directory](generators).
Finally, [`codegen.py`](codegen.py) is the driver script gluing everything together and specifying the command line
options.
Unit tests are in [the `test` directory](test) and can be run via `bazel test //swift/codegen/test`.
For more details about each specific generation target, please refer to the module docstrings
in [the `generators` directory](generators).

Просмотреть файл

@ -14,21 +14,29 @@ from swift.codegen.generators import generate
def _parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser()
p.add_argument("--generate", type=lambda x: x.split(","), default=["dbscheme", "ql"])
p.add_argument("--verbose", "-v", action="store_true")
p.add_argument("--swift-dir", type=_abspath, default=paths.swift_dir)
p.add_argument("--schema", type=_abspath, default=paths.swift_dir / "codegen/schema.yml")
p.add_argument("--dbscheme", type=_abspath, default=paths.swift_dir / "ql/lib/swift.dbscheme")
p.add_argument("--ql-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/generated")
p.add_argument("--ql-stub-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/elements")
p.add_argument("--ql-format", action="store_true", default=True)
p.add_argument("--no-ql-format", action="store_false", dest="ql_format")
p.add_argument("--codeql-binary", default="codeql")
p.add_argument("--cpp-output", type=_abspath)
p.add_argument("--cpp-namespace", default="codeql")
p.add_argument("--trap-affix", default="Trap")
p.add_argument("--cpp-include-dir", default="swift/extractor/trap")
p = argparse.ArgumentParser(description="Code generation suite")
p.add_argument("--generate", type=lambda x: x.split(","), default=["dbscheme", "ql"],
help="specify what targets to generate as a comma separated list, choosing among dbscheme, ql, trap "
"and cpp")
p.add_argument("--verbose", "-v", action="store_true", help="print more information")
p.add_argument("--swift-dir", type=_abspath, default=paths.swift_dir,
help="the directory that should be regarded as the root of the swift codebase. Used to compute QL "
"imports and in some comments (default %(default)s)")
p.add_argument("--schema", type=_abspath, default=paths.swift_dir / "codegen/schema.yml",
help="input schema file (default %(default)s)")
p.add_argument("--dbscheme", type=_abspath, default=paths.swift_dir / "ql/lib/swift.dbscheme",
help="output file for dbscheme generation, input file for trap generation (default %(default)s)")
p.add_argument("--ql-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/generated",
help="output directory for generated QL files (default %(default)s)")
p.add_argument("--ql-stub-output", type=_abspath, default=paths.swift_dir / "ql/lib/codeql/swift/elements",
help="output directory for QL stub/customization files (default %(default)s). Defines also the "
"generated qll file importing every class file")
p.add_argument("--ql-format", action="store_true", default=True,
help="use codeql to autoformat QL files (which is the default default)")
p.add_argument("--no-ql-format", action="store_false", dest="ql_format", help="do not format QL files")
p.add_argument("--codeql-binary", default="codeql", help="command to use for QL formatting (default %(default)s)")
p.add_argument("--cpp-output", type=_abspath,
help="output directory for generated C++ files, required if trap or cpp is provided to --generate")
return p.parse_args()

Просмотреть файл

@ -1,3 +1,16 @@
"""
C++ trap class generation
`generate(opts, renderer)` will generate `TrapClasses.h` out of a `yml` schema file.
Each class in the schema gets a corresponding `struct` in `TrapClasses.h`, where:
* inheritance is preserved
* each property will be a corresponding field in the `struct` (with repeated properties mapping to `std::vector` and
optional ones to `std::optional`)
* final classes get a streaming operator that serializes the whole class into the corresponding trap emissions (using
`TrapEntries.h` from `trapgen`).
"""
import functools
from typing import Dict
@ -7,7 +20,7 @@ from toposort import toposort_flatten
from swift.codegen.lib import cpp, schema
def _get_type(t: str, trap_affix: str) -> str:
def _get_type(t: str) -> str:
if t is None:
# this is a predicate
return "bool"
@ -16,11 +29,11 @@ def _get_type(t: str, trap_affix: str) -> str:
if t == "boolean":
return "bool"
if t[0].isupper():
return f"{trap_affix}Label<{t}Tag>"
return f"TrapLabel<{t}Tag>"
return t
def _get_field(cls: schema.Class, p: schema.Property, trap_affix: str) -> cpp.Field:
def _get_field(cls: schema.Class, p: schema.Property) -> cpp.Field:
trap_name = None
if not p.is_single:
trap_name = inflection.camelize(f"{cls.name}_{p.name}")
@ -28,7 +41,7 @@ def _get_field(cls: schema.Class, p: schema.Property, trap_affix: str) -> cpp.Fi
trap_name = inflection.pluralize(trap_name)
args = dict(
field_name=p.name + ("_" if p.name in cpp.cpp_keywords else ""),
type=_get_type(p.type, trap_affix),
type=_get_type(p.type),
is_optional=p.is_optional,
is_repeated=p.is_repeated,
is_predicate=p.is_predicate,
@ -39,9 +52,8 @@ def _get_field(cls: schema.Class, p: schema.Property, trap_affix: str) -> cpp.Fi
class Processor:
def __init__(self, data: Dict[str, schema.Class], trap_affix: str):
def __init__(self, data: Dict[str, schema.Class]):
self._classmap = data
self._trap_affix = trap_affix
@functools.lru_cache(maxsize=None)
def _get_class(self, name: str) -> cpp.Class:
@ -52,7 +64,7 @@ class Processor:
return cpp.Class(
name=name,
bases=[self._get_class(b) for b in cls.bases],
fields=[_get_field(cls, p, self._trap_affix) for p in cls.properties],
fields=[_get_field(cls, p) for p in cls.properties],
final=not cls.derived,
trap_name=trap_name,
)
@ -64,7 +76,6 @@ class Processor:
def generate(opts, renderer):
assert opts.cpp_output
processor = Processor({cls.name: cls for cls in schema.load(opts.schema).classes}, opts.trap_affix)
processor = Processor({cls.name: cls for cls in schema.load(opts.schema).classes})
out = opts.cpp_output
renderer.render(cpp.ClassList(processor.get_classes(), opts.cpp_namespace, opts.trap_affix,
opts.cpp_include_dir, opts.schema), out / f"{opts.trap_affix}Classes.h")
renderer.render(cpp.ClassList(processor.get_classes(), opts.schema), out / f"TrapClasses.h")

Просмотреть файл

@ -1,4 +1,19 @@
#!/usr/bin/env python3
"""
dbscheme file generation
`generate(opts, renderer)` will generate a `dbscheme` file out of a `yml` schema file.
Each final class in the schema file will get a corresponding defining DB table with the id and single properties as
columns.
Moreover:
* single properties in non-final classes will also trigger generation of a table with an id reference and all single
properties as columns
* each optional property will trigger generation of a table with an id reference and the property value as columns
* each repeated property will trigger generation of a table with an id reference, an `int` index and the property value
as columns
The type hierarchy will be translated to corresponding `union` declarations.
"""
import pathlib
import inflection
@ -63,11 +78,10 @@ def cls_to_dbscheme(cls: schema.Class):
name=inflection.underscore(f"{cls.name}_{f.name}"),
columns=[
Column("id", type=dbtype(cls.name)),
],
],
)
def get_declarations(data: schema.Schema):
return [d for cls in data.classes for d in cls_to_dbscheme(cls)]

Просмотреть файл

@ -1,4 +1,15 @@
#!/usr/bin/env python3
"""
C++ trap entry generation
`generate(opts, renderer)` will generate `TrapTags.h` (for types of labels) and `TrapEntries.h` (for trap emission) out
of a dbscheme file.
Each table in the `dbscheme` gets a corresponding `struct` defined in `TrapEntries.h` with a field for each column and
an appropriate streaming operator for the trap emission.
Unions in the `dbscheme` are used to populate a hierarchy of tags (empty structs) in `TrapTags.h` that is used to
enforce a type system on trap labels (see `TrapLabel.h`).
"""
import logging
@ -15,10 +26,10 @@ def get_tag_name(s):
return inflection.camelize(s[1:])
def get_cpp_type(schema_type: str, trap_affix: str):
def get_cpp_type(schema_type: str):
if schema_type.startswith("@"):
tag = get_tag_name(schema_type)
return f"{trap_affix}Label<{tag}Tag>"
return f"TrapLabel<{tag}Tag>"
if schema_type == "string":
return "std::string"
if schema_type == "boolean":
@ -26,13 +37,13 @@ def get_cpp_type(schema_type: str, trap_affix: str):
return schema_type
def get_field(c: dbscheme.Column, trap_affix: str):
def get_field(c: dbscheme.Column):
args = {
"field_name": c.schema_name,
"type": c.type,
}
args.update(cpp.get_field_override(c.schema_name))
args["type"] = get_cpp_type(args["type"], trap_affix)
args["type"] = get_cpp_type(args["type"])
return cpp.Field(**args)
@ -43,14 +54,14 @@ def get_binding_column(t: dbscheme.Table):
return None
def get_trap(t: dbscheme.Table, trap_affix: str):
def get_trap(t: dbscheme.Table):
id = get_binding_column(t)
if id:
id = get_field(id, trap_affix)
id = get_field(id)
return cpp.Trap(
table_name=t.name,
name=inflection.camelize(t.name),
fields=[get_field(c, trap_affix) for c in t.columns],
fields=[get_field(c) for c in t.columns],
id=id,
)
@ -63,14 +74,14 @@ def generate(opts, renderer):
traps = []
for e in dbscheme.iterload(opts.dbscheme):
if e.is_table:
traps.append(get_trap(e, opts.trap_affix))
traps.append(get_trap(e))
elif e.is_union:
tag_graph.setdefault(e.lhs, set())
for d in e.rhs:
tag_graph.setdefault(d.type, set()).add(e.lhs)
renderer.render(cpp.TrapList(traps, opts.cpp_namespace, opts.trap_affix, opts.cpp_include_dir, opts.dbscheme),
out / f"{opts.trap_affix}Entries.h")
renderer.render(cpp.TrapList(traps, opts.dbscheme),
out / f"TrapEntries.h")
tags = []
for index, tag in enumerate(toposort_flatten(tag_graph)):
@ -80,4 +91,4 @@ def generate(opts, renderer):
index=index,
id=tag,
))
renderer.render(cpp.TagList(tags, opts.cpp_namespace, opts.dbscheme), out / f"{opts.trap_affix}Tags.h")
renderer.render(cpp.TagList(tags, opts.dbscheme), out / f"TrapTags.h")

Просмотреть файл

@ -101,9 +101,6 @@ class TrapList:
template: ClassVar = 'trap_traps'
traps: List[Trap]
namespace: str
trap_affix: str
include_dir: str
source: str
@ -112,7 +109,6 @@ class TagList:
template: ClassVar = 'trap_tags'
tags: List[Tag]
namespace: str
source: str
@ -149,7 +145,4 @@ class ClassList:
template: ClassVar = "cpp_classes"
classes: List[Class]
namespace: str
trap_affix: str
include_dir: str
source: str

Просмотреть файл

@ -1,3 +1,17 @@
"""
QL files generation
`generate(opts, renderer)` will generate QL classes and manage stub files out of a `yml` schema file.
Each class (for example, `Foo`) in the schema triggers:
* generation of a `FooBase` class implementation translating all properties into appropriate getters
* generation of a stub file which defines `Foo` as extending `FooBase`, unless that stub already exists and has been
customized. The stub can be used to add hand-written code to `Foo`, which requires removal of the `// generated` header
comment in that file.
All generated base classes actually import these customizations when referencing other classes.
Generated files that no longer correspond to any class in the schema are deleted. Customized stubs are however
left behind and must be dealt with by hand.
"""
import pathlib
from dataclasses import dataclass, field
from typing import List, ClassVar

Просмотреть файл

@ -6,10 +6,10 @@
#include <optional>
#include <vector>
#include "{{include_dir}}/{{trap_affix}}Label.h"
#include "./{{trap_affix}}Entries.h"
#include "swift/extractor/trap/TrapLabel.h"
#include "./TrapEntries.h"
namespace {{namespace}} {
namespace codeql {
{{#classes}}
struct {{name}}{{#final}} : Binding<{{name}}Tag>{{#bases}}, {{ref.name}}{{/bases}}{{/final}}{{^final}}{{#has_bases}}: {{#bases}}{{^first}}, {{/first}}{{ref.name}}{{/bases}}{{/has_bases}}{{/final}} {
@ -25,29 +25,29 @@ struct {{name}}{{#final}} : Binding<{{name}}Tag>{{#bases}}, {{ref.name}}{{/bases
{{/final}}
protected:
void emit({{^final}}{{trap_affix}}Label<{{name}}Tag> id, {{/final}}std::ostream& out) const {
void emit({{^final}}TrapLabel<{{name}}Tag> id, {{/final}}std::ostream& out) const {
{{#trap_name}}
out << {{.}}{{trap_affix}}{id{{#single_fields}}, {{field_name}}{{/single_fields}}} << '\n';
out << {{.}}Trap{id{{#single_fields}}, {{field_name}}{{/single_fields}}} << '\n';
{{/trap_name}}
{{#bases}}
{{ref.name}}::emit(id, out);
{{/bases}}
{{#fields}}
{{#is_predicate}}
if ({{field_name}}) out << {{trap_name}}{{trap_affix}}{id} << '\n';
if ({{field_name}}) out << {{trap_name}}Trap{id} << '\n';
{{/is_predicate}}
{{#is_optional}}
{{^is_repeated}}
if ({{field_name}}) out << {{trap_name}}{{trap_affix}}{id, *{{field_name}}} << '\n';
if ({{field_name}}) out << {{trap_name}}Trap{id, *{{field_name}}} << '\n';
{{/is_repeated}}
{{/is_optional}}
{{#is_repeated}}
for (auto i = 0u; i < {{field_name}}.size(); ++i) {
{{^is_optional}}
out << {{trap_name}}{{trap_affix}}{id, i, {{field_name}}[i]};
out << {{trap_name}}Trap{id, i, {{field_name}}[i]};
{{/is_optional}}
{{#is_optional}}
if ({{field_name}}[i]) out << {{trap_name}}{{trap_affix}}{id, i, *{{field_name}}[i]};
if ({{field_name}}[i]) out << {{trap_name}}Trap{id, i, *{{field_name}}[i]};
{{/is_optional}}
}
{{/is_repeated}}

Просмотреть файл

@ -2,7 +2,7 @@
// clang-format off
#pragma once
namespace {{namespace}} {
namespace codeql {
{{#tags}}
// {{id}}

Просмотреть файл

@ -5,15 +5,15 @@
#include <iostream>
#include <string>
#include "{{include_dir}}/{{trap_affix}}Label.h"
#include "{{include_dir}}/{{trap_affix}}TagTraits.h"
#include "./{{trap_affix}}Tags.h"
#include "swift/extractor/trap/TrapLabel.h"
#include "swift/extractor/trap/TrapTagTraits.h"
#include "./TrapTags.h"
namespace {{namespace}} {
namespace codeql {
{{#traps}}
// {{table_name}}
struct {{name}}{{trap_affix}} {
struct {{name}}Trap {
static constexpr bool is_binding = {{#id}}true{{/id}}{{^id}}false{{/id}};
{{#id}}
{{type}} getBoundLabel() const { return {{field_name}}; }
@ -24,7 +24,7 @@ struct {{name}}{{trap_affix}} {
{{/fields}}
};
inline std::ostream &operator<<(std::ostream &out, const {{name}}{{trap_affix}} &e) {
inline std::ostream &operator<<(std::ostream &out, const {{name}}Trap &e) {
out << "{{table_name}}("{{#fields}}{{^first}} << ", "{{/first}}
<< {{#get_streamer}}e.{{field_name}}{{/get_streamer}}{{/fields}} << ")";
return out;
@ -34,7 +34,7 @@ inline std::ostream &operator<<(std::ostream &out, const {{name}}{{trap_affix}}
namespace detail {
template<>
struct ToBindingTrapFunctor<{{type}}> {
using type = {{name}}{{trap_affix}};
using type = {{name}}Trap;
};
}
{{/id}}

Просмотреть файл

@ -10,19 +10,13 @@ output_dir = pathlib.Path("path", "to", "output")
@pytest.fixture
def generate(opts, renderer, input):
opts.cpp_output = output_dir
opts.cpp_namespace = "test_namespace"
opts.trap_affix = "TestTrapAffix"
opts.cpp_include_dir = "my/include/dir"
def ret(classes):
input.classes = classes
generated = run_generation(cppgen.generate, opts, renderer)
assert set(generated) == {output_dir / "TestTrapAffixClasses.h"}
generated = generated[output_dir / "TestTrapAffixClasses.h"]
assert set(generated) == {output_dir / "TrapClasses.h"}
generated = generated[output_dir / "TrapClasses.h"]
assert isinstance(generated, cpp.ClassList)
assert generated.namespace == opts.cpp_namespace
assert generated.trap_affix == opts.trap_affix
assert generated.include_dir == opts.cpp_include_dir
return generated.classes
return ret
@ -72,7 +66,7 @@ def test_complex_hierarchy_topologically_ordered(generate):
("a", "a"),
("string", "std::string"),
("boolean", "bool"),
("MyClass", "TestTrapAffixLabel<MyClassTag>"),
("MyClass", "TrapLabel<MyClassTag>"),
])
@pytest.mark.parametrize("property_cls,optional,repeated,trap_name", [
(schema.SingleProperty, False, False, None),

Просмотреть файл

@ -10,16 +10,13 @@ output_dir = pathlib.Path("path", "to", "output")
@pytest.fixture
def generate(opts, renderer, dbscheme_input):
opts.cpp_output = output_dir
opts.cpp_namespace = "test_namespace"
opts.trap_affix = "TrapAffix"
opts.cpp_include_dir = "my/include/dir"
def ret(entities):
dbscheme_input.entities = entities
generated = run_generation(trapgen.generate, opts, renderer)
assert set(generated) == {output_dir /
"TrapAffixEntries.h", output_dir / "TrapAffixTags.h"}
return generated[output_dir / "TrapAffixEntries.h"], generated[output_dir / "TrapAffixTags.h"]
"TrapEntries.h", output_dir / "TrapTags.h"}
return generated[output_dir / "TrapEntries.h"], generated[output_dir / "TrapTags.h"]
return ret
@ -29,9 +26,6 @@ def generate_traps(opts, generate):
def ret(entities):
traps, _ = generate(entities)
assert isinstance(traps, cpp.TrapList)
assert traps.namespace == opts.cpp_namespace
assert traps.trap_affix == opts.trap_affix
assert traps.include_dir == opts.cpp_include_dir
return traps.traps
return ret
@ -42,7 +36,6 @@ def generate_tags(opts, generate):
def ret(entities):
_, tags = generate(entities)
assert isinstance(tags, cpp.TagList)
assert tags.namespace == opts.cpp_namespace
return tags.tags
return ret
@ -106,7 +99,7 @@ def test_one_table_with_two_binding_first_is_id(generate_traps):
@pytest.mark.parametrize("column,field", [
(dbscheme.Column("x", "string"), cpp.Field("x", "std::string")),
(dbscheme.Column("y", "boolean"), cpp.Field("y", "bool")),
(dbscheme.Column("z", "@db_type"), cpp.Field("z", "TrapAffixLabel<DbTypeTag>")),
(dbscheme.Column("z", "@db_type"), cpp.Field("z", "TrapLabel<DbTypeTag>")),
])
def test_one_table_special_types(generate_traps, column, field):
assert generate_traps([

Просмотреть файл

@ -15,7 +15,6 @@ genrule(
"--generate=dbscheme,trap,cpp",
"--schema $(location //swift/codegen:schema)",
"--dbscheme $(RULEDIR)/generated/swift.dbscheme",
"--cpp-include-dir " + package_name(),
"--cpp-output $(RULEDIR)/generated",
]),
exec_tools = ["//swift/codegen"],