MAINT: Add mypy type annotations.

Additionally, this fixes the black formatter check in CI and fixes a few
black violations that had slipped through.
This commit is contained in:
Michael Droettboom 2020-02-28 16:52:20 -05:00
Parent 592a72b972
Commit 68e40a9a16
15 changed files with 347 additions and 189 deletions
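
Two annotation styles appear throughout this commit. Function signatures use inline PEP 484 annotations, while class- and module-level variables use `# type:` comments, since variable annotations (PEP 526) are a Python 3.6 feature and glean_parser still supported Python 3.5 at this point. A minimal sketch of both forms, borrowing names from the diff (the function body is a placeholder):

    from typing import Any, Dict, List

    def class_name(obj_type: str) -> str:  # inline annotations parse fine on 3.5
        return obj_type.title()  # illustrative body only

    # `metric_types: Dict[str, Any] = {}` would be a SyntaxError on Python 3.5,
    # so the comment form is used for variables instead:
    metric_types = {}  # type: Dict[str, Any]
    default_store_names = ["metrics"]  # type: List[str]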

View file

@ -36,10 +36,11 @@ clean-test: ## remove test and coverage artifacts
lint: ## check style with flake8
python3 -m flake8 glean_parser tests
bash -c 'if [[ `python3 --version` =~ "Python 3\.[678]\..*" ]]; then \
if python3 --version | grep 'Python 3\.[678]\..*'; then \
python3 -m black --check glean_parser tests setup.py; \
fi'
fi
python3 -m yamllint glean_parser tests
python3 -m mypy glean_parser
test: ## run tests quickly with the default Python
py.test

View file

@ -11,11 +11,15 @@ Outputter to generate Kotlin code for metrics.
from collections import OrderedDict
import enum
import json
from pathlib import Path
from typing import Any, Dict, List, Union # noqa
from . import metrics
from . import pings
from . import util
def kotlin_datatypes_filter(value):
def kotlin_datatypes_filter(value: util.JSONType) -> str:
"""
A Jinja2 filter that renders Kotlin literals.
@ -65,7 +69,7 @@ def kotlin_datatypes_filter(value):
return "".join(KotlinEncoder().iterencode(value))
def type_name(obj):
def type_name(obj: Union[metrics.Metric, pings.Ping]) -> str:
"""
Returns the Kotlin type to use for a given metric or ping object.
"""
@ -86,7 +90,7 @@ def type_name(obj):
return class_name(obj.type)
def class_name(obj_type):
def class_name(obj_type: str) -> str:
"""
Returns the Kotlin class name for a given metric or ping type.
"""
@ -97,7 +101,9 @@ def class_name(obj_type):
return util.Camelize(obj_type) + "MetricType"
def output_gecko_lookup(objs, output_dir, options={}):
def output_gecko_lookup(
objs: metrics.ObjectTree, output_dir: Path, options: Dict[str, Any] = {}
) -> None:
"""
Given a tree of objects, generate a Kotlin map between Gecko histograms and
Glean SDK metric types.
@ -138,7 +144,9 @@ def output_gecko_lookup(objs, output_dir, options={}):
# },
# "other-type": {}
# }
gecko_metrics = OrderedDict()
gecko_metrics = (
OrderedDict()
) # type: OrderedDict[str, OrderedDict[str, List[Dict[str, str]]]]
# Define scalar-like types.
SCALAR_LIKE_TYPES = ["boolean", "string", "quantity"]
@ -148,7 +156,9 @@ def output_gecko_lookup(objs, output_dir, options={}):
# Glean SDK and GeckoView. See bug 1566356 for more context.
for metric in category_val.values():
# This is not a Gecko metric, skip it.
if not getattr(metric, "gecko_datapoint", False):
if isinstance(metric, pings.Ping) or not getattr(
metric, "gecko_datapoint", False
):
continue
# Put scalars in their own categories, histogram-like in "histograms" and
@ -186,7 +196,9 @@ def output_gecko_lookup(objs, output_dir, options={}):
fd.write("\n")
def output_kotlin(objs, output_dir, options={}):
def output_kotlin(
objs: metrics.ObjectTree, output_dir: Path, options: Dict[str, Any] = {}
) -> None:
"""
Given a tree of objects, output Kotlin code to `output_dir`.

View file

@ -3,25 +3,32 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from pathlib import Path
import re
import sys
from typing import Any, Callable, Dict, Generator, List, Iterable, Tuple, Union # noqa
from . import metrics
from . import parser
from . import util
from yamllint.config import YamlLintConfig
from yamllint import linter
from yamllint.config import YamlLintConfig # type: ignore
from yamllint import linter # type: ignore
def _split_words(name):
LintGenerator = Generator[str, None, None]
def _split_words(name: str) -> List[str]:
"""
Helper function to split words on either `.` or `_`.
"""
return re.split("[._]", name)
def _hamming_distance(str1, str2):
def _hamming_distance(str1: str, str2: str) -> int:
"""
Count the # of differences between strings str1 and str2,
padding the shorter one with whitespace
@ -39,7 +46,9 @@ def _hamming_distance(str1, str2):
return diffs
def check_common_prefix(category_name, metrics):
def check_common_prefix(
category_name: str, metrics: Iterable[metrics.Metric]
) -> LintGenerator:
"""
Check if all metrics begin with a common prefix.
"""
@ -63,7 +72,9 @@ def check_common_prefix(category_name, metrics):
).format(category_name, common_prefix)
def check_unit_in_name(metric, parser_config={}):
def check_unit_in_name(
metric: metrics.Metric, parser_config: Dict[str, Any] = {}
) -> LintGenerator:
"""
The metric name ends in a unit.
"""
@ -87,10 +98,14 @@ def check_unit_in_name(metric, parser_config={}):
name_words = _split_words(metric.name)
unit_in_name = name_words[-1]
if hasattr(metric, "time_unit"):
time_unit = getattr(metric, "time_unit", None)
memory_unit = getattr(metric, "memory_unit", None)
unit = getattr(metric, "unit", None)
if time_unit is not None:
if (
unit_in_name == TIME_UNIT_ABBREV.get(metric.time_unit.name)
or unit_in_name == metric.time_unit.name
unit_in_name == TIME_UNIT_ABBREV.get(time_unit.name)
or unit_in_name == time_unit.name
):
yield (
"Suffix '{}' is redundant with time_unit. " "Only include time_unit."
@ -104,10 +119,10 @@ def check_unit_in_name(metric, parser_config={}):
"Confirm the unit is correct and only include time_unit."
).format(unit_in_name)
elif hasattr(metric, "memory_unit"):
elif memory_unit is not None:
if (
unit_in_name == MEMORY_UNIT_ABBREV.get(metric.memory_unit.name)
or unit_in_name == metric.memory_unit.name
unit_in_name == MEMORY_UNIT_ABBREV.get(memory_unit.name)
or unit_in_name == memory_unit.name
):
yield (
"Suffix '{}' is redundant with memory_unit. "
@ -122,14 +137,16 @@ def check_unit_in_name(metric, parser_config={}):
"Confirm the unit is correct and only include memory_unit."
).format(unit_in_name)
elif hasattr(metric, "unit"):
if unit_in_name == metric.unit:
elif unit is not None:
if unit_in_name == unit:
yield (
"Suffix '{}' is redundant with unit param. " "Only include unit."
).format(unit_in_name)
def check_category_generic(category_name, metrics):
def check_category_generic(
category_name: str, metrics: Iterable[metrics.Metric]
) -> LintGenerator:
"""
The category name is too generic.
"""
@ -139,7 +156,9 @@ def check_category_generic(category_name, metrics):
yield "Category '{}' is too generic.".format(category_name)
def check_bug_number(metric, parser_config={}):
def check_bug_number(
metric: metrics.Metric, parser_config: Dict[str, Any] = {}
) -> LintGenerator:
number_bugs = [str(bug) for bug in metric.bugs if isinstance(bug, int)]
if len(number_bugs):
@ -149,7 +168,9 @@ def check_bug_number(metric, parser_config={}):
).format(", ".join(number_bugs))
def check_valid_in_baseline(metric, parser_config={}):
def check_valid_in_baseline(
metric: metrics.Metric, parser_config: Dict[str, Any] = {}
) -> LintGenerator:
allow_reserved = parser_config.get("allow_reserved", False)
if not allow_reserved and "baseline" in metric.send_in_pings:
@ -159,7 +180,9 @@ def check_valid_in_baseline(metric, parser_config={}):
)
def check_misspelled_pings(metric, parser_config={}):
def check_misspelled_pings(
metric: metrics.Metric, parser_config: Dict[str, Any] = {}
) -> LintGenerator:
builtin_pings = ["metrics", "events"]
for ping in metric.send_in_pings:
@ -174,7 +197,7 @@ def check_misspelled_pings(metric, parser_config={}):
CATEGORY_CHECKS = {
"COMMON_PREFIX": check_common_prefix,
"CATEGORY_GENERIC": check_category_generic,
}
} # type: Dict[str, Callable[[str, Iterable[metrics.Metric]], LintGenerator]]
INDIVIDUAL_CHECKS = {
@ -182,10 +205,12 @@ INDIVIDUAL_CHECKS = {
"BUG_NUMBER": check_bug_number,
"BASELINE_PING": check_valid_in_baseline,
"MISSPELLED_PING": check_misspelled_pings,
}
} # type: Dict[str, Callable[[metrics.Metric, dict], LintGenerator]]
def lint_metrics(objs, parser_config={}, file=sys.stderr):
def lint_metrics(
objs: metrics.ObjectTree, parser_config: Dict[str, Any] = {}, file=sys.stderr
) -> List[Tuple[str, str, str]]:
"""
Performs glinter checks on a set of metrics objects.
@ -193,20 +218,29 @@ def lint_metrics(objs, parser_config={}, file=sys.stderr):
:param file: The stream to write errors to.
:returns: List of nits.
"""
nits = []
for (category_name, metrics) in sorted(list(objs.items())):
nits = [] # type: List[Tuple[str, str, str]]
for (category_name, category) in sorted(list(objs.items())):
if category_name == "pings":
continue
for (check_name, check_func) in CATEGORY_CHECKS.items():
if any(check_name in metric.no_lint for metric in metrics.values()):
# Make sure the category has only Metrics, not Pings
category_metrics = dict(
(name, metric)
for (name, metric) in category.items()
if isinstance(metric, metrics.Metric)
)
for (cat_check_name, cat_check_func) in CATEGORY_CHECKS.items():
if any(
cat_check_name in metric.no_lint for metric in category_metrics.values()
):
continue
nits.extend(
(check_name, category_name, msg)
for msg in check_func(category_name, metrics.values())
(cat_check_name, category_name, msg)
for msg in cat_check_func(category_name, category_metrics.values())
)
for (metric_name, metric) in sorted(list(metrics.items())):
for (metric_name, metric) in sorted(list(category_metrics.items())):
for (check_name, check_func) in INDIVIDUAL_CHECKS.items():
new_nits = list(check_func(metric, parser_config))
if len(new_nits):
@ -248,7 +282,7 @@ def lint_metrics(objs, parser_config={}, file=sys.stderr):
return nits
def lint_yaml_files(input_filepaths, file=sys.stderr):
def lint_yaml_files(input_filepaths: Iterable[Path], file=sys.stderr) -> List:
"""
Performs glinter YAML lint on a set of files.
@ -257,7 +291,9 @@ def lint_yaml_files(input_filepaths, file=sys.stderr):
:returns: List of nits.
"""
nits = []
# Generic type since the actual type comes from yamllint, which we don't
# control.
nits = [] # type: List
for path in input_filepaths:
# yamllint needs both the file content and the path.
file_content = None
@ -277,12 +313,14 @@ def lint_yaml_files(input_filepaths, file=sys.stderr):
return nits
def glinter(input_filepaths, parser_config={}, file=sys.stderr):
def glinter(
input_filepaths: Iterable[Path], parser_config: Dict[str, Any] = {}, file=sys.stderr
) -> int:
"""
Commandline helper for glinter.
:param input_filepaths: List of Path objects to load metrics from.
:param parser_config: Parser configuration objects, passed to
:param parser_config: Parser configuration object, passed to
`parser.parse_objects`.
:param file: The stream to write the errors to.
:return: Non-zero if there were any glinter errors.

View file

@ -8,13 +8,17 @@
Outputter to generate Markdown documentation for metrics.
"""
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from . import metrics
from . import pings
from . import util
from collections import defaultdict
def extra_info(obj):
def extra_info(obj: Union[metrics.Metric, pings.Ping]) -> List[Tuple[str, str]]:
"""
Returns a list of string to string tuples with extra information for the type
(e.g. extra keys for events) or an empty list if nothing is available.
@ -32,7 +36,7 @@ def extra_info(obj):
return extra_info
def ping_desc(ping_name, custom_pings_cache={}):
def ping_desc(ping_name: str, custom_pings_cache: Dict[str, pings.Ping] = {}) -> str:
"""
Return a text description of the ping. If a custom_pings_cache
is available, look in there for non-reserved ping names description.
@ -52,7 +56,7 @@ def ping_desc(ping_name, custom_pings_cache={}):
return desc
def metrics_docs(obj_name):
def metrics_docs(obj_name: str) -> str:
"""
Return a link to the documentation entry for the Glean SDK metric of the
requested type.
@ -68,7 +72,7 @@ def metrics_docs(obj_name):
return base_url.format(fixedup_name)
def ping_docs(ping_name):
def ping_docs(ping_name: str) -> str:
"""
Return a link to the documentation entry for the requested Glean SDK
built-in ping.
@ -79,14 +83,16 @@ def ping_docs(ping_name):
return "https://mozilla.github.io/glean/book/user/pings/{}.html".format(ping_name)
def if_empty(ping_name, custom_pings_cache={}):
return (
custom_pings_cache.get(ping_name)
and custom_pings_cache[ping_name].send_if_empty
)
def if_empty(ping_name: str, custom_pings_cache: Dict[str, pings.Ping] = {}) -> bool:
if ping_name in custom_pings_cache:
return custom_pings_cache[ping_name].send_if_empty
else:
return False
def ping_reasons(ping_name, custom_pings_cache):
def ping_reasons(
ping_name: str, custom_pings_cache: Dict[str, pings.Ping]
) -> Dict[str, str]:
"""
Returns the reasons dictionary for the ping.
"""
@ -98,28 +104,36 @@ def ping_reasons(ping_name, custom_pings_cache):
return {}
def ping_data_reviews(ping_name, custom_pings_cache={}):
return (
custom_pings_cache.get(ping_name)
and custom_pings_cache[ping_name].data_reviews
)
def ping_data_reviews(
ping_name: str, custom_pings_cache: Dict[str, pings.Ping] = {}
) -> Optional[List[str]]:
if ping_name in custom_pings_cache:
return custom_pings_cache[ping_name].data_reviews
else:
return None
def ping_bugs(ping_name, custom_pings_cache={}):
return (
custom_pings_cache.get(ping_name)
and custom_pings_cache[ping_name].bugs
)
def ping_bugs(
ping_name: str, custom_pings_cache: Dict[str, pings.Ping] = {}
) -> Optional[List[str]]:
if ping_name in custom_pings_cache:
return custom_pings_cache[ping_name].bugs
else:
return None
def ping_include_client_id(ping_name, custom_pings_cache={}):
return (
custom_pings_cache.get(ping_name)
and custom_pings_cache[ping_name].include_client_id
)
def ping_include_client_id(
ping_name: str, custom_pings_cache: Dict[str, pings.Ping] = {}
) -> bool:
if ping_name in custom_pings_cache:
return custom_pings_cache[ping_name].include_client_id
else:
return False
def output_markdown(objs, output_dir, options={}):
def output_markdown(
objs: metrics.ObjectTree, output_dir: Path, options: Dict[str, Any] = {}
) -> None:
"""
Given a tree of objects, output Markdown docs to `output_dir`.
@ -148,8 +162,8 @@ def output_markdown(objs, output_dir, options={}):
# }
#
# This also builds a dictionary of custom pings, if available.
custom_pings_cache = defaultdict()
metrics_by_pings = defaultdict(list)
custom_pings_cache = defaultdict() # type: Dict[str, pings.Ping]
metrics_by_pings = defaultdict(list) # type: Dict[str, List[metrics.Metric]]
for category_key, category_val in objs.items():
for obj in category_val.values():
# Filter out custom pings. We will need them for extracting
@ -188,8 +202,10 @@ def output_markdown(objs, output_dir, options={}):
("ping_reasons", lambda x: ping_reasons(x, custom_pings_cache)),
("ping_data_reviews", lambda x: ping_data_reviews(x, custom_pings_cache)),
("ping_bugs", lambda x: ping_bugs(x, custom_pings_cache)),
("ping_include_client_id",
lambda x: ping_include_client_id(x, custom_pings_cache)),
(
"ping_include_client_id",
lambda x: ping_include_client_id(x, custom_pings_cache),
),
),
)
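
The rewrites of `if_empty`, `ping_data_reviews`, `ping_bugs`, and `ping_include_client_id` above all fix the same typing problem: the old `cache.get(name) and cache[name].attr` idiom evaluates to a union including `None` (and possibly the `Ping` itself), not to the declared return type. Roughly how mypy sees the old idiom, assuming `cache: Dict[str, pings.Ping]`:

    result = cache.get(name) and cache[name].send_if_empty
    # `reveal_type` exists only during mypy runs; it is not a runtime function.
    reveal_type(result)  # mypy: approximately Union[pings.Ping, None, bool]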

View file

@ -10,7 +10,10 @@ Classes for each of the high-level metric types.
import enum
import sys
from typing import Any, Dict, List, Optional, Type, Union # noqa
from . import pings
from . import util
@ -30,29 +33,30 @@ class Lifetime(enum.Enum):
class Metric(base_object):
glean_internal_metric_cat = "glean.internal.metrics"
metric_types = {}
default_store_names = ["metrics"]
typename = "ERROR" # type: str
glean_internal_metric_cat = "glean.internal.metrics" # type: str
metric_types = {} # type: Dict[str, Any]
default_store_names = ["metrics"] # type: List[str]
def __init__(
self,
type,
category,
name,
bugs,
description,
notification_emails,
expires,
data_reviews=None,
version=0,
disabled=False,
lifetime="ping",
send_in_pings=None,
unit="",
gecko_datapoint="",
no_lint=None,
_config=None,
_validated=False,
type: str,
category: str,
name: str,
bugs: List[str],
description: str,
notification_emails: List[str],
expires: str,
data_reviews: Optional[List[str]] = None,
version: int = 0,
disabled: bool = False,
lifetime: str = "ping",
send_in_pings: Optional[List[str]] = None,
unit: str = "",
gecko_datapoint: str = "",
no_lint: Optional[List[str]] = None,
_config: Optional[Dict[str, Any]] = None,
_validated: bool = False,
):
# Avoid cyclical import
from . import parser
@ -85,7 +89,7 @@ class Metric(base_object):
data = {
"$schema": parser.METRICS_ID,
self.category: {self.name: self.serialize()},
}
} # type: Dict[str, util.JSONType]
for error in parser.validate(data):
raise ValueError(error)
@ -101,7 +105,14 @@ class Metric(base_object):
super().__init_subclass__(**kwargs)
@classmethod
def make_metric(cls, category, name, metric_info, config={}, validated=False):
def make_metric(
cls,
category: str,
name: str,
metric_info: Dict[str, util.JSONType],
config: Dict[str, Any] = {},
validated: bool = False,
):
"""
Given a metric_info dictionary from metrics.yaml, return a metric
instance.
@ -116,6 +127,8 @@ class Metric(base_object):
:return: A new Metric instance.
"""
metric_type = metric_info["type"]
if not isinstance(metric_type, str):
raise TypeError("Unknown metric type {}".format(metric_type))
return cls.metric_types[metric_type](
category=category,
name=name,
@ -124,7 +137,7 @@ class Metric(base_object):
**metric_info
)
def serialize(self):
def serialize(self) -> Dict[str, util.JSONType]:
"""
Serialize the metric back to JSON object model.
"""
@ -139,7 +152,7 @@ class Metric(base_object):
del d["category"]
return d
def identifier(self):
def identifier(self) -> str:
"""
Create an identifier unique for this metric.
Generally, category.name; however, Glean internal
@ -149,17 +162,17 @@ class Metric(base_object):
return self.name
return ".".join((self.category, self.name))
def is_disabled(self):
def is_disabled(self) -> bool:
return self.disabled or self.is_expired()
def is_expired(self):
def is_expired(self) -> bool:
return util.is_expired(self.expires)
@staticmethod
def validate_expires(expires):
def validate_expires(expires) -> None:
return util.validate_expires(expires)
def is_internal_metric(self):
def is_internal_metric(self) -> bool:
return self.category in (Metric.glean_internal_metric_cat, "")
@ -262,7 +275,7 @@ class Event(Metric):
return sorted(list(self.extra_keys.keys()))
@staticmethod
def validate_extra_keys(extra_keys, config):
def validate_extra_keys(extra_keys: Dict[str, str], config: Dict[str, Any]) -> None:
if not config.get("allow_reserved") and any(
k.startswith("glean.") for k in extra_keys.keys()
):
@ -289,7 +302,7 @@ class Labeled(Metric):
self.labels = None
super().__init__(*args, **kwargs)
def serialize(self):
def serialize(self) -> Dict[str, util.JSONType]:
"""
Serialize the metric back to JSON object model.
"""
@ -309,3 +322,6 @@ class LabeledString(Labeled, String):
class LabeledCounter(Labeled, Counter):
typename = "labeled_counter"
ObjectTree = Dict[str, Dict[str, Union[Metric, pings.Ping]]]
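
The new `ObjectTree` alias names the parser's central structure: category name, then object name, then either a `Metric` or a `Ping`. Because the leaf type is a union, consumers narrow it with `isinstance` before using metric-only attributes, exactly as the `lint.py` and `parser.py` changes in this commit do. A small sketch (the function is hypothetical):

    def count_metrics(objs: ObjectTree) -> int:
        total = 0
        for category in objs.values():
            for obj in category.values():
                if isinstance(obj, Metric):  # narrows Union[Metric, pings.Ping]
                    total += 1
        return total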

View file

@ -12,11 +12,12 @@ from collections import OrderedDict
import functools
from pathlib import Path
import textwrap
from typing import Any, Dict, Generator, Iterable, Optional, Tuple, Union
import jsonschema
from jsonschema.exceptions import ValidationError
import jsonschema # type: ignore
from jsonschema.exceptions import ValidationError # type: ignore
from .metrics import Metric
from .metrics import Metric, ObjectTree
from .pings import Ping, RESERVED_PING_NAMES
from . import util
@ -51,7 +52,9 @@ def _update_validator(validator):
validator.VALIDATORS["required"] = required
def _load_file(filepath):
def _load_file(
filepath: Path,
) -> Generator[str, None, Tuple[Dict[str, util.JSONType], Optional[str]]]:
"""
Load a metrics.yaml or pings.yaml format file.
"""
@ -67,10 +70,17 @@ def _load_file(filepath):
)
return {}, None
if not isinstance(content, dict):
return {}, None
if content == {}:
return {}, None
filetype = FILE_TYPES.get(content.get("$schema"))
schema_key = content.get("$schema")
if not isinstance(schema_key, str):
raise TypeError("Invalid schema key {}".format(schema_key))
filetype = FILE_TYPES.get(schema_key)
for error in validate(content, filepath):
content = {}
@ -80,7 +90,7 @@ def _load_file(filepath):
@functools.lru_cache(maxsize=1)
def _load_schemas():
def _load_schemas() -> Dict[str, Tuple[Any, Any]]:
"""
Load all of the known schemas from disk, and put them in a map based on the
schema's $id.
@ -97,7 +107,9 @@ def _load_schemas():
return schemas
def _get_schema(schema_id, filepath="<input>"):
def _get_schema(
schema_id: str, filepath: Union[str, Path] = "<input>"
) -> Tuple[Any, Any]:
"""
Get the schema for the given schema $id.
"""
@ -113,14 +125,19 @@ def _get_schema(schema_id, filepath="<input>"):
return schemas[schema_id]
def _get_schema_for_content(content, filepath):
def _get_schema_for_content(
content: Dict[str, util.JSONType], filepath: Union[str, Path]
) -> Tuple[Any, Any]:
"""
Get the appropriate schema for the given JSON content.
"""
return _get_schema(content.get("$schema"), filepath)
schema_url = content.get("$schema")
if not isinstance(schema_url, str):
raise TypeError("Invalid $schema type {}".format(schema_url))
return _get_schema(schema_url, filepath)
def get_parameter_doc(key):
def get_parameter_doc(key: str) -> str:
"""
Returns documentation about a specific metric parameter.
"""
@ -128,7 +145,7 @@ def get_parameter_doc(key):
return schema["definitions"]["metric"]["properties"][key]["description"]
def get_ping_parameter_doc(key):
def get_ping_parameter_doc(key: str) -> str:
"""
Returns documentation about a specific ping parameter.
"""
@ -136,7 +153,9 @@ def get_ping_parameter_doc(key):
return schema["additionalProperties"]["properties"][key]["description"]
def validate(content, filepath="<input>"):
def validate(
content: Dict[str, util.JSONType], filepath: Union[str, Path] = "<input>"
) -> Generator[str, None, None]:
"""
Validate the given content against the appropriate schema.
"""
@ -151,7 +170,13 @@ def validate(content, filepath="<input>"):
)
def _instantiate_metrics(all_objects, sources, content, filepath, config):
def _instantiate_metrics(
all_objects: ObjectTree,
sources: Dict[Any, Path],
content: Dict[str, util.JSONType],
filepath: Path,
config: Dict[str, Any],
) -> Generator[str, None, None]:
"""
Load a list of metrics.yaml files, convert the JSON information into Metric
objects, and merge them into a single tree.
@ -172,6 +197,10 @@ def _instantiate_metrics(all_objects, sources, content, filepath, config):
)
continue
all_objects.setdefault(category_key, OrderedDict())
if not isinstance(category_val, dict):
raise TypeError("Invalid content for {}".format(category_key))
for metric_key, metric_val in category_val.items():
try:
metric_obj = Metric.make_metric(
@ -215,7 +244,13 @@ def _instantiate_metrics(all_objects, sources, content, filepath, config):
sources[(category_key, metric_key)] = filepath
def _instantiate_pings(all_objects, sources, content, filepath, config):
def _instantiate_pings(
all_objects: ObjectTree,
sources: Dict[Any, Path],
content: Dict[str, util.JSONType],
filepath: Path,
config: Dict[str, Any],
) -> Generator[str, None, None]:
"""
Load a list of pings.yaml files, convert the JSON information into Ping
objects.
@ -231,6 +266,8 @@ def _instantiate_pings(all_objects, sources, content, filepath, config):
"Ping uses a reserved name ({})".format(RESERVED_PING_NAMES),
)
continue
if not isinstance(ping_val, dict):
raise TypeError("Invalid content for ping {}".format(ping_key))
ping_val["name"] = ping_key
try:
ping_obj = Ping(**ping_val)
@ -238,7 +275,7 @@ def _instantiate_pings(all_objects, sources, content, filepath, config):
yield util.format_error(
filepath, "On instance '{}'".format(ping_key), str(e)
)
ping_obj = None
continue
already_seen = sources.get(ping_key)
if already_seen is not None:
@ -255,12 +292,15 @@ def _instantiate_pings(all_objects, sources, content, filepath, config):
sources[ping_key] = filepath
def _preprocess_objects(objs, config):
def _preprocess_objects(objs: ObjectTree, config: Dict[str, Any]) -> ObjectTree:
"""
Preprocess the object tree to better set defaults.
"""
for category in objs.values():
for obj in category.values():
if not isinstance(obj, Metric):
continue
if not config.get("do_not_disable_expired", False) and hasattr(
obj, "is_disabled"
):
@ -276,7 +316,9 @@ def _preprocess_objects(objs, config):
@util.keep_value
def parse_objects(filepaths, config={}):
def parse_objects(
filepaths: Iterable[Path], config: Dict[str, Any] = {}
) -> Generator[str, None, ObjectTree]:
"""
Parse one or more metrics.yaml and/or pings.yaml files, returning a tree of
`metrics.Metric` and `pings.Ping` instances.
@ -304,8 +346,8 @@ def parse_objects(filepaths, config={}):
value from the `metrics.yaml`, rather than having it overridden when
the metric expires.
"""
all_objects = OrderedDict()
sources = {}
all_objects = OrderedDict() # type: ObjectTree
sources = {} # type: Dict[Any, Path]
filepaths = util.ensure_list(filepaths)
for filepath in filepaths:
content, filetype = yield from _load_file(filepath)
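
The `Generator[str, None, Tuple[...]]` annotation on `_load_file` encodes a pattern used throughout the parser: helpers yield error strings as they find them and return their real result, and callers chain them with `yield from`, which re-yields the errors and then delivers the return value (PEP 380). A self-contained sketch:

    from typing import Dict, Generator, Optional, Tuple

    def load() -> Generator[str, None, Tuple[Dict, Optional[str]]]:
        yield "warning: something looked odd"  # streamed upward as it happens
        return {}, "metrics"  # delivered to the `yield from` expression

    def parse() -> Generator[str, None, None]:
        content, filetype = yield from load()  # re-yields the warning first
        yield "parsed a {} file".format(filetype)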

View file

@ -9,6 +9,10 @@ Classes for managing the description of pings.
"""
import sys
from typing import Dict, List, Optional
from . import util
# Import a backport of PEP487 to support __init_subclass__
@ -26,15 +30,15 @@ RESERVED_PING_NAMES = ["baseline", "metrics", "events", "deletion_request"]
class Ping(base_object):
def __init__(
self,
name,
description,
bugs,
notification_emails,
data_reviews=None,
include_client_id=False,
send_if_empty=False,
reasons=None,
_validated=False,
name: str,
description: str,
bugs: List[str],
notification_emails: List[str],
data_reviews: Optional[List[str]] = None,
include_client_id: bool = False,
send_if_empty: bool = False,
reasons: Dict[str, str] = None,
_validated: bool = False,
):
# Avoid cyclical import
from . import parser
@ -55,21 +59,24 @@ class Ping(base_object):
# _validated indicates whether this metric has already been jsonschema
# validated (but not any of the Python-level validation).
if not _validated:
data = {"$schema": parser.PINGS_ID, self.name: self.serialize()}
data = {
"$schema": parser.PINGS_ID,
self.name: self.serialize(),
} # type: Dict[str, util.JSONType]
for error in parser.validate(data):
raise ValueError(error)
_generate_enums = [("reason_codes", "ReasonCodes")]
@property
def type(self):
def type(self) -> str:
return "ping"
@property
def reason_codes(self):
def reason_codes(self) -> List[str]:
return sorted(list(self.reasons.keys()))
def serialize(self):
def serialize(self) -> Dict[str, util.JSONType]:
"""
Serialize the metric back to JSON object model.
"""

View file

@ -10,7 +10,10 @@ Outputter to generate Swift code for metrics.
import enum
import json
from pathlib import Path
from typing import Any, Dict, Union
from . import metrics
from . import pings
from . import util
from collections import defaultdict
@ -20,7 +23,7 @@ from collections import defaultdict
SWIFT_RESERVED_NAMES = ["internal", "typealias"]
def swift_datatypes_filter(value):
def swift_datatypes_filter(value: util.JSONType) -> str:
"""
A Jinja2 filter that renders Swift literals.
@ -62,7 +65,7 @@ def swift_datatypes_filter(value):
return "".join(SwiftEncoder().iterencode(value))
def type_name(obj):
def type_name(obj: Union[metrics.Metric, pings.Ping]) -> str:
"""
Returns the Swift type to use for a given metric or ping object.
"""
@ -83,7 +86,7 @@ def type_name(obj):
return class_name(obj.type)
def class_name(obj_type):
def class_name(obj_type: str) -> str:
"""
Returns the Swift class name for a given metric or ping type.
"""
@ -94,7 +97,7 @@ def class_name(obj_type):
return util.Camelize(obj_type) + "MetricType"
def variable_name(var):
def variable_name(var: str) -> str:
"""
Returns a valid Swift variable name, escaping keywords if necessary.
"""
@ -104,12 +107,14 @@ def variable_name(var):
return var
def output_swift(objs, output_dir, options={}):
def output_swift(
objs: metrics.ObjectTree, output_dir: Path, options: Dict[str, Any] = {}
) -> None:
"""
Given a tree of objects, output Swift code to `output_dir`.
:param objects: A tree of objects (metrics and pings) as returned from
`parser.parse_objects`.
`parser.parse_objects`.
:param output_dir: Path to an output directory to write to.
:param options: options dictionary, with the following optional keys:
- namespace: The namespace to generate metrics in
@ -148,7 +153,7 @@ def output_swift(objs, output_dir, options={}):
filename = util.Camelize(category_key) + ".swift"
filepath = output_dir / filename
custom_pings = defaultdict()
custom_pings = defaultdict() # type: Dict[str, pings.Ping]
for obj in category_val.values():
if isinstance(obj, pings.Ping):
custom_pings[obj.name] = obj
@ -167,7 +172,7 @@ def output_swift(objs, output_dir, options={}):
glean_namespace=glean_namespace,
has_labeled_metrics=has_labeled_metrics,
is_ping_type=len(custom_pings) > 0,
allow_reserved=options.get("allow_reserved", False)
allow_reserved=options.get("allow_reserved", False),
)
)
# Jinja2 squashes the final newline, so we explicitly add it

View file

@ -13,11 +13,13 @@ import os
import shutil
import sys
import tempfile
from typing import Any, Callable, Dict, Iterable, List
from . import lint
from . import parser
from . import kotlin
from . import markdown
from . import metrics
from . import swift
from . import util
@ -27,22 +29,39 @@ from . import util
# does the actual translation.
# - "clear_output_dir": a flag to clear the target directory before moving there
# the generated files.
# - "extensions": A list of glob patterns to clear in the directory.
class Outputter:
"""
Class to define an output format.
"""
def __init__(
self,
output_func: Callable[[metrics.ObjectTree, Path, Dict[str, Any]], None],
clear_output_dir: bool,
extensions: List[str] = [],
):
self.output_func = output_func
self.clear_output_dir = clear_output_dir
self.extensions = extensions
OUTPUTTERS = {
"kotlin": {
"output_func": kotlin.output_kotlin,
"clear_output_dir": True,
"extensions": ["*.kt"],
},
"markdown": {"output_func": markdown.output_markdown, "clear_output_dir": False},
"swift": {
"output_func": swift.output_swift,
"clear_output_dir": True,
"extensions": ["*.swift"],
},
"kotlin": Outputter(kotlin.output_kotlin, True, ["*.kt"]),
"markdown": Outputter(markdown.output_markdown, False),
"swift": Outputter(swift.output_swift, True, ["*.swift"]),
}
def translate(input_filepaths, output_format, output_dir, options={}, parser_config={}):
def translate(
input_filepaths: Iterable[Path],
output_format: str,
output_dir: Path,
options: Dict[str, Any] = {},
parser_config: Dict[str, Any] = {},
) -> int:
"""
Translate the files in `input_filepaths` to the given `output_format` and
put the results in `output_dir`.
@ -77,15 +96,13 @@ def translate(input_filepaths, output_format, output_dir, options={}, parser_con
# real directory, for transactional integrity.
with tempfile.TemporaryDirectory() as tempdir:
tempdir_path = Path(tempdir)
OUTPUTTERS[output_format]["output_func"](
all_objects.value, tempdir_path, options
)
OUTPUTTERS[output_format].output_func(all_objects.value, tempdir_path, options)
if OUTPUTTERS[output_format]["clear_output_dir"]:
if OUTPUTTERS[output_format].clear_output_dir:
if output_dir.is_file():
output_dir.unlink()
elif output_dir.is_dir():
for extensions in OUTPUTTERS[output_format]["extensions"]:
for extensions in OUTPUTTERS[output_format].extensions:
for filepath in output_dir.glob(extensions):
filepath.unlink()
if len(list(output_dir.iterdir())):
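
Replacing the per-format option dicts with the small `Outputter` class means mypy can check both the registry entries and every attribute access on them; the old string-keyed dicts were effectively untyped. Registering an additional format is now one checked entry. A purely hypothetical example (glean_parser ships no CSV backend):

    def output_csv(
        objs: metrics.ObjectTree, output_dir: Path, options: Dict[str, Any] = {}
    ) -> None:
        ...  # illustrative; must match the Callable type in Outputter.__init__

    OUTPUTTERS["csv"] = Outputter(output_csv, True, ["*.csv"])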

View file

@ -11,13 +11,14 @@ import json
from pathlib import Path
import sys
import textwrap
from typing import Any, Callable, Iterable, Sequence, Tuple, Union
import urllib.request
import appdirs
import diskcache
import appdirs # type: ignore
import diskcache # type: ignore
import jinja2
import jsonschema
from jsonschema import _utils
import jsonschema # type: ignore
from jsonschema import _utils # type: ignore
import yaml
if sys.version_info < (3, 7):
@ -27,8 +28,17 @@ if sys.version_info < (3, 7):
TESTING_MODE = "pytest" in sys.modules
JSONType = Union[list, dict, str, int, float, None]
"""
The types supported by JSON.
This is only an approximation -- this should really be a recursive type.
"""
# Adapted from
# https://stackoverflow.com/questions/34667108/ignore-dates-and-times-while-parsing-yaml
class _NoDatesSafeLoader(yaml.SafeLoader):
@classmethod
def remove_implicit_resolver(cls, tag_to_remove):
@ -96,7 +106,7 @@ else:
return yaml.dump(data, **kwargs)
def load_yaml_or_json(path, ordered_dict=False):
def load_yaml_or_json(path: Path, ordered_dict: bool = False):
"""
Load the content from either a .json or .yaml file, based on the filename
extension.
@ -125,7 +135,7 @@ def load_yaml_or_json(path, ordered_dict=False):
raise ValueError("Unknown file extension {}".format(path.suffix))
def ensure_list(value):
def ensure_list(value: Any) -> Sequence[Any]:
"""
Ensures that the value is a list. If it is anything but a list or tuple, a
list with a single element containing only value is returned.
@ -135,7 +145,7 @@ def ensure_list(value):
return value
def to_camel_case(input, capitalize_first_letter):
def to_camel_case(input: str, capitalize_first_letter: bool) -> str:
"""
Convert the value to camelCase.
@ -150,10 +160,10 @@ def to_camel_case(input, capitalize_first_letter):
if not capitalize_first_letter:
tokens[0] = tokens[0].lower()
# Finally join the tokens and capitalize.
return ''.join(tokens)
return "".join(tokens)
def camelize(value):
def camelize(value: str) -> str:
"""
Convert the value to camelCase (with a lower case first letter).
@ -163,7 +173,7 @@ def camelize(value):
return to_camel_case(value, False)
def Camelize(value):
def Camelize(value: str) -> str:
"""
Convert the value to CamelCase (with an upper case first letter).
@ -174,7 +184,9 @@ def Camelize(value):
@functools.lru_cache()
def get_jinja2_template(template_name, filters=()):
def get_jinja2_template(
template_name: str, filters: Iterable[Tuple[str, Callable]] = ()
):
"""
Get a Jinja2 template that ships with glean_parser.
@ -236,7 +248,7 @@ def get_null_resolver(schema):
return NullResolver.from_schema(schema)
def fetch_remote_url(url, cache=True):
def fetch_remote_url(url: str, cache: bool = True):
"""
Fetches the contents from an HTTP url or local file path, and optionally
caches it to disk.
@ -254,7 +266,7 @@ def fetch_remote_url(url, cache=True):
if url in dc:
return dc[url]
contents = urllib.request.urlopen(url).read()
contents = urllib.request.urlopen(url).read() # type: ignore
# On Python 3.5, urlopen does not handle the unicode decoding for us. This
# is ok because we control these files and we know they are in UTF-8,
@ -272,7 +284,7 @@ def fetch_remote_url(url, cache=True):
_unset = _utils.Unset()
def pprint_validation_error(error):
def pprint_validation_error(error) -> str:
"""
A version of jsonschema's ValidationError __str__ method that doesn't
include the schema fragment that failed. This makes the error messages
@ -313,7 +325,7 @@ def pprint_validation_error(error):
return "\n".join(parts)
def format_error(filepath, header, content):
def format_error(filepath: Union[str, Path], header: str, content: str) -> str:
"""
Format a jsonschema validation error.
"""
@ -327,7 +339,7 @@ def format_error(filepath, header, content):
return "{}:\n{}".format(filepath, _utils.indent(content))
def is_expired(expires):
def is_expired(expires: str) -> bool:
"""
Parses the `expires` field in a metric or ping and returns whether
the object should be considered expired.
@ -352,7 +364,7 @@ def is_expired(expires):
return date <= datetime.datetime.utcnow().date()
def validate_expires(expires):
def validate_expires(expires: str) -> None:
"""
Raises ValueError if `expires` is not valid.
"""

View file

@ -14,7 +14,7 @@ import json
from pathlib import Path
import sys
import jsonschema
import jsonschema # type: ignore
from . import util

View file

@ -2,6 +2,7 @@ black==19.10b0
coverage==4.5.2
flake8==3.7.8
m2r==0.2.1
mypy==0.761
pip
pytest-runner==4.4
pytest==4.3.0

View file

@ -107,11 +107,7 @@ def test_ping_parser(tmpdir):
{"allow_reserved": True},
)
assert set(x.name for x in tmpdir.iterdir()) == set(
[
"Pings.kt",
]
)
assert set(x.name for x in tmpdir.iterdir()) == set(["Pings.kt"])
# Make sure descriptions made it in
with (tmpdir / "Pings.kt").open("r", encoding="utf-8") as fd:

View file

@ -142,10 +142,7 @@ def test_reasons(tmpdir):
tmpdir = Path(str(tmpdir))
translate.translate(
ROOT / "data" / "pings.yaml",
"markdown",
tmpdir,
{"namespace": "Foo"},
ROOT / "data" / "pings.yaml", "markdown", tmpdir, {"namespace": "Foo"},
)
assert set(x.name for x in tmpdir.iterdir()) == set(["metrics.md"])

View file

@ -127,7 +127,7 @@ def test_metric_type_name():
include_client_id=True,
bugs=[42],
notification_emails=["nobody@nowhere.com"],
reasons={"foo": "foolicious", "bar": "barlicious"}
reasons={"foo": "foolicious", "bar": "barlicious"},
)
assert swift.type_name(ping) == "Ping<CustomReasonCodes>"
@ -187,9 +187,7 @@ def test_no_import_glean(tmpdir):
def test_import_glean(tmpdir):
tmpdir = Path(str(tmpdir))
translate.translate(
ROOT / "data" / "smaller.yaml", "swift", tmpdir, {}, {}
)
translate.translate(ROOT / "data" / "smaller.yaml", "swift", tmpdir, {}, {})
# Make sure descriptions made it in
fd = (tmpdir / "Telemetry.swift").open("r", encoding="utf-8")