Bug 1759030 - Vendor `taskcluster-taskgraph` at `3.5.1`, r=ahochheiden

Differential Revision: https://phabricator.services.mozilla.com/D161056
ahochheiden 2022-11-04 14:14:56 +00:00
Parent 71b86860bf
Commit 55811f8006
24 changed files with 2217 additions and 1374 deletions

View file

@ -464,7 +464,7 @@ class PLURALS(LegacySource):
selector = ctx.evaluate(self.selector)
keys = ctx.plural_categories
forms = [
FTL.TextElement(part.strip())
FTL.TextElement(part)
for part in element.value.split(';')
]
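
A worked illustration of the difference the one-token change above makes (pure string behavior; the sample value is made up): with `.strip()` each semicolon-separated plural form loses its surrounding whitespace, while without it the whitespace is preserved in the resulting `TextElement`s.

value = "jedna minuta; {$count} minuty; {$count} minut"
print(value.split(";"))                               # ['jedna minuta', ' {$count} minuty', ' {$count} minut']
print([part.strip() for part in value.split(";")])    # ['jedna minuta', '{$count} minuty', '{$count} minut']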

15
third_party/python/poetry.lock generated vendored
View file

@ -252,8 +252,8 @@ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7"
zipp = ">=0.5"
[package.extras]
testing = ["importlib-resources (>=1.3)", "pep517", "packaging"]
docs = ["rst.linker", "sphinx"]
docs = ["sphinx", "rst.linker"]
testing = ["packaging", "pep517", "importlib-resources (>=1.3)"]
[[package]]
name = "iso8601"
@ -634,7 +634,7 @@ test = ["pytest", "pytest-cov", "pytest-mock", "httmock", "mock", "setuptools-li
[[package]]
name = "taskcluster-taskgraph"
version = "3.2.1"
version = "3.5.1"
description = "Build taskcluster taskgraphs"
category = "main"
optional = false
@ -653,6 +653,9 @@ slugid = ">=2.0"
taskcluster-urls = ">=11.0"
voluptuous = ">=0.12.1"
[package.extras]
load-image = ["zstandard"]
[[package]]
name = "taskcluster-urls"
version = "13.0.1"
@ -757,7 +760,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=1.2.3)", "pytest-flake8", "pyt
[metadata]
lock-version = "1.1"
python-versions = "^3.6"
content-hash = "eb26337dadb86c9a4895059df8799e1d81eb31e85150dea8931c2207a0bef168"
content-hash = "a12185410cebae037b0b16a9f8a3b99e1069c4f8d55157c48e15cc3f14233228"
[metadata.files]
aiohttp = [
@ -1153,8 +1156,8 @@ taskcluster = [
{file = "taskcluster-44.2.2.tar.gz", hash = "sha256:0266a6a901e1a2ec838984a7f24e7adb6d58f9f2e221a7f613388f8f23f786fc"},
]
taskcluster-taskgraph = [
{file = "taskcluster-taskgraph-3.2.1.tar.gz", hash = "sha256:c638724f0d514a3fc2d6ba34ddd395cfe021312dcf78b01c789ec4c7bf068cf0"},
{file = "taskcluster_taskgraph-3.2.1-py3-none-any.whl", hash = "sha256:87498ce08c5d2bfe0fd0b1a860e3dc2b9eba4d7acb883e9e5c2b6f7f15281a34"},
{file = "taskcluster-taskgraph-3.5.1.tar.gz", hash = "sha256:e08b935175349ef8728ff5f19c7e9866a562256180f5580b291da3217cb5016c"},
{file = "taskcluster_taskgraph-3.5.1-py3-none-any.whl", hash = "sha256:dc56b87228fb8eb1ef611750202344817a8cf5d825c0dc7e2dcc0f8b2795cbcd"},
]
taskcluster-urls = [
{file = "taskcluster-urls-13.0.1.tar.gz", hash = "sha256:b25e122ecec249c4299ac7b20b08db76e3e2025bdaeb699a9d444556de5fd367"},

2
third_party/python/requirements.in vendored
View file

@ -40,7 +40,7 @@ setuptools==51.2.0
six==1.13.0
slugid==2.0.0
taskcluster==44.2.2
taskcluster-taskgraph==3.2.1
taskcluster-taskgraph==3.5.1
taskcluster-urls==13.0.1
tqdm==4.62.3
urllib3==1.26

6
third_party/python/requirements.txt vendored
View file

@ -328,9 +328,9 @@ six==1.13.0; (python_version >= "2.6" and python_full_version < "3.0.0") or (pyt
slugid==2.0.0 \
--hash=sha256:aec8b0e01c4ad32e38e12d609eab3ec912fd129aaf6b2ded0199b56a5f8fd67c \
--hash=sha256:a950d98b72691178bdd4d6c52743c4a2aa039207cf7a97d71060a111ff9ba297
taskcluster-taskgraph==3.2.1 \
--hash=sha256:87498ce08c5d2bfe0fd0b1a860e3dc2b9eba4d7acb883e9e5c2b6f7f15281a34 \
--hash=sha256:c638724f0d514a3fc2d6ba34ddd395cfe021312dcf78b01c789ec4c7bf068cf0
taskcluster-taskgraph==3.5.1 \
--hash=sha256:e08b935175349ef8728ff5f19c7e9866a562256180f5580b291da3217cb5016c \
--hash=sha256:dc56b87228fb8eb1ef611750202344817a8cf5d825c0dc7e2dcc0f8b2795cbcd
taskcluster-urls==13.0.1 \
--hash=sha256:b25e122ecec249c4299ac7b20b08db76e3e2025bdaeb699a9d444556de5fd367 \
--hash=sha256:5e25e7e6818e8877178b175ff43d2e6548afad72694aa125f404a7329ece0973 \

View file

@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: taskcluster-taskgraph
Version: 3.2.1
Version: 3.5.1
Summary: Build taskcluster taskgraphs
Home-page: https://github.com/taskcluster/taskgraph
License: UNKNOWN
@ -26,6 +26,8 @@ Requires-Dist: requests-unixsocket (>=0.2)
Requires-Dist: slugid (>=2.0)
Requires-Dist: taskcluster-urls (>=11.0)
Requires-Dist: voluptuous (>=0.12.1)
Provides-Extra: load-image
Requires-Dist: zstandard ; extra == 'load-image'
UNKNOWN
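
The new `load-image` extra above maps an optional feature to an extra dependency. A minimal sketch of how such an extra is typically declared with setuptools; the package name, version, and pins are illustrative only, not taskgraph's actual packaging config:

from setuptools import setup

setup(
    name="example-package",
    version="0.1.0",
    install_requires=["requests>=2.9"],
    extras_require={
        # `pip install example-package[load-image]` additionally installs
        # zstandard, which is what the Provides-Extra / Requires-Dist lines
        # above express in the wheel metadata.
        "load-image": ["zstandard"],
    },
)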

View file

@ -1,15 +1,16 @@
taskgraph/__init__.py,sha256=jwOtU7TkmU317LP_IsgIswpj2T1OPUXXgMRv4sIU7nE,707
taskgraph/config.py,sha256=MoFLjKPUViWYGALi_acWDVXZs7M8cy0zQpUKsJSlBMs,4411
taskgraph/create.py,sha256=1z2AyLvHMkZfDkmPy6um86HG9xTRhE0Sphnbpd-kuEg,5190
taskgraph/decision.py,sha256=X94bfSp6LyYkO7hpi4A0ytWSfHl9YtkRLNaJR8loAWQ,12758
taskgraph/docker.py,sha256=hsMIvRVXiqC8DIGD34WwQrC1JnjaYHSvVWq_lEeNQEE,7471
taskgraph/decision.py,sha256=ApfQeXumRH7uq55DLt7gjQCh_eKls6lPhnNaH2ZpR-0,12849
taskgraph/docker.py,sha256=dB282jKjfLnHwL73YSg1Eeqj-ojHQc676vEpWt4PjVw,7835
taskgraph/files_changed.py,sha256=W3_gEgUT-mVH9DaaU_8X6gYpftrqBU3kgveGbzPLziU,2793
taskgraph/filter_tasks.py,sha256=R7tYXiaVPGIkQ6O1c9-QJrKZ59m9pFXCloUlPraVnZU,866
taskgraph/generator.py,sha256=ZfSb8dek6tQRxfpHbvQP2KMxXFzmhqwN821tOlNcvzo,15118
taskgraph/generator.py,sha256=tonQ3UvaZYRdpWOtmdQ5Mr4en1FRCUJvbvlbzfChluM,15590
taskgraph/graph.py,sha256=9tE3bSSBRHvRLgJzK4dTieGT3RrzQZdR1YbKizEhzlw,4667
taskgraph/main.py,sha256=E7dC1q14L4psrNfUe-PMC8QH4cYjsIs91I-aVmzeBaI,23551
taskgraph/main.py,sha256=rb7cwghT5U97kSpIho0KzXo4HSXp2Iw_jaL2A2Qrf18,23581
taskgraph/morph.py,sha256=8qxYdruEQkbHGqv7dh3e1OWhH9Y5i6bFUKzDMs-Ctnw,9625
taskgraph/parameters.py,sha256=8556WayG8J-3w_DZTjF--VKd7Czuaxng1Zl3Cvdz5eg,11644
taskgraph/optimize.py,sha256=NVshvkqRKr7SQvRdqz5CELmnIXeiODkDxlK0D9QMi9k,16487
taskgraph/parameters.py,sha256=CYaR9E6pFsysUcRahlFILplEy3unVwUu7scLhP03nQo,11824
taskgraph/target_tasks.py,sha256=41BIVwiATy8DCQujPduTtnFmgHlKOfw6RPGL4b20WO8,3324
taskgraph/task.py,sha256=QCrOzMaTsy5QHShKUo89XgjJVMl3cSZGZJPLuHCXItE,3132
taskgraph/taskgraph.py,sha256=tfj0ZMqjuwEQDET0W57EcP-_KBEbqkxJci9Z6DkeOEQ,2397
@ -25,22 +26,22 @@ taskgraph/loader/transform.py,sha256=olUBPjxk3eEIg25sduxlcyqhjoig4ts5kPlT_zs6g9g
taskgraph/optimize/__init__.py,sha256=Oqpq1RW8QzOcu7zaMlNQ3BHT9ws9e_93FWfCqzNcQps,123
taskgraph/optimize/base.py,sha256=WvoDNewyHG46IQbG3th-aau9OxSKegsYNfvdOEmunbA,18341
taskgraph/optimize/strategies.py,sha256=Y5fS-f_3xsQNfFjCXIwDxrwXBvyp4yZxdPVNh49c7XU,2381
taskgraph/run-task/fetch-content,sha256=uUoyua3OdIgynY5Q9K6EojBwuaM2zo2OiN9bmNS646Q,24291
taskgraph/run-task/fetch-content,sha256=z3kx-vxaaaAmfqW-JW7dPKIFpjnxdZiXMdpPj1jAG8M,29915
taskgraph/run-task/hgrc,sha256=BybWLDR89bWi3pE5T05UqmDHs02CbLypE-omLZWU6Uk,896
taskgraph/run-task/robustcheckout.py,sha256=xc24zaBd6dyuoga1ace0M27jo14K4UXNwhqcbHutJ7U,28977
taskgraph/run-task/run-task,sha256=76p0Zo19a6f4NkwTq8s9y4Emt3YW6Q-VdTInlcqjPjo,46956
taskgraph/run-task/robustcheckout.py,sha256=tZi_FRGFhX27fspaUj2RGsMCmkwn8IfpRiSsPOrGfXQ,29802
taskgraph/run-task/run-task,sha256=zT83gWFaB0qBWdxCLxOVHiMdq1bmSmi90FjXjcegfpk,43584
taskgraph/transforms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
taskgraph/transforms/base.py,sha256=N9ec4kw65V_J2KY4C4QRPlbIREbRDYwTlhClstYmOBU,5285
taskgraph/transforms/cached_tasks.py,sha256=Z10VD1kEBVXJvj8qSsNTq2mYpklh0V1EN8OT6QK3v_E,2607
taskgraph/transforms/code_review.py,sha256=eE2xrDtdD_n3HT3caQ2HGAkPm6Uutdm4hDCpCoFjEps,707
taskgraph/transforms/docker_image.py,sha256=ADiOUB-Ngm9Y6uwzGDpQsDJ_-4w6-ZYwLCxQ-0b16E0,7567
taskgraph/transforms/fetch.py,sha256=jxJw7wlEh_WxAa1Bmy2WIHfpdvL79PDsKwC1DFymbBQ,9584
taskgraph/transforms/fetch.py,sha256=Q7Co4wdBKL6Tr3Uc-eitJ3NGgGUYmRXNLuC5m-59-M8,10443
taskgraph/transforms/release_notifications.py,sha256=jrb9CCT-z_etDf690T-AeCvdzIoVWBAeM_FGoW7FIzA,3305
taskgraph/transforms/task.py,sha256=kWic-qqvK8vEFxQwojRPxc42GAsdkxoV3HVcG1pdBxE,47942
taskgraph/transforms/job/__init__.py,sha256=GKYODycxov7u05owF_ZWgczd7WHi2yHTd8L5Ftvxge0,16929
taskgraph/transforms/job/common.py,sha256=onHnerPcmmvbSk0oHt8mvJmOo7AnjHQya0ombgMNLG8,7106
taskgraph/transforms/task.py,sha256=fBiSCyC0Lzd2GDSZ_QwhQ1RRebXLmkw4ZCPte9fwEL8,48212
taskgraph/transforms/job/__init__.py,sha256=ayAytoDmlmNvJNArJc-_nBz1Xuc191rZdbobUgp9hQA,17192
taskgraph/transforms/job/common.py,sha256=XtKSxUCwRYqpPgRTyLD_8JGRuJs2JYuR0RXpTarPdTE,6826
taskgraph/transforms/job/index_search.py,sha256=Ngh9FFu1bx2kHVTChW2vcrbnb3SzMneRHopXk18RfB4,1220
taskgraph/transforms/job/run_task.py,sha256=oRR-is7dRKRrSCY3WntmJ-pKK3wx9-BMJpY9qru2FWY,8654
taskgraph/transforms/job/run_task.py,sha256=z5DqgHmmHYEbKtnpMQqcMY6ksgCnnoB7CugH3Z41Gag,8610
taskgraph/transforms/job/toolchain.py,sha256=WWsj6L_db9rJxzo26TdEf_0jcrK4MCoHHJDzFBkSFpI,5978
taskgraph/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
taskgraph/util/archive.py,sha256=nzYn8cQ3NfLAeV-2SuTNoeQ6hg8m40f6FQcSTyVIKwQ,2855
@ -66,9 +67,9 @@ taskgraph/util/vcs.py,sha256=i13idS8y9ooR216mnd1gksdjSgHBNlAZEdq7Xr-ROwE,18536
taskgraph/util/verify.py,sha256=YETuZVkwnfYe57GRPx2x_vedstgqdGiH46HLWAdcks8,8827
taskgraph/util/workertypes.py,sha256=5g2mgIbEKMzDpZNnmPMoMNyy7Wahi-jmWcV1amDAcPo,2341
taskgraph/util/yaml.py,sha256=hfKI_D8Q7dimq4_VvO3WEh8CJsTrsIMwN6set7HIQbY,990
taskcluster_taskgraph-3.2.1.dist-info/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725
taskcluster_taskgraph-3.2.1.dist-info/METADATA,sha256=ahNDmBrUgn48sWk5gx2bq4WMRmnUlDkC_E-wXC6Yglg,1050
taskcluster_taskgraph-3.2.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
taskcluster_taskgraph-3.2.1.dist-info/entry_points.txt,sha256=VoXNtZpN4LvyXYB1wq47AU9CO-DMYMJ0VktKxjugzbY,51
taskcluster_taskgraph-3.2.1.dist-info/top_level.txt,sha256=3JNeYn_hNiNXC7DrdH_vcv-WYSE7QdgGjdvUYvSjVp0,10
taskcluster_taskgraph-3.2.1.dist-info/RECORD,,
taskcluster_taskgraph-3.5.1.dist-info/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725
taskcluster_taskgraph-3.5.1.dist-info/METADATA,sha256=uy5bE9DFHpqImbRhKEVM6CSC1me3wjdCW76836B0rEc,1126
taskcluster_taskgraph-3.5.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
taskcluster_taskgraph-3.5.1.dist-info/entry_points.txt,sha256=VoXNtZpN4LvyXYB1wq47AU9CO-DMYMJ0VktKxjugzbY,51
taskcluster_taskgraph-3.5.1.dist-info/top_level.txt,sha256=3JNeYn_hNiNXC7DrdH_vcv-WYSE7QdgGjdvUYvSjVp0,10
taskcluster_taskgraph-3.5.1.dist-info/RECORD,,

View file

@ -188,9 +188,11 @@ def get_decision_parameters(graph_config, options):
parameters["filters"] = [
"target_tasks_method",
]
parameters["optimize_strategies"] = None
parameters["optimize_target_tasks"] = True
parameters["existing_tasks"] = {}
parameters["do_not_optimize"] = []
parameters["enable_always_target"] = True
parameters["build_number"] = 1
parameters["version"] = get_version(repo_path)
parameters["next_version"] = None
@ -224,13 +226,8 @@ def get_decision_parameters(graph_config, options):
# ..but can be overridden by the commit message: if it contains the special
# string "DONTBUILD" and this is an on-push decision task, then use the
# special 'nothing' target task method. (except on the toolchains project,
# where we ignore "DONTBUILD").
if (
"DONTBUILD" in commit_message
and options["tasks_for"] == "hg-push"
and project != "toolchains"
):
# special 'nothing' target task method.
if "DONTBUILD" in commit_message and options["tasks_for"] == "hg-push":
parameters["target_tasks_method"] = "nothing"
if options.get("optimize_target_tasks") is not None:

View file

@ -7,6 +7,12 @@ import json
import os
import tarfile
from io import BytesIO
from textwrap import dedent
try:
import zstandard as zstd
except ImportError as e:
zstd = e
from taskgraph.util import docker
from taskgraph.util.taskcluster import get_artifact_url, get_session
@ -115,7 +121,15 @@ def load_image(url, imageName=None, imageTag=None):
Returns an object with properties 'image', 'tag' and 'layer'.
"""
import zstandard as zstd
if isinstance(zstd, ImportError):
raise ImportError(
dedent(
"""
zstandard is not installed! Use `pip install taskcluster-taskgraph[load-image]`
to use this feature.
"""
)
) from zstd
# If imageName is given and we don't have an imageTag
# we parse out the imageTag from imageName, or default it to 'latest'
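
The hunk above defers the optional `zstandard` import so that the module stays importable without the extra installed and only fails when `load_image` is actually called. A self-contained sketch of the same pattern with made-up names (`fancycodec`, `decode_blob`), not part of the vendored code:

from textwrap import dedent

try:
    import fancycodec  # hypothetical optional dependency
except ImportError as e:
    fancycodec = e


def decode_blob(data):
    # Surface the missing dependency only when the optional feature is used.
    if isinstance(fancycodec, ImportError):
        raise ImportError(
            dedent(
                """
                fancycodec is not installed! Install the optional extra
                to use this feature.
                """
            )
        ) from fancycodec
    return fancycodec.decode(data)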

View file

@ -14,7 +14,7 @@ from .config import GraphConfig, load_graph_config
from .graph import Graph
from .morph import morph
from .optimize.base import optimize_task_graph
from .parameters import Parameters
from .parameters import parameters_loader
from .task import Task
from .taskgraph import TaskGraph
from .transforms.base import TransformConfig, TransformSequence
@ -249,9 +249,6 @@ class TaskGraphGenerator:
continue
def _run(self):
# Initial verifications that don't depend on any generation state.
verifications("initial")
logger.info("Loading graph configuration.")
graph_config = load_graph_config(self.root_dir)
@ -259,6 +256,9 @@ class TaskGraphGenerator:
graph_config.register()
# Initial verifications that don't depend on any generation state.
verifications("initial")
if callable(self._parameters):
parameters = self._parameters(graph_config)
else:
@ -360,11 +360,14 @@ class TaskGraphGenerator:
if t.attributes["kind"] == "docker-image"
}
# include all tasks with `always_target` set
always_target_tasks = {
t.label
for t in full_task_graph.tasks.values()
if t.attributes.get("always_target")
}
if parameters["enable_always_target"]:
always_target_tasks = {
t.label
for t in full_task_graph.tasks.values()
if t.attributes.get("always_target")
}
else:
always_target_tasks = set()
logger.info(
"Adding %d tasks with `always_target` attribute"
% (len(always_target_tasks) - len(always_target_tasks & target_tasks))
@ -383,6 +386,14 @@ class TaskGraphGenerator:
do_not_optimize = set(parameters.get("do_not_optimize", []))
if not parameters.get("optimize_target_tasks", True):
do_not_optimize = set(target_task_set.graph.nodes).union(do_not_optimize)
# this is used for testing experimental optimization strategies
strategies = os.environ.get(
"TASKGRAPH_OPTIMIZE_STRATEGIES", parameters.get("optimize_strategies")
)
if strategies:
strategies = find_object(strategies)
optimized_task_graph, label_to_taskid = optimize_task_graph(
target_task_graph,
requested_tasks,
@ -390,6 +401,7 @@ class TaskGraphGenerator:
do_not_optimize,
self._decision_task_id,
existing_tasks=existing_tasks,
strategy_override=strategies,
)
yield self.verify(
@ -428,7 +440,7 @@ def load_tasks_for_kind(parameters, kind, root_dir=None):
# make parameters read-write
parameters = dict(parameters)
parameters["target-kind"] = kind
parameters = Parameters(strict=False, **parameters)
parameters = parameters_loader(spec=None, strict=False, overrides=parameters)
tgg = TaskGraphGenerator(root_dir=root_dir, parameters=parameters)
return {
task.task["metadata"]["name"]: task
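
A minimal usage sketch for `load_tasks_for_kind` with the new `parameters_loader`-based signature, assuming it is run from the root of a repository already configured for taskgraph; the kind name and the override are only examples.

from taskgraph.generator import load_tasks_for_kind

overrides = {"target_tasks_method": "default"}
tasks = load_tasks_for_kind(overrides, "docker-image")
for name, task in sorted(tasks.items()):
    print(name, task.label)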

View file

@ -47,7 +47,9 @@ def argument(*args, **kwargs):
def format_taskgraph_labels(taskgraph):
return "\n".join(
taskgraph.tasks[index].label for index in taskgraph.graph.visit_postorder()
sorted(
taskgraph.tasks[index].label for index in taskgraph.graph.visit_postorder()
)
)

471
third_party/python/taskcluster_taskgraph/taskgraph/optimize.py vendored Normal file
View file

@ -0,0 +1,471 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
The objective of optimization is to remove as many tasks from the graph as
possible, as efficiently as possible, thereby delivering useful results as
quickly as possible. For example, ideally if only a test script is modified in
a push, then the resulting graph contains only the corresponding test suite
task.
See ``taskcluster/docs/optimization.rst`` for more information.
"""
import logging
import os
from collections import defaultdict
from slugid import nice as slugid
from . import files_changed
from .graph import Graph
from .taskgraph import TaskGraph
from .util.parameterization import resolve_task_references
from .util.taskcluster import find_task_id
logger = logging.getLogger(__name__)
TOPSRCDIR = os.path.abspath(os.path.join(__file__, "../../../"))
def optimize_task_graph(
target_task_graph,
params,
do_not_optimize,
decision_task_id,
existing_tasks=None,
strategies=None,
):
"""
Perform task optimization, returning a taskgraph and a map from label to
assigned taskId, including replacement tasks.
"""
label_to_taskid = {}
if not existing_tasks:
existing_tasks = {}
# instantiate the strategies for this optimization process
if not strategies:
strategies = _make_default_strategies()
optimizations = _get_optimizations(target_task_graph, strategies)
removed_tasks = remove_tasks(
target_task_graph=target_task_graph,
optimizations=optimizations,
params=params,
do_not_optimize=do_not_optimize,
)
replaced_tasks = replace_tasks(
target_task_graph=target_task_graph,
optimizations=optimizations,
params=params,
do_not_optimize=do_not_optimize,
label_to_taskid=label_to_taskid,
existing_tasks=existing_tasks,
removed_tasks=removed_tasks,
)
return (
get_subgraph(
target_task_graph,
removed_tasks,
replaced_tasks,
label_to_taskid,
decision_task_id,
),
label_to_taskid,
)
def _make_default_strategies():
return {
"never": OptimizationStrategy(), # "never" is the default behavior
"index-search": IndexSearch(),
"skip-unless-changed": SkipUnlessChanged(),
}
def _get_optimizations(target_task_graph, strategies):
def optimizations(label):
task = target_task_graph.tasks[label]
if task.optimization:
opt_by, arg = list(task.optimization.items())[0]
return (opt_by, strategies[opt_by], arg)
else:
return ("never", strategies["never"], None)
return optimizations
def _log_optimization(verb, opt_counts, opt_reasons=None):
if opt_reasons:
message = "optimize: {label} {action} because of {reason}"
for label, (action, reason) in opt_reasons.items():
logger.debug(message.format(label=label, action=action, reason=reason))
if opt_counts:
logger.info(
f"{verb.title()} "
+ ", ".join(f"{c} tasks by {b}" for b, c in sorted(opt_counts.items()))
+ " during optimization."
)
else:
logger.info(f"No tasks {verb} during optimization")
def remove_tasks(target_task_graph, params, optimizations, do_not_optimize):
"""
Implement the "Removing Tasks" phase, returning a set of task labels of all removed tasks.
"""
opt_counts = defaultdict(int)
opt_reasons = {}
removed = set()
dependents_of = target_task_graph.graph.reverse_links_dict()
tasks = target_task_graph.tasks
prune_candidates = set()
# Traverse graph so dependents (child nodes) are guaranteed to be processed
# first.
for label in target_task_graph.graph.visit_preorder():
# Dependents that can be pruned away (shouldn't cause this task to run).
# Only dependents that either:
# A) Explicitly reference this task in their 'if_dependencies' list, or
# B) Don't have an 'if_dependencies' attribute (i.e. are in 'prune_candidates'
# because they should be removed but have prune_deps themselves)
# should be considered.
prune_deps = {
l
for l in dependents_of[label]
if l in prune_candidates
if not tasks[l].if_dependencies or label in tasks[l].if_dependencies
}
def _keep(reason):
"""Mark a task as being kept in the graph. Also recursively removes
any dependents from `prune_candidates`, assuming they should be
kept because of this task.
"""
opt_reasons[label] = ("kept", reason)
# Removes dependents that were in 'prune_candidates' from a task
# that ended up being kept (and therefore the dependents should
# also be kept).
queue = list(prune_deps)
while queue:
l = queue.pop()
# If l is a prune_dep of multiple tasks it could be queued up
# multiple times. Guard against it being already removed.
if l not in prune_candidates:
continue
# If a task doesn't set 'if_dependencies' itself (rather it was
# added to 'prune_candidates' due to one of its dependents),
# then we shouldn't remove it.
if not tasks[l].if_dependencies:
continue
prune_candidates.remove(l)
queue.extend([r for r in dependents_of[l] if r in prune_candidates])
def _remove(reason):
"""Potentially mark a task as being removed from the graph. If the
task has dependents that can be pruned, add this task to
`prune_candidates` rather than removing it.
"""
if prune_deps:
# If there are prune_deps, unsure if we can remove this task yet.
prune_candidates.add(label)
else:
opt_reasons[label] = ("removed", reason)
opt_counts[reason] += 1
removed.add(label)
# if we're not allowed to optimize, that's easy..
if label in do_not_optimize:
_keep("do not optimize")
continue
# If there are remaining tasks depending on this one, do not remove.
if any(
l for l in dependents_of[label] if l not in removed and l not in prune_deps
):
_keep("dependent tasks")
continue
# Call the optimization strategy.
task = tasks[label]
opt_by, opt, arg = optimizations(label)
if opt.should_remove_task(task, params, arg):
_remove(opt_by)
continue
# Some tasks should only run if their dependency was also run. Since we
# haven't processed dependencies yet, we add them to a list of
# candidate tasks for pruning.
if task.if_dependencies:
opt_reasons[label] = ("kept", opt_by)
prune_candidates.add(label)
else:
_keep(opt_by)
if prune_candidates:
reason = "if-dependencies pruning"
for label in prune_candidates:
# There's an edge case where a triangle graph can cause a
# dependency to stay in 'prune_candidates' when the dependent
# remains. Do a final check to ensure we don't create any bad
# edges.
dependents = any(
d
for d in dependents_of[label]
if d not in prune_candidates
if d not in removed
)
if dependents:
opt_reasons[label] = ("kept", "dependent tasks")
continue
removed.add(label)
opt_counts[reason] += 1
opt_reasons[label] = ("removed", reason)
_log_optimization("removed", opt_counts, opt_reasons)
return removed
def replace_tasks(
target_task_graph,
params,
optimizations,
do_not_optimize,
label_to_taskid,
removed_tasks,
existing_tasks,
):
"""
Implement the "Replacing Tasks" phase, returning a set of task labels of
all replaced tasks. The replacement taskIds are added to label_to_taskid as
a side-effect.
"""
opt_counts = defaultdict(int)
replaced = set()
links_dict = target_task_graph.graph.links_dict()
for label in target_task_graph.graph.visit_postorder():
# if we're not allowed to optimize, that's easy..
if label in do_not_optimize:
continue
# if this task depends on un-replaced, un-removed tasks, do not replace
if any(l not in replaced and l not in removed_tasks for l in links_dict[label]):
continue
# if the task already exists, that's an easy replacement
repl = existing_tasks.get(label)
if repl:
label_to_taskid[label] = repl
replaced.add(label)
opt_counts["existing_tasks"] += 1
continue
# call the optimization strategy
task = target_task_graph.tasks[label]
opt_by, opt, arg = optimizations(label)
repl = opt.should_replace_task(task, params, arg)
if repl:
if repl is True:
# True means remove this task; get_subgraph will catch any
# problems with removed tasks being depended on
removed_tasks.add(label)
else:
label_to_taskid[label] = repl
replaced.add(label)
opt_counts[opt_by] += 1
continue
_log_optimization("replaced", opt_counts)
return replaced
def get_subgraph(
target_task_graph,
removed_tasks,
replaced_tasks,
label_to_taskid,
decision_task_id,
):
"""
Return the subgraph of target_task_graph consisting only of
non-optimized tasks and edges between them.
To avoid losing track of taskIds for tasks optimized away, this method
simultaneously substitutes real taskIds for task labels in the graph, and
populates each task definition's `dependencies` key with the appropriate
taskIds. Task references are resolved in the process.
"""
# check for any dependency edges from included to removed tasks
bad_edges = [
(l, r, n)
for l, r, n in target_task_graph.graph.edges
if l not in removed_tasks and r in removed_tasks
]
if bad_edges:
probs = ", ".join(
f"{l} depends on {r} as {n} but it has been removed"
for l, r, n in bad_edges
)
raise Exception("Optimization error: " + probs)
# fill in label_to_taskid for anything not removed or replaced
assert replaced_tasks <= set(label_to_taskid)
for label in sorted(
target_task_graph.graph.nodes - removed_tasks - set(label_to_taskid)
):
label_to_taskid[label] = slugid()
# resolve labels to taskIds and populate task['dependencies']
tasks_by_taskid = {}
named_links_dict = target_task_graph.graph.named_links_dict()
omit = removed_tasks | replaced_tasks
for label, task in target_task_graph.tasks.items():
if label in omit:
continue
task.task_id = label_to_taskid[label]
named_task_dependencies = {
name: label_to_taskid[label]
for name, label in named_links_dict.get(label, {}).items()
}
# Add remaining soft dependencies
if task.soft_dependencies:
named_task_dependencies.update(
{
label: label_to_taskid[label]
for label in task.soft_dependencies
if label in label_to_taskid and label not in omit
}
)
task.task = resolve_task_references(
task.label,
task.task,
task_id=task.task_id,
decision_task_id=decision_task_id,
dependencies=named_task_dependencies,
)
deps = task.task.setdefault("dependencies", [])
deps.extend(sorted(named_task_dependencies.values()))
tasks_by_taskid[task.task_id] = task
# resolve edges to taskIds
edges_by_taskid = (
(label_to_taskid.get(left), label_to_taskid.get(right), name)
for (left, right, name) in target_task_graph.graph.edges
)
# ..and drop edges that are no longer entirely in the task graph
# (note that this omits edges to replaced tasks, but they are still in task.dependencies)
edges_by_taskid = {
(left, right, name)
for (left, right, name) in edges_by_taskid
if left in tasks_by_taskid and right in tasks_by_taskid
}
return TaskGraph(tasks_by_taskid, Graph(set(tasks_by_taskid), edges_by_taskid))
class OptimizationStrategy:
def should_remove_task(self, task, params, arg):
"""Determine whether to optimize this task by removing it. Returns
True to remove."""
return False
def should_replace_task(self, task, params, arg):
"""Determine whether to optimize this task by replacing it. Returns a
taskId to replace this task, True to replace with nothing, or False to
keep the task."""
return False
class Either(OptimizationStrategy):
"""Given one or more optimization strategies, remove a task if any of them
says to, and replace with a task if any finds a replacement (preferring the
earliest). By default, each substrategy gets the same arg, but split_args
can return a list of args for each strategy, if desired."""
def __init__(self, *substrategies, **kwargs):
self.substrategies = substrategies
self.split_args = kwargs.pop("split_args", None)
if not self.split_args:
self.split_args = lambda arg: [arg] * len(substrategies)
if kwargs:
raise TypeError("unexpected keyword args")
def _for_substrategies(self, arg, fn):
for sub, arg in zip(self.substrategies, self.split_args(arg)):
rv = fn(sub, arg)
if rv:
return rv
return False
def should_remove_task(self, task, params, arg):
return self._for_substrategies(
arg, lambda sub, arg: sub.should_remove_task(task, params, arg)
)
def should_replace_task(self, task, params, arg):
return self._for_substrategies(
arg, lambda sub, arg: sub.should_replace_task(task, params, arg)
)
class IndexSearch(OptimizationStrategy):
# A task with no dependencies remaining after optimization will be replaced
# if artifacts exist for the corresponding index_paths.
# Otherwise, we're in one of the following cases:
# - the task has un-optimized dependencies
# - the artifacts have expired
# - some changes altered the index_paths and new artifacts need to be
# created.
# In any of those cases, we need to run the task to create or refresh
# artifacts.
def should_replace_task(self, task, params, index_paths):
"Look for a task with one of the given index paths"
for index_path in index_paths:
try:
task_id = find_task_id(
index_path, use_proxy=bool(os.environ.get("TASK_ID"))
)
return task_id
except KeyError:
# 404 will end up here and go on to the next index path
pass
return False
class SkipUnlessChanged(OptimizationStrategy):
def should_remove_task(self, task, params, file_patterns):
if params.get("repository_type") != "hg":
raise RuntimeError(
"SkipUnlessChanged optimization only works with mercurial repositories"
)
# pushlog_id == -1 - this is the case when run from a cron.yml job
if params.get("pushlog_id") == -1:
return False
changed = files_changed.check(params, file_patterns)
if not changed:
logger.debug(
'no files found matching a pattern in `skip-unless-changed` for "{}"'.format(
task.label
)
)
return True
return False
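
A hedged sketch of a custom strategy using the same contract (`should_remove_task` returns True to drop the task; `should_replace_task` returns a taskId, True, or False). The import location and the strategy itself are assumptions for illustration, not part of the vendored code:

from taskgraph.optimize.base import OptimizationStrategy  # assumed to live here


class SkipUnlessLevel(OptimizationStrategy):
    """Remove the task unless the push runs at the required trust level (made-up rule)."""

    def should_remove_task(self, task, params, required_level):
        # `level` is one of the standard decision parameters shown below.
        return params.get("level") != str(required_level)

A mapping of strategy names to instances like this one is what the `optimize_strategies` parameter / `TASKGRAPH_OPTIMIZE_STRATEGIES` override introduced in generator.py above is expected to resolve to.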

View file

@ -37,6 +37,7 @@ base_schema = Schema(
Required("build_date"): int,
Required("build_number"): int,
Required("do_not_optimize"): [str],
Required("enable_always_target"): bool,
Required("existing_tasks"): {str: str},
Required("filters"): [str],
Required("head_ref"): str,
@ -46,6 +47,7 @@ base_schema = Schema(
Required("level"): str,
Required("moz_build_date"): str,
Required("next_version"): Any(str, None),
Required("optimize_strategies"): Any(str, None),
Required("optimize_target_tasks"): bool,
Required("owner"): str,
Required("project"): str,
@ -93,6 +95,7 @@ def _get_defaults(repo_root=None):
"build_date": int(time.time()),
"build_number": 1,
"do_not_optimize": [],
"enable_always_target": True,
"existing_tasks": {},
"filters": ["target_tasks_method"],
"head_ref": repo.branch or repo.head_rev,
@ -102,6 +105,7 @@ def _get_defaults(repo_root=None):
"level": "3",
"moz_build_date": datetime.now().strftime("%Y%m%d%H%M%S"),
"next_version": None,
"optimize_strategies": None,
"optimize_target_tasks": True,
"owner": "nobody@mozilla.com",
"project": project,

View file

@ -16,6 +16,7 @@ import multiprocessing
import os
import pathlib
import random
import re
import stat
import subprocess
import sys
@ -31,6 +32,11 @@ try:
except ImportError:
zstandard = None
try:
import certifi
except ImportError:
certifi = None
CONCURRENCY = multiprocessing.cpu_count()
@ -46,13 +52,13 @@ class IntegrityError(Exception):
def ZstdCompressor(*args, **kwargs):
if not zstandard:
raise ValueError('zstandard Python package not available')
raise ValueError("zstandard Python package not available")
return zstandard.ZstdCompressor(*args, **kwargs)
def ZstdDecompressor(*args, **kwargs):
if not zstandard:
raise ValueError('zstandard Python package not available')
raise ValueError("zstandard Python package not available")
return zstandard.ZstdDecompressor(*args, **kwargs)
@ -67,7 +73,7 @@ def rename_after_close(fname, *args, **kwargs):
manager.
"""
path = pathlib.Path(fname)
tmp = path.with_name('%s.tmp' % path.name)
tmp = path.with_name("%s.tmp" % path.name)
try:
with tmp.open(*args, **kwargs) as fh:
yield fh
@ -127,7 +133,9 @@ def retrier(attempts=5, sleeptime=10, max_sleeptime=300, sleepscale=1.5, jitter=
jitter = jitter or 0 # py35 barfs on the next line if jitter is None
if jitter > sleeptime:
# To prevent negative sleep times
raise Exception('jitter ({}) must be less than sleep time ({})'.format(jitter, sleeptime))
raise Exception(
"jitter ({}) must be less than sleep time ({})".format(jitter, sleeptime)
)
sleeptime_real = sleeptime
for _ in range(attempts):
@ -149,7 +157,9 @@ def retrier(attempts=5, sleeptime=10, max_sleeptime=300, sleepscale=1.5, jitter=
# Don't need to sleep the last time
if _ < attempts - 1:
log("sleeping for %.2fs (attempt %i/%i)" % (sleeptime_real, _ + 1, attempts))
log(
"sleeping for %.2fs (attempt %i/%i)" % (sleeptime_real, _ + 1, attempts)
)
time.sleep(sleeptime_real)
@ -166,7 +176,7 @@ def stream_download(url, sha256=None, size=None, headers=None):
content, it should be streamed to a file or memory and only operated
on after the generator is exhausted without raising.
"""
log('Downloading %s' % url)
log("Downloading %s" % url)
headers = headers or []
h = hashlib.sha256()
@ -179,8 +189,10 @@ def stream_download(url, sha256=None, size=None, headers=None):
req_headers[key.strip()] = val.strip()
req = urllib.request.Request(url, None, req_headers)
with urllib.request.urlopen(req) as fh:
if not url.endswith('.gz') and fh.info().get('Content-Encoding') == 'gzip':
with urllib.request.urlopen(
req, cafile=certifi.where()
) if certifi else urllib.request.urlopen(req) as fh:
if not url.endswith(".gz") and fh.info().get("Content-Encoding") == "gzip":
fh = gzip.GzipFile(fileobj=fh)
while True:
@ -196,22 +208,26 @@ def stream_download(url, sha256=None, size=None, headers=None):
duration = time.time() - t0
digest = h.hexdigest()
log('%s resolved to %d bytes with sha256 %s in %.3fs' % (
url, length, digest, duration))
log(
"%s resolved to %d bytes with sha256 %s in %.3fs"
% (url, length, digest, duration)
)
if size:
if size == length:
log('Verified size of %s' % url)
log("Verified size of %s" % url)
else:
raise IntegrityError('size mismatch on %s: wanted %d; got %d' % (
url, size, length))
raise IntegrityError(
"size mismatch on %s: wanted %d; got %d" % (url, size, length)
)
if sha256:
if digest == sha256:
log('Verified sha256 integrity of %s' % url)
log("Verified sha256 integrity of %s" % url)
else:
raise IntegrityError('sha256 mismatch on %s: wanted %s; got %s' % (
url, sha256, digest))
raise IntegrityError(
"sha256 mismatch on %s: wanted %s; got %s" % (url, sha256, digest)
)
def download_to_path(url, path, sha256=None, size=None, headers=None):
@ -227,10 +243,12 @@ def download_to_path(url, path, sha256=None, size=None, headers=None):
for _ in retrier(attempts=5, sleeptime=60):
try:
log('Downloading %s to %s' % (url, path))
log("Downloading %s to %s" % (url, path))
with rename_after_close(path, 'wb') as fh:
for chunk in stream_download(url, sha256=sha256, size=size, headers=headers):
with rename_after_close(path, "wb") as fh:
for chunk in stream_download(
url, sha256=sha256, size=size, headers=headers
):
fh.write(chunk)
return
@ -243,65 +261,85 @@ def download_to_path(url, path, sha256=None, size=None, headers=None):
raise Exception("Download failed, no more retries!")
def gpg_verify_path(path: pathlib.Path, public_key_data: bytes,
signature_data: bytes):
def download_to_memory(url, sha256=None, size=None):
"""Download a URL to memory, possibly with verification."""
data = b""
for _ in retrier(attempts=5, sleeptime=60):
try:
log("Downloading %s" % (url))
for chunk in stream_download(url, sha256=sha256, size=size):
data += chunk
return data
except IntegrityError:
raise
except Exception as e:
log("Download failed: {}".format(e))
continue
raise Exception("Download failed, no more retries!")
def gpg_verify_path(path: pathlib.Path, public_key_data: bytes, signature_data: bytes):
"""Verify that a filesystem path verifies using GPG.
Takes a Path defining a file to verify. ``public_key_data`` contains
bytes with GPG public key data. ``signature_data`` contains a signed
GPG document to use with ``gpg --verify``.
"""
log('Validating GPG signature of %s' % path)
log('GPG key data:\n%s' % public_key_data.decode('ascii'))
log("Validating GPG signature of %s" % path)
log("GPG key data:\n%s" % public_key_data.decode("ascii"))
with tempfile.TemporaryDirectory() as td:
try:
# --batch since we're running unattended.
gpg_args = ['gpg', '--homedir', td, '--batch']
gpg_args = ["gpg", "--homedir", td, "--batch"]
log('Importing GPG key...')
subprocess.run(gpg_args + ['--import'],
input=public_key_data,
check=True)
log("Importing GPG key...")
subprocess.run(gpg_args + ["--import"], input=public_key_data, check=True)
log('Verifying GPG signature...')
subprocess.run(gpg_args + ['--verify', '-', '%s' % path],
input=signature_data,
check=True)
log("Verifying GPG signature...")
subprocess.run(
gpg_args + ["--verify", "-", "%s" % path],
input=signature_data,
check=True,
)
log('GPG signature verified!')
log("GPG signature verified!")
finally:
# There is a race between the agent self-terminating and
# shutil.rmtree() from the temporary directory cleanup that can
# lead to exceptions. Kill the agent before cleanup to prevent this.
env = dict(os.environ)
env['GNUPGHOME'] = td
subprocess.run(['gpgconf', '--kill', 'gpg-agent'], env=env)
env["GNUPGHOME"] = td
subprocess.run(["gpgconf", "--kill", "gpg-agent"], env=env)
def open_tar_stream(path: pathlib.Path):
""""""
if path.suffix == '.bz2':
return bz2.open(str(path), 'rb')
elif path.suffix == '.gz':
return gzip.open(str(path), 'rb')
elif path.suffix == '.xz':
return lzma.open(str(path), 'rb')
elif path.suffix == '.zst':
if path.suffix == ".bz2":
return bz2.open(str(path), "rb")
elif path.suffix == ".gz":
return gzip.open(str(path), "rb")
elif path.suffix == ".xz":
return lzma.open(str(path), "rb")
elif path.suffix == ".zst":
dctx = ZstdDecompressor()
return dctx.stream_reader(path.open('rb'))
elif path.suffix == '.tar':
return path.open('rb')
return dctx.stream_reader(path.open("rb"))
elif path.suffix == ".tar":
return path.open("rb")
else:
raise ValueError('unknown archive format for tar file: %s' % path)
raise ValueError("unknown archive format for tar file: %s" % path)
def archive_type(path: pathlib.Path):
"""Attempt to identify a path as an extractable archive."""
if path.suffixes[-2:-1] == ['.tar']:
return 'tar'
elif path.suffix == '.zip':
return 'zip'
if path.suffixes[-2:-1] == [".tar"]:
return "tar"
elif path.suffix == ".zip":
return "zip"
else:
return None
@ -313,12 +351,12 @@ def extract_archive(path, dest_dir, typ):
path = path.resolve()
dest_dir = dest_dir.resolve()
log('Extracting %s to %s' % (path, dest_dir))
log("Extracting %s to %s" % (path, dest_dir))
t0 = time.time()
# We pipe input to the decompressor program so that we can apply
# custom decompressors that the program may not know about.
if typ == 'tar':
if typ == "tar":
ifh = open_tar_stream(path)
# On Windows, the tar program doesn't support things like symbolic
# links, while Windows actually support them. The tarfile module in
@ -326,24 +364,25 @@ def extract_archive(path, dest_dir, typ):
# the tar program on Linux, only use tarfile on Windows (tarfile is
# also not much slower on Windows, presumably because of the
# notoriously bad I/O).
if sys.platform == 'win32':
tar = tarfile.open(fileobj=ifh, mode='r|')
if sys.platform == "win32":
tar = tarfile.open(fileobj=ifh, mode="r|")
tar.extractall(str(dest_dir))
args = []
else:
args = ['tar', 'xf', '-']
args = ["tar", "xf", "-"]
pipe_stdin = True
elif typ == 'zip':
elif typ == "zip":
# unzip from stdin has wonky behavior. We don't use a pipe for it.
ifh = open(os.devnull, 'rb')
args = ['unzip', '-o', str(path)]
ifh = open(os.devnull, "rb")
args = ["unzip", "-o", str(path)]
pipe_stdin = False
else:
raise ValueError('unknown archive format: %s' % path)
raise ValueError("unknown archive format: %s" % path)
if args:
with ifh, subprocess.Popen(args, cwd=str(dest_dir), bufsize=0,
stdin=subprocess.PIPE) as p:
with ifh, subprocess.Popen(
args, cwd=str(dest_dir), bufsize=0, stdin=subprocess.PIPE
) as p:
while True:
if not pipe_stdin:
break
@ -355,46 +394,50 @@ def extract_archive(path, dest_dir, typ):
p.stdin.write(chunk)
if p.returncode:
raise Exception('%r exited %d' % (args, p.returncode))
raise Exception("%r exited %d" % (args, p.returncode))
log('%s extracted in %.3fs' % (path, time.time() - t0))
log("%s extracted in %.3fs" % (path, time.time() - t0))
def repack_archive(orig: pathlib.Path, dest: pathlib.Path,
strip_components=0, prefix=''):
def repack_archive(
orig: pathlib.Path, dest: pathlib.Path, strip_components=0, prefix=""
):
assert orig != dest
log('Repacking as %s' % dest)
log("Repacking as %s" % dest)
orig_typ = archive_type(orig)
typ = archive_type(dest)
if not orig_typ:
raise Exception('Archive type not supported for %s' % orig.name)
raise Exception("Archive type not supported for %s" % orig.name)
if not typ:
raise Exception('Archive type not supported for %s' % dest.name)
raise Exception("Archive type not supported for %s" % dest.name)
if dest.suffixes[-2:] != ['.tar', '.zst']:
raise Exception('Only producing .tar.zst archives is supported.')
if dest.suffixes[-2:] != [".tar", ".zst"]:
raise Exception("Only producing .tar.zst archives is supported.")
if strip_components or prefix:
def filter(name):
if strip_components:
stripped = '/'.join(name.split('/')[strip_components:])
stripped = "/".join(name.split("/")[strip_components:])
if not stripped:
raise Exception(
'Stripping %d components would remove files'
% strip_components)
"Stripping %d components would remove files" % strip_components
)
name = stripped
return prefix + name
else:
filter = None
with rename_after_close(dest, 'wb') as fh:
with rename_after_close(dest, "wb") as fh:
ctx = ZstdCompressor()
if orig_typ == 'zip':
assert typ == 'tar'
if orig_typ == "zip":
assert typ == "tar"
zip = zipfile.ZipFile(orig)
# Convert the zip stream to a tar on the fly.
with ctx.stream_writer(fh) as compressor, \
tarfile.open(fileobj=compressor, mode='w:') as tar:
with ctx.stream_writer(fh) as compressor, tarfile.open(
fileobj=compressor, mode="w:"
) as tar:
for zipinfo in zip.infolist():
if zipinfo.is_dir():
continue
@ -408,7 +451,8 @@ def repack_archive(orig: pathlib.Path, dest: pathlib.Path,
# care about accuracy, but rather about reproducibility,
# so we pick UTC.
time = datetime.datetime(
*zipinfo.date_time, tzinfo=datetime.timezone.utc)
*zipinfo.date_time, tzinfo=datetime.timezone.utc
)
tarinfo.mtime = time.timestamp()
# 0 is MS-DOS, 3 is UNIX. Only in the latter case do we
# get anything useful for the tar file mode.
@ -424,26 +468,35 @@ def repack_archive(orig: pathlib.Path, dest: pathlib.Path,
elif stat.S_ISREG(mode) or stat.S_IFMT(mode) == 0:
tar.addfile(tarinfo, zip.open(filename))
else:
raise Exception('Unsupported file mode %o'
% stat.S_IFMT(mode))
raise Exception("Unsupported file mode %o" % stat.S_IFMT(mode))
elif orig_typ == 'tar':
if typ == 'zip':
raise Exception('Repacking a tar to zip is not supported')
assert typ == 'tar'
elif orig_typ == "tar":
if typ == "zip":
raise Exception("Repacking a tar to zip is not supported")
assert typ == "tar"
ifh = open_tar_stream(orig)
if filter:
# To apply the filter, we need to open the tar stream and
# tweak it.
origtar = tarfile.open(fileobj=ifh, mode='r|')
with ctx.stream_writer(fh) as compressor, \
tarfile.open(fileobj=compressor, mode='w:') as tar:
origtar = tarfile.open(fileobj=ifh, mode="r|")
with ctx.stream_writer(fh) as compressor, tarfile.open(
fileobj=compressor,
mode="w:",
format=origtar.format,
) as tar:
for tarinfo in origtar:
if tarinfo.isdir():
continue
tarinfo.name = filter(tarinfo.name)
tar.addfile(tarinfo, origtar.extractfile(tarinfo))
if "path" in tarinfo.pax_headers:
tarinfo.pax_headers["path"] = filter(
tarinfo.pax_headers["path"]
)
if tarinfo.isfile():
tar.addfile(tarinfo, origtar.extractfile(tarinfo))
else:
tar.addfile(tarinfo)
else:
# We only change compression here. The tar stream is unchanged.
ctx.copy_stream(ifh, fh)
@ -457,7 +510,7 @@ def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
the destination directory.
"""
basename = urllib.parse.urlparse(url).path.split('/')[-1]
basename = urllib.parse.urlparse(url).path.split("/")[-1]
dest_path = dest_dir / basename
download_to_path(url, dest_path, sha256=sha256, size=size)
@ -468,7 +521,7 @@ def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None):
typ = archive_type(dest_path)
if typ:
extract_archive(dest_path, dest_dir, typ)
log('Removing %s' % dest_path)
log("Removing %s" % dest_path)
dest_path.unlink()
@ -484,37 +537,152 @@ def fetch_urls(downloads):
f.result()
def git_checkout_archive(dest_path: pathlib.Path, repo: str, commit: str,
prefix=None):
def _git_checkout_github_archive(
dest_path: pathlib.Path, repo: str, commit: str, prefix: str
):
"Use github archive generator to speed up github git repo cloning"
repo = repo.rstrip("/")
github_url = "{repo}/archive/{commit}.tar.gz".format(**locals())
with tempfile.TemporaryDirectory() as td:
temp_dir = pathlib.Path(td)
dl_dest = temp_dir / "archive.tar.gz"
download_to_path(github_url, dl_dest)
repack_archive(dl_dest, dest_path, strip_components=1, prefix=prefix + "/")
def _github_submodule_required(repo: str, commit: str):
"Use github API to check if submodules are used"
url = "{repo}/blob/{commit}/.gitmodules".format(**locals())
try:
status_code = urllib.request.urlopen(url).getcode()
return status_code == 200
except:
return False
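
For reference, the archive URL the helper above builds for a pinned commit looks like this (repository and commit are made up):

repo = "https://github.com/example-org/example-repo"
commit = "0123456789abcdef0123456789abcdef01234567"
print("{repo}/archive/{commit}.tar.gz".format(repo=repo, commit=commit))
# https://github.com/example-org/example-repo/archive/0123456789abcdef0123456789abcdef01234567.tar.gz
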
def git_checkout_archive(
dest_path: pathlib.Path,
repo: str,
commit: str,
prefix=None,
ssh_key=None,
include_dot_git=False,
):
"""Produce an archive of the files comprising a Git checkout."""
dest_path.parent.mkdir(parents=True, exist_ok=True)
if dest_path.suffixes[-2:] != ['.tar', '.zst']:
raise Exception('Only producing .tar.zst archives is supported.')
if not prefix:
prefix = repo.rstrip("/").rsplit("/", 1)[-1]
if dest_path.suffixes[-2:] != [".tar", ".zst"]:
raise Exception("Only producing .tar.zst archives is supported.")
if repo.startswith("https://github.com/"):
if not include_dot_git and not _github_submodule_required(repo, commit):
log("Using github archive service to speedup archive creation")
# Always log sha1 info, either from commit or resolved from repo.
if re.match(r"^[a-fA-F0-9]{40}$", commit):
revision = commit
else:
ref_output = subprocess.check_output(["git", "ls-remote", repo,
'refs/heads/' + commit])
revision, _ = ref_output.decode().split(maxsplit=1)
log("Fetching revision {}".format(revision))
return _git_checkout_github_archive(dest_path, repo, commit, prefix)
with tempfile.TemporaryDirectory() as td:
temp_dir = pathlib.Path(td)
if not prefix:
prefix = repo.rstrip('/').rsplit('/', 1)[-1]
git_dir = temp_dir / prefix
# This could be faster with a shallow clone. However, Git requires a ref
# to initiate a clone. Since the commit-ish may not refer to a ref, we
# simply perform a full clone followed by a checkout.
print('cloning %s to %s' % (repo, git_dir))
subprocess.run(['git', 'clone', '--recurse-submodules', repo, str(git_dir)],
check=True)
print("cloning %s to %s" % (repo, git_dir))
subprocess.run(['git', 'checkout', '--recurse-submodules', commit],
cwd=str(git_dir), check=True)
env = os.environ.copy()
keypath = ""
if ssh_key:
taskcluster_secret_url = api(
os.environ.get("TASKCLUSTER_PROXY_URL"),
"secrets",
"v1",
"secret/{keypath}".format(keypath=ssh_key),
)
taskcluster_secret = b"".join(stream_download(taskcluster_secret_url))
taskcluster_secret = json.loads(taskcluster_secret)
sshkey = taskcluster_secret["secret"]["ssh_privkey"]
print('creating archive %s of commit %s' % (dest_path, commit))
proc = subprocess.Popen([
'tar', 'cf', '-', '--exclude=.git', '-C', str(temp_dir), prefix,
], stdout=subprocess.PIPE)
keypath = temp_dir.joinpath("ssh-key")
keypath.write_text(sshkey)
keypath.chmod(0o600)
with rename_after_close(dest_path, 'wb') as out:
env = {
"GIT_SSH_COMMAND": "ssh -o 'StrictHostKeyChecking no' -i {keypath}".format(
keypath=keypath
)
}
subprocess.run(["git", "clone", "-n", repo, str(git_dir)], check=True, env=env)
# Always use a detached head so that git prints out what it checked out.
subprocess.run(
["git", "checkout", "--detach", commit], cwd=str(git_dir), check=True
)
# When including the .git, we want --depth 1, but a direct clone would not
# necessarily be able to give us the right commit.
if include_dot_git:
initial_clone = git_dir.with_name(git_dir.name + ".orig")
git_dir.rename(initial_clone)
subprocess.run(
[
"git",
"clone",
"file://" + str(initial_clone),
str(git_dir),
"--depth",
"1",
],
check=True,
)
subprocess.run(
["git", "remote", "set-url", "origin", repo],
cwd=str(git_dir),
check=True,
)
# --depth 1 can induce more work on the server side, so only use it for
# submodule initialization when we want to keep the .git directory.
depth = ["--depth", "1"] if include_dot_git else []
subprocess.run(
["git", "submodule", "update", "--init"] + depth,
cwd=str(git_dir),
check=True,
)
if keypath:
os.remove(keypath)
print("creating archive %s of commit %s" % (dest_path, commit))
exclude_dot_git = [] if include_dot_git else ["--exclude=.git"]
proc = subprocess.Popen(
[
"tar",
"cf",
"-",
]
+ exclude_dot_git
+ [
"-C",
str(temp_dir),
prefix,
],
stdout=subprocess.PIPE,
)
with rename_after_close(dest_path, "wb") as out:
ctx = ZstdCompressor()
ctx.copy_stream(proc.stdout, out)
@ -525,8 +693,14 @@ def command_git_checkout_archive(args):
dest = pathlib.Path(args.dest)
try:
git_checkout_archive(dest, args.repo, args.commit,
prefix=args.path_prefix)
git_checkout_archive(
dest,
args.repo,
args.commit,
prefix=args.path_prefix,
ssh_key=args.ssh_key_secret,
include_dot_git=args.include_dot_git,
)
except Exception:
try:
dest.unlink()
@ -541,25 +715,26 @@ def command_static_url(args):
gpg_env_key = args.gpg_key_env
if bool(gpg_sig_url) != bool(gpg_env_key):
print('--gpg-sig-url and --gpg-key-env must both be defined')
print("--gpg-sig-url and --gpg-key-env must both be defined")
return 1
if gpg_sig_url:
gpg_signature = b''.join(stream_download(gpg_sig_url))
gpg_key = os.environb[gpg_env_key.encode('ascii')]
gpg_signature = b"".join(stream_download(gpg_sig_url))
gpg_key = os.environb[gpg_env_key.encode("ascii")]
dest = pathlib.Path(args.dest)
dest.parent.mkdir(parents=True, exist_ok=True)
basename = urllib.parse.urlparse(args.url).path.split('/')[-1]
if basename.endswith(''.join(dest.suffixes)):
basename = urllib.parse.urlparse(args.url).path.split("/")[-1]
if basename.endswith("".join(dest.suffixes)):
dl_dest = dest
else:
dl_dest = dest.parent / basename
try:
download_to_path(args.url, dl_dest, sha256=args.sha256, size=args.size,
headers=args.headers)
download_to_path(
args.url, dl_dest, sha256=args.sha256, size=args.size, headers=args.headers
)
if gpg_sig_url:
gpg_verify_path(dl_dest, gpg_key, gpg_signature)
@ -575,112 +750,150 @@ def command_static_url(args):
raise
if dl_dest != dest:
log('Removing %s' % dl_dest)
log("Removing %s" % dl_dest)
dl_dest.unlink()
def api(root_url, service, version, path):
# taskcluster-lib-urls is not available when this script runs, so
# simulate its behavior:
if root_url == 'https://taskcluster.net':
return 'https://{service}.taskcluster.net/{version}/{path}'.format(
service=service, version=version, path=path)
return '{root_url}/api/{service}/{version}/{path}'.format(
root_url=root_url, service=service, version=version, path=path)
return "{root_url}/api/{service}/{version}/{path}".format(
root_url=root_url, service=service, version=version, path=path
)
def get_hash(fetch, root_url):
path = "task/{task}/artifacts/{artifact}".format(
task=fetch["task"], artifact="public/chain-of-trust.json"
)
url = api(root_url, "queue", "v1", path)
cot = json.loads(download_to_memory(url))
return cot["artifacts"][fetch["artifact"]]["sha256"]
def command_task_artifacts(args):
start = time.monotonic()
fetches = json.loads(os.environ['MOZ_FETCHES'])
fetches = json.loads(os.environ["MOZ_FETCHES"])
downloads = []
for fetch in fetches:
extdir = pathlib.Path(args.dest)
if 'dest' in fetch:
extdir = extdir.joinpath(fetch['dest'])
if "dest" in fetch:
# Note: normpath doesn't like pathlib.Path in python 3.5
extdir = pathlib.Path(os.path.normpath(str(extdir.joinpath(fetch["dest"]))))
extdir.mkdir(parents=True, exist_ok=True)
root_url = os.environ['TASKCLUSTER_ROOT_URL']
if fetch['artifact'].startswith('public/'):
path = 'task/{task}/artifacts/{artifact}'.format(
task=fetch['task'], artifact=fetch['artifact'])
url = api(root_url, 'queue', 'v1', path)
root_url = os.environ["TASKCLUSTER_ROOT_URL"]
sha256 = None
if fetch.get("verify-hash"):
sha256 = get_hash(fetch, root_url)
if fetch["artifact"].startswith("public/"):
path = "task/{task}/artifacts/{artifact}".format(
task=fetch["task"], artifact=fetch["artifact"]
)
url = api(root_url, "queue", "v1", path)
else:
url = ('{proxy_url}/api/queue/v1/task/{task}/artifacts/{artifact}').format(
proxy_url=os.environ['TASKCLUSTER_PROXY_URL'],
task=fetch['task'],
artifact=fetch['artifact'])
downloads.append((url, extdir, fetch['extract']))
url = ("{proxy_url}/api/queue/v1/task/{task}/artifacts/{artifact}").format(
proxy_url=os.environ["TASKCLUSTER_PROXY_URL"],
task=fetch["task"],
artifact=fetch["artifact"],
)
downloads.append((url, extdir, fetch["extract"], sha256))
fetch_urls(downloads)
end = time.monotonic()
perfherder_data = {
'framework': {'name': 'build_metrics'},
'suites': [{
'name': 'fetch_content',
'value': end - start,
'lowerIsBetter': True,
'shouldAlert': False,
'subtests': [],
}],
"framework": {"name": "build_metrics"},
"suites": [
{
"name": "fetch_content",
"value": end - start,
"lowerIsBetter": True,
"shouldAlert": False,
"subtests": [],
}
],
}
print('PERFHERDER_DATA: {}'.format(json.dumps(perfherder_data)), file=sys.stderr)
print("PERFHERDER_DATA: {}".format(json.dumps(perfherder_data)), file=sys.stderr)
def main():
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(title='sub commands')
subparsers = parser.add_subparsers(title="sub commands")
git_checkout = subparsers.add_parser(
'git-checkout-archive',
help='Obtain an archive of files from a Git repository checkout')
"git-checkout-archive",
help="Obtain an archive of files from a Git repository checkout",
)
git_checkout.set_defaults(func=command_git_checkout_archive)
git_checkout.add_argument('--path-prefix',
help='Prefix for paths in produced archive')
git_checkout.add_argument('repo',
help='URL to Git repository to be cloned')
git_checkout.add_argument('commit',
help='Git commit to check out')
git_checkout.add_argument('dest',
help='Destination path of archive')
git_checkout.add_argument(
"--path-prefix", help="Prefix for paths in produced archive"
)
git_checkout.add_argument("repo", help="URL to Git repository to be cloned")
git_checkout.add_argument("commit", help="Git commit to check out")
git_checkout.add_argument("dest", help="Destination path of archive")
git_checkout.add_argument(
"--ssh-key-secret", help="The scope path of the ssh key to used for checkout"
)
git_checkout.add_argument(
"--include-dot-git", action="store_true", help="Include the .git directory"
)
url = subparsers.add_parser('static-url', help='Download a static URL')
url = subparsers.add_parser("static-url", help="Download a static URL")
url.set_defaults(func=command_static_url)
url.add_argument('--sha256', required=True,
help='SHA-256 of downloaded content')
url.add_argument('--size', required=True, type=int,
help='Size of downloaded content, in bytes')
url.add_argument('--gpg-sig-url',
help='URL containing signed GPG document validating '
'URL to fetch')
url.add_argument('--gpg-key-env',
help='Environment variable containing GPG key to validate')
url.add_argument('--strip-components', type=int, default=0,
help='Number of leading components to strip from file '
'names in the downloaded archive')
url.add_argument('--add-prefix', default='',
help='Prefix to add to file names in the downloaded '
'archive')
url.add_argument('-H', '--header', default=[], action='append', dest='headers',
help='Header to send as part of the request, can be passed '
'multiple times')
url.add_argument('url', help='URL to fetch')
url.add_argument('dest', help='Destination path')
url.add_argument("--sha256", required=True, help="SHA-256 of downloaded content")
url.add_argument(
"--size", required=True, type=int, help="Size of downloaded content, in bytes"
)
url.add_argument(
"--gpg-sig-url",
help="URL containing signed GPG document validating " "URL to fetch",
)
url.add_argument(
"--gpg-key-env", help="Environment variable containing GPG key to validate"
)
url.add_argument(
"--strip-components",
type=int,
default=0,
help="Number of leading components to strip from file "
"names in the downloaded archive",
)
url.add_argument(
"--add-prefix",
default="",
help="Prefix to add to file names in the downloaded " "archive",
)
url.add_argument(
"-H",
"--header",
default=[],
action="append",
dest="headers",
help="Header to send as part of the request, can be passed " "multiple times",
)
url.add_argument("url", help="URL to fetch")
url.add_argument("dest", help="Destination path")
artifacts = subparsers.add_parser('task-artifacts',
help='Fetch task artifacts')
artifacts = subparsers.add_parser("task-artifacts", help="Fetch task artifacts")
artifacts.set_defaults(func=command_task_artifacts)
artifacts.add_argument('-d', '--dest', default=os.environ.get('MOZ_FETCHES_DIR'),
help='Destination directory which will contain all '
'artifacts (defaults to $MOZ_FETCHES_DIR)')
artifacts.add_argument(
"-d",
"--dest",
default=os.environ.get("MOZ_FETCHES_DIR"),
help="Destination directory which will contain all "
"artifacts (defaults to $MOZ_FETCHES_DIR)",
)
args = parser.parse_args()
if not args.dest:
parser.error('no destination directory specified, either pass in --dest '
'or set $MOZ_FETCHES_DIR')
parser.error(
"no destination directory specified, either pass in --dest "
"or set $MOZ_FETCHES_DIR"
)
return args.func(args)
if __name__ == '__main__':
if __name__ == "__main__":
sys.exit(main())
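
A hedged example of invoking the script's `git-checkout-archive` subcommand with the two new flags, via `subprocess`; the repository, commit, secret path, and destination are all made up.

import subprocess

subprocess.run(
    [
        "python3", "fetch-content", "git-checkout-archive",
        "--path-prefix", "myrepo",
        "--ssh-key-secret", "project/example/ssh-key",  # resolved via the Taskcluster secrets service
        "--include-dot-git",
        "https://git.example.com/org/myrepo",
        "0123456789abcdef0123456789abcdef01234567",
        "/builds/worker/artifacts/myrepo.tar.zst",
    ],
    check=True,
)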

View file

@ -9,6 +9,7 @@ from a source repo using best practices to ensure optimal clone
times and storage efficiency.
"""
from __future__ import absolute_import
import contextlib
import json
@ -40,8 +41,8 @@ from mercurial import (
# Causes worker to purge caches on process exit and for task to retry.
EXIT_PURGE_CACHE = 72
testedwith = b'4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5'
minimumhgversion = b'4.5'
testedwith = b"4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9"
minimumhgversion = b"4.5"
cmdtable = {}
command = registrar.command(cmdtable)
@ -49,41 +50,60 @@ command = registrar.command(cmdtable)
configtable = {}
configitem = registrar.configitem(configtable)
configitem(b'robustcheckout', b'retryjittermin', default=configitems.dynamicdefault)
configitem(b'robustcheckout', b'retryjittermax', default=configitems.dynamicdefault)
configitem(b"robustcheckout", b"retryjittermin", default=configitems.dynamicdefault)
configitem(b"robustcheckout", b"retryjittermax", default=configitems.dynamicdefault)
def getsparse():
from mercurial import sparse
return sparse
def peerlookup(remote, v):
# TRACKING hg46 4.6 added commandexecutor API.
if util.safehasattr(remote, 'commandexecutor'):
with remote.commandexecutor() as e:
return e.callcommand(b'lookup', {b'key': v}).result()
else:
return remote.lookup(v)
with remote.commandexecutor() as e:
return e.callcommand(b"lookup", {b"key": v}).result()
@command(b'robustcheckout', [
(b'', b'upstream', b'', b'URL of upstream repo to clone from'),
(b'r', b'revision', b'', b'Revision to check out'),
(b'b', b'branch', b'', b'Branch to check out'),
(b'', b'purge', False, b'Whether to purge the working directory'),
(b'', b'sharebase', b'', b'Directory where shared repos should be placed'),
(b'', b'networkattempts', 3, b'Maximum number of attempts for network '
b'operations'),
(b'', b'sparseprofile', b'', b'Sparse checkout profile to use (path in repo)'),
(b'U', b'noupdate', False, b'the clone will include an empty working directory\n'
b'(only a repository)'),
@command(
b"robustcheckout",
[
(b"", b"upstream", b"", b"URL of upstream repo to clone from"),
(b"r", b"revision", b"", b"Revision to check out"),
(b"b", b"branch", b"", b"Branch to check out"),
(b"", b"purge", False, b"Whether to purge the working directory"),
(b"", b"sharebase", b"", b"Directory where shared repos should be placed"),
(
b"",
b"networkattempts",
3,
b"Maximum number of attempts for network " b"operations",
),
(b"", b"sparseprofile", b"", b"Sparse checkout profile to use (path in repo)"),
(
b"U",
b"noupdate",
False,
b"the clone will include an empty working directory\n"
b"(only a repository)",
),
],
b'[OPTION]... URL DEST',
norepo=True)
def robustcheckout(ui, url, dest, upstream=None, revision=None, branch=None,
purge=False, sharebase=None, networkattempts=None,
sparseprofile=None, noupdate=False):
b"[OPTION]... URL DEST",
norepo=True,
)
def robustcheckout(
ui,
url,
dest,
upstream=None,
revision=None,
branch=None,
purge=False,
sharebase=None,
networkattempts=None,
sparseprofile=None,
noupdate=False,
):
"""Ensure a working copy has the specified revision checked out.
Repository data is automatically pooled into the common directory
@ -115,21 +135,28 @@ def robustcheckout(ui, url, dest, upstream=None, revision=None, branch=None,
4.3 or newer and the ``sparse`` extension must be enabled.
"""
if not revision and not branch:
raise error.Abort(b'must specify one of --revision or --branch')
raise error.Abort(b"must specify one of --revision or --branch")
if revision and branch:
raise error.Abort(b'cannot specify both --revision and --branch')
raise error.Abort(b"cannot specify both --revision and --branch")
# Require revision to look like a SHA-1.
if revision:
if len(revision) < 12 or len(revision) > 40 or not re.match(b'^[a-f0-9]+$', revision):
raise error.Abort(b'--revision must be a SHA-1 fragment 12-40 '
b'characters long')
if (
len(revision) < 12
or len(revision) > 40
or not re.match(b"^[a-f0-9]+$", revision)
):
raise error.Abort(
b"--revision must be a SHA-1 fragment 12-40 " b"characters long"
)
sharebase = sharebase or ui.config(b'share', b'pool')
sharebase = sharebase or ui.config(b"share", b"pool")
if not sharebase:
raise error.Abort(b'share base directory not defined; refusing to operate',
hint=b'define share.pool config option or pass --sharebase')
raise error.Abort(
b"share base directory not defined; refusing to operate",
hint=b"define share.pool config option or pass --sharebase",
)
# Sparse profile support was added in Mercurial 4.3, where it was highly
# experimental. Because of the fragility of it, we only support sparse
@ -139,16 +166,17 @@ def robustcheckout(ui, url, dest, upstream=None, revision=None, branch=None,
# fast if we can't satisfy the desired checkout request.
if sparseprofile:
try:
extensions.find(b'sparse')
extensions.find(b"sparse")
except KeyError:
raise error.Abort(b'sparse extension must be enabled to use '
b'--sparseprofile')
raise error.Abort(
b"sparse extension must be enabled to use " b"--sparseprofile"
)
ui.warn(b'(using Mercurial %s)\n' % util.version())
ui.warn(b"(using Mercurial %s)\n" % util.version())
# worker.backgroundclose only makes things faster if running anti-virus,
# which our automation doesn't. Disable it.
ui.setconfig(b'worker', b'backgroundclose', False)
ui.setconfig(b"worker", b"backgroundclose", False)
# By default the progress bar starts after 3s and updates every 0.1s. We
# change this so it shows and updates every 1.0s.
@ -156,9 +184,9 @@ def robustcheckout(ui, url, dest, upstream=None, revision=None, branch=None,
# even if there is no known TTY.
# We make the config change here instead of in a config file because
# otherwise we're at the whim of whatever configs are used in automation.
ui.setconfig(b'progress', b'delay', 1.0)
ui.setconfig(b'progress', b'refresh', 1.0)
ui.setconfig(b'progress', b'assume-tty', True)
ui.setconfig(b"progress", b"delay", 1.0)
ui.setconfig(b"progress", b"refresh", 1.0)
ui.setconfig(b"progress", b"assume-tty", True)
sharebase = os.path.realpath(sharebase)
@ -167,9 +195,21 @@ def robustcheckout(ui, url, dest, upstream=None, revision=None, branch=None,
start = time.time()
try:
return _docheckout(ui, url, dest, upstream, revision, branch, purge,
sharebase, optimes, behaviors, networkattempts,
sparse_profile=sparseprofile, noupdate=noupdate)
return _docheckout(
ui,
url,
dest,
upstream,
revision,
branch,
purge,
sharebase,
optimes,
behaviors,
networkattempts,
sparse_profile=sparseprofile,
noupdate=noupdate,
)
finally:
overall = time.time() - start
@ -177,89 +217,118 @@ def robustcheckout(ui, url, dest, upstream=None, revision=None, branch=None,
# the various "flavors" of operations.
# ``overall`` is always the total operation time.
optimes.append(('overall', overall))
optimes.append(("overall", overall))
def record_op(name):
# If special behaviors due to "corrupt" storage occur, we vary the
# name to convey that.
if 'remove-store' in behaviors:
name += '_rmstore'
if 'remove-wdir' in behaviors:
name += '_rmwdir'
if "remove-store" in behaviors:
name += "_rmstore"
if "remove-wdir" in behaviors:
name += "_rmwdir"
optimes.append((name, overall))
# We break out overall operations primarily by their network interaction
# We have variants within for working directory operations.
if 'clone' in behaviors and 'create-store' in behaviors:
record_op('overall_clone')
if "clone" in behaviors and "create-store" in behaviors:
record_op("overall_clone")
if 'sparse-update' in behaviors:
record_op('overall_clone_sparsecheckout')
if "sparse-update" in behaviors:
record_op("overall_clone_sparsecheckout")
else:
record_op('overall_clone_fullcheckout')
record_op("overall_clone_fullcheckout")
elif 'pull' in behaviors or 'clone' in behaviors:
record_op('overall_pull')
elif "pull" in behaviors or "clone" in behaviors:
record_op("overall_pull")
if 'sparse-update' in behaviors:
record_op('overall_pull_sparsecheckout')
if "sparse-update" in behaviors:
record_op("overall_pull_sparsecheckout")
else:
record_op('overall_pull_fullcheckout')
record_op("overall_pull_fullcheckout")
if 'empty-wdir' in behaviors:
record_op('overall_pull_emptywdir')
if "empty-wdir" in behaviors:
record_op("overall_pull_emptywdir")
else:
record_op('overall_pull_populatedwdir')
record_op("overall_pull_populatedwdir")
else:
record_op('overall_nopull')
record_op("overall_nopull")
if 'sparse-update' in behaviors:
record_op('overall_nopull_sparsecheckout')
if "sparse-update" in behaviors:
record_op("overall_nopull_sparsecheckout")
else:
record_op('overall_nopull_fullcheckout')
record_op("overall_nopull_fullcheckout")
if 'empty-wdir' in behaviors:
record_op('overall_nopull_emptywdir')
if "empty-wdir" in behaviors:
record_op("overall_nopull_emptywdir")
else:
record_op('overall_nopull_populatedwdir')
record_op("overall_nopull_populatedwdir")
server_url = urllibcompat.urlreq.urlparse(url).netloc
if 'TASKCLUSTER_INSTANCE_TYPE' in os.environ:
if "TASKCLUSTER_INSTANCE_TYPE" in os.environ:
perfherder = {
'framework': {
'name': 'vcs',
"framework": {
"name": "vcs",
},
'suites': [],
"suites": [],
}
for op, duration in optimes:
perfherder['suites'].append({
'name': op,
'value': duration,
'lowerIsBetter': True,
'shouldAlert': False,
'serverUrl': server_url.decode('utf-8'),
'hgVersion': util.version().decode('utf-8'),
'extraOptions': [os.environ['TASKCLUSTER_INSTANCE_TYPE']],
'subtests': [],
})
ui.write(b'PERFHERDER_DATA: %s\n' %
pycompat.bytestr(json.dumps(perfherder, sort_keys=True)))
perfherder["suites"].append(
{
"name": op,
"value": duration,
"lowerIsBetter": True,
"shouldAlert": False,
"serverUrl": server_url.decode("utf-8"),
"hgVersion": util.version().decode("utf-8"),
"extraOptions": [os.environ["TASKCLUSTER_INSTANCE_TYPE"]],
"subtests": [],
}
)
ui.write(
b"PERFHERDER_DATA: %s\n"
% pycompat.bytestr(json.dumps(perfherder, sort_keys=True))
)
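# For illustration only, a hypothetical line this emits (keys follow the dict
# built above; sort_keys=True orders them alphabetically; values are made up):
# PERFHERDER_DATA: {"framework": {"name": "vcs"}, "suites": [{"extraOptions": ["c5.4xlarge"], "hgVersion": "5.9", "lowerIsBetter": true, "name": "overall_clone", "serverUrl": "hg.mozilla.org", "shouldAlert": false, "subtests": [], "value": 42.1}]}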
def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
optimes, behaviors, networkattemptlimit, networkattempts=None,
sparse_profile=None, noupdate=False):
def _docheckout(
ui,
url,
dest,
upstream,
revision,
branch,
purge,
sharebase,
optimes,
behaviors,
networkattemptlimit,
networkattempts=None,
sparse_profile=None,
noupdate=False,
):
if not networkattempts:
networkattempts = [1]
def callself():
return _docheckout(ui, url, dest, upstream, revision, branch, purge,
sharebase, optimes, behaviors, networkattemptlimit,
networkattempts=networkattempts,
sparse_profile=sparse_profile,
noupdate=noupdate)
return _docheckout(
ui,
url,
dest,
upstream,
revision,
branch,
purge,
sharebase,
optimes,
behaviors,
networkattemptlimit,
networkattempts=networkattempts,
sparse_profile=sparse_profile,
noupdate=noupdate,
)
@contextlib.contextmanager
def timeit(op, behavior):
@ -275,12 +344,11 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
elapsed = time.time() - start
if errored:
op += '_errored'
op += "_errored"
optimes.append((op, elapsed))
ui.write(b'ensuring %s@%s is available at %s\n' % (url, revision or branch,
dest))
ui.write(b"ensuring %s@%s is available at %s\n" % (url, revision or branch, dest))
# We assume that we're the only process on the machine touching the
# repository paths that we were told to use. This means our recovery
@ -293,70 +361,75 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
destvfs = vfs.vfs(dest, audit=False, realpath=True)
def deletesharedstore(path=None):
storepath = path or destvfs.read(b'.hg/sharedpath').strip()
if storepath.endswith(b'.hg'):
storepath = path or destvfs.read(b".hg/sharedpath").strip()
if storepath.endswith(b".hg"):
storepath = os.path.dirname(storepath)
storevfs = vfs.vfs(storepath, audit=False)
storevfs.rmtree(forcibly=True)
if destvfs.exists() and not destvfs.exists(b'.hg'):
raise error.Abort(b'destination exists but no .hg directory')
if destvfs.exists() and not destvfs.exists(b".hg"):
raise error.Abort(b"destination exists but no .hg directory")
# Refuse to enable sparse checkouts on existing checkouts. The reasoning
# here is that another consumer of this repo may not be sparse aware. If we
# enabled sparse, we would lock them out.
if destvfs.exists() and sparse_profile and not destvfs.exists(b'.hg/sparse'):
raise error.Abort(b'cannot enable sparse profile on existing '
b'non-sparse checkout',
hint=b'use a separate working directory to use sparse')
if destvfs.exists() and sparse_profile and not destvfs.exists(b".hg/sparse"):
raise error.Abort(
b"cannot enable sparse profile on existing " b"non-sparse checkout",
hint=b"use a separate working directory to use sparse",
)
# And the other direction for symmetry.
if not sparse_profile and destvfs.exists(b'.hg/sparse'):
raise error.Abort(b'cannot use non-sparse checkout on existing sparse '
b'checkout',
hint=b'use a separate working directory to use sparse')
if not sparse_profile and destvfs.exists(b".hg/sparse"):
raise error.Abort(
b"cannot use non-sparse checkout on existing sparse " b"checkout",
hint=b"use a separate working directory to use sparse",
)
# Require checkouts to be tied to shared storage because efficiency.
if destvfs.exists(b'.hg') and not destvfs.exists(b'.hg/sharedpath'):
ui.warn(b'(destination is not shared; deleting)\n')
with timeit('remove_unshared_dest', 'remove-wdir'):
if destvfs.exists(b".hg") and not destvfs.exists(b".hg/sharedpath"):
ui.warn(b"(destination is not shared; deleting)\n")
with timeit("remove_unshared_dest", "remove-wdir"):
destvfs.rmtree(forcibly=True)
# Verify the shared path exists and is using modern pooled storage.
if destvfs.exists(b'.hg/sharedpath'):
storepath = destvfs.read(b'.hg/sharedpath').strip()
if destvfs.exists(b".hg/sharedpath"):
storepath = destvfs.read(b".hg/sharedpath").strip()
ui.write(b'(existing repository shared store: %s)\n' % storepath)
ui.write(b"(existing repository shared store: %s)\n" % storepath)
if not os.path.exists(storepath):
ui.warn(b'(shared store does not exist; deleting destination)\n')
with timeit('removed_missing_shared_store', 'remove-wdir'):
ui.warn(b"(shared store does not exist; deleting destination)\n")
with timeit("removed_missing_shared_store", "remove-wdir"):
destvfs.rmtree(forcibly=True)
elif not re.search(br'[a-f0-9]{40}/\.hg$', storepath.replace(b'\\', b'/')):
ui.warn(b'(shared store does not belong to pooled storage; '
b'deleting destination to improve efficiency)\n')
with timeit('remove_unpooled_store', 'remove-wdir'):
elif not re.search(b"[a-f0-9]{40}/\.hg$", storepath.replace(b"\\", b"/")):
ui.warn(
b"(shared store does not belong to pooled storage; "
b"deleting destination to improve efficiency)\n"
)
with timeit("remove_unpooled_store", "remove-wdir"):
destvfs.rmtree(forcibly=True)
if destvfs.isfileorlink(b'.hg/wlock'):
ui.warn(b'(dest has an active working directory lock; assuming it is '
b'left over from a previous process and that the destination '
b'is corrupt; deleting it just to be sure)\n')
with timeit('remove_locked_wdir', 'remove-wdir'):
if destvfs.isfileorlink(b".hg/wlock"):
ui.warn(
b"(dest has an active working directory lock; assuming it is "
b"left over from a previous process and that the destination "
b"is corrupt; deleting it just to be sure)\n"
)
with timeit("remove_locked_wdir", "remove-wdir"):
destvfs.rmtree(forcibly=True)
def handlerepoerror(e):
if pycompat.bytestr(e) == _(b'abandoned transaction found'):
ui.warn(b'(abandoned transaction found; trying to recover)\n')
if pycompat.bytestr(e) == _(b"abandoned transaction found"):
ui.warn(b"(abandoned transaction found; trying to recover)\n")
repo = hg.repository(ui, dest)
if not repo.recover():
ui.warn(b'(could not recover repo state; '
b'deleting shared store)\n')
with timeit('remove_unrecovered_shared_store', 'remove-store'):
ui.warn(b"(could not recover repo state; " b"deleting shared store)\n")
with timeit("remove_unrecovered_shared_store", "remove-store"):
deletesharedstore()
ui.warn(b'(attempting checkout from beginning)\n')
ui.warn(b"(attempting checkout from beginning)\n")
return callself()
raise
@ -366,11 +439,14 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
def handlenetworkfailure():
if networkattempts[0] >= networkattemptlimit:
raise error.Abort(b'reached maximum number of network attempts; '
b'giving up\n')
raise error.Abort(
b"reached maximum number of network attempts; " b"giving up\n"
)
ui.warn(b'(retrying after network failure on attempt %d of %d)\n' %
(networkattempts[0], networkattemptlimit))
ui.warn(
b"(retrying after network failure on attempt %d of %d)\n"
% (networkattempts[0], networkattemptlimit)
)
# Do a backoff on retries to mitigate the thundering herd
# problem. This is an exponential backoff with a multiplier
@ -380,10 +456,10 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
# 2) 5.5 - 9.5
# 3) 11.5 - 15.5
backoff = (2 ** networkattempts[0] - 1) * 1.5
jittermin = ui.configint(b'robustcheckout', b'retryjittermin', 1000)
jittermax = ui.configint(b'robustcheckout', b'retryjittermax', 5000)
jittermin = ui.configint(b"robustcheckout", b"retryjittermin", 1000)
jittermax = ui.configint(b"robustcheckout", b"retryjittermax", 5000)
backoff += float(random.randint(jittermin, jittermax)) / 1000.0
ui.warn(b'(waiting %.2fs before retry)\n' % backoff)
ui.warn(b"(waiting %.2fs before retry)\n" % backoff)
time.sleep(backoff)
networkattempts[0] += 1
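
A standalone sketch (not part of the extension, assuming the default 1000-5000 ms jitter) that reproduces the retry windows quoted in the comment above:

for attempt in (1, 2, 3):
    base = (2 ** attempt - 1) * 1.5
    print("attempt %d: %.1f - %.1f seconds" % (attempt, base + 1.0, base + 5.0))
# attempt 1: 2.5 - 6.5 seconds
# attempt 2: 5.5 - 9.5 seconds
# attempt 3: 11.5 - 15.5 seconds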
@ -394,19 +470,19 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
Returns True if caller should call ``callself()`` to retry.
"""
if isinstance(e, error.Abort):
if e.args[0] == _(b'repository is unrelated'):
ui.warn(b'(repository is unrelated; deleting)\n')
if e.args[0] == _(b"repository is unrelated"):
ui.warn(b"(repository is unrelated; deleting)\n")
destvfs.rmtree(forcibly=True)
return True
elif e.args[0].startswith(_(b'stream ended unexpectedly')):
ui.warn(b'%s\n' % e.args[0])
elif e.args[0].startswith(_(b"stream ended unexpectedly")):
ui.warn(b"%s\n" % e.args[0])
# Will raise if failure limit reached.
handlenetworkfailure()
return True
# TODO test this branch
elif isinstance(e, error.ResponseError):
if e.args[0].startswith(_(b'unexpected response from remote server:')):
ui.warn(b'(unexpected response from remote server; retrying)\n')
if e.args[0].startswith(_(b"unexpected response from remote server:")):
ui.warn(b"(unexpected response from remote server; retrying)\n")
destvfs.rmtree(forcibly=True)
# Will raise if failure limit reached.
handlenetworkfailure()
@ -415,20 +491,28 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
# Assume all SSL errors are due to the network, as Mercurial
# should convert non-transport errors like cert validation failures
# to error.Abort.
ui.warn(b'ssl error: %s\n' % e)
ui.warn(b"ssl error: %s\n" % pycompat.bytestr(str(e)))
handlenetworkfailure()
return True
elif isinstance(e, urllibcompat.urlerr.urlerror):
if isinstance(e.reason, socket.error):
ui.warn(b'socket error: %s\n' % pycompat.bytestr(e.reason))
ui.warn(b"socket error: %s\n" % pycompat.bytestr(str(e.reason)))
handlenetworkfailure()
return True
else:
ui.warn(b'unhandled URLError; reason type: %s; value: %s\n' % (
e.reason.__class__.__name__, e.reason))
ui.warn(
b"unhandled URLError; reason type: %s; value: %s\n"
% (
pycompat.bytestr(e.reason.__class__.__name__),
pycompat.bytestr(str(e.reason)),
)
)
else:
ui.warn(b'unhandled exception during network operation; type: %s; '
b'value: %s\n' % (e.__class__.__name__, e))
ui.warn(
b"unhandled exception during network operation; type: %s; "
b"value: %s\n"
% (pycompat.bytestr(e.__class__.__name__), pycompat.bytestr(str(e)))
)
return False
@ -440,59 +524,69 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
try:
clonepeer = hg.peer(ui, {}, cloneurl)
rootnode = peerlookup(clonepeer, b'0')
rootnode = peerlookup(clonepeer, b"0")
except error.RepoLookupError:
raise error.Abort(b'unable to resolve root revision from clone '
b'source')
raise error.Abort(b"unable to resolve root revision from clone " b"source")
except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
if handlepullerror(e):
return callself()
raise
if rootnode == nullid:
raise error.Abort(b'source repo appears to be empty')
raise error.Abort(b"source repo appears to be empty")
storepath = os.path.join(sharebase, hex(rootnode))
storevfs = vfs.vfs(storepath, audit=False)
if storevfs.isfileorlink(b'.hg/store/lock'):
ui.warn(b'(shared store has an active lock; assuming it is left '
b'over from a previous process and that the store is '
b'corrupt; deleting store and destination just to be '
b'sure)\n')
if storevfs.isfileorlink(b".hg/store/lock"):
ui.warn(
b"(shared store has an active lock; assuming it is left "
b"over from a previous process and that the store is "
b"corrupt; deleting store and destination just to be "
b"sure)\n"
)
if destvfs.exists():
with timeit('remove_dest_active_lock', 'remove-wdir'):
with timeit("remove_dest_active_lock", "remove-wdir"):
destvfs.rmtree(forcibly=True)
with timeit('remove_shared_store_active_lock', 'remove-store'):
with timeit("remove_shared_store_active_lock", "remove-store"):
storevfs.rmtree(forcibly=True)
if storevfs.exists() and not storevfs.exists(b'.hg/requires'):
ui.warn(b'(shared store missing requires file; this is a really '
b'odd failure; deleting store and destination)\n')
if storevfs.exists() and not storevfs.exists(b".hg/requires"):
ui.warn(
b"(shared store missing requires file; this is a really "
b"odd failure; deleting store and destination)\n"
)
if destvfs.exists():
with timeit('remove_dest_no_requires', 'remove-wdir'):
with timeit("remove_dest_no_requires", "remove-wdir"):
destvfs.rmtree(forcibly=True)
with timeit('remove_shared_store_no_requires', 'remove-store'):
with timeit("remove_shared_store_no_requires", "remove-store"):
storevfs.rmtree(forcibly=True)
if storevfs.exists(b'.hg/requires'):
requires = set(storevfs.read(b'.hg/requires').splitlines())
if storevfs.exists(b".hg/requires"):
requires = set(storevfs.read(b".hg/requires").splitlines())
# "share-safe" (enabled by default as of hg 6.1) moved most
# requirements to a new file, so we need to look there as well to avoid
# deleting and re-cloning each time
if b"share-safe" in requires:
requires |= set(storevfs.read(b".hg/store/requires").splitlines())
# FUTURE when we require generaldelta, this is where we can check
# for that.
required = {b'dotencode', b'fncache'}
required = {b"dotencode", b"fncache"}
missing = required - requires
if missing:
ui.warn(b'(shared store missing requirements: %s; deleting '
b'store and destination to ensure optimal behavior)\n' %
b', '.join(sorted(missing)))
ui.warn(
b"(shared store missing requirements: %s; deleting "
b"store and destination to ensure optimal behavior)\n"
% b", ".join(sorted(missing))
)
if destvfs.exists():
with timeit('remove_dest_missing_requires', 'remove-wdir'):
with timeit("remove_dest_missing_requires", "remove-wdir"):
destvfs.rmtree(forcibly=True)
with timeit('remove_shared_store_missing_requires', 'remove-store'):
with timeit("remove_shared_store_missing_requires", "remove-store"):
storevfs.rmtree(forcibly=True)
created = False
@ -500,7 +594,7 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
if not destvfs.exists():
# Ensure parent directories of destination exist.
# Mercurial 3.8 removed ensuredirs and made makedirs race safe.
if util.safehasattr(util, 'ensuredirs'):
if util.safehasattr(util, "ensuredirs"):
makedirs = util.ensuredirs
else:
makedirs = util.makedirs
@ -509,17 +603,23 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
makedirs(sharebase, notindexed=True)
if upstream:
ui.write(b'(cloning from upstream repo %s)\n' % upstream)
ui.write(b"(cloning from upstream repo %s)\n" % upstream)
if not storevfs.exists():
behaviors.add(b'create-store')
behaviors.add(b"create-store")
try:
with timeit('clone', 'clone'):
shareopts = {b'pool': sharebase, b'mode': b'identity'}
res = hg.clone(ui, {}, clonepeer, dest=dest, update=False,
shareopts=shareopts,
stream=True)
with timeit("clone", "clone"):
shareopts = {b"pool": sharebase, b"mode": b"identity"}
res = hg.clone(
ui,
{},
clonepeer,
dest=dest,
update=False,
shareopts=shareopts,
stream=True,
)
except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
if handlepullerror(e):
return callself()
@ -527,18 +627,18 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
except error.RepoError as e:
return handlerepoerror(e)
except error.RevlogError as e:
ui.warn(b'(repo corruption: %s; deleting shared store)\n' % e)
with timeit('remove_shared_store_revlogerror', 'remote-store'):
ui.warn(b"(repo corruption: %s; deleting shared store)\n" % e)
with timeit("remove_shared_store_revlogerror", "remote-store"):
deletesharedstore()
return callself()
# TODO retry here.
if res is None:
raise error.Abort(b'clone failed')
raise error.Abort(b"clone failed")
# Verify it is using shared pool storage.
if not destvfs.exists(b'.hg/sharedpath'):
raise error.Abort(b'clone did not create a shared repo')
if not destvfs.exists(b".hg/sharedpath"):
raise error.Abort(b"clone did not create a shared repo")
created = True
@ -559,15 +659,16 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
if ctx:
if not ctx.hex().startswith(revision):
raise error.Abort(b'--revision argument is ambiguous',
hint=b'must be the first 12+ characters of a '
b'SHA-1 fragment')
raise error.Abort(
b"--revision argument is ambiguous",
hint=b"must be the first 12+ characters of a " b"SHA-1 fragment",
)
checkoutrevision = ctx.hex()
havewantedrev = True
if not havewantedrev:
ui.write(b'(pulling to obtain %s)\n' % (revision or branch,))
ui.write(b"(pulling to obtain %s)\n" % (revision or branch,))
remote = None
try:
@ -575,17 +676,18 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
pullrevs = [peerlookup(remote, revision or branch)]
checkoutrevision = hex(pullrevs[0])
if branch:
ui.warn(b'(remote resolved %s to %s; '
b'result is not deterministic)\n' %
(branch, checkoutrevision))
ui.warn(
b"(remote resolved %s to %s; "
b"result is not deterministic)\n" % (branch, checkoutrevision)
)
if checkoutrevision in repo:
ui.warn(b'(revision already present locally; not pulling)\n')
ui.warn(b"(revision already present locally; not pulling)\n")
else:
with timeit('pull', 'pull'):
with timeit("pull", "pull"):
pullop = exchange.pull(repo, remote, heads=pullrevs)
if not pullop.rheads:
raise error.Abort(b'unable to pull requested revision')
raise error.Abort(b"unable to pull requested revision")
except (error.Abort, ssl.SSLError, urllibcompat.urlerr.urlerror) as e:
if handlepullerror(e):
return callself()
@ -593,7 +695,7 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
except error.RepoError as e:
return handlerepoerror(e)
except error.RevlogError as e:
ui.warn(b'(repo corruption: %s; deleting shared store)\n' % e)
ui.warn(b"(repo corruption: %s; deleting shared store)\n" % e)
deletesharedstore()
return callself()
finally:
@ -605,47 +707,46 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
# Avoid any working directory manipulations if `-U`/`--noupdate` was passed
if noupdate:
ui.write(b'(skipping update since `-U` was passed)\n')
ui.write(b"(skipping update since `-U` was passed)\n")
return None
# Purge if requested. We purge before update because this way we're
# guaranteed to not have conflicts on `hg update`.
if purge and not created:
ui.write(b'(purging working directory)\n')
purgeext = extensions.find(b'purge')
ui.write(b"(purging working directory)\n")
purge = getattr(commands, "purge", None)
if not purge:
purge = extensions.find(b"purge").purge
# Mercurial 4.3 doesn't purge files outside the sparse checkout.
# See https://bz.mercurial-scm.org/show_bug.cgi?id=5626. Force
# purging by monkeypatching the sparse matcher.
try:
old_sparse_fn = getattr(repo.dirstate, '_sparsematchfn', None)
old_sparse_fn = getattr(repo.dirstate, "_sparsematchfn", None)
if old_sparse_fn is not None:
# TRACKING hg50
# Arguments passed to `matchmod.always` were unused and have been removed
if util.versiontuple(n=2) >= (5, 0):
repo.dirstate._sparsematchfn = lambda: matchmod.always()
else:
repo.dirstate._sparsematchfn = lambda: matchmod.always(repo.root, '')
repo.dirstate._sparsematchfn = lambda: matchmod.always()
with timeit('purge', 'purge'):
if purgeext.purge(ui, repo, all=True, abort_on_err=True,
# The function expects all arguments to be
# defined.
**{'print': None,
'print0': None,
'dirs': None,
'files': None}):
raise error.Abort(b'error purging')
with timeit("purge", "purge"):
if purge(
ui,
repo,
all=True,
abort_on_err=True,
# The function expects all arguments to be
# defined.
**{"print": None, "print0": None, "dirs": None, "files": None}
):
raise error.Abort(b"error purging")
finally:
if old_sparse_fn is not None:
repo.dirstate._sparsematchfn = old_sparse_fn
# Update the working directory.
if repo[b'.'].node() == nullid:
behaviors.add('empty-wdir')
if repo[b"."].node() == nullid:
behaviors.add("empty-wdir")
else:
behaviors.add('populated-wdir')
behaviors.add("populated-wdir")
if sparse_profile:
sparsemod = getsparse()
@ -655,58 +756,70 @@ def _docheckout(ui, url, dest, upstream, revision, branch, purge, sharebase,
try:
repo.filectx(sparse_profile, changeid=checkoutrevision).data()
except error.ManifestLookupError:
raise error.Abort(b'sparse profile %s does not exist at revision '
b'%s' % (sparse_profile, checkoutrevision))
raise error.Abort(
b"sparse profile %s does not exist at revision "
b"%s" % (sparse_profile, checkoutrevision)
)
# TRACKING hg48 - parseconfig takes `action` param
if util.versiontuple(n=2) >= (4, 8):
old_config = sparsemod.parseconfig(repo.ui, repo.vfs.tryread(b'sparse'), b'sparse')
else:
old_config = sparsemod.parseconfig(repo.ui, repo.vfs.tryread(b'sparse'))
old_config = sparsemod.parseconfig(
repo.ui, repo.vfs.tryread(b"sparse"), b"sparse"
)
old_includes, old_excludes, old_profiles = old_config
if old_profiles == {sparse_profile} and not old_includes and not \
old_excludes:
ui.write(b'(sparse profile %s already set; no need to update '
b'sparse config)\n' % sparse_profile)
if old_profiles == {sparse_profile} and not old_includes and not old_excludes:
ui.write(
b"(sparse profile %s already set; no need to update "
b"sparse config)\n" % sparse_profile
)
else:
if old_includes or old_excludes or old_profiles:
ui.write(b'(replacing existing sparse config with profile '
b'%s)\n' % sparse_profile)
ui.write(
b"(replacing existing sparse config with profile "
b"%s)\n" % sparse_profile
)
else:
ui.write(b'(setting sparse config to profile %s)\n' %
sparse_profile)
ui.write(b"(setting sparse config to profile %s)\n" % sparse_profile)
# If doing an incremental update, this will perform two updates:
# one to change the sparse profile and another to update to the new
# revision. This is not desired. But there's not a good API in
# Mercurial to do this as one operation.
with repo.wlock(), timeit('sparse_update_config',
'sparse-update-config'):
fcounts = map(len, sparsemod._updateconfigandrefreshwdir(
repo, [], [], [sparse_profile], force=True))
with repo.wlock(), repo.dirstate.parentchange(), timeit(
"sparse_update_config", "sparse-update-config"
):
# pylint --py3k: W1636
fcounts = list(
map(
len,
sparsemod._updateconfigandrefreshwdir(
repo, [], [], [sparse_profile], force=True
),
)
)
repo.ui.status(b'%d files added, %d files dropped, '
b'%d files conflicting\n' % tuple(fcounts))
repo.ui.status(
b"%d files added, %d files dropped, "
b"%d files conflicting\n" % tuple(fcounts)
)
ui.write(b'(sparse refresh complete)\n')
ui.write(b"(sparse refresh complete)\n")
op = 'update_sparse' if sparse_profile else 'update'
behavior = 'update-sparse' if sparse_profile else 'update'
op = "update_sparse" if sparse_profile else "update"
behavior = "update-sparse" if sparse_profile else "update"
with timeit(op, behavior):
if commands.update(ui, repo, rev=checkoutrevision, clean=True):
raise error.Abort(b'error updating')
raise error.Abort(b"error updating")
ui.write(b'updated to %s\n' % checkoutrevision)
ui.write(b"updated to %s\n" % checkoutrevision)
return None
def extsetup(ui):
# Ensure required extensions are loaded.
for ext in (b'purge', b'share'):
for ext in (b"purge", b"share"):
try:
extensions.find(ext)
except KeyError:

File diff not shown because of its large size

View file

@ -157,6 +157,10 @@ def make_task(config, jobs):
"tier": 1,
}
if job.get("secret", None):
task["scopes"] = ["secrets:get:" + job.get("secret")]
task["worker"]["taskcluster-proxy"] = True
if not taskgraph.fast:
cache_name = task["label"].replace(f"{config.kind}-", "", 1)
@ -282,8 +286,14 @@ def create_fetch_url_task(config, name, fetch):
schema={
Required("repo"): str,
Required("revision"): str,
Optional("include-dot-git"): bool,
Optional("artifact-name"): str,
Optional("path-prefix"): str,
# ssh-key is a taskcluster secret path (e.g. project/civet/github-deploy-key)
# In the secret dictionary, the key should be specified as
# "ssh_privkey": "-----BEGIN OPENSSH PRIVATE KEY-----\nkfksnb3jc..."
# n.b. The OpenSSH private key file format requires a newline at the end of the file.
Optional("ssh-key"): str,
},
)
def create_git_fetch_task(config, name, fetch):
@ -307,8 +317,19 @@ def create_git_fetch_task(config, name, fetch):
"/builds/worker/artifacts/%s" % artifact_name,
]
ssh_key = fetch.get("ssh-key")
if ssh_key:
args.append("--ssh-key-secret")
args.append(ssh_key)
digest_data = [fetch["revision"], path_prefix, artifact_name]
if fetch.get("include-dot-git", False):
args.append("--include-dot-git")
digest_data.append(".git")
return {
"command": args,
"artifact_name": artifact_name,
"digest_data": [fetch["revision"], path_prefix, artifact_name],
"digest_data": digest_data,
"secret": ssh_key,
}
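
A hypothetical fetch definition exercising the new options (the field names come from the schema above; the repo URL, revision, artifact name and secret path are placeholders):

fetch = {
    "repo": "https://github.com/example/project",
    "revision": "0123456789abcdef0123456789abcdef01234567",
    "artifact-name": "project.tar.zst",
    "path-prefix": "project",
    "include-dot-git": True,
    # Taskcluster secret path whose payload holds the ssh_privkey value
    # described in the schema comment above.
    "ssh-key": "project/example/github-deploy-key",
}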

View file

@ -79,6 +79,7 @@ job_description_schema = Schema(
Required("artifact"): str,
Optional("dest"): str,
Optional("extract"): bool,
Optional("verify-hash"): bool,
},
],
},
@ -298,10 +299,12 @@ def use_fetches(config, jobs):
path = artifact
dest = None
extract = True
verify_hash = False
else:
path = artifact["artifact"]
dest = artifact.get("dest")
extract = artifact.get("extract", True)
verify_hash = artifact.get("verify-hash", False)
fetch = {
"artifact": f"{prefix}/{path}",
@ -310,6 +313,8 @@ def use_fetches(config, jobs):
}
if dest is not None:
fetch["dest"] = dest
if verify_hash:
fetch["verify-hash"] = verify_hash
job_fetches.append(fetch)
job_artifact_prefixes = {

View file

@ -191,11 +191,6 @@ def support_vcs_checkout(config, job, taskdesc, repo_configs, sparse=False):
if repo_config.ssh_secret_name:
taskdesc["scopes"].append(f"secrets:get:{repo_config.ssh_secret_name}")
if any(repo_config.type == "hg" for repo_config in repo_configs.values()):
# Give task access to hgfingerprint secret so it can pin the certificate
# for hg.mozilla.org.
taskdesc["scopes"].append("secrets:get:project/taskcluster/gecko/hgfingerprint")
# only some worker platforms have taskcluster-proxy enabled
if job["worker"]["implementation"] in ("docker-worker",):
taskdesc["worker"]["taskcluster-proxy"] = True

View file

@ -157,7 +157,6 @@ def docker_worker_run_task(config, job, taskdesc):
if isinstance(run_command, str) or isinstance(run_command, dict):
exec_cmd = EXEC_COMMANDS[run.pop("exec-with", "bash")]
run_command = exec_cmd + [run_command]
command.append("--fetch-hgfingerprint")
if run["run-as-root"]:
command.extend(("--user", "root", "--group", "root"))
command.append("--")

View file

@ -268,6 +268,7 @@ def verify_index(config, index):
Required("loopback-audio"): bool,
Required("docker-in-docker"): bool, # (aka 'dind')
Required("privileged"): bool,
Required("disable-seccomp"): bool,
# Paths to Docker volumes.
#
# For in-tree Docker images, volumes can be parsed from Dockerfile.
@ -406,6 +407,10 @@ def build_docker_worker_payload(config, task, task_def):
capabilities["privileged"] = True
task_def["scopes"].append("docker-worker:capability:privileged")
if worker.get("disable-seccomp"):
capabilities["disableSeccomp"] = True
task_def["scopes"].append("docker-worker:capability:disableSeccomp")
task_def["payload"] = payload = {
"image": image,
"env": worker["env"],
@ -831,6 +836,7 @@ def set_defaults(config, tasks):
worker.setdefault("loopback-audio", False)
worker.setdefault("docker-in-docker", False)
worker.setdefault("privileged", False)
worker.setdefault("disable-seccomp", False)
worker.setdefault("volumes", [])
worker.setdefault("env", {})
if "caches" in worker:
@ -992,7 +998,7 @@ def build_task(config, tasks):
branch_rev = get_branch_rev(config)
if config.params["tasks_for"] == "github-pull-request":
if config.params["tasks_for"].startswith("github-pull-request"):
# In the past we used `project` for this, but that ends up being
# set to the repository name of the _head_ repo, which is not correct
# (and causes scope issues) if it doesn't match the name of the