Bug 1415619: Use `gecko.caches` for indexing docker tasks. r=dustin

MozReview-Commit-ID: IN17XmVk7HJ --HG-- extra : rebase_source : 213d14d89b920db74887714b45df8c1d64a6dbb6
2017-11-09 17:08:41 -07:00 · 2017-11-09 17:08:41 -07:00 · e034757087
--- a/taskcluster/docs/docker-images.rst
+++ b/taskcluster/docs/docker-images.rst
@ -78,9 +78,9 @@ Task Image Index Namespace
 Images that are built on push and uploaded as an artifact of a task will be indexed under the
 following namespaces.

-* docker.images.v2.level-{level}.{image_name}.latest
-* docker.images.v2.level-{level}.{image_name}.pushdate.{year}.{month}-{day}-{pushtime}
-* docker.images.v2.level-{level}.{image_name}.hash.{context_hash}
+* gecko.cache.level-{level}.docker-images.v1.{name}.hash.{digest}
+* gecko.cache.level-{level}.docker-images.v1.{name}.latest
+* gecko.cache.level-{level}.docker-images.v1.{name}.pushdate.{year}.{month}-{day}-{pushtime}

 Not only can images be browsed by the pushdate and context hash, but the 'latest' namespace
 is meant to view the latest built image.  This functions similarly to the 'latest' tag
--- a/taskcluster/taskgraph/docker.py
+++ b/taskcluster/taskgraph/docker.py
@ -21,16 +21,16 @@ from taskgraph.util.taskcluster import (
    find_task_id,
    get_artifact_url,
 )
+from taskgraph.util.cached_tasks import cached_index_path
 from . import GECKO

-DOCKER_INDEX = docker.INDEX_PREFIX + '.{}.{}.hash.{}'
-

 def load_image_by_name(image_name, tag=None):
    context_path = os.path.join(GECKO, 'taskcluster', 'docker', image_name)
    context_hash = docker.generate_context_hash(GECKO, context_path, image_name)

-    index_path = DOCKER_INDEX.format('level-3', image_name, context_hash)
+    index_path = cached_index_path(
+        level=3, cache_type='docker-images.v1', cache_name=image_name, digest=context_hash)
    task_id = find_task_id(index_path)

    return load_image_by_task_id(task_id, tag)
--- a/taskcluster/taskgraph/transforms/docker_image.py
+++ b/taskcluster/taskgraph/transforms/docker_image.py
@ -16,17 +16,11 @@ from .. import GECKO
 from taskgraph.util.docker import (
    docker_image,
    generate_context_hash,
-    INDEX_PREFIX,
 )
+from taskgraph.util.cached_tasks import add_optimization

 transforms = TransformSequence()

-ROUTE_TEMPLATES = [
-    'index.{index_prefix}.level-{level}.{image_name}.latest',
-    'index.{index_prefix}.level-{level}.{image_name}.pushdate.{year}.{month}-{day}-{pushtime}',
-    'index.{index_prefix}.level-{level}.{image_name}.hash.{context_hash}',
-]
-

@transforms.add
 def fill_template(config, tasks):
@ -40,31 +34,6 @@ def fill_template(config, tasks):
        description = 'Build the docker image {} for use by dependent tasks'.format(
            image_name)

-        routes = []
-        for tpl in ROUTE_TEMPLATES:
-            routes.append(tpl.format(
-                index_prefix=INDEX_PREFIX,
-                level=config.params['level'],
-                image_name=image_name,
-                project=config.params['project'],
-                head_rev=config.params['head_rev'],
-                pushlog_id=config.params.get('pushlog_id', 0),
-                pushtime=config.params['moz_build_date'][8:],
-                year=config.params['moz_build_date'][0:4],
-                month=config.params['moz_build_date'][4:6],
-                day=config.params['moz_build_date'][6:8],
-                context_hash=context_hash,
-            ))
-
-        # As an optimization, if the context hash exists for a high level, that image
-        # task ID will be used.  The reasoning behind this is that eventually everything ends
-        # up on level 3 at some point if most tasks use this as a common image
-        # for a given context hash, a worker within Taskcluster does not need to contain
-        # the same image per branch.
-        optimization = {'index-search': ['{}.level-{}.{}.hash.{}'.format(
-            INDEX_PREFIX, level, image_name, context_hash)
-            for level in reversed(range(int(config.params['level']), 4))]}
-
        # Adjust the zstandard compression level based on the execution level.
        # We use faster compression for level 1 because we care more about
        # end-to-end times. We use slower/better compression for other levels
@ -79,8 +48,6 @@ def fill_template(config, tasks):
            'description': description,
            'attributes': {'image_name': image_name},
            'expires-after': '1 year',
-            'routes': routes,
-            'optimization': optimization,
            'scopes': ['secrets:get:project/taskcluster/gecko/hgfingerprint'],
            'treeherder': {
                'symbol': job_symbol,
@ -130,4 +97,11 @@ def fill_template(config, tasks):
            },
        }

+        add_optimization(
+            config, taskdesc,
+            cache_type="docker-images.v1",
+            cache_name=image_name,
+            digest=context_hash,
+        )
+
        yield taskdesc
--- a/taskcluster/taskgraph/util/cached_tasks.py
+++ b/taskcluster/taskgraph/util/cached_tasks.py
@ -66,3 +66,30 @@ def add_optimization(config, taskdesc, cache_type, cache_name, digest=None, dige
        'index.{}'.format(route.format(**subs))
        for route in EXTRA_CACHE_INDEXES
    ])
+
+
+def cached_index_path(level, cache_type, cache_name, digest=None, digest_data=None):
+    """
+    Get the index path needed to locate the task that would be created by
+    :func:`add_optimization`.
+
+    :param int level: The SCM level of the task to look for.
+    :param str cache_type: The type of task result being cached.
+    :param str cache_name: The name of the object being cached.
+    :param digest: A unique string identifying this version of the artifacts
+        being generated. Typically this will be the hash of inputs to the task.
+    :type digest: bytes or None
+    :param digest_data: A list of bytes representing the inputs of this task.
+        They will be joined with newlines and SHA-256 hashed to create the
+        digest for this task.
+    :type digest_data: list of bytes or None
+
+    :return str: The index path.
+    """
+    if (digest is None) == (digest_data is None):
+        raise Exception("Must pass exactly one of `digest` and `digest_data`.")
+    if digest is None:
+        digest = hashlib.sha256('\n'.join(digest_data)).hexdigest()
+
+    return TARGET_CACHE_INDEX.format(
+        level=level, type=cache_type, name=cache_name, digest=digest)
--- a/taskcluster/taskgraph/util/docker.py
+++ b/taskcluster/taskgraph/util/docker.py
@ -19,7 +19,6 @@ from .. import GECKO


 IMAGE_DIR = os.path.join(GECKO, 'taskcluster', 'docker')
-INDEX_PREFIX = 'docker.images.v2'


 def docker_image(name, by_tag=False):