gecko-dev/taskcluster/taskgraph/action.py

# -*- coding: utf-8 -*-

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals

import json
import logging
import requests
import yaml

from .create import create_tasks
from .decision import write_artifact
from .optimize import optimize_task_graph
from .taskgraph import TaskGraph

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Base URL that get_artifact() extends with "<taskId>/artifacts/<path>".
TASKCLUSTER_QUEUE_URL = "https://queue.taskcluster.net/v1/task"
# Base URL of the Treeherder API queried by backfill() and load_decisions().
TREEHERDER_URL = "https://treeherder.mozilla.org/api"

# We set this to 5 for now because this is what SETA sets the
# count to for every repository/job. If this is ever changed,
# we'll need to have an API added to Treeherder to let us query
# how far back we should look.
# backfill() sends this as the `count` of earlier resultsets to request.
MAX_BACKFILL_RESULTSETS = 5


def add_tasks(decision_task_id, task_labels, prefix=''):
    """
    Run the add-tasks task.  This function implements `mach taskgraph add-tasks`.

    Starting from the artifacts published by the decision task
    `decision_task_id`, it builds the graph made of the transitive closure of
    `task_labels`, optimizes it against the tasks the decision task already
    knows about, and creates the resulting tasks.

    `prefix` is prepended to the names of the two artifacts written here
    (`task-graph.json` and `label-to-taskid.json`).
    """
    # Artifacts of the decision task: its full graph and the parameters it used.
    full_graph_json = get_artifact(decision_task_id, "public/full-task-graph.json")
    parameters = get_artifact(decision_task_id, "public/parameters.yml")
    tasks_by_label, full_graph = TaskGraph.from_json(full_graph_json)

    # Restrict the full graph to the transitive closure of the requested labels.
    requested = set(task_labels)
    closure = full_graph.graph.transitive_closure(requested)
    closure_tasks = {}
    for label in closure.nodes:
        closure_tasks[label] = tasks_by_label[label]
    requested_graph = TaskGraph(closure_tasks, closure)

    already_known = get_artifact(decision_task_id, "public/label-to-taskid.json")

    # The user asked for these tasks explicitly, so they are passed as
    # `do_not_optimize` to keep the optimizer from dropping them.
    optimized, taskid_by_label = optimize_task_graph(
        target_task_graph=requested_graph,
        params=parameters,
        do_not_optimize=requested,
        existing_tasks=already_known)

    # Record what is about to be scheduled, plus the label -> taskId mapping.
    write_artifact('{}task-graph.json'.format(prefix), optimized.to_json())
    write_artifact('{}label-to-taskid.json'.format(prefix), taskid_by_label)

    # Finally, create the tasks themselves.
    create_tasks(optimized, taskid_by_label, parameters)


def get_artifact(task_id, path):
    """
    Download an artifact of task `task_id` from the Taskcluster queue and
    return its parsed content.

    The parser is chosen from the extension of `path`: `.json` artifacts are
    decoded with `json.loads`, `.yml` artifacts with `yaml.load`.

    Raises `ValueError` when `path` has any other extension (checked before
    anything is downloaded), and whatever `raise_for_status()` raises when the
    queue answers with an HTTP error status.
    """
    if not path.endswith(('.json', '.yml')):
        raise ValueError("Unsupported artifact type: {}".format(path))

    resp = requests.get(url="{}/{}/artifacts/{}".format(TASKCLUSTER_QUEUE_URL, task_id, path))
    # Fail loudly on an error status instead of parsing the error body as if
    # it were the artifact.
    resp.raise_for_status()

    if path.endswith('.json'):
        return json.loads(resp.text)
    # NOTE(review): yaml.load can construct arbitrary Python objects from the
    # downloaded text; consider yaml.safe_load if the artifact is plain data.
    return yaml.load(resp.text)


def backfill(project, job_id):
    """
    Run the backfill task.  This function implements `mach taskgraph backfill-task`.

    Looks up the Treeherder job `job_id` of `project`, asks Treeherder for up
    to `MAX_BACKFILL_RESULTSETS` resultsets with an id lower than the job's
    own resultset, and schedules the job's task type against each decision
    task that `load_decisions` returns for them.

    Does nothing (besides logging a warning) when the job was not built by
    Taskcluster.
    """
    session = requests.Session()
    session.headers.update({"User-Agent": "gecko-intree-backfill-task"})

    job_url = "{}/project/{}/jobs/{}/".format(TREEHERDER_URL, project, job_id)
    job = session.get(url=job_url).json()

    if job["build_system_type"] != "taskcluster":
        logger.warning("Invalid build system type! Must be a Taskcluster job. Aborting.")
        return

    # Job attributes that load_decisions() uses to recognise the same job
    # in other resultsets.
    filter_keys = ("build_platform_id", "platform_option", "job_type_id")
    filters = {key: job[key] for key in filter_keys}

    resultset_url = "{}/project/{}/resultset/".format(TREEHERDER_URL, project)
    query = {"id__lt": job["result_set_id"], "count": MAX_BACKFILL_RESULTSETS}
    response = session.get(url=resultset_url, params=query)
    resultsets = [entry["id"] for entry in response.json()["results"]]

    for decision_id in load_decisions(session, project, resultsets, filters):
        # Prefix the artifacts with the decision taskId so each run writes
        # its own files.
        add_tasks(decision_id, [job["job_type_name"]], '{}-'.format(decision_id))


def load_decisions(s, project, resultsets, filters):
    """
    Given a session, a project, a list of resultset (push) ids, and a dict of
    filters, return a list of taskIds from decision tasks.

    Resultsets are visited in the order given.  For each one, all of its jobs
    are fetched (250 per request) and matched against `filters`, a dict of
    job key -> required value.  The walk stops at the first resultset whose
    single matching job has result "success"; the "Gecko Decision Task" jobs
    of every resultset visited before that are collected.

    Raises `Exception` when more than one job of a resultset matches the
    filters, `IndexError` when a decision job has no "Inspect Task" detail,
    and `ValueError` when no taskId can be read from the inspect URL.
    """
    project_url = "{}/project/{}/jobs/".format(TREEHERDER_URL, project)
    decision_url = "{}/jobdetail/".format(TREEHERDER_URL)
    decisions = []
    decision_ids = []
    jobs_per_call = 250

    for resultset in resultsets:
        unfiltered = []
        offset = 0
        # Page through the jobs; a short page means there is nothing left.
        while True:
            params = {"push_id": resultset, "count": jobs_per_call, "offset": offset}
            results = s.get(url=project_url, params=params).json()["results"]
            unfiltered += results
            if len(results) < jobs_per_call:
                break
            offset += jobs_per_call

        filtered = [j for j in unfiltered if all(j[k] == filters[k] for k in filters)]
        if len(filtered) > 1:
            raise Exception("Too many jobs matched. Aborting.")
        if filtered and filtered[0]["result"] == "success":
            # The job already succeeded here, so stop walking further.
            break
        decisions += [t for t in unfiltered if t["job_type_name"] == "Gecko Decision Task"]

    for decision in decisions:
        params = {"job_guid": decision["job_guid"]}
        details = s.get(url=decision_url, params=params).json()["results"]
        inspect_urls = [detail["url"] for detail in details if detail["value"] == "Inspect Task"]
        if not inspect_urls:
            # Same exception type as indexing the empty list, but naming the job.
            raise IndexError(
                "No 'Inspect Task' link found for decision job {}".format(decision["job_guid"]))

        # Pull out the taskId from the URL e.g.
        # oN1NErz_Rf2DZJ1hi7YVfA from tools.taskcluster.net/task-inspector/#oN1NErz_Rf2DZJ1hi7YVfA/
        # Taking the first path segment after '#' also works when the
        # trailing slash is absent.
        task_id = inspect_urls[0].partition('#')[-1].split('/')[0]
        if not task_id:
            raise ValueError("No taskId found in inspect URL {}".format(inspect_urls[0]))
        decision_ids.append(task_id)
    return decision_ids
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00			`# -- coding: utf-8 --`

			`# This Source Code Form is subject to the terms of the Mozilla Public`
			`# License, v. 2.0. If a copy of the MPL was not distributed with this`
			`# file, You can obtain one at http://mozilla.org/MPL/2.0/.`

			`from __future__ import absolute_import, print_function, unicode_literals`

			`import json`
			`import logging`
			`import requests`
Bug 1302831 - Downloading parameters.yml in the action task. r=dustin MozReview-Commit-ID: IbAXfHBylAm --HG-- extra : transplant_source : %F9%60b%1E%3EAf%C4%C0v%FE%5Cu%14%9E%0A%E1%20%3B%D7 2016-09-14 23:39:06 +03:00			`import yaml`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00
			`from .create import create_tasks`
			`from .decision import write_artifact`
			`from .optimize import optimize_task_graph`
			`from .taskgraph import TaskGraph`

			`logger = logging.getLogger(__name__)`
Bug 1289823 - Add backfilling as an action-task r=armenzg,dustin MozReview-Commit-ID: HALwE6Q0Lch --HG-- extra : rebase_source : de9329c46bb9d50e44d29181095577326e039b73 2016-12-08 02:33:20 +03:00			`TASKCLUSTER_QUEUE_URL = "https://queue.taskcluster.net/v1/task"`
			`TREEHERDER_URL = "https://treeherder.mozilla.org/api"`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00
Bug 1289823 - Add backfilling as an action-task r=armenzg,dustin MozReview-Commit-ID: HALwE6Q0Lch --HG-- extra : rebase_source : de9329c46bb9d50e44d29181095577326e039b73 2016-12-08 02:33:20 +03:00			`# We set this to 5 for now because this is what SETA sets the`
			`# count to for every repository/job. If this is ever changed,`
			`# we'll need to have an API added to Treeherder to let us query`
			`# how far back we should look.`
			`MAX_BACKFILL_RESULTSETS = 5`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00
Bug 1289823 - Add backfilling as an action-task r=armenzg,dustin MozReview-Commit-ID: HALwE6Q0Lch --HG-- extra : rebase_source : de9329c46bb9d50e44d29181095577326e039b73 2016-12-08 02:33:20 +03:00
			`def add_tasks(decision_task_id, task_labels, prefix=''):`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00			`"""`
Bug 1289823 - Add backfilling as an action-task r=armenzg,dustin MozReview-Commit-ID: HALwE6Q0Lch --HG-- extra : rebase_source : de9329c46bb9d50e44d29181095577326e039b73 2016-12-08 02:33:20 +03:00			Run the add-tasks task. This function implements `mach taskgraph add-tasks`,
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00			`and is responsible for`

			`* creating taskgraph of tasks asked for in parameters with respect to`
			`a given gecko decision task and schedule these jobs.`
			`"""`
			`# read in the full graph for reference`
			`full_task_json = get_artifact(decision_task_id, "public/full-task-graph.json")`
Bug 1302831 - Downloading parameters.yml in the action task. r=dustin MozReview-Commit-ID: IbAXfHBylAm --HG-- extra : transplant_source : %F9%60b%1E%3EAf%C4%C0v%FE%5Cu%14%9E%0A%E1%20%3B%D7 2016-09-14 23:39:06 +03:00			`decision_params = get_artifact(decision_task_id, "public/parameters.yml")`
Bug 1304428 - Adding a from_json test in decision task. r=jlund MozReview-Commit-ID: 7QgfSutjoFZ --HG-- extra : rebase_source : 5a84e2bf54135e615ba2ed365abef3c60be99bae 2016-09-23 16:56:39 +03:00			`all_tasks, full_task_graph = TaskGraph.from_json(full_task_json)`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00
Bug 1289823 - Add backfilling as an action-task r=armenzg,dustin MozReview-Commit-ID: HALwE6Q0Lch --HG-- extra : rebase_source : de9329c46bb9d50e44d29181095577326e039b73 2016-12-08 02:33:20 +03:00			`target_tasks = set(task_labels)`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00			`target_graph = full_task_graph.graph.transitive_closure(target_tasks)`
			`target_task_graph = TaskGraph(`
			`{l: all_tasks[l] for l in target_graph.nodes},`
			`target_graph)`

			`existing_tasks = get_artifact(decision_task_id, "public/label-to-taskid.json")`

			`# We don't want to optimize target tasks since they have been requested by user`
			# Hence we put `target_tasks under` `do_not_optimize`
			`optimized_graph, label_to_taskid = optimize_task_graph(target_task_graph=target_task_graph,`
Bug 1302831 - Downloading parameters.yml in the action task. r=dustin MozReview-Commit-ID: IbAXfHBylAm --HG-- extra : transplant_source : %F9%60b%1E%3EAf%C4%C0v%FE%5Cu%14%9E%0A%E1%20%3B%D7 2016-09-14 23:39:06 +03:00			`params=decision_params,`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00			`do_not_optimize=target_tasks,`
			`existing_tasks=existing_tasks)`

			`# write out the optimized task graph to describe what will actually happen,`
			`# and the map of labels to taskids`
Bug 1289823 - Add backfilling as an action-task r=armenzg,dustin MozReview-Commit-ID: HALwE6Q0Lch --HG-- extra : rebase_source : de9329c46bb9d50e44d29181095577326e039b73 2016-12-08 02:33:20 +03:00			`write_artifact('{}task-graph.json'.format(prefix), optimized_graph.to_json())`
			`write_artifact('{}label-to-taskid.json'.format(prefix), label_to_taskid)`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00			`# actually create the graph`
Bug 1305989 - Update usage of create_tasks() in action tasks. r=dustin --HG-- extra : rebase_source : c8bdd1c4972ec2363f31d371ed9e1e9368d367f0 2016-09-28 16:43:00 +03:00			`create_tasks(optimized_graph, label_to_taskid, decision_params)`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00

			`def get_artifact(task_id, path):`
Bug 1289823 - Add backfilling as an action-task r=armenzg,dustin MozReview-Commit-ID: HALwE6Q0Lch --HG-- extra : rebase_source : de9329c46bb9d50e44d29181095577326e039b73 2016-12-08 02:33:20 +03:00			`resp = requests.get(url="{}/{}/artifacts/{}".format(TASKCLUSTER_QUEUE_URL, task_id, path))`
Bug 1302831 - Downloading parameters.yml in the action task. r=dustin MozReview-Commit-ID: IbAXfHBylAm --HG-- extra : transplant_source : %F9%60b%1E%3EAf%C4%C0v%FE%5Cu%14%9E%0A%E1%20%3B%D7 2016-09-14 23:39:06 +03:00			`if path.endswith('.json'):`
			`artifact = json.loads(resp.text)`
			`elif path.endswith('.yml'):`
			`artifact = yaml.load(resp.text)`
Bug 1281062 - Create Action Tasks to schedule new jobs. r=dustin MozReview-Commit-ID: 5MvqLfGrlLC --HG-- extra : rebase_source : dd954acce8ef9ed2f3b9aa7c5c2cbd916a82f1f1 2016-07-11 20:13:58 +03:00			`return artifact`
Bug 1289823 - Add backfilling as an action-task r=armenzg,dustin MozReview-Commit-ID: HALwE6Q0Lch --HG-- extra : rebase_source : de9329c46bb9d50e44d29181095577326e039b73 2016-12-08 02:33:20 +03:00

			`def backfill(project, job_id):`
			`"""`
			Run the backfill task. This function implements `mach taskgraph backfill-task`,
			`and is responsible for`

			`* Scheduling backfill jobs from a given treeherder resultset backwards until either`
			a successful job is found or `N` jobs have been scheduled.
			`"""`
			`s = requests.Session()`
			`s.headers.update({"User-Agent": "gecko-intree-backfill-task"})`

			`job = s.get(url="{}/project/{}/jobs/{}/".format(TREEHERDER_URL, project, job_id)).json()`

			`if job["build_system_type"] != "taskcluster":`
			`logger.warning("Invalid build system type! Must be a Taskcluster job. Aborting.")`
			`return`

			`filters = dict((k, job[k]) for k in ("build_platform_id", "platform_option", "job_type_id"))`

			`resultset_url = "{}/project/{}/resultset/".format(TREEHERDER_URL, project)`
			`params = {"id__lt": job["result_set_id"], "count": MAX_BACKFILL_RESULTSETS}`
			`results = s.get(url=resultset_url, params=params).json()["results"]`
			`resultsets = [resultset["id"] for resultset in results]`

			`for decision in load_decisions(s, project, resultsets, filters):`
			`add_tasks(decision, [job["job_type_name"]], '{}-'.format(decision))`


			`def load_decisions(s, project, resultsets, filters):`
			`"""`
			`Given a project, a list of revisions, and a dict of filters, return`
			`a list of taskIds from decision tasks.`
			`"""`
			`project_url = "{}/project/{}/jobs/".format(TREEHERDER_URL, project)`
			`decision_url = "{}/jobdetail/".format(TREEHERDER_URL)`
			`decisions = []`
			`decision_ids = []`

			`for resultset in resultsets:`
			`unfiltered = []`
			`offset = 0`
			`jobs_per_call = 250`
			`while True:`
			`params = {"push_id": resultset, "count": jobs_per_call, "offset": offset}`
			`results = s.get(url=project_url, params=params).json()["results"]`
			`unfiltered += results`
			`if (len(results) < jobs_per_call):`
			`break`
			`offset += jobs_per_call`
			`filtered = [j for j in unfiltered if all([j[k] == filters[k] for k in filters])]`
			`if len(filtered) > 1:`
			`raise Exception("Too many jobs matched. Aborting.")`
			`elif len(filtered) == 1:`
			`if filtered[0]["result"] == "success":`
			`break`
			`decisions += [t for t in unfiltered if t["job_type_name"] == "Gecko Decision Task"]`

			`for decision in decisions:`
			`params = {"job_guid": decision["job_guid"]}`
			`details = s.get(url=decision_url, params=params).json()["results"]`
			`inspect = [detail["url"] for detail in details if detail["value"] == "Inspect Task"][0]`

			`# Pull out the taskId from the URL e.g.`
			`# oN1NErz_Rf2DZJ1hi7YVfA from tools.taskcluster.net/task-inspector/#oN1NErz_Rf2DZJ1hi7YVfA/`
			`decision_ids.append(inspect.partition('#')[-1].rpartition('/')[0])`
			`return decision_ids`