From 2e4e3a4dda0508fd036a26f57f6ed5c6a01d2cc0 Mon Sep 17 00:00:00 2001 From: Tarek Ziade Date: Fri, 13 Mar 2020 21:27:12 +0100 Subject: [PATCH] Bug 1616975 - Create a unified changelog (#5949) * Add management task for fetching github commits based on repository fixes and 2 tables for storing that data * Add changelog web api * Add tests * Refactor Github utilities into one file and move http utilities from common.py to new file --- newrelic.ini | 2 +- tests/changelog/__init__.py | 0 tests/changelog/test_collector.py | 81 +++++++++++++ tests/changelog/test_tasks.py | 19 +++ tests/test_setup.py | 2 +- tests/webapp/api/test_changelog_api.py | 34 ++++++ treeherder/changelog/__init__.py | 0 treeherder/changelog/changes.py | 12 ++ treeherder/changelog/collector.py | 81 +++++++++++++ treeherder/changelog/filters.py | 38 ++++++ treeherder/changelog/management/__init__.py | 0 .../changelog/management/commands/__init__.py | 0 .../management/commands/update_changelog.py | 19 +++ .../changelog/migrations/0001_initial.py | 46 +++++++ treeherder/changelog/migrations/__init__.py | 0 treeherder/changelog/models.py | 36 ++++++ treeherder/changelog/repositories.json | 113 ++++++++++++++++++ treeherder/changelog/tasks.py | 42 +++++++ treeherder/config/settings.py | 1 + treeherder/etl/bugzilla.py | 2 +- treeherder/etl/common.py | 41 ------- treeherder/etl/management/commands/ingest.py | 11 +- treeherder/etl/push_loader.py | 4 +- treeherder/etl/pushlog.py | 2 +- treeherder/etl/runnable_jobs.py | 2 +- .../log_parser/artifactbuildercollection.py | 2 +- treeherder/log_parser/failureline.py | 2 +- treeherder/push_health/compare.py | 2 +- treeherder/services/pulse/consumers.py | 2 +- treeherder/utils/github.py | 31 +++-- treeherder/utils/http.py | 40 +++++++ treeherder/webapp/api/bugzilla.py | 2 +- treeherder/webapp/api/changelog.py | 19 +++ treeherder/webapp/api/serializers.py | 12 ++ treeherder/webapp/api/urls.py | 3 + 35 files changed, 636 insertions(+), 67 deletions(-) create mode 100644 tests/changelog/__init__.py create mode 100644 tests/changelog/test_collector.py create mode 100644 tests/changelog/test_tasks.py create mode 100644 tests/webapp/api/test_changelog_api.py create mode 100644 treeherder/changelog/__init__.py create mode 100644 treeherder/changelog/changes.py create mode 100644 treeherder/changelog/collector.py create mode 100644 treeherder/changelog/filters.py create mode 100644 treeherder/changelog/management/__init__.py create mode 100644 treeherder/changelog/management/commands/__init__.py create mode 100644 treeherder/changelog/management/commands/update_changelog.py create mode 100644 treeherder/changelog/migrations/0001_initial.py create mode 100644 treeherder/changelog/migrations/__init__.py create mode 100644 treeherder/changelog/models.py create mode 100644 treeherder/changelog/repositories.json create mode 100644 treeherder/changelog/tasks.py create mode 100644 treeherder/utils/http.py create mode 100644 treeherder/webapp/api/changelog.py diff --git a/newrelic.ini b/newrelic.ini index aa4133349..56c34084d 100644 --- a/newrelic.ini +++ b/newrelic.ini @@ -30,4 +30,4 @@ shutdown_timeout = 15 # List finite-duration commands here to enable their annotation by the agent. # For infinite duration commands (such as `pulse_listener_*`) see: # https://docs.newrelic.com/docs/agents/python-agent/supported-features/python-background-tasks#wrapping -instrumentation.scripts.django_admin = check cycle_data load_initial_data migrate update_bugscache run_intermittents_commenter synthesize_backfill_report +instrumentation.scripts.django_admin = update_changelog check cycle_data load_initial_data migrate update_bugscache run_intermittents_commenter synthesize_backfill_report diff --git a/tests/changelog/__init__.py b/tests/changelog/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/changelog/test_collector.py b/tests/changelog/test_collector.py new file mode 100644 index 000000000..3c97e3764 --- /dev/null +++ b/tests/changelog/test_collector.py @@ -0,0 +1,81 @@ +import binascii +import json +import os +import re +from datetime import (datetime, + timedelta) + +import responses + +from treeherder.changelog.collector import collect # noqa isort:skip + + +def random_id(): + return binascii.hexlify(os.urandom(16)).decode("utf8") + + +RELEASES = re.compile(r"https://api.github.com/repos/.*/.*/releases.*") +COMMITS = re.compile(r"https://api.github.com/repos/.*/.*/commits\?.*") +COMMIT_INFO = re.compile(r"https://api.github.com/repos/.*/.*/commits/.*") + + +def prepare_responses(): + now = datetime.now().strftime("%Y-%m-%dT%H:%M:%S") + + def releases(request): + + data = [ + { + "name": "ok", + "published_at": now, + "id": random_id(), + "html_url": "url", + "tag_name": "some tag", + "author": {"login": "tarek"}, + } + ] + return 200, {}, json.dumps(data) + + responses.add_callback( + responses.GET, RELEASES, callback=releases, content_type="application/json" + ) + + def _commit(): + files = [{"filename": "file1"}, {"filename": "file2"}] + return { + "files": files, + "name": "ok", + "sha": random_id(), + "html_url": "url", + "tag_name": "some tag", + "commit": { + "message": "yeah", + "author": {"name": "tarek", "date": now}, + "files": files, + }, + } + + def commit(request): + return 200, {}, json.dumps(_commit()) + + def commits(request): + return 200, {}, json.dumps([_commit()]) + + responses.add_callback( + responses.GET, COMMITS, callback=commits, content_type="application/json" + ) + responses.add_callback( + responses.GET, COMMIT_INFO, callback=commit, content_type="application/json" + ) + + +@responses.activate +def test_collect(): + yesterday = datetime.now() - timedelta(days=1) + yesterday = yesterday.strftime("%Y-%m-%dT%H:%M:%S") + prepare_responses() + res = list(collect(yesterday)) + + # we're not looking into much details here, we can do this + # once we start to tweak the filters + assert len(res) > 0 diff --git a/tests/changelog/test_tasks.py b/tests/changelog/test_tasks.py new file mode 100644 index 000000000..75fc91cd1 --- /dev/null +++ b/tests/changelog/test_tasks.py @@ -0,0 +1,19 @@ +import pytest +import responses + +from tests.changelog.test_collector import prepare_responses +from treeherder.changelog.models import Changelog +from treeherder.changelog.tasks import update_changelog + + +@pytest.mark.django_db() +@responses.activate +def test_update_changelog(): + prepare_responses() + num_entries = Changelog.objects.count() + + update_changelog() + + # we're not looking into much details here, we can do this + # once we start to tweak the filters + assert Changelog.objects.count() > num_entries diff --git a/tests/test_setup.py b/tests/test_setup.py index 15c33f338..601edef3d 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -5,7 +5,7 @@ from django.core.cache import cache from django.core.management import call_command from treeherder.config.utils import get_tls_redis_url -from treeherder.etl.common import fetch_text +from treeherder.utils.http import fetch_text def test_block_unmocked_requests(): diff --git a/tests/webapp/api/test_changelog_api.py b/tests/webapp/api/test_changelog_api.py new file mode 100644 index 000000000..55b0ea93b --- /dev/null +++ b/tests/webapp/api/test_changelog_api.py @@ -0,0 +1,34 @@ +from datetime import datetime + +from django.db import transaction +from django.urls import reverse + +from treeherder.changelog.models import (Changelog, + ChangelogFile) + + +def test_changelog_list(client, test_job_with_notes): + """ + test retrieving a list of changes from the changelog endpoint + """ + # adding some data + entry = { + "date": datetime.now(), + "author": "tarek", + "message": "commit", + "remote_id": "2689367b205c16ce32ed4200942b8b8b1e262dfc70d9bc9fbc77c49699a4f1df", + "type": "commit", + "url": "http://example.com/some/url", + } + files = ["file1", "file2", "file3"] + with transaction.atomic(): + changelog = Changelog.objects.create(**entry) + [ChangelogFile.objects.create(name=name, changelog=changelog) for name in files] + + # now let's check that we get it from the API call + resp = client.get(reverse("changelog-list")) + + assert resp.status_code == 200 + result = resp.json() + assert result[0]["files"] == ["file1", "file2", "file3"] + assert result[0]["author"] == "tarek" diff --git a/treeherder/changelog/__init__.py b/treeherder/changelog/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/treeherder/changelog/changes.py b/treeherder/changelog/changes.py new file mode 100644 index 000000000..6d38fd707 --- /dev/null +++ b/treeherder/changelog/changes.py @@ -0,0 +1,12 @@ +from datetime import timedelta + +from django.utils import timezone + +from treeherder.changelog.models import Changelog + + +def get_changes(days=15): + """Grabbing the latest changes done in the past days. + """ + min_date = timezone.now() - timedelta(days=days) + return Changelog.objects.filter(date__gte=min_date).order_by("date") diff --git a/treeherder/changelog/collector.py b/treeherder/changelog/collector.py new file mode 100644 index 000000000..1e7491d53 --- /dev/null +++ b/treeherder/changelog/collector.py @@ -0,0 +1,81 @@ +""" Collector, grabs changes in various sources and put them in a DB. +""" +import json +import os + +from treeherder.changelog.filters import Filters +from treeherder.utils import github + +MAX_ITEMS = 100 +CFG = os.path.join(os.path.dirname(__file__), "repositories.json") + +with open(CFG) as f: + CFG = json.loads(f.read()) + + +class GitHub: + def __init__(self): + self.filters = Filters() + + def get_changes(self, **kw): + owner = kw["user"] + repository = kw["repository"] + filters = kw.get("filters") + gh_options = {"number": kw.get("number", MAX_ITEMS)} + + for release in github.get_releases(owner, repository, params=gh_options): + release["files"] = [] + # no "since" option for releases() we filter manually here + if "since" in kw and release["published_at"] <= kw["since"]: + continue + name = release["name"] or release["tag_name"] + yield { + "date": release["published_at"], + "author": release["author"]["login"], + "message": "Released " + name, + "remote_id": release["id"], + "type": "release", + "url": release["html_url"], + } + + if "since" in kw: + gh_options["since"] = kw["since"] + + for commit in github.commits_info(owner, repository, params=gh_options): + if filters: + for filter in filters: + if isinstance(filter, list) and filter[0] == "filter_by_path": + commit_info = github.commit_info( + owner, repository, commit["sha"] + ) + commit["files"] = commit_info["files"] + break + + message = commit["commit"]["message"] + message = message.split("\n")[0] + res = { + "date": commit["commit"]["author"]["date"], + "author": commit["commit"]["author"]["name"], + "message": message, + "remote_id": commit["sha"], + "type": "commit", + "url": commit["html_url"], + "files": [f["filename"] for f in commit.get("files", [])], + } + res = self.filters(res, filters) + if res: + yield res + + +def collect(since): + readers = {"github": GitHub()} + + for repo_info in CFG["repositories"]: + source = dict(repo_info["source"]) + reader = readers.get(source["type"]) + if not reader: + raise NotImplementedError(source["type"]) + source["since"] = since + for change in reader.get_changes(**source): + change.update(repo_info["metadata"]) # XXX duplicated for now + yield change diff --git a/treeherder/changelog/filters.py b/treeherder/changelog/filters.py new file mode 100644 index 000000000..222432f47 --- /dev/null +++ b/treeherder/changelog/filters.py @@ -0,0 +1,38 @@ +import fnmatch + + +class Filters: + def deployment(self, change, *options): + message = change["message"] + if "*PRODUCTION*" in message or "*STAGING*" in message: + change["tags"] = ["deployment"] + return change + + def only_releases(self, change, *options): + if change["type"] == "release": + return change + + def remove_auto_commits(self, change, *options): + message = change["message"] + start_text = ("Scheduled weekly dependency update", "Merge pull request") + if not message.startswith(start_text): + return change + + def filter_by_path(self, change, *options): + if "files" not in change: + return + for file in change["files"]: + for filter in options: + if fnmatch.fnmatch(file, filter): + return change + + def __call__(self, message, filters): + for filter in filters: + if isinstance(filter, list): + filter, options = filter[0], filter[1:] + else: + options = [] + message = getattr(self, filter)(message, *options) + if message is None: + return None + return message diff --git a/treeherder/changelog/management/__init__.py b/treeherder/changelog/management/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/treeherder/changelog/management/commands/__init__.py b/treeherder/changelog/management/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/treeherder/changelog/management/commands/update_changelog.py b/treeherder/changelog/management/commands/update_changelog.py new file mode 100644 index 000000000..a9c82fd40 --- /dev/null +++ b/treeherder/changelog/management/commands/update_changelog.py @@ -0,0 +1,19 @@ +from django.core.management.base import BaseCommand + +from treeherder.changelog.tasks import update_changelog + + +class Command(BaseCommand): + help = """ + Update the changelog manually. + + This is mostly useful for testing + """ + + def add_arguments(self, parser): + parser.add_argument( + "--days", help="Number of days to look at", type=int, default=1 + ) + + def handle(self, *args, **options): + update_changelog(options["days"]) diff --git a/treeherder/changelog/migrations/0001_initial.py b/treeherder/changelog/migrations/0001_initial.py new file mode 100644 index 000000000..b281eac32 --- /dev/null +++ b/treeherder/changelog/migrations/0001_initial.py @@ -0,0 +1,46 @@ +# Generated by Django 3.0.3 on 2020-03-13 17:43 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='Changelog', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('remote_id', models.CharField(max_length=255)), + ('date', models.DateTimeField(db_index=True)), + ('author', models.CharField(max_length=100)), + ('owner', models.CharField(max_length=100)), + ('project', models.CharField(max_length=100)), + ('project_url', models.CharField(max_length=360)), + ('message', models.CharField(max_length=360)), + ('description', models.CharField(max_length=360)), + ('type', models.CharField(max_length=100)), + ('url', models.CharField(max_length=360)), + ], + options={ + 'db_table': 'changelog_entry', + 'unique_together': {('id', 'remote_id', 'type')}, + }, + ), + migrations.CreateModel( + name='ChangelogFile', + fields=[ + ('id', models.AutoField(primary_key=True, serialize=False)), + ('name', models.SlugField(max_length=255)), + ('changelog', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='files', to='changelog.Changelog')), + ], + options={ + 'db_table': 'changelog_file', + }, + ), + ] diff --git a/treeherder/changelog/migrations/__init__.py b/treeherder/changelog/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/treeherder/changelog/models.py b/treeherder/changelog/models.py new file mode 100644 index 000000000..c4d9703df --- /dev/null +++ b/treeherder/changelog/models.py @@ -0,0 +1,36 @@ +from django.db import models + + +class Changelog(models.Model): + id = models.AutoField(primary_key=True) + remote_id = models.CharField(max_length=255) + type = models.CharField(max_length=100) + date = models.DateTimeField(db_index=True) + author = models.CharField(max_length=100) + owner = models.CharField(max_length=100) + project = models.CharField(max_length=100) + project_url = models.CharField(max_length=360) + message = models.CharField(max_length=360) + description = models.CharField(max_length=360) + url = models.CharField(max_length=360) + + class Meta: + db_table = "changelog_entry" + unique_together = ('id', 'remote_id', 'type') + + def __str__(self): + return "[%s] %s by %s" % (self.id, self.message, self.author) + + +class ChangelogFile(models.Model): + id = models.AutoField(primary_key=True) + changelog = models.ForeignKey( + Changelog, related_name="files", on_delete=models.CASCADE + ) + name = models.SlugField(max_length=255) + + class Meta: + db_table = "changelog_file" + + def __str__(self): + return self.name diff --git a/treeherder/changelog/repositories.json b/treeherder/changelog/repositories.json new file mode 100644 index 000000000..bc85e3784 --- /dev/null +++ b/treeherder/changelog/repositories.json @@ -0,0 +1,113 @@ +{ + "repositories": [ + { + "metadata": { + "description": "Tool for creating Windows cloud", + "owner": "Release Engineering", + "project": "OCC", + "project_url": "https://github.com/mozilla-releng/OpenCloudConfig" + }, + "source": { + "filters": ["deployment"], + "items": 10, + "repository": "OpenCloudConfig", + "type": "github", + "user": "mozilla-releng" + } + }, + { + "metadata": { + "description": "Tool for creating Windows cloud", + "owner": "Release Engineering", + "project": "Build Puppet", + "project_url": "https://github.com/mozilla-releng/build-puppet" + }, + "source": { + "filters": ["remove_auto_commits"], + "items": 10, + "repository": "build-puppet", + "type": "github", + "user": "mozilla-releng" + } + }, + { + "metadata": { + "description": "Framework that supports Mozilla's continuous integration and release processes.", + "owner": "Release Engineering", + "project": "TaskCluster", + "project_url": "https://github.com/taskcluster/taskcluster" + }, + "source": { + "filters": ["only_releases"], + "items": 10, + "repository": "taskcluster", + "type": "github", + "user": "taskcluster" + } + }, + + { + "metadata": { + "description": "Docker task host for linux", + "owner": "Release Engineering", + "project": "Docker Worker", + "project_url": "https://github.com/taskcluster/docker-worker" + }, + "source": { + "filters": ["only_releases"], + "items": 10, + "repository": "docker-worker", + "type": "github", + "user": "taskcluster" + } + }, + + { + "metadata": { + "description": "A generic worker for TaskCluster, written in go", + "owner": "Release Engineering", + "project": "Generic Worker", + "project_url": "https://github.com/taskcluster/generic-worker" + }, + "source": { + "filters": ["only_releases"], + "items": 10, + "repository": "generic-worker", + "type": "github", + "user": "taskcluster" + } + }, + + { + "metadata": { + "description": "Taskcluster authentication proxy", + "owner": "Release Engineering", + "project": "Taskcluster Proxy", + "project_url": "https://github.com/taskcluster/taskcluster-proxy" + }, + "source": { + "filters": ["only_releases"], + "items": 10, + "repository": "taskcluster-proxy", + "type": "github", + "user": "taskcluster" + } + }, + { + "metadata": { + "description": "Simple implementation of a test run manager for mozilla tests at bitbar.", + "owner": "Bob Clary", + "project": "Bitbar device pool", + "project_url": "https://github.com/bclary/mozilla-bitbar-devicepool/" + }, + "source": { + "filters": [], + "items": 10, + "repository": "mozilla-bitbar-devicepool", + "type": "github", + "user": "bclary", + "filters": [["filter_by_path", "config/config.yml"]] + } + } + ] +} diff --git a/treeherder/changelog/tasks.py b/treeherder/changelog/tasks.py new file mode 100644 index 000000000..daaf8b180 --- /dev/null +++ b/treeherder/changelog/tasks.py @@ -0,0 +1,42 @@ +import datetime +import logging + +from django.db import transaction + +from treeherder.changelog.collector import collect +from treeherder.changelog.models import (Changelog, + ChangelogFile) + +logger = logging.getLogger(__name__) + + +def update_changelog(days=1): + """ + Collect changes and update the DB. + """ + logger.info("Updating unified changelog (days=%d)" % days) + # collecting last day of changes across all sources + since = datetime.datetime.now() - datetime.timedelta(days=days) + since = since.strftime("%Y-%m-%dT%H:%M:%S") + + created = 0 + existed = 0 + + with transaction.atomic(): + for entry in collect(since): + files = entry.pop("files", []) + # lame hack to remove TZ awareness + if entry["date"].endswith("Z"): + entry["date"] = entry["date"][:-1] + changelog, line_created = Changelog.objects.update_or_create(**entry) + if not line_created: + existed += 1 + continue + created += 1 + [ + ChangelogFile.objects.create(name=name, changelog=changelog) + for name in files + ] + + logger.info("Found %d items, %d existed and %d where created." % ( + created + existed, existed, created)) diff --git a/treeherder/config/settings.py b/treeherder/config/settings.py index 698d70e46..e7ddc3b88 100644 --- a/treeherder/config/settings.py +++ b/treeherder/config/settings.py @@ -82,6 +82,7 @@ INSTALLED_APPS = [ 'treeherder.autoclassify', 'treeherder.seta', 'treeherder.intermittents_commenter', + 'treeherder.changelog', ] if DEBUG: INSTALLED_APPS.append('django_extensions') diff --git a/treeherder/etl/bugzilla.py b/treeherder/etl/bugzilla.py index 924ae770f..4301f944e 100644 --- a/treeherder/etl/bugzilla.py +++ b/treeherder/etl/bugzilla.py @@ -3,8 +3,8 @@ import logging import dateutil.parser from django.conf import settings -from treeherder.etl.common import fetch_json from treeherder.model.models import Bugscache +from treeherder.utils.github import fetch_json logger = logging.getLogger(__name__) diff --git a/treeherder/etl/common.py b/treeherder/etl/common.py index cc1951328..a58c31190 100644 --- a/treeherder/etl/common.py +++ b/treeherder/etl/common.py @@ -1,47 +1,6 @@ import calendar -import newrelic.agent -import requests from dateutil import parser -from django.conf import settings - -from treeherder.config.settings import GITHUB_TOKEN - - -def make_request(url, method='GET', headers=None, timeout=30, **kwargs): - """A wrapper around requests to set defaults & call raise_for_status().""" - headers = headers or {} - headers['User-Agent'] = 'treeherder/{}'.format(settings.SITE_HOSTNAME) - if url.find("api.github.com") > -1: - if GITHUB_TOKEN: - headers["Authorization"] = "token {}".format(GITHUB_TOKEN) - response = requests.request(method, - url, - headers=headers, - timeout=timeout, - **kwargs) - if response.history: - params = { - 'url': url, - 'redirects': len(response.history), - 'duration': sum(r.elapsed.total_seconds() for r in response.history) - } - newrelic.agent.record_custom_event('RedirectedRequest', params=params) - - response.raise_for_status() - return response - - -def fetch_json(url, params=None): - response = make_request(url, - params=params, - headers={'Accept': 'application/json'}) - return response.json() - - -def fetch_text(url): - response = make_request(url) - return response.text def get_guid_root(guid): diff --git a/treeherder/etl/management/commands/ingest.py b/treeherder/etl/management/commands/ingest.py index fb87cea08..1893232ca 100644 --- a/treeherder/etl/management/commands/ingest.py +++ b/treeherder/etl/management/commands/ingest.py @@ -14,7 +14,6 @@ from django.core.management.base import BaseCommand from treeherder.client.thclient import TreeherderClient from treeherder.config.settings import GITHUB_TOKEN -from treeherder.etl.common import fetch_json from treeherder.etl.db_semaphore import (acquire_connection, release_connection) from treeherder.etl.job_loader import JobLoader @@ -24,6 +23,7 @@ from treeherder.etl.taskcluster_pulse.handler import (EXCHANGE_EVENT_MAP, handleMessage) from treeherder.model.models import Repository from treeherder.utils import github +from treeherder.utils.github import fetch_json logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -182,7 +182,7 @@ def query_data(repo_meta, commit): event_base_sha = repo_meta["branch"] # First we try with `master` being the base sha # e.g. https://api.github.com/repos/servo/servo/compare/master...1418c0555ff77e5a3d6cf0c6020ba92ece36be2e - compareResponse = github.compare_shas(repo_meta, repo_meta["branch"], commit) + compareResponse = github.compare_shas(repo_meta["owner"], repo_meta["repo"], repo_meta["branch"], commit) merge_base_commit = compareResponse.get("merge_base_commit") if merge_base_commit: commiter_date = merge_base_commit["commit"]["committer"]["date"] @@ -211,7 +211,7 @@ def query_data(repo_meta, commit): assert event_base_sha != repo_meta["branch"] logger.info("We have a new base: %s", event_base_sha) # When using the correct event_base_sha the "commits" field will be correct - compareResponse = github.compare_shas(repo_meta, event_base_sha, commit) + compareResponse = github.compare_shas(repo_meta["owner"], repo_meta["repo"], event_base_sha, commit) commits = [] for _commit in compareResponse["commits"]: @@ -268,12 +268,13 @@ def ingest_git_pushes(project, dry_run=False): logger.info("--> Converting Github commits to pushes") _repo = repo_meta(project) - github_commits = github.commits_info(_repo) + owner, repo = _repo["owner"], _repo["repo"] + github_commits = github.commits_info(owner, repo) not_push_revision = [] push_revision = [] push_to_date = {} for _commit in github_commits: - info = github.commit_info(_repo, _commit["sha"]) + info = github.commit_info(owner, repo, _commit["sha"]) # Revisions that are marked as non-push should be ignored if _commit["sha"] in not_push_revision: logger.debug("Not a revision of a push: {}".format(_commit["sha"])) diff --git a/treeherder/etl/push_loader.py b/treeherder/etl/push_loader.py index 91fc7fb2c..002000915 100644 --- a/treeherder/etl/push_loader.py +++ b/treeherder/etl/push_loader.py @@ -4,10 +4,10 @@ import environ import newrelic.agent from django.core.exceptions import ObjectDoesNotExist -from treeherder.etl.common import (fetch_json, - to_timestamp) +from treeherder.etl.common import to_timestamp from treeherder.etl.push import store_push_data from treeherder.model.models import Repository +from treeherder.utils.github import fetch_json env = environ.Env() logger = logging.getLogger(__name__) diff --git a/treeherder/etl/pushlog.py b/treeherder/etl/pushlog.py index 7b34cf313..6beda2cbe 100644 --- a/treeherder/etl/pushlog.py +++ b/treeherder/etl/pushlog.py @@ -5,10 +5,10 @@ import newrelic.agent import requests from django.core.cache import cache -from treeherder.etl.common import fetch_json from treeherder.etl.exceptions import CollectionNotStoredException from treeherder.etl.push import store_push from treeherder.model.models import Repository +from treeherder.utils.github import fetch_json logger = logging.getLogger(__name__) ONE_WEEK_IN_SECONDS = 604800 diff --git a/treeherder/etl/runnable_jobs.py b/treeherder/etl/runnable_jobs.py index adf855dbc..228563139 100644 --- a/treeherder/etl/runnable_jobs.py +++ b/treeherder/etl/runnable_jobs.py @@ -4,7 +4,7 @@ import requests from django.core.exceptions import ValidationError from django.core.validators import URLValidator -from treeherder.etl.common import fetch_json +from treeherder.utils.github import fetch_json logger = logging.getLogger(__name__) diff --git a/treeherder/log_parser/artifactbuildercollection.py b/treeherder/log_parser/artifactbuildercollection.py index 57efdbdd6..8993c552f 100644 --- a/treeherder/log_parser/artifactbuildercollection.py +++ b/treeherder/log_parser/artifactbuildercollection.py @@ -2,7 +2,7 @@ import logging import newrelic.agent -from treeherder.etl.common import make_request +from treeherder.utils.http import make_request from .artifactbuilders import (BuildbotJobArtifactBuilder, BuildbotLogViewArtifactBuilder, diff --git a/treeherder/log_parser/failureline.py b/treeherder/log_parser/failureline.py index 6f8db3f8a..3ada5625a 100644 --- a/treeherder/log_parser/failureline.py +++ b/treeherder/log_parser/failureline.py @@ -9,11 +9,11 @@ from django.db.utils import (IntegrityError, OperationalError) from requests.exceptions import HTTPError -from treeherder.etl.common import fetch_text from treeherder.etl.text import astral_filter from treeherder.model.models import (FailureLine, Group, JobLog) +from treeherder.utils.http import fetch_text logger = logging.getLogger(__name__) diff --git a/treeherder/push_health/compare.py b/treeherder/push_health/compare.py index 133914ec2..89f0e2421 100644 --- a/treeherder/push_health/compare.py +++ b/treeherder/push_health/compare.py @@ -1,8 +1,8 @@ import logging -from treeherder.etl.common import fetch_json from treeherder.model.models import (Commit, Push) +from treeherder.utils.http import fetch_json from treeherder.webapp.api.serializers import RepositorySerializer logger = logging.getLogger(__name__) diff --git a/treeherder/services/pulse/consumers.py b/treeherder/services/pulse/consumers.py index 2096e182e..22123d6c2 100644 --- a/treeherder/services/pulse/consumers.py +++ b/treeherder/services/pulse/consumers.py @@ -8,9 +8,9 @@ from kombu import (Connection, Queue) from kombu.mixins import ConsumerMixin -from treeherder.etl.common import fetch_json from treeherder.etl.tasks.pulse_tasks import (store_pulse_pushes, store_pulse_tasks) +from treeherder.utils.http import fetch_json from .exchange import get_exchange diff --git a/treeherder/utils/github.py b/treeherder/utils/github.py index 0745a62e6..9dc5f59a4 100644 --- a/treeherder/utils/github.py +++ b/treeherder/utils/github.py @@ -1,17 +1,30 @@ -from treeherder.etl.common import fetch_json +from treeherder.config.settings import GITHUB_TOKEN +from treeherder.utils.http import fetch_json -def fetch_api(path): - return fetch_json("https://api.github.com/{}".format(path)) +def fetch_api(path, params=None): + if GITHUB_TOKEN: + headers = {"Authorization": "token {}".format(GITHUB_TOKEN)} + else: + headers = {} + return fetch_json("https://api.github.com/{}".format(path), params, headers) -def compare_shas(_repo, base, head): - return fetch_api("repos/{}/{}/compare/{}...{}".format(_repo["owner"], _repo["repo"], base, head)) +def get_releases(owner, repo, params=None): + return fetch_api("repos/{}/{}/releases".format(owner, repo), params) -def commits_info(_repo): - return fetch_api("repos/{}/{}/commits".format(_repo["owner"], _repo["repo"])) +def get_repo(owner, repo, params=None): + return fetch_api("repos/{}/{}".format(owner, repo), params) -def commit_info(_repo, sha): - return fetch_api("repos/{}/{}/commits/{}".format(_repo["owner"], _repo["repo"], sha)) +def compare_shas(owner, repo, base, head): + return fetch_api("repos/{}/{}/compare/{}...{}".format(owner, repo, base, head)) + + +def commits_info(owner, repo, params=None): + return fetch_api("repos/{}/{}/commits".format(owner, repo), params) + + +def commit_info(owner, repo, sha, params=None): + return fetch_api("repos/{}/{}/commits/{}".format(owner, repo, sha), params) diff --git a/treeherder/utils/http.py b/treeherder/utils/http.py new file mode 100644 index 000000000..3961fbec9 --- /dev/null +++ b/treeherder/utils/http.py @@ -0,0 +1,40 @@ +import newrelic.agent +import requests +from django.conf import settings + + +def make_request(url, method='GET', headers=None, timeout=30, **kwargs): + """A wrapper around requests to set defaults & call raise_for_status().""" + headers = headers or {} + headers['User-Agent'] = 'treeherder/{}'.format(settings.SITE_HOSTNAME) + response = requests.request(method, + url, + headers=headers, + timeout=timeout, + **kwargs) + if response.history: + params = { + 'url': url, + 'redirects': len(response.history), + 'duration': sum(r.elapsed.total_seconds() for r in response.history) + } + newrelic.agent.record_custom_event('RedirectedRequest', params=params) + + response.raise_for_status() + return response + + +def fetch_json(url, params=None, headers=None): + if headers is None: + headers = {'Accept': 'application/json'} + else: + headers['Accept'] = 'application/json' + response = make_request(url, + params=params, + headers=headers) + return response.json() + + +def fetch_text(url): + response = make_request(url) + return response.text diff --git a/treeherder/webapp/api/bugzilla.py b/treeherder/webapp/api/bugzilla.py index 1c878e749..271381778 100644 --- a/treeherder/webapp/api/bugzilla.py +++ b/treeherder/webapp/api/bugzilla.py @@ -7,7 +7,7 @@ from rest_framework.decorators import action from rest_framework.response import Response from rest_framework.status import HTTP_400_BAD_REQUEST -from treeherder.etl.common import make_request +from treeherder.utils.http import make_request class BugzillaViewSet(viewsets.ViewSet): diff --git a/treeherder/webapp/api/changelog.py b/treeherder/webapp/api/changelog.py new file mode 100644 index 000000000..c73f1b9c5 --- /dev/null +++ b/treeherder/webapp/api/changelog.py @@ -0,0 +1,19 @@ +from rest_framework import viewsets +from rest_framework.response import Response + +from treeherder.changelog.changes import get_changes + +from .serializers import ChangelogSerializer + + +class ChangelogViewSet(viewsets.ViewSet): + """ + This viewset is responsible for the changelog endpoint. + """ + + def list(self, request): + """ + GET method implementation for list view + """ + serializer = ChangelogSerializer(get_changes(), many=True) + return Response(serializer.data) diff --git a/treeherder/webapp/api/serializers.py b/treeherder/webapp/api/serializers.py index 81c9e7c09..64e1eac4a 100644 --- a/treeherder/webapp/api/serializers.py +++ b/treeherder/webapp/api/serializers.py @@ -4,6 +4,7 @@ from django.contrib.auth.models import User from django.core.exceptions import ObjectDoesNotExist from rest_framework import serializers +from treeherder.changelog.models import Changelog from treeherder.model import models from treeherder.webapp.api.utils import (REPO_GROUPS, to_timestamp) @@ -365,3 +366,14 @@ class MachinePlatformSerializer(serializers.ModelSerializer): class Meta: model = models.MachinePlatform fields = ('id', 'platform') + + +class ChangelogSerializer(serializers.ModelSerializer): + + files = serializers.StringRelatedField(many=True) + + class Meta: + model = Changelog + fields = ('id', 'remote_id', 'date', 'author', 'message', 'description', + 'owner', 'project', 'project_url', 'type', 'url', + 'files') diff --git a/treeherder/webapp/api/urls.py b/treeherder/webapp/api/urls.py index e89f2f3bf..9beef1bfb 100644 --- a/treeherder/webapp/api/urls.py +++ b/treeherder/webapp/api/urls.py @@ -7,6 +7,7 @@ from rest_framework import routers from treeherder.webapp.api import (auth, bug, bugzilla, + changelog, csp_report, intermittents_view, job_log_url, @@ -133,6 +134,8 @@ default_router.register(r'jobdetail', jobs.JobDetailViewSet, basename='jobdetail') default_router.register(r'auth', auth.AuthViewSet, basename='auth') +default_router.register(r'changelog', changelog.ChangelogViewSet, + basename='changelog') urlpatterns = [ url(r'^project/(?P[\w-]{0,50})/', include(project_bound_router.urls)),