* Bug 1453760 - bug commenter
replacement for Orange Factor bug commenter
This commit is contained in:
Sarah Clements 2018-07-20 16:36:40 -07:00 коммит произвёл GitHub
Родитель af6211e722
Коммит 5386f7ed40
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
13 изменённых файлов: 509 добавлений и 3 удалений

Просмотреть файл

@ -8,7 +8,7 @@ worker_store_pulse_jobs: newrelic-admin run-program celery worker -A treeherder
worker_store_pulse_resultsets: newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q store_pulse_resultsets --concurrency=3
worker_read_pulse_jobs: newrelic-admin run-program ./manage.py read_pulse_jobs
worker_read_pulse_pushes: newrelic-admin run-program ./manage.py read_pulse_pushes
worker_default: newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q default,cycle_data,fetch_bugs,fetch_runnablejobs,generate_perf_alerts,seta_analyze_failures --concurrency=3
worker_default: newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q default,cycle_data,fetch_bugs,fetch_runnablejobs,generate_perf_alerts,seta_analyze_failures,intermittents_commenter --concurrency=3
worker_hp: newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q classification_mirroring,publish_to_pulse --concurrency=1
worker_log_parser: newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q log_parser,log_parser_fail,log_store_failure_lines,log_store_failure_lines_fail,log_crossreference_error_lines,log_crossreference_error_lines_fail,log_autoclassify,log_autoclassify_fail --maxtasksperchild=50 --concurrency=7

Просмотреть файл

@ -9,5 +9,5 @@ cd "$SRC_DIR"
source vagrant/env.sh
exec newrelic-admin run-program celery -A treeherder worker -c 3 \
-Q default,cycle_data,fetch_bugs,fetch_runnablejobs,generate_perf_alerts,seta_analyze_failures \
-Q default,cycle_data,fetch_bugs,fetch_runnablejobs,generate_perf_alerts,seta_analyze_failures,intermittents_commenter \
-E -l INFO -n default.%h

Просмотреть файл

@ -0,0 +1,12 @@
1 failures in 10 pushes (0.1 failures/push) were associated with this bug in the last 7 days.
This is the #1 most frequent failure this week.
Repository breakdown:
* test_treeherder_jobs: 1
Platform breakdown:
* b2g-emu-jb: 1
For more details, see:
https://treeherder.mozilla.org/intermittent-failures.html#/bugdetails?bug=1&startday=2012-05-09&endday=2018-05-10&tree=all

Просмотреть файл

@ -0,0 +1,40 @@
import responses
from treeherder.intermittents_commenter.commenter import Commenter
from treeherder.intermittents_commenter.constants import TRIAGE_PARAMS
@responses.activate
def test_intermittents_commenter(bug_data):
    """End-to-end check of the commenter in weekly, dry-run mode: the comment
    body generated for the seeded bug must match the stored fixture file."""
    startday = '2012-05-09'
    endday = '2018-05-10'
    alt_startday = startday
    alt_endday = endday

    process = Commenter(weekly_mode=True, dry_run=True)
    url = process.create_url(bug_data['bug_id']) + '?include_fields={}'.format(TRIAGE_PARAMS['include_fields'])

    content = {
        "bugs": [
            {
                "component": "General",
                "priority": "P3",
                "product": "Testing",
                "whiteboard": "[stockwell infra] [see summary at comment 92]"
            }
        ],
        "faults": []
    }

    # Register the mocked Bugzilla response BEFORE generate_bug_changes runs;
    # otherwise the commenter's real HTTP request is not intercepted.
    responses.add(responses.Response(
        method='GET',
        url=url,
        json=content,
        match_querystring=True,
        status=200))

    comment_params = process.generate_bug_changes(startday, endday, alt_startday, alt_endday)

    with open('tests/intermittents_commenter/expected_comment.text', 'r') as comment:
        expected_comment = comment.read()
    assert comment_params[0]['changes']['comment']['body'] == expected_comment

Просмотреть файл

@ -4,6 +4,7 @@ import re
from datetime import timedelta
import environ
from celery.schedules import crontab
from furl import furl
from kombu import (Exchange,
Queue)
@ -57,6 +58,7 @@ INSTALLED_APPS = [
'treeherder.autoclassify',
'treeherder.credentials',
'treeherder.seta',
'treeherder.intermittents_commenter',
]
if DEBUG:
INSTALLED_APPS.append('django_extensions')
@ -291,6 +293,7 @@ CELERY_QUEUES = [
Queue('store_pulse_jobs', Exchange('default'), routing_key='store_pulse_jobs'),
Queue('store_pulse_resultsets', Exchange('default'), routing_key='store_pulse_resultsets'),
Queue('seta_analyze_failures', Exchange('default'), routing_key='seta_analyze_failures'),
Queue('intermittents_commenter', Exchange('default'), routing_key='intermittents_commenter'),
]
# Celery broker setup
@ -391,6 +394,23 @@ CELERYBEAT_SCHEDULE = {
'queue': "seta_analyze_failures"
}
},
'daily-intermittents-commenter': {
# Executes every morning at 7 a.m. UTC
'task': 'intermittents-commenter',
'schedule': crontab(hour=7),
'options': {
'queue': 'intermittents_commenter'
},
},
'weekly-intermittents-commenter': {
# Executes every sunday morning at 8 a.m. UTC
'task': 'intermittents-commenter',
'schedule': crontab(hour=8, day_of_week='sunday'),
'kwargs': {'weekly_mode': True},
'options': {
'queue': 'intermittents_commenter'
},
},
}
# CORS Headers
@ -451,7 +471,10 @@ WHITENOISE_ROOT = path("..", "dist")
# to stage bmo, while suggestions can still be fetched from prod bmo
BZ_API_URL = "https://bugzilla.mozilla.org"
BUGFILER_API_URL = env("BUGZILLA_API_URL", default=BZ_API_URL)
BUGFILER_API_KEY = env("BUGZILLA_API_KEY", default=None)
BUGFILER_API_KEY = env("BUG_FILER_API_KEY", default=None)
# For intermittents commenter
COMMENTER_API_KEY = env("BUG_COMMENTER_API_KEY", default=None)
# Log Parsing
PARSER_MAX_STEP_ERROR_LINES = 100

Просмотреть файл

Просмотреть файл

@ -0,0 +1,32 @@
{{total}} failures in {{test_run_count}} pushes ({{failure_rate}} failures/push) were associated with this bug {% if weekly_mode %}in the last 7 days{% else %}yesterday{% endif %}.
{%- if rank %}
This is the #{{rank}} most frequent failure this week.{% endif %}
{%- if priority==1 %}
** This failure happened more than 75 times this week! Resolving this bug is a very high priority. **
** Try to resolve this bug as soon as possible. If unresolved for 1 week, the affected test(s) may be disabled. ** {% endif %}
{%- if priority==2 %}
** This failure happened more than 30 times this week! Resolving this bug is a high priority. **
** Try to resolve this bug as soon as possible. If unresolved for 2 weeks, the affected test(s) may be disabled. ** {% endif %}
{%- if priority==3 %}
** This test has failed more than 150 times in the last 21 days. It should be disabled until it can be fixed. ** {% endif %}
Repository breakdown:
{% for repository, count in repositories.items() -%}
* {{repository}}: {{count}}
{% endfor %}
Platform breakdown:
{% for platform, count in platforms.items() -%}
* {{platform}}: {{count}}
{% endfor %}
For more details, see:
https://treeherder.mozilla.org/intermittent-failures.html#/bugdetails?bug={{bug_id}}&startday={{startday}}&endday={{endday}}&tree=all

Просмотреть файл

@ -0,0 +1,300 @@
from __future__ import division
import json
import logging
import re
import time
from collections import Counter
from datetime import (date,
datetime,
timedelta)
import requests
from django.conf import settings
from django.db.models import Count
from jinja2 import Template
from requests.exceptions import RequestException
from six import iteritems
from treeherder.intermittents_commenter.constants import (COMPONENTS,
TRIAGE_PARAMS,
WHITEBOARD_NEEDSWORK_OWNER)
from treeherder.model.models import (BugJobMap,
Push)
from treeherder.webapp.api.utils import get_repository
logger = logging.getLogger(__name__)
class Commenter(object):
    """Handles fetching, composing and submitting bug comments based on
    daily or weekly thresholds and date range, and updating whiteboard
    and priority status as needed; if in dry_run, comments will be output
    to stdout rather than submitting to bugzilla."""

    def __init__(self, weekly_mode=False, dry_run=False):
        self.weekly_mode = weekly_mode
        self.dry_run = dry_run
        self.session = self.new_request()

    def run(self):
        """Generate bug changes for the configured mode and print or submit them."""
        startday, endday = self.calculate_date_strings(self.weekly_mode, 6)
        # 21-day window used for the disable recommendation, regardless of mode.
        alt_startday, alt_endday = self.calculate_date_strings(True, 21)
        all_bug_changes = self.generate_bug_changes(startday, endday, alt_startday, alt_endday)
        self.print_or_submit_changes(all_bug_changes)

    def generate_bug_changes(self, startday, endday, alt_startday, alt_endday):
        """Returns a list of dicts containing a bug id, a bug comment (only
        for bugs whose total number of daily or weekly occurrences meet
        the appropriate threshold) and potentially an updated whiteboard
        or priority status."""
        bug_stats = self.get_bug_stats(startday, endday)
        alt_bug_stats = self.get_bug_stats(alt_startday, alt_endday)
        test_run_count = self.get_test_runs(startday, endday)

        all_bug_changes = []
        template = Template(self.open_file('comment.template', False))

        if self.weekly_mode:
            # Rank this week's 50 most frequent failures for the comment text.
            top_bugs = [bug[0] for bug in sorted(bug_stats.items(),
                                                 key=lambda x: x[1]['total'],
                                                 reverse=True)][:50]

        for bug_id, counts in iteritems(bug_stats):
            change_priority = None
            bug_info = None
            whiteboard = None
            priority = 0
            rank = None

            # recommend disabling when more than 150 failures tracked over 21 days
            if alt_bug_stats[bug_id]['total'] >= 150:
                bug_info, whiteboard = self.check_bug_info(bug_info, bug_id)
                # whiteboard is None when the bugzilla fetch failed; skip the
                # whiteboard edit rather than crashing on re.sub(None).
                if whiteboard is not None and not self.check_whiteboard_status(whiteboard):
                    priority = 3
                    whiteboard = self.update_whiteboard(whiteboard, '[stockwell disable-recommended]')

            if self.weekly_mode:
                priority = self.assign_priority(priority, counts)
                if priority == 2:
                    bug_info, whiteboard = self.check_bug_info(bug_info, bug_id)
                    change_priority, whiteboard = self.check_needswork_owner(change_priority, bug_info, whiteboard)

                # change [stockwell needswork] to [stockwell unknown] when failures drop below 20 failures/week
                if counts['total'] < 20:
                    bug_info, whiteboard = self.check_bug_info(bug_info, bug_id)
                    whiteboard = self.check_needswork(whiteboard)

                if bug_id in top_bugs:
                    rank = top_bugs.index(bug_id) + 1
            else:
                bug_info, whiteboard = self.check_bug_info(bug_info, bug_id)
                change_priority, whiteboard = self.check_needswork_owner(change_priority, bug_info, whiteboard)

            comment = template.render(bug_id=bug_id,
                                      total=counts['total'],
                                      test_run_count=test_run_count,
                                      rank=rank,
                                      priority=priority,
                                      failure_rate=round(counts['total'] / float(test_run_count), 3),
                                      repositories=counts['per_repository'],
                                      platforms=counts['per_platform'],
                                      startday=startday,
                                      # endday is 'YYYY-MM-DD HH:MM:SS.ffffff'; show the date only
                                      endday=endday.split()[0],
                                      weekly_mode=self.weekly_mode)

            bug_changes = {'bug_id': bug_id,
                           'changes': {
                               'comment': {'body': comment}
                           }
                           }
            if whiteboard:
                bug_changes['changes']['whiteboard'] = whiteboard
            if change_priority:
                bug_changes['changes']['priority'] = change_priority
            all_bug_changes.append(bug_changes)

        return all_bug_changes

    def check_needswork_owner(self, change_priority, bug_info, whiteboard):
        """Reset priority and apply the needswork:owner whiteboard tag for bugs
        in components that have a triage owner; no-op when bug metadata is
        missing (failed bugzilla fetch)."""
        # bug_info/whiteboard are None when the bugzilla fetch failed.
        if (bug_info is not None and
                [bug_info['product'], bug_info['component']] in COMPONENTS and
                not self.check_whiteboard_status(whiteboard)):

            if bug_info['priority'] not in ['--', 'P1', 'P2', 'P3']:
                change_priority = '--'

            stockwell_text = re.search(r'\[stockwell (.+?)\]', whiteboard)
            if stockwell_text is not None and stockwell_text.group() != WHITEBOARD_NEEDSWORK_OWNER:
                whiteboard = self.update_whiteboard(whiteboard, WHITEBOARD_NEEDSWORK_OWNER)

        return change_priority, whiteboard

    def check_needswork(self, whiteboard):
        """Downgrade any '[stockwell needswork*]' tag to '[stockwell unknown]'."""
        if whiteboard is not None:
            stockwell_text = re.search(r'\[stockwell (.+?)\]', whiteboard)
            # covers all 'needswork' possibilities, ie 'needswork:owner'
            if stockwell_text is not None and stockwell_text.group(1).split(':')[0] == 'needswork':
                whiteboard = self.update_whiteboard(whiteboard, '[stockwell unknown]')
        return whiteboard

    def assign_priority(self, priority, counts):
        """Assign weekly commenter priority (1 for >=75 failures, 2 for >=30);
        an already-set priority (e.g. 3, disable-recommended) is preserved."""
        if priority == 0 and counts['total'] >= 75:
            priority = 1
        elif priority == 0 and counts['total'] >= 30:
            priority = 2
        return priority

    def check_bug_info(self, bug_info, bug_id):
        """Check for previously fetched bug metadata; fetch it if absent.
        Returns (bug_info, whiteboard), or (None, None) when the bugzilla
        request failed."""
        if not bug_info:
            bug_info = self.fetch_bug_details(TRIAGE_PARAMS, bug_id)
            if bug_info is None:
                return None, None
        return bug_info, bug_info['whiteboard']

    def print_or_submit_changes(self, all_bug_changes):
        """Log comments in dry-run mode, otherwise submit them to bugzilla
        (skipped entirely when no API key is configured, e.g. on stage/dev)."""
        for bug in all_bug_changes:
            if self.dry_run:
                logger.info('\n' + bug['changes']['comment']['body'] + '\n')
            elif settings.COMMENTER_API_KEY is None:
                # prevent duplicate comments when on stage/dev
                pass
            else:
                self.submit_bug_changes(bug['changes'], bug['bug_id'])
                # sleep between comment submissions to avoid overwhelming servers
                time.sleep(1)

    def open_file(self, filename, load):
        """Read a file from the intermittents_commenter package directory,
        JSON-decoding it when load is True."""
        with open('treeherder/intermittents_commenter/{}'.format(filename), 'r') as myfile:
            if load:
                return json.load(myfile)
            else:
                return myfile.read()

    def calculate_date_strings(self, mode, numDays):
        """Returns a tuple of start (in YYYY-MM-DD format) and end date
        strings (in YYYY-MM-DD HH:MM:SS format for an inclusive day)."""
        yesterday = date.today() - timedelta(days=1)
        endday = datetime(yesterday.year, yesterday.month, yesterday.day, 23, 59, 59, 999)
        if mode:
            startday = yesterday - timedelta(days=numDays)
        else:
            # daily mode
            startday = yesterday
        return startday.isoformat(), endday.strftime('%Y-%m-%d %H:%M:%S.%f')

    def check_whiteboard_status(self, whiteboard):
        """Extracts stockwell text from a bug's whiteboard status to
        determine whether it matches specified stockwell text; returns a
        boolean (False when the whiteboard is missing)."""
        if whiteboard is None:
            return False
        stockwell_text = re.search(r'\[stockwell (.+?)\]', whiteboard)
        if stockwell_text is not None:
            text = stockwell_text.group(1).split(':')[0]
            if text in ('fixed', 'disable-recommended', 'infra', 'disabled'):
                return True
        return False

    def update_whiteboard(self, existing, new):
        """Replace '[stockwell ...]' tag(s) in the existing whiteboard with new."""
        return re.sub(r'\[stockwell.*?\]', new, existing)

    def new_request(self):
        """Build a requests session with retries and bugzilla API headers."""
        session = requests.Session()
        # Use a custom HTTP adapter, so we can set a non-zero max_retries value.
        session.mount("https://", requests.adapters.HTTPAdapter(max_retries=3))
        session.headers = {
            'User-Agent': 'treeherder/{}'.format(settings.SITE_HOSTNAME),
            'x-bugzilla-api-key': settings.COMMENTER_API_KEY,
            'Accept': 'application/json'
        }
        return session

    def create_url(self, bug_id):
        """Return the bugzilla REST endpoint for a single bug."""
        return settings.BZ_API_URL + '/rest/bug/' + str(bug_id)

    def fetch_bug_details(self, params, bug_id):
        """Fetches bug metadata from bugzilla and returns an encoded
        dict if successful, otherwise returns None."""
        try:
            response = self.session.get(self.create_url(bug_id), headers=self.session.headers,
                                        params=params, timeout=30)
            response.raise_for_status()
        except RequestException as e:
            logger.warning('error fetching bugzilla metadata for bug {} due to {}'.format(bug_id, e))
            return None

        # slow down: bmo server may refuse service if too many requests made too frequently
        time.sleep(0.5)

        data = response.json()
        if 'bugs' not in data:
            return None
        # NOTE(review): .encode('UTF8') produces bytes keys/values on Python 3,
        # which would break the str key lookups elsewhere in this class — this
        # encoding presumably targets Python 2; confirm before running on py3.
        return {key.encode('UTF8'): value.encode('UTF8') for key, value in iteritems(data['bugs'][0])}

    def submit_bug_changes(self, changes, bug_id):
        """PUT the accumulated changes (comment/whiteboard/priority) to bugzilla;
        failures are logged, not raised."""
        try:
            response = self.session.put(self.create_url(bug_id), headers=self.session.headers,
                                        json=changes, timeout=30)
            response.raise_for_status()
        except RequestException as e:
            logger.error('error posting comment to bugzilla for bug {} due to {}'.format(bug_id, e))

    def get_test_runs(self, startday, endday):
        """Returns an aggregate of pushes for specified date range and
        repository."""
        test_runs = (Push.objects.filter(repository_id__in=get_repository('all'),
                                         time__range=(startday, endday))
                     .aggregate(Count('author')))
        return test_runs['author__count']

    def get_bug_stats(self, startday, endday):
        """Get all intermittent failures per specified date range and repository,
        returning a dict of bug_id's with total, repository and platform totals
        if totals are greater than or equal to the threshold.
        eg:
        {
            "1206327": {
                "total": 5,
                "per_repository": {
                    "fx-team": 2,
                    "mozilla-inbound": 3
                },
                "per_platform": {
                    "osx-10-10": 4,
                    "b2g-emu-ics": 1
                }
            },
            ...
        }
        """
        # Min required failures per bug in order to post a comment
        threshold = 1 if self.weekly_mode else 15
        bugs = (BugJobMap.failures.default(get_repository('all'), startday, endday)
                .values('job__repository__name', 'job__machine_platform__platform',
                        'bug_id'))

        bug_map = dict()
        for bug in bugs:
            platform = bug['job__machine_platform__platform']
            repo = bug['job__repository__name']
            bug_id = bug['bug_id']
            if bug_id in bug_map:
                bug_map[bug_id]['total'] += 1
                bug_map[bug_id]['per_platform'][platform] += 1
                bug_map[bug_id]['per_repository'][repo] += 1
            else:
                bug_map[bug_id] = {}
                bug_map[bug_id]['total'] = 1
                bug_map[bug_id]['per_platform'] = Counter([platform])
                bug_map[bug_id]['per_repository'] = Counter([repo])

        return {key: value for key, value in iteritems(bug_map) if value['total'] >= threshold}

Просмотреть файл

@ -0,0 +1,73 @@
# Whiteboard tag applied to bugs whose component has a triage owner.
WHITEBOARD_NEEDSWORK_OWNER = '[stockwell needswork:owner]'

# Fields requested from the bugzilla REST API when triaging a bug.
TRIAGE_PARAMS = {'include_fields': 'product, component, priority, whiteboard'}

# [product, component] pairs that have a triage owner; membership here drives
# the needswork:owner whiteboard/priority handling in the commenter.
# (Fixed: removed a duplicate ['Toolkit', 'WebExtensions: Android'] entry.)
COMPONENTS = [
    ['Core', 'Canvas: 2D'],
    ['Core', 'Canvas: WebGL'],
    ['Core', 'DOM'],
    ['Core', 'DOM: Core & HTML'],
    ['Core', 'DOM: Device Interfaces'],
    ['Core', 'DOM: Events'],
    ['Core', 'DOM: IndexedDB'],
    ['Core', 'DOM: Push Notifications'],
    ['Core', 'DOM: Quota Manager'],
    ['Core', 'DOM: Service Workers'],
    ['Core', 'DOM: Workers'],
    ['Core', 'DOM:Content Processes'],
    ['Core', 'Document Navigation'],
    ['Core', 'Event Handling'],
    ['Core', 'GFX: Color Management'],
    ['Core', 'Graphics'],
    ['Core', 'Graphics: Layers'],
    ['Core', 'Graphics: Text'],
    ['Core', 'Graphics: WebRender'],
    ['Core', 'HTML: Form Submission'],
    ['Core', 'HTML: Parser'],
    ['Core', 'IPC'],
    ['Core', 'Image Blocking'],
    ['Core', 'ImageLib'],
    ['Core', 'Javascript Engine'],
    ['Core', 'Javascript Engine: JIT'],
    ['Core', 'Javascript: GC'],
    ['Core', 'Javascript: Internationalization API'],
    ['Core', 'Javascript: Standard Library'],
    ['Core', 'Keyboard: Navigation'],
    ['Core', 'Networking'],
    ['Core', 'Networking: Cache'],
    ['Core', 'Networking: Cookies'],
    ['Core', 'Networking: DNS'],
    ['Core', 'Networking: Domain Lists'],
    ['Core', 'Networking: FTP'],
    ['Core', 'Networking: File'],
    ['Core', 'Networking: HTTP'],
    ['Core', 'Networking: JAR'],
    ['Core', 'Networking: WebSockets'],
    ['Core', 'Plug-ins'],
    ['Core', 'Security: Sandboxing Process'],
    ['Core', 'Serializers'],
    ['Core', 'Widget'],
    ['Core', 'Widget: Win32'],
    ['Core', 'Widget: WinRT'],
    ['Core', 'XBL'],
    ['Core', 'XML'],
    ['Core', 'XPConnect'],
    ['Core', 'XSLT'],
    ['Core', 'js-ctypes'],
    ['Firefox for Android', 'Add-ons Manager'],
    ['Firefox for Android', 'Testing'],
    ['Firefox', 'Disability Access'],
    ['Firefox', 'Toolbars and Customization'],
    ['Toolkit', 'Add-ons Manager'],
    ['Toolkit', 'Reader Mode'],
    ['Toolkit', 'Toolbars and Toolbar Customization'],
    ['Toolkit', 'WebExtensions: Android'],
    ['Toolkit', 'WebExtensions: Compatibility'],
    ['Toolkit', 'WebExtensions: Developer Tools'],
    ['Toolkit', 'WebExtensions: Experiments'],
    ['Toolkit', 'WebExtensions: Frontend'],
    ['Toolkit', 'WebExtensions: General'],
    ['Toolkit', 'WebExtensions: Request Handling'],
    ['Toolkit', 'WebExtensions: Untriaged'],
]

Просмотреть файл

Просмотреть файл

Просмотреть файл

@ -0,0 +1,16 @@
from django.core.management.base import BaseCommand
from treeherder.intermittents_commenter.commenter import Commenter
class Command(BaseCommand):
    """Manually trigger the intermittent-failures commenter.

    Runs in daily, non-test mode by default; use the flags below for weekly
    summaries or a dry run that prints comments instead of posting them."""

    def add_arguments(self, parser):
        parser.add_argument('-w', '--weekly',
                            action='store_true',
                            dest='weekly_mode',
                            help='generate weekly, rather than daily, comment summaries')
        parser.add_argument('--dry-run',
                            action='store_true',
                            dest='dry_run',
                            help='output comments to stdout rather than submitting to Bugzilla')

    def handle(self, *args, **options):
        Commenter(weekly_mode=options['weekly_mode'],
                  dry_run=options['dry_run']).run()

Просмотреть файл

@ -0,0 +1,10 @@
from celery import task
from treeherder.intermittents_commenter.commenter import Commenter
@task(name='intermittents-commenter', soft_time_limit=30 * 60, time_limit=31 * 60)
def run_commenter(weekly_mode=False):
    """Celery entry point for the intermittents commenter; weekly_mode
    selects the weekly summary instead of the daily one."""
    commenter = Commenter(weekly_mode=weekly_mode)
    commenter.run()