From e675564e56624dcb373af9c763c45e994f999854 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Sun, 26 Feb 2017 21:33:07 +0100 Subject: [PATCH] first commit --- .gitignore | 3 + Procfile | 2 + bin/schedule.py | 19 +++ file2crashes/__init__.py | 45 +++++++ file2crashes/analyze.py | 187 ++++++++++++++++++++++++++++ file2crashes/api.py | 21 ++++ file2crashes/html.py | 40 ++++++ file2crashes/models.py | 111 +++++++++++++++++ file2crashes/templates/crashes.html | 20 +++ file2crashes/templates/list.html | 14 +++ file2crashes/utils.py | 59 +++++++++ requirements.txt | 10 ++ 12 files changed, 531 insertions(+) create mode 100644 .gitignore create mode 100644 Procfile create mode 100644 bin/schedule.py create mode 100644 file2crashes/__init__.py create mode 100644 file2crashes/analyze.py create mode 100644 file2crashes/api.py create mode 100644 file2crashes/html.py create mode 100644 file2crashes/models.py create mode 100644 file2crashes/templates/crashes.html create mode 100644 file2crashes/templates/list.html create mode 100644 file2crashes/utils.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6300279 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +*.pyc +*.pyo diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..1354179 --- /dev/null +++ b/Procfile @@ -0,0 +1,2 @@ +web: gunicorn -b 0.0.0.0:$PORT file2crashes:app +clock: python bin/schedule.py \ No newline at end of file diff --git a/bin/schedule.py b/bin/schedule.py new file mode 100644 index 0000000..6246a8c --- /dev/null +++ b/bin/schedule.py @@ -0,0 +1,19 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. 

# ---- bin/schedule.py (continued) ----

from apscheduler.schedulers.blocking import BlockingScheduler
from file2crashes import models
import logging


logging.basicConfig()
sched = BlockingScheduler()


@sched.scheduled_job('cron', day_of_week='*', hour='2,6,10,14,18,22')
def timed_job():
    """Refresh the stored crash data.

    Registered as a cron job firing every day at hours 2, 6, 10, 14, 18
    and 22; it simply delegates to models.update().
    """
    models.update()


if __name__ == '__main__':
    # BlockingScheduler.start() never returns, so only start it when the
    # file is executed as a script (the Procfile runs
    # `python bin/schedule.py`), not when it is merely imported.
    sched.start()


# ---- file2crashes/__init__.py ----
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

from flask import Flask
from flask_cors import CORS, cross_origin
from flask_sqlalchemy import SQLAlchemy
import logging
import os


# Application-wide singletons: the Flask app, its SQLAlchemy handle and
# the package logger.  The database URI comes from the DATABASE_URL
# environment variable.
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get('DATABASE_URL')
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
db = SQLAlchemy(app)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'
log = logging.getLogger(__name__)


# The view functions import their implementation modules lazily: api and
# html import this package (for app/db/log via models), so importing them
# at module level here would be circular.

@app.route('/crashes', methods=['GET'])
@cross_origin()
def crashes():
    """JSON endpoint: return api.crashes()."""
    from file2crashes import api
    return api.crashes()


@app.route('/list', methods=['GET'])
@cross_origin()
def listdirs():
    """JSON endpoint: return api.listdirs()."""
    from file2crashes import api
    return api.listdirs()


@app.route('/')
@app.route('/list.html')
def list_html():
    """HTML page: return html.listdirs()."""
    from file2crashes import html
    return html.listdirs()


@app.route('/crashes.html')
def crashes_html():
    """HTML page: return html.crashes()."""
    from file2crashes import html
    return html.crashes()


# ---- file2crashes/analyze.py ----
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

# ---- file2crashes/analyze.py (continued) ----

import functools
from datetime import timedelta
import re
import copy
from collections import defaultdict
from libmozdata import socorro
from libmozdata import utils
from libmozdata.connection import (Connection, Query)


# Matches frame 'file' values such as
# 'hg:hg.mozilla.org<repo>:<path>:<node>'; group 1 is the path.
# Raw string with escaped dots so that '.' only matches a literal dot.
hg_pattern = re.compile(r'hg:hg\.mozilla\.org[^:]*:([^:]*):([a-z0-9]+)')
forbidden_dirs = {'obj-firefox'}


def is_allowed(name):
    """Return True unless name starts with one of forbidden_dirs."""
    return not name.startswith(tuple(forbidden_dirs))


def get_file(hg_uri):
    """Extract the file path from an hg URI found in a stack frame.

    Args:
        hg_uri (str): value of a frame's 'file' entry (may be empty/None)

    Returns:
        str: the path captured by hg_pattern, or '' when hg_uri is
        empty, does not match, or the path lies in a forbidden directory
    """
    if not hg_uri:
        return ''

    m = hg_pattern.match(hg_uri)
    if m and is_allowed(m.group(1)):
        return m.group(1)

    return ''


def get_files(info, verbose=False):
    """Get the files appearing in the crashing thread of each backtrace

    Args:
        info (dict): proto -> dict holding at least a 'uuid' entry
        verbose (bool): currently unused

    Returns:
        dict: proto -> set of file paths (may contain '' for frames whose
        file could not be resolved by get_file)
    """

    def handler(proto, json, data):
        jd = json['json_dump']
        if 'threads' in jd and 'crashedThread' in json:
            thread_nb = json['crashedThread']
            if thread_nb is not None:
                frames = jd['threads'][thread_nb]['frames']
                data[proto] = {get_file(f['file'])
                               for f in frames if 'file' in f}

    data = {}
    queries = [Query(socorro.ProcessedCrash.URL,
                     params={'crash_id': value['uuid']},
                     handler=functools.partial(handler, proto),
                     handlerdata=data)
               for proto, value in info.items()]

    if queries:
        socorro.ProcessedCrash(queries=queries).wait()

    return data


def get_new_signatures(data, threshold=0):
    """Select the signatures which only show up on the most recent day

    Args:
        data (dict): signature -> {date: crash count}
        threshold (int): minimal count required on the last date

    Returns:
        list: sorted signatures whose counts are 0 on every date but the
        last one, and at least threshold on the last one
    """
    new_signatures = []
    for sgn, stats in data.items():
        numbers = [n for _, n in sorted(stats.items(), key=lambda p: p[0])]
        if not numbers:
            # no history at all: nothing to decide on (avoids IndexError)
            continue
        if all(i == 0 for i in numbers[:-1]) and numbers[-1] >= threshold:
            new_signatures.append(sgn)
    return sorted(new_signatures)


def get_uuids(channel,
              product='Firefox',
              date='today',
              limit=10000,
              max_days=3,
              threshold=5):
    """Find the new signatures and one crash uuid per proto signature

    Args:
        channel (str): release channel
        product (str): product name
        date (str): end date of the search window
        limit (int): value sent as '_facets_size' for the histogram query
        max_days (int): the window starts max_days + 1 days before date
        threshold (int): forwarded to get_new_signatures

    Returns:
        (dict, str): proto -> {'uuid', 'count', 'signature'} and the
        search date used, or ({}, '') when there is no new signature
    """
    end_date = utils.get_date_ymd(date)
    start_date = end_date - timedelta(days=max_days + 1)
    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)

    r = range(max_days + 1)
    default_trend = {start_date + timedelta(days=i): 0 for i in r}
    trends = defaultdict(lambda: copy.deepcopy(default_trend))

    def trend_handler(json, data):
        # accumulate, per signature, the number of crashes for each day
        if not json['errors']:
            for facets in json['facets']['histogram_date']:
                d = utils.get_date_ymd(facets['term'])
                for signature in facets['facets']['signature']:
                    data[signature['term']][d] += signature['count']

    socorro.SuperSearch(params={'product': product,
                                'date': search_date,
                                'release_channel': channel,
                                '_histogram.date': 'signature',
                                '_facets_size': limit,
                                '_results_number': 1},
                        handler=trend_handler, handlerdata=trends).wait()

    new_signatures = get_new_signatures(trends, threshold=threshold)
    if not new_signatures:
        return {}, ''

    protos = {}

    def proto_handler(json, data):
        # keep the first signature and the first uuid of each proto
        if not json['errors']:
            for facets in json['facets']['proto_signature']:
                sub = facets['facets']
                data[facets['term']] = {
                    'uuid': sub['uuid'][0]['term'],
                    'count': facets['count'],
                    'signature': sub['signature'][0]['term']}

    # the signatures are queried by groups of 5
    queries = [Query(socorro.SuperSearch.URL,
                     {'product': product,
                      'date': search_date,
                      'signature': ['=' + s for s in sgns],
                      'release_channel': channel,
                      '_aggs.proto_signature': ['uuid',
                                                'signature'],
                      '_facets_size': 1000,
                      '_results_number': 0},
                     handler=proto_handler, handlerdata=protos)
               for sgns in Connection.chunks(new_signatures, 5)]

    socorro.SuperSearch(queries=queries).wait()
    return protos, search_date


def get(channels,
        products,
        date='today',
        limit=10000,
        max_days=3,
        threshold=0,
        verbose=False):
    """Map the files to the new crashes they are involved in

    Args:
        channels (iterable): release channels
        products (iterable): product names
        date, limit, max_days, threshold: forwarded to get_uuids
        verbose (bool): forwarded to get_files

    Returns:
        dict: channel -> product -> file -> list of
        {'url', 'count', 'signature'}
    """
    results = defaultdict(dict)
    for channel in channels:
        for product in products:
            protos, search_date = get_uuids(channel,
                                            product=product,
                                            date=date,
                                            limit=limit,
                                            max_days=max_days,
                                            threshold=threshold)
            if not protos:
                continue

            interesting = defaultdict(list)
            pf = get_files(protos, verbose=verbose)
            for proto, files in pf.items():
                # get_file() yields '' for unresolved frames: drop them
                # (was `f is not ''`, an identity test on a literal)
                named = [f for f in files if f]
                if not named:
                    continue
                params = {'release_channel': channel,
                          'product': product,
                          'date': search_date,
                          'proto_signature': '=' + proto}
                url = socorro.SuperSearch.get_link(params)
                p = protos[proto]
                for f in named:
                    interesting[f].append({'url': url,
                                           'count': p['count'],
                                           'signature': p['signature']})
            results[channel][product] = dict(interesting)

    return dict(results)


# ---- file2crashes/api.py ----
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

from flask import request, jsonify
from file2crashes import models, log


def crashes():
    """Return, as JSON, the nightly crashes stored for a directory.

    Query parameters: 'product' (default 'Firefox'), 'dir' (default '')
    and 'date' (default 'today').
    """
    product = request.args.get('product', 'Firefox')
    path = request.args.get('dir', '')
    date = request.args.get('date', 'today')
    # the date fills the "the {}" slot, as in listdirs() below
    log.info('Get crashes for {}, the {}: {}'.format(product, date, path))
    return jsonify(models.Crashes.get(product, 'nightly', path, date))


def listdirs():
    """Return, as JSON, the directories having nightly crashes.

    Query parameters: 'product' (default 'Firefox') and 'date'
    (default 'today').
    """
    product = request.args.get('product', 'Firefox')
    date = request.args.get('date', 'today')
    log.info('List directories for {}, the {}'.format(product, date))
    return jsonify(models.Crashes.listdirs(product, 'nightly', date))


# ---- file2crashes/html.py ----
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

# ---- file2crashes/html.py (continued) ----

from file2crashes import utils, models
from flask import request, render_template
from requests.utils import quote


def listdirs():
    """Render list.html for the requested product and date.

    The 'product' and 'date' query parameters are sanitized through
    utils.get_correct_product / utils.get_correct_date; the channel is
    always 'nightly'.  The template receives the directories, the
    `quote` helper and the base url of the per-directory crash page.
    """
    args = request.args
    product = utils.get_correct_product(args.get('product', ''))
    date = utils.get_correct_date(args.get('date', 'today'))
    channel = 'nightly'
    directories = models.Crashes.listdirs(product, channel, date)
    link_fmt = 'crashes.html?product={}&channel={}&date={}&dir='
    return render_template('list.html',
                           quote=quote,
                           base_url=link_fmt.format(product, channel, date),
                           dirs=directories)


def crashes():
    """Render crashes.html for the requested product, date and directory.

    Query parameters are sanitized as in listdirs(); 'dir' is used as
    given.  The template also receives a `plural` helper.
    """
    args = request.args
    product = utils.get_correct_product(args.get('product', ''))
    date = utils.get_correct_date(args.get('date', 'today'))
    directory = args.get('dir', '')
    found = models.Crashes.get(product, 'nightly', directory, date)

    def plural(n):
        # 'crash' for exactly one, 'crashes' otherwise
        if n == 1:
            return 'crash'
        return 'crashes'

    return render_template('crashes.html',
                           plural=plural,
                           crashes=found)


# ---- file2crashes/models.py ----
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

# ---- file2crashes/models.py (continued) ----

from collections import defaultdict
from file2crashes import utils as f2cutils
from file2crashes import app, db, analyze


class Crashes(db.Model):
    """One row per (product, channel, date, file, search url)."""

    __tablename__ = 'crashes'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    product = db.Column(db.String(20))
    channel = db.Column(db.String(20))
    date = db.Column(db.Date)
    # `directory` and `file` are the two halves of the source path, as
    # split by f2cutils.get_file()
    directory = db.Column(db.String(256))
    file = db.Column(db.String(128))
    url = db.Column(db.UnicodeText)
    count = db.Column(db.Integer, default=0)
    signature = db.Column(db.String(512))

    def __init__(self, product, channel, date, path, url, count, signature):
        """Build a row; `date` is normalized and `path` is split."""
        self.product = product
        self.channel = channel
        self.date = f2cutils.get_date(date)
        self.directory, self.file = f2cutils.get_file(path)
        self.url = url
        self.count = count
        self.signature = signature

    @staticmethod
    def put(product, channel, date, file, url, count, signature, commit=True):
        """Insert a row, or refresh the count of the matching one.

        Args:
            product, channel, url, count, signature: column values
            date: anything accepted by f2cutils.get_date
            file (str): full path of the file (directory + name)
            commit (bool): commit the session once the row is added
        """
        # Look the row up with the values as they are stored: __init__
        # normalizes the date and splits the path, so filtering on the
        # raw full path could never match and created duplicates.
        date = f2cutils.get_date(date)
        directory, filename = f2cutils.get_file(file)
        c = db.session.query(Crashes).filter_by(product=product,
                                                channel=channel,
                                                date=date,
                                                directory=directory,
                                                file=filename,
                                                url=url).first()
        if c:
            c.count = count
        else:
            c = Crashes(product, channel, date, file, url, count, signature)

        db.session.add(c)

        if commit:
            db.session.commit()

    @staticmethod
    def put_data(data, date, commit=True):
        """Store the result of analyze.get().

        Args:
            data (dict): channel -> product -> file -> list of dicts with
                'url', 'count' and 'signature' entries
            date: date the data belongs to
            commit (bool): commit once all the rows have been added

        Returns:
            bool: False when data is empty, True otherwise
        """
        if not data:
            return False

        for channel, info1 in data.items():
            for product, info2 in info1.items():
                for file, url_count in info2.items():
                    for v in url_count:
                        Crashes.put(product,
                                    channel,
                                    date,
                                    file,
                                    v['url'],
                                    v['count'],
                                    v['signature'],
                                    commit=False)

        if commit:
            db.session.commit()

        return True

    @staticmethod
    def get(product, channel, directory, date):
        """Get the crashes recorded for the files of a directory.

        Returns:
            dict: file -> list of [url, count, signature] sorted by
            count; {} when directory is empty or date is invalid
        """
        if directory:
            date = f2cutils.get_date(date)
            if date:
                cs = db.session.query(Crashes).filter_by(product=product,
                                                         channel=channel,
                                                         date=date,
                                                         directory=directory)
                r = defaultdict(list)
                for c in cs:
                    r[c.file].append([c.url, c.count, c.signature])

                return {f: sorted(u, key=lambda p: p[1])
                        for f, u in r.items()}

        return {}

    @staticmethod
    def listdirs(product, channel, date):
        """Get the sorted list of the directories having crashes.

        `date` is normalized the same way as in get(); an invalid date
        gives an empty list.
        """
        date = f2cutils.get_date(date)
        if not date:
            return []

        cs = db.session.query(Crashes).filter_by(product=product,
                                                 channel=channel,
                                                 date=date)
        return sorted(set(c.directory for c in cs))


def update(date='today'):
    """Analyze the nightly crashes of Firefox and FennecAndroid and
    store the results for `date`."""
    results = analyze.get(['nightly'],
                          ['Firefox', 'FennecAndroid'],
                          date=date)
    Crashes.put_data(results, date)


def create():
    """Create the tables and fill them when 'crashes' doesn't exist."""
    engine = db.get_engine(app)
    if not engine.dialect.has_table(engine, 'crashes'):
        db.create_all()
        update()


# ---- file2crashes/templates/crashes.html ----
# (only the text "Files" is present in this copy of the patch)
# ---- file2crashes/templates/list.html ----
# (only the text "Directories" is present in this copy of the patch)

# ---- file2crashes/utils.py ----
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

# ---- file2crashes/utils.py (continued) ----

import datetime
import sys

try:
    import six
    _STRING_TYPES = six.string_types
except ImportError:
    # six is not listed in requirements.txt: don't depend on it
    _STRING_TYPES = (str,)

# libmozdata is imported inside the two functions which need it, so that
# the pure helpers of this module stay usable without it.


def get_products():
    """Return the list of the known products."""
    return ['Firefox', 'FennecAndroid']


def get_channels():
    """Return the list of the known release channels."""
    return ['nightly', 'aurora', 'beta', 'release']


def disp(*args):
    """Print the tuple of arguments and flush stdout."""
    print(args)
    sys.stdout.flush()


def get_date(date):
    """Convert date into a datetime.date

    Args:
        date: a string (parsed with libmozdata's get_date_ymd), a
            datetime.datetime or a datetime.date

    Returns:
        datetime.date: the date, or None when date is empty, of another
        type or when the string cannot be parsed
    """
    if not date:
        return None

    if isinstance(date, _STRING_TYPES):
        from libmozdata import utils
        try:
            parsed = utils.get_date_ymd(date)
            return datetime.date(parsed.year, parsed.month, parsed.day)
        except Exception:
            # best effort: an unparsable string is "no date"
            return None

    # datetime.datetime is a subclass of datetime.date, so it must be
    # tested first in order to drop the time part
    if isinstance(date, datetime.datetime):
        return date.date()
    if isinstance(date, datetime.date):
        return date

    return None


def get_correct_date(date):
    """Return date formatted by libmozdata's get_date_str, or the value
    of libmozdata's get_date('today') when date is invalid."""
    from libmozdata import utils

    date = get_date(date)
    if date:
        return utils.get_date_str(date)
    return utils.get_date('today')


def get_correct_product(p):
    """Return the canonical product name (case-insensitive lookup),
    'Firefox' when p is unknown."""
    prods = {'firefox': 'Firefox',
             'fennecandroid': 'FennecAndroid'}
    return prods.get(p.lower(), 'Firefox')


def get_correct_channel(c):
    """Return c lowercased when it is a known channel else 'nightly'."""
    c = c.lower()
    return c if c in get_channels() else 'nightly'


def get_file(path):
    """Split path at its last '/'

    Args:
        path (str): a file path

    Returns:
        (str, str): directory and file name; the directory is '' when
        path contains no '/' (rfind() == -1 used to cut the last
        character of the name instead)
    """
    directory, _, name = path.rpartition('/')
    return directory, name


# ---- requirements.txt ----
# libmozdata>=0.1.31
# requests[security]>=2.7.0
# flask>=0.11.1
# flask_sqlalchemy>=2.1
# flask_cors>=3.0.2
# sqlalchemy>=1.1.5
# python-dateutil>=2.5.2
# gunicorn>=19.6.0
# psycopg2>=2.6.2
# apscheduler>=3.3.1