first commit

2017-02-26 21:33:07 +01:00 · 2017-02-26 21:33:07 +01:00 · e675564e56
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
+*~
+*.pyc
+*.pyo
--- a/2
+++ b/2
@ -0,0 +1,2 @@
+web: gunicorn -b 0.0.0.0:$PORT file2crashes:app
+clock: python bin/schedule.py
--- a/bin/schedule.py
+++ b/bin/schedule.py
@ -0,0 +1,19 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from apscheduler.schedulers.blocking import BlockingScheduler
+from file2crashes import models
+import logging
+
+
+# Install the default root logging handler before the scheduler is created.
+logging.basicConfig()
+sched = BlockingScheduler()
+
+
+# Cron trigger: every day of the week, at hours 2, 6, 10, 14, 18 and 22.
+@sched.scheduled_job('cron', day_of_week='*', hour='2,6,10,14,18,22')
+def timed_job():
+    """Run models.update() with its default arguments."""
+    models.update()
+
+
+# Start the blocking scheduler; this is the last statement of the script.
+sched.start()
--- a/file2crashes/init.py
+++ b/file2crashes/init.py
@ -0,0 +1,45 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from flask import Flask
+from flask_cors import CORS, cross_origin
+from flask_sqlalchemy import SQLAlchemy
+import logging
+import os
+
+
+# Flask application; the database URI is read from the DATABASE_URL
+# environment variable (None when the variable is unset).
+app = Flask(__name__)
+app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get('DATABASE_URL')
+app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
+db = SQLAlchemy(app)
+cors = CORS(app)
+app.config['CORS_HEADERS'] = 'Content-Type'
+# Package-level logger; file2crashes.api imports it as `log`.
+log = logging.getLogger(__name__)
+
+
+@app.route('/crashes', methods=['GET'])
+@cross_origin()
+def crashes():
+    """Serve GET /crashes by delegating to file2crashes.api.crashes()."""
+    # Imported at call time rather than at module import time.
+    from file2crashes.api import crashes as api_crashes
+    return api_crashes()
+
+
+@app.route('/list', methods=['GET'])
+@cross_origin()
+def listdirs():
+    """Serve GET /list by delegating to file2crashes.api.listdirs()."""
+    # Imported at call time rather than at module import time.
+    from file2crashes.api import listdirs as api_listdirs
+    return api_listdirs()
+
+
+@app.route('/')
+@app.route('/list.html')
+def list_html():
+    """Serve '/' and '/list.html' through file2crashes.html.listdirs()."""
+    # Imported at call time rather than at module import time.
+    from file2crashes.html import listdirs as html_listdirs
+    return html_listdirs()
+
+
+@app.route('/crashes.html')
+def crashes_html():
+    """Serve '/crashes.html' through file2crashes.html.crashes()."""
+    # Imported at call time rather than at module import time.
+    from file2crashes.html import crashes as html_crashes
+    return html_crashes()
--- a/file2crashes/analyze.py
+++ b/file2crashes/analyze.py
@ -0,0 +1,187 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import functools
+from datetime import timedelta
+import re
+import copy
+from collections import defaultdict
+from libmozdata import socorro
+from libmozdata import utils
+from libmozdata.connection import (Connection, Query)
+
+
+# Matches 'hg:hg.mozilla.org<repo>:<file path>:<revision>'; group 1 is the
+# file path and group 2 the revision. Raw string with escaped dots so that
+# '.' only matches a literal dot in the host name.
+hg_pattern = re.compile(r'hg:hg\.mozilla\.org[^:]*:([^:]*):([a-z0-9]+)')
+# Path prefixes rejected by is_allowed().
+forbidden_dirs = {'obj-firefox'}
+
+
+def is_allowed(name):
+    """Tell whether `name` starts with none of the forbidden_dirs prefixes.
+
+    Args:
+        name (str): a file path
+
+    Returns:
+        bool: False as soon as one forbidden prefix matches, True otherwise
+    """
+    for prefix in forbidden_dirs:
+        if name.startswith(prefix):
+            return False
+    return True
+
+
+def get_file(hg_uri):
+    """Extract the file name from an hg URI.
+
+    Args:
+        hg_uri (str): value of a frame's 'file' field; may be empty or None
+
+    Returns:
+        str: the first group captured by hg_pattern when the URI matches
+             and the name passes is_allowed(), '' otherwise
+    """
+    if not hg_uri:
+        return ''
+
+    m = hg_pattern.match(hg_uri)
+    if m is None:
+        return ''
+
+    filename = m.group(1)
+    return filename if is_allowed(filename) else ''
+
+
+def get_files(info, verbose=False):
+    """Collect the files seen in the crashed thread of each given crash.
+
+    Args:
+        info (dict): proto signature -> dict holding at least a 'uuid' key
+        verbose (bool): accepted but not used by this function
+
+    Returns:
+        dict: proto signature -> set of the names returned by get_file()
+              for the frames of the crashed thread (may contain '')
+    """
+
+    def handler(proto, json, data):
+        dump = json['json_dump']
+        if 'threads' not in dump or 'crashedThread' not in json:
+            return
+
+        crashed = json['crashedThread']
+        if crashed is None:
+            return
+
+        frames = dump['threads'][crashed]['frames']
+        data[proto] = {get_file(frame['file'])
+                       for frame in frames if 'file' in frame}
+
+    data = {}
+    # One processed-crash query per proto signature, all filling `data`.
+    queries = [Query(socorro.ProcessedCrash.URL,
+                     params={'crash_id': value['uuid']},
+                     handler=functools.partial(handler, proto),
+                     handlerdata=data)
+               for proto, value in info.items()]
+
+    if queries:
+        socorro.ProcessedCrash(queries=queries).wait()
+
+    return data
+
+
+def get_new_signatures(data, threshold=0):
+    """Find the signatures that only show up on the last day of their trend.
+
+    Args:
+        data (dict): signature -> dict mapping a sortable key (a date) to a
+                     crash count
+        threshold (int): minimal count required on the last day
+
+    Returns:
+        list: sorted signatures whose counts are all 0 except the last one,
+              which is >= threshold; signatures with no stats are skipped
+    """
+    new_signatures = []
+    for sgn, stats in data.items():
+        # Counts in key (chronological) order.
+        ordered = sorted(stats.items(), key=lambda p: p[0])
+        numbers = [n for _, n in ordered]
+        if not numbers:
+            # Nothing recorded: there is no "last day" to look at.
+            continue
+        if numbers[-1] >= threshold and all(i == 0 for i in numbers[:-1]):
+            new_signatures.append(sgn)
+    return sorted(new_signatures)
+
+
+def get_uuids(channel,
+              product='Firefox',
+              date='today',
+              limit=10000,
+              max_days=3,
+              threshold=5):
+    """Get one crash uuid per proto signature of the new signatures.
+
+    A first SuperSearch query builds a per-day count for every signature
+    over the search window; get_new_signatures() then selects the new
+    ones, which are queried by chunks of 5 for their proto signatures.
+
+    Args:
+        channel (str): value of the 'release_channel' search parameter
+        product (str): value of the 'product' search parameter
+        date (str): end of the search window
+        limit (int): '_facets_size' of the first query
+        max_days (int): the window starts max_days + 1 days before `date`
+        threshold (int): forwarded to get_new_signatures()
+
+    Returns:
+        (dict, search date): proto signature -> {'uuid', 'count',
+        'signature'} and the search date used, or ({}, '') when there is
+        no new signature
+    """
+    end_date = utils.get_date_ymd(date)
+    start_date = end_date - timedelta(days=max_days + 1)
+    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)
+
+    # One zeroed counter per day, for max_days + 1 days from start_date.
+    default_trend = {}
+    for offset in range(max_days + 1):
+        default_trend[start_date + timedelta(days=offset)] = 0
+    trends = defaultdict(lambda: copy.deepcopy(default_trend))
+
+    def trend_handler(json, data):
+        if json['errors']:
+            return
+        for day_facets in json['facets']['histogram_date']:
+            day = utils.get_date_ymd(day_facets['term'])
+            for sgn_facet in day_facets['facets']['signature']:
+                count = sgn_facet['count']
+                sgn = sgn_facet['term']
+                data[sgn][day] += count
+
+    trend_params = {'product': product,
+                    'date': search_date,
+                    'release_channel': channel,
+                    '_histogram.date': 'signature',
+                    '_facets_size': limit,
+                    '_results_number': 1}
+    socorro.SuperSearch(params=trend_params,
+                        handler=trend_handler,
+                        handlerdata=trends).wait()
+
+    new_signatures = get_new_signatures(trends, threshold=threshold)
+    if not new_signatures:
+        return {}, ''
+
+    protos = {}
+
+    def proto_handler(json, data):
+        if json['errors']:
+            return
+        for bucket in json['facets']['proto_signature']:
+            proto = bucket['term']
+            count = bucket['count']
+            sub_facets = bucket['facets']
+            signature = sub_facets['signature'][0]['term']
+            first_uuid = sub_facets['uuid'][0]['term']
+            data[proto] = {'uuid': first_uuid,
+                           'count': count,
+                           'signature': signature}
+
+    queries = []
+    for sgns in Connection.chunks(new_signatures, 5):
+        proto_params = {'product': product,
+                        'date': search_date,
+                        'signature': ['=' + s for s in sgns],
+                        'release_channel': channel,
+                        '_aggs.proto_signature': ['uuid', 'signature'],
+                        '_facets_size': 1000,
+                        '_results_number': 0}
+        queries.append(Query(socorro.SuperSearch.URL,
+                             proto_params,
+                             handler=proto_handler,
+                             handlerdata=protos))
+
+    socorro.SuperSearch(queries=queries).wait()
+    return protos, search_date
+
+
+def get(channels,
+        products,
+        date='today',
+        limit=10000,
+        max_days=3,
+        threshold=0,
+        verbose=False):
+    """Map files to the new crashes that touch them.
+
+    Args:
+        channels (iterable): channels to analyze
+        products (iterable): products to analyze
+        date (str): forwarded to get_uuids()
+        limit (int): forwarded to get_uuids()
+        max_days (int): forwarded to get_uuids()
+        threshold (int): forwarded to get_uuids()
+        verbose (bool): forwarded to get_files()
+
+    Returns:
+        dict: channel -> product -> file -> list of
+              {'url', 'count', 'signature'} dicts; a (channel, product)
+              pair with no new proto signature gets no entry
+    """
+    results = defaultdict(dict)
+    for channel in channels:
+        for product in products:
+            protos, search_date = get_uuids(channel,
+                                            product=product,
+                                            date=date,
+                                            limit=limit,
+                                            max_days=max_days,
+                                            threshold=threshold)
+            if not protos:
+                continue
+
+            interesting = defaultdict(list)
+            pf = get_files(protos, verbose=verbose)
+            for proto, files in pf.items():
+                # get_file() reports "no file" as '': test by value, an
+                # identity test against a literal is implementation-defined
+                names = [f for f in files if f]
+                if not names:
+                    continue
+
+                # The link only depends on the proto signature.
+                params = {'release_channel': channel,
+                          'product': product,
+                          'date': search_date,
+                          'proto_signature': '=' + proto}
+                url = socorro.SuperSearch.get_link(params)
+                p = protos[proto]
+                for f in names:
+                    interesting[f].append({'url': url,
+                                           'count': p['count'],
+                                           'signature': p['signature']})
+            results[channel][product] = dict(interesting)
+
+    return dict(results)
--- a/file2crashes/api.py
+++ b/file2crashes/api.py
@ -0,0 +1,21 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from flask import request, jsonify
+from file2crashes import models, log
+
+
+def crashes():
+    """Return, as JSON, the crashes of a directory for a product and date.
+
+    Reads the 'product' (default 'Firefox'), 'dir' (default '') and 'date'
+    (default 'today') query arguments; the channel is always 'nightly'.
+    """
+    product = request.args.get('product', 'Firefox')
+    path = request.args.get('dir', '')
+    date = request.args.get('date', 'today')
+    # The message reads "<product>, the <date>: <directory>".
+    log.info('Get crashes for {}, the {}: {}'.format(product, date, path))
+    return jsonify(models.Crashes.get(product, 'nightly', path, date))
+
+
+def listdirs():
+    """Return, as JSON, the directories known for a product and date.
+
+    Reads the 'product' (default 'Firefox') and 'date' (default 'today')
+    query arguments; the channel is always 'nightly'.
+    """
+    args = request.args
+    product = args.get('product', 'Firefox')
+    date = args.get('date', 'today')
+    log.info('List directories for {}, the {}'.format(product, date))
+    dirs = models.Crashes.listdirs(product, 'nightly', date)
+    return jsonify(dirs)
--- a/file2crashes/html.py
+++ b/file2crashes/html.py
@ -0,0 +1,40 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from file2crashes import utils, models
+from flask import request, render_template
+from requests.utils import quote
+
+
+def listdirs():
+    """Render list.html with the directories of a product and date.
+
+    The 'product' and 'date' query arguments are normalized with
+    utils.get_correct_product() and utils.get_correct_date(); the channel
+    is always 'nightly'.
+    """
+    product = utils.get_correct_product(request.args.get('product', ''))
+    date = utils.get_correct_date(request.args.get('date', 'today'))
+    channel = 'nightly'
+    dirs = models.Crashes.listdirs(product, channel, date)
+    # The template appends the quoted directory after 'dir='.
+    link = 'crashes.html?product={}&channel={}&date={}&dir='
+    url = link.format(product, channel, date)
+    return render_template('list.html',
+                           quote=quote,
+                           base_url=url,
+                           dirs=dirs)
+
+
+def crashes():
+    """Render crashes.html with the crashes of one directory.
+
+    The 'product' and 'date' query arguments are normalized with
+    utils.get_correct_product() and utils.get_correct_date(); 'dir' is
+    used as is and the channel is always 'nightly'.
+    """
+    args = request.args
+    product = utils.get_correct_product(args.get('product', ''))
+    date = utils.get_correct_date(args.get('date', 'today'))
+    directory = args.get('dir', '')
+    found = models.Crashes.get(product, 'nightly', directory, date)
+
+    def plural(n):
+        # Helper handed to the template to pick the noun for a count.
+        if n == 1:
+            return 'crash'
+        return 'crashes'
+
+    return render_template('crashes.html',
+                           plural=plural,
+                           crashes=found)
--- a/file2crashes/models.py
+++ b/file2crashes/models.py
@ -0,0 +1,111 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from collections import defaultdict
+from file2crashes import utils as f2cutils
+from file2crashes import app, db, analyze
+
+
+class Crashes(db.Model):
+    """A crash link attached to one file, for a product, channel and date."""
+    __tablename__ = 'crashes'
+
+    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
+    product = db.Column(db.String(20))
+    channel = db.Column(db.String(20))
+    date = db.Column(db.Date)
+    # `directory` and `file` are the two halves of the path, as split by
+    # f2cutils.get_file() in __init__.
+    directory = db.Column(db.String(256))
+    file = db.Column(db.String(128))
+    url = db.Column(db.UnicodeText)
+    count = db.Column(db.Integer, default=0)
+    signature = db.Column(db.String(512))
+
+    def __init__(self, product, channel, date, path, url, count, signature):
+        """Build a row; `date` goes through f2cutils.get_date() and `path`
+        is split into directory and file by f2cutils.get_file()."""
+        self.product = product
+        self.channel = channel
+        self.date = f2cutils.get_date(date)
+        self.directory, self.file = f2cutils.get_file(path)
+        self.url = url
+        self.count = count
+        self.signature = signature
+
+    @staticmethod
+    def put(product, channel, date, file, url, count, signature, commit=True):
+        """Insert a crash row, or update the count of the existing one.
+
+        Args:
+            product (str): product name
+            channel (str): release channel
+            date: anything accepted by f2cutils.get_date()
+            file (str): full path of the file (directory included)
+            url (str): link stored with the row
+            count (int): number of crashes
+            signature (str): crash signature
+            commit (bool): commit the session once the row is added
+        """
+        # Look the row up with the values __init__ stores: a normalized
+        # date and the path split into directory and file. Filtering the
+        # `file` column with the full path would never match.
+        day = f2cutils.get_date(date)
+        directory, filename = f2cutils.get_file(file)
+        query = db.session.query(Crashes).filter_by(product=product,
+                                                    channel=channel,
+                                                    date=day,
+                                                    directory=directory,
+                                                    file=filename,
+                                                    url=url)
+        c = query.first()
+        if c:
+            c.count = count
+        else:
+            c = Crashes(product, channel, date, file, url, count, signature)
+
+        db.session.add(c)
+
+        if commit:
+            db.session.commit()
+
+    @staticmethod
+    def put_data(data, date, commit=True):
+        """Store the result of an analysis.
+
+        Args:
+            data (dict): channel -> product -> file path -> list of dicts
+                         with 'url', 'count' and 'signature' keys
+            date: date the rows are stored under
+            commit (bool): commit once after all rows have been added
+
+        Returns:
+            bool: True when data was not empty, False otherwise
+        """
+        if not data:
+            return False
+
+        for channel, info1 in data.items():
+            for product, info2 in info1.items():
+                for file, url_count in info2.items():
+                    for v in url_count:
+                        Crashes.put(product,
+                                    channel,
+                                    date,
+                                    file,
+                                    v['url'],
+                                    v['count'],
+                                    v['signature'],
+                                    commit=False)
+
+        if commit:
+            db.session.commit()
+
+        return True
+
+    @staticmethod
+    def get(product, channel, directory, date):
+        """Get the crashes recorded for the files of a directory.
+
+        Returns:
+            dict: file -> list of [url, count, signature] sorted by count;
+                  {} when directory is empty or the date cannot be parsed
+        """
+        if not directory:
+            return {}
+
+        date = f2cutils.get_date(date)
+        if not date:
+            # Without a usable date there is nothing to query.
+            return {}
+
+        cs = db.session.query(Crashes).filter_by(product=product,
+                                                 channel=channel,
+                                                 date=date,
+                                                 directory=directory)
+        r = defaultdict(list)
+        for c in cs:
+            r[c.file].append([c.url, c.count, c.signature])
+
+        return {f: sorted(u, key=lambda p: p[1]) for f, u in r.items()}
+
+    @staticmethod
+    def listdirs(product, channel, date):
+        """List the directories having crashes for a product and date.
+
+        Returns:
+            list: sorted directory names, without duplicates; [] when the
+                  date cannot be parsed
+        """
+        # Same date normalization as get().
+        date = f2cutils.get_date(date)
+        if not date:
+            return []
+
+        cs = db.session.query(Crashes).filter_by(product=product,
+                                                 channel=channel,
+                                                 date=date)
+        return sorted(set(c.directory for c in cs))
+
+
+def update(date='today'):
+    """Analyze nightly Firefox and FennecAndroid crashes and store them.
+
+    Args:
+        date (str): passed to analyze.get() and to Crashes.put_data()
+    """
+    channels = ['nightly']
+    products = ['Firefox', 'FennecAndroid']
+    Crashes.put_data(analyze.get(channels, products, date=date), date)
+
+
+def create():
+    """Create the tables and run a first update() when the 'crashes'
+    table does not exist yet; do nothing otherwise."""
+    engine = db.get_engine(app)
+    if engine.dialect.has_table(engine, 'crashes'):
+        return
+
+    db.create_all()
+    update()
--- a/file2crashes/templates/crashes.html
+++ b/file2crashes/templates/crashes.html
@ -0,0 +1,20 @@
+<!doctype html>
+<html lang="en-us">
+<head>
+  <meta charset="utf-8">
+  <title>Files</title>
+</head>
+<body>
+  <ul>
+    {% for file, info in crashes.items() -%}
+    <li>{{ file }}
+      <ul>
+        {% for url, count, signature in info -%}
+        <li>{{ count }} {{ plural(count) }}, signature is <a href="{{ url }}">{{ signature }}</a></li>
+        {% endfor -%}
+      </ul>
+    </li>
+    {% endfor -%}
+  </ul>
+</body>
+</html>
--- a/file2crashes/templates/list.html
+++ b/file2crashes/templates/list.html
@ -0,0 +1,14 @@
+<!doctype html>
+<html lang="en-us">
+<head>
+  <meta charset="utf-8">
+  <title>Directories</title>
+</head>
+<body>
+  <ul>
+    {% for d in dirs -%}
+    <li><a href="{{ base_url + quote(d, safe='') }}">{{ d }}</a></li>
+    {% endfor -%}
+  </ul>
+</body>
+</html>
--- a/file2crashes/utils.py
+++ b/file2crashes/utils.py
@ -0,0 +1,59 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this file,
+# You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import datetime
+import six
+import sys
+from libmozdata import utils
+
+
+def get_products():
+    """Return a new list holding the supported product names."""
+    products = ('Firefox', 'FennecAndroid')
+    return list(products)
+
+
+def get_channels():
+    """Return a new list holding the known release channels."""
+    channels = ('nightly', 'aurora', 'beta', 'release')
+    return list(channels)
+
+
+def disp(*args):
+    """Write the tuple of arguments to stdout, then flush stdout."""
+    out = sys.stdout
+    # The arguments are shown as one tuple, not unpacked.
+    out.write('{}\n'.format(args))
+    out.flush()
+
+
+def get_date(date):
+    """Convert a value to a datetime.date.
+
+    Args:
+        date: a string understood by libmozdata's utils.get_date_ymd(), a
+              datetime.datetime or a datetime.date
+
+    Returns:
+        datetime.date: the matching date, or None when the value is empty,
+                       of another type, or a string that cannot be parsed
+    """
+    if not date:
+        return None
+
+    # datetime.datetime is a subclass of datetime.date: test it first,
+    # otherwise a datetime would be returned unchanged.
+    if isinstance(date, datetime.datetime):
+        return date.date()
+    if isinstance(date, datetime.date):
+        return date
+
+    if isinstance(date, six.string_types):
+        try:
+            parsed = utils.get_date_ymd(date)
+            return datetime.date(parsed.year, parsed.month, parsed.day)
+        except Exception:
+            # Best effort: an unparsable string means "no date".
+            return None
+
+    return None
+
+
+def get_correct_date(date):
+    """Normalize a date, falling back to today.
+
+    Returns utils.get_date_str() of the date when get_date() can convert
+    it, utils.get_date('today') otherwise.
+    """
+    parsed = get_date(date)
+    if not parsed:
+        return utils.get_date('today')
+    return utils.get_date_str(parsed)
+
+
+def get_correct_product(p):
+    """Return the canonical product name for `p`, ignoring case.
+
+    Anything that is not a known product gives 'Firefox'.
+    """
+    canonical = {'firefox': 'Firefox',
+                 'fennecandroid': 'FennecAndroid'}
+    key = p.lower()
+    if key in canonical:
+        return canonical[key]
+    return 'Firefox'
+
+
+def get_correct_channel(c):
+    """Return `c` lowercased when it is one of get_channels(), 'nightly'
+    otherwise."""
+    lowered = c.lower()
+    if lowered in get_channels():
+        return lowered
+    return 'nightly'
+
+
+def get_file(path):
+    """Split a path at its last '/'.
+
+    Args:
+        path (str): a '/'-separated path
+
+    Returns:
+        (str, str): directory and file name; the directory is '' when the
+                    path contains no '/'
+    """
+    # rpartition gives ('', '', path) when there is no '/', where slicing
+    # on rfind()'s -1 would cut the last character off the directory.
+    directory, _, filename = path.rpartition('/')
+    return directory, filename
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,10 @@
+libmozdata>=0.1.31
+requests[security]>=2.7.0
+flask>=0.11.1
+flask_sqlalchemy>=2.1
+flask_cors>=3.0.2
+sqlalchemy>=1.1.5
+python-dateutil>=2.5.2
+gunicorn>=19.6.0
+psycopg2>=2.6.2
+apscheduler>=3.3.1