This commit is contained in:
Calixte Denizet 2017-02-26 21:33:07 +01:00
Коммит e675564e56
12 изменённых файлов: 531 добавлений и 0 удалений

3
.gitignore поставляемый Normal file
Просмотреть файл

@ -0,0 +1,3 @@
*~
*.pyc
*.pyo

2
Procfile Normal file
Просмотреть файл

@ -0,0 +1,2 @@
web: gunicorn -b 0.0.0.0:$PORT file2crashes:app
clock: python bin/schedule.py

19
bin/schedule.py Normal file
Просмотреть файл

@ -0,0 +1,19 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from apscheduler.schedulers.blocking import BlockingScheduler
from file2crashes import models
import logging
# Configure the root logger with the default handler and format.
logging.basicConfig()
# Scheduler driving the periodic refresh; it is started at the bottom of
# this script.
sched = BlockingScheduler()


# Cron trigger: every day of the week, at hours 2, 6, 10, 14, 18 and 22.
# NOTE(review): the timezone is whatever the scheduler defaults to - confirm.
@sched.scheduled_job('cron', day_of_week='*', hour='2,6,10,14,18,22')
def timed_job():
    """Refresh the stored crash data by calling ``models.update()``."""
    models.update()


# Start the scheduler; the registered job then runs on its cron schedule.
sched.start()

45
file2crashes/__init__.py Normal file
Просмотреть файл

@ -0,0 +1,45 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from flask import Flask
from flask_cors import CORS, cross_origin
from flask_sqlalchemy import SQLAlchemy
import logging
import os
# The Flask application; the route handlers of this module are registered
# on it.
app = Flask(__name__)
# The database location is read from the DATABASE_URL environment variable
# (None when the variable is not set).
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get('DATABASE_URL')
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
# SQLAlchemy handle bound to the application.
db = SQLAlchemy(app)
cors = CORS(app)
app.config['CORS_HEADERS'] = 'Content-Type'
# Logger named after this package.
log = logging.getLogger(__name__)
@app.route('/crashes', methods=['GET'])
@cross_origin()
def crashes():
    """Serve ``GET /crashes`` by delegating to ``file2crashes.api.crashes``."""
    # Imported at call time rather than at module level: the api module is
    # only needed once a request comes in.
    from file2crashes.api import crashes as api_crashes

    return api_crashes()
@app.route('/list', methods=['GET'])
@cross_origin()
def listdirs():
    """Serve ``GET /list`` by delegating to ``file2crashes.api.listdirs``."""
    # Imported at call time rather than at module level: the api module is
    # only needed once a request comes in.
    from file2crashes.api import listdirs as api_listdirs

    return api_listdirs()
@app.route('/')
@app.route('/list.html')
def list_html():
    """Serve ``/`` and ``/list.html`` through ``file2crashes.html.listdirs``."""
    # Imported at call time rather than at module level: the html module is
    # only needed once a request comes in.
    from file2crashes.html import listdirs as render_listdirs

    return render_listdirs()
@app.route('/crashes.html')
def crashes_html():
    """Serve ``/crashes.html`` through ``file2crashes.html.crashes``."""
    # Imported at call time rather than at module level: the html module is
    # only needed once a request comes in.
    from file2crashes.html import crashes as render_crashes

    return render_crashes()

187
file2crashes/analyze.py Normal file
Просмотреть файл

@ -0,0 +1,187 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import functools
from datetime import timedelta
import re
import copy
from collections import defaultdict
from libmozdata import socorro
from libmozdata import utils
from libmozdata.connection import (Connection, Query)
# Matches strings shaped like 'hg:hg.mozilla.org<...>:<path>:<node>';
# group 1 captures the path and group 2 the trailing [a-z0-9]+ token
# (presumably the changeset node - TODO confirm).
hg_pattern = re.compile('hg:hg.mozilla.org[^:]*:([^:]*):([a-z0-9]+)')
# Path prefixes rejected by is_allowed().
forbidden_dirs = {'obj-firefox'}
def is_allowed(name):
    """Tell whether ``name`` lies outside every forbidden directory.

    Args:
        name (str): file path to check

    Returns:
        bool: False when ``name`` starts with one of the prefixes in
            ``forbidden_dirs``, True otherwise
    """
    for prefix in forbidden_dirs:
        if name.startswith(prefix):
            return False
    return True
def get_file(hg_uri):
    """Extract the file path from an hg uri

    Args:
        hg_uri (str): file reference, expected to match ``hg_pattern``

    Returns:
        str: the path captured by the pattern, or '' when ``hg_uri`` is
            empty, does not match, or the path is not allowed
    """
    if not hg_uri:
        return ''

    found = hg_pattern.match(hg_uri)
    if found is None:
        return ''

    path = found.group(1)
    return path if is_allowed(path) else ''
def get_files(info, verbose=False):
    """Collect the files found in the crashing thread of each backtrace

    Args:
        info (dict): proto -> dict holding (at least) a 'uuid' entry
        verbose (bool): accepted but not used by this function

    Returns:
        dict: proto -> set of file names (as returned by get_file) taken
            from the frames of the crashed thread
    """
    def handler(proto, json, data):
        dump = json['json_dump']
        if 'threads' not in dump or 'crashedThread' not in json:
            return
        crashed = json['crashedThread']
        if crashed is None:
            return
        frames = dump['threads'][crashed]['frames']
        # Frames without a 'file' entry are skipped
        data[proto] = {get_file(frame['file'])
                       for frame in frames
                       if 'file' in frame}

    data = {}
    queries = [Query(socorro.ProcessedCrash.URL,
                     params={'crash_id': value['uuid']},
                     handler=functools.partial(handler, proto),
                     handlerdata=data)
               for proto, value in info.items()]

    if queries:
        socorro.ProcessedCrash(queries=queries).wait()

    return data
def get_new_signatures(data, threshold=0):
    """Find the signatures which only show up on the most recent date

    A signature is "new" when every count but the last one (in date order)
    is zero and the last count reaches ``threshold``.

    Args:
        data (dict): signature -> dict mapping a sortable date to a count
        threshold (int): minimal count required on the last date

    Returns:
        list: the new signatures, sorted
    """
    new_signatures = []
    for sgn, stats in data.items():
        if not stats:
            # No data point at all: there is no "last" count to look at
            continue
        numbers = [stats[day] for day in sorted(stats)]
        previous, latest = numbers[:-1], numbers[-1]
        if all(n == 0 for n in previous) and latest >= threshold:
            new_signatures.append(sgn)

    return sorted(new_signatures)
def get_uuids(channel,
              product='Firefox',
              date='today',
              limit=10000,
              max_days=3,
              threshold=5):
    """Get, for each proto signature of the new signatures, a crash uuid

    The per-day counts of every signature are fetched for the search
    window, the new signatures are picked with get_new_signatures, and
    their proto signatures are then queried.

    Args:
        channel (str): release channel to search
        product (str): product to search
        date (str): end of the search window
        limit (int): value used for '_facets_size' in the histogram query
        max_days (int): the window starts max_days + 1 days before date
        threshold (int): forwarded to get_new_signatures

    Returns:
        (dict, object): proto -> {'uuid', 'count', 'signature'} and the
            search date used for the queries, or ({}, '') when there is
            no new signature
    """
    end_date = utils.get_date_ymd(date)
    start_date = end_date - timedelta(days=max_days + 1)
    search_date = socorro.SuperSearch.get_search_date(start_date, end_date)

    # One zeroed counter per day, for max_days + 1 days from start_date
    default_trend = {}
    for offset in range(max_days + 1):
        default_trend[start_date + timedelta(days=offset)] = 0
    trends = defaultdict(lambda: copy.deepcopy(default_trend))

    def trend_handler(json, data):
        if json['errors']:
            return
        for day_facet in json['facets']['histogram_date']:
            day = utils.get_date_ymd(day_facet['term'])
            signatures = day_facet['facets']['signature']
            for signature in signatures:
                count = signature['count']
                sgn = signature['term']
                data[sgn][day] += count

    histogram_params = {'product': product,
                        'date': search_date,
                        'release_channel': channel,
                        '_histogram.date': 'signature',
                        '_facets_size': limit,
                        '_results_number': 1}
    socorro.SuperSearch(params=histogram_params,
                        handler=trend_handler,
                        handlerdata=trends).wait()

    new_signatures = get_new_signatures(trends, threshold=threshold)
    if not new_signatures:
        return {}, ''

    protos = {}

    def proto_handler(json, data):
        if json['errors']:
            return
        for facet in json['facets']['proto_signature']:
            proto = facet['term']
            count = facet['count']
            sub_facets = facet['facets']
            signature = sub_facets['signature'][0]['term']
            first_uuid = sub_facets['uuid'][0]['term']
            data[proto] = {'uuid': first_uuid,
                           'count': count,
                           'signature': signature}

    # The signatures are queried five at a time
    queries = [Query(socorro.SuperSearch.URL,
                     {'product': product,
                      'date': search_date,
                      'signature': ['=' + s for s in sgns],
                      'release_channel': channel,
                      '_aggs.proto_signature': ['uuid',
                                                'signature'],
                      '_facets_size': 1000,
                      '_results_number': 0},
                     handler=proto_handler, handlerdata=protos)
               for sgns in Connection.chunks(new_signatures, 5)]
    socorro.SuperSearch(queries=queries).wait()

    return protos, search_date
def get(channels,
        products,
        date='today',
        limit=10000,
        max_days=3,
        threshold=0,
        verbose=False):
    """Map the files found in new crashes to the crashes touching them

    Args:
        channels (iterable): release channels to analyze
        products (iterable): products to analyze
        date (str): forwarded to get_uuids
        limit (int): forwarded to get_uuids
        max_days (int): forwarded to get_uuids
        threshold (int): forwarded to get_uuids
        verbose (bool): forwarded to get_files

    Returns:
        dict: channel -> product -> file -> list of
            {'url', 'count', 'signature'} dicts; a (channel, product)
            pair without any proto signature is left out
    """
    results = defaultdict(dict)
    for channel in channels:
        for product in products:
            protos, search_date = get_uuids(channel,
                                            product=product,
                                            date=date,
                                            limit=limit,
                                            max_days=max_days,
                                            threshold=threshold)
            if not protos:
                continue

            interesting = defaultdict(list)
            pf = get_files(protos, verbose=verbose)
            for proto, files in pf.items():
                # get_file yields '' for frames without a usable path.
                # Compare by value: the former "is not ''" identity test
                # only worked thanks to string interning.
                named = [f for f in files if f != '']
                if not named:
                    continue
                # The link only depends on the proto: build it once
                params = {'release_channel': channel,
                          'product': product,
                          'date': search_date,
                          'proto_signature': '=' + proto}
                url = socorro.SuperSearch.get_link(params)
                p = protos[proto]
                for f in named:
                    interesting[f].append({'url': url,
                                           'count': p['count'],
                                           'signature': p['signature']})
            results[channel][product] = dict(interesting)

    return dict(results)

21
file2crashes/api.py Normal file
Просмотреть файл

@ -0,0 +1,21 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from flask import request, jsonify
from file2crashes import models, log
def crashes():
    """Return, as JSON, the nightly crashes stored for a directory.

    Query-string parameters:
        product: product name (default 'Firefox')
        dir: directory to look up (default '')
        date: date to look up (default 'today')
    """
    product = request.args.get('product', 'Firefox')
    path = request.args.get('dir', '')
    date = request.args.get('date', 'today')
    # The message reads "<product>, the <date>: <directory>"; the date and
    # the directory used to be passed in the wrong order.
    log.info('Get crashes for {}, the {}: {}'.format(product, date, path))
    return jsonify(models.Crashes.get(product, 'nightly', path, date))
def listdirs():
    """Return, as JSON, the directories stored for the nightly channel.

    Query-string parameters:
        product: product name (default 'Firefox')
        date: date to look up (default 'today')
    """
    args = request.args
    product = args.get('product', 'Firefox')
    date = args.get('date', 'today')
    log.info('List directories for {}, the {}'.format(product, date))
    directories = models.Crashes.listdirs(product, 'nightly', date)
    return jsonify(directories)

40
file2crashes/html.py Normal file
Просмотреть файл

@ -0,0 +1,40 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from file2crashes import utils, models
from flask import request, render_template
from requests.utils import quote
def listdirs():
    """Render ``list.html`` with the directories stored for a product/date.

    The 'product' and 'date' query-string parameters are normalized with
    utils.get_correct_product and utils.get_correct_date; the channel is
    always 'nightly'.
    """
    args = request.args
    product = utils.get_correct_product(args.get('product', ''))
    date = utils.get_correct_date(args.get('date', 'today'))
    channel = 'nightly'

    directories = models.Crashes.listdirs(product, channel, date)
    # The template appends the quoted directory name to this prefix
    base_url = 'crashes.html?product={}&channel={}&date={}&dir='.format(
        product, channel, date)

    return render_template('list.html',
                           quote=quote,
                           base_url=base_url,
                           dirs=directories)
def crashes():
    """Render ``crashes.html`` with the crashes stored for a directory.

    The 'product' and 'date' query-string parameters are normalized with
    utils.get_correct_product and utils.get_correct_date, 'dir' is used
    as is and the channel is always 'nightly'.
    """
    def plural(n):
        # Helper handed to the template to pick the right noun for a count
        if n == 1:
            return 'crash'
        return 'crashes'

    args = request.args
    product = utils.get_correct_product(args.get('product', ''))
    date = utils.get_correct_date(args.get('date', 'today'))
    directory = args.get('dir', '')
    channel = 'nightly'

    found = models.Crashes.get(product, channel, directory, date)
    return render_template('crashes.html',
                           plural=plural,
                           crashes=found)

111
file2crashes/models.py Normal file
Просмотреть файл

@ -0,0 +1,111 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import defaultdict
from file2crashes import utils as f2cutils
from file2crashes import app, db, analyze
class Crashes(db.Model):
    """One row per (product, channel, date, file, url) crash entry.

    The path handed to the constructor is split by f2cutils.get_file into
    the ``directory`` and ``file`` columns.
    """
    __tablename__ = 'crashes'

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    product = db.Column(db.String(20))
    channel = db.Column(db.String(20))
    date = db.Column(db.Date)
    # Directory part and file-name part of the path
    directory = db.Column(db.String(256))
    file = db.Column(db.String(128))
    url = db.Column(db.UnicodeText)
    count = db.Column(db.Integer, default=0)
    signature = db.Column(db.String(512))

    def __init__(self, product, channel, date, path, url, count, signature):
        """Build a row; ``date`` is normalized and ``path`` is split."""
        self.product = product
        self.channel = channel
        self.date = f2cutils.get_date(date)
        self.directory, self.file = f2cutils.get_file(path)
        self.url = url
        self.count = count
        self.signature = signature

    @staticmethod
    def put(product, channel, date, file, url, count, signature, commit=True):
        """Insert a crash entry, or update the count of the existing one.

        Args:
            product (str): product name
            channel (str): release channel
            date: anything accepted by f2cutils.get_date
            file (str): full path of the file (directory + file name)
            url (str): link associated with the entry
            count (int): number of crashes
            signature (str): crash signature
            commit (bool): commit the session once done
        """
        # Look the row up the way __init__ stores it: normalized date and
        # path split into directory + file name. Filtering on the full path
        # against the ``file`` column never matched, so each update inserted
        # a duplicate row.
        date = f2cutils.get_date(date)
        directory, filename = f2cutils.get_file(file)
        existing = db.session.query(Crashes).filter_by(product=product,
                                                       channel=channel,
                                                       date=date,
                                                       directory=directory,
                                                       file=filename,
                                                       url=url).first()
        if existing:
            existing.count = count
        else:
            db.session.add(Crashes(product, channel, date, file, url,
                                   count, signature))

        if commit:
            db.session.commit()

    @staticmethod
    def put_data(data, date, commit=True):
        """Store the entries of ``data`` for ``date``.

        Args:
            data (dict): channel -> product -> file -> list of dicts with
                'url', 'count' and 'signature' keys
            date: anything accepted by f2cutils.get_date
            commit (bool): commit the session after all the entries

        Returns:
            bool: False when ``data`` is empty, True otherwise
        """
        if not data:
            return False

        for channel, by_product in data.items():
            for product, by_file in by_product.items():
                for file, entries in by_file.items():
                    for entry in entries:
                        # A single commit is made at the end
                        Crashes.put(product,
                                    channel,
                                    date,
                                    file,
                                    entry['url'],
                                    entry['count'],
                                    entry['signature'],
                                    commit=False)
        if commit:
            db.session.commit()
        return True

    @staticmethod
    def get(product, channel, directory, date):
        """Get the entries stored for a directory.

        Returns:
            dict: file name -> list of [url, count, signature] sorted by
                count; {} when ``directory`` is empty or the date is
                not valid
        """
        if not directory:
            return {}
        date = f2cutils.get_date(date)
        if not date:
            return {}

        cs = db.session.query(Crashes).filter_by(product=product,
                                                 channel=channel,
                                                 date=date,
                                                 directory=directory)
        by_file = defaultdict(list)
        for c in cs:
            by_file[c.file].append([c.url, c.count, c.signature])

        return {f: sorted(u, key=lambda p: p[1])
                for f, u in by_file.items()}

    @staticmethod
    def listdirs(product, channel, date):
        """Get the sorted list of the directories stored for a date.

        ``date`` is normalized with f2cutils.get_date, as in get(), so that
        strings such as 'today' are compared as dates; an invalid date
        gives an empty list.
        """
        date = f2cutils.get_date(date)
        if not date:
            return []

        cs = db.session.query(Crashes).filter_by(product=product,
                                                 channel=channel,
                                                 date=date)
        return sorted({c.directory for c in cs})
def update(date='today'):
    """Analyze the nightly crashes for ``date`` and store the results.

    The analysis covers the 'Firefox' and 'FennecAndroid' products.
    """
    channels = ['nightly']
    products = ['Firefox', 'FennecAndroid']
    results = analyze.get(channels, products, date=date)
    Crashes.put_data(results, date)
def create():
    """Create the database tables when the 'crashes' table is missing."""
    engine = db.get_engine(app)
    if not engine.dialect.has_table(engine, 'crashes'):
        db.create_all()
        # NOTE(review): update() is assumed to belong to this branch
        # (first population of a freshly created table) - confirm.
        update()

Просмотреть файл

@ -0,0 +1,20 @@
{#- Lists the crashes per file. Variables: `crashes` maps a file name to (url, count, signature) entries; `plural(count)` gives the noun printed after a count. NOTE(review): `url|safe` skips autoescaping - confirm the stored urls are always trusted. -#}
<!doctype html>
<html lang="en-us">
<head>
<meta charset="utf-8">
<title>Files</title>
</head>
<body>
<ul>
{% for file, info in crashes.items() -%}
<li>{{ file }}
<ul>
{% for url, count, signature in info -%}
<li>{{ count }} {{ plural(count) }}, signature is <a href="{{ url|safe }}">{{ signature }}</a></li>
{% endfor -%}
</ul>
</li>
{% endfor -%}
</ul>
</body>
</html>

Просмотреть файл

@ -0,0 +1,14 @@
{#- Lists the directories as links. Variables: `dirs` is the list of directory names; `base_url` is the link prefix to which the quoted directory name is appended; `quote` is the quoting function used on each name. -#}
<!doctype html>
<html lang="en-us">
<head>
<meta charset="utf-8">
<title>Directories</title>
</head>
<body>
<ul>
{% for d in dirs -%}
<li><a href="{{ (base_url + quote(d, safe=''))|safe}}">{{ d }}</a></li>
{% endfor -%}
</ul>
</body>
</html>

59
file2crashes/utils.py Normal file
Просмотреть файл

@ -0,0 +1,59 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import datetime
import six
import sys
from libmozdata import utils
def get_products():
    """Return a new list holding the known product names."""
    products = ['Firefox', 'FennecAndroid']
    return products
def get_channels():
    """Return a new list holding the known release channels."""
    channels = ['nightly', 'aurora', 'beta', 'release']
    return channels
def disp(*args):
    """Print the positional arguments, as a tuple, and flush stdout."""
    out = sys.stdout
    print(args)
    out.flush()
def get_date(date):
    """Normalize ``date`` to a ``datetime.date``.

    Args:
        date: a ``datetime.datetime``, a ``datetime.date``, or a string
            parsed with libmozdata's ``utils.get_date_ymd``

    Returns:
        datetime.date: the normalized date, or None when ``date`` is
            falsy, of another type, or a string which cannot be parsed
    """
    if not date:
        return None

    # datetime.datetime is a subclass of datetime.date: it has to be tested
    # first, otherwise datetimes are returned without being converted.
    if isinstance(date, datetime.datetime):
        return date.date()
    if isinstance(date, datetime.date):
        return date

    if isinstance(date, six.string_types):
        try:
            parsed = utils.get_date_ymd(date)
            return datetime.date(parsed.year, parsed.month, parsed.day)
        except Exception:
            # Best effort: a string which cannot be parsed gives None
            return None

    return None
def get_correct_date(date):
    """Return ``date`` formatted by libmozdata, falling back to today.

    ``date`` goes through get_date; when that yields nothing, the result
    of ``utils.get_date('today')`` is returned instead.
    """
    normalized = get_date(date)
    if not normalized:
        return utils.get_date('today')
    return utils.get_date_str(normalized)
def get_correct_product(p):
    """Return the canonical product name for ``p`` (case-insensitive).

    Anything which is not a known product gives 'Firefox'.
    """
    canonical = {'firefox': 'Firefox',
                 'fennecandroid': 'FennecAndroid'}
    key = p.lower()
    if key in canonical:
        return canonical[key]
    return 'Firefox'
def get_correct_channel(c):
    """Return ``c`` lower-cased when it is a known channel, else 'nightly'."""
    lowered = c.lower()
    if lowered in get_channels():
        return lowered
    return 'nightly'
def get_file(path):
    """Split ``path`` at its last '/' into (directory, file name).

    Args:
        path (str): a '/'-separated path

    Returns:
        (str, str): the directory and the file name; the directory is ''
            when ``path`` does not contain any '/'
    """
    # rpartition gives ('', '', path) when there is no separator, whereas
    # slicing on rfind's -1 chopped the last character off the path.
    directory, _, filename = path.rpartition('/')
    return directory, filename

10
requirements.txt Normal file
Просмотреть файл

@ -0,0 +1,10 @@
libmozdata>=0.1.31
requests[security]>=2.7.0
flask>=0.11.1
flask_sqlalchemy>=2.1
flask_cors>=3.0.2
sqlalchemy>=1.1.5
python-dateutil>=2.5.2
gunicorn>=19.6.0
psycopg2>=2.6.2
apscheduler>=3.3.1