Scripts to analyze validation results (bug 1183396)

Fixes #613
This commit is contained in:
Mark Striemer 2015-07-22 15:47:41 -05:00
Родитель a49256cb6d
Коммит 81a97dc1f0
3 изменённых файлов: 258 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,71 @@
"""
Fetch data from the olympia database for validation results and unlisted
addons for use with the validations.py script.
Expected environment variables:
MYSQL_HOST - The MySQL host.
MYSQL_USER - The MySQL username.
MYSQL_PASSWORD - The MySQL password.
Actions supported:
validations - Fetch validation data for the last 30 days and write it to
the filesystem in files named `validations/YYYY-MM-DD.txt`.
unlisted - Fetch all unlisted addon guids and write the results to
`validations/unlisted-addons.txt`.
Usage:
python fetch_validation_data.py <action>
"""
import os
import sys
from datetime import datetime, timedelta
import MySQLdb
# strftime pattern for a single day; also used to name the per-day output
# files and to build the LIKE prefix for the query below.
date_format = '%Y-%m-%d'
# NOTE: the connection is opened when the module is loaded, so MYSQL_HOST,
# MYSQL_USER and MYSQL_PASSWORD must be set in the environment (a missing
# variable raises KeyError before any action runs).
db = MySQLdb.connect(host=os.environ['MYSQL_HOST'],
user=os.environ['MYSQL_USER'],
passwd=os.environ['MYSQL_PASSWORD'],
db="addons_mozilla_org")
# One module-wide cursor shared by both fetch functions.
cursor = db.cursor()
# Parameterized query: %s is bound to a 'YYYY-MM-DD%' pattern so every upload
# whose `created` value starts with that day is selected. Rows without
# validation data are skipped and results come back newest first.
QUERY_FORMAT = """
SELECT validation
FROM file_uploads
WHERE created LIKE %s
AND validation IS NOT NULL
ORDER BY created DESC;
"""
def fetch_data_for_date(date):
    """Dump the validation results uploaded on ``date`` to a text file.

    ``date`` is formatted with ``date_format`` and used both as the LIKE
    prefix for the query and as the output file name
    (``validations/<date>.txt``). Each selected validation is written on its
    own line. The ``validations`` directory must already exist.
    """
    day = date.strftime(date_format)
    print('Fetching for {date}'.format(date=day))
    # Trailing % turns the day into a prefix match on the `created` column.
    cursor.execute(QUERY_FORMAT, [day + '%'])
    output_path = 'validations/{date}.txt'.format(date=day)
    with open(output_path, 'w') as output:
        for record in cursor:
            output.writelines((record[0], '\n'))
def fetch_unlisted_addon_ids():
    """Write the guid of every unlisted addon to a text file.

    Selects all non-NULL guids from ``addons`` where ``is_listed=0`` and
    writes them, one per line, to ``validations/unlisted-addons.txt``. The
    ``validations`` directory must already exist.
    """
    print('Fetching unlisted addons')
    query = ('SELECT guid FROM addons WHERE is_listed=0 '
             'AND guid IS NOT NULL;')
    cursor.execute(query)
    with open('validations/unlisted-addons.txt', 'w') as output:
        for record in cursor:
            output.writelines((record[0], '\n'))
if __name__ == '__main__':
    # Exactly one command line argument (the action) is expected; anything
    # else is treated as an empty, and therefore unknown, action.
    action = sys.argv[1] if len(sys.argv) == 2 else ''
    if action == 'validations':
        today = datetime.today()
        # Walk from 30 days ago up to yesterday, oldest day first (today
        # itself is not fetched).
        for days_ago in range(30, 0, -1):
            fetch_data_for_date(today - timedelta(days=days_ago))
    elif action == 'unlisted':
        fetch_unlisted_addon_ids()
    else:
        print('Unknown action "{action}"'.format(action=action))
        # Report the failure to the calling shell instead of exiting 0, so
        # scripts wrapping this command can detect the misuse.
        sys.exit(1)

Просмотреть файл

@ -0,0 +1,42 @@
from validations import (parse_validations, severe_validations,
unlisted_validations)
# Explicitly flagged as unlisted in its own metadata, so it is expected in the
# output regardless of which guids are passed in as unlisted.
TEST_ADDON_LISTED_FALSE = {'metadata': {'listed': False, 'id': 'wat'}}
# Carries no `listed` flag; it only counts as unlisted when its id ('baz') is
# part of the unlisted guid set given to unlisted_validations.
TEST_ADDON_UNLISTED_ID = {'metadata': {'id': 'baz'}}
# Mixed input for the unlisted_validations tests: a listed addon, the two
# fixtures above (same objects, so results can be compared against them) and
# an addon without a `listed` flag whose id is never in the unlisted set.
TEST_ADDONS = [
{'metadata': {'listed': True, 'id': 'yo'}},
TEST_ADDON_LISTED_FALSE,
{'metadata': {'id': 'foobar'}},
TEST_ADDON_UNLISTED_ID,
]
def test_parse_validations():
    """Each newline-terminated JSON line is decoded into a Python value."""
    lines = [
        '{"foo":"bar"}\n',
        '["baz",1,{"wat":99}]\n',
    ]
    expected = [{'foo': 'bar'}, ['baz', 1, {'wat': 99}]]
    assert list(parse_validations(lines)) == expected
def test_unlisted_validations_without_unlisted_addons():
    """With no known unlisted guids only the listed=False addon matches."""
    no_guids = set()
    matched = list(unlisted_validations(TEST_ADDONS, no_guids))
    assert matched == [TEST_ADDON_LISTED_FALSE]
def test_unlisted_validations_with_unlisted_addons():
    """Addons whose id is in the unlisted guid set are reported as well."""
    guids = set(('baz', 'wat'))
    expected = [TEST_ADDON_LISTED_FALSE, TEST_ADDON_UNLISTED_ID]
    assert list(unlisted_validations(TEST_ADDONS, guids)) == expected
def test_severe_validations():
    """Only results with a high, medium or low signing count are kept."""
    def summary(**counts):
        # Start from an all-zero summary and override the given levels.
        levels = {'high': 0, 'medium': 0, 'trivial': 0, 'low': 0}
        levels.update(counts)
        return {'signing_summary': levels}

    nope = summary()
    minor = summary(low=1)
    trivial = summary(trivial=1)
    severe = summary(high=10)
    validations = [nope, trivial, minor, nope, severe, nope]
    assert list(severe_validations(validations)) == [minor, severe]

145
scripts/validations.py Normal file
Просмотреть файл

@ -0,0 +1,145 @@
"""
Process validation data retrieved using fetch_validation_data.py. Two types
of data are expected. A file at `validations/unlisted-addons.txt` that contains
the guid of each unlisted addon and input on STDIN which has the validation
JSON data for each validation to check. See fetch_validation_data.py for how
this data is retrieved. Results are returned on STDOUT.
The following reports are supported:
* count - Return signing errors ordered by addon unique frequency in the
format: `error.id.dot.separated total_count unique_addon_count`.
* context - Return the context for the 5 most common signing errors in the JSON
format: `{"context": ["", ...], "error": "error.id"}`.
Usage:
cat my-test-data-*.txt | python validations.py <report> > results.txt
"""
import itertools
import json
import sys
# Report names accepted as the single command line argument.
ACTION_CONTEXT = 'context'
ACTION_COUNT = 'count'
# Every supported action; used to validate argv and to build the usage text.
ACTIONS = (ACTION_CONTEXT, ACTION_COUNT)
def parse_validations(results):
    """Lazily decode an iterable of JSON documents (one per item)."""
    decode = json.loads
    return (decode(line) for line in results)
def unlisted_validations(results, unlisted_addons=None):
    """Lazily keep only the validation results of unlisted addons.

    A result is kept when its metadata has an ``id`` and either the metadata
    says ``listed`` is falsy or the id is in ``unlisted_addons``. When
    ``unlisted_addons`` is None the guids are loaded right away through
    ``get_unlisted_addons()``.
    """
    if unlisted_addons is None:
        unlisted_addons = get_unlisted_addons()

    def is_unlisted(result):
        metadata = result['metadata']
        if 'id' not in metadata:
            return False
        # A missing `listed` flag is treated as listed.
        if not metadata.get('listed', True):
            return True
        return metadata['id'] in unlisted_addons

    return (result for result in results if is_unlisted(result))
def severe_validations(results):
    """Lazily keep results with at least one high, medium or low signing hit.

    Only those three ``signing_summary`` counters are consulted; other keys
    such as ``trivial`` do not make a result severe.
    """
    severe_levels = ('high', 'medium', 'low')

    def is_severe(result):
        return any(result['signing_summary'][level] > 0
                   for level in severe_levels)

    return (result for result in results if is_severe(result))
def error_messages(results):
    """Lazily flatten results into one dict per signing-related message.

    Only messages that contain a ``signing_severity`` key are reported. Each
    yielded dict has the addon id, the dot-joined message id and the
    message's context.
    """
    def describe(result, message):
        return {
            'addon': result['metadata']['id'],
            'message_id': '.'.join(message['id']),
            'context': message['context'],
        }

    return (describe(result, message)
            for result in results
            for message in result['messages']
            if 'signing_severity' in message)
def sort_by_message(results):
    """Return a new list of the entries ordered by their ``message_id``."""
    def message_id(entry):
        return entry['message_id']

    ordered = list(results)
    ordered.sort(key=message_id)
    return ordered
def group_by_message(results):
    """Group consecutive entries that share the same ``message_id``.

    Returns the ``itertools.groupby`` iterator of ``(message_id, entries)``
    pairs; the input is expected to be sorted by ``message_id`` already.
    """
    def message_id(entry):
        return entry['message_id']

    return itertools.groupby(results, key=message_id)
def extract_error_results(results):
    """Summarize each ``(error, messages)`` group into a single dict.

    Yields dicts with the error id, the total number of messages, the number
    of distinct addons that produced them and the list of message contexts.
    """
    for message_id, group in results:
        # The group may be a one-shot iterator, so materialize it once.
        entries = list(group)
        addon_ids = set(entry['addon'] for entry in entries)
        summary = {'error': message_id, 'total': len(entries)}
        summary['unique'] = len(addon_ids)
        summary['contexts'] = [entry['context'] for entry in entries]
        yield summary
def sort_results_by_unique(results):
    """Return a new list ordered by ``unique`` count, largest first."""
    def unique_count(entry):
        return entry['unique']

    ranked = list(results)
    ranked.sort(key=unique_count, reverse=True)
    return ranked
def format_error_count(results):
    """Lazily render each summary as ``<error> <total> <unique>``."""
    render = '{error} {total} {unique}'.format
    return (render(**entry) for entry in results)
def format_contexts(results):
    """Yield one JSON document per context of every error summary.

    Each document has the keys ``error`` (the summary's error id) and
    ``context`` (a single entry of the summary's ``contexts`` list).
    """
    for entry in results:
        for context in entry['contexts']:
            payload = {'error': entry['error'], 'context': context}
            yield json.dumps(payload)
def get_unlisted_addons():
    """Load the unlisted addon guids as a set.

    Reads ``validations/unlisted-addons.txt`` (relative to the current
    working directory) and strips surrounding whitespace from every line.
    """
    guids = set()
    with open('validations/unlisted-addons.txt') as listing:
        for line in listing:
            guids.add(line.strip())
    return guids
def main(action):
    """Run the report named by ``action`` over the data on STDIN.

    Raises ValueError when ``action`` is neither ACTION_CONTEXT nor
    ACTION_COUNT.
    """
    def top_five(results):
        return itertools.islice(results, 5)

    if action == ACTION_CONTEXT:
        # Only get context for the top 5 errors (they're already sorted by
        # unique occurrences so we can just take the first 5).
        report_steps = [top_five, format_contexts]
    elif action == ACTION_COUNT:
        report_steps = [format_error_count]
    else:
        raise ValueError('{0} is not a valid action'.format(action))

    # Steps shared by every report: parse, filter, then aggregate per error.
    shared_steps = [
        parse_validations,
        unlisted_validations,
        severe_validations,
        error_messages,
        sort_by_message,
        group_by_message,
        extract_error_results,
        sort_results_by_unique,
    ]
    process_pipeline(shared_steps + report_steps)
def process_pipeline(pipeline):
    """Feed STDIN through every pipeline step and print the final items.

    Each step is called with the previous step's return value (the first one
    receives ``sys.stdin``); the last return value is iterated and every item
    is printed on its own line.
    """
    stream = sys.stdin
    for stage in pipeline:
        stream = stage(stream)
    for line in stream:
        print(line)
if __name__ == '__main__':
    # Exactly one argument naming a supported report is required; otherwise
    # show the usage text and exit with a failure status.
    if len(sys.argv) == 2 and sys.argv[1] in ACTIONS:
        main(sys.argv[1])
    else:
        usage = """Usage: python {name} <action>
action: {actions}
values are read from STDIN"""
        print(usage.format(name=sys.argv[0], actions='|'.join(ACTIONS)))
        sys.exit(1)