Split downloading of bugs and retrieval of bugs for training

Former-commit-id: 67c300263f
This commit is contained in:
Marco Castelluccio 2018-10-11 19:40:53 +02:00
Родитель d733796ea7
Коммит 7462bbea6b
3 изменённых файла: 18 добавлений и 10 удалений

Просмотреть файл

@@ -55,14 +55,21 @@ def append_db(path, bugs):
f.write('\n')
def get_bugs(bug_ids):
def get_bugs():
bugs = {}
for bug in read_db(BUGS_DB):
bugs[bug['id']] = bug
return bugs
bug_ids = [bug_id for bug_id in bug_ids if bug_id not in bugs]
print('Loaded ' + str(len(bugs)) + ' bugs.')
def download_bugs(bug_ids):
old_bug_ids = set()
for bug in read_db(BUGS_DB):
old_bug_ids.add(bug['id'])
bug_ids = [bug_id for bug_id in bug_ids if bug_id not in old_bug_ids]
print('Loaded ' + str(len(old_bug_ids)) + ' bugs.')
print('To download ' + str(len(bug_ids)) + ' bugs.')
@@ -103,15 +110,11 @@ def get_bugs(bug_ids):
bugzilla.Bugzilla(bug_ids, bughandler=bughandler, commenthandler=commenthandler, comment_include_fields=COMMENT_INCLUDE_FIELDS, attachmenthandler=attachmenthandler, attachment_include_fields=ATTACHMENT_INCLUDE_FIELDS, historyhandler=historyhandler).get_data().wait()
print('Total number of bugs: {}'.format(len(bugs) + len(new_bugs)))
print('Total number of bugs: {}'.format(len(old_bug_ids) + len(new_bugs)))
if len(new_bugs):
append_db(BUGS_DB, new_bugs.values())
bugs.update(new_bugs)
return bugs
def get_labels():
with open('classes.csv', 'r') as f:
@@ -132,3 +135,8 @@ def get_labels():
assert is_bug == 'True' or is_bug == 'False'
return dict([(int(bug_id), True if is_bug == 'True' else False) for bug_id, is_bug in classes.items()])
if __name__ == '__main__':
classes = get_labels()
download_bugs([bug_id for bug_id in classes.keys()])

Просмотреть файл

@@ -9,7 +9,7 @@ from get_bugs import get_labels
classes = get_labels()
bugs = get_bugs([bug_id for bug_id in classes.keys()])
bugs = get_bugs()
true_positives = 0
true_negatives = 0

2
run.py
Просмотреть файл

@@ -43,7 +43,7 @@ def go(lemmatization=False):
classes = get_labels()
# Retrieve bugs from the local db.
bugs_map = get_bugs([bug_id for bug_id in classes.keys()])
bugs_map = get_bugs()
# Use bugs marked as 'regression' or 'feature', as they are basically labelled.
for bug_id, bug in bugs_map.items():