Former-commit-id: d25ee7891f
This commit is contained in:
Marco Castelluccio 2018-10-11 15:43:44 +02:00
Родитель e11bac35b9
Коммит b174cec6a4
3 изменённых файлов: 27 добавлений и 15 удалений

Просмотреть файл

@ -7,11 +7,6 @@ The dataset currently contains 1913 bugs, the accuracy of the current classifier
## Setup
1. Run `pip install -r requirements.txt` and `pip install -r test-requirements.txt`
2. Install MongoDB
3. Run `mongo bugbug --eval "db.bugs.drop()"`
4. Run `cat data/bugs.json.xz.part* | unxz > data/bugs.json`
5. Run `mongoimport --db bugbug --collection bugs --file data/bugs.json`
2. Run `cat data/bugs.json.xz.part* | unxz > data/bugs.json`
If you update the bugs database, run:
1. `mongoexport -d bugbug -c bugs -o data/bugs.json`
2. `cat data/bugs.json | xz -v1 - | split -d -b 20MB - data/bugs.json.xz.part`
If you update the bugs database, run `cat data/bugs.json | xz -v3 - | split -d -b 20MB - data/bugs.json.xz.part`.

Просмотреть файл

@ -9,7 +9,9 @@ import os
import requests
from libmozdata import bugzilla
from pymongo import MongoClient
BUGS_DB = 'data/bugs.json'
ATTACHMENT_INCLUDE_FIELDS = [
'id', 'is_obsolete', 'flags', 'is_patch', 'creator', 'content_type',
@ -33,13 +35,29 @@ def get_bug_fields():
return r.json()['fields']
def get_bugs(bug_ids):
client = MongoClient()
db = client['bugbug']
collection = db['bugs']
def read_db(path):
    """Lazily yield the objects stored in a line-delimited JSON file.

    Args:
        path: Location of the file; every non-blank line must hold one
            JSON document.

    Yields:
        The decoded object for each non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened (raised on first iteration,
            since this is a generator).
        ValueError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Skip whitespace-only lines (e.g. a stray trailing newline)
            # instead of failing in json.loads.
            if not line.strip():
                continue
            yield json.loads(line)
def write_db(path, bugs):
    """Create (or truncate) the file at `path` and store `bugs` in it.

    Each item of the `bugs` iterable is serialized with `json.dumps` and
    written on its own line.
    """
    with open(path, 'w') as db_file:
        db_file.writelines(json.dumps(entry) + '\n' for entry in bugs)
def append_db(path, bugs):
    """Add `bugs` to the end of the file at `path`, keeping existing content.

    The file is opened in append mode; each item of the `bugs` iterable is
    serialized with `json.dumps` and written on its own line.
    """
    with open(path, 'a') as db_file:
        for entry in bugs:
            # print() emits the serialized entry followed by '\n'.
            print(json.dumps(entry), file=db_file)
def get_bugs(bug_ids):
bugs = {}
for bug in collection.find():
for bug in read_db(BUGS_DB):
bugs[bug['id']] = bug
bug_ids = [bug_id for bug_id in bug_ids if bug_id not in bugs]
@ -88,7 +106,7 @@ def get_bugs(bug_ids):
print('Total number of bugs: {}'.format(len(bugs) + len(new_bugs)))
if len(new_bugs):
collection.insert_many(list(new_bugs.values()))
append_db(BUGS_DB, new_bugs.values())
bugs.update(new_bugs)

Просмотреть файл

@ -1,5 +1,4 @@
libmozdata==0.1.40
pymongo==3.7.1
scikit-learn==0.19.2
xgboost==0.80
requests==2.19.1