зеркало из https://github.com/mozilla/bugbug.git
Родитель
e11bac35b9
Коммит
b174cec6a4
|
@ -7,11 +7,6 @@ The dataset currently contains 1913 bugs, the accuracy of the current classifier
|
|||
## Setup
|
||||
|
||||
1. Run `pip install -r requirements.txt` and `pip install -r test-requirements.txt`
|
||||
2. Install MongoDB
|
||||
3. Run `mongo bugbug --eval "db.bugs.drop()"`
|
||||
4. Run `cat data/bugs.json.xz.part* | unxz > data/bugs.json`
|
||||
5. Run `mongoimport --db bugbug --collection bugs --file data/bugs.json`
|
||||
2. Run `cat data/bugs.json.xz.part* | unxz > data/bugs.json`
|
||||
|
||||
If you update the bugs database, run:
|
||||
1. `mongoexport -d bugbug -c bugs -o data/bugs.json`
|
||||
2. `cat data/bugs.json | xz -v1 - | split -d -b 20MB - data/bugs.json.xz.part`
|
||||
If you update the bugs database, run `cat data/bugs.json | xz -v3 - | split -d -b 20MB - data/bugs.json.xz.part`.
|
||||
|
|
32
get_bugs.py
32
get_bugs.py
|
@ -9,7 +9,9 @@ import os
|
|||
|
||||
import requests
|
||||
from libmozdata import bugzilla
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
BUGS_DB = 'data/bugs.json'
|
||||
|
||||
ATTACHMENT_INCLUDE_FIELDS = [
|
||||
'id', 'is_obsolete', 'flags', 'is_patch', 'creator', 'content_type',
|
||||
|
@ -33,13 +35,29 @@ def get_bug_fields():
|
|||
return r.json()['fields']
|
||||
|
||||
|
||||
def get_bugs(bug_ids):
|
||||
client = MongoClient()
|
||||
db = client['bugbug']
|
||||
collection = db['bugs']
|
||||
def read_db(path):
    """Lazily yield the records stored in the JSON-lines file at ``path``.

    Each non-blank line of the file is parsed with ``json.loads`` and the
    resulting object is yielded, so the whole file is never held in memory
    at once.

    Args:
        path: Location of the file to read.

    Yields:
        The decoded JSON value of each non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a stray trailing newline) instead of
            # failing inside json.loads.
            if not line.strip():
                continue
            yield json.loads(line)
|
||||
|
||||
|
||||
def _dump_bugs(path, bugs, mode):
    """Serialize ``bugs`` to ``path``, one JSON document per line.

    Shared implementation of ``write_db`` and ``append_db``; ``mode`` is the
    ``open`` mode ('w' to replace the file, 'a' to extend it).
    """
    # Explicit UTF-8 keeps the on-disk format independent of the platform's
    # default locale encoding.
    with open(path, mode, encoding='utf-8') as f:
        # One buffered writelines call instead of two write calls per record.
        f.writelines(json.dumps(bug) + '\n' for bug in bugs)


def write_db(path, bugs):
    """Replace the contents of ``path`` with ``bugs`` in JSON-lines form.

    Args:
        path: Location of the file to (over)write.
        bugs: Iterable of JSON-serializable objects; each one is written as
            a single line.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If an element of ``bugs`` is not JSON-serializable.
    """
    _dump_bugs(path, bugs, 'w')


def append_db(path, bugs):
    """Append ``bugs`` to the end of ``path`` in JSON-lines form.

    Existing content of the file is kept; the file is created if missing.

    Args:
        path: Location of the file to extend.
        bugs: Iterable of JSON-serializable objects; each one is written as
            a single line.

    Raises:
        OSError: If the file cannot be opened for appending.
        TypeError: If an element of ``bugs`` is not JSON-serializable.
    """
    _dump_bugs(path, bugs, 'a')
|
||||
|
||||
|
||||
def get_bugs(bug_ids):
|
||||
bugs = {}
|
||||
for bug in collection.find():
|
||||
for bug in read_db(BUGS_DB):
|
||||
bugs[bug['id']] = bug
|
||||
|
||||
bug_ids = [bug_id for bug_id in bug_ids if bug_id not in bugs]
|
||||
|
@ -88,7 +106,7 @@ def get_bugs(bug_ids):
|
|||
print('Total number of bugs: {}'.format(len(bugs) + len(new_bugs)))
|
||||
|
||||
if len(new_bugs):
|
||||
collection.insert_many(list(new_bugs.values()))
|
||||
append_db(BUGS_DB, new_bugs.values())
|
||||
|
||||
bugs.update(new_bugs)
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
libmozdata==0.1.40
|
||||
pymongo==3.7.1
|
||||
scikit-learn==0.19.2
|
||||
xgboost==0.80
|
||||
requests==2.19.1
|
||||
|
|
Загрузка…
Ссылка в новой задаче