зеркало из https://github.com/mozilla/bugbug.git
Add script to retrieve data from a Mercurial repository
This commit is contained in:
Родитель
466aa8446b
Коммит
121b42a9a3
|
@ -8,5 +8,7 @@ The dataset currently contains 2110 bugs, the accuracy of the current classifier
|
|||
|
||||
1. Run `pip install -r requirements.txt` and `pip install -r test-requirements.txt`
|
||||
2. Run `cat data/bugs.json.xz.part* | unxz > data/bugs.json`
|
||||
3. Run `cat data/commits.json.xz.part* | unxz > data/commits.json`
|
||||
|
||||
If you update the bugs database, run `cat data/bugs.json | xz -v9 - | split -d -b 20MB - data/bugs.json.xz.part`.
|
||||
If you update the commits database, run `cat data/commits.json | xz -v9 - | split -d -b 20MB - data/commits.json.xz.part`.
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import argparse
|
||||
|
||||
import hglib
|
||||
|
||||
import db
|
||||
|
||||
# Path of the JSON commits database: read by get_commits() and written by
# download_commits().
COMMITS_DB = 'data/commits.json'
|
||||
|
||||
|
||||
def get_commits():
    """Return the result of reading the commits database at COMMITS_DB."""
    stored_commits = db.read(COMMITS_DB)
    return stored_commits
|
||||
|
||||
|
||||
def download_commits(repo_dir):
    """Dump the log of a Mercurial repository into the commits database.

    Every entry returned by ``hg log`` is converted to a plain dict with the
    keys ``rev``, ``node``, ``tags``, ``branch``, ``author``, ``desc`` and
    ``date`` and the resulting list is written to COMMITS_DB via ``db.write``.

    Args:
        repo_dir: Path to a local Mercurial repository.

    Raises:
        UnicodeDecodeError: If a text field of a commit is not valid UTF-8.
    """
    # Positional layout of the byte-string fields in each log entry; the
    # date (index 6) is not bytes and is handled separately below.
    text_fields = ('rev', 'node', 'tags', 'branch', 'author', 'desc')

    def transform(commit):
        record = {
            name: commit[index].decode('utf-8')
            for index, name in enumerate(text_fields)
        }
        record['date'] = str(commit[6])
        return record

    # Use the client as a context manager so the underlying hg command
    # server is shut down even if decoding a commit raises.
    with hglib.open(repo_dir) as hg:
        commits = [transform(commit) for commit in hg.log()]

    db.write(COMMITS_DB, commits)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: the single positional argument is the path
    # of the repository whose commits are downloaded.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'repository_dir',
        action='store',
        help='Path to the repository',
    )
    options = cli.parse_args()

    download_commits(options.repository_dir)
|
|
@ -5,3 +5,4 @@ requests==2.19.1
|
|||
numpy==1.15.2
|
||||
imbalanced-learn==0.3.3
|
||||
spacy==2.0.12
|
||||
python-hglib==2.6.1
|
||||
|
|
Загрузка…
Ссылка в новой задаче