зеркало из https://github.com/mozilla/bugbug.git
Perform under-sampling of the majority class
Former-commit-id: 8d3c7c3ba4
This commit is contained in:
Родитель
f9c03d0f8f
Коммит
540b7ebaa7
|
@ -4,3 +4,4 @@ scikit-learn==0.19.2
|
|||
xgboost==0.80
|
||||
requests==2.19.1
|
||||
numpy==1.15.2
|
||||
imbalanced-learn==0.3.3
|
||||
|
|
4
run.py
4
run.py
|
@ -7,6 +7,7 @@ from typing import Dict
|
|||
|
||||
import numpy as np
|
||||
import xgboost
|
||||
from imblearn.under_sampling import RandomUnderSampler
|
||||
from sklearn import metrics
|
||||
from sklearn.feature_extraction import DictVectorizer
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
|
@ -109,6 +110,9 @@ extraction_pipeline = Pipeline([
|
|||
|
||||
X = extraction_pipeline.fit_transform(bugs)
|
||||
|
||||
# Under-sample the 'bug' class, as there are too many compared to 'feature'.
|
||||
# Seed the sampler so the retained majority-class rows are the same on every
# run; the train/test split already uses a fixed random_state.
X, y = RandomUnderSampler(random_state=0).fit_sample(X, y)
|
||||
|
||||
# Split dataset in training and test.
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
|
||||
print(X_train.shape, y_train.shape)
|
||||
|
|
Загрузка…
Ссылка в новой задаче