Set a fixed seed for the random under-sampler, so we get consistent results

Former-commit-id: da8d6fc7b5
2018-09-24 23:08:49 +01:00 · 2018-09-24 23:08:49 +01:00 · 2df82f0a1d
--- a/run.py
+++ b/run.py
@@ -109,7 +109,7 @@ extraction_pipeline = Pipeline([
 X = extraction_pipeline.fit_transform(bugs)

 # Under-sample the 'bug' class, as there are too many compared to 'feature'.
-X, y = RandomUnderSampler().fit_sample(X, y)
+X, y = RandomUnderSampler(random_state=0).fit_sample(X, y)

 # Split dataset in training and test.
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)