Add a function to clean up synonyms, and normalize the 'safe mode' synonyms

This commit is contained in:
Marco Castelluccio 2019-01-02 16:02:02 +01:00
Родитель 7f1d3b77c8
Коммит 859cd9fd8b
2 изменённых файлов: 18 добавлений и 1 удалений

Просмотреть файл

@ -190,11 +190,20 @@ def cleanup_fileref(text):
return re.sub(r'\w+\.py\b|\w+\.json\b|\w+\.js\b|\w+\.jsm\b|\w+\.html\b|\w+\.css\b|\w+\.c\b|\w+\.cpp\b|\w+\.h\b', 'FILE_REFERENCE', text)
def cleanup_synonyms(text, synonyms=None):
    """Replace every known synonym in *text* with its canonical spelling.

    Args:
        text: The string to normalize.
        synonyms: Optional iterable of ``(canonical, [synonym, ...])`` pairs.
            When omitted, the built-in table is used, which maps both
            'safe mode' and 'safemode' to 'safemode'.

    Returns:
        A copy of *text* in which each occurrence of a listed synonym has been
        replaced by the canonical form of its group.

    Matching is case-sensitive and purely substring-based (no word
    boundaries), so e.g. 'unsafe mode' becomes 'unsafemode'.
    """
    if synonyms is None:
        synonyms = [('safemode', ['safe mode', 'safemode'])]

    for synonym_group, synonym_list in synonyms:
        # Regex alternation is ordered: the first alternative that matches
        # wins. Trying the longest synonyms first keeps a short synonym from
        # shadowing a longer one that starts with it.
        alternatives = sorted(synonym_list, key=len, reverse=True)

        # Synonyms are literal text, not regular expressions.
        pattern = '|'.join(re.escape(synonym) for synonym in alternatives if synonym)
        if not pattern:
            # An empty pattern matches at every position and would inject the
            # canonical form between all characters of the text.
            continue

        # A callable replacement inserts the canonical form verbatim instead
        # of having it parsed as a replacement template.
        text = re.sub(pattern, lambda _match, canonical=synonym_group: canonical, text)

    return text
class BugExtractor(BaseEstimator, TransformerMixin):
# Store the feature extractors, the optional commit-message map, and the list
# of text cleanup functions on the instance.
def __init__(self, feature_extractors, commit_messages_map=None):
self.feature_extractors = feature_extractors
self.commit_messages_map = commit_messages_map
# NOTE(review): this listing is a diff whose +/- markers were stripped; the
# two assignments below look like the removed and the added version of the
# same line - confirm against the real file. As written, the second
# assignment is the one that takes effect.
self.cleanup_functions = [cleanup_url, cleanup_fileref]
self.cleanup_functions = [cleanup_url, cleanup_fileref, cleanup_synonyms]
# Fitting is a no-op: neither x nor y is read, and the instance itself is
# returned.
def fit(self, x, y=None):
return self

Просмотреть файл

@ -23,3 +23,11 @@ def test_cleanup_fileref():
]
for orig_text, cleaned_text in tests:
assert bug_features.cleanup_fileref(orig_text) == cleaned_text
def test_cleanup_synonyms():
    """Check that 'safe mode' is rewritten to 'safemode' and 'safemode' is kept."""
    # Maps each raw input to the text expected after synonym cleanup.
    expected_by_input = {
        'I was in safemode, but the problem occurred in safe mode too':
            'I was in safemode, but the problem occurred in safemode too',
    }
    for raw_text, expected_text in expected_by_input.items():
        assert bug_features.cleanup_synonyms(raw_text) == expected_text