2020-02-20 22:55:50 +03:00
|
|
|
import pytest
|
2020-02-28 02:22:14 +03:00
|
|
|
from recon.recognizer import *
|
2020-02-20 22:55:50 +03:00
|
|
|
|
|
|
|
|
|
|
|
def test_base_recognizer(test_texts):
    """Check that a bare ``EntityRecognizer`` raises ``NotImplementedError``.

    Both reading ``labels`` and calling ``predict`` on the base class are
    expected to raise.
    """
    base = EntityRecognizer()

    # Reading the attribute is itself the action under test.
    with pytest.raises(NotImplementedError):
        _ = base.labels

    with pytest.raises(NotImplementedError):
        _ = base.predict(test_texts)
|
2020-02-28 02:22:14 +03:00
|
|
|
|
2020-02-20 22:55:50 +03:00
|
|
|
|
|
|
|
def test_spacy_recognizer(nlp, test_texts):
    """Check ``SpacyEntityRecognizer`` against a pipeline with an entity ruler.

    An ``entity_ruler`` pipe with a fixed set of SKILL / JOB_ROLE patterns is
    added to ``nlp``; the recognizer built on that pipeline must report the
    expected labels and produce the expected number of spans for the first
    two texts.
    """
    skill_and_role_patterns = [
        {"label": "SKILL", "pattern": "Machine learning"},
        {"label": "SKILL", "pattern": "researched"},
        {"label": "SKILL", "pattern": "AI"},
        {"label": "JOB_ROLE", "pattern": "Software Engineer"},
    ]

    entity_ruler = nlp.create_pipe("entity_ruler")
    entity_ruler.add_patterns(skill_and_role_patterns)
    nlp.add_pipe(entity_ruler)

    spacy_recognizer = SpacyEntityRecognizer(nlp)
    assert spacy_recognizer.labels == ["JOB_ROLE", "SKILL"]

    # Materialise the predictions so they can be indexed below.
    predictions = list(spacy_recognizer.predict(test_texts))

    assert predictions[0].text == test_texts[0]
    assert len(predictions[0].spans) == 3
    assert len(predictions[1].spans) == 2
|