# presidio-research/tests/test_flair_model.py

import pytest

from presidio_evaluator import InputSample
from presidio_evaluator.evaluation import Evaluator

# Flair is not installed by default. Catch only ImportError (a bare ``except``
# would also swallow KeyboardInterrupt/SystemExit) and bind a ``None``
# placeholder so the name is always defined after this guard.
try:
    from flair.models import SequenceTagger
except ImportError:
    SequenceTagger = None


from presidio_evaluator.models.flair_model import FlairModel

import numpy as np


# no-unit because flair is not a dependency by default
@pytest.mark.skip(reason="Flair not installed by default")
def test_flair_simple():
    """Evaluate a flair sequence tagger, restricted to PERSON, on the small dataset.

    Because only ``PERSON`` is kept, the overall PII precision and recall are
    checked to be (almost) equal to the per-entity ``PERSON`` values, and both
    overall metrics must be strictly positive.
    """
    import os

    # The dataset lives in the ``data`` folder next to this test file.
    dataset_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "data/generated_small.json"
    )
    samples = InputSample.read_dataset_json(dataset_file)

    tagger = SequenceTagger.load("ner-ontonotes-fast")  # .load('ner')

    evaluator = Evaluator(
        model=FlairModel(model=tagger, entities_to_keep=["PERSON"])
    )
    metrics = evaluator.calculate_score(evaluator.evaluate_all(samples))

    # Overall PII metrics vs. the PERSON-only metrics.
    person_precision = metrics.entity_precision_dict["PERSON"]
    np.testing.assert_almost_equal(metrics.pii_precision, person_precision)

    person_recall = metrics.entity_recall_dict["PERSON"]
    np.testing.assert_almost_equal(metrics.pii_recall, person_recall)

    assert metrics.pii_recall > 0
    assert metrics.pii_precision > 0
reintroduced analyzer 2020-05-27 15:21:33 +03:00			`import pytest`

new faker based generator and package updates 2021-12-26 18:37:45 +03:00			`from presidio_evaluator import InputSample`
updates to presidio 2 and spacy 3 2021-04-26 12:40:05 +03:00			`from presidio_evaluator.evaluation import Evaluator`

initial commit 2020-01-06 23:59:12 +03:00			`try:`
			`from flair.models import SequenceTagger`
reintroduced analyzer 2020-05-27 15:21:33 +03:00			`except:`
			`ImportError("Flair is not installed by default")`
initial commit 2020-01-06 23:59:12 +03:00
new faker based generator and package updates 2021-12-26 18:37:45 +03:00
updates to presidio 2 and spacy 3 2021-04-26 12:40:05 +03:00			`from presidio_evaluator.models.flair_model import FlairModel`
initial commit 2020-01-06 23:59:12 +03:00
			`import numpy as np`

updates to presidio 2 and spacy 3 2021-04-26 12:40:05 +03:00
initial commit 2020-01-06 23:59:12 +03:00			`# no-unit because flair is not a dependency by default`
reintroduced analyzer 2020-05-27 15:21:33 +03:00			`@pytest.mark.skip(reason="Flair not installed by default")`
			`def test_flair_simple():`
initial commit 2020-01-06 23:59:12 +03:00			`import os`
reintroduced analyzer 2020-05-27 15:21:33 +03:00
initial commit 2020-01-06 23:59:12 +03:00			`dir_path = os.path.dirname(os.path.realpath(__file__))`
new faker based generator and package updates 2021-12-26 18:37:45 +03:00			`input_samples = InputSample.read_dataset_json(`
			`os.path.join(dir_path, "data/generated_small.json")`
reintroduced analyzer 2020-05-27 15:21:33 +03:00			`)`
initial commit 2020-01-06 23:59:12 +03:00
reintroduced analyzer 2020-05-27 15:21:33 +03:00			`model = SequenceTagger.load("ner-ontonotes-fast") # .load('ner')`
initial commit 2020-01-06 23:59:12 +03:00
updates to presidio 2 and spacy 3 2021-04-26 12:40:05 +03:00			`flair_model = FlairModel(model=model, entities_to_keep=["PERSON"])`
			`evaluator = Evaluator(model=flair_model)`
			`evaluation_results = evaluator.evaluate_all(input_samples)`
			`scores = evaluator.calculate_score(evaluation_results)`
initial commit 2020-01-06 23:59:12 +03:00
reintroduced analyzer 2020-05-27 15:21:33 +03:00			`np.testing.assert_almost_equal(`
			`scores.pii_precision, scores.entity_precision_dict["PERSON"]`
			`)`
			`np.testing.assert_almost_equal(`
			`scores.pii_recall, scores.entity_recall_dict["PERSON"]`
			`)`
initial commit 2020-01-06 23:59:12 +03:00			`assert scores.pii_recall > 0`
			`assert scores.pii_precision > 0`