Merge pull request #14 from microsoft/omri/reintroduce_analyzer

reintroduced analyzer
This commit is contained in:
Omri Mendels 2020-05-27 15:35:14 +03:00 committed by GitHub
Parents 2a635427ea a271a1825f
Commit 0d5c4ba3fe
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 580 additions and 540 deletions

View file

@@ -1,33 +1,31 @@
'''
Presidio Analyzer not yet on PyPI, cannot explicitly reference it
'''
from typing import List
from presidio_analyzer import AnalyzerEngine
from typing import List, Dict
#
from presidio_evaluator import ModelEvaluator, InputSample, span_to_tag
#
from presidio_evaluator.data_generator import read_synth_dataset
#
#
class PresidioAnalyzer(ModelEvaluator):
def __init__(self, analyzer,
entities_to_keep: List[str] = None,
verbose: bool = False,
labeling_scheme="BIO",
compare_by_io=True,
score_threshold=0.4
):
def __init__(
self,
analyzer=AnalyzerEngine(),
entities_to_keep: List[str] = None,
verbose: bool = False,
labeling_scheme="BIO",
compare_by_io=True,
score_threshold=0.4,
):
"""
Evaluation wrapper for the Presidio Analyzer
:param analyzer: object of type AnalyzerEngine (from presidio-analyzer)
"""
super().__init__(entities_to_keep=entities_to_keep,
verbose=verbose,
labeling_scheme=labeling_scheme,
compare_by_io=compare_by_io)
super().__init__(
entities_to_keep=entities_to_keep,
verbose=verbose,
labeling_scheme=labeling_scheme,
compare_by_io=compare_by_io,
)
self.analyzer = analyzer
self.score_threshold = score_threshold
@@ -37,8 +35,12 @@ class PresidioAnalyzer(ModelEvaluator):
all_fields = True
else:
all_fields = None
results = self.analyzer.analyze(sample.full_text, self.entities,
language='en', all_fields=all_fields)
results = self.analyzer.analyze(
text=sample.full_text,
entities=self.entities,
language="en",
all_fields=all_fields,
)
starts = []
ends = []
scores = []
@@ -52,13 +54,15 @@ class PresidioAnalyzer(ModelEvaluator):
tags.append(res.entity_type)
scores.append(res.score)
#
response_tags = span_to_tag(scheme=self.labeling_scheme,
text=sample.full_text,
start=starts,
end=ends,
tokens=sample.tokens,
scores=scores,
tag=tags)
response_tags = span_to_tag(
scheme=self.labeling_scheme,
text=sample.full_text,
start=starts,
end=ends,
tokens=sample.tokens,
scores=scores,
tag=tags,
)
return response_tags
@@ -70,41 +74,53 @@ if __name__ == "__main__":
# Mapping between dataset entities and Presidio entities. Key: Dataset entity, Value: Presidio entity
entities_mapping = {
'PERSON': 'PERSON',
'EMAIL': 'EMAIL_ADDRESS',
'CREDIT_CARD': 'CREDIT_CARD',
'FIRST_NAME': 'PERSON',
'PHONE_NUMBER': 'PHONE_NUMBER',
'BIRTHDAY': 'DATE_TIME',
'DATE': 'DATE_TIME',
'DOMAIN': 'DOMAIN',
'CITY': 'LOCATION',
'ADDRESS': 'LOCATION',
'IBAN': 'IBAN_CODE',
'URL': 'DOMAIN_NAME',
'US_SSN': 'US_SSN',
'IP_ADDRESS': 'IP_ADDRESS',
'ORGANIZATION': 'ORG',
'O': 'O'
"PERSON": "PERSON",
"EMAIL": "EMAIL_ADDRESS",
"CREDIT_CARD": "CREDIT_CARD",
"FIRST_NAME": "PERSON",
"PHONE_NUMBER": "PHONE_NUMBER",
"BIRTHDAY": "DATE_TIME",
"DATE": "DATE_TIME",
"DOMAIN": "DOMAIN",
"CITY": "LOCATION",
"ADDRESS": "LOCATION",
"IBAN": "IBAN_CODE",
"URL": "DOMAIN_NAME",
"US_SSN": "US_SSN",
"IP_ADDRESS": "IP_ADDRESS",
"ORGANIZATION": "ORG",
"O": "O",
}
updated_samples = ModelEvaluator.align_input_samples_to_presidio_analyzer(input_samples,
entities_mapping)
updated_samples = ModelEvaluator.align_input_samples_to_presidio_analyzer(
input_samples, entities_mapping
)
flatten = lambda l: [item for sublist in l for item in sublist]
from collections import Counter
count_per_entity = Counter(
[span.entity_type for span in flatten([input_sample.spans for input_sample in updated_samples])])
[
span.entity_type
for span in flatten(
[input_sample.spans for input_sample in updated_samples]
)
]
)
print("Evaluating samples")
analyzer = PresidioAnalyzer(entities_to_keep=count_per_entity.keys())
evaluated_samples = analyzer.evaluate_all(updated_samples)
#
print("Estimating metrics")
precision, recall, \
entity_recall, entity_precision, \
f, errors = analyzer.calculate_score(evaluation_results=evaluated_samples, beta=2.5)
(
precision,
recall,
entity_recall,
entity_precision,
f,
errors,
) = analyzer.calculate_score(evaluation_results=evaluated_samples, beta=2.5)
#
print("precision: {}".format(precision))
print("Recall: {}".format(recall))
@@ -112,22 +128,24 @@ if __name__ == "__main__":
print("Precision per entity: {}".format(entity_precision))
print("Recall per entity: {}".format(entity_recall))
#
FN_mistakes = [mistake for mistake in flatten(errors) if mistake[0:2] == 'FN']
FP_mistakes = [mistake for mistake in flatten(errors) if mistake[0:2] == 'FP']
other_mistakes = [mistake for mistake in flatten(errors) if "Wrong entity" in mistake]
FN_mistakes = [mistake for mistake in flatten(errors) if mistake[0:2] == "FN"]
FP_mistakes = [mistake for mistake in flatten(errors) if mistake[0:2] == "FP"]
other_mistakes = [
mistake for mistake in flatten(errors) if "Wrong entity" in mistake
]
fn = open('../data/fn_30000.txt', 'w+', encoding='utf-8')
fn1 = '\n'.join(FN_mistakes)
fn = open("../data/fn_30000.txt", "w+", encoding="utf-8")
fn1 = "\n".join(FN_mistakes)
fn.write(fn1)
fn.close()
fp = open('../data/fp_30000.txt', 'w+', encoding='utf-8')
fp1 = '\n'.join(FP_mistakes)
fp = open("../data/fp_30000.txt", "w+", encoding="utf-8")
fp1 = "\n".join(FP_mistakes)
fp.write(fp1)
fp.close()
mistakes_file = open('../data/mistakes_30000.txt', 'w+', encoding='utf-8')
mistakes1 = '\n'.join(other_mistakes)
mistakes_file = open("../data/mistakes_30000.txt", "w+", encoding="utf-8")
mistakes1 = "\n".join(other_mistakes)
mistakes_file.write(mistakes1)
mistakes_file.close()
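
For context, the wrapper's predict step reduces to a direct AnalyzerEngine call with the keyword arguments shown above. A minimal sketch, assuming presidio-analyzer is installed and that each result exposes entity_type, start, end, and score as the wrapper expects; the sample text and entity list are illustrative only:

from presidio_analyzer import AnalyzerEngine

analyzer = AnalyzerEngine()
# Mirrors the wrapper's call when specific entities are kept (all_fields stays None)
results = analyzer.analyze(
    text="My name is Mike and my number is 212-555-5555",
    entities=["PERSON", "PHONE_NUMBER"],
    language="en",
    all_fields=None,
)
for res in results:
    # These are the fields predict() reads when building the span lists
    print(res.entity_type, res.start, res.end, res.score)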

View file

@@ -1,40 +1,50 @@
'''
"""
Presidio Analyzer not yet on PyPI, therefore it cannot be referenced explicitly
'''
"""
import math
from typing import List, Tuple, Dict
from presidio_evaluator import ModelEvaluator, InputSample
from presidio_analyzer.nlp_engine import SpacyNlpEngine
from presidio_evaluator import ModelEvaluator, InputSample, EvaluationResult
from presidio_evaluator.span_to_tag import span_to_tag
class PresidioRecognizerEvaluator(ModelEvaluator):
def __init__(self, recognizer, nlp_engine, entities_to_keep=None,
with_nlp_artifacts=False, verbose=False, compare_by_io=True,
):
def __init__(
self,
recognizer,
nlp_engine,
entities_to_keep=None,
with_nlp_artifacts=False,
verbose=False,
compare_by_io=True,
):
"""
Evaluator for one recognizer
:param recognizer: An object of type EntityRecognizer (in presidio-analyzer)
:param nlp_engine: An object of type NlpEngine, e.g. SpacyNlpEngine (in presidio-analyzer)
"""
super().__init__(entities_to_keep=entities_to_keep,
verbose=verbose, compare_by_io=compare_by_io)
super().__init__(
entities_to_keep=entities_to_keep,
verbose=verbose,
compare_by_io=compare_by_io,
)
self.withNlpArtifacts = with_nlp_artifacts
self.recognizer = recognizer
self.nlp_engine = nlp_engine
#
def __make_nlp_artifacts(self, text: str):
return self.nlp_engine.process_text(text, 'en')
return self.nlp_engine.process_text(text, "en")
#
def predict(self, sample: InputSample) -> List[str]:
nlpArtifacts = None
if self.withNlpArtifacts:
nlpArtifacts = self.__make_nlp_artifacts(sample.full_text)
results = self.recognizer.analyze(sample.full_text, self.entities,
nlpArtifacts)
results = self.recognizer.analyze(sample.full_text, self.entities, nlpArtifacts)
starts = []
ends = []
tags = []
@@ -46,37 +56,33 @@ class PresidioRecognizerEvaluator(ModelEvaluator):
ends.append(res.end)
tags.append(res.entity_type)
scores.append(res.score)
response_tags = span_to_tag(scheme=self.labeling_scheme,
text=sample.full_text,
start=starts,
end=ends,
tag=tags,
tokens=sample.tokens,
scores=scores,
io_tags_only=self.compare_by_io)
response_tags = span_to_tag(
scheme=self.labeling_scheme,
text=sample.full_text,
start=starts,
end=ends,
tag=tags,
tokens=sample.tokens,
scores=scores,
io_tags_only=self.compare_by_io,
)
if len(sample.tags) == 0:
sample.tags = ['0' for word in response_tags]
sample.tags = ["0" for word in response_tags]
return response_tags
def score_presidio_recognizer(recognizer, entities_to_keep, input_samples,
withNlpArtifacts=False) \
-> Tuple[Dict[str, float], Dict[str, float], Dict[str, float], Dict[
str, float], Dict[str, float], List[str]]:
model = PresidioRecognizerEvaluator(recognizer=recognizer,
entities_to_keep=entities_to_keep,
with_nlp_artifacts=withNlpArtifacts)
def score_presidio_recognizer(
recognizer, entities_to_keep, input_samples, withNlpArtifacts=False
) -> EvaluationResult:
model = PresidioRecognizerEvaluator(
recognizer=recognizer,
entities_to_keep=entities_to_keep,
nlp_engine=SpacyNlpEngine(),
with_nlp_artifacts=withNlpArtifacts,
)
evaluated_samples = model.evaluate_all(input_samples[:])
precision, recall, ent_recall, \
ent_precision, fscore, mistakes = model.calculate_score(
evaluated_samples, beta=2.5)
print("p={precision}, r={recall},f={f},"
"entity recall={ent},entity precision={prec}".format(
precision=precision,
recall=recall,
f=fscore,
ent=ent_recall,
prec=ent_precision))
if math.isnan(precision):
precision = 0
return precision, recall, ent_recall, ent_precision, fscore, mistakes
evaluation_result = model.calculate_score(evaluated_samples, beta=2.5)
evaluation_result.print()
if math.isnan(evaluation_result.pii_precision):
evaluation_result.pii_precision = 0
return evaluation_result
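
A usage sketch for score_presidio_recognizer, following the pattern the recognizer tests below use. It assumes presidio-analyzer ships the predefined CreditCardRecognizer and that a synthetic dataset file exists at the illustrative path:

from presidio_analyzer.predefined_recognizers.credit_card_recognizer import CreditCardRecognizer
from presidio_evaluator.data_generator import read_synth_dataset

# Illustrative path; any file produced by the data generator works here
input_samples = read_synth_dataset("data/generated_small.txt")
evaluation_result = score_presidio_recognizer(
    recognizer=CreditCardRecognizer(),
    entities_to_keep=["CREDIT_CARD"],
    input_samples=input_samples,
)
# calculate_score returns an EvaluationResult; pii_f is the F-beta score checked by the tests
print(evaluation_result.pii_f)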

View file

@@ -15,3 +15,4 @@ regex
#flair
sklearn_crfsuite
pytest
presidio_analyzer

View file

@@ -1,7 +1,9 @@
import pytest
try:
from flair.models import SequenceTagger
except ImportError:
print("Flair is not installed by default")
except ImportError:
print("Flair is not installed by default")
from presidio_evaluator.data_generator import read_synth_dataset
from presidio_evaluator.flair_evaluator import FlairEvaluator
@@ -9,18 +11,26 @@ from presidio_evaluator.flair_evaluator import FlairEvaluator
import numpy as np
# no-unit because flair is not a dependency by default
def no_unit_test_flair_simple():
@pytest.mark.skip(reason="Flair not installed by default")
def test_flair_simple():
import os
dir_path = os.path.dirname(os.path.realpath(__file__))
input_samples = read_synth_dataset(os.path.join(dir_path, "data/generated_small.txt"))
input_samples = read_synth_dataset(
os.path.join(dir_path, "data/generated_small.txt")
)
model = SequenceTagger.load('ner-ontonotes-fast') # .load('ner')
model = SequenceTagger.load("ner-ontonotes-fast") # .load('ner')
flair_evaluator = FlairEvaluator(model=model, entities_to_keep=['PERSON'])
flair_evaluator = FlairEvaluator(model=model, entities_to_keep=["PERSON"])
evaluation_results = flair_evaluator.evaluate_all(input_samples)
scores = flair_evaluator.calculate_score(evaluation_results)
np.testing.assert_almost_equal(scores.pii_precision, scores.entity_precision_dict['PERSON'])
np.testing.assert_almost_equal(scores.pii_recall, scores.entity_recall_dict['PERSON'])
np.testing.assert_almost_equal(
scores.pii_precision, scores.entity_precision_dict["PERSON"]
)
np.testing.assert_almost_equal(
scores.pii_recall, scores.entity_recall_dict["PERSON"]
)
assert scores.pii_recall > 0
assert scores.pii_precision > 0

View file

@@ -1,80 +1,101 @@
'''
Presidio Analyzer not yet on PyPI, ignoring temporarily
'''
#
# import pytest
#
# from presidio_evaluator import InputSample, Span
# from presidio_evaluator.data_generator import read_synth_dataset
# from presidio_evaluator.presidio_analyzer import PresidioAnalyzer
#
#
# class GeneratedTextTestCase:
# def __init__(self, test_name, test_input, acceptance_threshold, marks):
# self.test_name = test_name
# self.test_input = test_input
# self.acceptance_threshold = acceptance_threshold
# self.marks = marks
#
# def to_pytest_param(self):
# return pytest.param(self.test_input, self.acceptance_threshold,
# id=self.test_name, marks=self.marks)
#
#
# # generated-text test cases
# analyzer_test_generate_text_testdata = [
# # small set fixture which expects all results.
# GeneratedTextTestCase(
# test_name="small-set",
# test_input="{}/data/generated_small.txt",
# acceptance_threshold=0.3,
# marks=pytest.mark.none
# )
# ]
#
#
# @pytest.mark.skip(reason="Presidio analyzer not on PyPi")
# def test_analyzer_simple_input():
# model = PresidioAnalyzer(entities_to_keep=['PERSON'])
#
# sample = InputSample(full_text="My name is Mike",
# masked="My name is [PERSON]",
# spans=[Span('PERSON', 'Mike', 10, 14)],
# create_tags_from_span=True)
#
# evaluated = model.evaluate_sample(sample)
# metrics = model.calculate_score(
# [evaluated])
#
# assert metrics.pii_precision == 1
# assert metrics.pii_recall == 1
#
#
# # analyzer tests on generated data
# @pytest.mark.skip(reason="Presidio analyzer not on PyPi")
# @pytest.mark.parametrize("test_input,acceptance_threshold",
# [testcase.to_pytest_param() for testcase in
# analyzer_test_generate_text_testdata])
# def test_analyzer_with_generated_text(test_input, acceptance_threshold):
# """
# Test analyzer with a generated dataset text file
# :param test_input: input text file location
#    :param acceptance_threshold: minimum precision/recall
# allowed for tests to pass
# """
# # read test input from generated file
#
# import os
# dir_path = os.path.dirname(os.path.realpath(__file__))
# input_samples = read_synth_dataset(
# test_input.format(dir_path))
#
# updated_samples = PresidioAnalyzer. \
# align_input_samples_to_presidio_analyzer(input_samples)
#
# analyzer = PresidioAnalyzer()
# evaluated_samples = analyzer.evaluate_all(updated_samples)
# scores = analyzer.calculate_score(evaluation_results=evaluated_samples)
#
# assert acceptance_threshold <= scores.pii_precision
# assert acceptance_threshold <= scores.pii_recall
import pytest
from presidio_evaluator import InputSample, Span
from presidio_evaluator.data_generator import read_synth_dataset
from presidio_evaluator.presidio_analyzer import PresidioAnalyzer
# Mapping between dataset entities and Presidio entities. Key: Dataset entity, Value: Presidio entity
entities_mapping = {
"PERSON": "PERSON",
"EMAIL": "EMAIL_ADDRESS",
"CREDIT_CARD": "CREDIT_CARD",
"FIRST_NAME": "PERSON",
"PHONE_NUMBER": "PHONE_NUMBER",
"BIRTHDAY": "DATE_TIME",
"DATE": "DATE_TIME",
"DOMAIN": "DOMAIN",
"CITY": "LOCATION",
"ADDRESS": "LOCATION",
"IBAN": "IBAN_CODE",
"URL": "DOMAIN_NAME",
"US_SSN": "US_SSN",
"IP_ADDRESS": "IP_ADDRESS",
"ORGANIZATION": "ORG",
"O": "O",
}
class GeneratedTextTestCase:
def __init__(self, test_name, test_input, acceptance_threshold, marks):
self.test_name = test_name
self.test_input = test_input
self.acceptance_threshold = acceptance_threshold
self.marks = marks
def to_pytest_param(self):
return pytest.param(
self.test_input,
self.acceptance_threshold,
id=self.test_name,
marks=self.marks,
)
# generated-text test cases
analyzer_test_generate_text_testdata = [
# small set fixture which expects all results.
GeneratedTextTestCase(
test_name="small-set",
test_input="{}/data/generated_small.txt",
acceptance_threshold=0.3,
marks=pytest.mark.none,
)
]
def test_analyzer_simple_input():
model = PresidioAnalyzer(entities_to_keep=["PERSON"])
sample = InputSample(
full_text="My name is Mike",
masked="My name is [PERSON]",
spans=[Span("PERSON", "Mike", 10, 14)],
create_tags_from_span=True,
)
evaluated = model.evaluate_sample(sample)
metrics = model.calculate_score([evaluated])
assert metrics.pii_precision == 1
assert metrics.pii_recall == 1
# analyzer tests on generated data
@pytest.mark.parametrize(
"test_input,acceptance_threshold",
[testcase.to_pytest_param() for testcase in analyzer_test_generate_text_testdata],
)
def test_analyzer_with_generated_text(test_input, acceptance_threshold):
"""
Test analyzer with a generated dataset text file
:param test_input: input text file location
:param acceptance_threshold: minimum precision/recall
allowed for tests to pass
"""
# read test input from generated file
import os
dir_path = os.path.dirname(os.path.realpath(__file__))
input_samples = read_synth_dataset(test_input.format(dir_path))
updated_samples = PresidioAnalyzer.align_input_samples_to_presidio_analyzer(
input_samples=input_samples, entities_mapping=entities_mapping
)
analyzer = PresidioAnalyzer()
evaluated_samples = analyzer.evaluate_all(updated_samples)
scores = analyzer.calculate_score(evaluation_results=evaluated_samples)
assert acceptance_threshold <= scores.pii_precision
assert acceptance_threshold <= scores.pii_recall

View file

@@ -1,62 +1,58 @@
'''
Presidio Analyzer not yet on PyPI, ignoring temporarily
'''
from presidio_evaluator.data_generator import read_synth_dataset
from presidio_evaluator.presidio_recognizer_evaluator import score_presidio_recognizer
import pytest
# from presidio_evaluator.data_generator import read_synth_dataset
# from presidio_evaluator.presidio_recognizer_evaluator import score_presidio_recognizer
# import pytest
#
# from analyzer.predefined_recognizers.credit_card_recognizer import CreditCardRecognizer
#
# # test case parameters for tests with dataset which was previously generated.
# class GeneratedTextTestCase:
# def __init__(self, test_name, test_input, acceptance_threshold, marks):
# self.test_name = test_name
# self.test_input = test_input
# self.acceptance_threshold = acceptance_threshold
# self.marks = marks
#
# def to_pytest_param(self):
# return pytest.param(self.test_input, self.acceptance_threshold,
# id=self.test_name, marks=self.marks)
#
#
# # generated-text test cases
# cc_test_generate_text_testdata = [
# # small set fixture which expects all type results.
# GeneratedTextTestCase(
# test_name="small-set",
# test_input="{}/data/generated_small.txt",
# acceptance_threshold=1,
# marks=pytest.mark.none
# ),
# # large set fixture which expects all type results. marked as "slow"
# GeneratedTextTestCase(
# test_name="large_set",
# test_input="{}/data/generated_large.txt",
# acceptance_threshold=1,
# marks=pytest.mark.slow
# )
# ]
#
#
# # credit card recognizer tests on generated data
# @pytest.mark.parametrize("test_input,acceptance_threshold",
# [testcase.to_pytest_param()
# for testcase in cc_test_generate_text_testdata])
# def test_credit_card_recognizer_with_generated_text(test_input, acceptance_threshold):
# """
# Test credit card recognizer with a generated dataset text file
# :param test_input: input text file location
#    :param acceptance_threshold: minimum precision/recall
# allowed for tests to pass
# """
#
# # read test input from generated file
# import os
# dir_path = os.path.dirname(os.path.realpath(__file__))
# input_samples = read_synth_dataset(
# test_input.format(dir_path))
# scores = score_presidio_recognizer(
# CreditCardRecognizer(), 'CREDIT_CARD', input_samples)
# assert acceptance_threshold <= scores.pii_f
from presidio_analyzer.predefined_recognizers.credit_card_recognizer import CreditCardRecognizer
# test case parameters for tests with dataset which was previously generated.
class GeneratedTextTestCase:
def __init__(self, test_name, test_input, acceptance_threshold, marks):
self.test_name = test_name
self.test_input = test_input
self.acceptance_threshold = acceptance_threshold
self.marks = marks
def to_pytest_param(self):
return pytest.param(self.test_input, self.acceptance_threshold,
id=self.test_name, marks=self.marks)
# generated-text test cases
cc_test_generate_text_testdata = [
# small set fixture which expects all type results.
GeneratedTextTestCase(
test_name="small-set",
test_input="{}/data/generated_small.txt",
acceptance_threshold=1,
marks=pytest.mark.none
),
# large set fixture which expects all type results. marked as "slow"
GeneratedTextTestCase(
test_name="large_set",
test_input="{}/data/generated_large.txt",
acceptance_threshold=1,
marks=pytest.mark.slow
)
]
# credit card recognizer tests on generated data
@pytest.mark.parametrize("test_input,acceptance_threshold",
[testcase.to_pytest_param()
for testcase in cc_test_generate_text_testdata])
def test_credit_card_recognizer_with_generated_text(test_input, acceptance_threshold):
"""
Test credit card recognizer with a generated dataset text file
:param test_input: input text file location
:param acceptance_threshold: minimum precision/recall
allowed for tests to pass
"""
# read test input from generated file
import os
dir_path = os.path.dirname(os.path.realpath(__file__))
input_samples = read_synth_dataset(
test_input.format(dir_path))
scores = score_presidio_recognizer(
CreditCardRecognizer(), 'CREDIT_CARD', input_samples)
assert acceptance_threshold <= scores.pii_f

View file

@@ -1,83 +1,79 @@
'''
Presidio Analyzer not yet on PyPI, ignoring temporarily
'''
from presidio_evaluator.data_generator import generate
from presidio_evaluator.presidio_recognizer_evaluator import \
score_presidio_recognizer
import pytest
import numpy as np
# from presidio_evaluator.data_generator import generate
# from presidio_evaluator.presidio_recognizer_evaluator import \
# score_presidio_recognizer
# import pytest
# import numpy as np
#
# from analyzer.predefined_recognizers.credit_card_recognizer import CreditCardRecognizer
#
# # test case parameters for tests with dataset generated from a template and csv values
# class TemplateTextTestCase:
# def __init__(self, test_name, pii_csv, utterances, dictionary_path,
# num_of_examples, acceptance_threshold, marks):
# self.test_name = test_name
# self.pii_csv = pii_csv
# self.utterances = utterances
# self.dictionary_path = dictionary_path
# self.num_of_examples = num_of_examples
# self.acceptance_threshold = acceptance_threshold
# self.marks = marks
#
# def to_pytest_param(self):
# return pytest.param(self.pii_csv, self.utterances, self.dictionary_path,
# self.num_of_examples, self.acceptance_threshold,
# id=self.test_name, marks=self.marks)
#
#
# # template-dataset test cases
# cc_test_template_testdata = [
# # large dataset fixture. marked as slow
# TemplateTextTestCase(
# test_name="fake-names-100",
# pii_csv="{}/data/FakeNameGenerator.com_100.csv",
# utterances="{}/data/templates.txt",
# dictionary_path="{}/data/Dictionary_test.csv",
# num_of_examples=100,
# acceptance_threshold=0.9,
# marks=pytest.mark.slow
# )
# ]
#
#
# # credit card recognizer tests on template-generated data
# @pytest.mark.parametrize("pii_csv, "
# "utterances, "
# "dictionary_path, "
# "num_of_examples, "
# "acceptance_threshold",
# [testcase.to_pytest_param()
# for testcase in cc_test_template_testdata])
# def test_credit_card_recognizer_with_template(pii_csv, utterances,
# dictionary_path,
# num_of_examples,
# acceptance_threshold):
# """
# Test credit card recognizer with a dataset generated from
# template and a CSV values file
# :param pii_csv: input csv file location
# :param utterances: template file location
# :param dictionary_path: dictionary/vocabulary file location
# :param num_of_examples: number of samples to be used from dataset
# to test
#    :param acceptance_threshold: minimum precision/recall
# allowed for tests to pass
# """
#
# # read template and CSV files
# import os
# dir_path = os.path.dirname(os.path.realpath(__file__))
#
# input_samples = generate(fake_pii_csv=pii_csv.format(dir_path),
# utterances_file=utterances.format(dir_path),
# dictionary_path=dictionary_path.format(dir_path),
# lower_case_ratio=0.5,
# num_of_examples=num_of_examples)
#
# scores = score_presidio_recognizer(
# CreditCardRecognizer(), 'CREDIT_CARD', input_samples)
# if not np.isnan(scores.pii_f):
# assert acceptance_threshold <= scores.pii_f
from presidio_analyzer.predefined_recognizers.credit_card_recognizer import CreditCardRecognizer
# test case parameters for tests with dataset generated from a template and csv values
class TemplateTextTestCase:
def __init__(self, test_name, pii_csv, utterances, dictionary_path,
num_of_examples, acceptance_threshold, marks):
self.test_name = test_name
self.pii_csv = pii_csv
self.utterances = utterances
self.dictionary_path = dictionary_path
self.num_of_examples = num_of_examples
self.acceptance_threshold = acceptance_threshold
self.marks = marks
def to_pytest_param(self):
return pytest.param(self.pii_csv, self.utterances, self.dictionary_path,
self.num_of_examples, self.acceptance_threshold,
id=self.test_name, marks=self.marks)
# template-dataset test cases
cc_test_template_testdata = [
# large dataset fixture. marked as slow
TemplateTextTestCase(
test_name="fake-names-100",
pii_csv="{}/data/FakeNameGenerator.com_100.csv",
utterances="{}/data/templates.txt",
dictionary_path="{}/data/Dictionary_test.csv",
num_of_examples=100,
acceptance_threshold=0.9,
marks=pytest.mark.slow
)
]
# credit card recognizer tests on template-generated data
@pytest.mark.parametrize("pii_csv, "
"utterances, "
"dictionary_path, "
"num_of_examples, "
"acceptance_threshold",
[testcase.to_pytest_param()
for testcase in cc_test_template_testdata])
def test_credit_card_recognizer_with_template(pii_csv, utterances,
dictionary_path,
num_of_examples,
acceptance_threshold):
"""
Test credit card recognizer with a dataset generated from
template and a CSV values file
:param pii_csv: input csv file location
:param utterances: template file location
:param dictionary_path: dictionary/vocabulary file location
:param num_of_examples: number of samples to be used from dataset
to test
:param acceptance_threshold: minimum precision/recall
allowed for tests to pass
"""
# read template and CSV files
import os
dir_path = os.path.dirname(os.path.realpath(__file__))
input_samples = generate(fake_pii_csv=pii_csv.format(dir_path),
utterances_file=utterances.format(dir_path),
dictionary_path=dictionary_path.format(dir_path),
lower_case_ratio=0.5,
num_of_examples=num_of_examples)
scores = score_presidio_recognizer(
CreditCardRecognizer(), 'CREDIT_CARD', input_samples)
if not np.isnan(scores.pii_f):
assert acceptance_threshold <= scores.pii_f

View file

@@ -1,148 +1,144 @@
'''
Presidio Analyzer not yet on PyPI, ignoring temporarily
'''
from presidio_evaluator.data_generator import FakeDataGenerator
from presidio_evaluator.presidio_recognizer_evaluator import \
score_presidio_recognizer
import pandas as pd
import pytest
import numpy as np
# from presidio_evaluator.data_generator import FakeDataGenerator
# from presidio_evaluator.presidio_recognizer_evaluator import \
# score_presidio_recognizer
# import pandas as pd
# import pytest
# import numpy as np
#
# from analyzer import Pattern, PatternRecognizer
#
# # test case parameters for tests with dataset generated from a template and
# # two csv value files, one containing the common-entities and another one with custom entities
# class PatternRecognizerTestCase:
# def __init__(self, test_name, entity_name, pattern, score, pii_csv, ext_csv,
# utterances, dictionary_path, num_of_examples, acceptance_threshold,
# max_mistakes_number, marks):
# self.test_name = test_name
# self.entity_name = entity_name
# self.pattern = pattern
# self.score = score
# self.pii_csv = pii_csv
# self.ext_csv = ext_csv
# self.utterances = utterances
# self.dictionary_path = dictionary_path
# self.num_of_examples = num_of_examples
# self.acceptance_threshold = acceptance_threshold
# self.max_mistakes_number = max_mistakes_number
# self.marks = marks
#
# def to_pytest_param(self):
# return pytest.param(self.pii_csv, self.ext_csv, self.utterances,
# self.dictionary_path,
# self.entity_name, self.pattern, self.score,
# self.num_of_examples, self.acceptance_threshold,
# self.max_mistakes_number, id=self.test_name,
# marks=self.marks)
#
#
# # template-dataset test cases
# rocket_test_template_testdata = [
# # large dataset fixture. marked as slow.
# # all input is correct, test is conclusive
# PatternRecognizerTestCase(
# test_name="rocket-no-errors",
# entity_name="ROCKET",
# pattern=r'\W*(rocket)\W*',
# score=0.8,
# pii_csv="{}/data/FakeNameGenerator.com_100.csv",
# ext_csv="{}/data/FakeRocketGenerator.csv",
# utterances="{}/data/rocket_example_sentences.txt",
# dictionary_path="{}/data/Dictionary_test.csv",
# num_of_examples=100,
# acceptance_threshold=1,
# max_mistakes_number=0,
# marks=pytest.mark.slow
# ),
# # large dataset fixture. marked as slow
# # all input is correct, test is conclusive
# PatternRecognizerTestCase(
# test_name="rocket-all-errors",
# entity_name="ROCKET",
# pattern=r'\W*(rocket)\W*',
# score=0.8,
# pii_csv="{}/data/FakeNameGenerator.com_100.csv",
# ext_csv="{}/data/FakeRocketErrorsGenerator.csv",
# utterances="{}/data/rocket_example_sentences.txt",
# dictionary_path="{}/data/Dictionary_test.csv",
# num_of_examples=100,
# acceptance_threshold=0,
# max_mistakes_number=100,
# marks=pytest.mark.slow
# ),
# # large dataset fixture. marked as slow
# # some input is correct some is not, test is inconclusive
# PatternRecognizerTestCase(
# test_name="rocket-some-errors",
# entity_name="ROCKET",
# pattern=r'\W*(rocket)\W*',
# score=0.8,
# pii_csv="{}/data/FakeNameGenerator.com_100.csv",
# ext_csv="{}/data/FakeRocket50PercentErrorsGenerator.csv",
# utterances="{}/data/rocket_example_sentences.txt",
# dictionary_path="{}/data/Dictionary_test.csv",
# num_of_examples=100,
# acceptance_threshold=0.3,
# max_mistakes_number=70,
# marks=[pytest.mark.slow, pytest.mark.inconclusive]
# )
# ]
#
#
# @pytest.mark.parametrize(
# "pii_csv, ext_csv, utterances, dictionary_path, "
# "entity_name, pattern, score, num_of_examples, "
# "acceptance_threshold, max_mistakes_number",
# [testcase.to_pytest_param()
# for testcase in rocket_test_template_testdata])
# def test_pattern_recognizer(pii_csv, ext_csv, utterances, dictionary_path,
# entity_name, pattern,
# score, num_of_examples, acceptance_threshold,
# max_mistakes_number):
# """
# Test generic pattern recognizer with a dataset generated from template, a CSV values file with common entities
# and another CSV values file with a custom entity
# :param pii_csv: input csv file location with the common entities
# :param ext_csv: input csv file location with custom entities
# :param utterances: template file location
# :param dictionary_path: vocabulary/dictionary file location
# :param entity_name: custom entity name
# :param pattern: recognizer pattern
# :param num_of_examples: number of samples to be used from dataset to test
#    :param acceptance_threshold: minimum precision/recall
# allowed for tests to pass
# """
#
# import os
# dir_path = os.path.dirname(os.path.realpath(__file__))
# dfpii = pd.read_csv(pii_csv.format(dir_path), encoding='utf-8')
# dfext = pd.read_csv(ext_csv.format(dir_path), encoding='utf-8')
# dictionary_path = dictionary_path.format(dir_path)
# ext_column_name = dfext.columns[0]
#
# def get_from_ext(i):
# index = i % dfext.shape[0]
# return dfext.iat[index, 0]
#
# # extend pii with ext data
# dfpii[ext_column_name] = [get_from_ext(i) for i in range(0, dfpii.shape[0])]
#
# # generate examples
# generator = FakeDataGenerator(fake_pii_csv_file=dfpii,
# utterances_file=utterances.format(dir_path),
# dictionary_path=dictionary_path)
# examples = generator.sample_examples(num_of_examples)
#
# pattern = Pattern("test pattern", pattern, score)
# pattern_recognizer = PatternRecognizer(entity_name,
# name="test recognizer",
# patterns=[pattern])
#
# scores = score_presidio_recognizer(
# pattern_recognizer, [entity_name], examples)
# if not np.isnan(scores.pii_f):
# assert acceptance_threshold <= scores.pii_f
# assert max_mistakes_number >= len(scores.model_errors)
from presidio_analyzer import Pattern, PatternRecognizer
# test case parameters for tests with dataset generated from a template and
# two csv value files, one containing the common-entities and another one with custom entities
class PatternRecognizerTestCase:
def __init__(self, test_name, entity_name, pattern, score, pii_csv, ext_csv,
utterances, dictionary_path, num_of_examples, acceptance_threshold,
max_mistakes_number, marks):
self.test_name = test_name
self.entity_name = entity_name
self.pattern = pattern
self.score = score
self.pii_csv = pii_csv
self.ext_csv = ext_csv
self.utterances = utterances
self.dictionary_path = dictionary_path
self.num_of_examples = num_of_examples
self.acceptance_threshold = acceptance_threshold
self.max_mistakes_number = max_mistakes_number
self.marks = marks
def to_pytest_param(self):
return pytest.param(self.pii_csv, self.ext_csv, self.utterances,
self.dictionary_path,
self.entity_name, self.pattern, self.score,
self.num_of_examples, self.acceptance_threshold,
self.max_mistakes_number, id=self.test_name,
marks=self.marks)
# template-dataset test cases
rocket_test_template_testdata = [
# large dataset fixture. marked as slow.
# all input is correct, test is conclusive
PatternRecognizerTestCase(
test_name="rocket-no-errors",
entity_name="ROCKET",
pattern=r'\W*(rocket)\W*',
score=0.8,
pii_csv="{}/data/FakeNameGenerator.com_100.csv",
ext_csv="{}/data/FakeRocketGenerator.csv",
utterances="{}/data/rocket_example_sentences.txt",
dictionary_path="{}/data/Dictionary_test.csv",
num_of_examples=100,
acceptance_threshold=1,
max_mistakes_number=0,
marks=pytest.mark.slow
),
# large dataset fixture. marked as slow
# all input is correct, test is conclusive
PatternRecognizerTestCase(
test_name="rocket-all-errors",
entity_name="ROCKET",
pattern=r'\W*(rocket)\W*',
score=0.8,
pii_csv="{}/data/FakeNameGenerator.com_100.csv",
ext_csv="{}/data/FakeRocketErrorsGenerator.csv",
utterances="{}/data/rocket_example_sentences.txt",
dictionary_path="{}/data/Dictionary_test.csv",
num_of_examples=100,
acceptance_threshold=0,
max_mistakes_number=100,
marks=pytest.mark.slow
),
# large dataset fixture. marked as slow
# some input is correct some is not, test is inconclusive
PatternRecognizerTestCase(
test_name="rocket-some-errors",
entity_name="ROCKET",
pattern=r'\W*(rocket)\W*',
score=0.8,
pii_csv="{}/data/FakeNameGenerator.com_100.csv",
ext_csv="{}/data/FakeRocket50PercentErrorsGenerator.csv",
utterances="{}/data/rocket_example_sentences.txt",
dictionary_path="{}/data/Dictionary_test.csv",
num_of_examples=100,
acceptance_threshold=0.3,
max_mistakes_number=70,
marks=[pytest.mark.slow, pytest.mark.inconclusive]
)
]
@pytest.mark.parametrize(
"pii_csv, ext_csv, utterances, dictionary_path, "
"entity_name, pattern, score, num_of_examples, "
"acceptance_threshold, max_mistakes_number",
[testcase.to_pytest_param()
for testcase in rocket_test_template_testdata])
def test_pattern_recognizer(pii_csv, ext_csv, utterances, dictionary_path,
entity_name, pattern,
score, num_of_examples, acceptance_threshold,
max_mistakes_number):
"""
Test generic pattern recognizer with a dataset generated from a template, a CSV values file with common entities
and another CSV values file with a custom entity
:param pii_csv: input csv file location with the common entities
:param ext_csv: input csv file location with custom entities
:param utterances: template file location
:param dictionary_path: vocabulary/dictionary file location
:param entity_name: custom entity name
:param pattern: recognizer pattern
:param num_of_examples: number of samples to be used from dataset to test
:param acceptance_threshold: minimum precision/recall
allowed for tests to pass
"""
import os
dir_path = os.path.dirname(os.path.realpath(__file__))
dfpii = pd.read_csv(pii_csv.format(dir_path), encoding='utf-8')
dfext = pd.read_csv(ext_csv.format(dir_path), encoding='utf-8')
dictionary_path = dictionary_path.format(dir_path)
ext_column_name = dfext.columns[0]
def get_from_ext(i):
index = i % dfext.shape[0]
return dfext.iat[index, 0]
# extend pii with ext data
dfpii[ext_column_name] = [get_from_ext(i) for i in range(0, dfpii.shape[0])]
# generate examples
generator = FakeDataGenerator(fake_pii_csv_file=dfpii,
utterances_file=utterances.format(dir_path),
dictionary_path=dictionary_path)
examples = generator.sample_examples(num_of_examples)
pattern = Pattern("test pattern", pattern, score)
pattern_recognizer = PatternRecognizer(entity_name,
name="test recognizer",
patterns=[pattern])
scores = score_presidio_recognizer(
pattern_recognizer, [entity_name], examples)
if not np.isnan(scores.pii_f):
assert acceptance_threshold <= scores.pii_f
assert max_mistakes_number >= len(scores.model_errors)
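
As a standalone sketch of the building blocks this test exercises, assuming Pattern and PatternRecognizer behave as constructed above; the sentence is illustrative only:

from presidio_analyzer import Pattern, PatternRecognizer

rocket_pattern = Pattern("test pattern", r"\W*(rocket)\W*", 0.8)
rocket_recognizer = PatternRecognizer(
    "ROCKET", name="test recognizer", patterns=[rocket_pattern]
)
# Pattern recognizers need no NLP artifacts, so None is passed for that argument
results = rocket_recognizer.analyze("They fueled the rocket at dawn", ["ROCKET"], None)
for res in results:
    print(res.entity_type, res.start, res.end, res.score)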

View file

@@ -1,63 +1,59 @@
'''
Presidio Analyzer not yet on PyPI, ignoring temporarily
'''
from presidio_evaluator.data_generator import read_synth_dataset
from presidio_evaluator.presidio_recognizer_evaluator import \
score_presidio_recognizer
# from presidio_evaluator.data_generator import read_synth_dataset
# from presidio_evaluator.presidio_recognizer_evaluator import \
# score_presidio_recognizer
#
# import pytest
# from analyzer.predefined_recognizers.spacy_recognizer import SpacyRecognizer
#
# # test case parameters for tests with dataset which was previously generated.
# class GeneratedTextTestCase:
# def __init__(self, test_name, test_input, acceptance_threshold, marks):
# self.test_name = test_name
# self.test_input = test_input
# self.acceptance_threshold = acceptance_threshold
# self.marks = marks
#
# def to_pytest_param(self):
# return pytest.param(self.test_input, self.acceptance_threshold,
# id=self.test_name, marks=self.marks)
#
#
# # generated-text test cases
# cc_test_generate_text_testdata = [
# # small dataset, inconclusive results
# GeneratedTextTestCase(
# test_name="small-set",
# test_input="{}/data/generated_small.txt",
# acceptance_threshold=0.5,
# marks=pytest.mark.inconclusive
# ),
# # large dataset - test is slow and inconclusive
# GeneratedTextTestCase(
# test_name="large-set",
# test_input="{}/data/generated_large.txt",
# acceptance_threshold=0.5,
# marks=pytest.mark.slow
# )
# ]
#
#
# # credit card recognizer tests on generated data
# @pytest.mark.parametrize("test_input,acceptance_threshold",
# [testcase.to_pytest_param() for testcase in
# cc_test_generate_text_testdata])
# def test_spacy_recognizer_with_generated_text(test_input, acceptance_threshold):
# """
# Test spacy recognizer with a generated dataset text file
# :param test_input: input text file location
#    :param acceptance_threshold: minimum precision/recall
# allowed for tests to pass
# """
#
# # read test input from generated file
# import os
# dir_path = os.path.dirname(os.path.realpath(__file__))
# input_samples = read_synth_dataset(
# test_input.format(dir_path))
# scores = score_presidio_recognizer(
# SpacyRecognizer(), ['PERSON'], input_samples, True)
# assert acceptance_threshold <= scores.pii_f
import pytest
from presidio_analyzer.predefined_recognizers.spacy_recognizer import SpacyRecognizer
# test case parameters for tests with dataset which was previously generated.
class GeneratedTextTestCase:
def __init__(self, test_name, test_input, acceptance_threshold, marks):
self.test_name = test_name
self.test_input = test_input
self.acceptance_threshold = acceptance_threshold
self.marks = marks
def to_pytest_param(self):
return pytest.param(self.test_input, self.acceptance_threshold,
id=self.test_name, marks=self.marks)
# generated-text test cases
cc_test_generate_text_testdata = [
# small dataset, inconclusive results
GeneratedTextTestCase(
test_name="small-set",
test_input="{}/data/generated_small.txt",
acceptance_threshold=0.5,
marks=pytest.mark.inconclusive
),
# large dataset - test is slow and inconclusive
GeneratedTextTestCase(
test_name="large-set",
test_input="{}/data/generated_large.txt",
acceptance_threshold=0.5,
marks=pytest.mark.slow
)
]
# spacy recognizer tests on generated data
@pytest.mark.parametrize("test_input,acceptance_threshold",
[testcase.to_pytest_param() for testcase in
cc_test_generate_text_testdata])
def test_spacy_recognizer_with_generated_text(test_input, acceptance_threshold):
"""
Test spacy recognizer with a generated dataset text file
:param test_input: input text file location
:param acceptance_threshold: minimum precision/recall
allowed for tests to pass
"""
# read test input from generated file
import os
dir_path = os.path.dirname(os.path.realpath(__file__))
input_samples = read_synth_dataset(
test_input.format(dir_path))
scores = score_presidio_recognizer(
SpacyRecognizer(), ['PERSON'], input_samples, True)
assert acceptance_threshold <= scores.pii_f
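
Unlike the pattern-based recognizers, SpacyRecognizer needs NLP artifacts, which is why the call above passes True for withNlpArtifacts. A minimal sketch of that direct path, assuming the spaCy model behind SpacyNlpEngine is installed; the sentence is illustrative only:

from presidio_analyzer.nlp_engine import SpacyNlpEngine
from presidio_analyzer.predefined_recognizers.spacy_recognizer import SpacyRecognizer

nlp_engine = SpacyNlpEngine()
text = "My name is Mike"
# process_text mirrors PresidioRecognizerEvaluator.__make_nlp_artifacts above
nlp_artifacts = nlp_engine.process_text(text, "en")
results = SpacyRecognizer().analyze(text, ["PERSON"], nlp_artifacts)
for res in results:
    print(res.entity_type, res.start, res.end, res.score)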