Issue 754 (#755)

2021-09-05 19:58:17 +10:00 · 2021-09-05 19:58:17 +10:00 · b8d76c6a7e
--- a/docs/supported_entities.md
+++ b/docs/supported_entities.md
@ -53,6 +53,15 @@ For more information, refer to the [adding new recognizers documentation](analyz
 |--- |--- |--- |
 |FIN/NRIC| A National Registration Identification Card | Pattern match and context |

+### Australia
+
+|FieldType|Description|Detection Method|
+|--- |--- |--- |
+|AU_ABN| The Australian Business Number (ABN) is a unique 11 digit identifier issued to all entities registered in the Australian Business Register (ABR). | Pattern match, context, and checksum |
+|AU_ACN| An Australian Company Number is a unique nine-digit number issued by the Australian Securities and Investments Commission to every company registered under the Commonwealth Corporations Act 2001 as an identifier. | Pattern match, context, and checksum |
+|AU_TFN| The tax file number (TFN) is a unique identifier issued by the Australian Taxation Office to each taxpaying entity | Pattern match, context, and checksum |
+|AU_MEDICARE| The Medicare number is a unique identifier issued by the Australian Government that enables the cardholder to receive rebates of medical expenses under Australia's Medicare system. | Pattern match, context, and checksum |
+
 ## Adding a custom PII entity

 See [this documentation](analyzer/adding_recognizers.md) for instructions on how to add a new Recognizer for a new type of PII entity.
--- a/e2e-tests/resources/demo.txt
+++ b/e2e-tests/resources/demo.txt
@ -5,7 +5,7 @@ My credit card number is 4095-2609-9393-4932 and my crypto wallet id is 16Yeky6G

 On September 18 I visited microsoft.com and sent an email to test@presidio.site,  from the IP 192.168.0.1.

-My passport: 191280345 and my phone number: (212) 555-1234.
+My passport: 191280342 and my phone number: (212) 555-1234.

 This is a valid International Bank Account Number: IL150120690000003111111 . Can you please check the status on bank account 954567876544?

--- a/e2e-tests/tests/test_analyzer.py
+++ b/e2e-tests/tests/test_analyzer.py
@ -234,7 +234,7 @@ def test_given_a_correct_input_for_supported_entities_then_expect_a_correct_resp
    expected_response = """
        ["PHONE_NUMBER", "US_DRIVER_LICENSE", "US_PASSPORT", "SG_NRIC_FIN", "LOCATION", "CREDIT_CARD", "CRYPTO", 
        "UK_NHS", "US_SSN", "US_BANK_NUMBER", "EMAIL_ADDRESS", "DATE_TIME", "IP_ADDRESS", "PERSON", "IBAN_CODE", 
-        "NRP", "US_ITIN", "DOMAIN_NAME", "MEDICAL_LICENSE"]
+        "NRP", "US_ITIN", "DOMAIN_NAME", "MEDICAL_LICENSE", "AU_ABN", "AU_ACN", "AU_TFN", "AU_MEDICARE"]
    """
    assert response_status == 200
    assert equal_json_strings(expected_response, response_content)
@ -266,7 +266,8 @@ def test_given_an_illegal_input_for_supported_entities_then_igonre_and_proceed()
    expected_response = """ 
        ["PHONE_NUMBER", "US_DRIVER_LICENSE", "US_PASSPORT", "SG_NRIC_FIN", "LOCATION", "CREDIT_CARD", 
         "CRYPTO", "UK_NHS", "US_SSN", "US_BANK_NUMBER", "EMAIL_ADDRESS", "DATE_TIME", "IP_ADDRESS",
-          "PERSON", "IBAN_CODE", "NRP", "US_ITIN", "DOMAIN_NAME", "MEDICAL_LICENSE"]
+          "PERSON", "IBAN_CODE", "NRP", "US_ITIN", "DOMAIN_NAME", "MEDICAL_LICENSE", "AU_ABN", 
+          "AU_ACN", "AU_TFN", "AU_MEDICARE"]
    """
    assert response_status == 200
    assert equal_json_strings(expected_response, response_content)
--- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/init.py
+++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/init.py
@ -21,6 +21,10 @@ from .us_passport_recognizer import UsPassportRecognizer
 from .us_phone_recognizer import UsPhoneRecognizer
 from .us_ssn_recognizer import UsSsnRecognizer
 from .es_nif_recognizer import EsNifRecognizer
+from .au_abn_recognizer import AuAbnRecognizer
+from .au_acn_recognizer import AuAcnRecognizer
+from .au_tfn_recognizer import AuTfnRecognizer
+from .au_medicare_recognizer import AuMedicareRecognizer

 NLP_RECOGNIZERS = {"spacy": SpacyRecognizer, "stanza": StanzaRecognizer}

@ -49,4 +53,8 @@ __all__ = [
    "SpacyRecognizer",
    "StanzaRecognizer",
    "NLP_RECOGNIZERS",
+    "AuAbnRecognizer",
+    "AuAcnRecognizer",
+    "AuTfnRecognizer",
+    "AuMedicareRecognizer",
 ]
--- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/au_abn_recognizer.py
+++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/au_abn_recognizer.py
@ -0,0 +1,97 @@
+from typing import Optional, List, Tuple
+
+from presidio_analyzer import Pattern, PatternRecognizer
+
+
+class AuAbnRecognizer(PatternRecognizer):
+    """
+    Recognizes Australian Business Number ("ABN").
+
+    The Australian Business Number (ABN) is a unique 11
+    digit identifier issued to all entities registered in
+    the Australian Business Register (ABR).
+    The 11 digit ABN is structured as a 9 digit identifier
+    with two leading check digits.
+    The leading check digits are derived using a modulus 89 calculation.
+    This recognizer identifies ABN using regex, context words and checksum.
+    Reference: https://abr.business.gov.au/Help/AbnFormat
+
+    :param patterns: List of patterns to be used by this recognizer
+    :param context: List of context words to increase confidence in detection
+    :param supported_language: Language this recognizer supports
+    :param supported_entity: The entity this recognizer can detect
+    :param replacement_pairs: List of tuples with potential replacement values
+    for different strings to be used during pattern matching.
+    This can allow a greater variety in input, for example by removing dashes or spaces.
+    """
+
+    PATTERNS = [
+        Pattern(
+            "ABN (Medium)",
+            r"\b\d{2}\s\d{3}\s\d{3}\s\d{3}\b",
+            0.1,
+        ),
+        Pattern(
+            "ABN (Low)",
+            r"\b\d{11}\b",
+            0.01,
+        ),
+    ]
+
+    CONTEXT = [
+        "australian business number",
+        "abn",
+    ]
+
+    def __init__(
+        self,
+        patterns: Optional[List[Pattern]] = None,
+        context: Optional[List[str]] = None,
+        supported_language: str = "en",
+        supported_entity: str = "AU_ABN",
+        replacement_pairs: Optional[List[Tuple[str, str]]] = None,
+    ):
+        self.replacement_pairs = (
+            replacement_pairs if replacement_pairs else [("-", ""), (" ", "")]
+        )
+        context = context if context else self.CONTEXT
+        patterns = patterns if patterns else self.PATTERNS
+        super().__init__(
+            supported_entity=supported_entity,
+            patterns=patterns,
+            context=context,
+            supported_language=supported_language,
+        )
+
+    def validate_result(self, pattern_text: str) -> Optional[bool]:
+        """
+        Verify the modulus 89 checksum of a candidate ABN.
+
+        The first digit is reduced by one, every digit is multiplied by its
+        positional weight, and the total must be divisible by 89.
+
+        :param pattern_text: the text matched by the regex engine; characters
+        other than decimal digits are ignored.
+        :return: True when the checksum holds, otherwise None, which
+        leaves the match at its pattern score instead of rejecting it.
+        """
+        # Apply the configured replacements, then keep decimal digits only:
+        # the built-in patterns use \s, so a match may contain tabs or newlines
+        # that the replacement pairs do not strip and that int() would reject.
+        text = self.__sanitize_value(pattern_text, self.replacement_pairs)
+        abn_list = [int(char) for char in text if char.isdecimal()]
+        weights = [10, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
+        if len(abn_list) < len(weights):
+            return None
+
+        # Subtract 1 from the leading digit before weighting (0 wraps to 9).
+        abn_list[0] = 9 if abn_list[0] == 0 else abn_list[0] - 1
+        sum_product = sum(d * w for d, w in zip(abn_list, weights))
+        return True if sum_product % 89 == 0 else None
+
+    @staticmethod
+    def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
+        """Apply each (search, replacement) pair to text, in order."""
+        for search_string, replacement_string in replacement_pairs:
+            text = text.replace(search_string, replacement_string)
+        return text
--- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/au_acn_recognizer.py
+++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/au_acn_recognizer.py
@ -0,0 +1,94 @@
+from typing import Optional, List, Tuple
+
+from presidio_analyzer import Pattern, PatternRecognizer
+
+
+class AuAcnRecognizer(PatternRecognizer):
+    """
+    Recognizes Australian Company Number ("ACN").
+
+    The Australian Company Number (ACN) is a nine digit number
+    with the last digit being a check digit calculated using a
+    modified modulus 10 calculation.
+    This recognizer identifies ACN using regex, context words, and checksum.
+    Reference: https://asic.gov.au/
+
+    :param patterns: List of patterns to be used by this recognizer
+    :param context: List of context words to increase confidence in detection
+    :param supported_language: Language this recognizer supports
+    :param supported_entity: The entity this recognizer can detect
+    :param replacement_pairs: List of tuples with potential replacement values
+    for different strings to be used during pattern matching.
+    This can allow a greater variety in input, for example by removing dashes or spaces.
+    """
+
+    PATTERNS = [
+        Pattern(
+            "ACN (Medium)",
+            r"\b\d{3}\s\d{3}\s\d{3}\b",
+            0.1,
+        ),
+        Pattern(
+            "ACN (Low)",
+            r"\b\d{9}\b",
+            0.01,
+        ),
+    ]
+
+    CONTEXT = [
+        "australian company number",
+        "acn",
+    ]
+
+    def __init__(
+        self,
+        patterns: Optional[List[Pattern]] = None,
+        context: Optional[List[str]] = None,
+        supported_language: str = "en",
+        supported_entity: str = "AU_ACN",
+        replacement_pairs: Optional[List[Tuple[str, str]]] = None,
+    ):
+        self.replacement_pairs = (
+            replacement_pairs if replacement_pairs else [("-", ""), (" ", "")]
+        )
+        context = context if context else self.CONTEXT
+        patterns = patterns if patterns else self.PATTERNS
+        super().__init__(
+            supported_entity=supported_entity,
+            patterns=patterns,
+            context=context,
+            supported_language=supported_language,
+        )
+
+    def validate_result(self, pattern_text: str) -> Optional[bool]:
+        """
+        Verify the modified modulus 10 check digit of a candidate ACN.
+
+        The first eight digits are weighted 8 down to 1 and summed; the check
+        digit is the complement to 10 of that sum modulo 10, with 10 read as 0.
+
+        :param pattern_text: the text matched by the regex engine; characters
+        other than decimal digits are ignored.
+        :return: True when the check digit matches, otherwise None, which
+        leaves the match at its pattern score instead of rejecting it.
+        """
+        # Apply the configured replacements, then keep decimal digits only:
+        # the built-in patterns use \s, so a match may contain tabs or newlines
+        # that the replacement pairs do not strip and that int() would reject.
+        text = self.__sanitize_value(pattern_text, self.replacement_pairs)
+        acn_list = [int(char) for char in text if char.isdecimal()]
+        weights = [8, 7, 6, 5, 4, 3, 2, 1]
+        if len(acn_list) <= len(weights):
+            return None
+
+        sum_product = sum(d * w for d, w in zip(acn_list, weights))
+        # "% 10" folds a complement of 10 (remainder 0) back to check digit 0.
+        complement = (10 - sum_product % 10) % 10
+        return True if complement == acn_list[-1] else None
+
+    @staticmethod
+    def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
+        """Apply each (search, replacement) pair to text, in order."""
+        for search_string, replacement_string in replacement_pairs:
+            text = text.replace(search_string, replacement_string)
+        return text
--- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/au_medicare_recognizer.py
+++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/au_medicare_recognizer.py
@ -0,0 +1,93 @@
+from typing import Optional, List, Tuple
+
+from presidio_analyzer import Pattern, PatternRecognizer
+
+
+class AuMedicareRecognizer(PatternRecognizer):
+    """
+    Recognizes Australian Medicare number using regex, context words, and checksum.
+
+    The Medicare number is a unique identifier issued by the Australian
+    Government that enables the cardholder to receive rebates of medical
+    expenses under Australia's Medicare system.
+    It uses a modulus 10 checksum scheme to validate the number.
+    Reference: https://en.wikipedia.org/wiki/Medicare_card_(Australia)
+
+
+    :param patterns: List of patterns to be used by this recognizer
+    :param context: List of context words to increase confidence in detection
+    :param supported_language: Language this recognizer supports
+    :param supported_entity: The entity this recognizer can detect
+    :param replacement_pairs: List of tuples with potential replacement values
+    for different strings to be used during pattern matching.
+    This can allow a greater variety in input, for example by removing dashes or spaces.
+    """
+
+    PATTERNS = [
+        Pattern(
+            "Australian Medicare Number (Medium)",
+            r"\b[2-6]\d{3}\s\d{5}\s\d\b",
+            0.1,
+        ),
+        Pattern(
+            "Australian Medicare Number (Low)",
+            r"\b[2-6]\d{9}\b",
+            0.01,
+        ),
+    ]
+
+    CONTEXT = [
+        "medicare",
+    ]
+
+    def __init__(
+        self,
+        patterns: Optional[List[Pattern]] = None,
+        context: Optional[List[str]] = None,
+        supported_language: str = "en",
+        supported_entity: str = "AU_MEDICARE",
+        replacement_pairs: Optional[List[Tuple[str, str]]] = None,
+    ):
+        self.replacement_pairs = (
+            replacement_pairs if replacement_pairs else [("-", ""), (" ", "")]
+        )
+        context = context if context else self.CONTEXT
+        patterns = patterns if patterns else self.PATTERNS
+        super().__init__(
+            supported_entity=supported_entity,
+            patterns=patterns,
+            context=context,
+            supported_language=supported_language,
+        )
+
+    def validate_result(self, pattern_text: str) -> Optional[bool]:
+        """
+        Verify the modulus 10 check digit of a candidate Medicare number.
+
+        The first eight digits are weighted 1, 3, 7, 9, 1, 3, 7, 9 and summed;
+        the ninth digit must equal that sum modulo 10.
+
+        :param pattern_text: the text matched by the regex engine; characters
+        other than decimal digits are ignored.
+        :return: True when the check digit matches, otherwise None, which
+        leaves the match at its pattern score instead of rejecting it.
+        """
+        # Apply the configured replacements, then keep decimal digits only:
+        # the built-in patterns use \s, so a match may contain tabs or newlines
+        # that the replacement pairs do not strip and that int() would reject.
+        text = self.__sanitize_value(pattern_text, self.replacement_pairs)
+        medicare_list = [int(char) for char in text if char.isdecimal()]
+        weights = [1, 3, 7, 9, 1, 3, 7, 9]
+        if len(medicare_list) <= len(weights):
+            return None
+
+        sum_product = sum(d * w for d, w in zip(medicare_list, weights))
+        # Index 8 is the check digit; the tenth digit takes no part in the checksum.
+        return True if sum_product % 10 == medicare_list[8] else None
+
+    @staticmethod
+    def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
+        """Apply each (search, replacement) pair to text, in order."""
+        for search_string, replacement_string in replacement_pairs:
+            text = text.replace(search_string, replacement_string)
+        return text
--- a/presidio-analyzer/presidio_analyzer/predefined_recognizers/au_tfn_recognizer.py
+++ b/presidio-analyzer/presidio_analyzer/predefined_recognizers/au_tfn_recognizer.py
@ -0,0 +1,99 @@
+from typing import Optional, List, Tuple
+
+from presidio_analyzer import Pattern, PatternRecognizer
+
+
+class AuTfnRecognizer(PatternRecognizer):
+    """
+    Recognizes Australian Tax File Numbers ("TFN").
+
+    The tax file number (TFN) is a unique identifier
+    issued by the Australian Taxation Office
+    to each taxpaying entity — an individual, company,
+    superannuation fund, partnership, or trust.
+    The TFN consists of a nine digit number, usually
+    presented in the format NNN NNN NNN.
+    TFN includes a check digit for detecting erroneous
+    number based on simple modulo 11.
+    This recognizer uses regex, context words,
+    and checksum to identify TFN.
+    Reference: https://www.ato.gov.au/individuals/tax-file-number/
+
+    :param patterns: List of patterns to be used by this recognizer
+    :param context: List of context words to increase confidence in detection
+    :param supported_language: Language this recognizer supports
+    :param supported_entity: The entity this recognizer can detect
+    :param replacement_pairs: List of tuples with potential replacement values
+    for different strings to be used during pattern matching.
+    This can allow a greater variety in input, for example by removing dashes or spaces.
+    """
+
+    PATTERNS = [
+        Pattern(
+            "TFN (Medium)",
+            r"\b\d{3}\s\d{3}\s\d{3}\b",
+            0.1,
+        ),
+        Pattern(
+            "TFN (Low)",
+            r"\b\d{9}\b",
+            0.01,
+        ),
+    ]
+
+    CONTEXT = [
+        "tax file number",
+        "tfn",
+    ]
+
+    def __init__(
+        self,
+        patterns: Optional[List[Pattern]] = None,
+        context: Optional[List[str]] = None,
+        supported_language: str = "en",
+        supported_entity: str = "AU_TFN",
+        replacement_pairs: Optional[List[Tuple[str, str]]] = None,
+    ):
+        self.replacement_pairs = (
+            replacement_pairs if replacement_pairs else [("-", ""), (" ", "")]
+        )
+        context = context if context else self.CONTEXT
+        patterns = patterns if patterns else self.PATTERNS
+        super().__init__(
+            supported_entity=supported_entity,
+            patterns=patterns,
+            context=context,
+            supported_language=supported_language,
+        )
+
+    def validate_result(self, pattern_text: str) -> Optional[bool]:
+        """
+        Verify the modulus 11 checksum of a candidate TFN.
+
+        Each of the nine digits is multiplied by its positional weight and the
+        total must be divisible by 11.
+
+        :param pattern_text: the text matched by the regex engine; characters
+        other than decimal digits are ignored.
+        :return: True when the checksum holds, otherwise None, which
+        leaves the match at its pattern score instead of rejecting it.
+        """
+        # Apply the configured replacements, then keep decimal digits only:
+        # the built-in patterns use \s, so a match may contain tabs or newlines
+        # that the replacement pairs do not strip and that int() would reject.
+        text = self.__sanitize_value(pattern_text, self.replacement_pairs)
+        tfn_list = [int(char) for char in text if char.isdecimal()]
+        weights = [1, 4, 3, 7, 5, 8, 6, 9, 10]
+        if len(tfn_list) < len(weights):
+            return None
+
+        # Only the first nine digits are weighted (zip stops at the last weight).
+        sum_product = sum(d * w for d, w in zip(tfn_list, weights))
+        return True if sum_product % 11 == 0 else None
+
+    @staticmethod
+    def __sanitize_value(text: str, replacement_pairs: List[Tuple[str, str]]) -> str:
+        """Apply each (search, replacement) pair to text, in order."""
+        for search_string, replacement_string in replacement_pairs:
+            text = text.replace(search_string, replacement_string)
+        return text
--- a/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizer_registry.py
+++ b/presidio-analyzer/presidio_analyzer/recognizer_registry/recognizer_registry.py
@ -24,6 +24,10 @@ from presidio_analyzer.predefined_recognizers import (
    SpacyRecognizer,
    EsNifRecognizer,
    StanzaRecognizer,
+    AuAbnRecognizer,
+    AuAcnRecognizer,
+    AuTfnRecognizer,
+    AuMedicareRecognizer,
 )

 logger = logging.getLogger("presidio-analyzer")
@ -68,6 +72,10 @@ class RecognizerRegistry:
                UsSsnRecognizer,
                NhsRecognizer,
                SgFinRecognizer,
+                AuAbnRecognizer,
+                AuAcnRecognizer,
+                AuTfnRecognizer,
+                AuMedicareRecognizer,
            ],
            "es": [EsNifRecognizer],
            "ALL": [
--- a/presidio-analyzer/tests/test_au_abn_recognizer.py
+++ b/presidio-analyzer/tests/test_au_abn_recognizer.py
@ -0,0 +1,50 @@
+import pytest
+
+from tests import assert_result_within_score_range
+from presidio_analyzer.predefined_recognizers import AuAbnRecognizer
+
+
+@pytest.fixture(scope="module")
+def recognizer():
+    """Provide one AuAbnRecognizer shared by every test in this module."""
+    return AuAbnRecognizer()
+
+
+@pytest.fixture(scope="module")
+def entities():
+    """Provide the entity names passed to the recognizer's analyze call."""
+    return ["AU_ABN"]
+
+
+@pytest.mark.parametrize(
+    "text, expected_len, expected_positions, expected_score_ranges",
+    [
+        # Well-formed text with a valid checksum: expected score is 1.0.
+        ("51 824 753 556", 1, ((0, 14),), ((1.0, 1.0),), ),
+        ("51824753556", 1, ((0, 11),), ((1.0, 1.0),), ),
+        # Well-formed text failing the checksum: score stays in the pattern range.
+        ("52 824 753 556", 1, ((0, 14),), ((0.01, 0.1),),),
+        ("52824753556", 1, ((0, 11),), ((0.01, 0.1),),),
+        # Text that neither pattern should match: no results expected.
+        ("5282475355632", 0, (), (),),
+        ("52824753556AF", 0, (), (),),
+        ("51 824 753 5564", 0, (), (),),
+    ],
+)
+def test_when_all_abns_then_succeed(
+    text,
+    expected_len,
+    expected_positions,
+    expected_score_ranges,
+    recognizer,
+    entities,
+    max_score,
+):
+    """Check match count, span and score range for each parametrized text."""
+    matches = recognizer.analyze(text, entities)
+    assert len(matches) == expected_len
+    # zip() stops at the shortest input; zero-match cases assert nothing further.
+    expectations = zip(matches, expected_positions, expected_score_ranges)
+    for match, (start, end), (low, high) in expectations:
+        high = max_score if high == "max" else high
+        assert_result_within_score_range(match, entities[0], start, end, low, high)
--- a/presidio-analyzer/tests/test_au_acn_recognizer.py
+++ b/presidio-analyzer/tests/test_au_acn_recognizer.py
@ -0,0 +1,51 @@
+import pytest
+
+from tests import assert_result_within_score_range
+from presidio_analyzer.predefined_recognizers import AuAcnRecognizer
+
+
+@pytest.fixture(scope="module")
+def recognizer():
+    """Provide one AuAcnRecognizer shared by every test in this module."""
+    return AuAcnRecognizer()
+
+
+@pytest.fixture(scope="module")
+def entities():
+    """Provide the entity names passed to the recognizer's analyze call."""
+    return ["AU_ACN"]
+
+
+@pytest.mark.parametrize(
+    "text, expected_len, expected_positions, expected_score_ranges",
+    [
+        # Well-formed text with a valid check digit: expected score is 1.0.
+        ("000 000 019", 1, ((0, 11),), ((1.0, 1.0),), ),
+        ("005 499 981", 1, ((0, 11),), ((1.0, 1.0),), ),
+        ("006249976", 1, ((0, 9),), ((1.0, 1.0),), ),
+        # Well-formed text failing the check digit: score stays in the pattern range.
+        ("824 753 557", 1, ((0, 11),), ((0.01, 0.1),),),
+        ("824753557", 1, ((0, 9),), ((0.01, 0.1),),),
+        # Text that neither pattern should match: no results expected.
+        ("5282475355632", 0, (), (),),
+        ("52824753556AF", 0, (), (),),
+        ("51 824 753 5564", 0, (), (),),
+    ],
+)
+def test_when_all_acns_then_succeed(
+    text,
+    expected_len,
+    expected_positions,
+    expected_score_ranges,
+    recognizer,
+    entities,
+    max_score,
+):
+    """Check match count, span and score range for each parametrized text."""
+    matches = recognizer.analyze(text, entities)
+    assert len(matches) == expected_len
+    # zip() stops at the shortest input; zero-match cases assert nothing further.
+    expectations = zip(matches, expected_positions, expected_score_ranges)
+    for match, (start, end), (low, high) in expectations:
+        high = max_score if high == "max" else high
+        assert_result_within_score_range(match, entities[0], start, end, low, high)
--- a/presidio-analyzer/tests/test_au_medicare_recognizer.py
+++ b/presidio-analyzer/tests/test_au_medicare_recognizer.py
@ -0,0 +1,49 @@
+import pytest
+
+from tests import assert_result_within_score_range
+from presidio_analyzer.predefined_recognizers import AuMedicareRecognizer
+
+
+@pytest.fixture(scope="module")
+def recognizer():
+    """Provide one AuMedicareRecognizer shared by every test in this module."""
+    return AuMedicareRecognizer()
+
+
+@pytest.fixture(scope="module")
+def entities():
+    """Provide the entity names passed to the recognizer's analyze call."""
+    return ["AU_MEDICARE"]
+
+
+@pytest.mark.parametrize(
+    "text, expected_len, expected_positions, expected_score_ranges",
+    [
+        # Well-formed text with a valid check digit: expected score is 1.0.
+        ("2123 45670 1", 1, ((0, 12),), ((1.0, 1.0),), ),
+        ("2123456701", 1, ((0, 10),), ((1.0, 1.0),), ),
+        # Well-formed text failing the check digit: score stays in the pattern range.
+        ("2123 25870 1", 1, ((0, 12),), ((0.01, 0.1),),),
+        ("2123258701", 1, ((0, 10),), ((0.01, 0.1),),),
+        # Text that neither pattern should match: no results expected.
+        ("212345670221", 0, (), (),),
+        ("2123456702AF", 0, (), (),),
+    ],
+)
+def test_when_all_medicares_then_succeed(
+    text,
+    expected_len,
+    expected_positions,
+    expected_score_ranges,
+    recognizer,
+    entities,
+    max_score,
+):
+    """Check match count, span and score range for each parametrized text."""
+    matches = recognizer.analyze(text, entities)
+    assert len(matches) == expected_len
+    # zip() stops at the shortest input; zero-match cases assert nothing further.
+    expectations = zip(matches, expected_positions, expected_score_ranges)
+    for match, (start, end), (low, high) in expectations:
+        high = max_score if high == "max" else high
+        assert_result_within_score_range(match, entities[0], start, end, low, high)
--- a/presidio-analyzer/tests/test_au_tfn_recognizer.py
+++ b/presidio-analyzer/tests/test_au_tfn_recognizer.py
@ -0,0 +1,50 @@
+import pytest
+
+from tests import assert_result_within_score_range
+from presidio_analyzer.predefined_recognizers import AuTfnRecognizer
+
+
+@pytest.fixture(scope="module")
+def recognizer():
+    """Provide one AuTfnRecognizer shared by every test in this module."""
+    return AuTfnRecognizer()
+
+
+@pytest.fixture(scope="module")
+def entities():
+    """Provide the entity names passed to the recognizer's analyze call."""
+    return ["AU_TFN"]
+
+
+@pytest.mark.parametrize(
+    "text, expected_len, expected_positions, expected_score_ranges",
+    [
+        # Well-formed text with a valid checksum: expected score is 1.0.
+        ("876 543 210", 1, ((0, 11),), ((1.0, 1.0),), ),
+        ("876543210", 1, ((0, 9),), ((1.0, 1.0),), ),
+        # Well-formed text failing the checksum: score stays in the pattern range.
+        ("824 753 557", 1, ((0, 11),), ((0.01, 0.1),),),
+        ("824753557", 1, ((0, 9),), ((0.01, 0.1),),),
+        # Text that neither pattern should match: no results expected.
+        ("5282475355632", 0, (), (),),
+        ("52824753556AF", 0, (), (),),
+        ("51 824 753 5564", 0, (), (),),
+    ],
+)
+def test_when_all_tfns_then_succeed(
+    text,
+    expected_len,
+    expected_positions,
+    expected_score_ranges,
+    recognizer,
+    entities,
+    max_score,
+):
+    """Check match count, span and score range for each parametrized text."""
+    matches = recognizer.analyze(text, entities)
+    assert len(matches) == expected_len
+    # zip() stops at the shortest input; zero-match cases assert nothing further.
+    expectations = zip(matches, expected_positions, expected_score_ranges)
+    for match, (start, end), (low, high) in expectations:
+        high = max_score if high == "max" else high
+        assert_result_within_score_range(match, entities[0], start, end, low, high)
--- a/presidio-analyzer/tests/test_recognizer_registry.py
+++ b/presidio-analyzer/tests/test_recognizer_registry.py
@ -52,8 +52,8 @@ def test_when_get_recognizers_then_all_recognizers_returned(mock_recognizer_regi
    registry = mock_recognizer_registry
    registry.load_predefined_recognizers()
    recognizers = registry.get_recognizers(language="en", all_fields=True)
-    # 1 custom recognizer in english + 17 predefined
-    assert len(recognizers) == 1 + 17
+    # 1 custom recognizer in english + 21 predefined
+    assert len(recognizers) == 1 + 21


 def test_when_get_recognizers_then_return_all_fields(mock_recognizer_registry):