Mirror of https://github.com/microsoft/presidio.git

V2 transformation to anonymizer (#526)

* Clean the engine a little by moving the text handling to another entity. First draft.

Parent: 28dbfae6de
Commit: 7093281062
@@ -31,7 +31,7 @@ Presidio _(Origin from Latin praesidium ‘protection, garrison’)_ helps to en
 ## Presidio's modules
 
 1. [Presidio analyzer](analyzer/index.md): PII identification in text
-2. [Presidio anonymizer](anonymizer/index.md): Anonymize detected PII entities using different transformations
+2. [Presidio anonymizer](anonymizer/index.md): Anonymize detected PII entities using different anonymizers
 3. [Presidio image redactor](image-redactor/index.md): Redact PII entities from images using OCR and PII identification
 
 ## Installing Presidio

@@ -133,13 +133,13 @@
 "# Anonymize Text with Identified PII Entities\n",
 "\n",
 "<br>Presidio Anonymizer iterates over the Presidio Analyzer result, and provides anonymization capabilities for the identified text.\n",
-"<br>The anonymizer provides 4 types of transformations - replace, redact, mask and hash. The default is **replace**\n",
+"<br>The anonymizer provides 4 types of anonymizers - replace, redact, mask and hash. The default is **replace**\n",
 "\n",
 "<br>The following code sample will:\n",
 "<ol>\n",
 "<li>Convert analyzer results to anonymizer input - a list of dict</li>\n",
 "<li>Setup the anonymizer engine </li>\n",
-"<li>Create an anonymizer request - text to anonymize, list of transformations to apply and the results from the analyzer request</li>\n",
+"<li>Create an anonymizer request - text to anonymize, list of anonymizers to apply and the results from the analyzer request</li>\n",
 "<li>Anonymize the text</li>\n",
 "</ol>"
 ],

@@ -167,7 +167,7 @@
 "\n",
 "request = {\n",
 "    \"text\": text_to_anonymize,\n",
-"    \"transformations\": {\n",
+"    \"anonymizers\": {\n",
 "        \"DEFAULT\": {\"type\": \"replace\", \"new_value\": \"<ANONYMIZED>\"},\n",
 "        \"PHONE_NUMBER\": {\n",
 "            \"type\": \"mask\",\n",

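Step 1 of the notebook above (converting analyzer results to the anonymizer input, a list of dicts) can be sketched roughly as follows. This is a minimal illustration rather than the notebook's actual cell; it assumes analyzer_results holds objects exposing start, end, score and entity_type, as Presidio Analyzer results do.

def to_anonymizer_input(analyzer_results):
    # Build the plain list of dicts the anonymizer request expects under
    # "analyzer_results": one entry per detected entity.
    return [
        {
            "start": result.start,
            "end": result.end,
            "score": result.score,
            "entity_type": result.entity_type,
        }
        for result in analyzer_results
    ]
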
@@ -51,7 +51,7 @@ def test_given_text_with_pii_then_analyze_and_anonymize_successfully():
 
     anonymizer_request = {
         "text": analyzer_request["text"],
-        "transformations": {
+        "anonymizers": {
             "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"},
             "US_DRIVER_LICENSE": {"type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": True},
             "PERSON": {"type": "replace", "new_value": "<PERSON>"}

@@ -87,7 +87,7 @@ def test_given_a_correct_analyze_input_high_threashold_then_anonymize_partially(
 
     anonymizer_request = {
         "text": analyzer_request["text"],
-        "transformations": {
+        "anonymizers": {
             "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"},
             "US_DRIVER_LICENSE": {"type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": True},
             "PERSON": {"type": "replace", "new_value": "<PERSON>"}

@@ -131,7 +131,7 @@ def test_given_a_correct_analyze_input_with_high_threshold_and_unmatched_entitie
 
     anonymizer_request = {
         "text": analyzer_request["text"],
-        "transformations": {
+        "anonymizers": {
             "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"},
             "US_DRIVER_LICENSE": {"type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": True},
             "PERSON": {"type": "replace", "new_value": "<PERSON>"}

@@ -180,7 +180,7 @@ def test_given_an_unknown_entity_then_anonymize_uses_defaults():
 
     anonymizer_request = {
         "text": analyzer_request["text"],
-        "transformations": {
+        "anonymizers": {
             "ABC": {"type": "replace", "new_value": "<PERSON>"}
         },
         "analyzer_results": analyzer_data

@@ -9,7 +9,7 @@ def test_given_anonymize_called_with_valid_request_then_expected_valid_response_
     request_body = """
    {
        "text": "hello world, my name is Jane Doe. My number is: 034453334",
-       "transformations": {
+       "anonymizers": {
            "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" },
            "PHONE_NUMBER": { "type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": true }
        },

@@ -37,7 +37,7 @@ def test_given_anonymize_called_with_empty_text_then_invalid_input_message_retur
     request_body = """
    {
        "text": "",
-       "transformations": {
+       "anonymizers": {
            "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" }
        },
        "analyzer_results": [

@@ -58,7 +58,7 @@ def test_given_anonymize_called_with_empty_analyzer_results_then_invalid_input_m
     request_body = """
    {
        "text": "hello world, my name is Jane Doe. My number is: 034453334",
-       "transformations": {
+       "anonymizers": {
            "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" },
            "PHONE_NUMBER": { "type": "mask", "masking_char": "*", "chars_to_mask": 4, "from_end": true }
        },

@@ -79,7 +79,7 @@ def test_given_anonymize_called_with_deformed_body_then_internal_server_error_re
     request_body = """
    {
        "text": "hello world, my name is Jane Doe. My number is: 034453334",
-       "transformations": {
+       "anonymizers": {
            "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"},
        },
        "analyzer_results": [

@@ -36,7 +36,7 @@ Presidio anonymizer comes by default with the following anonymizers:
 - "masking_char" - the character to be replaced with.
 - "from_end" - Whether to mask the PII from it's end.
 
-Please notice: if default value is not stated in transformations object, the default
+Please notice: if default value is not stated in anonymizers object, the default
 anonymizer is "replace" for all entities. The replacing value will be the entity type
 e.g.: <PHONE_NUMBER>
 

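The fallback described above can be restated as a small sketch (not the library's code): an entity-specific entry wins, then the "DEFAULT" entry, and if neither is configured the entity is replaced with its entity type in angle brackets.

def resolve_anonymizer(anonymizers_config: dict, entity_type: str) -> dict:
    # Entity-specific entry first, then "DEFAULT", then the documented fallback:
    # replace, using "<ENTITY_TYPE>" as the replacement value.
    configured = anonymizers_config.get(entity_type) or anonymizers_config.get("DEFAULT")
    if configured:
        return configured
    return {"type": "replace", "new_value": f"<{entity_type}>"}

# With no configuration at all, a phone number would be replaced by "<PHONE_NUMBER>":
print(resolve_anonymizer({}, "PHONE_NUMBER"))
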
@@ -44,7 +44,7 @@ As the input text could potentially have overlapping PII entities, there are dif
 anonymization scenarios:
 
 - No overlap (single PII) - single PII over text entity, uses a given or default
-  transformation to anonymize and replace the PII text entity.
+  anonymizer to anonymize and replace the PII text entity.
 - Full overlap of PIIs - When one text have several PIIs, the PII with the higher score
   will be taken. Between PIIs with identical scores, the selection will be arbitrary.
 - One PII is contained in another - anonymizer will use the PII with larger text.

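The three scenarios above can be condensed into a simplified, illustrative rule over (start, end, score) spans. This is not the AnalyzerResults implementation, only the documented behavior restated as code.

def pick_winner(a: dict, b: dict):
    # Return the analyzer result to keep when two results conflict,
    # or None when neither rule applies (no overlap: both get anonymized).
    if a["start"] == b["start"] and a["end"] == b["end"]:
        # Full overlap: the higher score wins; ties are arbitrary.
        return a if a["score"] >= b["score"] else b
    if a["start"] <= b["start"] and b["end"] <= a["end"]:
        # b is contained in a: the PII with the larger text wins.
        return a
    if b["start"] <= a["start"] and a["end"] <= b["end"]:
        # a is contained in b.
        return b
    return None
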
@@ -109,7 +109,7 @@ Payload:
 ```json
 {
   "text": "hello world, my name is Jane Doe. My number is: 034453334",
-  "transformations": {
+  "anonymizers": {
     "PHONE_NUMBER": {
       "type": "mask",
       "masking_char": "*",

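Assuming a locally running presidio-anonymizer service, sending this kind of payload might look like the sketch below. The host, port and endpoint path are assumptions, not part of this change, and the analyzer_results entry is illustrative.

import requests

payload = {
    "text": "hello world, my name is Jane Doe. My number is: 034453334",
    "anonymizers": {
        "PHONE_NUMBER": {
            "type": "mask",
            "masking_char": "*",
            "chars_to_mask": 4,
            "from_end": True,
        }
    },
    "analyzer_results": [
        {"start": 48, "end": 57, "score": 0.95, "entity_type": "PHONE_NUMBER"}
    ],
}

# URL assumed for a local deployment; adjust to wherever the service is hosted.
response = requests.post("http://localhost:5001/anonymize", json=payload)
print(response.status_code, response.text)
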
@@ -11,7 +11,7 @@ class AnonymizerEngine:
     AnonymizerEngine class.
 
     Handles the entire logic of the Presidio-anonymizer. Gets the original text
-    and replaces the PII entities with the desired transformations.
+    and replaces the PII entities with the desired anonymizers.
     """
 
     logger = logging.getLogger("presidio-anonymizer")

@@ -22,9 +22,9 @@ class AnonymizerEngine:
     def __init__(
             self,
     ):
-        """Handle text replacement for PIIs with requested transformations.
+        """Handle text replacement for PIIs with requested anonymizers.
 
-        :param data: a map which contains the transformations, analyzer_results and text
+        :param data: a map which contains the anonymizers, analyzer_results and text
         """
 
     def anonymize(self, engine_request: AnonymizerRequest) -> str:

@@ -48,15 +48,15 @@ class AnonymizerEngine:
             text_to_anonymize = text_builder.get_text_in_position(
                 analyzer_result.start, analyzer_result.end)
 
-            transformation = engine_request.get_transformation(
+            anonymizer_dto = engine_request.get_anonymizer_dto(
                 analyzer_result.entity_type
             )
             self.logger.debug(
-                f"for analyzer result {analyzer_result} received transformation "
-                f"{str(transformation)}"
+                f"for analyzer result {analyzer_result} received anonymizer "
+                f"{str(anonymizer_dto)}"
             )
 
-            anonymized_text = self.__extract_anonymizer_and_anonymize(transformation,
+            anonymized_text = self.__extract_anonymizer_and_anonymize(anonymizer_dto,
                                                                       text_to_anonymize)
             text_builder.replace_text(anonymized_text, analyzer_result.start,
                                       analyzer_result.end)

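Pieced together from the signatures visible in this change (AnonymizerRequest(data, anonymizers), AnonymizerEngine().builtin_anonymizers and engine.anonymize(request)), an end-to-end call might look roughly like this. The AnonymizerRequest import path is an assumption; only the AnonymizerEngine import appears verbatim in this change.

from presidio_anonymizer.anonymizer_engine import AnonymizerEngine
# Assumed import path for AnonymizerRequest.
from presidio_anonymizer.anonymizer_request import AnonymizerRequest

data = {
    "text": "hello world, my name is Jane Doe. My number is: 034453334",
    "anonymizers": {
        "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"},
    },
    "analyzer_results": [
        {"start": 24, "end": 32, "score": 0.8, "entity_type": "PERSON"},
    ],
}

engine = AnonymizerEngine()
request = AnonymizerRequest(data, engine.builtin_anonymizers)
# The PERSON span "Jane Doe" should come back as "ANONYMIZED".
print(engine.anonymize(request))
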
@@ -68,11 +68,11 @@ class AnonymizerEngine:
         names = [p for p in self.builtin_anonymizers.keys()]
         return names
 
-    def __extract_anonymizer_and_anonymize(self, transformation, text_to_anonymize):
-        anonymizer = transformation.get("anonymizer")()
+    def __extract_anonymizer_and_anonymize(self, anonymizer_dto, text_to_anonymize):
+        anonymizer = anonymizer_dto.get("anonymizer")()
         # if the anonymizer is not valid, a InvalidParamException
-        anonymizer.validate(params=transformation)
+        anonymizer.validate(params=anonymizer_dto)
         anonymized_text = anonymizer.anonymize(
-            params=transformation, text=text_to_anonymize
+            params=anonymizer_dto, text=text_to_anonymize
         )
         return anonymized_text

@@ -11,7 +11,7 @@ class AnalyzerResults(list):
 
     It includes removal of unused results and sort by indices order.
     Additional information about the rational of this class:
-    - One PII - uses a given or default transformation to anonymize and replace the PII
+    - One PII - uses a given or default anonymizer to anonymize and replace the PII
     text entity.
     - Full overlap of PIIs - When one text have several PIIs, the PII with the higher
     score will be taken.

@@ -29,7 +29,7 @@ class AnalyzerResults(list):
         _remove_conflicts method - removes results which impact the same text and
         should be ignored.
         using the logic:
-        - One PII - uses a given or default transformation to anonymize and
+        - One PII - uses a given or default anonymizer to anonymize and
         replace the PII text entity.
         - Full overlap of PIIs - When one text have several PIIs,
         the PII with the higher score will be taken.

@@ -18,32 +18,32 @@ class AnonymizerRequest:
     def __init__(self, data: dict, anonymizers):
         """Handle and validate data for the text replacement.
 
-        :param data: a map which contains the transformations, analyzer_results and text
+        :param data: a map which contains the anonymizers, analyzer_results and text
         """
         self.anonymizers = anonymizers
-        self._transformations = {}
+        self._anonymizers = {}
         self._analysis_results = AnalyzerResults()
         self.__validate_and_insert_input(data)
-        self.default_transformation = {
+        self.default_anonymizer = {
             "type": "replace",
             "anonymizer": self.anonymizers["replace"],
         }
 
-    def get_transformation(self, entity_type: str):
+    def get_anonymizer_dto(self, entity_type: str):
         """
-        Get the right transformation from the list.
+        Get the right anonymizer_dto from the list.
 
-        When transformation does not exist, we fall back to default.
-        :param analyzer_result: the result we are going to do the transformation on
-        :return: transformation
+        When anonymizer_dto does not exist, we fall back to default.
+        :param analyzer_result: the result we are going to do the anonymization on
+        :return: anonymizer_dto
         """
-        transformation = self._transformations.get(entity_type)
-        if not transformation:
-            transformation = self._transformations.get("DEFAULT")
-            if not transformation:
-                transformation = self.default_transformation
-        transformation["entity_type"] = entity_type
-        return transformation
+        anonymizer_dto = self._anonymizers.get(entity_type)
+        if not anonymizer_dto:
+            anonymizer_dto = self._anonymizers.get("DEFAULT")
+            if not anonymizer_dto:
+                anonymizer_dto = self.default_anonymizer
+        anonymizer_dto["entity_type"] = entity_type
+        return anonymizer_dto
 
     def get_text(self):
         """Get the text we are working on."""

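The lookup order of get_anonymizer_dto (entity-specific entry, then "DEFAULT", then the built-in replace anonymizer) is what the tests later in this change assert. A rough usage sketch, with the same assumed AnonymizerRequest import path as in the earlier sketch:

from presidio_anonymizer.anonymizer_engine import AnonymizerEngine
from presidio_anonymizer.anonymizer_request import AnonymizerRequest  # assumed path

engine = AnonymizerEngine()
request = AnonymizerRequest(
    {
        "text": "my name is Jane Doe",
        "anonymizers": {"DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"}},
        "analyzer_results": [
            {"start": 11, "end": 19, "score": 0.8, "entity_type": "PERSON"}
        ],
    },
    engine.builtin_anonymizers,
)

# No "PERSON" entry is configured, so the "DEFAULT" entry is returned.
dto = request.get_anonymizer_dto("PERSON")
assert dto["type"] == "replace" and dto["new_value"] == "ANONYMIZED"
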
@@ -56,13 +56,13 @@ class AnonymizerRequest:
     def __validate_and_insert_input(self, data: dict):
         self.__handle_text(data)
         self.__handle_analyzer_results(data)
-        self.__handle_transformations(data)
+        self.__handle_anonymizers(data)
 
     def __handle_analyzer_results(self, data):
         """
         Go over analyzer results, check they are valid and convert to AnalyzeResult.
 
-        :param data: contains the text, transformations and analyzer_results
+        :param data: contains the text, anonymizers and analyzer_results
         :return: None
         """
         analyzer_results = data.get("analyzer_results")

@@ -77,22 +77,22 @@ class AnonymizerRequest:
             analyzer_result.validate_position_in_text(text_len)
             self._analysis_results.append(analyzer_result)
 
-    def __handle_transformations(self, data):
+    def __handle_anonymizers(self, data):
         """
-        Go over the transformations and get the relevant anonymizer class for it.
+        Go over the anonymizers and get the relevant anonymizer class for it.
 
-        Inserts the class to the transformation so the engine will use it.
-        :param data: contains the text, transformations and analyzer_results
+        Inserts the class to the anonymizer so the engine will use it.
+        :param data: contains the text, anonymizers and analyzer_results
         :return: None
         """
-        transformations = data.get("transformations")
-        if transformations is not None:
-            for key, transformation in transformations.items():
-                self.logger.debug(f"converting {transformation} to anonymizer class")
-                anonymizer = self.__get_anonymizer(transformation)
-                self.logger.debug(f"applying class {anonymizer} to {transformation}")
-                transformation["anonymizer"] = anonymizer
-                self._transformations[key] = transformation
+        anonymizers = data.get("anonymizers")
+        if anonymizers is not None:
+            for key, anonymizer_dto in anonymizers.items():
+                self.logger.debug(f"converting {anonymizer_dto} to anonymizer class")
+                anonymizer = self.__get_anonymizer(anonymizer_dto)
+                self.logger.debug(f"applying class {anonymizer} to {anonymizer_dto}")
+                anonymizer_dto["anonymizer"] = anonymizer
+                self._anonymizers[key] = anonymizer_dto
 
     def __handle_text(self, data):
         self._text = data.get("text")

@@ -100,14 +100,14 @@ class AnonymizerRequest:
             self.logger.debug("invalid input, json is missing text field")
             raise InvalidParamException("Invalid input, text can not be empty")
 
-    def __get_anonymizer(self, transformation):
+    def __get_anonymizer(self, anonymizer):
         """
         Extract the anonymizer class from the anonymizers list.
 
-        :param transformation: a single transformation value
+        :param anonymizer: a single anonymizer value
         :return: Anonymizer
         """
-        anonymizer_type = transformation.get("type").lower()
+        anonymizer_type = anonymizer.get("type").lower()
         anonymizer = self.anonymizers.get(anonymizer_type)
         if not anonymizer:
             self.logger.error(f"No such anonymizer class {anonymizer_type}")

@@ -1,6 +1,6 @@
 {
   "text": "hello world, my name is Jane Doe. My number is: 034453334",
-  "transformations": {
+  "anonymizers": {
     "DEFAULT": { "type": "hash", "hash_type": "md5" }
   },
   "analyzer_results": [

@@ -1,6 +1,6 @@
 {
   "text": "hello world, my name is Jane Doe. My number is: 034453334",
-  "transformations": {
+  "anonymizers": {
     "DEFAULT": { "type": "hash", "hash_type": "sha256" }
   },
   "analyzer_results": [

@@ -1,6 +1,6 @@
 {
   "text": "hello world, my name is Jane Doe. My number is: 034453334",
-  "transformations": {
+  "anonymizers": {
     "DEFAULT": { "type": "hash", "hash_type": "sha256" }
   },
   "analyzer_results": [

@@ -1,6 +1,6 @@
 {
   "text": "hello world, my name is Jane Doe. My number is: 034453334",
-  "transformations": {
+  "anonymizers": {
     "DEFAULT": { "type": "hash", "hash_type": "sha512" }
   },
   "analyzer_results": [

@@ -1,6 +1,6 @@
 {
   "text": "hello world, my name is Jane Doe. My number is: 03-4453334",
-  "transformations": {
+  "anonymizers": {
     "DEFAULT": {
       "type": "mask",
       "masking_char": "*",

@@ -1,6 +1,6 @@
 {
   "text": "hello world, my name is Jane Doe. My number is: 03-4453334",
-  "transformations": {
+  "anonymizers": {
     "PHONE_NUMBER": {
       "type": "mask",
      "masking_char": "non_character",

@@ -1,6 +1,6 @@
 {
   "text": "hello world, my name is Jane Doe. My number is: 034453334",
-  "transformations": {
+  "anonymizers": {
     "NAME": {
       "type": "redact",
       "new_value": "ANONYMIZED"

@@ -24,7 +24,7 @@ from presidio_anonymizer.entities.invalid_exception import InvalidParamException
             "entity_type": "NUMBER"
         }
     ],
-    "transformations": {
+    "anonymizers": {
         "default": {
             "type": "none"
         }

@@ -63,15 +63,15 @@ def test_given_invalid_json_then_request_creation_should_fail(
     assert result_text == e.value.err_msg
 
 
-def test_given_no_transformations_then_we_get_the_default():
+def test_given_no_anonymizers_then_we_get_the_default():
     content = get_content()
     request = AnonymizerRequest(content, AnonymizerEngine().builtin_anonymizers)
-    request._transformations = {}
+    request._anonymizers = {}
     analyzer_result = Mock()
     analyzer_result.entity_type = "PHONE"
-    transformation = request.get_transformation(analyzer_result.entity_type)
-    assert transformation.get("type") == "replace"
-    assert type(transformation.get("anonymizer")) == type(Replace)
+    anonymizers = request.get_anonymizer_dto(analyzer_result.entity_type)
+    assert anonymizers.get("type") == "replace"
+    assert type(anonymizers.get("anonymizer")) == type(Replace)
 
 
 def test_given_valid_json_then_request_creation_should_succeed():

@@ -79,7 +79,7 @@ def test_given_valid_json_then_request_creation_should_succeed():
     data = AnonymizerRequest(content, AnonymizerEngine().builtin_anonymizers)
     assert data.get_text() == content.get("text")
     assert data._text == content.get("text")
-    assert data._transformations == content.get("transformations")
+    assert data._anonymizers == content.get("anonymizers")
     assert len(data._analysis_results) == len(content.get("analyzer_results"))
     assert data._analysis_results == data.get_analysis_results()
     for result_a in data._analysis_results:

@@ -90,26 +90,50 @@ def test_given_valid_json_then_request_creation_should_succeed():
         assert result_a.score == same_result_in_content.get("score")
         assert result_a.start == same_result_in_content.get("start")
         assert result_a.end == same_result_in_content.get("end")
-        assert data.get_transformation(result_a.entity_type)
+        assert data.get_anonymizer_dto(result_a.entity_type)
 
 
-def test_given_valid_anonymizer_request_then_get_transformations_successfully():
+def test_given_valid_anonymizer_request_then_get_anonymizers_successfully():
     content = get_content()
     data = AnonymizerRequest(content, AnonymizerEngine().builtin_anonymizers)
     replace_result = data.get_analysis_results()[0]
-    default_replace_transformation = data.get_transformation(replace_result.entity_type)
-    assert default_replace_transformation.get("type") == "replace"
-    assert default_replace_transformation.get("new_value") == "ANONYMIZED"
-    assert type(default_replace_transformation.get("anonymizer")) == type(Replace)
-    mask_transformation = data.get_transformation(
+    default_replace_anonymizer = data.get_anonymizer_dto(replace_result.entity_type)
+    assert default_replace_anonymizer.get("type") == "replace"
+    assert default_replace_anonymizer.get("new_value") == "ANONYMIZED"
+    assert type(default_replace_anonymizer.get("anonymizer")) == type(Replace)
+    mask_anonymizer = data.get_anonymizer_dto(
         data.get_analysis_results()[3].entity_type
     )
-    assert mask_transformation.get("type") == "mask"
-    assert mask_transformation.get("from_end")
-    assert mask_transformation.get("chars_to_mask") == 4
-    assert mask_transformation.get("masking_char") == "*"
-    assert mask_transformation.get("anonymizer")
-    assert type(mask_transformation.get("anonymizer")) == type(Mask)
+    assert mask_anonymizer.get("type") == "mask"
+    assert mask_anonymizer.get("from_end")
+    assert mask_anonymizer.get("chars_to_mask") == 4
+    assert mask_anonymizer.get("masking_char") == "*"
+    assert mask_anonymizer.get("anonymizer")
+    assert type(mask_anonymizer.get("anonymizer")) == type(Mask)
 
 
 @pytest.mark.parametrize(
+    # fmt: off
+    "original_text,start,end",
+    [
+        ("hello world", 5, 12),
+        ("hello world", 12, 16),
+    ],
+    # fmt: on
+)
+def test_given_analyzer_result_with_an_incorrect_text_positions_then_we_fail(
+        original_text, start, end):
+    content = {
+        "text": original_text,
+        "analyzer_results": [
+            {"start": start, "end": end, "score": 0.8, "entity_type": "NAME"},
+        ],
+    }
+    content.get("analyzer_results")
+    err_msg = f"Invalid analyzer result, start: {start} and end: " \
+              f"{end}, while text length is only 11."
+    with pytest.raises(InvalidParamException, match=err_msg):
+        AnonymizerRequest(content, AnonymizerEngine().builtin_anonymizers)
+
+
+@pytest.mark.parametrize(

@@ -146,7 +170,7 @@ def __find_element(content: List, entity_type: str):
 def get_content():
     return {
         "text": "hello world, my name is Jane Doe. My number is: 034453334",
-        "transformations": {
+        "anonymizers": {
             "DEFAULT": {"type": "replace", "new_value": "ANONYMIZED"},
             "PHONE_NUMBER": {
                 "type": "mask",

@@ -4,9 +4,9 @@ from presidio_anonymizer.anonymizer_engine import AnonymizerEngine
 from presidio_anonymizer.entities import AnalyzerResult
 
 
-def test_given_several_transformations_then_we_use_the_correct_one():
-    transformation = Mock()
-    transformation.get = get_transformation
+def test_given_several_anonymizers_then_we_use_the_correct_one():
+    anonymizer = Mock()
+    anonymizer.get = get_anonymizer_dto
     mock = Mock()
     mock.get_text = lambda: "Number: 0554555556"
     analyzer_results = Mock()

@@ -15,7 +15,7 @@ def test_given_several_transformations_then_we_use_the_correct_one():
     )
     analyzer_results.to_sorted_unique_results = lambda reverse: [analyzer_result]
     mock.get_analysis_results = lambda: analyzer_results
-    mock.get_transformation = lambda result: transformation
+    mock.get_anonymizer_dto = lambda result: anonymizer
     text = AnonymizerEngine().anonymize(mock)
     assert text == "Number: I am your new text!"
 

@@ -28,6 +28,6 @@ class Anonymizer:
     pass
 
 
-def get_transformation(arg):
+def get_anonymizer_dto(arg):
     assert arg == "anonymizer"
     return Anonymizer