added poetry + fixed failing tests

This commit is contained in:
Omri Mendels 2023-12-25 22:02:00 +02:00
Родитель d07ceeee57
Коммит 98e1efcebe
8 изменённых файлов: 95 добавлений и 90 удалений

3
.gitignore поставляемый
Просмотреть файл

@ -189,4 +189,5 @@ datasets/
/data
*.spacy
*.pickle
*.pickle
/poetry.lock

Просмотреть файл

@ -1,45 +0,0 @@
# Python package
# Create and test a Python package on multiple Python versions.
# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
# https://docs.microsoft.com/azure/devops/pipelines/languages/python
trigger:
- master
pr:
branches:
include:
- master
- feature/*
pool:
vmImage: 'ubuntu-latest'
strategy:
matrix:
Python37:
python.version: '3.7'
Python38:
python.version: '3.8'
Python39:
python.version: '3.9'
Python310:
python.version: '3.10'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
displayName: 'Use Python $(python.version)'
- script: |
python -m pip install --upgrade pip
pip install wheel
pip install -r requirements.txt
python -m spacy download en_core_web_lg
python -m spacy download en_core_web_sm
displayName: 'Install base dependencies'
- script: |
pip install pytest pytest-azurepipelines
pytest
displayName: 'pytest'

Просмотреть файл

@ -36,8 +36,10 @@ conda create --name presidio python=3.9
conda activate presidio
# Install package+dependencies
pip install -r requirements.txt
python setup.py install
pip install poetry
poetry install
# To install with all additional NER dependencies (e.g. Flair, Stanza, CRF), run:
# poetry install --with ner
# Download a spaCy model used by presidio-analyzer
python -m spacy download en_core_web_lg

37
azure-pipelines.yml Normal file
Просмотреть файл

@ -0,0 +1,37 @@
pr:
branches:
include:
- master
- feature/*
pool:
vmImage: 'ubuntu-latest'
strategy:
matrix:
Python37:
python.version: '3.7'
Python38:
python.version: '3.8'
Python39:
python.version: '3.9'
Python310:
python.version: '3.10'
Python311:
python.version: '3.11'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
displayName: 'Use Python $(python.version)'
- script: |
python -m pip install --upgrade pip
pip install poetry
poetry install --with dev,ner
displayName: 'Install dependencies'
- script: |
poetry add pytest-azurepipelines
poetry run pytest --runslow
displayName: 'pytest'

Просмотреть файл

@ -41,12 +41,15 @@ class PresidioRecognizerWrapper(BaseModel):
self.recognizer = recognizer
self.nlp_engine = nlp_engine
if not self.nlp_engine.is_loaded():
self.nlp_engine.load()
#
def __make_nlp_artifacts(self, text: str):
return self.nlp_engine.process_text(text, "en")
#
def predict(self, sample: InputSample) -> List[str]:
def predict(self, sample: InputSample, **kwargs) -> List[str]:
nlp_artifacts = None
if self.with_nlp_artifacts:
nlp_artifacts = self.__make_nlp_artifacts(sample.full_text)

48
pyproject.toml Normal file
Просмотреть файл

@ -0,0 +1,48 @@
[tool.poetry]
name = "presidio_evaluator"
version = "0.1.0"
description = ""
authors = ["Omri Mendels <omri374@users.noreply.github.com>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.9"
spacy = ">=3.2.0, <4.0.0"
numpy = ">=1.20.2,<2.0.0"
jupyter = ">=1"
pandas = ">=1.2.4,<2.0.0"
tqdm = ">=4.60.0,<5.0.0"
haikunator = ">=2.1.0,<3.0.0"
schwifty = ">=2023.11.2,<2024.0.0"
faker = ">=9.6.0,<10.0.0"
scikit-learn = ">1.3.2,<2.0.0"
pytest = ">=6.2.3"
presidio-analyzer = "^2.2.351"
presidio-anonymizer = "^2.2.351"
requests = ">=2.25.1"
xmltodict = ">=0.12.0"
python-dotenv = "^1.0.0"
plotly = "^5.18.0"
azure-ai-textanalytics = ">=5.3.0"
en_core_web_sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz"}
en_core_web_lg = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1.tar.gz"}
# optional dependencies for the different NLP approaches
[tool.poetry.group.ner]
optional=true
[tool.poetry.group.ner.dependencies]
flair = "^0.13.0"
spacy_stanza = "^1.0.0"
sklearn_crfsuite = "^0.3.6"
spacy_huggingface_pipelines = "^0.0.4"
[tool.poetry.group.dev.dependencies]
pytest = ">=6.*"
flake8 = ">=3.*"
pytest-azurepipelines = "^1.0.5"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

Просмотреть файл

@ -1,21 +0,0 @@
spacy>=3.2.0
numpy>=1.20.2
jupyter>=1
pandas>=1.2.4
tqdm>=4.60.0
haikunator>=2.1.0
schwifty
faker>=9.6.0
scikit_learn
#flair
#stanza
#spacy_stanza
#sklearn_crfsuite
pytest>=6.2.3
presidio_analyzer
presidio_anonymizer
requests>=2.25.1
xmltodict>=0.12.0
python-dotenv
plotly
azure-ai-textanalytics==5.2.0

Просмотреть файл

@ -1,20 +0,0 @@
spacy>=3.2.0
numpy>=1.12.4
jupyter>=1
pandas>=1.3.4
tqdm>=4.60.0
haikunator>=2.1.0
schwifty
faker>=9.6.0
scikit_learn<0.24
pytest>=6.2.3
presidio_analyzer
presidio_anonymizer
requests>=2.25.1
xmltodict>=0.12.0
torch>=1.10.1
python-dotenv
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0.tar.gz#egg=en_core_web_sm
flair>=0.10
stanza>=1.3.0
spacy-stanza>=1.0.1