added poetry + fixed failing tests
Parent: d07ceeee57
Commit: 98e1efcebe
@@ -189,4 +189,5 @@ datasets/
 /data
 *.spacy
 *.pickle
 *.pickle
+/poetry.lock
@@ -1,45 +0,0 @@
-# Python package
-# Create and test a Python package on multiple Python versions.
-# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
-# https://docs.microsoft.com/azure/devops/pipelines/languages/python
-
-trigger:
-- master
-
-pr:
-  branches:
-    include:
-    - master
-    - feature/*
-
-pool:
-  vmImage: 'ubuntu-latest'
-strategy:
-  matrix:
-    Python37:
-      python.version: '3.7'
-    Python38:
-      python.version: '3.8'
-    Python39:
-      python.version: '3.9'
-    Python310:
-      python.version: '3.10'
-steps:
-- task: UsePythonVersion@0
-  inputs:
-    versionSpec: '$(python.version)'
-  displayName: 'Use Python $(python.version)'
-
-- script: |
-    python -m pip install --upgrade pip
-    pip install wheel
-    pip install -r requirements.txt
-    python -m spacy download en_core_web_lg
-    python -m spacy download en_core_web_sm
-
-  displayName: 'Install base dependencies'
-
-- script: |
-    pip install pytest pytest-azurepipelines
-    pytest
-  displayName: 'pytest'
@@ -36,8 +36,10 @@ conda create --name presidio python=3.9
 conda activate presidio
 
 # Install package+dependencies
-pip install -r requirements.txt
-python setup.py install
+pip install poetry
+poetry install
+# To install with all additional NER dependencies (e.g. Flair, Stanza, CRF), run:
+# poetry install --with ner
 
 # Download a spaCy model used by presidio-analyzer
 python -m spacy download en_core_web_lg
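After installing with poetry and downloading en_core_web_lg, a quick smoke test can confirm the environment works. This is a minimal sketch, assuming the default AnalyzerEngine configuration (which loads the spaCy model downloaded above); it is not part of the diff:

    # Smoke test: verify presidio-analyzer and the spaCy model are usable.
    from presidio_analyzer import AnalyzerEngine

    analyzer = AnalyzerEngine()
    results = analyzer.analyze(
        text="My name is David and my number is 212-555-1234",
        language="en",
    )
    for res in results:
        print(res.entity_type, res.start, res.end, res.score)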
@@ -0,0 +1,37 @@
+pr:
+  branches:
+    include:
+    - master
+    - feature/*
+
+pool:
+  vmImage: 'ubuntu-latest'
+strategy:
+  matrix:
+    Python37:
+      python.version: '3.7'
+    Python38:
+      python.version: '3.8'
+    Python39:
+      python.version: '3.9'
+    Python310:
+      python.version: '3.10'
+    Python311:
+      python.version: '3.11'
+steps:
+- task: UsePythonVersion@0
+  inputs:
+    versionSpec: '$(python.version)'
+  displayName: 'Use Python $(python.version)'
+
+- script: |
+    python -m pip install --upgrade pip
+    pip install poetry
+    poetry install --with dev,ner
+
+  displayName: 'Install dependencies'
+
+- script: |
+    poetry add pytest-azurepipelines
+    poetry run pytest --runslow
+  displayName: 'pytest'
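The new pipeline runs `poetry run pytest --runslow`. That flag is not built into pytest; it is typically registered in a conftest.py. The repository's actual conftest.py is not part of this diff, so the following is only a sketch of the conventional pattern (as documented by pytest) that such a flag usually follows:

    # conftest.py (illustrative): register a --runslow option and skip
    # tests marked @pytest.mark.slow unless the option is passed.
    import pytest

    def pytest_addoption(parser):
        parser.addoption("--runslow", action="store_true", default=False,
                         help="run tests marked as slow")

    def pytest_configure(config):
        config.addinivalue_line("markers", "slow: mark test as slow to run")

    def pytest_collection_modifyitems(config, items):
        if config.getoption("--runslow"):
            return  # --runslow given: do not skip slow tests
        skip_slow = pytest.mark.skip(reason="need --runslow option to run")
        for item in items:
            if "slow" in item.keywords:
                item.add_marker(skip_slow)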
@@ -41,12 +41,15 @@ class PresidioRecognizerWrapper(BaseModel):
         self.recognizer = recognizer
         self.nlp_engine = nlp_engine
 
+        if not self.nlp_engine.is_loaded():
+            self.nlp_engine.load()
+
     #
     def __make_nlp_artifacts(self, text: str):
         return self.nlp_engine.process_text(text, "en")
 
     #
-    def predict(self, sample: InputSample) -> List[str]:
+    def predict(self, sample: InputSample, **kwargs) -> List[str]:
         nlp_artifacts = None
         if self.with_nlp_artifacts:
             nlp_artifacts = self.__make_nlp_artifacts(sample.full_text)
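The behavioral change here is that predict() now accepts arbitrary keyword arguments, so evaluation code can call every model wrapper with a uniform signature even when a given wrapper ignores the extras. A rough call-site sketch follows; the constructor arguments and InputSample fields are assumptions based on the snippet above, not a verified end-to-end example:

    # Illustrative only; constructor and InputSample arguments are assumed.
    from presidio_analyzer.nlp_engine import NlpEngineProvider
    from presidio_analyzer.predefined_recognizers import EmailRecognizer
    from presidio_evaluator import InputSample
    from presidio_evaluator.models import PresidioRecognizerWrapper

    nlp_engine = NlpEngineProvider().create_engine()  # default spaCy-based engine
    wrapper = PresidioRecognizerWrapper(recognizer=EmailRecognizer(),
                                        nlp_engine=nlp_engine)

    sample = InputSample(full_text="Contact me at jane.doe@example.com", spans=[])
    tags = wrapper.predict(sample)                    # original call still works
    tags = wrapper.predict(sample, unused_flag=True)  # extra kwargs are now tolerated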
@@ -0,0 +1,48 @@
+[tool.poetry]
+name = "presidio_evaluator"
+version = "0.1.0"
+description = ""
+authors = ["Omri Mendels <omri374@users.noreply.github.com>"]
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.9"
+spacy = ">=3.2.0, <4.0.0"
+numpy = ">=1.20.2,<2.0.0"
+jupyter = ">=1"
+pandas = ">=1.2.4,<2.0.0"
+tqdm = ">=4.60.0,<5.0.0"
+haikunator = ">=2.1.0,<3.0.0"
+schwifty = ">=2023.11.2,<2024.0.0"
+faker = ">=9.6.0,<10.0.0"
+scikit-learn = ">1.3.2,<2.0.0"
+pytest = ">=6.2.3"
+presidio-analyzer = "^2.2.351"
+presidio-anonymizer = "^2.2.351"
+requests = ">=2.25.1"
+xmltodict = ">=0.12.0"
+python-dotenv = "^1.0.0"
+plotly = "^5.18.0"
+azure-ai-textanalytics = ">=5.3.0"
+en_core_web_sm = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1.tar.gz"}
+en_core_web_lg = {url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1.tar.gz"}
+
+# optional dependencies for the different NLP approaches
+[tool.poetry.group.ner]
+optional = true
+
+[tool.poetry.group.ner.dependencies]
+flair = "^0.13.0"
+spacy_stanza = "^1.0.0"
+sklearn_crfsuite = "^0.3.6"
+spacy_huggingface_pipelines = "^0.0.4"
+
+
+[tool.poetry.group.dev.dependencies]
+pytest = ">=6.*"
+flake8 = ">=3.*"
+pytest-azurepipelines = "^1.0.5"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
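The ner group is marked optional, so flair, stanza (via spacy_stanza), CRF, and Hugging Face support are only installed by `poetry install --with ner`. Code that uses these packages therefore has to tolerate their absence; the following is a minimal sketch of the usual guarded-import pattern (an assumption about how calling code might handle it, not taken from this repository):

    # Guarded import for an optional dependency from the `ner` poetry group.
    try:
        import flair  # only present after `poetry install --with ner`
    except ImportError:
        flair = None

    def require_flair():
        """Raise a helpful error when the optional NER extras are missing."""
        if flair is None:
            raise ImportError(
                "flair is not installed; run `poetry install --with ner`"
            )
        return flair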
@@ -1,21 +0,0 @@
-spacy>=3.2.0
-numpy>=1.20.2
-jupyter>=1
-pandas>=1.2.4
-tqdm>=4.60.0
-haikunator>=2.1.0
-schwifty
-faker>=9.6.0
-scikit_learn
-#flair
-#stanza
-#spacy_stanza
-#sklearn_crfsuite
-pytest>=6.2.3
-presidio_analyzer
-presidio_anonymizer
-requests>=2.25.1
-xmltodict>=0.12.0
-python-dotenv
-plotly
-azure-ai-textanalytics==5.2.0
@@ -1,20 +0,0 @@
-spacy>=3.2.0
-numpy>=1.12.4
-jupyter>=1
-pandas>=1.3.4
-tqdm>=4.60.0
-haikunator>=2.1.0
-schwifty
-faker>=9.6.0
-scikit_learn<0.24
-pytest>=6.2.3
-presidio_analyzer
-presidio_anonymizer
-requests>=2.25.1
-xmltodict>=0.12.0
-torch>=1.10.1
-python-dotenv
-https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0.tar.gz#egg=en_core_web_sm
-flair>=0.10
-stanza>=1.3.0
-spacy-stanza>=1.0.1