Upgrade Text Analytics integration to the new SDK version (azure-ai-textanalytics 5.1.0)

This commit is contained in:
nonstoptimm 2021-10-14 14:51:56 +02:00
Родитель 20fb5ba240
Коммит 81f466dbe5
2 изменённых файлов: 11 добавлений и 7 удалений

Просмотреть файл

@ -33,7 +33,7 @@ opencensus-ext-azure==1
selenium==3.141.0
bs4==0.0.
##OPTIONAL LOCAL
azure-ai-textanalytics==1.0.0b3 #TO BE UPGRADED -> 5.0.0 -> TA
azure-ai-textanalytics==5.1.0
gensim==3.8.0
spacy==2.3.2
farm==0.4.7

Просмотреть файл

@ -23,6 +23,9 @@ from farm.modeling.tokenization import Tokenizer
from farm.train import Trainer
from farm.utils import set_all_seeds, initialize_device_settings
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
# Custom functions
import sys
sys.path.append('./src')
@ -62,15 +65,16 @@ class FlairMatcher(object):
return doc
class TextAnalyticsMatcher(object):
name = "textanalytics"
def __init__(self):
self.endpoint = f"https://{he.get_secret('text-analytics-name')}.cognitiveservices.azure.com/text/analytics/v3.0/entities/recognition/general"
self.headers = {"Ocp-Apim-Subscription-Key": he.get_secret('text-analytics-key')}
key = he.get_secret('text-analytics-key')
self.endpoint = f"https://{he.get_secret('text-analytics-name')}.cognitiveservices.azure.com/"
self.key = AzureKeyCredential(key)
self.client = TextAnalyticsClient(endpoint = self.endpoint, credential = self.key)
def __call__(self, doc):
result = requests.post(self.endpoint, headers=self.headers, json={"documents": [{"id": "0", "language": cu.params.get('language'), "text": doc.text}]}).json()['documents'][0]
for entity in result['entities']:
span = doc.char_span(entity['offset'], entity['offset'] + entity['length'], label = entity['category'])
result = self.client.recognize_entities(documents = [{"id": "0", "language": cu.params.get('language'), "text": doc.text}])[0]
for entity in result.entities:
span = doc.char_span(entity.offset, entity.offset + entity.length, label = entity.category)
# Pass, in case a match already exists
try:
doc.ents = list(doc.ents) + [span]