resource and dependency cleanup

This commit is contained in:
nonstoptimm 2021-10-14 21:57:14 +02:00
Родитель d004881454
Коммит bdf4efc018
2 изменённых файлов: 0 добавлений и 48 удалений

Просмотреть файл

@@ -33,8 +33,6 @@ def score(task):
         return Inferencer.load(_dt.get_path('model_dir'))
     elif task_type == 'ner':
         return ner.NER(task=task, inference=True)
-    elif task_type == 'om':
-        return om.OM(task=task, inference=True)
     elif task_type == 'qa':
         return rank.Rank(task=task, inference=True)
     else:

Просмотреть файл

@ -1,46 +0,0 @@
from summarizer import Summarizer
import nltk
nltk.download('punkt')
import re
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
from gensim.summarization.summarizer import summarize
import networkx as nx
import numpy as np
""" BERTABS """
def summarizeText(text, minLength=60):
    """Summarize *text* with the module-level BERT extractive summarizer.

    Parameters
    ----------
    text : str
        Document to summarize.
    minLength : int
        Minimum summary length forwarded to the model as ``min_length``.

    Returns
    -------
    str
        The summary produced by the global ``model`` (defined near the
        bottom of this file), joined into a single string.
    """
    summary_parts = model(text, min_length=minLength)
    return ''.join(summary_parts)
""" SAMPLING """
def sentencenize(text):
    """Split each document in *text* into sentences and return one flat list.

    Parameters
    ----------
    text : iterable of str
        Documents to be sentence-tokenized with nltk's ``sent_tokenize``.

    Returns
    -------
    list of str
        All sentences from all documents, in original order.
    """
    # Flatten in a single comprehension instead of appending per-document
    # sentence lists and re-flattening afterwards (same output, one pass).
    return [sentence for doc in text for sentence in sent_tokenize(doc)]
def extractWordVectors(file):
    """Parse GloVe-style word vectors from an open text file.

    Each non-empty line is expected to look like ``word v1 v2 ...``.

    Parameters
    ----------
    file : file-like object
        Open handle over the embedding file; it is closed before returning
        (NOTE(review): closing a caller-supplied handle is kept for
        backward compatibility -- confirm callers expect this).

    Returns
    -------
    dict
        Mapping of word -> numpy float32 vector.
    """
    word_embeddings = {}
    for line in file:
        values = line.split()
        # Robustness: skip blank lines instead of raising IndexError.
        if not values:
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        word_embeddings[word] = coefs
    file.close()
    # BUG FIX: the original built the dict but never returned it, so every
    # caller received None.
    return word_embeddings
def removeStopwords(sen, sw):
    """Return the tokens of *sen* not present in *sw*, space-joined.

    Parameters
    ----------
    sen : iterable of str
        Tokens of one sentence.
    sw : container of str
        Stopwords to drop.

    Returns
    -------
    str
        The surviving tokens joined with single spaces.
    """
    kept = (token for token in sen if token not in sw)
    return " ".join(kept)
""" BERTABS """
model = Summarizer()
""" SAMPLING """
clean_sentences = [removeStopwords(r.split(), sw) for r in clean_sentences]
""" GENSIM """
summarize()