resource and dependency cleanup
This commit is contained in:
Родитель
d004881454
Коммит
bdf4efc018
|
@ -33,8 +33,6 @@ def score(task):
|
||||||
return Inferencer.load(_dt.get_path('model_dir'))
|
return Inferencer.load(_dt.get_path('model_dir'))
|
||||||
elif task_type == 'ner':
|
elif task_type == 'ner':
|
||||||
return ner.NER(task=task, inference=True)
|
return ner.NER(task=task, inference=True)
|
||||||
elif task_type == 'om':
|
|
||||||
return om.OM(task=task, inference=True)
|
|
||||||
elif task_type == 'qa':
|
elif task_type == 'qa':
|
||||||
return rank.Rank(task=task, inference=True)
|
return rank.Rank(task=task, inference=True)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
from summarizer import Summarizer
|
|
||||||
import nltk
|
|
||||||
nltk.download('punkt')
|
|
||||||
import re
|
|
||||||
from nltk.tokenize import sent_tokenize
|
|
||||||
from nltk.corpus import stopwords
|
|
||||||
from sklearn.metrics.pairwise import cosine_similarity
|
|
||||||
from gensim.summarization.summarizer import summarize
|
|
||||||
import networkx as nx
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
""" BERTABS """
|
|
||||||
def summarizeText(text, minLength=60):
    """Summarize *text* using the module-level BERT extractive ``model``.

    Parameters
    ----------
    text : str
        The text to summarize.
    minLength : int, optional
        Minimum summary length forwarded to the model (default 60).

    Returns
    -------
    str
        The model's output pieces joined into one string.
    """
    # NOTE(review): relies on the module-level `model = Summarizer()`;
    # presumably it yields an iterable of sentence strings — verify.
    summary_parts = model(text, min_length=minLength)
    return ''.join(summary_parts)
|
|
||||||
|
|
||||||
""" SAMPLING """
|
|
||||||
def sentencenize(text):
    """Split each document in *text* into sentences and flatten the result.

    Parameters
    ----------
    text : iterable of str
        Documents (or paragraphs) to sentence-tokenize.

    Returns
    -------
    list of str
        Every sentence from every document, preserving order.
    """
    # Tokenize per document, then flatten the nested lists into one.
    per_document = [sent_tokenize(document) for document in text]
    return [sentence for chunk in per_document for sentence in chunk]
|
|
||||||
|
|
||||||
def extractWordVectors(file):
    """Parse GloVe-style word vectors from an open text *file*.

    Each non-empty line is expected to look like ``word v1 v2 ... vn``.

    Parameters
    ----------
    file : open text file handle (iterable of str)
        Source of vector lines; it is closed before this function returns,
        preserving the original's side effect.

    Returns
    -------
    dict
        Mapping of word -> numpy float32 vector.

    Bug fix: the original built ``word_embeddings`` but never returned it,
    so callers always received ``None`` and the parsed vectors were lost.
    """
    word_embeddings = {}
    try:
        for line in file:
            values = line.split()
            if not values:
                # Robustness: skip blank lines instead of raising IndexError.
                continue
            word = values[0]
            # Remaining columns are the embedding coefficients.
            coefs = np.asarray(values[1:], dtype='float32')
            word_embeddings[word] = coefs
    finally:
        # Close the handle even if a line fails to parse.
        file.close()
    return word_embeddings
|
|
||||||
|
|
||||||
def removeStopwords(sen, sw):
    """Rebuild a sentence from the tokens of *sen*, dropping stopwords.

    Parameters
    ----------
    sen : iterable of str
        Tokens of one sentence.
    sw : collection of str
        Stopwords to exclude.

    Returns
    -------
    str
        The surviving tokens joined with single spaces.
    """
    kept = (token for token in sen if token not in sw)
    return " ".join(kept)
|
|
||||||
|
|
||||||
""" BERTABS """
|
|
||||||
model = Summarizer()
|
|
||||||
|
|
||||||
""" SAMPLING """
|
|
||||||
clean_sentences = [removeStopwords(r.split(), sw) for r in clean_sentences]
|
|
||||||
|
|
||||||
""" GENSIM """
|
|
||||||
summarize()
|
|
Загрузка…
Ссылка в новой задаче