resource and dependency cleanup
This commit is contained in:
Родитель
d004881454
Коммит
bdf4efc018
|
@ -33,8 +33,6 @@ def score(task):
|
|||
return Inferencer.load(_dt.get_path('model_dir'))
|
||||
elif task_type == 'ner':
|
||||
return ner.NER(task=task, inference=True)
|
||||
elif task_type == 'om':
|
||||
return om.OM(task=task, inference=True)
|
||||
elif task_type == 'qa':
|
||||
return rank.Rank(task=task, inference=True)
|
||||
else:
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
from summarizer import Summarizer
|
||||
import nltk
|
||||
nltk.download('punkt')
|
||||
import re
|
||||
from nltk.tokenize import sent_tokenize
|
||||
from nltk.corpus import stopwords
|
||||
from sklearn.metrics.pairwise import cosine_similarity
|
||||
from gensim.summarization.summarizer import summarize
|
||||
import networkx as nx
|
||||
import numpy as np
|
||||
|
||||
""" BERTABS """
|
||||
def summarizeText(text, minLength=60):
    """Summarize *text* with the module-level BERT ``Summarizer`` instance.

    Args:
        text: The input document to summarize.
        minLength: Minimum summary length forwarded to the model.

    Returns:
        The summary joined into a single string.
    """
    # `model` is the module-level Summarizer() — assumed initialized before use.
    pieces = model(text, min_length=minLength)
    return ''.join(pieces)
|
||||
|
||||
""" SAMPLING """
|
||||
def sentencenize(text):
    """Split every document in *text* into sentences and return one flat list.

    Args:
        text: An iterable of document strings.

    Returns:
        A single flat list of sentence strings, in document order.
    """
    flat = []
    for doc in text:
        # sent_tokenize returns a list of sentences for one document;
        # extend flattens as we go instead of a nested-list + comprehension pass.
        flat.extend(sent_tokenize(doc))
    return flat
|
||||
|
||||
def extractWordVectors(file):
    """Parse a GloVe-style embedding file into a word -> vector mapping.

    Each line is expected to be: ``<word> <float> <float> ...``.
    Blank lines are skipped. The file handle is closed before returning.

    Args:
        file: An open, iterable text-file handle of embedding lines.

    Returns:
        dict mapping each word (str) to its float32 numpy vector.
    """
    word_embeddings = {}
    for line in file:
        values = line.split()
        if not values:
            # Skip blank/whitespace-only lines instead of raising IndexError.
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        word_embeddings[word] = coefs
    file.close()
    # BUG FIX: the original built the dict but never returned it
    # (callers always received None).
    return word_embeddings
|
||||
|
||||
def removeStopwords(sen, sw):
    """Join the tokens of *sen* into one space-separated string, dropping stopwords.

    Args:
        sen: A list (or iterable) of token strings.
        sw: A collection of stopwords to exclude (membership-tested).

    Returns:
        A single string of the kept tokens separated by single spaces.
    """
    kept = []
    for token in sen:
        if token not in sw:
            kept.append(token)
    return " ".join(kept)
|
||||
|
||||
""" BERTABS """
|
||||
model = Summarizer()
|
||||
|
||||
""" SAMPLING """
|
||||
clean_sentences = [removeStopwords(r.split(), sw) for r in clean_sentences]
|
||||
|
||||
""" GENSIM """
|
||||
summarize()
|
Загрузка…
Ссылка в новой задаче