This commit is contained in:
actuy 2022-07-31 23:20:22 +08:00
Родитель 7a4de9f798
Коммит b11af2087c
2 изменённых файлов: 15 добавлений и 137 удалений

Просмотреть файл

@@ -1,127 +0,0 @@
absl-py==0.13.0
antlr4-python3-runtime==4.8
appnope @ file:///opt/concourse/worker/volumes/live/4f734db2-9ca8-4d8b-5b29-6ca15b4b4772/volume/appnope_1606859466979/work
astor==0.8.1
astroid @ file:///opt/concourse/worker/volumes/live/1abcdc37-d4b6-466f-7b88-18a6ab5ae15b/volume/astroid_1628063153464/work
asttokens==2.0.5
attrs==21.4.0
backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work
blis==0.7.5
catalogue==2.0.6
certifi==2021.10.8
cffi==1.14.6
charset-normalizer==2.0.3
click==8.0.1
clldutils==3.11.1
colorama==0.4.4
colorlog==6.6.0
csvw==2.0.0
cycler==0.10.0
cymem==2.0.6
Cython==0.29.24
dataclasses==0.6
decorator @ file:///tmp/build/80754af9/decorator_1621259047763/work
dlinfo==1.2.1
dtw==1.4.0
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl
executing==0.8.2
fairseq==0.10.2
filelock==3.0.12
gast==0.4.0
graphviz==0.17
huggingface-hub==0.0.12
hydra-core==1.1.0
icecream==2.1.1
idna==3.2
importlib-metadata==4.6.1
importlib-resources==5.2.0
ipykernel @ file:///opt/concourse/worker/volumes/live/73e8766c-12c3-4f76-62a6-3dea9a7da5b7/volume/ipykernel_1596206701501/work/dist/ipykernel-5.3.4-py3-none-any.whl
ipython @ file:///opt/concourse/worker/volumes/live/b31c31c8-c793-4ec3-592f-18b6be2f138f/volume/ipython_1628243923648/work
ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work
isodate==0.6.1
isort @ file:///tmp/build/80754af9/isort_1628603791788/work
jedi @ file:///opt/concourse/worker/volumes/live/e0916850-3b97-4da2-5dee-49b0d9627cef/volume/jedi_1611333761589/work
jieba==0.42.1
Jinja2==3.0.2
joblib==1.0.1
jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1616770841739/work
jupyter-core @ file:///opt/concourse/worker/volumes/live/a699b83f-e941-4170-5136-bf87e3f37756/volume/jupyter_core_1612213304212/work
kiwisolver==1.3.1
langcodes==3.3.0
lazy-object-proxy @ file:///opt/concourse/worker/volumes/live/62c2169c-fe65-46ff-434a-480cde88c65b/volume/lazy-object-proxy_1616529070475/work
MarkupSafe==2.0.1
matplotlib==3.4.2
matplotlib-inline @ file:///tmp/build/80754af9/matplotlib-inline_1628242447089/work
mccabe==0.6.1
miditoolkit==0.1.14
mido==1.2.10
murmurhash==1.0.6
networkx==2.6.3
nltk==3.4.5
numpy==1.21.3
omegaconf==2.1.0
packaging==21.0
paddlepaddle==2.1.3
pandas==1.3.4
parso @ file:///tmp/build/80754af9/parso_1617223946239/work
pathy==0.6.1
pexpect @ file:///tmp/build/80754af9/pexpect_1605563209008/work
phonemizer==3.1.1
pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work
Pillow==8.3.1
pkuseg==0.0.25
portalocker==2.0.0
preshed==3.0.6
prompt-toolkit @ file:///tmp/build/80754af9/prompt-toolkit_1616415428029/work
protobuf==3.19.0
ptyprocess @ file:///tmp/build/80754af9/ptyprocess_1609355006118/work/dist/ptyprocess-0.7.0-py2.py3-none-any.whl
pycparser==2.20
pydantic==1.8.2
Pygments @ file:///tmp/build/80754af9/pygments_1621606182707/work
pylint @ file:///opt/concourse/worker/volumes/live/4949af29-b8b7-4c3a-695e-9cc896d42904/volume/pylint_1627536796184/work
pyparsing==2.4.7
pypinyin==0.42.0
pytextrank==3.2.2
python-dateutil @ file:///tmp/build/80754af9/python-dateutil_1626374649649/work
pytz==2021.3
PyYAML==5.4.1
pyzmq==20.0.0
regex==2021.7.6
requests==2.26.0
rfc3986==1.5.0
sacrebleu==1.5.1
sacremoses==0.0.45
scikit-learn==0.20.4
scipy==1.7.1
segments==2.2.0
Senta==2.0.0
sentencepiece==0.1.83
six==1.11.0
smart-open==5.2.1
spacy==3.2.4
spacy-legacy==3.0.8
spacy-loggers==1.0.2
spacy-pkuseg==0.0.28
srsly==2.4.2
tabulate==0.8.9
textrank4zh==0.3
thinc==8.0.15
tokenizers==0.10.3
toml @ file:///tmp/build/80754af9/toml_1616166611790/work
torch==1.9.0
torchvision==0.10.0
tornado @ file:///opt/concourse/worker/volumes/live/d531d395-893c-4ca1-6a5f-717b318eb08c/volume/tornado_1606942307627/work
tqdm==4.61.2
traitlets @ file:///home/ktietz/src/ci/traitlets_1611929699868/work
transformers==4.9.0
typed-ast @ file:///opt/concourse/worker/volumes/live/22984077-cbd4-449f-4417-2737b09fa494/volume/typed-ast_1624953682230/work
typer==0.4.0
typing-extensions @ file:///tmp/build/80754af9/typing_extensions_1631814937681/work
uritemplate==4.1.1
urllib3==1.26.6
wasabi==0.8.2
wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work
wrapt==1.12.1
zh-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.1.0/zh_core_web_sm-3.1.0-py3-none-any.whl
zhon==1.1.5
zipp==3.5.0

Просмотреть файл

@@ -99,17 +99,22 @@ def get_keyword(text) -> dict:
results (dict): {[keywords]: [score]}
"""
a = 0.5
textrank = dict(get_textrank(text))
tf_idf = infer_tfidf(text, "lyrics_tfidf_model.pkl")
textrank = dict(get_textrank(text), topk=5)
# textrank = dict(get_textrank(text))
# tf_idf = infer_tfidf(text, "lyrics_tfidf_model.pkl")
keys = list(textrank.keys() & tf_idf.keys())
tmp = {}
for key in keys:
textr_score = textrank[key]
tfidf_score = tf_idf[key]
# keys = list(textrank.keys())
# keys = list(textrank.keys() & tf_idf.keys())
# tmp = {}
# for key in keys:
# textr_score = textrank[key]
# tfidf_score = tf_idf[key]
tmp[key] = textr_score * (1-a) + tfidf_score * a
# tmp[key] = textr_score * (1-a) + tfidf_score * a
keys = sorted(tmp, key=tmp.get)
results = { k: tmp[k] for k in keys }
# keys = sorted(tmp, key=tmp.get)
keys = sorted(textrank, key=textrank.get)
# results = { k: tmp[k] for k in keys }
results = { k: textrank[k] for k in keys }
return results