This commit is contained in:
Ideefixze 2021-02-12 13:36:33 +01:00
Родитель 6de6b90802
Коммит edbb5ea103
2 изменённых файлов: 82 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,21 @@
# hotword_adjusting
This script provides an example of hot-word boosting usage. It also allows adjusting your boost values to see how they change the final transcription.
# How to use?
Run with Python 3.9 and the `deepspeech` package installed.
Hot-word boosting requires `deepspeech` version 0.9.0 or later, since 0.9.0 is the release that added this feature.
Example of usage:
```
hotword_adjusting.py --model model.pbmm --scorer scorer.scorer --audio audio/filename.wav --min -100.0 --max 100.0 --steps 3 --hot_words hot,cold
```
This tests every combination of boost values for the hot-words 'hot' and 'cold' on the audio file 'filename.wav',
using priority/boost values from the range [-100, 100] sampled in 3 steps: [-100, 0, 100].
# Example output
```
['bad'] = (-20.0,) :: [why are they sad and glad and that i do not know go ask your dad]
['bad'] = (0.0,) :: [why are they sad and glad and that i do not know go ask your dad]
['bad'] = (20.0,) :: [why are they bad and glad and bad i do not know go ask your bad ]
```

Просмотреть файл

@ -0,0 +1,61 @@
import deepspeech
from deepspeech import Model, version
import numpy as np
import wave
import itertools
import argparse
# Example of a valid execution:
# hotword_adjusting.py --model model.pbmm --scorer scorer.scorer --audio audio/filename.wav --min -100.0 --max 100.0 --steps 3 --hot_words hot,cold
# This tests combinations of hot-words: 'hot' and 'cold' on audiofile 'filename.wav'
# using prios from range [-100;100] by doing 3 steps: [-100, 0, 100]
# Prints the transcription for every combination (Cartesian product) of boost values assigned to the hot-words.
def test_file(filename, hotwords, min_prio, max_prio, prio_steps):
    """Transcribe one WAV file under every combination of hot-word boosts.

    The boost values are ``prio_steps`` evenly spaced numbers from
    ``min_prio`` to ``max_prio`` (both included). For each element of the
    Cartesian product of those values over ``hotwords``, the hot-words
    registered on the model are cleared, each word is added with its boost,
    and one line is printed with the words, the boosts and the transcription.

    Relies on the module-level ``DeepSpeech`` model object, which the
    script creates before calling ``main()``.

    Args:
        filename: Path of the WAV file to transcribe.
        hotwords: Sequence of hot-word strings.
        min_prio: Lowest boost value to try.
        max_prio: Highest boost value to try.
        prio_steps: Number of boost values to try per hot-word.
    """
    # The context manager closes the file even if reading the frames fails.
    with wave.open(filename, 'rb') as fin:
        # The raw frames are interpreted as 16-bit integer samples.
        audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    prio_values = np.linspace(min_prio, max_prio, prio_steps).tolist()

    for prios in itertools.product(prio_values, repeat=len(hotwords)):
        # Start from a clean slate so boosts from the previous combination
        # do not carry over into this transcription.
        DeepSpeech.clearHotWords()
        for word, prio in zip(hotwords, prios):
            DeepSpeech.addHotWord(word, prio)
        print(f"{hotwords} = {prios} :: [{DeepSpeech.stt(audio)}]")
def main():
    """Validate the boost range and run the hot-word sweep.

    Reads the module-level ``args`` namespace produced by the argument
    parser. If ``args.min`` is not strictly smaller than ``args.max`` an
    error message is printed and nothing is transcribed; otherwise the
    comma-separated ``args.hot_words`` string is split into words and
    passed to ``test_file`` together with the audio path and boost range.
    """
    if args.min >= args.max:
        # Equal bounds are rejected too, so the message must say "smaller".
        print("Error: min_prio must be smaller than max_prio.")
        return

    # Strip surrounding whitespace and drop empty entries so inputs such as
    # "hot, cold" or "hot,cold," do not register ' cold' or '' as hot-words.
    hot_words = [word.strip() for word in args.hot_words.split(',') if word.strip()]
    test_file(args.audio, hot_words, args.min, args.max, args.steps)
if __name__ == "__main__":
    # Command-line interface: model and scorer paths, the audio file to
    # transcribe, the boost range to sweep and the hot-words to boost.
    parser = argparse.ArgumentParser(description="DeepSpeech hot-word adjusting.")
    parser.add_argument("--model", help="Path to the model (protocol buffer binary file)", required=True)
    parser.add_argument("--scorer", help="Path to the external scorer file", required=True)
    parser.add_argument("--audio", help="Path to the audio file to run (WAV format)", required=True, type=str)
    parser.add_argument("--min", help="Minimum boost value.", default=-10.0, type=float)
    parser.add_argument("--max", help="Maximum boost value.", default=10.0, type=float)
    parser.add_argument("--steps", help="Number of tests per each hot-word.", default=6, type=int)
    parser.add_argument("--hot_words", help="Hot-words separated by comma.", required=True, type=str)
    args = parser.parse_args()

    # Load the acoustic model and attach the external scorer; both `args`
    # and `DeepSpeech` are module-level names that main() and test_file() use.
    DeepSpeech = Model(args.model)
    DeepSpeech.enableExternalScorer(args.scorer)

    main()