Hotword adjusting script
This commit is contained in:
Родитель
6de6b90802
Коммит
edbb5ea103
|
@ -0,0 +1,21 @@
|
|||
# hotword_adjusting
|
||||
This script provides an example of hot-word boosting usage. It also allows adjusting your boost values to see how they change the final transcription.
|
||||
|
||||
# How to use?
|
||||
Run using `python 3.9`, while having a `deepspeech` installed.
|
||||
|
||||
This works from version 0.9.0 since it was the version that added this feature.
|
||||
Example of usage:
|
||||
```
|
||||
hotword_adjusting.py --model model.pbmm --scorer scorer.scorer --audio audio/filename.wav --min -100.0 --max 100.0 --steps 3 --hot_words hot,cold
|
||||
```
|
||||
This tests combinations of hot-words: 'hot' and 'cold' on audiofile 'filename.wav'
|
||||
Using prios/boost values from range [-100;100] by doing 3 steps: [-100, 0, 100]
|
||||
|
||||
# Example output
|
||||
```
|
||||
['bad'] = (-20.0,) :: [why are they sad and glad and that i do not know go ask your dad]
|
||||
['bad'] = (0.0,) :: [why are they sad and glad and that i do not know go ask your dad]
|
||||
['bad'] = (20.0,) :: [why are they bad and glad and bad i do not know go ask your bad ]
|
||||
```
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
import deepspeech
|
||||
from deepspeech import Model, version
|
||||
import numpy as np
|
||||
import wave
|
||||
import itertools
|
||||
import argparse
|
||||
|
||||
# Example of a valid execution:
|
||||
# hotwords_adjusting.py --model model.pbmm --scorer.scorer --audio audio/filename.wav --min -100.0 --max 100.0 --steps 3 --hot_words hot,cold
|
||||
# This tests combinations of hot-words: 'hot' and 'cold' on audiofile 'filename.wav'
|
||||
# using prios from range [-100;100] by doing 3 steps: [-100, 0, 100]
|
||||
|
||||
#Prints out a Cartesian product of hotwords and their prios
|
||||
def test_file(filename, hotwords, min_prio, max_prio, prio_steps):
|
||||
|
||||
fin = wave.open(filename, 'rb')
|
||||
audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
|
||||
fin.close()
|
||||
|
||||
prio_lists = np.linspace(min_prio, max_prio,prio_steps).tolist()
|
||||
|
||||
prio_product = itertools.product(prio_lists, repeat=len(hotwords))
|
||||
for x in itertools.product(prio_lists, repeat=len(hotwords)):
|
||||
DeepSpeech.clearHotWords()
|
||||
for y in enumerate(hotwords):
|
||||
DeepSpeech.addHotWord(hotwords[y[0]], x[y[0]])
|
||||
|
||||
|
||||
print(f"{hotwords} = {x} :: [{DeepSpeech.stt(audio)}]")
|
||||
|
||||
|
||||
def main():
|
||||
if(args.min>=args.max):
|
||||
print("Error: min_prio can't be bigger than max_prio.")
|
||||
else:
|
||||
test_file(args.audio, args.hot_words.split(','), args.min, args.max, args.steps)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='DeepSpeech hot-word adjusting.')
|
||||
parser.add_argument('--model', required=True,
|
||||
help='Path to the model (protocol buffer binary file)')
|
||||
parser.add_argument('--scorer', required=True,
|
||||
help='Path to the external scorer file')
|
||||
parser.add_argument('--audio', type=str, required=True,
|
||||
help='Path to the audio file to run (WAV format)')
|
||||
parser.add_argument('--min', type=float, default=-10.0,
|
||||
help='Minimum boost value.')
|
||||
parser.add_argument('--max', type=float, default=10.0,
|
||||
help='Maximum boost value.')
|
||||
parser.add_argument('--steps', type=int, default=6,
|
||||
help='Number of tests per each hot-word.')
|
||||
parser.add_argument('--hot_words', type=str, required=True,
|
||||
help='Hot-words separated by comma.')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
DeepSpeech = Model(args.model)
|
||||
DeepSpeech.enableExternalScorer(args.scorer)
|
||||
|
||||
main()
|
Загрузка…
Ссылка в новой задаче