Hotword adjusting script

2021-02-12 13:36:33 +01:00 · 2021-02-12 13:36:33 +01:00 · edbb5ea103
--- a/hotword_adjusting/README.md
+++ b/hotword_adjusting/README.md
@ -0,0 +1,21 @@
+# hotword_adjusting
+This script provides an example of hot-word boosting usage. It also allows adjusting your boost values to see how they change the final transcription.
+
+# How to use?
+Run with Python 3.9 and the `deepspeech` package installed.
+
+Hot-word boosting requires DeepSpeech 0.9.0 or newer, since that is the version that added this feature.
+Example of usage:
+```
+hotword_adjusting.py --model model.pbmm --scorer scorer.scorer --audio audio/filename.wav --min -100.0 --max 100.0 --steps 3 --hot_words hot,cold
+```
+This tests every combination of boost values for the hot-words 'hot' and 'cold' on the audio file 'filename.wav',
+using priority (boost) values from the range [-100, 100] in 3 steps: [-100, 0, 100].
+
+# Example output
+```
+['bad'] = (-20.0,) :: [why are they sad and glad and that i do not know go ask your dad]
+['bad'] = (0.0,) :: [why are they sad and glad and that i do not know go ask your dad]
+['bad'] = (20.0,) :: [why are they bad and glad and bad i do not know go ask your bad ]
+```
+
--- a/hotword_adjusting/hotword_adjusting.py
+++ b/hotword_adjusting/hotword_adjusting.py
@ -0,0 +1,61 @@
+import deepspeech
+from deepspeech import Model, version
+import numpy as np
+import wave
+import itertools
+import argparse
+
+# Example of a valid execution:
+# hotword_adjusting.py --model model.pbmm --scorer scorer.scorer --audio audio/filename.wav --min -100.0 --max 100.0 --steps 3 --hot_words hot,cold
+# This tests combinations of hot-words: 'hot' and 'cold' on audiofile 'filename.wav'
+# using prios from range [-100;100] by doing 3 steps: [-100, 0, 100]
+
+# Prints the transcription for every element of the Cartesian product of hot-words and their prios
+def test_file(filename, hotwords, min_prio, max_prio, prio_steps):
+
+    fin = wave.open(filename, 'rb')
+    audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
+    fin.close()
+
+    prio_lists = np.linspace(min_prio, max_prio,prio_steps).tolist()
+
+    prio_product = itertools.product(prio_lists, repeat=len(hotwords))
+    for x in itertools.product(prio_lists, repeat=len(hotwords)):
+        DeepSpeech.clearHotWords()
+        for y in enumerate(hotwords):
+            DeepSpeech.addHotWord(hotwords[y[0]], x[y[0]])
+           
+        
+        print(f"{hotwords} = {x} :: [{DeepSpeech.stt(audio)}]")
+
+
+def main():
+    if(args.min>=args.max):
+        print("Error: min_prio can't be bigger than max_prio.")
+    else:
+        test_file(args.audio, args.hot_words.split(','), args.min, args.max, args.steps)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='DeepSpeech hot-word adjusting.')
+    parser.add_argument('--model', required=True,
+                    help='Path to the model (protocol buffer binary file)')
+    parser.add_argument('--scorer', required=True,
+                    help='Path to the external scorer file')
+    parser.add_argument('--audio', type=str, required=True,
+                    help='Path to the audio file to run (WAV format)')
+    parser.add_argument('--min', type=float, default=-10.0,
+                    help='Minimum boost value.')
+    parser.add_argument('--max', type=float, default=10.0,
+                    help='Maximum boost value.')
+    parser.add_argument('--steps', type=int, default=6,
+                    help='Number of tests per each hot-word.')
+    parser.add_argument('--hot_words', type=str, required=True,
+                    help='Hot-words separated by comma.')
+
+    args = parser.parse_args()
+
+    DeepSpeech = Model(args.model)
+    DeepSpeech.enableExternalScorer(args.scorer)
+
+    main()