From ec4fc4e8a4cabd6ea8490c760fbbc1e0b4885d08 Mon Sep 17 00:00:00 2001
From: cywang
Date: Mon, 25 Oct 2021 06:49:05 +0000
Subject: [PATCH] update script for unispeech sat finetune and inference

---
 .../examples/hubert/scripts/100h_finetune_base.sh  | 11 +++++++++++
 UniSpeech-SAT/examples/hubert/scripts/inference.sh | 12 ++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 UniSpeech-SAT/examples/hubert/scripts/100h_finetune_base.sh
 create mode 100644 UniSpeech-SAT/examples/hubert/scripts/inference.sh

diff --git a/UniSpeech-SAT/examples/hubert/scripts/100h_finetune_base.sh b/UniSpeech-SAT/examples/hubert/scripts/100h_finetune_base.sh
new file mode 100644
index 0000000..e7b33ca
--- /dev/null
+++ b/UniSpeech-SAT/examples/hubert/scripts/100h_finetune_base.sh
@@ -0,0 +1,11 @@
+#:: Copyright (c) Microsoft Corporation.
+#:: Licensed under the MIT License.
+
+model_path=MODEL_PATH
+train_subset=train_clean_100
+valid_subset=dev_other
+
+mkdir -p ${model_path}
+
+
+python train.py --distributed-world-size 8 --distributed-port 0 --nprocs-per-node 8 MANIFEST_PATH --save-dir ${model_path} --post-process letter --train-subset ${train_subset} --valid-subset ${valid_subset} --no-epoch-checkpoints --best-checkpoint-metric wer --num-workers 4 --max-update 80000 --sentence-avg --task hubert_pretraining --fine-tuning --single-target --arch hubert_ctc --w2v-path PRETRAINED_PATH/UniSpeech-SAT-Base.pt --labels '["ltr"]' --apply-mask --mask-selection static --mask-other 0 --mask-length 10 --mask-prob 0.65 --layerdrop 0.1 --mask-channel-selection static --mask-channel-other 0 --mask-channel-length 64 --mask-channel-prob 0.5 --zero-infinity --feature-grad-mult 0 --freeze-finetune-updates 0 --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-08 --lr 0.00003 --lr-scheduler tri_stage --warmup-steps 8000 --hold-steps 32000 --decay-steps 40000 --final-lr-scale 0.05 --final-dropout 0.1 --dropout 0.1 --activation-dropout 0.1 --criterion ctc --attention-dropout 0.1 --dropout-input 0.1 --max-tokens 3200000 --seed 2337 --log-format json --log-interval 200 --ddp-backend c10d --fp16 --update-freq 1 --keep-interval-updates 1 --find-unused-parameters
diff --git a/UniSpeech-SAT/examples/hubert/scripts/inference.sh b/UniSpeech-SAT/examples/hubert/scripts/inference.sh
new file mode 100644
index 0000000..00d3f45
--- /dev/null
+++ b/UniSpeech-SAT/examples/hubert/scripts/inference.sh
@@ -0,0 +1,12 @@
+#:: Copyright (c) Microsoft Corporation.
+#:: Licensed under the MIT License.
+
+model_path=MODEL_PATH
+gen_subset=test_other
+result_path=${model_path}/decode_ctc/${gen_subset}
+
+mkdir -p ${result_path}
+export PYTHONENCODING=UTF-8
+
+python examples/speech_recognition/infer.py DATA_PATHmanifest/resource/ --task audio_pretraining --nbest 1 --path ${model_path}/checkpoint_best.pt --gen-subset ${gen_subset} --results-path ${result_path} --w2l-decoder viterbi --word-score -1 --sil-weight 0 --criterion ctc --max-tokens 1100000 --dict-path DICT_PATH --post-process letter --quiet
+
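
Usage sketch, assuming the two scripts are run from the UniSpeech-SAT fairseq root (where train.py and examples/speech_recognition/infer.py live): the upper-case tokens MODEL_PATH, MANIFEST_PATH, PRETRAINED_PATH, DATA_PATH and DICT_PATH are placeholders that must be replaced with real paths before launching anything. The concrete paths below are purely illustrative:

    # Hypothetical paths; substitute your own experiment, manifest and model locations.
    sed -i \
        -e 's|MODEL_PATH|/data/exp/unispeech_sat_base_100h|g' \
        -e 's|MANIFEST_PATH|/data/librispeech/manifest|g' \
        -e 's|PRETRAINED_PATH|/data/pretrained_models|g' \
        examples/hubert/scripts/100h_finetune_base.sh
    sed -i \
        -e 's|MODEL_PATH|/data/exp/unispeech_sat_base_100h|g' \
        -e 's|DATA_PATH|/data/librispeech/|g' \
        -e 's|DICT_PATH|/data/librispeech/manifest/dict.ltr.txt|g' \
        examples/hubert/scripts/inference.sh

    # Python reads PYTHONIOENCODING rather than PYTHONENCODING, so export it as well
    # if the decoded transcripts may contain non-ASCII characters.
    export PYTHONIOENCODING=UTF-8

    bash examples/hubert/scripts/100h_finetune_base.sh   # fine-tune on train_clean_100, validate on dev_other
    bash examples/hubert/scripts/inference.sh            # Viterbi-decode checkpoint_best.pt on test_other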