From ec4fc4e8a4cabd6ea8490c760fbbc1e0b4885d08 Mon Sep 17 00:00:00 2001
From: cywang
Date: Mon, 25 Oct 2021 06:49:05 +0000
Subject: [PATCH] update script for unispeech sat finetune and inference

---
 .../examples/hubert/scripts/100h_finetune_base.sh  | 11 +++++++++++
 UniSpeech-SAT/examples/hubert/scripts/inference.sh | 12 ++++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 UniSpeech-SAT/examples/hubert/scripts/100h_finetune_base.sh
 create mode 100644 UniSpeech-SAT/examples/hubert/scripts/inference.sh

diff --git a/UniSpeech-SAT/examples/hubert/scripts/100h_finetune_base.sh b/UniSpeech-SAT/examples/hubert/scripts/100h_finetune_base.sh
new file mode 100644
index 0000000..e7b33ca
--- /dev/null
+++ b/UniSpeech-SAT/examples/hubert/scripts/100h_finetune_base.sh
@@ -0,0 +1,11 @@
+#:: Copyright (c) Microsoft Corporation.
+#:: Licensed under the MIT License.
+
+model_path=MODEL_PATH
+train_subset=train_clean_100
+valid_subset=dev_other
+
+mkdir -p ${model_path}
+
+
+python train.py --distributed-world-size 8 --distributed-port 0 --nprocs-per-node 8 MANIFEST_PATH --save-dir ${model_path} --post-process letter --train-subset ${train_subset} --valid-subset ${valid_subset} --no-epoch-checkpoints --best-checkpoint-metric wer --num-workers 4 --max-update 80000 --sentence-avg --task hubert_pretraining --fine-tuning --single-target --arch hubert_ctc --w2v-path PRETRAINED_PATH/UniSpeech-SAT-Base.pt --labels '["ltr"]' --apply-mask --mask-selection static --mask-other 0 --mask-length 10 --mask-prob 0.65 --layerdrop 0.1 --mask-channel-selection static --mask-channel-other 0 --mask-channel-length 64 --mask-channel-prob 0.5 --zero-infinity --feature-grad-mult 0 --freeze-finetune-updates 0 --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-08 --lr 0.00003 --lr-scheduler tri_stage --warmup-steps 8000 --hold-steps 32000 --decay-steps 40000 --final-lr-scale 0.05 --final-dropout 0.1 --dropout 0.1 --activation-dropout 0.1 --criterion ctc --attention-dropout 0.1 --dropout-input 0.1 --max-tokens 3200000 --seed 2337 --log-format json --log-interval 200 --ddp-backend c10d --fp16 --update-freq 1 --keep-interval-updates 1 --find-unused-parameters
diff --git a/UniSpeech-SAT/examples/hubert/scripts/inference.sh b/UniSpeech-SAT/examples/hubert/scripts/inference.sh
new file mode 100644
index 0000000..00d3f45
--- /dev/null
+++ b/UniSpeech-SAT/examples/hubert/scripts/inference.sh
@@ -0,0 +1,12 @@
+#:: Copyright (c) Microsoft Corporation.
+#:: Licensed under the MIT License.
+
+model_path=MODEL_PATH
+gen_subset=test_other
+result_path=${model_path}/decode_ctc/${gen_subset}
+
+mkdir -p ${result_path}
+export PYTHONENCODING=UTF-8
+
+python examples/speech_recognition/infer.py DATA_PATHmanifest/resource/ --task audio_pretraining --nbest 1 --path ${model_path}/checkpoint_best.pt --gen-subset ${gen_subset} --results-path ${result_path} --w2l-decoder viterbi --word-score -1 --sil-weight 0 --criterion ctc --max-tokens 1100000 --dict-path DICT_PATH --post-process letter --quiet
+
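
Usage sketch, assuming the two scripts are run from the UniSpeech-SAT fairseq root (where train.py and examples/speech_recognition/infer.py live): the upper-case tokens MODEL_PATH, MANIFEST_PATH, PRETRAINED_PATH, DATA_PATH and DICT_PATH are placeholders that must be replaced with real paths before launching anything. The concrete paths below are purely illustrative:

    # Hypothetical paths; substitute your own experiment, manifest and model locations.
    sed -i \
        -e 's|MODEL_PATH|/data/exp/unispeech_sat_base_100h|g' \
        -e 's|MANIFEST_PATH|/data/librispeech/manifest|g' \
        -e 's|PRETRAINED_PATH|/data/pretrained_models|g' \
        examples/hubert/scripts/100h_finetune_base.sh
    sed -i \
        -e 's|MODEL_PATH|/data/exp/unispeech_sat_base_100h|g' \
        -e 's|DATA_PATH|/data/librispeech/|g' \
        -e 's|DICT_PATH|/data/librispeech/manifest/dict.ltr.txt|g' \
        examples/hubert/scripts/inference.sh

    # Python reads PYTHONIOENCODING rather than PYTHONENCODING, so export it as well
    # if the decoded transcripts may contain non-ASCII characters.
    export PYTHONIOENCODING=UTF-8

    bash examples/hubert/scripts/100h_finetune_base.sh   # fine-tune on train_clean_100, validate on dev_other
    bash examples/hubert/scripts/inference.sh            # Viterbi-decode checkpoint_best.pt on test_other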