diff --git a/egs/ami/s5/local/run_dnn.sh b/egs/ami/s5/local/run_dnn.sh
index 9f09e8501..4ddde8b1f 100755
--- a/egs/ami/s5/local/run_dnn.sh
+++ b/egs/ami/s5/local/run_dnn.sh
@@ -67,10 +67,10 @@ if [ $stage -le 2 ]; then
     $data_fmllr/$mic/train_tr90 $data_fmllr/$mic/train_cv10 data/lang $ali $ali $dir || exit 1;
   # Decode (reuse HCLG graph)
   steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
-    --num-threads 3 --parallel-opts "-pe smp 4" \
+    --num-threads 3 \
     $graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix} || exit 1;
   steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
-    --num-threads 3 --parallel-opts "-pe smp 4" \
+    --num-threads 3 \
     $graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix} || exit 1;
 fi
@@ -96,11 +96,11 @@ if [ $stage -le 4 ]; then
   # Decode (reuse HCLG graph)
   for ITER in 1; do
     steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
-      --num-threads 3 --parallel-opts "-pe smp 4" \
+      --num-threads 3 \
       --nnet $dir/${ITER}.nnet --acwt $acwt \
       $graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix} || exit 1;
     steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
-      --num-threads 3 --parallel-opts "-pe smp 4" \
+      --num-threads 3 \
       --nnet $dir/${ITER}.nnet --acwt $acwt \
       $graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix} || exit 1;
   done
@@ -126,11 +126,11 @@ if [ $stage -le 6 ]; then
   # Decode (reuse HCLG graph)
   for ITER in 1 2 3 4; do
     steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
-      --num-threads 3 --parallel-opts "-pe smp 4" \
+      --num-threads 3 \
      --nnet $dir/${ITER}.nnet --acwt $acwt \
      $graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix}_$ITER || exit 1;
     steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
-      --num-threads 3 --parallel-opts "-pe smp 4" \
+      --num-threads 3 \
       --nnet $dir/${ITER}.nnet --acwt $acwt \
       $graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix}_$ITER || exit 1;
   done
diff --git a/egs/aurora4/s5/cmd.sh b/egs/aurora4/s5/cmd.sh
index 328b426ca..139b2cd6c 100644
--- a/egs/aurora4/s5/cmd.sh
+++ b/egs/aurora4/s5/cmd.sh
@@ -7,10 +7,10 @@
 #a) JHU cluster options
 export train_cmd="queue.pl -l arch=*64"
-export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
-export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
-export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
-export cuda_cmd="queue.pl -l gpu=1"
+export decode_cmd="queue.pl -l arch=*64 --mem 2G"
+export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
+export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
+export cuda_cmd="queue.pl --gpu 1"

 #b) BUT cluster options
diff --git a/egs/babel/s5/local/score_combine.sh b/egs/babel/s5/local/score_combine.sh
index f292c62ab..42d9cfc09 100755
--- a/egs/babel/s5/local/score_combine.sh
+++ b/egs/babel/s5/local/score_combine.sh
@@ -33,7 +33,7 @@ lat_weights=
 word_ins_penalty=0.0
 min_lmwt=7
 max_lmwt=17
-parallel_opts="-pe smp 3"
+parallel_opts="--num-threads 3"
 skip_scoring=false
 ctm_name=
 #end configuration section.
@@ -49,7 +49,7 @@
 Options:
   --cmd (run.pl|queue.pl...)  # specify how to run the sub-processes.
   --stage (0|1|2)             # (createCTM | filterCTM | runSclite).
   --parallel-opts             # extra options to command for combination stage,
-                              # default '-pe smp 3'
+                              # default '--num-threads 3'
   --cer (0|1)                 # compute CER in addition to WER
";
diff --git a/egs/babel/s5/run-6-combine.sh b/egs/babel/s5/run-6-combine.sh
index 07c53bb18..92d749ca4 100755
--- a/egs/babel/s5/run-6-combine.sh
+++ b/egs/babel/s5/run-6-combine.sh
@@ -13,7 +13,7 @@ set -u
 if [ ! -f exp/combine_2/decode_dev2h/.done ]; then
   for iter in 1 2 3 4; do
-    local/score_combine.sh --cmd "queue.pl -l mem_free=2.0G,ram_free=2.0G" \
+    local/score_combine.sh --cmd "queue.pl --mem 2G" \
       data/dev2h data/lang exp/tri6_nnet/decode_dev2h exp/sgmm5_mmi_b0.1/decode_dev2h_fmllr_it$iter exp/combine_2/decode_dev2h_it$iter
     touch exp/combine_2/decode_dev2h/.done
   done
@@ -25,7 +25,7 @@ if [ ! -f exp/combine_3/decode_dev2h/.done ]; then
     if [ ! -f exp_BNF/sgmm7_mmi_b0.1/decode_dev2h_fmllr_it$iter/.done ]; then
       echo "BNF decode in exp_BNF/sgmm7_mmi_b0.1/decode_dev2h_fmllr_it$iter is not done, skipping this step."
     fi
-    local/score_combine.sh --cmd "queue.pl -l mem_free=2.0G,ram_free=2.0G" \
+    local/score_combine.sh --cmd "queue.pl --mem 2G" \
       data/dev2h data/lang exp_BNF/sgmm7_mmi_b0.1/decode_dev2h_fmllr_it$iter:10 \
       exp/sgmm5_mmi_b0.1/decode_dev2h_fmllr_it$iter exp/tri5_nnet/decode_dev2h exp/combine_3/decode_dev2h_it$iter
     touch exp_BNF/sgmm7_mmi_b0.1/decode_dev2h_fmllr_it$iter/.done
diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh
index 5647e0153..706f37932 100755
--- a/egs/fisher_callhome_spanish/s5/run.sh
+++ b/egs/fisher_callhome_spanish/s5/run.sh
@@ -236,7 +236,7 @@ utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph
 (
-  steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 --parallel-opts " -pe smp 5" \
+  steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 \
     --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \
     exp/sgmm5/graph data/dev exp/sgmm5/decode_dev
 )&
@@ -247,7 +247,7 @@ steps/align_sgmm2.sh \
   data/train data/lang exp/sgmm5 exp/sgmm5_ali

 steps/make_denlats_sgmm2.sh \
-  --nj 32 --sub-split 32 --num-threads 4 --parallel-opts "-pe smp 4"\
+  --nj 32 --sub-split 32 --num-threads 4 \
   --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5a_ali \
   data/train data/lang exp/sgmm5_ali exp/sgmm5_denlats
@@ -262,7 +262,7 @@ steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parall
   --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 12"\
   exp/tri5a/graph data/dev exp/tri5a/decode_dev
 utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph
-steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 --parallel-opts " -pe smp 5" \
+steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 \
   --config conf/decode.config --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \
   exp/sgmm5/graph data/dev exp/sgmm5/decode_dev
 for iter in 1 2 3 4; do
@@ -276,9 +276,9 @@ done
 dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \
-  --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")
+  --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 --mem 2G")
 dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \
-  --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")
+  --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 --mem 2G")
 steps/nnet2/train_pnorm_ensemble.sh \
   --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\
diff --git a/egs/librispeech/s5/cmd.sh b/egs/librispeech/s5/cmd.sh
index fec4abbca..6395d96ca 100644
--- a/egs/librispeech/s5/cmd.sh
+++ b/egs/librispeech/s5/cmd.sh
@@ -7,9 +7,9 @@
 #a) JHU cluster options
 export train_cmd="queue.pl -l arch=*64"
-export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
-export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
-export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
+export decode_cmd="queue.pl -l arch=*64 --mem 2G"
+export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
+export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
 export cuda_cmd="queue.pl -l gpu=1"
diff --git a/egs/sprakbanken/s5/cmd.sh b/egs/sprakbanken/s5/cmd.sh
index feac2bf55..43867ccf0 100644
--- a/egs/sprakbanken/s5/cmd.sh
+++ b/egs/sprakbanken/s5/cmd.sh
@@ -7,9 +7,9 @@
 #a) JHU cluster options
 #export train_cmd="queue.pl -l arch=*64"
-#export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
-#export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
-#export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
+#export decode_cmd="queue.pl -l arch=*64 --mem 2G"
+#export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
+#export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
 #export cuda_cmd="queue.pl -l gpu=1"
diff --git a/egs/swbd/s5c/cmd.sh b/egs/swbd/s5c/cmd.sh
index 4abf8546b..036d89a9e 100644
--- a/egs/swbd/s5c/cmd.sh
+++ b/egs/swbd/s5c/cmd.sh
@@ -7,9 +7,9 @@
 #a) JHU cluster options
 export train_cmd="queue.pl -l arch=*64*"
-export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
+export decode_cmd="queue.pl -l arch=*64* --mem 4G"
 #export cuda_cmd="..."
-export mkgraph_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
+export mkgraph_cmd="queue.pl -l arch=*64* --mem 4G"

 #b) BUT cluster options
 #export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
diff --git a/egs/tedlium/s5/cmd.sh b/egs/tedlium/s5/cmd.sh
index 93be65b63..bed97d340 100644
--- a/egs/tedlium/s5/cmd.sh
+++ b/egs/tedlium/s5/cmd.sh
@@ -12,8 +12,8 @@
 # JHU cluster:
 export train_cmd="queue.pl -l arch=*64*"
-export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
-export cuda_cmd="queue.pl -l arch=*64*,gpu=1 -q g.q"
+export decode_cmd="queue.pl -l arch=*64* --mem 4G"
+export cuda_cmd="queue.pl -l arch=*64* --gpu 1"

 host=$(hostname -f)
 if [ ${host#*.} == "fit.vutbr.cz" ]; then
diff --git a/egs/tedlium/s5/run.sh b/egs/tedlium/s5/run.sh
index 32b8bfd3c..7a36e49e8 100755
--- a/egs/tedlium/s5/run.sh
+++ b/egs/tedlium/s5/run.sh
@@ -70,10 +70,10 @@ if [ $stage -le 3 ]; then
   utils/mkgraph.sh data/lang_nosp_test exp/tri1 exp/tri1/graph_nosp || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri1/graph_nosp data/dev exp/tri1/decode_nosp_dev || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri1/graph_nosp data/test exp/tri1/decode_nosp_test || exit 1
 fi
@@ -87,10 +87,10 @@ if [ $stage -le 4 ]; then
   utils/mkgraph.sh data/lang_nosp_test exp/tri2 exp/tri2/graph_nosp || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri2/graph_nosp data/dev exp/tri2/decode_nosp_dev || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri2/graph_nosp data/test exp/tri2/decode_nosp_test || exit 1
 fi
@@ -110,10 +110,10 @@ if [ $stage -le 5 ]; then
   utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri2/graph data/test exp/tri2/decode_test || exit 1
 fi
@@ -127,10 +127,10 @@ if [ $stage -le 6 ]; then
   utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1
   steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri3/graph data/dev exp/tri3/decode_dev || exit 1
   steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri3/graph data/test exp/tri3/decode_test || exit 1
 fi
@@ -147,10 +147,10 @@ if [ $stage -le 7 ]; then
   for iter in 4; do
     steps/decode.sh --transform-dir exp/tri3/decode_dev --nj $decode_nj --cmd "$decode_cmd" --iter $iter \
-      --num-threads 4 --parallel-opts "--num-threads 4" \
+      --num-threads 4 \
       exp/tri3/graph data/dev exp/tri3_mmi_b0.1/decode_dev_it$iter || exit 1
     steps/decode.sh --transform-dir exp/tri3/decode_test --nj $decode_nj --cmd "$decode_cmd" --iter $iter \
-      --num-threads 4 --parallel-opts "--num-threads 4" \
+      --num-threads 4 \
       exp/tri3/graph data/test exp/tri3_mmi_b0.1/decode_test_it$iter || exit 1
   done
 fi
diff --git a/egs/timit/s5/cmd.sh b/egs/timit/s5/cmd.sh
index 6e89243a8..fd91a53ff 100644
--- a/egs/timit/s5/cmd.sh
+++ b/egs/timit/s5/cmd.sh
@@ -14,8 +14,8 @@
 if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
   export train_cmd="queue.pl -l arch=*64*"
-  export decode_cmd="queue.pl -l arch=*64*,ram_free=3G,mem_free=3G"
-  export mkgraph_cmd="queue.pl -l arch=*64*,ram_free=4G,mem_free=4G"
+  export decode_cmd="queue.pl -l arch=*64* --mem 3G"
+  export mkgraph_cmd="queue.pl -l arch=*64* --mem 4G"
   export cuda_cmd="queue.pl -l gpu=1"
 elif [[ $(hostname -f) == *.fit.vutbr.cz ]]; then
   #b) BUT cluster options
diff --git a/egs/timit/s5/run.sh b/egs/timit/s5/run.sh
index 0ae1ecec3..ce96f64fc 100755
--- a/egs/timit/s5/run.sh
+++ b/egs/timit/s5/run.sh
@@ -191,7 +191,7 @@ echo " DNN Hybrid Training & Decoding
 echo ============================================================================

 # DNN hybrid system training parameters
-dnn_mem_reqs="mem_free=1.0G,ram_free=1.0G"
+dnn_mem_reqs="--mem 1G"
 dnn_extra_opts="--num_epochs 20 --num-epochs-extra 10 --add-layers-period 1 --shrink-interval 3"

 steps/nnet2/train_tanh.sh --mix-up 5000 --initial-learning-rate 0.015 \
@@ -200,7 +200,7 @@ steps/nnet2/train_tanh.sh --mix-up 5000 --initial-learning-rate 0.015 \
   data/train data/lang exp/tri3_ali exp/tri4_nnet

 [ ! -d exp/tri4_nnet/decode_dev ] && mkdir -p exp/tri4_nnet/decode_dev
-decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=4.0G")
+decode_extra_opts=(--num-threads 6)
 steps/nnet2/decode.sh --cmd "$decode_cmd" --nj "$decode_nj" "${decode_extra_opts[@]}" \
   --transform-dir exp/tri3/decode_dev exp/tri3/graph data/dev \
   exp/tri4_nnet/decode_dev | tee exp/tri4_nnet/decode_dev/decode.log
diff --git a/egs/wsj/s5/cmd.sh b/egs/wsj/s5/cmd.sh
index fec4abbca..6395d96ca 100644
--- a/egs/wsj/s5/cmd.sh
+++ b/egs/wsj/s5/cmd.sh
@@ -7,9 +7,9 @@
 #a) JHU cluster options
 export train_cmd="queue.pl -l arch=*64"
-export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
-export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
-export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
+export decode_cmd="queue.pl -l arch=*64 --mem 2G"
+export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
+export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
 export cuda_cmd="queue.pl -l gpu=1"
diff --git a/egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh b/egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh
index 4b21e88a3..1a69e50f3 100755
--- a/egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh
+++ b/egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh
@@ -31,7 +31,7 @@ else
   # almost the same, but this may be a little bit slow.
   num_threads=16
   minibatch_size=128
-  parallel_opts="-pe smp $num_threads"
+  parallel_opts="--num-threads $num_threads"
   dir=$nnet_dir/nnet_a
 fi
diff --git a/egs/wsj/s5/local/run_bnf_sgmm.sh b/egs/wsj/s5/local/run_bnf_sgmm.sh
index ecda87fa2..6cfe1df67 100644
--- a/egs/wsj/s5/local/run_bnf_sgmm.sh
+++ b/egs/wsj/s5/local/run_bnf_sgmm.sh
@@ -16,7 +16,7 @@ bnf_num_gauss_ubm=600
 bnf_num_gauss_sgmm=7000
 align_dir=exp/tri4b_ali_si284
 bnf_decode_acwt=0.0357
-sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=7G" --cmd "queue.pl -l arch=*64 -l mem_free=2.0G,ram_free=2.0G")
+sgmm_group_extra_opts=(--group 3 --cmd "queue.pl -l arch=*64 --mem 7G")

 if [ ! -d exp_bnf ]; then
   echo "$0: before running this script, please run local/run_bnf.sh"
diff --git a/egs/wsj/s5/run.sh b/egs/wsj/s5/run.sh
index 8a1db2b3d..492a6e26a 100755
--- a/egs/wsj/s5/run.sh
+++ b/egs/wsj/s5/run.sh
@@ -51,7 +51,7 @@ local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;
   # Note: I am commenting out the RNNLM-building commands below. They take up a lot
   # of CPU time and are not really part of the "main recipe."
-  # Be careful: appending things like "-l mem_free=10G" to $decode_cmd
+  # Be careful: appending things like "--mem 10G" to $decode_cmd
   # won't always work, it depends what $decode_cmd is.
   (
   local/wsj_extend_dict.sh --dict-suffix "_nosp" $wsj1/13-32.1 && \
@@ -61,18 +61,18 @@ local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;
   local/wsj_format_local_lms.sh --lang-suffix "_nosp" # &&
   #
   # ( local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
-  #   --cmd "$decode_cmd -l mem_free=10G" data/local/rnnlm.h30.voc10k &
+  #   --cmd "$decode_cmd --mem 10G" data/local/rnnlm.h30.voc10k &
   #   sleep 20; # wait till tools compiled.
   #   local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
-  #     --cmd "$decode_cmd -l mem_free=12G" \
+  #     --cmd "$decode_cmd --mem 12G" \
   #     --hidden 100 --nwords 20000 --class 350 \
   #     --direct 1500 data/local/rnnlm.h100.voc20k &
   #   local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
-  #     --cmd "$decode_cmd -l mem_free=14G" \
+  #     --cmd "$decode_cmd --mem 14G" \
   #     --hidden 200 --nwords 30000 --class 350 \
   #     --direct 1500 data/local/rnnlm.h200.voc30k &
   #   local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
-  #     --cmd "$decode_cmd -l mem_free=16G" \
+  #     --cmd "$decode_cmd --mem 16G" \
   #     --hidden 300 --nwords 40000 --class 400 \
   #     --direct 2000 data/local/rnnlm.h300.voc40k &
   #  )
@@ -81,19 +81,19 @@ local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;
   num_threads_rnnlm=8
   local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
     --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
-    --cmd "$decode_cmd -l mem_free=1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
+    --cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
     --hidden 30 --nwords 10000 --direct 1000 data/local/rnnlm-hs.h30.voc10k
   local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
     --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
-    --cmd "$decode_cmd -l mem_free=1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
+    --cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
     --hidden 100 --nwords 20000 --direct 1500 data/local/rnnlm-hs.h100.voc20k
   local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
     --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
-    --cmd "$decode_cmd -l mem_free=1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
+    --cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
     --hidden 300 --nwords 30000 --direct 1500 data/local/rnnlm-hs.h300.voc30k
   local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
     --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
-    --cmd "$decode_cmd -l mem_free=1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
+    --cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
     --hidden 400 --nwords 40000 --direct 2000 data/local/rnnlm-hs.h400.voc40k
   )
 ) &
diff --git a/egs/wsj/s5/steps/cleanup/decode_segmentation.sh b/egs/wsj/s5/steps/cleanup/decode_segmentation.sh
index 0d929f98c..63e1943cb 100755
--- a/egs/wsj/s5/steps/cleanup/decode_segmentation.sh
+++ b/egs/wsj/s5/steps/cleanup/decode_segmentation.sh
@@ -16,7 +16,7 @@ beam=13.0
 lattice_beam=6.0
 acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 scoring_opts=
 # note: there are no more min-lmwt and max-lmwt options, instead use
 # e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20"
@@ -48,7 +48,6 @@ if [ $# != 3 ]; then
   echo " --acwt # acoustic scale used for lattice generation "
   echo " --scoring-opts # options to local/score.sh"
   echo " --num-threads # number of threads to use, default 1."
-  echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
   exit 1;
 fi
@@ -115,7 +114,7 @@ if [ $stage -le 0 ]; then
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $model | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
    gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $model "$HCLG" "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
diff --git a/egs/wsj/s5/steps/decode.sh b/egs/wsj/s5/steps/decode.sh
index fd6f162cf..85b469221 100755
--- a/egs/wsj/s5/steps/decode.sh
+++ b/egs/wsj/s5/steps/decode.sh
@@ -16,7 +16,7 @@ beam=13.0
 lattice_beam=6.0
 acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 scoring_opts=
 # note: there are no more min-lmwt and max-lmwt options, instead use
 # e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20"
@@ -48,7 +48,7 @@ if [ $# != 3 ]; then
   echo " --acwt # acoustic scale used for lattice generation "
   echo " --scoring-opts # options to local/score.sh"
   echo " --num-threads # number of threads to use, default 1."
-  echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
+  echo " --parallel-opts # ignored now, present for historical reasons."
   exit 1;
 fi
@@ -110,7 +110,7 @@ if [ $stage -le 0 ]; then
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $model | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
    gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
diff --git a/egs/wsj/s5/steps/decode_basis_fmllr.sh b/egs/wsj/s5/steps/decode_basis_fmllr.sh
index 76c3259f5..dc3cd4b4a 100755
--- a/egs/wsj/s5/steps/decode_basis_fmllr.sh
+++ b/egs/wsj/s5/steps/decode_basis_fmllr.sh
@@ -49,7 +49,7 @@ silence_weight=0.01
 cmd=run.pl
 si_dir=
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored, present for historical reasons.
 skip_scoring=false
 scoring_opts=
 # End configuration section
@@ -75,7 +75,7 @@ if [ $# != 3 ]; then
   echo " --acwt # default 0.08333 ... used to get posteriors"
   echo " --scoring-opts # options to local/score.sh"
   echo " --num-threads # number of threads to use, default 1."
-  echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
+  echo " --parallel-opts # ignored, present for historical reasons."
   exit 1;
 fi
@@ -121,7 +121,7 @@ if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
     { echo "Mismatch in number of pdfs with $alignment_model"; exit 1; }
   fi
-  steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
+  steps/decode.sh --scoring-opts "$scoring_opts" \
     --num-threads $num_threads --skip-scoring $skip_scoring \
     --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
     --model $alignment_model --max-active \
     $first_max_active $graphdir $data $si_dir || exit 1;
@@ -178,7 +178,7 @@ if [ $stage -le 2 ]; then
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $adapt_model | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $adapt_model"; exit 1; }
   fi
-  $cmd JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd JOB=1:$nj --num-threads $num_threads $dir/log/decode.JOB.log \
    gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt \
    --determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \
diff --git a/egs/wsj/s5/steps/decode_fmllr.sh b/egs/wsj/s5/steps/decode_fmllr.sh
index 97d32e6c5..a5ca6c9ef 100755
--- a/egs/wsj/s5/steps/decode_fmllr.sh
+++ b/egs/wsj/s5/steps/decode_fmllr.sh
@@ -1,6 +1,6 @@
 #!/bin/bash

-# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2012-2015 Johns Hopkins University (Author: Daniel Povey)

 # Decoding script that does fMLLR.  This can be on top of delta+delta-delta, or
 # LDA+MLLT features.
@@ -42,7 +42,7 @@ cmd=run.pl
 si_dir=
 fmllr_update_type=full
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 skip_scoring=false
 scoring_opts=
 max_fmllr_jobs=25 # I've seen the fMLLR jobs overload NFS badly if the decoding
@@ -69,7 +69,6 @@ if [ $# != 3 ]; then
   echo " # Caution-- must be with same tree"
   echo " --acwt # default 0.08333 ... used to get posteriors"
   echo " --num-threads # number of threads to use, default 1."
-  echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
   echo " --scoring-opts # options to local/score.sh"
   exit 1;
 fi
@@ -117,11 +116,11 @@ if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $alignment_model | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $alignment_model"; exit 1; }
   fi
-  steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
-      --num-threads $num_threads --skip-scoring $skip_scoring \
-      --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
-      --model $alignment_model --max-active \
-      $first_max_active $graphdir $data $si_dir || exit 1;
+  steps/decode.sh --scoring-opts "$scoring_opts" \
+    --num-threads $num_threads --skip-scoring $skip_scoring \
+    --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
+    --model $alignment_model --max-active \
+    $first_max_active $graphdir $data $si_dir || exit 1;
  fi
 fi
 ##
@@ -171,7 +170,7 @@ if [ $stage -le 2 ]; then
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $adapt_model | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $adapt_model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
    gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --determinize-lattice=false \
    --allow-partial=true --word-symbol-table=$graphdir/words.txt \
@@ -208,7 +207,7 @@ feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.
 if [ $stage -le 4 ]; then
   echo "$0: doing a final pass of acoustic rescoring."
-  $cmd $parallel_opts JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
    gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
    lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
    "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
diff --git a/egs/wsj/s5/steps/decode_fmllr_extra.sh b/egs/wsj/s5/steps/decode_fmllr_extra.sh
index 421fac6c5..04d4c2ae3 100755
--- a/egs/wsj/s5/steps/decode_fmllr_extra.sh
+++ b/egs/wsj/s5/steps/decode_fmllr_extra.sh
@@ -56,7 +56,7 @@ si_dir=
 fmllr_update_type=full
 skip_scoring=false
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 scoring_opts=
 # End configuration section
@@ -81,7 +81,6 @@ if [ $# != 3 ]; then
   echo " # Caution-- must be with same tree"
   echo " --acwt # default 0.08333 ... used to get posteriors"
   echo " --num-threads # number of threads to use, default 1."
-  echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
   echo " --scoring-opts # options to local/score.sh"
   exit 1;
 fi
@@ -129,7 +128,7 @@ if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
     { echo "Mismatch in number of pdfs with $alignment_model" exit 1; }
   fi
   steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model\
-    --max-active $first_max_active --parallel-opts "${parallel_opts}" --num-threads $num_threads\
+    --max-active $first_max_active --num-threads $num_threads\
    --skip-scoring true $graphdir $data $si_dir || exit 1;
  fi
 fi
@@ -178,7 +177,7 @@ if [ $stage -le 2 ]; then
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $adapt_model | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $adapt_model" exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode1.JOB.log\
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode1.JOB.log\
    gmm-latgen-faster$thread_string --max-active=$first_max_active --max-mem=$max_mem --beam=$first_beam --lattice-beam=$first_lattice_beam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat1.JOB.gz" \
@@ -214,7 +213,7 @@ pass2feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/t
 ## after another stage of adaptation.)
 if [ $stage -le 4 ]; then
   echo "$0: doing final lattice generation phase"
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode2.JOB.log\
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode2.JOB.log\
    gmm-latgen-faster$thread_string --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $adapt_model $graphdir/HCLG.fst "$pass2feats" "ark:|gzip -c > $dir/lat2.JOB.gz" \
diff --git a/egs/wsj/s5/steps/decode_fmmi.sh b/egs/wsj/s5/steps/decode_fmmi.sh
index 5ce22a946..d568709be 100755
--- a/egs/wsj/s5/steps/decode_fmmi.sh
+++ b/egs/wsj/s5/steps/decode_fmmi.sh
@@ -17,7 +17,7 @@ acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
 ngselect=2; # Just use the 2 top Gaussians for fMMI/fMPE.  Should match train.
 transform_dir=
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 scoring_opts=
 # End configuration section.
@@ -46,7 +46,6 @@ if [ $# != 3 ]; then
   echo " --scoring-opts # options to local/score.sh"
   echo " # speaker-adapted decoding"
   echo " --num-threads # number of threads to use, default 1."
-  echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
   exit 1;
 fi
@@ -98,7 +97,7 @@ if [ $stage -le 1 ]; then
 fi

 if [ $stage -le 2 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
    gmm-latgen-faster$thread_string --max-active=$maxactive --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $model $graphdir/HCLG.fst "$fmpefeats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
diff --git a/egs/wsj/s5/steps/decode_lvtln.sh b/egs/wsj/s5/steps/decode_lvtln.sh
index eed2e1687..ac58b2ee0 100755
--- a/egs/wsj/s5/steps/decode_lvtln.sh
+++ b/egs/wsj/s5/steps/decode_lvtln.sh
@@ -18,7 +18,7 @@ logdet_scale=0.0
 cmd=run.pl
 skip_scoring=false
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 scoring_opts=
 cleanup=true
 # End configuration section
@@ -89,7 +89,7 @@ if [ $stage -le 0 ]; then
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $srcdir/final.alimdl | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $srcdir/final.alimdl"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
    gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $srcdir/final.alimdl $graphdir/HCLG.fst "$sifeats" "ark:|gzip -c > $dir/lat_pass1.JOB.gz" \
@@ -121,7 +121,7 @@ feats1="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans
 if [ $stage -le 3 ]; then
   echo "$0: rescoring the lattices with first-pass LVTLN transforms"
-  $cmd $parallel_opts JOB=1:$nj $dir/log/rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/rescore.JOB.log \
    gmm-rescore-lattice $srcdir/final.mdl "ark:gunzip -c $dir/lat_pass1.JOB.gz|" "$feats1" \
    "ark:|gzip -c > $dir/lat_pass2.JOB.gz" || exit 1;
 fi
@@ -144,7 +144,7 @@ if [ $stage -le 5 ]; then
   # This second rescoring is only really necessary for scoring purposes,
   # it does not affect the transforms.
   echo "$0: rescoring the lattices with second-pass LVTLN transforms"
-  $cmd $parallel_opts JOB=1:$nj $dir/log/rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/rescore.JOB.log \
    gmm-rescore-lattice $srcdir/final.mdl "ark:gunzip -c $dir/lat_pass2.JOB.gz|" "$feats" \
    "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
 fi
diff --git a/egs/wsj/s5/steps/decode_nolats.sh b/egs/wsj/s5/steps/decode_nolats.sh
index d30925028..6f5e780cf 100755
--- a/egs/wsj/s5/steps/decode_nolats.sh
+++ b/egs/wsj/s5/steps/decode_nolats.sh
@@ -23,10 +23,6 @@ max_active=7000
 beam=13.0
 lattice_beam=6.0
 acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
-num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
-              # Note: this functionality doesn't work right now because
-              # there is no program gmm-decode-faster-parallel
 write_alignments=false
 write_words=true
 # End configuration section.
@@ -59,8 +55,6 @@ if [ $# != 3 ]; then
   echo " --cmd (utils/run.pl|utils/queue.pl ) # how to run jobs."
   echo " --transform-dir # dir to find fMLLR transforms "
   echo " --acwt # acoustic scale used for lattice generation "
-  #echo " --num-threads # number of threads to use, default 1."
-  #echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
   exit 1;
 fi
@@ -89,8 +83,6 @@ echo "decode.sh: feature type is $feat_type";
 splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
 cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`

-thread_string=
-[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"

 case $feat_type in
   delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
@@ -124,8 +116,8 @@ if [ $stage -le 0 ]; then
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $model | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
-   gmm-decode-faster$thread_string --max-active=$max_active --beam=$beam \
+  $cmd JOB=1:$nj $dir/log/decode.JOB.log \
+   gmm-decode-faster --max-active=$max_active --beam=$beam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    "$model" $graphdir/HCLG.fst "$feats" "$words" "$ali" || exit 1;
 fi
diff --git a/egs/wsj/s5/steps/decode_raw_fmllr.sh b/egs/wsj/s5/steps/decode_raw_fmllr.sh
index 3be3e60f0..069dc84a7 100755
--- a/egs/wsj/s5/steps/decode_raw_fmllr.sh
+++ b/egs/wsj/s5/steps/decode_raw_fmllr.sh
@@ -45,7 +45,7 @@ silence_weight=0.01
 cmd=run.pl
 si_dir=
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 skip_scoring=false
 scoring_opts=
 # End configuration section
@@ -70,7 +70,6 @@ if [ $# != 3 ]; then
   echo " # Caution-- must be with same tree"
   echo " --acwt # default 0.08333 ... used to get posteriors"
   echo " --num-threads # number of threads to use, default 1."
-  echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
   echo " --scoring-opts # options to local/score.sh"
   exit 1;
 fi
@@ -115,7 +114,7 @@ fi
 if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
   si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si".
   if [ $stage -le 0 ]; then
-    steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
+    steps/decode.sh --scoring-opts "$scoring_opts" \
       --num-threads $num_threads --skip-scoring $skip_scoring \
       --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
       --model $alignment_model --max-active \
@@ -166,7 +165,7 @@ pass1feats="$pass1splicedfeats transform-feats $srcdir/final.mat ark:- ark:- |"
 ## model, and it's more correct to store the full state-level lattice for this purpose.
 if [ $stage -le 2 ]; then
   echo "$0: doing main lattice generation phase"
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
    gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --determinize-lattice=false \
    --allow-partial=true --word-symbol-table=$graphdir/words.txt \
@@ -217,7 +216,7 @@ fi
 if [ $stage -le 5 ]; then
   echo "$0: doing a final pass of acoustic rescoring."
-  $cmd $parallel_opts JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
    gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
    lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
    "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
diff --git a/egs/wsj/s5/steps/decode_sgmm2.sh b/egs/wsj/s5/steps/decode_sgmm2.sh
index c84d5660e..99f422308 100755
--- a/egs/wsj/s5/steps/decode_sgmm2.sh
+++ b/egs/wsj/s5/steps/decode_sgmm2.sh
@@ -31,7 +31,7 @@ use_fmllr=false
 fmllr_iters=10
 fmllr_min_count=1000
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 skip_scoring=false
 scoring_opts=
 # note: there are no more min-lmwt and max-lmwt options, instead use
@@ -132,7 +132,7 @@ if [ $stage -le 2 ]; then
   [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $alignment_model | grep pdfs | awk '{print $NF}'` ] || \
     { echo "Mismatch in number of pdfs with $alignment_model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_pass1.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_pass1.JOB.log \
    sgmm2-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
    --word-symbol-table=$graphdir/words.txt --max-mem=$max_mem "$gselect_opt_1stpass" $alignment_model \
@@ -202,7 +202,7 @@ fi
 # corresponding model.  Prune and determinize the lattices to limit
 # their size.
 if [ $stage -le 6 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/rescore.JOB.log \
    sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
    $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
    lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
diff --git a/egs/wsj/s5/steps/kl_hmm/decode_kl_hmm.sh b/egs/wsj/s5/steps/kl_hmm/decode_kl_hmm.sh
index c941d142b..d085ac907 100755
--- a/egs/wsj/s5/steps/kl_hmm/decode_kl_hmm.sh
+++ b/egs/wsj/s5/steps/kl_hmm/decode_kl_hmm.sh
@@ -21,7 +21,6 @@ acwt=0.1 # GMM:0.0833, note: only really affects pruning (scoring is on lattices
 scoring_opts="--min-lmwt 1 --max-lmwt 12"
 skip_scoring=false
 use_gpu="no" # disable gpu
-parallel_opts=""
 # End configuration section.

 echo "$0 $@" # Print the command line for logging
@@ -104,7 +103,7 @@ fi

 # Run the decoding in the queue
 if [ $stage -le 0 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd JOB=1:$nj $dir/log/decode.JOB.log \
    nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
    latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$lattice_beam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
diff --git a/egs/wsj/s5/steps/make_denlats.sh b/egs/wsj/s5/steps/make_denlats.sh
index a9a31c6e6..65b4bb8d3 100755
--- a/egs/wsj/s5/steps/make_denlats.sh
+++ b/egs/wsj/s5/steps/make_denlats.sh
@@ -17,7 +17,7 @@ max_mem=20000000 # This will stop the processes getting too large.
 # This is in bytes, but not "real" bytes-- you have to multiply
 # by something like 5 or 10 to get real bytes (not sure why so large)
 num_threads=1
-parallel_opts=
+parallel_opts= # ignored now
 # End configuration section.

 echo "$0 $@" # Print the command line for logging
@@ -40,7 +40,6 @@ if [ $# != 4 ]; then
   echo " # will (individually) finish reasonably soon."
   echo " --transform-dir # directory to find fMLLR transforms."
   echo " --num-threads # number of threads per decoding job"
-  echo " --parallel-opts # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
   exit 1;
 fi
@@ -121,7 +120,7 @@ trap "cleanup" INT QUIT TERM EXIT

 if [ $sub_split -eq 1 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_den.JOB.log \
    gmm-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
    --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
    $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
@@ -147,7 +146,7 @@ else
     mkdir -p $dir/part
     feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
-    $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+    $cmd --num-threads $num_threads JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
      gmm-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
      --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
      $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error &
diff --git a/egs/wsj/s5/steps/make_denlats_sgmm2.sh b/egs/wsj/s5/steps/make_denlats_sgmm2.sh
index 4fd3e860e..0f250d555 100755
--- a/egs/wsj/s5/steps/make_denlats_sgmm2.sh
+++ b/egs/wsj/s5/steps/make_denlats_sgmm2.sh
@@ -19,7 +19,7 @@ max_active=5000
 transform_dir=
 max_mem=20000000 # This will stop the processes getting too large.
 num_threads=1
-parallel_opts=
+parallel_opts= # ignored now.
 # End configuration section.

 echo "$0 $@" # Print the command line for logging
@@ -42,7 +42,6 @@ if [ $# != 4 ]; then
   echo " # will (individually) finish reasonably soon."
   echo " --transform-dir # directory to find fMLLR transforms."
   echo " --num-threads # number of threads per decoding job"
-  echo " --parallel-opts # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
   exit 1;
 fi
@@ -57,9 +56,6 @@ cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`
 if [ $num_threads -gt 1 ]; then
   # the -parallel becomes part of the binary name we decode with.
   thread_string="-parallel --num-threads=$num_threads"
-  if [ -z $parallel_opts ]; then
-    parallel_opts="--num-threads $num_threads"
-  fi
 fi

 mkdir -p $dir/log
@@ -142,7 +138,7 @@ cleanup() {
 trap "cleanup" INT QUIT TERM EXIT

 if [ $sub_split -eq 1 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_den.JOB.log \
    sgmm2-latgen-faster$thread_string $spkvecs_opt "$gselect_opt" --beam=$beam \
    --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
    --max-mem=$max_mem --max-active=$max_active \
@@ -171,7 +167,7 @@ else
     feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
     spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"`
     gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"`
-    $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+    $cmd --num-threads $num_threads JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
      sgmm2-latgen-faster$thread_string \
      $spkvecs_opt_subset "$gselect_opt_subset" \
      --beam=$beam --lattice-beam=$lattice_beam \
diff --git a/egs/wsj/s5/steps/nnet/decode.sh b/egs/wsj/s5/steps/nnet/decode.sh
index f05b14dc1..14a8d0c69 100755
--- a/egs/wsj/s5/steps/nnet/decode.sh
+++ b/egs/wsj/s5/steps/nnet/decode.sh
@@ -26,7 +26,7 @@ skip_scoring=false
 scoring_opts="--min-lmwt 4 --max-lmwt 15"

 num_threads=1 # if >1, will use latgen-faster-parallel
-parallel_opts="-pe smp $((num_threads+1))" # use 2 CPUs (1 DNN-forward, 1 decoder)
+parallel_opts= # Ignored now.
 use_gpu="no" # yes|no|optionaly
 # End configuration section.
@@ -111,7 +111,7 @@ feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"

 # Run the decoding in the queue,
 if [ $stage -le 0 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $((num_threads+1)) JOB=1:$nj $dir/log/decode.JOB.log \
    nnet-forward $nnet_forward_opts --feature-transform=$feature_transform --class-frame-counts=$class_frame_counts --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
    latgen-faster-mapped$thread_string --min-active=$min_active --max-active=$max_active --max-mem=$max_mem --beam=$beam \
    --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
diff --git a/egs/wsj/s5/steps/nnet/make_denlats.sh b/egs/wsj/s5/steps/nnet/make_denlats.sh
index 38cbb9547..02d25c744 100755
--- a/egs/wsj/s5/steps/nnet/make_denlats.sh
+++ b/egs/wsj/s5/steps/nnet/make_denlats.sh
@@ -21,7 +21,7 @@ max_mem=20000000 # This will stop the processes getting too large.
 # by something like 5 or 10 to get real bytes (not sure why so large)
 # End configuration section.
 use_gpu=no # yes|no|optional
-parallel_opts="-pe smp 2"
+parallel_opts="--num-threads 2"

 echo "$0 $@" # Print the command line for logging
diff --git a/egs/wsj/s5/steps/nnet2/decode.sh b/egs/wsj/s5/steps/nnet2/decode.sh
index 2641e39a3..df8600df3 100755
--- a/egs/wsj/s5/steps/nnet2/decode.sh
+++ b/egs/wsj/s5/steps/nnet2/decode.sh
@@ -20,7 +20,7 @@ ivector_scale=1.0
 lattice_beam=8.0 # Beam we use in lattice generation.
 iter=final
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts= # If you supply num-threads, you should supply this too.
+parallel_opts= # ignored now.
 scoring_opts=
 skip_scoring=false
 feat_type=
@@ -47,7 +47,7 @@ if [ $# -ne 3 ]; then
   echo " --iter # Iteration of model to decode; default is final."
   echo " --scoring-opts # options to local/score.sh"
   echo " --num-threads # number of threads to use, default 1."
-  echo " --parallel-opts # e.g. '-pe smp 4' if you supply --num-threads 4"
+  echo " --parallel-opts # ignored now, present for historical reasons."
   exit 1;
 fi
@@ -129,7 +129,7 @@ if [ ! -z "$online_ivector_dir" ]; then
 fi

 if [ $stage -le 1 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
    nnet-latgen-faster$thread_string \
    --minimize=$minimize --max-active=$max_active --min-active=$min_active --beam=$beam \
    --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=true \
diff --git a/egs/wsj/s5/steps/nnet2/make_denlats.sh b/egs/wsj/s5/steps/nnet2/make_denlats.sh
index 099ced56d..eea853eb7 100755
--- a/egs/wsj/s5/steps/nnet2/make_denlats.sh
+++ b/egs/wsj/s5/steps/nnet2/make_denlats.sh
@@ -19,7 +19,7 @@ max_mem=20000000 # This will stop the processes getting too large.
 # by something like 5 or 10 to get real bytes (not sure why so large)
 num_threads=1
 online_ivector_dir=
-parallel_opts=
+parallel_opts= # ignored now
 feat_type=  # you can set this in order to run on top of delta features, although we don't
             # normally want to do this.
 # End configuration section.
@@ -45,7 +45,6 @@ if [ $# != 4 ]; then
   echo " # will (individually) finish reasonably soon."
   echo " --transform-dir # directory to find fMLLR transforms."
   echo " --num-threads # number of threads per decoding job"
-  echo " --parallel-opts # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
   exit 1;
 fi
@@ -156,7 +155,7 @@ trap "cleanup" INT QUIT TERM EXIT

 if [ $sub_split -eq 1 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_den.JOB.log \
    nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
    --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
    $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
@@ -182,7 +181,7 @@ else
     mkdir -p $dir/part
     feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
-    $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+    $cmd --num-threads $num_threads JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
      nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
      --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
      $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error &
diff --git a/egs/wsj/s5/steps/nnet2/retrain_fast.sh b/egs/wsj/s5/steps/nnet2/retrain_fast.sh
index fe7e6dea8..2e7131b82 100755
--- a/egs/wsj/s5/steps/nnet2/retrain_fast.sh
+++ b/egs/wsj/s5/steps/nnet2/retrain_fast.sh
@@ -58,10 +58,10 @@ target_multiplier=0 # Set this to e.g. 1.0 to enable perturbed training.
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
 # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 egs_opts=
@@ -100,9 +100,8 @@ if [ $# != 4 ]; then
   echo " --num-threads # Number of parallel threads per job (will affect results"
   echo " # as well as speed; may interact with batch size; if you increase"
   echo " # this, you may want to decrease the batch size."
-  echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that"
-  echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that"
+  echo " # use multiple threads... "
   echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads"
   echo " # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/retrain_simple2.sh b/egs/wsj/s5/steps/nnet2/retrain_simple2.sh
index 7b8d2f4e0..d3f5223b5 100755
--- a/egs/wsj/s5/steps/nnet2/retrain_simple2.sh
+++ b/egs/wsj/s5/steps/nnet2/retrain_simple2.sh
@@ -59,11 +59,11 @@ max_change_per_sample=0.075
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
+parallel_opts="--num-threads 16 --mem 1G"
 # by default we use 16 threads; this lets the queue know.
 # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 egs_opts=
@@ -101,9 +101,8 @@ if [ $# != 4 ]; then
   echo " --num-threads # Number of parallel threads per job (will affect results"
   echo " # as well as speed; may interact with batch size; if you increase"
   echo " # this, you may want to decrease the batch size."
-  echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that"
-  echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that"
+  echo " # use multiple threads... "
   echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads"
   echo " # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/retrain_tanh.sh b/egs/wsj/s5/steps/nnet2/retrain_tanh.sh
index 5d4ee61e6..f67cb1359 100755
--- a/egs/wsj/s5/steps/nnet2/retrain_tanh.sh
+++ b/egs/wsj/s5/steps/nnet2/retrain_tanh.sh
@@ -43,7 +43,7 @@ widen=0 # If specified, it will increase the hidden-layer dimension
 bias_stddev=0.5 # will be used for widen
 num_threads=16
-parallel_opts="-pe smp $num_threads" # using a smallish #threads by default, out of stability concerns.
+parallel_opts="--num-threads $num_threads" # using a smallish #threads by default, out of stability concerns.
 # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 # End configuration section.
@@ -75,7 +75,7 @@ if [ $# != 3 ]; then
   echo " --num-threads # Number of parallel threads per job (will affect results"
   echo " # as well as speed; may interact with batch size; if you increase"
   echo " # this, you may want to decrease the batch size."
- echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" echo " # use multiple threads." echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" echo " # should not get too large, e.g. >2k)." diff --git a/egs/wsj/s5/steps/nnet2/train_block.sh b/egs/wsj/s5/steps/nnet2/train_block.sh index cfc35e2f1..d65fdaa08 100755 --- a/egs/wsj/s5/steps/nnet2/train_block.sh +++ b/egs/wsj/s5/steps/nnet2/train_block.sh @@ -61,7 +61,7 @@ max_change=10.0 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if # specified.) num_threads=16 -parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know. +parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. cleanup=true egs_dir= @@ -104,9 +104,8 @@ if [ $# != 4 ]; then echo " --num-threads # Number of parallel threads per job (will affect results" echo " # as well as speed; may interact with batch size; if you increase" echo " # this, you may want to decrease the batch size." - echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" - echo " # use multiple threads... note, you might have to reduce mem_free,ram_free" - echo " # versus your defaults, because it gets multiplied by the -pe smp argument." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads... " echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." echo " --minibatch-size # Size of minibatch to process (note: product with --num-threads" echo " # should not get too large, e.g. >2k)." diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative.sh b/egs/wsj/s5/steps/nnet2/train_discriminative.sh index 4b02cbca7..177ad2fc4 100755 --- a/egs/wsj/s5/steps/nnet2/train_discriminative.sh +++ b/egs/wsj/s5/steps/nnet2/train_discriminative.sh @@ -37,7 +37,7 @@ io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one t num_threads=16 # this is the default but you may want to change it, e.g. to 1 if # using GPUs. -parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 4 threads; this lets the queue know. +parallel_opts="--num-threads 16 --mem 1G" # by default we use 4 threads; this lets the queue know. # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads. transform_dir= # If this is a SAT system, directory for transforms cleanup=true @@ -72,9 +72,8 @@ if [ $# != 6 ]; then echo " --num-threads # Number of parallel threads per job (will affect results" echo " # as well as speed; may interact with batch size; if you increase" echo " # this, you may want to decrease the batch size." - echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" - echo " # use multiple threads... note, you might have to reduce mem_free,ram_free" - echo " # versus your defaults, because it gets multiplied by the -pe smp argument." + echo " --parallel-opts # extra options to pass to e.g. queue.pl for processes that" + echo " # use multiple threads... " echo " --io-opts # Options given to e.g. queue.pl for jobs that do a lot of I/O." echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per" echo " # process." 
diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative2.sh b/egs/wsj/s5/steps/nnet2/train_discriminative2.sh
index 4196e0e78..85047efc5 100755
--- a/egs/wsj/s5/steps/nnet2/train_discriminative2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_discriminative2.sh
@@ -79,8 +79,7 @@ if [ $# != 2 ]; then
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size. With GPU, must be 1."
   echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "                      # use multiple threads."
   echo "  --stage             # Used to run a partially-completed training process from somewhere in"
   echo "                      # the middle."
   echo "  --criterion         # Training criterion: may be smbr, mmi or mpfe"
diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative_multilang2.sh b/egs/wsj/s5/steps/nnet2/train_discriminative_multilang2.sh
index e2d1f3b2c..9d1160638 100755
--- a/egs/wsj/s5/steps/nnet2/train_discriminative_multilang2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_discriminative_multilang2.sh
@@ -70,9 +70,8 @@ if [ $# -lt 3 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size. With GPU, must be 1."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --stage             # Used to run a partially-completed training process from somewhere in"
   echo "                      # the middle."
   echo "  --criterion         # Training criterion: may be smbr, mmi or mpfe"
diff --git a/egs/wsj/s5/steps/nnet2/train_more.sh b/egs/wsj/s5/steps/nnet2/train_more.sh
index 0a705ff0d..4c31aeaa1 100755
--- a/egs/wsj/s5/steps/nnet2/train_more.sh
+++ b/egs/wsj/s5/steps/nnet2/train_more.sh
@@ -39,7 +39,7 @@ shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of
 mix_up=0
 stage=-5
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 remove_egs=false
@@ -69,9 +69,8 @@ if [ $# != 3 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
   echo "  --num-iters-final <#iters|20> # Number of final iterations to give to nnet-combine-fast to "
diff --git a/egs/wsj/s5/steps/nnet2/train_more2.sh b/egs/wsj/s5/steps/nnet2/train_more2.sh
index 22de51770..9734d38f6 100755
--- a/egs/wsj/s5/steps/nnet2/train_more2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_more2.sh
@@ -43,7 +43,7 @@ num_jobs_nnet=4
 mix_up=0
 stage=-5
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
 cleanup=true
@@ -76,9 +76,8 @@ if [ $# != 3 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
   echo "  --num-iters-final <#iters|20> # Number of final iterations to give to nnet-combine-fast to "
diff --git a/egs/wsj/s5/steps/nnet2/train_multilang2.sh b/egs/wsj/s5/steps/nnet2/train_multilang2.sh
index 0f9ba74c7..04590a99d 100755
--- a/egs/wsj/s5/steps/nnet2/train_multilang2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_multilang2.sh
@@ -111,9 +111,8 @@ if [ $# -lt 6 -o $[$#%2] -ne 0 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --stage             # Used to run a partially-completed training process from somewhere in"
   echo "                      # the middle."
   exit 1;
diff --git a/egs/wsj/s5/steps/nnet2/train_multisplice_accel2.sh b/egs/wsj/s5/steps/nnet2/train_multisplice_accel2.sh
index 75937f82a..eaa0623a9 100755
--- a/egs/wsj/s5/steps/nnet2/train_multisplice_accel2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_multisplice_accel2.sh
@@ -76,11 +76,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
+parallel_opts="--num-threads 16 --mem 1G"
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -131,9 +131,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_multisplice_ensemble.sh b/egs/wsj/s5/steps/nnet2/train_multisplice_ensemble.sh
index 6bf567962..02f028041 100755
--- a/egs/wsj/s5/steps/nnet2/train_multisplice_ensemble.sh
+++ b/egs/wsj/s5/steps/nnet2/train_multisplice_ensemble.sh
@@ -74,11 +74,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
+parallel_opts="--num-threads 16 --mem 1G"
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -129,9 +129,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm.sh b/egs/wsj/s5/steps/nnet2/train_pnorm.sh
index aa5db0b14..87f0e2fc5 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm.sh
@@ -64,7 +64,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -116,9 +116,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh
index 78872766b..01dbe9b5d 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh
@@ -77,11 +77,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
+parallel_opts="--num-threads 16 --mem 1G"
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -129,9 +129,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_bottleneck_fast.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_bottleneck_fast.sh
index 5fdacec03..787c976c8 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_bottleneck_fast.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_bottleneck_fast.sh
@@ -70,10 +70,10 @@ target_multiplier=0 # Set this to e.g. 1.0 to enable perturbed training.
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -122,9 +122,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh
index 2c6812cfc..03f239b85 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh
@@ -62,7 +62,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -107,9 +107,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh
index 314a22588..6627e3b0a 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh
@@ -72,10 +72,10 @@ target_multiplier=0 # Set this to e.g. 1.0 to enable perturbed training.
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -124,9 +124,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice.sh
index 1cb372dd2..6672ee46a 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice.sh
@@ -77,11 +77,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
+parallel_opts="--num-threads 16 --mem 1G"
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -128,9 +128,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice2.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice2.sh
index 0b541000e..2708eb856 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice2.sh
@@ -72,11 +72,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
+parallel_opts="--num-threads 16 --mem 1G"
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -121,9 +121,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh
index e59c6d24e..6fd385b20 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh
@@ -74,11 +74,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
+parallel_opts="--num-threads 16 --mem 1G"
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -125,9 +125,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh
index 8aef77431..44639ebd2 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh
@@ -83,11 +83,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
+parallel_opts="--num-threads 16 --mem 1G"
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -135,9 +135,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_tanh.sh b/egs/wsj/s5/steps/nnet2/train_tanh.sh
index 8d403c698..ef4392d62 100755
--- a/egs/wsj/s5/steps/nnet2/train_tanh.sh
+++ b/egs/wsj/s5/steps/nnet2/train_tanh.sh
@@ -59,7 +59,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -108,9 +108,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh b/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh
index 6bb6bc646..96aeaa29a 100755
--- a/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh
+++ b/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh
@@ -63,7 +63,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -108,9 +108,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_tanh_fast.sh b/egs/wsj/s5/steps/nnet2/train_tanh_fast.sh
index 21df49cc6..7fd604131 100755
--- a/egs/wsj/s5/steps/nnet2/train_tanh_fast.sh
+++ b/egs/wsj/s5/steps/nnet2/train_tanh_fast.sh
@@ -71,9 +71,9 @@ precondition_rank_out=60 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
-combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8" # queue options for the "combine" stage.
 combine_num_threads=8
 cleanup=true
 egs_dir=
@@ -122,9 +122,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/update_nnet.sh b/egs/wsj/s5/steps/nnet2/update_nnet.sh
index 6633bf6b8..e621bb6c7 100755
--- a/egs/wsj/s5/steps/nnet2/update_nnet.sh
+++ b/egs/wsj/s5/steps/nnet2/update_nnet.sh
@@ -48,7 +48,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=false
 egs_dir=
@@ -81,9 +81,8 @@ if [ $# != 5 ]; then
   echo "  --num-threads       # Number of parallel threads per job (will affect results"
   echo "                      # as well as speed; may interact with batch size; if you increase"
   echo "                      # this, you may want to decrease the batch size."
-  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
-  echo "                      # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                      # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts     # extra options to pass to e.g. queue.pl for processes that"
+  echo "                      # use multiple threads."
   echo "  --io-opts           # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size    # Size of minibatch to process (note: product with --num-threads"
   echo "                      # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/online/nnet2/make_denlats.sh b/egs/wsj/s5/steps/online/nnet2/make_denlats.sh
index f0a8c7639..280c75b50 100755
--- a/egs/wsj/s5/steps/online/nnet2/make_denlats.sh
+++ b/egs/wsj/s5/steps/online/nnet2/make_denlats.sh
@@ -19,7 +19,7 @@ max_mem=20000000 # This will stop the processes getting too large.
 # This is in bytes, but not "real" bytes-- you have to multiply
 # by something like 5 or 10 to get real bytes (not sure why so large)
 num_threads=1
-parallel_opts=
+parallel_opts= # ignored now.
 # End configuration section.

 echo "$0 $@"  # Print the command line for logging
@@ -41,7 +41,6 @@ if [ $# != 4 ]; then
   echo "                      # large databases so your jobs will be smaller and"
   echo "                      # will (individually) finish reasonably soon."
   echo "  --num-threads       # number of threads per decoding job"
-  echo "  --parallel-opts     # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
   exit 1;
 fi

@@ -115,7 +114,7 @@ trap "cleanup" INT QUIT TERM EXIT

 if [ $sub_split -eq 1 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_den.JOB.log \
     nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
     --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
     $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
@@ -141,7 +140,7 @@ else
     mkdir -p $dir/part
     feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`

-    $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+    $cmd --num-threads $num_threads JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
      nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
      --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
      $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error &
diff --git a/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh b/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh
index 9109fef6f..22250ae9e 100755
--- a/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh
+++ b/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh
@@ -36,7 +36,7 @@ cleanup=true
 min_gaussian_weight=0.0001
 remove_low_count_gaussians=true # set this to false if you need #gauss to stay fixed.
 num_threads=32
-parallel_opts="-pe smp 32"
+parallel_opts= # ignored now.
 online_cmvn_config=conf/online_cmvn.conf
 # End configuration section.
@@ -68,8 +68,6 @@ if [ $# != 4 ]; then
   echo "                          # in initialization phase (then split)"
   echo "  --num-threads           # number of threads to use in initialization"
-  echo "                          # phase (must match with parallel-opts option)"
-  echo "  --parallel-opts         # Option should match number of threads in"
-  echo "                          # --num-threads option above"
+  echo "                          # phase"
   echo "  --min-gaussian-weight   # min Gaussian weight allowed in GMM"
   echo "                          # initialization (this relatively high"
   echo "                          # value keeps counts fairly even)"
@@ -115,7 +113,7 @@ if [ $stage -le -2 ]; then
   echo "$0: starting from $num_gauss_init Gaussians, reaching $num_gauss;"
   echo "$0: for $num_iters_init iterations, using at most $num_frames frames of data"

-  $cmd $parallel_opts $dir/log/gmm_init.log \
+  $cmd --num-threads $num_threads $dir/log/gmm_init.log \
     gmm-global-init-from-feats --num-threads=$num_threads --num-frames=$num_frames \
     --min-gaussian-weight=$min_gaussian_weight \
     --num-gauss=$num_gauss --num-gauss-init=$num_gauss_init --num-iters=$num_iters_init \
diff --git a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh
index 0473ca29d..9b354c075 100755
--- a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh
+++ b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh
@@ -51,7 +51,7 @@ min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
 subsample=2 # This speeds up the training: training on every 2nd feature
             # (configurable) Since the features are highly correlated across
             # frames, we don't expect to lose too much from this.
-parallel_opts= #Task running engine configuration
+parallel_opts= # ignored now.
 cleanup=true
 # End configuration section.

@@ -76,8 +76,6 @@ if [ $# != 3 ]; then
   echo "  --stage                 # To control partial reruns"
   echo "  --num-gselect           # Number of Gaussians to select using"
   echo "                          # diagonal model."
-  echo "  --parallel-opts         # e.g. '-pe smp 16 ', the number should be equivalent"
-  echo "                          # to --num-processes * --num-threads"
   exit 1;
 fi

@@ -107,11 +105,6 @@ gmm_feats="ark,s,cs:apply-cmvn-online --config=$dir/online_cmvn.conf $dir/global

 feats="ark,s,cs:splice-feats $splice_opts scp:$sdata/JOB/feats.scp ark:- | transform-feats $dir/final.mat ark:- ark:- | subsample-feats --n=$subsample ark:- ark:- |"

-#We will specify our own parallel-opts only in cases user does not supply anything.
-#If user does specify parallel-opts, then we will assume user knows what's right
-if [ -z "$parallel_opts" ] ; then
-  parallel_opts="-pe smp $[$num_threads*$num_processes]"
-fi

 # Initialize the i-vector extractor using the input GMM, which is converted to
 # full because that's what the i-vector extractor expects.  Note: we have to do
@@ -157,7 +150,7 @@ while [ $x -lt $num_iters ]; do
   echo "Accumulating stats (pass $x)"
   for g in $(seq $nj); do
     start=$[$num_processes*($g-1)+1]
-    $cmd $parallel_opts $dir/log/acc.$x.$g.log \
+    $cmd --num-threads $[$num_threads*$num_processes] $dir/log/acc.$x.$g.log \
       ivector-extractor-sum-accs --parallel=true "${Args[@]:$start:$num_processes}" \
       $dir/acc.$x.$g || touch $dir/.error &
   done
@@ -178,7 +171,4 @@ while [ $x -lt $num_iters ]; do
-    # The parallel-opts was either specified by
-    # the user or we computed it correctly in
-    # tge previous stages
-    $cmd $parallel_opts $dir/log/update.$x.log \
+    $cmd --num-threads $[$num_threads*$num_processes] $dir/log/update.$x.log \
       ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1;
     rm $dir/acc.$x.*
     if $cleanup; then
diff --git a/egs/wsj/s5/utils/run.pl b/egs/wsj/s5/utils/run.pl
index 86e191d3f..03fd44850 100755
--- a/egs/wsj/s5/utils/run.pl
+++ b/egs/wsj/s5/utils/run.pl
@@ -80,9 +80,10 @@ if (@ARGV > 0) {
     }
   }
 }
-if ($ignored_opts ne "") {
-  print STDERR "run.pl: Warning: ignoring options \"$ignored_opts\"\n";
-}
+# Users found this message confusing, so we no longer print it.
+# if ($ignored_opts ne "") {
+#   print STDERR "run.pl: Warning: ignoring options \"$ignored_opts\"\n";
+# }

 if ($max_jobs_run == -1) { # If --max-jobs-run option not set,
   # then work out the number of processors if possible,
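(Taken together, the pattern throughout this patch is uniform: scripts request resources with backend-independent flags, queue.pl translates them into whatever the cluster engine needs, and run.pl simply accepts and ignores them, which is why the warning above was dropped. A minimal before/after sketch of the option style; the log path and command below are illustrative only:

    # old, SGE-specific resource string:
    #   queue.pl -pe smp 16 -l ram_free=1G,mem_free=1G ...
    # new, engine-independent flags, also accepted (and ignored) by run.pl:
    cmd="queue.pl --num-threads 16 --mem 1G"
    $cmd JOB=1:4 exp/example/log/demo.JOB.log \
      echo "running as job JOB with 16 threads and 1G memory requested"
)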