diff --git a/egs/ami/s5/local/ami_split_segments.pl b/egs/ami/s5/local/ami_split_segments.pl
index 0bdc98196..251394b5a 100755
--- a/egs/ami/s5/local/ami_split_segments.pl
+++ b/egs/ami/s5/local/ami_split_segments.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2014  University of Edinburgh (Author: Pawel Swietojanski)
 
diff --git a/egs/ami/s5/local/convert2stm.pl b/egs/ami/s5/local/convert2stm.pl
index 703504344..f0b85c65b 100755
--- a/egs/ami/s5/local/convert2stm.pl
+++ b/egs/ami/s5/local/convert2stm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 #           2013  University of Edinburgh (Author: Pawel Swietojanski)
diff --git a/egs/ami/s5/local/fisher_map_words.pl b/egs/ami/s5/local/fisher_map_words.pl
index ce32fd28d..f3a6c6ac6 100755
--- a/egs/ami/s5/local/fisher_map_words.pl
+++ b/egs/ami/s5/local/fisher_map_words.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2013  Arnab Ghoshal
 
diff --git a/egs/ami/s5/local/run_dnn.sh b/egs/ami/s5/local/run_dnn.sh
index 9f09e8501..4ddde8b1f 100755
--- a/egs/ami/s5/local/run_dnn.sh
+++ b/egs/ami/s5/local/run_dnn.sh
@@ -67,10 +67,10 @@ if [ $stage -le 2 ]; then
     $data_fmllr/$mic/train_tr90 $data_fmllr/$mic/train_cv10 data/lang $ali $ali $dir || exit 1;
   # Decode (reuse HCLG graph)
   steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
-    --num-threads 3 --parallel-opts "-pe smp 4" \
+    --num-threads 3 \
     $graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix} || exit 1;
   steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf --acwt 0.1 \
-    --num-threads 3 --parallel-opts "-pe smp 4" \
+    --num-threads 3 \
     $graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix} || exit 1;
 fi
 
@@ -96,11 +96,11 @@ if [ $stage -le 4 ]; then
   # Decode (reuse HCLG graph)
   for ITER in 1; do
     steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
-      --num-threads 3 --parallel-opts "-pe smp 4" \
+      --num-threads 3 \
       --nnet $dir/${ITER}.nnet --acwt $acwt \
       $graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix} || exit 1;
     steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
-      --num-threads 3 --parallel-opts "-pe smp 4" \
+      --num-threads 3 \
       --nnet $dir/${ITER}.nnet --acwt $acwt \
       $graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix} || exit 1;
   done 
@@ -126,11 +126,11 @@ if [ $stage -le 6 ]; then
   # Decode (reuse HCLG graph)
   for ITER in 1 2 3 4; do
     steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
-      --num-threads 3 --parallel-opts "-pe smp 4" \
+      --num-threads 3 \
       --nnet $dir/${ITER}.nnet --acwt $acwt \
       $graph_dir $data_fmllr/$mic/dev $dir/decode_dev_${lm_suffix}_$ITER || exit 1;
     steps/nnet/decode.sh --nj 6 --cmd "$decode_cmd" --config conf/decode_dnn.conf \
-      --num-threads 3 --parallel-opts "-pe smp 4" \
+      --num-threads 3 \
       --nnet $dir/${ITER}.nnet --acwt $acwt \
       $graph_dir $data_fmllr/$mic/eval $dir/decode_eval_${lm_suffix}_$ITER || exit 1;
   done 
diff --git a/egs/aspire/s5/local/fisher_fix_speakerid.pl b/egs/aspire/s5/local/fisher_fix_speakerid.pl
index 8933055bd..d38abc474 100755
--- a/egs/aspire/s5/local/fisher_fix_speakerid.pl
+++ b/egs/aspire/s5/local/fisher_fix_speakerid.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Author: Peng Qi (pengqi@cs.stanford.edu)
 # This script maps Switchboard speaker IDs to the true physical speakers
diff --git a/egs/aspire/s5/local/multi_condition/corrupt.py b/egs/aspire/s5/local/multi_condition/corrupt.py
index 9b6c07e86..6d98a83a5 100755
--- a/egs/aspire/s5/local/multi_condition/corrupt.py
+++ b/egs/aspire/s5/local/multi_condition/corrupt.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2014  Johns Hopkins University (Authors: Vijayaditya Peddinti).  Apache 2.0.
 
 # corrupts the wave files supplied via input pipe with the specified
diff --git a/egs/aspire/s5/local/multi_condition/create_uniform_segments.py b/egs/aspire/s5/local/multi_condition/create_uniform_segments.py
old mode 100644
new mode 100755
index f50776277..68280500f
--- a/egs/aspire/s5/local/multi_condition/create_uniform_segments.py
+++ b/egs/aspire/s5/local/multi_condition/create_uniform_segments.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2014  Johns Hopkins University (Authors: Daniel Povey, Vijayaditya Peddinti).  Apache 2.0.
 
 # creates a segments file in the provided data directory 
diff --git a/egs/aspire/s5/local/multi_condition/get_air_file_patterns.py b/egs/aspire/s5/local/multi_condition/get_air_file_patterns.py
index a68062482..cc06f5861 100755
--- a/egs/aspire/s5/local/multi_condition/get_air_file_patterns.py
+++ b/egs/aspire/s5/local/multi_condition/get_air_file_patterns.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2014  Johns Hopkins University (Authors: Vijayaditya Peddinti).  Apache 2.0.
 
 # script to generate the file_patterns of the AIR database
diff --git a/egs/aspire/s5/local/multi_condition/get_reverberate_parameter_lists.py b/egs/aspire/s5/local/multi_condition/get_reverberate_parameter_lists.py
index c4b099e64..3d58b8811 100755
--- a/egs/aspire/s5/local/multi_condition/get_reverberate_parameter_lists.py
+++ b/egs/aspire/s5/local/multi_condition/get_reverberate_parameter_lists.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2014  Johns Hopkins University (Authors: Vijayaditya Peddinti).  Apache 2.0.
 # script to generate multicondition training data / dev data / test data
 import argparse, glob, math, os, random, scipy.io.wavfile, sys
diff --git a/egs/aspire/s5/local/multi_condition/normalize_wavs.py b/egs/aspire/s5/local/multi_condition/normalize_wavs.py
index 42317ecd2..362401b97 100755
--- a/egs/aspire/s5/local/multi_condition/normalize_wavs.py
+++ b/egs/aspire/s5/local/multi_condition/normalize_wavs.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2014  Johns Hopkins University (Authors: Vijayaditya Peddinti).  Apache 2.0.
 
 # normalizes the wave files provided in input file list with a common scaling factor
diff --git a/egs/aspire/s5/local/multi_condition/read_rir.py b/egs/aspire/s5/local/multi_condition/read_rir.py
index b05d53420..7fb16b3ff 100755
--- a/egs/aspire/s5/local/multi_condition/read_rir.py
+++ b/egs/aspire/s5/local/multi_condition/read_rir.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2014  Johns Hopkins University (Authors: Vijayaditya Peddinti).  Apache 2.0.
 
 # script to read rir files from rwcp/air/reverb2014 databases
diff --git a/egs/aspire/s5/local/multi_condition/resolve_ctm_overlaps.py b/egs/aspire/s5/local/multi_condition/resolve_ctm_overlaps.py
old mode 100644
new mode 100755
index 2a0871318..06f50c421
--- a/egs/aspire/s5/local/multi_condition/resolve_ctm_overlaps.py
+++ b/egs/aspire/s5/local/multi_condition/resolve_ctm_overlaps.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2014  Johns Hopkins University (Authors: Daniel Povey, Vijayaditya Peddinti).  Apache 2.0.
 
 # Script to combine ctms for uniformly segmented, with overlaps
diff --git a/egs/aurora4/s5/cmd.sh b/egs/aurora4/s5/cmd.sh
index 328b426ca..139b2cd6c 100644
--- a/egs/aurora4/s5/cmd.sh
+++ b/egs/aurora4/s5/cmd.sh
@@ -7,10 +7,10 @@
 
 #a) JHU cluster options
 export train_cmd="queue.pl -l arch=*64"
-export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
-export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
-export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
-export cuda_cmd="queue.pl -l gpu=1"
+export decode_cmd="queue.pl -l arch=*64 --mem 2G"
+export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
+export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
+export cuda_cmd="queue.pl --gpu 1"
 
 
 #b) BUT cluster options
diff --git a/egs/aurora4/s5/local/aurora2flist.pl b/egs/aurora4/s5/local/aurora2flist.pl
index 3bf644bb6..255432bbc 100755
--- a/egs/aurora4/s5/local/aurora2flist.pl
+++ b/egs/aurora4/s5/local/aurora2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/aurora4/s5/local/cstr_ndx2flist.pl b/egs/aurora4/s5/local/cstr_ndx2flist.pl
index 101834e86..d19db421a 100755
--- a/egs/aurora4/s5/local/cstr_ndx2flist.pl
+++ b/egs/aurora4/s5/local/cstr_ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/aurora4/s5/local/dict/add_counts.pl b/egs/aurora4/s5/local/dict/add_counts.pl
index 409277c72..a2ace7e9a 100755
--- a/egs/aurora4/s5/local/dict/add_counts.pl
+++ b/egs/aurora4/s5/local/dict/add_counts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 # Add counts to an oovlist.
diff --git a/egs/aurora4/s5/local/dict/count_rules.pl b/egs/aurora4/s5/local/dict/count_rules.pl
index 2805e98c3..1c6cfc4a5 100755
--- a/egs/aurora4/s5/local/dict/count_rules.pl
+++ b/egs/aurora4/s5/local/dict/count_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes the output of score_prons.pl and collates
 # it for each (rule, destress) pair so that we get the
diff --git a/egs/aurora4/s5/local/dict/filter_dict.pl b/egs/aurora4/s5/local/dict/filter_dict.pl
index 1210bb5e6..5e32823ef 100755
--- a/egs/aurora4/s5/local/dict/filter_dict.pl
+++ b/egs/aurora4/s5/local/dict/filter_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 # This program reads and writes either a dictionary or just a list
diff --git a/egs/aurora4/s5/local/dict/find_acronyms.pl b/egs/aurora4/s5/local/dict/find_acronyms.pl
index ed4655afa..55e474c40 100755
--- a/egs/aurora4/s5/local/dict/find_acronyms.pl
+++ b/egs/aurora4/s5/local/dict/find_acronyms.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Reads a dictionary, and prints out a list of words that seem to be pronounced
 # as acronyms (not including plurals of acronyms, just acronyms).  Uses
diff --git a/egs/aurora4/s5/local/dict/get_acronym_prons.pl b/egs/aurora4/s5/local/dict/get_acronym_prons.pl
index 3f9936818..6294b7046 100755
--- a/egs/aurora4/s5/local/dict/get_acronym_prons.pl
+++ b/egs/aurora4/s5/local/dict/get_acronym_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Reads a dictionary (for prons of letters), and an OOV list,
 # and puts out candidate pronunciations of words in that list
diff --git a/egs/aurora4/s5/local/dict/get_candidate_prons.pl b/egs/aurora4/s5/local/dict/get_candidate_prons.pl
index b13efd203..b091c6d76 100755
--- a/egs/aurora4/s5/local/dict/get_candidate_prons.pl
+++ b/egs/aurora4/s5/local/dict/get_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This script takes three command-line arguments (typically files, or "-"):
 # the suffix rules (as output by get_rules.pl), the rule-hierarchy 
diff --git a/egs/aurora4/s5/local/dict/get_rule_hierarchy.pl b/egs/aurora4/s5/local/dict/get_rule_hierarchy.pl
index 35805b46b..d7c13a8df 100755
--- a/egs/aurora4/s5/local/dict/get_rule_hierarchy.pl
+++ b/egs/aurora4/s5/local/dict/get_rule_hierarchy.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 #This reads in rules, of the form put out by get_rules.pl, e.g.:
 # ERT,,ER0 T,
diff --git a/egs/aurora4/s5/local/dict/get_rules.pl b/egs/aurora4/s5/local/dict/get_rules.pl
index a5b57b088..b10eccc91 100755
--- a/egs/aurora4/s5/local/dict/get_rules.pl
+++ b/egs/aurora4/s5/local/dict/get_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program creates suggested suffix rules from a dictionary.
 # It outputs quadruples of the form:
diff --git a/egs/aurora4/s5/local/dict/limit_candidate_prons.pl b/egs/aurora4/s5/local/dict/limit_candidate_prons.pl
index ceff9fbad..b01218f6e 100755
--- a/egs/aurora4/s5/local/dict/limit_candidate_prons.pl
+++ b/egs/aurora4/s5/local/dict/limit_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program enforces the rule that
 # if a "more specific" rule applies, we cannot use the more general rule.
diff --git a/egs/aurora4/s5/local/dict/reverse_candidates.pl b/egs/aurora4/s5/local/dict/reverse_candidates.pl
index d5c5effc2..5b7aabd8a 100755
--- a/egs/aurora4/s5/local/dict/reverse_candidates.pl
+++ b/egs/aurora4/s5/local/dict/reverse_candidates.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl,
 # which is 7-tuples, one per line, of the form:
diff --git a/egs/aurora4/s5/local/dict/reverse_dict.pl b/egs/aurora4/s5/local/dict/reverse_dict.pl
index 75681711b..2cd38c54b 100755
--- a/egs/aurora4/s5/local/dict/reverse_dict.pl
+++ b/egs/aurora4/s5/local/dict/reverse_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Used in conjunction with get_rules.pl
 # example input line: XANTHE  Z AE1 N DH
diff --git a/egs/aurora4/s5/local/dict/score_prons.pl b/egs/aurora4/s5/local/dict/score_prons.pl
index fd5a004d8..6aa72e421 100755
--- a/egs/aurora4/s5/local/dict/score_prons.pl
+++ b/egs/aurora4/s5/local/dict/score_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes candidate prons from "get_candidate_prons.pl" or
 # "limit_candidate_prons.pl", and a reference dictionary covering those words,
diff --git a/egs/aurora4/s5/local/dict/score_rules.pl b/egs/aurora4/s5/local/dict/score_rules.pl
index 8d165f7f1..252d94677 100755
--- a/egs/aurora4/s5/local/dict/score_rules.pl
+++ b/egs/aurora4/s5/local/dict/score_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes the output of count_rules.pl, which is tuples
 # of the form
diff --git a/egs/aurora4/s5/local/dict/select_candidate_prons.pl b/egs/aurora4/s5/local/dict/select_candidate_prons.pl
index d0018c98a..a24ccdd4d 100755
--- a/egs/aurora4/s5/local/dict/select_candidate_prons.pl
+++ b/egs/aurora4/s5/local/dict/select_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl
 # or reverse_candidates.pl, which is 7-tuples, one per line, of the form:
diff --git a/egs/aurora4/s5/local/find_transcripts.pl b/egs/aurora4/s5/local/find_transcripts.pl
index 0e5d71f79..6429411b8 100755
--- a/egs/aurora4/s5/local/find_transcripts.pl
+++ b/egs/aurora4/s5/local/find_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/aurora4/s5/local/flist2scp.pl b/egs/aurora4/s5/local/flist2scp.pl
index 6831d2d7b..234e4add1 100755
--- a/egs/aurora4/s5/local/flist2scp.pl
+++ b/egs/aurora4/s5/local/flist2scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/aurora4/s5/local/flist2scp_12.pl b/egs/aurora4/s5/local/flist2scp_12.pl
index 11f33a189..0c5fe4cc9 100755
--- a/egs/aurora4/s5/local/flist2scp_12.pl
+++ b/egs/aurora4/s5/local/flist2scp_12.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/aurora4/s5/local/ndx2flist.pl b/egs/aurora4/s5/local/ndx2flist.pl
index b05704293..48fc3dec1 100755
--- a/egs/aurora4/s5/local/ndx2flist.pl
+++ b/egs/aurora4/s5/local/ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/aurora4/s5/local/normalize_transcript.pl b/egs/aurora4/s5/local/normalize_transcript.pl
index 9dd67af3d..09cee0617 100755
--- a/egs/aurora4/s5/local/normalize_transcript.pl
+++ b/egs/aurora4/s5/local/normalize_transcript.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/conf/bnf/config_full.py b/egs/babel/s5/conf/bnf/config_full.py
old mode 100644
new mode 100755
index 569a741b7..5ea3ddbb1
--- a/egs/babel/s5/conf/bnf/config_full.py
+++ b/egs/babel/s5/conf/bnf/config_full.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #################################################
 ## PTDNN - Python Toolkit for Deep Neural Network
 ## Author: Yajie Miao
diff --git a/egs/babel/s5/conf/bnf/config_limited.py b/egs/babel/s5/conf/bnf/config_limited.py
old mode 100644
new mode 100755
index 21c62ea4b..f63c3640d
--- a/egs/babel/s5/conf/bnf/config_limited.py
+++ b/egs/babel/s5/conf/bnf/config_limited.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #################################################
 ## PTDNN - Python Toolkit for Deep Neural Network
 ## Author: Yajie Miao
diff --git a/egs/babel/s5/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5/local/annotated_kwlist_to_KWs.pl
index 58f31c985..198da36da 100755
--- a/egs/babel/s5/local/annotated_kwlist_to_KWs.pl
+++ b/egs/babel/s5/local/annotated_kwlist_to_KWs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5/local/augment_original_stm.pl b/egs/babel/s5/local/augment_original_stm.pl
index 9058ae314..70b9ef625 100755
--- a/egs/babel/s5/local/augment_original_stm.pl
+++ b/egs/babel/s5/local/augment_original_stm.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (Author: Jan Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5/local/build_edit_distance_fst.pl b/egs/babel/s5/local/build_edit_distance_fst.pl
index aa6a6317c..51c466677 100755
--- a/egs/babel/s5/local/build_edit_distance_fst.pl
+++ b/egs/babel/s5/local/build_edit_distance_fst.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5/local/count_to_logprob.pl b/egs/babel/s5/local/count_to_logprob.pl
index a3eba3c00..7d7793218 100755
--- a/egs/babel/s5/local/count_to_logprob.pl
+++ b/egs/babel/s5/local/count_to_logprob.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5/local/cstr_ndx2flist.pl b/egs/babel/s5/local/cstr_ndx2flist.pl
index 101834e86..d19db421a 100755
--- a/egs/babel/s5/local/cstr_ndx2flist.pl
+++ b/egs/babel/s5/local/cstr_ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/babel/s5/local/dict/add_counts.pl b/egs/babel/s5/local/dict/add_counts.pl
index 409277c72..a2ace7e9a 100755
--- a/egs/babel/s5/local/dict/add_counts.pl
+++ b/egs/babel/s5/local/dict/add_counts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 # Add counts to an oovlist.
diff --git a/egs/babel/s5/local/dict/count_rules.pl b/egs/babel/s5/local/dict/count_rules.pl
index 2805e98c3..1c6cfc4a5 100755
--- a/egs/babel/s5/local/dict/count_rules.pl
+++ b/egs/babel/s5/local/dict/count_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes the output of score_prons.pl and collates
 # it for each (rule, destress) pair so that we get the
diff --git a/egs/babel/s5/local/dict/filter_dict.pl b/egs/babel/s5/local/dict/filter_dict.pl
index 1210bb5e6..5e32823ef 100755
--- a/egs/babel/s5/local/dict/filter_dict.pl
+++ b/egs/babel/s5/local/dict/filter_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 # This program reads and writes either a dictionary or just a list
diff --git a/egs/babel/s5/local/dict/find_acronyms.pl b/egs/babel/s5/local/dict/find_acronyms.pl
index ed4655afa..55e474c40 100755
--- a/egs/babel/s5/local/dict/find_acronyms.pl
+++ b/egs/babel/s5/local/dict/find_acronyms.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Reads a dictionary, and prints out a list of words that seem to be pronounced
 # as acronyms (not including plurals of acronyms, just acronyms).  Uses
diff --git a/egs/babel/s5/local/dict/get_acronym_prons.pl b/egs/babel/s5/local/dict/get_acronym_prons.pl
index 3f9936818..6294b7046 100755
--- a/egs/babel/s5/local/dict/get_acronym_prons.pl
+++ b/egs/babel/s5/local/dict/get_acronym_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Reads a dictionary (for prons of letters), and an OOV list,
 # and puts out candidate pronunciations of words in that list
diff --git a/egs/babel/s5/local/dict/get_candidate_prons.pl b/egs/babel/s5/local/dict/get_candidate_prons.pl
index b13efd203..b091c6d76 100755
--- a/egs/babel/s5/local/dict/get_candidate_prons.pl
+++ b/egs/babel/s5/local/dict/get_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This script takes three command-line arguments (typically files, or "-"):
 # the suffix rules (as output by get_rules.pl), the rule-hierarchy 
diff --git a/egs/babel/s5/local/dict/get_rule_hierarchy.pl b/egs/babel/s5/local/dict/get_rule_hierarchy.pl
index 35805b46b..d7c13a8df 100755
--- a/egs/babel/s5/local/dict/get_rule_hierarchy.pl
+++ b/egs/babel/s5/local/dict/get_rule_hierarchy.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 #This reads in rules, of the form put out by get_rules.pl, e.g.:
 # ERT,,ER0 T,
diff --git a/egs/babel/s5/local/dict/get_rules.pl b/egs/babel/s5/local/dict/get_rules.pl
index a5b57b088..b10eccc91 100755
--- a/egs/babel/s5/local/dict/get_rules.pl
+++ b/egs/babel/s5/local/dict/get_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program creates suggested suffix rules from a dictionary.
 # It outputs quadruples of the form:
diff --git a/egs/babel/s5/local/dict/limit_candidate_prons.pl b/egs/babel/s5/local/dict/limit_candidate_prons.pl
index ceff9fbad..b01218f6e 100755
--- a/egs/babel/s5/local/dict/limit_candidate_prons.pl
+++ b/egs/babel/s5/local/dict/limit_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program enforces the rule that
 # if a "more specific" rule applies, we cannot use the more general rule.
diff --git a/egs/babel/s5/local/dict/reverse_candidates.pl b/egs/babel/s5/local/dict/reverse_candidates.pl
index d5c5effc2..5b7aabd8a 100755
--- a/egs/babel/s5/local/dict/reverse_candidates.pl
+++ b/egs/babel/s5/local/dict/reverse_candidates.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl,
 # which is 7-tuples, one per line, of the form:
diff --git a/egs/babel/s5/local/dict/reverse_dict.pl b/egs/babel/s5/local/dict/reverse_dict.pl
index 75681711b..2cd38c54b 100755
--- a/egs/babel/s5/local/dict/reverse_dict.pl
+++ b/egs/babel/s5/local/dict/reverse_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Used in conjunction with get_rules.pl
 # example input line: XANTHE  Z AE1 N DH
diff --git a/egs/babel/s5/local/dict/score_prons.pl b/egs/babel/s5/local/dict/score_prons.pl
index fd5a004d8..6aa72e421 100755
--- a/egs/babel/s5/local/dict/score_prons.pl
+++ b/egs/babel/s5/local/dict/score_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes candidate prons from "get_candidate_prons.pl" or
 # "limit_candidate_prons.pl", and a reference dictionary covering those words,
diff --git a/egs/babel/s5/local/dict/score_rules.pl b/egs/babel/s5/local/dict/score_rules.pl
index 8d165f7f1..252d94677 100755
--- a/egs/babel/s5/local/dict/score_rules.pl
+++ b/egs/babel/s5/local/dict/score_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes the output of count_rules.pl, which is tuples
 # of the form
diff --git a/egs/babel/s5/local/dict/select_candidate_prons.pl b/egs/babel/s5/local/dict/select_candidate_prons.pl
index d0018c98a..a24ccdd4d 100755
--- a/egs/babel/s5/local/dict/select_candidate_prons.pl
+++ b/egs/babel/s5/local/dict/select_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl
 # or reverse_candidates.pl, which is 7-tuples, one per line, of the form:
diff --git a/egs/babel/s5/local/extract_oov_words.pl b/egs/babel/s5/local/extract_oov_words.pl
index 567392caa..fbb6e9528 100755
--- a/egs/babel/s5/local/extract_oov_words.pl
+++ b/egs/babel/s5/local/extract_oov_words.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5/local/filter_keywords.pl b/egs/babel/s5/local/filter_keywords.pl
index a20be87d4..a724ad77f 100755
--- a/egs/babel/s5/local/filter_keywords.pl
+++ b/egs/babel/s5/local/filter_keywords.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 use strict;
 use warnings;
diff --git a/egs/babel/s5/local/find_transcripts.pl b/egs/babel/s5/local/find_transcripts.pl
index 0e5d71f79..6429411b8 100755
--- a/egs/babel/s5/local/find_transcripts.pl
+++ b/egs/babel/s5/local/find_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/local/fix_kwslist.pl b/egs/babel/s5/local/fix_kwslist.pl
index 682c4e86d..29afc73e4 100755
--- a/egs/babel/s5/local/fix_kwslist.pl
+++ b/egs/babel/s5/local/fix_kwslist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen, Jan Trmal)
 # Apache 2.0.
diff --git a/egs/babel/s5/local/flist2scp.pl b/egs/babel/s5/local/flist2scp.pl
index 6831d2d7b..234e4add1 100755
--- a/egs/babel/s5/local/flist2scp.pl
+++ b/egs/babel/s5/local/flist2scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/local/gridsearch.pl b/egs/babel/s5/local/gridsearch.pl
index a44d0197c..7b2ad530f 100755
--- a/egs/babel/s5/local/gridsearch.pl
+++ b/egs/babel/s5/local/gridsearch.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 
 use warnings;
 use strict;
diff --git a/egs/babel/s5/local/gridsearch2.pl b/egs/babel/s5/local/gridsearch2.pl
old mode 100644
new mode 100755
index 882cf816a..6645743c1
--- a/egs/babel/s5/local/gridsearch2.pl
+++ b/egs/babel/s5/local/gridsearch2.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 
 use warnings;
 use strict;
diff --git a/egs/babel/s5/local/kwords2indices.pl b/egs/babel/s5/local/kwords2indices.pl
index 176fc1354..47cc3dc27 100755
--- a/egs/babel/s5/local/kwords2indices.pl
+++ b/egs/babel/s5/local/kwords2indices.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5/local/kws_oracle_threshold.pl b/egs/babel/s5/local/kws_oracle_threshold.pl
index 2d2f22ea4..e8ec21994 100755
--- a/egs/babel/s5/local/kws_oracle_threshold.pl
+++ b/egs/babel/s5/local/kws_oracle_threshold.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5/local/make_lexicon_fst_special.pl b/egs/babel/s5/local/make_lexicon_fst_special.pl
index 9e13a910c..976c28c02 100755
--- a/egs/babel/s5/local/make_lexicon_fst_special.pl
+++ b/egs/babel/s5/local/make_lexicon_fst_special.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (author: Daniel Povey)
 
 # makes lexicon FST -- special version only for use in keyword search
diff --git a/egs/babel/s5/local/naive_comb.pl b/egs/babel/s5/local/naive_comb.pl
index a8a9f17ba..ef43f27ef 100755
--- a/egs/babel/s5/local/naive_comb.pl
+++ b/egs/babel/s5/local/naive_comb.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5/local/ndx2flist.pl b/egs/babel/s5/local/ndx2flist.pl
index b05704293..48fc3dec1 100755
--- a/egs/babel/s5/local/ndx2flist.pl
+++ b/egs/babel/s5/local/ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/local/normalize_transcript.pl b/egs/babel/s5/local/normalize_transcript.pl
index 9dd67af3d..09cee0617 100755
--- a/egs/babel/s5/local/normalize_transcript.pl
+++ b/egs/babel/s5/local/normalize_transcript.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5/local/prepare_acoustic_training_data.pl b/egs/babel/s5/local/prepare_acoustic_training_data.pl
index 2129522de..6cf875d3a 100755
--- a/egs/babel/s5/local/prepare_acoustic_training_data.pl
+++ b/egs/babel/s5/local/prepare_acoustic_training_data.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 
 ########################################################################
diff --git a/egs/babel/s5/local/prepare_lexicon.pl b/egs/babel/s5/local/prepare_lexicon.pl
index 11e19b6cb..721322f68 100755
--- a/egs/babel/s5/local/prepare_lexicon.pl
+++ b/egs/babel/s5/local/prepare_lexicon.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 use Data::Dumper;
 
diff --git a/egs/babel/s5/local/prepare_stm.pl b/egs/babel/s5/local/prepare_stm.pl
index 06fb379b2..31407ae01 100755
--- a/egs/babel/s5/local/prepare_stm.pl
+++ b/egs/babel/s5/local/prepare_stm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 use Encode;
 
diff --git a/egs/babel/s5/local/score_combine.sh b/egs/babel/s5/local/score_combine.sh
index f292c62ab..42d9cfc09 100755
--- a/egs/babel/s5/local/score_combine.sh
+++ b/egs/babel/s5/local/score_combine.sh
@@ -33,7 +33,7 @@ lat_weights=
 word_ins_penalty=0.0
 min_lmwt=7
 max_lmwt=17
-parallel_opts="-pe smp 3"
+parallel_opts="--num-threads 3"
 skip_scoring=false
 ctm_name=
 #end configuration section.
@@ -49,7 +49,7 @@ Options:
   --cmd (run.pl|queue.pl...)      # specify how to run the sub-processes.
   --stage (0|1|2)                 # (createCTM | filterCTM | runSclite).
   --parallel-opts <string>        # extra options to command for combination stage,
-                                  # default '-pe smp 3'
+                                  # default '--num-threads 3'
   --cer (0|1)                     # compute CER in addition to WER
 ";
 
diff --git a/egs/babel/s5/local/subset_atwv.pl b/egs/babel/s5/local/subset_atwv.pl
index e303bf5eb..910703db9 100755
--- a/egs/babel/s5/local/subset_atwv.pl
+++ b/egs/babel/s5/local/subset_atwv.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5/local/uem_ctm2segments.pl b/egs/babel/s5/local/uem_ctm2segments.pl
index 6474d53a3..ab560639c 100755
--- a/egs/babel/s5/local/uem_ctm2segments.pl
+++ b/egs/babel/s5/local/uem_ctm2segments.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 
 ################################################################################
diff --git a/egs/babel/s5/run-6-combine.sh b/egs/babel/s5/run-6-combine.sh
index 07c53bb18..92d749ca4 100755
--- a/egs/babel/s5/run-6-combine.sh
+++ b/egs/babel/s5/run-6-combine.sh
@@ -13,7 +13,7 @@ set -u
 
 if [ ! -f exp/combine_2/decode_dev2h/.done ]; then
   for iter in 1 2 3 4; do
-    local/score_combine.sh --cmd "queue.pl -l mem_free=2.0G,ram_free=2.0G" \
+    local/score_combine.sh --cmd "queue.pl --mem 2G" \
       data/dev2h data/lang exp/tri6_nnet/decode_dev2h exp/sgmm5_mmi_b0.1/decode_dev2h_fmllr_it$iter exp/combine_2/decode_dev2h_it$iter
     touch exp/combine_2/decode_dev2h/.done 
   done
@@ -25,7 +25,7 @@ if [ ! -f exp/combine_3/decode_dev2h/.done ]; then
     if [ ! -f exp_BNF/sgmm7_mmi_b0.1/decode_dev2h_fmllr_it$iter/.done ]; then
       echo "BNF decode in exp_BNF/sgmm7_mmi_b0.1/decode_dev2h_fmllr_it$iter is not done, skipping this step."
     fi
-    local/score_combine.sh --cmd "queue.pl -l mem_free=2.0G,ram_free=2.0G" \
+    local/score_combine.sh --cmd "queue.pl --mem 2G" \
       data/dev2h data/lang exp_BNF/sgmm7_mmi_b0.1/decode_dev2h_fmllr_it$iter:10 \
       exp/sgmm5_mmi_b0.1/decode_dev2h_fmllr_it$iter exp/tri5_nnet/decode_dev2h exp/combine_3/decode_dev2h_it$iter
     touch exp_BNF/sgmm7_mmi_b0.1/decode_dev2h_fmllr_it$iter/.done
diff --git a/egs/babel/s5/steps_BNF/pfile_burst.pl b/egs/babel/s5/steps_BNF/pfile_burst.pl
index e6b4b235c..ecbb43d85 100755
--- a/egs/babel/s5/steps_BNF/pfile_burst.pl
+++ b/egs/babel/s5/steps_BNF/pfile_burst.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2013  Karlsruhe Institute of Technology (Author: Jonas Gehring)
 # Apache 2.0.
diff --git a/egs/babel/s5/steps_BNF/pfile_rconcat.pl b/egs/babel/s5/steps_BNF/pfile_rconcat.pl
index 2b2666672..b5a1fec04 100755
--- a/egs/babel/s5/steps_BNF/pfile_rconcat.pl
+++ b/egs/babel/s5/steps_BNF/pfile_rconcat.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2013  Karlsruhe Institute of Technology (Author: Jonas Gehring)
 # Apache 2.0.
diff --git a/egs/babel/s5b/conf/bnf/config_full.py b/egs/babel/s5b/conf/bnf/config_full.py
old mode 100644
new mode 100755
index 569a741b7..5ea3ddbb1
--- a/egs/babel/s5b/conf/bnf/config_full.py
+++ b/egs/babel/s5b/conf/bnf/config_full.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #################################################
 ## PTDNN - Python Toolkit for Deep Neural Network
 ## Author: Yajie Miao
diff --git a/egs/babel/s5b/conf/bnf/config_limited.py b/egs/babel/s5b/conf/bnf/config_limited.py
old mode 100644
new mode 100755
index 21c62ea4b..f63c3640d
--- a/egs/babel/s5b/conf/bnf/config_limited.py
+++ b/egs/babel/s5b/conf/bnf/config_limited.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #################################################
 ## PTDNN - Python Toolkit for Deep Neural Network
 ## Author: Yajie Miao
diff --git a/egs/babel/s5b/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5b/local/annotated_kwlist_to_KWs.pl
index 58f31c985..198da36da 100755
--- a/egs/babel/s5b/local/annotated_kwlist_to_KWs.pl
+++ b/egs/babel/s5b/local/annotated_kwlist_to_KWs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5b/local/apply_map_tab_preserving.pl b/egs/babel/s5b/local/apply_map_tab_preserving.pl
index 0c3e09a35..2a3238c04 100755
--- a/egs/babel/s5b/local/apply_map_tab_preserving.pl
+++ b/egs/babel/s5b/local/apply_map_tab_preserving.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012-2014  Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0.
diff --git a/egs/babel/s5b/local/augment_original_stm.pl b/egs/babel/s5b/local/augment_original_stm.pl
index 55cd23bc2..4c58ccc62 100755
--- a/egs/babel/s5b/local/augment_original_stm.pl
+++ b/egs/babel/s5b/local/augment_original_stm.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (Author: Jan Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5b/local/build_edit_distance_fst.pl b/egs/babel/s5b/local/build_edit_distance_fst.pl
index aa6a6317c..51c466677 100755
--- a/egs/babel/s5b/local/build_edit_distance_fst.pl
+++ b/egs/babel/s5b/local/build_edit_distance_fst.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5b/local/count_to_logprob.pl b/egs/babel/s5b/local/count_to_logprob.pl
index a3eba3c00..7d7793218 100755
--- a/egs/babel/s5b/local/count_to_logprob.pl
+++ b/egs/babel/s5b/local/count_to_logprob.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5b/local/cstr_ndx2flist.pl b/egs/babel/s5b/local/cstr_ndx2flist.pl
index 101834e86..d19db421a 100755
--- a/egs/babel/s5b/local/cstr_ndx2flist.pl
+++ b/egs/babel/s5b/local/cstr_ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/babel/s5b/local/extract_oov_words.pl b/egs/babel/s5b/local/extract_oov_words.pl
index 567392caa..fbb6e9528 100755
--- a/egs/babel/s5b/local/extract_oov_words.pl
+++ b/egs/babel/s5b/local/extract_oov_words.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5b/local/filter_keywords.pl b/egs/babel/s5b/local/filter_keywords.pl
index a20be87d4..a724ad77f 100755
--- a/egs/babel/s5b/local/filter_keywords.pl
+++ b/egs/babel/s5b/local/filter_keywords.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 use strict;
 use warnings;
diff --git a/egs/babel/s5b/local/find_transcripts.pl b/egs/babel/s5b/local/find_transcripts.pl
index 0e5d71f79..6429411b8 100755
--- a/egs/babel/s5b/local/find_transcripts.pl
+++ b/egs/babel/s5b/local/find_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5b/local/flist2scp.pl b/egs/babel/s5b/local/flist2scp.pl
index 6831d2d7b..234e4add1 100755
--- a/egs/babel/s5b/local/flist2scp.pl
+++ b/egs/babel/s5b/local/flist2scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5b/local/gridsearch.pl b/egs/babel/s5b/local/gridsearch.pl
index a44d0197c..7b2ad530f 100755
--- a/egs/babel/s5b/local/gridsearch.pl
+++ b/egs/babel/s5b/local/gridsearch.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 
 use warnings;
 use strict;
diff --git a/egs/babel/s5b/local/gridsearch2.pl b/egs/babel/s5b/local/gridsearch2.pl
old mode 100644
new mode 100755
index 882cf816a..6645743c1
--- a/egs/babel/s5b/local/gridsearch2.pl
+++ b/egs/babel/s5b/local/gridsearch2.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 
 use warnings;
 use strict;
diff --git a/egs/babel/s5b/local/kwords2indices.pl b/egs/babel/s5b/local/kwords2indices.pl
index 176fc1354..47cc3dc27 100755
--- a/egs/babel/s5b/local/kwords2indices.pl
+++ b/egs/babel/s5b/local/kwords2indices.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5b/local/kws_oracle_threshold.pl b/egs/babel/s5b/local/kws_oracle_threshold.pl
index 2d2f22ea4..e8ec21994 100755
--- a/egs/babel/s5b/local/kws_oracle_threshold.pl
+++ b/egs/babel/s5b/local/kws_oracle_threshold.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5b/local/make_lexicon_fst_special.pl b/egs/babel/s5b/local/make_lexicon_fst_special.pl
index 9e13a910c..976c28c02 100755
--- a/egs/babel/s5b/local/make_lexicon_fst_special.pl
+++ b/egs/babel/s5b/local/make_lexicon_fst_special.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (author: Daniel Povey)
 
 # makes lexicon FST -- special version only for use in keyword search
diff --git a/egs/babel/s5b/local/naive_comb.pl b/egs/babel/s5b/local/naive_comb.pl
index a1fb6076e..e49ac9721 100755
--- a/egs/babel/s5b/local/naive_comb.pl
+++ b/egs/babel/s5b/local/naive_comb.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5b/local/ndx2flist.pl b/egs/babel/s5b/local/ndx2flist.pl
index b05704293..48fc3dec1 100755
--- a/egs/babel/s5b/local/ndx2flist.pl
+++ b/egs/babel/s5b/local/ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5b/local/normalize_transcript.pl b/egs/babel/s5b/local/normalize_transcript.pl
index 9dd67af3d..09cee0617 100755
--- a/egs/babel/s5b/local/normalize_transcript.pl
+++ b/egs/babel/s5b/local/normalize_transcript.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5b/local/prepare_acoustic_training_data.pl b/egs/babel/s5b/local/prepare_acoustic_training_data.pl
index 05f7e85d0..4234d570d 100755
--- a/egs/babel/s5b/local/prepare_acoustic_training_data.pl
+++ b/egs/babel/s5b/local/prepare_acoustic_training_data.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 
 ########################################################################
diff --git a/egs/babel/s5b/local/prepare_lexicon.pl b/egs/babel/s5b/local/prepare_lexicon.pl
index e0f6ef3e0..721e56a0d 100755
--- a/egs/babel/s5b/local/prepare_lexicon.pl
+++ b/egs/babel/s5b/local/prepare_lexicon.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 use Data::Dumper;
 
diff --git a/egs/babel/s5b/local/prepare_stm.pl b/egs/babel/s5b/local/prepare_stm.pl
index ff65132df..edf1b4367 100755
--- a/egs/babel/s5b/local/prepare_stm.pl
+++ b/egs/babel/s5b/local/prepare_stm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 use Encode;
 
diff --git a/egs/babel/s5b/local/resegment/evaluate_segmentation.pl b/egs/babel/s5b/local/resegment/evaluate_segmentation.pl
index 9c0dcaae6..06a762d77 100755
--- a/egs/babel/s5b/local/resegment/evaluate_segmentation.pl
+++ b/egs/babel/s5b/local/resegment/evaluate_segmentation.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2014  Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar 
 # Apache 2.0
diff --git a/egs/babel/s5b/local/subset_atwv.pl b/egs/babel/s5b/local/subset_atwv.pl
index e303bf5eb..910703db9 100755
--- a/egs/babel/s5b/local/subset_atwv.pl
+++ b/egs/babel/s5b/local/subset_atwv.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5b/local/summarize_logs.pl b/egs/babel/s5b/local/summarize_logs.pl
index 57efe1bc1..4f7fc058f 100755
--- a/egs/babel/s5b/local/summarize_logs.pl
+++ b/egs/babel/s5b/local/summarize_logs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 
diff --git a/egs/babel/s5b/local/uem_ctm2segments.pl b/egs/babel/s5b/local/uem_ctm2segments.pl
index 6474d53a3..ab560639c 100755
--- a/egs/babel/s5b/local/uem_ctm2segments.pl
+++ b/egs/babel/s5b/local/uem_ctm2segments.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 
 ################################################################################
diff --git a/egs/babel/s5c/conf/bnf/config_full.py b/egs/babel/s5c/conf/bnf/config_full.py
old mode 100644
new mode 100755
index 569a741b7..5ea3ddbb1
--- a/egs/babel/s5c/conf/bnf/config_full.py
+++ b/egs/babel/s5c/conf/bnf/config_full.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #################################################
 ## PTDNN - Python Toolkit for Deep Neural Network
 ## Author: Yajie Miao
diff --git a/egs/babel/s5c/conf/bnf/config_limited.py b/egs/babel/s5c/conf/bnf/config_limited.py
old mode 100644
new mode 100755
index 21c62ea4b..f63c3640d
--- a/egs/babel/s5c/conf/bnf/config_limited.py
+++ b/egs/babel/s5c/conf/bnf/config_limited.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #################################################
 ## PTDNN - Python Toolkit for Deep Neural Network
 ## Author: Yajie Miao
diff --git a/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl b/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl
index 58f31c985..198da36da 100755
--- a/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl
+++ b/egs/babel/s5c/local/annotated_kwlist_to_KWs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5c/local/apply_map_tab_preserving.pl b/egs/babel/s5c/local/apply_map_tab_preserving.pl
index 0c3e09a35..2a3238c04 100755
--- a/egs/babel/s5c/local/apply_map_tab_preserving.pl
+++ b/egs/babel/s5c/local/apply_map_tab_preserving.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012-2014  Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0.
diff --git a/egs/babel/s5c/local/augment_original_stm.pl b/egs/babel/s5c/local/augment_original_stm.pl
index 55cd23bc2..4c58ccc62 100755
--- a/egs/babel/s5c/local/augment_original_stm.pl
+++ b/egs/babel/s5c/local/augment_original_stm.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (Author: Jan Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5c/local/build_edit_distance_fst.pl b/egs/babel/s5c/local/build_edit_distance_fst.pl
index aa6a6317c..51c466677 100755
--- a/egs/babel/s5c/local/build_edit_distance_fst.pl
+++ b/egs/babel/s5c/local/build_edit_distance_fst.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5c/local/count_to_logprob.pl b/egs/babel/s5c/local/count_to_logprob.pl
index a3eba3c00..7d7793218 100755
--- a/egs/babel/s5c/local/count_to_logprob.pl
+++ b/egs/babel/s5c/local/count_to_logprob.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5c/local/cstr_ndx2flist.pl b/egs/babel/s5c/local/cstr_ndx2flist.pl
index 101834e86..d19db421a 100755
--- a/egs/babel/s5c/local/cstr_ndx2flist.pl
+++ b/egs/babel/s5c/local/cstr_ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/babel/s5c/local/extract_oov_words.pl b/egs/babel/s5c/local/extract_oov_words.pl
index 567392caa..fbb6e9528 100755
--- a/egs/babel/s5c/local/extract_oov_words.pl
+++ b/egs/babel/s5c/local/extract_oov_words.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5c/local/filter_keywords.pl b/egs/babel/s5c/local/filter_keywords.pl
index a20be87d4..a724ad77f 100755
--- a/egs/babel/s5c/local/filter_keywords.pl
+++ b/egs/babel/s5c/local/filter_keywords.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 use strict;
 use warnings;
diff --git a/egs/babel/s5c/local/find_transcripts.pl b/egs/babel/s5c/local/find_transcripts.pl
index 0e5d71f79..6429411b8 100755
--- a/egs/babel/s5c/local/find_transcripts.pl
+++ b/egs/babel/s5c/local/find_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/flist2scp.pl b/egs/babel/s5c/local/flist2scp.pl
index 6831d2d7b..234e4add1 100755
--- a/egs/babel/s5c/local/flist2scp.pl
+++ b/egs/babel/s5c/local/flist2scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/gridsearch.pl b/egs/babel/s5c/local/gridsearch.pl
index a44d0197c..7b2ad530f 100755
--- a/egs/babel/s5c/local/gridsearch.pl
+++ b/egs/babel/s5c/local/gridsearch.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 
 use warnings;
 use strict;
diff --git a/egs/babel/s5c/local/gridsearch2.pl b/egs/babel/s5c/local/gridsearch2.pl
old mode 100644
new mode 100755
index 882cf816a..6645743c1
--- a/egs/babel/s5c/local/gridsearch2.pl
+++ b/egs/babel/s5c/local/gridsearch2.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 
 use warnings;
 use strict;
diff --git a/egs/babel/s5c/local/kwords2indices.pl b/egs/babel/s5c/local/kwords2indices.pl
index 176fc1354..47cc3dc27 100755
--- a/egs/babel/s5c/local/kwords2indices.pl
+++ b/egs/babel/s5c/local/kwords2indices.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2012  Johns Hopkins University (Author: Yenda Trmal)
 # Apache 2.0.
 
diff --git a/egs/babel/s5c/local/kws_oracle_threshold.pl b/egs/babel/s5c/local/kws_oracle_threshold.pl
index 2d2f22ea4..e8ec21994 100755
--- a/egs/babel/s5c/local/kws_oracle_threshold.pl
+++ b/egs/babel/s5c/local/kws_oracle_threshold.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5c/local/make_lexicon_fst_special.pl b/egs/babel/s5c/local/make_lexicon_fst_special.pl
index 9e13a910c..976c28c02 100755
--- a/egs/babel/s5c/local/make_lexicon_fst_special.pl
+++ b/egs/babel/s5c/local/make_lexicon_fst_special.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (author: Daniel Povey)
 
 # makes lexicon FST -- special version only for use in keyword search
diff --git a/egs/babel/s5c/local/naive_comb.pl b/egs/babel/s5c/local/naive_comb.pl
index a1fb6076e..e49ac9721 100755
--- a/egs/babel/s5c/local/naive_comb.pl
+++ b/egs/babel/s5c/local/naive_comb.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5c/local/ndx2flist.pl b/egs/babel/s5c/local/ndx2flist.pl
index b05704293..48fc3dec1 100755
--- a/egs/babel/s5c/local/ndx2flist.pl
+++ b/egs/babel/s5c/local/ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/normalize_transcript.pl b/egs/babel/s5c/local/normalize_transcript.pl
index 9dd67af3d..09cee0617 100755
--- a/egs/babel/s5c/local/normalize_transcript.pl
+++ b/egs/babel/s5c/local/normalize_transcript.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/babel/s5c/local/prepare_acoustic_training_data.pl b/egs/babel/s5c/local/prepare_acoustic_training_data.pl
index 05f7e85d0..4234d570d 100755
--- a/egs/babel/s5c/local/prepare_acoustic_training_data.pl
+++ b/egs/babel/s5c/local/prepare_acoustic_training_data.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 
 ########################################################################
diff --git a/egs/babel/s5c/local/prepare_lexicon.pl b/egs/babel/s5c/local/prepare_lexicon.pl
index e0f6ef3e0..721e56a0d 100755
--- a/egs/babel/s5c/local/prepare_lexicon.pl
+++ b/egs/babel/s5c/local/prepare_lexicon.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 use Data::Dumper;
 
diff --git a/egs/babel/s5c/local/prepare_stm.pl b/egs/babel/s5c/local/prepare_stm.pl
index ff65132df..edf1b4367 100755
--- a/egs/babel/s5c/local/prepare_stm.pl
+++ b/egs/babel/s5c/local/prepare_stm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 use Encode;
 
diff --git a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl
index 9c0dcaae6..06a762d77 100755
--- a/egs/babel/s5c/local/resegment/evaluate_segmentation.pl
+++ b/egs/babel/s5c/local/resegment/evaluate_segmentation.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2014  Johns Hopkins University (Author: Sanjeev Khudanpur), Vimal Manohar 
 # Apache 2.0
diff --git a/egs/babel/s5c/local/subset_atwv.pl b/egs/babel/s5c/local/subset_atwv.pl
index e303bf5eb..910703db9 100755
--- a/egs/babel/s5c/local/subset_atwv.pl
+++ b/egs/babel/s5c/local/subset_atwv.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/babel/s5c/local/summarize_logs.pl b/egs/babel/s5c/local/summarize_logs.pl
index 57efe1bc1..4f7fc058f 100755
--- a/egs/babel/s5c/local/summarize_logs.pl
+++ b/egs/babel/s5c/local/summarize_logs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 
diff --git a/egs/babel/s5c/local/uem_ctm2segments.pl b/egs/babel/s5c/local/uem_ctm2segments.pl
index 6474d53a3..ab560639c 100755
--- a/egs/babel/s5c/local/uem_ctm2segments.pl
+++ b/egs/babel/s5c/local/uem_ctm2segments.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use Getopt::Long;
 
 ################################################################################
diff --git a/egs/callhome_egyptian/s5/local/split_alt_punc.py b/egs/callhome_egyptian/s5/local/split_alt_punc.py
old mode 100644
new mode 100755
diff --git a/egs/chime3/s5/local/cstr_ndx2flist.pl b/egs/chime3/s5/local/cstr_ndx2flist.pl
index 101834e86..d19db421a 100755
--- a/egs/chime3/s5/local/cstr_ndx2flist.pl
+++ b/egs/chime3/s5/local/cstr_ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/chime3/s5/local/find_noisy_transcripts.pl b/egs/chime3/s5/local/find_noisy_transcripts.pl
index 720c320c0..fdeb38d94 100755
--- a/egs/chime3/s5/local/find_noisy_transcripts.pl
+++ b/egs/chime3/s5/local/find_noisy_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/chime3/s5/local/find_transcripts.pl b/egs/chime3/s5/local/find_transcripts.pl
index 0e5d71f79..6429411b8 100755
--- a/egs/chime3/s5/local/find_transcripts.pl
+++ b/egs/chime3/s5/local/find_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/chime3/s5/local/flist2scp.pl b/egs/chime3/s5/local/flist2scp.pl
index 6831d2d7b..234e4add1 100755
--- a/egs/chime3/s5/local/flist2scp.pl
+++ b/egs/chime3/s5/local/flist2scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/chime3/s5/local/normalize_transcript.pl b/egs/chime3/s5/local/normalize_transcript.pl
index 9dd67af3d..09cee0617 100755
--- a/egs/chime3/s5/local/normalize_transcript.pl
+++ b/egs/chime3/s5/local/normalize_transcript.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/chime_wsj0/s5/local/cstr_ndx2flist.pl b/egs/chime_wsj0/s5/local/cstr_ndx2flist.pl
index 4ff029675..d032313de 100755
--- a/egs/chime_wsj0/s5/local/cstr_ndx2flist.pl
+++ b/egs/chime_wsj0/s5/local/cstr_ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/chime_wsj0/s5/local/find_noisy_transcripts.pl b/egs/chime_wsj0/s5/local/find_noisy_transcripts.pl
index 720c320c0..fdeb38d94 100755
--- a/egs/chime_wsj0/s5/local/find_noisy_transcripts.pl
+++ b/egs/chime_wsj0/s5/local/find_noisy_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/chime_wsj0/s5/local/find_transcripts.pl b/egs/chime_wsj0/s5/local/find_transcripts.pl
index 0e5d71f79..6429411b8 100755
--- a/egs/chime_wsj0/s5/local/find_transcripts.pl
+++ b/egs/chime_wsj0/s5/local/find_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/chime_wsj0/s5/local/flist2scp.pl b/egs/chime_wsj0/s5/local/flist2scp.pl
index 6831d2d7b..234e4add1 100755
--- a/egs/chime_wsj0/s5/local/flist2scp.pl
+++ b/egs/chime_wsj0/s5/local/flist2scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/chime_wsj0/s5/local/normalize_transcript.pl b/egs/chime_wsj0/s5/local/normalize_transcript.pl
index 9dd67af3d..09cee0617 100755
--- a/egs/chime_wsj0/s5/local/normalize_transcript.pl
+++ b/egs/chime_wsj0/s5/local/normalize_transcript.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/csj/s5/local/csj_make_trans/csj2kaldi4m.pl b/egs/csj/s5/local/csj_make_trans/csj2kaldi4m.pl
old mode 100644
new mode 100755
index d31de7e3c..7895fa341
--- a/egs/csj/s5/local/csj_make_trans/csj2kaldi4m.pl
+++ b/egs/csj/s5/local/csj_make_trans/csj2kaldi4m.pl
@@ -1,4 +1,5 @@
-#! /usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright  2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
 #            2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/csj/s5/local/csj_make_trans/csjconnect.pl b/egs/csj/s5/local/csj_make_trans/csjconnect.pl
old mode 100644
new mode 100755
index 2e43772e9..13866e9f7
--- a/egs/csj/s5/local/csj_make_trans/csjconnect.pl
+++ b/egs/csj/s5/local/csj_make_trans/csjconnect.pl
@@ -1,4 +1,5 @@
-#! /usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright  2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
 #            2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/csj/s5/local/csj_make_trans/reform.pl b/egs/csj/s5/local/csj_make_trans/reform.pl
old mode 100644
new mode 100755
index fc00b3de6..1c267e2c4
--- a/egs/csj/s5/local/csj_make_trans/reform.pl
+++ b/egs/csj/s5/local/csj_make_trans/reform.pl
@@ -1,5 +1,4 @@
-#! /usr/bin/perl -w                                                           
-
+#!/usr/bin/env perl
 # Copyright  2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
 #            2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
 # Apache 2.0
@@ -7,6 +6,8 @@
 
 # This script is to make lexicon for KALDI format.
 
+use warnings;
+
 while (<>){
     chomp;
     @line=split(/\t/, $_);
diff --git a/egs/csj/s5/local/csj_make_trans/vocab2dic.pl b/egs/csj/s5/local/csj_make_trans/vocab2dic.pl
old mode 100644
new mode 100755
index 9ff4b1011..85288a44a
--- a/egs/csj/s5/local/csj_make_trans/vocab2dic.pl
+++ b/egs/csj/s5/local/csj_make_trans/vocab2dic.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright  2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
 #            2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
diff --git a/egs/fisher_callhome_spanish/s5/local/spron.pl b/egs/fisher_callhome_spanish/s5/local/spron.pl
index b903d9e4d..55581bdc0 100755
--- a/egs/fisher_callhome_spanish/s5/local/spron.pl
+++ b/egs/fisher_callhome_spanish/s5/local/spron.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Oct 21, 2015 : Gaurav Kumar (Johns Hopkins University)
 # GNU General Public License, v3.0
diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh
index 5647e0153..706f37932 100755
--- a/egs/fisher_callhome_spanish/s5/run.sh
+++ b/egs/fisher_callhome_spanish/s5/run.sh
@@ -236,7 +236,7 @@ utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph
 
 (
 
-  steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 --parallel-opts " -pe smp 5" \
+  steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 \
     --config conf/decode.config  --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \
    exp/sgmm5/graph data/dev exp/sgmm5/decode_dev
 )&
@@ -247,7 +247,7 @@ steps/align_sgmm2.sh \
   data/train data/lang exp/sgmm5 exp/sgmm5_ali
 
 steps/make_denlats_sgmm2.sh \
-  --nj 32 --sub-split 32 --num-threads 4 --parallel-opts "-pe smp 4"\
+  --nj 32 --sub-split 32 --num-threads 4 \
   --beam 10.0 --lattice-beam 6 --cmd "$decode_cmd" --transform-dir exp/tri5a_ali \
   data/train data/lang exp/sgmm5_ali exp/sgmm5_denlats
 
@@ -262,7 +262,7 @@ steps/decode_fmllr_extra.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 --parall
   --config conf/decode.config  --scoring-opts "--min-lmwt 8 --max-lmwt 12"\
  exp/tri5a/graph data/dev exp/tri5a/decode_dev
 utils/mkgraph.sh data/lang_test exp/sgmm5 exp/sgmm5/graph
-steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 --parallel-opts " -pe smp 5" \
+steps/decode_sgmm2.sh --nj 13 --cmd "$decode_cmd" --num-threads 5 \
   --config conf/decode.config  --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev \
  exp/sgmm5/graph data/dev exp/sgmm5/decode_dev
 for iter in 1 2 3 4; do
@@ -276,9 +276,9 @@ done
 
 
 dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \
-                       --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")
+                       --parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 --mem 2G")
 dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \
-                       --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=2G")
+                       --parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 --mem 2G")
 
 steps/nnet2/train_pnorm_ensemble.sh \
   --mix-up 5000  --initial-learning-rate 0.008 --final-learning-rate 0.0008\
diff --git a/egs/fisher_english/s5/local/fisher_fix_speakerid.pl b/egs/fisher_english/s5/local/fisher_fix_speakerid.pl
old mode 100644
new mode 100755
index 8933055bd..d38abc474
--- a/egs/fisher_english/s5/local/fisher_fix_speakerid.pl
+++ b/egs/fisher_english/s5/local/fisher_fix_speakerid.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Author: Peng Qi (pengqi@cs.stanford.edu)
 # This script maps Switchboard speaker IDs to the true physical speakers
diff --git a/egs/fisher_swbd/s5/local/fisher_map_words.pl b/egs/fisher_swbd/s5/local/fisher_map_words.pl
index edd6c0296..1a6719ba5 100755
--- a/egs/fisher_swbd/s5/local/fisher_map_words.pl
+++ b/egs/fisher_swbd/s5/local/fisher_map_words.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2013  Arnab Ghoshal
 
diff --git a/egs/fisher_swbd/s5/local/format_acronyms_ctm_eval2000.py b/egs/fisher_swbd/s5/local/format_acronyms_ctm_eval2000.py
index b7a8fc246..f2b594804 100755
--- a/egs/fisher_swbd/s5/local/format_acronyms_ctm_eval2000.py
+++ b/egs/fisher_swbd/s5/local/format_acronyms_ctm_eval2000.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # convert acronyms in swbd decode result to fisher convention
 # e.g. convert things like en_4156 B 414.26 0.65 u._c._l._a. to
 # en_4156 B 414.26 0.16 u
diff --git a/egs/fisher_swbd/s5/local/format_acronyms_ctm_rt03.py b/egs/fisher_swbd/s5/local/format_acronyms_ctm_rt03.py
index e3bc5e86b..81acf3a5f 100755
--- a/egs/fisher_swbd/s5/local/format_acronyms_ctm_rt03.py
+++ b/egs/fisher_swbd/s5/local/format_acronyms_ctm_rt03.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # convert acronyms in swbd decode result to fisher convention
 # e.g. convert things like en_4156 B 414.26 0.65 u._c._l._a. to
 # en_4156 B 414.26 0.16 u
diff --git a/egs/fisher_swbd/s5/local/format_acronyms_dict.py b/egs/fisher_swbd/s5/local/format_acronyms_dict.py
index 1249b44ef..61bdd9aac 100755
--- a/egs/fisher_swbd/s5/local/format_acronyms_dict.py
+++ b/egs/fisher_swbd/s5/local/format_acronyms_dict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # convert acronyms in swbd dict to fisher convention
 # IBM to i._b._m.
 # BBC to b._b._c.
diff --git a/egs/fisher_swbd/s5/local/map_acronyms_transcripts.py b/egs/fisher_swbd/s5/local/map_acronyms_transcripts.py
old mode 100644
new mode 100755
index 66ca3ff79..566684216
--- a/egs/fisher_swbd/s5/local/map_acronyms_transcripts.py
+++ b/egs/fisher_swbd/s5/local/map_acronyms_transcripts.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # convert acronyms in swbd transcript to fisher convention
 # accoring to first two columns in the input acronyms mapping
 
diff --git a/egs/fisher_swbd/s5/local/swbd1_map_words.pl b/egs/fisher_swbd/s5/local/swbd1_map_words.pl
index 0f2472a76..39f90d728 100755
--- a/egs/fisher_swbd/s5/local/swbd1_map_words.pl
+++ b/egs/fisher_swbd/s5/local/swbd1_map_words.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Modified from swbd_map_words.pl in Kaldi s5 recipe to make pattern
 # matches case-insensitive --Arnab (Jan 2013)
diff --git a/egs/gale_arabic/s5/local/normalize_transcript_BW.pl b/egs/gale_arabic/s5/local/normalize_transcript_BW.pl
index 92326c5ce..df01c5d7b 100755
--- a/egs/gale_arabic/s5/local/normalize_transcript_BW.pl
+++ b/egs/gale_arabic/s5/local/normalize_transcript_BW.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2014 QCRI (author: Ahmed Ali)
 # Apache 2.0
diff --git a/egs/gale_mandarin/s5/local/gale_normalize.pl b/egs/gale_mandarin/s5/local/gale_normalize.pl
index be5577a80..749c92f30 100755
--- a/egs/gale_mandarin/s5/local/gale_normalize.pl
+++ b/egs/gale_mandarin/s5/local/gale_normalize.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright Chao Weng 
 
 # normalizations for hkust trascript
diff --git a/egs/gale_mandarin/s5/local/gale_segment.py b/egs/gale_mandarin/s5/local/gale_segment.py
index 582f053fa..975ddb9c1 100755
--- a/egs/gale_mandarin/s5/local/gale_segment.py
+++ b/egs/gale_mandarin/s5/local/gale_segment.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #coding:utf-8
 #!/usr/bin/env python
 import sys
diff --git a/egs/gp/s1/local/gp_extract_transcripts.pl b/egs/gp/s1/local/gp_extract_transcripts.pl
index 3d89e9a74..34a45b1ab 100755
--- a/egs/gp/s1/local/gp_extract_transcripts.pl
+++ b/egs/gp/s1/local/gp_extract_transcripts.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 use strict;
 
 # Copyright 2012  Arnab Ghoshal
diff --git a/egs/gp/s1/local/gp_make_questions.pl b/egs/gp/s1/local/gp_make_questions.pl
index a7073c5ea..2563a3928 100755
--- a/egs/gp/s1/local/gp_make_questions.pl
+++ b/egs/gp/s1/local/gp_make_questions.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_dict_GE.pl b/egs/gp/s1/local/gp_norm_dict_GE.pl
index aa61e3538..8959acd87 100755
--- a/egs/gp/s1/local/gp_norm_dict_GE.pl
+++ b/egs/gp/s1/local/gp_norm_dict_GE.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_dict_PO.pl b/egs/gp/s1/local/gp_norm_dict_PO.pl
index 5dd07fe8d..498921021 100755
--- a/egs/gp/s1/local/gp_norm_dict_PO.pl
+++ b/egs/gp/s1/local/gp_norm_dict_PO.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_dict_SP.pl b/egs/gp/s1/local/gp_norm_dict_SP.pl
index 1368d52d2..fb3ddbdce 100755
--- a/egs/gp/s1/local/gp_norm_dict_SP.pl
+++ b/egs/gp/s1/local/gp_norm_dict_SP.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_dict_SW.pl b/egs/gp/s1/local/gp_norm_dict_SW.pl
index 70fd87272..277c27d02 100755
--- a/egs/gp/s1/local/gp_norm_dict_SW.pl
+++ b/egs/gp/s1/local/gp_norm_dict_SW.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_lm.pl b/egs/gp/s1/local/gp_norm_lm.pl
index f56db6b9f..6680b105a 100755
--- a/egs/gp/s1/local/gp_norm_lm.pl
+++ b/egs/gp/s1/local/gp_norm_lm.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_trans_GE.pl b/egs/gp/s1/local/gp_norm_trans_GE.pl
index 179546b11..e6514e639 100755
--- a/egs/gp/s1/local/gp_norm_trans_GE.pl
+++ b/egs/gp/s1/local/gp_norm_trans_GE.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_trans_PO.pl b/egs/gp/s1/local/gp_norm_trans_PO.pl
index c0ad960bf..e3e2db84f 100755
--- a/egs/gp/s1/local/gp_norm_trans_PO.pl
+++ b/egs/gp/s1/local/gp_norm_trans_PO.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_trans_SP.pl b/egs/gp/s1/local/gp_norm_trans_SP.pl
index 65ced00a8..9655f47a6 100755
--- a/egs/gp/s1/local/gp_norm_trans_SP.pl
+++ b/egs/gp/s1/local/gp_norm_trans_SP.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/local/gp_norm_trans_SW.pl b/egs/gp/s1/local/gp_norm_trans_SW.pl
index db12e70c1..4adb4c720 100755
--- a/egs/gp/s1/local/gp_norm_trans_SW.pl
+++ b/egs/gp/s1/local/gp_norm_trans_SW.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/utils/add_disambig.pl b/egs/gp/s1/utils/add_disambig.pl
index c605659e1..962ef3867 100755
--- a/egs/gp/s1/utils/add_disambig.pl
+++ b/egs/gp/s1/utils/add_disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/add_lex_disambig.pl b/egs/gp/s1/utils/add_lex_disambig.pl
index 9f9054e17..ded04bb4b 100755
--- a/egs/gp/s1/utils/add_lex_disambig.pl
+++ b/egs/gp/s1/utils/add_lex_disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/eps2disambig.pl b/egs/gp/s1/utils/eps2disambig.pl
index fecbdc833..049802b08 100755
--- a/egs/gp/s1/utils/eps2disambig.pl
+++ b/egs/gp/s1/utils/eps2disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/filter_scp.pl b/egs/gp/s1/utils/filter_scp.pl
index dfe4b13d1..17483ae8b 100755
--- a/egs/gp/s1/utils/filter_scp.pl
+++ b/egs/gp/s1/utils/filter_scp.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/filter_trans_oovs.pl b/egs/gp/s1/utils/filter_trans_oovs.pl
index bd3a2b794..6e3c4d4e0 100755
--- a/egs/gp/s1/utils/filter_trans_oovs.pl
+++ b/egs/gp/s1/utils/filter_trans_oovs.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/utils/find_arpa_oovs.pl b/egs/gp/s1/utils/find_arpa_oovs.pl
index abd63f65e..14d898f66 100755
--- a/egs/gp/s1/utils/find_arpa_oovs.pl
+++ b/egs/gp/s1/utils/find_arpa_oovs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/get_split_id.pl b/egs/gp/s1/utils/get_split_id.pl
index 72a1f0bdd..5cf6d320e 100755
--- a/egs/gp/s1/utils/get_split_id.pl
+++ b/egs/gp/s1/utils/get_split_id.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # 
 # If the command-line argument is 4, this script prints "0 1 2 3";
diff --git a/egs/gp/s1/utils/get_splits.pl b/egs/gp/s1/utils/get_splits.pl
index 742294745..66dac6eb9 100755
--- a/egs/gp/s1/utils/get_splits.pl
+++ b/egs/gp/s1/utils/get_splits.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # If the command-line argument is 4, this script prints "0 1 2 3";
 # If the command-line argument is 20, it prints 10 through 29.
diff --git a/egs/gp/s1/utils/int2sym.pl b/egs/gp/s1/utils/int2sym.pl
index d08c4279e..ad85ef349 100755
--- a/egs/gp/s1/utils/int2sym.pl
+++ b/egs/gp/s1/utils/int2sym.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/make_lexicon_fst.pl b/egs/gp/s1/utils/make_lexicon_fst.pl
index 1e9c291b4..9e088889c 100755
--- a/egs/gp/s1/utils/make_lexicon_fst.pl
+++ b/egs/gp/s1/utils/make_lexicon_fst.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/remove_oovs.pl b/egs/gp/s1/utils/remove_oovs.pl
index 5bcab5984..532d7f295 100755
--- a/egs/gp/s1/utils/remove_oovs.pl
+++ b/egs/gp/s1/utils/remove_oovs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/s2eps.pl b/egs/gp/s1/utils/s2eps.pl
index de993db67..ffeeb8eb6 100755
--- a/egs/gp/s1/utils/s2eps.pl
+++ b/egs/gp/s1/utils/s2eps.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/silphones.pl b/egs/gp/s1/utils/silphones.pl
index 8cee6df94..3ff85dfe3 100755
--- a/egs/gp/s1/utils/silphones.pl
+++ b/egs/gp/s1/utils/silphones.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/split_scp.pl b/egs/gp/s1/utils/split_scp.pl
index 1521504f2..f1054d323 100755
--- a/egs/gp/s1/utils/split_scp.pl
+++ b/egs/gp/s1/utils/split_scp.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/subset_lm.pl b/egs/gp/s1/utils/subset_lm.pl
index f71ef80e5..3decba9a3 100755
--- a/egs/gp/s1/utils/subset_lm.pl
+++ b/egs/gp/s1/utils/subset_lm.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s1/utils/sym2int.pl b/egs/gp/s1/utils/sym2int.pl
index 54a9ff21f..71492652c 100755
--- a/egs/gp/s1/utils/sym2int.pl
+++ b/egs/gp/s1/utils/sym2int.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s1/utils/utt2spk_to_spk2utt.pl b/egs/gp/s1/utils/utt2spk_to_spk2utt.pl
index 0c9e6417c..0dfb7ba5f 100755
--- a/egs/gp/s1/utils/utt2spk_to_spk2utt.pl
+++ b/egs/gp/s1/utils/utt2spk_to_spk2utt.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/gp/s5/local/gp_extract_transcripts.pl b/egs/gp/s5/local/gp_extract_transcripts.pl
index 8cade6743..f16bc30e2 100755
--- a/egs/gp/s5/local/gp_extract_transcripts.pl
+++ b/egs/gp/s5/local/gp_extract_transcripts.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 use strict;
 
 # Copyright 2012  Arnab Ghoshal
diff --git a/egs/gp/s5/local/gp_make_questions.pl b/egs/gp/s5/local/gp_make_questions.pl
index a7073c5ea..2563a3928 100755
--- a/egs/gp/s5/local/gp_make_questions.pl
+++ b/egs/gp/s5/local/gp_make_questions.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_norm_dict_CZ.pl b/egs/gp/s5/local/gp_norm_dict_CZ.pl
index f9eb4d96e..23d7ac30f 100755
--- a/egs/gp/s5/local/gp_norm_dict_CZ.pl
+++ b/egs/gp/s5/local/gp_norm_dict_CZ.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal;  Milos Janda
 
diff --git a/egs/gp/s5/local/gp_norm_dict_FR.pl b/egs/gp/s5/local/gp_norm_dict_FR.pl
index 4a8ab734f..83580d2d7 100755
--- a/egs/gp/s5/local/gp_norm_dict_FR.pl
+++ b/egs/gp/s5/local/gp_norm_dict_FR.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal;  Milos Janda
 
diff --git a/egs/gp/s5/local/gp_norm_dict_GE.pl b/egs/gp/s5/local/gp_norm_dict_GE.pl
index 76a81046a..7b9d6cb0d 100755
--- a/egs/gp/s5/local/gp_norm_dict_GE.pl
+++ b/egs/gp/s5/local/gp_norm_dict_GE.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_norm_dict_PL.pl b/egs/gp/s5/local/gp_norm_dict_PL.pl
index 78d801e89..9f6b3afe7 100755
--- a/egs/gp/s5/local/gp_norm_dict_PL.pl
+++ b/egs/gp/s5/local/gp_norm_dict_PL.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal;  Milos Janda
 
diff --git a/egs/gp/s5/local/gp_norm_dict_PO.pl b/egs/gp/s5/local/gp_norm_dict_PO.pl
index db56da8bb..870c0a405 100755
--- a/egs/gp/s5/local/gp_norm_dict_PO.pl
+++ b/egs/gp/s5/local/gp_norm_dict_PO.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_norm_dict_RU.pl b/egs/gp/s5/local/gp_norm_dict_RU.pl
index 5fb0d1170..c06c98602 100755
--- a/egs/gp/s5/local/gp_norm_dict_RU.pl
+++ b/egs/gp/s5/local/gp_norm_dict_RU.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Milos Janda;  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_norm_dict_SP.pl b/egs/gp/s5/local/gp_norm_dict_SP.pl
index 45e9b023c..7e76d5ce7 100755
--- a/egs/gp/s5/local/gp_norm_dict_SP.pl
+++ b/egs/gp/s5/local/gp_norm_dict_SP.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_norm_trans_CZ.pl b/egs/gp/s5/local/gp_norm_trans_CZ.pl
index 7a24b0a40..49f7f3863 100755
--- a/egs/gp/s5/local/gp_norm_trans_CZ.pl
+++ b/egs/gp/s5/local/gp_norm_trans_CZ.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Milos Janda
 
diff --git a/egs/gp/s5/local/gp_norm_trans_FR.pl b/egs/gp/s5/local/gp_norm_trans_FR.pl
index c5016fb85..1dd10cc59 100755
--- a/egs/gp/s5/local/gp_norm_trans_FR.pl
+++ b/egs/gp/s5/local/gp_norm_trans_FR.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Milos Janda
 
diff --git a/egs/gp/s5/local/gp_norm_trans_GE.pl b/egs/gp/s5/local/gp_norm_trans_GE.pl
index 6dec75309..85a544e3a 100755
--- a/egs/gp/s5/local/gp_norm_trans_GE.pl
+++ b/egs/gp/s5/local/gp_norm_trans_GE.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_norm_trans_PL.pl b/egs/gp/s5/local/gp_norm_trans_PL.pl
index a5aaa026c..f48f40e97 100755
--- a/egs/gp/s5/local/gp_norm_trans_PL.pl
+++ b/egs/gp/s5/local/gp_norm_trans_PL.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Milos Janda;  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_norm_trans_PO.pl b/egs/gp/s5/local/gp_norm_trans_PO.pl
index 3f4edafac..6bbf93497 100755
--- a/egs/gp/s5/local/gp_norm_trans_PO.pl
+++ b/egs/gp/s5/local/gp_norm_trans_PO.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_norm_trans_RU.pl b/egs/gp/s5/local/gp_norm_trans_RU.pl
index 6bb4cb9c5..e22971d63 100755
--- a/egs/gp/s5/local/gp_norm_trans_RU.pl
+++ b/egs/gp/s5/local/gp_norm_trans_RU.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Milos Janda
 
diff --git a/egs/gp/s5/local/gp_norm_trans_SP.pl b/egs/gp/s5/local/gp_norm_trans_SP.pl
index bf5a5b085..2c86892ee 100755
--- a/egs/gp/s5/local/gp_norm_trans_SP.pl
+++ b/egs/gp/s5/local/gp_norm_trans_SP.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_rmn2utf_FR.pl b/egs/gp/s5/local/gp_rmn2utf_FR.pl
index ba06e20a4..09aac2f02 100755
--- a/egs/gp/s5/local/gp_rmn2utf_FR.pl
+++ b/egs/gp/s5/local/gp_rmn2utf_FR.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_rmn2utf_GE.pl b/egs/gp/s5/local/gp_rmn2utf_GE.pl
index 9e1526d0e..6300ede5e 100755
--- a/egs/gp/s5/local/gp_rmn2utf_GE.pl
+++ b/egs/gp/s5/local/gp_rmn2utf_GE.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_rmn2utf_PL.pl b/egs/gp/s5/local/gp_rmn2utf_PL.pl
index 6e8578cc9..83b431a92 100755
--- a/egs/gp/s5/local/gp_rmn2utf_PL.pl
+++ b/egs/gp/s5/local/gp_rmn2utf_PL.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_rmn2utf_PO.pl b/egs/gp/s5/local/gp_rmn2utf_PO.pl
index 7310bc5b8..e093a9597 100755
--- a/egs/gp/s5/local/gp_rmn2utf_PO.pl
+++ b/egs/gp/s5/local/gp_rmn2utf_PO.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_rmn2utf_RU.pl b/egs/gp/s5/local/gp_rmn2utf_RU.pl
index 2f9f23d36..8ef041625 100755
--- a/egs/gp/s5/local/gp_rmn2utf_RU.pl
+++ b/egs/gp/s5/local/gp_rmn2utf_RU.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/gp/s5/local/gp_rmn2utf_SP.pl b/egs/gp/s5/local/gp_rmn2utf_SP.pl
index 10e98cb8f..0aae0e4f0 100755
--- a/egs/gp/s5/local/gp_rmn2utf_SP.pl
+++ b/egs/gp/s5/local/gp_rmn2utf_SP.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/hkust/s5/local/ext/hkust_word2ch_tran.pl b/egs/hkust/s5/local/ext/hkust_word2ch_tran.pl
index f88975de7..f449112f5 100755
--- a/egs/hkust/s5/local/ext/hkust_word2ch_tran.pl
+++ b/egs/hkust/s5/local/ext/hkust_word2ch_tran.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2013  Hong Kong University of Science and Technology (Author: Ricky Chan Ho Yin) 
 #                 
 # Apache 2.0.
diff --git a/egs/hkust/s5/local/hkust_extract_subdict.pl b/egs/hkust/s5/local/hkust_extract_subdict.pl
index 5f01a94af..f5683a6b3 100755
--- a/egs/hkust/s5/local/hkust_extract_subdict.pl
+++ b/egs/hkust/s5/local/hkust_extract_subdict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright Hong Kong University of Science and Technology (Author: Ricky Chan) 2013.
 # 
 # A script for dictionary generation with an input dict and a wordlist 
diff --git a/egs/hkust/s5/local/hkust_normalize.pl b/egs/hkust/s5/local/hkust_normalize.pl
index a89450436..01218a31b 100755
--- a/egs/hkust/s5/local/hkust_normalize.pl
+++ b/egs/hkust/s5/local/hkust_normalize.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright Chao Weng 
 
 # normalizations for hkust trascript
diff --git a/egs/hkust/s5/local/hkust_segment.py b/egs/hkust/s5/local/hkust_segment.py
index 3f9ee9f49..dff335fc1 100755
--- a/egs/hkust/s5/local/hkust_segment.py
+++ b/egs/hkust/s5/local/hkust_segment.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 #coding:utf-8
 #!/usr/bin/env python
 import sys
diff --git a/egs/librispeech/s5/cmd.sh b/egs/librispeech/s5/cmd.sh
index fec4abbca..6395d96ca 100644
--- a/egs/librispeech/s5/cmd.sh
+++ b/egs/librispeech/s5/cmd.sh
@@ -7,9 +7,9 @@
 
 #a) JHU cluster options
 export train_cmd="queue.pl -l arch=*64"
-export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
-export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
-export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
+export decode_cmd="queue.pl -l arch=*64 --mem 2G"
+export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
+export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
 export cuda_cmd="queue.pl -l gpu=1"
 
 
diff --git a/egs/librispeech/s5/local/lm/python/pre_filter.py b/egs/librispeech/s5/local/lm/python/pre_filter.py
old mode 100644
new mode 100755
diff --git a/egs/librispeech/s5/local/lm/python/text_post_process.py b/egs/librispeech/s5/local/lm/python/text_post_process.py
old mode 100644
new mode 100755
diff --git a/egs/librispeech/s5/local/lm/python/text_pre_process.py b/egs/librispeech/s5/local/lm/python/text_pre_process.py
old mode 100644
new mode 100755
diff --git a/egs/lre/v1/lid/balance_priors_to_test.pl b/egs/lre/v1/lid/balance_priors_to_test.pl
index 78bda0503..b3f619acf 100755
--- a/egs/lre/v1/lid/balance_priors_to_test.pl
+++ b/egs/lre/v1/lid/balance_priors_to_test.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 my ($train_file, $test_file, $lang_file, $priors_file) = @ARGV;
 open(UTT2LANG_TRAIN, "<$train_file") or die "no utt2lang training file";
diff --git a/egs/lre/v1/lid/remove_dialect.pl b/egs/lre/v1/lid/remove_dialect.pl
index 83556b95b..7fc61b6e8 100755
--- a/egs/lre/v1/lid/remove_dialect.pl
+++ b/egs/lre/v1/lid/remove_dialect.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Removes the dialect parts on an utt2lang file.
 # For example <utt> chinese.wu is converted to <utt> chinese.
 
diff --git a/egs/lre/v1/local/make_callfriend.pl b/egs/lre/v1/local/make_callfriend.pl
index 69981400d..407abce2e 100755
--- a/egs/lre/v1/local/make_callfriend.pl
+++ b/egs/lre/v1/local/make_callfriend.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder  Daniel Povey
 # Apache 2.0.
diff --git a/egs/lre/v1/local/make_lre03.pl b/egs/lre/v1/local/make_lre03.pl
index b7b29f656..284e3d754 100755
--- a/egs/lre/v1/local/make_lre03.pl
+++ b/egs/lre/v1/local/make_lre03.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 #
 # Copyright 2014  David Snyder  Daniel Povey
  
diff --git a/egs/lre/v1/local/make_lre05.pl b/egs/lre/v1/local/make_lre05.pl
index f77984c4d..914a70cf5 100755
--- a/egs/lre/v1/local/make_lre05.pl
+++ b/egs/lre/v1/local/make_lre05.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 
diff --git a/egs/lre/v1/local/make_lre07.pl b/egs/lre/v1/local/make_lre07.pl
index 088112671..db29880a2 100755
--- a/egs/lre/v1/local/make_lre07.pl
+++ b/egs/lre/v1/local/make_lre07.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 # Usage: make_lre07.pl <path-to-LDC2009S04> <output-dir>
diff --git a/egs/lre/v1/local/make_sre_2008_train.pl b/egs/lre/v1/local/make_sre_2008_train.pl
index 9c8316138..0d1797145 100755
--- a/egs/lre/v1/local/make_sre_2008_train.pl
+++ b/egs/lre/v1/local/make_sre_2008_train.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2013-2014 Daniel Povey
 #                2014 David Snyder
diff --git a/egs/lre/v1/local/vad_split_utts_fix_data.pl b/egs/lre/v1/local/vad_split_utts_fix_data.pl
index 559811ab7..dfede15e4 100755
--- a/egs/lre/v1/local/vad_split_utts_fix_data.pl
+++ b/egs/lre/v1/local/vad_split_utts_fix_data.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 # Apache 2.0.
diff --git a/egs/lre07/v1/lid/balance_priors_to_test.pl b/egs/lre07/v1/lid/balance_priors_to_test.pl
index 955e6059f..9565d403a 100755
--- a/egs/lre07/v1/lid/balance_priors_to_test.pl
+++ b/egs/lre07/v1/lid/balance_priors_to_test.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2014 David Snyder
 # Apache 2.0.
 #
diff --git a/egs/lre07/v1/lid/remove_dialect.pl b/egs/lre07/v1/lid/remove_dialect.pl
index 83556b95b..7fc61b6e8 100755
--- a/egs/lre07/v1/lid/remove_dialect.pl
+++ b/egs/lre07/v1/lid/remove_dialect.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Removes the dialect parts on an utt2lang file.
 # For example <utt> chinese.wu is converted to <utt> chinese.
 
diff --git a/egs/lre07/v1/local/lre07_eval/lre07_targets.pl b/egs/lre07/v1/local/lre07_eval/lre07_targets.pl
index 537cf9f46..24869ed45 100755
--- a/egs/lre07/v1/local/lre07_eval/lre07_targets.pl
+++ b/egs/lre07/v1/local/lre07_eval/lre07_targets.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 # Apache 2.0.
diff --git a/egs/lre07/v1/local/lre07_eval/score_lre07.v01d.pl b/egs/lre07/v1/local/lre07_eval/score_lre07.v01d.pl
index 69d44ce1b..1a1bd9208 100755
--- a/egs/lre07/v1/local/lre07_eval/score_lre07.v01d.pl
+++ b/egs/lre07/v1/local/lre07_eval/score_lre07.v01d.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 ##############################
 # history
diff --git a/egs/lre07/v1/local/make_callfriend.pl b/egs/lre07/v1/local/make_callfriend.pl
index 69981400d..407abce2e 100755
--- a/egs/lre07/v1/local/make_callfriend.pl
+++ b/egs/lre07/v1/local/make_callfriend.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder  Daniel Povey
 # Apache 2.0.
diff --git a/egs/lre07/v1/local/make_lre03.pl b/egs/lre07/v1/local/make_lre03.pl
index b7b29f656..284e3d754 100755
--- a/egs/lre07/v1/local/make_lre03.pl
+++ b/egs/lre07/v1/local/make_lre03.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 #
 # Copyright 2014  David Snyder  Daniel Povey
  
diff --git a/egs/lre07/v1/local/make_lre05.pl b/egs/lre07/v1/local/make_lre05.pl
index f77984c4d..914a70cf5 100755
--- a/egs/lre07/v1/local/make_lre05.pl
+++ b/egs/lre07/v1/local/make_lre05.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 
diff --git a/egs/lre07/v1/local/make_lre07.pl b/egs/lre07/v1/local/make_lre07.pl
index 088112671..db29880a2 100755
--- a/egs/lre07/v1/local/make_lre07.pl
+++ b/egs/lre07/v1/local/make_lre07.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 # Usage: make_lre07.pl <path-to-LDC2009S04> <output-dir>
diff --git a/egs/lre07/v1/local/make_lre07_train.pl b/egs/lre07/v1/local/make_lre07_train.pl
index ad2e562e7..4de1a4fe7 100755
--- a/egs/lre07/v1/local/make_lre07_train.pl
+++ b/egs/lre07/v1/local/make_lre07_train.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 # Apache 2.0.
diff --git a/egs/lre07/v1/local/make_lre09.pl b/egs/lre07/v1/local/make_lre09.pl
index 18b85b89c..a66807fef 100755
--- a/egs/lre07/v1/local/make_lre09.pl
+++ b/egs/lre07/v1/local/make_lre09.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 #
 # Copyright 2014  David Snyder
 
diff --git a/egs/lre07/v1/local/make_lre96.pl b/egs/lre07/v1/local/make_lre96.pl
index 74eec8282..0e5dcf8f4 100755
--- a/egs/lre07/v1/local/make_lre96.pl
+++ b/egs/lre07/v1/local/make_lre96.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder  Daniel Povey
 # Apache 2.0.
diff --git a/egs/lre07/v1/local/make_sre_2008_train.pl b/egs/lre07/v1/local/make_sre_2008_train.pl
index 9c8316138..0d1797145 100755
--- a/egs/lre07/v1/local/make_sre_2008_train.pl
+++ b/egs/lre07/v1/local/make_sre_2008_train.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2013-2014 Daniel Povey
 #                2014 David Snyder
diff --git a/egs/lre07/v1/local/vad_split_utts_fix_data.pl b/egs/lre07/v1/local/vad_split_utts_fix_data.pl
index 559811ab7..dfede15e4 100755
--- a/egs/lre07/v1/local/vad_split_utts_fix_data.pl
+++ b/egs/lre07/v1/local/vad_split_utts_fix_data.pl
@@ -1,4 +1,4 @@
-#! /usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 # Apache 2.0.
diff --git a/egs/reverb/s5/local/convert_transcripts.pl b/egs/reverb/s5/local/convert_transcripts.pl
index c1b50e03b..dd6244267 100755
--- a/egs/reverb/s5/local/convert_transcripts.pl
+++ b/egs/reverb/s5/local/convert_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2013 MERL (author: Felix Weninger)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/reverb/s5/local/find_transcripts_singledot.pl b/egs/reverb/s5/local/find_transcripts_singledot.pl
index 6c4bc4e23..9b6da0cdb 100755
--- a/egs/reverb/s5/local/find_transcripts_singledot.pl
+++ b/egs/reverb/s5/local/find_transcripts_singledot.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2013 MERL (author: Felix Weninger and Shinji Watanabe)
 # Modified from original Kaldi code: find_transcripts.pl
 
diff --git a/egs/reverb/s5/local/find_transcripts_txt.pl b/egs/reverb/s5/local/find_transcripts_txt.pl
index 21b0956b3..04579f590 100755
--- a/egs/reverb/s5/local/find_transcripts_txt.pl
+++ b/egs/reverb/s5/local/find_transcripts_txt.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2013 MERL (author: Felix Weninger)
 # Modified from original Kaldi code: find_transcripts.pl
 
diff --git a/egs/reverb/s5/local/merge_dict.pl b/egs/reverb/s5/local/merge_dict.pl
index aefe5cf6c..93d819785 100755
--- a/egs/reverb/s5/local/merge_dict.pl
+++ b/egs/reverb/s5/local/merge_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2013 MERL (author: Felix Weninger)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/reverb/s5/local/mlf2text.pl b/egs/reverb/s5/local/mlf2text.pl
index 483a5d358..53daf483f 100755
--- a/egs/reverb/s5/local/mlf2text.pl
+++ b/egs/reverb/s5/local/mlf2text.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 use strict;
 
diff --git a/egs/reverb/s5/local/summarize_results.pl b/egs/reverb/s5/local/summarize_results.pl
index ce5ce58aa..0977bd2da 100755
--- a/egs/reverb/s5/local/summarize_results.pl
+++ b/egs/reverb/s5/local/summarize_results.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2013 MERL (author: Felix Weninger)
 
diff --git a/egs/rm/s5/local/make_rm_dict.pl b/egs/rm/s5/local/make_rm_dict.pl
index 12d0a3363..8aee98e74 100755
--- a/egs/rm/s5/local/make_rm_dict.pl
+++ b/egs/rm/s5/local/make_rm_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Yanmin Qian  Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/rm/s5/local/make_rm_lm.pl b/egs/rm/s5/local/make_rm_lm.pl
index c5af12d75..053fb2943 100755
--- a/egs/rm/s5/local/make_rm_lm.pl
+++ b/egs/rm/s5/local/make_rm_lm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Yanmin Qian  Microsoft Corporation
 
diff --git a/egs/rm/s5/local/make_trans.pl b/egs/rm/s5/local/make_trans.pl
index 1a436ec06..263496526 100755
--- a/egs/rm/s5/local/make_trans.pl
+++ b/egs/rm/s5/local/make_trans.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/rm/s5/local/nnet/run_multisoftmax.sh b/egs/rm/s5/local/nnet/run_multisoftmax.sh
index 7c2e53ba3..a0a719d9b 100755
--- a/egs/rm/s5/local/nnet/run_multisoftmax.sh
+++ b/egs/rm/s5/local/nnet/run_multisoftmax.sh
@@ -28,6 +28,11 @@ wsj_ali=../../wsj/s5/exp/tri4b_ali_si284
 stage=0
 . utils/parse_options.sh || exit 1;
 
+set -u 
+set -e
+set -o pipefail
+set -x
+
 # Make the FBANK features,
 if [ $stage -le 0 ]; then
   # Make datadir copies,
@@ -55,6 +60,7 @@ if [ $stage -le 0 ]; then
   utils/combine_data.sh $train_tr90_wsj ${train}_tr90 $wsj || exit 1
 fi
 
+
 # Prepare the merged targets,
 dir=exp/dnn4e-fbank_multisoftmax
 ali1_dim=$(hmm-info ${gmm}_ali/final.mdl | grep pdfs | awk '{ print $NF }')
@@ -67,13 +73,24 @@ ali1_dir=${gmm}_ali
 #
 if [ $stage -le 1 ]; then
   mkdir -p $dir/log
-  copy-int-vector "ark:gzcat ${wsj_ali}/ali.*.gz |" ark,t:- | awk -v prefix=wsj '{ $1=prefix $1; print; }' | gzip -c >$dir/ali_wsj.gz # Mapping utt key,
+  copy-int-vector "ark:gzcat ${wsj_ali}/ali.*.gz |" ark,t:- | awk -v prefix=wsj '{ $1=prefix $1; print; }' | \
+    gzip -c >$dir/ali_wsj.gz # Prefix the utt keys of the wsj alignment with 'wsj',
+
+  # Store posteriors to disk, indexed by 'scp',
+  ali-to-pdf ${gmm}_ali/final.mdl "ark:gzcat ${gmm}_ali/ali.*.gz |" ark:- | \
+    ali-to-post ark:- ark,scp:$dir/post1.ark,$dir/post1.scp
+  ali-to-pdf ${wsj_ali}/final.mdl "ark:gzcat $dir/ali_wsj.gz |" ark:- | \
+    ali-to-post ark:- ark,scp:$dir/post2.ark,$dir/post2.scp
+
   featlen="ark:feat-to-len 'scp:cat $train/feats.scp $wsj/feats.scp |' ark,t:- |"
-  ali1="ark:ali-to-pdf ${gmm}_ali/final.mdl 'ark:gzcat ${gmm}_ali/ali.*.gz |' ark:- | ali-to-post ark:- ark:- |"
-  ali2="ark:ali-to-pdf ${wsj_ali}/final.mdl 'ark:gzcat $dir/ali_wsj.gz |' ark:- | ali-to-post ark:- ark:- |" 
-  paste-post "$featlen" $ali1_dim:$ali2_dim "$ali1" "$ali2" ark,scp:$dir/pasted_post.ark,$dir/pasted_post.scp 2>$dir/log/paste_post.log || exit 1
+  post1=scp:$dir/post1.scp
+  post2=scp:$dir/post2.scp
+
+  paste-post --allow-partial=true "$featlen" $ali1_dim:$ali2_dim "$post1" "$post2" \
+    ark,scp:$dir/pasted_post.ark,$dir/pasted_post.scp 2>$dir/log/paste_post.log
 fi
 
+
 # Train <MultiSoftmax> system,
 if [ $stage -le 2 ]; then
   $cuda_cmd $dir/log/train_nnet.log \
@@ -101,6 +118,8 @@ if [ $stage -le 2 ]; then
     $gmm/graph_ug $dev $dir/decode_ug || exit 1;
 fi
 
+exit 0
+
 # TODO, 
 # make nnet-copy support block selection, 
 # - either by replacing <BlockSoftmax> by <Softmax> and shrinking <AffineTransform>,
diff --git a/egs/sprakbanken/s5/cmd.sh b/egs/sprakbanken/s5/cmd.sh
index feac2bf55..43867ccf0 100644
--- a/egs/sprakbanken/s5/cmd.sh
+++ b/egs/sprakbanken/s5/cmd.sh
@@ -7,9 +7,9 @@
 
 #a) JHU cluster options
 #export train_cmd="queue.pl -l arch=*64"
-#export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
-#export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
-#export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
+#export decode_cmd="queue.pl -l arch=*64 --mem 2G"
+#export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
+#export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
 #export cuda_cmd="queue.pl -l gpu=1"
 
 
diff --git a/egs/sprakbanken/s5/local/cstr_ndx2flist.pl b/egs/sprakbanken/s5/local/cstr_ndx2flist.pl
index 101834e86..d19db421a 100755
--- a/egs/sprakbanken/s5/local/cstr_ndx2flist.pl
+++ b/egs/sprakbanken/s5/local/cstr_ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/sprakbanken/s5/local/data_prep.py b/egs/sprakbanken/s5/local/data_prep.py
index 5714b7d9a..58a0898dc 100755
--- a/egs/sprakbanken/s5/local/data_prep.py
+++ b/egs/sprakbanken/s5/local/data_prep.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 # Copyright 2013-2014 Mirsk Digital Aps  (Author: Andreas Kirkedal)
 
diff --git a/egs/sprakbanken/s5/local/dict/add_counts.pl b/egs/sprakbanken/s5/local/dict/add_counts.pl
index 409277c72..a2ace7e9a 100755
--- a/egs/sprakbanken/s5/local/dict/add_counts.pl
+++ b/egs/sprakbanken/s5/local/dict/add_counts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 # Add counts to an oovlist.
diff --git a/egs/sprakbanken/s5/local/dict/count_rules.pl b/egs/sprakbanken/s5/local/dict/count_rules.pl
index 2805e98c3..1c6cfc4a5 100755
--- a/egs/sprakbanken/s5/local/dict/count_rules.pl
+++ b/egs/sprakbanken/s5/local/dict/count_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes the output of score_prons.pl and collates
 # it for each (rule, destress) pair so that we get the
diff --git a/egs/sprakbanken/s5/local/dict/filter_dict.pl b/egs/sprakbanken/s5/local/dict/filter_dict.pl
index 1210bb5e6..5e32823ef 100755
--- a/egs/sprakbanken/s5/local/dict/filter_dict.pl
+++ b/egs/sprakbanken/s5/local/dict/filter_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 # This program reads and writes either a dictionary or just a list
diff --git a/egs/sprakbanken/s5/local/dict/find_acronyms.pl b/egs/sprakbanken/s5/local/dict/find_acronyms.pl
index ed4655afa..55e474c40 100755
--- a/egs/sprakbanken/s5/local/dict/find_acronyms.pl
+++ b/egs/sprakbanken/s5/local/dict/find_acronyms.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Reads a dictionary, and prints out a list of words that seem to be pronounced
 # as acronyms (not including plurals of acronyms, just acronyms).  Uses
diff --git a/egs/sprakbanken/s5/local/dict/get_acronym_prons.pl b/egs/sprakbanken/s5/local/dict/get_acronym_prons.pl
index 3f9936818..6294b7046 100755
--- a/egs/sprakbanken/s5/local/dict/get_acronym_prons.pl
+++ b/egs/sprakbanken/s5/local/dict/get_acronym_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Reads a dictionary (for prons of letters), and an OOV list,
 # and puts out candidate pronunciations of words in that list
diff --git a/egs/sprakbanken/s5/local/dict/get_candidate_prons.pl b/egs/sprakbanken/s5/local/dict/get_candidate_prons.pl
index b13efd203..b091c6d76 100755
--- a/egs/sprakbanken/s5/local/dict/get_candidate_prons.pl
+++ b/egs/sprakbanken/s5/local/dict/get_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This script takes three command-line arguments (typically files, or "-"):
 # the suffix rules (as output by get_rules.pl), the rule-hierarchy 
diff --git a/egs/sprakbanken/s5/local/dict/get_rule_hierarchy.pl b/egs/sprakbanken/s5/local/dict/get_rule_hierarchy.pl
index 35805b46b..d7c13a8df 100755
--- a/egs/sprakbanken/s5/local/dict/get_rule_hierarchy.pl
+++ b/egs/sprakbanken/s5/local/dict/get_rule_hierarchy.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 #This reads in rules, of the form put out by get_rules.pl, e.g.:
 # ERT,,ER0 T,
diff --git a/egs/sprakbanken/s5/local/dict/get_rules.pl b/egs/sprakbanken/s5/local/dict/get_rules.pl
index a5b57b088..b10eccc91 100755
--- a/egs/sprakbanken/s5/local/dict/get_rules.pl
+++ b/egs/sprakbanken/s5/local/dict/get_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program creates suggested suffix rules from a dictionary.
 # It outputs quadruples of the form:
diff --git a/egs/sprakbanken/s5/local/dict/limit_candidate_prons.pl b/egs/sprakbanken/s5/local/dict/limit_candidate_prons.pl
index ceff9fbad..b01218f6e 100755
--- a/egs/sprakbanken/s5/local/dict/limit_candidate_prons.pl
+++ b/egs/sprakbanken/s5/local/dict/limit_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program enforces the rule that
 # if a "more specific" rule applies, we cannot use the more general rule.
diff --git a/egs/sprakbanken/s5/local/dict/reverse_candidates.pl b/egs/sprakbanken/s5/local/dict/reverse_candidates.pl
index d5c5effc2..5b7aabd8a 100755
--- a/egs/sprakbanken/s5/local/dict/reverse_candidates.pl
+++ b/egs/sprakbanken/s5/local/dict/reverse_candidates.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl,
 # which is 7-tuples, one per line, of the form:
diff --git a/egs/sprakbanken/s5/local/dict/reverse_dict.pl b/egs/sprakbanken/s5/local/dict/reverse_dict.pl
index 75681711b..2cd38c54b 100755
--- a/egs/sprakbanken/s5/local/dict/reverse_dict.pl
+++ b/egs/sprakbanken/s5/local/dict/reverse_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Used in conjunction with get_rules.pl
 # example input line: XANTHE  Z AE1 N DH
diff --git a/egs/sprakbanken/s5/local/dict/score_prons.pl b/egs/sprakbanken/s5/local/dict/score_prons.pl
index fd5a004d8..6aa72e421 100755
--- a/egs/sprakbanken/s5/local/dict/score_prons.pl
+++ b/egs/sprakbanken/s5/local/dict/score_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes candidate prons from "get_candidate_prons.pl" or
 # "limit_candidate_prons.pl", and a reference dictionary covering those words,
diff --git a/egs/sprakbanken/s5/local/dict/score_rules.pl b/egs/sprakbanken/s5/local/dict/score_rules.pl
index 8d165f7f1..252d94677 100755
--- a/egs/sprakbanken/s5/local/dict/score_rules.pl
+++ b/egs/sprakbanken/s5/local/dict/score_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes the output of count_rules.pl, which is tuples
 # of the form
diff --git a/egs/sprakbanken/s5/local/dict/select_candidate_prons.pl b/egs/sprakbanken/s5/local/dict/select_candidate_prons.pl
index d0018c98a..a24ccdd4d 100755
--- a/egs/sprakbanken/s5/local/dict/select_candidate_prons.pl
+++ b/egs/sprakbanken/s5/local/dict/select_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl
 # or reverse_candidates.pl, which is 7-tuples, one per line, of the form:
diff --git a/egs/sprakbanken/s5/local/find_transcripts.pl b/egs/sprakbanken/s5/local/find_transcripts.pl
index 0e5d71f79..6429411b8 100755
--- a/egs/sprakbanken/s5/local/find_transcripts.pl
+++ b/egs/sprakbanken/s5/local/find_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/sprakbanken/s5/local/flist2scp.pl b/egs/sprakbanken/s5/local/flist2scp.pl
index 6831d2d7b..234e4add1 100755
--- a/egs/sprakbanken/s5/local/flist2scp.pl
+++ b/egs/sprakbanken/s5/local/flist2scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/sprakbanken/s5/local/normalize_transcript.py b/egs/sprakbanken/s5/local/normalize_transcript.py
index 43c2628ea..f759a3973 100755
--- a/egs/sprakbanken/s5/local/normalize_transcript.py
+++ b/egs/sprakbanken/s5/local/normalize_transcript.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 import codecs
 import sys
 import re
diff --git a/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py b/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py
index bbdbadde6..e934533a3 100755
--- a/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py
+++ b/egs/sprakbanken/s5/local/normalize_transcript_prefixed.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 # Copyright 2013-2014 Mirsk Digital Aps  (Author: Andreas Kirkedal)
 
diff --git a/egs/sprakbanken/s5/local/parallel2kaldi.py b/egs/sprakbanken/s5/local/parallel2kaldi.py
old mode 100644
new mode 100755
index 3ce7218d1..8ef503894
--- a/egs/sprakbanken/s5/local/parallel2kaldi.py
+++ b/egs/sprakbanken/s5/local/parallel2kaldi.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 This script assumes that the parallel files have the same filename with different extensions and you must
 specify the absolute path to the corpus from the root. The text files may only contain a single line of text.
diff --git a/egs/sprakbanken/s5/local/sprak2kaldi.py b/egs/sprakbanken/s5/local/sprak2kaldi.py
index b769fc4cc..f3abf1d9a 100755
--- a/egs/sprakbanken/s5/local/sprak2kaldi.py
+++ b/egs/sprakbanken/s5/local/sprak2kaldi.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 # Copyright 2013-2014 Mirsk Digital Aps  (Author: Andreas Kirkedal)
 
diff --git a/egs/sprakbanken/s5/local/sprak2parallel.py b/egs/sprakbanken/s5/local/sprak2parallel.py
index 8bc2f0541..b5fe56fd6 100755
--- a/egs/sprakbanken/s5/local/sprak2parallel.py
+++ b/egs/sprakbanken/s5/local/sprak2parallel.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 # Copyright 2013-2014 Mirsk Digital ApS  (Author: Andreas Kirkedal)
 
diff --git a/egs/sprakbanken/s5/local/sprakparser.py b/egs/sprakbanken/s5/local/sprakparser.py
index 7d1826af9..7bdf6ac94 100755
--- a/egs/sprakbanken/s5/local/sprakparser.py
+++ b/egs/sprakbanken/s5/local/sprakparser.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 # Copyright 2013-2014 Mirsk Digital ApS  (Author: Andreas Kirkedal)
 
diff --git a/egs/sprakbanken/s5/local/writenumbers.py b/egs/sprakbanken/s5/local/writenumbers.py
old mode 100644
new mode 100755
index da478e4f5..452cd3e7e
--- a/egs/sprakbanken/s5/local/writenumbers.py
+++ b/egs/sprakbanken/s5/local/writenumbers.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 # Copyright 2014 Author: Andreas Kirkedal
 
diff --git a/egs/sre08/v1/local/make_fisher.pl b/egs/sre08/v1/local/make_fisher.pl
index 65ed72cc5..6de7c72ae 100755
--- a/egs/sre08/v1/local/make_fisher.pl
+++ b/egs/sre08/v1/local/make_fisher.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 use File::Basename;
 
diff --git a/egs/sre08/v1/local/make_sre_2004_test.pl b/egs/sre08/v1/local/make_sre_2004_test.pl
index 4c4ab1b1a..cfb2d7c96 100755
--- a/egs/sre08/v1/local/make_sre_2004_test.pl
+++ b/egs/sre08/v1/local/make_sre_2004_test.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 # Apache 2.0.
diff --git a/egs/sre08/v1/local/make_sre_2004_train.pl b/egs/sre08/v1/local/make_sre_2004_train.pl
index a6c826bfd..1683bc38f 100755
--- a/egs/sre08/v1/local/make_sre_2004_train.pl
+++ b/egs/sre08/v1/local/make_sre_2004_train.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2013   Daniel Povey
 # Apache 2.0.
diff --git a/egs/sre08/v1/local/make_sre_2005_test.pl b/egs/sre08/v1/local/make_sre_2005_test.pl
index eb11cd5cc..557b0a931 100755
--- a/egs/sre08/v1/local/make_sre_2005_test.pl
+++ b/egs/sre08/v1/local/make_sre_2005_test.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2014  David Snyder
 # Apache 2.0.
diff --git a/egs/sre08/v1/local/make_sre_2005_train.pl b/egs/sre08/v1/local/make_sre_2005_train.pl
index 59a6ddf65..03b4badd8 100755
--- a/egs/sre08/v1/local/make_sre_2005_train.pl
+++ b/egs/sre08/v1/local/make_sre_2005_train.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2013   Daniel Povey
 # Apache 2.0.
diff --git a/egs/sre08/v1/local/make_sre_2006_train.pl b/egs/sre08/v1/local/make_sre_2006_train.pl
index fbdaa3f22..9e4950ba5 100755
--- a/egs/sre08/v1/local/make_sre_2006_train.pl
+++ b/egs/sre08/v1/local/make_sre_2006_train.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2013   Daniel Povey
 # Apache 2.0.
diff --git a/egs/sre08/v1/local/make_sre_2008_train.pl b/egs/sre08/v1/local/make_sre_2008_train.pl
index cf84a6212..4d833bd04 100755
--- a/egs/sre08/v1/local/make_sre_2008_train.pl
+++ b/egs/sre08/v1/local/make_sre_2008_train.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright 2013   Daniel Povey
 # Apache 2.0.
diff --git a/egs/sre08/v1/local/make_swbd2_phase2.pl b/egs/sre08/v1/local/make_swbd2_phase2.pl
index 85a2df605..e0cf3c837 100755
--- a/egs/sre08/v1/local/make_swbd2_phase2.pl
+++ b/egs/sre08/v1/local/make_swbd2_phase2.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright   2013   Daniel Povey
 # Apache 2.0
diff --git a/egs/sre08/v1/local/make_swbd2_phase3.pl b/egs/sre08/v1/local/make_swbd2_phase3.pl
index 829c23292..d6210e5a7 100755
--- a/egs/sre08/v1/local/make_swbd2_phase3.pl
+++ b/egs/sre08/v1/local/make_swbd2_phase3.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright   2013   Daniel Povey
 # Apache 2.0
diff --git a/egs/sre08/v1/local/make_swbd_cellular1.pl b/egs/sre08/v1/local/make_swbd_cellular1.pl
index 57e3f53d4..da4b93444 100755
--- a/egs/sre08/v1/local/make_swbd_cellular1.pl
+++ b/egs/sre08/v1/local/make_swbd_cellular1.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright   2013   Daniel Povey
 # Apache 2.0
diff --git a/egs/sre08/v1/local/make_swbd_cellular2.pl b/egs/sre08/v1/local/make_swbd_cellular2.pl
index acd8555a5..3bfcfede1 100755
--- a/egs/sre08/v1/local/make_swbd_cellular2.pl
+++ b/egs/sre08/v1/local/make_swbd_cellular2.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # Copyright   2013   Daniel Povey
 # Apache 2.0
diff --git a/egs/swbd/s5/local/fisher_map_words.pl b/egs/swbd/s5/local/fisher_map_words.pl
index edd6c0296..1a6719ba5 100755
--- a/egs/swbd/s5/local/fisher_map_words.pl
+++ b/egs/swbd/s5/local/fisher_map_words.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2013  Arnab Ghoshal
 
diff --git a/egs/swbd/s5/local/swbd1_map_words.pl b/egs/swbd/s5/local/swbd1_map_words.pl
index 0f2472a76..39f90d728 100755
--- a/egs/swbd/s5/local/swbd1_map_words.pl
+++ b/egs/swbd/s5/local/swbd1_map_words.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Modified from swbd_map_words.pl in Kaldi s5 recipe to make pattern
 # matches case-insensitive --Arnab (Jan 2013)
diff --git a/egs/swbd/s5/local/swbd_map_words.pl b/egs/swbd/s5/local/swbd_map_words.pl
index 867efefd7..eac996c1d 100755
--- a/egs/swbd/s5/local/swbd_map_words.pl
+++ b/egs/swbd/s5/local/swbd_map_words.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 if ($ARGV[0] eq "-f") {
diff --git a/egs/swbd/s5b/local/extend_segments.pl b/egs/swbd/s5b/local/extend_segments.pl
index 0dfad040a..e8b4894d5 100755
--- a/egs/swbd/s5b/local/extend_segments.pl
+++ b/egs/swbd/s5b/local/extend_segments.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 if (@ARGV != 1 || !($ARGV[0] =~ m/^-?\d+\.?\d*$/ &&  $ARGV[0] >= 0)) {
   print STDERR "Usage: extend_segments.pl time-in-seconds <segments >segments.extended \n" .
diff --git a/egs/swbd/s5b/local/fisher_map_words.pl b/egs/swbd/s5b/local/fisher_map_words.pl
index edd6c0296..1a6719ba5 100755
--- a/egs/swbd/s5b/local/fisher_map_words.pl
+++ b/egs/swbd/s5b/local/fisher_map_words.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2013  Arnab Ghoshal
 
diff --git a/egs/swbd/s5b/local/swbd1_fix_speakerid.pl b/egs/swbd/s5b/local/swbd1_fix_speakerid.pl
old mode 100644
new mode 100755
index ded0de4ea..a3073029b
--- a/egs/swbd/s5b/local/swbd1_fix_speakerid.pl
+++ b/egs/swbd/s5b/local/swbd1_fix_speakerid.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Author: Peng Qi (pengqi@cs.stanford.edu)
 # This script maps Switchboard speaker IDs to the true physical speakers
diff --git a/egs/swbd/s5b/local/swbd1_map_words.pl b/egs/swbd/s5b/local/swbd1_map_words.pl
index 0f2472a76..39f90d728 100755
--- a/egs/swbd/s5b/local/swbd1_map_words.pl
+++ b/egs/swbd/s5b/local/swbd1_map_words.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Modified from swbd_map_words.pl in Kaldi s5 recipe to make pattern
 # matches case-insensitive --Arnab (Jan 2013)
diff --git a/egs/swbd/s5c/cmd.sh b/egs/swbd/s5c/cmd.sh
index 4abf8546b..036d89a9e 100644
--- a/egs/swbd/s5c/cmd.sh
+++ b/egs/swbd/s5c/cmd.sh
@@ -7,9 +7,9 @@
 
 #a) JHU cluster options
 export train_cmd="queue.pl -l arch=*64*"
-export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
+export decode_cmd="queue.pl -l arch=*64* --mem 4G"
 #export cuda_cmd="..."
-export mkgraph_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
+export mkgraph_cmd="queue.pl -l arch=*64* --mem 4G"
 
 #b) BUT cluster options
 #export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
diff --git a/egs/swbd/s5c/local/extend_segments.pl b/egs/swbd/s5c/local/extend_segments.pl
index 0dfad040a..e8b4894d5 100755
--- a/egs/swbd/s5c/local/extend_segments.pl
+++ b/egs/swbd/s5c/local/extend_segments.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 if (@ARGV != 1 || !($ARGV[0] =~ m/^-?\d+\.?\d*$/ &&  $ARGV[0] >= 0)) {
   print STDERR "Usage: extend_segments.pl time-in-seconds <segments >segments.extended \n" .
diff --git a/egs/swbd/s5c/local/fisher_map_words.pl b/egs/swbd/s5c/local/fisher_map_words.pl
index 0b23a55a4..db907e035 100755
--- a/egs/swbd/s5c/local/fisher_map_words.pl
+++ b/egs/swbd/s5c/local/fisher_map_words.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2013  Arnab Ghoshal
 
diff --git a/egs/swbd/s5c/local/format_acronyms_dict.py b/egs/swbd/s5c/local/format_acronyms_dict.py
index 5af49e311..16a03a60e 100755
--- a/egs/swbd/s5c/local/format_acronyms_dict.py
+++ b/egs/swbd/s5c/local/format_acronyms_dict.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # convert acronyms in swbd dict to fisher convention
 # IBM to i._b._m.
 # BBC to b._b._c.
diff --git a/egs/swbd/s5c/local/map_acronyms_ctm.py b/egs/swbd/s5c/local/map_acronyms_ctm.py
index 08d9ae605..c7f002cb2 100755
--- a/egs/swbd/s5c/local/map_acronyms_ctm.py
+++ b/egs/swbd/s5c/local/map_acronyms_ctm.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # convert acronyms in swbd decode result
 # e.g. convert things like en_4156 B 414.26 0.65 u._c._l._a. to
 # en_4156 B 414.26 0.16 u
diff --git a/egs/swbd/s5c/local/map_acronyms_transcripts.py b/egs/swbd/s5c/local/map_acronyms_transcripts.py
index 749014847..036612454 100755
--- a/egs/swbd/s5c/local/map_acronyms_transcripts.py
+++ b/egs/swbd/s5c/local/map_acronyms_transcripts.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 # convert acronyms in swbd transcript to fisher convention
 # accoring to first two columns in the input acronyms mapping
 
diff --git a/egs/swbd/s5c/local/score_sclite_conf.sh b/egs/swbd/s5c/local/score_sclite_conf.sh
index 252f566a3..b44ce462e 100755
--- a/egs/swbd/s5c/local/score_sclite_conf.sh
+++ b/egs/swbd/s5c/local/score_sclite_conf.sh
@@ -49,8 +49,7 @@ if [ $stage -le 0 ]; then
     $cmd LMWT=$min_lmwt:$max_lmwt $dir/scoring/log/get_ctm.LMWT.${wip}.log \
       mkdir -p $dir/score_LMWT_${wip}/ '&&' \
       ACWT=\`perl -e \"print 1.0/LMWT\;\"\` '&&' \
-      lattice-scale --acoustic-scale=\$ACWT "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
-      lattice-add-penalty --word-ins-penalty=$wip ark:- ark:- \| \
+      lattice-add-penalty --word-ins-penalty=$wip "ark:gunzip -c $dir/lat.*.gz|" ark:- \| \
       lattice-align-words $lang/phones/word_boundary.int $model ark:- ark:- \| \
       lattice-to-ctm-conf --decode-mbr=$decode_mbr --acoustic-scale=\$ACWT  ark:- - \| \
       utils/int2sym.pl -f 5 $lang/words.txt  \| \
diff --git a/egs/swbd/s5c/local/swbd1_fix_speakerid.pl b/egs/swbd/s5c/local/swbd1_fix_speakerid.pl
index ded0de4ea..a3073029b 100755
--- a/egs/swbd/s5c/local/swbd1_fix_speakerid.pl
+++ b/egs/swbd/s5c/local/swbd1_fix_speakerid.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Author: Peng Qi (pengqi@cs.stanford.edu)
 # This script maps Switchboard speaker IDs to the true physical speakers
diff --git a/egs/swbd/s5c/local/swbd1_map_words.pl b/egs/swbd/s5c/local/swbd1_map_words.pl
index 0f2472a76..39f90d728 100755
--- a/egs/swbd/s5c/local/swbd1_map_words.pl
+++ b/egs/swbd/s5c/local/swbd1_map_words.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Modified from swbd_map_words.pl in Kaldi s5 recipe to make pattern
 # matches case-insensitive --Arnab (Jan 2013)
diff --git a/egs/tedlium/s5/cmd.sh b/egs/tedlium/s5/cmd.sh
index 93be65b63..bed97d340 100644
--- a/egs/tedlium/s5/cmd.sh
+++ b/egs/tedlium/s5/cmd.sh
@@ -12,8 +12,8 @@
 
 # JHU cluster:
 export train_cmd="queue.pl -l arch=*64*"
-export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
-export cuda_cmd="queue.pl -l arch=*64*,gpu=1 -q g.q"
+export decode_cmd="queue.pl -l arch=*64* --mem 4G"
+export cuda_cmd="queue.pl -l arch=*64* --gpu 1"
 
 host=$(hostname -f)
 if [ ${host#*.} == "fit.vutbr.cz" ]; then
diff --git a/egs/tedlium/s5/local/join_suffix.py b/egs/tedlium/s5/local/join_suffix.py
index 6ab803d56..25b097ed0 100755
--- a/egs/tedlium/s5/local/join_suffix.py
+++ b/egs/tedlium/s5/local/join_suffix.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 #
 # Copyright  2014 Nickolay V. Shmyrev 
 # Apache 2.0
diff --git a/egs/tedlium/s5/run.sh b/egs/tedlium/s5/run.sh
index 32b8bfd3c..7a36e49e8 100755
--- a/egs/tedlium/s5/run.sh
+++ b/egs/tedlium/s5/run.sh
@@ -70,10 +70,10 @@ if [ $stage -le 3 ]; then
   utils/mkgraph.sh data/lang_nosp_test exp/tri1 exp/tri1/graph_nosp || exit 1
 
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri1/graph_nosp data/dev exp/tri1/decode_nosp_dev || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri1/graph_nosp data/test exp/tri1/decode_nosp_test || exit 1
 fi
 
@@ -87,10 +87,10 @@ if [ $stage -le 4 ]; then
   utils/mkgraph.sh data/lang_nosp_test exp/tri2 exp/tri2/graph_nosp || exit 1
 
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri2/graph_nosp data/dev exp/tri2/decode_nosp_dev || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri2/graph_nosp data/test exp/tri2/decode_nosp_test || exit 1
 fi
 
@@ -110,10 +110,10 @@ if [ $stage -le 5 ]; then
   utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1
 
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1
   steps/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri2/graph data/test exp/tri2/decode_test || exit 1
 fi
 
@@ -127,10 +127,10 @@ if [ $stage -le 6 ]; then
   utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1
 
   steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri3/graph data/dev exp/tri3/decode_dev || exit 1
   steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri3/graph data/test exp/tri3/decode_test || exit 1
 fi
 
@@ -147,10 +147,10 @@ if [ $stage -le 7 ]; then
 
   for iter in 4; do
   steps/decode.sh --transform-dir exp/tri3/decode_dev --nj $decode_nj --cmd "$decode_cmd" --iter $iter \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri3/graph data/dev exp/tri3_mmi_b0.1/decode_dev_it$iter || exit 1
   steps/decode.sh --transform-dir exp/tri3/decode_test --nj $decode_nj --cmd "$decode_cmd" --iter $iter \
-    --num-threads 4 --parallel-opts "--num-threads 4" \
+    --num-threads 4 \
     exp/tri3/graph data/test exp/tri3_mmi_b0.1/decode_test_it$iter || exit 1
   done
 fi
diff --git a/egs/timit/s3/local/get_word_map.pl b/egs/timit/s3/local/get_word_map.pl
index 45901a3a2..fe90ba68a 100755
--- a/egs/timit/s3/local/get_word_map.pl
+++ b/egs/timit/s3/local/get_word_map.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # A very small modification on ../../../tools/kaldi_lm/get_word_map.pl to account
 # for no OOV vocab terms in timit.  - Navdeep Jaitly.
 
diff --git a/egs/timit/s3/local/make_trans.pl b/egs/timit/s3/local/make_trans.pl
index f9e71ad15..230b4fab2 100755
--- a/egs/timit/s3/local/make_trans.pl
+++ b/egs/timit/s3/local/make_trans.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2012 Navdeep Jaitly.
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/timit/s3/scripts/add_disambig.pl b/egs/timit/s3/scripts/add_disambig.pl
index a37af62b3..9036b484e 100755
--- a/egs/timit/s3/scripts/add_disambig.pl
+++ b/egs/timit/s3/scripts/add_disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/add_lex_disambig.pl b/egs/timit/s3/scripts/add_lex_disambig.pl
index 122ca6572..86d96848c 100755
--- a/egs/timit/s3/scripts/add_lex_disambig.pl
+++ b/egs/timit/s3/scripts/add_lex_disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/collapse_phones.pl b/egs/timit/s3/scripts/collapse_phones.pl
index 6233c27e1..f2126a488 100755
--- a/egs/timit/s3/scripts/collapse_phones.pl
+++ b/egs/timit/s3/scripts/collapse_phones.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use strict ; 
 
 my $ignore_first_field = 0;
diff --git a/egs/timit/s3/scripts/eps2disambig.pl b/egs/timit/s3/scripts/eps2disambig.pl
index fecbdc833..049802b08 100755
--- a/egs/timit/s3/scripts/eps2disambig.pl
+++ b/egs/timit/s3/scripts/eps2disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/filter_scp.pl b/egs/timit/s3/scripts/filter_scp.pl
index ac40838b7..c60b9800f 100755
--- a/egs/timit/s3/scripts/filter_scp.pl
+++ b/egs/timit/s3/scripts/filter_scp.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/int2sym.pl b/egs/timit/s3/scripts/int2sym.pl
index d08c4279e..ad85ef349 100755
--- a/egs/timit/s3/scripts/int2sym.pl
+++ b/egs/timit/s3/scripts/int2sym.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/make_lexicon_fst.pl b/egs/timit/s3/scripts/make_lexicon_fst.pl
index 6903ac3ce..ada17f64e 100755
--- a/egs/timit/s3/scripts/make_lexicon_fst.pl
+++ b/egs/timit/s3/scripts/make_lexicon_fst.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/make_phones_symtab.pl b/egs/timit/s3/scripts/make_phones_symtab.pl
index d9fcf0215..03b8cbe7a 100755
--- a/egs/timit/s3/scripts/make_phones_symtab.pl
+++ b/egs/timit/s3/scripts/make_phones_symtab.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/make_rm_dict.pl b/egs/timit/s3/scripts/make_rm_dict.pl
index 12d0a3363..8aee98e74 100755
--- a/egs/timit/s3/scripts/make_rm_dict.pl
+++ b/egs/timit/s3/scripts/make_rm_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Yanmin Qian  Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/make_rm_lm.pl b/egs/timit/s3/scripts/make_rm_lm.pl
index c5af12d75..053fb2943 100755
--- a/egs/timit/s3/scripts/make_rm_lm.pl
+++ b/egs/timit/s3/scripts/make_rm_lm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Yanmin Qian  Microsoft Corporation
 
diff --git a/egs/timit/s3/scripts/make_roots.pl b/egs/timit/s3/scripts/make_roots.pl
index eeed0e6e3..07c224379 100755
--- a/egs/timit/s3/scripts/make_roots.pl
+++ b/egs/timit/s3/scripts/make_roots.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/make_words_symtab.pl b/egs/timit/s3/scripts/make_words_symtab.pl
index b6aa4377c..509078898 100755
--- a/egs/timit/s3/scripts/make_words_symtab.pl
+++ b/egs/timit/s3/scripts/make_words_symtab.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/remove_oovs.pl b/egs/timit/s3/scripts/remove_oovs.pl
index 5bcab5984..532d7f295 100755
--- a/egs/timit/s3/scripts/remove_oovs.pl
+++ b/egs/timit/s3/scripts/remove_oovs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/s2eps.pl b/egs/timit/s3/scripts/s2eps.pl
index de993db67..ffeeb8eb6 100755
--- a/egs/timit/s3/scripts/s2eps.pl
+++ b/egs/timit/s3/scripts/s2eps.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/silphones.pl b/egs/timit/s3/scripts/silphones.pl
index 8cee6df94..3ff85dfe3 100755
--- a/egs/timit/s3/scripts/silphones.pl
+++ b/egs/timit/s3/scripts/silphones.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/spk2utt_to_utt2spk.pl b/egs/timit/s3/scripts/spk2utt_to_utt2spk.pl
index ca8a6a124..23992f25d 100755
--- a/egs/timit/s3/scripts/spk2utt_to_utt2spk.pl
+++ b/egs/timit/s3/scripts/spk2utt_to_utt2spk.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/split_scp.pl b/egs/timit/s3/scripts/split_scp.pl
index f30d217a6..9ffb29b76 100755
--- a/egs/timit/s3/scripts/split_scp.pl
+++ b/egs/timit/s3/scripts/split_scp.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/sym2int.pl b/egs/timit/s3/scripts/sym2int.pl
index 4f8b218a7..ee22d3f13 100755
--- a/egs/timit/s3/scripts/sym2int.pl
+++ b/egs/timit/s3/scripts/sym2int.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s3/scripts/utt2spk_to_spk2utt.pl b/egs/timit/s3/scripts/utt2spk_to_spk2utt.pl
index c94eb8d53..f5e61459b 100755
--- a/egs/timit/s3/scripts/utt2spk_to_spk2utt.pl
+++ b/egs/timit/s3/scripts/utt2spk_to_spk2utt.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/local/timit_make_questions.pl b/egs/timit/s4/local/timit_make_questions.pl
index e070c8600..a8b1355a6 100755
--- a/egs/timit/s4/local/timit_make_questions.pl
+++ b/egs/timit/s4/local/timit_make_questions.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/timit/s4/local/timit_norm_trans.pl b/egs/timit/s4/local/timit_norm_trans.pl
index 11fa78d9f..07a185048 100755
--- a/egs/timit/s4/local/timit_norm_trans.pl
+++ b/egs/timit/s4/local/timit_norm_trans.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/timit/s4/utils/add_disambig.pl b/egs/timit/s4/utils/add_disambig.pl
index c605659e1..962ef3867 100755
--- a/egs/timit/s4/utils/add_disambig.pl
+++ b/egs/timit/s4/utils/add_disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/add_lex_disambig.pl b/egs/timit/s4/utils/add_lex_disambig.pl
index 9f9054e17..ded04bb4b 100755
--- a/egs/timit/s4/utils/add_lex_disambig.pl
+++ b/egs/timit/s4/utils/add_lex_disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/eps2disambig.pl b/egs/timit/s4/utils/eps2disambig.pl
index fecbdc833..049802b08 100755
--- a/egs/timit/s4/utils/eps2disambig.pl
+++ b/egs/timit/s4/utils/eps2disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/filter_scp.pl b/egs/timit/s4/utils/filter_scp.pl
index dfe4b13d1..17483ae8b 100755
--- a/egs/timit/s4/utils/filter_scp.pl
+++ b/egs/timit/s4/utils/filter_scp.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/int2sym.pl b/egs/timit/s4/utils/int2sym.pl
index d08c4279e..ad85ef349 100755
--- a/egs/timit/s4/utils/int2sym.pl
+++ b/egs/timit/s4/utils/int2sym.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/make_lexicon_fst.pl b/egs/timit/s4/utils/make_lexicon_fst.pl
index 1e9c291b4..9e088889c 100755
--- a/egs/timit/s4/utils/make_lexicon_fst.pl
+++ b/egs/timit/s4/utils/make_lexicon_fst.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/s2eps.pl b/egs/timit/s4/utils/s2eps.pl
index de993db67..ffeeb8eb6 100755
--- a/egs/timit/s4/utils/s2eps.pl
+++ b/egs/timit/s4/utils/s2eps.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/silphones.pl b/egs/timit/s4/utils/silphones.pl
index 8cee6df94..3ff85dfe3 100755
--- a/egs/timit/s4/utils/silphones.pl
+++ b/egs/timit/s4/utils/silphones.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/split_scp.pl b/egs/timit/s4/utils/split_scp.pl
index 1521504f2..f1054d323 100755
--- a/egs/timit/s4/utils/split_scp.pl
+++ b/egs/timit/s4/utils/split_scp.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/sym2int.pl b/egs/timit/s4/utils/sym2int.pl
index 54a9ff21f..71492652c 100755
--- a/egs/timit/s4/utils/sym2int.pl
+++ b/egs/timit/s4/utils/sym2int.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s4/utils/utt2spk_to_spk2utt.pl b/egs/timit/s4/utils/utt2spk_to_spk2utt.pl
index 0c9e6417c..0dfb7ba5f 100755
--- a/egs/timit/s4/utils/utt2spk_to_spk2utt.pl
+++ b/egs/timit/s4/utils/utt2spk_to_spk2utt.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/timit/s5/cmd.sh b/egs/timit/s5/cmd.sh
index 6e89243a8..fd91a53ff 100644
--- a/egs/timit/s5/cmd.sh
+++ b/egs/timit/s5/cmd.sh
@@ -14,8 +14,8 @@
 
 if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
   export train_cmd="queue.pl -l arch=*64*"
-  export decode_cmd="queue.pl -l arch=*64*,ram_free=3G,mem_free=3G"
-  export mkgraph_cmd="queue.pl -l arch=*64*,ram_free=4G,mem_free=4G"
+  export decode_cmd="queue.pl -l arch=*64* --mem 3G"
+  export mkgraph_cmd="queue.pl -l arch=*64* --mem 4G"
   export cuda_cmd="queue.pl -l gpu=1"
 elif [[ $(hostname -f) == *.fit.vutbr.cz ]]; then
   #b) BUT cluster options
diff --git a/egs/timit/s5/local/timit_norm_trans.pl b/egs/timit/s5/local/timit_norm_trans.pl
index 0b6d0d6d2..8894d4cb8 100755
--- a/egs/timit/s5/local/timit_norm_trans.pl
+++ b/egs/timit/s5/local/timit_norm_trans.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # Copyright 2012  Arnab Ghoshal
 
diff --git a/egs/timit/s5/run.sh b/egs/timit/s5/run.sh
index 0ae1ecec3..ce96f64fc 100755
--- a/egs/timit/s5/run.sh
+++ b/egs/timit/s5/run.sh
@@ -191,7 +191,7 @@ echo "                    DNN Hybrid Training & Decoding
 echo ============================================================================
 
 # DNN hybrid system training parameters
-dnn_mem_reqs="mem_free=1.0G,ram_free=1.0G"
+dnn_mem_reqs="--mem 1G"
 dnn_extra_opts="--num_epochs 20 --num-epochs-extra 10 --add-layers-period 1 --shrink-interval 3"
 
 steps/nnet2/train_tanh.sh --mix-up 5000 --initial-learning-rate 0.015 \
@@ -200,7 +200,7 @@ steps/nnet2/train_tanh.sh --mix-up 5000 --initial-learning-rate 0.015 \
   data/train data/lang exp/tri3_ali exp/tri4_nnet
 
 [ ! -d exp/tri4_nnet/decode_dev ] && mkdir -p exp/tri4_nnet/decode_dev
-decode_extra_opts=(--num-threads 6 --parallel-opts "-pe smp 6 -l mem_free=4G,ram_free=4.0G")
+decode_extra_opts=(--num-threads 6)
 steps/nnet2/decode.sh --cmd "$decode_cmd" --nj "$decode_nj" "${decode_extra_opts[@]}" \
   --transform-dir exp/tri3/decode_dev exp/tri3/graph data/dev \
   exp/tri4_nnet/decode_dev | tee exp/tri4_nnet/decode_dev/decode.log
diff --git a/egs/vystadial_cz/online_demo/__init__.py b/egs/vystadial_cz/online_demo/__init__.py
old mode 100644
new mode 100755
index baffc4882..3dc5da3ef
--- a/egs/vystadial_cz/online_demo/__init__.py
+++ b/egs/vystadial_cz/online_demo/__init__.py
@@ -1 +1,2 @@
+#!/usr/bin/env python
 from __future__ import unicode_literals
diff --git a/egs/vystadial_cz/s5/local/make_baseform.pl b/egs/vystadial_cz/s5/local/make_baseform.pl
old mode 100644
new mode 100755
index e4ac17dc1..285842ee4
--- a/egs/vystadial_cz/s5/local/make_baseform.pl
+++ b/egs/vystadial_cz/s5/local/make_baseform.pl
@@ -1,4 +1,5 @@
-#!perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 #
 # ====================================================================
diff --git a/egs/vystadial_cz/s5/local/phonetic_transcription_cs.pl b/egs/vystadial_cz/s5/local/phonetic_transcription_cs.pl
old mode 100644
new mode 100755
index 402bf2f37..9364d1af1
--- a/egs/vystadial_cz/s5/local/phonetic_transcription_cs.pl
+++ b/egs/vystadial_cz/s5/local/phonetic_transcription_cs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 use strict;
 use warnings;
diff --git a/egs/vystadial_en/s5/local/make_baseform.pl b/egs/vystadial_en/s5/local/make_baseform.pl
old mode 100644
new mode 100755
index e4ac17dc1..285842ee4
--- a/egs/vystadial_en/s5/local/make_baseform.pl
+++ b/egs/vystadial_en/s5/local/make_baseform.pl
@@ -1,4 +1,5 @@
-#!perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 #
 # ====================================================================
diff --git a/egs/wsj/s5/cmd.sh b/egs/wsj/s5/cmd.sh
index fec4abbca..6395d96ca 100644
--- a/egs/wsj/s5/cmd.sh
+++ b/egs/wsj/s5/cmd.sh
@@ -7,9 +7,9 @@
 
 #a) JHU cluster options
 export train_cmd="queue.pl -l arch=*64"
-export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
-export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
-export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
+export decode_cmd="queue.pl -l arch=*64 --mem 2G"
+export mkgraph_cmd="queue.pl -l arch=*64 --mem 4G"
+export big_memory_cmd="queue.pl -l arch=*64 --mem 8G"
 export cuda_cmd="queue.pl -l gpu=1"
 
 
diff --git a/egs/wsj/s5/local/cstr_ndx2flist.pl b/egs/wsj/s5/local/cstr_ndx2flist.pl
index 101834e86..d19db421a 100755
--- a/egs/wsj/s5/local/cstr_ndx2flist.pl
+++ b/egs/wsj/s5/local/cstr_ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2010-2011 Microsoft Corporation
 
diff --git a/egs/wsj/s5/local/dict/add_counts.pl b/egs/wsj/s5/local/dict/add_counts.pl
index 409277c72..a2ace7e9a 100755
--- a/egs/wsj/s5/local/dict/add_counts.pl
+++ b/egs/wsj/s5/local/dict/add_counts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 # Add counts to an oovlist.
diff --git a/egs/wsj/s5/local/dict/count_rules.pl b/egs/wsj/s5/local/dict/count_rules.pl
index 2805e98c3..1c6cfc4a5 100755
--- a/egs/wsj/s5/local/dict/count_rules.pl
+++ b/egs/wsj/s5/local/dict/count_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes the output of score_prons.pl and collates
 # it for each (rule, destress) pair so that we get the
diff --git a/egs/wsj/s5/local/dict/filter_dict.pl b/egs/wsj/s5/local/dict/filter_dict.pl
index 1210bb5e6..5e32823ef 100755
--- a/egs/wsj/s5/local/dict/filter_dict.pl
+++ b/egs/wsj/s5/local/dict/filter_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 
 # This program reads and writes either a dictionary or just a list
diff --git a/egs/wsj/s5/local/dict/find_acronyms.pl b/egs/wsj/s5/local/dict/find_acronyms.pl
index ed4655afa..55e474c40 100755
--- a/egs/wsj/s5/local/dict/find_acronyms.pl
+++ b/egs/wsj/s5/local/dict/find_acronyms.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Reads a dictionary, and prints out a list of words that seem to be pronounced
 # as acronyms (not including plurals of acronyms, just acronyms).  Uses
diff --git a/egs/wsj/s5/local/dict/get_acronym_prons.pl b/egs/wsj/s5/local/dict/get_acronym_prons.pl
index 3f9936818..6294b7046 100755
--- a/egs/wsj/s5/local/dict/get_acronym_prons.pl
+++ b/egs/wsj/s5/local/dict/get_acronym_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Reads a dictionary (for prons of letters), and an OOV list,
 # and puts out candidate pronunciations of words in that list
diff --git a/egs/wsj/s5/local/dict/get_candidate_prons.pl b/egs/wsj/s5/local/dict/get_candidate_prons.pl
index b13efd203..b091c6d76 100755
--- a/egs/wsj/s5/local/dict/get_candidate_prons.pl
+++ b/egs/wsj/s5/local/dict/get_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This script takes three command-line arguments (typically files, or "-"):
 # the suffix rules (as output by get_rules.pl), the rule-hierarchy 
diff --git a/egs/wsj/s5/local/dict/get_rule_hierarchy.pl b/egs/wsj/s5/local/dict/get_rule_hierarchy.pl
index 35805b46b..d7c13a8df 100755
--- a/egs/wsj/s5/local/dict/get_rule_hierarchy.pl
+++ b/egs/wsj/s5/local/dict/get_rule_hierarchy.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 #This reads in rules, of the form put out by get_rules.pl, e.g.:
 # ERT,,ER0 T,
diff --git a/egs/wsj/s5/local/dict/get_rules.pl b/egs/wsj/s5/local/dict/get_rules.pl
index a5b57b088..b10eccc91 100755
--- a/egs/wsj/s5/local/dict/get_rules.pl
+++ b/egs/wsj/s5/local/dict/get_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program creates suggested suffix rules from a dictionary.
 # It outputs quadruples of the form:
diff --git a/egs/wsj/s5/local/dict/limit_candidate_prons.pl b/egs/wsj/s5/local/dict/limit_candidate_prons.pl
index ceff9fbad..b01218f6e 100755
--- a/egs/wsj/s5/local/dict/limit_candidate_prons.pl
+++ b/egs/wsj/s5/local/dict/limit_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program enforces the rule that
 # if a "more specific" rule applies, we cannot use the more general rule.
diff --git a/egs/wsj/s5/local/dict/reverse_candidates.pl b/egs/wsj/s5/local/dict/reverse_candidates.pl
index d5c5effc2..5b7aabd8a 100755
--- a/egs/wsj/s5/local/dict/reverse_candidates.pl
+++ b/egs/wsj/s5/local/dict/reverse_candidates.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl,
 # which is 7-tuples, one per line, of the form:
diff --git a/egs/wsj/s5/local/dict/reverse_dict.pl b/egs/wsj/s5/local/dict/reverse_dict.pl
index 75681711b..2cd38c54b 100755
--- a/egs/wsj/s5/local/dict/reverse_dict.pl
+++ b/egs/wsj/s5/local/dict/reverse_dict.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Used in conjunction with get_rules.pl
 # example input line: XANTHE  Z AE1 N DH
diff --git a/egs/wsj/s5/local/dict/score_prons.pl b/egs/wsj/s5/local/dict/score_prons.pl
index fd5a004d8..6aa72e421 100755
--- a/egs/wsj/s5/local/dict/score_prons.pl
+++ b/egs/wsj/s5/local/dict/score_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes candidate prons from "get_candidate_prons.pl" or
 # "limit_candidate_prons.pl", and a reference dictionary covering those words,
diff --git a/egs/wsj/s5/local/dict/score_rules.pl b/egs/wsj/s5/local/dict/score_rules.pl
index 8d165f7f1..252d94677 100755
--- a/egs/wsj/s5/local/dict/score_rules.pl
+++ b/egs/wsj/s5/local/dict/score_rules.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This program takes the output of count_rules.pl, which is tuples
 # of the form
diff --git a/egs/wsj/s5/local/dict/select_candidate_prons.pl b/egs/wsj/s5/local/dict/select_candidate_prons.pl
index d0018c98a..a24ccdd4d 100755
--- a/egs/wsj/s5/local/dict/select_candidate_prons.pl
+++ b/egs/wsj/s5/local/dict/select_candidate_prons.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # This takes the output of e.g. get_candidate_prons.pl or limit_candidate_prons.pl
 # or reverse_candidates.pl, which is 7-tuples, one per line, of the form:
diff --git a/egs/wsj/s5/local/find_transcripts.pl b/egs/wsj/s5/local/find_transcripts.pl
index 0e5d71f79..6429411b8 100755
--- a/egs/wsj/s5/local/find_transcripts.pl
+++ b/egs/wsj/s5/local/find_transcripts.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/local/flist2scp.pl b/egs/wsj/s5/local/flist2scp.pl
index 6831d2d7b..234e4add1 100755
--- a/egs/wsj/s5/local/flist2scp.pl
+++ b/egs/wsj/s5/local/flist2scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/local/ndx2flist.pl b/egs/wsj/s5/local/ndx2flist.pl
index b05704293..48fc3dec1 100755
--- a/egs/wsj/s5/local/ndx2flist.pl
+++ b/egs/wsj/s5/local/ndx2flist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/local/normalize_transcript.pl b/egs/wsj/s5/local/normalize_transcript.pl
index 9dd67af3d..09cee0617 100755
--- a/egs/wsj/s5/local/normalize_transcript.pl
+++ b/egs/wsj/s5/local/normalize_transcript.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh b/egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh
index 4b21e88a3..1a69e50f3 100755
--- a/egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh
+++ b/egs/wsj/s5/local/online/run_nnet2_perturb_speed.sh
@@ -31,7 +31,7 @@ else
   # almost the same, but this may be a little bit slow.
   num_threads=16
   minibatch_size=128
-  parallel_opts="-pe smp $num_threads"
+  parallel_opts="--num-threads $num_threads"
   dir=$nnet_dir/nnet_a
 fi
 
diff --git a/egs/wsj/s5/local/run_bnf_sgmm.sh b/egs/wsj/s5/local/run_bnf_sgmm.sh
index ecda87fa2..6cfe1df67 100644
--- a/egs/wsj/s5/local/run_bnf_sgmm.sh
+++ b/egs/wsj/s5/local/run_bnf_sgmm.sh
@@ -16,7 +16,7 @@ bnf_num_gauss_ubm=600
 bnf_num_gauss_sgmm=7000
 align_dir=exp/tri4b_ali_si284
 bnf_decode_acwt=0.0357
-sgmm_group_extra_opts=(--group 3 --parallel-opts "-pe smp 3 -l mem_free=7G,ram_free=7G" --cmd "queue.pl -l arch=*64 -l mem_free=2.0G,ram_free=2.0G") 
+sgmm_group_extra_opts=(--group 3 --cmd "queue.pl -l arch=*64 --mem 7G")
 
 if [ ! -d exp_bnf ]; then
   echo "$0: before running this script, please run local/run_bnf.sh"
diff --git a/egs/wsj/s5/run.sh b/egs/wsj/s5/run.sh
index 8a1db2b3d..492a6e26a 100755
--- a/egs/wsj/s5/run.sh
+++ b/egs/wsj/s5/run.sh
@@ -51,7 +51,7 @@ local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;
 
  # Note: I am commenting out the RNNLM-building commands below.  They take up a lot
  # of CPU time and are not really part of the "main recipe."
- # Be careful: appending things like "-l mem_free=10G" to $decode_cmd
+ # Be careful: appending things like "--mem 10G" to $decode_cmd
  # won't always work, it depends what $decode_cmd is.
   (
    local/wsj_extend_dict.sh --dict-suffix "_nosp" $wsj1/13-32.1  && \
@@ -61,18 +61,18 @@ local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;
    local/wsj_format_local_lms.sh --lang-suffix "_nosp" # &&
  #
  #   ( local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
- #       --cmd "$decode_cmd -l mem_free=10G" data/local/rnnlm.h30.voc10k &
+ #       --cmd "$decode_cmd --mem 10G" data/local/rnnlm.h30.voc10k &
  #     sleep 20; # wait till tools compiled.
  #     local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
- #       --cmd "$decode_cmd -l mem_free=12G" \
+ #       --cmd "$decode_cmd --mem 12G" \
  #       --hidden 100 --nwords 20000 --class 350 \
  #       --direct 1500 data/local/rnnlm.h100.voc20k &
  #     local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
- #       --cmd "$decode_cmd -l mem_free=14G" \
+ #       --cmd "$decode_cmd --mem 14G" \
  #       --hidden 200 --nwords 30000 --class 350 \
  #       --direct 1500 data/local/rnnlm.h200.voc30k &
  #     local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
- #       --cmd "$decode_cmd -l mem_free=16G" \
+ #       --cmd "$decode_cmd --mem 16G" \
  #       --hidden 300 --nwords 40000 --class 400 \
  #       --direct 2000 data/local/rnnlm.h300.voc40k &
  #   )
@@ -81,19 +81,19 @@ local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;
        num_threads_rnnlm=8
        local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
          --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
-         --cmd "$decode_cmd -l mem_free=1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
+         --cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
          --hidden 30  --nwords 10000 --direct 1000 data/local/rnnlm-hs.h30.voc10k  
        local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
          --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
-         --cmd "$decode_cmd -l mem_free=1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
+         --cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
          --hidden 100 --nwords 20000 --direct 1500 data/local/rnnlm-hs.h100.voc20k 
        local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
          --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
-         --cmd "$decode_cmd -l mem_free=1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
+         --cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
          --hidden 300 --nwords 30000 --direct 1500 data/local/rnnlm-hs.h300.voc30k 
        local/wsj_train_rnnlms.sh --dict-suffix "_nosp" \
          --rnnlm_ver rnnlm-hs-0.1b --threads $num_threads_rnnlm \
-         --cmd "$decode_cmd -l mem_free=1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
+         --cmd "$decode_cmd --mem 1G --num-threads $num_threads_rnnlm" --bptt 4 --bptt-block 10 \
          --hidden 400 --nwords 40000 --direct 2000 data/local/rnnlm-hs.h400.voc40k 
    )
   ) &
diff --git a/egs/wsj/s5/steps/cleanup/create_segments_from_ctm.pl b/egs/wsj/s5/steps/cleanup/create_segments_from_ctm.pl
index 771961929..98c4e56d5 100755
--- a/egs/wsj/s5/steps/cleanup/create_segments_from_ctm.pl
+++ b/egs/wsj/s5/steps/cleanup/create_segments_from_ctm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2014  Guoguo Chen
 # Apache 2.0
diff --git a/egs/wsj/s5/steps/cleanup/decode_segmentation.sh b/egs/wsj/s5/steps/cleanup/decode_segmentation.sh
index 0d929f98c..63e1943cb 100755
--- a/egs/wsj/s5/steps/cleanup/decode_segmentation.sh
+++ b/egs/wsj/s5/steps/cleanup/decode_segmentation.sh
@@ -16,7 +16,7 @@ beam=13.0
 lattice_beam=6.0
 acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now, present for historical reasons (use --num-threads).
 scoring_opts=
 # note: there are no more min-lmwt and max-lmwt options, instead use
 # e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20"
@@ -48,7 +48,6 @@ if [ $# != 3 ]; then
    echo "  --acwt <float>                                   # acoustic scale used for lattice generation "
    echo "  --scoring-opts <string>                          # options to local/score.sh"
    echo "  --num-threads <n>                                # number of threads to use, default 1."
-   echo "  --parallel-opts <opts>                           # e.g. '-pe smp 4' if you supply --num-threads 4"
    exit 1;
 fi
 
@@ -115,7 +114,7 @@ if [ $stage -le 0 ]; then
     [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $model | grep pdfs | awk '{print $NF}'` ] || \
       { echo "Mismatch in number of pdfs with $model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
     gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
     $model "$HCLG" "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
diff --git a/egs/wsj/s5/steps/cleanup/make_utterance_fsts.pl b/egs/wsj/s5/steps/cleanup/make_utterance_fsts.pl
index 0929291bc..f457e52f1 100755
--- a/egs/wsj/s5/steps/cleanup/make_utterance_fsts.pl
+++ b/egs/wsj/s5/steps/cleanup/make_utterance_fsts.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # makes unigram decoding-graph FSTs specific to each utterances, where the
 # supplied top-n-words list together with the supervision text of the utterance are
diff --git a/egs/wsj/s5/steps/decode.sh b/egs/wsj/s5/steps/decode.sh
index fd6f162cf..1b3133082 100755
--- a/egs/wsj/s5/steps/decode.sh
+++ b/egs/wsj/s5/steps/decode.sh
@@ -16,7 +16,7 @@ beam=13.0
 lattice_beam=6.0
 acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now, present for historical reasons (use --num-threads).
 scoring_opts=
 # note: there are no more min-lmwt and max-lmwt options, instead use
 # e.g. --scoring-opts "--min-lmwt 1 --max-lmwt 20"
@@ -48,7 +48,7 @@ if [ $# != 3 ]; then
    echo "  --acwt <float>                                   # acoustic scale used for lattice generation "
    echo "  --scoring-opts <string>                          # options to local/score.sh"
    echo "  --num-threads <n>                                # number of threads to use, default 1."
-   echo "  --parallel-opts <opts>                           # e.g. '-pe smp 4' if you supply --num-threads 4"
+   echo "  --parallel-opts <opts>                           # ignored now, present for historical reasons."
    exit 1;
 fi
 
@@ -68,6 +68,16 @@ if [ -z "$model" ]; then # if --model <mdl> was not specified on the command lin
   else model=$srcdir/$iter.mdl; fi
 fi
 
+if [ $(basename $model) != final.alimdl ] ; then
+  # Do not rely on $srcdir here -- look in the directory where the model file actually is.
+  if [ -f $(dirname $model)/final.alimdl ] ; then
+    echo -e '\n\n' 
+    echo $0 'WARNING: Running speaker independent system decoding using a SAT model!' 
+    echo $0 'WARNING: This is OK if you know what you are doing...' 
+    echo -e '\n\n'
+  fi
+fi
+
 for f in $sdata/1/feats.scp $sdata/1/cmvn.scp $model $graphdir/HCLG.fst; do
   [ ! -f $f ] && echo "decode.sh: no such file $f" && exit 1;
 done
@@ -110,7 +120,7 @@ if [ $stage -le 0 ]; then
     [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $model | grep pdfs | awk '{print $NF}'` ] || \
       { echo "Mismatch in number of pdfs with $model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
     gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
     $model $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
diff --git a/egs/wsj/s5/steps/decode_basis_fmllr.sh b/egs/wsj/s5/steps/decode_basis_fmllr.sh
index 76c3259f5..dc3cd4b4a 100755
--- a/egs/wsj/s5/steps/decode_basis_fmllr.sh
+++ b/egs/wsj/s5/steps/decode_basis_fmllr.sh
@@ -49,7 +49,7 @@ silence_weight=0.01
 cmd=run.pl
 si_dir=
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored, present for historical reasons.
 skip_scoring=false
 scoring_opts=
 # End configuration section
@@ -75,7 +75,7 @@ if [ $# != 3 ]; then
    echo "  --acwt <acoustic-weight>                 # default 0.08333 ... used to get posteriors"
    echo "  --scoring-opts <string>                  # options to local/score.sh"
    echo "  --num-threads <n>                        # number of threads to use, default 1."
-   echo "  --parallel-opts <opts>                   # e.g. '-pe smp 4' if you supply --num-threads 4"
+   echo "  --parallel-opts <opts>                   # ignored, present for historical reasons."
    exit 1;
 fi
 
@@ -121,7 +121,7 @@ if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
         { echo "Mismatch in number of pdfs with $alignment_model"; exit 1; }
     fi
 
-    steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
+    steps/decode.sh --scoring-opts "$scoring_opts" \
               --num-threads $num_threads --skip-scoring $skip_scoring \
               --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
               --model $alignment_model --max-active \
@@ -178,7 +178,7 @@ if [ $stage -le 2 ]; then
     [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $adapt_model | grep pdfs | awk '{print $NF}'` ] || \
       { echo "Mismatch in number of pdfs with $adapt_model"; exit 1; }
   fi
-  $cmd JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd JOB=1:$nj --num-threads $num_threads $dir/log/decode.JOB.log \
     gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt  \
     --determinize-lattice=false --allow-partial=true --word-symbol-table=$graphdir/words.txt \
diff --git a/egs/wsj/s5/steps/decode_fmllr.sh b/egs/wsj/s5/steps/decode_fmllr.sh
index 97d32e6c5..a5ca6c9ef 100755
--- a/egs/wsj/s5/steps/decode_fmllr.sh
+++ b/egs/wsj/s5/steps/decode_fmllr.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2012-2015  Johns Hopkins University (Author: Daniel Povey)
 
 # Decoding script that does fMLLR.  This can be on top of delta+delta-delta, or
 # LDA+MLLT features.
@@ -42,7 +42,7 @@ cmd=run.pl
 si_dir=
 fmllr_update_type=full
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now.
 skip_scoring=false
 scoring_opts=
 max_fmllr_jobs=25  # I've seen the fMLLR jobs overload NFS badly if the decoding
@@ -69,7 +69,6 @@ if [ $# != 3 ]; then
    echo "                                           # Caution-- must be with same tree"
    echo "  --acwt <acoustic-weight>                 # default 0.08333 ... used to get posteriors"
    echo "  --num-threads <n>                        # number of threads to use, default 1."
-   echo "  --parallel-opts <opts>                   # e.g. '-pe smp 4' if you supply --num-threads 4"
    echo "  --scoring-opts <opts>                    # options to local/score.sh"
    exit 1;
 fi
@@ -117,11 +116,11 @@ if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
       [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $alignment_model | grep pdfs | awk '{print $NF}'` ] || \
         { echo "Mismatch in number of pdfs with $alignment_model"; exit 1; }
     fi
-    steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
-              --num-threads $num_threads --skip-scoring $skip_scoring \
-              --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
-              --model $alignment_model --max-active \
-              $first_max_active $graphdir $data $si_dir || exit 1;
+    steps/decode.sh --scoring-opts "$scoring_opts" \
+           --num-threads $num_threads --skip-scoring $skip_scoring \
+           --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
+           --model $alignment_model --max-active \
+           $first_max_active $graphdir $data $si_dir || exit 1;
   fi
 fi
 ##
@@ -171,7 +170,7 @@ if [ $stage -le 2 ]; then
     [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $adapt_model | grep pdfs | awk '{print $NF}'` ] || \
       { echo "Mismatch in number of pdfs with $adapt_model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
     gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt --determinize-lattice=false \
     --allow-partial=true --word-symbol-table=$graphdir/words.txt \
@@ -208,7 +207,7 @@ feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans.
 
 if [ $stage -le 4 ]; then
   echo "$0: doing a final pass of acoustic rescoring."
-  $cmd $parallel_opts JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
     gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
     lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
     "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
diff --git a/egs/wsj/s5/steps/decode_fmllr_extra.sh b/egs/wsj/s5/steps/decode_fmllr_extra.sh
index 421fac6c5..04d4c2ae3 100755
--- a/egs/wsj/s5/steps/decode_fmllr_extra.sh
+++ b/egs/wsj/s5/steps/decode_fmllr_extra.sh
@@ -56,7 +56,7 @@ si_dir=
 fmllr_update_type=full
 skip_scoring=false
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now.
 scoring_opts=
 
 # End configuration section
@@ -81,7 +81,6 @@ if [ $# != 3 ]; then
    echo "                                           # Caution-- must be with same tree"
    echo "  --acwt <acoustic-weight>                 # default 0.08333 ... used to get posteriors"
    echo "  --num-threads <n>                        # number of threads to use, default 1."
-   echo "  --parallel-opts <opts>                   # e.g. '-pe smp 4' if you supply --num-threads 4"
    echo "  --scoring-opts <opts>                    # options to local/score.sh"
    exit 1;
 fi
@@ -129,7 +128,7 @@ if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
       { echo "Mismatch in number of pdfs with $alignment_model" exit 1; }
   fi
     steps/decode.sh --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam --model $alignment_model\
-      --max-active $first_max_active --parallel-opts "${parallel_opts}" --num-threads $num_threads\
+      --max-active $first_max_active --num-threads $num_threads\
       --skip-scoring true $graphdir $data $si_dir || exit 1;
   fi
 fi
@@ -178,7 +177,7 @@ if [ $stage -le 2 ]; then
     [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $adapt_model | grep pdfs | awk '{print $NF}'` ] || \
       { echo "Mismatch in number of pdfs with $adapt_model" exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode1.JOB.log\
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode1.JOB.log\
     gmm-latgen-faster$thread_string --max-active=$first_max_active --max-mem=$max_mem --beam=$first_beam --lattice-beam=$first_lattice_beam \
     --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
     $adapt_model $graphdir/HCLG.fst "$pass1feats" "ark:|gzip -c > $dir/lat1.JOB.gz" \
@@ -214,7 +213,7 @@ pass2feats="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/t
 ## after another stage of adaptation.)
 if [ $stage -le 4 ]; then
   echo "$0: doing final lattice generation phase"
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode2.JOB.log\
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode2.JOB.log\
     gmm-latgen-faster$thread_string --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
     $adapt_model $graphdir/HCLG.fst "$pass2feats" "ark:|gzip -c > $dir/lat2.JOB.gz" \
diff --git a/egs/wsj/s5/steps/decode_fmmi.sh b/egs/wsj/s5/steps/decode_fmmi.sh
index 5ce22a946..d568709be 100755
--- a/egs/wsj/s5/steps/decode_fmmi.sh
+++ b/egs/wsj/s5/steps/decode_fmmi.sh
@@ -17,7 +17,7 @@ acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
 ngselect=2; # Just use the 2 top Gaussians for fMMI/fMPE.  Should match train.
 transform_dir=
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now.
 scoring_opts=
 # End configuration section.
 
@@ -46,7 +46,6 @@ if [ $# != 3 ]; then
    echo "  --scoring-opts <string>                          # options to local/score.sh"
    echo "                                                   # speaker-adapted decoding"
    echo "  --num-threads <n>                                # number of threads to use, default 1."
-   echo "  --parallel-opts <opts>                           # e.g. '-pe smp 4' if you supply --num-threads 4"
    exit 1;
 fi
 
@@ -98,7 +97,7 @@ if [ $stage -le 1 ]; then
 fi
   
 if [ $stage -le 2 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
     gmm-latgen-faster$thread_string --max-active=$maxactive --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
     $model $graphdir/HCLG.fst "$fmpefeats" "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
diff --git a/egs/wsj/s5/steps/decode_lvtln.sh b/egs/wsj/s5/steps/decode_lvtln.sh
index eed2e1687..ac58b2ee0 100755
--- a/egs/wsj/s5/steps/decode_lvtln.sh
+++ b/egs/wsj/s5/steps/decode_lvtln.sh
@@ -18,7 +18,7 @@ logdet_scale=0.0
 cmd=run.pl
 skip_scoring=false
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now.
 scoring_opts=
 cleanup=true
 # End configuration section
@@ -89,7 +89,7 @@ if [ $stage -le 0 ]; then
     [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $srcdir/final.alimdl | grep pdfs | awk '{print $NF}'` ] || \
       { echo "Mismatch in number of pdfs with $srcdir/final.alimdl"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
     gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
      --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
     $srcdir/final.alimdl $graphdir/HCLG.fst "$sifeats" "ark:|gzip -c > $dir/lat_pass1.JOB.gz" \
@@ -121,7 +121,7 @@ feats1="$sifeats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$dir/trans
 
 if [ $stage -le 3 ]; then
   echo "$0: rescoring the lattices with first-pass LVTLN transforms"
-  $cmd $parallel_opts JOB=1:$nj $dir/log/rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/rescore.JOB.log \
     gmm-rescore-lattice $srcdir/final.mdl "ark:gunzip -c $dir/lat_pass1.JOB.gz|" "$feats1" \
      "ark:|gzip -c > $dir/lat_pass2.JOB.gz" || exit 1;
 fi
@@ -144,7 +144,7 @@ if [ $stage -le 5 ]; then
   # This second rescoring is only really necessary for scoring purposes,
   # it does not affect the transforms.
   echo "$0: rescoring the lattices with second-pass LVTLN transforms"
-  $cmd $parallel_opts JOB=1:$nj $dir/log/rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/rescore.JOB.log \
     gmm-rescore-lattice $srcdir/final.mdl "ark:gunzip -c $dir/lat_pass2.JOB.gz|" "$feats" \
      "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
 fi
diff --git a/egs/wsj/s5/steps/decode_nolats.sh b/egs/wsj/s5/steps/decode_nolats.sh
index d30925028..6f5e780cf 100755
--- a/egs/wsj/s5/steps/decode_nolats.sh
+++ b/egs/wsj/s5/steps/decode_nolats.sh
@@ -23,10 +23,6 @@ max_active=7000
 beam=13.0
 lattice_beam=6.0
 acwt=0.083333 # note: only really affects pruning (scoring is on lattices).
-num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
-                # Note: this functionality doesn't work right now because
-                # there is no program gmm-decode-faster-parallel
 write_alignments=false
 write_words=true
 # End configuration section.
@@ -59,8 +55,6 @@ if [ $# != 3 ]; then
    echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
    echo "  --transform-dir <trans-dir>                      # dir to find fMLLR transforms "
    echo "  --acwt <float>                                   # acoustic scale used for lattice generation "
-   #echo "  --num-threads <n>                                # number of threads to use, default 1."
-   #echo "  --parallel-opts <opts>                           # e.g. '-pe smp 4' if you supply --num-threads 4"
    exit 1;
 fi
 
@@ -89,8 +83,6 @@ echo "decode.sh: feature type is $feat_type";
 
 splice_opts=`cat $srcdir/splice_opts 2>/dev/null`
 cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`
-thread_string=
-[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads" 
 
 case $feat_type in
   delta) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |";;
@@ -124,8 +116,8 @@ if [ $stage -le 0 ]; then
     [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $model | grep pdfs | awk '{print $NF}'` ] || \
       { echo "Mismatch in number of pdfs with $model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
-    gmm-decode-faster$thread_string --max-active=$max_active --beam=$beam  \
+  $cmd JOB=1:$nj $dir/log/decode.JOB.log \
+    gmm-decode-faster --max-active=$max_active --beam=$beam  \
     --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
     "$model" $graphdir/HCLG.fst "$feats" "$words" "$ali" || exit 1;
 fi
diff --git a/egs/wsj/s5/steps/decode_raw_fmllr.sh b/egs/wsj/s5/steps/decode_raw_fmllr.sh
index 3be3e60f0..069dc84a7 100755
--- a/egs/wsj/s5/steps/decode_raw_fmllr.sh
+++ b/egs/wsj/s5/steps/decode_raw_fmllr.sh
@@ -45,7 +45,7 @@ silence_weight=0.01
 cmd=run.pl
 si_dir=
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now.
 skip_scoring=false
 scoring_opts=
 # End configuration section
@@ -70,7 +70,6 @@ if [ $# != 3 ]; then
    echo "                                           # Caution-- must be with same tree"
    echo "  --acwt <acoustic-weight>                 # default 0.08333 ... used to get posteriors"
    echo "  --num-threads <n>                        # number of threads to use, default 1."
-   echo "  --parallel-opts <opts>                   # e.g. '-pe smp 4' if you supply --num-threads 4"
    echo "  --scoring-opts <opts>                    # options to local/score.sh"
    exit 1;
 fi
@@ -115,7 +114,7 @@ fi
 if [ -z "$si_dir" ]; then # we need to do the speaker-independent decoding pass.
   si_dir=${dir}.si # Name it as our decoding dir, but with suffix ".si".
   if [ $stage -le 0 ]; then
-    steps/decode.sh --parallel-opts "$parallel_opts" --scoring-opts "$scoring_opts" \
+    steps/decode.sh --scoring-opts "$scoring_opts" \
               --num-threads $num_threads --skip-scoring $skip_scoring \
               --acwt $acwt --nj $nj --cmd "$cmd" --beam $first_beam \
               --model $alignment_model --max-active \
@@ -166,7 +165,7 @@ pass1feats="$pass1splicedfeats transform-feats $srcdir/final.mat ark:- ark:- |"
 ## model, and it's more correct to store the full state-level lattice for this purpose.
 if [ $stage -le 2 ]; then
   echo "$0: doing main lattice generation phase"
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
     gmm-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt --determinize-lattice=false \
     --allow-partial=true --word-symbol-table=$graphdir/words.txt \
@@ -217,7 +216,7 @@ fi
 
 if [ $stage -le 5 ]; then
   echo "$0: doing a final pass of acoustic rescoring."
-  $cmd $parallel_opts JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
     gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
     lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
     "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
diff --git a/egs/wsj/s5/steps/decode_sgmm2.sh b/egs/wsj/s5/steps/decode_sgmm2.sh
index c84d5660e..99f422308 100755
--- a/egs/wsj/s5/steps/decode_sgmm2.sh
+++ b/egs/wsj/s5/steps/decode_sgmm2.sh
@@ -31,7 +31,7 @@ use_fmllr=false
 fmllr_iters=10
 fmllr_min_count=1000
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now.
 skip_scoring=false
 scoring_opts=
 # note: there are no more min-lmwt and max-lmwt options, instead use
@@ -132,7 +132,7 @@ if [ $stage -le 2 ]; then
     [ "`cat $graphdir/num_pdfs`" -eq `am-info --print-args=false $alignment_model | grep pdfs | awk '{print $NF}'` ] || \
       { echo "Mismatch in number of pdfs with $alignment_model"; exit 1; }
   fi
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_pass1.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_pass1.JOB.log \
     sgmm2-latgen-faster$thread_string --max-active=$max_active --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt --determinize-lattice=false --allow-partial=true \
     --word-symbol-table=$graphdir/words.txt --max-mem=$max_mem "$gselect_opt_1stpass" $alignment_model \
@@ -202,7 +202,7 @@ fi
 # corresponding model.  Prune and determinize the lattices to limit
 # their size.
 if [ $stage -le 6 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/rescore.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/rescore.JOB.log \
     sgmm2-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
     $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
     lattice-determinize-pruned$thread_string --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
diff --git a/egs/wsj/s5/steps/kl_hmm/decode_kl_hmm.sh b/egs/wsj/s5/steps/kl_hmm/decode_kl_hmm.sh
index c941d142b..d085ac907 100755
--- a/egs/wsj/s5/steps/kl_hmm/decode_kl_hmm.sh
+++ b/egs/wsj/s5/steps/kl_hmm/decode_kl_hmm.sh
@@ -21,7 +21,6 @@ acwt=0.1 # GMM:0.0833, note: only really affects pruning (scoring is on lattices
 scoring_opts="--min-lmwt 1 --max-lmwt 12"
 skip_scoring=false
 use_gpu="no" # disable gpu
-parallel_opts=""
 # End configuration section.
 
 echo "$0 $@"  # Print the command line for logging
@@ -104,7 +103,7 @@ fi
 
 # Run the decoding in the queue
 if [ $stage -le 0 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd JOB=1:$nj $dir/log/decode.JOB.log \
     nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
     latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$lattice_beam \
     --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
diff --git a/egs/wsj/s5/steps/make_denlats.sh b/egs/wsj/s5/steps/make_denlats.sh
index a9a31c6e6..65b4bb8d3 100755
--- a/egs/wsj/s5/steps/make_denlats.sh
+++ b/egs/wsj/s5/steps/make_denlats.sh
@@ -17,7 +17,7 @@ max_mem=20000000 # This will stop the processes getting too large.
 # This is in bytes, but not "real" bytes-- you have to multiply
 # by something like 5 or 10 to get real bytes (not sure why so large)
 num_threads=1
-parallel_opts=
+parallel_opts= # ignored now
 # End configuration section.
 
 echo "$0 $@"  # Print the command line for logging
@@ -40,7 +40,6 @@ if [ $# != 4 ]; then
    echo "                           # will (individually) finish reasonably soon."
    echo "  --transform-dir <transform-dir>   # directory to find fMLLR transforms."
    echo "  --num-threads  <n>                # number of threads per decoding job"
-   echo "  --parallel-opts <string>          # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
    exit 1;
 fi
 
@@ -121,7 +120,7 @@ trap "cleanup" INT QUIT TERM EXIT
 
 
 if [ $sub_split -eq 1 ]; then 
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_den.JOB.log \
    gmm-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
     --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl  \
      $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
@@ -147,7 +146,7 @@ else
       mkdir -p $dir/part
       feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
 
-      $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+      $cmd --num-threads $num_threads JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
         gmm-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
         --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl  \
           $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error &
diff --git a/egs/wsj/s5/steps/make_denlats_sgmm2.sh b/egs/wsj/s5/steps/make_denlats_sgmm2.sh
index 4fd3e860e..0f250d555 100755
--- a/egs/wsj/s5/steps/make_denlats_sgmm2.sh
+++ b/egs/wsj/s5/steps/make_denlats_sgmm2.sh
@@ -19,7 +19,7 @@ max_active=5000
 transform_dir=
 max_mem=20000000 # This will stop the processes getting too large.
 num_threads=1
-parallel_opts=
+parallel_opts=  # ignored now.
 # End configuration section.
 
 echo "$0 $@"  # Print the command line for logging
@@ -42,7 +42,6 @@ if [ $# != 4 ]; then
    echo "                           # will (individually) finish reasonably soon."
    echo "  --transform-dir <transform-dir>   # directory to find fMLLR transforms."
    echo "  --num-threads  <n>                # number of threads per decoding job"
-   echo "  --parallel-opts <string>          # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
    exit 1;
 fi
 
@@ -57,9 +56,6 @@ cmvn_opts=`cat $srcdir/cmvn_opts 2>/dev/null`
 if [ $num_threads -gt 1 ]; then
   # the -parallel becomes part of the binary name we decode with.
   thread_string="-parallel --num-threads=$num_threads"
-  if [ -z $parallel_opts ]; then
-    parallel_opts="--num-threads $num_threads"
-  fi
 fi
 
 mkdir -p $dir/log
@@ -142,7 +138,7 @@ cleanup() {
 trap "cleanup" INT QUIT TERM EXIT
 
 if [ $sub_split -eq 1 ]; then 
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_den.JOB.log \
     sgmm2-latgen-faster$thread_string $spkvecs_opt "$gselect_opt" --beam=$beam \
     --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
     --max-mem=$max_mem --max-active=$max_active \
@@ -171,7 +167,7 @@ else
       feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
       spkvecs_opt_subset=`echo $spkvecs_opt | sed "s/JOB/$n/g"`
       gselect_opt_subset=`echo $gselect_opt | sed "s/JOB/$n/g"`
-      $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+      $cmd --num-threads $num_threads JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
         sgmm2-latgen-faster$thread_string \
         $spkvecs_opt_subset "$gselect_opt_subset" \
         --beam=$beam --lattice-beam=$lattice_beam \
diff --git a/egs/wsj/s5/steps/nnet/decode.sh b/egs/wsj/s5/steps/nnet/decode.sh
index f05b14dc1..14a8d0c69 100755
--- a/egs/wsj/s5/steps/nnet/decode.sh
+++ b/egs/wsj/s5/steps/nnet/decode.sh
@@ -26,7 +26,7 @@ skip_scoring=false
 scoring_opts="--min-lmwt 4 --max-lmwt 15"
 
 num_threads=1 # if >1, will use latgen-faster-parallel
-parallel_opts="-pe smp $((num_threads+1))" # use 2 CPUs (1 DNN-forward, 1 decoder)
+parallel_opts=   # Ignored now.
 use_gpu="no" # yes|no|optionaly
 # End configuration section.
 
@@ -111,7 +111,7 @@ feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
 
 # Run the decoding in the queue,
 if [ $stage -le 0 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $((num_threads+1)) JOB=1:$nj $dir/log/decode.JOB.log \
     nnet-forward $nnet_forward_opts --feature-transform=$feature_transform --class-frame-counts=$class_frame_counts --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
     latgen-faster-mapped$thread_string --min-active=$min_active --max-active=$max_active --max-mem=$max_mem --beam=$beam \
     --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
diff --git a/egs/wsj/s5/steps/nnet/make_denlats.sh b/egs/wsj/s5/steps/nnet/make_denlats.sh
index 38cbb9547..02d25c744 100755
--- a/egs/wsj/s5/steps/nnet/make_denlats.sh
+++ b/egs/wsj/s5/steps/nnet/make_denlats.sh
@@ -21,7 +21,7 @@ max_mem=20000000 # This will stop the processes getting too large.
 # by something like 5 or 10 to get real bytes (not sure why so large)
 # End configuration section.
 use_gpu=no # yes|no|optional
-parallel_opts="-pe smp 2"
+parallel_opts="--num-threads 2"
 
 echo "$0 $@"  # Print the command line for logging
 
diff --git a/egs/wsj/s5/steps/nnet2/decode.sh b/egs/wsj/s5/steps/nnet2/decode.sh
index 2641e39a3..df8600df3 100755
--- a/egs/wsj/s5/steps/nnet2/decode.sh
+++ b/egs/wsj/s5/steps/nnet2/decode.sh
@@ -20,7 +20,7 @@ ivector_scale=1.0
 lattice_beam=8.0 # Beam we use in lattice generation.
 iter=final
 num_threads=1 # if >1, will use gmm-latgen-faster-parallel
-parallel_opts=  # If you supply num-threads, you should supply this too.
+parallel_opts=  # ignored now.
 scoring_opts=
 skip_scoring=false
 feat_type=
@@ -47,7 +47,7 @@ if [ $# -ne 3 ]; then
   echo "  --iter <iter>                            # Iteration of model to decode; default is final."
   echo "  --scoring-opts <string>                  # options to local/score.sh"
   echo "  --num-threads <n>                        # number of threads to use, default 1."
-  echo "  --parallel-opts <opts>                   # e.g. '-pe smp 4' if you supply --num-threads 4"
+  echo "  --parallel-opts <opts>                   # ignored now, present for historical reasons."
   exit 1;
 fi
 
@@ -129,7 +129,7 @@ if [ ! -z "$online_ivector_dir" ]; then
 fi
 
 if [ $stage -le 1 ]; then
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode.JOB.log \
     nnet-latgen-faster$thread_string \
      --minimize=$minimize --max-active=$max_active --min-active=$min_active --beam=$beam \
      --lattice-beam=$lattice_beam --acoustic-scale=$acwt --allow-partial=true \
diff --git a/egs/wsj/s5/steps/nnet2/make_denlats.sh b/egs/wsj/s5/steps/nnet2/make_denlats.sh
index 099ced56d..eea853eb7 100755
--- a/egs/wsj/s5/steps/nnet2/make_denlats.sh
+++ b/egs/wsj/s5/steps/nnet2/make_denlats.sh
@@ -19,7 +19,7 @@ max_mem=20000000 # This will stop the processes getting too large.
 # by something like 5 or 10 to get real bytes (not sure why so large)
 num_threads=1
 online_ivector_dir=
-parallel_opts=
+parallel_opts= # ignored now
 feat_type=  # you can set this in order to run on top of delta features, although we don't
             # normally want to do this.
 # End configuration section.
@@ -45,7 +45,6 @@ if [ $# != 4 ]; then
   echo "                           # will (individually) finish reasonably soon."
   echo "  --transform-dir <transform-dir>   # directory to find fMLLR transforms."
   echo "  --num-threads  <n>                # number of threads per decoding job"
-  echo "  --parallel-opts <string>          # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
   exit 1;
 fi
 
@@ -156,7 +155,7 @@ trap "cleanup" INT QUIT TERM EXIT
 
 
 if [ $sub_split -eq 1 ]; then 
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_den.JOB.log \
    nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
     --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl  \
      $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
@@ -182,7 +181,7 @@ else
       mkdir -p $dir/part
       feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
 
-      $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+      $cmd --num-threads $num_threads JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
         nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
         --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl  \
           $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error &
diff --git a/egs/wsj/s5/steps/nnet2/make_multisplice_configs.py b/egs/wsj/s5/steps/nnet2/make_multisplice_configs.py
old mode 100644
new mode 100755
index c8aa946ca..cff85b7f6
--- a/egs/wsj/s5/steps/nnet2/make_multisplice_configs.py
+++ b/egs/wsj/s5/steps/nnet2/make_multisplice_configs.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # Copyright 2014  Johns Hopkins University (Authors: Daniel Povey and Vijayaditya Peddinti).  Apache 2.0.
 
 # Creates the nnet.config and hidde_*.config scripts used in train_pnorm_multisplice.sh
diff --git a/egs/wsj/s5/steps/nnet2/retrain_fast.sh b/egs/wsj/s5/steps/nnet2/retrain_fast.sh
index fe7e6dea8..2e7131b82 100755
--- a/egs/wsj/s5/steps/nnet2/retrain_fast.sh
+++ b/egs/wsj/s5/steps/nnet2/retrain_fast.sh
@@ -58,10 +58,10 @@ target_multiplier=0 #  Set this to e.g. 1.0 to enable perturbed training.
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 egs_opts=
@@ -100,9 +100,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/retrain_simple2.sh b/egs/wsj/s5/steps/nnet2/retrain_simple2.sh
index 7b8d2f4e0..d3f5223b5 100755
--- a/egs/wsj/s5/steps/nnet2/retrain_simple2.sh
+++ b/egs/wsj/s5/steps/nnet2/retrain_simple2.sh
@@ -59,11 +59,11 @@ max_change_per_sample=0.075
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" 
+parallel_opts="--num-threads 16 --mem 1G" 
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 egs_opts=
@@ -101,9 +101,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/retrain_tanh.sh b/egs/wsj/s5/steps/nnet2/retrain_tanh.sh
index 5d4ee61e6..f67cb1359 100755
--- a/egs/wsj/s5/steps/nnet2/retrain_tanh.sh
+++ b/egs/wsj/s5/steps/nnet2/retrain_tanh.sh
@@ -43,7 +43,7 @@ widen=0 # If specified, it will increase the hidden-layer dimension
 bias_stddev=0.5 # will be used for widen
 
 num_threads=16
-parallel_opts="-pe smp $num_threads"  # using a smallish #threads by default, out of stability concerns.
+parallel_opts="--num-threads $num_threads"  # using a smallish #threads by default, out of stability concerns.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 # End configuration section.
@@ -75,7 +75,7 @@ if [ $# != 3 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16\">            # extra options to pass to e.g. queue.pl for processes that"
+  echo "  --parallel-opts <opts|\"--num-threads 16\">            # extra options to pass to e.g. queue.pl for processes that"
   echo "                                                   # use multiple threads."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_block.sh b/egs/wsj/s5/steps/nnet2/train_block.sh
index cfc35e2f1..d65fdaa08 100755
--- a/egs/wsj/s5/steps/nnet2/train_block.sh
+++ b/egs/wsj/s5/steps/nnet2/train_block.sh
@@ -61,7 +61,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -104,9 +104,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative.sh b/egs/wsj/s5/steps/nnet2/train_discriminative.sh
index 4b02cbca7..177ad2fc4 100755
--- a/egs/wsj/s5/steps/nnet2/train_discriminative.sh
+++ b/egs/wsj/s5/steps/nnet2/train_discriminative.sh
@@ -37,7 +37,7 @@ io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one t
 
 num_threads=16  # this is the default but you may want to change it, e.g. to 1 if
                 # using GPUs.
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 4 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 transform_dir= # If this is a SAT system, directory for transforms
 cleanup=true
@@ -72,9 +72,8 @@ if [ $# != 6 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --samples-per-iter <#samples|400000>             # Number of samples of data to process per iteration, per"
   echo "                                                   # process."
diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative2.sh b/egs/wsj/s5/steps/nnet2/train_discriminative2.sh
index 4196e0e78..85047efc5 100755
--- a/egs/wsj/s5/steps/nnet2/train_discriminative2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_discriminative2.sh
@@ -79,8 +79,7 @@ if [ $# != 2 ]; then
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size.  With GPU, must be 1."
   echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "                                                   # use multiple threads... "
   echo "  --stage <stage|-3>                               # Used to run a partially-completed training process from somewhere in"
   echo "                                                   # the middle."
   echo "  --criterion <criterion|smbr>                     # Training criterion: may be smbr, mmi or mpfe"
diff --git a/egs/wsj/s5/steps/nnet2/train_discriminative_multilang2.sh b/egs/wsj/s5/steps/nnet2/train_discriminative_multilang2.sh
index e2d1f3b2c..9d1160638 100755
--- a/egs/wsj/s5/steps/nnet2/train_discriminative_multilang2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_discriminative_multilang2.sh
@@ -70,9 +70,8 @@ if [ $# -lt 3 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size.  With GPU, must be 1."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --stage <stage|-3>                               # Used to run a partially-completed training process from somewhere in"
   echo "                                                   # the middle."
   echo "  --criterion <criterion|smbr>                     # Training criterion: may be smbr, mmi or mpfe"
diff --git a/egs/wsj/s5/steps/nnet2/train_more.sh b/egs/wsj/s5/steps/nnet2/train_more.sh
index 0a705ff0d..4c31aeaa1 100755
--- a/egs/wsj/s5/steps/nnet2/train_more.sh
+++ b/egs/wsj/s5/steps/nnet2/train_more.sh
@@ -39,7 +39,7 @@ shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of
 mix_up=0
 stage=-5
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
    # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 remove_egs=false
@@ -69,9 +69,8 @@ if [ $# != 3 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
   echo "  --num-iters-final <#iters|20>                    # Number of final iterations to give to nnet-combine-fast to "
diff --git a/egs/wsj/s5/steps/nnet2/train_more2.sh b/egs/wsj/s5/steps/nnet2/train_more2.sh
index 22de51770..9734d38f6 100755
--- a/egs/wsj/s5/steps/nnet2/train_more2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_more2.sh
@@ -43,7 +43,7 @@ num_jobs_nnet=4
 mix_up=0
 stage=-5
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
    # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
 cleanup=true
@@ -76,9 +76,8 @@ if [ $# != 3 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
   echo "  --num-iters-final <#iters|20>                    # Number of final iterations to give to nnet-combine-fast to "
diff --git a/egs/wsj/s5/steps/nnet2/train_multilang2.sh b/egs/wsj/s5/steps/nnet2/train_multilang2.sh
index 0f9ba74c7..04590a99d 100755
--- a/egs/wsj/s5/steps/nnet2/train_multilang2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_multilang2.sh
@@ -111,9 +111,8 @@ if [ $# -lt 6 -o $[$#%2] -ne 0 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --stage <stage|-4>                               # Used to run a partially-completed training process from somewhere in"
   echo "                                                   # the middle."
   exit 1;
diff --git a/egs/wsj/s5/steps/nnet2/train_multisplice_accel2.sh b/egs/wsj/s5/steps/nnet2/train_multisplice_accel2.sh
index aa62a28df..eaa0623a9 100755
--- a/egs/wsj/s5/steps/nnet2/train_multisplice_accel2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_multisplice_accel2.sh
@@ -20,7 +20,6 @@ num_epochs=15      # Number of epochs of training;
                    # the number of iterations is worked out from this.
 initial_effective_lrate=0.01
 final_effective_lrate=0.001
-learning_rate_scales_opts=""
 bias_stddev=0.5
 pnorm_input_dim=3000 
 pnorm_output_dim=300
@@ -77,11 +76,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" 
+parallel_opts="--num-threads 16 --mem 1G" 
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -120,10 +119,11 @@ if [ $# != 4 ]; then
   echo "  --initial-effective-lrate <lrate|0.02> # effective learning rate at start of training."
   echo "  --final-effective-lrate <lrate|0.004>   # effective learning rate at end of training."
   echo "                                                   # data, 0.00025 for large data"
-  echo "  --learning-rate-scales-opts <ComponentA=0.1:ComponentB=0.2>   # Scale learning rate of components. separate with ':'"
   echo "  --num-hidden-layers <#hidden-layers|2>           # Number of hidden layers, e.g. 2 for 3 hours of data, 4 for 100hrs"
   echo "  --add-layers-period <#iters|2>                   # Number of iterations between adding hidden layers"
-  echo "  --presoftmax-prior-scale-power <power|-0.25>     # use the specified power value on the priors (inverse priors) to scale the pre-softmax outputs (set to 0.0 to disable the presoftmax element scale)"
+  echo "  --presoftmax-prior-scale-power <power|-0.25>     # use the specified power value on the priors (inverse priors)"
+  echo "                                                   # to scale the pre-softmax outputs."
+  echo "                                                   # (set to 0.0 to disable the presoftmax element scale)"
   echo "  --mix-up <#pseudo-gaussians|0>                   # This option now does nothing; please remove it."
   echo "                                                   # per context-dependent state.  Try a number several times #states."
   echo "  --num-jobs-initial <num-jobs|1>                  # Number of parallel jobs to use for neural net training, at the start."
@@ -131,9 +131,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
@@ -477,11 +476,11 @@ while [ $x -lt $num_iters ]; do
         inp=$[$inp-1]
       fi
 
-      mdl="nnet-init --srand=$x $dir/hidden_${cur_num_hidden_layers}.config - | nnet-insert --insert-at=$inp $dir/$x.mdl - - | nnet-am-copy $learning_rate_scales_opts --learning-rate=$this_learning_rate - -|"
+      mdl="nnet-init --srand=$x $dir/hidden_${cur_num_hidden_layers}.config - | nnet-insert --insert-at=$inp $dir/$x.mdl - - | nnet-am-copy --learning-rate=$this_learning_rate - -|"
     else
       do_average=true
       if [ $x -eq 0 ]; then do_average=false; fi # on iteration 0, pick the best, don't average.
-      mdl="nnet-am-copy $learning_rate_scales_opts --learning-rate=$this_learning_rate $dir/$x.mdl -|"
+      mdl="nnet-am-copy --learning-rate=$this_learning_rate $dir/$x.mdl -|"
     fi
     if $do_average; then
       this_minibatch_size=$minibatch_size
diff --git a/egs/wsj/s5/steps/nnet2/train_multisplice_ensemble.sh b/egs/wsj/s5/steps/nnet2/train_multisplice_ensemble.sh
index 6bf567962..02f028041 100755
--- a/egs/wsj/s5/steps/nnet2/train_multisplice_ensemble.sh
+++ b/egs/wsj/s5/steps/nnet2/train_multisplice_ensemble.sh
@@ -74,11 +74,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" 
+parallel_opts="--num-threads 16 --mem 1G" 
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -129,9 +129,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm.sh b/egs/wsj/s5/steps/nnet2/train_pnorm.sh
index aa5db0b14..87f0e2fc5 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm.sh
@@ -64,7 +64,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -116,9 +116,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh
index 78872766b..01dbe9b5d 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_accel2.sh
@@ -77,11 +77,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" 
+parallel_opts="--num-threads 16 --mem 1G" 
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -129,9 +129,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_bottleneck_fast.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_bottleneck_fast.sh
index 5fdacec03..787c976c8 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_bottleneck_fast.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_bottleneck_fast.sh
@@ -70,10 +70,10 @@ target_multiplier=0 #  Set this to e.g. 1.0 to enable perturbed training.
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -122,9 +122,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh
index 2c6812cfc..03f239b85 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_ensemble.sh
@@ -62,7 +62,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -107,9 +107,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh
index 314a22588..6627e3b0a 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_fast.sh
@@ -72,10 +72,10 @@ target_multiplier=0 #  Set this to e.g. 1.0 to enable perturbed training.
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -124,9 +124,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice.sh
index 1cb372dd2..6672ee46a 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice.sh
@@ -77,11 +77,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" 
+parallel_opts="--num-threads 16 --mem 1G" 
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -128,9 +128,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice2.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice2.sh
index 0b541000e..2708eb856 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_multisplice2.sh
@@ -72,11 +72,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" 
+parallel_opts="--num-threads 16 --mem 1G" 
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -121,9 +121,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh
index e59c6d24e..6fd385b20 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_simple.sh
@@ -74,11 +74,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" 
+parallel_opts="--num-threads 16 --mem 1G" 
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -125,9 +125,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh b/egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh
index 8aef77431..44639ebd2 100755
--- a/egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh
+++ b/egs/wsj/s5/steps/nnet2/train_pnorm_simple2.sh
@@ -83,11 +83,11 @@ precondition_rank_out=80 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" 
+parallel_opts="--num-threads 16 --mem 1G" 
   # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 combine_num_threads=8
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 cleanup=true
 egs_dir=
 lda_opts=
@@ -135,9 +135,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_tanh.sh b/egs/wsj/s5/steps/nnet2/train_tanh.sh
index 8d403c698..ef4392d62 100755
--- a/egs/wsj/s5/steps/nnet2/train_tanh.sh
+++ b/egs/wsj/s5/steps/nnet2/train_tanh.sh
@@ -59,7 +59,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -108,9 +108,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh b/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh
index 6bb6bc646..96aeaa29a 100755
--- a/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh
+++ b/egs/wsj/s5/steps/nnet2/train_tanh_bottleneck.sh
@@ -63,7 +63,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=true
 egs_dir=
@@ -108,9 +108,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/train_tanh_fast.sh b/egs/wsj/s5/steps/nnet2/train_tanh_fast.sh
index 21df49cc6..7fd604131 100755
--- a/egs/wsj/s5/steps/nnet2/train_tanh_fast.sh
+++ b/egs/wsj/s5/steps/nnet2/train_tanh_fast.sh
@@ -71,9 +71,9 @@ precondition_rank_out=60 # relates to online preconditioning
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
          # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
-combine_parallel_opts="-pe smp 8"  # queue options for the "combine" stage.
+combine_parallel_opts="--num-threads 8"  # queue options for the "combine" stage.
 combine_num_threads=8
 cleanup=true
 egs_dir=
@@ -122,9 +122,8 @@ if [ $# != 4 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/nnet2/update_nnet.sh b/egs/wsj/s5/steps/nnet2/update_nnet.sh
index 6633bf6b8..e621bb6c7 100755
--- a/egs/wsj/s5/steps/nnet2/update_nnet.sh
+++ b/egs/wsj/s5/steps/nnet2/update_nnet.sh
@@ -48,7 +48,7 @@ max_change=10.0
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
         # specified.)
 num_threads=16
-parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
+parallel_opts="--num-threads 16 --mem 1G" # by default we use 16 threads; this lets the queue know.
   # note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
 cleanup=false
 egs_dir=
@@ -81,9 +81,8 @@ if [ $# != 5 ]; then
   echo "  --num-threads <num-threads|16>                   # Number of parallel threads per job (will affect results"
   echo "                                                   # as well as speed; may interact with batch size; if you increase"
   echo "                                                   # this, you may want to decrease the batch size."
-  echo "  --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\">      # extra options to pass to e.g. queue.pl for processes that"
-  echo "                                                   # use multiple threads... note, you might have to reduce mem_free,ram_free"
-  echo "                                                   # versus your defaults, because it gets multiplied by the -pe smp argument."
+  echo "  --parallel-opts <opts|\"--num-threads 16 --mem 1G\">      # extra options to pass to e.g. queue.pl for processes that"
+  echo "                                                   # use multiple threads... "
   echo "  --io-opts <opts|\"-tc 10\">                      # Options given to e.g. queue.pl for jobs that do a lot of I/O."
   echo "  --minibatch-size <minibatch-size|128>            # Size of minibatch to process (note: product with --num-threads"
   echo "                                                   # should not get too large, e.g. >2k)."
diff --git a/egs/wsj/s5/steps/online/nnet2/make_denlats.sh b/egs/wsj/s5/steps/online/nnet2/make_denlats.sh
index f0a8c7639..280c75b50 100755
--- a/egs/wsj/s5/steps/online/nnet2/make_denlats.sh
+++ b/egs/wsj/s5/steps/online/nnet2/make_denlats.sh
@@ -19,7 +19,7 @@ max_mem=20000000 # This will stop the processes getting too large.
 # This is in bytes, but not "real" bytes-- you have to multiply
 # by something like 5 or 10 to get real bytes (not sure why so large)
 num_threads=1
-parallel_opts=
+parallel_opts=  # ignored now.
 # End configuration section.
 
 echo "$0 $@"  # Print the command line for logging
@@ -41,7 +41,6 @@ if [ $# != 4 ]; then
   echo "                           # large databases so your jobs will be smaller and"
   echo "                           # will (individually) finish reasonably soon."
   echo "  --num-threads  <n>                # number of threads per decoding job"
-  echo "  --parallel-opts <string>          # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
   exit 1;
 fi
 
@@ -115,7 +114,7 @@ trap "cleanup" INT QUIT TERM EXIT
 
 
 if [ $sub_split -eq 1 ]; then 
-  $cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
+  $cmd --num-threads $num_threads JOB=1:$nj $dir/log/decode_den.JOB.log \
    nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
     --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl  \
      $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
@@ -141,7 +140,7 @@ else
       mkdir -p $dir/part
       feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
 
-      $cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
+      $cmd --num-threads $num_threads JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
         nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
         --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl  \
           $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error &
diff --git a/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh b/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh
index 9109fef6f..22250ae9e 100755
--- a/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh
+++ b/egs/wsj/s5/steps/online/nnet2/train_diag_ubm.sh
@@ -36,7 +36,7 @@ cleanup=true
 min_gaussian_weight=0.0001
 remove_low_count_gaussians=true # set this to false if you need #gauss to stay fixed.
 num_threads=32
-parallel_opts="-pe smp 32"
+parallel_opts=  # ignored now.
 online_cmvn_config=conf/online_cmvn.conf
 # End configuration section.
 
@@ -68,8 +68,6 @@ if [ $# != 4 ]; then
   echo "                                                   # in initialization phase (then split)"
   echo " --num-threads <n|32>                              # number of threads to use in initialization"
   echo "                                                   # phase (must match with parallel-opts option)"
-  echo " --parallel-opts <string|'-pe smp 32'>             # Option should match number of threads in"
-  echo "                                                   # --num-threads option above"
   echo " --min-gaussian-weight <weight|0.0001>             # min Gaussian weight allowed in GMM"
   echo "                                                   # initialization (this relatively high"
   echo "                                                   # value keeps counts fairly even)"
@@ -115,7 +113,7 @@ if [ $stage -le -2 ]; then
   echo "$0: starting from $num_gauss_init Gaussians, reaching $num_gauss;"
   echo "$0: for $num_iters_init iterations, using at most $num_frames frames of data"
 
-  $cmd $parallel_opts $dir/log/gmm_init.log \
+  $cmd --num-threads $num_threads $dir/log/gmm_init.log \
     gmm-global-init-from-feats --num-threads=$num_threads --num-frames=$num_frames \
      --min-gaussian-weight=$min_gaussian_weight \
      --num-gauss=$num_gauss --num-gauss-init=$num_gauss_init --num-iters=$num_iters_init \
diff --git a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh
index 0473ca29d..9b354c075 100755
--- a/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh
+++ b/egs/wsj/s5/steps/online/nnet2/train_ivector_extractor.sh
@@ -51,7 +51,7 @@ min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
 subsample=2  # This speeds up the training: training on every 2nd feature
              # (configurable) Since the features are highly correlated across
              # frames, we don't expect to lose too much from this.
-parallel_opts=  #Task running engine configuration
+parallel_opts=  # ignored now.
 cleanup=true
 # End configuration section.
 
@@ -76,8 +76,6 @@ if [ $# != 3 ]; then
   echo "  --stage <stage|-4>                               # To control partial reruns"
   echo "  --num-gselect <n|5>                              # Number of Gaussians to select using"
   echo "                                                   # diagonal model."
-  echo "  --parallel-opts <opts>                           # e.g. '-pe smp 16 ', the number should be equivalent"
-  echo "                                                   # to --num-processes * --num-threads" 
   exit 1;
 fi
 
@@ -107,11 +105,6 @@ gmm_feats="ark,s,cs:apply-cmvn-online --config=$dir/online_cmvn.conf $dir/global
 feats="ark,s,cs:splice-feats $splice_opts scp:$sdata/JOB/feats.scp ark:- | transform-feats $dir/final.mat ark:- ark:- | subsample-feats --n=$subsample ark:- ark:- |"
 
 
-#We will specify our own parallel-opts only in cases user does not supply anything.
-#If user does specify parallel-opts, then we will assume user knows what's right
-if [ -z "$parallel_opts" ] ; then 
-  parallel_opts="-pe smp $[$num_threads*$num_processes]"
-fi
 
 # Initialize the i-vector extractor using the input GMM, which is converted to
 # full because that's what the i-vector extractor expects.  Note: we have to do
@@ -157,7 +150,7 @@ while [ $x -lt $num_iters ]; do
     echo "Accumulating stats (pass $x)"
     for g in $(seq $nj); do
       start=$[$num_processes*($g-1)+1]
-      $cmd $parallel_opts $dir/log/acc.$x.$g.log \
+      $cmd --num-threads $[$num_threads*$num_processes] $dir/log/acc.$x.$g.log \
         ivector-extractor-sum-accs --parallel=true "${Args[@]:$start:$num_processes}" \
           $dir/acc.$x.$g || touch $dir/.error &
     done
@@ -178,7 +171,7 @@ while [ $x -lt $num_iters ]; do
                                       # The parallel-opts was either specified by 
                                       # the user or we computed it correctly in
                                       # tge previous stages
-	$cmd $parallel_opts $dir/log/update.$x.log \
+	$cmd --num-threads $[$num_threads*$num_processes] $dir/log/update.$x.log \
 	  ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1;
 	rm $dir/acc.$x.*
     if $cleanup; then
diff --git a/egs/wsj/s5/steps/train_mono.sh b/egs/wsj/s5/steps/train_mono.sh
index c900b0588..c03fbf4b1 100755
--- a/egs/wsj/s5/steps/train_mono.sh
+++ b/egs/wsj/s5/steps/train_mono.sh
@@ -64,7 +64,7 @@ shared_phones_opt="--shared-phones=$lang/phones/sets.int"
 
 if [ $stage -le -3 ]; then
   # Note: JOB=1 just uses the 1st part of the features-- we only need a subset anyway.
-  if ! feat_dim=`feat-to-dim "$example_feats" - 2>/dev/null`; then
+  if ! feat_dim=`feat-to-dim "$example_feats" - 2>/dev/null` || [ -z "$feat_dim" ]; then  # quoted: also catches empty/multi-word output
     feat-to-dim "$example_feats"  
     echo "error getting feature dimension"
     exit 1;
diff --git a/egs/wsj/s5/utils/add_disambig.pl b/egs/wsj/s5/utils/add_disambig.pl
index c605659e1..962ef3867 100755
--- a/egs/wsj/s5/utils/add_disambig.pl
+++ b/egs/wsj/s5/utils/add_disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/add_lex_disambig.pl b/egs/wsj/s5/utils/add_lex_disambig.pl
index aa2c0a11d..6ec168ed3 100755
--- a/egs/wsj/s5/utils/add_lex_disambig.pl
+++ b/egs/wsj/s5/utils/add_lex_disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011  Microsoft Corporation
 #                2013  Johns Hopkins University (author: Daniel Povey)
 #                2015  Hainan Xu
diff --git a/egs/wsj/s5/utils/apply_map.pl b/egs/wsj/s5/utils/apply_map.pl
index 41556369c..11d5ebf03 100755
--- a/egs/wsj/s5/utils/apply_map.pl
+++ b/egs/wsj/s5/utils/apply_map.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0.
 
diff --git a/egs/wsj/s5/utils/convert_ctm.pl b/egs/wsj/s5/utils/convert_ctm.pl
index c8efa2567..7daec8e52 100755
--- a/egs/wsj/s5/utils/convert_ctm.pl
+++ b/egs/wsj/s5/utils/convert_ctm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 
diff --git a/egs/wsj/s5/utils/convert_slf.pl b/egs/wsj/s5/utils/convert_slf.pl
index 251a3667b..ee1941011 100755
--- a/egs/wsj/s5/utils/convert_slf.pl
+++ b/egs/wsj/s5/utils/convert_slf.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2014  Brno University of Technology (author Karel Vesely)
 # Copyright 2013  Korbinian Riedhammer
diff --git a/egs/wsj/s5/utils/create_data_link.pl b/egs/wsj/s5/utils/create_data_link.pl
index 8eb541c1e..0fafa2e04 100755
--- a/egs/wsj/s5/utils/create_data_link.pl
+++ b/egs/wsj/s5/utils/create_data_link.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2013  Guoguo Chen
 #           2014  Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/wsj/s5/utils/create_split_dir.pl b/egs/wsj/s5/utils/create_split_dir.pl
index a03900c61..0c4f023f7 100755
--- a/egs/wsj/s5/utils/create_split_dir.pl
+++ b/egs/wsj/s5/utils/create_split_dir.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2013  Guoguo Chen
 # Apache 2.0.
diff --git a/egs/wsj/s5/utils/eps2disambig.pl b/egs/wsj/s5/utils/eps2disambig.pl
index fecbdc833..049802b08 100755
--- a/egs/wsj/s5/utils/eps2disambig.pl
+++ b/egs/wsj/s5/utils/eps2disambig.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/filter_scp.pl b/egs/wsj/s5/utils/filter_scp.pl
index 59b2df695..b76d37f41 100755
--- a/egs/wsj/s5/utils/filter_scp.pl
+++ b/egs/wsj/s5/utils/filter_scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2012 Microsoft Corporation
 #                     Johns Hopkins University (author: Daniel Povey)
 
diff --git a/egs/wsj/s5/utils/filter_scps.pl b/egs/wsj/s5/utils/filter_scps.pl
index f0ce8c382..36c96a7a8 100755
--- a/egs/wsj/s5/utils/filter_scps.pl
+++ b/egs/wsj/s5/utils/filter_scps.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2012 Microsoft Corporation
 #                     Johns Hopkins University (author: Daniel Povey)
 #           2015      Xiaohui Zhang
diff --git a/egs/wsj/s5/utils/find_arpa_oovs.pl b/egs/wsj/s5/utils/find_arpa_oovs.pl
index abd63f65e..14d898f66 100755
--- a/egs/wsj/s5/utils/find_arpa_oovs.pl
+++ b/egs/wsj/s5/utils/find_arpa_oovs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/gen_topo.pl b/egs/wsj/s5/utils/gen_topo.pl
index 58721ced1..2ed331132 100755
--- a/egs/wsj/s5/utils/gen_topo.pl
+++ b/egs/wsj/s5/utils/gen_topo.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (author: Daniel Povey)
 
diff --git a/egs/wsj/s5/utils/int2sym.pl b/egs/wsj/s5/utils/int2sym.pl
index 13cc5ae9b..d6189394a 100755
--- a/egs/wsj/s5/utils/int2sym.pl
+++ b/egs/wsj/s5/utils/int2sym.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0.
 
diff --git a/egs/wsj/s5/utils/kwslist_post_process.pl b/egs/wsj/s5/utils/kwslist_post_process.pl
index 5b1cbc970..8f8fcf33b 100755
--- a/egs/wsj/s5/utils/kwslist_post_process.pl
+++ b/egs/wsj/s5/utils/kwslist_post_process.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/wsj/s5/utils/ln.pl b/egs/wsj/s5/utils/ln.pl
index 594d3924e..634b4391d 100755
--- a/egs/wsj/s5/utils/ln.pl
+++ b/egs/wsj/s5/utils/ln.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 use File::Spec;
 
 if ( @ARGV < 2 ) {
diff --git a/egs/wsj/s5/utils/make_lexicon_fst.pl b/egs/wsj/s5/utils/make_lexicon_fst.pl
index c8cd15cbe..0558ab20b 100755
--- a/egs/wsj/s5/utils/make_lexicon_fst.pl
+++ b/egs/wsj/s5/utils/make_lexicon_fst.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011  Microsoft Corporation
 #                2013  Johns Hopkins University (author: Daniel Povey)
 
diff --git a/egs/wsj/s5/utils/make_lexicon_fst_silprob.pl b/egs/wsj/s5/utils/make_lexicon_fst_silprob.pl
index 2fafd4097..4e9055eed 100755
--- a/egs/wsj/s5/utils/make_lexicon_fst_silprob.pl
+++ b/egs/wsj/s5/utils/make_lexicon_fst_silprob.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011  Microsoft Corporation
 #                2013  Johns Hopkins University (author: Daniel Povey)
 #                2015  Hainan Xu
diff --git a/egs/wsj/s5/utils/make_unigram_grammar.pl b/egs/wsj/s5/utils/make_unigram_grammar.pl
index 314a66a10..6ca740f0a 100755
--- a/egs/wsj/s5/utils/make_unigram_grammar.pl
+++ b/egs/wsj/s5/utils/make_unigram_grammar.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/map_arpa_lm.pl b/egs/wsj/s5/utils/map_arpa_lm.pl
index 085a7049b..25b4781ca 100755
--- a/egs/wsj/s5/utils/map_arpa_lm.pl
+++ b/egs/wsj/s5/utils/map_arpa_lm.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2014  Guoguo Chen
 #           2014  Johns Hopkins University (author: Daniel Povey)
diff --git a/egs/wsj/s5/utils/nnet-cpu/make_nnet_config.pl b/egs/wsj/s5/utils/nnet-cpu/make_nnet_config.pl
index 1e18bb642..68f8272f9 100755
--- a/egs/wsj/s5/utils/nnet-cpu/make_nnet_config.pl
+++ b/egs/wsj/s5/utils/nnet-cpu/make_nnet_config.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/nnet-cpu/make_nnet_config_block.pl b/egs/wsj/s5/utils/nnet-cpu/make_nnet_config_block.pl
index 38851777d..e7ae51730 100755
--- a/egs/wsj/s5/utils/nnet-cpu/make_nnet_config_block.pl
+++ b/egs/wsj/s5/utils/nnet-cpu/make_nnet_config_block.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/nnet-cpu/make_nnet_config_preconditioned.pl b/egs/wsj/s5/utils/nnet-cpu/make_nnet_config_preconditioned.pl
index 78833bf66..3290e6fea 100755
--- a/egs/wsj/s5/utils/nnet-cpu/make_nnet_config_preconditioned.pl
+++ b/egs/wsj/s5/utils/nnet-cpu/make_nnet_config_preconditioned.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/nnet-cpu/update_learning_rates.pl b/egs/wsj/s5/utils/nnet-cpu/update_learning_rates.pl
index 36ef26ee5..4b4cd9e21 100755
--- a/egs/wsj/s5/utils/nnet-cpu/update_learning_rates.pl
+++ b/egs/wsj/s5/utils/nnet-cpu/update_learning_rates.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/nnet/gen_dct_mat.py b/egs/wsj/s5/utils/nnet/gen_dct_mat.py
index 60e3bbac6..d0f043ad7 100755
--- a/egs/wsj/s5/utils/nnet/gen_dct_mat.py
+++ b/egs/wsj/s5/utils/nnet/gen_dct_mat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 # Copyright 2012  Brno University of Technology (author: Karel Vesely)
 
diff --git a/egs/wsj/s5/utils/nnet/gen_hamm_mat.py b/egs/wsj/s5/utils/nnet/gen_hamm_mat.py
index 436bbaad9..a4262a8cf 100755
--- a/egs/wsj/s5/utils/nnet/gen_hamm_mat.py
+++ b/egs/wsj/s5/utils/nnet/gen_hamm_mat.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 # Copyright 2012  Brno University of Technology (author: Karel Vesely)
 
diff --git a/egs/wsj/s5/utils/nnet/gen_splice.py b/egs/wsj/s5/utils/nnet/gen_splice.py
index a8dddea74..0241aeed6 100755
--- a/egs/wsj/s5/utils/nnet/gen_splice.py
+++ b/egs/wsj/s5/utils/nnet/gen_splice.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 # Copyright 2012  Brno University of Technology (author: Karel Vesely)
 
diff --git a/egs/wsj/s5/utils/nnet/make_cnn_proto.py b/egs/wsj/s5/utils/nnet/make_cnn_proto.py
index 0f25c3c9d..35c16e293 100755
--- a/egs/wsj/s5/utils/nnet/make_cnn_proto.py
+++ b/egs/wsj/s5/utils/nnet/make_cnn_proto.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 # Copyright 2014  Brno University of Technology (author: Katerina Zmolikova, Karel Vesely)
 
diff --git a/egs/wsj/s5/utils/nnet/make_lstm_proto.py b/egs/wsj/s5/utils/nnet/make_lstm_proto.py
index 063d9ad9e..9b001d5ba 100755
--- a/egs/wsj/s5/utils/nnet/make_lstm_proto.py
+++ b/egs/wsj/s5/utils/nnet/make_lstm_proto.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 # Copyright 2015  Brno University of Technology (author: Karel Vesely)
 
diff --git a/egs/wsj/s5/utils/nnet/make_nnet_proto.py b/egs/wsj/s5/utils/nnet/make_nnet_proto.py
index dd705d73e..cf5ae0895 100755
--- a/egs/wsj/s5/utils/nnet/make_nnet_proto.py
+++ b/egs/wsj/s5/utils/nnet/make_nnet_proto.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 
 # Copyright 2014  Brno University of Technology (author: Karel Vesely)
 
diff --git a/egs/wsj/s5/utils/pinyin_map.pl b/egs/wsj/s5/utils/pinyin_map.pl
index 0b4909b27..65b260e24 100755
--- a/egs/wsj/s5/utils/pinyin_map.pl
+++ b/egs/wsj/s5/utils/pinyin_map.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 $num_args = $#ARGV + 1;
 if ($num_args != 1) {
diff --git a/egs/wsj/s5/utils/remove_oovs.pl b/egs/wsj/s5/utils/remove_oovs.pl
index 5bcab5984..532d7f295 100755
--- a/egs/wsj/s5/utils/remove_oovs.pl
+++ b/egs/wsj/s5/utils/remove_oovs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/reverse_arpa.py b/egs/wsj/s5/utils/reverse_arpa.py
index ff6ea12a5..5437aec43 100755
--- a/egs/wsj/s5/utils/reverse_arpa.py
+++ b/egs/wsj/s5/utils/reverse_arpa.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # Copyright 2012 Mirko Hannemann BUT, mirko.hannemann@gmail.com
 
diff --git a/egs/wsj/s5/utils/run.pl b/egs/wsj/s5/utils/run.pl
index 86e191d3f..6145a7ac5 100755
--- a/egs/wsj/s5/utils/run.pl
+++ b/egs/wsj/s5/utils/run.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 # In general, doing 
 #  run.pl some.log a b c is like running the command a b c in
@@ -80,9 +81,10 @@ if (@ARGV > 0) {
   }
 }
 
-if ($ignored_opts ne "") {
-  print STDERR "run.pl: Warning: ignoring options \"$ignored_opts\"\n";
-}
+# Users found this message confusing so we are removing it.
+# if ($ignored_opts ne "") {
+#  print STDERR "run.pl: Warning: ignoring options \"$ignored_opts\"\n";
+# }
 
 if ($max_jobs_run == -1) { # If --max-jobs-run option not set,
                            # then work out the number of processors if possible,
diff --git a/egs/wsj/s5/utils/s2eps.pl b/egs/wsj/s5/utils/s2eps.pl
index de993db67..ffeeb8eb6 100755
--- a/egs/wsj/s5/utils/s2eps.pl
+++ b/egs/wsj/s5/utils/s2eps.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/segmentation.pl b/egs/wsj/s5/utils/segmentation.pl
index c552bfcbc..41d90f4bd 100755
--- a/egs/wsj/s5/utils/segmentation.pl
+++ b/egs/wsj/s5/utils/segmentation.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2013  Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0.
 
diff --git a/egs/wsj/s5/utils/shuffle_list.pl b/egs/wsj/s5/utils/shuffle_list.pl
index 91c73ae80..f8090ef1b 100755
--- a/egs/wsj/s5/utils/shuffle_list.pl
+++ b/egs/wsj/s5/utils/shuffle_list.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2013  Johns Hopkins University (author: Daniel Povey)
 
diff --git a/egs/wsj/s5/utils/spk2utt_to_utt2spk.pl b/egs/wsj/s5/utils/spk2utt_to_utt2spk.pl
index ca8a6a124..23992f25d 100755
--- a/egs/wsj/s5/utils/spk2utt_to_utt2spk.pl
+++ b/egs/wsj/s5/utils/spk2utt_to_utt2spk.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/split_scp.pl b/egs/wsj/s5/utils/split_scp.pl
index 2fbbabc4a..70bc8033c 100755
--- a/egs/wsj/s5/utils/split_scp.pl
+++ b/egs/wsj/s5/utils/split_scp.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/ssh.pl b/egs/wsj/s5/utils/ssh.pl
index 3e100ba05..8f2755a5c 100755
--- a/egs/wsj/s5/utils/ssh.pl
+++ b/egs/wsj/s5/utils/ssh.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 
 use Cwd;
 use File::Basename;
diff --git a/egs/wsj/s5/utils/subset_scp.pl b/egs/wsj/s5/utils/subset_scp.pl
index 8a3b1cf5d..a8bcdfc1f 100755
--- a/egs/wsj/s5/utils/subset_scp.pl
+++ b/egs/wsj/s5/utils/subset_scp.pl
@@ -1,4 +1,5 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
+use warnings; #sed replacement for -w perl parameter
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/summarize_logs.pl b/egs/wsj/s5/utils/summarize_logs.pl
index 8b1cea117..63521d8d1 100755
--- a/egs/wsj/s5/utils/summarize_logs.pl
+++ b/egs/wsj/s5/utils/summarize_logs.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 
diff --git a/egs/wsj/s5/utils/summarize_warnings.pl b/egs/wsj/s5/utils/summarize_warnings.pl
index ccbeb4186..c094a1dc2 100755
--- a/egs/wsj/s5/utils/summarize_warnings.pl
+++ b/egs/wsj/s5/utils/summarize_warnings.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012 Johns Hopkins University (Author: Daniel Povey).  Apache 2.0.
 
diff --git a/egs/wsj/s5/utils/sym2int.pl b/egs/wsj/s5/utils/sym2int.pl
index be0a577cf..592145c59 100755
--- a/egs/wsj/s5/utils/sym2int.pl
+++ b/egs/wsj/s5/utils/sym2int.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/utt2spk_to_spk2utt.pl b/egs/wsj/s5/utils/utt2spk_to_spk2utt.pl
index 2c375ab4d..6e0e438ca 100755
--- a/egs/wsj/s5/utils/utt2spk_to_spk2utt.pl
+++ b/egs/wsj/s5/utils/utt2spk_to_spk2utt.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # Copyright 2010-2011 Microsoft Corporation
 
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/egs/wsj/s5/utils/validate_dict_dir.pl b/egs/wsj/s5/utils/validate_dict_dir.pl
index b6a7aebae..ca33f84c8 100755
--- a/egs/wsj/s5/utils/validate_dict_dir.pl
+++ b/egs/wsj/s5/utils/validate_dict_dir.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Apache 2.0.
 # Guoguo Chen (guoguo@jhu.edu)
diff --git a/egs/wsj/s5/utils/validate_lang.pl b/egs/wsj/s5/utils/validate_lang.pl
index 258c55c63..0d00379f8 100755
--- a/egs/wsj/s5/utils/validate_lang.pl
+++ b/egs/wsj/s5/utils/validate_lang.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Apache 2.0.
 # Copyright  2012   Guoguo Chen
diff --git a/egs/wsj/s5/utils/write_kwslist.pl b/egs/wsj/s5/utils/write_kwslist.pl
index f86f975c5..b2f67815d 100755
--- a/egs/wsj/s5/utils/write_kwslist.pl
+++ b/egs/wsj/s5/utils/write_kwslist.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 # Copyright 2012  Johns Hopkins University (Author: Guoguo Chen)
 # Apache 2.0.
diff --git a/egs/yesno/s5/local/create_yesno_txt.pl b/egs/yesno/s5/local/create_yesno_txt.pl
index fe9b644d9..50df35515 100755
--- a/egs/yesno/s5/local/create_yesno_txt.pl
+++ b/egs/yesno/s5/local/create_yesno_txt.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 $in_list = $ARGV[0];
 
diff --git a/egs/yesno/s5/local/create_yesno_wav_scp.pl b/egs/yesno/s5/local/create_yesno_wav_scp.pl
index 13d54dbc2..7086f4107 100755
--- a/egs/yesno/s5/local/create_yesno_wav_scp.pl
+++ b/egs/yesno/s5/local/create_yesno_wav_scp.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 $waves_dir = $ARGV[0];
 $in_list = $ARGV[1];
diff --git a/egs/yesno/s5/local/create_yesno_waves_test_train.pl b/egs/yesno/s5/local/create_yesno_waves_test_train.pl
index e532d3d2f..fefe12fbe 100755
--- a/egs/yesno/s5/local/create_yesno_waves_test_train.pl
+++ b/egs/yesno/s5/local/create_yesno_waves_test_train.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 
 $full_list = $ARGV[0];
 $test_list = $ARGV[1];
diff --git a/src/cudamatrix/cu-common.h b/src/cudamatrix/cu-common.h
index a3051136a..67889757b 100644
--- a/src/cudamatrix/cu-common.h
+++ b/src/cudamatrix/cu-common.h
@@ -44,6 +44,13 @@
   cudaThreadSynchronize(); \
 } 
 
+// Reports `msg` plus CUDA diagnostics through KALDI_ERR when `ret` is non-zero.
+#define KALDI_CUDA_ERR(ret, msg) { \
+  if ((ret) != 0) { \
+    KALDI_ERR << msg << ", diagnostics: cudaError_t " << (ret) << " : \"" << cudaGetErrorString((cudaError_t)(ret)) << "\", in " << __FILE__ << ":" << __LINE__; \
+  } \
+  cudaThreadSynchronize(); \
+}
 
 namespace kaldi {
 
diff --git a/src/cudamatrix/cu-device.cc b/src/cudamatrix/cu-device.cc
index f45935f0f..a978ad14b 100644
--- a/src/cudamatrix/cu-device.cc
+++ b/src/cudamatrix/cu-device.cc
@@ -80,14 +80,13 @@ void CuDevice::SelectGpuId(std::string use_gpu) {
 
   // Check that we have a gpu available
   int32 n_gpu = 0;
+
   cudaError_t e;
   e = cudaGetDeviceCount(&n_gpu);
-  if ( e != cudaSuccess ) {
-      KALDI_ERR << "Error querying for number of devices: " << cudaGetErrorString(e) << std::endl;
-  }
+
   if (n_gpu == 0) {
     if (use_gpu == "yes" || use_gpu == "wait") {
-      KALDI_ERR << "No CUDA GPU detected!";
+      KALDI_ERR << "No CUDA GPU detected!" << ", diagnostics: cudaError_t " << e << " : \"" << cudaGetErrorString(e) << "\"";  // unconditional: must fail even if e == cudaSuccess
     }
     if (use_gpu == "optional") {
       KALDI_WARN << "Running on CPU!!! No CUDA GPU detected...";
@@ -100,7 +99,7 @@ void CuDevice::SelectGpuId(std::string use_gpu) {
   // or default gpu_id=0. In the case with no free GPUs a context cannot be created
   // (compute-exclusive mode).
   //
-  e = cudaThreadSynchronize(); //<< CUDA context gets created here.
+  e = cudaThreadSynchronize(); // << CUDA context gets created here.
 
   if (use_gpu != "wait") {
     if (e != cudaSuccess) {
@@ -110,10 +109,10 @@ void CuDevice::SelectGpuId(std::string use_gpu) {
         << " seconds.";
       sleep(sec_sleep);
       cudaGetLastError(); // reset the error state    
-      e = cudaThreadSynchronize(); //<< 2nd trial to get CUDA context.
+      e = cudaThreadSynchronize(); // << 2nd trial to get CUDA context.
       if (e != cudaSuccess) {
         if (use_gpu == "yes") {
-          KALDI_ERR << "Failed to create CUDA context, no more unused GPUs?";
+          KALDI_CUDA_ERR(e, "Failed to create CUDA context, no more unused GPUs?");
         }
         if (use_gpu == "optional") {
           KALDI_WARN << "Running on CPU!!! No more unused CUDA GPUs?";
@@ -181,10 +180,10 @@ void CuDevice::FinalizeActiveGpu() {
     cudaError_t e;
     e = cudaGetDevice(&act_gpu_id);
     if(e != cudaSuccess) {
-      KALDI_ERR << "Failed to get device-id of active device.";
+      KALDI_CUDA_ERR(e, "Failed to get device-id of active device.");
     }
     // Remember the id of active GPU 
-    active_gpu_id_ = act_gpu_id; //CuDevice::Enabled() is true from now on
+    active_gpu_id_ = act_gpu_id; // CuDevice::Enabled() is true from now on
     // Initialize the CUBLAS
     CU_SAFE_CALL(cublasInit());
 
@@ -219,12 +218,12 @@ bool CuDevice::IsComputeExclusive() {
   int32 gpu_id = -1;
   cudaError_t e = cudaGetDevice(&gpu_id);
   if(e != cudaSuccess) {
-    KALDI_ERR << "Failed to get current device";
+    KALDI_CUDA_ERR(e, "Failed to get current device");
   }
   struct cudaDeviceProp gpu_prop;
   e = cudaGetDeviceProperties(&gpu_prop, gpu_id);
   if(e != cudaSuccess) {
-    KALDI_ERR << "Failed to get device properties";
+    KALDI_CUDA_ERR(e,  "Failed to get device properties");
   }
   // find out whether compute exclusive mode is used
   switch (gpu_prop.computeMode) {
@@ -241,48 +240,66 @@ bool CuDevice::IsComputeExclusive() {
     default :
       // The computation mode is not compute-exclusive,
       // in this case we release the GPU context...
-      e = cudaThreadExit(); //deprecated, but for legacy reason not cudaDeviceReset
+      e = cudaThreadExit(); // deprecated, but for legacy reason not cudaDeviceReset
       if(e != cudaSuccess) {
-        KALDI_ERR << "Failed to release CUDA context on a GPU";
+        KALDI_CUDA_ERR(e, "Failed to release CUDA context on a GPU");
       }
       return false;
   }
 }
 
+template<typename TA, typename TB>
+bool greater_pair(const std::pair<TA, TB> &left, const std::pair<TA, TB>& right) {
+  return left.second > right.second;
+}
 
 bool CuDevice::SelectGpuIdAuto() {
   // Check that we have at least one gpu
+  cudaError_t e;
   int32 n_gpu = 0;
-  cudaGetDeviceCount(&n_gpu);
+  e = cudaGetDeviceCount(&n_gpu);
   if(n_gpu == 0) {
     KALDI_WARN << "No CUDA devices found";
+    if (e != cudaSuccess) {
+      KALDI_WARN << "cudaGetDeviceCount() returned " << e 
+        <<", meaning: \"" << cudaGetErrorString(e)  << "\"";
+    }
     return false;
   }
   
   // The GPU is selected according to maximal free memory ratio
-  std::vector<float> free_mem_ratio(n_gpu+1, 0.0);
+  std::vector< std::pair<int, float> > free_mem_ratio(n_gpu);
+
   // Get ratios of memory use, if possible
   KALDI_LOG << "Selecting from " << n_gpu << " GPUs";
   for(int32 n = 0; n < n_gpu; n++) {
     int32 ret = cudaSetDevice(n);
     switch(ret) {
       case cudaSuccess : {
-        //create the CUDA context for the thread
-        cudaThreadSynchronize(); //deprecated, but for legacy not cudaDeviceSynchronize
-        //get GPU name
+        // create the CUDA context for the thread
+        cudaThreadSynchronize(); // deprecated, but for legacy not cudaDeviceSynchronize
+        // get GPU name
         char name[128];
         DeviceGetName(name,128,n);
-        //get GPU memory stats
+        // get GPU memory stats
         int64 free, total;
         std::string mem_stats;
         mem_stats = GetFreeMemory(&free, &total);
-        //log
+        // log
         KALDI_LOG << "cudaSetDevice(" << n << "): "
                   << name << "\t" << mem_stats;
-        //store the free/total ratio
-        free_mem_ratio[n] = free/(float)total;
-        //destroy the CUDA context for the thread
-        cudaThreadExit(); //deprecated, but for legacy reason not cudaDeviceReset
+        
+        // We have seen that in some cases GetFreeMemory reports a total of zero.
+        // Dividing by it would produce nan (or inf), which might confuse the
+        // sorting routine, so such a device gets a ratio of zero instead.
+        if (total <= 0) {
+          KALDI_LOG << "Total memory reported for device " << n << " is zero (or less).";
+        }
+        float mem_ratio = total > 0 ? free/(float)total : 0;
+        free_mem_ratio[n] = std::make_pair(n, mem_ratio);
+
+        // destroy the CUDA context for the thread
+        cudaThreadExit(); // deprecated, but for legacy reason not cudaDeviceReset
       } break;
 
 #if (CUDA_VERSION > 3020)
@@ -301,24 +318,43 @@ bool CuDevice::SelectGpuIdAuto() {
                   << cudaGetErrorString((cudaError_t)ret);
     }
   }
-  //find GPU with max free memory
+  // find GPU with max free memory
   int32 max_id=0;
-  for(int32 n=1; n<free_mem_ratio.size(); n++) {
-    if(free_mem_ratio[n] > free_mem_ratio[max_id]) max_id=n;
-  }
-  //the free_mem_ratio should be bigger than zero
-  KALDI_ASSERT(free_mem_ratio[max_id] > 0.0);
+  std::sort(free_mem_ratio.begin(), free_mem_ratio.end(), 
+      greater_pair<int, float>);
+  // the free_mem_ratio should be bigger than zero
+  KALDI_ASSERT(free_mem_ratio[max_id].second > 0.0);
 
-  //finally select the GPU
-  KALDI_LOG << "Selected device: " << max_id << " (automatically)";
-  CU_SAFE_CALL(cudaSetDevice(max_id));
-  //create the context
-  cudaError_t e;
-  e = cudaThreadSynchronize(); //deprecated, but for legacy not cudaDeviceSynchronize
-  if(e != cudaSuccess) {
-    KALDI_WARN << "Failed to create CUDA context on a GPU.";
+  int32 dev_id;  // device ordinal: an integer, as stored in free_mem_ratio[].first
+  float mem_ratio;
+  do {
+    // Try the GPUs in best-to-worst order of free-memory ratio.
+    // The return codes are checked manually, because CU_SAFE_CALL contains
+    // a call to KALDI_ERR (which would cause the program to abort).
+
+    dev_id = free_mem_ratio[max_id].first;
+    mem_ratio = free_mem_ratio[max_id].second;
+
+    KALDI_LOG << "Trying to select device: " << dev_id << " (automatically), mem_ratio: " << mem_ratio;
+    e = cudaSetDevice(dev_id);
+    if(e != cudaSuccess) {
+      KALDI_WARN << "Cannot select this device: return code " << e
+        << ", Error message: \"" << cudaGetErrorString(e) << "\"";
+    } else {
+      e = cudaThreadSynchronize(); // deprecated, but for legacy not cudaDeviceSynchronize
+      if(e != cudaSuccess) {
+        KALDI_WARN << "Cannot select this device: return code " << e
+          << ", Error message: \"" << cudaGetErrorString(e) << "\"";
+      }
+    }
+    max_id++;  // move on to the next-best candidate
+  } while ((e != cudaSuccess) && (max_id < static_cast<int32>(free_mem_ratio.size())));
+  
+  if (e != cudaSuccess) {
+    KALDI_WARN << "Failed to (automatically) select any device";
     return false;
-  }
+  } 
+  KALDI_LOG << "Success selecting device " << dev_id << " free mem ratio: " << mem_ratio; 
   return true;
 }
 
@@ -369,23 +405,23 @@ void CuDevice::PrintProfile() {
 std::string CuDevice::GetFreeMemory(int64* free, int64* total) const {
 // WARNING! the CUDA API is inconsistent accross versions!
 #if (CUDA_VERSION >= 3020)
-  //define the function signature type
+  // define the function signature type
   size_t mem_free, mem_total;
 #else
   unsigned int mem_free, mem_total;
 #endif
   { 
-    //we will load the cuMemGetInfo dynamically from libcuda.so
-    //cuMemGetInfo(&mem_free, &mem_total);
-    //pre-fill ``safe'' values that will not cause problems
+    // we will load the cuMemGetInfo dynamically from libcuda.so
+    // cuMemGetInfo(&mem_free, &mem_total);
+    // pre-fill ``safe'' values that will not cause problems
     mem_free = 1; mem_total = 1;
-    //open libcuda.so
+    // open libcuda.so
     void* libcuda = dlopen("libcuda.so",RTLD_LAZY);
     if(NULL == libcuda) { 
       KALDI_WARN << "cannot open libcuda.so"; 
     } else {
-      //define the function signature type
-      //and get the symbol
+      // define the function signature type
+      // and get the symbol
 #if (CUDA_VERSION >= 3020)
       typedef CUresult (*cu_fun_ptr)(size_t*, size_t*);
       cu_fun_ptr dl_cuMemGetInfo = (cu_fun_ptr)dlsym(libcuda,"cuMemGetInfo_v2"); 
@@ -396,10 +432,10 @@ std::string CuDevice::GetFreeMemory(int64* free, int64* total) const {
       if(NULL == dl_cuMemGetInfo) {
         KALDI_WARN << "cannot load cuMemGetInfo from libcuda.so";
       } else {
-        //call the function
+        // call the function
         dl_cuMemGetInfo(&mem_free, &mem_total);
       }
-      //close the library
+      // close the library
       dlclose(libcuda);
     }
   }
@@ -417,24 +453,24 @@ std::string CuDevice::GetFreeMemory(int64* free, int64* total) const {
 
 
 void CuDevice::DeviceGetName(char* name, int32 len, int32 dev) {
-  //prefill with something reasonable
+  // prefill with something reasonable
   strncpy(name,"Unknown GPU",len);
-  //open libcuda.so
+  // open libcuda.so
   void* libcuda = dlopen("libcuda.so",RTLD_LAZY);
   if(NULL == libcuda) {
     KALDI_WARN << "cannot open libcuda.so"; 
   } else {
-    //define the function signature type
+    // define the function signature type
     typedef CUresult (*cu_fun_ptr)(char*,int,CUdevice);
-    //get the symbol
+    // get the symbol
     cu_fun_ptr cuDeviceGetName_ptr = (cu_fun_ptr)dlsym(libcuda,"cuDeviceGetName"); 
     if(NULL == cuDeviceGetName_ptr) {
       KALDI_WARN << "cannot load cuDeviceGetName from libcuda.so"; 
     } else {
-      //call the function
+      // call the function
       cuDeviceGetName_ptr(name, len, dev);
     }
-    //close the library
+    // close the library
     dlclose(libcuda);
   }
 }
@@ -461,388 +497,41 @@ void CuDevice::CheckGpuHealth() {
 }
 
 
-struct CuAllocatorOptions {
-  bool cache_memory; // Enable GPU memory caching, (false = disable).
-  int32 count; // Number of times we free and delete a particular size before we
-               // start to cache it.
-  int32 cleanup_interval_bytes;
-  CuAllocatorOptions()
-   : cache_memory(true), count(1), cleanup_interval_bytes(1000000) { }
-};
-
-
-/// We define class CuAllocator inside the .cc file, because we don't want to
-/// expose it in the header.  Its purpose is to hang on to memory that we have
-/// freed, so that we don't waste time in cudaMalloc and cudaMallocPitch().
-/// For some reason, they are sometimes very slow.
-class CuAllocator {
- public:
-  CuAllocator(const CuAllocatorOptions &opts, CuDevice *device):
-      device_(device), opts_(opts),
-      cleanup_countdown_bytes_(opts.cleanup_interval_bytes) { }
-  
-  inline void *Malloc(size_t size);
-  
-  inline void *MallocPitch(size_t row_bytes, size_t num_rows, size_t *pitch);
-  
-  inline void Free(void *ptr);
-
-  inline void DisableCaching();
-
-  ~CuAllocator();
- private:
-  inline void *MallocInternal(size_t row_bytes, size_t num_rows, size_t *pitch);
-  
-  // struct MemInfoForSize stores information associated with a particular size
-  // of allocated memory.  The row_bytes and num_rows refer to the arguments of
-  // a cudaMallocPitch call; for regular, non-pitch allocations with cudaMalloc,
-  // we make "row_bytes" zero and the size in bytes is "num_rows"... there is a
-  // reason why we do it this way round (make num_rows contain the size in
-  // bytes); it relates to the ordering of the map, and the behavior when
-  // we didn't find the exact size and want to find larger match.
-
-  
-  struct MemInfoForSize {
-    size_t row_bytes; // or zero, if a regular CudaMalloc, not
-                      // CudaMallocPitch.
-    size_t num_rows; // or the number of rows, if it's a regular CudaMalloc
-                     // call, not CudaMallocPitch.
-    size_t pitch; // If CudaMallocPitch, the pitch returned by CudaMallocPitch;
-                  // this code assumes (and checks) that it's a deterministic
-                  // function of row_bytes and num_rows.
-    size_t countdown; // number that have been freed and not cached.
-    size_t currently_used; // number that are "in the wild".. kept for
-                           // diagnostics and error detection.
-    std::vector<void*> freed; // freed and cached...
-      
-    MemInfoForSize(size_t row_bytes,
-                   size_t num_rows,
-                   int32 count):
-        row_bytes(row_bytes),
-        num_rows(num_rows),
-        pitch(0),
-        countdown(count),
-        currently_used(0) { }
-  };
-
-
-  // FindMemInfo returns the MemInfoForSize object for this (row_bytes,
-  // num_rows) combination if it exists; otherwise...
-  // if there is a MemInfoForSize object with the same row_bytes and larger (but
-  // not more than twice larger) num_rows that has freed memory waiting, it
-  // returns that; otherwise, it returns a new MemInfoForSize object for the
-  // requested size).
-  
-  inline MemInfoForSize *FindMemInfo(size_t row_bytes,
-                                     size_t num_rows) {
-    if (row_bytes >= size_to_list_.size())
-      size_to_list_.resize(row_bytes + 1, NULL);
-    
-    // note: we set row_bytes to 0 for regular, linear allocation.
-    KALDI_ASSERT(num_rows != 0);
-
-    if (size_to_list_[row_bytes] == NULL)
-      size_to_list_[row_bytes] = new std::map<size_t, MemInfoForSize*>;
-
-
-    std::map<size_t, MemInfoForSize*> &size_to_list = *(size_to_list_[row_bytes]);
-
-    typedef std::map<size_t, MemInfoForSize* >::iterator IterType;
-
-    // get an iterator to the requested object or the next-larger one.
-    // Here, upper_bound(num_rows - 1) returns an object strictly greater
-    // than num_rows - 1, which could be num_rows itself.  We need to
-    // treat num_rows == 0 as a special case because of size_t being
-    // unsigned.
-    IterType iter = (num_rows == 0 ? size_to_list.begin() :
-                     size_to_list.upper_bound(num_rows - 1));
-    
-    if (iter != size_to_list.end() && iter->first == num_rows) {
-      // Found a MemInfoForSize object
-      // with the requested size -> return it.
-      KALDI_ASSERT(iter->second->row_bytes == row_bytes &&
-                   iter->second->num_rows == num_rows);
-      return iter->second;
-    } else if (iter != size_to_list.end() &&
-               iter->second->num_rows <= 2 * num_rows &&
-               !iter->second->freed.empty()) {
-      // Return the non-matching one with freed memory, which is larger than
-      // this one but not more than twice larger.
-      KALDI_ASSERT(iter->second->row_bytes == row_bytes &&
-                   iter->second->num_rows > num_rows); // confirm expectations.
-      return iter->second;
-    } else {
-      // There was no such object, and the next-larger object either did not
-      // exist, had more than twice the num-rows requested, or had no free
-      // memory -> create an object with the requested size.
-      return (size_to_list[num_rows] =  new MemInfoForSize(row_bytes, num_rows,
-                                                           opts_.count));
-    }
-  }
-                 
-  void PossiblyCleanup(size_t num_bytes);
-
-  // A periodic housekeeping task..
-  void Cleanup();
-
-  // Frees all memory in the "freed" vectors; memory that the
-  // user freed but we held on to.  If destroy == true, also
-  // clean up all memory held in the size_to_list_ object (i.e.
-  // allocated maps and MemInfoForSize objects).
-  void ReleaseAllCachedMemory(bool destroy = false);
-
-  CuDevice *device_; // device this is attached to...
-  CuAllocatorOptions opts_;
-
-
-  unordered_map<void*, MemInfoForSize*> addr_to_list_;
-
-  // size_to_list_ is indexed first by row_bytes (which is zero for linear
-  // mallocs) and then by num_rows (which for linear mallocs, is the actual size
-  // in bytes).
-  std::vector<std::map<size_t, MemInfoForSize*>* > size_to_list_;
-  
-  int32 cleanup_countdown_bytes_; // countdown in bytes, until the next time we check
-                                  // whether we should do cleanup
-};
-
-
-void* CuAllocator::Malloc(size_t size) {
-  KALDI_ASSERT(size > 0);
-  return MallocInternal(0, size, NULL);
+void CuDevice::Free(void *ptr) { 
+  CU_SAFE_CALL(cudaFree(ptr)); 
 }
 
-void* CuAllocator::MallocPitch(size_t num_rows, size_t row_bytes,
-                               size_t *pitch) {
-  KALDI_ASSERT(num_rows > 0 && row_bytes > 0 && pitch != NULL);
-  return MallocInternal(num_rows, row_bytes, pitch);
-}
-
-void* CuAllocator::MallocInternal(size_t row_bytes,
-                                  size_t num_rows,
-                                  size_t *pitch_out) {
-  // we share the code for standard cudaMalloc and cudaMallocPitch
-  // because most of it is the same.  for cudaMalloc, we'll have
-  // row_bytes == 0, and num_rows is just the size to be allocated.
-  KALDI_ASSERT(num_rows != 0 && (row_bytes != 0) == (pitch_out != NULL));
-  
-  MemInfoForSize *info = FindMemInfo(row_bytes, num_rows);
-  if (!info->freed.empty()) { // We can satisfy the request with cached,
-                              // previously-allocated memory.
-    void *ans = info->freed.back();
-    info->freed.pop_back();
-    info->currently_used++;
-    addr_to_list_[ans] = info;
-    if (pitch_out) *pitch_out = info->pitch;
-    return ans;
-  } else {
-    PossiblyCleanup(row_bytes == 0 ? num_rows : row_bytes * num_rows);
-    void *ans;
-    if (row_bytes == 0) { // Simple malloc request, not "MallocPitch".
-      size_t size = num_rows;
-      int32 ret = cudaMalloc(&ans, size);
-      if (ret != 0) {
-        KALDI_WARN << "Allocation of memory block of " << size << " bytes "
-                   << "failed, releasing cached memory and retrying.";
-        cudaGetLastError(); // reset the error state
-        ReleaseAllCachedMemory();
-        ret = cudaMalloc(&ans, size);
-        if (ret != 0) {
-          KALDI_WARN << "Allocation failed for the second time.    Printing "
-                    << "device memory usage and exiting";
-          device_->PrintMemoryUsage();
-          KALDI_ERR << "Memory allocation failure";
-        }
-      }
-    } else {
-      size_t pitch;
-      int32 ret = cudaMallocPitch(&ans, &pitch, row_bytes, num_rows);
-      if (ret != 0) { // allocation failed...
-        KALDI_WARN << "Allocation of " << num_rows << " rows, each of size "
-                   << row_bytes << " bytes failed,  releasing cached "
-                   << "memory and retrying.";
-        cudaGetLastError(); // reset the error state
-        ReleaseAllCachedMemory();
-        ret = cudaMallocPitch(&ans, &pitch, row_bytes, num_rows);
-        if (ret != 0) {
-          KALDI_WARN << "Allocation failed for the second time.    Printing "
-                    << "device memory usage and exiting";
-          device_->PrintMemoryUsage();
-          KALDI_ERR << "Memory allocation failure";
-        }
-      }
-      KALDI_ASSERT(pitch > 0);
-      if (info->pitch == 0) { // First allocation; have not set info->pitch yet.
-        info->pitch = pitch;
-      } else if (pitch != info->pitch) {
-        KALDI_ERR << "Pitch differs between multiple calls with the same "
-                  << "parameters: " << pitch << " vs. " << info->pitch;
-      }
-      *pitch_out = info->pitch;
-    }
-    addr_to_list_[ans] = info;
-    info->currently_used++;
-    return ans;
-  }
-}
-
-void CuAllocator::Free(void *addr) {
-  unordered_map<void*, MemInfoForSize*>::iterator iter
-      = addr_to_list_.find(addr);
-  if (iter == addr_to_list_.end()) {
-    KALDI_ERR << "Attempt to free address " << addr << " that was not allocated "
-              << "by CuDevice::Malloc() (or was previously freed);";
-  }
-  MemInfoForSize *info = iter->second;
-  addr_to_list_.erase(addr); // Erase this element in the addr_to_list_ map.
-  info->currently_used--;
-  if (info->countdown == 0 && opts_.cache_memory) { 
-                              // We have freed [i.e. actually freed with
-                              // CudaFree()] enough of these that we think
-                              // we're wasting too much time this way and
-                              // need to start caching them.
-    info->freed.push_back(addr);
-  } else { // Actually free the address, and decrease "countdown".
-    info->countdown--;
-    /*
-      If you get an "unspecified launch error" after the cudaFree call below, it
-      may not be an error with the immediate call, but it could reflect an error
-      that happened earlier.  We encountered the CUBLAS bug described at
-      https://devtalk.nvidia.com/default/topic/758598/cublas-gemm-leads-to-invalid-reads-for-some-matrix-dimensions/
-      which causes sgemm to access invalid memory.  After reproducibly getting
-      "unspecified launch failure" at the location below, we ran the program in
-      cuda-memcheck and got the following:
-      ========= Invalid __global__ read of size 4
-      =========     at 0x00000180 in sgemm_sm_heavy_nt_ldg
-      =========     by thread (223,0,0) in block (0,0,0)
-      =========     Address 0x4a0052607c is out of bounds
-      (and lots more stuff like that).  It appears to only happen for certain
-      matrix sizes, usually encountered for partial minibatches at the end of a
-      training job.  It happened on K20s but not on K10s. We know this happened
-      with CUDA toolkit version 5.5, and the link above says the bug has been
-      resolved in version 6.5 of the toolkit.  Our fix was to just not run the
-      affected training runs on our K20s, since this bug seemed to show up quite
-      rarely.
-     */
-    CU_SAFE_CALL(cudaFree(addr)); // This is how we free, even if allocated with
-                                  // cudaMallocPitch().
-  }
-}
-
-
-inline void CuAllocator::DisableCaching() {
-  KALDI_LOG << "Disabling caching of GPU memory.";
-  KALDI_ASSERT(size_to_list_.empty()); // No memory allocated yet!
-  opts_.cache_memory = false;
-}
-
-void CuAllocator::ReleaseAllCachedMemory(bool destroy) {
-  KALDI_VLOG(2) << "Releasing all cached memory.";
-  for (size_t i = 0; i < size_to_list_.size(); i++) {
-    if (size_to_list_[i] == NULL)
-      continue;
-    typedef std::map<size_t, MemInfoForSize*>::iterator  IterType;
-    for (IterType iter = size_to_list_[i]->begin();
-         iter != size_to_list_[i]->end(); ++iter) {
-      MemInfoForSize *info = iter->second;
-      if (destroy && !info->freed.empty()) {
-        // When called from the destructor at program end, if verbose level is
-        // high, say the sizes we had.
-        if (info->row_bytes == 0) {
-          KALDI_VLOG(3) << "Releasing " << info->freed.size() << " blocks of "
-                        << info->num_rows << " bytes.";
-        } else {
-          KALDI_VLOG(3) << "Releasing " << info->freed.size()
-                        << " 2-d blocks of " << info->num_rows << " rows of "
-                        << info->row_bytes << " bytes each.";
-        }
-      }
-      if (!destroy) {
-        // We only do this freeing part when we're *not* called from the
-        // destuctor (destroy = false).  This leads to a crash when called from
-        // the destructor, with cudaFree returning "unload of CUDA runtime
-        // failed".  Presumably this has to do with the destruction order of
-        // C++, which we can't really control.
-        while (!info->freed.empty()) {
-          CU_SAFE_CALL(cudaFree(info->freed.back()));
-          info->freed.pop_back();
-        }
-      }
-      if (destroy)
-        delete info;
-    }
-    if (destroy) {
-      delete size_to_list_[i];
-      size_to_list_[i] = NULL;
-    }
-  }
-}
-
-void CuAllocator::Cleanup() {
-  // TODO: implement this or remove it (and also PossiblyCleanup).
-  // Actually we may never implement this, as just calling
-  // ReleaseAllCachedMemory whenever an allocation fails is probably
-  // sufficient.
-}
-void CuAllocator::PossiblyCleanup(size_t num_bytes) {
-  if (static_cast<size_t>(cleanup_countdown_bytes_) <= num_bytes) {
-    Cleanup();
-    cleanup_countdown_bytes_ = opts_.cleanup_interval_bytes;
-  } else {
-    cleanup_countdown_bytes_ -= static_cast<int32>(num_bytes);
-  }
-}
-
-CuAllocator::~CuAllocator() {
-  // Check that nothing was allocated by the user and not freed.
-  std::set<MemInfoForSize*> unfreed_set;
-  typedef unordered_map<void*, MemInfoForSize *>::iterator IterType;
-  for (IterType iter = addr_to_list_.begin(); iter != addr_to_list_.end();
-       ++iter)
-    unfreed_set.insert(iter->second);
-  for (std::set<MemInfoForSize*>::iterator iter = unfreed_set.begin();
-       iter != unfreed_set.end(); ++iter) {
-    MemInfoForSize *info = *iter;
-    KALDI_ASSERT(info->currently_used > 0); // Or should not be in this set
-                                            // (code error or memory corruption)
-    if (info->num_rows == 0) {
-      KALDI_WARN << info->currently_used << " memory chunks of size "
-                 << info->row_bytes << " were allocated and not freed.";
-    } else {
-      KALDI_WARN << info->currently_used << " memory chunks of size "
-                 << info->row_bytes << " per row, and " << info->num_rows
-                 << " rows, were allocated and not freed.";
-    }
-  }
-  
-  bool destroy = true;
-  ReleaseAllCachedMemory(destroy);
-}
-
-void CuDevice::Free(void *ptr) { allocator_->Free(ptr); }
-
 void* CuDevice::MallocPitch(size_t row_bytes, size_t num_rows, size_t *pitch) {
-  return allocator_->MallocPitch(row_bytes, num_rows, pitch);
+  void *ret_ptr = NULL;
+  cudaError_t e = cudaMallocPitch(&ret_ptr, pitch, row_bytes, num_rows);
+  if (e != cudaSuccess) {
+    PrintMemoryUsage();
+    KALDI_ERR << "CuDevice::MallocPitch: cannot allocate the requested memory (" 
+      << row_bytes << " x " << num_rows << " = "
+      << row_bytes * num_rows << " bytes )";
+  }
+  return ret_ptr;
 }
 
 void* CuDevice::Malloc(size_t size) {
-  return allocator_->Malloc(size);
+  void *ret_ptr = NULL;
+  cudaError_t e = cudaMalloc(&ret_ptr, size);
+  if (e != cudaSuccess) {
+    PrintMemoryUsage();
+    KALDI_ERR << "CuDevice::Malloc: cannot allocate the requested memory"
+      << " (" << size << " bytes )";
+  }
+  return ret_ptr;
 }
 
-void CuDevice::DisableCaching() {
-  allocator_->DisableCaching();
-}
-
-CuDevice::CuDevice(): active_gpu_id_(-1), verbose_(true),
-                      allocator_(new CuAllocator(CuAllocatorOptions(), this))
+CuDevice::CuDevice(): active_gpu_id_(-1), verbose_(true)
   { }
 
 
 CuDevice::~CuDevice() {
-  if (allocator_ != NULL)
-    delete allocator_;
-  if (Enabled())
+  if (Enabled()) {
     CU_SAFE_CALL(cublasShutdown());
+  }
 }
   
 // The instance of the static singleton 
diff --git a/src/cudamatrix/cu-device.h b/src/cudamatrix/cu-device.h
index ee46986e1..89d2c110a 100644
--- a/src/cudamatrix/cu-device.h
+++ b/src/cudamatrix/cu-device.h
@@ -33,7 +33,6 @@
 
 namespace kaldi {
 
-class CuAllocator; // Forward declaration.
 
 /**
  * Singleton object which represents CUDA device
@@ -55,9 +54,6 @@ class CuDevice {
   
   void Free(void *ptr);
 
-  /// Disable GPU memory caching
-  void DisableCaching();
-  
   /// Select a GPU for computation, the 'use_gpu' modes are:
   ///  "yes"      -- Select GPU automatically and die if this fails.
   ///  "optional" -- Do as above, but if it fails, back off to CPU. 
@@ -143,7 +139,6 @@ class CuDevice {
 
   bool verbose_;
 
-  CuAllocator *allocator_;
   
 }; // class CuDevice
 
diff --git a/src/gst-plugin/Makefile b/src/gst-plugin/Makefile
index 09af682ea..13f2bf07b 100644
--- a/src/gst-plugin/Makefile
+++ b/src/gst-plugin/Makefile
@@ -5,11 +5,14 @@ ifneq ($(KALDI_FLAVOR), dynamic)
 $(error Kaldi must compiled with dynamic libraries support. Run configure with --shared flag. )
 endif
 
-EXTRA_CXXFLAGS = -Wno-sign-compare -I ../../tools/portaudio/install/include 
+EXTRA_CXXFLAGS += -Wno-sign-compare -I ../../tools/portaudio/install/include 
 EXTRA_CXXFLAGS += $(shell pkg-config --cflags gstreamer-1.0)
 EXTRA_CXXFLAGS += $(shell pkg-config --cflags glib-2.0)
 
-EXTRA_LDLIBS = -pthread -lgstbase-1.0 -lgstcontroller-1.0 -lgstreamer-1.0 -lgobject-2.0 -lgmodule-2.0 -lgthread-2.0 -lrt -lglib-2.0
+EXTRA_LDLIBS += -pthread -lgstbase-1.0 -lgstcontroller-1.0 -lgmodule-2.0 -lgthread-2.0 -lrt 
+EXTRA_LDLIBS += $(shell pkg-config --libs gstreamer-1.0)
+EXTRA_LDLIBS += $(shell pkg-config --libs glib-2.0)
+
 
 #Kaldi shared libraries required by the GStreamer plugin
 EXTRA_LDLIBS += -lkaldi-online -lkaldi-lat -lkaldi-decoder -lkaldi-feat -lkaldi-transform \
diff --git a/src/nnet/nnet-activation.h b/src/nnet/nnet-activation.h
index 3c40f280a..ce7753f91 100644
--- a/src/nnet/nnet-activation.h
+++ b/src/nnet/nnet-activation.h
@@ -22,6 +22,7 @@
 #define KALDI_NNET_NNET_ACTIVATION_H_
 
 #include "nnet/nnet-component.h"
+#include "nnet/nnet-utils.h"
 #include "cudamatrix/cu-math.h"
 #include "cudamatrix/cu-rand.h"
 #include "util/text-utils.h"
@@ -136,6 +137,10 @@ class BlockSoftmax : public Component {
     }
   }
 
+  std::string Info() const {
+    return "\n  softmax-dims " + ToString(block_dims);
+  }
+
   std::vector<int32> block_dims;
   std::vector<int32> block_offset;
 };
diff --git a/src/nnet/nnet-utils.h b/src/nnet/nnet-utils.h
index 8b7e6ca70..59c2186fd 100644
--- a/src/nnet/nnet-utils.h
+++ b/src/nnet/nnet-utils.h
@@ -21,6 +21,9 @@
 #ifndef KALDI_NNET_NNET_UTILS_H_
 #define KALDI_NNET_NNET_UTILS_H_
 
+#include <iterator>
+#include <algorithm>
+
 #include "base/kaldi-common.h"
 #include "cudamatrix/cu-matrix.h"
 #include "cudamatrix/cu-array.h"
@@ -30,6 +33,16 @@
 namespace kaldi {
 namespace nnet1 {
 
+
+/**
+ * Define stream insertion operator for 'std::vector', useful for log-prints,
+ */
+template <typename T> 
+std::ostream& operator<<(std::ostream& os, const std::vector<T>& v) {
+  std::copy(v.begin(), v.end(), std::ostream_iterator<T>(os," "));
+  return os;
+}
+
 /**
  * Convert basic type to string (try not to overuse as ostringstream creation is slow)
  */
diff --git a/src/nnetbin/nnet-forward.cc b/src/nnetbin/nnet-forward.cc
index 1bc9233c3..983fcd4b2 100644
--- a/src/nnetbin/nnet-forward.cc
+++ b/src/nnetbin/nnet-forward.cc
@@ -75,7 +75,6 @@ int main(int argc, char *argv[]) {
     //Select the GPU
 #if HAVE_CUDA==1
     CuDevice::Instantiate().SelectGpuId(use_gpu);
-    CuDevice::Instantiate().DisableCaching();
 #endif
 
     Nnet nnet_transf;
diff --git a/src/nnetbin/nnet-train-frmshuff.cc b/src/nnetbin/nnet-train-frmshuff.cc
index 0f997511f..b88df9895 100644
--- a/src/nnetbin/nnet-train-frmshuff.cc
+++ b/src/nnetbin/nnet-train-frmshuff.cc
@@ -92,7 +92,6 @@ int main(int argc, char *argv[]) {
     //Select the GPU
 #if HAVE_CUDA==1
     CuDevice::Instantiate().SelectGpuId(use_gpu);
-    CuDevice::Instantiate().DisableCaching();
 #endif
 
     Nnet nnet_transf;
diff --git a/src/nnetbin/nnet-train-lstm-streams.cc b/src/nnetbin/nnet-train-lstm-streams.cc
index e3b580252..00b367c9d 100644
--- a/src/nnetbin/nnet-train-lstm-streams.cc
+++ b/src/nnetbin/nnet-train-lstm-streams.cc
@@ -114,7 +114,6 @@ int main(int argc, char *argv[]) {
     //Select the GPU
 #if HAVE_CUDA==1
     CuDevice::Instantiate().SelectGpuId(use_gpu);
-    CuDevice::Instantiate().DisableCaching();
 #endif
 
     Nnet nnet_transf;
diff --git a/src/nnetbin/nnet-train-mmi-sequential.cc b/src/nnetbin/nnet-train-mmi-sequential.cc
index 955440d4b..3ed5d2116 100644
--- a/src/nnetbin/nnet-train-mmi-sequential.cc
+++ b/src/nnetbin/nnet-train-mmi-sequential.cc
@@ -157,7 +157,6 @@ int main(int argc, char *argv[]) {
     // Select the GPU
 #if HAVE_CUDA == 1
     CuDevice::Instantiate().SelectGpuId(use_gpu);
-    CuDevice::Instantiate().DisableCaching();
 #endif
 
     Nnet nnet_transf;
diff --git a/src/nnetbin/nnet-train-mpe-sequential.cc b/src/nnetbin/nnet-train-mpe-sequential.cc
index 5bf628a16..7e932ade5 100644
--- a/src/nnetbin/nnet-train-mpe-sequential.cc
+++ b/src/nnetbin/nnet-train-mpe-sequential.cc
@@ -163,7 +163,6 @@ int main(int argc, char *argv[]) {
     // Select the GPU
 #if HAVE_CUDA == 1
     CuDevice::Instantiate().SelectGpuId(use_gpu);
-    CuDevice::Instantiate().DisableCaching();
 #endif
 
     Nnet nnet_transf;
diff --git a/src/nnetbin/nnet-train-perutt.cc b/src/nnetbin/nnet-train-perutt.cc
index 336ab1ad7..4080b349b 100644
--- a/src/nnetbin/nnet-train-perutt.cc
+++ b/src/nnetbin/nnet-train-perutt.cc
@@ -92,7 +92,6 @@ int main(int argc, char *argv[]) {
     //Select the GPU
 #if HAVE_CUDA==1
     CuDevice::Instantiate().SelectGpuId(use_gpu);
-    CuDevice::Instantiate().DisableCaching();
 #endif
 
     Nnet nnet_transf;
diff --git a/src/nnetbin/paste-post.cc b/src/nnetbin/paste-post.cc
index c88b6884d..a7cb88e52 100644
--- a/src/nnetbin/paste-post.cc
+++ b/src/nnetbin/paste-post.cc
@@ -22,22 +22,41 @@
 #include "base/io-funcs.h"
 #include "util/common-utils.h"
 #include "hmm/posterior.h"
+#include "nnet/nnet-utils.h"
 
-/** @brief Convert features into posterior format, used to specify NN training targets. */
+/** @brief Combines 2 or more streams with NN-training targets into a single one.
+ *  This is handy when training NN with more than one output layer (softmax).
+ *  The format of NN-targets is 'posterior' and the dimensionality of the output
+ *  stream is the sum of input-stream dimensions.
+ */
 int main(int argc, char *argv[]) {
   using namespace kaldi;
+  using namespace kaldi::nnet1;
   typedef kaldi::int32 int32;
   try {
     const char *usage =
-        "paste-post : paste N posterior streams (combine the posteriors while applying an offset\n"
-        "             to the integer labels in all but the 1st posterior stream)\n"
-        "Useful for multi-task or multi-lingual DNN training.\n"
+        "Combine 2 or more streams with NN-training targets into single stream.\n"
+        "As the posterior streams are pasted, the output dimension is the sum\n"
+        "of the input dimensions. This is used when training NN with\n"
+        "multiple softmaxes on its output. This is used in multi-task, \n"
+        "multi-lingual or multi-database training. Depending on the context,\n"
+        "an utterance is not required to be in all the input streams.\n"
+        "For a multi-database training only 1 output layer will be active.\n"
+        "\n"
+        "The lengths of utterances are provided as 1st argument.\n"
+        "The dimensions of input stream are set as 2nd in argument.\n"
+        "Follow the input and output streams which are in 'posterior' format.\n"
+        "\n"
         "Usage: paste-post <featlen-rspecifier> <dims-csl> <post1-rspecifier> ... <postN-rspecifier> <post-wspecifier>\n"
         "e.g.:\n"
         " paste-post 'ark:feat-to-len $feats ark,t:-|' 1029:1124 ark:post1.ark ark:post2.ark ark:pasted.ark\n";
 
     ParseOptions po(usage);
 
+    bool allow_partial = false;
+    po.Register("allow-partial", &allow_partial, 
+                "Produce output also when the utterance is not in all input streams.");
+
     po.Read(argc, argv);
 
     if (po.NumArgs() < 5) {
@@ -45,63 +64,86 @@ int main(int argc, char *argv[]) {
       exit(1);
     }
 
-    std::string featlen_rspecifier = po.GetArg(1), // segment lengths, will be used for main loop
+    std::string featlen_rspecifier = po.GetArg(1),  // segment lengths,
                 stream_dims_str = po.GetArg(2),
                 post_wspecifier = po.GetArg(po.NumArgs());
-    int32 stream_count = po.NumArgs() - 3; // number of input posterior streams
+    int32 stream_count = po.NumArgs() - 3;  // number of input posterior streams
 
-    // read dims of input posterior streams
+    // read the dims of input posterior streams,
     std::vector<int32> stream_dims;
     if (!kaldi::SplitStringToIntegers(stream_dims_str, ":,", false, &stream_dims))
       KALDI_ERR << "Invalid stream-dims string " << stream_dims_str;
     if (stream_count != stream_dims.size()) {
       KALDI_ERR << "Mismatch in input posterior-stream count " << stream_count
-                << " and --stream-dims count" << stream_dims.size() 
+                << " and --stream-dims count" << stream_dims.size()
                 << ", " << stream_dims_str;
     }
 
-    // prepare dim offsets of input streams
+    // prepare dim offsets of input streams,
     std::vector<int32> stream_offset(stream_dims.size()+1, 0);
-    for (int32 s=0; s<stream_dims.size(); s++) {
+    for (int32 s = 0; s < stream_dims.size(); s++) {
       stream_offset[s+1] = stream_offset[s] + stream_dims[s];
     }
 
-    // open the input posterior readers:
+    // open the input posterior readers,
     std::vector<RandomAccessPosteriorReader> posterior_reader(po.NumArgs()-3);
-    for (int32 s=0; s<stream_count; s++) {
+    for (int32 s = 0; s < stream_count; s++) {
       posterior_reader[s].Open(po.GetArg(s+3));
     }
 
-    int32 num_done = 0, num_err = 0;
+    int32 num_done = 0, num_err = 0, num_empty = 0;
     SequentialInt32Reader featlen_reader(featlen_rspecifier);
     PosteriorWriter posterior_writer(post_wspecifier);
 
     // main loop, posterior pasting happens here,
     for (; !featlen_reader.Done(); featlen_reader.Next()) {
-      bool ok = true;
+      bool ok = true, empty = true;
       std::string utt = featlen_reader.Key();
-      KALDI_VLOG(2) << "Processing " << utt;
       int32 num_frames = featlen_reader.Value();
-      // Create output posteriors: 
-      Posterior post(num_frames);
-      // Fill posterior from input streams:
-      for (int32 s = 0; s < stream_count; s++) {
-        if (!posterior_reader[s].HasKey(utt)) {
-          KALDI_WARN << "No such utterance " << utt
-                     << " in set " << (s+1) << " of posteriors.";
-          ok = false;
-          break;
-        }
-        const Posterior& post_s = posterior_reader[s].Value(utt);
-        KALDI_ASSERT(num_frames <= post_s.size());
-        for (int32 f = 0; f < num_frames; f++) {
-          for (int32 i = 0; i < post_s[f].size(); i++) {
-            int32 id = post_s[f][i].first;
-            BaseFloat val = post_s[f][i].second;
-            KALDI_ASSERT(id < stream_dims[s]);
-            post[f].push_back(std::make_pair(stream_offset[s] + id, val));
+      
+      // show which streams are non-empty,
+      if (allow_partial && kaldi::g_kaldi_verbose_level >= 2) {
+        std::string nonempty_streams;
+        for (int32 s = 0; s < stream_count; s++) {
+          if (posterior_reader[s].HasKey(utt)) {
+            nonempty_streams += " " + ToString(s);
           }
         }
+        KALDI_VLOG(2) << "Processing " << utt 
+                      << ", frames " << num_frames 
+                      << ", pasted-from streams " << nonempty_streams;
+      }
+
+      // Create output posteriors,
+      Posterior post(num_frames);
+
+      // Fill posterior from input streams,
+      for (int32 s = 0; s < stream_count; s++) {
+        if (!posterior_reader[s].HasKey(utt)) {
+          if (!allow_partial) {
+            KALDI_WARN << "No such utterance " << utt
+                       << " in set " << (s+1) << " of posteriors.";
+            ok = false;
+            break;
+          }
+        } else {
+          const Posterior& post_s = posterior_reader[s].Value(utt);
+          KALDI_ASSERT(num_frames <= post_s.size());
+          for (int32 f = 0; f < num_frames; f++) {
+            for (int32 i = 0; i < post_s[f].size(); i++) {
+              int32 id = post_s[f][i].first;
+              BaseFloat val = post_s[f][i].second;
+              KALDI_ASSERT(id < stream_dims[s]);
+              post[f].push_back(std::make_pair(stream_offset[s] + id, val));
+            }
+          }
+          empty = false;
+        }
+      }
+      if (empty) {  // nothing was pasted into 'post' for this utterance,
+        KALDI_WARN << "Utterance with no posteriors " << utt << ", discarding";
+        num_empty++;  // reported as 'missing sentences' in the final log,
+        continue;
       }
       if (ok) {
         posterior_writer.Write(featlen_reader.Key(), post);
@@ -111,7 +153,7 @@ int main(int argc, char *argv[]) {
       }
     }
     KALDI_LOG << "Pasted posteriors for " << num_done << " sentences, "
-              << "failed for " << num_err;
+              << "missing sentences " << num_empty << ", failed for " << num_err;
     return (num_done != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what();
diff --git a/src/nnetbin/rbm-train-cd1-frmshuff.cc b/src/nnetbin/rbm-train-cd1-frmshuff.cc
index 976db9254..3a0c24a35 100644
--- a/src/nnetbin/rbm-train-cd1-frmshuff.cc
+++ b/src/nnetbin/rbm-train-cd1-frmshuff.cc
@@ -92,7 +92,6 @@ int main(int argc, char *argv[]) {
 
 #if HAVE_CUDA==1
     CuDevice::Instantiate().SelectGpuId(use_gpu);
-    CuDevice::Instantiate().DisableCaching();
 #endif
 
     Nnet rbm_transf;