Mirror of https://github.com/mozilla/kaldi.git
trunk: Adding DNN-based speaker recognition recipe in egs/sre10
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@5223 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Parent: 189c77419e
Commit: 55d8f863f3
@ -0,0 +1,94 @@
#!/bin/bash
|
||||
|
||||
# Copyright 2013 Daniel Povey
|
||||
# 2014-2015 David Snyder
|
||||
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
|
||||
# 2015 Johns Hopkins University (Author: Daniel Povey)
|
||||
# Apache 2.0.
|
||||
|
||||
# This script extracts iVectors for a set of utterances, given
|
||||
# features and a trained DNN-based iVector extractor.
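# Illustrative invocation (hedged: the installed path/name of this script and
# the data/experiment directories below are hypothetical, shown only to make
# the five positional arguments explicit):
#
#   sid/extract_ivectors_dnn.sh --nj 40 --cmd "$train_cmd" \
#     exp/extractor_dnn exp/dnn/final.mdl data/train data/train_dnn \
#     exp/ivectors_train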
|
||||
|
||||
# Begin configuration section.
|
||||
nj=30
|
||||
cmd="run.pl"
|
||||
stage=0
|
||||
min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
|
||||
posterior_scale=1.0 # This scale helps to control for successive features being highly
|
||||
# correlated. E.g. try 0.1 or 0.3.
|
||||
# End configuration section.
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
|
||||
if [ $# != 5 ]; then
|
||||
echo "Usage: $0 <extractor-dir> <data> <ivector-dir>"
|
||||
echo " e.g.: $0 exp/extractor_2048_male data/train_male exp/ivectors_male"
|
||||
echo "main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --num-iters <#iters|10> # Number of iterations of E-M"
|
||||
echo " --nj <n|10> # Number of jobs (also see num-processes and num-threads)"
|
||||
echo " --num-threads <n|8> # Number of threads for each process"
|
||||
echo " --stage <stage|0> # To control partial reruns"
|
||||
echo " --num-gselect <n|20> # Number of Gaussians to select using"
|
||||
echo " # diagonal model."
|
||||
echo " --min-post <min-post|0.025> # Pruning threshold for posteriors"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
srcdir=$1
|
||||
nnet=$2
|
||||
data=$3
|
||||
data_dnn=$4
|
||||
dir=$5
|
||||
|
||||
for f in $srcdir/final.ie $srcdir/final.ubm $data/feats.scp ; do
|
||||
[ ! -f $f ] && echo "No such file $f" && exit 1;
|
||||
done
|
||||
|
||||
# Set various variables.
|
||||
mkdir -p $dir/log
|
||||
sdata=$data/split$nj;
|
||||
utils/split_data.sh $data $nj || exit 1;
|
||||
|
||||
sdata_dnn=$data_dnn/split$nj;
|
||||
utils/split_data.sh $data_dnn $nj || exit 1;
|
||||
|
||||
delta_opts=`cat $srcdir/delta_opts 2>/dev/null`
|
||||
|
||||
splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options
|
||||
|
||||
## Set up features.
|
||||
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |"
|
||||
|
||||
nnet_feats="ark,s,cs:apply-cmvn-sliding --center=true scp:$sdata_dnn/JOB/feats.scp ark:- |"
|
||||
|
||||
if [ $stage -le 0 ]; then
|
||||
echo "$0: extracting iVectors"
|
||||
$cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \
|
||||
nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \
|
||||
\| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \
|
||||
\| logprob-to-post --min-post=$min_post ark:- ark:- \| \
|
||||
scale-post ark:- $posterior_scale ark:- \| \
|
||||
ivector-extract --verbose=2 $srcdir/final.ie "$feats" ark,s,cs:- \
|
||||
ark,scp,t:$dir/ivector.JOB.ark,$dir/ivector.JOB.scp || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le 1 ]; then
|
||||
echo "$0: combining iVectors across jobs"
|
||||
for j in $(seq $nj); do cat $dir/ivector.$j.scp; done >$dir/ivector.scp || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le 2 ]; then
|
||||
# Be careful here: the speaker-level iVectors are now length-normalized,
|
||||
# even if they are otherwise the same as the utterance-level ones.
|
||||
echo "$0: computing mean of iVectors for each speaker and length-normalizing"
|
||||
$cmd $dir/log/speaker_mean.log \
|
||||
ivector-normalize-length scp:$dir/ivector.scp ark:- \| \
|
||||
ivector-mean ark:$data/spk2utt ark:- ark:- ark,t:$dir/num_utts.ark \| \
|
||||
ivector-normalize-length ark:- ark,scp:$dir/spk_ivector.ark,$dir/spk_ivector.scp || exit 1;
|
||||
fi
@ -0,0 +1,79 @@
#!/bin/bash
|
||||
# Copyright 2015 David Snyder
|
||||
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
|
||||
# 2015 Johns Hopkins University (Author: Daniel Povey)
|
||||
# Apache 2.0
|
||||
|
||||
# This script derives a full-covariance UBM from DNN posteriors and
|
||||
# speaker recognition features.
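# Note (added for clarity): each DNN output unit (senone) plays the role of one
# component of the resulting UBM, so the --num-components option below must
# equal the DNN's output dimension (5297 in this setup).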
|
||||
|
||||
# Begin configuration section.
|
||||
nj=40
|
||||
cmd="run.pl"
|
||||
stage=-2
|
||||
delta_window=3
|
||||
delta_order=2
|
||||
num_components=5297
|
||||
# End configuration section.
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: steps/init_full_ubm_from_dnn.sh <data-speaker-id> <data-dnn> <dnn-model> <new-ubm-dir>"
|
||||
echo "Initializes a full-covariance UBM from DNN posteriors and speaker recognition features."
|
||||
echo " e.g.: steps/init_full_ubm_from_dnn.sh data/train data/train_dnn exp/dnn/final.mdl exp/full_ubm"
|
||||
echo "main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --nj <n|16> # number of parallel training jobs"
|
||||
echo " --delta-window <n|3> # delta window size"
|
||||
echo " --delta-order <n|2> # delta order"
|
||||
echo " --number-components <n|5297> # number of components in the final GMM needs"
|
||||
echo " # to be equal to the size of the DNN output layer."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
data_dnn=$2
|
||||
nnet=$3
|
||||
dir=$4
|
||||
|
||||
for f in $data/feats.scp $data/vad.scp; do
|
||||
[ ! -f $f ] && echo "No such file $f" && exit 1;
|
||||
done
|
||||
|
||||
mkdir -p $dir/log
|
||||
echo $nj > $dir/num_jobs
|
||||
sdata=$data/split$nj;
|
||||
utils/split_data.sh $data $nj || exit 1;
|
||||
|
||||
sdata_dnn=$data_dnn/split$nj;
|
||||
utils/split_data.sh $data_dnn $nj || exit 1;
|
||||
|
||||
delta_opts="--delta-window=$delta_window --delta-order=$delta_order"
|
||||
echo $delta_opts > $dir/delta_opts
|
||||
|
||||
logdir=$dir/log
|
||||
|
||||
nnet_feats="ark,s,cs:apply-cmvn-sliding --center=true scp:$sdata_dnn/JOB/feats.scp ark:- |"
|
||||
|
||||
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | \
|
||||
apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | \
|
||||
select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |"
|
||||
|
||||
$cmd JOB=1:$nj $logdir/make_stats.JOB.log \
|
||||
nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \
|
||||
\| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \
|
||||
\| logprob-to-post ark:- ark:- \| \
|
||||
fgmm-global-acc-stats-post ark:- $num_components "$feats" \
|
||||
$dir/stats.JOB.acc || exit 1;
|
||||
|
||||
$cmd $dir/log/init.log \
|
||||
fgmm-global-init-from-accs --verbose=2 \
|
||||
"fgmm-global-sum-accs - $dir/stats.*.acc |" $num_components \
|
||||
$dir/final.ubm || exit 1;
|
||||
|
||||
exit 0;
@ -0,0 +1,181 @@
#!/bin/bash
|
||||
|
||||
# Copyright 2013 Daniel Povey
|
||||
# 2014-2015 David Snyder
|
||||
# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero)
|
||||
# 2015 Johns Hopkins University (Author: Daniel Povey)
|
||||
# Apache 2.0.
|
||||
|
||||
# This script trains the i-vector extractor using a DNN-based UBM. It also requires
|
||||
# an fGMM, usually created by the script sid/init_full_ubm_from_dnn.sh.
|
||||
# Note: there are 3 separate levels of parallelization: num_threads, num_processes,
|
||||
# and num_jobs. This may seem a bit excessive. It has to do with minimizing
|
||||
# memory usage and disk I/O, subject to various constraints. The "num_threads"
|
||||
# is how many threads a program uses; the "num_processes" is the number of separate
|
||||
# processes a single job spawns, and then sums the accumulators in memory.
|
||||
# Our recommendation:
|
||||
# - Set num_threads to the minimum of 4 and the number of virtual cores your machine has
#   (because of needing to lock various global quantities, the program can't
#   use many more than 4 threads with good CPU utilization).
|
||||
# - Set num_processes to the number of virtual cores on each machine you have, divided by
|
||||
# num_threads. E.g. 4, if you have 16 virtual cores. If you're on a shared queue
|
||||
# that's busy with other people's jobs, it may be wise to set it to rather less
|
||||
# than this maximum though, or your jobs won't get scheduled. And if memory is
|
||||
# tight you need to be careful; in our normal setup, each process uses about 5G.
|
||||
# - Set num_jobs to as many of the jobs (each using $num_threads * $num_processes CPUs)
|
||||
# your queue will let you run at one time, but don't go much more than 10 or 20, or
|
||||
# summing the accumulators will possibly get slow. If you have a lot of data, you
|
||||
# may want more jobs, though.
|
||||
|
||||
# Begin configuration section.
|
||||
nj=10 # this is the number of separate queue jobs we run, but each one
|
||||
# contains num_processes sub-jobs.. the real number of threads we
|
||||
# run is nj * num_processes * num_threads, and the number of
|
||||
# separate pieces of data is nj * num_processes.
|
||||
num_threads=4
|
||||
num_processes=4 # each job runs this many processes, each with --num-threads threads
|
||||
cmd="run.pl"
|
||||
stage=-4
|
||||
num_gselect=20 # Gaussian-selection using diagonal model: number of Gaussians to select
|
||||
ivector_dim=400 # dimension of the extracted i-vector
|
||||
use_weights=false # set to true to turn on the regression of log-weights on the ivector.
|
||||
num_iters=10
|
||||
min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
|
||||
num_samples_for_weights=3 # smaller than the default for speed (relates to a sampling method)
|
||||
cleanup=true
|
||||
posterior_scale=1.0 # This scale helps to control for successive features being highly
|
||||
# correlated. E.g. try 0.1 or 0.3
|
||||
sum_accs_opt=
|
||||
# End configuration section.
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
|
||||
if [ $# != 5 ]; then
|
||||
echo "Usage: $0 <fgmm-model> <dnn-model> <data-speaker-id> <data-dnn> <extractor-dir>"
|
||||
echo " e.g.: $0 exp/sup_ubm/final.ubm exp/dnn/final.mdl data/train data/train_dnn exp/extractor_male"
|
||||
echo "main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --num-iters <#iters|10> # Number of iterations of E-M"
|
||||
echo " --nj <n|10> # Number of jobs (also see num-processes and num-threads)"
|
||||
echo " --num-processes <n|4> # Number of processes for each queue job (relates"
|
||||
echo " # to summing accs in memory)"
|
||||
echo " --num-threads <n|4> # Number of threads for each process (can't be usefully"
|
||||
echo " # increased much above 4)"
|
||||
echo " --stage <stage|-4> # To control partial reruns"
|
||||
echo " --num-gselect <n|20> # Number of Gaussians to select using"
|
||||
echo " # diagonal model."
|
||||
echo " --sum-accs-opt <option|''> # Option e.g. '-l hostname=a15' to localize"
|
||||
echo " # sum-accs process to nfs server."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
fgmm_model=$1
|
||||
nnet=$2
|
||||
data=$3
|
||||
data_dnn=$4
|
||||
dir=$5
|
||||
|
||||
srcdir=$(dirname $fgmm_model)
|
||||
|
||||
for f in $fgmm_model $data/feats.scp ; do
|
||||
[ ! -f $f ] && echo "No such file $f" && exit 1;
|
||||
done
|
||||
|
||||
# Set various variables.
|
||||
mkdir -p $dir/log
|
||||
nj_full=$[$nj*$num_processes]
|
||||
sdata=$data/split$nj_full;
|
||||
utils/split_data.sh $data $nj_full || exit 1;
|
||||
|
||||
sdata_dnn=$data_dnn/split$nj_full;
|
||||
utils/split_data.sh $data_dnn $nj_full || exit 1;
|
||||
|
||||
delta_opts=`cat $srcdir/delta_opts 2>/dev/null`
|
||||
if [ -f $srcdir/delta_opts ]; then
|
||||
cp $srcdir/delta_opts $dir/ 2>/dev/null
|
||||
fi
|
||||
|
||||
splice_opts=`cat exp/nnet//splice_opts 2>/dev/null` # frame-splicing options
|
||||
|
||||
parallel_opts="-pe smp $[$num_threads*$num_processes]"
|
||||
## Set up features.
|
||||
feats="ark,s,cs:add-deltas $delta_opts scp:$sdata/JOB/feats.scp ark:- | apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 ark:- ark:- | select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- |"
|
||||
|
||||
nnet_feats="ark,s,cs:apply-cmvn-sliding --center=true scp:$sdata_dnn/JOB/feats.scp ark:- |"
|
||||
|
||||
|
||||
# Initialize the i-vector extractor using the FGMM input
|
||||
if [ $stage -le -2 ]; then
|
||||
cp $fgmm_model $dir/final.ubm || exit 1;
|
||||
$cmd $dir/log/convert.log \
|
||||
fgmm-global-to-gmm $dir/final.ubm $dir/final.dubm || exit 1;
|
||||
$cmd $dir/log/init.log \
|
||||
ivector-extractor-init --ivector-dim=$ivector_dim --use-weights=$use_weights \
|
||||
$dir/final.ubm $dir/0.ie || exit 1;
|
||||
fi
|
||||
|
||||
# Do Gaussian selection and posterior extraction
|
||||
|
||||
if [ $stage -le -1 ]; then
|
||||
echo $nj_full > $dir/num_jobs
|
||||
echo "$0: doing DNN posterior computation"
|
||||
$cmd JOB=1:$nj_full $dir/log/post.JOB.log \
|
||||
nnet-am-compute --apply-log=true $nnet "$nnet_feats" ark:- \
|
||||
\| select-voiced-frames ark:- scp,s,cs:$sdata/JOB/vad.scp ark:- \
|
||||
\| logprob-to-post --min-post=$min_post ark,s,cs:- ark:- \| \
|
||||
scale-post ark:- $posterior_scale "ark:|gzip -c >$dir/post.JOB.gz" || exit 1;
|
||||
|
||||
else
|
||||
if ! [ $nj_full -eq $(cat $dir/num_jobs) ]; then
|
||||
echo "Num-jobs mismatch $nj_full versus $(cat $dir/num_jobs)"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
x=0
|
||||
while [ $x -lt $num_iters ]; do
|
||||
if [ $stage -le $x ]; then
|
||||
rm $dir/.error 2>/dev/null
|
||||
|
||||
Args=() # bash array of training commands for 1:nj, that put accs to stdout.
|
||||
for j in $(seq $nj_full); do
|
||||
Args[$j]=`echo "ivector-extractor-acc-stats --num-threads=$num_threads --num-samples-for-weights=$num_samples_for_weights $dir/$x.ie '$feats' 'ark,s,cs:gunzip -c $dir/post.JOB.gz|' -|" | sed s/JOB/$j/g`
|
||||
done
|
||||
|
||||
echo "Accumulating stats (pass $x)"
|
||||
for g in $(seq $nj); do
|
||||
start=$[$num_processes*($g-1)+1]
|
||||
$cmd $parallel_opts $dir/log/acc.$x.$g.log \
|
||||
ivector-extractor-sum-accs --parallel=true "${Args[@]:$start:$num_processes}" \
|
||||
$dir/acc.$x.$g || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error accumulating stats on iteration $x" && exit 1;
|
||||
accs=""
|
||||
for j in $(seq $nj); do
|
||||
accs+="$dir/acc.$x.$j "
|
||||
done
|
||||
echo "Summing accs (pass $x)"
|
||||
$cmd $sum_accs_opt $dir/log/sum_acc.$x.log \
|
||||
ivector-extractor-sum-accs $accs $dir/acc.$x || exit 1;
|
||||
echo "Updating model (pass $x)"
|
||||
nt=$[$num_threads*$num_processes] # use the same number of threads that
|
||||
# each accumulation process uses, since we
|
||||
# can be sure the queue will support this many.
|
||||
$cmd -pe smp $nt $dir/log/update.$x.log \
|
||||
ivector-extractor-est --num-threads=$nt $dir/$x.ie $dir/acc.$x $dir/$[$x+1].ie || exit 1;
|
||||
rm $dir/acc.$x.*
|
||||
if $cleanup; then
|
||||
rm $dir/acc.$x
|
||||
# rm $dir/$x.ie
|
||||
fi
|
||||
fi
|
||||
x=$[$x+1]
|
||||
done
|
||||
|
||||
ln -s $x.ie $dir/final.ie
@ -0,0 +1,5 @@
This directory contains DNN scripts based on the nnet2 recipes found in
|
||||
the ASR examples (e.g., fisher_english). The scripts have been modified
|
||||
for speaker recognition purposes. Most of the scripts are lightly modified
|
||||
versions of those appearing in the steps or local directories of
|
||||
egs/fisher_english.
@ -0,0 +1,62 @@
#!/bin/bash
|
||||
#
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
mkdir -p data/lang_test
|
||||
|
||||
arpa_lm=data/local/lm/3gram-mincount/lm_unpruned.gz
|
||||
[ ! -f $arpa_lm ] && echo No such file $arpa_lm && exit 1;
|
||||
|
||||
mkdir -p data/lang_test
|
||||
cp -r data/lang/* data/lang_test
|
||||
|
||||
# grep -v '<s> <s>' etc. is only for future-proofing this script. Our
|
||||
# LM doesn't have these "invalid combinations". These can cause
|
||||
# determinization failures of CLG [ends up being epsilon cycles].
|
||||
# Note: remove_oovs.pl takes a list of words in the LM that aren't in
|
||||
# our word list. Since our LM doesn't have any, we just give it
|
||||
# /dev/null [we leave it in the script to show how you'd do it].
|
||||
gunzip -c "$arpa_lm" | \
|
||||
grep -v '<s> <s>' | \
|
||||
grep -v '</s> <s>' | \
|
||||
grep -v '</s> </s>' | \
|
||||
arpa2fst - | fstprint | \
|
||||
utils/remove_oovs.pl /dev/null | \
|
||||
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=data/lang_test/words.txt \
|
||||
--osymbols=data/lang_test/words.txt --keep_isymbols=false --keep_osymbols=false | \
|
||||
fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test/G.fst
|
||||
fstisstochastic data/lang_test/G.fst
|
||||
|
||||
|
||||
echo "Checking how stochastic G is (the first of these numbers should be small):"
|
||||
fstisstochastic data/lang_test/G.fst
|
||||
|
||||
## Check lexicon.
|
||||
## just have a look and make sure it seems sane.
|
||||
echo "First few lines of lexicon FST:"
|
||||
fstprint --isymbols=data/lang/phones.txt --osymbols=data/lang/words.txt data/lang/L.fst | head
|
||||
|
||||
echo Performing further checks
|
||||
|
||||
# Checking that G.fst is determinizable.
|
||||
fstdeterminize data/lang_test/G.fst /dev/null || echo Error determinizing G.
|
||||
|
||||
# Checking that L_disambig.fst is determinizable.
|
||||
fstdeterminize data/lang_test/L_disambig.fst /dev/null || echo Error determinizing L.
|
||||
|
||||
# Checking that disambiguated lexicon times G is determinizable
|
||||
# Note: we do this with fstdeterminizestar not fstdeterminize, as
|
||||
# fstdeterminize was taking forever (presumably related to a bug
# in this version of OpenFst that makes determinization slow in
# some cases).
|
||||
fsttablecompose data/lang_test/L_disambig.fst data/lang_test/G.fst | \
|
||||
fstdeterminizestar >/dev/null || echo Error
|
||||
|
||||
# Checking that LG is stochastic:
|
||||
fsttablecompose data/lang/L_disambig.fst data/lang_test/G.fst | \
|
||||
fstisstochastic || echo "[log:] LG is not stochastic"
|
||||
|
||||
|
||||
echo "$0 succeeded"
@ -0,0 +1,211 @@
#!/bin/bash
|
||||
|
||||
# Copyright 2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
# Apache 2.0.
|
||||
|
||||
stage=0
|
||||
|
||||
calldata=
|
||||
while test $# -gt 0
|
||||
do
|
||||
case "$1" in
|
||||
--calldata) calldata=1
|
||||
;;
|
||||
*) break;
|
||||
;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
. utils/parse_options.sh
|
||||
|
||||
if [ $# -eq 0 ]; then
|
||||
echo "$0 [--calldata] <fisher-dir-1> [<fisher-dir-2> ...]"
|
||||
echo " e.g.: $0 /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19\\"
|
||||
echo " /export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13"
|
||||
echo " (We also support a single directory that has the contents of all of them)"
|
||||
echo " If specified, --calldata will be used to map Kaldi speaker ID to real"
|
||||
echo " speaker PIN released with the Fisher corpus."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
# Check that the arguments are all absolute pathnames.
|
||||
|
||||
for dir in $*; do
|
||||
case $dir in /*) ;; *)
|
||||
echo "$0: all arguments must be absolute pathnames."; exit 1;
|
||||
esac
|
||||
done
|
||||
|
||||
# First check we have the right things in there...
|
||||
#
|
||||
rm -r data/local/data/links 2>/dev/null
|
||||
mkdir -p data/local/data/links || exit 1;
|
||||
|
||||
for subdir in fe_03_p1_sph1 fe_03_p1_sph3 fe_03_p1_sph5 fe_03_p1_sph7 \
|
||||
fe_03_p2_sph1 fe_03_p2_sph3 fe_03_p2_sph5 fe_03_p2_sph7 fe_03_p1_sph2 \
|
||||
fe_03_p1_sph4 fe_03_p1_sph6 fe_03_p1_tran fe_03_p2_sph2 fe_03_p2_sph4 \
|
||||
fe_03_p2_sph6 fe_03_p2_tran; do
|
||||
found_subdir=false
|
||||
for dir in $*; do
|
||||
if [ -d $dir/$subdir ]; then
|
||||
found_subdir=true
|
||||
ln -s $dir/$subdir data/local/data/links
|
||||
else
|
||||
new_style_subdir=$(echo $subdir | sed s/fe_03_p2_sph/fisher_eng_tr_sp_d/)
|
||||
if [ -d $dir/$new_style_subdir ]; then
|
||||
found_subdir=true
|
||||
ln -s $dir/$new_style_subdir data/local/data/links/$subdir
|
||||
fi
|
||||
fi
|
||||
done
|
||||
if ! $found_subdir; then
|
||||
echo "$0: could not find the subdirectory $subdir in any of $*"
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
tmpdir=`pwd`/data/local/data
|
||||
links=data/local/data/links
|
||||
|
||||
. ./path.sh # Needed for KALDI_ROOT
|
||||
|
||||
sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
|
||||
|
||||
if [ ! -x $sph2pipe ]; then
|
||||
echo "Could not find (or execute) the sph2pipe program at $sph2pipe";
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
# (1) Get transcripts in one file, and clean them up ..
|
||||
|
||||
if [ $stage -le 0 ]; then
|
||||
|
||||
find $links/fe_03_p1_tran/data $links/fe_03_p2_tran/data -name '*.txt' > $tmpdir/transcripts.flist
|
||||
|
||||
for dir in fe_03_p{1,2}_sph{1,2,3,4,5,6,7}; do
|
||||
find $links/$dir/ -name '*.sph'
|
||||
done > $tmpdir/sph.flist
|
||||
|
||||
n=`cat $tmpdir/transcripts.flist | wc -l`
|
||||
if [ $n -ne 11699 ]; then
|
||||
echo "Expected to find 11699 transcript files in the Fisher data, found $n"
|
||||
exit 1;
|
||||
fi
|
||||
n=`cat $tmpdir/sph.flist | wc -l`
|
||||
if [ $n -ne 11699 ]; then
|
||||
echo "Expected to find 11699 .sph files in the Fisher data, found $n"
|
||||
exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 1 ]; then
|
||||
mkdir -p data/train_all_asr
|
||||
|
||||
|
||||
## fe_03_00004.sph
|
||||
## Transcribed at the LDC
|
||||
#
|
||||
#7.38 8.78 A: an- so the topic is
|
||||
|
||||
echo -n > $tmpdir/text.1 || exit 1;
|
||||
|
||||
perl -e '
|
||||
use File::Basename;
|
||||
($tmpdir)=@ARGV;
|
||||
open(F, "<$tmpdir/transcripts.flist") || die "Opening list of transcripts";
|
||||
open(R, "|sort >data/train_all_asr/reco2file_and_channel") || die "Opening reco2file_and_channel";
|
||||
open(T, ">$tmpdir/text.1") || die "Opening text output";
|
||||
while (<F>) {
|
||||
$file = $_;
|
||||
m:([^/]+)\.txt: || die "Bad filename $_";
|
||||
$call_id = $1;
|
||||
print R "$call_id-A $call_id A\n";
|
||||
print R "$call_id-B $call_id B\n";
|
||||
open(I, "<$file") || die "Opening file $_";
|
||||
|
||||
$line1 = <I>;
|
||||
$line1 =~ m/# (.+)\.sph/ || die "Bad first line $line1 in file $file";
|
||||
$call_id eq $1 || die "Mismatch call-id $call_id vs $1\n";
|
||||
while (<I>) {
|
||||
if (m/([0-9.]+)\s+([0-9.]+) ([AB]):\s*(\S.+\S|\S)\s*$/) {
|
||||
$start = sprintf("%06d", $1 * 100.0);
|
||||
$end = sprintf("%06d", $2 * 100.0);
|
||||
length($end) > 6 && die "Time too long $end in file $file";
|
||||
$side = $3;
|
||||
$words = $4;
|
||||
$utt_id = "${call_id}-$side-$start-$end";
|
||||
print T "$utt_id $words\n" || die "Error writing to text file";
|
||||
}
|
||||
}
|
||||
}
|
||||
close(R); close(T) ' $tmpdir || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le 2 ]; then
|
||||
sort $tmpdir/text.1 | grep -v '((' | \
|
||||
awk '{if (NF > 1){ print; }}' | \
|
||||
sed 's:\[laugh\]:[laughter]:g' | \
|
||||
sed 's:\[sigh\]:[noise]:g' | \
|
||||
sed 's:\[cough\]:[noise]:g' | \
|
||||
sed 's:\[sigh\]:[noise]:g' | \
|
||||
sed 's:\[mn\]:[noise]:g' | \
|
||||
sed 's:\[breath\]:[noise]:g' | \
|
||||
sed 's:\[lipsmack\]:[noise]:g' > $tmpdir/text.2
|
||||
cp $tmpdir/text.2 data/train_all_asr/text
|
||||
# create segments file and utt2spk file...
|
||||
! cat data/train_all_asr/text | perl -ane 'm:([^-]+)-([AB])-(\S+): || die "Bad line $_;"; print "$1-$2-$3 $1-$2\n"; ' > data/train_all_asr/utt2spk \
|
||||
&& echo "Error producing utt2spk file" && exit 1;
|
||||
|
||||
cat data/train_all_asr/text | perl -ane 'm:((\S+-[AB])-(\d+)-(\d+))\s: || die; $utt = $1; $reco = $2; $s = sprintf("%.2f", 0.01*$3);
|
||||
$e = sprintf("%.2f", 0.01*$4); print "$utt $reco $s $e\n"; ' > data/train_all_asr/segments
|
||||
|
||||
utils/utt2spk_to_spk2utt.pl <data/train_all_asr/utt2spk > data/train_all_asr/spk2utt
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ]; then
|
||||
for f in `cat $tmpdir/sph.flist`; do
|
||||
# convert to absolute path
|
||||
readlink -e $f
|
||||
done > $tmpdir/sph_abs.flist
|
||||
|
||||
cat $tmpdir/sph_abs.flist | perl -ane 'm:/([^/]+)\.sph$: || die "bad line $_; "; print "$1 $_"; ' > $tmpdir/sph.scp
|
||||
cat $tmpdir/sph.scp | awk -v sph2pipe=$sph2pipe '{printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
|
||||
printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);}' | \
|
||||
sort -k1,1 -u > data/train_all_asr/wav.scp || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le 4 ]; then
|
||||
# get the spk2gender information. This is not a standard part of our
|
||||
# file formats
|
||||
# The files "filetable2fe_03_p2_sph1 fe_03_05852.sph ff
|
||||
cat $links/fe_03_p1_sph{1,2,3,4,5,6,7}/filetable.txt \
|
||||
$links/fe_03_p2_sph{1,2,3,4,5,6,7}/docs/filetable2.txt | \
|
||||
perl -ane 'm:^\S+ (\S+)\.sph ([fm])([fm]): || die "bad line $_;"; print "$1-A $2\n", "$1-B $3\n"; ' | \
|
||||
sort | uniq | utils/filter_scp.pl data/train_all_asr/spk2utt > data/train_all_asr/spk2gender
|
||||
|
||||
if [ ! -s data/train_all_asr/spk2gender ]; then
|
||||
echo "It looks like our first try at getting the spk2gender info did not work."
|
||||
echo "(possibly older distribution?) Trying something else."
|
||||
cat $links/fe_03_p1_tran/doc/fe_03_p1_filelist.tbl $links/fe_03_p2_tran/doc/fe_03_p2_filelist.tbl | \
|
||||
perl -ane 'm:fe_03_p[12]_sph\d\t(\d+)\t([mf])([mf]): || die "Bad line $_";
|
||||
print "fe_03_$1-A $2\n", "fe_03_$1-B $3\n"; ' | \
|
||||
sort | uniq | utils/filter_scp.pl data/train_all_asr/spk2utt > data/train_all_asr/spk2gender
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -z "$calldata" ]; then # fix speaker IDs
|
||||
cat $links/fe_03_p{1,2}_tran/doc/*calldata.tbl > $tmpdir/combined-calldata.tbl
|
||||
local/fisher_fix_speakerid.pl $tmpdir/combined-calldata.tbl data/train_all_asr
|
||||
utils/utt2spk_to_spk2utt.pl data/train_all_asr/utt2spk.new > data/train_all_asr/spk2utt.new
|
||||
# patch files
|
||||
for f in spk2utt utt2spk text segments spk2gender; do
|
||||
cp data/train_all_asr/$f data/train_all_asr/$f.old || exit 1;
|
||||
cp data/train_all_asr/$f.new data/train_all_asr/$f || exit 1;
|
||||
done
|
||||
rm $tmpdir/combined-calldata.tbl
|
||||
fi
|
||||
|
||||
echo "Data preparation succeeded"
@ -0,0 +1,114 @@
#!/usr/bin/perl -w
|
||||
|
||||
# Author: Peng Qi (pengqi@cs.stanford.edu)
|
||||
# This script maps Fisher speaker IDs to the true physical speakers
# and fixes the utterance IDs accordingly. It is expected to be run one level of
# directory above.
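# For example (hypothetical IDs): if the calldata table maps conversation
# fe_03_00004, channel A, to speaker PIN 12345, then utterance
# "fe_03_00004-A-000738-000878" is rewritten as
# "fe_03_12345-00004-A-000738-000878" with speaker ID "fe_03_12345".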
|
||||
|
||||
sub trim {
|
||||
(my $s = $_[0]) =~ s/^\s+|\s+$//g;
|
||||
return $s;
|
||||
}
|
||||
|
||||
if ($#ARGV != 1) {
|
||||
print "Usage: swbd1_fix_speakerid.pl <fisher-calldata-tbl-file> <data-dir>\n";
|
||||
print "E.g.: swbd1_fix_speakerid.pl data/local/train/combined-calldata.tbl data/train_all\n";
|
||||
}
|
||||
|
||||
$tab_file = $ARGV[0];
|
||||
$dir = $ARGV[1];
|
||||
|
||||
%conv_to_spk = ();
|
||||
|
||||
open(my $conv_tab, '<', $tab_file) or die "Could not open '$tab_file' $!\n";
|
||||
|
||||
while (my $line = <$conv_tab>) {
|
||||
chomp $line;
|
||||
|
||||
my @fields = split "," , $line;
|
||||
#$fields[0] = trim($fields[0]);
|
||||
$fields[5] = trim($fields[5]);
|
||||
$fields[10] = trim($fields[10]);
|
||||
$conv_to_spk{'fe_03_' . $fields[0] . '-A'} = $fields[5];
|
||||
$conv_to_spk{'fe_03_' . $fields[0] . '-B'} = $fields[10];
|
||||
}
|
||||
|
||||
close($conv_tab);
|
||||
|
||||
# fix utt2spk
|
||||
|
||||
%missingconv = ();
|
||||
|
||||
open(my $utt2spk, '<', $dir . '/utt2spk') or die "Could not open '$dir/utt2spk' $!\n";
|
||||
open(my $utt2spk_new, '>', $dir . '/utt2spk.new');
|
||||
|
||||
while (my $line = <$utt2spk>) {
|
||||
chomp $line;
|
||||
|
||||
my @fields = split " " , $line;
|
||||
my $convid = substr $fields[0], 0, 13;
|
||||
|
||||
if (exists $conv_to_spk{ $convid }) {
|
||||
my $spkid = $conv_to_spk{ $convid };
|
||||
$spkid = "fe_03_" . $spkid;
|
||||
my $newuttid = $spkid . '-' . (substr $fields[0], 6);
|
||||
|
||||
print $utt2spk_new "$newuttid $spkid\n";
|
||||
} else {
|
||||
my $convid = substr $convid, 6, 5;
|
||||
$missingconv{$convid} = 1;
|
||||
|
||||
print $utt2spk_new $fields[0]." ".$fields[1]."\n";
|
||||
}
|
||||
}
|
||||
|
||||
close($utt2spk);
|
||||
close($utt2spk_new);
|
||||
|
||||
foreach my $conv (keys %missingconv) {
|
||||
print "Warning: Conversation ID '$conv' not found in conv.tab, retaining old speaker IDs\n"
|
||||
}
|
||||
|
||||
# fix spk2gender
|
||||
|
||||
if (open(my $spk2gender, '<', $dir . '/spk2gender')) {
|
||||
open(my $spk2gender_new, '>', $dir . '/spk2gender.new');
|
||||
|
||||
while (my $line = <$spk2gender>) {
|
||||
chomp $line;
|
||||
|
||||
my @fields = split " ", $line;
|
||||
my $convid = $fields[0];
|
||||
|
||||
if (exists $conv_to_spk{ $convid }) {
|
||||
my $spkid = $conv_to_spk{ $convid };
|
||||
$spkid = "fe_03_" . $spkid;
|
||||
|
||||
print $spk2gender_new $spkid." ".$fields[1]."\n";
|
||||
} else {
|
||||
print $spk2gender_new $fields[0]." ".$fields[1]."\n";
|
||||
}
|
||||
}
|
||||
|
||||
close($spk2gender);
|
||||
close($spk2gender_new);
|
||||
}
|
||||
|
||||
# fix segments and text
|
||||
|
||||
foreach my $file ('segments','text') {
|
||||
open(my $oldfile, '<', "$dir/$file") or die "Could not open '$dir/$file' $!\n";
|
||||
open(my $newfile, '>', "$dir/$file.new");
|
||||
|
||||
while (my $line = <$oldfile>) {
|
||||
chomp $line;
|
||||
|
||||
my $convid = substr $line, 0, 13;
|
||||
if (exists $conv_to_spk{$convid}) {
|
||||
my $spkid = $conv_to_spk{$convid};
|
||||
print $newfile "fe_03_$spkid-" . (substr $line, 6) . "\n";
|
||||
} else {
|
||||
print $newfile "$line\n";
|
||||
}
|
||||
}
|
||||
}
@ -0,0 +1,182 @@
#!/bin/bash
|
||||
#
|
||||
|
||||
# To be run from one directory above this script.
|
||||
|
||||
## This script prepares the dictionary from the CMU pronunciation dictionary and
## the word list seen in the Fisher training transcripts (data/train_all_asr/text).
## It takes no corpus-directory arguments.
|
||||
|
||||
. path.sh
|
||||
|
||||
# The parts of the output of this that will be needed are
|
||||
# [in data/local/dict/ ]
|
||||
# lexicon.txt
|
||||
# extra_questions.txt
|
||||
# nonsilence_phones.txt
|
||||
# optional_silence.txt
|
||||
# silence_phones.txt
|
||||
|
||||
|
||||
#check existing directories
|
||||
[ $# != 0 ] && echo "Usage: local/dnn/fisher_prepare_dict.sh" && exit 1;
|
||||
|
||||
dir=data/local/dict
|
||||
mkdir -p $dir
|
||||
echo "Getting CMU dictionary"
|
||||
svn co https://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict $dir/cmudict
|
||||
|
||||
# silence phones, one per line.
|
||||
for w in sil laughter noise oov; do echo $w; done > $dir/silence_phones.txt
|
||||
echo sil > $dir/optional_silence.txt
|
||||
|
||||
# For this setup we're discarding stress.
|
||||
cat $dir/cmudict/cmudict.0.7a.symbols | sed s/[0-9]//g | \
|
||||
tr '[A-Z]' '[a-z]' | perl -ane 's:\r::; print;' | sort | uniq > $dir/nonsilence_phones.txt
|
||||
|
||||
# An extra question will be added by including the silence phones in one class.
|
||||
cat $dir/silence_phones.txt| awk '{printf("%s ", $1);} END{printf "\n";}' > $dir/extra_questions.txt || exit 1;
|
||||
|
||||
grep -v ';;;' $dir/cmudict/cmudict.0.7a | tr '[A-Z]' '[a-z]' | \
|
||||
perl -ane 'if(!m:^;;;:){ s:(\S+)\(\d+\) :$1 :; s: : :; print; }' | \
|
||||
perl -ane '@A = split(" ", $_); for ($n = 1; $n<@A;$n++) { $A[$n] =~ s/[0-9]//g; } print join(" ", @A) . "\n";' | \
|
||||
sort | uniq > $dir/lexicon1_raw_nosil.txt || exit 1;
|
||||
|
||||
# Add prons for laughter, noise, oov
|
||||
for w in `grep -v sil $dir/silence_phones.txt`; do
|
||||
echo "[$w] $w"
|
||||
done | cat - $dir/lexicon1_raw_nosil.txt > $dir/lexicon2_raw.txt || exit 1;
|
||||
|
||||
|
||||
# This is just for diagnostics:
|
||||
cat data/train_all_asr/text | \
|
||||
awk '{for (n=2;n<=NF;n++){ count[$n]++; } } END { for(n in count) { print count[n], n; }}' | \
|
||||
sort -nr > $dir/word_counts
|
||||
|
||||
cat $dir/word_counts | awk '{print $2}' > $dir/word_list
|
||||
|
||||
# between lexicon2_raw and lexicon3_expand we limit it to the words seen in
|
||||
# the Fisher data.
|
||||
utils/filter_scp.pl $dir/word_list $dir/lexicon2_raw.txt > $dir/lexicon3_expand.txt
|
||||
|
||||
# From lexicon2_raw to lexicon3_expand, we also expand the vocab for acronyms
|
||||
# like c._n._n. and other underscore-containing things as long as the new vocab
|
||||
# could be divided into finite parts contained in lexicon2_raw
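# For example (illustrative): if the word list contains "c._n._n." and both
# "c." and "n." already have pronunciations in lexicon2_raw.txt, the perl
# snippet below emits "c._n._n." with those pronunciations concatenated.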
|
||||
cat $dir/lexicon2_raw.txt | \
|
||||
perl -e 'while(<STDIN>) { @A=split; $w = shift @A; $pron{$w} = join(" ", @A); }
|
||||
($w) = @ARGV; open(W, "<$w") || die "Error opening word-counts from $w";
|
||||
while(<W>) { # reading in words we saw in training data..
|
||||
($c, $w) = split;
|
||||
if (!defined $pron{$w}) {
|
||||
@A = split("_", $w);
|
||||
if (@A > 1) {
|
||||
$this_pron = "";
|
||||
$pron_ok = 1;
|
||||
foreach $a (@A) {
|
||||
if (defined($pron{$a})) { $this_pron = $this_pron . "$pron{$a} "; }
|
||||
else { $pron_ok = 0; print STDERR "Not handling word $w, count is $c\n"; last; }
|
||||
}
|
||||
if ($pron_ok) { $new_pron{$w} = $this_pron; }
|
||||
}
|
||||
}
|
||||
}
|
||||
foreach $w (keys %new_pron) { print "$w $new_pron{$w}\n"; }' \
|
||||
$dir/word_counts >> $dir/lexicon3_expand.txt || exit 1;
|
||||
|
||||
|
||||
cat $dir/lexicon3_expand.txt \
|
||||
<( echo "mm m"
|
||||
echo "<unk> oov" ) > $dir/lexicon4_extra.txt
|
||||
|
||||
|
||||
cp $dir/lexicon4_extra.txt $dir/lexicon.txt
|
||||
rm $dir/lexiconp.txt 2>/dev/null; # can confuse later script if this exists.
|
||||
|
||||
awk '{print $1}' $dir/lexicon.txt | \
|
||||
perl -e '($word_counts)=@ARGV;
|
||||
open(W, "<$word_counts")||die "opening word-counts $word_counts";
|
||||
while(<STDIN>) { chop; $seen{$_}=1; }
|
||||
while(<W>) {
|
||||
($c,$w) = split;
|
||||
if (!defined $seen{$w}) { print; }
|
||||
} ' $dir/word_counts > $dir/oov_counts.txt
|
||||
|
||||
echo "*Highest-count OOVs are:"
|
||||
head -n 20 $dir/oov_counts.txt
|
||||
|
||||
utils/validate_dict_dir.pl $dir
|
||||
exit 0;
|
||||
|
||||
|
||||
|
||||
srcdir=data/local/train_asr # This is where we downloaded some stuff..
|
||||
dir=data/local/dict
|
||||
mkdir -p $dir
|
||||
srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text
|
||||
|
||||
# assume swbd_p1_data_prep.sh was done already.
|
||||
[ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1;
|
||||
|
||||
#(2a) Dictionary preparation:
|
||||
# Pre-processing (Upper-case, remove comments)
|
||||
awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \
|
||||
$srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \
|
||||
> $dir/lexicon1.txt || exit 1;
|
||||
|
||||
|
||||
cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \
|
||||
grep -v SIL > $dir/nonsilence_phones.txt || exit 1;
|
||||
|
||||
( echo SIL; echo SPN; echo NSN; echo LAU ) > $dir/silence_phones.txt
|
||||
|
||||
echo SIL > $dir/optional_silence.txt
|
||||
|
||||
# No "extra questions" in the input to this setup, as we don't
|
||||
# have stress or tone.
|
||||
echo -n >$dir/extra_questions.txt
|
||||
|
||||
# Add to the lexicon the silences, noises etc.
|
||||
(echo '!SIL SIL'; echo '[VOCALIZED-NOISE] SPN'; echo '[NOISE] NSN'; echo '[LAUGHTER] LAU';
|
||||
echo '<UNK> SPN' ) | \
|
||||
cat - $dir/lexicon1.txt > $dir/lexicon2.txt || exit 1;
|
||||
|
||||
|
||||
# Map the words in the lexicon. That is-- for each word in the lexicon, we map it
|
||||
# to a new written form. The transformations we do are:
|
||||
# remove laughter markings, e.g.
|
||||
# [LAUGHTER-STORY] -> STORY
|
||||
# Remove partial-words, e.g.
|
||||
# -[40]1K W AH N K EY
|
||||
# becomes -1K
|
||||
# and
|
||||
# -[AN]Y IY
|
||||
# becomes
|
||||
# -Y
|
||||
# -[A]B[OUT]- B
|
||||
# becomes
|
||||
# -B-
|
||||
# Also, curly braces, which appear to be used for "nonstandard"
|
||||
# words or non-words, are removed, e.g.
|
||||
# {WOLMANIZED} W OW L M AX N AY Z D
|
||||
# -> WOLMANIZED
|
||||
# Also, mispronounced words, e.g.
|
||||
# [YEAM/YEAH] Y AE M
|
||||
# are changed to just e.g. YEAM, i.e. the orthography
|
||||
# of the mispronounced version.
|
||||
# Note-- this is only really to be used in training. The main practical
|
||||
# reason is to avoid having tons of disambiguation symbols, which
|
||||
# we otherwise would get because there are many partial words with
|
||||
# the same phone sequences (most problematic: S).
|
||||
# Also, map
|
||||
# THEM_1 EH M -> THEM
|
||||
# so that multiple pronunciations just have alternate entries
|
||||
# in the lexicon.
|
||||
|
||||
local/dnn/swbd_map_words.pl -f 1 $dir/lexicon2.txt | sort | uniq > $dir/lexicon3.txt || exit 1;
|
||||
|
||||
cp $dir/lexicon3.txt $dir/lexicon.txt # This is the final lexicon.
|
||||
|
||||
echo Prepared input dictionary and phone-sets for Switchboard phase 1.
@ -0,0 +1,111 @@
#!/bin/bash
|
||||
|
||||
|
||||
# To be run from one directory above this script.
|
||||
|
||||
|
||||
text=data/train_all_asr/text
|
||||
lexicon=data/local/dict/lexicon.txt
|
||||
|
||||
for f in "$text" "$lexicon"; do
|
||||
[ ! -f $f ] && echo "$0: No such file $f" && exit 1;
|
||||
done
|
||||
|
||||
# This script takes no arguments. It assumes you have already run
|
||||
# fisher_data_prep.sh and fisher_prepare_dict.sh
|
||||
# It takes as input the files
|
||||
#data/train_all/text
|
||||
#data/local/dict/lexicon.txt
|
||||
|
||||
dir=data/local/lm
|
||||
mkdir -p $dir
|
||||
export LC_ALL=C # You'll get errors about things being not sorted, if you
|
||||
# have a different locale.
|
||||
export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
|
||||
( # First make sure the kaldi_lm toolkit is installed.
|
||||
cd ../../../tools || exit 1;
|
||||
if [ -d kaldi_lm ]; then
|
||||
echo Not installing the kaldi_lm toolkit since it is already there.
|
||||
else
|
||||
echo Downloading and installing the kaldi_lm tools
|
||||
if [ ! -f kaldi_lm.tar.gz ]; then
|
||||
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
|
||||
fi
|
||||
tar -xvzf kaldi_lm.tar.gz || exit 1;
|
||||
cd kaldi_lm
|
||||
make || exit 1;
|
||||
echo Done making the kaldi_lm tools
|
||||
fi
|
||||
) || exit 1;
|
||||
|
||||
mkdir -p $dir
|
||||
|
||||
|
||||
cleantext=$dir/text.no_oov
|
||||
|
||||
cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
|
||||
{for(n=1; n<=NF;n++) { if (seen[$n]) { printf("%s ", $n); } else {printf("<unk> ");} } printf("\n");}' \
|
||||
> $cleantext || exit 1;
|
||||
|
||||
|
||||
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
|
||||
sort -nr > $dir/word.counts || exit 1;
|
||||
|
||||
|
||||
# Get counts from acoustic training transcripts, and add one-count
|
||||
# for each word in the lexicon (but not silence, we don't want it
|
||||
# in the LM-- we'll add it optionally later).
|
||||
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
|
||||
cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
|
||||
sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
|
||||
|
||||
# note: we probably won't really make use of <unk> as there aren't any OOVs
|
||||
cat $dir/unigram.counts | awk '{print $2}' | get_word_map.pl "<s>" "</s>" "<unk>" > $dir/word_map \
|
||||
|| exit 1;
|
||||
|
||||
# note: ignore 1st field of train.txt, it's the utterance-id.
|
||||
cat $cleantext | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1]=$2;}
|
||||
{ for(n=2;n<=NF;n++) { printf map[$n]; if(n<NF){ printf " "; } else { print ""; }}}' | gzip -c >$dir/train.gz \
|
||||
|| exit 1;
|
||||
|
||||
train_lm.sh --arpa --lmtype 3gram-mincount $dir || exit 1;
|
||||
|
||||
# Perplexity over 88307.000000 words (excluding 691.000000 OOVs) is 71.241332
|
||||
|
||||
# note: output is
|
||||
# data/local/lm/3gram-mincount/lm_unpruned.gz
|
||||
|
||||
|
||||
exit 0
|
||||
|
||||
|
||||
# From here is some commands to do a baseline with SRILM (assuming
|
||||
# you have it installed).
|
||||
heldout_sent=10000 # Don't change this if you want result to be comparable with
|
||||
# kaldi_lm results
|
||||
sdir=$dir/srilm # in case we want to use SRILM to double-check perplexities.
|
||||
mkdir -p $sdir
|
||||
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
|
||||
head -$heldout_sent > $sdir/heldout
|
||||
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
|
||||
tail -n +$heldout_sent > $sdir/train
|
||||
|
||||
cat $dir/word_map | awk '{print $1}' | cat - <(echo "<s>"; echo "</s>" ) > $sdir/wordlist
|
||||
|
||||
|
||||
ngram-count -text $sdir/train -order 3 -limit-vocab -vocab $sdir/wordlist -unk \
|
||||
-map-unk "<unk>" -kndiscount -interpolate -lm $sdir/srilm.o3g.kn.gz
|
||||
ngram -lm $sdir/srilm.o3g.kn.gz -ppl $sdir/heldout
|
||||
|
||||
# data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for <unk> in closed-vocabulary LM
|
||||
# file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs
|
||||
# 0 zeroprobs, logprob= -165170 ppl= 71.7609 ppl1= 123.258
|
||||
|
||||
|
||||
# Note: perplexity SRILM gives to Kaldi-LM model is similar to what kaldi-lm reports above.
|
||||
# Difference in WSJ must have been due to different treatment of <unk>.
|
||||
ngram -lm $dir/3gram-mincount/lm_unpruned.gz -ppl $sdir/heldout
|
||||
|
||||
# data/local/lm/srilm/srilm.o3g.kn.gz: line 71: warning: non-zero probability for <unk> in closed-vocabulary LM
|
||||
# file data/local/lm/srilm/heldout: 10000 sentences, 78998 words, 0 OOVs
|
||||
# 0 zeroprobs, logprob= -164990 ppl= 71.4278 ppl1= 122.614
@ -0,0 +1,321 @@
#!/bin/bash
|
||||
|
||||
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
|
||||
# 2015 David Snyder
|
||||
# Apache 2.0.
|
||||
#
|
||||
# This script is based off of get_egs2.sh in ../../steps/nnet2/, but has been
|
||||
# modified for speaker recognition purposes to use a sliding window CMN.
|
||||
#
|
||||
# This script, which will generally be called from other neural-net training
|
||||
# scripts, extracts the training examples used to train the neural net (and also
|
||||
# the validation examples used for diagnostics), and puts them in separate archives.
|
||||
#
|
||||
# This script differs from get_egs.sh in that it dumps egs with several frames
|
||||
# of labels, controlled by the frames_per_eg config variable (default: 8). This
|
||||
# takes many times less disk space because typically we have 4 to 7 frames of
|
||||
# context on the left and right, and this ends up getting shared. This is at
|
||||
# the expense of slightly higher disk I/O during training time.
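# Illustrative arithmetic with the defaults below: with left_context=4,
# right_context=4 and frames_per_eg=8, one example stores 8+4+4 = 16 input
# frames for 8 labels, whereas 8 single-frame examples would store
# 8 * (1+4+4) = 72 frames.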
|
||||
#
|
||||
# We also have a simpler way of dividing the egs up into pieces, with one level
|
||||
# of index, so we have $dir/egs.{0,1,2,...}.ark instead of having two levels of
|
||||
# indexes. The extra files we write to $dir that explain the structure are
|
||||
# $dir/info/num_archives, which contains the number of files egs.*.ark, and
|
||||
# $dir/info/frames_per_eg, which contains the number of frames of labels per eg
|
||||
# (e.g. 7), and $dir/samples_per_archive. These replace the files
|
||||
# iters_per_epoch and num_jobs_nnet and egs_per_iter that the previous script
|
||||
# wrote to. This script takes the directory where the "egs" are located as the
|
||||
# argument, not the directory one level up.
|
||||
|
||||
# Begin configuration section.
|
||||
cmd=run.pl
|
||||
feat_type= # e.g. set it to "raw" to use raw MFCC
|
||||
frames_per_eg=8 # number of frames of labels per example. more->less disk space and
|
||||
# less time preparing egs, but more I/O during training.
|
||||
# note: the script may reduce this if reduce_frames_per_eg is true.
|
||||
left_context=4 # amount of left-context per eg
|
||||
right_context=4 # amount of right-context per eg
|
||||
|
||||
reduce_frames_per_eg=true # If true, this script may reduce the frames_per_eg
|
||||
# if there is only one archive and even with the
|
||||
# reduced frames_per_eg, the number of
|
||||
# samples_per_iter that would result is less than or
|
||||
# equal to the user-specified value.
|
||||
num_utts_subset=300 # number of utterances in validation and training
|
||||
# subsets used for shrinkage and diagnostics.
|
||||
num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
|
||||
num_train_frames_combine=10000 # # train frames for the above.
|
||||
num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs
|
||||
samples_per_iter=400000 # each iteration of training, see this many samples
|
||||
# per job. This is just a guideline; it will pick a number
|
||||
# that divides the number of samples in the entire data.
|
||||
|
||||
transform_dir= # If supplied, overrides alidir as the place to find fMLLR transforms
|
||||
postdir= # If supplied, we will use posteriors in it as soft training targets.
|
||||
|
||||
stage=0
|
||||
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
|
||||
random_copy=false
|
||||
online_ivector_dir= # can be used if we are including speaker information as iVectors.
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: $0 [opts] <data> <ali-dir> <egs-dir>"
|
||||
echo " e.g.: $0 data/train exp/tri3_ali exp/tri4_nnet/egs"
|
||||
echo ""
|
||||
echo "Main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config file containing options"
|
||||
echo " --cmd (utils/run.pl;utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --samples-per-iter <#samples;400000> # Number of samples of data to process per iteration, per"
|
||||
echo " # process."
|
||||
echo " --feat-type <lda|raw> # (by default it tries to guess). The feature type you want"
|
||||
echo " # to use as input to the neural net."
|
||||
echo " --frames-per-eg <frames;8> # number of frames per eg on disk"
|
||||
echo " --left-context <width;4> # Number of frames on left side to append for feature input"
|
||||
echo " --right-context <width;4> # Number of frames on right side to append for feature input"
|
||||
echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
|
||||
echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
|
||||
echo " # very end."
|
||||
echo " --stage <stage|0> # Used to run a partially-completed training process from somewhere in"
|
||||
echo " # the middle."
|
||||
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
alidir=$2
|
||||
dir=$3
|
||||
|
||||
|
||||
# Check some files.
|
||||
[ ! -z "$online_ivector_dir" ] && \
|
||||
extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
|
||||
|
||||
for f in $data/feats.scp $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $extra_files; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
|
||||
nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir...
|
||||
|
||||
sdata=$data/split$nj
|
||||
utils/split_data.sh $data $nj
|
||||
|
||||
mkdir -p $dir/log $dir/info
|
||||
cp $alidir/tree $dir
|
||||
|
||||
# Get list of validation utterances.
|
||||
awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \
|
||||
> $dir/valid_uttlist || exit 1;
|
||||
|
||||
if [ -f $data/utt2uniq ]; then
|
||||
echo "File $data/utt2uniq exists, so augmenting valid_uttlist to"
|
||||
echo "include all perturbed versions of the same 'real' utterances."
|
||||
mv $dir/valid_uttlist $dir/valid_uttlist.tmp
|
||||
utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
|
||||
cat $dir/valid_uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
|
||||
sort | uniq | utils/apply_map.pl $dir/uniq2utt | \
|
||||
awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid_uttlist
|
||||
rm $dir/uniq2utt $dir/valid_uttlist.tmp
|
||||
fi
|
||||
|
||||
awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \
|
||||
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset_uttlist || exit 1;
|
||||
|
||||
[ -z "$transform_dir" ] && transform_dir=$alidir
|
||||
|
||||
## Set up features.
|
||||
if [ -z $feat_type ]; then
|
||||
if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi
|
||||
fi
|
||||
echo "$0: feature type is $feat_type"
|
||||
|
||||
case $feat_type in
|
||||
raw) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |"
|
||||
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |"
|
||||
train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |"
|
||||
;;
|
||||
lda)
|
||||
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
|
||||
# caution: the top-level nnet training script should copy these to its own dir now.
|
||||
cp $alidir/{splice_opts,final.mat} $dir || exit 1;
|
||||
feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
;;
|
||||
*) echo "$0: invalid feature type $feat_type" && exit 1;
|
||||
esac
|
||||
|
||||
if [ -f $transform_dir/trans.1 ] && [ $feat_type != "raw" ]; then
|
||||
echo "$0: using transforms from $transform_dir"
|
||||
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
|
||||
valid_feats="$valid_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/trans.*|' ark:- ark:- |"
|
||||
train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/trans.*|' ark:- ark:- |"
|
||||
fi
|
||||
if [ -f $transform_dir/raw_trans.1 ] && [ $feat_type == "raw" ]; then
|
||||
echo "$0: using raw-fMLLR transforms from $transform_dir"
|
||||
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/raw_trans.JOB ark:- ark:- |"
|
||||
valid_feats="$valid_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/raw_trans.*|' ark:- ark:- |"
|
||||
train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $transform_dir/raw_trans.*|' ark:- ark:- |"
|
||||
fi
|
||||
if [ ! -z "$online_ivector_dir" ]; then
|
||||
feats_one="$(echo "$feats" | sed s:JOB:1:g)"
|
||||
ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1;
|
||||
echo $ivector_dim > $dir/info/ivector_dim
|
||||
ivectors_opt="--const-feat-dim=$ivector_dim"
|
||||
ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1;
|
||||
feats="$feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
|
||||
valid_feats="$valid_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
|
||||
train_subset_feats="$train_subset_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
|
||||
else
|
||||
echo 0 >$dir/info/ivector_dim
|
||||
fi
|
||||
|
||||
if [ $stage -le 0 ]; then
|
||||
echo "$0: working out number of frames of training data"
|
||||
num_frames=$(steps/nnet2/get_num_frames.sh $data)
|
||||
echo $num_frames > $dir/info/num_frames
|
||||
else
|
||||
num_frames=`cat $dir/info/num_frames` || exit 1;
|
||||
fi
|
||||
|
||||
# the + 1 is to round up, not down... we assume it doesn't divide exactly.
|
||||
num_archives=$[$num_frames/($frames_per_eg*$samples_per_iter)+1]
|
||||
# (for small data)- while reduce_frames_per_eg == true and the number of
|
||||
# archives is 1 and would still be 1 if we reduced frames_per_eg by 1, reduce it
|
||||
# by 1.
|
||||
reduced=false
|
||||
while $reduce_frames_per_eg && [ $frames_per_eg -gt 1 ] && \
|
||||
[ $[$num_frames/(($frames_per_eg-1)*$samples_per_iter)] -eq 0 ]; do
|
||||
frames_per_eg=$[$frames_per_eg-1]
|
||||
num_archives=1
|
||||
reduced=true
|
||||
done
|
||||
$reduced && echo "$0: reduced frames_per_eg to $frames_per_eg because amount of data is small."
|
||||
|
||||
echo $num_archives >$dir/info/num_archives
|
||||
echo $frames_per_eg >$dir/info/frames_per_eg
|
||||
|
||||
# Working out number of egs per archive
|
||||
egs_per_archive=$[$num_frames/($frames_per_eg*$num_archives)]
|
||||
! [ $egs_per_archive -le $samples_per_iter ] && \
|
||||
echo "$0: script error: egs_per_archive=$egs_per_archive not <= samples_per_iter=$samples_per_iter" \
|
||||
&& exit 1;
|
||||
|
||||
echo $egs_per_archive > $dir/info/egs_per_archive
|
||||
|
||||
echo "$0: creating $num_archives archives, each with $egs_per_archive egs, with"
|
||||
echo "$0: $frames_per_eg labels per example, and (left,right) context = ($left_context,$right_context)"
|
||||
|
||||
# Making soft links to storage directories. This is a no-op unless
|
||||
# the subdirectory $dir/storage/ exists. See utils/create_split_dir.pl
|
||||
for x in `seq $num_archives`; do
|
||||
utils/create_data_link.pl $dir/egs.$x.ark
|
||||
for y in `seq $nj`; do
|
||||
utils/create_data_link.pl $dir/egs_orig.$x.$y.ark
|
||||
done
|
||||
done
|
||||
|
||||
nnet_context_opts="--left-context=$left_context --right-context=$right_context"
|
||||
|
||||
echo $left_context > $dir/info/left_context
|
||||
echo $right_context > $dir/info/right_context
|
||||
if [ $stage -le 2 ]; then
|
||||
echo "$0: Getting validation and training subset examples."
|
||||
rm $dir/.error 2>/dev/null
|
||||
echo "$0: ... extracting validation and training-subset alignments."
|
||||
set -o pipefail;
|
||||
for id in $(seq $nj); do gunzip -c $alidir/ali.$id.gz; done | \
|
||||
copy-int-vector ark:- ark,t:- | \
|
||||
utils/filter_scp.pl <(cat $dir/valid_uttlist $dir/train_subset_uttlist) | \
|
||||
gzip -c >$dir/ali_special.gz || exit 1;
|
||||
set +o pipefail; # unset the pipefail option.
|
||||
|
||||
$cmd $dir/log/create_valid_subset.log \
|
||||
nnet-get-egs $ivectors_opt $nnet_context_opts "$valid_feats" \
|
||||
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
|
||||
"ark:$dir/valid_all.egs" || touch $dir/.error &
|
||||
$cmd $dir/log/create_train_subset.log \
|
||||
nnet-get-egs $ivectors_opt $nnet_context_opts "$train_subset_feats" \
|
||||
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
|
||||
"ark:$dir/train_subset_all.egs" || touch $dir/.error &
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1
|
||||
echo "... Getting subsets of validation examples for diagnostics and combination."
|
||||
$cmd $dir/log/create_valid_subset_combine.log \
|
||||
nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \
|
||||
ark:$dir/valid_combine.egs || touch $dir/.error &
|
||||
$cmd $dir/log/create_valid_subset_diagnostic.log \
|
||||
nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \
|
||||
ark:$dir/valid_diagnostic.egs || touch $dir/.error &
|
||||
|
||||
$cmd $dir/log/create_train_subset_combine.log \
|
||||
nnet-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \
|
||||
ark:$dir/train_combine.egs || touch $dir/.error &
|
||||
$cmd $dir/log/create_train_subset_diagnostic.log \
|
||||
nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \
|
||||
ark:$dir/train_diagnostic.egs || touch $dir/.error &
|
||||
wait
|
||||
sleep 5 # wait for file system to sync.
|
||||
cat $dir/valid_combine.egs $dir/train_combine.egs > $dir/combine.egs
|
||||
|
||||
for f in $dir/{combine,train_diagnostic,valid_diagnostic}.egs; do
|
||||
[ ! -s $f ] && echo "No examples in file $f" && exit 1;
|
||||
done
|
||||
rm $dir/valid_all.egs $dir/train_subset_all.egs $dir/{train,valid}_combine.egs $dir/ali_special.gz
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ]; then
|
||||
# create egs_orig.*.*.ark; the first index goes to $num_archives,
|
||||
# the second to $nj (which is the number of jobs in the original alignment
|
||||
# dir)
|
||||
|
||||
egs_list=
|
||||
for n in $(seq $num_archives); do
|
||||
egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark"
|
||||
done
|
||||
echo "$0: Generating training examples on disk"
|
||||
# The examples will go round-robin to egs_list.
|
||||
if [ ! -z $postdir ]; then
|
||||
$cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \
|
||||
nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \
|
||||
scp:$postdir/post.JOB.scp ark:- \| \
|
||||
nnet-copy-egs ark:- $egs_list || exit 1;
|
||||
else
|
||||
$cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \
|
||||
nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \
|
||||
"ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
|
||||
nnet-copy-egs ark:- $egs_list || exit 1;
|
||||
fi
|
||||
fi
|
||||
if [ $stage -le 4 ]; then
|
||||
echo "$0: recombining and shuffling order of archives on disk"
|
||||
# combine all the "egs_orig.JOB.*.ark" (over the $nj splits of the data) and
|
||||
# shuffle the order, writing to the egs.JOB.ark
|
||||
|
||||
egs_list=
|
||||
for n in $(seq $nj); do
|
||||
egs_list="$egs_list $dir/egs_orig.JOB.$n.ark"
|
||||
done
|
||||
|
||||
$cmd $io_opts $extra_opts JOB=1:$num_archives $dir/log/shuffle.JOB.log \
|
||||
nnet-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:$dir/egs.JOB.ark || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le 5 ]; then
|
||||
echo "$0: removing temporary archives"
|
||||
for x in `seq $num_archives`; do
|
||||
for y in `seq $nj`; do
|
||||
file=$dir/egs_orig.$x.$y.ark
|
||||
[ -L $file ] && rm $(readlink -f $file)
|
||||
rm $file
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
echo "$0: Finished preparing training examples"
|
|
@ -0,0 +1,181 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey).
|
||||
# 2015 David Snyder
|
||||
# Apache 2.0.
|
||||
#
|
||||
# This script is based on get_lda.sh in ../../steps/nnet2/, but has been
# modified for speaker recognition purposes to use a sliding-window CMN.
|
||||
#
|
||||
# This script, which will generally be called from other neural-net training
|
||||
# scripts, extracts the training examples used to train the neural net (and also
|
||||
# the validation examples used for diagnostics), and puts them in separate archives.
|
||||
|
||||
# Begin configuration section.
|
||||
cmd=run.pl
|
||||
|
||||
feat_type=
|
||||
stage=0
|
||||
splice_width=4 # meaning +- 4 frames on each side for second LDA
|
||||
left_context= # left context for second LDA
|
||||
right_context= # right context for second LDA
|
||||
rand_prune=4.0 # Relates to a speedup we do for LDA.
|
||||
within_class_factor=0.0001 # This affects the scaling of the transform rows...
|
||||
# sorry for no explanation, you'll have to see the code.
|
||||
transform_dir= # If supplied, overrides alidir
|
||||
num_feats=10000 # maximum number of feature files to use. Beyond a certain point it just
|
||||
# gets silly to use more data.
|
||||
lda_dim= # This defaults to no dimension reduction.
|
||||
online_ivector_dir=
|
||||
ivector_randomize_prob=0.0 # if >0.0, randomizes iVectors during training with
|
||||
# this prob per iVector.
|
||||
ivector_dir=
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: steps/nnet2/get_lda.sh [opts] <data> <lang> <ali-dir> <exp-dir>"
|
||||
echo " e.g.: steps/nnet2/get_lda.sh data/train data/lang exp/tri3_ali exp/tri4_nnet"
|
||||
echo " As well as extracting the examples, this script will also do the LDA computation,"
|
||||
echo " if --est-lda=true (default:true)"
|
||||
echo ""
|
||||
echo "Main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config file containing options"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
|
||||
echo " # (note: we splice processed, typically 40-dimensional frames"
|
||||
echo " --left-context <width;4> # Number of frames on left side to append for feature input, overrides splice-width"
|
||||
echo " --right-context <width;4> # Number of frames on right side to append for feature input, overrides splice-width"
|
||||
echo " --stage <stage|0> # Used to run a partially-completed training process from somewhere in"
|
||||
echo " # the middle."
|
||||
echo " --online-vector-dir <dir|none> # Directory produced by"
|
||||
echo " # steps/online/nnet2/extract_ivectors_online.sh"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
lang=$2
|
||||
alidir=$3
|
||||
dir=$4
|
||||
|
||||
[ -z "$left_context" ] && left_context=$splice_width
|
||||
[ -z "$right_context" ] && right_context=$splice_width
|
||||
|
||||
[ ! -z "$online_ivector_dir" ] && \
|
||||
extra_files="$online_ivector_dir/ivector_online.scp $online_ivector_dir/ivector_period"
|
||||
|
||||
# Check some files.
|
||||
for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $extra_files; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
|
||||
# Set some variables.
|
||||
oov=`cat $lang/oov.int`
|
||||
num_leaves=`gmm-info $alidir/final.mdl 2>/dev/null | awk '/number of pdfs/{print $NF}'` || exit 1;
|
||||
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
|
||||
|
||||
nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir...
|
||||
# in this dir we'll have just one job.
|
||||
sdata=$data/split$nj
|
||||
utils/split_data.sh $data $nj
|
||||
|
||||
mkdir -p $dir/log
|
||||
echo $nj > $dir/num_jobs
|
||||
cp $alidir/tree $dir
|
||||
|
||||
[ -z "$transform_dir" ] && transform_dir=$alidir
|
||||
|
||||
## Set up features. Note: these are different from the normal features
|
||||
## because we have one rspecifier that has the features for the entire
|
||||
## training set, not separate ones for each batch.
|
||||
if [ -z $feat_type ]; then
|
||||
if [ -f $alidir/final.mat ] && ! [ -f $alidir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi
|
||||
fi
|
||||
echo "$0: feature type is $feat_type"
|
||||
|
||||
|
||||
# If we have more than $num_feats feature files (default: 10k),
|
||||
# we use a random subset. This won't affect the transform much, and will
|
||||
# spare us an unnecessary pass over the data. Probably 10k is
|
||||
# way too much, but for small datasets this phase is quite fast.
|
||||
N=$[$num_feats/$nj]
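# e.g. with the default num_feats=10000 and (hypothetically) nj=40 alignment
# jobs, each split contributes at most N=250 utterances to the LDA accumulation.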
|
||||
|
||||
case $feat_type in
|
||||
raw) feats="ark,s,cs:utils/subset_scp.pl --quiet $N $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- |"
|
||||
;;
|
||||
lda)
|
||||
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
|
||||
cp $alidir/{splice_opts,final.mat} $dir || exit 1;
|
||||
feats="ark,s,cs:utils/subset_scp.pl --quiet $N $sdata/JOB/feats.scp | apply-cmvn-sliding --center=true scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
;;
|
||||
*) echo "$0: invalid feature type $feat_type" && exit 1;
|
||||
esac
|
||||
|
||||
if [ -f $transform_dir/trans.1 ] && [ $feat_type != "raw" ]; then
|
||||
echo "$0: using transforms from $transform_dir"
|
||||
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/trans.JOB ark:- ark:- |"
|
||||
fi
|
||||
if [ -f $transform_dir/raw_trans.1 ] && [ $feat_type == "raw" ]; then
|
||||
echo "$0: using raw-fMLLR transforms from $transform_dir"
|
||||
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/raw_trans.JOB ark:- ark:- |"
|
||||
fi
|
||||
|
||||
|
||||
feats_one="$(echo "$feats" | sed s:JOB:1:g)"
|
||||
# note: feat_dim is the raw, un-spliced feature dim without the iVectors.
|
||||
feat_dim=$(feat-to-dim "$feats_one" -) || exit 1;
|
||||
# by default: no dim reduction.
|
||||
|
||||
spliced_feats="$feats splice-feats --left-context=$left_context --right-context=$right_context ark:- ark:- |"
|
||||
|
||||
if [ ! -z "$online_ivector_dir" ]; then
|
||||
ivector_period=$(cat $online_ivector_dir/ivector_period) || exit 1;
|
||||
# note: subsample-feats, with negative value of n, repeats each feature n times.
|
||||
spliced_feats="$spliced_feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- | ivector-randomize --randomize-prob=$ivector_randomize_prob ark:- ark:- |' ark:- |"
|
||||
ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1;
|
||||
else
|
||||
ivector_dim=0
|
||||
fi
|
||||
echo $ivector_dim >$dir/ivector_dim
|
||||
|
||||
if [ -z "$lda_dim" ]; then
|
||||
spliced_feats_one="$(echo "$spliced_feats" | sed s:JOB:1:g)"
|
||||
lda_dim=$(feat-to-dim "$spliced_feats_one" -) || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le 0 ]; then
|
||||
echo "$0: Accumulating LDA statistics."
|
||||
rm $dir/lda.*.acc 2>/dev/null # in case any left over from before.
|
||||
$cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \
|
||||
ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
|
||||
weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \
|
||||
acc-lda --rand-prune=$rand_prune $alidir/final.mdl "$spliced_feats" ark,s,cs:- \
|
||||
$dir/lda.JOB.acc || exit 1;
|
||||
fi
|
||||
|
||||
echo $feat_dim > $dir/feat_dim
|
||||
echo $lda_dim > $dir/lda_dim
|
||||
echo $ivector_dim > $dir/ivector_dim
|
||||
|
||||
if [ $stage -le 1 ]; then
|
||||
sum-lda-accs $dir/lda.acc $dir/lda.*.acc 2>$dir/log/lda_sum.log || exit 1;
|
||||
rm $dir/lda.*.acc
|
||||
fi
|
||||
|
||||
if [ $stage -le 2 ]; then
|
||||
# There are various things that we sometimes (but not always) need
|
||||
# the within-class covariance and its Cholesky factor for, and we
|
||||
# write these to disk just in case.
|
||||
nnet-get-feature-transform --write-cholesky=$dir/cholesky.tpmat \
|
||||
--write-within-covar=$dir/within_covar.spmat \
|
||||
--within-class-factor=$within_class_factor --dim=$lda_dim \
|
||||
$dir/lda.mat $dir/lda.acc \
|
||||
2>$dir/log/lda_est.log || exit 1;
|
||||
fi
|
||||
|
||||
echo "$0: Finished estimating LDA"
|
|
@ -0,0 +1,50 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Remove excess utterances once they appear more than a specified
|
||||
# number of times with the same transcription, in a data set.
|
||||
# E.g. useful for removing excess "uh-huh" from training.
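# Sketch of the selection rule implemented below: a transcription seen n times is
# kept with probability max-count/n once n exceeds max-count, so on average about
# max-count copies survive (e.g. max-count=100, n=1000 -> each copy kept with
# probability 0.1).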
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: remove_dup_utts.sh max-count src-data-dir dest-data-dir"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
maxcount=$1
|
||||
srcdir=$2
|
||||
destdir=$3
|
||||
mkdir -p $destdir
|
||||
|
||||
[ ! -f $srcdir/text ] && echo "Invalid input directory $srcdir" && exit 1;
|
||||
|
||||
cp $srcdir/* $destdir
|
||||
cat $srcdir/text | \
|
||||
perl -e '
|
||||
$maxcount = shift @ARGV;
|
||||
@all = ();
|
||||
$p1 = 103349; $p2 = 71147; $k = 0;
|
||||
sub random { # our own random number generator: predictable.
|
||||
$k = ($k + $p1) % $p2;
|
||||
return ($k / $p2);
|
||||
}
|
||||
while(<>) {
|
||||
push @all, $_;
|
||||
@A = split(" ", $_);
|
||||
shift @A;
|
||||
$text = join(" ", @A);
|
||||
$count{$text} ++;
|
||||
}
|
||||
foreach $line (@all) {
|
||||
@A = split(" ", $line);
|
||||
shift @A;
|
||||
$text = join(" ", @A);
|
||||
$n = $count{$text};
|
||||
if ($n < $maxcount || random() < ($maxcount / $n)) {
|
||||
print $line;
|
||||
}
|
||||
}' $maxcount >$destdir/text
|
||||
|
||||
echo "Reduced number of utterances from `cat $srcdir/text | wc -l` to `cat $destdir/text | wc -l`"
|
||||
|
||||
echo "Using fix_data_dir.sh to reconcile the other files."
|
||||
utils/fix_data_dir.sh $destdir
|
||||
rm -r $destdir/.backup
|
|
@ -0,0 +1,28 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Make the features.
|
||||
|
||||
. cmd.sh
|
||||
|
||||
stage=1
|
||||
set -e
|
||||
. cmd.sh
|
||||
. ./path.sh
|
||||
. ./utils/parse_options.sh
|
||||
|
||||
mkdir -p exp/nnet2_online
|
||||
|
||||
if [ $stage -le 1 ]; then
|
||||
# this shows how you can split across multiple file-systems. we'll split the
|
||||
# MFCC dir across multiple locations. You might want to be careful here, if you
|
||||
# have multiple copies of Kaldi checked out and run the same recipe, not to let
|
||||
# them overwrite each other.
|
||||
mfccdir=mfcc
|
||||
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then
|
||||
date=$(date +'%m_%d_%H_%M')
|
||||
utils/create_split_dir.pl /export/b0{1,2,3,4}/$USER/kaldi-data/egs/fisher_english-$date/s5/$mfccdir/storage $mfccdir/storage
|
||||
fi
|
||||
utils/copy_data_dir.sh data/train_asr data/train_hires_asr
|
||||
steps/make_mfcc.sh --nj 70 --mfcc-config conf/mfcc_hires.conf \
|
||||
--cmd "$train_cmd" data/train_hires_asr exp/make_hires/train $mfccdir || exit 1;
|
||||
fi
|
|
@ -0,0 +1,71 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This script is based on run_nnet2_multisplice.sh in
|
||||
# egs/fisher_english/s5/local/online. It has been modified
|
||||
# for speaker recognition.
|
||||
|
||||
. cmd.sh
|
||||
|
||||
|
||||
stage=1
|
||||
train_stage=-10
|
||||
use_gpu=true
|
||||
set -e
|
||||
. cmd.sh
|
||||
. ./path.sh
|
||||
. ./utils/parse_options.sh
|
||||
|
||||
|
||||
# assume use_gpu=true since it would be way too slow otherwise.
|
||||
|
||||
if ! cuda-compiled; then
|
||||
cat <<EOF && exit 1
|
||||
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
|
||||
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
|
||||
where "nvcc" is installed.
|
||||
EOF
|
||||
fi
|
||||
parallel_opts="-l gpu=1"
|
||||
num_threads=1
|
||||
minibatch_size=512
|
||||
dir=exp/nnet2_online/nnet_ms_a
|
||||
mkdir -p exp/nnet2_online
|
||||
|
||||
|
||||
# Stages 1 through 5 are done in run_nnet2_common.sh,
|
||||
# so it can be shared with other similar scripts.
|
||||
local/dnn/run_nnet2_common.sh --stage $stage
|
||||
|
||||
if [ $stage -le 6 ]; then
|
||||
if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
|
||||
utils/create_split_dir.pl /export/b0{6,7,8,9}/$USER/kaldi-data/egs/fisher_english/s5/$dir/egs/storage $dir/egs/storage
|
||||
fi
|
||||
|
||||
# Because we have a lot of data here and we don't want the training to take
|
||||
# too long, we reduce the number of epochs from the defaults (15 + 5) to (3 +
|
||||
# 1). The option "--io-opts '-tc 12'" is to have more than the default number
|
||||
# (5) of jobs dumping the egs to disk; this is OK since we're splitting our
|
||||
# data across four filesystems for speed.
|
||||
|
||||
|
||||
local/dnn/train_multisplice_accel2.sh --stage $train_stage \
|
||||
--feat-type raw \
|
||||
--splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer3/-3:3 layer4/-7:2" \
|
||||
--num-epochs 6 \
|
||||
--num-hidden-layers 6 \
|
||||
--num-jobs-initial 3 --num-jobs-final 18 \
|
||||
--num-threads "$num_threads" \
|
||||
--minibatch-size "$minibatch_size" \
|
||||
--parallel-opts "$parallel_opts" \
|
||||
--mix-up 10500 \
|
||||
--initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \
|
||||
--cmd "$decode_cmd" \
|
||||
--egs-dir "$common_egs_dir" \
|
||||
--pnorm-input-dim 3500 \
|
||||
--pnorm-output-dim 350 \
|
||||
data/train_hires_asr data/lang exp/tri5a $dir || exit 1;
|
||||
|
||||
fi
|
||||
|
||||
exit 0;
|
||||
|
|
@ -0,0 +1,174 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This script is based on egs/fisher_english/s5/run.sh. It trains a
|
||||
# multisplice time-delay neural network used in the DNN-based speaker
|
||||
# recognition recipes.
|
||||
|
||||
# It's best to run the commands in this one by one.
|
||||
|
||||
. cmd.sh
|
||||
. path.sh
|
||||
mfccdir=`pwd`/mfcc
|
||||
set -e
|
||||
|
||||
# the next command produces the data in local/train_all_asr
|
||||
local/dnn/fisher_data_prep.sh /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19 \
|
||||
/export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13
|
||||
# You could also try specifying the --calldata argument to this command as below.
|
||||
# If specified, the script will use actual speaker personal identification
|
||||
# numbers released with the dataset, i.e. real speaker IDs. Note: --calldata has
|
||||
# to be the first argument of this script.
|
||||
# local/fisher_data_prep.sh --calldata /export/corpora3/LDC/LDC2004T19 /export/corpora3/LDC/LDC2005T19 \
|
||||
# /export/corpora3/LDC/LDC2004S13 /export/corpora3/LDC/LDC2005S13
|
||||
|
||||
# at BUT:
|
||||
# local/fisher_data_prep.sh /mnt/matylda6/jhu09/qpovey/FISHER/LDC2005T19 /mnt/matylda2/data/FISHER/
|
||||
|
||||
local/dnn/fisher_prepare_dict.sh
|
||||
|
||||
utils/prepare_lang.sh data/local/dict "<unk>" data/local/lang data/lang
|
||||
|
||||
local/dnn/fisher_train_lms.sh
|
||||
local/dnn/fisher_create_test_lang.sh
|
||||
|
||||
# Use the first 4k sentences as dev set. Note: when we trained the LM, we used
|
||||
# the 1st 10k sentences as dev set, so the 1st 4k won't have been used in the
|
||||
# LM training data. However, they will be in the lexicon, plus speakers
|
||||
# may overlap, so it's still not quite equivalent to a test set.
|
||||
|
||||
utils/fix_data_dir.sh data/train_all_asr
|
||||
|
||||
steps/make_mfcc.sh --nj 40 --cmd "$train_cmd" --mfcc-config conf/mfcc_asr.conf \
|
||||
data/train_all_asr exp/make_mfcc/train_all_asr $mfccdir || exit 1;
|
||||
|
||||
utils/fix_data_dir.sh data/train_all_asr
|
||||
utils/validate_data_dir.sh data/train_all_asr
|
||||
|
||||
|
||||
# The dev and test sets are each about 3.3 hours long. These are not carefully
|
||||
# done; there may be some speaker overlap with each other and with the training
|
||||
# set. Note: in our LM-training setup we excluded the first 10k utterances (they
|
||||
# were used for tuning but not for training), so the LM was not (directly) trained
|
||||
# on either the dev or test sets.
|
||||
utils/subset_data_dir.sh --first data/train_all_asr 10000 data/dev_and_test_asr
|
||||
utils/subset_data_dir.sh --first data/dev_and_test_asr 5000 data/dev_asr
|
||||
utils/subset_data_dir.sh --last data/dev_and_test_asr 5000 data/test_asr
|
||||
rm -r data/dev_and_test_asr
|
||||
|
||||
steps/compute_cmvn_stats.sh data/dev_asr exp/make_mfcc/dev_asr $mfccdir
|
||||
steps/compute_cmvn_stats.sh data/test_asr exp/make_mfcc/test_asr $mfccdir
|
||||
|
||||
n=$[`cat data/train_all_asr/segments | wc -l` - 10000]
|
||||
utils/subset_data_dir.sh --last data/train_all_asr $n data/train_asr
|
||||
steps/compute_cmvn_stats.sh data/train_asr exp/make_mfcc/train_asr $mfccdir
|
||||
|
||||
|
||||
# Now-- there are 1.6 million utterances, and we want to start the monophone training
|
||||
# on relatively short utterances (easier to align), but not only the very shortest
|
||||
# ones (mostly uh-huh). So take the 100k shortest ones, and then take 10k random
|
||||
# utterances from those.
|
||||
|
||||
utils/subset_data_dir.sh --shortest data/train_asr 100000 data/train_asr_100kshort
|
||||
utils/subset_data_dir.sh data/train_asr_100kshort 10000 data/train_asr_10k
|
||||
local/dnn/remove_dup_utts.sh 100 data/train_asr_10k data/train_asr_10k_nodup
|
||||
utils/subset_data_dir.sh --speakers data/train_asr 30000 data/train_asr_30k
|
||||
utils/subset_data_dir.sh --speakers data/train_asr 100000 data/train_asr_100k
|
||||
|
||||
|
||||
# The next commands are not necessary for the scripts to run, but increase
|
||||
# efficiency of data access by putting the mfcc's of the subset
|
||||
# in a contiguous place in a file.
|
||||
( . path.sh;
|
||||
# make sure mfccdir is defined as above..
|
||||
cp data/train_asr_10k_nodup/feats.scp{,.bak}
|
||||
copy-feats scp:data/train_asr_10k_nodup/feats.scp ark,scp:$mfccdir/kaldi_fish_10k_nodup.ark,$mfccdir/kaldi_fish_10k_nodup.scp \
|
||||
&& cp $mfccdir/kaldi_fish_10k_nodup.scp data/train_asr_10k_nodup/feats.scp
|
||||
)
|
||||
( . path.sh;
|
||||
# make sure mfccdir is defined as above..
|
||||
cp data/train_asr_30k/feats.scp{,.bak}
|
||||
copy-feats scp:data/train_asr_30k/feats.scp ark,scp:$mfccdir/kaldi_fish_30k.ark,$mfccdir/kaldi_fish_30k.scp \
|
||||
&& cp $mfccdir/kaldi_fish_30k.scp data/train_asr_30k/feats.scp
|
||||
)
|
||||
( . path.sh;
|
||||
# make sure mfccdir is defined as above..
|
||||
cp data/train_asr_100k/feats.scp{,.bak}
|
||||
copy-feats scp:data/train_asr_100k/feats.scp ark,scp:$mfccdir/kaldi_fish_100k.ark,$mfccdir/kaldi_fish_100k.scp \
|
||||
&& cp $mfccdir/kaldi_fish_100k.scp data/train_asr_100k/feats.scp
|
||||
)
|
||||
|
||||
steps/train_mono.sh --nj 10 --cmd "$train_cmd" \
|
||||
data/train_asr_10k_nodup data/lang exp/mono0a
|
||||
|
||||
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
|
||||
data/train_asr_30k data/lang exp/mono0a exp/mono0a_ali || exit 1;
|
||||
|
||||
steps/train_deltas.sh --cmd "$train_cmd" \
|
||||
2500 20000 data/train_asr_30k data/lang exp/mono0a_ali exp/tri1 || exit 1;
|
||||
|
||||
|
||||
(utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph
|
||||
steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
|
||||
exp/tri1/graph data/dev exp/tri1/decode_dev)&
|
||||
|
||||
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
|
||||
data/train_asr_30k data/lang exp/tri1 exp/tri1_ali || exit 1;
|
||||
|
||||
steps/train_deltas.sh --cmd "$train_cmd" \
|
||||
2500 20000 data/train_asr_30k data/lang exp/tri1_ali exp/tri2 || exit 1;
|
||||
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1;
|
||||
steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
|
||||
exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1;
|
||||
)&
|
||||
|
||||
|
||||
steps/align_si.sh --nj 30 --cmd "$train_cmd" \
|
||||
data/train_asr_100k data/lang exp/tri2 exp/tri2_ali || exit 1;
|
||||
|
||||
# Train tri3a, which is LDA+MLLT, on 100k data.
|
||||
steps/train_lda_mllt.sh --cmd "$train_cmd" \
|
||||
--splice-opts "--left-context=3 --right-context=3" \
|
||||
5000 40000 data/train_asr_100k data/lang exp/tri2_ali exp/tri3a || exit 1;
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1;
|
||||
steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
|
||||
exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1;
|
||||
)&
|
||||
|
||||
|
||||
# Next we'll use fMLLR and train with SAT (i.e. on
|
||||
# fMLLR features)
|
||||
|
||||
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
|
||||
data/train_asr_100k data/lang exp/tri3a exp/tri3a_ali || exit 1;
|
||||
|
||||
steps/train_sat.sh --cmd "$train_cmd" \
|
||||
5000 100000 data/train_asr_100k data/lang exp/tri3a_ali exp/tri4a || exit 1;
|
||||
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph
|
||||
steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
|
||||
exp/tri4a/graph data/dev exp/tri4a/decode_dev
|
||||
)&
|
||||
|
||||
|
||||
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
|
||||
data/train_asr data/lang exp/tri4a exp/tri4a_ali || exit 1;
|
||||
|
||||
|
||||
steps/train_sat.sh --cmd "$train_cmd" \
|
||||
7000 300000 data/train_asr data/lang exp/tri4a_ali exp/tri5a || exit 1;
|
||||
|
||||
(
|
||||
utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph
|
||||
steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \
|
||||
exp/tri5a/graph data/dev exp/tri5a/decode_dev
|
||||
)&
|
||||
|
||||
# this will help find issues with the lexicon.
|
||||
# steps/cleanup/debug_lexicon.sh --nj 300 --cmd "$train_cmd" data/train_asr_100k data/lang exp/tri5a data/local/dict/lexicon.txt exp/debug_lexicon_100k
|
||||
|
||||
## The following is based on the best current neural net recipe.
|
||||
local/dnn/run_nnet2_multisplice.sh
|
|
@ -0,0 +1,641 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
|
||||
# 2013 Xiaohui Zhang
|
||||
# 2013 Guoguo Chen
|
||||
# 2014 Vimal Manohar
|
||||
# 2014 Vijayaditya Peddinti
|
||||
# Apache 2.0.
|
||||
|
||||
# This is a modified version of train_multisplice_accel2.sh in
|
||||
# steps/nnet2/ for speaker recognition. The main difference is
|
||||
# that it uses different get_lda.sh and get_egs2.sh scripts.
|
||||
#
|
||||
# The original train_multisplice_accel2.sh was a modified version of
|
||||
# train_pnorm_multisplice2.sh (still using pnorm). The "accel" refers to the
|
||||
# fact that we increase the number of jobs during training (from
|
||||
# --num-jobs-initial to --num-jobs-final). We dropped "pnorm" from the name as
|
||||
# it was getting too long.
|
||||
|
||||
|
||||
# Begin configuration section.
|
||||
cmd=run.pl
|
||||
num_epochs=15 # Number of epochs of training;
|
||||
# the number of iterations is worked out from this.
|
||||
initial_effective_lrate=0.01
|
||||
final_effective_lrate=0.001
|
||||
bias_stddev=0.5
|
||||
pnorm_input_dim=3000
|
||||
pnorm_output_dim=300
|
||||
minibatch_size=128 # by default use a smallish minibatch size for neural net
|
||||
# training; this controls instability which would otherwise
|
||||
# be a problem with multi-threaded update.
|
||||
|
||||
samples_per_iter=400000 # each iteration of training, see this many samples
|
||||
# per job. This option is passed to get_egs.sh
|
||||
num_jobs_initial=1 # Number of neural net jobs to run in parallel at the start of training
|
||||
num_jobs_final=8 # Number of neural net jobs to run in parallel at the end of training
|
||||
prior_subset_size=10000 # 10k samples per job, for computing priors. Should be
|
||||
# more than enough.
|
||||
num_jobs_compute_prior=10 # these are single-threaded, run on CPU.
|
||||
get_egs_stage=0
|
||||
online_ivector_dir=
|
||||
remove_egs=true # set to false to disable removing egs.
|
||||
|
||||
max_models_combine=20 # The "max_models_combine" is the maximum number of models we give
|
||||
# to the final 'combine' stage, but these models will themselves be averages of
|
||||
# iteration-number ranges.
|
||||
|
||||
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
|
||||
# on each iter. You could set it to 0 or to a large value for complete
|
||||
# randomization, but this would both consume memory and cause spikes in
|
||||
# disk I/O. Smaller is easier on disk and memory but less random. It's
|
||||
# not a huge deal though, as samples are anyway randomized right at the start.
|
||||
# (the point of this is to get data in different minibatches on different iterations,
|
||||
# since in the preconditioning method, 2 samples in the same minibatch can
|
||||
# affect each other's gradients.)
|
||||
|
||||
add_layers_period=2 # by default, add new layers every 2 iterations.
|
||||
num_hidden_layers=3
|
||||
stage=-4
|
||||
exit_stage=-100 # you can set this to terminate the training early. Exits before running this stage
|
||||
|
||||
splice_indexes="layer0/-4:-3:-2:-1:0:1:2:3:4 layer2/-5:-1:3"
|
||||
# Format : layer<hidden_layer>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
|
||||
# note: hidden layers can be composed of one or more components,
# so hidden-layer indexing is different from the component count
|
||||
|
||||
|
||||
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time. These don't
|
||||
randprune=4.0 # speeds up LDA.
|
||||
alpha=4.0 # relates to preconditioning.
|
||||
update_period=4 # relates to online preconditioning: says how often we update the subspace.
|
||||
num_samples_history=2000 # relates to online preconditioning
|
||||
max_change_per_sample=0.075
|
||||
precondition_rank_in=20 # relates to online preconditioning
|
||||
precondition_rank_out=80 # relates to online preconditioning
|
||||
|
||||
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
|
||||
# specified.)
|
||||
num_threads=16
|
||||
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G"
|
||||
# by default we use 16 threads; this lets the queue know.
|
||||
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
|
||||
combine_num_threads=8
|
||||
combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
|
||||
cleanup=true
|
||||
egs_dir=
|
||||
lda_opts=
|
||||
lda_dim=
|
||||
egs_opts=
|
||||
transform_dir= # If supplied, overrides alidir
|
||||
feat_type= # Can be used to force "raw" features.
|
||||
align_cmd= # The cmd that is passed to steps/nnet2/align.sh
|
||||
align_use_gpu= # Passed to use_gpu in steps/nnet2/align.sh [yes/no]
|
||||
realign_times= # List of times on which we realign. Each time is
|
||||
# floating point number strictly between 0 and 1, which
|
||||
# will be multiplied by the num-iters to get an iteration
|
||||
# number.
|
||||
num_jobs_align=30 # Number of jobs for realignment
|
||||
# End configuration section.
|
||||
frames_per_eg=8 # to be passed on to get_egs2.sh
|
||||
|
||||
trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: $0 [opts] <data> <lang> <ali-dir> <exp-dir>"
|
||||
echo " e.g.: $0 data/train data/lang exp/tri3_ali exp/tri4_nnet"
|
||||
echo ""
|
||||
echo "Main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config file containing options"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --num-epochs <#epochs|15> # Number of epochs of training"
|
||||
echo " --initial-effective-lrate <lrate|0.02> # effective learning rate at start of training."
|
||||
echo " --final-effective-lrate <lrate|0.004> # effective learning rate at end of training."
|
||||
echo " # data, 0.00025 for large data"
|
||||
echo " --num-hidden-layers <#hidden-layers|2> # Number of hidden layers, e.g. 2 for 3 hours of data, 4 for 100hrs"
|
||||
echo " --add-layers-period <#iters|2> # Number of iterations between adding hidden layers"
|
||||
echo " --mix-up <#pseudo-gaussians|0> # Can be used to have multiple targets in final output layer,"
|
||||
echo " # per context-dependent state. Try a number several times #states."
|
||||
echo " --num-jobs-initial <num-jobs|1> # Number of parallel jobs to use for neural net training, at the start."
|
||||
echo " --num-jobs-final <num-jobs|8> # Number of parallel jobs to use for neural net training, at the end"
|
||||
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
|
||||
echo " # as well as speed; may interact with batch size; if you increase"
|
||||
echo " # this, you may want to decrease the batch size."
|
||||
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
|
||||
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
|
||||
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
|
||||
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
|
||||
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
|
||||
echo " # should not get too large, e.g. >2k)."
|
||||
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
|
||||
echo " # process."
|
||||
echo " --splice-indexes <string|layer0/-4:-3:-2:-1:0:1:2:3:4> "
|
||||
echo " # Frame indices used for each splice layer."
|
||||
echo " # Format : layer<hidden_layer_index>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
|
||||
echo " # (note: we splice processed, typically 40-dimensional frames"
|
||||
echo " --lda-dim <dim|''> # Dimension to reduce spliced features to with LDA"
|
||||
echo " --realign-epochs <list-of-epochs|''> # A list of space-separated epoch indices the beginning of which"
|
||||
echo " # realignment is to be done"
|
||||
echo " --align-cmd (utils/run.pl|utils/queue.pl <queue opts>) # passed to align.sh"
|
||||
echo " --align-use-gpu (yes/no) # specify is gpu is to be used for realignment"
|
||||
echo " --num-jobs-align <#njobs|30> # Number of jobs to perform realignment"
|
||||
echo " --stage <stage|-4> # Used to run a partially-completed training process from somewhere in"
|
||||
echo " # the middle."
|
||||
|
||||
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
lang=$2
|
||||
alidir=$3
|
||||
dir=$4
|
||||
|
||||
if [ ! -z "$realign_times" ]; then
|
||||
[ -z "$align_cmd" ] && echo "$0: realign_times specified but align_cmd not specified" && exit 1
|
||||
[ -z "$align_use_gpu" ] && echo "$0: realign_times specified but align_use_gpu not specified" && exit 1
|
||||
fi
|
||||
|
||||
# Check some files.
|
||||
for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/final.mdl $alidir/tree; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
|
||||
# Set some variables.
|
||||
num_leaves=`tree-info $alidir/tree 2>/dev/null | grep num-pdfs | awk '{print $2}'` || exit 1
|
||||
[ -z $num_leaves ] && echo "\$num_leaves is unset" && exit 1
|
||||
[ "$num_leaves" -eq "0" ] && echo "\$num_leaves is 0" && exit 1
|
||||
|
||||
nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir...
|
||||
# in this dir we'll have just one job.
|
||||
sdata=$data/split$nj
|
||||
utils/split_data.sh $data $nj
|
||||
|
||||
mkdir -p $dir/log
|
||||
echo $nj > $dir/num_jobs
|
||||
cp $alidir/tree $dir
|
||||
|
||||
# process the splice_inds string, to get a layer-wise context string
|
||||
# to be processed by the nnet-components
|
||||
# this would be mainly used by SpliceComponent|SpliceMaxComponent
|
||||
python steps/nnet2/make_multisplice_configs.py contexts --splice-indexes "$splice_indexes" $dir || exit -1;
|
||||
context_string=$(cat $dir/vars) || exit -1
|
||||
echo $context_string
|
||||
eval $context_string || exit -1; #
|
||||
# initializes variables used by get_lda.sh and get_egs.sh
|
||||
# get_lda.sh : first_left_context, first_right_context,
|
||||
# get_egs.sh : nnet_left_context & nnet_right_context
|
||||
|
||||
extra_opts=()
|
||||
[ ! -z "$feat_type" ] && extra_opts+=(--feat-type $feat_type)
|
||||
[ ! -z "$online_ivector_dir" ] && extra_opts+=(--online-ivector-dir $online_ivector_dir)
|
||||
[ -z "$transform_dir" ] && transform_dir=$alidir
|
||||
extra_opts+=(--transform-dir $transform_dir)
|
||||
|
||||
if [ $stage -le -4 ]; then
|
||||
echo "$0: calling get_lda.sh"
|
||||
local/dnn/get_lda.sh $lda_opts "${extra_opts[@]}" --left-context $first_left_context --right-context $first_right_context --cmd "$cmd" $data $lang $alidir $dir || exit 1;
|
||||
fi
|
||||
# these files will have been written by get_lda.sh
|
||||
feat_dim=$(cat $dir/feat_dim) || exit 1;
|
||||
ivector_dim=$(cat $dir/ivector_dim) || exit 1;
|
||||
lda_dim=$(cat $dir/lda_dim) || exit 1;
|
||||
|
||||
if [ $stage -le -3 ] && [ -z "$egs_dir" ]; then
|
||||
|
||||
extra_opts+=(--left-context $nnet_left_context )
|
||||
extra_opts+=(--right-context $nnet_right_context )
|
||||
echo "$0: calling get_egs2.sh"
|
||||
local/dnn/get_egs2.sh $egs_opts "${extra_opts[@]}" \
|
||||
--samples-per-iter $samples_per_iter --stage $get_egs_stage \
|
||||
--io-opts "$io_opts" \
|
||||
--cmd "$cmd" $egs_opts \
|
||||
--frames-per-eg $frames_per_eg \
|
||||
$data $alidir $dir/egs || exit 1;
|
||||
fi
|
||||
|
||||
if [ -z $egs_dir ]; then
  egs_dir=$dir/egs
else
  # confirm that the provided egs_dir has the necessary context
  egs_left_context=$(cat $egs_dir/info/left_context) || exit -1
  egs_right_context=$(cat $egs_dir/info/right_context) || exit -1
  echo $egs_left_context $nnet_left_context $egs_right_context $nnet_right_context
  ([[ $egs_left_context -lt $nnet_left_context ]] || [[ $egs_right_context -lt $nnet_right_context ]]) &&
    echo "Provided egs_dir $egs_dir does not have sufficient context to train the neural network." && exit -1;
fi
|
||||
|
||||
frames_per_eg=$(cat $egs_dir/info/frames_per_eg) || { echo "error: no such file $egs_dir/info/frames_per_eg"; exit 1; }
|
||||
num_archives=$(cat $egs_dir/info/num_archives) || { echo "error: no such file $egs_dir/info/num_archives"; exit 1; }
|
||||
|
||||
# num_archives_expanded considers each separate label-position from
|
||||
# 0..frames_per_eg-1 to be a separate archive.
|
||||
num_archives_expanded=$[$num_archives*$frames_per_eg]
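# e.g. (illustrative numbers) num_archives=10 and frames_per_eg=8 give
# num_archives_expanded=80 distinct (archive, frame-offset) combinations.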
|
||||
|
||||
[ $num_jobs_initial -gt $num_jobs_final ] && \
|
||||
echo "$0: --initial-num-jobs cannot exceed --final-num-jobs" && exit 1;
|
||||
|
||||
[ $num_jobs_final -gt $num_archives_expanded ] && \
|
||||
echo "$0: --final-num-jobs cannot exceed #archives $num_archives_expanded." && exit 1;
|
||||
|
||||
if ! [ $num_hidden_layers -ge 1 ]; then
|
||||
echo "Invalid num-hidden-layers $num_hidden_layers"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ $stage -le -2 ]; then
|
||||
echo "$0: initializing neural net";
|
||||
lda_mat=$dir/lda.mat
|
||||
tot_input_dim=$[$feat_dim+$ivector_dim]
|
||||
|
||||
online_preconditioning_opts="alpha=$alpha num-samples-history=$num_samples_history update-period=$update_period rank-in=$precondition_rank_in rank-out=$precondition_rank_out max-change-per-sample=$max_change_per_sample"
|
||||
|
||||
initial_lrate=$(perl -e "print ($initial_effective_lrate*$num_jobs_initial);")
|
||||
|
||||
# create the config files for nnet initialization
|
||||
python steps/nnet2/make_multisplice_configs.py \
|
||||
--splice-indexes "$splice_indexes" \
|
||||
--total-input-dim $tot_input_dim \
|
||||
--ivector-dim $ivector_dim \
|
||||
--lda-mat "$lda_mat" \
|
||||
--lda-dim $lda_dim \
|
||||
--pnorm-input-dim $pnorm_input_dim \
|
||||
--pnorm-output-dim $pnorm_output_dim \
|
||||
--online-preconditioning-opts "$online_preconditioning_opts" \
|
||||
--initial-learning-rate $initial_lrate \
|
||||
--bias-stddev $bias_stddev \
|
||||
--num-hidden-layers $num_hidden_layers \
|
||||
--num-targets $num_leaves \
|
||||
configs $dir || exit -1;
|
||||
|
||||
$cmd $dir/log/nnet_init.log \
|
||||
nnet-am-init $alidir/tree $lang/topo "nnet-init $dir/nnet.config -|" \
|
||||
$dir/0.mdl || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le -1 ]; then
|
||||
echo "Training transition probabilities and setting priors"
|
||||
$cmd $dir/log/train_trans.log \
|
||||
nnet-train-transitions $dir/0.mdl "ark:gunzip -c $alidir/ali.*.gz|" $dir/0.mdl \
|
||||
|| exit 1;
|
||||
fi
|
||||
|
||||
# set num_iters so that as close as possible, we process the data $num_epochs
|
||||
# times, i.e. $num_iters*$avg_num_jobs == $num_epochs*$num_archives_expanded,
|
||||
# where avg_num_jobs=(num_jobs_initial+num_jobs_final)/2.
|
||||
|
||||
num_archives_to_process=$[$num_epochs*$num_archives_expanded]
|
||||
num_archives_processed=0
|
||||
num_iters=$[($num_archives_to_process*2)/($num_jobs_initial+$num_jobs_final)]
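# Worked example with illustrative values: num_epochs=6, num_archives_expanded=80,
# num_jobs_initial=3, num_jobs_final=18:
#   num_archives_to_process = 6*80 = 480
#   num_iters = (480*2)/(3+18) = 960/21 = 45 (integer division)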
|
||||
|
||||
finish_add_layers_iter=$[$num_hidden_layers * $add_layers_period]

! [ $num_iters -gt $[$finish_add_layers_iter+2] ] \
  && echo "$0: Insufficient epochs" && exit 1
|
||||
|
||||
|
||||
# mix up at the iteration where we've processed about half the data; this keeps
|
||||
# the overall training procedure fairly invariant to the number of initial and
|
||||
# final jobs.
|
||||
# j = initial, k = final, n = num-iters, x = half-of-data epoch,
|
||||
# p is proportion of data we want to process (e.g. p=0.5 here).
|
||||
# solve for x if the amount of data processed by epoch x is p
|
||||
# times the amount by iteration n.
|
||||
# put this in wolfram alpha:
|
||||
# solve { x*j + (k-j)*x*x/(2*n) = p * (j*n + (k-j)*n/2), {x} }
|
||||
# got: x = (j n-sqrt(-n^2 (j^2 (p-1)-k^2 p)))/(j-k) and j!=k and n!=0
|
||||
# simplified manually to: n * (sqrt((1-p) j^2 + p k^2) - j) / (k-j)
|
||||
mix_up_iter=$(perl -e '($j,$k,$n,$p)=@ARGV; print int(0.5 + ($j==$k ? $n*$p : $n*(sqrt((1-$p)*$j*$j+$p*$k*$k)-$j)/($k-$j))); ' $num_jobs_initial $num_jobs_final $num_iters 0.5)
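# Worked example (same illustrative values as above): j=3, k=18, n=45, p=0.5:
#   sqrt((1-0.5)*3^2 + 0.5*18^2) = sqrt(166.5) ~= 12.90
#   mix_up_iter = int(0.5 + 45*(12.90 - 3)/(18 - 3)) = int(0.5 + 29.7) = 30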
|
||||
! [ $mix_up_iter -gt $finish_add_layers_iter ] && \
|
||||
echo "Mix-up-iter is $mix_up_iter, should be greater than $finish_add_layers_iter -> add more epochs?" \
|
||||
&& exit 1;
|
||||
|
||||
echo "$0: Will train for $num_epochs epochs = $num_iters iterations"
|
||||
[ $mix_up -gt 0 ] && echo "$0: Will mix up on iteration $mix_up_iter"
|
||||
|
||||
if [ $num_threads -eq 1 ]; then
|
||||
parallel_suffix="-simple" # this enables us to use GPU code if
|
||||
# we have just one thread.
|
||||
parallel_train_opts=
|
||||
if ! cuda-compiled; then
|
||||
echo "$0: WARNING: you are running with one thread but you have not compiled"
|
||||
echo " for CUDA. You may be running a setup optimized for GPUs. If you have"
|
||||
echo " GPUs and have nvcc installed, go to src/ and do ./configure; make"
|
||||
fi
|
||||
else
|
||||
parallel_suffix="-parallel"
|
||||
parallel_train_opts="--num-threads=$num_threads"
|
||||
fi
|
||||
|
||||
|
||||
approx_iters_per_epoch_final=$[$num_archives_expanded/$num_jobs_final]
|
||||
# First work out how many models we want to combine over in the final
|
||||
# nnet-combine-fast invocation. This equals
|
||||
# min(max(max_models_combine, approx_iters_per_epoch_final),
|
||||
# 2/3 * iters_after_mixup)
|
||||
num_models_combine=$max_models_combine
|
||||
if [ $num_models_combine -lt $approx_iters_per_epoch_final ]; then
|
||||
num_models_combine=$approx_iters_per_epoch_final
|
||||
fi
|
||||
iters_after_mixup_23=$[(($num_iters-$mix_up_iter-1)*2)/3]
|
||||
if [ $num_models_combine -gt $iters_after_mixup_23 ]; then
|
||||
num_models_combine=$iters_after_mixup_23
|
||||
fi
|
||||
first_model_combine=$[$num_iters-$num_models_combine+1]
|
||||
|
||||
x=0
|
||||
|
||||
|
||||
for realign_time in $realign_times; do
|
||||
# Work out the iterations on which we will re-align, if the --realign-times
|
||||
# option was used. This is slightly approximate.
|
||||
! perl -e "exit($realign_time > 0.0 && $realign_time < 1.0 ? 0:1);" && \
|
||||
echo "Invalid --realign-times option $realign_times: elements must be strictly between 0 and 1.";
|
||||
# the next formula is based on the one for mix_up_iter above.
|
||||
realign_iter=$(perl -e '($j,$k,$n,$p)=@ARGV; print int(0.5 + ($j==$k ? $n*$p : $n*(sqrt((1-$p)*$j*$j+$p*$k*$k)-$j)/($k-$j))); ' $num_jobs_initial $num_jobs_final $num_iters $realign_time) || exit 1;
|
||||
realign_this_iter[$realign_iter]=$realign_time
|
||||
done
|
||||
|
||||
cur_egs_dir=$egs_dir
|
||||
|
||||
while [ $x -lt $num_iters ]; do
|
||||
[ $x -eq $exit_stage ] && echo "$0: Exiting early due to --exit-stage $exit_stage" && exit 0;
|
||||
|
||||
this_num_jobs=$(perl -e "print int(0.5+$num_jobs_initial+($num_jobs_final-$num_jobs_initial)*$x/$num_iters);")
|
||||
|
||||
ilr=$initial_effective_lrate; flr=$final_effective_lrate; np=$num_archives_processed; nt=$num_archives_to_process;
|
||||
this_learning_rate=$(perl -e "print (($x + 1 >= $num_iters ? $flr : $ilr*exp($np*log($flr/$ilr)/$nt))*$this_num_jobs);");
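# Illustrative example (not this recipe's settings): with ilr=0.01, flr=0.001 and
# half the archives processed (np/nt=0.5), the base rate is
# 0.01*exp(0.5*log(0.1)) ~= 0.00316; if this_num_jobs=10 at that point, each job
# is given ~0.0316, so the averaged model sees an effective rate of ~0.00316.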
|
||||
|
||||
echo "On iteration $x, learning rate is $this_learning_rate."
|
||||
|
||||
if [ ! -z "${realign_this_iter[$x]}" ]; then
|
||||
prev_egs_dir=$cur_egs_dir
|
||||
cur_egs_dir=$dir/egs_${realign_this_iter[$x]}
|
||||
fi
|
||||
|
||||
if [ $x -ge 0 ] && [ $stage -le $x ]; then
|
||||
if [ ! -z "${realign_this_iter[$x]}" ]; then
|
||||
time=${realign_this_iter[$x]}
|
||||
|
||||
echo "Getting average posterior for purposes of adjusting the priors."
|
||||
# Note: this just uses CPUs, using a smallish subset of data.
|
||||
# always use the first egs archive, which makes the script simpler;
|
||||
# we're using different random subsets of it.
|
||||
rm $dir/post.$x.*.vec 2>/dev/null
|
||||
$cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
|
||||
nnet-copy-egs --srand=JOB --frame=random ark:$prev_egs_dir/egs.1.ark ark:- \| \
|
||||
nnet-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
|
||||
nnet-compute-from-egs "nnet-to-raw-nnet $dir/$x.mdl -|" ark:- ark:- \| \
|
||||
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
|
||||
|
||||
sleep 3; # make sure there is time for $dir/post.$x.*.vec to appear.
|
||||
|
||||
$cmd $dir/log/vector_sum.$x.log \
|
||||
vector-sum $dir/post.$x.*.vec $dir/post.$x.vec || exit 1;
|
||||
rm $dir/post.$x.*.vec;
|
||||
|
||||
echo "Re-adjusting priors based on computed posteriors"
|
||||
$cmd $dir/log/adjust_priors.$x.log \
|
||||
nnet-adjust-priors $dir/$x.mdl $dir/post.$x.vec $dir/$x.mdl || exit 1;
|
||||
|
||||
sleep 2
|
||||
|
||||
steps/nnet2/align.sh --nj $num_jobs_align --cmd "$align_cmd" --use-gpu $align_use_gpu \
|
||||
--transform-dir "$transform_dir" --online-ivector-dir "$online_ivector_dir" \
|
||||
--iter $x $data $lang $dir $dir/ali_$time || exit 1
|
||||
|
||||
steps/nnet2/relabel_egs2.sh --cmd "$cmd" --iter $x $dir/ali_$time \
|
||||
$prev_egs_dir $cur_egs_dir || exit 1
|
||||
|
||||
if $cleanup && [[ $prev_egs_dir =~ $dir/egs* ]]; then
|
||||
steps/nnet2/remove_egs.sh $prev_egs_dir
|
||||
fi
|
||||
fi
|
||||
|
||||
# Set off jobs doing some diagnostics, in the background.
|
||||
# Use the egs dir from the previous iteration for the diagnostics
|
||||
$cmd $dir/log/compute_prob_valid.$x.log \
|
||||
nnet-compute-prob $dir/$x.mdl ark:$cur_egs_dir/valid_diagnostic.egs &
|
||||
$cmd $dir/log/compute_prob_train.$x.log \
|
||||
nnet-compute-prob $dir/$x.mdl ark:$cur_egs_dir/train_diagnostic.egs &
|
||||
if [ $x -gt 0 ] && [ ! -f $dir/log/mix_up.$[$x-1].log ]; then
|
||||
$cmd $dir/log/progress.$x.log \
|
||||
nnet-show-progress --use-gpu=no $dir/$[$x-1].mdl $dir/$x.mdl \
|
||||
ark:$cur_egs_dir/train_diagnostic.egs '&&' \
|
||||
nnet-am-info $dir/$x.mdl &
|
||||
fi
|
||||
|
||||
echo "Training neural net (pass $x)"
|
||||
|
||||
if [ $x -gt 0 ] && \
|
||||
[ $x -le $[($num_hidden_layers-1)*$add_layers_period] ] && \
|
||||
[ $[$x%$add_layers_period] -eq 0 ]; then
|
||||
do_average=false # if we've just added a new hidden layer, don't do averaging but take the best.
|
||||
cur_num_hidden_layers=$[$x/$add_layers_period];
|
||||
mdl="nnet-init --srand=$x $dir/hidden_${cur_num_hidden_layers}.config - | nnet-insert $dir/$x.mdl - - | nnet-am-copy --learning-rate=$this_learning_rate - -|"
|
||||
else
|
||||
do_average=true
|
||||
if [ $x -eq 0 ]; then do_average=false; fi # on iteration 0, pick the best, don't average.
|
||||
mdl="nnet-am-copy --learning-rate=$this_learning_rate $dir/$x.mdl -|"
|
||||
fi
|
||||
if $do_average; then
|
||||
this_minibatch_size=$minibatch_size
|
||||
else
|
||||
# on iteration zero or when we just added a layer, use a smaller minibatch
|
||||
# size and just one job: the model-averaging doesn't seem to be helpful
|
||||
# when the model is changing too fast (i.e. it worsens the objective
|
||||
# function), and the smaller minibatch size will help to keep
|
||||
# the update stable.
|
||||
this_minibatch_size=$[$minibatch_size/2];
|
||||
fi
|
||||
|
||||
rm $dir/.error 2>/dev/null
|
||||
|
||||
|
||||
( # this sub-shell is so that when we "wait" below,
|
||||
# we only wait for the training jobs that we just spawned,
|
||||
# not the diagnostic jobs that we spawned above.
|
||||
|
||||
# We can't easily use a single parallel SGE job to do the main training,
|
||||
# because the computation of which archive and which --frame option
|
||||
# to use for each job is a little complex, so we spawn each one separately.
|
||||
for n in $(seq $this_num_jobs); do
|
||||
k=$[$num_archives_processed + $n - 1]; # k is a zero-based index that we'll derive
|
||||
# the other indexes from.
|
||||
archive=$[($k%$num_archives)+1]; # work out the 1-based archive index.
|
||||
frame=$[(($k/$num_archives)%$frames_per_eg)]; # work out the 0-based frame
|
||||
# index; this increases more slowly than the archive index because the
|
||||
# same archive with different frame indexes will give similar gradients,
|
||||
# so we want to separate them in time.
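# Illustrative index computation: with num_archives=4 and frames_per_eg=8,
# k=9 gives archive = (9%4)+1 = 2 and frame = (9/4)%8 = 2.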
|
||||
|
||||
$cmd $parallel_opts $dir/log/train.$x.$n.log \
|
||||
nnet-train$parallel_suffix $parallel_train_opts \
|
||||
--minibatch-size=$this_minibatch_size --srand=$x "$mdl" \
|
||||
"ark:nnet-copy-egs --frame=$frame ark:$cur_egs_dir/egs.$archive.ark ark:-|nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-|" \
|
||||
$dir/$[$x+1].$n.mdl || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
)
|
||||
# the error message below is not that informative, but $cmd will
|
||||
# have printed a more specific one.
|
||||
[ -f $dir/.error ] && echo "$0: error on iteration $x of training" && exit 1;
|
||||
|
||||
nnets_list=
|
||||
for n in `seq 1 $this_num_jobs`; do
|
||||
nnets_list="$nnets_list $dir/$[$x+1].$n.mdl"
|
||||
done
|
||||
|
||||
if $do_average; then
|
||||
# average the output of the different jobs.
|
||||
$cmd $dir/log/average.$x.log \
|
||||
nnet-am-average $nnets_list $dir/$[$x+1].mdl || exit 1;
|
||||
else
|
||||
# choose the best from the different jobs.
|
||||
n=$(perl -e '($nj,$pat)=@ARGV; $best_n=1; $best_logprob=-1.0e+10; for ($n=1;$n<=$nj;$n++) {
|
||||
$fn = sprintf($pat,$n); open(F, "<$fn") || die "Error opening log file $fn";
|
||||
undef $logprob; while (<F>) { if (m/log-prob-per-frame=(\S+)/) { $logprob=$1; } }
|
||||
close(F); if (defined $logprob && $logprob > $best_logprob) { $best_logprob=$logprob;
|
||||
$best_n=$n; } } print "$best_n\n"; ' $this_num_jobs $dir/log/train.$x.%d.log) || exit 1;
|
||||
[ -z "$n" ] && echo "Error getting best model" && exit 1;
|
||||
cp $dir/$[$x+1].$n.mdl $dir/$[$x+1].mdl || exit 1;
|
||||
fi
|
||||
|
||||
if [ "$mix_up" -gt 0 ] && [ $x -eq $mix_up_iter ]; then
|
||||
# mix up.
|
||||
echo Mixing up from $num_leaves to $mix_up components
|
||||
$cmd $dir/log/mix_up.$x.log \
|
||||
nnet-am-mixup --min-count=10 --num-mixtures=$mix_up \
|
||||
$dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1;
|
||||
fi
|
||||
rm $nnets_list
|
||||
[ ! -f $dir/$[$x+1].mdl ] && exit 1;
|
||||
if [ -f $dir/$[$x-1].mdl ] && $cleanup && \
|
||||
[ $[($x-1)%100] -ne 0 ] && [ $[$x-1] -lt $first_model_combine ]; then
|
||||
rm $dir/$[$x-1].mdl
|
||||
fi
|
||||
fi
|
||||
x=$[$x+1]
|
||||
num_archives_processed=$[$num_archives_processed+$this_num_jobs]
|
||||
done
|
||||
|
||||
|
||||
if [ $stage -le $num_iters ]; then
|
||||
echo "Doing final combination to produce final.mdl"
|
||||
|
||||
# Now do combination.
|
||||
nnets_list=()
|
||||
# the if..else..fi statement below sets 'nnets_list'.
|
||||
if [ $max_models_combine -lt $num_models_combine ]; then
|
||||
# The number of models to combine is too large, e.g. > 20. In this case,
|
||||
# each argument to nnet-combine-fast will be an average of multiple models.
|
||||
cur_offset=0 # current offset from first_model_combine.
|
||||
for n in $(seq $max_models_combine); do
|
||||
next_offset=$[($n*$num_models_combine)/$max_models_combine]
|
||||
sub_list=""
|
||||
for o in $(seq $cur_offset $[$next_offset-1]); do
|
||||
iter=$[$first_model_combine+$o]
|
||||
mdl=$dir/$iter.mdl
|
||||
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
|
||||
sub_list="$sub_list $mdl"
|
||||
done
|
||||
nnets_list[$[$n-1]]="nnet-am-average $sub_list - |"
|
||||
cur_offset=$next_offset
|
||||
done
|
||||
else
|
||||
nnets_list=
|
||||
for n in $(seq 0 $[num_models_combine-1]); do
|
||||
iter=$[$first_model_combine+$n]
|
||||
mdl=$dir/$iter.mdl
|
||||
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
|
||||
nnets_list[$n]=$mdl
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
# Below, use --use-gpu=no to disable nnet-combine-fast from using a GPU, as
|
||||
# if there are many models it can give an out-of-memory error; set num-threads to 8
|
||||
# to speed it up (this isn't ideal...)
|
||||
num_egs=`nnet-copy-egs ark:$cur_egs_dir/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'`
|
||||
mb=$[($num_egs+$combine_num_threads-1)/$combine_num_threads]
|
||||
[ $mb -gt 512 ] && mb=512
|
||||
# Setting --initial-model to a large value makes it initialize the combination
|
||||
# with the average of all the models. It's important not to start with a
|
||||
# single model, or, due to the invariance to scaling that these nonlinearities
|
||||
# give us, we get zero diagonal entries in the fisher matrix that
|
||||
# nnet-combine-fast uses for scaling, which after flooring and inversion, has
|
||||
# the effect that the initial model chosen gets much higher learning rates
|
||||
# than the others. This prevents the optimization from working well.
|
||||
$cmd $combine_parallel_opts $dir/log/combine.log \
|
||||
nnet-combine-fast --initial-model=100000 --num-lbfgs-iters=40 --use-gpu=no \
|
||||
--num-threads=$combine_num_threads \
|
||||
--verbose=3 --minibatch-size=$mb "${nnets_list[@]}" ark:$cur_egs_dir/combine.egs \
|
||||
$dir/final.mdl || exit 1;
|
||||
|
||||
# Normalize stddev for affine or block affine layers that are followed by a
|
||||
# pnorm layer and then a normalize layer.
|
||||
$cmd $dir/log/normalize.log \
|
||||
nnet-normalize-stddev $dir/final.mdl $dir/final.mdl || exit 1;
|
||||
|
||||
# Compute the probability of the final, combined model with
|
||||
# the same subset we used for the previous compute_probs, as the
|
||||
# different subsets will lead to different probs.
|
||||
$cmd $dir/log/compute_prob_valid.final.log \
|
||||
nnet-compute-prob $dir/final.mdl ark:$cur_egs_dir/valid_diagnostic.egs &
|
||||
$cmd $dir/log/compute_prob_train.final.log \
|
||||
nnet-compute-prob $dir/final.mdl ark:$cur_egs_dir/train_diagnostic.egs &
|
||||
fi
|
||||
|
||||
if [ $stage -le $[$num_iters+1] ]; then
|
||||
echo "Getting average posterior for purposes of adjusting the priors."
|
||||
# Note: this just uses CPUs, using a smallish subset of data.
|
||||
rm $dir/post.$x.*.vec 2>/dev/null
|
||||
$cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
|
||||
nnet-copy-egs --frame=random --srand=JOB ark:$cur_egs_dir/egs.1.ark ark:- \| \
|
||||
nnet-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
|
||||
nnet-compute-from-egs "nnet-to-raw-nnet $dir/final.mdl -|" ark:- ark:- \| \
|
||||
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
|
||||
|
||||
sleep 3; # make sure there is time for $dir/post.$x.*.vec to appear.
|
||||
|
||||
$cmd $dir/log/vector_sum.$x.log \
|
||||
vector-sum $dir/post.$x.*.vec $dir/post.$x.vec || exit 1;
|
||||
|
||||
rm $dir/post.$x.*.vec;
|
||||
|
||||
echo "Re-adjusting priors based on computed posteriors"
|
||||
  $cmd $dir/log/adjust_priors.final.log \
    nnet-adjust-priors $dir/final.mdl $dir/post.$x.vec $dir/final.mdl || exit 1;
fi


if [ ! -f $dir/final.mdl ]; then
  echo "$0: $dir/final.mdl does not exist."
  # we don't want to clean up if the training didn't succeed.
  exit 1;
fi

sleep 2

echo Done

if $cleanup; then
  echo Cleaning up data
  if $remove_egs && [[ $cur_egs_dir =~ $dir/egs* ]]; then
    steps/nnet2/remove_egs.sh $cur_egs_dir
  fi

  echo Removing most of the models
  for x in `seq 0 $num_iters`; do
    if [ $[$x%100] -ne 0 ] && [ $x -ne $num_iters ] && [ -f $dir/$x.mdl ]; then
      # delete all but every 100th model; don't delete the ones which combine to form the final model.
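      # (Worked example with a hypothetical num_iters=250: this keeps 0.mdl,
      # 100.mdl, 200.mdl and the final 250.mdl, and removes everything else.)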
      rm $dir/$x.mdl
    fi
  done
fi

@ -1,13 +1,12 @@
#!/bin/bash
# Copyright 2015   David Snyder
#           2015   Johns Hopkins University (Author: Daniel Garcia-Romero)
#           2015   Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
#
# See README.txt for more info on data required.
# Results (EERs) are inline in comments below.

# This example script is still a bit of a mess, and needs to be
# cleaned up, but it shows you all the basic ingredients.

. cmd.sh
. path.sh
set -e

@ -0,0 +1,28 @@
# "queue.pl" uses qsub.  The options to it are
# options to qsub.  If you have GridEngine installed,
# change this to a queue you have access to.
# Otherwise, use "run.pl", which will run jobs locally
# (make sure your --num-jobs options are no more than
# the number of cpus on your machine).

#a) JHU cluster options
export train_cmd="queue.pl -l arch=*64*"
export decode_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"
#export cuda_cmd="..."
export mkgraph_cmd="queue.pl -l arch=*64* -l ram_free=4G,mem_free=4G"

#b) BUT cluster options
#export train_cmd="queue.pl -q all.q@@blade -l ram_free=1200M,mem_free=1200M"
#export decode_cmd="queue.pl -q all.q@@blade -l ram_free=1700M,mem_free=1700M"
#export decodebig_cmd="queue.pl -q all.q@@blade -l ram_free=4G,mem_free=4G"
#export cuda_cmd="queue.pl -q long.q@@pco203 -l gpu=1"
#export cuda_cmd="queue.pl -q long.q@pcspeech-gpu"
#export mkgraph_cmd="queue.pl -q all.q@@servers -l ram_free=4G,mem_free=4G"

#c) run it locally...
#export train_cmd=run.pl
#export decode_cmd=run.pl
export cuda_cmd=run.pl
#export mkgraph_cmd=run.pl

@ -0,0 +1,3 @@
beam=11.0 # beam for decoding.  Was 13.0 in the scripts.
first_beam=8.0 # beam for 1st-pass decoding in SAT.
lattice_beam=6.0

@ -0,0 +1,2 @@
beam=13.0 # beam for decoding.  Was 13.0 in the scripts.
lattice_beam=8.0 # this has most effect on size of the lattices.

@ -0,0 +1,6 @@
--sample-frequency=8000
--frame-length=25 # the default is 25
--low-freq=20 # the default.
--high-freq=3700 # the default is zero meaning use the Nyquist (4k in this case).
--num-ceps=20 # higher than the default which is 12.
--snip-edges=false

@ -0,0 +1,3 @@
--use-energy=false   # only non-default option.
--sample-frequency=8000 # Switchboard is sampled at 8kHz
--snip-edges=false

@ -0,0 +1,11 @@
# config for high-resolution MFCC features, intended for neural network training.
# Note: we keep all cepstra, so it has the same info as filterbank features,
# but MFCC is more easily compressible (because less correlated) which is why
# we prefer this method.
--use-energy=false   # use average of log energy, not energy.
--sample-frequency=8000 # Switchboard is sampled at 8kHz
--num-mel-bins=40 # similar to Google's setup.
--num-ceps=40 # there is no dimensionality reduction.
--low-freq=40 # low cutoff frequency for mel bins
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 4000 (=3800)
--snip-edges=false

@ -0,0 +1,2 @@
--vad-energy-threshold=5.5
--vad-energy-mean-scale=0.5

@ -0,0 +1 @@
../v1/local/

@ -0,0 +1,263 @@
#!/bin/bash
# Copyright 2015   David Snyder
#           2015   Johns Hopkins University (Author: Daniel Garcia-Romero)
#           2015   Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
#
# See README.txt for more info on data required.
# Results (EERs) are inline in comments below.
#
# This example script shows how to replace the GMM-UBM
# with a DNN trained for ASR.  It also demonstrates the
# use of the DNN to create a supervised-GMM.

. cmd.sh
. path.sh
set -e
mfccdir=`pwd`/mfcc
vaddir=`pwd`/mfcc
trials_female=data/sre10_test_female/trials
trials_male=data/sre10_test_male/trials
trials=data/sre10_test/trials
nnet=exp/nnet2_online/nnet_ms_a/final.mdl
num_components=5297

# Train a DNN on about 1800 hours of the English portion of Fisher.
local/dnn/train_dnn.sh

# Prepare the SRE 2010 evaluation data.
local/make_sre_2010_test.pl /export/corpora5/SRE/SRE2010/eval/ data/
local/make_sre_2010_train.pl /export/corpora5/SRE/SRE2010/eval/ data/

# Prepare a collection of NIST SRE data prior to 2010. This is
# used to train the PLDA model and is also combined with SWB
# for UBM and i-vector extractor training data.
local/make_sre.sh data

# Prepare SWB for UBM and i-vector extractor training.
local/make_swbd2_phase2.pl /export/corpora5/LDC/LDC99S79 \
  data/swbd2_phase2_train
local/make_swbd2_phase3.pl /export/corpora5/LDC/LDC2002S06 \
  data/swbd2_phase3_train
local/make_swbd_cellular1.pl /export/corpora5/LDC/LDC2001S13 \
  data/swbd_cellular1_train
local/make_swbd_cellular2.pl /export/corpora5/LDC/LDC2004S07 \
  data/swbd_cellular2_train

utils/combine_data.sh data/train \
  data/swbd_cellular1_train data/swbd_cellular2_train \
  data/swbd2_phase2_train data/swbd2_phase3_train data/sre

cp -r data/train data/train_dnn
cp -r data/sre data/sre_dnn
cp -r data/sre10_train data/sre10_train_dnn
cp -r data/sre10_test data/sre10_test_dnn

# Extract speaker recognition features.
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
  data/train exp/make_mfcc $mfccdir
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
  data/sre exp/make_mfcc $mfccdir
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
  data/sre10_train exp/make_mfcc $mfccdir
steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
  data/sre10_test exp/make_mfcc $mfccdir

# Extract DNN features.
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 40 --cmd "$train_cmd" \
  data/train_dnn exp/make_mfcc $mfccdir
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 40 --cmd "$train_cmd" \
  data/sre_dnn exp/make_mfcc $mfccdir
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 40 --cmd "$train_cmd" \
  data/sre10_train_dnn exp/make_mfcc $mfccdir
steps/make_mfcc.sh --mfcc-config conf/mfcc_hires.conf --nj 40 --cmd "$train_cmd" \
  data/sre10_test_dnn exp/make_mfcc $mfccdir

for name in sre_dnn sre10_train_dnn sre10_test_dnn train_dnn sre sre10_train sre10_test train; do
  utils/fix_data_dir.sh data/${name}
done

# Compute VAD decisions. These will be shared across both sets of features.
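# (Descriptive note, added for clarity: both mfcc.conf and mfcc_hires.conf use
# --snip-edges=false and the default 10 ms frame shift, so each utterance gets
# the same number of frames in data/<name> and data/<name>_dnn; that is why the
# VAD computed here can simply be copied to the _dnn directories below.)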
sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
  data/train exp/make_vad $vaddir
sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
  data/sre exp/make_vad $vaddir
sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
  data/sre10_train exp/make_vad $vaddir
sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
  data/sre10_test exp/make_vad $vaddir

for name in sre sre10_train sre10_test train; do
  cp data/${name}/vad.scp data/${name}_dnn/vad.scp
  cp data/${name}/utt2spk data/${name}_dnn/utt2spk
  cp data/${name}/spk2utt data/${name}_dnn/spk2utt
  utils/fix_data_dir.sh data/${name}
  utils/fix_data_dir.sh data/${name}_dnn
done

# Subset training data for faster sup-GMM initialization.
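# (Descriptive note, added for clarity: the --utt-list option of
# utils/subset_data_dir.sh restricts the second subset to exactly the
# utterances chosen for data/train_dnn_32k, keeping the two feature views of
# the 32k subset in sync.)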
utils/subset_data_dir.sh data/train_dnn 32000 data/train_dnn_32k
utils/fix_data_dir.sh data/train_dnn_32k
utils/subset_data_dir.sh --utt-list data/train_dnn_32k/utt2spk data/train data/train_32k
utils/fix_data_dir.sh data/train_32k

# Initialize a full GMM from the DNN posteriors and speaker recognition
# features. This can be used either alone as a UBM, or to initialize the
# i-vector extractor in a DNN-based system.
sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd -l mem_free=6G,ram_free=6G" \
  data/train_32k \
  data/train_dnn_32k $nnet exp/full_ubm
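# (Optional sanity check, not part of the original recipe: fgmm-global-info
# prints the number of Gaussians and the feature dimension of the UBM; the
# Gaussian count should equal the number of DNN senones, i.e. $num_components.)
# fgmm-global-info exp/full_ubm/final.ubm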

# Train an i-vector extractor based on just the supervised-GMM.
sid/train_ivector_extractor.sh --cmd "$train_cmd -l mem_free=70G,ram_free=70G" \
  --ivector-dim 600 \
  --num-iters 5 exp/full_ubm/final.ubm data/train \
  exp/extractor_sup_gmm

# Train an i-vector extractor based on the DNN-UBM.
sid/train_ivector_extractor_dnn.sh --cmd "$train_cmd -l mem_free=80G,ram_free=80G" \
  --min-post 0.015 \
  --ivector-dim 600 \
  --num-iters 5 exp/full_ubm/final.ubm $nnet \
  data/train \
  data/train_dnn \
  exp/extractor_dnn

# Extract i-vectors from the extractor with the sup-GMM UBM.
sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 50 \
  exp/extractor_sup_gmm data/sre10_train \
  exp/ivectors_sre10_train_sup_gmm

sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 50 \
  exp/extractor_sup_gmm data/sre10_test \
  exp/ivectors_sre10_test_sup_gmm

sid/extract_ivectors.sh --cmd "$train_cmd -l mem_free=8G,ram_free=8G" --nj 50 \
  exp/extractor_sup_gmm data/sre \
  exp/ivectors_sre_sup_gmm

# Extract i-vectors using the extractor with the DNN-UBM.
sid/extract_ivectors_dnn.sh --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \
  exp/extractor_dnn \
  $nnet \
  data/sre10_test \
  data/sre10_test_dnn \
  exp/ivectors_sre10_test_dnn

sid/extract_ivectors_dnn.sh --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \
  exp/extractor_dnn \
  $nnet \
  data/sre10_train \
  data/sre10_train_dnn \
  exp/ivectors_sre10_train_dnn

sid/extract_ivectors_dnn.sh --cmd "$train_cmd -l mem_free=10G,ram_free=10G" --nj 40 \
  exp/extractor_dnn \
  $nnet \
  data/sre \
  data/sre_dnn \
  exp/ivectors_sre_dnn

# Separate the i-vectors into male and female partitions and calculate
# i-vector means used by the scoring scripts.
local/scoring_common.sh data/sre data/sre10_train data/sre10_test \
  exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm \
  exp/ivectors_sre10_test_sup_gmm

local/scoring_common.sh data/sre data/sre10_train data/sre10_test \
  exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn \
  exp/ivectors_sre10_test_dnn

# The commented out scripts show how to do cosine scoring with and without
# first reducing the i-vector dimensionality with LDA. PLDA tends to work
# best, so we don't focus on the scores obtained here.
#
# local/cosine_scoring.sh data/sre10_train data/sre10_test \
#  exp/ivectors_sre10_train exp/ivectors_sre10_test $trials local/scores_gmm_2048_ind_pooled
# local/lda_scoring.sh data/sre data/sre10_train data/sre10_test \
#  exp/ivectors_sre exp/ivectors_sre10_train exp/ivectors_sre10_test $trials local/scores_gmm_2048_ind_pooled

# Create a gender independent PLDA model and do scoring with the sup-GMM system.
local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \
  exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm \
  exp/ivectors_sre10_test_sup_gmm $trials local/scores_sup_gmm_ind_pooled
local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \
  exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_female \
  exp/ivectors_sre10_test_sup_gmm_female $trials_female local/scores_sup_gmm_ind_female
local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \
  exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_male \
  exp/ivectors_sre10_test_sup_gmm_male $trials_male local/scores_sup_gmm_ind_male

# Create gender dependent PLDA models and do scoring with the sup-GMM system.
local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \
  exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_female \
  exp/ivectors_sre10_test_sup_gmm_female $trials_female local/scores_sup_gmm_dep_female
local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \
  exp/ivectors_sre_sup_gmm exp/ivectors_sre10_train_sup_gmm_male \
  exp/ivectors_sre10_test_sup_gmm_male $trials_male local/scores_sup_gmm_dep_male
mkdir -p local/scores_sup_gmm_dep_pooled
cat local/scores_sup_gmm_dep_male/plda_scores local/scores_sup_gmm_dep_female/plda_scores \
  > local/scores_sup_gmm_dep_pooled/plda_scores

# Create a gender independent PLDA model and do scoring with the DNN system.
local/plda_scoring.sh data/sre data/sre10_train data/sre10_test \
  exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn \
  exp/ivectors_sre10_test_dnn $trials local/scores_dnn_ind_pooled
local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_female data/sre10_test_female \
  exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_female \
  exp/ivectors_sre10_test_dnn_female $trials_female local/scores_dnn_ind_female
local/plda_scoring.sh --use-existing-models true data/sre data/sre10_train_male data/sre10_test_male \
  exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_male \
  exp/ivectors_sre10_test_dnn_male $trials_male local/scores_dnn_ind_male

# Create gender dependent PLDA models and do scoring with the DNN system.
local/plda_scoring.sh data/sre_female data/sre10_train_female data/sre10_test_female \
  exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_female \
  exp/ivectors_sre10_test_dnn_female $trials_female local/scores_dnn_dep_female
local/plda_scoring.sh data/sre_male data/sre10_train_male data/sre10_test_male \
  exp/ivectors_sre_dnn exp/ivectors_sre10_train_dnn_male \
  exp/ivectors_sre10_test_dnn_male $trials_male local/scores_dnn_dep_male
mkdir -p local/scores_dnn_dep_pooled
cat local/scores_dnn_dep_male/plda_scores local/scores_dnn_dep_female/plda_scores \
  > local/scores_dnn_dep_pooled/plda_scores


# Sup-GMM PLDA EER
# ind pooled: 1.94
# ind female: 1.98
# ind male: 1.79
# dep female: 1.87
# dep male: 1.30
# dep pooled: 1.65
echo "Sup-GMM-$num_components EER"
for x in ind dep; do
  for y in female male pooled; do
    eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_sup_gmm_${x}_${y}/plda_scores) 2> /dev/null`
    echo "${x} ${y}: $eer"
  done
done

# DNN PLDA EER
# ind pooled: 1.20
# ind female: 1.46
# ind male: 0.87
# dep female: 1.43
# dep male: 0.72
# dep pooled: 1.09
echo "DNN-$num_components EER"
for x in ind dep; do
  for y in female male pooled; do
    eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}/plda_scores) 2> /dev/null`
    echo "${x} ${y}: $eer"
  done
done

# In comparison, here is the EER for an unsupervised GMM-based system
# with 5297 components (the same as the number of senones in the DNN):
# GMM-5297 PLDA EER
# ind pooled: 2.42
# ind female: 2.43
# ind male: 2.40
# dep female: 2.16
# dep male: 1.53
# dep pooled: 2.00

@ -0,0 +1 @@
../v1/sid

@ -0,0 +1 @@
../v1/steps

@ -0,0 +1 @@
../v1/utils

@ -1,6 +1,8 @@
// fgmmbin/fgmm-global-acc-stats-post.cc

// Copyright 2015   David Snyder
//           2015   Johns Hopkins University (Author: Daniel Povey)
//           2015   Johns Hopkins University (Author: Daniel Garcia-Romero)

// See ../../COPYING for clarification regarding multiple authors
//

@ -1,6 +1,8 @@
// fgmmbin/fgmm-global-init-from-accs.cc

// Copyright 2015   David Snyder
//           2015   Johns Hopkins University (Author: Daniel Povey)
//           2015   Johns Hopkins University (Author: Daniel Garcia-Romero)

// See ../../COPYING for clarification regarding multiple authors
//