sandbox/pawel: download scripts, beamforming scripts, BeamformIt installation under tools, improved text normalisation

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/pawel@4075 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
2014-06-23 08:19:53 +00:00 · 2014-06-23 08:19:53 +00:00 · af0df729d5
--- a/egs/ami/s5/cmd.sh
+++ b/egs/ami/s5/cmd.sh
@ -0,0 +1,17 @@
+# "queue.pl" uses qsub.  The options to it are
+# options to qsub.  If you have GridEngine installed,
+# change this to a queue you have access to.
+# Otherwise, use "run.pl", which will run jobs locally
+# (make sure your --num-jobs options are no more than
+# the number of cpus on your machine).
+
+# On Eddie use:
+#export train_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=08:00:00"
+#export decode_cmd="queue.pl -P inf_hcrc_cstr_nst  -l h_rt=05:00:00 -pe memory-2G 4"
+#export highmem_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=05:00:00 -pe memory-2G 4"
+#export scoring_cmd="queue.pl -P inf_hcrc_cstr_nst  -l h_rt=00:20:00"
+
+# To run locally, use:
+export train_cmd=run.pl
+export decode_cmd=run.pl
+export highmem_cmd=run.pl
--- a/egs/ami/s5/conf/beamformit.conf
+++ b/egs/ami/s5/conf/beamformit.conf
@ -0,0 +1,54 @@
+
+# scrolling size to compute the delays
+scroll_size = 250
+
+# cross correlation computation window size
+window_size = 500
+
+#number of cross-correlation maxima (N-best peaks) taken into account
+nbest_amount = 4
+
+#flag whether to apply an automatic noise thresholding
+do_noise_threshold = 1
+
+#Percentage of frames with lower xcorr taken as noisy
+noise_percent = 10
+
+######## acoustic modelling parameters
+
+#transition probabilities weight for multichannel decoding
+trans_weight_multi = 25
+trans_weight_nbest = 25
+
+###
+
+#flag whether to print the features after setting them, or not
+print_features = 1
+
+#flag whether to use the bad frames in the sum process
+do_avoid_bad_frames = 1
+
+#flag to use the best channel (SNR) as a reference
+#defined from command line
+do_compute_reference = 1
+#do_compute_reference = 0
+#reference_channel = 0
+
+#flag whether to use a uem file or not (process the whole file)
+do_use_uem_file = 0
+
+#flag whether to use an adaptive weights scheme or fixed weights
+do_adapt_weights = 1
+
+#flag whether to output the sph files or just run the system to create the auxiliary files
+do_write_sph_files = 1
+
+#selects the way that the files are read from the channels file
+full_path = 1
+
+####directories where to store/retrieve info####
+channels_file = ./cfg-files/channels
+
+#show needs to be passed as argument normally, here a default one is given just in case
+show_id = Ttmp
+
--- a/egs/ami/s5/local/ami_beamform.sh
+++ b/egs/ami/s5/local/ami_beamform.sh
@ -0,0 +1,97 @@
+#!/bin/bash
+
+#Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
+#Apache 2.0
+
+wiener_filtering=false
+nj=4
+cmd=run.pl
+
+# End configuration section
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+   echo "Wrong #arguments ($#, expected 4)"
+   echo "Usage: steps/ami_beamform.sh [options] <num-mics> <ami-dir> <wav-out-dir>"
+   echo "main options (for others, see top of script file)"
+   echo "  --nj <nj>                                # number of parallel jobs"
+   echo "  --cmd <cmd>                              # Command to run in parallel with"
+   echo "  --wiener-filtering <true/false>          # Cancel noise with Wiener filter prior to beamforming"
+   exit 1;
+fi
+
+numch=$1
+sdir=$2
+odir=$3
+wdir=data/local/beamforming
+
+mkdir -p $odir
+mkdir -p $wdir/log
+
+meetings=$wdir/meetings.list
+
+cat local/split_train.orig local/split_dev.orig local/split_eval.orig | sort > $meetings
+
+ch_inc=$((8/$numch))
+bmf=
+for ch in `seq 1 $ch_inc 8`; do
+  bmf="$bmf $ch"
+done
+
+echo "Will use the following channels: $bmf"
+
+#make the channel file
+if [ -f $wdir/channels_$numch ]; then
+  rm $wdir/channels_$numch
+fi
+touch $wdir/channels_$numch
+
+while read line;
+do
+  channels="$line "
+  for ch in $bmf; do
+    channels="$channels $line/audio/$line.Array1-0$ch.wav"
+  done
+  echo $channels >> $wdir/channels_$numch
+done < $meetings
+
+######
+#do beamforming
+######
+
+echo -e "Beamforming\n"
+
+$cmd JOB=1:$nj $wdir/log/beamform.JOB.log \
+     local/beamformit.sh $nj JOB $numch $meetings $sdir $odir
+
+: << "C"
+(
+
+  utils/split_scp.pl -j $nj JOB $meetings $meetings.JOB
+
+  while read line; do
+    BeamformIt -s $line -c $wdir/channels_$numch \
+                        --config_file=conf/beamformit.cfg \
+                        --source_dir=$sdir \
+                        --result_dir=$odir/temp_dir \
+                        --do_compute_reference=1
+
+    mkdir -p $odir/$line 
+    mv $odir/temp_dir/$line/${line}_seg.del  $odir/$line/${line}_MDM$numch.del
+    mv $odir/temp_dir/$line/${line}_seg.del2 $odir/$line/${line}_MDM$numch.del2
+    mv $odir/temp_dir/$line/${line}_seg.info $odir/$line/${line}_MDM$numch.info
+    mv $odir/temp_dir/$line/${line}_seg.ovl  $odir/$line/${line}_MDM$numch.ovl
+    mv $odir/temp_dir/$line/${line}_seg.weat $odir/$line/${line}_MDM$numch.weat
+    mv $odir/temp_dir/$line/${line}_seg.wa*  $odir/$line/${line}_MDM$numch.wav
+    mv $odir/temp_dir/$line/${line}_seg2.wa* $odir/$line/${line}_MDM${numch}_seg2.wav
+   
+    rm -r $odir/temp_dir  
+  done < $meetings.JOB
+
+)
+C
+
+
--- a/egs/ami/s5/local/ami_download.sh
+++ b/egs/ami/s5/local/ami_download.sh
@ -11,45 +11,61 @@ fi
 mic=$1
 adir=$2
 amiurl=http://groups.inf.ed.ac.uk/ami
+annotver=ami_public_manual_1.6.1.zip
+wdir=data/local/downloads

-mkdir -p $adir/amicorpus
+if [[ ! "$mic" =~ ^(ihm|sdm|mdm)$ ]]; then
+  echo "$0. Wrong <mic> option." 
+  exit 1;
+fi
+
+mics="1 2 3 4 5 6 7 8"
+if [ "$mic" == "sdm" ]; then
+  mics=1
+fi
+
+mkdir -p $adir
+mkdir -p $wdir/log

 #download annotations
-annot="$adir/ami_public_manual_1.6.zip"
+
+annot="$adir/$annotver"
 if [[ ! -d $adir/annotations || ! -f "$annot" ]]; then
  echo "Downloading annotiations..."
-  wget -O $annot $amiurl/AMICorpusAnnotations/ami_public_manual_1.6.zip
+  wget -O $annot $amiurl/AMICorpusAnnotations/$annotver &> $wdir/log/download_ami_annot.log
  mkdir $adir/annotations
  unzip -d $adir/annotations $annot &> /dev/null
 fi
 [ ! -f "$adir/annotations/AMI-metadata.xml" ] && echo "$0: File AMI-Metadata.xml not found under $adir/annotations." && exit 1;

 #download waves
-ihm_template="wget -P amicorpus/IB4011/audio http://groups.inf.ed.ac.uk/ami/AMICorpusMirror//amicorpus/IB4011/audio/IB4011.Headset-3.wav"
-license="wget http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt
-wget http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt"

-wgetfile=$adir/wget_$mic.sh
+cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $wdir/ami_meet_ids.flist
+
+wgetfile=$wdir/wget_$mic.sh
+manifest="wget -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt"
+license="wget -O $adir/LICENCE.TXT http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt"

 echo "#!/bin/bash" > $wgetfile
+echo $manifest >> $wgetfile
 echo $license >> $wgetfile
-
-cat local/split_train.orig local/split_eval.orig local/split_dev.orig > $adir/ami_file_ids.flist
-
-if [ "$mic" == "ihm" ]; then
-  while read line; do
-     for hid in 0 1 2 3; do
-       echo "wget -P $adir/$line/audio $amiurl/AMICorpusMirror/amicorpus/$line/audio/$line.Headset-$hid.wav" >> $wgetfile
+while read line; do
+   if [ "$mic" == "ihm" ]; then
+     for m in 0 1 2 3; do
+       echo "wget -c -P $adir/$line/audio $amiurl/AMICorpusMirror/amicorpus/$line/audio/$line.Headset-$m.wav" >> $wgetfile
     done
-  done < $adir/ami_file_ids.flist
-elif [ "$mic" == "sdm" ]; then
+   else
+     for m in $mics; do
+       echo "wget -c -P $adir/$line/audio $amiurl/AMICorpusMirror/amicorpus/$line/audio/$line.Array1-0$m.wav" >> $wgetfile
+     done
+   fi
+done < $wdir/ami_meet_ids.flist

-elif [ "$mic" == "mdm" ]; then
+chmod +x $wgetfile
+echo "Downloading audio files for $mic scenario."
+echo "Look at $wdir/log/download_ami_$mic.log for download progress"

-else
-  exit 1;
-fi
+$wgetfile &> $wdir/log/download_ami_$mic.log

-#chmod +x $wgetfile
-#. $wgetfile &> $adir/log/download$mic.log
+# The licence text is saved as LICENCE.TXT by the generated wget script, so point the user at that name.
+echo "Downloads of AMI corpus completed successfully. License can be found under $adir/LICENCE.TXT"

--- a/egs/ami/s5/local/ami_ihm_data_prep_edin.sh
+++ b/egs/ami/s5/local/ami_ihm_data_prep_edin.sh
@ -1,20 +1,16 @@
 #!/bin/bash

-# Switchboard-1 training data preparation customized for Edinburgh
-# Author:  Arnab Ghoshal (Jan 2013)
+# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
+# AMI Corpus training data preparation 
+# Apache 2.0

 # To be run from one directory above this script.

-## The input is some directory containing the switchboard-1 release 2
-## corpus (LDC97S62).  Note: we don't make many assumptions about how
-## you unpacked this.  We are just doing a "find" command to locate
-## the .sph files.
-
 . path.sh

 #check existing directories
 if [ $# != 2 ]; then
-  echo "Usage: ami_data_prep_edin.sh /path/to/SWBD"
+  echo "Usage: ami_data_prep_edin.sh /path/to/AMI"
  exit 1; 
 fi 

--- a/egs/ami/s5/local/ami_ihm_scoring_data_prep_edin.sh
+++ b/egs/ami/s5/local/ami_ihm_scoring_data_prep_edin.sh
@ -1,14 +1,7 @@
 #!/bin/bash

-# Switchboard-1 training data preparation customized for Edinburgh
-# Author:  Arnab Ghoshal (Jan 2013)
-
-# To be run from one directory above this script.
-
-## The input is some directory containing the switchboard-1 release 2
-## corpus (LDC97S62).  Note: we don't make many assumptions about how
-## you unpacked this.  We are just doing a "find" command to locate
-## the .sph files.
+# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
+# AMI Corpus dev/eval data preparation 

 . path.sh

--- a/egs/ami/s5/local/ami_mdm_data_prep_edin.sh
+++ b/egs/ami/s5/local/ami_mdm_data_prep_edin.sh
@ -1,7 +1,7 @@
 #!/bin/bash

-# Switchboard-1 training data preparation customized for Edinburgh
-# Author:  Arnab Ghoshal (Jan 2013)
+# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
+# AMI Corpus training data preparation 

 # To be run from one directory above this script.

--- a/egs/ami/s5/local/ami_mdm_scoring_data_prep_edin.sh
+++ b/egs/ami/s5/local/ami_mdm_scoring_data_prep_edin.sh
@ -1,14 +1,7 @@
 #!/bin/bash

-# Switchboard-1 training data preparation customized for Edinburgh
-# Author:  Arnab Ghoshal (Jan 2013)
-
-# To be run from one directory above this script.
-
-## The input is some directory containing the switchboard-1 release 2
-## corpus (LDC97S62).  Note: we don't make many assumptions about how
-## you unpacked this.  We are just doing a "find" command to locate
-## the .sph files.
+# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
+# AMI Corpus dev/eval data preparation 

 . path.sh

--- a/egs/ami/s5/local/ami_sdm_data_prep_edin.sh
+++ b/egs/ami/s5/local/ami_sdm_data_prep_edin.sh
@ -1,8 +1,7 @@
 #!/bin/bash

-# Switchboard-1 training data preparation customized for Edinburgh
-# Author:  Arnab Ghoshal (Jan 2013)
-# To be run from one directory above this script.
+# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
+# AMI Corpus training data preparation 

 . path.sh

--- a/egs/ami/s5/local/ami_sdm_scoring_data_prep_edin.sh
+++ b/egs/ami/s5/local/ami_sdm_scoring_data_prep_edin.sh
@ -1,14 +1,7 @@
 #!/bin/bash

-# Switchboard-1 training data preparation customized for Edinburgh
-# Author:  Arnab Ghoshal (Jan 2013)
-
-# To be run from one directory above this script.
-
-## The input is some directory containing the switchboard-1 release 2
-## corpus (LDC97S62).  Note: we don't make many assumptions about how
-## you unpacked this.  We are just doing a "find" command to locate
-## the .sph files.
+# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
+# AMI Corpus dev/eval data preparation 

 . path.sh

--- a/egs/ami/s5/local/ami_split_segments.pl
+++ b/egs/ami/s5/local/ami_split_segments.pl
@ -2,7 +2,7 @@

 # Copyright 2014  University of Edinburgh (Author: Pawel Swietojanski)

-# The script splits too long AMI segments based on punctuation signs
+# The script - based on punctuation times - splits segments longer than #words (input parameter)
 # and produces bit more more normalised form of transcripts, as follows
 # MeetID Channel Spkr stime etime transcripts 

@ -72,8 +72,6 @@ sub split_on_comma {
   }   

   print "Splitting $text on $skey at time $otime (stime is $stime)\n";  
-
-   my %transcripts = ();
   my @utts1 = split(/$skey\s+/, $text);
   for (my $i=0; $i<=$#utts1; $i++) {
     my $st = $btime;
@ -102,13 +100,14 @@ sub split_transcripts {
  my ($text, $btime, $etime, $max_words_per_seg) = @_;
  my (@transcript) = @$text;

-  my (@punct_indices) = grep { $transcript[$_] =~ /^[\.,\?]$/ } 0..$#transcript;
+  my (@punct_indices) = grep { $transcript[$_] =~ /^[\.,\?\!\:]$/ } 0..$#transcript;
  my (@time_indices) = grep { $transcript[$_] =~ /^[0-9]+\.[0-9]*/ } 0..$#transcript;
  my (@puncts_times) = delete @transcript[@time_indices]; 
  my (@puncts) = @transcript[@punct_indices];

  if ($#puncts_times != $#puncts) {
-     die 'Ooops, different number of punctuation signs and timestamps!';
+     print 'Ooops, different number of punctuation signs and timestamps! Skipping.';
+     return ();
  }
 
  #first split on full stops
@ -156,13 +155,12 @@ sub normalise_transcripts {

   #DO SOME ROUGH AND OBVIOUS PRELIMINARY NORMALISATION, AS FOLLOWS
   #remove the remaining punctation labels e.g. some text ,0 some text ,1
-   $text =~ s/[\.\,\?][0-9]+//g;
+   $text =~ s/[\.\,\?\!\:][0-9]+//g;
   #there are some extra spurious puncations without spaces, e.g. UM,I, replace with space
   $text =~ s/[A-Z']+,[A-Z']+/ /g;
-   #normalise the standalone '-' signs, e.g. IS THERE D - to IS THERE D-
-   #some extra steps will be required to agree transcripts with dict as '-'
-   #also denotes not finished sentence and may be added to the fully pronounced words
-   $text =~ s/(.*)([A-Z])\s+(\-)(.*)/$1$2$3$4/g;
+   #split words combination, ie. ANTI-TRUST to ANTI TRUST (None of them appears in cmudict anyway)
+   #$text =~ s/(.*)([A-Z])\s+(\-)(.*)/$1$2$3$4/g;
+   $text =~ s/\-/ /g;
   #substitute X_M_L with X. M. L. etc.
   $text =~ s/\_/. /g;
   #normalise and trim spaces
@ -170,40 +168,44 @@ sub normalise_transcripts {
   $text =~ s/\s*$//g;
   $text =~ s/\s+/ /g;
   #some transcripts are empty with -, nullify (and ignore) them
-   $text =~ s/^\-$//;
+   $text =~ s/^\-$//g;
+   $text =~ s/\s+\-$//;

   return $text;
 }

 if (@ARGV != 2) {
-  print STDERR "Usage: ami_prepare_meeting.pl <meet-file> <out-file>\n";
+  print STDERR "Usage: ami_split_segments.pl <meet-file> <out-file>\n";
  exit(1);
 }

-my $meet_file=shift @ARGV;
-my $out_file=shift @ARGV; 
+my $meet_file = shift @ARGV;
+my $out_file = shift @ARGV; 
 my %transcripts = ();

 open(W, ">$out_file") || die "opening output file $out_file";
 open(S, "<$meet_file") || die "opening meeting file $meet_file";
+
 while(<S>) {
+
  my @A = split(" ", $_);
-  @A > 8 || next; 
-  my ($meet_id, $channel, $spk, $channel2, $btime, $etime, $btime2, $etime2) = @A[0..7];
+  # Lines with fewer than 9 fields carry no transcript words; report and skip them.
+  # The input line was split on whitespace, so add the newline back to keep one message per line.
+  if (@A < 9) { print "Skipping line @A\n"; next; }
+  
+  my ($meet_id, $channel, $spk, $channel2, $trans_btime, $trans_etime, $aut_btime, $aut_etime) = @A[0..7];
  my @transcript = @A[8..$#A];
-  my %transcript = split_transcripts(\@transcript, $btime, $etime, 25); 
+  my %transcript = split_transcripts(\@transcript, $trans_btime, $trans_etime, 30); 
+
  for my $key (keys %transcript) {
    my $value = $transcript{$key};
-    my $seg_name = "AMI_${meet_id}_H0${channel2}_${spk}_${key}"; 
-    my $text = normalise_transcripts($value); 
+    my $segment = normalise_transcripts($value); 
    my @times = split(/\_/, $key);
-    if (length($text)>0) {
-       $transcripts{$seg_name}=$text;
-       print W join " ", $seg_name, $times[0]/100.0, $times[1]/100.0, $transcripts{$seg_name}, "\n";
+    if (length($segment)>0) {
+       print W join " ", $meet_id, "H0${channel2}", $spk, $times[0]/100.0, $times[1]/100.0, $segment, "\n";
    }
  }
+
 }
 close(S);
 close(W);

-
+# Signal completion on STDERR; terminate the message with a newline and the statement with ';'.
+print STDERR "Finished.\n";
--- a/egs/ami/s5/local/ami_text_prep.sh
+++ b/egs/ami/s5/local/ami_text_prep.sh
@ -0,0 +1,34 @@
+#!/bin/bash
+
+if [ $# -ne 1 ]; then
+  echo "Usage: $0 <ami-dir>"
+  exit 1;
+fi
+
+amidir=$1
+wdir=data/local/annotations
+
+#extract text from AMI XML annotations
+local/ami_xml2text.sh $amidir
+
+[ ! -f $wdir/transcripts1 ] && echo "$0: File $wdir/transcripts1 not found." && exit 1;
+
+echo "Preprocessing transcripts..."
+local/ami_split_segments.pl $wdir/transcripts1 $wdir/transcripts2 &> $wdir/log/split_segments.log
+
+
+#HMM
+#MM HMM
+#MM UHM
+
+
+grep -f local/split_train.orig $wdir/transcripts2 > $wdir/train.txt
+grep -f local/split_dev.orig $wdir/transcripts2 > $wdir/dev.txt
+grep -f local/split_eval.orig $wdir/transcripts2 > $wdir/eval.txt
+
+
+
+
+
+
+
--- a/egs/ami/s5/local/ami_xml2text.sh
+++ b/egs/ami/s5/local/ami_xml2text.sh
@ -24,7 +24,7 @@ if [ ! -f $wdir/transcripts0 ]; then
  echo "Parsing XML files (can take several minutes)..."
  nxtlib=$wdir/nxt/lib
  java -cp $nxtlib/nxt.jar:$nxtlib/xmlParserAPIs.jar:$nxtlib/xalan.jar:$nxtlib \
-     FunctionQuery -c $adir/annotations/AMI-metadata.xml -q '($s segment)' -atts obs who \
+     FunctionQuery -c $adir/annotations/AMI-metadata.xml -q '($s segment)(exists $w1 w):$s^$w1' -atts obs who \
     '@extract(($sp speaker)($m meeting):$m@observation=$s@obs && $m^$sp & $s@who==$sp@nxt_agent,global_name, 0)'\
     '@extract(($sp speaker)($m meeting):$m@observation=$s@obs && $m^$sp & $s@who==$sp@nxt_agent, channel, 0)' \
     transcriber_start transcriber_end starttime endtime '$s' '@extract(($w w):$s^$w & $w@punc="true", starttime,0,0)' \
--- a/egs/ami/s5/local/beamformit.sh
+++ b/egs/ami/s5/local/beamformit.sh
@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright 2014, University of Edinburgh (Author: Pawel Swietojanski)
+
+. ./path.sh
+
+nj=$1
+job=$2
+numch=$3
+meetings=$4
+sdir=$5
+odir=$6
+wdir=data/local/beamforming
+
+utils/split_scp.pl -j $nj $job $meetings $meetings.$job
+
+while read line; do
+
+#                        --config_file=`pwd`/conf/beamformit.conf \
+  BeamformIt -s $line -c $wdir/channels_$numch \
+                        --source_dir=$sdir \
+                        --result_dir=$odir/temp_dir \
+                        --do_compute_reference=1
+  mkdir -p $odir/$line
+  mv $odir/temp_dir/$line/${line}_seg.del  $odir/$line/${line}_MDM$numch.del
+  mv $odir/temp_dir/$line/${line}_seg.del2 $odir/$line/${line}_MDM$numch.del2
+  mv $odir/temp_dir/$line/${line}_seg.info $odir/$line/${line}_MDM$numch.info
+  mv $odir/temp_dir/$line/${line}_seg.ovl  $odir/$line/${line}_MDM$numch.ovl
+  mv $odir/temp_dir/$line/${line}_seg.weat $odir/$line/${line}_MDM$numch.weat
+  mv $odir/temp_dir/$line/${line}_seg.wa*  $odir/$line/${line}_MDM$numch.wav
+  mv $odir/temp_dir/$line/${line}_seg2.wa* $odir/$line/${line}_MDM${numch}_seg2.wav
+
+  rm -r $odir/temp_dir
+
+done < $meetings.$job
+
--- a/egs/ami/s5/path.sh
+++ b/egs/ami/s5/path.sh
@ -0,0 +1,32 @@
+
+export LC_ALL=C  # For expected sorting and joining behaviour
+
+KALDI_ROOT=/gpfs/scratch/s1136550/kaldi-code
+
+KALDISRC=$KALDI_ROOT/src
+KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin  
+KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin
+KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin:$KALDISRC/tiedbin
+
+FSTBIN=$KALDI_ROOT/tools/openfst/bin
+LMBIN=$KALDI_ROOT/tools/irstlm/bin
+BEAMFORMIT=$KALDI_ROOT/tools/BeamformIt-3.5
+
+[ -d $PWD/local ] || { echo "Error: 'local' subdirectory not found."; }
+[ -d $PWD/utils ] || { echo "Error: 'utils' subdirectory not found."; }
+[ -d $PWD/steps ] || { echo "Error: 'steps' subdirectory not found."; }
+
+export kaldi_local=$PWD/local
+export kaldi_utils=$PWD/utils
+export kaldi_steps=$PWD/steps
+SCRIPTS=$kaldi_local:$kaldi_utils:$kaldi_steps
+
+PATH=$PATH:$KALDIBIN:$FSTBIN:$LMBIN:$SCRIPTS:$BEAMFORMIT
+
+#CUDA_VER='cuda-5.0.35'
+
+#export PATH=$PATH:/opt/$CUDA_VER/bin
+#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/$CUDA_VER/lib64:/opt/$CUDA_VER/lib
+
+
+
--- a/egs/ami/s5/run_mdm.sh
+++ b/egs/ami/s5/run_mdm.sh
@ -4,14 +4,19 @@
 . ./path.sh

 #MDM - Multiple Distant Microphones
-mic=$1

-#AMI_DIR=/exports/work/inf_hcrc_cstr_nst/meetings/ami_corpus_wav/
-AMI_DIR=/exports/work/inf_hcrc_cstr_nst/pawel/ami/bmf_wavs/$mic
+nmics=8
+mic=mdm$nmics
+AMI_DIR=
+AMI_DIR=/gpfs/scratch/s1136550/ami/amicorpus

+local/ami_beamform.sh --nj 16 $nmics $AMI_DIR /disk/data1/ami
+
+exit 1;
 #PREPARE DATA STARTING FROM RT09 SEGMENTATIONS

-#local/ami_${mic}_data_prep_edin.sh $AMI_DIR data/local/ami_train_v1_x.segs $mic
+local/ami_text_prep.sh
+local/ami_mdm_data_prep.sh $AMI_DIR

 # We will keep the dict and lang the same as in IHM case
 # local/ami_prepare_dict.sh
--- a/tools/Makefile
+++ b/tools/Makefile
@ -162,3 +162,16 @@ fortran_opt = $(shell gcc -v 2>&1 | perl -e '$$x = join(" ", <STDIN>); if($$x =~
 openblas_compiled:
 	-git clone git://github.com/xianyi/OpenBLAS
 	$(MAKE) PREFIX=`pwd`/OpenBLAS/install FC=gfortran $(fortran_opt) DEBUG=1 USE_THREAD=0 -C OpenBLAS all install
+
+
+# "make beamformit" is a convenience alias for the versioned target below.
+# Neither name is a file produced by this Makefile, so both are declared phony.
+.PHONY: beamformit beamformit-3.5
+
+beamformit: beamformit-3.5
+
+beamformit-3.5: beamformit-3.5.tgz
+
+# Fetch the BeamformIt 3.5 source archive. The recipe line must start with a
+# hard TAB, otherwise make stops with "missing separator".
+# NOTE(review): wget saves the archive as BeamformIt-3.5.tgz, which differs in
+# case from this target's name, so on a case-sensitive filesystem the download
+# is re-run on every invocation; nothing here unpacks or builds the archive.
+beamformit-3.5.tgz:
+	wget http://www.xavieranguera.com/beamformit/releases/BeamformIt-3.5.tgz
+
+
+