зеркало из https://github.com/mozilla/kaldi.git
- Updating the RM example scripts
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/karel@282 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
5b4ff9799b
Коммит
513e401182
|
@ -79,9 +79,9 @@ steps/decode_nnet_mono_pdf.sh &
|
|||
# now, we will train triphone GMM-HMM system to get context-dependent training labels
|
||||
# 500 pdfs
|
||||
time steps/train_tri1.sh
|
||||
(steps/decode_tri1.sh ; steps/decode_tri1_fmllr.sh; steps/decode_tri1_regtree_fmllr.sh ) &
|
||||
steps/decode_tri1.sh &
|
||||
time steps/train_tri2a.sh
|
||||
(steps/decode_tri2a.sh ; steps/decode_tri2a_fmllr.sh; steps/decode_tri2a_fmllr_utt.sh ) &
|
||||
steps/decode_tri2a.sh &
|
||||
|
||||
# train MLP with context-dependent pdf targets
|
||||
# 1-frame of MFCC_D_A_0, per-utterance CMN, global CVN,
|
||||
|
|
|
@ -18,12 +18,18 @@ parser.add_option('--gauss', dest='gauss', help='use gaussian noise for weights'
|
|||
parser.add_option('--negbias', dest='negbias', help='use uniform [-4.1,-3.9] for bias (defaultall 0.0)', action='store_true', default=False)
|
||||
parser.add_option('--inputscale', dest='inputscale', help='scale the weights by 3/sqrt(Ninputs)', action='store_true', default=False)
|
||||
parser.add_option('--linBNdim', dest='linBNdim', help='dim of linear bottleneck (sigmoids will be omitted, bias will be zero)',default=0)
|
||||
parser.add_option('--seed', dest='seedval', help='seed for random generator',default=0)
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if(options.dim == None):
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
#seeding
|
||||
seedval=int(options.seedval)
|
||||
if(seedval != 0):
|
||||
random.seed(seedval)
|
||||
|
||||
|
||||
dimStrL = options.dim.split(':')
|
||||
|
||||
|
|
|
@ -16,8 +16,14 @@
|
|||
# limitations under the License.
|
||||
|
||||
|
||||
#seeding is optional...
|
||||
if($#ARGV==0) {
|
||||
srand($ARGV[0]);
|
||||
}
|
||||
|
||||
|
||||
# This script shuffles lines of a list.
|
||||
# The list is read from stdin and written to stdout.
|
||||
@X = <>;
|
||||
@X = <STDIN>;
|
||||
@X = sort { rand() <=> rand() } @X;
|
||||
print @X;
|
||||
|
|
|
@ -42,5 +42,5 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
||||
|
|
|
@ -57,5 +57,5 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
||||
|
|
|
@ -55,5 +55,5 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
||||
|
|
|
@ -65,5 +65,5 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
||||
|
|
|
@ -66,5 +66,5 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
||||
|
|
|
@ -69,5 +69,5 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
||||
|
|
|
@ -68,5 +68,5 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", (100.0*n)/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
||||
|
|
|
@ -42,4 +42,4 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
|
|
@ -1,64 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# decode_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test,
|
||||
# per speaker. There is no SAT.
|
||||
# To be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
srcdir=exp/decode_tri1
|
||||
dir=exp/decode_tri1_fmllr
|
||||
mkdir -p $dir
|
||||
model=exp/tri1/final.mdl
|
||||
tree=exp/tri1/tree
|
||||
graphdir=exp/graph_tri1
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
mincount=500 # mincount before we estimate a transform.
|
||||
|
||||
scripts/mkgraph.sh $tree $model $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
|
||||
|
||||
ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
|
||||
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
|
||||
"$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# decode_tri_regtree_fmllr.sh is as ../decode_tri.sh but estimating fMLLR in test,
|
||||
# per speaker. There is no SAT. Use a regression-tree with top-level speech/sil
|
||||
# split (no silence weighting).
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
srcdir=exp/decode_tri1
|
||||
dir=exp/decode_tri1_regtree_fmllr
|
||||
mkdir -p $dir
|
||||
model=exp/tri1/final.mdl
|
||||
occs=exp/tri1/final.occs
|
||||
tree=exp/tri1/tree
|
||||
graphdir=exp/graph_tri1
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
regtree=$dir/regtree
|
||||
maxleaves=8 # max # of regression-tree leaves.
|
||||
mincount=5000 # mincount before we add new transform.
|
||||
gmm-make-regtree --sil-phones=$silphones --state-occs=$occs --max-leaves=$maxleaves $model $regtree 2>$dir/make_regtree.out
|
||||
|
||||
scripts/mkgraph.sh $tree $model $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
# To deweight silence, would add the line
|
||||
# weight-silence-post 0.0 $silphones $model ark:- ark:- | \
|
||||
# after the line with ali-to-post
|
||||
# This is useful if we don't treat silence specially when building regression tree.
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
|
||||
ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
|
||||
gmm-est-regtree-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model "$feats" ark:- $regtree ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
|
||||
|
||||
gmm-decode-faster-regtree-fmllr $utt2spk_opt --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst $regtree "$feats" ark:$dir/${test}.fmllr ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
||||
|
|
@ -42,4 +42,4 @@ wait
|
|||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
||||
| tee $dir/wer
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# decode_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test,
|
||||
# per speaker. There is no SAT.
|
||||
# To be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
srcdir=exp/decode_tri2a
|
||||
dir=exp/decode_tri2a_fmllr
|
||||
mkdir -p $dir
|
||||
model=exp/tri2a/final.mdl
|
||||
tree=exp/tri2a/tree
|
||||
graphdir=exp/graph_tri2a
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
mincount=500 # mincount before we estimate a transform.
|
||||
|
||||
scripts/mkgraph.sh $tree $model $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
# This would only work if $srcdir was also per-utterance [otherwise
|
||||
# you'd have to mess with the script a bit].
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
|
||||
|
||||
ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
|
||||
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
|
||||
"$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# decode_tri_fmllr.sh is as decode_tri.sh but estimating fMLLR in test,
|
||||
# per utterance (the spk2utt/utt2spk options below are commented out). There is no SAT.
|
||||
# To be run from ..
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
srcdir=exp/decode_tri2a
|
||||
dir=exp/decode_tri2a_fmllr_utt
|
||||
mkdir -p $dir
|
||||
model=exp/tri2a/final.mdl
|
||||
tree=exp/tri2a/tree
|
||||
graphdir=exp/graph_tri2a
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
mincount=500 # mincount before we estimate a transform.
|
||||
|
||||
scripts/mkgraph.sh $tree $model $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
# Comment the two lines below to make this per-utterance.
|
||||
# This would only work if $srcdir was also per-utterance [otherwise
|
||||
# you'd have to mess with the script a bit].
|
||||
#spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
#utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
sifeats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
|
||||
|
||||
ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
|
||||
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-min-count=$mincount $spk2utt_opt $model \
|
||||
"$sifeats" ark,o:- ark:$dir/${test}.fmllr 2>$dir/fmllr_${test}.log
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
||||
|
|
@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}
|
|||
|
||||
|
||||
###### SELECT FEATURES ######
|
||||
cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
|
||||
cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
|
||||
head -n 3591 $dir/train.scp > $dir/train.scp.tr
|
||||
tail -n 399 $dir/train.scp > $dir/train.scp.cv
|
||||
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
|
||||
|
@ -50,7 +50,7 @@ feats_cv="$feats_cv apply-cmvn --print-args=false --norm-vars=true $cvn ark:- ar
|
|||
###### INITIALIZE THE NNET ######
|
||||
mlp_init=$dir/nnet.init
|
||||
num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
|
||||
scripts/gen_mlp_init.py --dim=39:1024:${num_tgt} --gauss --negbias > $mlp_init
|
||||
scripts/gen_mlp_init.py --dim=39:1024:${num_tgt} --gauss --negbias --seed=666 > $mlp_init
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}
|
|||
|
||||
|
||||
###### SELECT FEATURES ######
|
||||
cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
|
||||
cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
|
||||
head -n 3591 $dir/train.scp > $dir/train.scp.tr
|
||||
tail -n 399 $dir/train.scp > $dir/train.scp.cv
|
||||
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
|
||||
|
@ -54,7 +54,7 @@ feats_cv="$feats_cv splice-feats --print-args=false --left-context=5 --right-con
|
|||
###### INITIALIZE THE NNET ######
|
||||
mlp_init=$dir/nnet.init
|
||||
num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
|
||||
scripts/gen_mlp_init.py --dim=429:568:${num_tgt} --gauss --negbias > $mlp_init
|
||||
scripts/gen_mlp_init.py --dim=429:568:${num_tgt} --gauss --negbias --seed=666 > $mlp_init
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ mkdir -p $dir/{log,nnet}
|
|||
|
||||
|
||||
###### SELECT FEATURES ######
|
||||
cat data/train.scp | scripts/shuffle_list.pl > $dir/train.scp
|
||||
cat data/train.scp | scripts/shuffle_list.pl ${seed:-666} > $dir/train.scp
|
||||
head -n 3591 $dir/train.scp > $dir/train.scp.tr
|
||||
tail -n 399 $dir/train.scp > $dir/train.scp.cv
|
||||
feats="ark:add-deltas --print-args=false scp:$dir/train.scp ark:- |"
|
||||
|
@ -46,7 +46,7 @@ feats_cv="$feats_cv splice-feats --print-args=false --left-context=5 --right-con
|
|||
###### INITIALIZE THE NNET ######
|
||||
mlp_init=$dir/nnet.init
|
||||
num_tgt=$(grep NUMPDFS $dir_ali/final.mdl | awk '{ print $4 }')
|
||||
scripts/gen_mlp_init.py --dim=429:577:${num_tgt} --gauss --negbias > $mlp_init
|
||||
scripts/gen_mlp_init.py --dim=429:577:${num_tgt} --gauss --negbias --seed=666 > $mlp_init
|
||||
|
||||
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче