зеркало из https://github.com/mozilla/kaldi.git
CSJ recipe: minor bug fix, and update of the results by tuning parameters.
This commit is contained in:
Родитель
662b568d12
Коммит
8d56aede32
|
@ -1,16 +1,27 @@
|
|||
About the Corpus of Spontaneous Japanese:
|
||||
The Corpus of Spontaneous Japanese (CSJ) is a database of spoken
|
||||
Japanese developed by the Japan's national priority area research
|
||||
Japanese developed by Japan's national priority area research
|
||||
project "Spontaneous Speech: Corpus and Processing Technology".
|
||||
It contains about 650 hours of speech consisting of approximately
|
||||
It contains about 650 hours of speech consisting of approximately
|
||||
7.5 million words that were provided by more than 1,400 speakers.
|
||||
For more details about the corpus, please visit the website of the
|
||||
For more details about the corpus, please visit the website of the
|
||||
National Institute for Japanese Language (NINJAL). It is available
|
||||
from the Institute.
|
||||
http://www.ninjal.ac.jp/english/products/csj/
|
||||
http://pj.ninjal.ac.jp/corpus_center/csj/
|
||||
|
||||
Meta-parameter tuning based on evolution strategy:
|
||||
The meta-parameters of the system contained in conf/config_opt were
|
||||
automatically tuned using evolution strategy. For the details,
|
||||
please refer to the following paper:
|
||||
Takafumi Moriya, Tomohiro Tanaka, Takahiro Shinozaki, Shinji Watanabe,
|
||||
and Kevin Duh, "Automation of System Building for State-of-the-art
|
||||
Large Vocabulary Speech Recognition Using Evolution Strategy," Proc.
|
||||
IEEE 2015 Automatic Speech Recognition and Understanding Workshop
|
||||
(ASRU), 2015.
|
||||
|
||||
|
||||
Each subdirectory of this directory contains the
|
||||
scripts for a sequence of experiments.
|
||||
scripts for a sequence of experiments.
|
||||
s5: This is the current recommended recipe.
|
||||
The third edition of CSJ is assumed.
|
||||
The recipe supports the third and fourth editions of CSJ.
|
||||
|
|
|
@ -1,117 +1,118 @@
|
|||
## These are results using the third edition of CSJ.
|
||||
for eval_num in `seq 3`; do echo "=== evaluation set $eval_num ===" ;\
|
||||
for x in exp/{tri,dnn}*/decode_eval${eval_num}*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done ; done
|
||||
|
||||
## Results of using training set that contains "academic" and "other" speech data (default).
|
||||
## If you want to use "trial lecture" and "dialog" data, you should check the following script [local/csj_data_prep.sh line 44].
|
||||
## If you want to use "simulated public speaking" and "dialog" data, you should check the following script [local/csj_data_prep.sh line 44].
|
||||
|
||||
=== evaluation set 1 ===
|
||||
%WER 22.67 [ 6269 / 27651, 522 ins, 1903 del, 3844 sub ] exp/tri1/decode_eval1_csj/wer_12
|
||||
%WER 21.49 [ 5943 / 27651, 541 ins, 1745 del, 3657 sub ] exp/tri2/decode_eval1_csj/wer_12
|
||||
%WER 17.49 [ 4837 / 27651, 613 ins, 1269 del, 2955 sub ] exp/tri3/decode_eval1_csj/wer_16
|
||||
%WER 15.26 [ 4220 / 27651, 566 ins, 1071 del, 2583 sub ] exp/tri4/decode_eval1_csj/wer_17
|
||||
%WER 17.33 [ 4792 / 27651, 628 ins, 1137 del, 3027 sub ] exp/tri4/decode_eval1_csj.si/wer_16
|
||||
%WER 14.59 [ 4033 / 27651, 617 ins, 919 del, 2497 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it4_csj/wer_14
|
||||
%WER 14.14 [ 3911 / 27651, 585 ins, 915 del, 2411 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it5_csj/wer_17
|
||||
%WER 14.00 [ 3871 / 27651, 586 ins, 888 del, 2397 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it6_csj/wer_15
|
||||
%WER 13.92 [ 3850 / 27651, 661 ins, 793 del, 2396 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it7_csj/wer_14
|
||||
%WER 14.15 [ 3913 / 27651, 640 ins, 877 del, 2396 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it8_csj/wer_14
|
||||
%WER 14.39 [ 3979 / 27651, 570 ins, 946 del, 2463 sub ] exp/tri4_mmi_b0.1/decode_eval1_1.mdl_csj/wer_17
|
||||
%WER 14.09 [ 3895 / 27651, 576 ins, 882 del, 2437 sub ] exp/tri4_mmi_b0.1/decode_eval1_2.mdl_csj/wer_15
|
||||
%WER 14.02 [ 3877 / 27651, 602 ins, 858 del, 2417 sub ] exp/tri4_mmi_b0.1/decode_eval1_3.mdl_csj/wer_15
|
||||
%WER 14.00 [ 3870 / 27651, 609 ins, 853 del, 2408 sub ] exp/tri4_mmi_b0.1/decode_eval1_4.mdl_csj/wer_15
|
||||
%WER 11.93 [ 3298 / 27651, 348 ins, 970 del, 1980 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval1_csj/wer_12
|
||||
%WER 11.29 [ 3123 / 27651, 509 ins, 651 del, 1963 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval1_csj/wer_15
|
||||
%WER 10.87 [ 3007 / 27651, 497 ins, 589 del, 1921 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval1_csj/wer_14
|
||||
%WER 22.67 [ 6269 / 27651, 522 ins, 1903 del, 3844 sub ] exp/tri1/decode_eval1_csj/wer_12_0.0
|
||||
%WER 21.49 [ 5943 / 27651, 541 ins, 1745 del, 3657 sub ] exp/tri2/decode_eval1_csj/wer_12_0.0
|
||||
%WER 17.49 [ 4837 / 27651, 613 ins, 1269 del, 2955 sub ] exp/tri3/decode_eval1_csj/wer_16_0.0
|
||||
%WER 15.26 [ 4220 / 27651, 566 ins, 1071 del, 2583 sub ] exp/tri4/decode_eval1_csj/wer_17_0.0
|
||||
%WER 17.33 [ 4792 / 27651, 628 ins, 1137 del, 3027 sub ] exp/tri4/decode_eval1_csj.si/wer_16_0.0
|
||||
%WER 14.59 [ 4033 / 27651, 617 ins, 919 del, 2497 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it4_csj/wer_14_0.0
|
||||
%WER 14.14 [ 3911 / 27651, 585 ins, 915 del, 2411 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it5_csj/wer_17_0.0
|
||||
%WER 14.00 [ 3871 / 27651, 586 ins, 888 del, 2397 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it6_csj/wer_15_0.5
|
||||
%WER 13.92 [ 3850 / 27651, 661 ins, 793 del, 2396 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it7_csj/wer_14_0.5
|
||||
%WER 14.15 [ 3913 / 27651, 640 ins, 877 del, 2396 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it8_csj/wer_14_0.5
|
||||
%WER 14.39 [ 3979 / 27651, 570 ins, 946 del, 2463 sub ] exp/tri4_mmi_b0.1/decode_eval1_1.mdl_csj/wer_17_0.5
|
||||
%WER 14.09 [ 3895 / 27651, 576 ins, 882 del, 2437 sub ] exp/tri4_mmi_b0.1/decode_eval1_2.mdl_csj/wer_15_0.0
|
||||
%WER 14.02 [ 3877 / 27651, 602 ins, 858 del, 2417 sub ] exp/tri4_mmi_b0.1/decode_eval1_3.mdl_csj/wer_15_0.5
|
||||
%WER 14.00 [ 3870 / 27651, 609 ins, 853 del, 2408 sub ] exp/tri4_mmi_b0.1/decode_eval1_4.mdl_csj/wer_15_0.5
|
||||
%WER 11.93 [ 3298 / 27651, 348 ins, 970 del, 1980 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval1_csj/wer_12_0.0
|
||||
%WER 11.29 [ 3123 / 27651, 509 ins, 651 del, 1963 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval1_csj/wer_15_1.0
|
||||
%WER 10.87 [ 3007 / 27651, 497 ins, 589 del, 1921 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval1_csj/wer_14_0.5
|
||||
=== evaluation set 2 ===
|
||||
%WER 19.80 [ 5628 / 28424, 561 ins, 1511 del, 3556 sub ] exp/tri1/decode_eval2_csj/wer_12
|
||||
%WER 19.04 [ 5413 / 28424, 600 ins, 1423 del, 3390 sub ] exp/tri2/decode_eval2_csj/wer_12
|
||||
%WER 15.80 [ 4490 / 28424, 582 ins, 1131 del, 2777 sub ] exp/tri3/decode_eval2_csj/wer_16
|
||||
%WER 13.95 [ 3964 / 28424, 691 ins, 843 del, 2430 sub ] exp/tri4/decode_eval2_csj/wer_13
|
||||
%WER 18.74 [ 5326 / 28424, 804 ins, 1056 del, 3466 sub ] exp/tri4/decode_eval2_csj.si/wer_17
|
||||
%WER 12.77 [ 3631 / 28424, 604 ins, 781 del, 2246 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it4_csj/wer_14
|
||||
%WER 12.27 [ 3488 / 28424, 604 ins, 707 del, 2177 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it5_csj/wer_14
|
||||
%WER 12.32 [ 3502 / 28424, 613 ins, 713 del, 2176 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it6_csj/wer_13
|
||||
%WER 12.32 [ 3502 / 28424, 658 ins, 688 del, 2156 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it7_csj/wer_14
|
||||
%WER 12.56 [ 3569 / 28424, 642 ins, 760 del, 2167 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it8_csj/wer_13
|
||||
%WER 12.51 [ 3557 / 28424, 588 ins, 766 del, 2203 sub ] exp/tri4_mmi_b0.1/decode_eval2_1.mdl_csj/wer_15
|
||||
%WER 12.25 [ 3482 / 28424, 587 ins, 730 del, 2165 sub ] exp/tri4_mmi_b0.1/decode_eval2_2.mdl_csj/wer_14
|
||||
%WER 12.20 [ 3467 / 28424, 599 ins, 706 del, 2162 sub ] exp/tri4_mmi_b0.1/decode_eval2_3.mdl_csj/wer_14
|
||||
%WER 12.33 [ 3504 / 28424, 615 ins, 714 del, 2175 sub ] exp/tri4_mmi_b0.1/decode_eval2_4.mdl_csj/wer_14
|
||||
%WER 10.24 [ 2910 / 28424, 271 ins, 852 del, 1787 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval2_csj/wer_12
|
||||
%WER 9.41 [ 2676 / 28424, 453 ins, 432 del, 1791 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval2_csj/wer_14
|
||||
%WER 9.19 [ 2612 / 28424, 417 ins, 422 del, 1773 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval2_csj/wer_14
|
||||
%WER 19.80 [ 5628 / 28424, 561 ins, 1511 del, 3556 sub ] exp/tri1/decode_eval2_csj/wer_12_0.0
|
||||
%WER 19.04 [ 5413 / 28424, 600 ins, 1423 del, 3390 sub ] exp/tri2/decode_eval2_csj/wer_12_0.0
|
||||
%WER 15.80 [ 4490 / 28424, 582 ins, 1131 del, 2777 sub ] exp/tri3/decode_eval2_csj/wer_16_0.5
|
||||
%WER 13.95 [ 3964 / 28424, 691 ins, 843 del, 2430 sub ] exp/tri4/decode_eval2_csj/wer_13_0.0
|
||||
%WER 18.74 [ 5326 / 28424, 804 ins, 1056 del, 3466 sub ] exp/tri4/decode_eval2_csj.si/wer_17_0.0
|
||||
%WER 12.77 [ 3631 / 28424, 604 ins, 781 del, 2246 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it4_csj/wer_14_0.5
|
||||
%WER 12.27 [ 3488 / 28424, 604 ins, 707 del, 2177 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it5_csj/wer_14_0.5
|
||||
%WER 12.32 [ 3502 / 28424, 613 ins, 713 del, 2176 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it6_csj/wer_13_0.5
|
||||
%WER 12.32 [ 3502 / 28424, 658 ins, 688 del, 2156 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it7_csj/wer_14_1.0
|
||||
%WER 12.56 [ 3569 / 28424, 642 ins, 760 del, 2167 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it8_csj/wer_13_1.0
|
||||
%WER 12.51 [ 3557 / 28424, 588 ins, 766 del, 2203 sub ] exp/tri4_mmi_b0.1/decode_eval2_1.mdl_csj/wer_15_0.5
|
||||
%WER 12.25 [ 3482 / 28424, 587 ins, 730 del, 2165 sub ] exp/tri4_mmi_b0.1/decode_eval2_2.mdl_csj/wer_14_0.5
|
||||
%WER 12.20 [ 3467 / 28424, 599 ins, 706 del, 2162 sub ] exp/tri4_mmi_b0.1/decode_eval2_3.mdl_csj/wer_14_0.5
|
||||
%WER 12.33 [ 3504 / 28424, 615 ins, 714 del, 2175 sub ] exp/tri4_mmi_b0.1/decode_eval2_4.mdl_csj/wer_14_0.5
|
||||
%WER 10.24 [ 2910 / 28424, 271 ins, 852 del, 1787 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval2_csj/wer_12_0.0
|
||||
%WER 9.41 [ 2676 / 28424, 453 ins, 432 del, 1791 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval2_csj/wer_14_1.0
|
||||
%WER 9.19 [ 2612 / 28424, 417 ins, 422 del, 1773 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval2_csj/wer_14_0.5
|
||||
=== evaluation set 3 ===
|
||||
%WER 24.80 [ 4534 / 18283, 447 ins, 1350 del, 2737 sub ] exp/tri1/decode_eval3_csj/wer_15
|
||||
%WER 23.68 [ 4329 / 18283, 497 ins, 1183 del, 2649 sub ] exp/tri2/decode_eval3_csj/wer_13
|
||||
%WER 19.97 [ 3651 / 18283, 582 ins, 828 del, 2241 sub ] exp/tri3/decode_eval3_csj/wer_17
|
||||
%WER 17.27 [ 3158 / 18283, 520 ins, 752 del, 1886 sub ] exp/tri4/decode_eval3_csj/wer_19
|
||||
%WER 21.44 [ 3919 / 18283, 660 ins, 823 del, 2436 sub ] exp/tri4/decode_eval3_csj.si/wer_20
|
||||
%WER 16.56 [ 3028 / 18283, 476 ins, 716 del, 1836 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it4_csj/wer_20
|
||||
%WER 15.79 [ 2887 / 18283, 547 ins, 554 del, 1786 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it5_csj/wer_15
|
||||
%WER 15.89 [ 2906 / 18283, 519 ins, 597 del, 1790 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it6_csj/wer_15
|
||||
%WER 15.64 [ 2860 / 18283, 556 ins, 512 del, 1792 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it7_csj/wer_15
|
||||
%WER 16.38 [ 2994 / 18283, 529 ins, 655 del, 1810 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it8_csj/wer_15
|
||||
%WER 16.13 [ 2949 / 18283, 505 ins, 630 del, 1814 sub ] exp/tri4_mmi_b0.1/decode_eval3_1.mdl_csj/wer_18
|
||||
%WER 15.97 [ 2920 / 18283, 540 ins, 556 del, 1824 sub ] exp/tri4_mmi_b0.1/decode_eval3_2.mdl_csj/wer_14
|
||||
%WER 15.98 [ 2922 / 18283, 564 ins, 537 del, 1821 sub ] exp/tri4_mmi_b0.1/decode_eval3_3.mdl_csj/wer_14
|
||||
%WER 15.98 [ 2921 / 18283, 548 ins, 566 del, 1807 sub ] exp/tri4_mmi_b0.1/decode_eval3_4.mdl_csj/wer_15
|
||||
%WER 13.94 [ 2548 / 18283, 313 ins, 716 del, 1519 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval3_csj/wer_13
|
||||
%WER 12.52 [ 2289 / 18283, 464 ins, 354 del, 1471 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval3_csj/wer_15
|
||||
%WER 12.18 [ 2226 / 18283, 431 ins, 340 del, 1455 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval3_csj/wer_15
|
||||
%WER 24.80 [ 4534 / 18283, 447 ins, 1350 del, 2737 sub ] exp/tri1/decode_eval3_csj/wer_15_0.0
|
||||
%WER 23.68 [ 4329 / 18283, 497 ins, 1183 del, 2649 sub ] exp/tri2/decode_eval3_csj/wer_13_0.0
|
||||
%WER 19.97 [ 3651 / 18283, 582 ins, 828 del, 2241 sub ] exp/tri3/decode_eval3_csj/wer_17_0.5
|
||||
%WER 17.27 [ 3158 / 18283, 520 ins, 752 del, 1886 sub ] exp/tri4/decode_eval3_csj/wer_19_0.0
|
||||
%WER 21.44 [ 3919 / 18283, 660 ins, 823 del, 2436 sub ] exp/tri4/decode_eval3_csj.si/wer_20_1.0
|
||||
%WER 16.56 [ 3028 / 18283, 476 ins, 716 del, 1836 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it4_csj/wer_20_0.0
|
||||
%WER 15.79 [ 2887 / 18283, 547 ins, 554 del, 1786 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it5_csj/wer_15_0.5
|
||||
%WER 15.89 [ 2906 / 18283, 519 ins, 597 del, 1790 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it6_csj/wer_15_0.5
|
||||
%WER 15.64 [ 2860 / 18283, 556 ins, 512 del, 1792 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it7_csj/wer_15_1.0
|
||||
%WER 16.38 [ 2994 / 18283, 529 ins, 655 del, 1810 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it8_csj/wer_15_0.5
|
||||
%WER 16.13 [ 2949 / 18283, 505 ins, 630 del, 1814 sub ] exp/tri4_mmi_b0.1/decode_eval3_1.mdl_csj/wer_18_0.0
|
||||
%WER 15.97 [ 2920 / 18283, 540 ins, 556 del, 1824 sub ] exp/tri4_mmi_b0.1/decode_eval3_2.mdl_csj/wer_14_0.5
|
||||
%WER 15.98 [ 2922 / 18283, 564 ins, 537 del, 1821 sub ] exp/tri4_mmi_b0.1/decode_eval3_3.mdl_csj/wer_14_0.0
|
||||
%WER 15.98 [ 2921 / 18283, 548 ins, 566 del, 1807 sub ] exp/tri4_mmi_b0.1/decode_eval3_4.mdl_csj/wer_15_1.0
|
||||
%WER 13.94 [ 2548 / 18283, 313 ins, 716 del, 1519 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval3_csj/wer_13_0.0
|
||||
%WER 12.52 [ 2289 / 18283, 464 ins, 354 del, 1471 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval3_csj/wer_15_0.0
|
||||
%WER 12.18 [ 2226 / 18283, 431 ins, 340 del, 1455 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval3_csj/wer_15_0.5
|
||||
|
||||
## Results of using training data that contain all types of speech data.
|
||||
## Results of using training data that contain all types of speech data except for dialog type.
|
||||
|
||||
=== evaluation set 1 ===
|
||||
%WER 22.71 [ 6279 / 27651, 524 ins, 1936 del, 3819 sub ] exp/tri1/decode_eval1_csj/wer_13
|
||||
%WER 21.36 [ 5905 / 27651, 529 ins, 1781 del, 3595 sub ] exp/tri2/decode_eval1_csj/wer_13
|
||||
%WER 17.89 [ 4948 / 27651, 586 ins, 1314 del, 3048 sub ] exp/tri3/decode_eval1_csj/wer_16
|
||||
%WER 15.85 [ 4383 / 27651, 580 ins, 1169 del, 2634 sub ] exp/tri4/decode_eval1_csj/wer_17
|
||||
%WER 18.06 [ 4995 / 27651, 671 ins, 1209 del, 3115 sub ] exp/tri4/decode_eval1_csj.si/wer_15
|
||||
%WER 15.17 [ 4196 / 27651, 536 ins, 1105 del, 2555 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it4_csj/wer_17
|
||||
%WER 14.32 [ 3959 / 27651, 578 ins, 949 del, 2432 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it5_csj/wer_15
|
||||
%WER 14.20 [ 3926 / 27651, 598 ins, 885 del, 2443 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it6_csj/wer_13
|
||||
%WER 13.93 [ 3851 / 27651, 631 ins, 829 del, 2391 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it7_csj/wer_14
|
||||
%WER 14.09 [ 3895 / 27651, 621 ins, 847 del, 2427 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it8_csj/wer_12
|
||||
%WER 14.69 [ 4061 / 27651, 587 ins, 981 del, 2493 sub ] exp/tri4_mmi_b0.1/decode_eval1_1.mdl_csj/wer_15
|
||||
%WER 14.48 [ 4003 / 27651, 549 ins, 1001 del, 2453 sub ] exp/tri4_mmi_b0.1/decode_eval1_2.mdl_csj/wer_16
|
||||
%WER 14.33 [ 3963 / 27651, 611 ins, 901 del, 2451 sub ] exp/tri4_mmi_b0.1/decode_eval1_3.mdl_csj/wer_14
|
||||
%WER 14.12 [ 3905 / 27651, 610 ins, 870 del, 2425 sub ] exp/tri4_mmi_b0.1/decode_eval1_4.mdl_csj/wer_14
|
||||
%WER 11.62 [ 3214 / 27651, 381 ins, 799 del, 2034 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval1_csj/wer_12
|
||||
%WER 10.93 [ 3021 / 27651, 475 ins, 566 del, 1980 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval1_csj/wer_14
|
||||
%WER 10.71 [ 2962 / 27651, 516 ins, 496 del, 1950 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval1_csj/wer_13
|
||||
%WER 22.97 [ 6352 / 27651, 514 ins, 1941 del, 3897 sub ] exp/tri1/decode_eval1_csj/wer_13_0.0
|
||||
%WER 21.48 [ 5939 / 27651, 482 ins, 1885 del, 3572 sub ] exp/tri2/decode_eval1_csj/wer_14_0.0
|
||||
%WER 17.86 [ 4939 / 27651, 596 ins, 1305 del, 3038 sub ] exp/tri3/decode_eval1_csj/wer_15_0.0
|
||||
%WER 15.67 [ 4333 / 27651, 584 ins, 1121 del, 2628 sub ] exp/tri4/decode_eval1_csj/wer_16_0.0
|
||||
%WER 17.88 [ 4943 / 27651, 623 ins, 1226 del, 3094 sub ] exp/tri4/decode_eval1_csj.si/wer_16_0.0
|
||||
%WER 15.01 [ 4150 / 27651, 580 ins, 1009 del, 2561 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it4_csj/wer_15_0.0
|
||||
%WER 14.28 [ 3949 / 27651, 578 ins, 929 del, 2442 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it5_csj/wer_15_0.0
|
||||
%WER 14.17 [ 3917 / 27651, 542 ins, 966 del, 2409 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it6_csj/wer_15_0.0
|
||||
%WER 14.00 [ 3871 / 27651, 442 ins, 1085 del, 2344 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it7_csj/wer_12_1.0
|
||||
%WER 14.08 [ 3893 / 27651, 426 ins, 1087 del, 2380 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it8_csj/wer_11_1.0
|
||||
%WER 14.60 [ 4036 / 27651, 458 ins, 1115 del, 2463 sub ] exp/tri4_mmi_b0.1/decode_eval1_1.mdl_csj/wer_15_0.5
|
||||
%WER 14.42 [ 3986 / 27651, 459 ins, 1081 del, 2446 sub ] exp/tri4_mmi_b0.1/decode_eval1_2.mdl_csj/wer_14_0.5
|
||||
%WER 14.22 [ 3931 / 27651, 492 ins, 1022 del, 2417 sub ] exp/tri4_mmi_b0.1/decode_eval1_3.mdl_csj/wer_13_0.5
|
||||
%WER 13.99 [ 3869 / 27651, 504 ins, 949 del, 2416 sub ] exp/tri4_mmi_b0.1/decode_eval1_4.mdl_csj/wer_12_0.5
|
||||
%WER 11.63 [ 3215 / 27651, 384 ins, 804 del, 2027 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval1_csj/wer_12_0.0
|
||||
%WER 10.56 [ 2921 / 27651, 366 ins, 662 del, 1893 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval1_csj/wer_13_1.0
|
||||
%WER 10.34 [ 2859 / 27651, 363 ins, 660 del, 1836 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval1_csj/wer_14_1.0
|
||||
=== evaluation set 2 ===
|
||||
%WER 19.61 [ 5575 / 28424, 577 ins, 1442 del, 3556 sub ] exp/tri1/decode_eval2_csj/wer_12
|
||||
%WER 18.47 [ 5250 / 28424, 572 ins, 1361 del, 3317 sub ] exp/tri2/decode_eval2_csj/wer_12
|
||||
%WER 15.71 [ 4464 / 28424, 577 ins, 1128 del, 2759 sub ] exp/tri3/decode_eval2_csj/wer_15
|
||||
%WER 13.24 [ 3764 / 28424, 535 ins, 921 del, 2308 sub ] exp/tri4/decode_eval2_csj/wer_16
|
||||
%WER 17.90 [ 5088 / 28424, 743 ins, 1057 del, 3288 sub ] exp/tri4/decode_eval2_csj.si/wer_16
|
||||
%WER 12.56 [ 3571 / 28424, 595 ins, 767 del, 2209 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it4_csj/wer_13
|
||||
%WER 11.79 [ 3350 / 28424, 584 ins, 669 del, 2097 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it5_csj/wer_13
|
||||
%WER 11.86 [ 3372 / 28424, 619 ins, 643 del, 2110 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it6_csj/wer_11
|
||||
%WER 11.79 [ 3352 / 28424, 603 ins, 659 del, 2090 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it7_csj/wer_13
|
||||
%WER 12.08 [ 3434 / 28424, 602 ins, 701 del, 2131 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it8_csj/wer_11
|
||||
%WER 12.13 [ 3447 / 28424, 561 ins, 735 del, 2151 sub ] exp/tri4_mmi_b0.1/decode_eval2_1.mdl_csj/wer_14
|
||||
%WER 11.88 [ 3376 / 28424, 575 ins, 676 del, 2125 sub ] exp/tri4_mmi_b0.1/decode_eval2_2.mdl_csj/wer_12
|
||||
%WER 11.77 [ 3345 / 28424, 588 ins, 646 del, 2111 sub ] exp/tri4_mmi_b0.1/decode_eval2_3.mdl_csj/wer_12
|
||||
%WER 11.73 [ 3333 / 28424, 586 ins, 658 del, 2089 sub ] exp/tri4_mmi_b0.1/decode_eval2_4.mdl_csj/wer_12
|
||||
%WER 9.36 [ 2660 / 28424, 357 ins, 561 del, 1742 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval2_csj/wer_10
|
||||
%WER 9.07 [ 2579 / 28424, 467 ins, 404 del, 1708 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval2_csj/wer_13
|
||||
%WER 8.91 [ 2533 / 28424, 439 ins, 399 del, 1695 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval2_csj/wer_15
|
||||
%WER 19.56 [ 5560 / 28424, 560 ins, 1527 del, 3473 sub ] exp/tri1/decode_eval2_csj/wer_12_0.0
|
||||
%WER 18.62 [ 5293 / 28424, 610 ins, 1361 del, 3322 sub ] exp/tri2/decode_eval2_csj/wer_12_0.0
|
||||
%WER 15.58 [ 4429 / 28424, 626 ins, 1026 del, 2777 sub ] exp/tri3/decode_eval2_csj/wer_13_0.0
|
||||
%WER 13.37 [ 3801 / 28424, 643 ins, 844 del, 2314 sub ] exp/tri4/decode_eval2_csj/wer_14_0.0
|
||||
%WER 18.03 [ 5126 / 28424, 665 ins, 1178 del, 3283 sub ] exp/tri4/decode_eval2_csj.si/wer_15_0.5
|
||||
%WER 12.36 [ 3514 / 28424, 475 ins, 880 del, 2159 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it4_csj/wer_13_0.5
|
||||
%WER 11.54 [ 3279 / 28424, 448 ins, 792 del, 2039 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it5_csj/wer_13_0.5
|
||||
%WER 11.47 [ 3260 / 28424, 497 ins, 740 del, 2023 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it6_csj/wer_11_0.5
|
||||
%WER 11.34 [ 3223 / 28424, 476 ins, 713 del, 2034 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it7_csj/wer_10_1.0
|
||||
%WER 11.60 [ 3298 / 28424, 523 ins, 716 del, 2059 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it8_csj/wer_10_0.5
|
||||
%WER 11.86 [ 3372 / 28424, 555 ins, 723 del, 2094 sub ] exp/tri4_mmi_b0.1/decode_eval2_1.mdl_csj/wer_14_0.0
|
||||
%WER 11.57 [ 3289 / 28424, 446 ins, 814 del, 2029 sub ] exp/tri4_mmi_b0.1/decode_eval2_2.mdl_csj/wer_13_0.5
|
||||
%WER 11.46 [ 3256 / 28424, 510 ins, 684 del, 2062 sub ] exp/tri4_mmi_b0.1/decode_eval2_3.mdl_csj/wer_11_0.5
|
||||
%WER 11.58 [ 3292 / 28424, 408 ins, 827 del, 2057 sub ] exp/tri4_mmi_b0.1/decode_eval2_4.mdl_csj/wer_11_1.0
|
||||
%WER 9.15 [ 2601 / 28424, 305 ins, 604 del, 1692 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval2_csj/wer_12_0.0
|
||||
%WER 8.69 [ 2469 / 28424, 367 ins, 444 del, 1658 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval2_csj/wer_12_1.0
|
||||
%WER 8.62 [ 2450 / 28424, 349 ins, 444 del, 1657 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval2_csj/wer_13_1.0
|
||||
=== evaluation set 3 ===
|
||||
%WER 25.01 [ 4573 / 18283, 529 ins, 1219 del, 2825 sub ] exp/tri1/decode_eval3_csj/wer_13
|
||||
%WER 23.62 [ 4319 / 18283, 499 ins, 1176 del, 2644 sub ] exp/tri2/decode_eval3_csj/wer_14
|
||||
%WER 18.04 [ 3298 / 18283, 528 ins, 739 del, 2031 sub ] exp/tri3/decode_eval3_csj/wer_12
|
||||
%WER 15.63 [ 2858 / 18283, 411 ins, 719 del, 1728 sub ] exp/tri4/decode_eval3_csj/wer_15
|
||||
%WER 19.36 [ 3540 / 18283, 506 ins, 836 del, 2198 sub ] exp/tri4/decode_eval3_csj.si/wer_17
|
||||
%WER 14.90 [ 2724 / 18283, 456 ins, 602 del, 1666 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it4_csj/wer_13
|
||||
%WER 13.70 [ 2504 / 18283, 456 ins, 477 del, 1571 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it5_csj/wer_13
|
||||
%WER 13.78 [ 2520 / 18283, 460 ins, 548 del, 1512 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it6_csj/wer_12
|
||||
%WER 13.08 [ 2391 / 18283, 517 ins, 400 del, 1474 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it7_csj/wer_12
|
||||
%WER 13.75 [ 2514 / 18283, 469 ins, 562 del, 1483 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it8_csj/wer_12
|
||||
%WER 14.14 [ 2585 / 18283, 436 ins, 537 del, 1612 sub ] exp/tri4_mmi_b0.1/decode_eval3_1.mdl_csj/wer_14
|
||||
%WER 13.83 [ 2529 / 18283, 429 ins, 547 del, 1553 sub ] exp/tri4_mmi_b0.1/decode_eval3_2.mdl_csj/wer_14
|
||||
%WER 13.54 [ 2475 / 18283, 460 ins, 492 del, 1523 sub ] exp/tri4_mmi_b0.1/decode_eval3_3.mdl_csj/wer_13
|
||||
%WER 13.36 [ 2443 / 18283, 463 ins, 482 del, 1498 sub ] exp/tri4_mmi_b0.1/decode_eval3_4.mdl_csj/wer_13
|
||||
%WER 10.55 [ 1928 / 18283, 242 ins, 482 del, 1204 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval3_csj/wer_13
|
||||
%WER 9.71 [ 1775 / 18283, 338 ins, 271 del, 1166 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval3_csj/wer_13
|
||||
%WER 9.31 [ 1703 / 18283, 336 ins, 247 del, 1120 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval3_csj/wer_13
|
||||
%WER 25.00 [ 4570 / 18283, 515 ins, 1277 del, 2778 sub ] exp/tri1/decode_eval3_csj/wer_14_0.0
|
||||
%WER 23.93 [ 4375 / 18283, 560 ins, 1163 del, 2652 sub ] exp/tri2/decode_eval3_csj/wer_14_0.0
|
||||
%WER 17.66 [ 3229 / 18283, 484 ins, 773 del, 1972 sub ] exp/tri3/decode_eval3_csj/wer_14_0.0
|
||||
%WER 15.46 [ 2827 / 18283, 311 ins, 860 del, 1656 sub ] exp/tri4/decode_eval3_csj/wer_17_0.5
|
||||
%WER 18.92 [ 3459 / 18283, 424 ins, 910 del, 2125 sub ] exp/tri4/decode_eval3_csj.si/wer_16_0.5
|
||||
%WER 14.55 [ 2661 / 18283, 423 ins, 629 del, 1609 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it4_csj/wer_14_0.0
|
||||
%WER 13.38 [ 2446 / 18283, 362 ins, 572 del, 1512 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it5_csj/wer_13_0.5
|
||||
%WER 13.37 [ 2444 / 18283, 484 ins, 470 del, 1490 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it6_csj/wer_11_0.0
|
||||
%WER 12.96 [ 2370 / 18283, 332 ins, 570 del, 1468 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it7_csj/wer_12_1.0
|
||||
%WER 13.62 [ 2490 / 18283, 440 ins, 549 del, 1501 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it8_csj/wer_10_0.5
|
||||
%WER 13.77 [ 2518 / 18283, 323 ins, 664 del, 1531 sub ] exp/tri4_mmi_b0.1/decode_eval3_1.mdl_csj/wer_15_0.5
|
||||
%WER 13.48 [ 2464 / 18283, 334 ins, 618 del, 1512 sub ] exp/tri4_mmi_b0.1/decode_eval3_2.mdl_csj/wer_13_0.5
|
||||
%WER 13.28 [ 2428 / 18283, 379 ins, 546 del, 1503 sub ] exp/tri4_mmi_b0.1/decode_eval3_3.mdl_csj/wer_12_0.5
|
||||
%WER 13.26 [ 2424 / 18283, 388 ins, 543 del, 1493 sub ] exp/tri4_mmi_b0.1/decode_eval3_4.mdl_csj/wer_12_0.5
|
||||
%WER 10.41 [ 1904 / 18283, 289 ins, 422 del, 1193 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval3_csj/wer_10_0.0
|
||||
%WER 9.34 [ 1707 / 18283, 251 ins, 341 del, 1115 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval3_csj/wer_13_1.0
|
||||
%WER 9.10 [ 1664 / 18283, 246 ins, 344 del, 1074 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval3_csj/wer_14_1.0
|
||||
|
|
|
@ -3,7 +3,8 @@
|
|||
# Apache 2.0
|
||||
# Acknowledgement This work was supported by JSPS KAKENHI Grant Number 26280055.
|
||||
|
||||
# Current optimized parameter config for CSJ
|
||||
# Currently optimized parameter config for CSJ
|
||||
|
||||
splice=17
|
||||
nn_depth=6
|
||||
hid_dim=1905
|
||||
|
|
|
@ -1,3 +1,2 @@
|
|||
--use-energy=false # only non-default option.
|
||||
#--sample-frequency=8000 # Switchboard is sampled at 8kHz
|
||||
--sample-frequency=16000 # CSJ is sampled at 16kHz
|
||||
|
|
|
@ -50,7 +50,7 @@ cat $CSJ/dvd{3,5,6,7,8,9,10}/{A*,M*}/*-wav.list 2>/dev/null | sort > $dir/wav.fl
|
|||
n=`cat $dir/wav.flist | wc -l`
|
||||
|
||||
[ $n -ne 986 ] && \
|
||||
echo Warning: expected 986 data data files, found $n
|
||||
echo "Warning: expected 986 data files (Case : Using 'Academic lecture' and 'Other' data), found $n."
|
||||
|
||||
|
||||
# (1a) Transcriptions preparation
|
||||
|
@ -102,7 +102,7 @@ awk '{segment=$1; split(segment,S,"[_]"); spkid=S[1]; print $1 " " spkid}' $dir/
|
|||
|
||||
sort -k 2 $dir/utt2spk | utils/utt2spk_to_spk2utt.pl > $dir/spk2utt || exit 1;
|
||||
|
||||
# Copy stuff into its final locations.
|
||||
# Copy stuff into its final locations [this has been moved from the format_data script]
|
||||
mkdir -p data/train
|
||||
for f in spk2utt utt2spk wav.scp text segments; do
|
||||
cp data/local/train/$f data/train/$f || exit 1;
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
# To be run from one directory above this script.
|
||||
|
||||
# The input is directory name containing the official evaluation test set.
|
||||
# The input is the directory containing the official evaluation test set and transcripts.
|
||||
|
||||
if [ $# -ne 2 ]; then
|
||||
echo "Usage: "`basename $0`" <transcription-dir> <eval_num>"
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#!/usr/bin/env perl
|
||||
use warnings; #sed replacement for -w perl parameter
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
|
||||
# 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
|
||||
|
@ -204,8 +203,10 @@ while (<>) {
|
|||
$word =~ s/\ン\ー/\ン/g; #
|
||||
$word =~ s/\ヮ/\ワ/g;
|
||||
$word =~ s/\ゎ/\わ/g;
|
||||
$word =~ s/^\ゼロ$/\0/g;
|
||||
$word =~ s/^\零$/\0/g;
|
||||
|
||||
# Normalization
|
||||
# $word =~ s/^\ゼロ$/\0/g;
|
||||
# $word =~ s/^\零$/\0/g;
|
||||
|
||||
# Arrange morpheme
|
||||
# This function arranges morphemes.
|
||||
|
|
|
@ -11,22 +11,17 @@ if [ $# -ne 2 ]; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
resource=$1
|
||||
outd=$2
|
||||
|
||||
csjext=./local/csj_make_trans/csj2kaldi4m.pl
|
||||
csjconnect=./local/csj_make_trans/csjconnect.pl
|
||||
k2phone=./local/csj_make_trans/kana2phone
|
||||
vocab2dic=./local/csj_make_trans/vocab2dic.pl
|
||||
reform=./local/csj_make_trans/reform.pl
|
||||
[ ! -e $resource ] && echo "Not exist CSJ or incorrect PATH." && exit 1;
|
||||
|
||||
if [ ! -d ./csj-data/dvd17 ];then
|
||||
if [ ! -e $outd/.done_make_trans ];then
|
||||
(
|
||||
mkdir -p $outd
|
||||
rm $outd/al_sent4lex.txt
|
||||
|
||||
cp ./local/csj_make_trans/overview_csj-data $outd/README.txt
|
||||
cp local/csj_make_trans/overview_csj-data $outd/README.txt
|
||||
|
||||
# Make transcription file for each dvd and each lecture
|
||||
[ ! -x "`which nkf `" ]\
|
||||
|
@ -35,19 +30,14 @@ cp ./local/csj_make_trans/overview_csj-data $outd/README.txt
|
|||
for vol in dvd{3..17} ;do
|
||||
mkdir -p $outd/$vol
|
||||
|
||||
(
|
||||
for id in `ls $resource/$vol`;do
|
||||
mkdir -p $outd/$vol/${id}
|
||||
rm -r $outd/$vol/00README.txt
|
||||
|
||||
(
|
||||
nkf -e -d $resource/$vol/$id/${id}.sdb > $outd/$vol/${id}/sdb.tmp
|
||||
$csjext $outd/$vol/${id}/sdb.tmp $outd/$vol/$id/${id}.4lex $outd/$vol/$id/${id}.4trn.t
|
||||
local/csj_make_trans/csj2kaldi4m.pl $outd/$vol/${id}/sdb.tmp $outd/$vol/$id/${id}.4lex $outd/$vol/$id/${id}.4trn.t
|
||||
|
||||
$csjconnect 0.5 10 $outd/$vol/$id/${id}.4trn.t $id > $outd/$vol/$id/${id}-trans.text
|
||||
|
||||
|
||||
[ -z `grep $id local/csj_make_trans/testset` ]\
|
||||
&& cat $outd/$vol/$id/${id}.4lex >> $outd/al_sent4lex.txt
|
||||
local/csj_make_trans/csjconnect.pl 0.5 10 $outd/$vol/$id/${id}.4trn.t $id > $outd/$vol/$id/${id}-trans.text
|
||||
|
||||
rm $outd/$vol/$id/{${id}.4trn.t,sdb.tmp}
|
||||
|
||||
|
@ -56,30 +46,16 @@ for vol in dvd{3..17} ;do
|
|||
else
|
||||
find $resource/$vol/$id -iname ${id}.wav >$outd/$vol/$id/${id}-wav.list
|
||||
fi
|
||||
|
||||
|
||||
)
|
||||
done
|
||||
)&
|
||||
done
|
||||
wait
|
||||
echo -n >$outd/.done_make_trans
|
||||
)
|
||||
fi
|
||||
|
||||
## make lexicon.txt
|
||||
if [ ! -f ./csj-data/lexicon/lexicon.txt ]; then
|
||||
(
|
||||
mkdir -p $outd/lexicon
|
||||
sort $outd/al_sent4lex.txt >lex.tmp123
|
||||
uniq lex.tmp123 > lex.tmp456
|
||||
${vocab2dic} -p $k2phone -o lex.tmp123 lex.tmp456
|
||||
$reform lex.tmp123 | sort | uniq > $outd/lexicon/lexicon.txt
|
||||
mv $outd/al_sent4lex.txt $outd/lexicon
|
||||
rm lex.tmp123 lex.tmp456 ERROR
|
||||
)
|
||||
fi
|
||||
|
||||
## Exclude speech data given by test set speakers.
|
||||
if [ ! -d ./csj-data/[eval,excluded] ]; then
|
||||
if [ ! -e $outd/.done_mv_eval_dup ]; then
|
||||
(
|
||||
mkdir -p $outd/eval
|
||||
mkdir -p $outd/excluded
|
||||
|
@ -89,10 +65,10 @@ if [ ! -d ./csj-data/[eval,excluded] ]; then
|
|||
|
||||
# Speech data given by test set speakers (eval2 : A01M0056)
|
||||
rm dup_list
|
||||
for line in `cat local/csj_make_trans/A01M0056_duplication | less`; do
|
||||
for line in `cat local/csj_make_trans/A01M0056_duplication`; do
|
||||
find $outd/dvd* -iname $line >>dup_list
|
||||
done
|
||||
for list in `cat dup_list | less`;do
|
||||
for list in `cat dup_list`;do
|
||||
mv $list $outd/excluded
|
||||
cp dup_list $outd/excluded/duplication.list
|
||||
done
|
||||
|
@ -100,10 +76,10 @@ if [ ! -d ./csj-data/[eval,excluded] ]; then
|
|||
|
||||
# Evaluation data
|
||||
rm dup_list
|
||||
for line in `cat local/csj_make_trans/testset | less`; do
|
||||
for line in `cat local/csj_make_trans/testset`; do
|
||||
find $outd/dvd* -iname $line >>dup_list
|
||||
done
|
||||
for list in `cat dup_list | less`;do
|
||||
for list in `cat dup_list`;do
|
||||
mv $list $outd/eval
|
||||
cp dup_list $outd/eval/evaluation.list
|
||||
done
|
||||
|
@ -114,11 +90,28 @@ if [ ! -d ./csj-data/[eval,excluded] ]; then
|
|||
mv $outd/eval/{A01M0110,A01M0137,A01M0097,A04M0123,A04M0121,A04M0051,A03M0156,A03M0112,A03M0106,A05M0011} $outd/eval/eval1
|
||||
mv $outd/eval/{A01M0056,A03F0072,A02M0012,A03M0016,A06M0064,A06F0135,A01F0034,A01F0063,A01F0001,A01M0141} $outd/eval/eval2
|
||||
mv $outd/eval/{S00M0112,S00F0066,S00M0213,S00F0019,S00M0079,S01F0105,S00F0152,S00M0070,S00M0008,S00F0148} $outd/eval/eval3
|
||||
|
||||
echo -n >$outd/.done_mv_eval_dup
|
||||
)
|
||||
fi
|
||||
|
||||
comp_num=`ls -l $outd | wc -l`
|
||||
[ ! $comp_num -eq 20 ] \
|
||||
## make lexicon.txt
|
||||
if [ ! -e $outd/.done_make_lexicon ]; then
|
||||
(
|
||||
cat $outd/{dvd*,excluded}/*/*.4lex >> $outd/al_sent4lex.txt
|
||||
mkdir -p $outd/lexicon
|
||||
sort $outd/al_sent4lex.txt >lex.tmp123
|
||||
uniq lex.tmp123 > lex.tmp456
|
||||
local/csj_make_trans/vocab2dic.pl -p local/csj_make_trans/kana2phone -o lex.tmp123 lex.tmp456
|
||||
local/csj_make_trans/reform.pl lex.tmp123 | sort | uniq > $outd/lexicon/lexicon.txt
|
||||
mv $outd/al_sent4lex.txt $outd/lexicon
|
||||
rm lex.tmp123 lex.tmp456 ERROR
|
||||
|
||||
echo -n >$outd/.done_make_lexicon
|
||||
)
|
||||
fi
|
||||
|
||||
[ ! 3 -le `ls -a $outd | grep done | wc -l` ] \
|
||||
&& echo "ERROR : Processing is incorrect." && exit 1;
|
||||
|
||||
echo "Finish processing original CSJ data"
|
||||
echo "Finish processing original CSJ data" && echo -n >$outd/.done_make_all
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#!/usr/bin/env perl
|
||||
use warnings; #sed replacement for -w perl parameter
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
|
||||
# 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
|
||||
|
|
|
@ -141,4 +141,4 @@
|
|||
ヴ+b u
|
||||
ツ+ts u
|
||||
シ+sh i
|
||||
チ+ch i
|
||||
チ+ch i
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#!/usr/bin/env perl
|
||||
#! /usr/bin/perl -w
|
||||
|
||||
# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
|
||||
# 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
|
||||
# Apache 2.0
|
||||
|
@ -6,8 +7,6 @@
|
|||
|
||||
# This script makes a lexicon in Kaldi format.
|
||||
|
||||
use warnings;
|
||||
|
||||
while (<>){
|
||||
chomp;
|
||||
@line=split(/\t/, $_);
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#!/usr/bin/env perl
|
||||
#! /usr/bin/perl
|
||||
|
||||
# Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki)
|
||||
# 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe)
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
# Config:
|
||||
config=conf/config_opt
|
||||
. $config
|
||||
gmmdir=exp/tri4
|
||||
gmmdir=exp/tri4
|
||||
data_fmllr=data-fmllr-tri4
|
||||
stage=0 # resume training with --stage=N
|
||||
# End of config.
|
||||
|
@ -60,7 +60,7 @@ if [ $stage -le 1 ]; then
|
|||
fi
|
||||
|
||||
|
||||
if [ $stage -le 2 ]; then
|
||||
if [ $stage -le 2 ]; then
|
||||
# Train the DNN optimizing per-frame cross-entropy.
|
||||
dir=exp/dnn5b_pretrain-dbn_dnn
|
||||
ali=${gmmdir}_ali_nodup
|
||||
|
@ -86,7 +86,7 @@ dir=exp/dnn5b_pretrain-dbn_dnn_smbr
|
|||
srcdir=exp/dnn5b_pretrain-dbn_dnn
|
||||
acwt=0.0909
|
||||
|
||||
if [ $stage -le 3 ]; then
|
||||
if [ $stage -le 3 ]; then
|
||||
# First we generate lattices and alignments:
|
||||
steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \
|
||||
$data_fmllr/train_nodup data/lang $srcdir ${srcdir}_ali || exit 1;
|
||||
|
|
|
@ -17,14 +17,16 @@ steps/train_sgmm2_group.sh --cmd "$train_cmd" \
|
|||
18000 60000 data/train_nodup data/lang exp/tri4_ali_nodup \
|
||||
exp/ubm5/final.ubm exp/sgmm2_5 || exit 1;
|
||||
|
||||
|
||||
|
||||
graph_dir=exp/sgmm2_5/graph_csj_tg
|
||||
$train_cmd $graph_dir/mkgraph.log \
|
||||
utils/mkgraph.sh data/lang_csj_tg exp/sgmm2_5 $graph_dir
|
||||
for eval_num in `seq 3`; do
|
||||
graph_dir=exp/sgmm2_5/graph_csj_tg
|
||||
$train_cmd $graph_dir/mkgraph.log \
|
||||
utils/mkgraph.sh data/lang_csj_tg exp/sgmm2_5 $graph_dir
|
||||
steps/decode_sgmm2.sh --nj 10 \
|
||||
--cmd "$decode_cmd" --config conf/decode.config \
|
||||
--transform-dir exp/tri4/decode_eval${eval_num}_csj_tg $graph_dir \
|
||||
data/eval${eval_num} exp/sgmm2_5/decode_eval${eval_num}_csj_tg
|
||||
--transform-dir exp/tri4/decode_eval${eval_num}_csj $graph_dir \
|
||||
data/eval${eval_num} exp/sgmm2_5/decode_eval${eval_num}_csj
|
||||
done
|
||||
wait
|
||||
|
||||
|
@ -48,10 +50,10 @@ steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" \
|
|||
for eval_num in `seq 3`; do
|
||||
for iter in 1 2 3 4; do
|
||||
steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \
|
||||
--transform-dir exp/tri4/decode_eval${eval_num}_csj_tg \
|
||||
--transform-dir exp/tri4/decode_eval${eval_num}_csj \
|
||||
data/lang_csj_tg data/eval${eval_num} \
|
||||
exp/sgmm2_5/decode_eval${eval_num}_csj_tg \
|
||||
exp/sgmm2_5_mmi_b0.1/decode_eval${eval_num}_csj_tg_it$iter
|
||||
exp/sgmm2_5/decode_eval${eval_num}_csj \
|
||||
exp/sgmm2_5_mmi_b0.1/decode_eval${eval_num}_csj_it$iter
|
||||
done
|
||||
done
|
||||
wait
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
link ../steps/score_kaldi.sh
|
|
@ -0,0 +1 @@
|
|||
../steps/score_kaldi.sh
|
|
@ -3,5 +3,5 @@
|
|||
perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; }
|
||||
while(<STDIN>) { @A = split(" ", $_); $id = shift @A; print "$id ";
|
||||
foreach $a (@A) { if (!defined $bad{$a}){ @W=split(/\+/,$a); $word=$W[0]; { print "$word "; }}} print "\n"; }' \
|
||||
'<UNK>'
|
||||
'<unk>' '<sp>'
|
||||
|
||||
|
|
|
@ -3,5 +3,5 @@
|
|||
perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; }
|
||||
while(<STDIN>) { @A = split(" ", $_); $id = shift @A; print "$id ";
|
||||
foreach $a (@A) { if (!defined $bad{$a}){ @W=split(/\+/,$a); $word=$W[0]; { print "$word "; }}} print "\n"; }' \
|
||||
'<UNK>'
|
||||
'<unk>' '<sp>'
|
||||
|
||||
|
|
|
@ -3,5 +3,5 @@
|
|||
perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; }
|
||||
while(<STDIN>) { @A = split(" ", $_); $id = shift @A; print "$id ";
|
||||
foreach $a (@A) { if (!defined $bad{$a}){ @W=split(/\+/,$a); $word=$W[0]; { print "$word "; }}} print "\n"; }' \
|
||||
'<UNK>'
|
||||
'<unk>' '<sp>'
|
||||
|
||||
|
|
|
@ -5,4 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI
|
|||
export PATH=$PATH:/usr/local/cuda/bin
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:/usr/local/cuda/bin/nvcc
|
||||
|
||||
export LC_ALL=C
|
||||
#export LC_ALL=C
|
||||
|
|
|
@ -19,14 +19,14 @@ set -e # exit on error
|
|||
|
||||
#: << '#SKIP'
|
||||
|
||||
if [ ! -d data/csj-data/eval ]; then
|
||||
if [ ! -e data ]; then
|
||||
echo "CSJ transcription file does not exist"
|
||||
#local/csj_make_trans/csj_automake.sh <RESOURCE_DIR> <MAKING_PLACE(no change)> || exit 1;
|
||||
local/csj_make_trans/csj_automake.sh /database/NINJAL/CSJ/ data/csj-data 2>/dev/null
|
||||
local/csj_make_trans/csj_automake.sh /database/NINJAL/CSJ data/csj-data 2>/dev/null
|
||||
fi
|
||||
wait
|
||||
|
||||
[ ! -d data/csj-data/eval ]\
|
||||
[ ! -e data/csj-data/.done_make_all ]\
|
||||
&& echo "Not finished processing CSJ data" && exit 1;
|
||||
|
||||
# Prepare Corpus of Spontaneous Japanese (CSJ) data.
|
||||
|
@ -36,7 +36,7 @@ local/csj_data_prep.sh data/csj-data/
|
|||
|
||||
local/csj_prepare_dict.sh
|
||||
|
||||
utils/prepare_lang.sh data/local/dict_nosp "<unk>" data/local/lang_nosp data/lang_nosp
|
||||
utils/prepare_lang.sh --num-sil-states 4 data/local/dict_nosp "<unk>" data/local/lang_nosp data/lang_nosp
|
||||
|
||||
# Now train the language models.
|
||||
local/csj_train_lms.sh data/local/train/text data/local/dict_nosp/lexicon.txt data/local/lm
|
||||
|
@ -155,7 +155,7 @@ $train_cmd $graph_dir/mkgraph.log \
|
|||
utils/mkgraph.sh data/lang_nosp_csj_tg exp/tri3 $graph_dir
|
||||
for eval_num in `seq 3`; do
|
||||
steps/decode.sh --nj 10 --cmd "$decode_cmd" --config conf/decode.config \
|
||||
$graph_dir data/eval${eval_num} exp/tri3/decode_eval${eval_num}_csj
|
||||
$graph_dir data/eval${eval_num} exp/tri3/decode_eval${eval_num}_csj_nosp
|
||||
done
|
||||
|
||||
# Now we compute the pronunciation and silence probabilities from training data,
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
link ../../wsj/s5/steps/
|
|
@ -0,0 +1 @@
|
|||
../../wsj/s5/steps
|
|
@ -1 +0,0 @@
|
|||
link ../../wsj/s5/utils/
|
|
@ -0,0 +1 @@
|
|||
../../wsj/s5/utils
|
Загрузка…
Ссылка в новой задаче