diff --git a/egs/csj/README.txt b/egs/csj/README.txt index 9683c8b54..268a313b4 100644 --- a/egs/csj/README.txt +++ b/egs/csj/README.txt @@ -1,16 +1,27 @@ About the Corpus of Spontaneous Japanese: The Corpus of Spontaneous Japanese (CSJ) is a database of spoken -Japanese developed by the Japan's national priority area research +Japanese developed by the Japan's national priority area research project "Spontaneous Speech: Corpus and Processing Technology". -It contains about 650 hours of speech consisting of approximately +It contains about 650 hours of speech consisting of approximately 7.5 million words that were provided by more than 1,400 speakers. -For more details about the corpus, please visit the website of the +For more details about the corpus, please visit the website of the National Institute for Japanese Language (NINJAL). It is available from the Institute. http://www.ninjal.ac.jp/english/products/csj/ http://pj.ninjal.ac.jp/corpus_center/csj/ +Meta-parameter tuning based on evolution strategy: +The meta-parameters of the system contained in conf/config_opt were +automatically tuned using evolution strategy. For details, +please refer to the following paper: +Takafumi Moriya, Tomohiro Tanaka, Takahiro Shinozaki, Shinji Watanabe, +and Kevin Duh, "Automation of System Building for State-of-the-art +Large Vocabulary Speech Recognition Using Evolution Strategy," Proc. +IEEE 2015 Automatic Speech Recognition and Understanding Workshop +(ASRU), 2015. + + Each subdirectory of this directory contains the -scripts for a sequence of experiments. +scripts for a sequence of experiments. s5: This is the current recommended recipe. - The third edition of CSJ is assumed. + The recipe supports the third and fourth editions of CSJ. diff --git a/egs/csj/s5/RESULTS b/egs/csj/s5/RESULTS index 208d99b8d..340879aed 100644 --- a/egs/csj/s5/RESULTS +++ b/egs/csj/s5/RESULTS @@ -1,117 +1,118 @@ +## These are results using the third edition of CSJ. 
for eval_num in `seq 3`; do echo "=== evaluation set $eval_num ===" ;\ for x in exp/{tri,dnn}*/decode_eval${eval_num}*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done ; done ## Results of using training set that contains "academic" and "other" speech data (default). -## If you want to use "trial lecture" and "dialog" data, you should check the following script [local/csj_data_prep.sh line 44]. +## If you want to use "simulated public speaking" and "dialog" data, you should check the following script [local/csj_data_prep.sh line 44]. === evaluation set 1 === -%WER 22.67 [ 6269 / 27651, 522 ins, 1903 del, 3844 sub ] exp/tri1/decode_eval1_csj/wer_12 -%WER 21.49 [ 5943 / 27651, 541 ins, 1745 del, 3657 sub ] exp/tri2/decode_eval1_csj/wer_12 -%WER 17.49 [ 4837 / 27651, 613 ins, 1269 del, 2955 sub ] exp/tri3/decode_eval1_csj/wer_16 -%WER 15.26 [ 4220 / 27651, 566 ins, 1071 del, 2583 sub ] exp/tri4/decode_eval1_csj/wer_17 -%WER 17.33 [ 4792 / 27651, 628 ins, 1137 del, 3027 sub ] exp/tri4/decode_eval1_csj.si/wer_16 -%WER 14.59 [ 4033 / 27651, 617 ins, 919 del, 2497 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it4_csj/wer_14 -%WER 14.14 [ 3911 / 27651, 585 ins, 915 del, 2411 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it5_csj/wer_17 -%WER 14.00 [ 3871 / 27651, 586 ins, 888 del, 2397 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it6_csj/wer_15 -%WER 13.92 [ 3850 / 27651, 661 ins, 793 del, 2396 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it7_csj/wer_14 -%WER 14.15 [ 3913 / 27651, 640 ins, 877 del, 2396 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it8_csj/wer_14 -%WER 14.39 [ 3979 / 27651, 570 ins, 946 del, 2463 sub ] exp/tri4_mmi_b0.1/decode_eval1_1.mdl_csj/wer_17 -%WER 14.09 [ 3895 / 27651, 576 ins, 882 del, 2437 sub ] exp/tri4_mmi_b0.1/decode_eval1_2.mdl_csj/wer_15 -%WER 14.02 [ 3877 / 27651, 602 ins, 858 del, 2417 sub ] exp/tri4_mmi_b0.1/decode_eval1_3.mdl_csj/wer_15 -%WER 14.00 [ 3870 / 27651, 609 ins, 853 del, 2408 sub ] exp/tri4_mmi_b0.1/decode_eval1_4.mdl_csj/wer_15 -%WER 11.93 [ 3298 / 
27651, 348 ins, 970 del, 1980 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval1_csj/wer_12 -%WER 11.29 [ 3123 / 27651, 509 ins, 651 del, 1963 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval1_csj/wer_15 -%WER 10.87 [ 3007 / 27651, 497 ins, 589 del, 1921 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval1_csj/wer_14 +%WER 22.67 [ 6269 / 27651, 522 ins, 1903 del, 3844 sub ] exp/tri1/decode_eval1_csj/wer_12_0.0 +%WER 21.49 [ 5943 / 27651, 541 ins, 1745 del, 3657 sub ] exp/tri2/decode_eval1_csj/wer_12_0.0 +%WER 17.49 [ 4837 / 27651, 613 ins, 1269 del, 2955 sub ] exp/tri3/decode_eval1_csj/wer_16_0.0 +%WER 15.26 [ 4220 / 27651, 566 ins, 1071 del, 2583 sub ] exp/tri4/decode_eval1_csj/wer_17_0.0 +%WER 17.33 [ 4792 / 27651, 628 ins, 1137 del, 3027 sub ] exp/tri4/decode_eval1_csj.si/wer_16_0.0 +%WER 14.59 [ 4033 / 27651, 617 ins, 919 del, 2497 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it4_csj/wer_14_0.0 +%WER 14.14 [ 3911 / 27651, 585 ins, 915 del, 2411 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it5_csj/wer_17_0.0 +%WER 14.00 [ 3871 / 27651, 586 ins, 888 del, 2397 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it6_csj/wer_15_0.5 +%WER 13.92 [ 3850 / 27651, 661 ins, 793 del, 2396 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it7_csj/wer_14_0.5 +%WER 14.15 [ 3913 / 27651, 640 ins, 877 del, 2396 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it8_csj/wer_14_0.5 +%WER 14.39 [ 3979 / 27651, 570 ins, 946 del, 2463 sub ] exp/tri4_mmi_b0.1/decode_eval1_1.mdl_csj/wer_17_0.5 +%WER 14.09 [ 3895 / 27651, 576 ins, 882 del, 2437 sub ] exp/tri4_mmi_b0.1/decode_eval1_2.mdl_csj/wer_15_0.0 +%WER 14.02 [ 3877 / 27651, 602 ins, 858 del, 2417 sub ] exp/tri4_mmi_b0.1/decode_eval1_3.mdl_csj/wer_15_0.5 +%WER 14.00 [ 3870 / 27651, 609 ins, 853 del, 2408 sub ] exp/tri4_mmi_b0.1/decode_eval1_4.mdl_csj/wer_15_0.5 +%WER 11.93 [ 3298 / 27651, 348 ins, 970 del, 1980 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval1_csj/wer_12_0.0 +%WER 11.29 [ 3123 / 27651, 509 ins, 651 del, 1963 sub ] 
exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval1_csj/wer_15_1.0 +%WER 10.87 [ 3007 / 27651, 497 ins, 589 del, 1921 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval1_csj/wer_14_0.5 === evaluation set 2 === -%WER 19.80 [ 5628 / 28424, 561 ins, 1511 del, 3556 sub ] exp/tri1/decode_eval2_csj/wer_12 -%WER 19.04 [ 5413 / 28424, 600 ins, 1423 del, 3390 sub ] exp/tri2/decode_eval2_csj/wer_12 -%WER 15.80 [ 4490 / 28424, 582 ins, 1131 del, 2777 sub ] exp/tri3/decode_eval2_csj/wer_16 -%WER 13.95 [ 3964 / 28424, 691 ins, 843 del, 2430 sub ] exp/tri4/decode_eval2_csj/wer_13 -%WER 18.74 [ 5326 / 28424, 804 ins, 1056 del, 3466 sub ] exp/tri4/decode_eval2_csj.si/wer_17 -%WER 12.77 [ 3631 / 28424, 604 ins, 781 del, 2246 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it4_csj/wer_14 -%WER 12.27 [ 3488 / 28424, 604 ins, 707 del, 2177 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it5_csj/wer_14 -%WER 12.32 [ 3502 / 28424, 613 ins, 713 del, 2176 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it6_csj/wer_13 -%WER 12.32 [ 3502 / 28424, 658 ins, 688 del, 2156 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it7_csj/wer_14 -%WER 12.56 [ 3569 / 28424, 642 ins, 760 del, 2167 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it8_csj/wer_13 -%WER 12.51 [ 3557 / 28424, 588 ins, 766 del, 2203 sub ] exp/tri4_mmi_b0.1/decode_eval2_1.mdl_csj/wer_15 -%WER 12.25 [ 3482 / 28424, 587 ins, 730 del, 2165 sub ] exp/tri4_mmi_b0.1/decode_eval2_2.mdl_csj/wer_14 -%WER 12.20 [ 3467 / 28424, 599 ins, 706 del, 2162 sub ] exp/tri4_mmi_b0.1/decode_eval2_3.mdl_csj/wer_14 -%WER 12.33 [ 3504 / 28424, 615 ins, 714 del, 2175 sub ] exp/tri4_mmi_b0.1/decode_eval2_4.mdl_csj/wer_14 -%WER 10.24 [ 2910 / 28424, 271 ins, 852 del, 1787 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval2_csj/wer_12 -%WER 9.41 [ 2676 / 28424, 453 ins, 432 del, 1791 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval2_csj/wer_14 -%WER 9.19 [ 2612 / 28424, 417 ins, 422 del, 1773 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval2_csj/wer_14 +%WER 19.80 [ 5628 / 28424, 561 ins, 1511 del, 3556 
sub ] exp/tri1/decode_eval2_csj/wer_12_0.0 +%WER 19.04 [ 5413 / 28424, 600 ins, 1423 del, 3390 sub ] exp/tri2/decode_eval2_csj/wer_12_0.0 +%WER 15.80 [ 4490 / 28424, 582 ins, 1131 del, 2777 sub ] exp/tri3/decode_eval2_csj/wer_16_0.5 +%WER 13.95 [ 3964 / 28424, 691 ins, 843 del, 2430 sub ] exp/tri4/decode_eval2_csj/wer_13_0.0 +%WER 18.74 [ 5326 / 28424, 804 ins, 1056 del, 3466 sub ] exp/tri4/decode_eval2_csj.si/wer_17_0.0 +%WER 12.77 [ 3631 / 28424, 604 ins, 781 del, 2246 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it4_csj/wer_14_0.5 +%WER 12.27 [ 3488 / 28424, 604 ins, 707 del, 2177 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it5_csj/wer_14_0.5 +%WER 12.32 [ 3502 / 28424, 613 ins, 713 del, 2176 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it6_csj/wer_13_0.5 +%WER 12.32 [ 3502 / 28424, 658 ins, 688 del, 2156 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it7_csj/wer_14_1.0 +%WER 12.56 [ 3569 / 28424, 642 ins, 760 del, 2167 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it8_csj/wer_13_1.0 +%WER 12.51 [ 3557 / 28424, 588 ins, 766 del, 2203 sub ] exp/tri4_mmi_b0.1/decode_eval2_1.mdl_csj/wer_15_0.5 +%WER 12.25 [ 3482 / 28424, 587 ins, 730 del, 2165 sub ] exp/tri4_mmi_b0.1/decode_eval2_2.mdl_csj/wer_14_0.5 +%WER 12.20 [ 3467 / 28424, 599 ins, 706 del, 2162 sub ] exp/tri4_mmi_b0.1/decode_eval2_3.mdl_csj/wer_14_0.5 +%WER 12.33 [ 3504 / 28424, 615 ins, 714 del, 2175 sub ] exp/tri4_mmi_b0.1/decode_eval2_4.mdl_csj/wer_14_0.5 +%WER 10.24 [ 2910 / 28424, 271 ins, 852 del, 1787 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval2_csj/wer_12_0.0 +%WER 9.41 [ 2676 / 28424, 453 ins, 432 del, 1791 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval2_csj/wer_14_1.0 +%WER 9.19 [ 2612 / 28424, 417 ins, 422 del, 1773 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval2_csj/wer_14_0.5 === evaluation set 3 === -%WER 24.80 [ 4534 / 18283, 447 ins, 1350 del, 2737 sub ] exp/tri1/decode_eval3_csj/wer_15 -%WER 23.68 [ 4329 / 18283, 497 ins, 1183 del, 2649 sub ] exp/tri2/decode_eval3_csj/wer_13 -%WER 19.97 [ 3651 / 18283, 582 ins, 828 
del, 2241 sub ] exp/tri3/decode_eval3_csj/wer_17 -%WER 17.27 [ 3158 / 18283, 520 ins, 752 del, 1886 sub ] exp/tri4/decode_eval3_csj/wer_19 -%WER 21.44 [ 3919 / 18283, 660 ins, 823 del, 2436 sub ] exp/tri4/decode_eval3_csj.si/wer_20 -%WER 16.56 [ 3028 / 18283, 476 ins, 716 del, 1836 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it4_csj/wer_20 -%WER 15.79 [ 2887 / 18283, 547 ins, 554 del, 1786 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it5_csj/wer_15 -%WER 15.89 [ 2906 / 18283, 519 ins, 597 del, 1790 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it6_csj/wer_15 -%WER 15.64 [ 2860 / 18283, 556 ins, 512 del, 1792 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it7_csj/wer_15 -%WER 16.38 [ 2994 / 18283, 529 ins, 655 del, 1810 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it8_csj/wer_15 -%WER 16.13 [ 2949 / 18283, 505 ins, 630 del, 1814 sub ] exp/tri4_mmi_b0.1/decode_eval3_1.mdl_csj/wer_18 -%WER 15.97 [ 2920 / 18283, 540 ins, 556 del, 1824 sub ] exp/tri4_mmi_b0.1/decode_eval3_2.mdl_csj/wer_14 -%WER 15.98 [ 2922 / 18283, 564 ins, 537 del, 1821 sub ] exp/tri4_mmi_b0.1/decode_eval3_3.mdl_csj/wer_14 -%WER 15.98 [ 2921 / 18283, 548 ins, 566 del, 1807 sub ] exp/tri4_mmi_b0.1/decode_eval3_4.mdl_csj/wer_15 -%WER 13.94 [ 2548 / 18283, 313 ins, 716 del, 1519 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval3_csj/wer_13 -%WER 12.52 [ 2289 / 18283, 464 ins, 354 del, 1471 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval3_csj/wer_15 -%WER 12.18 [ 2226 / 18283, 431 ins, 340 del, 1455 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval3_csj/wer_15 +%WER 24.80 [ 4534 / 18283, 447 ins, 1350 del, 2737 sub ] exp/tri1/decode_eval3_csj/wer_15_0.0 +%WER 23.68 [ 4329 / 18283, 497 ins, 1183 del, 2649 sub ] exp/tri2/decode_eval3_csj/wer_13_0.0 +%WER 19.97 [ 3651 / 18283, 582 ins, 828 del, 2241 sub ] exp/tri3/decode_eval3_csj/wer_17_0.5 +%WER 17.27 [ 3158 / 18283, 520 ins, 752 del, 1886 sub ] exp/tri4/decode_eval3_csj/wer_19_0.0 +%WER 21.44 [ 3919 / 18283, 660 ins, 823 del, 2436 sub ] exp/tri4/decode_eval3_csj.si/wer_20_1.0 +%WER 16.56 
[ 3028 / 18283, 476 ins, 716 del, 1836 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it4_csj/wer_20_0.0 +%WER 15.79 [ 2887 / 18283, 547 ins, 554 del, 1786 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it5_csj/wer_15_0.5 +%WER 15.89 [ 2906 / 18283, 519 ins, 597 del, 1790 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it6_csj/wer_15_0.5 +%WER 15.64 [ 2860 / 18283, 556 ins, 512 del, 1792 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it7_csj/wer_15_1.0 +%WER 16.38 [ 2994 / 18283, 529 ins, 655 del, 1810 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it8_csj/wer_15_0.5 +%WER 16.13 [ 2949 / 18283, 505 ins, 630 del, 1814 sub ] exp/tri4_mmi_b0.1/decode_eval3_1.mdl_csj/wer_18_0.0 +%WER 15.97 [ 2920 / 18283, 540 ins, 556 del, 1824 sub ] exp/tri4_mmi_b0.1/decode_eval3_2.mdl_csj/wer_14_0.5 +%WER 15.98 [ 2922 / 18283, 564 ins, 537 del, 1821 sub ] exp/tri4_mmi_b0.1/decode_eval3_3.mdl_csj/wer_14_0.0 +%WER 15.98 [ 2921 / 18283, 548 ins, 566 del, 1807 sub ] exp/tri4_mmi_b0.1/decode_eval3_4.mdl_csj/wer_15_1.0 +%WER 13.94 [ 2548 / 18283, 313 ins, 716 del, 1519 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval3_csj/wer_13_0.0 +%WER 12.52 [ 2289 / 18283, 464 ins, 354 del, 1471 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval3_csj/wer_15_0.0 +%WER 12.18 [ 2226 / 18283, 431 ins, 340 del, 1455 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval3_csj/wer_15_0.5 -## Results of using training data that contain all types of speech data. +## Results of using training data that contain all types of speech data except for dialog type. 
=== evaluation set 1 === -%WER 22.71 [ 6279 / 27651, 524 ins, 1936 del, 3819 sub ] exp/tri1/decode_eval1_csj/wer_13 -%WER 21.36 [ 5905 / 27651, 529 ins, 1781 del, 3595 sub ] exp/tri2/decode_eval1_csj/wer_13 -%WER 17.89 [ 4948 / 27651, 586 ins, 1314 del, 3048 sub ] exp/tri3/decode_eval1_csj/wer_16 -%WER 15.85 [ 4383 / 27651, 580 ins, 1169 del, 2634 sub ] exp/tri4/decode_eval1_csj/wer_17 -%WER 18.06 [ 4995 / 27651, 671 ins, 1209 del, 3115 sub ] exp/tri4/decode_eval1_csj.si/wer_15 -%WER 15.17 [ 4196 / 27651, 536 ins, 1105 del, 2555 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it4_csj/wer_17 -%WER 14.32 [ 3959 / 27651, 578 ins, 949 del, 2432 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it5_csj/wer_15 -%WER 14.20 [ 3926 / 27651, 598 ins, 885 del, 2443 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it6_csj/wer_13 -%WER 13.93 [ 3851 / 27651, 631 ins, 829 del, 2391 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it7_csj/wer_14 -%WER 14.09 [ 3895 / 27651, 621 ins, 847 del, 2427 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it8_csj/wer_12 -%WER 14.69 [ 4061 / 27651, 587 ins, 981 del, 2493 sub ] exp/tri4_mmi_b0.1/decode_eval1_1.mdl_csj/wer_15 -%WER 14.48 [ 4003 / 27651, 549 ins, 1001 del, 2453 sub ] exp/tri4_mmi_b0.1/decode_eval1_2.mdl_csj/wer_16 -%WER 14.33 [ 3963 / 27651, 611 ins, 901 del, 2451 sub ] exp/tri4_mmi_b0.1/decode_eval1_3.mdl_csj/wer_14 -%WER 14.12 [ 3905 / 27651, 610 ins, 870 del, 2425 sub ] exp/tri4_mmi_b0.1/decode_eval1_4.mdl_csj/wer_14 -%WER 11.62 [ 3214 / 27651, 381 ins, 799 del, 2034 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval1_csj/wer_12 -%WER 10.93 [ 3021 / 27651, 475 ins, 566 del, 1980 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval1_csj/wer_14 -%WER 10.71 [ 2962 / 27651, 516 ins, 496 del, 1950 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval1_csj/wer_13 +%WER 22.97 [ 6352 / 27651, 514 ins, 1941 del, 3897 sub ] exp/tri1/decode_eval1_csj/wer_13_0.0 +%WER 21.48 [ 5939 / 27651, 482 ins, 1885 del, 3572 sub ] exp/tri2/decode_eval1_csj/wer_14_0.0 +%WER 17.86 [ 4939 / 27651, 596 ins, 1305 
del, 3038 sub ] exp/tri3/decode_eval1_csj/wer_15_0.0 +%WER 15.67 [ 4333 / 27651, 584 ins, 1121 del, 2628 sub ] exp/tri4/decode_eval1_csj/wer_16_0.0 +%WER 17.88 [ 4943 / 27651, 623 ins, 1226 del, 3094 sub ] exp/tri4/decode_eval1_csj.si/wer_16_0.0 +%WER 15.01 [ 4150 / 27651, 580 ins, 1009 del, 2561 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it4_csj/wer_15_0.0 +%WER 14.28 [ 3949 / 27651, 578 ins, 929 del, 2442 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it5_csj/wer_15_0.0 +%WER 14.17 [ 3917 / 27651, 542 ins, 966 del, 2409 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it6_csj/wer_15_0.0 +%WER 14.00 [ 3871 / 27651, 442 ins, 1085 del, 2344 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it7_csj/wer_12_1.0 +%WER 14.08 [ 3893 / 27651, 426 ins, 1087 del, 2380 sub ] exp/tri4_fmmi_b0.1/decode_eval1_it8_csj/wer_11_1.0 +%WER 14.60 [ 4036 / 27651, 458 ins, 1115 del, 2463 sub ] exp/tri4_mmi_b0.1/decode_eval1_1.mdl_csj/wer_15_0.5 +%WER 14.42 [ 3986 / 27651, 459 ins, 1081 del, 2446 sub ] exp/tri4_mmi_b0.1/decode_eval1_2.mdl_csj/wer_14_0.5 +%WER 14.22 [ 3931 / 27651, 492 ins, 1022 del, 2417 sub ] exp/tri4_mmi_b0.1/decode_eval1_3.mdl_csj/wer_13_0.5 +%WER 13.99 [ 3869 / 27651, 504 ins, 949 del, 2416 sub ] exp/tri4_mmi_b0.1/decode_eval1_4.mdl_csj/wer_12_0.5 +%WER 11.63 [ 3215 / 27651, 384 ins, 804 del, 2027 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval1_csj/wer_12_0.0 +%WER 10.56 [ 2921 / 27651, 366 ins, 662 del, 1893 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval1_csj/wer_13_1.0 +%WER 10.34 [ 2859 / 27651, 363 ins, 660 del, 1836 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval1_csj/wer_14_1.0 === evaluation set 2 === -%WER 19.61 [ 5575 / 28424, 577 ins, 1442 del, 3556 sub ] exp/tri1/decode_eval2_csj/wer_12 -%WER 18.47 [ 5250 / 28424, 572 ins, 1361 del, 3317 sub ] exp/tri2/decode_eval2_csj/wer_12 -%WER 15.71 [ 4464 / 28424, 577 ins, 1128 del, 2759 sub ] exp/tri3/decode_eval2_csj/wer_15 -%WER 13.24 [ 3764 / 28424, 535 ins, 921 del, 2308 sub ] exp/tri4/decode_eval2_csj/wer_16 -%WER 17.90 [ 5088 / 28424, 
743 ins, 1057 del, 3288 sub ] exp/tri4/decode_eval2_csj.si/wer_16 -%WER 12.56 [ 3571 / 28424, 595 ins, 767 del, 2209 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it4_csj/wer_13 -%WER 11.79 [ 3350 / 28424, 584 ins, 669 del, 2097 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it5_csj/wer_13 -%WER 11.86 [ 3372 / 28424, 619 ins, 643 del, 2110 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it6_csj/wer_11 -%WER 11.79 [ 3352 / 28424, 603 ins, 659 del, 2090 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it7_csj/wer_13 -%WER 12.08 [ 3434 / 28424, 602 ins, 701 del, 2131 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it8_csj/wer_11 -%WER 12.13 [ 3447 / 28424, 561 ins, 735 del, 2151 sub ] exp/tri4_mmi_b0.1/decode_eval2_1.mdl_csj/wer_14 -%WER 11.88 [ 3376 / 28424, 575 ins, 676 del, 2125 sub ] exp/tri4_mmi_b0.1/decode_eval2_2.mdl_csj/wer_12 -%WER 11.77 [ 3345 / 28424, 588 ins, 646 del, 2111 sub ] exp/tri4_mmi_b0.1/decode_eval2_3.mdl_csj/wer_12 -%WER 11.73 [ 3333 / 28424, 586 ins, 658 del, 2089 sub ] exp/tri4_mmi_b0.1/decode_eval2_4.mdl_csj/wer_12 -%WER 9.36 [ 2660 / 28424, 357 ins, 561 del, 1742 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval2_csj/wer_10 -%WER 9.07 [ 2579 / 28424, 467 ins, 404 del, 1708 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval2_csj/wer_13 -%WER 8.91 [ 2533 / 28424, 439 ins, 399 del, 1695 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval2_csj/wer_15 +%WER 19.56 [ 5560 / 28424, 560 ins, 1527 del, 3473 sub ] exp/tri1/decode_eval2_csj/wer_12_0.0 +%WER 18.62 [ 5293 / 28424, 610 ins, 1361 del, 3322 sub ] exp/tri2/decode_eval2_csj/wer_12_0.0 +%WER 15.58 [ 4429 / 28424, 626 ins, 1026 del, 2777 sub ] exp/tri3/decode_eval2_csj/wer_13_0.0 +%WER 13.37 [ 3801 / 28424, 643 ins, 844 del, 2314 sub ] exp/tri4/decode_eval2_csj/wer_14_0.0 +%WER 18.03 [ 5126 / 28424, 665 ins, 1178 del, 3283 sub ] exp/tri4/decode_eval2_csj.si/wer_15_0.5 +%WER 12.36 [ 3514 / 28424, 475 ins, 880 del, 2159 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it4_csj/wer_13_0.5 +%WER 11.54 [ 3279 / 28424, 448 ins, 792 del, 2039 sub ] 
exp/tri4_fmmi_b0.1/decode_eval2_it5_csj/wer_13_0.5 +%WER 11.47 [ 3260 / 28424, 497 ins, 740 del, 2023 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it6_csj/wer_11_0.5 +%WER 11.34 [ 3223 / 28424, 476 ins, 713 del, 2034 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it7_csj/wer_10_1.0 +%WER 11.60 [ 3298 / 28424, 523 ins, 716 del, 2059 sub ] exp/tri4_fmmi_b0.1/decode_eval2_it8_csj/wer_10_0.5 +%WER 11.86 [ 3372 / 28424, 555 ins, 723 del, 2094 sub ] exp/tri4_mmi_b0.1/decode_eval2_1.mdl_csj/wer_14_0.0 +%WER 11.57 [ 3289 / 28424, 446 ins, 814 del, 2029 sub ] exp/tri4_mmi_b0.1/decode_eval2_2.mdl_csj/wer_13_0.5 +%WER 11.46 [ 3256 / 28424, 510 ins, 684 del, 2062 sub ] exp/tri4_mmi_b0.1/decode_eval2_3.mdl_csj/wer_11_0.5 +%WER 11.58 [ 3292 / 28424, 408 ins, 827 del, 2057 sub ] exp/tri4_mmi_b0.1/decode_eval2_4.mdl_csj/wer_11_1.0 +%WER 9.15 [ 2601 / 28424, 305 ins, 604 del, 1692 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval2_csj/wer_12_0.0 +%WER 8.69 [ 2469 / 28424, 367 ins, 444 del, 1658 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval2_csj/wer_12_1.0 +%WER 8.62 [ 2450 / 28424, 349 ins, 444 del, 1657 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval2_csj/wer_13_1.0 === evaluation set 3 === -%WER 25.01 [ 4573 / 18283, 529 ins, 1219 del, 2825 sub ] exp/tri1/decode_eval3_csj/wer_13 -%WER 23.62 [ 4319 / 18283, 499 ins, 1176 del, 2644 sub ] exp/tri2/decode_eval3_csj/wer_14 -%WER 18.04 [ 3298 / 18283, 528 ins, 739 del, 2031 sub ] exp/tri3/decode_eval3_csj/wer_12 -%WER 15.63 [ 2858 / 18283, 411 ins, 719 del, 1728 sub ] exp/tri4/decode_eval3_csj/wer_15 -%WER 19.36 [ 3540 / 18283, 506 ins, 836 del, 2198 sub ] exp/tri4/decode_eval3_csj.si/wer_17 -%WER 14.90 [ 2724 / 18283, 456 ins, 602 del, 1666 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it4_csj/wer_13 -%WER 13.70 [ 2504 / 18283, 456 ins, 477 del, 1571 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it5_csj/wer_13 -%WER 13.78 [ 2520 / 18283, 460 ins, 548 del, 1512 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it6_csj/wer_12 -%WER 13.08 [ 2391 / 18283, 517 ins, 400 del, 
1474 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it7_csj/wer_12 -%WER 13.75 [ 2514 / 18283, 469 ins, 562 del, 1483 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it8_csj/wer_12 -%WER 14.14 [ 2585 / 18283, 436 ins, 537 del, 1612 sub ] exp/tri4_mmi_b0.1/decode_eval3_1.mdl_csj/wer_14 -%WER 13.83 [ 2529 / 18283, 429 ins, 547 del, 1553 sub ] exp/tri4_mmi_b0.1/decode_eval3_2.mdl_csj/wer_14 -%WER 13.54 [ 2475 / 18283, 460 ins, 492 del, 1523 sub ] exp/tri4_mmi_b0.1/decode_eval3_3.mdl_csj/wer_13 -%WER 13.36 [ 2443 / 18283, 463 ins, 482 del, 1498 sub ] exp/tri4_mmi_b0.1/decode_eval3_4.mdl_csj/wer_13 -%WER 10.55 [ 1928 / 18283, 242 ins, 482 del, 1204 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval3_csj/wer_13 -%WER 9.71 [ 1775 / 18283, 338 ins, 271 del, 1166 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval3_csj/wer_13 -%WER 9.31 [ 1703 / 18283, 336 ins, 247 del, 1120 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval3_csj/wer_13 \ No newline at end of file +%WER 25.00 [ 4570 / 18283, 515 ins, 1277 del, 2778 sub ] exp/tri1/decode_eval3_csj/wer_14_0.0 +%WER 23.93 [ 4375 / 18283, 560 ins, 1163 del, 2652 sub ] exp/tri2/decode_eval3_csj/wer_14_0.0 +%WER 17.66 [ 3229 / 18283, 484 ins, 773 del, 1972 sub ] exp/tri3/decode_eval3_csj/wer_14_0.0 +%WER 15.46 [ 2827 / 18283, 311 ins, 860 del, 1656 sub ] exp/tri4/decode_eval3_csj/wer_17_0.5 +%WER 18.92 [ 3459 / 18283, 424 ins, 910 del, 2125 sub ] exp/tri4/decode_eval3_csj.si/wer_16_0.5 +%WER 14.55 [ 2661 / 18283, 423 ins, 629 del, 1609 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it4_csj/wer_14_0.0 +%WER 13.38 [ 2446 / 18283, 362 ins, 572 del, 1512 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it5_csj/wer_13_0.5 +%WER 13.37 [ 2444 / 18283, 484 ins, 470 del, 1490 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it6_csj/wer_11_0.0 +%WER 12.96 [ 2370 / 18283, 332 ins, 570 del, 1468 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it7_csj/wer_12_1.0 +%WER 13.62 [ 2490 / 18283, 440 ins, 549 del, 1501 sub ] exp/tri4_fmmi_b0.1/decode_eval3_it8_csj/wer_10_0.5 +%WER 13.77 [ 2518 / 18283, 323 
ins, 664 del, 1531 sub ] exp/tri4_mmi_b0.1/decode_eval3_1.mdl_csj/wer_15_0.5 +%WER 13.48 [ 2464 / 18283, 334 ins, 618 del, 1512 sub ] exp/tri4_mmi_b0.1/decode_eval3_2.mdl_csj/wer_13_0.5 +%WER 13.28 [ 2428 / 18283, 379 ins, 546 del, 1503 sub ] exp/tri4_mmi_b0.1/decode_eval3_3.mdl_csj/wer_12_0.5 +%WER 13.26 [ 2424 / 18283, 388 ins, 543 del, 1493 sub ] exp/tri4_mmi_b0.1/decode_eval3_4.mdl_csj/wer_12_0.5 +%WER 10.41 [ 1904 / 18283, 289 ins, 422 del, 1193 sub ] exp/dnn5b_pretrain-dbn_dnn/decode_eval3_csj/wer_10_0.0 +%WER 9.34 [ 1707 / 18283, 251 ins, 341 del, 1115 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr/decode_eval3_csj/wer_13_1.0 +%WER 9.10 [ 1664 / 18283, 246 ins, 344 del, 1074 sub ] exp/dnn5b_pretrain-dbn_dnn_smbr_i1lats/decode_eval3_csj/wer_14_1.0 diff --git a/egs/csj/s5/conf/config_opt b/egs/csj/s5/conf/config_opt index 5868d671c..e91c33abf 100644 --- a/egs/csj/s5/conf/config_opt +++ b/egs/csj/s5/conf/config_opt @@ -3,7 +3,8 @@ # Apache 2.0 # Acknowledgement This work was supported by JSPS KAKENHI Grant Number 26280055. -# Current optimized parameter config for CSJ +# Currently optimized parameter config for CSJ + splice=17 nn_depth=6 hid_dim=1905 diff --git a/egs/csj/s5/conf/mfcc.conf b/egs/csj/s5/conf/mfcc.conf index 0e7dfcd69..a5b1cbc03 100644 --- a/egs/csj/s5/conf/mfcc.conf +++ b/egs/csj/s5/conf/mfcc.conf @@ -1,3 +1,2 @@ --use-energy=false # only non-default option. 
-#--sample-frequency=8000 # Switchboard is sampled at 8kHz --sample-frequency=16000 # CSJ is sampled at 16kHz diff --git a/egs/csj/s5/local/csj_data_prep.sh b/egs/csj/s5/local/csj_data_prep.sh index 7458c0ce3..73462f178 100644 --- a/egs/csj/s5/local/csj_data_prep.sh +++ b/egs/csj/s5/local/csj_data_prep.sh @@ -50,7 +50,7 @@ cat $CSJ/dvd{3,5,6,7,8,9,10}/{A*,M*}/*-wav.list 2>/dev/null | sort > $dir/wav.fl n=`cat $dir/wav.flist | wc -l` [ $n -ne 986 ] && \ - echo Warning: expected 986 data data files, found $n + echo "Warning: expected 986 data files (Case : Using 'Academic lecture' and 'Other' data), found $n." # (1a) Transcriptions preparation @@ -102,7 +102,7 @@ awk '{segment=$1; split(segment,S,"[_]"); spkid=S[1]; print $1 " " spkid}' $dir/ sort -k 2 $dir/utt2spk | utils/utt2spk_to_spk2utt.pl > $dir/spk2utt || exit 1; -# Copy stuff into its final locations. +# Copy stuff into its final locations [this has been moved from the format_data script] mkdir -p data/train for f in spk2utt utt2spk wav.scp text segments; do cp data/local/train/$f data/train/$f || exit 1; diff --git a/egs/csj/s5/local/csj_eval_data_prep.sh b/egs/csj/s5/local/csj_eval_data_prep.sh index 623197775..a8b848de4 100644 --- a/egs/csj/s5/local/csj_eval_data_prep.sh +++ b/egs/csj/s5/local/csj_eval_data_prep.sh @@ -9,7 +9,7 @@ # To be run from one directory above this script. -# The input is directory name containing the official evaluation test set. +# The input is directory containing the official evaluation test set and transcripts. if [ $# -ne 2 ]; then echo "Usage: "`basename $0`" " diff --git a/egs/csj/s5/local/csj_make_trans/csj2kaldi4m.pl b/egs/csj/s5/local/csj_make_trans/csj2kaldi4m.pl index 7895fa341..aaeefbd82 100755 --- a/egs/csj/s5/local/csj_make_trans/csj2kaldi4m.pl +++ b/egs/csj/s5/local/csj_make_trans/csj2kaldi4m.pl @@ -1,5 +1,4 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter +#! 
/usr/bin/perl -w # Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki) # 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe) @@ -204,8 +203,10 @@ while (<>) { $word =~ s/\ン\ー/\ン/g; # $word =~ s/\ヮ/\ワ/g; $word =~ s/\ゎ/\わ/g; - $word =~ s/^\ゼロ$/\0/g; - $word =~ s/^\零$/\0/g; + + # Normalization +# $word =~ s/^\ゼロ$/\0/g; +# $word =~ s/^\零$/\0/g; # Arrange morpheme # This function is to arrange morpheme. diff --git a/egs/csj/s5/local/csj_make_trans/csj_automake.sh b/egs/csj/s5/local/csj_make_trans/csj_automake.sh index 132725c04..8dbb507a6 100644 --- a/egs/csj/s5/local/csj_make_trans/csj_automake.sh +++ b/egs/csj/s5/local/csj_make_trans/csj_automake.sh @@ -11,22 +11,17 @@ if [ $# -ne 2 ]; then exit 1 fi - resource=$1 outd=$2 -csjext=./local/csj_make_trans/csj2kaldi4m.pl -csjconnect=./local/csj_make_trans/csjconnect.pl -k2phone=./local/csj_make_trans/kana2phone -vocab2dic=./local/csj_make_trans/vocab2dic.pl -reform=./local/csj_make_trans/reform.pl +[ ! -e $resource ] && echo "Not exist CSJ or incorrect PATH." && exit 1; -if [ ! -d ./csj-data/dvd17 ];then +if [ ! -e $outd/.done_make_trans ];then ( mkdir -p $outd rm $outd/al_sent4lex.txt -cp ./local/csj_make_trans/overview_csj-data $outd/README.txt +cp local/csj_make_trans/overview_csj-data $outd/README.txt # Make transcription file for each dvd and each lecture [ ! 
-x "`which nkf `" ]\ @@ -35,19 +30,14 @@ cp ./local/csj_make_trans/overview_csj-data $outd/README.txt for vol in dvd{3..17} ;do mkdir -p $outd/$vol + ( for id in `ls $resource/$vol`;do mkdir -p $outd/$vol/${id} rm -r $outd/$vol/00README.txt - - ( nkf -e -d $resource/$vol/$id/${id}.sdb > $outd/$vol/${id}/sdb.tmp - $csjext $outd/$vol/${id}/sdb.tmp $outd/$vol/$id/${id}.4lex $outd/$vol/$id/${id}.4trn.t + local/csj_make_trans/csj2kaldi4m.pl $outd/$vol/${id}/sdb.tmp $outd/$vol/$id/${id}.4lex $outd/$vol/$id/${id}.4trn.t - $csjconnect 0.5 10 $outd/$vol/$id/${id}.4trn.t $id > $outd/$vol/$id/${id}-trans.text - - - [ -z `grep $id local/csj_make_trans/testset` ]\ - && cat $outd/$vol/$id/${id}.4lex >> $outd/al_sent4lex.txt + local/csj_make_trans/csjconnect.pl 0.5 10 $outd/$vol/$id/${id}.4trn.t $id > $outd/$vol/$id/${id}-trans.text rm $outd/$vol/$id/{${id}.4trn.t,sdb.tmp} @@ -56,30 +46,16 @@ for vol in dvd{3..17} ;do else find $resource/$vol/$id -iname ${id}.wav >$outd/$vol/$id/${id}-wav.list fi - - - ) done + )& done wait +echo -n >$outd/.done_make_trans ) fi -## make lexicon.txt -if [ ! -f ./csj-data/lexicon/lexicon.txt ]; then - ( - mkdir -p $outd/lexicon - sort $outd/al_sent4lex.txt >lex.tmp123 - uniq lex.tmp123 > lex.tmp456 - ${vocab2dic} -p $k2phone -o lex.tmp123 lex.tmp456 - $reform lex.tmp123 | sort | uniq > $outd/lexicon/lexicon.txt - mv $outd/al_sent4lex.txt $outd/lexicon - rm lex.tmp123 lex.tmp456 ERROR - ) -fi - ## Exclude speech data given by test set speakers. -if [ ! -d ./csj-data/[eval,excluded] ]; then +if [ ! -e $outd/.done_mv_eval_dup ]; then ( mkdir -p $outd/eval mkdir -p $outd/excluded @@ -89,10 +65,10 @@ if [ ! 
-d ./csj-data/[eval,excluded] ]; then # Speech data given by test set speakers (eval2 : A01M0056) rm dup_list - for line in `cat local/csj_make_trans/A01M0056_duplication | less`; do + for line in `cat local/csj_make_trans/A01M0056_duplication`; do find $outd/dvd* -iname $line >>dup_list done - for list in `cat dup_list | less`;do + for list in `cat dup_list`;do mv $list $outd/excluded cp dup_list $outd/excluded/duplication.list done @@ -100,10 +76,10 @@ if [ ! -d ./csj-data/[eval,excluded] ]; then # Evaluation data rm dup_list - for line in `cat local/csj_make_trans/testset | less`; do + for line in `cat local/csj_make_trans/testset`; do find $outd/dvd* -iname $line >>dup_list done - for list in `cat dup_list | less`;do + for list in `cat dup_list`;do mv $list $outd/eval cp dup_list $outd/eval/evaluation.list done @@ -114,11 +90,28 @@ if [ ! -d ./csj-data/[eval,excluded] ]; then mv $outd/eval/{A01M0110,A01M0137,A01M0097,A04M0123,A04M0121,A04M0051,A03M0156,A03M0112,A03M0106,A05M0011} $outd/eval/eval1 mv $outd/eval/{A01M0056,A03F0072,A02M0012,A03M0016,A06M0064,A06F0135,A01F0034,A01F0063,A01F0001,A01M0141} $outd/eval/eval2 mv $outd/eval/{S00M0112,S00F0066,S00M0213,S00F0019,S00M0079,S01F0105,S00F0152,S00M0070,S00M0008,S00F0148} $outd/eval/eval3 + + echo -n >$outd/.done_mv_eval_dup ) fi -comp_num=`ls -l $outd | wc -l` -[ ! $comp_num -eq 20 ] \ +## make lexicon.txt +if [ ! -e $outd/.done_make_lexicon ]; then + ( + cat $outd/{dvd*,excluded}/*/*.4lex >> $outd/al_sent4lex.txt + mkdir -p $outd/lexicon + sort $outd/al_sent4lex.txt >lex.tmp123 + uniq lex.tmp123 > lex.tmp456 + local/csj_make_trans/vocab2dic.pl -p local/csj_make_trans/kana2phone -o lex.tmp123 lex.tmp456 + local/csj_make_trans/reform.pl lex.tmp123 | sort | uniq > $outd/lexicon/lexicon.txt + mv $outd/al_sent4lex.txt $outd/lexicon + rm lex.tmp123 lex.tmp456 ERROR + + echo -n >$outd/.done_make_lexicon + ) +fi + +[ ! 3 -le `ls -a $outd | grep done | wc -l` ] \ && echo "ERROR : Processing is incorrect." 
&& exit 1; -echo "Finish processing original CSJ data" +echo "Finish processing original CSJ data" && echo -n >$outd/.done_make_all diff --git a/egs/csj/s5/local/csj_make_trans/csjconnect.pl b/egs/csj/s5/local/csj_make_trans/csjconnect.pl index 13866e9f7..2e43772e9 100755 --- a/egs/csj/s5/local/csj_make_trans/csjconnect.pl +++ b/egs/csj/s5/local/csj_make_trans/csjconnect.pl @@ -1,5 +1,4 @@ -#!/usr/bin/env perl -use warnings; #sed replacement for -w perl parameter +#! /usr/bin/perl -w # Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki) # 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe) diff --git a/egs/csj/s5/local/csj_make_trans/kana2phone b/egs/csj/s5/local/csj_make_trans/kana2phone index 76a0a4bff..6979a3203 100644 --- a/egs/csj/s5/local/csj_make_trans/kana2phone +++ b/egs/csj/s5/local/csj_make_trans/kana2phone @@ -141,4 +141,4 @@ ヴ+b u ツ+ts u シ+sh i -チ+ch i +チ+ch i diff --git a/egs/csj/s5/local/csj_make_trans/reform.pl b/egs/csj/s5/local/csj_make_trans/reform.pl index 1c267e2c4..fc00b3de6 100755 --- a/egs/csj/s5/local/csj_make_trans/reform.pl +++ b/egs/csj/s5/local/csj_make_trans/reform.pl @@ -1,4 +1,5 @@ -#!/usr/bin/env perl +#! /usr/bin/perl -w + # Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki) # 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe) # Apache 2.0 @@ -6,8 +7,6 @@ # This script is to make lexicon for KALDI format. -use warnings; - while (<>){ chomp; @line=split(/\t/, $_); diff --git a/egs/csj/s5/local/csj_make_trans/vocab2dic.pl b/egs/csj/s5/local/csj_make_trans/vocab2dic.pl index 85288a44a..9ff4b1011 100755 --- a/egs/csj/s5/local/csj_make_trans/vocab2dic.pl +++ b/egs/csj/s5/local/csj_make_trans/vocab2dic.pl @@ -1,4 +1,4 @@ -#!/usr/bin/env perl +#! 
/usr/bin/perl # Copyright 2015 Tokyo Institute of Technology (Authors: Takafumi Moriya and Takahiro Shinozaki) # 2015 Mitsubishi Electric Research Laboratories (Author: Shinji Watanabe) diff --git a/egs/csj/s5/local/nnet/run_dnn.sh b/egs/csj/s5/local/nnet/run_dnn.sh index 028be0b03..b0acce39d 100644 --- a/egs/csj/s5/local/nnet/run_dnn.sh +++ b/egs/csj/s5/local/nnet/run_dnn.sh @@ -25,7 +25,7 @@ # Config: config=conf/config_opt . $config -gmmdir=exp/tri4 +gmmdir=exp/tri4 data_fmllr=data-fmllr-tri4 stage=0 # resume training with --stage=N # End of config. @@ -60,7 +60,7 @@ if [ $stage -le 1 ]; then fi -if [ $stage -le 2 ]; then +if [ $stage -le 2 ]; then # Train the DNN optimizing per-frame cross-entropy. dir=exp/dnn5b_pretrain-dbn_dnn ali=${gmmdir}_ali_nodup @@ -86,7 +86,7 @@ dir=exp/dnn5b_pretrain-dbn_dnn_smbr srcdir=exp/dnn5b_pretrain-dbn_dnn acwt=0.0909 -if [ $stage -le 3 ]; then +if [ $stage -le 3 ]; then # First we generate lattices and alignments: steps/nnet/align.sh --nj 10 --cmd "$train_cmd" \ $data_fmllr/train_nodup data/lang $srcdir ${srcdir}_ali || exit 1; diff --git a/egs/csj/s5/local/run_sgmm2.sh b/egs/csj/s5/local/run_sgmm2.sh index a5369e302..ee836dc20 100644 --- a/egs/csj/s5/local/run_sgmm2.sh +++ b/egs/csj/s5/local/run_sgmm2.sh @@ -17,14 +17,16 @@ steps/train_sgmm2_group.sh --cmd "$train_cmd" \ 18000 60000 data/train_nodup data/lang exp/tri4_ali_nodup \ exp/ubm5/final.ubm exp/sgmm2_5 || exit 1; + + +graph_dir=exp/sgmm2_5/graph_csj_tg +$train_cmd $graph_dir/mkgraph.log \ + utils/mkgraph.sh data/lang_csj_tg exp/sgmm2_5 $graph_dir for eval_num in `seq 3`; do - graph_dir=exp/sgmm2_5/graph_csj_tg - $train_cmd $graph_dir/mkgraph.log \ - utils/mkgraph.sh data/lang_csj_tg exp/sgmm2_5 $graph_dir steps/decode_sgmm2.sh --nj 10 \ --cmd "$decode_cmd" --config conf/decode.config \ - --transform-dir exp/tri4/decode_eval${eval_num}_csj_tg $graph_dir \ - data/eval${eval_num} exp/sgmm2_5/decode_eval${eval_num}_csj_tg + --transform-dir 
exp/tri4/decode_eval${eval_num}_csj $graph_dir \ + data/eval${eval_num} exp/sgmm2_5/decode_eval${eval_num}_csj done wait @@ -48,10 +50,10 @@ steps/train_mmi_sgmm2.sh --cmd "$decode_cmd" \ for eval_num in `seq 3`; do for iter in 1 2 3 4; do steps/decode_sgmm2_rescore.sh --cmd "$decode_cmd" --iter $iter \ - --transform-dir exp/tri4/decode_eval${eval_num}_csj_tg \ + --transform-dir exp/tri4/decode_eval${eval_num}_csj \ data/lang_csj_tg data/eval${eval_num} \ - exp/sgmm2_5/decode_eval${eval_num}_csj_tg \ - exp/sgmm2_5_mmi_b0.1/decode_eval${eval_num}_csj_tg_it$iter + exp/sgmm2_5/decode_eval${eval_num}_csj \ + exp/sgmm2_5_mmi_b0.1/decode_eval${eval_num}_csj_it$iter done done wait diff --git a/egs/csj/s5/local/score.sh b/egs/csj/s5/local/score.sh deleted file mode 100644 index 05981ab99..000000000 --- a/egs/csj/s5/local/score.sh +++ /dev/null @@ -1 +0,0 @@ -link ../steps/score_kaldi.sh \ No newline at end of file diff --git a/egs/csj/s5/local/score.sh b/egs/csj/s5/local/score.sh new file mode 120000 index 000000000..0afefc315 --- /dev/null +++ b/egs/csj/s5/local/score.sh @@ -0,0 +1 @@ +../steps/score_kaldi.sh \ No newline at end of file diff --git a/egs/csj/s5/local/wer_hyp_filter b/egs/csj/s5/local/wer_hyp_filter index c29113173..d07b0cf4c 100644 --- a/egs/csj/s5/local/wer_hyp_filter +++ b/egs/csj/s5/local/wer_hyp_filter @@ -3,5 +3,5 @@ perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; } while() { @A = split(" ", $_); $id = shift @A; print "$id "; foreach $a (@A) { if (!defined $bad{$a}){ @W=split(/\+/,$a); $word=$W[0]; { print "$word "; }}} print "\n"; }' \ - '' + '' '' diff --git a/egs/csj/s5/local/wer_output_filter b/egs/csj/s5/local/wer_output_filter index c29113173..d07b0cf4c 100644 --- a/egs/csj/s5/local/wer_output_filter +++ b/egs/csj/s5/local/wer_output_filter @@ -3,5 +3,5 @@ perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; } while() { @A = split(" ", $_); $id = shift @A; print "$id "; foreach $a (@A) { if (!defined $bad{$a}){ @W=split(/\+/,$a); $word=$W[0]; { print "$word 
"; }}} print "\n"; }' \ - '' + '' '' diff --git a/egs/csj/s5/local/wer_ref_filter b/egs/csj/s5/local/wer_ref_filter index c29113173..d07b0cf4c 100644 --- a/egs/csj/s5/local/wer_ref_filter +++ b/egs/csj/s5/local/wer_ref_filter @@ -3,5 +3,5 @@ perl -e 'foreach $w (@ARGV) { $bad{$w} = 1; } while() { @A = split(" ", $_); $id = shift @A; print "$id "; foreach $a (@A) { if (!defined $bad{$a}){ @W=split(/\+/,$a); $word=$W[0]; { print "$word "; }}} print "\n"; }' \ - '' + '' '' diff --git a/egs/csj/s5/path.sh b/egs/csj/s5/path.sh index 41f65d7a0..edf752674 100644 --- a/egs/csj/s5/path.sh +++ b/egs/csj/s5/path.sh @@ -5,4 +5,4 @@ export PATH=$PWD/utils/:$KALDI_ROOT/src/bin:$KALDI_ROOT/tools/openfst/bin:$KALDI export PATH=$PATH:/usr/local/cuda/bin export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:/usr/local/cuda/bin/nvcc -export LC_ALL=C +#export LC_ALL=C diff --git a/egs/csj/s5/run.sh b/egs/csj/s5/run.sh index 6c0af8106..fa5355f86 100644 --- a/egs/csj/s5/run.sh +++ b/egs/csj/s5/run.sh @@ -19,14 +19,14 @@ set -e # exit on error #: << '#SKIP' -if [ ! -d data/csj-data/eval ]; then +if [ ! -e data ]; then echo "CSJ transcription file does not exist" #local/csj_make_trans/csj_automake.sh || exit 1; - local/csj_make_trans/csj_automake.sh /database/NINJAL/CSJ/ data/csj-data 2>/dev/null + local/csj_make_trans/csj_automake.sh /database/NINJAL/CSJ data/csj-data 2>/dev/null fi wait -[ ! -d data/csj-data/eval ]\ +[ ! -e data/csj-data/.done_make_all ]\ && echo "Not finished processing CSJ data" && exit 1; # Prepare Corpus of Spontaneous Japanese (CSJ) data. @@ -36,7 +36,7 @@ local/csj_data_prep.sh data/csj-data/ local/csj_prepare_dict.sh -utils/prepare_lang.sh data/local/dict_nosp "" data/local/lang_nosp data/lang_nosp +utils/prepare_lang.sh --num-sil-states 4 data/local/dict_nosp "" data/local/lang_nosp data/lang_nosp # Now train the language models. 
local/csj_train_lms.sh data/local/train/text data/local/dict_nosp/lexicon.txt data/local/lm @@ -155,7 +155,7 @@ $train_cmd $graph_dir/mkgraph.log \ utils/mkgraph.sh data/lang_nosp_csj_tg exp/tri3 $graph_dir for eval_num in `seq 3`; do steps/decode.sh --nj 10 --cmd "$decode_cmd" --config conf/decode.config \ - $graph_dir data/eval${eval_num} exp/tri3/decode_eval${eval_num}_csj + $graph_dir data/eval${eval_num} exp/tri3/decode_eval${eval_num}_csj_nosp done # Now we compute the pronunciation and silence probabilities from training data, diff --git a/egs/csj/s5/steps b/egs/csj/s5/steps deleted file mode 100644 index 5e5222743..000000000 --- a/egs/csj/s5/steps +++ /dev/null @@ -1 +0,0 @@ -link ../../wsj/s5/steps/ \ No newline at end of file diff --git a/egs/csj/s5/steps b/egs/csj/s5/steps new file mode 120000 index 000000000..6e99bf5b5 --- /dev/null +++ b/egs/csj/s5/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/csj/s5/utils b/egs/csj/s5/utils deleted file mode 100644 index 1ebeb7c52..000000000 --- a/egs/csj/s5/utils +++ /dev/null @@ -1 +0,0 @@ -link ../../wsj/s5/utils/ \ No newline at end of file diff --git a/egs/csj/s5/utils b/egs/csj/s5/utils new file mode 120000 index 000000000..b24088521 --- /dev/null +++ b/egs/csj/s5/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file