This commit is contained in:
kdavis-mozilla 2020-04-29 06:47:13 +02:00
Родитель 6329684b58
Коммит 3fe94ae482
5 изменённых файлов: 11 добавлений и 11 удалений

Просмотреть файл

@@ -39,7 +39,7 @@ then
 wget -nc http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt.ro-en.ro.gz
 # extract data
-tar -xf ro-en.tgz
+tar -xf ro-en.tgz --no-same-owner
 unzip SETIMES2.ro-en.txt.zip
 gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz

Просмотреть файл

@@ -9,7 +9,7 @@ wget -nc http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt
 wget -nc http://data.statmt.org/rsennrich/wmt16_backtranslations/ro-en/corpus.bt.ro-en.ro.gz
 # extract data
-tar -xf ro-en.tgz
+tar -xf ro-en.tgz --no-same-owner
 unzip SETIMES2.ro-en.txt.zip
 gzip -d corpus.bt.ro-en.en.gz corpus.bt.ro-en.ro.gz

Просмотреть файл

@@ -9,9 +9,9 @@ wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
 wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz
 # extract data
-tar -xf de-en.tgz
-tar -xf training-parallel-commoncrawl.tgz
-tar -xf training-parallel-nc-v12.tgz
+tar -xf de-en.tgz --no-same-owner
+tar -xf training-parallel-commoncrawl.tgz --no-same-owner
+tar -xf training-parallel-nc-v12.tgz --no-same-owner
 # create corpus files
 cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de

Просмотреть файл

@@ -9,9 +9,9 @@ wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
 wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz
 # extract data
-tar -xf de-en.tgz
-tar -xf training-parallel-commoncrawl.tgz
-tar -xf training-parallel-nc-v12.tgz
+tar -xf de-en.tgz --no-same-owner
+tar -xf training-parallel-commoncrawl.tgz --no-same-owner
+tar -xf training-parallel-nc-v12.tgz --no-same-owner
 # create corpus files
 cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de

Просмотреть файл

@@ -9,9 +9,9 @@ wget -nc http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz
 wget -nc http://data.statmt.org/wmt17/translation-task/training-parallel-nc-v12.tgz
 # extract data
-tar -xf de-en.tgz
-tar -xf training-parallel-commoncrawl.tgz
-tar -xf training-parallel-nc-v12.tgz
+tar -xf de-en.tgz --no-same-owner
+tar -xf training-parallel-commoncrawl.tgz --no-same-owner
+tar -xf training-parallel-nc-v12.tgz --no-same-owner
 # create corpus files
 cat europarl-v7.de-en.de commoncrawl.de-en.de training/news-commentary-v12.de-en.de > corpus.de