Mirror of https://github.com/microsoft/inmt.git

Add Simple Interface features to Keystroke Interface

This commit is contained in:
Parent: c79698af11
Commit: 0aae79d1ae
@@ -1 +1,4 @@
 model/
+*.pyc
+db.sqlite3
+opennmt/.git
BIN InteractiveTranslation/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN InteractiveTranslation/__pycache__/__init__.cpython-37.pyc (binary file not shown)
BIN InteractiveTranslation/__pycache__/custom_azure.cpython-37.pyc (binary file not shown)
BIN InteractiveTranslation/__pycache__/settings.cpython-36.pyc (binary file not shown)
BIN InteractiveTranslation/__pycache__/settings.cpython-37.pyc (binary file not shown)
BIN InteractiveTranslation/__pycache__/urls.cpython-36.pyc (binary file not shown)
BIN InteractiveTranslation/__pycache__/urls.cpython-37.pyc (binary file not shown)
BIN InteractiveTranslation/__pycache__/wsgi.cpython-36.pyc (binary file not shown)
BIN InteractiveTranslation/__pycache__/wsgi.cpython-37.pyc (binary file not shown)
@@ -146,8 +146,8 @@ STATICFILES_DIRS = [
 LOGIN_REDIRECT_URL = '/dashboard'
 LOGOUT_REDIRECT_URL = '/accounts/login'
 
-DEFAULT_FILE_STORAGE = 'InteractiveTranslation.custom_azure.AzureMediaStorage'
-STATICFILES_STORAGE = 'InteractiveTranslation.custom_azure.AzureStaticStorage'
+# DEFAULT_FILE_STORAGE = 'InteractiveTranslation.custom_azure.AzureMediaStorage'
+# STATICFILES_STORAGE = 'InteractiveTranslation.custom_azure.AzureStaticStorage'
 
 STATIC_LOCATION = "static"
 MEDIA_LOCATION = "media"
@@ -22,7 +22,7 @@ urlpatterns = [
     path('', include('mt.urls')),
     path('simple/', include('mtsimple.urls')),
     path('para/', include('mtpara.urls')),
-    path('gpt/', include('gpt.urls')),
+    # path('gpt/', include('gpt.urls')),
     path('admin/', admin.site.urls),
     path('accounts/', include('django.contrib.auth.urls')),
 ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT)
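The hunk above only comments out one `include()`, which removes the whole `gpt/` URL subtree in one line. Conceptually, `urlpatterns` is a prefix table; the toy sketch below illustrates that idea without Django (the `routes` dict and `resolve` helper are hypothetical, not the project's real resolver).

```python
# Toy prefix router: commenting out one entry disables a whole URL subtree.
# Hypothetical sketch only; Django's URLconf resolver is more sophisticated.
routes = {
    '': 'mt.urls',
    'simple/': 'mtsimple.urls',
    'para/': 'mtpara.urls',
    # 'gpt/': 'gpt.urls',   # disabled, as in the commit above
    'admin/': 'admin.site.urls',
}

def resolve(path):
    """Return the handler registered for the longest matching prefix."""
    best = None
    for prefix, target in routes.items():
        if path.startswith(prefix) and (best is None or len(prefix) > len(best[0])):
            best = (prefix, target)
    return best[1] if best else None

print(resolve('simple/translate'))
```

With the `gpt/` entry commented out, a request like `gpt/anything` falls through to the catch-all `''` prefix instead of the gpt app.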
README.md (12 lines changed)

@@ -1,7 +1,7 @@
 #Introduction
 Interactive Machine Translation app uses Django and jQuery as its tech stack. Please refer to their docs for any doubts.
 
 # Installation Instructions
 1. Clone the repository locally
 2. For dependencies - `pip install -r requirements.txt`
+3. To run the server - `python manage.py runserver`
BIN db.sqlite3 (binary file not shown)
BIN gpt/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN gpt/__pycache__/__init__.cpython-37.pyc (binary file not shown)
BIN gpt/__pycache__/admin.cpython-36.pyc (binary file not shown)
BIN gpt/__pycache__/admin.cpython-37.pyc (binary file not shown)
BIN gpt/__pycache__/models.cpython-36.pyc (binary file not shown)
BIN gpt/__pycache__/models.cpython-37.pyc (binary file not shown)
BIN gpt/__pycache__/urls.cpython-36.pyc (binary file not shown)
BIN gpt/__pycache__/urls.cpython-37.pyc (binary file not shown)
BIN gpt/__pycache__/views.cpython-36.pyc (binary file not shown)
BIN gpt/__pycache__/views.cpython-37.pyc (binary file not shown)
BIN gpt/migrations/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN gpt/migrations/__pycache__/__init__.cpython-37.pyc (binary file not shown)
BIN mt/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN mt/__pycache__/__init__.cpython-37.pyc (binary file not shown)
BIN mt/__pycache__/admin.cpython-36.pyc (binary file not shown)
BIN mt/__pycache__/admin.cpython-37.pyc (binary file not shown)
BIN mt/__pycache__/models.cpython-36.pyc (binary file not shown)
BIN mt/__pycache__/models.cpython-37.pyc (binary file not shown)
BIN mt/__pycache__/urls.cpython-36.pyc (binary file not shown)
BIN mt/__pycache__/urls.cpython-37.pyc (binary file not shown)
BIN mt/__pycache__/views.cpython-36.pyc (binary file not shown)
BIN mt/__pycache__/views.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0001_initial.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0001_initial.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0002_auto_20190422_2106.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0002_auto_20190422_2106.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0003_auto_20190422_2118.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0003_auto_20190422_2118.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0004_auto_20190423_1902.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0004_auto_20190423_1902.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0005_auto_20190424_1149.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0005_auto_20190424_1149.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0006_corpusdivide.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0006_corpusdivide.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0007_keystrokes_end.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0007_keystrokes_end.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0008_auto_20190425_1226.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0008_auto_20190425_1226.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0009_auto_20190603_1138.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0010_auto_20190605_1301.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0012_auto_20190606_1531.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0013_auto_20190606_1534.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/0014_auto_20190606_1535.cpython-37.pyc (binary file not shown)
BIN mt/migrations/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN mt/migrations/__pycache__/__init__.cpython-37.pyc (binary file not shown)
mt/views.py (48 lines changed)

@@ -206,17 +206,42 @@ def getinput(request):
 
 def quotapos(s, lang="en"):
-    s = re.sub(r"&quot;", r'"', s)
-    return re.sub(r"&#39;", r"'", s)
+    # s = re.sub(r"&quot;", r'"', s)
+    # return re.sub(r"&#39;", r"'", s)
+    return s
+
+def quotaposr(s, lang="en"):
+    # s = re.sub(r"&quot;", r'"', s)
+    # return re.sub(r"&#39;", r"'", s)
+    return s
+
+def quotaposto(s, lang="en"):
+    s = re.sub(r"&quot;", r'"', s)
+    s = re.sub(r"&#39;", r"'", s)
+    s = re.sub(r"(@@ )|(@@ ?$)", r"", s)
+    # This is work in progress to make writing as natural as possible, taking care of spaces before and after certain characters.
+    # s = re.sub(r"(\s+)([!:?,.।\']+)", r"\2", s)
+    # s = re.sub(r"([({\[<]+)(\s+)", r"\1", s)
+    # s = re.sub(r"(\s+)([)}\]>]+)", r"\2", s)
+    return s
 
 def toquotapos(s, lang="en"):
     # if lang=="en":
     s = s.lower()
     s = re.sub(r"([\“\”])", r'"', s)
     s = re.sub(r"([\‘\’])", r"'", s)
     s = re.sub(r"([\ः])", r":", s)
     s = re.sub(r"([-!$%^&*()_+|~=`{}\[\]:\";<>?,.\/#@।]+)", r" \1 ", s)
     s = re.sub(r'"', r'&quot;', s)
-    return re.sub(r"'", r"&#39;", s)
+    s = re.sub(r"'", r"&#39;", s)
+    s = re.sub(r"(\s+)", r" ", s)
+
+    return s
 
 @login_required
 def translate_new(request):
-    L1 = request.GET.get('a').strip()
-    L2 = quotaposr(request.GET.get('b', ""))
+    L1 = toquotapos(request.GET.get('a').strip())
+    L2 = request.GET.get('b', "")
     L2split = L2.split()
 
     langtolangid = request.session['langtolangid']

@@ -236,7 +261,7 @@ def translate_new(request):
         src_dir='',
         batch_size=30,
         attn_debug=True,
-        partial = L2
+        partial = toquotapos(L2)
     )

@@ -245,7 +270,7 @@ def translate_new(request):
         src_dir='',
         batch_size=30,
         attn_debug=False,
-        partial = L2,
+        partial = toquotapos(L2),
         dymax_len = 2,
     )

@@ -268,16 +293,17 @@ def translate_new(request):
     print(predictions)
     seen = set()
     seen_add = seen.add
-    sentence = [quotapos(L2 + x.capitalize()[len(L2):], langspecs[langtolangid]['tgt']) + " " for x in predictions if not (x in seen or seen_add(x))]
+    sentence = [quotaposto(L2 + quotaposto(x).capitalize()[len(L2):], langspecs[langtolangid]['tgt']) + " " for x in predictions if not (x in seen or seen_add(x))]
     # sentence = [x.replace(L2, "") for x in sentence]
     sentence = '\n'.join(sentence)
     if langspecs[langtolangid]['provide_help'] and L2:
-        sentence = quotapos(L2 + pred[0][0].capitalize()[len(L2):], langspecs[langtolangid]['tgt']) + '\n' + L2 + '\n' + sentence
+        sentence = quotaposto(L2 + quotaposto(pred[0][0]).capitalize()[len(L2):], langspecs[langtolangid]['tgt']) + '\n' + L2 + '\n' + sentence
     else:
-        sentence = quotapos(L2 + pred[0][0].capitalize()[len(L2):], langspecs[langtolangid]['tgt']) + '\n' + sentence
+        sentence = quotaposto(L2 + quotaposto(pred[0][0]).capitalize()[len(L2):], langspecs[langtolangid]['tgt']) + '\n' + sentence
 
     print(sentence)
     # print(scores)
-    return JsonResponse({'result': sentence, 'attn': sumattn, 'partial': quotapos(L2)})
+    return JsonResponse({'result': sentence, 'attn': sumattn, 'partial': L2})
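The helpers above escape quotes into HTML entities on the way into the model and map them back (plus strip BPE `@@ ` continuation markers) on the way out, and `translate_new` deduplicates predictions while preserving their ranked order. A minimal sketch of both ideas; the `_sketch` names are mine, and the `&quot;`/`&#39;` patterns are an assumption recovered from the garbled diff rendering:

```python
import re

def toquotapos_sketch(s):
    """Escape quote characters into HTML entities (assumed model-input form)."""
    s = re.sub(r'"', r"&quot;", s)
    return re.sub(r"'", r"&#39;", s)

def quotaposto_sketch(s):
    """Map entities back to characters and drop BPE continuation markers."""
    s = re.sub(r"&quot;", r'"', s)
    s = re.sub(r"&#39;", r"'", s)
    return re.sub(r"(@@ )|(@@ ?$)", r"", s)

def unique_keep_order(predictions):
    """Order-preserving dedup, same idiom as the seen/seen_add trick above."""
    seen = set()
    seen_add = seen.add
    return [x for x in predictions if not (x in seen or seen_add(x))]

print(quotaposto_sketch(toquotapos_sketch('it\'s "fine"')))
print(unique_keep_order(["a", "b", "a", "c"]))
```

The `seen_add = seen.add` binding just avoids re-looking up the method inside the comprehension; the `or seen_add(x)` branch always evaluates falsy, so each first occurrence is kept and recorded in one pass.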
BIN mtpara/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN mtpara/__pycache__/__init__.cpython-37.pyc (binary file not shown)
BIN mtpara/__pycache__/admin.cpython-36.pyc (binary file not shown)
BIN mtpara/__pycache__/admin.cpython-37.pyc (binary file not shown)
BIN mtpara/__pycache__/models.cpython-36.pyc (binary file not shown)
BIN mtpara/__pycache__/models.cpython-37.pyc (binary file not shown)
BIN mtpara/__pycache__/urls.cpython-36.pyc (binary file not shown)
BIN mtpara/__pycache__/urls.cpython-37.pyc (binary file not shown)
BIN mtpara/__pycache__/views.cpython-36.pyc (binary file not shown)
BIN mtpara/__pycache__/views.cpython-37.pyc (binary file not shown)
BIN mtpara/migrations/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN mtpara/migrations/__pycache__/__init__.cpython-37.pyc (binary file not shown)
BIN mtsimple/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN mtsimple/__pycache__/__init__.cpython-37.pyc (binary file not shown)
BIN mtsimple/__pycache__/admin.cpython-36.pyc (binary file not shown)
BIN mtsimple/__pycache__/admin.cpython-37.pyc (binary file not shown)
BIN mtsimple/__pycache__/models.cpython-36.pyc (binary file not shown)
BIN mtsimple/__pycache__/models.cpython-37.pyc (binary file not shown)
BIN mtsimple/__pycache__/urls.cpython-36.pyc (binary file not shown)
BIN mtsimple/__pycache__/urls.cpython-37.pyc (binary file not shown)
BIN mtsimple/__pycache__/views.cpython-36.pyc (binary file not shown)
BIN mtsimple/__pycache__/views.cpython-37.pyc (binary file not shown)
BIN mtsimple/migrations/__pycache__/__init__.cpython-36.pyc (binary file not shown)
BIN mtsimple/migrations/__pycache__/__init__.cpython-37.pyc (binary file not shown)
@@ -46,7 +46,7 @@ langspecs = {
     'hi-en' : {
         'src' : 'hi',
         'tgt' : 'en',
-        'model': 'full_iitb_bpe_hien.pt',
+        'model': 'onmt-hien.pt',
         'indic_code': None,
         'provide_help' : False,
     },
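The hunk above swaps the checkpoint used for the `hi-en` pair; `langspecs` acts as a small per-language-pair registry that the views index into. A minimal sketch of that pattern, with the entry mirroring the diff and the `get_model` helper being hypothetical:

```python
# Per-language-pair registry sketch; the 'hi-en' entry mirrors the diff,
# get_model is a hypothetical convenience lookup.
langspecs = {
    'hi-en': {
        'src': 'hi',
        'tgt': 'en',
        'model': 'onmt-hien.pt',   # checkpoint swapped in by this commit
        'indic_code': None,
        'provide_help': False,
    },
}

def get_model(pair):
    """Return the checkpoint file configured for a language pair."""
    return langspecs[pair]['model']

print(get_model('hi-en'))
```

Keeping model names, script codes, and feature flags in one dict keyed by pair means a model swap like this commit's is a one-line change.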
opennmt (submodule, 1 line changed)

@@ -1 +0,0 @@
-Subproject commit 87e5e387642226bc7a05f595890e298d18a07b2f
@@ -0,0 +1,112 @@
+# repo-specific stuff
+pred.txt
+multi-bleu.perl
+*.pt
+\#*#
+.idea
+*.sublime-*
+.DS_Store
+data/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# Tensorboard
+runs/
@@ -0,0 +1,80 @@
+dist: xenial
+language: python
+python:
+  - "2.7"
+  - "3.5"
+git:
+  depth: false
+addons:
+  apt:
+    packages:
+      # Additional dependencies for im2text and speech2text
+      - libsox-dev
+      - libsox-fmt-all
+      - sox
+before_install:
+  # Install CPU version of PyTorch.
+  - if [[ $TRAVIS_PYTHON_VERSION == 2.7 ]]; then pip install https://download.pytorch.org/whl/cpu/torch-1.1.0-cp27-cp27mu-linux_x86_64.whl; fi
+  - if [[ $TRAVIS_PYTHON_VERSION == 3.5 ]]; then pip install https://download.pytorch.org/whl/cpu/torch-1.1.0-cp35-cp35m-linux_x86_64.whl; fi
+  - pip install -r requirements.txt
+  - pip install -r requirements.opt.txt
+install:
+  - python setup.py install
+
+# Please also add tests to `test/pull_request_chk.sh`.
+script:
+  - wget -O /tmp/im2text.tgz http://lstm.seas.harvard.edu/latex/im2text_small.tgz; tar zxf /tmp/im2text.tgz -C /tmp/; head /tmp/im2text/src-train.txt > /tmp/im2text/src-train-head.txt; head /tmp/im2text/tgt-train.txt > /tmp/im2text/tgt-train-head.txt; head /tmp/im2text/src-val.txt > /tmp/im2text/src-val-head.txt; head /tmp/im2text/tgt-val.txt > /tmp/im2text/tgt-val-head.txt
+  - wget -O /tmp/speech.tgz http://lstm.seas.harvard.edu/latex/speech.tgz; tar zxf /tmp/speech.tgz -C /tmp/; head /tmp/speech/src-train.txt > /tmp/speech/src-train-head.txt; head /tmp/speech/tgt-train.txt > /tmp/speech/tgt-train-head.txt; head /tmp/speech/src-val.txt > /tmp/speech/src-val-head.txt; head /tmp/speech/tgt-val.txt > /tmp/speech/tgt-val-head.txt
+  - wget -O /tmp/test_model_speech.pt http://lstm.seas.harvard.edu/latex/model_step_2760.pt
+  - wget -O /tmp/test_model_im2text.pt http://lstm.seas.harvard.edu/latex/test_model_im2text.pt
+  - python -m unittest discover
+  # test nmt preprocessing
+  - python preprocess.py -train_src data/src-train.txt -train_tgt data/tgt-train.txt -valid_src data/src-val.txt -valid_tgt data/tgt-val.txt -save_data /tmp/data -src_vocab_size 1000 -tgt_vocab_size 1000 && rm -rf /tmp/data*.pt
+  # test im2text preprocessing
+  - python preprocess.py -data_type img -shard_size 100 -src_dir /tmp/im2text/images -train_src /tmp/im2text/src-train.txt -train_tgt /tmp/im2text/tgt-train.txt -valid_src /tmp/im2text/src-val.txt -valid_tgt /tmp/im2text/tgt-val.txt -save_data /tmp/im2text/data && rm -rf /tmp/im2text/data*.pt
+  # test speech2text preprocessing
+  - python preprocess.py -data_type audio -shard_size 300 -src_dir /tmp/speech/an4_dataset -train_src /tmp/speech/src-train.txt -train_tgt /tmp/speech/tgt-train.txt -valid_src /tmp/speech/src-val.txt -valid_tgt /tmp/speech/tgt-val.txt -save_data /tmp/speech/data && rm -rf /tmp/speech/data*.pt
+  # test nmt translation
+  - head data/src-test.txt > /tmp/src-test.txt; python translate.py -model onmt/tests/test_model.pt -src /tmp/src-test.txt -verbose
+  # test nmt ensemble translation
+  - head data/src-test.txt > /tmp/src-test.txt; python translate.py -model onmt/tests/test_model.pt onmt/tests/test_model.pt -src /tmp/src-test.txt -verbose
+  # test im2text translation
+  - head /tmp/im2text/src-val.txt > /tmp/im2text/src-val-head.txt; head /tmp/im2text/tgt-val.txt > /tmp/im2text/tgt-val-head.txt; python translate.py -data_type img -src_dir /tmp/im2text/images -model /tmp/test_model_im2text.pt -src /tmp/im2text/src-val-head.txt -tgt /tmp/im2text/tgt-val-head.txt -verbose -out /tmp/im2text/trans
+  # test speech2text translation
+  - head /tmp/speech/src-val.txt > /tmp/speech/src-val-head.txt; head /tmp/speech/tgt-val.txt > /tmp/speech/tgt-val-head.txt; python translate.py -data_type audio -src_dir /tmp/speech/an4_dataset -model /tmp/test_model_speech.pt -src /tmp/speech/src-val-head.txt -tgt /tmp/speech/tgt-val-head.txt -verbose -out /tmp/speech/trans; diff /tmp/speech/tgt-val-head.txt /tmp/speech/trans
+  # test nmt preprocessing and training
+  - head -500 data/src-val.txt > /tmp/src-val.txt; head -500 data/tgt-val.txt > /tmp/tgt-val.txt; python preprocess.py -train_src /tmp/src-val.txt -train_tgt /tmp/tgt-val.txt -valid_src /tmp/src-val.txt -valid_tgt /tmp/tgt-val.txt -save_data /tmp/q -src_vocab_size 1000 -tgt_vocab_size 1000; python train.py -data /tmp/q -rnn_size 2 -batch_size 2 -word_vec_size 5 -report_every 5 -rnn_size 10 -train_steps 10 && rm -rf /tmp/q*.pt
+  # test nmt preprocessing w/ sharding and training w/copy
+  - head -50 data/src-val.txt > /tmp/src-val.txt; head -50 data/tgt-val.txt > /tmp/tgt-val.txt; python preprocess.py -train_src /tmp/src-val.txt -train_tgt /tmp/tgt-val.txt -valid_src /tmp/src-val.txt -valid_tgt /tmp/tgt-val.txt -shard_size 25 -dynamic_dict -save_data /tmp/q -src_vocab_size 1000 -tgt_vocab_size 1000; python train.py -data /tmp/q -rnn_size 2 -batch_size 2 -word_vec_size 5 -report_every 5 -rnn_size 10 -copy_attn -train_steps 10 -pool_factor 10 && rm -rf /tmp/q*.pt
+
+  # test im2text preprocessing and training
+  - head -50 /tmp/im2text/src-val.txt > /tmp/im2text/src-val-head.txt; head -50 /tmp/im2text/tgt-val.txt > /tmp/im2text/tgt-val-head.txt; python preprocess.py -data_type img -src_dir /tmp/im2text/images -train_src /tmp/im2text/src-val-head.txt -train_tgt /tmp/im2text/tgt-val-head.txt -valid_src /tmp/im2text/src-val-head.txt -valid_tgt /tmp/im2text/tgt-val-head.txt -save_data /tmp/im2text/q -tgt_seq_length 100; python train.py -model_type img -data /tmp/im2text/q -rnn_size 2 -batch_size 2 -word_vec_size 5 -report_every 5 -rnn_size 10 -train_steps 10 -pool_factor 10 && rm -rf /tmp/im2text/q*.pt
+  # test speech2text preprocessing and training
+  - head -100 /tmp/speech/src-val.txt > /tmp/speech/src-val-head.txt; head -100 /tmp/speech/tgt-val.txt > /tmp/speech/tgt-val-head.txt; python preprocess.py -data_type audio -src_dir /tmp/speech/an4_dataset -train_src /tmp/speech/src-val-head.txt -train_tgt /tmp/speech/tgt-val-head.txt -valid_src /tmp/speech/src-val-head.txt -valid_tgt /tmp/speech/tgt-val-head.txt -save_data /tmp/speech/q; python train.py -model_type audio -data /tmp/speech/q -rnn_size 2 -batch_size 2 -word_vec_size 5 -report_every 5 -rnn_size 10 -train_steps 10 -pool_factor 10 && rm -rf /tmp/speech/q*.pt
+  # test nmt translation
+  - python translate.py -model onmt/tests/test_model2.pt -src data/morph/src.valid -verbose -batch_size 10 -beam_size 10 -tgt data/morph/tgt.valid -out /tmp/trans; diff data/morph/tgt.valid /tmp/trans
+  # test nmt translation with random sampling
+  - python translate.py -model onmt/tests/test_model2.pt -src data/morph/src.valid -verbose -batch_size 10 -beam_size 1 -seed 1 -random_sampling_topk "-1" -random_sampling_temp 0.0001 -tgt data/morph/tgt.valid -out /tmp/trans; diff data/morph/tgt.valid /tmp/trans
+  # test tool
+  - PYTHONPATH=$PYTHONPATH:. python tools/extract_embeddings.py -model onmt/tests/test_model.pt
+
+env:
+  global:
+    # Doctr deploy key for OpenNMT/OpenNMT-py
+    - secure: "gL0Soefo1cQgAqwiHUrlNyZd/+SI1eJAAjLD3BEDQWXW160eXyjQAAujGgJoCirjOM7cPHVwLzwmK3S7Y3PVM3JOZguOX5Yl4uxMh/mhiEM+RG77SZyv4OGoLFsEQ8RTvIdYdtP6AwyjlkRDXvZql88TqFNYjpXDu8NG+JwEfiIoGIDYxxZ5SlbrZN0IqmQSZ4/CsV6VQiuq99Jn5kqi4MnUZBTcmhqjaztCP1omvsMRdbrG2IVhDKQOCDIO0kaPJrMy2SGzP4GV7ar52bdBtpeP3Xbm6ZOuhDNfds7M/OMHp1wGdl7XwKtolw9MeXhnGBC4gcrqhhMfcQ6XtfVLMLnsB09Ezl3FXX5zWgTB5Pm0X6TgnGrMA25MAdVqKGJpfqZxOKTh4EMb04b6OXrVbxZ88mp+V0NopuxwlTPD8PMfYLWlTe9chh1BnT0iQlLqeA4Hv3+NdpiFb4aq3V3cWTTgMqOoWSGq4t318pqIZ3qbBXBq12DLFgO5n6+M6ZrdxbDUGQvgh8nAiZcIEdodKJ4ABHi1SNCeWOzCoedUdegcbjShHfkMVmNKrncB18aRWwQ3GQJ5qdkjgJmC++uZmkS6+GPM8UmmAy1ZIkRW0aWiitjG6teqtvUHOofNd/TCxX4bhnxAj+mtVIrARCE/ci8topJ6uG4wVJ1TrIkUlAY="
+
+matrix:
+  include:
+    - env: LINT_CHECK
+      python: "2.7"
+      install: pip install flake8 pep8-naming==0.7.0
+      script: flake8
+    - python: "3.5"
+      install:
+        - python setup.py install
+        - pip install doctr
+      script:
+        - pip install -r docs/requirements.txt
+        - set -e
+        - cd docs/ && make html && cd ..
+        - doctr deploy --built-docs docs/build/html/ .
@@ -0,0 +1,132 @@
+
+**Notes on versioning**
+
+
+## [Unreleased]
+### Fixes and improvements
+
+## [0.9.1](https://github.com/OpenNMT/OpenNMT-py/tree/0.9.1) (2019-06-13)
+* New mechanism for MultiGPU training, "1 batch producer / multi batch consumers",
+  resulting in big memory savings when handling huge datasets
+* New APEX AMP (mixed precision) API
+* Option to overwrite shards when preprocessing
+* Small fixes and add-ons
+
+## [0.9.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.9.0) (2019-05-16)
+* Faster vocab building when processing shards (no reloading)
+* New data weighting feature
+* New dropout scheduler
+* Small fixes and add-ons
+
+## [0.8.2](https://github.com/OpenNMT/OpenNMT-py/tree/0.8.2) (2019-02-16)
+* Update documentation and Library example
+* Revamp args
+* Bug fixes, save moving average in FP32
+* Allow FP32 inference for FP16 models
+
+## [0.8.1](https://github.com/OpenNMT/OpenNMT-py/tree/0.8.1) (2019-02-12)
+* Update documentation
+* Random sampling scores fixes
+* Bug fixes
+
+## [0.8.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.8.0) (2019-02-09)
+* Many fixes and code cleaning, thanks @flauted, @guillaumekln
+* Datasets code refactor (thanks @flauted); you need to re-preprocess datasets
+
+### New features
+* FP16 support: experimental, using Apex; checkpoints may break in a future version
+* Continuous exponential moving average (thanks @francoishernandez, and Marian)
+* Relative position encoding (thanks @francoishernandez, and Google T2T)
+* Deprecate the old beam search; fast batched beam search supports all options
+
+## [0.7.2](https://github.com/OpenNMT/OpenNMT-py/tree/0.7.2) (2019-01-31)
+* Many fixes and code cleaning, thanks @bpopeters, @flauted, @guillaumekln
+
+### New features
+* Multilevel fields for better handling of text feature embeddings
+
+## [0.7.1](https://github.com/OpenNMT/OpenNMT-py/tree/0.7.1) (2019-01-24)
+* Many fixes and code refactoring, thanks @bpopeters, @flauted, @guillaumekln
+
+### New features
+* Random sampling, thanks @daphnei
+* Enable sharding for huge files at translation
+
+## [0.7.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.7.0) (2019-01-02)
+* Many fixes and code refactoring, thanks @benopeters
+* Migrated to PyTorch 1.0
+
+## [0.6.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.6.0) (2018-11-28)
+* Many fixes and code improvements
+* New: ability to load a yml config file. See examples in the config folder.
+
+## [0.5.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.5.0) (2018-10-24)
+* Fixed advance n_best beam in translate_batch_fast
+* Fixed removal of valid-set vocab from total vocab
+* New: ability to reset the optimizer when using train_from
+* New: create_vocabulary tool + fix when loading existing vocab
+
+## [0.4.1](https://github.com/OpenNMT/OpenNMT-py/tree/0.4.1) (2018-10-11)
+* Fixed preprocessing file names, cleaning intermediary files
+
+## [0.4.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.4.0) (2018-10-08)
+* Fixed Speech2Text training (thanks Yuntian)
+* Removed -max_shard_size, replaced by -shard_size = number of examples in a shard.
+  Default value = 1M, which works fine in most text dataset cases (and avoids RAM OOM in most cases).
+
+## [0.3.0](https://github.com/OpenNMT/OpenNMT-py/tree/0.3.0) (2018-09-27)
+* Now requires PyTorch 0.4.1
+* Multi-node multi-GPU with Torch Distributed
+  New options are:
+  -master_ip: IP address of the master node
+  -master_port: port number of the master node
+  -world_size: total number of processes to be run (total GPUs across all nodes)
+  -gpu_ranks: list of indices of processes across all nodes
+* gpuid is deprecated.
+  See examples in https://github.com/OpenNMT/OpenNMT-py/blob/master/docs/source/FAQ.md
+* Fixes to img2text, now working
+* New sharding based on number of examples
+* Fixes to avoid 0.4.1 deprecated functions
+
+## [0.2.1](https://github.com/OpenNMT/OpenNMT-py/tree/0.2.1) (2018-08-31)
+
+### Fixes and improvements
+* First compatibility steps with PyTorch 0.4.1 (non-breaking)
+* Fix TranslationServer (when various requests try to load the same model at the same time)
+* Fix StopIteration error (Python 3.7)
+
+### New features
+* Ensemble at inference (thanks @Waino)
+
+## [0.2](https://github.com/OpenNMT/OpenNMT-py/tree/v0.2) (2018-08-28)
+
+### Improvements
+* Compatibility fixes with PyTorch 0.4 / torchtext 0.3
+* Multi-GPU based on Torch Distributed
+* Average Attention Network (AAN) for the Transformer (thanks @francoishernandez)
+* New fast beam search (see -fast in translate.py) (thanks @guillaumekln)
+* Sparse attention / sparsemax (thanks to @bpopeters)
|
||||
* Refactoring of many parts of the code base:
|
||||
- change from -epoch to -train_steps -valid_steps (see opts.py)
|
||||
- reorg of the logic train => train_multi / train_single => trainer
|
||||
* Many fixes / improvements in the translationserver (thanks @pltrdy @francoishernandez)
|
||||
* fix BPTT
|
||||
|
||||
## [0.1](https://github.com/OpenNMT/OpenNMT-py/tree/v0.1) (2018-06-08)
|
||||
|
||||
### First and Last Release using Pytorch 0.3.x
|
||||
|
||||
|
|
@@ -0,0 +1,88 @@
# Contributors

OpenNMT-py is a community-developed project and we love developer contributions.

## Guidelines

Before sending a PR, please go through this checklist first:

- Please run `onmt/tests/pull_request_chk.sh` and fix any errors. When adding new functionality, also add tests to this script. Included checks:
  1. flake8 check for coding style;
  2. unittest;
  3. continuous integration tests listed in `.travis.yml`.
- When adding or modifying a class constructor, please keep the argument naming style consistent with its superclass in PyTorch.
- If your change is based on a paper, please include a clear comment and reference in the code (more on that below).

### Docstrings

Above all, try to follow the Google docstring format
([Napoleon example](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html),
[Google styleguide](http://google.github.io/styleguide/pyguide.html)).
This makes it easy to include your contributions in the Sphinx documentation. And, do feel free
to autodoc your contributions in the API ``.rst`` files in the `docs/source` folder! If you do, check that
your additions look right.

```bash
cd docs
# install some dependencies if necessary:
# recommonmark, sphinx_rtd_theme, sphinxcontrib-bibtex
make html
firefox build/html/main.html  # or your browser of choice
```

Some particular advice:

- Try to follow the Python 3 [``typing`` module](https://docs.python.org/3/library/typing.html) conventions when documenting types.
  - Exception: use "or" instead of unions for more readability.
- For external types, use the full "import name". Common abbreviations (e.g. ``np``) are acceptable.
  For ``torch.Tensor`` types, the ``torch.`` is optional.
- Please don't use tics like `` (`str`) `` or rst directives like `` (:obj:`str`) ``. Napoleon handles types
  very well without additional help, so avoid the clutter.
- [Google docstrings don't support multiple returns](https://stackoverflow.com/questions/29221551/can-sphinx-napoleon-document-function-returning-multiple-arguments).
  For multiple returns, the following works well with Sphinx and is still very readable.

  ```python
  def foo(a, b):
      """This is my docstring.

      Args:
          a (object): Something.
          b (class): Another thing.

      Returns:
          (object, class):

          * a: Something or rather with a long
            description that spills over.
          * b: And another thing.
      """

      return a, b
  ```

- When citing a paper, avoid directly linking in the docstring! Add a BibTeX entry to `docs/source/refs.bib`.
  E.g., to cite "Attention Is All You Need", visit [arXiv](https://arxiv.org/abs/1706.03762), choose the
  [bibtex](https://dblp.uni-trier.de/rec/bibtex/journals/corr/VaswaniSPUJGKP17) link, search `docs/source/refs.bib`
  using `CTRL-F` for `DBLP:journals/corr/VaswaniSPUJGKP17`, and if you do not find it, copy-paste the
  citation into `refs.bib`. Then, in your docstring, use ``:cite:`DBLP:journals/corr/VaswaniSPUJGKP17` ``.
  - However, a link is better than nothing.
- Please document tensor shapes. Prefer the format
  ``` ``(a, b, c)`` ```. This style is easy to read, allows using ``x`` for multiplication, and is common
  (PyTorch uses a few variations on the parentheses format, AllenNLP uses exactly this format, Fairseq uses
  the parentheses format with single ticks).
  - Again, a different style is better than no shape documentation.
- Please avoid unnecessary space characters, try to capitalize, and try to punctuate.

  For multi-line docstrings, add a blank line after the closing ``"""``.
  Don't use a blank line before the closing quotes.

  ``""" not this """`` ``"""This."""``

  ```python
  """
  Not this.
  """
  ```
  ```python
  """This."""
  ```

  This note is the least important. Focus on content first, but remember that consistent docs look good.
- Be sensible about the first line. Generally, one stand-alone summary line (per the Google guidelines) is good.
  Sometimes, it's better to cut directly to the args or an extended description. It's always acceptable to have a
  "trailing" citation.
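To make the shape convention above concrete, here is a hypothetical docstring that follows it. The function and its list-based "tensors" are purely illustrative, not part of the code base:

```python
def dot_scores(query, keys):
    """Compute dot-product attention scores for one batch element.

    Args:
        query (list of float): Decoder state of shape ``(dim,)``.
        keys (list of list of float): Encoder states of shape ``(src_len, dim)``.

    Returns:
        list of float: One score per source position, shape ``(src_len,)``.
    """

    return [sum(q * k for q, k in zip(query, key)) for key in keys]
```

For example, `dot_scores([1.0, 0.0], [[1.0, 2.0], [3.0, 4.0]])` returns `[1.0, 3.0]`.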
@@ -0,0 +1,2 @@
FROM pytorch/pytorch:latest
RUN git clone https://github.com/OpenNMT/OpenNMT-py.git && cd OpenNMT-py && pip install -r requirements.txt && python setup.py install
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2017-Present OpenNMT

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,175 @@
# OpenNMT-py: Open-Source Neural Machine Translation

[![Build Status](https://travis-ci.org/OpenNMT/OpenNMT-py.svg?branch=master)](https://travis-ci.org/OpenNMT/OpenNMT-py)
[![Run on FH](https://img.shields.io/badge/Run%20on-FloydHub-blue.svg)](https://floydhub.com/run?template=https://github.com/OpenNMT/OpenNMT-py)

This is a [PyTorch](https://github.com/pytorch/pytorch)
port of [OpenNMT](https://github.com/OpenNMT/OpenNMT),
an open-source (MIT) neural machine translation system. It is designed to be research friendly for trying out new ideas in translation, summarization, image-to-text, morphology, and many other domains. Some companies have proven the code to be production ready.

We love contributions. Please consult the Issues page for any posts tagged [Contributions Welcome](https://github.com/OpenNMT/OpenNMT-py/issues?q=is%3Aissue+is%3Aopen+label%3A%22contributions+welcome%22).

<center style="padding: 40px"><img width="70%" src="http://opennmt.github.io/simple-attn.png" /></center>

Before raising an issue, make sure you read the requirements and the documentation examples.

Unless there is a bug, please use the [Forum](http://forum.opennmt.net) or [Gitter](https://gitter.im/OpenNMT/OpenNMT-py) to ask questions.

Table of Contents
=================
* [Full Documentation](http://opennmt.net/OpenNMT-py/)
* [Requirements](#requirements)
* [Features](#features)
* [Quickstart](#quickstart)
* [Run on FloydHub](#run-on-floydhub)
* [Acknowledgements](#acknowledgements)
* [Citation](#citation)

## Requirements

All dependencies can be installed via:

```bash
pip install -r requirements.txt
```

NOTE: If you hit a MemoryError during installation, try:

```bash
pip install -r requirements.txt --no-cache-dir
```

Note that we currently only support PyTorch 1.1 (it should also work with 1.0).

## Features

- [Data preprocessing](http://opennmt.net/OpenNMT-py/options/preprocess.html)
- [Inference (translation) with batching and beam search](http://opennmt.net/OpenNMT-py/options/translate.html)
- [Multiple source and target RNN (lstm/gru) types and attention (dotprod/mlp) types](http://opennmt.net/OpenNMT-py/options/train.html#model-encoder-decoder)
- [TensorBoard](http://opennmt.net/OpenNMT-py/options/train.html#logging)
- [Source word features](http://opennmt.net/OpenNMT-py/options/train.html#model-embeddings)
- [Pretrained embeddings](http://opennmt.net/OpenNMT-py/FAQ.html#how-do-i-use-pretrained-embeddings-e-g-glove)
- [Copy and Coverage Attention](http://opennmt.net/OpenNMT-py/options/train.html#model-attention)
- [Image-to-text processing](http://opennmt.net/OpenNMT-py/im2text.html)
- [Speech-to-text processing](http://opennmt.net/OpenNMT-py/speech2text.html)
- ["Attention is all you need"](http://opennmt.net/OpenNMT-py/FAQ.html#how-do-i-use-the-transformer-model)
- [Multi-GPU](http://opennmt.net/OpenNMT-py/FAQ.html#do-you-support-multi-gpu)
- Inference-time loss functions
- Conv2Conv convolution model
- SRU "RNNs faster than CNN" paper
- Mixed-precision training with [APEX](https://github.com/NVIDIA/apex), optimized on [Tensor Cores](https://developer.nvidia.com/tensor-cores)

## Quickstart

[Full Documentation](http://opennmt.net/OpenNMT-py/)

### Step 1: Preprocess the data

```bash
python preprocess.py -train_src data/src-train.txt -train_tgt data/tgt-train.txt -valid_src data/src-val.txt -valid_tgt data/tgt-val.txt -save_data data/demo
```

We will be working with some example data in the `data/` folder.

The data consists of parallel source (`src`) and target (`tgt`) data containing one sentence per line, with tokens separated by a space:

* `src-train.txt`
* `tgt-train.txt`
* `src-val.txt`
* `tgt-val.txt`

Validation files are required and used to evaluate the convergence of the training. They usually contain no more than 5,000 sentences.

After running the preprocessing, the following files are generated:

* `demo.train.pt`: serialized PyTorch file containing training data
* `demo.valid.pt`: serialized PyTorch file containing validation data
* `demo.vocab.pt`: serialized PyTorch file containing vocabulary data

Internally the system never touches the words themselves, but uses these indices.
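As an illustration of that last point, here is a toy sketch of the token-to-index mapping. This is not the actual preprocessing code; the field handling and special-token details are simplified for clarity:

```python
def build_vocab(sentences, specials=("<unk>", "<pad>", "<s>", "</s>")):
    """Assign an integer index to every token seen in the corpus."""
    vocab = {tok: i for i, tok in enumerate(specials)}
    for sent in sentences:
        for tok in sent.split():
            vocab.setdefault(tok, len(vocab))
    return vocab

def numericalize(sent, vocab):
    """Replace each token by its index; unknown tokens map to <unk>."""
    return [vocab.get(tok, vocab["<unk>"]) for tok in sent.split()]

vocab = build_vocab(["the cat sat", "the dog sat"])
print(numericalize("the cat barked", vocab))  # [4, 5, 0]
```

The unseen token `barked` falls back to the index of `<unk>`, which is exactly why rare words surface as `<unk>` in raw model output.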

### Step 2: Train the model

```bash
python train.py -data data/demo -save_model demo-model
```

The main train command is quite simple. Minimally it takes a data file
and a save file. This will run the default model, which consists of a
2-layer LSTM with 500 hidden units on both the encoder and decoder.
To train on GPUs, set CUDA_VISIBLE_DEVICES (for example, `CUDA_VISIBLE_DEVICES=1,3`)
and pass `-world_size 2 -gpu_ranks 0 1` to use (say) GPUs 1 and 3 on this node only.
To learn more about distributed training on single or multiple nodes, read the FAQ section.
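The interaction between CUDA_VISIBLE_DEVICES and `-gpu_ranks` can be confusing, so here is a small sketch of the assumed semantics (an illustration, not OpenNMT-py code): CUDA_VISIBLE_DEVICES renumbers the listed devices as logical devices 0, 1, ..., and each rank then picks one logical device.

```python
def physical_gpus(cuda_visible_devices, gpu_ranks):
    """Map -gpu_ranks indices back to physical GPU ids.

    CUDA_VISIBLE_DEVICES=1,3 makes physical GPUs 1 and 3 visible as
    logical devices 0 and 1; -gpu_ranks 0 1 then selects both of them.
    """
    visible = [int(d) for d in cuda_visible_devices.split(",")]
    return [visible[rank] for rank in gpu_ranks]

# CUDA_VISIBLE_DEVICES=1,3 with -world_size 2 -gpu_ranks 0 1:
print(physical_gpus("1,3", [0, 1]))  # [1, 3]
```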

### Step 3: Translate

```bash
python translate.py -model demo-model_acc_XX.XX_ppl_XXX.XX_eX.pt -src data/src-test.txt -output pred.txt -replace_unk -verbose
```

Now you have a model which you can use to predict on new data. We do this by running beam search. This will output predictions into `pred.txt`.

!!! note "Note"
    The predictions are going to be quite terrible, as the demo dataset is small. Try running on some larger datasets! For example you can download millions of parallel sentences for [translation](http://www.statmt.org/wmt16/translation-task.html) or [summarization](https://github.com/harvardnlp/sent-summary).

## Alternative: Run on FloydHub

[![Run on FloydHub](https://static.floydhub.com/button/button.svg)](https://floydhub.com/run?template=https://github.com/OpenNMT/OpenNMT-py)

Click this button to open a Workspace on [FloydHub](https://www.floydhub.com/?utm_medium=readme&utm_source=opennmt-py&utm_campaign=jul_2018) for training/testing your code.

## Pretrained embeddings (e.g. GloVe)

Please see the FAQ: [How to use GloVe pre-trained embeddings in OpenNMT-py](http://opennmt.net/OpenNMT-py/FAQ.html#how-do-i-use-pretrained-embeddings-e-g-glove)

## Pretrained models

The following pretrained models can be downloaded and used with translate.py.

http://opennmt.net/Models-py/

## Acknowledgements

OpenNMT-py is run as a collaborative open-source project.
The original code was written by [Adam Lerer](http://github.com/adamlerer) (NYC) to reproduce OpenNMT-Lua using PyTorch.

Major contributors are:
[Sasha Rush](https://github.com/srush) (Cambridge, MA)
[Vincent Nguyen](https://github.com/vince62s) (Ubiqus)
[Ben Peters](http://github.com/bpopeters) (Lisbon)
[Sebastian Gehrmann](https://github.com/sebastianGehrmann) (Harvard NLP)
[Yuntian Deng](https://github.com/da03) (Harvard NLP)
[Guillaume Klein](https://github.com/guillaumekln) (Systran)
[Paul Tardy](https://github.com/pltrdy) (Ubiqus / Lium)
[François Hernandez](https://github.com/francoishernandez) (Ubiqus)
[Jianyu Zhan](http://github.com/jianyuzhan) (Shanghai)
[Dylan Flaute](http://github.com/flauted) (University of Dayton)
and more!

OpenNMT-py is part of the OpenNMT project, along with OpenNMT-Lua and OpenNMT-tf.

## Citation

[OpenNMT: Neural Machine Translation Toolkit](https://arxiv.org/pdf/1805.11462)

[OpenNMT technical report](https://doi.org/10.18653/v1/P17-4012)

```
@inproceedings{opennmt,
  author    = {Guillaume Klein and
               Yoon Kim and
               Yuntian Deng and
               Jean Senellart and
               Alexander M. Rush},
  title     = {Open{NMT}: Open-Source Toolkit for Neural Machine Translation},
  booktitle = {Proc. ACL},
  year      = {2017},
  url       = {https://doi.org/10.18653/v1/P17-4012},
  doi       = {10.18653/v1/P17-4012}
}
```

@@ -0,0 +1,13 @@
{
    "models_root": "./available_models",
    "models": [{
        "model": "onmt-hien.pt",
        "timeout": -1,
        "on_timeout": "unload",
        "model_root": "../model/",
        "opt": {
            "batch_size": 1,
            "beam_size": 10
        }
    }]
}

@@ -0,0 +1,29 @@
{
    "models_root": "./available_models",
    "models": [
        {
            "id": 100,
            "model": "model_0.pt",
            "timeout": 600,
            "on_timeout": "to_cpu",
            "load": true,
            "opt": {
                "gpu": 0,
                "beam_size": 5
            },
            "tokenizer": {
                "type": "sentencepiece",
                "model": "wmtenfr.model"
            }
        }, {
            "model": "model_0.light.pt",
            "timeout": -1,
            "on_timeout": "unload",
            "model_root": "../other_models",
            "opt": {
                "batch_size": 1,
                "beam_size": 10
            }
        }
    ]
}
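A hypothetical client-side sketch of querying the REST server configured above. The `/translator/translate` endpoint path and the payload shape are assumptions drawn from OpenNMT-py's server documentation, which is not part of this diff:

```python
import json

# Each request item names the model "id" from the config above and the
# tokenized source sentence to translate (hypothetical payload shape).
payload = [{"id": 100, "src": "Hello world ."}]
body = json.dumps(payload)

# The body would be POSTed to e.g. http://localhost:5000/translator/translate
print(body)
```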

@@ -0,0 +1,31 @@
data: data/cnndm/CNNDM
save_model: models/cnndm
save_checkpoint_steps: 10000
keep_checkpoint: 10
seed: 3435
train_steps: 100000
valid_steps: 10000
report_every: 100

encoder_type: brnn
word_vec_size: 128
rnn_size: 512
layers: 1

optim: adagrad
learning_rate: 0.15
adagrad_accumulator_init: 0.1
max_grad_norm: 2

batch_size: 16
dropout: 0.0

copy_attn: 'true'
global_attention: mlp
reuse_copy_attn: 'true'
bridge: 'true'

world_size: 2
gpu_ranks:
- 0
- 1
@@ -0,0 +1,42 @@
data: exp/dataset.de-en
save_model: exp/model.de-en
save_checkpoint_steps: 10000
keep_checkpoint: 10
seed: 3435
train_steps: 500000
valid_steps: 10000
warmup_steps: 8000
report_every: 100

decoder_type: transformer
encoder_type: transformer
word_vec_size: 512
rnn_size: 512
layers: 6
transformer_ff: 2048
heads: 8

accum_count: 8
optim: adam
adam_beta1: 0.9
adam_beta2: 0.998
decay_method: noam
learning_rate: 2.0
max_grad_norm: 0.0

batch_size: 4096
batch_type: tokens
normalization: tokens
dropout: 0.1
label_smoothing: 0.1

max_generator_batches: 2

param_init: 0.0
param_init_glorot: 'true'
position_encoding: 'true'

world_size: 1
gpu_ranks:
- 0
@@ -0,0 +1,45 @@
data: exp/dataset.de-en
save_model: exp/model.de-en
save_checkpoint_steps: 10000
keep_checkpoint: 10
seed: 3435
train_steps: 200000
valid_steps: 10000
warmup_steps: 8000
report_every: 100

decoder_type: transformer
encoder_type: transformer
word_vec_size: 512
rnn_size: 512
layers: 6
transformer_ff: 2048
heads: 8

accum_count: 2
optim: adam
adam_beta1: 0.9
adam_beta2: 0.998
decay_method: noam
learning_rate: 2.0
max_grad_norm: 0.0

batch_size: 4096
batch_type: tokens
normalization: tokens
dropout: 0.1
label_smoothing: 0.1

max_generator_batches: 2

param_init: 0.0
param_init_glorot: 'true'
position_encoding: 'true'

world_size: 4
gpu_ranks:
- 0
- 1
- 2
- 3
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS  =
SPHINXBUILD = python3 -msphinx
SPHINXPROJ  = OpenNMT-py
SOURCEDIR   = source
BUILDDIR    = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)