Mirror of https://github.com/microsoft/inmt.git
Add REST APIs for translation queries
This commit is contained in:
Parent
b8c8d36476
Commit
b9bc997f6f
@@ -52,6 +52,7 @@ INSTALLED_APPS = [
     'django.contrib.sessions',
     'django.contrib.messages',
     'django.contrib.staticfiles',
+    'rest_framework',
     'mt',
     'mtsimple',
     'mtpara',
@@ -20,10 +20,16 @@ from django.conf.urls.static import static
 from django.contrib.staticfiles.urls import staticfiles_urlpatterns

 urlpatterns = [
-    path('', include('mt.urls')),
+    path('', include('mt.urls')), #TODO: This redirects to simple/ can we do it directly?
     path('simple/', include('mtsimple.urls')),
     path('para/', include('mtpara.urls')),
     # path('gpt/', include('gpt.urls')),
     path('admin/', admin.site.urls),
     path('accounts/', include('django.contrib.auth.urls')),
+
+
+    # REST FRAMEWORK URLS
+    path('api/simple/', include('mtsimple.api.urls')),
+
+
 ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) + staticfiles_urlpatterns()
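The added path('api/simple/', include('mtsimple.api.urls')) entry, together with the translate_new route introduced later in mtsimple/api/urls.py, exposes the endpoint at /api/simple/translate_new. A quick, optional way to confirm the wiring from python manage.py shell (a sketch, not part of the commit; it assumes this URLconf is the project's ROOT_URLCONF):

    # Hypothetical check that the new API route resolves to the intended view name.
    from django.urls import resolve

    match = resolve('/api/simple/translate_new')
    print(match.view_name)  # expected: 'translate_new'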
Binary file not shown.
@@ -0,0 +1,23 @@
+# Swagger Codegen Ignore
+# Generated by swagger-codegen https://github.com/swagger-api/swagger-codegen
+
+# Use this file to prevent files from being overwritten by the generator.
+# The patterns follow closely to .gitignore or .dockerignore.
+
+# As an example, the C# client generator defines ApiClient.cs.
+# You can make changes and tell Swagger Codgen to ignore just this file by uncommenting the following line:
+#ApiClient.cs
+
+# You can match any string of characters against a directory, file or extension with a single asterisk (*):
+#foo/*/qux
+# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux
+
+# You can recursively match patterns against a directory, file or extension with a double asterisk (**):
+#foo/**/qux
+# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux
+
+# You can also negate patterns with an exclamation (!).
+# For example, you can ignore all files in a docs folder with the file extension .md:
+#docs/*.md
+# Then explicitly reverse the ignore rule for a single file:
+#!docs/README.md
@@ -0,0 +1 @@
+3.0.19
@@ -0,0 +1,282 @@
+<!doctype html>
+<html>
+<head>
+<title>SIMPLE TRANSLATION API</title>
+<style type="text/css">
+body {
+  font-family: Trebuchet MS, sans-serif;
+  font-size: 15px;
+  color: #444;
+  margin-right: 24px;
+}
+
+h1 {
+  font-size: 25px;
+}
+h2 {
+  font-size: 20px;
+}
+h3 {
+  font-size: 16px;
+  font-weight: bold;
+}
+hr {
+  height: 1px;
+  border: 0;
+  color: #ddd;
+  background-color: #ddd;
+}
+
+.app-desc {
+  clear: both;
+  margin-left: 20px;
+}
+.param-name {
+  width: 100%;
+}
+.license-info {
+  margin-left: 20px;
+}
+
+.license-url {
+  margin-left: 20px;
+}
+
+.model {
+  margin: 0 0 0px 20px;
+}
+
+.method {
+  margin-left: 20px;
+}
+
+.method-notes {
+  margin: 10px 0 20px 0;
+  font-size: 90%;
+  color: #555;
+}
+
+pre {
+  padding: 10px;
+  margin-bottom: 2px;
+}
+
+.http-method {
+  text-transform: uppercase;
+}
+
+pre.get {
+  background-color: #0f6ab4;
+}
+
+pre.post {
+  background-color: #10a54a;
+}
+
+pre.put {
+  background-color: #c5862b;
+}
+
+pre.delete {
+  background-color: #a41e22;
+}
+
+.huge {
+  color: #fff;
+}
+
+pre.example {
+  background-color: #f3f3f3;
+  padding: 10px;
+  border: 1px solid #ddd;
+}
+
+code {
+  white-space: pre;
+}
+
+.nickname {
+  font-weight: bold;
+}
+
+.method-path {
+  font-size: 1.5em;
+  background-color: #0f6ab4;
+}
+
+.up {
+  float:right;
+}
+
+.parameter {
+  width: 500px;
+}
+
+.param {
+  width: 500px;
+  padding: 10px 0 0 20px;
+  font-weight: bold;
+}
+
+.param-desc {
+  width: 700px;
+  padding: 0 0 0 20px;
+  color: #777;
+}
+
+.param-type {
+  font-style: italic;
+}
+
+.param-enum-header {
+  width: 700px;
+  padding: 0 0 0 60px;
+  color: #777;
+  font-weight: bold;
+}
+
+.param-enum {
+  width: 700px;
+  padding: 0 0 0 80px;
+  color: #777;
+  font-style: italic;
+}
+
+.field-label {
+  padding: 0;
+  margin: 0;
+  clear: both;
+}
+
+.field-items {
+  padding: 0 0 15px 0;
+  margin-bottom: 15px;
+}
+
+.return-type {
+  clear: both;
+  padding-bottom: 10px;
+}
+
+.param-header {
+  font-weight: bold;
+}
+
+.method-tags {
+  text-align: right;
+}
+
+.method-tag {
+  background: none repeat scroll 0% 0% #24A600;
+  border-radius: 3px;
+  padding: 2px 10px;
+  margin: 2px;
+  color: #FFF;
+  display: inline-block;
+  text-decoration: none;
+}
+</style>
+</head>
+<body>
+<h1>SIMPLE TRANSLATION API</h1>
+<div class="app-desc">Api for translation suggestions and attention score</div>
+<div class="app-desc">More information: <a href="https://helloreverb.com">https://helloreverb.com</a></div>
+<div class="app-desc">Contact Info: <a href="hello@helloreverb.com">hello@helloreverb.com</a></div>
+<div class="app-desc">Version: 1.0</div>
+<div class="app-desc">BasePath:/I927/INMT-SIMPLE/1.0</div>
+<div class="license-info">All rights reserved</div>
+<div class="license-url">http://apache.org/licenses/LICENSE-2.0.html</div>
+<h2>Access</h2>
+
+<h2><a name="__Methods">Methods</a></h2>
+[ Jump to <a href="#__Models">Models</a> ]
+
+<h3>Table of Contents </h3>
+<div class="method-summary"></div>
+<h4><a href="#Simple">Simple</a></h4>
+<ul>
+<li><a href="#translateNew"><code><span class="http-method">get</span> /api/simple/translate_new</code></a></li>
+</ul>
+
+<h1><a name="Simple">Simple</a></h1>
+<div class="method"><a name="translateNew"></a>
+<div class="method-path">
+<a class="up" href="#__Methods">Up</a>
+<pre class="get"><code class="huge"><span class="http-method">get</span> /api/simple/translate_new</code></pre></div>
+<div class="method-summary">get suggestions, attension scores, preplex and average score for your partial translation (<span class="nickname">translateNew</span>)</div>
+<div class="method-notes"></div>
+
+<h3 class="field-label">Query parameters</h3>
+<div class="field-items">
+<div class="param">langspec (required)</div>
+
+<div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; The type of translation </div> <div class="param">sentence (required)</div>
+
+<div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; The sentence that is to be translated </div> <div class="param">partial_trans (required)</div>
+
+<div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; Partial translation done so far by the user </div> </div> <!-- field-items -->
+
+<h3 class="field-label">Return type</h3>
+<div class="return-type">
+<a href="#inline_response_200">inline_response_200</a>
+
+</div>
+
+<!--Todo: process Response Object and its headers, schema, examples -->
+
+<h3 class="field-label">Example data</h3>
+<div class="example-data-content-type">Content-Type: application/json</div>
+<pre class="example"><code>{
+  "result" : [ "result", "result" ],
+  "attn" : [ 0.8008281904610115, 0.8008281904610115 ],
+  "avg" : 1.4658129805029452,
+  "partial" : "partial",
+  "ppl" : 6.027456183070403
+}</code></pre>
+
+<h3 class="field-label">Produces</h3>
+This API call produces the following media types according to the <span class="header">Accept</span> request header;
+the media type will be conveyed by the <span class="header">Content-Type</span> response header.
+<ul>
+<li><code>application/json</code></li>
+</ul>
+
+<h3 class="field-label">Responses</h3>
+<h4 class="field-label">200</h4>
+An array containing result (the suggestions), attentions, partial translation by the user, perplexity and average score
+<a href="#inline_response_200">inline_response_200</a>
+<h4 class="field-label">400</h4>
+Bad Request
+<a href="#"></a>
+<h4 class="field-label">500</h4>
+Some internal server error
+<a href="#"></a>
+</div> <!-- method -->
+<hr/>
+
+<h2><a name="__Models">Models</a></h2>
+[ Jump to <a href="#__Methods">Methods</a> ]
+
+<h3>Table of Contents</h3>
+<ol>
+<li><a href="#inline_response_200"><code>inline_response_200</code></a></li>
+</ol>
+
+<div class="model">
+<h3><a name="inline_response_200"><code>inline_response_200</code></a> <a class="up" href="#__Models">Up</a></h3>
+
+<div class="field-items">
+<div class="param">result (optional)</div><div class="param-desc"><span class="param-type"><a href="#string">array[String]</a></span> </div>
+<div class="param">attn (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">array[BigDecimal]</a></span> </div>
+<div class="param">partial (optional)</div><div class="param-desc"><span class="param-type"><a href="#string">String</a></span> </div>
+<div class="param">ppl (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">BigDecimal</a></span> </div>
+<div class="param">avg (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">BigDecimal</a></span> </div>
+</div> <!-- field-items -->
+</div>
+</body>
+</html>
Binary file not shown.
@@ -0,0 +1,23 @@
+# Swagger Codegen Ignore
+# Generated by swagger-codegen https://github.com/swagger-api/swagger-codegen
+
+# Use this file to prevent files from being overwritten by the generator.
+# The patterns follow closely to .gitignore or .dockerignore.
+
+# As an example, the C# client generator defines ApiClient.cs.
+# You can make changes and tell Swagger Codgen to ignore just this file by uncommenting the following line:
+#ApiClient.cs
+
+# You can match any string of characters against a directory, file or extension with a single asterisk (*):
+#foo/*/qux
+# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux
+
+# You can recursively match patterns against a directory, file or extension with a double asterisk (**):
+#foo/**/qux
+# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux
+
+# You can also negate patterns with an exclamation (!).
+# For example, you can ignore all files in a docs folder with the file extension .md:
+#docs/*.md
+# Then explicitly reverse the ignore rule for a single file:
+#!docs/README.md
@@ -0,0 +1 @@
+3.0.19
@@ -0,0 +1,75 @@
+openapi: 3.0.0
+info:
+  title: SIMPLE TRANSLATION API
+  description: Api for translation suggestions and attention score
+  version: "1.0"
+servers:
+- url: https://virtserver.swaggerhub.com/I927/INMT-SIMPLE/1.0
+  description: SwaggerHub API Auto Mocking
+paths:
+  /api/simple/translate_new:
+    get:
+      tags:
+      - simple
+      summary: get suggestions, attension scores, preplex and average score for your
+        partial translation
+      operationId: translate_new
+      parameters:
+      - name: langspec
+        in: query
+        description: The type of translation
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      - name: sentence
+        in: query
+        description: The sentence that is to be translated
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      - name: partial_trans
+        in: query
+        description: Partial translation done so far by the user
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      responses:
+        "200":
+          description: An array containing result (the suggestions), attentions, partial
+            translation by the user, perplexity and average score
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/inline_response_200'
+        "400":
+          description: Bad Request
+        "500":
+          description: Some internal server error
+components:
+  schemas:
+    inline_response_200:
+      type: object
+      properties:
+        result:
+          type: array
+          items:
+            type: string
+        attn:
+          type: array
+          items:
+            multipleOf: 0.1
+            type: number
+        partial:
+          type: string
+        ppl:
+          multipleOf: 0.1
+          type: number
+        avg:
+          multipleOf: 0.1
+          type: number
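A minimal client sketch against the spec above, assuming the Django dev server is running locally on port 8000 (the host, port, and example sentence are assumptions; the query parameters and response fields are the ones documented in the spec):

    import requests

    # GET /api/simple/translate_new with the three required query parameters.
    resp = requests.get(
        'http://127.0.0.1:8000/api/simple/translate_new',  # assumed local dev server
        params={
            'langspec': 'en-hi',        # the type of translation (language pair)
            'sentence': 'The weather is pleasant today.',
            'partial_trans': '',        # partial translation done so far by the user
        },
    )
    resp.raise_for_status()
    data = resp.json()
    print(data['result'])            # suggested translations
    print(data['attn'])              # attention indicators per source token
    print(data['partial'], data['ppl'], data['avg'])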
@@ -0,0 +1,69 @@
+# Generated by Django 3.0.3 on 2020-05-19 16:12
+
+from django.db import migrations, models
+import django.db.models.deletion
+import jsonfield.fields
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('mt', '0014_auto_20190606_1535'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='customKeyboardCommands',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('select_entire_suggestion', models.IntegerField()),
+                ('select_single_word_from_suggestion', models.IntegerField()),
+                ('navigate_to_next_corpus_fragment', models.IntegerField()),
+                ('navigate_to_previous_corpus_fragment', models.IntegerField()),
+                ('submit_translation', models.IntegerField()),
+                ('select_next_translation_suggestion', models.IntegerField()),
+                ('select_previous_translation_suggestion', models.IntegerField()),
+                ('custom_layout_name', models.CharField(max_length=30)),
+            ],
+            options={
+                'verbose_name': 'Custom Keyboard Command Set',
+                'verbose_name_plural': 'Custom Keyboard Command Sets',
+            },
+        ),
+        migrations.AlterField(
+            model_name='corpus',
+            name='helpprovision',
+            field=models.CharField(choices=[('IT', 'Interactive Translation'), ('PE', 'Post Editing'), ('BL', 'Baseline')], default='IT', max_length=2),
+        ),
+        migrations.AlterField(
+            model_name='dockeystroke',
+            name='keystrokeseries',
+            field=jsonfield.fields.JSONField(),
+        ),
+        migrations.CreateModel(
+            name='translatorKeyboardLayouts',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('customKeyboardCommands', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorconfigs', to='mt.customKeyboardCommands')),
+                ('translator', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorconfigs', to='mt.translator')),
+            ],
+            options={
+                'verbose_name': 'Translator Keyboard Layout Specified',
+                'verbose_name_plural': 'Translator Keyboard Layout Specified',
+                'unique_together': {('translator', 'customKeyboardCommands')},
+            },
+        ),
+        migrations.CreateModel(
+            name='translatorcorpus',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('corpus', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='corpustranslators', to='mt.corpus')),
+                ('translator', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorcorpus', to='mt.translator')),
+            ],
+            options={
+                'verbose_name': 'Translator Corpus Possible',
+                'verbose_name_plural': '6. Translator Corpus Possible',
+                'unique_together': {('translator', 'corpus')},
+            },
+        ),
+    ]
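For context, the customKeyboardCommands model created by this migration appears to store one integer key code per editor action. A hedged ORM sketch, not part of the commit (the import path assumes the model class lives in mt/models.py, and the key codes are arbitrary examples):

    from mt.models import customKeyboardCommands

    # Create one named command set; each field holds a key code chosen by the user.
    layout = customKeyboardCommands.objects.create(
        select_entire_suggestion=9,                 # Tab (assumed)
        select_single_word_from_suggestion=13,      # Enter (assumed)
        navigate_to_next_corpus_fragment=34,        # PgDn (assumed)
        navigate_to_previous_corpus_fragment=33,    # PgUp (assumed)
        submit_translation=35,                      # End (assumed)
        select_next_translation_suggestion=40,      # Down arrow (assumed)
        select_previous_translation_suggestion=38,  # Up arrow (assumed)
        custom_layout_name='example-layout',
    )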
@@ -43,6 +43,10 @@ from django.conf import settings

 with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
     opt = pickle.load(f)
+print("###########################DEBUG######################################")
+print(opt) # This is the file that mentions model details
+print(dir_path + "/opt_data")
+print("#######################################################################")



@@ -0,0 +1,7 @@
+from django.urls import path
+
+from . import views
+
+urlpatterns = [
+    path('translate_new', views.translate_new, name='translate_new'),
+]
@@ -0,0 +1,191 @@
+from django.http import HttpResponse, JsonResponse
+from rest_framework import status
+from rest_framework.response import Response
+from rest_framework.decorators import api_view
+import re, os, math
+import requests
+import pickle
+
+from indic_transliteration import sanscript
+
+from onmt.translate.infertranslator import build_translator
+from onmt.utils.parse import ArgumentParser
+import mtsimple
+dir_path = os.path.dirname(os.path.dirname(mtsimple.__file__))
+
+#TODO: Find a Way to not repeat the below starter code from mtsimple/views.py
+
+langspecs = {
+    'en-hi' : {
+        'src' : 'en',
+        'tgt' : 'hi',
+        'model': 'full_iitb_enhi_50v.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : True,
+    },
+    'hi-en' : {
+        'src' : 'hi',
+        'tgt' : 'en',
+        'model': 'full_iitb_bpe_hien.pt',
+        'indic_code': None,
+        'provide_help' : False,
+    },
+    'hi-gondi' : {
+        'src' : 'hi',
+        'tgt' : 'gondi',
+        'model': 'hi-gondi.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : False,
+    },
+    # '*-e
+    # '*-en' : {
+    # 'src' : 'hi',
+    # 'tgt' : 'en',
+    # 'model': 'multiling.pt',
+    # 'indic_code': None,
+    # 'provide_help' : False,
+    # }
+}
+
+with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
+    opt = pickle.load(f)
+
+engines = {}
+# The model engines are initialised here after loading opt (maybe it just specifies of how the model looks like?)
+for key, value in langspecs.items():
+    opt.models = [os.path.join(dir_path, 'model', value['model'])]
+    opt.n_best = 1
+    opt.max_length = 100
+    opt.global_attention_function = 'sparsemax'
+    ArgumentParser.validate_translate_opts(opt)
+    engines[key] = {"translatorbest": build_translator(opt, report_score=True)}
+    #translatorbest builds the best complete translation of the sentence
+
+    opt.n_best = 5
+    opt.max_length = 2
+    opt.global_attention_function = 'sparsemax'
+    ArgumentParser.validate_translate_opts(opt)
+    engines[key]["translatorbigram"] = build_translator(opt, report_score=True)
+    #translatorbiagram builds best translations of length two
+
+def quotaposto(s, lang="en"):
+    s = re.sub(r"&quot;", r'"', s)
+    s = re.sub(r"&apos;", r"'", s)
+    s = re.sub(r"(@@ )|(@@ ?$)", r"", s)
+    #This is work in progress to make writing as natural as possible. taking care of spaces before and after certain characters.
+    # s = re.sub(r"(\s+)([!:?,.।\']+)", r"\2", s)
+    # s = re.sub(r"([({\[<]+)(\s+)", r"\1", s)
+    # s = re.sub(r"(\s+)([)}\]>]+)", r"\2", s)
+    return s
+
+def toquotapos(s, lang="en"):
+    # if lang=="en":
+    s = s.lower()
+    s = re.sub(r"([\“\”])", r'"', s)
+    s = re.sub(r"([\‘\’])", r"'", s)
+    s = re.sub(r"([\ः])", r":", s)
+    s = re.sub(r"([-!$%^&*()_+|~=`{}\[\]:\";<>?,.\/#@।]+)", r" \1 ", s)
+    s = re.sub(r'"', r'&quot;', s)
+    s = re.sub(r"'", r"&apos;", s)
+    s = re.sub(r"(\s+)", r" ", s)
+
+    return s
+
+@api_view(['GET',])
+def translate_new(request):
+    langspec = request.GET.get('langspec')
+    sentence = request.GET.get('sentence')
+    partial_trans = request.GET.get('partial_trans', '')
+    translatorbest = engines[langspec]["translatorbest"]
+    translatorbigram = engines[langspec]["translatorbigram"]
+    print("Before processing")
+    print("##########################")
+    print("##########################")
+    print(sentence.strip())
+    print("##########################")
+    print("##########################")
+    print("##########################")
+
+    L1 = toquotapos(sentence.strip()) # request.GET.get('a') contains the whole sentence to be translated
+    print("############After Processing########")
+    print((L1))
+    L2 = partial_trans # request.GET.get('b') contains the partial sentence to be translated
+    L2split = L2.split()
+
+    if langspecs[langspec]['indic_code']:
+        # print(L2[-1])
+        if L2 != '' and bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])):
+            params = {}
+            params['inString'] = L2split[-1]
+            params['lang'] = 'hindi'
+            data = requests.get('http://xlit.quillpad.in/quillpad_backend2/processWordJSON', params = params).json()
+            L2split[-1] = data['twords'][0]['options'][0]
+            L2 = ' '.join(L2split)
+            # L2 = transliterate(L2, sanscript.ITRANS, langspec['indic_code'])
+
+    print(L2, u'\u0900-\u097F')
+
+    something, pred, covatn2d, score_total, words_total = translatorbest.translate(
+        src=[L1],
+        tgt=None,
+        src_dir='',
+        batch_size=30,
+        attn_debug=True,
+        partial = toquotapos(L2)
+        )
+
+    scores, predictions, score_total, words_total = translatorbigram.translate(
+        src=[L1],
+        tgt=None,
+        src_dir='',
+        batch_size=30,
+        attn_debug=False,
+        partial = toquotapos(L2),
+        dymax_len = 2,
+        )
+
+
+    print(covatn2d, 'convatn2d')
+    if L2 != '':
+        transpattn = [*zip(*covatn2d)]
+        attnind = [attn.index(max(attn)) for attn in transpattn]
+        print('attnind', attnind)
+        attndist = [[ i for i, x in enumerate(attnind) if x==k] for k in range(len(L2.strip().split(" ")))]
+        print('attndist', attndist)
+        sumattn = [1] * len(L1.split(" "))
+        for i in attndist:
+            for k in i:
+                sumattn[k] = 0
+        # attn = covatn2d[:len(L2.strip().split(" "))]
+        # sumattn = [sum(i) for i in zip(*attn)]
+        # for i in range(len(attn)):
+        #     if max(attn[i]) > 0.30:
+        #         sumattn[attn[i].index(max(attn[i]))] = 1
+        #         print(max(attn[i]))
+        # newattn = [float("{0:.2f}".format(1-(k/max(sumattn)))) for k in sumattn]
+        # # sumattn = [float("{0:.2f}".format(k/sum(newattn))) for k in newattn]
+        # newattn = [ 1.66*max(0, (k-0.4)) for k in newattn]
+
+    else:
+        sumattn = [1.00] * len(L1.split(" "))
+    predictions = predictions[0]
+    print(predictions)
+    seen = set()
+    seen_add = seen.add
+    sentence = [quotaposto(L2 + x.capitalize()[len(L2):], langspecs[langspec]["tgt"]) + " " for x in predictions if not (x in seen or seen_add(x))]
+    # sentence = [x.replace(L2, "") for x in sentence]
+    sentence = '\n'.join(sentence)
+    print("pred[0][0]", pred[0][0], pred[0][0][len(L2):])
+    if langspecs[langspec]['provide_help'] and L2:
+        sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[langspec]["tgt"]) + '\n' + L2 + '\n' + sentence
+    else:
+        sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[langspec]["tgt"]) + '\n' + sentence
+
+    print(sentence)
+    perplexity = float(math.exp(-score_total / words_total))
+    avg_score = float(score_total / words_total)
+
+    print("sentence", sentence)
+    # print(something, pred)
+    return JsonResponse({'result': sentence.split('\n'), 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
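A smoke-test sketch for this view, not part of the commit. It assumes the OpenNMT checkpoints named in langspecs exist under the project's model/ directory, because they are loaded when mtsimple.api.views is imported:

    from rest_framework.test import APIRequestFactory
    from mtsimple.api import views

    factory = APIRequestFactory()
    request = factory.get('/api/simple/translate_new', {
        'langspec': 'en-hi',                      # must be a key of langspecs
        'sentence': 'The weather is pleasant today.',
        'partial_trans': '',
    })
    response = views.translate_new(request)
    print(response.status_code)  # 200 on success
    print(response.content)      # JSON with result/attn/partial/ppl/avg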
@@ -29,6 +29,7 @@ import requests

 import math

+# defines the configuration of the translation type selected by the user
 langspecs = {
     'en-hi' : {
         'src' : 'en',
@@ -44,6 +45,14 @@ langspecs = {
         'indic_code': None,
         'provide_help' : False,
     },
+
+    'hi-gondi' : {
+        'src' : 'hi',
+        'tgt' : 'gondi',
+        'model': 'hi-gondi.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : False,
+    },
     # '*-en' : {
     # 'src' : 'hi',
     # 'tgt' : 'en',
@@ -85,6 +94,7 @@ with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
     opt = pickle.load(f)

 engines = {}
+# The model engines are initialised here after loading opt (maybe it just specifies of how the model looks like?)
 for key, value in langspecs.items():
     opt.models = [os.path.join(dir_path, 'model', value['model'])]
     opt.n_best = 1
@@ -92,19 +102,23 @@ for key, value in langspecs.items():
     opt.global_attention_function = 'sparsemax'
     ArgumentParser.validate_translate_opts(opt)
     engines[key] = {"translatorbest": build_translator(opt, report_score=True)}
+    #translatorbest builds the best complete translation of the sentence

     opt.n_best = 5
     opt.max_length = 2
     opt.global_attention_function = 'sparsemax'
     ArgumentParser.validate_translate_opts(opt)
     engines[key]["translatorbigram"] = build_translator(opt, report_score=True)
+    #translatorbiagram builds 5 best translations of length two

 global corpusops
 corpusops = []

+# The view function for the first page url : simple/
 def corpus(request):
     return render(request, 'simplecorpus.html')

+#The view function called after setting languagespecs and getting the input in simple/ (called after corpusinput)
 def translate(request):
     return render(request, 'simpletranslate.html')

@@ -112,7 +126,7 @@ def end(request):
     return render(request, 'simpleend.html')

 def split_sentences(st):
-    #Split sentences based
+    #Split sentences based on !?।|.
     sentences = re.split(r'[!?।|.](?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)', st)

     if sentences[-1]:
@@ -120,6 +134,12 @@ def split_sentences(st):
     else:
         return sentences[:-1]

+"""
+The view function for getting the input for translation on the first page (simple/)
+
+Splits the sentence based on !?।| cleans it and saves the list in session["corpusinps"]
+"""
+
 def corpusinput(request):
     corpusraw = request.POST.get('translate')
     langselect = request.POST.get('langselect')
@@ -128,7 +148,11 @@ def corpusinput(request):
     request.session["langspec"] = langselect
     print(request.session["langspec"])
     s = corpusraw.strip()
+
+    print(s, "DEBUG: raw corpus before split_sentences")
     spsent = [k.strip() for k in split_sentences(s)]
+    print(spsent, "DEBUG: raw corpus after split_sentences")
+
     corpusinps = list(filter(lambda elem: elem.strip(), spsent))
     request.session["corpusinps"] = [[k, ''] for k in corpusinps]
     print(request.session["corpusinps"])
@@ -159,11 +183,22 @@ def indic(request):
 def translate_new(request):
     translatorbest = engines[request.session["langspec"]]["translatorbest"]
     translatorbigram = engines[request.session["langspec"]]["translatorbigram"]
-    L1 = toquotapos(request.GET.get('a').strip())
-    L2 = request.GET.get('b', "")
+    print("Before processing")
+    print("##########################")
+    print("##########################")
+    print(request.GET.get('a').strip())
+    print("##########################")
+    print("##########################")
+    print("##########################")
+
+    L1 = toquotapos(request.GET.get('a').strip()) # request.GET.get('a') contains the whole sentence to be translated
+    print("############After Processing########")
+    print((L1))
+    L2 = request.GET.get('b', "") # request.GET.get('b') contains the partial sentence to be translated
     L2split = L2.split()

     if langspecs[request.session["langspec"]]['indic_code']:
+        # print(L2[-1])
         if L2 != '' and bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])):
             params = {}
             params['inString'] = L2split[-1]
@@ -173,7 +208,7 @@ def translate_new(request):
             L2 = ' '.join(L2split)
             # L2 = transliterate(L2, sanscript.ITRANS, langspec['indic_code'])

-    print(L2)
+    print(L2, u'\u0900-\u097F')

     something, pred, covatn2d, score_total, words_total = translatorbest.translate(
         src=[L1],
@@ -183,6 +218,8 @@ def translate_new(request):
         attn_debug=True,
         partial = toquotapos(L2)
         )
+
+    print("$$$$$$$$$$$$$$$$$$$$$$$$")

     scores, predictions, score_total, words_total = translatorbigram.translate(
         src=[L1],
@@ -195,11 +232,13 @@ def translate_new(request):
         )


-    # print(covatn2d)
+    print(covatn2d, 'convatn2d')
     if L2 != '':
         transpattn = [*zip(*covatn2d)]
         attnind = [attn.index(max(attn)) for attn in transpattn]
+        print('attnind', attnind)
         attndist = [[ i for i, x in enumerate(attnind) if x==k] for k in range(len(L2.strip().split(" ")))]
+        print('attndist', attndist)
         sumattn = [1] * len(L1.split(" "))
         for i in attndist:
             for k in i:
@@ -223,6 +262,7 @@ def translate_new(request):
     sentence = [quotaposto(L2 + x.capitalize()[len(L2):], langspecs[request.session["langspec"]]["tgt"]) + " " for x in predictions if not (x in seen or seen_add(x))]
     # sentence = [x.replace(L2, "") for x in sentence]
     sentence = '\n'.join(sentence)
+    print("pred[0][0]", pred[0][0], pred[0][0][len(L2):])
     if langspecs[request.session["langspec"]]['provide_help'] and L2:
         sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[request.session["langspec"]]["tgt"]) + '\n' + L2 + '\n' + sentence
     else:
@@ -231,6 +271,6 @@ def translate_new(request):
     print(sentence)
     perplexity = float(math.exp(-score_total / words_total))
     avg_score = float(score_total / words_total)
-    # print(scores)
+    print("sentence", sentence)
     # print(something, pred)
     return JsonResponse({'result': sentence, 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
@@ -307,6 +307,7 @@ class Translator(object):
            * all_scores is a list of `batch_size` lists of `n_best` scores
            * all_predictions is a list of `batch_size` lists
                of `n_best` predictions
+           * attns is a list of attention scores for translation having highest cumilative log likelihood
        """
        self.dymax_len = dymax_len
        self.partialf = None
@@ -322,6 +323,7 @@ class Translator(object):
        # Logic for partial and partialf
        if partial and partial != '':
            partials = partial.split()
+           print(partials, '~~~~partials~~~')
            vocabdict = dict(self.fields)["tgt"].base_field.vocab
            # if vocabdict.stoi[partials[-1]] == 0:
            if partialfcheck:
@@ -335,6 +337,9 @@ class Translator(object):
                # self.partialf = [20.0] + [i[0] for i in sorted(editarr, key=lambda x: x[1])]

                self.partial = [vocabdict.stoi[x] for x in partials[:-1]]
+               print("#########vocabdict.stoi########")
+               print(self.partial)
+               print("##################################")

                self.partialf = [v for k, v in vocabdict.stoi.items() if k.startswith(partials[-1]) and v]
            else:
@@ -384,7 +389,7 @@ class Translator(object):
        pred_score_total, pred_words_total = 0, 0
        gold_score_total, gold_words_total = 0, 0

-       all_scores = []
+       all_scores = [] # I guess this is the cumilative log likelihood score of each sentence
        all_predictions = []

        start_time = time.time()
@@ -396,6 +401,8 @@ class Translator(object):
            translations = xlation_builder.from_batch(batch_data)

            for trans in translations:
+               print("Loop")
+               print(trans, trans.pred_sents)
                all_scores += [trans.pred_scores[:self.n_best]]
                pred_score_total += trans.pred_scores[0]
                pred_words_total += len(trans.pred_sents[0])
@@ -405,6 +412,12 @@ class Translator(object):

                n_best_preds = [" ".join(pred)
                                for pred in trans.pred_sents[:self.n_best]]
+
+               print("############n_best_preds###############")
+               print(n_best_preds)
+               print("############n_best_preds###############")
+
+
                if self.report_align:
                    align_pharaohs = [build_align_pharaoh(align) for align
                                      in trans.word_aligns[:self.n_best]]
@@ -433,7 +446,7 @@ class Translator(object):
                        srcs = trans.src_raw
                    else:
                        srcs = [str(item) for item in range(len(attns[0]))]
-                   output = report_matrix(srcs, preds, attns)
+                   output = report_matrix(srcs, preds, attns) # This prints attentions in output for the sentence having highest cumilative log likelihood score

                    if self.logger:
                        self.logger.info(output)
@@ -387,6 +387,8 @@ function parseProcessedJsonResultsfunction(data, partial) {

    var container = $('<div />');

+   // Code for adding suggestions//
+
    var countcontainer = 0
    finalresult = []
    for(var i = 0; i < result.length; i++) {
@@ -1,4 +1,4 @@
-
+// This page serves as the script for simpletranslate.html
 /*
 *************************************************
 *************************************************
@@ -53,6 +53,7 @@ function sharedStart(feed, partial) {
    part1text = partial.substring(0, lastspace)
    part2text = partial.substring(lastspace+1)
    var count = 0
+   console.log("DEBUG part1text", part1text, )
    if (part1text) {
        newfeed = feed.replace(part1text + " ", '')
    } else {
@@ -504,6 +505,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
    finalresult = []
    for(var i = 0; i < result.length; i++) {
        var repres = sharedStart(result[i], partialret)
+       console.log(result[i] + '%%%%%%%%%%%%%%%%%%%%%%%%%%%')
        if (repres !== "") {
            container.append('<span id="res'+countcontainer+'" class="res'+countcontainer+' spanres p-1"> ' + repres + '</span>');
            countcontainer += 1;
@@ -516,7 +518,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
    // Coloring the drop down box selections
    partial.closest('.bmo').find('.dropdown').html(container);
    resetcolors('.res', $('.spanres').length)
-   $('.res' + selecte).css("background-color","#eee")
+   $('.res' + selecte).css("background-color","#fff")
    if (countcontainer>1) {
        partial.closest('.bmo').find('.dropdown').css('visibility', 'visible');
    }
@@ -528,7 +530,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
    for (m=0; m<attn.length; m++) {
        if (attn[m] != 0) {
            // partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,' + attn[m] + ')')
-           partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,0.5')
+           partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,0.5)')
        }
        else {
            partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(0,255,0,0.5)')
@@ -580,25 +582,28 @@ $(document).ready(function() {
        inputs = data.result;
        langspec = data.langspec
        // langtolangid = data.langtolangid;

        console.log(inputs)
        $('#cardscoll').html('')
        $('#corpusinput').html('')

        for (i=0; i<inputs.length; i++) {
+           /*To set the source part of the page*/
            if (langspec == 'hi-en') {
-               $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>| ')
+               $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>| ') /* 1st index is the text with which the editable division is intitalised */
            } else {
                $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>. ')
            }
+           /*--------------------------------*/
            $('#cardscoll').append(
                `<div class="shadow p-3 my-3 rounded bmo cardescoll">
                <div class="row">
                <div class="col-9">
-               <div class="hin_inp pb-2" contenteditable="false">`+ inputSpan(inputs[i][0]) + `</div>
+               <div class="hin_inp pb-2" contenteditable="false">`+ inputSpan(inputs[i][0]) /*Wraps each word of sentence around span and returns*/ + `</div>
                <div class="dropcontainer">
                <div class="partcontainer">
                <div class="suggest transtext" contenteditable="false"></div>
-               <div class=" partial transtext" id="card` + i + `" contenteditable="true"
+               <div class="partial transtext" id="card` + i + `" contenteditable="true"
                data-tab=0 data-enter=0 data-up=0 data-down=0 data-others=0 data-pgup=0 data-pgdn=0 data-end=0 data-right=0 data-left=0 data-bkspc=0 data-time=0
                >`+ inputs[i][1] + `</div>
                </div>
@@ -767,10 +772,16 @@ $(document).ready(function() {

        var hin_inp = partial.closest('.bmo').find('.hin_inp')
        globalPartial = partial;
+       console.log("#########################################3")
+       console.log("#########################################3")
+       console.log(partial.clone().children().remove().end().text())
+       console.log("#########################################4")
+       console.log("#########################################3")
+
        if (sockets_use == true) {
            connectSocket.send(JSON.stringify({
-               'partial_translation': partial.clone().children().remove().end().text(),
-               'original': hin_inp.text(),
+               'partial_translation': partial.clone().children().remove().end().text(), // The text translated by user so far
+               'original': hin_inp.text(), // The full sentence to be translated
                'langspec': langspec
            }));
        }
@@ -778,7 +789,7 @@ $(document).ready(function() {

        //OLD, JANKY HTTP REQUEST!!
        searchRequest = $.getJSON(http_translate, {
-           a: hin_inp.text(),
+           a: hin_inp.text(), // Maybe use some good names here?
            b: partial.clone().children().remove().end().text()
        }, function(data) {
            // console.log(data)
@@ -23,7 +23,7 @@
    if (corpusinput) {
        $.ajax({
            type: "POST",
-           url: '/simple/corpusinput',
+           url: '/simple/corpusinput', //corpusinput function in mtsimple/views.py
            data: {
                'translate': corpusinput,
                'langselect': $('#src').find(":selected").val() + "-" + $('#tgt').find(":selected").val(),
@@ -51,11 +51,18 @@
        if (lang == "bn-en") {
            text = "মৌসুমি বৃষ্টি একটি অভিশাপ দ্বারা আশীর্বাদ করা একটি আশীর্বাদ। যখন পরিমাণগত পরিমাণে বৃষ্টির পরিমাণ কম হয়, তখন এটি একটি আশীর্বাদের জন্য আমাদের পরে গরম তাপের গ্রীষ্ম। ফসলের প্রাচুর্যের কারণে এটি কৃষকদের জন্য একটি আশীর্বাদ। শুষ্ক গ্রীষ্মের পরে নদী ভরাট।"
        }
+       if (lang == "hi-gondi") {
+           text = "आज मौसम सुहावना है। हमें शाम को बाहर जाना चाहिए।"
+       }
        console.log(text)
        $("#corpusinput").val(text);
        $("#corpusinput").focus();
        $("#corpusinput").trigger('autoresize');
    });
+
+   function limitTgtOptions() {
+       // TODO: Limit target language to only hindi
+   }
    });

    </script>
@@ -81,6 +88,7 @@
    <select class="form-control" id="tgt">
        <option value="en">English</option value="en">
        <option value="hi">Hindi</option value="en">
+       <option value="gondi" onselect="limitTgtOptions">Gondi</option value="en">
    </select>
    </div>
