Add REST APIs for translation queries

2020-05-27 16:01:24 +05:30 · 2020-05-27 16:01:24 +05:30 · b9bc997f6f
--- a/InteractiveTranslation/settings.py
+++ b/InteractiveTranslation/settings.py
@ -52,6 +52,7 @@ INSTALLED_APPS = [
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
+    'rest_framework',
    'mt',
    'mtsimple',
    'mtpara',
--- a/InteractiveTranslation/urls.py
+++ b/InteractiveTranslation/urls.py
@ -20,10 +20,16 @@ from django.conf.urls.static import static
 from django.contrib.staticfiles.urls import staticfiles_urlpatterns

 urlpatterns = [
-    path('', include('mt.urls')),
+    path('', include('mt.urls')), #TODO: This redirects to simple/ can we do it directly?
    path('simple/', include('mtsimple.urls')),
    path('para/', include('mtpara.urls')),
    # path('gpt/', include('gpt.urls')),
    path('admin/', admin.site.urls),
    path('accounts/', include('django.contrib.auth.urls')),
+
+
+    # REST FRAMEWORK URLS
+    path('api/simple/', include('mtsimple.api.urls')),
+
+
 ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) + staticfiles_urlpatterns()
--- a/docs/html-documentation-generated.zip
+++ b/docs/html-documentation-generated.zip
--- a/docs/html-documentation-generated/.swagger-codegen-ignore
+++ b/docs/html-documentation-generated/.swagger-codegen-ignore
@ -0,0 +1,23 @@
+# Swagger Codegen Ignore
+# Generated by swagger-codegen https://github.com/swagger-api/swagger-codegen
+
+# Use this file to prevent files from being overwritten by the generator.
+# The patterns follow closely to .gitignore or .dockerignore.
+
+# As an example, the C# client generator defines ApiClient.cs.
+# You can make changes and tell Swagger Codgen to ignore just this file by uncommenting the following line:
+#ApiClient.cs
+
+# You can match any string of characters against a directory, file or extension with a single asterisk (*):
+#foo/*/qux
+# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux
+
+# You can recursively match patterns against a directory, file or extension with a double asterisk (**):
+#foo/**/qux
+# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux
+
+# You can also negate patterns with an exclamation (!).
+# For example, you can ignore all files in a docs folder with the file extension .md:
+#docs/*.md
+# Then explicitly reverse the ignore rule for a single file:
+#!docs/README.md
--- a/docs/html-documentation-generated/.swagger-codegen/VERSION
+++ b/docs/html-documentation-generated/.swagger-codegen/VERSION
@ -0,0 +1 @@
+3.0.19
--- a/docs/html-documentation-generated/index.html
+++ b/docs/html-documentation-generated/index.html
@ -0,0 +1,282 @@
+<!doctype html>
+<html>
+  <head>
+    <title>SIMPLE TRANSLATION API</title>
+    <style type="text/css">
+      body {
+      	font-family: Trebuchet MS, sans-serif;
+      	font-size: 15px;
+      	color: #444;
+      	margin-right: 24px;
+      }
+      
+      h1	{
+      	font-size: 25px;
+      }
+      h2	{
+      	font-size: 20px;
+      }
+      h3	{
+      	font-size: 16px;
+      	font-weight: bold;
+      }
+      hr	{
+      	height: 1px;
+      	border: 0;
+      	color: #ddd;
+      	background-color: #ddd;
+      }
+      
+      .app-desc {
+        clear: both;
+        margin-left: 20px;
+      }
+      .param-name {
+        width: 100%;
+      }
+      .license-info {
+        margin-left: 20px;
+      }
+      
+      .license-url {
+        margin-left: 20px;
+      }
+      
+      .model {
+        margin: 0 0 0px 20px;
+      }
+      
+      .method {
+        margin-left: 20px;
+      }
+      
+      .method-notes	{
+      	margin: 10px 0 20px 0;
+      	font-size: 90%;
+      	color: #555;
+      }
+      
+      pre {
+        padding: 10px;
+        margin-bottom: 2px;
+      }
+      
+      .http-method {
+       text-transform: uppercase;
+      }
+      
+      pre.get {
+        background-color: #0f6ab4;
+      }
+      
+      pre.post {
+        background-color: #10a54a;
+      }
+      
+      pre.put {
+        background-color: #c5862b;
+      }
+      
+      pre.delete {
+        background-color: #a41e22;
+      }
+      
+      .huge	{
+      	color: #fff;
+      }
+      
+      pre.example {
+        background-color: #f3f3f3;
+        padding: 10px;
+        border: 1px solid #ddd;
+      }
+      
+      code {
+        white-space: pre;
+      }
+      
+      .nickname {
+        font-weight: bold;
+      }
+      
+      .method-path {
+        font-size: 1.5em;
+        background-color: #0f6ab4;
+      }
+      
+      .up {
+        float:right;
+      }
+      
+      .parameter {
+        width: 500px;
+      }
+      
+      .param {
+        width: 500px;
+        padding: 10px 0 0 20px;
+        font-weight: bold;
+      }
+      
+      .param-desc {
+        width: 700px;
+        padding: 0 0 0 20px;
+        color: #777;
+      }
+      
+      .param-type {
+        font-style: italic;
+      }
+      
+      .param-enum-header {
+      width: 700px;
+      padding: 0 0 0 60px;
+      color: #777;
+      font-weight: bold;
+      }
+      
+      .param-enum {
+      width: 700px;
+      padding: 0 0 0 80px;
+      color: #777;
+      font-style: italic;
+      }
+      
+      .field-label {
+        padding: 0;
+        margin: 0;
+        clear: both;
+      }
+      
+      .field-items	{
+      	padding: 0 0 15px 0;
+      	margin-bottom: 15px;
+      }
+      
+      .return-type {
+        clear: both;
+        padding-bottom: 10px;
+      }
+      
+      .param-header {
+        font-weight: bold;
+      }
+      
+      .method-tags {
+        text-align: right;
+      }
+      
+      .method-tag {
+        background: none repeat scroll 0% 0% #24A600;
+        border-radius: 3px;
+        padding: 2px 10px;
+        margin: 2px;
+        color: #FFF;
+        display: inline-block;
+        text-decoration: none;
+      }
+    </style>
+  </head>
+  <body>
+  <h1>SIMPLE TRANSLATION API</h1>
+    <div class="app-desc">Api for translation suggestions and attention score</div>
+    <div class="app-desc">More information: <a href="https://helloreverb.com">https://helloreverb.com</a></div>
+    <div class="app-desc">Contact Info: <a href="mailto:hello@helloreverb.com">hello@helloreverb.com</a></div>
+    <div class="app-desc">Version: 1.0</div>
+    <div class="app-desc">BasePath:/I927/INMT-SIMPLE/1.0</div>
+    <div class="license-info">All rights reserved</div>
+    <div class="license-url">http://apache.org/licenses/LICENSE-2.0.html</div>
+  <h2>Access</h2>
+
+  <h2><a name="__Methods">Methods</a></h2>
+  [ Jump to <a href="#__Models">Models</a> ]
+
+  <h3>Table of Contents </h3>
+  <div class="method-summary"></div>
+  <h4><a href="#Simple">Simple</a></h4>
+  <ul>
+  <li><a href="#translateNew"><code><span class="http-method">get</span> /api/simple/translate_new</code></a></li>
+  </ul>
+
+  <h1><a name="Simple">Simple</a></h1>
+  <div class="method"><a name="translateNew"></a>
+    <div class="method-path">
+    <a class="up" href="#__Methods">Up</a>
+    <pre class="get"><code class="huge"><span class="http-method">get</span> /api/simple/translate_new</code></pre></div>
+    <div class="method-summary">Get suggestions, attention scores, perplexity and average score for your partial translation (<span class="nickname">translateNew</span>)</div>
+    <div class="method-notes"></div>
+
+
+
+
+
+    <h3 class="field-label">Query parameters</h3>
+    <div class="field-items">
+      <div class="param">langspec (required)</div>
+      
+            <div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; The type of translation </div>      <div class="param">sentence (required)</div>
+      
+            <div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; The sentence that is to be translated </div>      <div class="param">partial_trans (required)</div>
+      
+            <div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; Partial translation done so far by the user </div>    </div>  <!-- field-items -->
+
+
+    <h3 class="field-label">Return type</h3>
+    <div class="return-type">
+      <a href="#inline_response_200">inline_response_200</a>
+      
+    </div>
+
+    <!--Todo: process Response Object and its headers, schema, examples -->
+
+    <h3 class="field-label">Example data</h3>
+    <div class="example-data-content-type">Content-Type: application/json</div>
+    <pre class="example"><code>{
+  "result" : [ "result", "result" ],
+  "attn" : [ 0.8008281904610115, 0.8008281904610115 ],
+  "avg" : 1.4658129805029452,
+  "partial" : "partial",
+  "ppl" : 6.027456183070403
+}</code></pre>
+
+    <h3 class="field-label">Produces</h3>
+    This API call produces the following media types according to the <span class="header">Accept</span> request header;
+    the media type will be conveyed by the <span class="header">Content-Type</span> response header.
+    <ul>
+      <li><code>application/json</code></li>
+    </ul>
+
+    <h3 class="field-label">Responses</h3>
+    <h4 class="field-label">200</h4>
+    An array containing result (the suggestions), attentions, partial translation by the user, perplexity and average score
+        <a href="#inline_response_200">inline_response_200</a>
+    <h4 class="field-label">400</h4>
+    Bad Request
+        <a href="#"></a>
+    <h4 class="field-label">500</h4>
+    Some internal server error
+        <a href="#"></a>
+  </div> <!-- method -->
+  <hr/>
+
+  <h2><a name="__Models">Models</a></h2>
+  [ Jump to <a href="#__Methods">Methods</a> ]
+
+  <h3>Table of Contents</h3>
+  <ol>
+    <li><a href="#inline_response_200"><code>inline_response_200</code></a></li>
+  </ol>
+
+  <div class="model">
+    <h3><a name="inline_response_200"><code>inline_response_200</code></a> <a class="up" href="#__Models">Up</a></h3>
+    
+    <div class="field-items">
+      <div class="param">result (optional)</div><div class="param-desc"><span class="param-type"><a href="#string">array[String]</a></span>  </div>
+<div class="param">attn (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">array[BigDecimal]</a></span>  </div>
+<div class="param">partial (optional)</div><div class="param-desc"><span class="param-type"><a href="#string">String</a></span>  </div>
+<div class="param">ppl (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">BigDecimal</a></span>  </div>
+<div class="param">avg (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">BigDecimal</a></span>  </div>
+    </div>  <!-- field-items -->
+  </div>
+  </body>
+</html>
--- a/docs/openapi-yaml-client-generated.zip
+++ b/docs/openapi-yaml-client-generated.zip
--- a/docs/openapi-yaml-client-generated/.swagger-codegen-ignore
+++ b/docs/openapi-yaml-client-generated/.swagger-codegen-ignore
@ -0,0 +1,23 @@
+# Swagger Codegen Ignore
+# Generated by swagger-codegen https://github.com/swagger-api/swagger-codegen
+
+# Use this file to prevent files from being overwritten by the generator.
+# The patterns follow closely to .gitignore or .dockerignore.
+
+# As an example, the C# client generator defines ApiClient.cs.
+# You can make changes and tell Swagger Codgen to ignore just this file by uncommenting the following line:
+#ApiClient.cs
+
+# You can match any string of characters against a directory, file or extension with a single asterisk (*):
+#foo/*/qux
+# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux
+
+# You can recursively match patterns against a directory, file or extension with a double asterisk (**):
+#foo/**/qux
+# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux
+
+# You can also negate patterns with an exclamation (!).
+# For example, you can ignore all files in a docs folder with the file extension .md:
+#docs/*.md
+# Then explicitly reverse the ignore rule for a single file:
+#!docs/README.md
--- a/docs/openapi-yaml-client-generated/.swagger-codegen/VERSION
+++ b/docs/openapi-yaml-client-generated/.swagger-codegen/VERSION
@ -0,0 +1 @@
+3.0.19
--- a/docs/openapi-yaml-client-generated/README.md
+++ b/docs/openapi-yaml-client-generated/README.md
--- a/docs/openapi-yaml-client-generated/openapi.yaml
+++ b/docs/openapi-yaml-client-generated/openapi.yaml
@ -0,0 +1,75 @@
+openapi: 3.0.0
+info:
+  title: SIMPLE TRANSLATION API
+  description: Api for translation suggestions and attention score
+  version: "1.0"
+servers:
+- url: https://virtserver.swaggerhub.com/I927/INMT-SIMPLE/1.0
+  description: SwaggerHub API Auto Mocking
+paths:
+  /api/simple/translate_new:
+    get:
+      tags:
+      - simple
+      summary: Get suggestions, attention scores, perplexity and average score for your
+        partial translation
+      operationId: translate_new
+      parameters:
+      - name: langspec
+        in: query
+        description: The type of translation
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      - name: sentence
+        in: query
+        description: The sentence that is to be translated
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      - name: partial_trans
+        in: query
+        description: Partial translation done so far by the user
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      responses:
+        "200":
+          description: An array containing result (the suggestions), attentions, partial
+            translation by the user, perplexity and average score
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/inline_response_200'
+        "400":
+          description: Bad Request
+        "500":
+          description: Some internal server error
+components:
+  schemas:
+    inline_response_200:
+      type: object
+      properties:
+        result:
+          type: array
+          items:
+            type: string
+        attn:
+          type: array
+          items:
+            multipleOf: 0.1
+            type: number
+        partial:
+          type: string
+        ppl:
+          multipleOf: 0.1
+          type: number
+        avg:
+          multipleOf: 0.1
+          type: number
--- a/mt/migrations/0015_auto_20200519_1612.py
+++ b/mt/migrations/0015_auto_20200519_1612.py
@ -0,0 +1,69 @@
+# Generated by Django 3.0.3 on 2020-05-19 16:12
+
+from django.db import migrations, models
+import django.db.models.deletion
+import jsonfield.fields
+
+
+class Migration(migrations.Migration):
+    # Schema migration for the `mt` app: creates three models
+    # (customKeyboardCommands, translatorKeyboardLayouts, translatorcorpus)
+    # and alters two existing fields. Generated by Django's makemigrations;
+    # the operations below run in list order.
+
+    # Must be applied after the previous `mt` migration.
+    dependencies = [
+        ('mt', '0014_auto_20190606_1535'),
+    ]
+
+    operations = [
+        # Named set of keyboard commands; each command is stored as an integer
+        # (presumably a key code -- confirm against the model definition).
+        migrations.CreateModel(
+            name='customKeyboardCommands',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('select_entire_suggestion', models.IntegerField()),
+                ('select_single_word_from_suggestion', models.IntegerField()),
+                ('navigate_to_next_corpus_fragment', models.IntegerField()),
+                ('navigate_to_previous_corpus_fragment', models.IntegerField()),
+                ('submit_translation', models.IntegerField()),
+                ('select_next_translation_suggestion', models.IntegerField()),
+                ('select_previous_translation_suggestion', models.IntegerField()),
+                ('custom_layout_name', models.CharField(max_length=30)),
+            ],
+            options={
+                'verbose_name': 'Custom Keyboard Command Set',
+                'verbose_name_plural': 'Custom Keyboard Command Sets',
+            },
+        ),
+        # corpus.helpprovision becomes a two-letter choice field defaulting to 'IT'.
+        migrations.AlterField(
+            model_name='corpus',
+            name='helpprovision',
+            field=models.CharField(choices=[('IT', 'Interactive Translation'), ('PE', 'Post Editing'), ('BL', 'Baseline')], default='IT', max_length=2),
+        ),
+        # dockeystroke.keystrokeseries is stored through the third-party jsonfield package.
+        migrations.AlterField(
+            model_name='dockeystroke',
+            name='keystrokeseries',
+            field=jsonfield.fields.JSONField(),
+        ),
+        # Join table linking a translator to a keyboard command set; each
+        # (translator, customKeyboardCommands) pair may appear only once.
+        migrations.CreateModel(
+            name='translatorKeyboardLayouts',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('customKeyboardCommands', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorconfigs', to='mt.customKeyboardCommands')),
+                ('translator', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorconfigs', to='mt.translator')),
+            ],
+            options={
+                'verbose_name': 'Translator Keyboard Layout Specified',
+                'verbose_name_plural': 'Translator Keyboard Layout Specified',
+                'unique_together': {('translator', 'customKeyboardCommands')},
+            },
+        ),
+        # Join table linking a translator to a corpus; each
+        # (translator, corpus) pair may appear only once.
+        migrations.CreateModel(
+            name='translatorcorpus',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('corpus', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='corpustranslators', to='mt.corpus')),
+                ('translator', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorcorpus', to='mt.translator')),
+            ],
+            options={
+                'verbose_name': 'Translator Corpus Possible',
+                'verbose_name_plural': '6. Translator Corpus Possible',
+                'unique_together': {('translator', 'corpus')},
+            },
+        ),
+    ]
--- a/mt/views.py
+++ b/mt/views.py
@ -43,6 +43,10 @@ from django.conf import settings

 with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
        opt = pickle.load(f)
+        print("###########################DEBUG######################################")
+        print(opt) # This is the file that mentions model details
+        print(dir_path + "/opt_data")
+        print("#######################################################################")



--- a/mtsimple/api/init.py
+++ b/mtsimple/api/init.py
--- a/mtsimple/api/urls.py
+++ b/mtsimple/api/urls.py
@ -0,0 +1,7 @@
+from django.urls import path
+
+from . import views
+
+# Routes of the simple-translation REST API.
+urlpatterns = [
+    # Registered without a trailing slash, so the route is exactly 'translate_new'.
+    path('translate_new', views.translate_new, name='translate_new'),
+]
--- a/mtsimple/api/views.py
+++ b/mtsimple/api/views.py
@ -0,0 +1,191 @@
+from django.http import HttpResponse, JsonResponse
+from rest_framework import status
+from rest_framework.response import Response
+from rest_framework.decorators import api_view
+import re, os, math
+import requests
+import pickle
+
+from indic_transliteration import sanscript
+
+from onmt.translate.infertranslator import build_translator
+from onmt.utils.parse import ArgumentParser
+import mtsimple
+# Directory two levels above the mtsimple package file; the code below
+# resolves the 'opt_data' file and the 'model' directory relative to it.
+dir_path = os.path.dirname(os.path.dirname(mtsimple.__file__))
+
+#TODO: Find a Way to not repeat the below starter code from mtsimple/views.py
+
+# Configuration per supported translation direction, keyed by the `langspec`
+# value that clients send. Each entry holds:
+#   src / tgt     - source and target language codes
+#   model         - model file name, joined onto <dir_path>/model
+#   indic_code    - truthy when the partial translation may need transliteration
+#                   into Devanagari before use; None disables that step
+#   provide_help  - when True and a partial translation is present, the partial
+#                   text is also included in the returned suggestion list
+langspecs = {
+    'en-hi' : {
+        'src' : 'en',
+        'tgt' : 'hi',
+        'model': 'full_iitb_enhi_50v.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : True,
+    },
+    'hi-en' : {
+        'src' : 'hi',
+        'tgt' : 'en',
+        'model': 'full_iitb_bpe_hien.pt',
+        'indic_code': None,
+        'provide_help' : False,
+    },
+    'hi-gondi' : {
+        'src' : 'hi',
+        'tgt' : 'gondi',
+        'model': 'hi-gondi.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : False,
+    },
+    # Disabled multilingual entry, kept for reference:
+    # '*-en' : {
+    #     'src' : 'hi',
+    #     'tgt' : 'en',
+    #     'model': 'multiling.pt',
+    #     'indic_code': None,
+    #     'provide_help' : False,
+    # }
+}
+
+# Load the pickled translator options object from <dir_path>/opt_data.
+# NOTE(review): pickle.load can execute arbitrary code; opt_data must only
+# ever come from a trusted source.
+with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
+        opt = pickle.load(f)
+
+# Translators per langspec key:
+# engines[key] = {"translatorbest": ..., "translatorbigram": ...}.
+engines = {}
+# Built once at import time. The same `opt` object is mutated before each
+# build_translator call, so the order of the assignments below matters.
+for key, value in langspecs.items():
+    opt.models = [os.path.join(dir_path, 'model', value['model'])]
+    opt.n_best = 1
+    opt.max_length = 100
+    opt.global_attention_function = 'sparsemax'
+    ArgumentParser.validate_translate_opts(opt)
+    engines[key] = {"translatorbest": build_translator(opt, report_score=True)}
+    # translatorbest: single best hypothesis (n_best=1), up to 100 tokens
+
+    opt.n_best = 5
+    opt.max_length = 2
+    opt.global_attention_function = 'sparsemax'
+    ArgumentParser.validate_translate_opts(opt)
+    engines[key]["translatorbigram"] = build_translator(opt, report_score=True)
+    # translatorbigram: five best hypotheses (n_best=5) of at most two tokens
+
+def quotaposto(s, lang="en"):
+    """Turn escaped model output back into plain text.
+
+    Replaces the ``&quot;`` and ``&apos;`` entities with literal quote
+    characters and removes ``@@`` joiner markers (presumably BPE sub-word
+    markers -- confirm against the model's tokenisation).
+
+    ``lang`` is accepted but not used by the current implementation.
+    Returns the cleaned string.
+    """
+    s = re.sub(r"&quot;", r'"', s)
+    s = re.sub(r"&apos;", r"'", s)
+    # Drop "@@ " between tokens, and a trailing "@@" (optionally followed by one space).
+    s = re.sub(r"(@@ )|(@@ ?$)", r"", s)
+    #This is work in progress to make writing as natural as possible. taking care of spaces before and after certain characters.
+    # s = re.sub(r"(\s+)([!:?,.।\']+)", r"\2", s)
+    # s = re.sub(r"([({\[<]+)(\s+)", r"\1", s)
+    # s = re.sub(r"(\s+)([)}\]>]+)", r"\2", s)
+    return s
+
+def toquotapos(s, lang="en"):
+    """Normalise raw text into the escaped, space-separated form fed to the translators.
+
+    Steps, in order: lowercase; map curly quotes to straight quotes; map the
+    Devanagari visarga to a colon; surround runs of punctuation with spaces;
+    escape quotes as ``&quot;`` / ``&apos;``; collapse whitespace runs to a
+    single space. Leading and trailing spaces are not stripped.
+
+    ``lang`` is accepted but not used by the current implementation.
+    Returns the normalised string.
+    """
+    # if lang=="en":
+    s = s.lower()
+    s = re.sub(r"([\“\”])", r'"', s)
+    s = re.sub(r"([\‘\’])", r"'", s)
+    s = re.sub(r"([\ः])", r":", s)
+    # Punctuation padding must run before the entity escaping below: '&' and ';'
+    # are in this character class, so escaping first would split the entities.
+    s = re.sub(r"([-!$%^&*()_+|~=`{}\[\]:\";<>?,.\/#@।]+)", r" \1 ", s)
+    s = re.sub(r'"', r'&quot;', s)
+    s = re.sub(r"'", r"&apos;", s)
+    s = re.sub(r"(\s+)", r" ", s)
+    
+    return s
+
+@api_view(['GET',])
+def translate_new(request):
+    langspec = request.GET.get('langspec')
+    sentence = request.GET.get('sentence')
+    partial_trans = request.GET.get('partial_trans', '')
+    translatorbest = engines[langspec]["translatorbest"]
+    translatorbigram = engines[langspec]["translatorbigram"]
+    print("Before processing")
+    print("##########################")
+    print("##########################")
+    print(sentence.strip())
+    print("##########################")
+    print("##########################")
+    print("##########################")
+
+    L1 = toquotapos(sentence.strip()) # request.GET.get('a') contains the whole sentence to be translated
+    print("############After Processing########")
+    print((L1))
+    L2 = partial_trans # request.GET.get('b') contains the partial sentence to be translated
+    L2split = L2.split()
+
+    if langspecs[langspec]['indic_code']:
+        # print(L2[-1])
+        if L2 != '' and bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])):
+            params = {}
+            params['inString'] = L2split[-1]
+            params['lang'] = 'hindi'
+            data = requests.get('http://xlit.quillpad.in/quillpad_backend2/processWordJSON', params = params).json()
+            L2split[-1] = data['twords'][0]['options'][0]
+            L2 = ' '.join(L2split)
+            # L2 = transliterate(L2, sanscript.ITRANS, langspec['indic_code'])
+
+    print(L2, u'\u0900-\u097F')
+
+    something, pred, covatn2d, score_total, words_total = translatorbest.translate(
+        src=[L1],
+        tgt=None,
+        src_dir='',
+        batch_size=30,
+        attn_debug=True,
+        partial = toquotapos(L2)
+        )
+
+    scores, predictions, score_total, words_total = translatorbigram.translate(
+        src=[L1],
+        tgt=None,
+        src_dir='',
+        batch_size=30,
+        attn_debug=False,
+        partial = toquotapos(L2),
+        dymax_len = 2,
+        )
+
+
+    print(covatn2d, 'convatn2d')
+    if L2 != '':
+        transpattn = [*zip(*covatn2d)]
+        attnind = [attn.index(max(attn)) for attn in transpattn]
+        print('attnind', attnind)
+        attndist = [[ i for i, x in enumerate(attnind) if x==k] for k in range(len(L2.strip().split(" ")))]
+        print('attndist', attndist)
+        sumattn = [1] * len(L1.split(" "))
+        for i in attndist:
+            for k in i:
+                sumattn[k] = 0
+        # attn = covatn2d[:len(L2.strip().split(" "))]
+        # sumattn = [sum(i) for i in zip(*attn)]
+        # for i in range(len(attn)):
+        #     if max(attn[i]) > 0.30:
+        #         sumattn[attn[i].index(max(attn[i]))] = 1
+        #     print(max(attn[i]))
+        # newattn = [float("{0:.2f}".format(1-(k/max(sumattn)))) for k in sumattn]
+        # # sumattn = [float("{0:.2f}".format(k/sum(newattn))) for k in newattn]
+        # newattn = [ 1.66*max(0, (k-0.4)) for k in newattn]
+
+    else:
+        sumattn = [1.00] * len(L1.split(" "))    
+    predictions = predictions[0]
+    print(predictions)
+    seen = set()
+    seen_add = seen.add
+    sentence = [quotaposto(L2 + x.capitalize()[len(L2):], langspecs[langspec]["tgt"]) + " " for x in predictions if not (x in seen or seen_add(x))]
+    # sentence = [x.replace(L2, "") for x in sentence]
+    sentence = '\n'.join(sentence)
+    print("pred[0][0]", pred[0][0], pred[0][0][len(L2):])
+    if langspecs[langspec]['provide_help'] and L2:
+        sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[langspec]["tgt"]) + '\n' + L2 + '\n' + sentence
+    else:
+        sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[langspec]["tgt"]) + '\n' + sentence
+    
+    print(sentence)
+    perplexity = float(math.exp(-score_total / words_total))
+    avg_score = float(score_total / words_total)
+    
+    print("sentence", sentence)
+    # print(something, pred)
+    return JsonResponse({'result': sentence.split('\n'), 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
+
--- a/mtsimple/views.py
+++ b/mtsimple/views.py
@ -29,6 +29,7 @@ import requests

 import math

+# defines the configuration of the translation type selected by the user
 langspecs = {
    'en-hi' : {
        'src' : 'en',
@ -44,6 +45,14 @@ langspecs = {
        'indic_code': None,
        'provide_help' : False,
    },
+
+    'hi-gondi' : {
+        'src' : 'hi',
+        'tgt' : 'gondi',
+        'model': 'hi-gondi.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : False,
+    },
    # '*-en' : {
    #     'src' : 'hi',
    #     'tgt' : 'en',
@ -85,6 +94,7 @@ with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
        opt = pickle.load(f)

 engines = {}
+# The model engines are initialised here after loading opt (which presumably describes the model configuration).
 for key, value in langspecs.items():
    opt.models = [os.path.join(dir_path, 'model', value['model'])]
    opt.n_best = 1
@ -92,19 +102,23 @@ for key, value in langspecs.items():
    opt.global_attention_function = 'sparsemax'
    ArgumentParser.validate_translate_opts(opt)
    engines[key] = {"translatorbest": build_translator(opt, report_score=True)}
+    #translatorbest builds the best complete translation of the sentence

    opt.n_best = 5
    opt.max_length = 2
    opt.global_attention_function = 'sparsemax'
    ArgumentParser.validate_translate_opts(opt)
    engines[key]["translatorbigram"] = build_translator(opt, report_score=True)
+    #translatorbigram builds 5 best translations of length two

 global corpusops
 corpusops = []

+# The view function for the first page url : simple/
 def corpus(request):
    return render(request, 'simplecorpus.html')

+#The view function called after setting languagespecs and getting the input in simple/ (called after corpusinput)
 def translate(request):
    return render(request, 'simpletranslate.html')

@ -112,7 +126,7 @@ def end(request):
    return render(request, 'simpleend.html')

 def split_sentences(st):
-    #Split sentences based 
+    #Split sentences based on !?।|.
    sentences = re.split(r'[!?।|.](?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)', st)
    
    if sentences[-1]:
@ -120,6 +134,12 @@ def split_sentences(st):
    else:
        return sentences[:-1]

+""" 
+The view function for getting the input for translation on the first page (simple/)
+
+Splits the sentence based on !?।| cleans it and saves the list in session["corpusinps"]
+"""
+
 def corpusinput(request):
    corpusraw = request.POST.get('translate')
    langselect = request.POST.get('langselect')
@ -128,7 +148,11 @@ def corpusinput(request):
    request.session["langspec"] = langselect
    print(request.session["langspec"])
    s = corpusraw.strip()
+
+    print(s, "DEBUG: raw corpus before split_sentences")
    spsent = [k.strip() for k in split_sentences(s)]
+    print(spsent, "DEBUG: raw corpus after split_sentences")
+
    corpusinps = list(filter(lambda elem: elem.strip(), spsent))
    request.session["corpusinps"] = [[k, ''] for k in corpusinps]
    print(request.session["corpusinps"])
@ -159,11 +183,22 @@ def indic(request):
 def translate_new(request):
    translatorbest = engines[request.session["langspec"]]["translatorbest"]
    translatorbigram = engines[request.session["langspec"]]["translatorbigram"]
-    L1 = toquotapos(request.GET.get('a').strip())
-    L2 = request.GET.get('b', "")
+    print("Before processing")
+    print("##########################")
+    print("##########################")
+    print(request.GET.get('a').strip())
+    print("##########################")
+    print("##########################")
+    print("##########################")
+
+    L1 = toquotapos(request.GET.get('a').strip()) # request.GET.get('a') contains the whole sentence to be translated
+    print("############After Processing########")
+    print((L1))
+    L2 = request.GET.get('b', "") # request.GET.get('b') contains the partial sentence to be translated
    L2split = L2.split()

    if langspecs[request.session["langspec"]]['indic_code']:
+        # print(L2[-1])
        if L2 != '' and bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])):
            params = {}
            params['inString'] = L2split[-1]
@ -173,7 +208,7 @@ def translate_new(request):
            L2 = ' '.join(L2split)
            # L2 = transliterate(L2, sanscript.ITRANS, langspec['indic_code'])

-    print(L2)
+    print(L2, u'\u0900-\u097F')

    something, pred, covatn2d, score_total, words_total = translatorbest.translate(
        src=[L1],
@ -183,6 +218,8 @@ def translate_new(request):
        attn_debug=True,
        partial = toquotapos(L2)
        )
+    
+    print("$$$$$$$$$$$$$$$$$$$$$$$$")

    scores, predictions, score_total, words_total = translatorbigram.translate(
        src=[L1],
@ -195,11 +232,13 @@ def translate_new(request):
        )


-    # print(covatn2d)
+    print(covatn2d, 'convatn2d')
    if L2 != '':
        transpattn = [*zip(*covatn2d)]
        attnind = [attn.index(max(attn)) for attn in transpattn]
+        print('attnind', attnind)
        attndist = [[ i for i, x in enumerate(attnind) if x==k] for k in range(len(L2.strip().split(" ")))]
+        print('attndist', attndist)
        sumattn = [1] * len(L1.split(" "))
        for i in attndist:
            for k in i:
@ -223,6 +262,7 @@ def translate_new(request):
    sentence = [quotaposto(L2 + x.capitalize()[len(L2):], langspecs[request.session["langspec"]]["tgt"]) + " " for x in predictions if not (x in seen or seen_add(x))]
    # sentence = [x.replace(L2, "") for x in sentence]
    sentence = '\n'.join(sentence)
+    print("pred[0][0]", pred[0][0], pred[0][0][len(L2):])
    if langspecs[request.session["langspec"]]['provide_help'] and L2:
        sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[request.session["langspec"]]["tgt"]) + '\n' + L2 + '\n' + sentence
    else:
@ -231,6 +271,6 @@ def translate_new(request):
    print(sentence)
    perplexity = float(math.exp(-score_total / words_total))
    avg_score = float(score_total / words_total)
-    # print(scores)
+    print("sentence", sentence)
    # print(something, pred)
    return JsonResponse({'result': sentence, 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
--- a/opennmt/onmt/translate/infertranslator.py
+++ b/opennmt/onmt/translate/infertranslator.py
@ -307,6 +307,7 @@ class Translator(object):
            * all_scores is a list of `batch_size` lists of `n_best` scores
            * all_predictions is a list of `batch_size` lists
                of `n_best` predictions
+            * attns is a list of attention scores for the translation having the highest cumulative log likelihood
        """
        self.dymax_len = dymax_len
        self.partialf = None
@ -322,6 +323,7 @@ class Translator(object):
        # Logic for partial and partialf 
        if partial and partial != '':
            partials = partial.split()
+            print(partials, '~~~~partials~~~')
            vocabdict = dict(self.fields)["tgt"].base_field.vocab
            # if vocabdict.stoi[partials[-1]] == 0:
            if partialfcheck:
@ -335,6 +337,9 @@ class Translator(object):
                # self.partialf = [20.0] + [i[0] for i in sorted(editarr, key=lambda x: x[1])]
                
                self.partial = [vocabdict.stoi[x] for x in partials[:-1]]
+                print("#########vocabdict.stoi########")
+                print(self.partial)
+                print("##################################")
                
                self.partialf = [v for k, v in vocabdict.stoi.items() if k.startswith(partials[-1]) and v]
            else:
@ -384,7 +389,7 @@ class Translator(object):
        pred_score_total, pred_words_total = 0, 0
        gold_score_total, gold_words_total = 0, 0

-        all_scores = []
+        all_scores = [] # I guess this is the cumulative log likelihood score of each sentence
        all_predictions = []

        start_time = time.time()
@ -396,6 +401,8 @@ class Translator(object):
            translations = xlation_builder.from_batch(batch_data)

            for trans in translations:
+                print("Loop")
+                print(trans, trans.pred_sents)
                all_scores += [trans.pred_scores[:self.n_best]]
                pred_score_total += trans.pred_scores[0]
                pred_words_total += len(trans.pred_sents[0])
@ -405,6 +412,12 @@ class Translator(object):

                n_best_preds = [" ".join(pred)
                                for pred in trans.pred_sents[:self.n_best]]
+                
+                print("############n_best_preds###############")
+                print(n_best_preds)
+                print("############n_best_preds###############")
+
+
                if self.report_align:
                    align_pharaohs = [build_align_pharaoh(align) for align
                                      in trans.word_aligns[:self.n_best]]
@ -433,7 +446,7 @@ class Translator(object):
                        srcs = trans.src_raw
                    else:
                        srcs = [str(item) for item in range(len(attns[0]))]
-                    output = report_matrix(srcs, preds, attns)
+                    output = report_matrix(srcs, preds, attns) # This prints attentions in output for the sentence having the highest cumulative log likelihood score
                    
                    if self.logger:
                        self.logger.info(output)
--- a/static/translate-page.js
+++ b/static/translate-page.js
@ -387,6 +387,8 @@ function parseProcessedJsonResultsfunction(data, partial) {
  
  var container = $('<div />');

+  // Code for adding suggestions//
+
  var countcontainer = 0
  finalresult = []
  for(var i = 0; i < result.length; i++) {
--- a/static/translation.js
+++ b/static/translation.js
@ -1,4 +1,4 @@
-
+// This file serves as the script for simpletranslate.html
 /*
 *************************************************
 *************************************************
@ -53,6 +53,7 @@ function sharedStart(feed, partial) {
    part1text = partial.substring(0, lastspace)
    part2text = partial.substring(lastspace+1)
    var count = 0
+    console.log("DEBUG part1text", part1text, )
    if (part1text) {
        newfeed = feed.replace(part1text + " ", '')
    } else {
@ -504,6 +505,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
        finalresult = []
        for(var i = 0; i < result.length; i++) {
            var repres = sharedStart(result[i], partialret)
+            console.log(result[i] + '%%%%%%%%%%%%%%%%%%%%%%%%%%%')
            if (repres !== "") {
                container.append('<span id="res'+countcontainer+'" class="res'+countcontainer+' spanres p-1"> ' + repres + '</span>');
                countcontainer += 1;
@ -516,7 +518,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
        // Coloring the drop down box selections
        partial.closest('.bmo').find('.dropdown').html(container);
        resetcolors('.res', $('.spanres').length)
-        $('.res' + selecte).css("background-color","#eee")
+        $('.res' + selecte).css("background-color","#fff")
        if (countcontainer>1) {
            partial.closest('.bmo').find('.dropdown').css('visibility', 'visible');
        }
@ -528,7 +530,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
            for (m=0; m<attn.length; m++) {
                if (attn[m] != 0) {
                    // partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,' + attn[m] + ')')
-                    partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,0.5')
+                    partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,0.5)')
                }
                else {
                    partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(0,255,0,0.5)')
@ -580,25 +582,28 @@ $(document).ready(function() {
        inputs = data.result;
        langspec = data.langspec
        // langtolangid = data.langtolangid;
+
        console.log(inputs)
        $('#cardscoll').html('')
        $('#corpusinput').html('')
        
        for (i=0; i<inputs.length; i++) {
+            /*To set the source part of the page*/ 
            if (langspec == 'hi-en') {
-                $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>| ')
+                $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>| ') /* inputs[i][0] is the source text; inputs[i][1] is the text with which the editable div is initialised */
            } else {
                $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>. ')
            }
+            /*--------------------------------*/ 
            $('#cardscoll').append(
                `<div class="shadow p-3 my-3 rounded bmo cardescoll">
                                <div class="row">
                                <div class="col-9">
-                                <div class="hin_inp pb-2" contenteditable="false">`+ inputSpan(inputs[i][0]) + `</div>
+                                <div class="hin_inp pb-2" contenteditable="false">`+ inputSpan(inputs[i][0]) /*Wraps each word of sentence around span and returns*/ + `</div> 
                                <div class="dropcontainer">
                                    <div class="partcontainer">
                                        <div class="suggest transtext" contenteditable="false"></div>
-                                            <div class=" partial transtext" id="card` + i + `" contenteditable="true"
+                                            <div class="partial transtext" id="card` + i + `" contenteditable="true"
                                            data-tab=0 data-enter=0 data-up=0 data-down=0 data-others=0 data-pgup=0 data-pgdn=0 data-end=0 data-right=0 data-left=0 data-bkspc=0 data-time=0
                                            >`+ inputs[i][1] + `</div>
                                    </div>
@ -767,10 +772,16 @@ $(document).ready(function() {

            var hin_inp = partial.closest('.bmo').find('.hin_inp')
            globalPartial = partial;
+            console.log("#########################################3")
+            console.log("#########################################3")
+            console.log(partial.clone().children().remove().end().text())
+            console.log("#########################################4")
+            console.log("#########################################3")
+
            if (sockets_use == true) {
                connectSocket.send(JSON.stringify({
-                    'partial_translation': partial.clone().children().remove().end().text(),
-                    'original': hin_inp.text(),
+                    'partial_translation': partial.clone().children().remove().end().text(), // The text translated by user so far
+                    'original': hin_inp.text(), // The full sentence to be translated
                    'langspec': langspec
                }));
            } 
@ -778,7 +789,7 @@ $(document).ready(function() {

            //OLD, JANKY HTTP REQUEST!!
                searchRequest =  $.getJSON(http_translate, {
-                    a: hin_inp.text(),
+                    a: hin_inp.text(), // TODO: use descriptive parameter names instead of a/b
                    b: partial.clone().children().remove().end().text()
                }, function(data) {
                    // console.log(data)
--- a/templates/simplecorpus.html
+++ b/templates/simplecorpus.html
@ -23,7 +23,7 @@
                if (corpusinput) {
                  $.ajax({
                      type: "POST",
-                      url: '/simple/corpusinput',
+                      url: '/simple/corpusinput', //corpusinput function in mtsimple/views.py
                      data: {
                        'translate': corpusinput,
                        'langselect': $('#src').find(":selected").val() + "-" + $('#tgt').find(":selected").val(),
@ -51,11 +51,18 @@
                if (lang == "bn-en") {
                  text = "মৌসুমি বৃষ্টি একটি অভিশাপ দ্বারা আশীর্বাদ করা একটি আশীর্বাদ। যখন পরিমাণগত পরিমাণে বৃষ্টির পরিমাণ কম হয়, তখন এটি একটি আশীর্বাদের জন্য আমাদের পরে গরম তাপের গ্রীষ্ম। ফসলের প্রাচুর্যের কারণে এটি কৃষকদের জন্য একটি আশীর্বাদ। শুষ্ক গ্রীষ্মের পরে নদী ভরাট।"
                }
+                if (lang == "hi-gondi") {
+                  text = "आज मौसम सुहावना है। हमें शाम को बाहर जाना चाहिए।"
+                }
                console.log(text)
                $("#corpusinput").val(text);
                $("#corpusinput").focus();
                $("#corpusinput").trigger('autoresize');
            });
+
+            function limitTgtOptions() {
+              // TODO: Limit target language to only hindi
+            }
        });
        
    </script>
@ -81,6 +88,7 @@
                    <select class="form-control" id="tgt">
                      <option value="en">English</option value="en">
                      <option value="hi">Hindi</option value="en">
+                      <option value="gondi" onselect="limitTgtOptions">Gondi</option>
                    </select>
                  </div>