Mirror of https://github.com/microsoft/inmt.git
Add REST APIs for translation queries
This commit is contained in:
Parent
b8c8d36476
Commit
b9bc997f6f
@@ -52,6 +52,7 @@ INSTALLED_APPS = [
     'django.contrib.sessions',
     'django.contrib.messages',
     'django.contrib.staticfiles',
+    'rest_framework',
     'mt',
     'mtsimple',
     'mtpara',
@@ -20,10 +20,16 @@ from django.conf.urls.static import static
 from django.contrib.staticfiles.urls import staticfiles_urlpatterns

 urlpatterns = [
-    path('', include('mt.urls')),
+    path('', include('mt.urls')), #TODO: This redirects to simple/ can we do it directly?
     path('simple/', include('mtsimple.urls')),
     path('para/', include('mtpara.urls')),
     # path('gpt/', include('gpt.urls')),
     path('admin/', admin.site.urls),
     path('accounts/', include('django.contrib.auth.urls')),
+
+
+    # REST FRAMEWORK URLS
+    path('api/simple/', include('mtsimple.api.urls')),
+
+
 ] + static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) + staticfiles_urlpatterns()
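The added path('api/simple/', include('mtsimple.api.urls')) entry, together with the translate_new route introduced later in mtsimple/api/urls.py, exposes the endpoint at /api/simple/translate_new. A quick, optional way to confirm the wiring from python manage.py shell (a sketch, not part of the commit; it assumes this URLconf is the project's ROOT_URLCONF):

    # Hypothetical check that the new API route resolves to the intended view name.
    from django.urls import resolve

    match = resolve('/api/simple/translate_new')
    print(match.view_name)  # expected: 'translate_new'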
Binary file not shown.
@@ -0,0 +1,23 @@
+# Swagger Codegen Ignore
+# Generated by swagger-codegen https://github.com/swagger-api/swagger-codegen
+
+# Use this file to prevent files from being overwritten by the generator.
+# The patterns follow closely to .gitignore or .dockerignore.
+
+# As an example, the C# client generator defines ApiClient.cs.
+# You can make changes and tell Swagger Codgen to ignore just this file by uncommenting the following line:
+#ApiClient.cs
+
+# You can match any string of characters against a directory, file or extension with a single asterisk (*):
+#foo/*/qux
+# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux
+
+# You can recursively match patterns against a directory, file or extension with a double asterisk (**):
+#foo/**/qux
+# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux
+
+# You can also negate patterns with an exclamation (!).
+# For example, you can ignore all files in a docs folder with the file extension .md:
+#docs/*.md
+# Then explicitly reverse the ignore rule for a single file:
+#!docs/README.md
@@ -0,0 +1 @@
+3.0.19
@@ -0,0 +1,282 @@
+<!doctype html>
+<html>
+<head>
+<title>SIMPLE TRANSLATION API</title>
+<style type="text/css">
+body {
+  font-family: Trebuchet MS, sans-serif;
+  font-size: 15px;
+  color: #444;
+  margin-right: 24px;
+}
+
+h1 {
+  font-size: 25px;
+}
+h2 {
+  font-size: 20px;
+}
+h3 {
+  font-size: 16px;
+  font-weight: bold;
+}
+hr {
+  height: 1px;
+  border: 0;
+  color: #ddd;
+  background-color: #ddd;
+}
+
+.app-desc {
+  clear: both;
+  margin-left: 20px;
+}
+.param-name {
+  width: 100%;
+}
+.license-info {
+  margin-left: 20px;
+}
+
+.license-url {
+  margin-left: 20px;
+}
+
+.model {
+  margin: 0 0 0px 20px;
+}
+
+.method {
+  margin-left: 20px;
+}
+
+.method-notes {
+  margin: 10px 0 20px 0;
+  font-size: 90%;
+  color: #555;
+}
+
+pre {
+  padding: 10px;
+  margin-bottom: 2px;
+}
+
+.http-method {
+  text-transform: uppercase;
+}
+
+pre.get {
+  background-color: #0f6ab4;
+}
+
+pre.post {
+  background-color: #10a54a;
+}
+
+pre.put {
+  background-color: #c5862b;
+}
+
+pre.delete {
+  background-color: #a41e22;
+}
+
+.huge {
+  color: #fff;
+}
+
+pre.example {
+  background-color: #f3f3f3;
+  padding: 10px;
+  border: 1px solid #ddd;
+}
+
+code {
+  white-space: pre;
+}
+
+.nickname {
+  font-weight: bold;
+}
+
+.method-path {
+  font-size: 1.5em;
+  background-color: #0f6ab4;
+}
+
+.up {
+  float:right;
+}
+
+.parameter {
+  width: 500px;
+}
+
+.param {
+  width: 500px;
+  padding: 10px 0 0 20px;
+  font-weight: bold;
+}
+
+.param-desc {
+  width: 700px;
+  padding: 0 0 0 20px;
+  color: #777;
+}
+
+.param-type {
+  font-style: italic;
+}
+
+.param-enum-header {
+  width: 700px;
+  padding: 0 0 0 60px;
+  color: #777;
+  font-weight: bold;
+}
+
+.param-enum {
+  width: 700px;
+  padding: 0 0 0 80px;
+  color: #777;
+  font-style: italic;
+}
+
+.field-label {
+  padding: 0;
+  margin: 0;
+  clear: both;
+}
+
+.field-items {
+  padding: 0 0 15px 0;
+  margin-bottom: 15px;
+}
+
+.return-type {
+  clear: both;
+  padding-bottom: 10px;
+}
+
+.param-header {
+  font-weight: bold;
+}
+
+.method-tags {
+  text-align: right;
+}
+
+.method-tag {
+  background: none repeat scroll 0% 0% #24A600;
+  border-radius: 3px;
+  padding: 2px 10px;
+  margin: 2px;
+  color: #FFF;
+  display: inline-block;
+  text-decoration: none;
+}
+</style>
+</head>
+<body>
+<h1>SIMPLE TRANSLATION API</h1>
+<div class="app-desc">Api for translation suggestions and attention score</div>
+<div class="app-desc">More information: <a href="https://helloreverb.com">https://helloreverb.com</a></div>
+<div class="app-desc">Contact Info: <a href="hello@helloreverb.com">hello@helloreverb.com</a></div>
+<div class="app-desc">Version: 1.0</div>
+<div class="app-desc">BasePath:/I927/INMT-SIMPLE/1.0</div>
+<div class="license-info">All rights reserved</div>
+<div class="license-url">http://apache.org/licenses/LICENSE-2.0.html</div>
+<h2>Access</h2>
+
+<h2><a name="__Methods">Methods</a></h2>
+[ Jump to <a href="#__Models">Models</a> ]
+
+<h3>Table of Contents </h3>
+<div class="method-summary"></div>
+<h4><a href="#Simple">Simple</a></h4>
+<ul>
+<li><a href="#translateNew"><code><span class="http-method">get</span> /api/simple/translate_new</code></a></li>
+</ul>
+
+<h1><a name="Simple">Simple</a></h1>
+<div class="method"><a name="translateNew"></a>
+<div class="method-path">
+<a class="up" href="#__Methods">Up</a>
+<pre class="get"><code class="huge"><span class="http-method">get</span> /api/simple/translate_new</code></pre></div>
+<div class="method-summary">get suggestions, attension scores, preplex and average score for your partial translation (<span class="nickname">translateNew</span>)</div>
+<div class="method-notes"></div>
+
+<h3 class="field-label">Query parameters</h3>
+<div class="field-items">
+<div class="param">langspec (required)</div>
+
+<div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; The type of translation </div> <div class="param">sentence (required)</div>
+
+<div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; The sentence that is to be translated </div> <div class="param">partial_trans (required)</div>
+
+<div class="param-desc"><span class="param-type">Query Parameter</span> &mdash; Partial translation done so far by the user </div> </div> <!-- field-items -->
+
+<h3 class="field-label">Return type</h3>
+<div class="return-type">
+<a href="#inline_response_200">inline_response_200</a>
+
+</div>
+
+<!--Todo: process Response Object and its headers, schema, examples -->
+
+<h3 class="field-label">Example data</h3>
+<div class="example-data-content-type">Content-Type: application/json</div>
+<pre class="example"><code>{
+  "result" : [ "result", "result" ],
+  "attn" : [ 0.8008281904610115, 0.8008281904610115 ],
+  "avg" : 1.4658129805029452,
+  "partial" : "partial",
+  "ppl" : 6.027456183070403
+}</code></pre>
+
+<h3 class="field-label">Produces</h3>
+This API call produces the following media types according to the <span class="header">Accept</span> request header;
+the media type will be conveyed by the <span class="header">Content-Type</span> response header.
+<ul>
+<li><code>application/json</code></li>
+</ul>
+
+<h3 class="field-label">Responses</h3>
+<h4 class="field-label">200</h4>
+An array containing result (the suggestions), attentions, partial translation by the user, perplexity and average score
+<a href="#inline_response_200">inline_response_200</a>
+<h4 class="field-label">400</h4>
+Bad Request
+<a href="#"></a>
+<h4 class="field-label">500</h4>
+Some internal server error
+<a href="#"></a>
+</div> <!-- method -->
+<hr/>
+
+<h2><a name="__Models">Models</a></h2>
+[ Jump to <a href="#__Methods">Methods</a> ]
+
+<h3>Table of Contents</h3>
+<ol>
+<li><a href="#inline_response_200"><code>inline_response_200</code></a></li>
+</ol>
+
+<div class="model">
+<h3><a name="inline_response_200"><code>inline_response_200</code></a> <a class="up" href="#__Models">Up</a></h3>
+
+<div class="field-items">
+<div class="param">result (optional)</div><div class="param-desc"><span class="param-type"><a href="#string">array[String]</a></span> </div>
+<div class="param">attn (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">array[BigDecimal]</a></span> </div>
+<div class="param">partial (optional)</div><div class="param-desc"><span class="param-type"><a href="#string">String</a></span> </div>
+<div class="param">ppl (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">BigDecimal</a></span> </div>
+<div class="param">avg (optional)</div><div class="param-desc"><span class="param-type"><a href="#BigDecimal">BigDecimal</a></span> </div>
+</div> <!-- field-items -->
+</div>
+</body>
+</html>
Binary file not shown.
@@ -0,0 +1,23 @@
+# Swagger Codegen Ignore
+# Generated by swagger-codegen https://github.com/swagger-api/swagger-codegen
+
+# Use this file to prevent files from being overwritten by the generator.
+# The patterns follow closely to .gitignore or .dockerignore.
+
+# As an example, the C# client generator defines ApiClient.cs.
+# You can make changes and tell Swagger Codgen to ignore just this file by uncommenting the following line:
+#ApiClient.cs
+
+# You can match any string of characters against a directory, file or extension with a single asterisk (*):
+#foo/*/qux
+# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux
+
+# You can recursively match patterns against a directory, file or extension with a double asterisk (**):
+#foo/**/qux
+# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux
+
+# You can also negate patterns with an exclamation (!).
+# For example, you can ignore all files in a docs folder with the file extension .md:
+#docs/*.md
+# Then explicitly reverse the ignore rule for a single file:
+#!docs/README.md
@@ -0,0 +1 @@
+3.0.19
@@ -0,0 +1,75 @@
+openapi: 3.0.0
+info:
+  title: SIMPLE TRANSLATION API
+  description: Api for translation suggestions and attention score
+  version: "1.0"
+servers:
+- url: https://virtserver.swaggerhub.com/I927/INMT-SIMPLE/1.0
+  description: SwaggerHub API Auto Mocking
+paths:
+  /api/simple/translate_new:
+    get:
+      tags:
+      - simple
+      summary: get suggestions, attension scores, preplex and average score for your
+        partial translation
+      operationId: translate_new
+      parameters:
+      - name: langspec
+        in: query
+        description: The type of translation
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      - name: sentence
+        in: query
+        description: The sentence that is to be translated
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      - name: partial_trans
+        in: query
+        description: Partial translation done so far by the user
+        required: true
+        style: form
+        explode: true
+        schema:
+          type: string
+      responses:
+        "200":
+          description: An array containing result (the suggestions), attentions, partial
+            translation by the user, perplexity and average score
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/inline_response_200'
+        "400":
+          description: Bad Request
+        "500":
+          description: Some internal server error
+components:
+  schemas:
+    inline_response_200:
+      type: object
+      properties:
+        result:
+          type: array
+          items:
+            type: string
+        attn:
+          type: array
+          items:
+            multipleOf: 0.1
+            type: number
+        partial:
+          type: string
+        ppl:
+          multipleOf: 0.1
+          type: number
+        avg:
+          multipleOf: 0.1
+          type: number
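A minimal client sketch against the spec above, assuming the Django dev server is running locally on port 8000 (the host, port, and example sentence are assumptions; the query parameters and response fields are the ones documented in the spec):

    import requests

    # GET /api/simple/translate_new with the three required query parameters.
    resp = requests.get(
        'http://127.0.0.1:8000/api/simple/translate_new',  # assumed local dev server
        params={
            'langspec': 'en-hi',        # the type of translation (language pair)
            'sentence': 'The weather is pleasant today.',
            'partial_trans': '',        # partial translation done so far by the user
        },
    )
    resp.raise_for_status()
    data = resp.json()
    print(data['result'])            # suggested translations
    print(data['attn'])              # attention indicators per source token
    print(data['partial'], data['ppl'], data['avg'])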
@@ -0,0 +1,69 @@
+# Generated by Django 3.0.3 on 2020-05-19 16:12
+
+from django.db import migrations, models
+import django.db.models.deletion
+import jsonfield.fields
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('mt', '0014_auto_20190606_1535'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='customKeyboardCommands',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('select_entire_suggestion', models.IntegerField()),
+                ('select_single_word_from_suggestion', models.IntegerField()),
+                ('navigate_to_next_corpus_fragment', models.IntegerField()),
+                ('navigate_to_previous_corpus_fragment', models.IntegerField()),
+                ('submit_translation', models.IntegerField()),
+                ('select_next_translation_suggestion', models.IntegerField()),
+                ('select_previous_translation_suggestion', models.IntegerField()),
+                ('custom_layout_name', models.CharField(max_length=30)),
+            ],
+            options={
+                'verbose_name': 'Custom Keyboard Command Set',
+                'verbose_name_plural': 'Custom Keyboard Command Sets',
+            },
+        ),
+        migrations.AlterField(
+            model_name='corpus',
+            name='helpprovision',
+            field=models.CharField(choices=[('IT', 'Interactive Translation'), ('PE', 'Post Editing'), ('BL', 'Baseline')], default='IT', max_length=2),
+        ),
+        migrations.AlterField(
+            model_name='dockeystroke',
+            name='keystrokeseries',
+            field=jsonfield.fields.JSONField(),
+        ),
+        migrations.CreateModel(
+            name='translatorKeyboardLayouts',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('customKeyboardCommands', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorconfigs', to='mt.customKeyboardCommands')),
+                ('translator', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorconfigs', to='mt.translator')),
+            ],
+            options={
+                'verbose_name': 'Translator Keyboard Layout Specified',
+                'verbose_name_plural': 'Translator Keyboard Layout Specified',
+                'unique_together': {('translator', 'customKeyboardCommands')},
+            },
+        ),
+        migrations.CreateModel(
+            name='translatorcorpus',
+            fields=[
+                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('corpus', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='corpustranslators', to='mt.corpus')),
+                ('translator', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='translatorcorpus', to='mt.translator')),
+            ],
+            options={
+                'verbose_name': 'Translator Corpus Possible',
+                'verbose_name_plural': '6. Translator Corpus Possible',
+                'unique_together': {('translator', 'corpus')},
+            },
+        ),
+    ]
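For context, the customKeyboardCommands model created by this migration appears to store one integer key code per editor action. A hedged ORM sketch, not part of the commit (the import path assumes the model class lives in mt/models.py, and the key codes are arbitrary examples):

    from mt.models import customKeyboardCommands

    # Create one named command set; each field holds a key code chosen by the user.
    layout = customKeyboardCommands.objects.create(
        select_entire_suggestion=9,                 # Tab (assumed)
        select_single_word_from_suggestion=13,      # Enter (assumed)
        navigate_to_next_corpus_fragment=34,        # PgDn (assumed)
        navigate_to_previous_corpus_fragment=33,    # PgUp (assumed)
        submit_translation=35,                      # End (assumed)
        select_next_translation_suggestion=40,      # Down arrow (assumed)
        select_previous_translation_suggestion=38,  # Up arrow (assumed)
        custom_layout_name='example-layout',
    )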
@@ -43,6 +43,10 @@ from django.conf import settings

 with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
     opt = pickle.load(f)
+print("###########################DEBUG######################################")
+print(opt) # This is the file that mentions model details
+print(dir_path + "/opt_data")
+print("#######################################################################")



@@ -0,0 +1,7 @@
+from django.urls import path
+
+from . import views
+
+urlpatterns = [
+    path('translate_new', views.translate_new, name='translate_new'),
+]
@@ -0,0 +1,191 @@
+from django.http import HttpResponse, JsonResponse
+from rest_framework import status
+from rest_framework.response import Response
+from rest_framework.decorators import api_view
+import re, os, math
+import requests
+import pickle
+
+from indic_transliteration import sanscript
+
+from onmt.translate.infertranslator import build_translator
+from onmt.utils.parse import ArgumentParser
+import mtsimple
+dir_path = os.path.dirname(os.path.dirname(mtsimple.__file__))
+
+#TODO: Find a Way to not repeat the below starter code from mtsimple/views.py
+
+langspecs = {
+    'en-hi' : {
+        'src' : 'en',
+        'tgt' : 'hi',
+        'model': 'full_iitb_enhi_50v.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : True,
+    },
+    'hi-en' : {
+        'src' : 'hi',
+        'tgt' : 'en',
+        'model': 'full_iitb_bpe_hien.pt',
+        'indic_code': None,
+        'provide_help' : False,
+    },
+    'hi-gondi' : {
+        'src' : 'hi',
+        'tgt' : 'gondi',
+        'model': 'hi-gondi.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : False,
+    },
+    # '*-e
+    # '*-en' : {
+    # 'src' : 'hi',
+    # 'tgt' : 'en',
+    # 'model': 'multiling.pt',
+    # 'indic_code': None,
+    # 'provide_help' : False,
+    # }
+}
+
+with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
+    opt = pickle.load(f)
+
+engines = {}
+# The model engines are initialised here after loading opt (maybe it just specifies of how the model looks like?)
+for key, value in langspecs.items():
+    opt.models = [os.path.join(dir_path, 'model', value['model'])]
+    opt.n_best = 1
+    opt.max_length = 100
+    opt.global_attention_function = 'sparsemax'
+    ArgumentParser.validate_translate_opts(opt)
+    engines[key] = {"translatorbest": build_translator(opt, report_score=True)}
+    #translatorbest builds the best complete translation of the sentence
+
+    opt.n_best = 5
+    opt.max_length = 2
+    opt.global_attention_function = 'sparsemax'
+    ArgumentParser.validate_translate_opts(opt)
+    engines[key]["translatorbigram"] = build_translator(opt, report_score=True)
+    #translatorbiagram builds best translations of length two
+
+def quotaposto(s, lang="en"):
+    s = re.sub(r"&quot;", r'"', s)
+    s = re.sub(r"&apos;", r"'", s)
+    s = re.sub(r"(@@ )|(@@ ?$)", r"", s)
+    #This is work in progress to make writing as natural as possible. taking care of spaces before and after certain characters.
+    # s = re.sub(r"(\s+)([!:?,.।\']+)", r"\2", s)
+    # s = re.sub(r"([({\[<]+)(\s+)", r"\1", s)
+    # s = re.sub(r"(\s+)([)}\]>]+)", r"\2", s)
+    return s
+
+def toquotapos(s, lang="en"):
+    # if lang=="en":
+    s = s.lower()
+    s = re.sub(r"([\“\”])", r'"', s)
+    s = re.sub(r"([\‘\’])", r"'", s)
+    s = re.sub(r"([\ः])", r":", s)
+    s = re.sub(r"([-!$%^&*()_+|~=`{}\[\]:\";<>?,.\/#@।]+)", r" \1 ", s)
+    s = re.sub(r'"', r'&quot;', s)
+    s = re.sub(r"'", r"&apos;", s)
+    s = re.sub(r"(\s+)", r" ", s)
+
+    return s
+
+@api_view(['GET',])
+def translate_new(request):
+    langspec = request.GET.get('langspec')
+    sentence = request.GET.get('sentence')
+    partial_trans = request.GET.get('partial_trans', '')
+    translatorbest = engines[langspec]["translatorbest"]
+    translatorbigram = engines[langspec]["translatorbigram"]
+    print("Before processing")
+    print("##########################")
+    print("##########################")
+    print(sentence.strip())
+    print("##########################")
+    print("##########################")
+    print("##########################")
+
+    L1 = toquotapos(sentence.strip()) # request.GET.get('a') contains the whole sentence to be translated
+    print("############After Processing########")
+    print((L1))
+    L2 = partial_trans # request.GET.get('b') contains the partial sentence to be translated
+    L2split = L2.split()
+
+    if langspecs[langspec]['indic_code']:
+        # print(L2[-1])
+        if L2 != '' and bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])):
+            params = {}
+            params['inString'] = L2split[-1]
+            params['lang'] = 'hindi'
+            data = requests.get('http://xlit.quillpad.in/quillpad_backend2/processWordJSON', params = params).json()
+            L2split[-1] = data['twords'][0]['options'][0]
+            L2 = ' '.join(L2split)
+            # L2 = transliterate(L2, sanscript.ITRANS, langspec['indic_code'])
+
+    print(L2, u'\u0900-\u097F')
+
+    something, pred, covatn2d, score_total, words_total = translatorbest.translate(
+        src=[L1],
+        tgt=None,
+        src_dir='',
+        batch_size=30,
+        attn_debug=True,
+        partial = toquotapos(L2)
+        )
+
+    scores, predictions, score_total, words_total = translatorbigram.translate(
+        src=[L1],
+        tgt=None,
+        src_dir='',
+        batch_size=30,
+        attn_debug=False,
+        partial = toquotapos(L2),
+        dymax_len = 2,
+        )
+
+
+    print(covatn2d, 'convatn2d')
+    if L2 != '':
+        transpattn = [*zip(*covatn2d)]
+        attnind = [attn.index(max(attn)) for attn in transpattn]
+        print('attnind', attnind)
+        attndist = [[ i for i, x in enumerate(attnind) if x==k] for k in range(len(L2.strip().split(" ")))]
+        print('attndist', attndist)
+        sumattn = [1] * len(L1.split(" "))
+        for i in attndist:
+            for k in i:
+                sumattn[k] = 0
+        # attn = covatn2d[:len(L2.strip().split(" "))]
+        # sumattn = [sum(i) for i in zip(*attn)]
+        # for i in range(len(attn)):
+        #     if max(attn[i]) > 0.30:
+        #         sumattn[attn[i].index(max(attn[i]))] = 1
+        #         print(max(attn[i]))
+        # newattn = [float("{0:.2f}".format(1-(k/max(sumattn)))) for k in sumattn]
+        # # sumattn = [float("{0:.2f}".format(k/sum(newattn))) for k in newattn]
+        # newattn = [ 1.66*max(0, (k-0.4)) for k in newattn]
+
+    else:
+        sumattn = [1.00] * len(L1.split(" "))
+    predictions = predictions[0]
+    print(predictions)
+    seen = set()
+    seen_add = seen.add
+    sentence = [quotaposto(L2 + x.capitalize()[len(L2):], langspecs[langspec]["tgt"]) + " " for x in predictions if not (x in seen or seen_add(x))]
+    # sentence = [x.replace(L2, "") for x in sentence]
+    sentence = '\n'.join(sentence)
+    print("pred[0][0]", pred[0][0], pred[0][0][len(L2):])
+    if langspecs[langspec]['provide_help'] and L2:
+        sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[langspec]["tgt"]) + '\n' + L2 + '\n' + sentence
+    else:
+        sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[langspec]["tgt"]) + '\n' + sentence
+
+    print(sentence)
+    perplexity = float(math.exp(-score_total / words_total))
+    avg_score = float(score_total / words_total)
+
+    print("sentence", sentence)
+    # print(something, pred)
+    return JsonResponse({'result': sentence.split('\n'), 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
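A smoke-test sketch for this view, not part of the commit. It assumes the OpenNMT checkpoints named in langspecs exist under the project's model/ directory, because they are loaded when mtsimple.api.views is imported:

    from rest_framework.test import APIRequestFactory
    from mtsimple.api import views

    factory = APIRequestFactory()
    request = factory.get('/api/simple/translate_new', {
        'langspec': 'en-hi',                      # must be a key of langspecs
        'sentence': 'The weather is pleasant today.',
        'partial_trans': '',
    })
    response = views.translate_new(request)
    print(response.status_code)  # 200 on success
    print(response.content)      # JSON with result/attn/partial/ppl/avg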
@@ -29,6 +29,7 @@ import requests

 import math

+# defines the configuration of the translation type selected by the user
 langspecs = {
     'en-hi' : {
         'src' : 'en',
@@ -44,6 +45,14 @@ langspecs = {
         'indic_code': None,
         'provide_help' : False,
     },
+
+    'hi-gondi' : {
+        'src' : 'hi',
+        'tgt' : 'gondi',
+        'model': 'hi-gondi.pt',
+        'indic_code': sanscript.DEVANAGARI,
+        'provide_help' : False,
+    },
     # '*-en' : {
     # 'src' : 'hi',
     # 'tgt' : 'en',
@@ -85,6 +94,7 @@ with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
     opt = pickle.load(f)

 engines = {}
+# The model engines are initialised here after loading opt (maybe it just specifies of how the model looks like?)
 for key, value in langspecs.items():
     opt.models = [os.path.join(dir_path, 'model', value['model'])]
     opt.n_best = 1
@@ -92,19 +102,23 @@ for key, value in langspecs.items():
     opt.global_attention_function = 'sparsemax'
     ArgumentParser.validate_translate_opts(opt)
     engines[key] = {"translatorbest": build_translator(opt, report_score=True)}
+    #translatorbest builds the best complete translation of the sentence

     opt.n_best = 5
     opt.max_length = 2
     opt.global_attention_function = 'sparsemax'
     ArgumentParser.validate_translate_opts(opt)
     engines[key]["translatorbigram"] = build_translator(opt, report_score=True)
+    #translatorbiagram builds 5 best translations of length two

 global corpusops
 corpusops = []

+# The view function for the first page url : simple/
 def corpus(request):
     return render(request, 'simplecorpus.html')

+#The view function called after setting languagespecs and getting the input in simple/ (called after corpusinput)
 def translate(request):
     return render(request, 'simpletranslate.html')

@@ -112,7 +126,7 @@ def end(request):
     return render(request, 'simpleend.html')

 def split_sentences(st):
-    #Split sentences based
+    #Split sentences based on !?।|.
     sentences = re.split(r'[!?।|.](?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)', st)

     if sentences[-1]:
@@ -120,6 +134,12 @@ def split_sentences(st):
     else:
         return sentences[:-1]

+"""
+The view function for getting the input for translation on the first page (simple/)
+
+Splits the sentence based on !?।| cleans it and saves the list in session["corpusinps"]
+"""
+
 def corpusinput(request):
     corpusraw = request.POST.get('translate')
     langselect = request.POST.get('langselect')
@@ -128,7 +148,11 @@ def corpusinput(request):
     request.session["langspec"] = langselect
     print(request.session["langspec"])
     s = corpusraw.strip()
+
+    print(s, "DEBUG: raw corpus before split_sentences")
     spsent = [k.strip() for k in split_sentences(s)]
+    print(spsent, "DEBUG: raw corpus after split_sentences")
+
     corpusinps = list(filter(lambda elem: elem.strip(), spsent))
     request.session["corpusinps"] = [[k, ''] for k in corpusinps]
     print(request.session["corpusinps"])
@@ -159,11 +183,22 @@ def indic(request):
 def translate_new(request):
     translatorbest = engines[request.session["langspec"]]["translatorbest"]
     translatorbigram = engines[request.session["langspec"]]["translatorbigram"]
-    L1 = toquotapos(request.GET.get('a').strip())
-    L2 = request.GET.get('b', "")
+    print("Before processing")
+    print("##########################")
+    print("##########################")
+    print(request.GET.get('a').strip())
+    print("##########################")
+    print("##########################")
+    print("##########################")
+
+    L1 = toquotapos(request.GET.get('a').strip()) # request.GET.get('a') contains the whole sentence to be translated
+    print("############After Processing########")
+    print((L1))
+    L2 = request.GET.get('b', "") # request.GET.get('b') contains the partial sentence to be translated
     L2split = L2.split()

     if langspecs[request.session["langspec"]]['indic_code']:
+        # print(L2[-1])
         if L2 != '' and bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])):
             params = {}
             params['inString'] = L2split[-1]
@@ -173,7 +208,7 @@ def translate_new(request):
             L2 = ' '.join(L2split)
             # L2 = transliterate(L2, sanscript.ITRANS, langspec['indic_code'])

-    print(L2)
+    print(L2, u'\u0900-\u097F')

     something, pred, covatn2d, score_total, words_total = translatorbest.translate(
         src=[L1],
@@ -183,6 +218,8 @@ def translate_new(request):
         attn_debug=True,
         partial = toquotapos(L2)
         )
+
+    print("$$$$$$$$$$$$$$$$$$$$$$$$")

     scores, predictions, score_total, words_total = translatorbigram.translate(
         src=[L1],
@@ -195,11 +232,13 @@ def translate_new(request):
         )


-    # print(covatn2d)
+    print(covatn2d, 'convatn2d')
     if L2 != '':
         transpattn = [*zip(*covatn2d)]
         attnind = [attn.index(max(attn)) for attn in transpattn]
+        print('attnind', attnind)
         attndist = [[ i for i, x in enumerate(attnind) if x==k] for k in range(len(L2.strip().split(" ")))]
+        print('attndist', attndist)
         sumattn = [1] * len(L1.split(" "))
         for i in attndist:
             for k in i:
@@ -223,6 +262,7 @@ def translate_new(request):
     sentence = [quotaposto(L2 + x.capitalize()[len(L2):], langspecs[request.session["langspec"]]["tgt"]) + " " for x in predictions if not (x in seen or seen_add(x))]
     # sentence = [x.replace(L2, "") for x in sentence]
     sentence = '\n'.join(sentence)
+    print("pred[0][0]", pred[0][0], pred[0][0][len(L2):])
     if langspecs[request.session["langspec"]]['provide_help'] and L2:
         sentence = quotaposto(L2 + pred[0][0].capitalize()[len(L2):], langspecs[request.session["langspec"]]["tgt"]) + '\n' + L2 + '\n' + sentence
     else:
@@ -231,6 +271,6 @@ def translate_new(request):
     print(sentence)
     perplexity = float(math.exp(-score_total / words_total))
     avg_score = float(score_total / words_total)
-    # print(scores)
+    print("sentence", sentence)
     # print(something, pred)
     return JsonResponse({'result': sentence, 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
@@ -307,6 +307,7 @@ class Translator(object):
            * all_scores is a list of `batch_size` lists of `n_best` scores
            * all_predictions is a list of `batch_size` lists
                of `n_best` predictions
+           * attns is a list of attention scores for translation having highest cumilative log likelihood
        """
        self.dymax_len = dymax_len
        self.partialf = None
@@ -322,6 +323,7 @@ class Translator(object):
        # Logic for partial and partialf
        if partial and partial != '':
            partials = partial.split()
+           print(partials, '~~~~partials~~~')
            vocabdict = dict(self.fields)["tgt"].base_field.vocab
            # if vocabdict.stoi[partials[-1]] == 0:
            if partialfcheck:
@@ -335,6 +337,9 @@ class Translator(object):
                # self.partialf = [20.0] + [i[0] for i in sorted(editarr, key=lambda x: x[1])]

                self.partial = [vocabdict.stoi[x] for x in partials[:-1]]
+               print("#########vocabdict.stoi########")
+               print(self.partial)
+               print("##################################")

                self.partialf = [v for k, v in vocabdict.stoi.items() if k.startswith(partials[-1]) and v]
            else:
@@ -384,7 +389,7 @@ class Translator(object):
        pred_score_total, pred_words_total = 0, 0
        gold_score_total, gold_words_total = 0, 0

-       all_scores = []
+       all_scores = [] # I guess this is the cumilative log likelihood score of each sentence
        all_predictions = []

        start_time = time.time()
@@ -396,6 +401,8 @@ class Translator(object):
            translations = xlation_builder.from_batch(batch_data)

            for trans in translations:
+               print("Loop")
+               print(trans, trans.pred_sents)
                all_scores += [trans.pred_scores[:self.n_best]]
                pred_score_total += trans.pred_scores[0]
                pred_words_total += len(trans.pred_sents[0])
@@ -405,6 +412,12 @@ class Translator(object):

                n_best_preds = [" ".join(pred)
                                for pred in trans.pred_sents[:self.n_best]]
+
+               print("############n_best_preds###############")
+               print(n_best_preds)
+               print("############n_best_preds###############")
+
+
                if self.report_align:
                    align_pharaohs = [build_align_pharaoh(align) for align
                                      in trans.word_aligns[:self.n_best]]
@@ -433,7 +446,7 @@ class Translator(object):
                        srcs = trans.src_raw
                    else:
                        srcs = [str(item) for item in range(len(attns[0]))]
-                   output = report_matrix(srcs, preds, attns)
+                   output = report_matrix(srcs, preds, attns) # This prints attentions in output for the sentence having highest cumilative log likelihood score

                    if self.logger:
                        self.logger.info(output)
@@ -387,6 +387,8 @@ function parseProcessedJsonResultsfunction(data, partial) {

    var container = $('<div />');

+   // Code for adding suggestions//
+
    var countcontainer = 0
    finalresult = []
    for(var i = 0; i < result.length; i++) {
@@ -1,4 +1,4 @@
-
+// This page serves as the script for simpletranslate.html
 /*
 *************************************************
 *************************************************
@@ -53,6 +53,7 @@ function sharedStart(feed, partial) {
    part1text = partial.substring(0, lastspace)
    part2text = partial.substring(lastspace+1)
    var count = 0
+   console.log("DEBUG part1text", part1text, )
    if (part1text) {
        newfeed = feed.replace(part1text + " ", '')
    } else {
@@ -504,6 +505,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
    finalresult = []
    for(var i = 0; i < result.length; i++) {
        var repres = sharedStart(result[i], partialret)
+       console.log(result[i] + '%%%%%%%%%%%%%%%%%%%%%%%%%%%')
        if (repres !== "") {
            container.append('<span id="res'+countcontainer+'" class="res'+countcontainer+' spanres p-1"> ' + repres + '</span>');
            countcontainer += 1;
@@ -516,7 +518,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
    // Coloring the drop down box selections
    partial.closest('.bmo').find('.dropdown').html(container);
    resetcolors('.res', $('.spanres').length)
-   $('.res' + selecte).css("background-color","#eee")
+   $('.res' + selecte).css("background-color","#fff")
    if (countcontainer>1) {
        partial.closest('.bmo').find('.dropdown').css('visibility', 'visible');
    }
@@ -528,7 +530,7 @@ function parseProcessedJsonResultsfunction(data, partial) {
    for (m=0; m<attn.length; m++) {
        if (attn[m] != 0) {
            // partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,' + attn[m] + ')')
-           partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,0.5')
+           partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(255,0,0,0.5)')
        }
        else {
            partial.closest('.bmo').find('.hin_inp_part' + m).css('background-color', 'rgba(0,255,0,0.5)')
@@ -580,25 +582,28 @@ $(document).ready(function() {
        inputs = data.result;
        langspec = data.langspec
        // langtolangid = data.langtolangid;

        console.log(inputs)
        $('#cardscoll').html('')
        $('#corpusinput').html('')

        for (i=0; i<inputs.length; i++) {
+           /*To set the source part of the page*/
            if (langspec == 'hi-en') {
-               $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>| ')
+               $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>| ') /* 1st index is the text with which the editable division is intitalised */
            } else {
                $('#corpusinput').append('<span class="corp_inp">' + inputs[i][0] + '</span>. ')
            }
+           /*--------------------------------*/
            $('#cardscoll').append(
                `<div class="shadow p-3 my-3 rounded bmo cardescoll">
                <div class="row">
                <div class="col-9">
-               <div class="hin_inp pb-2" contenteditable="false">`+ inputSpan(inputs[i][0]) + `</div>
+               <div class="hin_inp pb-2" contenteditable="false">`+ inputSpan(inputs[i][0]) /*Wraps each word of sentence around span and returns*/ + `</div>
                <div class="dropcontainer">
                <div class="partcontainer">
                <div class="suggest transtext" contenteditable="false"></div>
-               <div class=" partial transtext" id="card` + i + `" contenteditable="true"
+               <div class="partial transtext" id="card` + i + `" contenteditable="true"
                data-tab=0 data-enter=0 data-up=0 data-down=0 data-others=0 data-pgup=0 data-pgdn=0 data-end=0 data-right=0 data-left=0 data-bkspc=0 data-time=0
                >`+ inputs[i][1] + `</div>
                </div>
@@ -767,10 +772,16 @@ $(document).ready(function() {

        var hin_inp = partial.closest('.bmo').find('.hin_inp')
        globalPartial = partial;
+       console.log("#########################################3")
+       console.log("#########################################3")
+       console.log(partial.clone().children().remove().end().text())
+       console.log("#########################################4")
+       console.log("#########################################3")
+
        if (sockets_use == true) {
            connectSocket.send(JSON.stringify({
-               'partial_translation': partial.clone().children().remove().end().text(),
-               'original': hin_inp.text(),
+               'partial_translation': partial.clone().children().remove().end().text(), // The text translated by user so far
+               'original': hin_inp.text(), // The full sentence to be translated
                'langspec': langspec
            }));
        }
@@ -778,7 +789,7 @@ $(document).ready(function() {

        //OLD, JANKY HTTP REQUEST!!
        searchRequest = $.getJSON(http_translate, {
-           a: hin_inp.text(),
+           a: hin_inp.text(), // Maybe use some good names here?
            b: partial.clone().children().remove().end().text()
        }, function(data) {
            // console.log(data)
@@ -23,7 +23,7 @@
    if (corpusinput) {
        $.ajax({
            type: "POST",
-           url: '/simple/corpusinput',
+           url: '/simple/corpusinput', //corpusinput function in mtsimple/views.py
            data: {
                'translate': corpusinput,
                'langselect': $('#src').find(":selected").val() + "-" + $('#tgt').find(":selected").val(),
@@ -51,11 +51,18 @@
        if (lang == "bn-en") {
            text = "মৌসুমি বৃষ্টি একটি অভিশাপ দ্বারা আশীর্বাদ করা একটি আশীর্বাদ। যখন পরিমাণগত পরিমাণে বৃষ্টির পরিমাণ কম হয়, তখন এটি একটি আশীর্বাদের জন্য আমাদের পরে গরম তাপের গ্রীষ্ম। ফসলের প্রাচুর্যের কারণে এটি কৃষকদের জন্য একটি আশীর্বাদ। শুষ্ক গ্রীষ্মের পরে নদী ভরাট।"
        }
+       if (lang == "hi-gondi") {
+           text = "आज मौसम सुहावना है। हमें शाम को बाहर जाना चाहिए।"
+       }
        console.log(text)
        $("#corpusinput").val(text);
        $("#corpusinput").focus();
        $("#corpusinput").trigger('autoresize');
    });
+
+   function limitTgtOptions() {
+       // TODO: Limit target language to only hindi
+   }
    });

    </script>
@@ -81,6 +88,7 @@
    <select class="form-control" id="tgt">
        <option value="en">English</option value="en">
        <option value="hi">Hindi</option value="en">
+       <option value="gondi" onselect="limitTgtOptions">Gondi</option value="en">
    </select>
    </div>
