This commit is contained in:
Sebastin Santy 2020-07-21 09:39:58 +05:30
Родитель d3f82b2e85
Коммит 988d2cd520
4 изменённых файлов: 44 добавлений и 20 удалений

Просмотреть файл

@ -70,7 +70,7 @@ langspecs = {
}
}
id_to_spec = {'2': 'hi-en', '1': 'en-hi'}
id_to_spec = {'2': 'hi-en', '1': 'en-hi', '3': 'ta-en', '4': 'en-ta'}
translatordict = {}

Просмотреть файл

@ -17,20 +17,34 @@ dir_path = os.path.dirname(os.path.dirname(mtsimple.__file__))
# TODO: Find a way to avoid repeating the starter code below, which is duplicated from mtsimple/views.py
langspecs = {
'en-hi' : {
'src' : 'en',
'tgt' : 'hi',
'model': 'full_iitb_enhi_50v.pt',
'indic_code': sanscript.DEVANAGARI,
'provide_help' : True,
},
'hi-en' : {
'src' : 'hi',
# 'en-hi' : {
# 'src' : 'en',
# 'tgt' : 'hi',
# 'model': 'full_iitb_enhi_50v.pt',
# 'indic_code': sanscript.DEVANAGARI,
# 'provide_help' : True,
# },
# 'hi-en' : {
# 'src' : 'hi',
# 'tgt' : 'en',
# 'model': 'onmt-hien.pt',
# 'indic_code': None,
# 'provide_help' : False,
# },
'ta-en' : {
'src' : 'ta',
'tgt' : 'en',
'model': 'onmt-hien.pt',
'model': 'taen_final_step_100000.pt',
'indic_code': None,
'provide_help' : False,
},
'en-ta' : {
'src' : 'en',
'tgt' : 'ta',
'model': 'enta_final_step_100000.pt',
'indic_code': None,
'provide_help' : True,
},
}
with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
@ -57,6 +71,7 @@ def quotaposto(s, lang="en"):
s = re.sub(r""", r'"', s)
s = re.sub(r"'", r"'", s)
s = re.sub(r"(@@ )|(@@ ?$)", r"", s)
s = re.sub(r"<|unk|>", r"", s)
# This is a work in progress to make the written output as natural as possible, taking care of spaces before and after certain characters.
# s = re.sub(r"(\s+)([!:?,.।\']+)", r"\2", s)
# s = re.sub(r"([({\[<]+)(\s+)", r"\1", s)
@ -88,11 +103,14 @@ def translate_new(request):
L2 = partial_trans
L2split = L2.split()
if langspecs[langspec]['indic_code']:
if L2 != '' and bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])):
if langspecs[langspec]['provide_help']:
if L2 != '' and (bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])) or bool(re.search(r"([^\s\u0B80-\u0BFF])", L2[-1]))):
params = {}
params['inString'] = L2split[-1]
params['lang'] = 'hindi'
if langspecs[langspec]['tgt'] == 'ta':
params['lang'] = 'tamil'
if langspecs[langspec]['tgt'] == 'hi':
params['lang'] = 'hindi'
data = requests.get('http://xlit.quillpad.in/quillpad_backend2/processWordJSON', params = params).json()
L2split[-1] = data['twords'][0]['options'][0]
L2 = ' '.join(L2split)
@ -155,6 +173,7 @@ def translate_new(request):
perplexity = float(math.exp(-score_total / words_total))
avg_score = float(score_total / words_total)
print("sentence", sentence)
return JsonResponse({'result': sentence, 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
print("sentence", quotaposto(sentence))
print(quotaposto("என் <unk> என்னை"))
return JsonResponse({'result': quotaposto(sentence), 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})

Просмотреть файл

@ -142,8 +142,8 @@ Splits the sentence based on !?।| cleans it and saves the list in session["cor
def corpusinput(request):
corpusraw = request.POST.get('translate')
langselect = request.POST.get('langselect')
if langselect not in langspecs:
langselect = '*-en'
# if langselect not in langspecs:
# langselect = '*-en'
request.session["langspec"] = langselect
s = corpusraw.strip()

Просмотреть файл

@ -45,6 +45,9 @@
if (lang == "hi-en") {
text = "आज मौसम सुहावना है। हमें शाम को बाहर जाना चाहिए।"
}
if (lang == "en-ta") {
text = "The weather is pleasant today. Let us go out to in the evening."
}
if (lang == "ml-en") {
text = "മൺസൂൺ മഴ ഒരു അനുഗ്രഹവും ശാപവുമാണെന്ന് വിശ്വസിക്കപ്പെടുന്നു. ആവശ്യത്തിന് അളവിൽ മഴ പെയ്യുമ്പോൾ, വേനൽക്കാലത്തെ കടുത്ത ചൂടിനുശേഷം ഇത് ഞങ്ങൾക്ക് ഒരു അനുഗ്രഹമാണ്. വിളകൾ ധാരാളമായി വളരുന്നതിനാൽ ഇത് കർഷകർക്കും ഒരു അനുഗ്രഹമാണ്. വരണ്ട വേനൽക്കാലത്ത് നദികൾ നിറയുന്നു."
}
@ -79,7 +82,8 @@
<label for="sourcelang">Source Language</label>
<select class="form-control" id="src">
<option value="hi">Hindi</option>
<option value="en">English</option value="en">
<option value="en">English</option>
<option value="ta">Tamil</option>
</select>
</div>
@ -88,7 +92,8 @@
<select class="form-control" id="tgt">
<option value="en">English</option value="en">
<option value="hi">Hindi</option value="en">
<option value="gondi" onselect="limitTgtOptions">Gondi</option value="en">
<option value="ta">Tamil</option>
<!-- <option value="gondi" onselect="limitTgtOptions">Gondi</option value="en"> -->
</select>
</div>