зеркало из https://github.com/microsoft/inmt.git
Add Tamil translation models
This commit is contained in:
Родитель
d3f82b2e85
Коммит
988d2cd520
|
@ -70,7 +70,7 @@ langspecs = {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
id_to_spec = {'2': 'hi-en', '1': 'en-hi'}
|
id_to_spec = {'2': 'hi-en', '1': 'en-hi', '3': 'ta-en', '4': 'en-ta'}
|
||||||
|
|
||||||
translatordict = {}
|
translatordict = {}
|
||||||
|
|
||||||
|
|
|
@ -17,20 +17,34 @@ dir_path = os.path.dirname(os.path.dirname(mtsimple.__file__))
|
||||||
#TODO: Find a Way to not repeat the below starter code from mtsimple/views.py
|
#TODO: Find a Way to not repeat the below starter code from mtsimple/views.py
|
||||||
|
|
||||||
langspecs = {
|
langspecs = {
|
||||||
'en-hi' : {
|
# 'en-hi' : {
|
||||||
'src' : 'en',
|
# 'src' : 'en',
|
||||||
'tgt' : 'hi',
|
# 'tgt' : 'hi',
|
||||||
'model': 'full_iitb_enhi_50v.pt',
|
# 'model': 'full_iitb_enhi_50v.pt',
|
||||||
'indic_code': sanscript.DEVANAGARI,
|
# 'indic_code': sanscript.DEVANAGARI,
|
||||||
'provide_help' : True,
|
# 'provide_help' : True,
|
||||||
},
|
# }d,
|
||||||
'hi-en' : {
|
# 'hi-en' : {
|
||||||
'src' : 'hi',
|
# 'src' : 'hi',
|
||||||
|
# 'tgt' : 'en',
|
||||||
|
# 'model': 'onmt-hien.pt',
|
||||||
|
# 'indic_code': None,
|
||||||
|
# 'provide_help' : False,
|
||||||
|
# },
|
||||||
|
'ta-en' : {
|
||||||
|
'src' : 'ta',
|
||||||
'tgt' : 'en',
|
'tgt' : 'en',
|
||||||
'model': 'onmt-hien.pt',
|
'model': 'taen_final_step_100000.pt',
|
||||||
'indic_code': None,
|
'indic_code': None,
|
||||||
'provide_help' : False,
|
'provide_help' : False,
|
||||||
},
|
},
|
||||||
|
'en-ta' : {
|
||||||
|
'src' : 'en',
|
||||||
|
'tgt' : 'ta',
|
||||||
|
'model': 'enta_final_step_100000.pt',
|
||||||
|
'indic_code': None,
|
||||||
|
'provide_help' : True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
|
with open(os.path.join(dir_path, 'opt_data'), 'rb') as f:
|
||||||
|
@ -57,6 +71,7 @@ def quotaposto(s, lang="en"):
|
||||||
s = re.sub(r""", r'"', s)
|
s = re.sub(r""", r'"', s)
|
||||||
s = re.sub(r"'", r"'", s)
|
s = re.sub(r"'", r"'", s)
|
||||||
s = re.sub(r"(@@ )|(@@ ?$)", r"", s)
|
s = re.sub(r"(@@ )|(@@ ?$)", r"", s)
|
||||||
|
s = re.sub(r"<|unk|>", r"", s)
|
||||||
#This is work in progress to make writing as natural as possible. taking care of spaces before and after certain characters.
|
#This is work in progress to make writing as natural as possible. taking care of spaces before and after certain characters.
|
||||||
# s = re.sub(r"(\s+)([!:?,.।\']+)", r"\2", s)
|
# s = re.sub(r"(\s+)([!:?,.।\']+)", r"\2", s)
|
||||||
# s = re.sub(r"([({\[<]+)(\s+)", r"\1", s)
|
# s = re.sub(r"([({\[<]+)(\s+)", r"\1", s)
|
||||||
|
@ -88,11 +103,14 @@ def translate_new(request):
|
||||||
L2 = partial_trans
|
L2 = partial_trans
|
||||||
L2split = L2.split()
|
L2split = L2.split()
|
||||||
|
|
||||||
if langspecs[langspec]['indic_code']:
|
if langspecs[langspec]['provide_help']:
|
||||||
if L2 != '' and bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])):
|
if L2 != '' and (bool(re.search(r"([^\s\u0900-\u097F])", L2[-1])) or bool(re.search(r"([^\s\u0B80-\u0BFF])", L2[-1]))):
|
||||||
params = {}
|
params = {}
|
||||||
params['inString'] = L2split[-1]
|
params['inString'] = L2split[-1]
|
||||||
params['lang'] = 'hindi'
|
if langspecs[langspec]['tgt'] == 'ta':
|
||||||
|
params['lang'] = 'tamil'
|
||||||
|
if langspecs[langspec]['tgt'] == 'hi':
|
||||||
|
params['lang'] = 'hindi'
|
||||||
data = requests.get('http://xlit.quillpad.in/quillpad_backend2/processWordJSON', params = params).json()
|
data = requests.get('http://xlit.quillpad.in/quillpad_backend2/processWordJSON', params = params).json()
|
||||||
L2split[-1] = data['twords'][0]['options'][0]
|
L2split[-1] = data['twords'][0]['options'][0]
|
||||||
L2 = ' '.join(L2split)
|
L2 = ' '.join(L2split)
|
||||||
|
@ -155,6 +173,7 @@ def translate_new(request):
|
||||||
perplexity = float(math.exp(-score_total / words_total))
|
perplexity = float(math.exp(-score_total / words_total))
|
||||||
avg_score = float(score_total / words_total)
|
avg_score = float(score_total / words_total)
|
||||||
|
|
||||||
print("sentence", sentence)
|
print("sentence", quotaposto(sentence))
|
||||||
return JsonResponse({'result': sentence, 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
|
print(quotaposto("என் <unk> என்னை"))
|
||||||
|
return JsonResponse({'result': quotaposto(sentence), 'attn': sumattn, 'partial': L2, 'ppl': perplexity, 'avg': avg_score})
|
||||||
|
|
||||||
|
|
|
@ -142,8 +142,8 @@ Splits the sentence based on !?।| cleans it and saves the list in session["cor
|
||||||
def corpusinput(request):
|
def corpusinput(request):
|
||||||
corpusraw = request.POST.get('translate')
|
corpusraw = request.POST.get('translate')
|
||||||
langselect = request.POST.get('langselect')
|
langselect = request.POST.get('langselect')
|
||||||
if langselect not in langspecs:
|
# if langselect not in langspecs:
|
||||||
langselect = '*-en'
|
# langselect = '*-en'
|
||||||
request.session["langspec"] = langselect
|
request.session["langspec"] = langselect
|
||||||
s = corpusraw.strip()
|
s = corpusraw.strip()
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,9 @@
|
||||||
if (lang == "hi-en") {
|
if (lang == "hi-en") {
|
||||||
text = "आज मौसम सुहावना है। हमें शाम को बाहर जाना चाहिए।"
|
text = "आज मौसम सुहावना है। हमें शाम को बाहर जाना चाहिए।"
|
||||||
}
|
}
|
||||||
|
if (lang == "en-ta") {
|
||||||
|
text = "The weather is pleasant today. Let us go out to in the evening."
|
||||||
|
}
|
||||||
if (lang == "ml-en") {
|
if (lang == "ml-en") {
|
||||||
text = "മൺസൂൺ മഴ ഒരു അനുഗ്രഹവും ശാപവുമാണെന്ന് വിശ്വസിക്കപ്പെടുന്നു. ആവശ്യത്തിന് അളവിൽ മഴ പെയ്യുമ്പോൾ, വേനൽക്കാലത്തെ കടുത്ത ചൂടിനുശേഷം ഇത് ഞങ്ങൾക്ക് ഒരു അനുഗ്രഹമാണ്. വിളകൾ ധാരാളമായി വളരുന്നതിനാൽ ഇത് കർഷകർക്കും ഒരു അനുഗ്രഹമാണ്. വരണ്ട വേനൽക്കാലത്ത് നദികൾ നിറയുന്നു."
|
text = "മൺസൂൺ മഴ ഒരു അനുഗ്രഹവും ശാപവുമാണെന്ന് വിശ്വസിക്കപ്പെടുന്നു. ആവശ്യത്തിന് അളവിൽ മഴ പെയ്യുമ്പോൾ, വേനൽക്കാലത്തെ കടുത്ത ചൂടിനുശേഷം ഇത് ഞങ്ങൾക്ക് ഒരു അനുഗ്രഹമാണ്. വിളകൾ ധാരാളമായി വളരുന്നതിനാൽ ഇത് കർഷകർക്കും ഒരു അനുഗ്രഹമാണ്. വരണ്ട വേനൽക്കാലത്ത് നദികൾ നിറയുന്നു."
|
||||||
}
|
}
|
||||||
|
@ -79,7 +82,8 @@
|
||||||
<label for="sourcelang">Source Language</label>
|
<label for="sourcelang">Source Language</label>
|
||||||
<select class="form-control" id="src">
|
<select class="form-control" id="src">
|
||||||
<option value="hi">Hindi</option>
|
<option value="hi">Hindi</option>
|
||||||
<option value="en">English</option value="en">
|
<option value="en">English</option>
|
||||||
|
<option value="ta">Tamil</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@ -88,7 +92,8 @@
|
||||||
<select class="form-control" id="tgt">
|
<select class="form-control" id="tgt">
|
||||||
<option value="en">English</option value="en">
|
<option value="en">English</option value="en">
|
||||||
<option value="hi">Hindi</option value="en">
|
<option value="hi">Hindi</option value="en">
|
||||||
<option value="gondi" onselect="limitTgtOptions">Gondi</option value="en">
|
<option value="ta">Tamil</option>
|
||||||
|
<!-- <option value="gondi" onselect="limitTgtOptions">Gondi</option value="en"> -->
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче