Add language support for qnamaker build related api (#1083)

* support output to file for kb:export command

* add language support for qna builder

* add more locale to language mappings

* add more test cases and validation for locale or language support

* fix typo
This commit is contained in:
Fei Chen 2021-02-08 10:06:28 +08:00 коммит произвёл GitHub
Родитель 17c7565834
Коммит b300d92648
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
9 изменённых файлов: 397 добавлений и 22 удалений

Просмотреть файл

@ -355,7 +355,7 @@ const qnaCrossTrain = function (qnaFileIdToResourceMap, luFileIdToResourceMap, i
try {
for (const qnaObjectId of Array.from(qnaFileIdToResourceMap.keys())) {
let fileName = path.basename(qnaObjectId, path.extname(qnaObjectId))
const culture = fileHelper.getCultureFromPath(qnaObjectId)
const culture = fileHelper.getQnACultureFromPath(qnaObjectId)
fileName = culture ? fileName.substring(0, fileName.length - culture.length - 1) : fileName
const luObjectId = Array.from(luFileIdToResourceMap.keys()).find(x => x.toLowerCase() === qnaObjectId.toLowerCase())

Просмотреть файл

@ -43,7 +43,7 @@ export class Builder {
let fileCulture: string
let fileName: string
let cultureFromPath = fileHelper.getCultureFromPath(file)
let cultureFromPath = fileHelper.getLuisCultureFromPath(file)
if (cultureFromPath) {
fileCulture = cultureFromPath
let fileNameWithCulture = path.basename(file, path.extname(file))

Просмотреть файл

@ -20,6 +20,7 @@ const Content = require('./../lu/qna')
const KB = require('./../qna/qnamaker/kb')
const recognizerType = require('./../utils/enums/recognizertypes')
const qnaOptions = require('./../lu/qnaOptions')
const localeToQnALanguageMap = require('./../utils/enums/localeToQnALanguageMap')
export class Builder {
private readonly handler: (input: string) => any
@ -38,7 +39,7 @@ export class Builder {
for (const file of files) {
let fileCulture: string
let fileName: string
let cultureFromPath = fileHelper.getCultureFromPath(file)
let cultureFromPath = fileHelper.getQnACultureFromPath(file)
if (cultureFromPath) {
fileCulture = cultureFromPath
let fileNameWithCulture = path.basename(file, path.extname(file))
@ -48,6 +49,10 @@ export class Builder {
fileName = path.basename(file, path.extname(file))
}
if (!fileCulture) {
throw (new exception(retCode.errorCode.INVALID_INPUT_FILE, 'Culture is not set or unsupported by qnamaker service.'))
}
let fileContent = ''
let qnaFiles = await fileHelper.getLuObjects(undefined, file, true, fileExtEnum.QnAFile)
@ -209,7 +214,15 @@ export class Builder {
// set kb name
if (!currentQna.kb.name) currentQna.kb.name = `${botName}(${suffix}).${qnamakerContent.language}.qna`
// set kb locale and map it to language that qna service can recognize
let locale = qnamakerContent.language
let language = localeToQnALanguageMap[locale]
if (!language) {
throw new Error(`${locale} is not supported in current qnamaker service.`)
}
let currentKB = currentQna.kb
currentKB.language = language
let currentAlt = currentQna.alterations
let hostName = ''
let kbId = ''
@ -446,6 +459,7 @@ export class Builder {
await delay(delayDuration)
const emptyKBJson = {
name: currentKB.name,
language: currentKB.language,
qnaList: [],
urls: [],
files: []

Просмотреть файл

@ -0,0 +1,295 @@
/**
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License.
*/
/**
 * Lookup table from locale / language code to the language name that is
 * assigned to a knowledge base's `language` field before it is sent to the
 * qnamaker service.
 *
 * All keys are lower-case. Consumers index this object directly with the
 * locale string, so lookups are case-sensitive and a missing key is treated
 * as "locale not supported".
 */
module.exports = {
'ar': 'Arabic',
'ar-dz': 'Arabic',
'ar-bh': 'Arabic',
'ar-eg': 'Arabic',
'ar-iq': 'Arabic',
'ar-jo': 'Arabic',
'ar-kw': 'Arabic',
'ar-lb': 'Arabic',
'ar-ly': 'Arabic',
'ar-ma': 'Arabic',
'ar-om': 'Arabic',
'ar-qa': 'Arabic',
'ar-sa': 'Arabic',
'ar-sy': 'Arabic',
'ar-tn': 'Arabic',
'ar-ae': 'Arabic',
'ar-ye': 'Arabic',
'hy': 'Armenian',
'hy-am': 'Armenian',
'bn': 'Bangla',
'bn-bd': 'Bangla',
'bn-in': 'Bangla',
'eu': 'Basque',
'eu-es': 'Basque',
'bg': 'Bulgarian',
'bg-bg': 'Bulgarian',
'ca': 'Catalan',
'ca-es': 'Catalan',
'zh': 'Chinese_Simplified',
'zh-hans': 'Chinese_Simplified',
'zh-cn': 'Chinese_Simplified',
'zh-sg': 'Chinese_Simplified',
'zh-hant': 'Chinese_Traditional',
'zh-hk': 'Chinese_Traditional',
'zh-mo': 'Chinese_Traditional',
'zh-tw': 'Chinese_Traditional',
'hr': 'Croatian',
'hr-ba': 'Croatian',
'hr-hr': 'Croatian',
'cs': 'Czech',
'cs-cz': 'Czech',
'da': 'Danish',
'da-dk': 'Danish',
'nl': 'Dutch',
'nl-be': 'Dutch',
'nl-nl': 'Dutch',
'en': 'English',
'en-as': 'English',
'en-ai': 'English',
'en-ag': 'English',
'en-au': 'English',
'en-at': 'English',
'en-bs': 'English',
'en-bb': 'English',
'en-be': 'English',
'en-bz': 'English',
'en-bm': 'English',
'en-bw': 'English',
'en-io': 'English',
'en-vg': 'English',
'en-bi': 'English',
'en-cm': 'English',
'en-ca': 'English',
'en-029': 'English',
'en-ky': 'English',
'en-cx': 'English',
'en-cc': 'English',
'en-ck': 'English',
'en-cy': 'English',
'en-dk': 'English',
'en-dm': 'English',
'en-er': 'English',
'en-150': 'English',
'en-fk': 'English',
'en-fj': 'English',
'en-fi': 'English',
'en-gm': 'English',
'en-de': 'English',
'en-gh': 'English',
'en-gi': 'English',
'en-gd': 'English',
'en-gu': 'English',
'en-gg': 'English',
'en-gy': 'English',
'en-hk': 'English',
'en-in': 'English',
'en-id': 'English',
'en-ie': 'English',
'en-im': 'English',
'en-il': 'English',
'en-jm': 'English',
'en-je': 'English',
'en-ke': 'English',
'en-ki': 'English',
'en-ls': 'English',
'en-lr': 'English',
'en-mo': 'English',
'en-mg': 'English',
'en-mw': 'English',
'en-my': 'English',
'en-mt': 'English',
'en-mh': 'English',
'en-mu': 'English',
'en-fm': 'English',
'en-ms': 'English',
'en-na': 'English',
'en-nr': 'English',
'en-nl': 'English',
'en-nz': 'English',
'en-ng': 'English',
'en-nu': 'English',
'en-nf': 'English',
'en-mp': 'English',
'en-pk': 'English',
'en-pw': 'English',
'en-pg': 'English',
'en-ph': 'English',
'en-pn': 'English',
'en-pr': 'English',
'en-rw': 'English',
'en-kn': 'English',
'en-lc': 'English',
'en-vc': 'English',
'en-ws': 'English',
'en-sc': 'English',
'en-sl': 'English',
'en-sg': 'English',
'en-sx': 'English',
'en-si': 'English',
'en-sb': 'English',
'en-za': 'English',
'en-ss': 'English',
'en-sh': 'English',
'en-sd': 'English',
'en-sz': 'English',
'en-se': 'English',
'en-ch': 'English',
'en-tz': 'English',
'en-tk': 'English',
'en-to': 'English',
'en-tt': 'English',
'en-tc': 'English',
'en-tv': 'English',
'en-um': 'English',
'en-vi': 'English',
'en-ug': 'English',
'en-gb': 'English',
'en-us': 'English',
'en-vu': 'English',
'en-001': 'English',
'en-zm': 'English',
'en-zw': 'English',
'et': 'Estonian',
'et-ee': 'Estonian',
'fi': 'Finnish',
'fi-fi': 'Finnish',
'fr': 'French',
'fr-be': 'French',
'fr-cm': 'French',
'fr-ca': 'French',
'fr-029': 'French',
'fr-ci': 'French',
'fr-fr': 'French',
'fr-ht': 'French',
'fr-lu': 'French',
'fr-ml': 'French',
'fr-mc': 'French',
'fr-ma': 'French',
'fr-re': 'French',
'fr-sn': 'French',
'fr-ch': 'French',
'fr-cd': 'French',
'gl': 'Galician',
'gl-es': 'Galician',
'de': 'German',
'de-at': 'German',
'de-de': 'German',
'de-li': 'German',
'de-lu': 'German',
'de-ch': 'German',
'el': 'Greek',
'el-gr': 'Greek',
'gu': 'Gujarati',
'gu-in': 'Gujarati',
'he': 'Hebrew',
'he-il': 'Hebrew',
'hi': 'Hindi',
'hi-in': 'Hindi',
'hu': 'Hungarian',
'hu-hu': 'Hungarian',
'is': 'Icelandic',
'is-is': 'Icelandic',
'id': 'Indonesian',
'id-id': 'Indonesian',
'ga': 'Irish',
'ga-ie': 'Irish',
'it': 'Italian',
'it-it': 'Italian',
'it-ch': 'Italian',
'ja': 'Japanese',
'ja-jp': 'Japanese',
'kn': 'Kannada',
'kn-in': 'Kannada',
'ko': 'Korean',
'ko-kr': 'Korean',
'lv': 'Latvian',
'lv-lv': 'Latvian',
'lt': 'Lithuanian',
'lt-lt': 'Lithuanian',
'ml': 'Malayalam',
'ml-in': 'Malayalam',
'ms': 'Malay',
'ms-bn': 'Malay',
'ms-my': 'Malay',
'no': 'Norwegian',
'nb': 'Norwegian',
'nb-no': 'Norwegian',
'nn': 'Norwegian',
'nn-no': 'Norwegian',
'pl': 'Polish',
'pl-pl': 'Polish',
'pt': 'Portuguese',
'pt-br': 'Portuguese',
'pt-pt': 'Portuguese',
'pa': 'Punjabi',
'pa-arab': 'Punjabi',
'pa-in': 'Punjabi',
'pa-arab-pk': 'Punjabi',
'ro': 'Romanian',
'ro-md': 'Romanian',
'ro-ro': 'Romanian',
'ru': 'Russian',
'ru-md': 'Russian',
'ru-ru': 'Russian',
'sr': 'Serbian_Cyrillic',
'sr-cyrl': 'Serbian_Cyrillic',
'sr-cyrl-ba': 'Serbian_Cyrillic',
'sr-cyrl-me': 'Serbian_Cyrillic',
'sr-cyrl-rs': 'Serbian_Cyrillic',
'sr-latn': 'Serbian_Latin',
'sr-latn-ba': 'Serbian_Latin',
'sr-latn-me': 'Serbian_Latin',
'sr-latn-rs': 'Serbian_Latin',
'sk': 'Slovak',
'sk-sk': 'Slovak',
'sl': 'Slovenian',
'sl-si': 'Slovenian',
'es': 'Spanish',
'es-ar': 'Spanish',
'es-bo': 'Spanish',
'es-cl': 'Spanish',
'es-co': 'Spanish',
'es-cr': 'Spanish',
'es-cu': 'Spanish',
'es-do': 'Spanish',
'es-ec': 'Spanish',
'es-sv': 'Spanish',
'es-gt': 'Spanish',
'es-hn': 'Spanish',
'es-419': 'Spanish',
'es-mx': 'Spanish',
'es-ni': 'Spanish',
'es-pa': 'Spanish',
'es-py': 'Spanish',
'es-pe': 'Spanish',
'es-pr': 'Spanish',
'es-es': 'Spanish',
'es-us': 'Spanish',
'es-uy': 'Spanish',
'es-ve': 'Spanish',
'sv': 'Swedish',
'sv-fi': 'Swedish',
'sv-se': 'Swedish',
'ta': 'Tamil',
'ta-in': 'Tamil',
'ta-lk': 'Tamil',
'te': 'Telugu',
'te-in': 'Telugu',
'th': 'Thai',
'th-th': 'Thai',
'tr': 'Turkish',
'tr-tr': 'Turkish',
'uk': 'Ukrainian',
'uk-ua': 'Ukrainian',
'ur': 'Urdu',
'ur-in': 'Urdu',
'ur-pk': 'Urdu',
'vi': 'Vietnamese',
'vi-vn': 'Vietnamese'
};

Просмотреть файл

@ -16,6 +16,7 @@ const LUOptions = require('./../parser/lu/luOptions')
const QnAOptions = require('./../parser/lu/qnaOptions')
const luParser = require('./../parser/lufile/luParser')
const LUSectionTypes = require('./../parser/utils/enums/lusectiontypes')
const localeToQnALanguageMap = require('./../parser/utils/enums/localeToQnALanguageMap')
const globby = require('globby')
/* tslint:disable:prefer-for-of no-unused*/
@ -285,29 +286,42 @@ export function parseJSON(input: string, appType: string) {
}
}
export function getCultureFromPath(file: string): string | null {
export function getLuisCultureFromPath(file: string): string | null {
let fn = path.basename(file, path.extname(file))
let lang = path.extname(fn).substring(1)
switch (lang.toLowerCase()) {
case 'en-us':
case 'zh-cn':
case 'nl-nl':
case 'fr-fr':
case 'fr-ca':
case 'de-de':
case 'it-it':
case 'ja-jp':
case 'ko-kr':
case 'pt-br':
case 'es-es':
case 'es-mx':
case 'tr-tr':
return lang
default:
return null
case 'en-us':
case 'ar-ar':
case 'zh-cn':
case 'nl-nl':
case 'fr-fr':
case 'fr-ca':
case 'de-de':
case 'gu-in':
case 'hi-in':
case 'it-it':
case 'ja-jp':
case 'ko-kr':
case 'mr-in':
case 'pt-br':
case 'es-es':
case 'es-mx':
case 'ta-in':
case 'te-in':
case 'tr-tr':
return lang
default:
return null
}
}
/**
 * Extracts the QnA culture from a file path of the form `<name>.<locale>.<ext>`
 * (for example `faq.en-us.qna`).
 *
 * The locale segment is matched case-insensitively against the keys of
 * `localeToQnALanguageMap`, mirroring the lower-casing done by the LUIS
 * counterpart, and the canonical lower-case key is returned so that later
 * direct lookups in the same map succeed.
 *
 * @param file Path (or bare file name) of the qna file.
 * @returns The lower-case locale found in the file name, or `null` when the
 *          name carries no locale segment or the locale is not in the map.
 */
export function getQnACultureFromPath(file: string): string | null {
  const nameWithoutExt = path.basename(file, path.extname(file))
  // The "extension" of the remaining name is the locale segment, e.g. ".en-us".
  const locale = path.extname(nameWithoutExt).substring(1).toLowerCase()
  // Own-property check: a plain truthiness lookup would also accept inherited
  // keys such as "constructor" or "__proto__" as supported cultures.
  return Object.prototype.hasOwnProperty.call(localeToQnALanguageMap, locale) ? locale : null
}
export function isFileSectionEmpty(content: any): boolean {
if (content === undefined) return true

4
packages/lu/test/fixtures/testcases/locale-test.hr-hr.qna поставляемый Normal file
Просмотреть файл

@ -0,0 +1,4 @@
#? pozdrav
```
zdravo
```

Просмотреть файл

@ -8,7 +8,7 @@ const qnaObject = require('../../../src/parser/lu/qna')
const qnaOptions = require('../../../src/parser/lu/qnaOptions')
const txtfile = require('../../../src/parser/lufile/read-text-file');
const rootDir = path.join(__dirname, './../../fixtures/testcases/import-resolver/qna-import-resolver')
const rootDir = path.join(__dirname, './../../fixtures/testcases/')
describe('builder: importUrlOrFileReference function return lu content from file sucessfully', () => {
before(function () {
@ -439,7 +439,7 @@ describe('builder: loadContents function can resolve import files with customize
const builder = new Builder(() => { })
const result = await builder.loadContents(
[`${path.join(rootDir, "common.en-us.qna")}`], {
[`${path.join(rootDir, "import-resolver/qna-import-resolver/common.en-us.qna")}`], {
culture: 'en-us',
importResolver: importResolver
})
@ -450,6 +450,26 @@ describe('builder: loadContents function can resolve import files with customize
})
})
// Locale handling of Builder.loadContents: the culture is expected to come from
// the locale segment of the file name when no culture option is supplied.
describe('builder: loadContents function can handle locale successfully', () => {
  it('should load supported locale sucessfully', async () => {
    const builder = new Builder(() => { })
    const result = await builder.loadContents([`${path.join(rootDir, "locale-test.hr-hr.qna")}`], {})

    assert.equal(result.length, 1)
    assert.equal(result[0].language, 'hr-hr')
  })

  it('should throw exception for unsupported locale', async () => {
    const builder = new Builder(() => { })

    // Capture the rejection instead of asserting inside the try block: an
    // assert.fail() placed there would be swallowed by this test's own catch
    // and reported as a confusing message mismatch.
    let thrown
    try {
      await builder.loadContents([`${path.join(rootDir, "locale-test.ab-ab.qna")}`], {})
    } catch (error) {
      thrown = error
    }

    if (!thrown) {
      assert.fail('Exception is not thrown.')
    }
    assert.equal(thrown.text, 'Culture is not set or unsupported by qnamaker service.')
  })
})
describe('builder: build function can catch relative endpoint exception successfully', () => {
it('should throw exception for non absolute endpoint', async () => {
const builder = new Builder(() => { })

Просмотреть файл

@ -923,3 +923,27 @@ describe('qnamaker:build throw qnamaker build failed exception successfully', ()
expect(ctx.stderr).to.contain('Qnamaker build failed: Access denied due to invalid subscription key.')
})
})
// qnamaker:build is expected to exit with code 1 and report the locale on
// stderr when the requested culture cannot be mapped to a QnA language.
describe('qnamaker:build throw locale(language) not supported exception successfully', () => {
  // Canned response for the knowledge base listing request.
  const knowledgebaseListing = {
    knowledgebases: [
      {
        name: 'test(development).en-us.qna',
        id: 'f8c64e2a-1111-3a09-8f78-39d7adc76ec5',
        hostName: 'https://myqnamakerbot.azurewebsites.net'
      }
    ]
  }

  const buildArgs = [
    'qnamaker:build',
    '--in', './test/fixtures/testcases/qnabuild/locale/unsupported-locale.ab-ab.qna',
    '--subscriptionKey', uuidv1(),
    '--botName', 'test',
    '--log',
    '--suffix', 'development',
    '--defaultCulture', 'ab-ab'
  ]

  before(function () {
    const service = nock('https://westus.api.cognitive.microsoft.com')
    service
      .get(uri => uri.includes('qnamaker'))
      .reply(200, knowledgebaseListing)
  })

  test
    .stdout()
    .stderr()
    .command(buildArgs)
    .exit(1)
    .it('should throw locale not supported exception successfully', ctx => {
      expect(ctx.stderr).to.contain('Qnamaker build failed: ab-ab is not supported in current qnamaker service.')
    })
})

Просмотреть файл

@ -0,0 +1,4 @@
#? greeting
```
hello
```