diff --git a/packages/lu/src/parser/cross-train/crossTrainer.js b/packages/lu/src/parser/cross-train/crossTrainer.js index 083495f5..acde2ce5 100644 --- a/packages/lu/src/parser/cross-train/crossTrainer.js +++ b/packages/lu/src/parser/cross-train/crossTrainer.js @@ -355,7 +355,7 @@ const qnaCrossTrain = function (qnaFileIdToResourceMap, luFileIdToResourceMap, i try { for (const qnaObjectId of Array.from(qnaFileIdToResourceMap.keys())) { let fileName = path.basename(qnaObjectId, path.extname(qnaObjectId)) - const culture = fileHelper.getCultureFromPath(qnaObjectId) + const culture = fileHelper.getQnACultureFromPath(qnaObjectId) fileName = culture ? fileName.substring(0, fileName.length - culture.length - 1) : fileName const luObjectId = Array.from(luFileIdToResourceMap.keys()).find(x => x.toLowerCase() === qnaObjectId.toLowerCase()) diff --git a/packages/lu/src/parser/lubuild/builder.ts b/packages/lu/src/parser/lubuild/builder.ts index eee1a585..6149ca3c 100644 --- a/packages/lu/src/parser/lubuild/builder.ts +++ b/packages/lu/src/parser/lubuild/builder.ts @@ -43,7 +43,7 @@ export class Builder { let fileCulture: string let fileName: string - let cultureFromPath = fileHelper.getCultureFromPath(file) + let cultureFromPath = fileHelper.getLuisCultureFromPath(file) if (cultureFromPath) { fileCulture = cultureFromPath let fileNameWithCulture = path.basename(file, path.extname(file)) diff --git a/packages/lu/src/parser/qnabuild/builder.ts b/packages/lu/src/parser/qnabuild/builder.ts index d16102d9..f57aeeec 100644 --- a/packages/lu/src/parser/qnabuild/builder.ts +++ b/packages/lu/src/parser/qnabuild/builder.ts @@ -20,6 +20,7 @@ const Content = require('./../lu/qna') const KB = require('./../qna/qnamaker/kb') const recognizerType = require('./../utils/enums/recognizertypes') const qnaOptions = require('./../lu/qnaOptions') +const localeToQnALanguageMap = require('./../utils/enums/localeToQnALanguageMap') export class Builder { private readonly handler: (input: string) => 
any @@ -38,7 +39,7 @@ export class Builder { for (const file of files) { let fileCulture: string let fileName: string - let cultureFromPath = fileHelper.getCultureFromPath(file) + let cultureFromPath = fileHelper.getQnACultureFromPath(file) if (cultureFromPath) { fileCulture = cultureFromPath let fileNameWithCulture = path.basename(file, path.extname(file)) @@ -48,6 +49,10 @@ export class Builder { fileName = path.basename(file, path.extname(file)) } + if (!fileCulture) { + throw (new exception(retCode.errorCode.INVALID_INPUT_FILE, 'Culture is not set or unsupported by qnamaker service.')) + } + let fileContent = '' let qnaFiles = await fileHelper.getLuObjects(undefined, file, true, fileExtEnum.QnAFile) @@ -209,7 +214,15 @@ export class Builder { // set kb name if (!currentQna.kb.name) currentQna.kb.name = `${botName}(${suffix}).${qnamakerContent.language}.qna` + // set kb locale and map it to language that qna service can recognize + let locale = qnamakerContent.language + let language = localeToQnALanguageMap[locale] + if (!language) { + throw new Error(`${locale} is not supported in current qnamaker service.`) + } + let currentKB = currentQna.kb + currentKB.language = language let currentAlt = currentQna.alterations let hostName = '' let kbId = '' @@ -446,6 +459,7 @@ export class Builder { await delay(delayDuration) const emptyKBJson = { name: currentKB.name, + language: currentKB.language, qnaList: [], urls: [], files: [] diff --git a/packages/lu/src/parser/utils/enums/localeToQnALanguageMap.js b/packages/lu/src/parser/utils/enums/localeToQnALanguageMap.js new file mode 100644 index 00000000..b4e2e26b --- /dev/null +++ b/packages/lu/src/parser/utils/enums/localeToQnALanguageMap.js @@ -0,0 +1,295 @@ +/** + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. 
+ */ +module.exports = { + 'ar': 'Arabic', + 'ar-dz': 'Arabic', + 'ar-bh': 'Arabic', + 'ar-eg': 'Arabic', + 'ar-iq': 'Arabic', + 'ar-jo': 'Arabic', + 'ar-kw': 'Arabic', + 'ar-lb': 'Arabic', + 'ar-ly': 'Arabic', + 'ar-ma': 'Arabic', + 'ar-om': 'Arabic', + 'ar-qa': 'Arabic', + 'ar-sa': 'Arabic', + 'ar-sy': 'Arabic', + 'ar-tn': 'Arabic', + 'ar-ae': 'Arabic', + 'ar-ye': 'Arabic', + 'hy': 'Armenian', + 'hy-am': 'Armenian', + 'bn': 'Bangla', + 'bn-bd': 'Bangla', + 'bn-in': 'Bangla', + 'eu': 'Basque', + 'eu-es': 'Basque', + 'bg': 'Bulgarian', + 'bg-bg': 'Bulgarian', + 'ca': 'Catalan', + 'ca-es': 'Catalan', + 'zh': 'Chinese_Simplified', + 'zh-hans': 'Chinese_Simplified', + 'zh-cn': 'Chinese_Simplified', + 'zh-sg': 'Chinese_Simplified', + 'zh-hant': 'Chinese_Traditional', + 'zh-hk': 'Chinese_Traditional', + 'zh-mo': 'Chinese_Traditional', + 'zh-tw': 'Chinese_Traditional', + 'hr': 'Croatian', + 'hr-ba': 'Croatian', + 'hr-hr': 'Croatian', + 'cs': 'Czech', + 'cs-cz': 'Czech', + 'da': 'Danish', + 'da-dk': 'Danish', + 'nl': 'Dutch', + 'nl-be': 'Dutch', + 'nl-nl': 'Dutch', + 'en': 'English', + 'en-as': 'English', + 'en-ai': 'English', + 'en-ag': 'English', + 'en-au': 'English', + 'en-at': 'English', + 'en-bs': 'English', + 'en-bb': 'English', + 'en-be': 'English', + 'en-bz': 'English', + 'en-bm': 'English', + 'en-bw': 'English', + 'en-io': 'English', + 'en-vg': 'English', + 'en-bi': 'English', + 'en-cm': 'English', + 'en-ca': 'English', + 'en-029': 'English', + 'en-ky': 'English', + 'en-cx': 'English', + 'en-cc': 'English', + 'en-ck': 'English', + 'en-cy': 'English', + 'en-dk': 'English', + 'en-dm': 'English', + 'en-er': 'English', + 'en-150': 'English', + 'en-fk': 'English', + 'en-fj': 'English', + 'en-fi': 'English', + 'en-gm': 'English', + 'en-de': 'English', + 'en-gh': 'English', + 'en-gi': 'English', + 'en-gd': 'English', + 'en-gu': 'English', + 'en-gg': 'English', + 'en-gy': 'English', + 'en-hk': 'English', + 'en-in': 'English', + 'en-id': 'English', + 'en-ie': 'English', + 
'en-im': 'English', + 'en-il': 'English', + 'en-jm': 'English', + 'en-je': 'English', + 'en-ke': 'English', + 'en-ki': 'English', + 'en-ls': 'English', + 'en-lr': 'English', + 'en-mo': 'English', + 'en-mg': 'English', + 'en-mw': 'English', + 'en-my': 'English', + 'en-mt': 'English', + 'en-mh': 'English', + 'en-mu': 'English', + 'en-fm': 'English', + 'en-ms': 'English', + 'en-na': 'English', + 'en-nr': 'English', + 'en-nl': 'English', + 'en-nz': 'English', + 'en-ng': 'English', + 'en-nu': 'English', + 'en-nf': 'English', + 'en-mp': 'English', + 'en-pk': 'English', + 'en-pw': 'English', + 'en-pg': 'English', + 'en-ph': 'English', + 'en-pn': 'English', + 'en-pr': 'English', + 'en-rw': 'English', + 'en-kn': 'English', + 'en-lc': 'English', + 'en-vc': 'English', + 'en-ws': 'English', + 'en-sc': 'English', + 'en-sl': 'English', + 'en-sg': 'English', + 'en-sx': 'English', + 'en-si': 'English', + 'en-sb': 'English', + 'en-za': 'English', + 'en-ss': 'English', + 'en-sh': 'English', + 'en-sd': 'English', + 'en-sz': 'English', + 'en-se': 'English', + 'en-ch': 'English', + 'en-tz': 'English', + 'en-tk': 'English', + 'en-to': 'English', + 'en-tt': 'English', + 'en-tc': 'English', + 'en-tv': 'English', + 'en-um': 'English', + 'en-vi': 'English', + 'en-ug': 'English', + 'en-gb': 'English', + 'en-us': 'English', + 'en-vu': 'English', + 'en-001': 'English', + 'en-zm': 'English', + 'en-zw': 'English', + 'et': 'Estonian', + 'et-ee': 'Estonian', + 'fi': 'Finnish', + 'fi-fi': 'Finnish', + 'fr': 'French', + 'fr-be': 'French', + 'fr-cm': 'French', + 'fr-ca': 'French', + 'fr-029': 'French', + 'fr-ci': 'French', + 'fr-fr': 'French', + 'fr-ht': 'French', + 'fr-lu': 'French', + 'fr-ml': 'French', + 'fr-mc': 'French', + 'fr-ma': 'French', + 'fr-re': 'French', + 'fr-sn': 'French', + 'fr-ch': 'French', + 'fr-cd': 'French', + 'gl': 'Galician', + 'gl-es': 'Galician', + 'de': 'German', + 'de-at': 'German', + 'de-de': 'German', + 'de-li': 'German', + 'de-lu': 'German', + 'de-ch': 'German', + 'el': 
'Greek', + 'el-gr': 'Greek', + 'gu': 'Gujarati', + 'gu-in': 'Gujarati', + 'he': 'Hebrew', + 'he-il': 'Hebrew', + 'hi': 'Hindi', + 'hi-in': 'Hindi', + 'hu': 'Hungarian', + 'hu-hu': 'Hungarian', + 'is': 'Icelandic', + 'is-is': 'Icelandic', + 'id': 'Indonesian', + 'id-id': 'Indonesian', + 'ga': 'Irish', + 'ga-ie': 'Irish', + 'it': 'Italian', + 'it-it': 'Italian', + 'it-ch': 'Italian', + 'ja': 'Japanese', + 'ja-jp': 'Japanese', + 'kn': 'Kannada', + 'kn-in': 'Kannada', + 'ko': 'Korean', + 'ko-kr': 'Korean', + 'lv': 'Latvian', + 'lv-lv': 'Latvian', + 'lt': 'Lithuanian', + 'lt-lt': 'Lithuanian', + 'ml': 'Malayalam', + 'ml-in': 'Malayalam', + 'ms': 'Malay', + 'ms-bn': 'Malay', + 'ms-my': 'Malay', + 'no': 'Norwegian', + 'nb': 'Norwegian', + 'nb-no': 'Norwegian', + 'nn': 'Norwegian', + 'nn-no': 'Norwegian', + 'pl': 'Polish', + 'pl-pl': 'Polish', + 'pt': 'Portuguese', + 'pt-br': 'Portuguese', + 'pt-pt': 'Portuguese', + 'pa': 'Punjabi', + 'pa-arab': 'Punjabi', + 'pa-in': 'Punjabi', + 'pa-arab-pk': 'Punjabi', + 'ro': 'Romanian', + 'ro-md': 'Romanian', + 'ro-ro': 'Romanian', + 'ru': 'Russian', + 'ru-md': 'Russian', + 'ru-ru': 'Russian', + 'sr': 'Serbian_Cyrillic', + 'sr-cyrl': 'Serbian_Cyrillic', + 'sr-cyrl-ba': 'Serbian_Cyrillic', + 'sr-cyrl-me': 'Serbian_Cyrillic', + 'sr-cyrl-rs': 'Serbian_Cyrillic', + 'sr-latn': 'Serbian_Latin', + 'sr-latn-ba': 'Serbian_Latin', + 'sr-latn-me': 'Serbian_Latin', + 'sr-latn-rs': 'Serbian_Latin', + 'sk': 'Slovak', + 'sk-sk': 'Slovak', + 'sl': 'Slovenian', + 'sl-si': 'Slovenian', + 'es': 'Spanish', + 'es-ar': 'Spanish', + 'es-bo': 'Spanish', + 'es-cl': 'Spanish', + 'es-co': 'Spanish', + 'es-cr': 'Spanish', + 'es-cu': 'Spanish', + 'es-do': 'Spanish', + 'es-ec': 'Spanish', + 'es-sv': 'Spanish', + 'es-gt': 'Spanish', + 'es-hn': 'Spanish', + 'es-419': 'Spanish', + 'es-mx': 'Spanish', + 'es-ni': 'Spanish', + 'es-pa': 'Spanish', + 'es-py': 'Spanish', + 'es-pe': 'Spanish', + 'es-pr': 'Spanish', + 'es-es': 'Spanish', + 'es-us': 'Spanish', + 'es-uy': 
/**
 * Extracts the dot-separated segment located between a file's base name and its
 * extension, e.g. `en-us` for `dialog.en-us.lu`.
 *
 * @param file Path or file name to inspect.
 * @returns The segment exactly as written in the file name, or an empty string
 * when the name has no such segment (e.g. `dialog.lu`).
 */
function extractCultureSegment(file: string): string {
  // Strip the real extension first (".lu" / ".qna"), then treat whatever
  // "extension" remains on the shortened name as the culture candidate.
  const nameWithoutExtension = path.basename(file, path.extname(file))
  return path.extname(nameWithoutExtension).substring(1)
}

/**
 * Reads the culture embedded in a LU file name (`<name>.<culture>.lu`) and
 * accepts it only when it is one of the LUIS cultures listed below.
 *
 * Matching is case-insensitive, but the culture is returned exactly as it is
 * spelled in the file name.
 *
 * @param file Path or file name of the LU file.
 * @returns The culture segment, or `null` when the file name carries no culture
 * or the culture is not in the list.
 */
export function getLuisCultureFromPath(file: string): string | null {
  const lang = extractCultureSegment(file)
  switch (lang.toLowerCase()) {
    case 'en-us':
    case 'ar-ar':
    case 'zh-cn':
    case 'nl-nl':
    case 'fr-fr':
    case 'fr-ca':
    case 'de-de':
    case 'gu-in':
    case 'hi-in':
    case 'it-it':
    case 'ja-jp':
    case 'ko-kr':
    case 'mr-in':
    case 'pt-br':
    case 'es-es':
    case 'es-mx':
    case 'ta-in':
    case 'te-in':
    case 'tr-tr':
      return lang
    default:
      return null
  }
}

/**
 * Reads the culture embedded in a QnA file name (`<name>.<culture>.qna`) and
 * accepts it only when it is a key of `localeToQnALanguageMap`.
 *
 * The map's keys are all lower case, so the segment is lower-cased before the
 * lookup; `bot.en-US.qna` is therefore recognised just like `bot.en-us.qna`.
 * The lower-cased form is what gets returned, so the result can be used
 * directly as a key of the same map. It has the same length as the segment in
 * the file name.
 *
 * @param file Path or file name of the QnA file.
 * @returns The lower-cased culture, or `null` when the file name carries no
 * culture or the culture is not a key of the map.
 */
export function getQnACultureFromPath(file: string): string | null {
  const culture = extractCultureSegment(file).toLowerCase()

  // Own-property check rather than a truthiness test on map[culture]: a plain
  // object also answers for inherited members such as "constructor" or
  // "toString", which would otherwise be reported as supported cultures.
  return Object.prototype.hasOwnProperty.call(localeToQnALanguageMap, culture) ? culture : null
}
it('should load supported locale sucessfully', async () => { + const builder = new Builder(() => { }) + const result = await builder.loadContents([`${path.join(rootDir, "locale-test.hr-hr.qna")}`], {}) + + assert.equal(result.length, 1) + assert.equal(result[0].language, 'hr-hr') + }) + + it('should throw exception for unsupported locale', async () => { + const builder = new Builder(() => { }) + try { + await builder.loadContents([`${path.join(rootDir, "locale-test.ab-ab.qna")}`], {}) + assert.fail('Exception is not thrown.') + } catch (error) { + assert.equal(error.text, 'Culture is not set or unsupported by qnamaker service.') + } + }) +}) + describe('builder: build function can catch relative endpoint exception successfully', () => { it('should throw exception for non absolute endpoint', async () => { const builder = new Builder(() => { }) diff --git a/packages/qnamaker/test/commands/qnamaker/build.test.ts b/packages/qnamaker/test/commands/qnamaker/build.test.ts index 96f5999c..69ec35d1 100644 --- a/packages/qnamaker/test/commands/qnamaker/build.test.ts +++ b/packages/qnamaker/test/commands/qnamaker/build.test.ts @@ -923,3 +923,27 @@ describe('qnamaker:build throw qnamaker build failed exception successfully', () expect(ctx.stderr).to.contain('Qnamaker build failed: Access denied due to invalid subscription key.') }) }) + +describe('qnamaker:build throw locale(language) not supported exception successfully', () => { + before(function () { + nock('https://westus.api.cognitive.microsoft.com') + .get(uri => uri.includes('qnamaker')) + .reply(200, { + knowledgebases: + [{ + name: 'test(development).en-us.qna', + id: 'f8c64e2a-1111-3a09-8f78-39d7adc76ec5', + hostName: 'https://myqnamakerbot.azurewebsites.net' + }] + }) + }) + + test + .stdout() + .stderr() + .command(['qnamaker:build', '--in', './test/fixtures/testcases/qnabuild/locale/unsupported-locale.ab-ab.qna', '--subscriptionKey', uuidv1(), '--botName', 'test', '--log', '--suffix', 'development', 
'--defaultCulture', 'ab-ab']) + .exit(1) + .it('should throw locale not supported exception successfully', ctx => { + expect(ctx.stderr).to.contain('Qnamaker build failed: ab-ab is not supported in current qnamaker service.') + }) +}) \ No newline at end of file diff --git a/packages/qnamaker/test/fixtures/testcases/qnabuild/locale/unsupported-locale.ab-ab.qna b/packages/qnamaker/test/fixtures/testcases/qnabuild/locale/unsupported-locale.ab-ab.qna new file mode 100644 index 00000000..2e8679a8 --- /dev/null +++ b/packages/qnamaker/test/fixtures/testcases/qnabuild/locale/unsupported-locale.ab-ab.qna @@ -0,0 +1,4 @@ +#? greeting +``` +hello +``` \ No newline at end of file