Add getWaveFormat API to AudioStreamFormat to support non-PCM wav formats (#460)
* Add getWaveFormat API to AudioStreamFormat to support non-PCM wav formats; include test and test files
* Extend gitattributes for mulaw, alaw
* Use CTS-specific key for CTS tests
* Use different CTS key
* Key change again
This commit is contained in:
Родитель
0757c64c89
Коммит
a552ae3427
|
@ -18,3 +18,5 @@ LICENSE text
|
||||||
|
|
||||||
# Binary extensions:
|
# Binary extensions:
|
||||||
*.wav binary
|
*.wav binary
|
||||||
|
*.alaw binary
|
||||||
|
*.mulaw binary
|
||||||
|
|
|
@ -58,6 +58,8 @@ jobs:
|
||||||
BotRegion:$(BotRegionJS) ^
|
BotRegion:$(BotRegionJS) ^
|
||||||
SpeakerIDSubscriptionKey:$(SpeakerRecognition-WestUS-Key) ^
|
SpeakerIDSubscriptionKey:$(SpeakerRecognition-WestUS-Key) ^
|
||||||
SpeakerIDRegion:westus ^
|
SpeakerIDRegion:westus ^
|
||||||
|
ConversationTranscriptionKey:$(ConverstationTranscriptionKeyWestUSOnline) ^
|
||||||
|
ConversationTranscriptionRegion:westus ^
|
||||||
CustomVoiceSubscriptionKey:$(speech-ne-s0-key1) ^
|
CustomVoiceSubscriptionKey:$(speech-ne-s0-key1) ^
|
||||||
CustomVoiceRegion:northeurope
|
CustomVoiceRegion:northeurope
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,22 @@ export abstract class AudioStreamFormat {
|
||||||
return AudioStreamFormatImpl.getDefaultInputFormat();
|
return AudioStreamFormatImpl.getDefaultInputFormat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates an audio stream format object with the specified format characteristics.
|
||||||
|
* @member AudioStreamFormat.getWaveFormat
|
||||||
|
* @function
|
||||||
|
* @public
|
||||||
|
* @param {number} samplesPerSecond - Sample rate, in samples per second (Hertz).
|
||||||
|
* @param {number} bitsPerSample - Bits per sample, typically 16.
|
||||||
|
* @param {number} channels - Number of channels in the waveform-audio data. Monaural data
|
||||||
|
* uses one channel and stereo data uses two channels.
|
||||||
|
* @param {AudioFormatTag} format - Audio format (PCM, alaw or mulaw).
|
||||||
|
* @returns {AudioStreamFormat} The audio stream format being created.
|
||||||
|
*/
|
||||||
|
public static getWaveFormat(samplesPerSecond: number, bitsPerSample: number, channels: number, format: AudioFormatTag): AudioStreamFormat {
|
||||||
|
return new AudioStreamFormatImpl(samplesPerSecond, bitsPerSample, channels, format);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an audio stream format object with the specified pcm waveformat characteristics.
|
* Creates an audio stream format object with the specified pcm waveformat characteristics.
|
||||||
* @member AudioStreamFormat.getWaveFormatPCM
|
* @member AudioStreamFormat.getWaveFormatPCM
|
||||||
|
@ -72,6 +88,8 @@ export class AudioStreamFormatImpl extends AudioStreamFormat {
|
||||||
*/
|
*/
|
||||||
public constructor(samplesPerSec: number = 16000, bitsPerSample: number = 16, channels: number = 1, format: AudioFormatTag = AudioFormatTag.PCM) {
|
public constructor(samplesPerSec: number = 16000, bitsPerSample: number = 16, channels: number = 1, format: AudioFormatTag = AudioFormatTag.PCM) {
|
||||||
super();
|
super();
|
||||||
|
|
||||||
|
let isWavFormat: boolean = true;
|
||||||
/* 1 for PCM; 6 for alaw; 7 for mulaw */
|
/* 1 for PCM; 6 for alaw; 7 for mulaw */
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case AudioFormatTag.PCM:
|
case AudioFormatTag.PCM:
|
||||||
|
@ -84,6 +102,7 @@ export class AudioStreamFormatImpl extends AudioStreamFormat {
|
||||||
this.formatTag = 7;
|
this.formatTag = 7;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
isWavFormat = false;
|
||||||
}
|
}
|
||||||
this.bitsPerSample = bitsPerSample;
|
this.bitsPerSample = bitsPerSample;
|
||||||
this.samplesPerSec = samplesPerSec;
|
this.samplesPerSec = samplesPerSec;
|
||||||
|
@ -91,35 +110,37 @@ export class AudioStreamFormatImpl extends AudioStreamFormat {
|
||||||
this.avgBytesPerSec = this.samplesPerSec * this.channels * (this.bitsPerSample / 8);
|
this.avgBytesPerSec = this.samplesPerSec * this.channels * (this.bitsPerSample / 8);
|
||||||
this.blockAlign = this.channels * Math.max(this.bitsPerSample, 8);
|
this.blockAlign = this.channels * Math.max(this.bitsPerSample, 8);
|
||||||
|
|
||||||
this.privHeader = new ArrayBuffer(44);
|
if (isWavFormat) {
|
||||||
|
this.privHeader = new ArrayBuffer(44);
|
||||||
|
|
||||||
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/DataView
|
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/DataView
|
||||||
const view = new DataView(this.privHeader);
|
const view = new DataView(this.privHeader);
|
||||||
|
|
||||||
/* RIFF identifier */
|
/* RIFF identifier */
|
||||||
this.setString(view, 0, "RIFF");
|
this.setString(view, 0, "RIFF");
|
||||||
/* file length */
|
/* file length */
|
||||||
view.setUint32(4, 0, true);
|
view.setUint32(4, 0, true);
|
||||||
/* RIFF type & Format */
|
/* RIFF type & Format */
|
||||||
this.setString(view, 8, "WAVEfmt ");
|
this.setString(view, 8, "WAVEfmt ");
|
||||||
/* format chunk length */
|
/* format chunk length */
|
||||||
view.setUint32(16, 16, true);
|
view.setUint32(16, 16, true);
|
||||||
/* audio format */
|
/* audio format */
|
||||||
view.setUint16(20, this.formatTag, true);
|
view.setUint16(20, this.formatTag, true);
|
||||||
/* channel count */
|
/* channel count */
|
||||||
view.setUint16(22, this.channels, true);
|
view.setUint16(22, this.channels, true);
|
||||||
/* sample rate */
|
/* sample rate */
|
||||||
view.setUint32(24, this.samplesPerSec, true);
|
view.setUint32(24, this.samplesPerSec, true);
|
||||||
/* byte rate (sample rate * block align) */
|
/* byte rate (sample rate * block align) */
|
||||||
view.setUint32(28, this.avgBytesPerSec, true);
|
view.setUint32(28, this.avgBytesPerSec, true);
|
||||||
/* block align (channel count * bytes per sample) */
|
/* block align (channel count * bytes per sample) */
|
||||||
view.setUint16(32, this.channels * (this.bitsPerSample / 8), true);
|
view.setUint16(32, this.channels * (this.bitsPerSample / 8), true);
|
||||||
/* bits per sample */
|
/* bits per sample */
|
||||||
view.setUint16(34, this.bitsPerSample, true);
|
view.setUint16(34, this.bitsPerSample, true);
|
||||||
/* data chunk identifier */
|
/* data chunk identifier */
|
||||||
this.setString(view, 36, "data");
|
this.setString(view, 36, "data");
|
||||||
/* data chunk length */
|
/* data chunk length */
|
||||||
view.setUint32(40, 0, true);
|
view.setUint32(40, 0, true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
// Licensed under the MIT license.
|
// Licensed under the MIT license.
|
||||||
|
|
||||||
export { AudioConfig } from "./Audio/AudioConfig";
|
export { AudioConfig } from "./Audio/AudioConfig";
|
||||||
export { AudioStreamFormat } from "./Audio/AudioStreamFormat";
|
export { AudioStreamFormat, AudioFormatTag } from "./Audio/AudioStreamFormat";
|
||||||
export { AudioInputStream, PullAudioInputStream, PushAudioInputStream } from "./Audio/AudioInputStream";
|
export { AudioInputStream, PullAudioInputStream, PushAudioInputStream } from "./Audio/AudioInputStream";
|
||||||
export { AudioOutputStream, PullAudioOutputStream, PushAudioOutputStream} from "./Audio/AudioOutputStream";
|
export { AudioOutputStream, PullAudioOutputStream, PushAudioOutputStream} from "./Audio/AudioOutputStream";
|
||||||
export { CancellationReason } from "./CancellationReason";
|
export { CancellationReason } from "./CancellationReason";
|
||||||
|
|
|
@ -60,7 +60,7 @@ const CreateConversation: (speechConfig?: sdk.SpeechTranslationConfig) => Promis
|
||||||
};
|
};
|
||||||
|
|
||||||
const BuildSpeechConfig: () => sdk.SpeechTranslationConfig = (): sdk.SpeechTranslationConfig => {
|
const BuildSpeechConfig: () => sdk.SpeechTranslationConfig = (): sdk.SpeechTranslationConfig => {
|
||||||
const s: sdk.SpeechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(Settings.SpeakerIDSubscriptionKey, Settings.SpeakerIDRegion);
|
const s: sdk.SpeechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(Settings.ConversationTranscriptionKey, Settings.ConversationTranscriptionRegion);
|
||||||
expect(s).not.toBeUndefined();
|
expect(s).not.toBeUndefined();
|
||||||
return s;
|
return s;
|
||||||
};
|
};
|
||||||
|
|
|
@ -67,6 +67,8 @@ export class Settings {
|
||||||
public static WaveFile: string = Settings.InputDir + "whatstheweatherlike.wav";
|
public static WaveFile: string = Settings.InputDir + "whatstheweatherlike.wav";
|
||||||
public static WaveFile8ch: string = Settings.InputDir + "Speech016_30s_xmos_8ch.wav";
|
public static WaveFile8ch: string = Settings.InputDir + "Speech016_30s_xmos_8ch.wav";
|
||||||
public static WaveFile44k: string = Settings.InputDir + "whatstheweatherlike.44khz.wav";
|
public static WaveFile44k: string = Settings.InputDir + "whatstheweatherlike.44khz.wav";
|
||||||
|
public static WaveFileMulaw: string = Settings.InputDir + "whatstheweatherlike.mulaw";
|
||||||
|
public static WaveFileAlaw: string = Settings.InputDir + "whatstheweatherlike.alaw";
|
||||||
public static LongerWaveFile: string = Settings.InputDir + "StreamingEnrollment.wav";
|
public static LongerWaveFile: string = Settings.InputDir + "StreamingEnrollment.wav";
|
||||||
public static MonoChannelAlignedWaveFile: string = Settings.InputDir + "only-a-test.wav";
|
public static MonoChannelAlignedWaveFile: string = Settings.InputDir + "only-a-test.wav";
|
||||||
public static WaveFileLanguage: string = "en-US";
|
public static WaveFileLanguage: string = "en-US";
|
||||||
|
|
|
@ -1113,53 +1113,65 @@ describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {
|
||||||
r.startContinuousRecognitionAsync();
|
r.startContinuousRecognitionAsync();
|
||||||
}, 15000);
|
}, 15000);
|
||||||
|
|
||||||
test("PushStream44K file", (done: jest.DoneCallback) => {
|
test("PushStream44K, muLaw, Alaw files", async (done: jest.DoneCallback) => {
|
||||||
// tslint:disable-next-line:no-console
|
// tslint:disable-next-line:no-console
|
||||||
console.info("Name: PushStream44K file");
|
console.info("Name: PushStream44K, muLaw, Alaw files");
|
||||||
const s: sdk.SpeechConfig = BuildSpeechConfig();
|
const s: sdk.SpeechConfig = BuildSpeechConfig();
|
||||||
objsToClose.push(s);
|
objsToClose.push(s);
|
||||||
|
|
||||||
const format: sdk.AudioStreamFormat = sdk.AudioStreamFormat.getWaveFormatPCM(44100, 16, 1);
|
let success: number = 0;
|
||||||
const f: ArrayBuffer = WaveFileAudioInput.LoadArrayFromFile(Settings.WaveFile44k);
|
|
||||||
const p: sdk.PushAudioInputStream = sdk.AudioInputStream.createPushStream(format);
|
|
||||||
const config: sdk.AudioConfig = sdk.AudioConfig.fromStreamInput(p);
|
|
||||||
|
|
||||||
p.write(f);
|
const formatTestFiles: { file: string, sampleRate: number, bitRate: number, channels: number, formatTag: sdk.AudioFormatTag }[] = [
|
||||||
p.close();
|
{ file: Settings.WaveFile44k, sampleRate: 44100, bitRate: 16, channels: 1, formatTag: sdk.AudioFormatTag.PCM },
|
||||||
|
{ file: Settings.WaveFileAlaw, sampleRate: 16000, bitRate: 16, channels: 1, formatTag: sdk.AudioFormatTag.ALaw },
|
||||||
|
{ file: Settings.WaveFileMulaw, sampleRate: 16000, bitRate: 16, channels: 1, formatTag: sdk.AudioFormatTag.MuLaw },
|
||||||
|
];
|
||||||
|
|
||||||
const r: sdk.SpeechRecognizer = new sdk.SpeechRecognizer(s, config);
|
for (const testFile of formatTestFiles) {
|
||||||
objsToClose.push(r);
|
const format: sdk.AudioStreamFormat = sdk.AudioStreamFormat.getWaveFormat(testFile.sampleRate, testFile.bitRate, testFile.channels, testFile.formatTag);
|
||||||
|
const f: ArrayBuffer = WaveFileAudioInput.LoadArrayFromFile(testFile.file);
|
||||||
|
const p: sdk.PushAudioInputStream = sdk.AudioInputStream.createPushStream(format);
|
||||||
|
const config: sdk.AudioConfig = sdk.AudioConfig.fromStreamInput(p);
|
||||||
|
|
||||||
expect(r).not.toBeUndefined();
|
p.write(f);
|
||||||
expect(r instanceof sdk.Recognizer);
|
p.close();
|
||||||
|
|
||||||
r.canceled = (o: sdk.Recognizer, e: sdk.SpeechRecognitionCanceledEventArgs): void => {
|
const r: sdk.SpeechRecognizer = new sdk.SpeechRecognizer(s, config);
|
||||||
try {
|
objsToClose.push(r);
|
||||||
expect(e.errorDetails).toBeUndefined();
|
|
||||||
} catch (error) {
|
|
||||||
done.fail(error);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
r.recognizeOnceAsync(
|
expect(r).not.toBeUndefined();
|
||||||
(p2: sdk.SpeechRecognitionResult) => {
|
expect(r instanceof sdk.Recognizer);
|
||||||
const res: sdk.SpeechRecognitionResult = p2;
|
|
||||||
|
r.canceled = (o: sdk.Recognizer, e: sdk.SpeechRecognitionCanceledEventArgs): void => {
|
||||||
try {
|
try {
|
||||||
expect(res).not.toBeUndefined();
|
expect(e.errorDetails).toBeUndefined();
|
||||||
expect(sdk.ResultReason[res.reason]).toEqual(sdk.ResultReason[sdk.ResultReason.RecognizedSpeech]);
|
|
||||||
expect(res.text).toEqual("What's the weather like?");
|
|
||||||
expect(res.properties).not.toBeUndefined();
|
|
||||||
expect(res.properties.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult)).not.toBeUndefined();
|
|
||||||
|
|
||||||
done();
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
done.fail(error);
|
done.fail(error);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
},
|
r.recognizeOnceAsync(
|
||||||
(error: string) => {
|
(p2: sdk.SpeechRecognitionResult) => {
|
||||||
done.fail(error);
|
const res: sdk.SpeechRecognitionResult = p2;
|
||||||
});
|
try {
|
||||||
|
expect(res).not.toBeUndefined();
|
||||||
|
expect(sdk.ResultReason[res.reason]).toEqual(sdk.ResultReason[sdk.ResultReason.RecognizedSpeech]);
|
||||||
|
expect(res.text).toEqual("What's the weather like?");
|
||||||
|
expect(res.properties).not.toBeUndefined();
|
||||||
|
expect(res.properties.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult)).not.toBeUndefined();
|
||||||
|
|
||||||
|
success++;
|
||||||
|
} catch (error) {
|
||||||
|
done.fail(error);
|
||||||
|
}
|
||||||
|
|
||||||
|
},
|
||||||
|
(error: string) => {
|
||||||
|
done.fail(error);
|
||||||
|
});
|
||||||
|
|
||||||
|
}
|
||||||
|
WaitForCondition(() => success === 3, done);
|
||||||
});
|
});
|
||||||
|
|
||||||
test("PushStream4KPostRecognizePush", (done: jest.DoneCallback) => {
|
test("PushStream4KPostRecognizePush", (done: jest.DoneCallback) => {
|
||||||
|
|
Двоичный файл не отображается.
Двоичный файл не отображается.
Загрузка…
Ссылка в новой задаче