initial commit for TTS implementation, with NodeJS support only.
Yulin Li 2020-03-26 15:54:34 +08:00 committed by GitHub
Parent b73c871047
Commit 65d292ddf5
No known key found for this signature
GPG Key ID: 4AEE18F83AFDEB23
45 changed files with 3581 additions and 142 deletions

7
.gitignore vendored
View File

@@ -13,3 +13,10 @@ test-javascript-junit.xml
coverage/*
*.tgz
# ignore audio files generated in tests
test*.wav
test*.mp3
# ignore files generated in tests
report[\.0-9]*json
junit.xml

6
package-lock.json generated
View File

@@ -568,9 +568,9 @@
"dev": true
},
"@types/node": {
"version": "12.12.14",
"resolved": "https://registry.npmjs.org/@types/node/-/node-12.12.14.tgz",
"integrity": "sha512-u/SJDyXwuihpwjXy7hOOghagLEV1KdAST6syfnOk6QZAMzZuWZqXy5aYYZbh8Jdpd4escVFP0MvftHNDb9pruA==",
"version": "12.12.30",
"resolved": "https://registry.npmjs.org/@types/node/-/node-12.12.30.tgz",
"integrity": "sha512-sz9MF/zk6qVr3pAnM0BSQvYIBK44tS75QC5N+VbWSE4DjCV/pJ+UzCW/F+vVnl7TkOPcuwQureKNtSSwjBTaMg==",
"dev": true
},
"@types/request": {

View File

@@ -16,7 +16,8 @@
"js",
"browser",
"websocket",
"speechtotext"
"speechtotext",
"texttospeech"
],
"bugs": {
"url": "https://github.com/Microsoft/cognitive-services-speech-sdk-js/issues"
@@ -32,7 +33,8 @@
"distrib/lib/external/ocsp/ocsp": false,
"https-proxy-agent": false,
"simple-lru-cache": false,
"ws": false
"ws": false,
"fs": false
},
"main": "distrib/lib/microsoft.cognitiveservices.speech.sdk.js",
"module": "distrib/es2015/microsoft.cognitiveservices.speech.sdk.js",
@@ -46,7 +48,7 @@
],
"devDependencies": {
"@types/jest": "^24.0.23",
"@types/node": "^12.12.14",
"@types/node": "^12.12.30",
"@types/request": "^2.48.3",
"@types/ws": "^6.0.4",
"asn1.js": "^5.2.0",

View File

@@ -10,3 +10,4 @@ export * from "./WebsocketConnection";
export * from "./WebsocketMessageAdapter";
export * from "./ReplayableAudioNode";
export * from "./ProxyInfo";
export * from "./SpeakerAudioDestination";

View File

@@ -2,7 +2,7 @@
// Licensed under the MIT license.
import { RecognizerConfig } from "../common.speech/Exports";
import { PropertyId } from "../sdk/Exports";
import { PropertyCollection, PropertyId } from "../sdk/Exports";
export class ProxyInfo {
private privProxyHostName: string;
@@ -17,11 +17,15 @@ export class ProxyInfo {
this.privProxyPassword = proxyPassword;
}
public static fromParameters(parameters: PropertyCollection): ProxyInfo {
return new ProxyInfo(parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyHostName),
parseInt(parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyPort), 10),
parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyUserName),
parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyPassword));
}
public static fromRecognizerConfig(config: RecognizerConfig): ProxyInfo {
return new ProxyInfo(config.parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyHostName),
parseInt(config.parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyPort), 10),
config.parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyUserName),
config.parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyPassword));
return this.fromParameters(config.parameters);
}
public get HostName(): string {

View File

@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { createNoDashGuid, IAudioDestination } from "../common/Exports";
import { AudioStreamFormat } from "../sdk/Exports";
/**
* This is not implemented yet; it is just a placeholder.
*/
export class SpeakerAudioDestination implements IAudioDestination {
private readonly privId: string;
public constructor(audioDestinationId?: string) {
this.privId = audioDestinationId ? audioDestinationId : createNoDashGuid();
}
public id(): string {
return this.privId;
}
// tslint:disable-next-line:no-empty
public write(buffer: ArrayBuffer): void {}
// tslint:disable-next-line:no-empty
public close(): void {}
// tslint:disable-next-line:no-empty
set format(format: AudioStreamFormat) {}
}

View File

@@ -4,6 +4,7 @@
import {
ArgumentNullError,
ConnectionClosedEvent,
ConnectionErrorEvent,
ConnectionEstablishedEvent,
ConnectionEvent,
ConnectionMessage,
@@ -192,6 +193,7 @@ export class WebsocketMessageAdapter {
};
this.privWebsocketClient.onerror = (e: { error: any; message: string; type: string; target: WebSocket | ws }) => {
this.onEvent(new ConnectionErrorEvent(this.privConnectionId, e.message, e.type));
this.privLastErrorReceived = e.message;
};

View File

@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { AudioOutputFormatImpl } from "../sdk/Audio/AudioOutputFormat";
import { AudioOutputStream, PullAudioOutputStreamImpl } from "../sdk/Audio/AudioOutputStream";
import { DialogServiceTurnStateManager } from "./DialogServiceTurnStateManager";
import { ActivityPayloadResponse, MessageDataStreamType } from "./ServiceMessages/ActivityResponsePayload";
@@ -31,6 +32,7 @@ export class DialogServiceTurnState {
public processActivityPayload(payload: ActivityPayloadResponse): PullAudioOutputStreamImpl {
if (payload.messageDataStreamType === MessageDataStreamType.TextToSpeechAudio) {
this.privAudioStream = AudioOutputStream.createPullStream() as PullAudioOutputStreamImpl;
this.privAudioStream.format = AudioOutputFormatImpl.getDefaultOutputFormat();
// tslint:disable-next-line:no-console
// console.info("Audio start debugturn:" + this.privRequestId);
}

View File

@@ -7,6 +7,7 @@ export * from "./CognitiveSubscriptionKeyAuthentication";
export * from "./CognitiveTokenAuthentication";
export * from "./IAuthentication";
export * from "./IConnectionFactory";
export * from "./ISynthesisConnectionFactory";
export * from "./IntentConnectionFactory";
export * from "./RecognitionEvents";
export * from "./ServiceRecognizerBase";
@@ -15,6 +16,7 @@ export * from "./SpeechServiceInterfaces";
export * from "./WebsocketMessageFormatter";
export * from "./SpeechConnectionFactory";
export * from "./TranslationConnectionFactory";
export * from "./SpeechSynthesisConnectionFactory";
export * from "./EnumTranslation";
export * from "./ServiceMessages/Enums";
export * from "./ServiceMessages/TranslationSynthesisEnd";
@@ -36,6 +38,10 @@ export * from "./DynamicGrammarInterfaces";
export * from "./DialogServiceAdapter";
export * from "./AgentConfig";
export * from "./Transcription/Exports";
export * from "./ServiceMessages/SynthesisAudioMetadata";
export * from "./SynthesisTurn";
export * from "./SynthesisAdapterBase";
export * from "./SynthesizerConfig";
export const OutputFormatPropertyName: string = "OutputFormat";
export const CancellationErrorCodePropertyName: string = "CancellationErrorCode";

View File

@@ -0,0 +1,13 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { IConnection } from "../common/Exports";
import { AuthInfo } from "./IAuthentication";
import { SynthesizerConfig } from "./SynthesizerConfig";
export interface ISynthesisConnectionFactory {
create(
config: SynthesizerConfig,
authInfo: AuthInfo,
connectionId?: string): IConnection;
}

View File

@@ -0,0 +1,34 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
export interface ISynthesisMetadata {
Type: string;
Data: {
Offset: number;
text: {
Text: string;
Length: number;
};
};
}
// audio.metadata
export interface ISynthesisAudioMetadata {
Metadata: ISynthesisMetadata[];
}
export class SynthesisAudioMetadata implements ISynthesisAudioMetadata {
private privSynthesisAudioMetadata: ISynthesisAudioMetadata;
private constructor(json: string) {
this.privSynthesisAudioMetadata = JSON.parse(json);
}
public static fromJSON(json: string): SynthesisAudioMetadata {
return new SynthesisAudioMetadata(json);
}
public get Metadata(): ISynthesisMetadata[] {
return this.privSynthesisAudioMetadata.Metadata;
}
}
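For illustration, a WordBoundary entry arriving on the audio.metadata path would parse as follows; the payload values here are hypothetical, not captured from the service:

const json: string = JSON.stringify({
    Metadata: [{
        Data: { Offset: 500000, text: { Length: 5, Text: "hello" } },
        Type: "WordBoundary",
    }],
});
const metadata = SynthesisAudioMetadata.fromJSON(json).Metadata[0];
// metadata.Type === "WordBoundary", metadata.Data.text.Text === "hello"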

View File

@@ -7,12 +7,14 @@ const PathHeaderName: string = "path";
const ContentTypeHeaderName: string = "content-type";
const RequestIdHeaderName: string = "x-requestid";
const RequestTimestampHeaderName: string = "x-timestamp";
const RequestStreamIdHeaderName: string = "x-streamid";
export class SpeechConnectionMessage extends ConnectionMessage {
private privPath: string;
private privRequestId: string;
private privContentType: string;
private privStreamId: string;
private privAdditionalHeaders: IStringDictionary<string>;
public constructor(
@@ -21,6 +23,7 @@ export class SpeechConnectionMessage extends ConnectionMessage {
requestId: string,
contentType: string,
body: any,
streamId?: string,
additionalHeaders?: IStringDictionary<string>,
id?: string) {
@@ -40,6 +43,10 @@
headers[ContentTypeHeaderName] = contentType;
}
if (streamId) {
headers[RequestStreamIdHeaderName] = streamId;
}
if (additionalHeaders) {
for (const headerName in additionalHeaders) {
if (headerName) {
@@ -58,6 +65,7 @@
this.privPath = path;
this.privRequestId = requestId;
this.privContentType = contentType;
this.privStreamId = streamId;
this.privAdditionalHeaders = additionalHeaders;
}
@@ -73,6 +81,10 @@
return this.privContentType;
}
public get streamId(): string {
return this.privStreamId;
}
public get additionalHeaders(): IStringDictionary<string> {
return this.privAdditionalHeaders;
}
@@ -82,6 +94,7 @@
let requestId = null;
let contentType = null;
let requestTimestamp = null;
let streamId = null;
const additionalHeaders: IStringDictionary<string> = {};
if (message.headers) {
@@ -95,6 +108,8 @@
requestTimestamp = message.headers[headerName];
} else if (headerName.toLowerCase() === ContentTypeHeaderName.toLowerCase()) {
contentType = message.headers[headerName];
} else if (headerName.toLowerCase() === RequestStreamIdHeaderName.toLowerCase()) {
streamId = message.headers[headerName];
} else {
additionalHeaders[headerName] = message.headers[headerName];
}
@@ -108,6 +123,7 @@
requestId,
contentType,
message.body,
streamId,
additionalHeaders,
message.id);
}
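A minimal sketch of the new streamId plumbing, using the constructor's leading parameters (messageType, path) as they appear elsewhere in this commit; the values are hypothetical:

const msg = new SpeechConnectionMessage(
    MessageType.Text,
    "synthesis.context",
    "0123456789abcdef0123456789abcdef", // requestId
    "application/json",
    "{}",
    "1"); // streamId, emitted as the "x-streamid" header
// SpeechConnectionMessage.fromConnectionMessage(msg).streamId === "1"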

View File

@@ -0,0 +1,55 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import {
ProxyInfo,
WebsocketConnection, WebsocketMessageAdapter,
} from "../common.browser/Exports";
import {
IConnection,
IStringDictionary
} from "../common/Exports";
import { PropertyId } from "../sdk/Exports";
import {
AuthInfo,
SynthesizerConfig,
WebsocketMessageFormatter
} from "./Exports";
import { ISynthesisConnectionFactory } from "./ISynthesisConnectionFactory";
import {
QueryParameterNames
} from "./QueryParameterNames";
export class SpeechSynthesisConnectionFactory implements ISynthesisConnectionFactory {
private readonly synthesisUri: string = "/cognitiveservices/websocket/v1";
public create = (
config: SynthesizerConfig,
authInfo: AuthInfo,
connectionId?: string): IConnection => {
let endpoint: string = config.parameters.getProperty(PropertyId.SpeechServiceConnection_Endpoint, undefined);
const region: string = config.parameters.getProperty(PropertyId.SpeechServiceConnection_Region, undefined);
const hostSuffix = (region && region.toLowerCase().startsWith("china")) ? ".azure.cn" : ".microsoft.com";
const host: string = config.parameters.getProperty(PropertyId.SpeechServiceConnection_Host, "wss://" + region + ".tts.speech" + hostSuffix);
const queryParams: IStringDictionary<string> = {};
if (!endpoint) {
endpoint = host + this.synthesisUri;
}
const headers: IStringDictionary<string> = {};
if (authInfo.token !== undefined && authInfo.token !== "") {
headers[authInfo.headerName] = authInfo.token;
}
headers[QueryParameterNames.ConnectionIdHeader] = connectionId;
config.parameters.setProperty(PropertyId.SpeechServiceConnection_Url, endpoint);
// set forceNpmWebSocket to true as we need to pass the auth info in websocket headers.
WebsocketMessageAdapter.forceNpmWebSocket = true;
return new WebsocketConnection(endpoint, queryParams, headers, new WebsocketMessageFormatter(), ProxyInfo.fromParameters(config.parameters), connectionId);
}
}
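As a worked example of the endpoint logic above, with no explicit endpoint set and a hypothetical region of "westus":

const region = "westus"; // SpeechServiceConnection_Region
const host = "wss://" + region + ".tts.speech.microsoft.com";
const endpoint = host + "/cognitiveservices/websocket/v1";
// => wss://westus.tts.speech.microsoft.com/cognitiveservices/websocket/v1
// A region name starting with "china" would get the ".azure.cn" suffix instead.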

View File

@@ -0,0 +1,625 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import {
ArgumentNullError,
ConnectionClosedEvent,
ConnectionEvent,
ConnectionMessage,
ConnectionOpenResponse,
ConnectionState,
createNoDashGuid,
EventSource,
IAudioDestination,
IConnection,
IDisposable,
MessageType,
Promise,
PromiseHelper,
PromiseResult,
ServiceEvent,
} from "../common/Exports";
import {AudioOutputFormatImpl} from "../sdk/Audio/AudioOutputFormat";
import {
CancellationErrorCode,
CancellationReason,
PropertyCollection,
PropertyId,
ResultReason,
SpeechSynthesisEventArgs,
SpeechSynthesisResult,
SpeechSynthesisWordBoundaryEventArgs,
SpeechSynthesizer,
} from "../sdk/Exports";
import {Callback} from "../sdk/Transcription/IConversation";
import {
AgentConfig,
CancellationErrorCodePropertyName,
DynamicGrammarBuilder,
RequestSession,
SpeechContext,
SynthesisAudioMetadata,
SynthesisTurn,
} from "./Exports";
import {AuthInfo, IAuthentication} from "./IAuthentication";
import {ISynthesisConnectionFactory} from "./ISynthesisConnectionFactory";
import {SpeechConnectionMessage} from "./SpeechConnectionMessage.Internal";
import {SynthesizerConfig} from "./SynthesizerConfig";
export class SynthesisAdapterBase implements IDisposable {
protected privRequestSession: RequestSession;
protected privSynthesisTurn: SynthesisTurn;
protected privConnectionId: string;
protected privSynthesizerConfig: SynthesizerConfig;
protected privSpeechSynthesizer: SpeechSynthesizer;
protected privSuccessCallback: (e: SpeechSynthesisResult) => void;
protected privErrorCallback: (e: string) => void;
public get synthesisContext(): SpeechContext {
return this.privSpeechContext;
}
public get dynamicGrammar(): DynamicGrammarBuilder {
return this.privDynamicGrammar;
}
public get agentConfig(): AgentConfig {
return this.privAgentConfig;
}
public get connectionEvents(): EventSource<ConnectionEvent> {
return this.privConnectionEvents;
}
public get serviceEvents(): EventSource<ServiceEvent> {
return this.privServiceEvents;
}
protected speakOverride: (ssml: string, requestId: string, sc: (e: SpeechSynthesisResult) => void, ec: (e: string) => void) => any = undefined;
// Called when telemetry data is sent to the service.
// Used for testing Telemetry capture.
public static telemetryData: (json: string) => void;
public static telemetryDataEnabled: boolean = true;
public set activityTemplate(messagePayload: string) { this.privActivityTemplate = messagePayload; }
public get activityTemplate(): string { return this.privActivityTemplate; }
protected receiveMessageOverride: () => any = undefined;
protected connectImplOverride: (isUnAuthorized: boolean) => any = undefined;
protected configConnectionOverride: () => any = undefined;
protected fetchConnectionOverride: () => any = undefined;
public set audioOutputFormat(format: AudioOutputFormatImpl) {
this.privAudioOutputFormat = format;
this.privSynthesisTurn.audioOutputFormat = format;
if (this.privSessionAudioDestination !== undefined) {
this.privSessionAudioDestination.format = format;
}
}
private privAuthentication: IAuthentication;
private privConnectionFactory: ISynthesisConnectionFactory;
// A promise for a configured connection.
// Do not consume directly, call fetchConnection instead.
private privConnectionConfigurationPromise: Promise<IConnection>;
// A promise for a connection, but one that has not had the speech context sent yet.
// Do not consume directly, call fetchConnection instead.
private privConnectionPromise: Promise<IConnection>;
private privAuthFetchEventId: string;
private privIsDisposed: boolean;
private privConnectionEvents: EventSource<ConnectionEvent>;
private privServiceEvents: EventSource<ServiceEvent>;
private privSpeechContext: SpeechContext;
private privDynamicGrammar: DynamicGrammarBuilder;
private privAgentConfig: AgentConfig;
private privServiceHasSentMessage: boolean;
private privActivityTemplate: string;
private privAudioOutputFormat: AudioOutputFormatImpl;
private privSessionAudioDestination: IAudioDestination;
public constructor(
authentication: IAuthentication,
connectionFactory: ISynthesisConnectionFactory,
synthesizerConfig: SynthesizerConfig,
speechSynthesizer: SpeechSynthesizer,
audioDestination: IAudioDestination) {
if (!authentication) {
throw new ArgumentNullError("authentication");
}
if (!connectionFactory) {
throw new ArgumentNullError("connectionFactory");
}
if (!synthesizerConfig) {
throw new ArgumentNullError("synthesizerConfig");
}
this.privAuthentication = authentication;
this.privConnectionFactory = connectionFactory;
this.privSynthesizerConfig = synthesizerConfig;
this.privIsDisposed = false;
this.privSpeechSynthesizer = speechSynthesizer;
this.privSessionAudioDestination = audioDestination;
this.privSynthesisTurn = new SynthesisTurn();
this.privConnectionEvents = new EventSource<ConnectionEvent>();
this.privServiceEvents = new EventSource<ServiceEvent>();
this.privDynamicGrammar = new DynamicGrammarBuilder();
this.privSpeechContext = new SpeechContext(this.privDynamicGrammar);
this.privAgentConfig = new AgentConfig();
this.connectionEvents.attach((connectionEvent: ConnectionEvent): void => {
if (connectionEvent.name === "ConnectionClosedEvent") {
const connectionClosedEvent = connectionEvent as ConnectionClosedEvent;
this.cancelSynthesisLocal(CancellationReason.Error,
connectionClosedEvent.statusCode === 1007 ? CancellationErrorCode.BadRequestParameters : CancellationErrorCode.ConnectionFailure,
connectionClosedEvent.reason + " websocket error code: " + connectionClosedEvent.statusCode);
}
});
}
public static addHeader(audio: ArrayBuffer, format: AudioOutputFormatImpl): ArrayBuffer {
if (!format.hasHeader) {
return audio;
}
format.updateHeader(audio.byteLength);
const tmp = new Uint8Array(audio.byteLength + format.header.byteLength);
tmp.set(new Uint8Array(format.header), 0);
tmp.set(new Uint8Array(audio), format.header.byteLength);
return tmp.buffer;
}
public isDisposed(): boolean {
return this.privIsDisposed;
}
public dispose(reason?: string): void {
this.privIsDisposed = true;
if (this.privSessionAudioDestination !== undefined) {
this.privSessionAudioDestination.close();
}
if (this.privConnectionConfigurationPromise) {
this.privConnectionConfigurationPromise.onSuccessContinueWith((connection: IConnection) => {
connection.dispose(reason);
});
}
}
public connect(): void {
this.connectImpl().result();
}
public connectAsync(cb?: Callback, err?: Callback): void {
this.connectImpl().continueWith((promiseResult: PromiseResult<IConnection>) => {
try {
if (promiseResult.isError) {
if (!!err) {
err(promiseResult.error);
}
} else if (promiseResult.isCompleted) {
if (!!cb) {
cb();
}
}
} catch (e) {
if (!!err) {
err(e);
}
}
});
}
public Speak(
text: string,
isSSML: boolean,
requestId: string,
successCallback: (e: SpeechSynthesisResult) => void,
errorCallBack: (e: string) => void,
audioDestination: IAudioDestination,
): Promise<boolean> {
let ssml: string;
if (isSSML) {
ssml = text;
} else {
ssml = SpeechSynthesizer.buildSsml(text, this.privSynthesizerConfig.parameters);
}
if (this.speakOverride !== undefined) {
return this.speakOverride(ssml, requestId, successCallback, errorCallBack);
}
this.privSuccessCallback = successCallback;
this.privErrorCallback = errorCallBack;
this.privSynthesisTurn.startNewSynthesis(requestId, text, isSSML, audioDestination);
return this.fetchConnection().continueWithPromise<boolean>((connection: PromiseResult<IConnection>) => {
if (connection.isError) {
this.cancelSynthesisLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, connection.error);
return PromiseHelper.fromError(connection.error);
}
return this.sendSynthesisContext(connection.result).continueWithPromise<boolean>((result: PromiseResult<boolean>): Promise<boolean> => {
if (result.isError) {
this.cancelSynthesisLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, result.error);
return PromiseHelper.fromError(result.error);
}
return this.sendSsmlMessage(connection.result, ssml, requestId).continueWithPromise<boolean>((result: PromiseResult<boolean>): Promise<boolean> => {
if (result.isError) {
this.cancelSynthesisLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, result.error);
return PromiseHelper.fromError(result.error);
}
const synthesisStartEventArgs: SpeechSynthesisEventArgs = new SpeechSynthesisEventArgs(
new SpeechSynthesisResult(
requestId,
ResultReason.SynthesizingAudioStarted,
)
);
if (!!this.privSpeechSynthesizer.synthesisStarted) {
this.privSpeechSynthesizer.synthesisStarted(this.privSpeechSynthesizer, synthesisStartEventArgs);
}
const messageRetrievalPromise = this.receiveMessage();
return PromiseHelper.fromResult(true);
});
});
});
}
// Cancels synthesis.
protected cancelSynthesis(
requestId: string,
cancellationReason: CancellationReason,
errorCode: CancellationErrorCode,
error: string): void {
const properties: PropertyCollection = new PropertyCollection();
properties.setProperty(CancellationErrorCodePropertyName, CancellationErrorCode[errorCode]);
const result: SpeechSynthesisResult = new SpeechSynthesisResult(
requestId,
ResultReason.Canceled,
undefined,
error,
properties
);
if (!!this.privSpeechSynthesizer.SynthesisCanceled) {
const cancelEvent: SpeechSynthesisEventArgs = new SpeechSynthesisEventArgs(result);
try {
this.privSpeechSynthesizer.SynthesisCanceled(this.privSpeechSynthesizer, cancelEvent);
/* tslint:disable:no-empty */
} catch { }
}
if (!!this.privSuccessCallback) {
try {
this.privSuccessCallback(result);
this.privSuccessCallback = undefined;
/* tslint:disable:no-empty */
} catch { }
}
}
// Cancels synthesis.
protected cancelSynthesisLocal(
cancellationReason: CancellationReason,
errorCode: CancellationErrorCode,
error: string): void {
if (!!this.privSynthesisTurn.isSynthesizing) {
this.privSynthesisTurn.onStopSynthesizing();
this.cancelSynthesis(
this.privSynthesisTurn.requestId,
cancellationReason,
errorCode,
error);
}
}
protected processTypeSpecificMessages(
connectionMessage: SpeechConnectionMessage,
successCallback?: (e: SpeechSynthesisResult) => void,
errorCallBack?: (e: string) => void): boolean {
return true;
}
protected receiveMessage = (): Promise<IConnection> => {
return this.fetchConnection().on((connection: IConnection): Promise<IConnection> => {
return connection.read()
.onSuccessContinueWithPromise((message: ConnectionMessage) => {
if (this.receiveMessageOverride !== undefined) {
return this.receiveMessageOverride();
}
if (this.privIsDisposed) {
// We're done.
return PromiseHelper.fromResult(undefined);
}
// indicates we are draining the queue and it came with no message;
if (!message) {
if (!this.privSynthesisTurn.isSynthesizing) {
return PromiseHelper.fromResult(true);
} else {
return this.receiveMessage();
}
}
this.privServiceHasSentMessage = true;
const connectionMessage = SpeechConnectionMessage.fromConnectionMessage(message);
if (connectionMessage.requestId.toLowerCase() === this.privSynthesisTurn.requestId.toLowerCase()) {
switch (connectionMessage.path.toLowerCase()) {
case "turn.start":
this.privSynthesisTurn.onServiceTurnStartResponse();
break;
case "response":
this.privSynthesisTurn.onServiceResponseMessage(connectionMessage.textBody);
break;
case "audio":
if (this.privSynthesisTurn.streamId.toLowerCase() === connectionMessage.streamId.toLowerCase()
&& !!connectionMessage.binaryBody) {
this.privSynthesisTurn.onAudioChunkReceived(connectionMessage.binaryBody);
if (!!this.privSpeechSynthesizer.synthesizing) {
try {
const audioWithHeader = SynthesisAdapterBase.addHeader(connectionMessage.binaryBody, this.privSynthesisTurn.audioOutputFormat);
const ev: SpeechSynthesisEventArgs = new SpeechSynthesisEventArgs(
new SpeechSynthesisResult(
this.privSynthesisTurn.requestId,
ResultReason.SynthesizingAudio,
audioWithHeader));
this.privSpeechSynthesizer.synthesizing(this.privSpeechSynthesizer, ev);
} catch (error) {
// Not going to let errors in the event handler
// trip things up.
}
}
if (this.privSessionAudioDestination !== undefined) {
this.privSessionAudioDestination.write(connectionMessage.binaryBody);
}
}
break;
case "audio.metadata":
const metadataList = SynthesisAudioMetadata.fromJSON(connectionMessage.textBody).Metadata;
for (const metadata of metadataList) {
if (metadata.Type.toLowerCase() === "WordBoundary".toLowerCase()) {
this.privSynthesisTurn.onWordBoundaryEvent(metadata.Data.text.Text);
const ev: SpeechSynthesisWordBoundaryEventArgs = new SpeechSynthesisWordBoundaryEventArgs(
metadata.Data.Offset,
metadata.Data.text.Text,
metadata.Data.text.Length,
this.privSynthesisTurn.currentTextOffset);
if (!!this.privSpeechSynthesizer.wordBoundary) {
try {
this.privSpeechSynthesizer.wordBoundary(this.privSpeechSynthesizer, ev);
} catch (error) {
// Not going to let errors in the event handler
// trip things up.
}
}
}
}
break;
case "turn.end":
this.privSynthesisTurn.onServiceTurnEndResponse();
let result: SpeechSynthesisResult;
try {
result = new SpeechSynthesisResult(
this.privSynthesisTurn.requestId,
ResultReason.SynthesizingAudioCompleted,
this.privSynthesisTurn.allReceivedAudioWithHeader
);
if (!!this.privSuccessCallback) {
this.privSuccessCallback(result);
}
} catch (error) {
if (!!this.privErrorCallback) {
this.privErrorCallback(error);
}
}
if (this.privSpeechSynthesizer.synthesisCompleted) {
try {
this.privSpeechSynthesizer.synthesisCompleted(
this.privSpeechSynthesizer,
new SpeechSynthesisEventArgs(result)
);
} catch (e) {
// Not going to let errors in the event handler
// trip things up.
}
}
break;
default:
if (!this.processTypeSpecificMessages(connectionMessage)) {
// the derived class did not process this message; dispatch it to the connected event listeners
if (!!this.privServiceEvents) {
this.serviceEvents.onEvent(new ServiceEvent(connectionMessage.path.toLowerCase(), connectionMessage.textBody));
}
}
}
}
return this.receiveMessage();
});
}, (error: string) => {
});
}
protected sendSynthesisContext = (connection: IConnection): Promise<boolean> => {
const synthesisContextJson = JSON.stringify(this.buildSynthesisContext());
if (synthesisContextJson) {
return connection.send(new SpeechConnectionMessage(
MessageType.Text,
"synthesis.context",
this.privSynthesisTurn.requestId,
"application/json",
synthesisContextJson));
}
return PromiseHelper.fromResult(true);
}
// Establishes a websocket connection to the end point.
protected connectImpl(isUnAuthorized: boolean = false): Promise<IConnection> {
if (this.connectImplOverride !== undefined) {
return this.connectImplOverride(isUnAuthorized);
}
if (this.privConnectionPromise) {
if (this.privConnectionPromise.result().isCompleted &&
(this.privConnectionPromise.result().isError
|| this.privConnectionPromise.result().result.state() === ConnectionState.Disconnected) &&
this.privServiceHasSentMessage === true) {
this.privConnectionId = null;
this.privConnectionPromise = null;
this.privServiceHasSentMessage = false;
return this.connectImpl();
} else {
return this.privConnectionPromise;
}
}
this.privAuthFetchEventId = createNoDashGuid();
this.privConnectionId = createNoDashGuid();
this.privSynthesisTurn.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
const authPromise = isUnAuthorized ? this.privAuthentication.fetchOnExpiry(this.privAuthFetchEventId) : this.privAuthentication.fetch(this.privAuthFetchEventId);
this.privConnectionPromise = authPromise
.continueWithPromise((result: PromiseResult<AuthInfo>) => {
if (result.isError) {
// this.privRequestSession.onAuthCompleted(true, result.error);
throw new Error(result.error);
} else {
// this.privRequestSession.onAuthCompleted(false);
}
const connection: IConnection = this.privConnectionFactory.create(this.privSynthesizerConfig, result.result, this.privConnectionId);
// Attach to the underlying event. No need to hold onto the detach pointers as in the event the connection goes away,
// it'll stop sending events.
connection.events.attach((event: ConnectionEvent) => {
this.connectionEvents.onEvent(event);
});
return connection.open().onSuccessContinueWithPromise((response: ConnectionOpenResponse): Promise<IConnection> => {
if (response.statusCode === 200) {
this.privSynthesisTurn.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
this.privSynthesisTurn.onConnectionEstablishCompleted(response.statusCode);
return PromiseHelper.fromResult<IConnection>(connection);
} else if (response.statusCode === 403 && !isUnAuthorized) {
return this.connectImpl(true);
} else {
this.privSynthesisTurn.onConnectionEstablishCompleted(response.statusCode, response.reason);
return PromiseHelper.fromError<IConnection>(`Unable to contact server. StatusCode: ${response.statusCode}, ${this.privSynthesizerConfig.parameters.getProperty(PropertyId.SpeechServiceConnection_Endpoint)} Reason: ${response.reason}`);
}
});
});
return this.privConnectionPromise;
}
protected sendSpeechServiceConfig = (connection: IConnection, SpeechServiceConfigJson: string): Promise<boolean> => {
if (SpeechServiceConfigJson) {
return connection.send(new SpeechConnectionMessage(
MessageType.Text,
"speech.config",
this.privSynthesisTurn.requestId,
"application/json",
SpeechServiceConfigJson));
}
return PromiseHelper.fromResult(true);
}
protected sendSsmlMessage = (connection: IConnection, ssml: string, requestId: string): Promise<boolean> => {
return connection.send(new SpeechConnectionMessage(
MessageType.Text,
"ssml",
requestId,
"application/ssml+xml",
ssml));
}
private fetchConnection = (): Promise<IConnection> => {
if (this.fetchConnectionOverride !== undefined) {
return this.fetchConnectionOverride();
}
return this.configureConnection();
}
// Takes an established websocket connection to the endpoint and sends speech configuration information.
private configureConnection(): Promise<IConnection> {
if (this.configConnectionOverride !== undefined) {
return this.configConnectionOverride();
}
if (this.privConnectionConfigurationPromise) {
if (this.privConnectionConfigurationPromise.result().isCompleted &&
(this.privConnectionConfigurationPromise.result().isError
|| this.privConnectionConfigurationPromise.result().result.state() === ConnectionState.Disconnected)) {
this.privConnectionConfigurationPromise = null;
return this.configureConnection();
} else {
return this.privConnectionConfigurationPromise;
}
}
this.privConnectionConfigurationPromise = this.connectImpl().onSuccessContinueWithPromise((connection: IConnection): Promise<IConnection> => {
return this.sendSpeechServiceConfig(connection, this.privSynthesizerConfig.SpeechServiceConfig.serialize())
.onSuccessContinueWith((_: boolean) => {
return connection;
});
});
return this.privConnectionConfigurationPromise;
}
private buildSynthesisContext(): ISynthesisContext {
return {
synthesis: {
audio: {
metadataOptions: {
sentenceBoundaryEnabled: false,
wordBoundaryEnabled: (!!this.privSpeechSynthesizer.wordBoundary),
},
outputFormat: this.privAudioOutputFormat.requestAudioFormatString,
}
}
};
}
}
interface ISynthesisContext {
synthesis: {
audio: {
outputFormat: string,
metadataOptions: {
wordBoundaryEnabled: boolean,
sentenceBoundaryEnabled: boolean,
}
}
};
}
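For reference, buildSynthesisContext above serializes to a payload of this shape; the example assumes the default output format and a wordBoundary handler attached to the synthesizer:

{
    "synthesis": {
        "audio": {
            "metadataOptions": {
                "sentenceBoundaryEnabled": false,
                "wordBoundaryEnabled": true
            },
            "outputFormat": "raw-16khz-16bit-mono-pcm"
        }
    }
}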

View File

@@ -0,0 +1,68 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { EventType, PlatformEvent } from "../common/Exports";
export class SpeechSynthesisEvent extends PlatformEvent {
private privRequestId: string;
constructor(eventName: string, requestId: string, eventType: EventType = EventType.Info) {
super(eventName, eventType);
this.privRequestId = requestId;
}
public get requestId(): string {
return this.privRequestId;
}
}
// tslint:disable-next-line:max-classes-per-file
export class SynthesisTriggeredEvent extends SpeechSynthesisEvent {
private privSessionAudioDestinationId: string;
private privTurnAudioDestinationId: string;
constructor(requestId: string, sessionAudioDestinationId: string, turnAudioDestinationId: string) {
super("SynthesisTriggeredEvent", requestId);
this.privSessionAudioDestinationId = sessionAudioDestinationId;
this.privTurnAudioDestinationId = turnAudioDestinationId;
}
public get audioSessionDestinationId(): string {
return this.privSessionAudioDestinationId;
}
public get audioTurnDestinationId(): string {
return this.privTurnAudioDestinationId;
}
}
// tslint:disable-next-line:max-classes-per-file
export class ConnectingToSynthesisServiceEvent extends SpeechSynthesisEvent {
private privAuthFetchEventId: string;
constructor(requestId: string, authFetchEventId: string) {
super("ConnectingToSynthesisServiceEvent", requestId);
this.privAuthFetchEventId = authFetchEventId;
}
public get authFetchEventId(): string {
return this.privAuthFetchEventId;
}
}
// tslint:disable-next-line:max-classes-per-file
export class SynthesisStartedEvent extends SpeechSynthesisEvent {
private privAuthFetchEventId: string;
constructor(requestId: string, authFetchEventId: string) {
super("SynthesisStartedEvent", requestId);
this.privAuthFetchEventId = authFetchEventId;
}
public get authFetchEventId(): string {
return this.privAuthFetchEventId;
}
}

View File

@@ -0,0 +1,250 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import {
createNoDashGuid,
Deferred,
Events, IAudioDestination,
Promise,
PromiseState
} from "../common/Exports";
import { AudioOutputFormatImpl } from "../sdk/Audio/AudioOutputFormat";
import { PullAudioOutputStreamImpl } from "../sdk/Audio/AudioOutputStream";
import {SynthesisAdapterBase} from "./SynthesisAdapterBase";
import {
ConnectingToSynthesisServiceEvent,
SpeechSynthesisEvent,
SynthesisStartedEvent,
SynthesisTriggeredEvent,
} from "./SynthesisEvents";
export interface ISynthesisResponseContext {
serviceTag: string;
}
export interface ISynthesisResponseAudio {
type: string;
streamId: string;
}
export interface ISynthesisResponse {
context: ISynthesisResponseContext;
audio: ISynthesisResponseAudio;
}
export class SynthesisTurn {
public get requestId(): string {
return this.privRequestId;
}
public get streamId(): string {
return this.privStreamId;
}
public set streamId(value: string) {
this.privStreamId = value;
}
public get audioOutputFormat(): AudioOutputFormatImpl {
return this.privAudioOutputFormat;
}
public set audioOutputFormat(format: AudioOutputFormatImpl) {
this.privAudioOutputFormat = format;
}
public get turnCompletionPromise(): Promise<boolean> {
return this.privTurnDeferral.promise();
}
public get isSynthesisEnded(): boolean {
return this.privIsSynthesisEnded;
}
public get isSynthesizing(): boolean {
return this.privIsSynthesizing;
}
public get currentTextOffset(): number {
return this.privTextOffset;
}
// The number of bytes received for current turn
public get bytesReceived(): number {
return this.privBytesReceived;
}
public get allReceivedAudio(): ArrayBuffer {
if (!!this.privReceivedAudio) {
return this.privReceivedAudio;
}
if (!this.privIsSynthesisEnded) {
return null;
}
this.readAllAudioFromStream();
return this.allReceivedAudio;
}
public get allReceivedAudioWithHeader(): ArrayBuffer {
if (!!this.privReceivedAudioWithHeader) {
return this.privReceivedAudioWithHeader;
}
if (!this.privIsSynthesisEnded) {
return null;
}
if (this.audioOutputFormat.hasHeader) {
this.privReceivedAudioWithHeader = SynthesisAdapterBase.addHeader(this.allReceivedAudio, this.audioOutputFormat);
return this.allReceivedAudioWithHeader;
} else {
return this.allReceivedAudio;
}
}
private privIsDisposed: boolean = false;
private privAudioNodeId: string;
private privAuthFetchEventId: string;
private privIsSynthesizing: boolean = false;
private privIsSynthesisEnded: boolean = false;
private privBytesReceived: number = 0;
private privRequestId: string;
private privStreamId: string;
private privTurnDeferral: Deferred<boolean>;
private privAudioOutputFormat: AudioOutputFormatImpl;
private privAudioOutputStream: PullAudioOutputStreamImpl;
private privReceivedAudio: ArrayBuffer;
private privReceivedAudioWithHeader: ArrayBuffer;
private privTextOffset: number = 0;
private privRawText: string;
private privIsSSML: boolean;
private privTurnAudioDestination: IAudioDestination;
constructor() {
this.privRequestId = createNoDashGuid();
this.privAudioNodeId = createNoDashGuid();
this.privTurnDeferral = new Deferred<boolean>();
// We're not in a turn, so resolve.
this.privTurnDeferral.resolve(true);
}
public startNewSynthesis(requestId: string, rawText: string, isSSML: boolean, audioDestination?: IAudioDestination): void {
this.privIsSynthesisEnded = false;
this.privIsSynthesizing = true;
this.privRequestId = requestId;
this.privRawText = rawText;
this.privIsSSML = isSSML;
this.privAudioOutputStream = new PullAudioOutputStreamImpl();
this.privAudioOutputStream.format = this.privAudioOutputFormat;
this.privReceivedAudio = null;
this.privReceivedAudioWithHeader = null;
this.privBytesReceived = 0;
if (audioDestination !== undefined) {
this.privTurnAudioDestination = audioDestination;
this.privTurnAudioDestination.format = this.privAudioOutputFormat;
}
this.onEvent(new SynthesisTriggeredEvent(this.requestId, undefined, audioDestination === undefined ? undefined : audioDestination.id()));
}
public onPreConnectionStart = (authFetchEventId: string, connectionId: string): void => {
this.privAuthFetchEventId = authFetchEventId;
this.onEvent(new ConnectingToSynthesisServiceEvent(this.privRequestId, this.privAuthFetchEventId));
}
public onAuthCompleted = (isError: boolean, error?: string): void => {
if (isError) {
this.onComplete();
}
}
public onConnectionEstablishCompleted = (statusCode: number, reason?: string): void => {
if (statusCode === 200) {
this.onEvent(new SynthesisStartedEvent(this.requestId, this.privAuthFetchEventId));
this.privBytesReceived = 0;
return;
} else if (statusCode === 403) {
this.onComplete();
}
}
public onServiceResponseMessage = (responseJson: string): void => {
const response: ISynthesisResponse = JSON.parse(responseJson);
this.streamId = response.audio.streamId;
}
public onServiceTurnEndResponse = (): void => {
this.privTurnDeferral.resolve(true);
this.onComplete();
}
public onServiceTurnStartResponse = (): void => {
if (this.privTurnDeferral.state() === PromiseState.None) {
// What? How are we starting a turn with another not done?
this.privTurnDeferral.reject("Another turn started before current completed.");
}
this.privTurnDeferral = new Deferred<boolean>();
}
public onAudioChunkReceived(data: ArrayBuffer): void {
if (this.isSynthesizing) {
this.privAudioOutputStream.write(data);
this.privBytesReceived += data.byteLength;
if (this.privTurnAudioDestination !== undefined) {
this.privTurnAudioDestination.write(data);
}
}
}
public onWordBoundaryEvent(text: string): void {
this.updateTextOffset(text);
}
public dispose = (error?: string): void => {
if (!this.privIsDisposed) {
// we should have completed by now. If we did not, it's an unknown error.
this.privIsDisposed = true;
}
}
public onStopSynthesizing(): void {
this.onComplete();
}
protected onEvent = (event: SpeechSynthesisEvent): void => {
Events.instance.onEvent(event);
}
private updateTextOffset(text: string): void {
if (this.privTextOffset >= 0) {
this.privTextOffset = this.privRawText.indexOf(text, this.privTextOffset + this.privTextOffset > 0 ? 1 : 0);
if (this.privIsSSML) {
if (this.privRawText.indexOf("<", this.privTextOffset + 1) > this.privRawText.indexOf(">", this.privTextOffset + 1)) {
this.updateTextOffset(text);
}
}
}
}
private onComplete = (): void => {
if (this.privIsSynthesizing) {
this.privIsSynthesizing = false;
this.privIsSynthesisEnded = true;
this.privAudioOutputStream.close();
if (this.privTurnAudioDestination !== undefined) {
this.privTurnAudioDestination.close();
this.privTurnAudioDestination = undefined;
}
}
}
private readAllAudioFromStream(): void {
if (this.privIsSynthesisEnded) {
this.privReceivedAudio = new ArrayBuffer(this.bytesReceived);
try {
this.privAudioOutputStream.read(this.privReceivedAudio);
} catch (e) {
this.privReceivedAudio = new ArrayBuffer(0);
}
}
}
}
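A minimal sketch of one turn's lifecycle as the adapter drives it; the request id, response payload, and chunk size are hypothetical:

const turn = new SynthesisTurn();
turn.audioOutputFormat = AudioOutputFormatImpl.getDefaultOutputFormat();
turn.startNewSynthesis("myRequestId", "hello world", false);
turn.onServiceTurnStartResponse();                 // turn.start: opens a fresh turn deferral
turn.onServiceResponseMessage(JSON.stringify(
    { context: { serviceTag: "tag" }, audio: { type: "inline", streamId: "1" } }));
turn.onAudioChunkReceived(new ArrayBuffer(3200));  // buffered; bytesReceived += 3200
turn.onServiceTurnEndResponse();                   // closes the stream; isSynthesisEnded === true
const audio = turn.allReceivedAudioWithHeader;     // RIFF header + all buffered audio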

View File

@@ -0,0 +1,39 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { PropertyCollection } from "../sdk/Exports";
import {Context, SpeechServiceConfig} from "./Exports";
export enum SynthesisServiceType {
Standard,
Custom,
}
export class SynthesizerConfig {
private privSynthesisServiceType: SynthesisServiceType = SynthesisServiceType.Standard;
private privSpeechServiceConfig: SpeechServiceConfig;
private privParameters: PropertyCollection;
constructor(
speechServiceConfig: SpeechServiceConfig,
parameters: PropertyCollection) {
this.privSpeechServiceConfig = speechServiceConfig ? speechServiceConfig : new SpeechServiceConfig(new Context(null));
this.privParameters = parameters;
}
public get parameters(): PropertyCollection {
return this.privParameters;
}
public get synthesisServiceType(): SynthesisServiceType {
return this.privSynthesisServiceType;
}
public set synthesisServiceType(value: SynthesisServiceType) {
this.privSynthesisServiceType = value;
}
public get SpeechServiceConfig(): SpeechServiceConfig {
return this.privSpeechServiceConfig;
}
}

View File

@@ -79,6 +79,26 @@ export class ConnectionClosedEvent extends ConnectionEvent {
}
}
// tslint:disable-next-line:max-classes-per-file
export class ConnectionErrorEvent extends ConnectionEvent {
private readonly privMessage: string;
private readonly privType: string;
constructor(connectionId: string, message: string, type: string) {
super("ConnectionErrorEvent", connectionId, EventType.Debug);
this.privMessage = message;
this.privType = type;
}
public get message(): string {
return this.privMessage;
}
public get type(): string {
return this.privType;
}
}
// tslint:disable-next-line:max-classes-per-file
export class ConnectionEstablishErrorEvent extends ConnectionEvent {
private privStatusCode: number;

View File

@@ -26,3 +26,4 @@ export * from "./RiffPcmEncoder";
export * from "./Stream";
export { TranslationStatus } from "../common.speech/TranslationStatus";
export * from "./ChunkedArrayBufferStream";
export * from "./IAudioDestination";

View File

@@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { AudioStreamFormat } from "../sdk/Exports";
export interface IAudioDestination {
id(): string;
write(buffer: ArrayBuffer): void;
format: AudioStreamFormat;
close(): void;
}
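A minimal in-memory implementation of this interface, purely for illustration (it is not part of the commit):

class BufferAudioDestination implements IAudioDestination {
    private privFormat: AudioStreamFormat;
    private privBuffers: ArrayBuffer[] = [];
    public id(): string {
        return "bufferDestination"; // a fixed, hypothetical id
    }
    public set format(format: AudioStreamFormat) {
        this.privFormat = format;   // set once by the synthesizer before the first write
    }
    public write(buffer: ArrayBuffer): void {
        this.privBuffers.push(buffer);
    }
    public close(): void {
        this.privBuffers = [];
    }
}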

View File

@@ -1,8 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import { ISpeechConfigAudioDevice } from "../common.speech/Exports";
import { AudioStreamFormatImpl } from "../sdk/Audio/AudioStreamFormat";
import { AudioSourceEvent } from "./AudioSourceEvents";
import { EventSource } from "./EventSource";
import { IDetachable } from "./IDetachable";

View File

@@ -1,17 +1,43 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { AudioStreamFormatImpl } from "../../../src/sdk/Audio/AudioStreamFormat";
import { FileAudioSource, MicAudioSource, PcmRecorder } from "../../common.browser/Exports";
import {PathLike} from "fs";
import {
FileAudioSource,
MicAudioSource,
PcmRecorder,
SpeakerAudioDestination
} from "../../common.browser/Exports";
import { ISpeechConfigAudioDevice } from "../../common.speech/Exports";
import { AudioSourceEvent, EventSource, IAudioSource, IAudioStreamNode, Promise } from "../../common/Exports";
import {
AudioSourceEvent,
EventSource,
IAudioDestination,
IAudioSource,
IAudioStreamNode,
Promise
} from "../../common/Exports";
import { Contracts } from "../Contracts";
import { AudioInputStream, PropertyCollection, PropertyId, PullAudioInputStreamCallback } from "../Exports";
import {
AudioInputStream,
AudioOutputStream,
AudioStreamFormat,
PropertyCollection,
PropertyId,
PullAudioInputStreamCallback,
PullAudioOutputStream,
PushAudioOutputStream,
PushAudioOutputStreamCallback
} from "../Exports";
import { AudioFileWriter } from "./AudioFileWriter";
import { PullAudioInputStreamImpl, PushAudioInputStreamImpl } from "./AudioInputStream";
import { PullAudioOutputStreamImpl, PushAudioOutputStreamImpl } from "./AudioOutputStream";
import { AudioStreamFormatImpl } from "./AudioStreamFormat";
/**
* Represents audio input configuration used for specifying what type of input to use (microphone, file, stream).
* @class AudioConfig
* Updated in version 1.11.0
*/
export abstract class AudioConfig {
/**
@@ -75,6 +101,58 @@ export abstract class AudioConfig {
throw new Error("Not Supported Type");
}
/**
* Creates an AudioConfig object representing the default speaker.
* Note: this is just a placeholder; it is not implemented yet.
* @member AudioConfig.fromDefaultSpeakerOutput
* @function
* @public
* @returns {AudioConfig} The audio output configuration being created.
* Added in version 1.11.0
*/
public static fromDefaultSpeakerOutput(): AudioConfig {
return new AudioOutputConfigImpl(new SpeakerAudioDestination());
}
/**
* Creates an AudioConfig object representing a specified output audio file
* @member AudioConfig.fromAudioFileOutput
* @function
* @public
* @param {PathLike} filename - the filename of the output audio file
* @returns {AudioConfig} The audio output configuration being created.
* Added in version 1.11.0
*/
public static fromAudioFileOutput(filename: PathLike): AudioConfig {
return new AudioOutputConfigImpl(new AudioFileWriter(filename));
}
/**
* Creates an AudioConfig object representing a specified audio output stream
* @member AudioConfig.fromStreamOutput
* @function
* @public
* @param {AudioOutputStream | PushAudioOutputStreamCallback} audioStream - Specifies the custom audio output
* stream.
* @returns {AudioConfig} The audio output configuration being created.
* Added in version 1.11.0
*/
public static fromStreamOutput(audioStream: AudioOutputStream | PushAudioOutputStreamCallback): AudioConfig {
if (audioStream instanceof PushAudioOutputStreamCallback) {
return new AudioOutputConfigImpl(new PushAudioOutputStreamImpl(audioStream as PushAudioOutputStreamCallback));
}
if (audioStream instanceof PushAudioOutputStream) {
return new AudioOutputConfigImpl(audioStream as PushAudioOutputStreamImpl);
}
if (audioStream instanceof PullAudioOutputStream) {
return new AudioOutputConfigImpl(audioStream as PullAudioOutputStreamImpl);
}
throw new Error("Not Supported Type");
}
/**
* Explicitly frees any external resource attached to the object
* @member AudioConfig.prototype.close
@@ -226,3 +304,42 @@ export class AudioConfigImpl extends AudioConfig implements IAudioSource {
return this.privSource.deviceInfo;
}
}
// tslint:disable-next-line:max-classes-per-file
export class AudioOutputConfigImpl extends AudioConfig implements IAudioDestination {
private privDestination: IAudioDestination;
/**
* Creates and initializes an instance of this class.
* @constructor
* @param {IAudioDestination} destination - An audio destination.
*/
public constructor(destination: IAudioDestination) {
super();
this.privDestination = destination;
}
public set format(format: AudioStreamFormat) {
this.privDestination.format = format;
}
public write(buffer: ArrayBuffer): void {
this.privDestination.write(buffer);
}
public close(): void {
this.privDestination.close();
}
public id(): string {
return this.privDestination.id();
}
public setProperty(name: string, value: string): void {
throw new Error("This AudioConfig instance does not support setting properties.");
}
public getProperty(name: string, def?: string): string {
throw new Error("This AudioConfig instance does not support getting properties.");
}
}
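Taken together, the new output factories would be used roughly like this (the file name is hypothetical, and fromDefaultSpeakerOutput is still a placeholder per the note above):

const speakerConfig = AudioConfig.fromDefaultSpeakerOutput();        // placeholder destination
const fileConfig = AudioConfig.fromAudioFileOutput("synthesis.wav"); // NodeJS only
const streamConfig = AudioConfig.fromStreamOutput(AudioOutputStream.createPullStream());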

View File

@@ -0,0 +1,60 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import * as fs from "fs";
import { IAudioDestination } from "../../common/Exports";
import { Contracts } from "../Contracts";
import { AudioStreamFormat } from "../Exports";
import { AudioOutputFormatImpl } from "./AudioOutputFormat";
export class AudioFileWriter implements IAudioDestination {
private privAudioFormat: AudioOutputFormatImpl;
private privFd: number;
private privId: string;
private privWriteStream: fs.WriteStream;
public constructor(filename: fs.PathLike) {
this.privFd = fs.openSync(filename, "w");
}
public set format(format: AudioStreamFormat) {
Contracts.throwIfNotUndefined(this.privAudioFormat, "format is already set");
this.privAudioFormat = format as AudioOutputFormatImpl;
let headerOffset: number = 0;
if (this.privAudioFormat.hasHeader) {
headerOffset = this.privAudioFormat.header.byteLength;
}
if (this.privFd !== undefined) {
this.privWriteStream = fs.createWriteStream("", {fd: this.privFd, start: headerOffset, autoClose: false});
}
}
public write(buffer: ArrayBuffer): void {
Contracts.throwIfNullOrUndefined(this.privAudioFormat, "must set format before writing.");
if (this.privWriteStream !== undefined) {
this.privWriteStream.write(new Uint8Array(buffer.slice(0)));
}
}
public close(): void {
if (this.privFd !== undefined) {
this.privWriteStream.on("finish", () => {
if (this.privAudioFormat.hasHeader) {
this.privAudioFormat.updateHeader(this.privWriteStream.bytesWritten);
fs.writeSync(this.privFd,
new Int8Array(this.privAudioFormat.header),
0,
this.privAudioFormat.header.byteLength,
0);
}
fs.closeSync(this.privFd);
this.privFd = undefined;
});
this.privWriteStream.end();
}
}
public id = (): string => {
return this.privId;
}
}
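A usage sketch of the writer above (file name and chunk are illustrative): the format must be set before the first write, and close() patches the RIFF header once the stream has flushed:

const writer = new AudioFileWriter("test.wav");
writer.format = AudioOutputFormatImpl.getDefaultOutputFormat(); // riff-16khz-16bit-mono-pcm
writer.write(new ArrayBuffer(3200)); // raw PCM, written after the reserved header bytes
writer.close();                      // on finish: rewrite header with final byte count, close fd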

View File

@@ -1,7 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { createNoDashGuid } from "../../../src/common/Guid";
import {
connectivity,
ISpeechConfigAudioDevice,
@@ -25,6 +24,7 @@ import {
Stream,
StreamReader,
} from "../../common/Exports";
import { createNoDashGuid } from "../../common/Guid";
import { AudioStreamFormat, PullAudioInputStreamCallback } from "../Exports";
import { AudioStreamFormatImpl } from "./AudioStreamFormat";

View File

@@ -0,0 +1,339 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { INumberDictionary } from "../../common/Exports";
import { SpeechSynthesisOutputFormat } from "../SpeechSynthesisOutputFormat";
import { AudioStreamFormatImpl } from "./AudioStreamFormat";
export enum AudioFormatTag {
PCM = 1,
MuLaw,
Siren,
MP3,
SILKSkype
}
/**
* @private
* @class AudioOutputFormatImpl
* Added in version 1.11.0
*/
// tslint:disable-next-line:max-classes-per-file
export class AudioOutputFormatImpl extends AudioStreamFormatImpl {
public static SpeechSynthesisOutputFormatToString: INumberDictionary<string> = {
[SpeechSynthesisOutputFormat.Raw8Khz8BitMonoMULaw]: "raw-8khz-8bit-mono-mulaw",
[SpeechSynthesisOutputFormat.Riff16Khz16KbpsMonoSiren]: "riff-16khz-16kbps-mono-siren",
[SpeechSynthesisOutputFormat.Audio16Khz16KbpsMonoSiren]: "audio-16khz-16kbps-mono-siren",
[SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3]: "audio-16khz-32kbitrate-mono-mp3",
[SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3]: "audio-16khz-128kbitrate-mono-mp3",
[SpeechSynthesisOutputFormat.Audio16Khz64KBitRateMonoMp3]: "audio-16khz-64kbitrate-mono-mp3",
[SpeechSynthesisOutputFormat.Audio24Khz48KBitRateMonoMp3]: "audio-24khz-48kbitrate-mono-mp3",
[SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3]: "audio-24khz-96kbitrate-mono-mp3",
[SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3]: "audio-24khz-160kbitrate-mono-mp3",
[SpeechSynthesisOutputFormat.Raw16Khz16BitMonoTrueSilk]: "raw-16khz-16bit-mono-truesilk",
[SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm]: "riff-16khz-16bit-mono-pcm",
[SpeechSynthesisOutputFormat.Riff8Khz16BitMonoPcm]: "riff-8khz-16bit-mono-pcm",
[SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm]: "riff-24khz-16bit-mono-pcm",
[SpeechSynthesisOutputFormat.Riff8Khz8BitMonoMULaw]: "riff-8khz-8bit-mono-mulaw",
[SpeechSynthesisOutputFormat.Raw16Khz16BitMonoPcm]: "raw-16khz-16bit-mono-pcm",
[SpeechSynthesisOutputFormat.Raw24Khz16BitMonoPcm]: "raw-24khz-16bit-mono-pcm",
[SpeechSynthesisOutputFormat.Raw8Khz16BitMonoPcm]: "raw-8khz-16bit-mono-pcm",
};
private priAudioFormatString: string;
/**
* audio format string for synthesis request, which may differ from priAudioFormatString.
* e.g. for riff format, we request the raw format and add the header on the SDK side.
*/
private readonly priRequestAudioFormatString: string;
private readonly priHasHeader: boolean;
/**
* Creates an instance with the given values.
* @constructor
* @param formatTag
* @param {number} samplesPerSec - Samples per second.
* @param {number} bitsPerSample - Bits per sample.
* @param {number} channels - Number of channels.
* @param avgBytesPerSec
* @param blockAlign
* @param audioFormatString
* @param requestAudioFormatString
* @param hasHeader
*/
public constructor(formatTag: AudioFormatTag,
channels: number,
samplesPerSec: number,
avgBytesPerSec: number,
blockAlign: number,
bitsPerSample: number,
audioFormatString: string,
requestAudioFormatString: string,
hasHeader: boolean) {
super(samplesPerSec, bitsPerSample, channels);
this.formatTag = formatTag;
this.avgBytesPerSec = avgBytesPerSec;
this.blockAlign = blockAlign;
this.priAudioFormatString = audioFormatString;
this.priRequestAudioFormatString = requestAudioFormatString;
this.priHasHeader = hasHeader;
}
public static fromSpeechSynthesisOutputFormat(speechSynthesisOutputFormat: SpeechSynthesisOutputFormat): AudioOutputFormatImpl {
return AudioOutputFormatImpl.fromSpeechSynthesisOutputFormatString(
AudioOutputFormatImpl.SpeechSynthesisOutputFormatToString[speechSynthesisOutputFormat]);
}
public static fromSpeechSynthesisOutputFormatString(speechSynthesisOutputFormatString: string): AudioOutputFormatImpl {
switch (speechSynthesisOutputFormatString) {
case "raw-8khz-8bit-mono-mulaw":
return new AudioOutputFormatImpl(
AudioFormatTag.PCM,
1,
8000,
8000,
1,
8,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "riff-16khz-16kbps-mono-siren":
return new AudioOutputFormatImpl(
AudioFormatTag.Siren,
1,
16000,
2000,
40,
0,
speechSynthesisOutputFormatString,
"audio-16khz-16kbps-mono-siren",
true);
case "audio-16khz-16kbps-mono-siren":
return new AudioOutputFormatImpl(
AudioFormatTag.Siren,
1,
16000,
2000,
40,
0,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "audio-16khz-32kbitrate-mono-mp3":
return new AudioOutputFormatImpl(
AudioFormatTag.MP3,
1,
16000,
32 << 7,
2,
16,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "audio-16khz-128kbitrate-mono-mp3":
return new AudioOutputFormatImpl(
AudioFormatTag.MP3,
1,
16000,
128 << 7,
2,
16,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "audio-16khz-64kbitrate-mono-mp3":
return new AudioOutputFormatImpl(
AudioFormatTag.MP3,
1,
16000,
64 << 7,
2,
16,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "audio-24khz-48kbitrate-mono-mp3":
return new AudioOutputFormatImpl(
AudioFormatTag.MP3,
1,
16000,
48 << 7,
2,
16,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "audio-24khz-96kbitrate-mono-mp3":
return new AudioOutputFormatImpl(
AudioFormatTag.MP3,
1,
16000,
96 << 7,
2,
16,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "audio-24khz-160kbitrate-mono-mp3":
return new AudioOutputFormatImpl(
AudioFormatTag.MP3,
1,
16000,
160 << 7,
2,
16,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "raw-16khz-16bit-mono-truesilk":
return new AudioOutputFormatImpl(
AudioFormatTag.SILKSkype,
1,
16000,
32000,
2,
16,
speechSynthesisOutputFormatString,
speechSynthesisOutputFormatString,
false);
case "riff-8khz-16bit-mono-pcm":
return new AudioOutputFormatImpl(
AudioFormatTag.PCM,
1,
8000,
16000,
2,
16,
speechSynthesisOutputFormatString,
"raw-8khz-16bit-mono-pcm",
true);
case "riff-24khz-16bit-mono-pcm":
return new AudioOutputFormatImpl(
AudioFormatTag.PCM,
1,
24000,
48000,
2,
16,
speechSynthesisOutputFormatString,
"raw-24khz-16bit-mono-pcm",
true);
case "riff-8khz-8bit-mono-mulaw":
return new AudioOutputFormatImpl(
AudioFormatTag.MuLaw,
1,
8000,
8000,
1,
8,
speechSynthesisOutputFormatString,
"raw-8khz-8bit-mono-mulaw",
true);
case "raw-16khz-16bit-mono-pcm":
return new AudioOutputFormatImpl(
AudioFormatTag.PCM,
1,
16000,
32000,
2,
16,
speechSynthesisOutputFormatString,
"raw-16khz-16bit-mono-pcm",
false);
case "raw-24khz-16bit-mono-pcm":
return new AudioOutputFormatImpl(
AudioFormatTag.PCM,
1,
24000,
48000,
2,
16,
speechSynthesisOutputFormatString,
"raw-24khz-16bit-mono-pcm",
false);
case "raw-8khz-16bit-mono-pcm":
return new AudioOutputFormatImpl(
AudioFormatTag.PCM,
1,
8000,
16000,
2,
16,
speechSynthesisOutputFormatString,
"raw-8khz-16bit-mono-pcm",
false);
case "riff-16khz-16bit-mono-pcm":
default:
return new AudioOutputFormatImpl(
AudioFormatTag.PCM,
1,
16000,
32000,
2,
16,
"riff-16khz-16bit-mono-pcm",
"raw-16khz-16bit-mono-pcm",
true);
}
}
public static getDefaultOutputFormat(): AudioOutputFormatImpl {
return AudioOutputFormatImpl.fromSpeechSynthesisOutputFormatString("");
}
/**
* The format tag of the audio
* @AudioFormatTag AudioOutputFormatImpl.prototype.formatTag
* @function
* @public
*/
public formatTag: AudioFormatTag;
/**
* Specifies if this audio output format has a header
* @boolean AudioOutputFormatImpl.prototype.hasHeader
* @function
* @public
*/
public get hasHeader(): boolean {
return this.priHasHeader;
}
/**
* Specifies the header of this format
* @ArrayBuffer AudioOutputFormatImpl.prototype.header
* @function
* @public
*/
public get header(): ArrayBuffer {
if (this.hasHeader) {
return this.privHeader;
}
return undefined;
}
/**
* Updates the header based on the audio length
* @member AudioOutputFormatImpl.updateHeader
* @function
* @public
* @param {number} audioLength - the audio length
*/
public updateHeader(audioLength: number): void {
if (this.priHasHeader) {
const view = new DataView(this.privHeader);
view.setUint32(40, audioLength, true);
}
}
/**
* Specifies the audio format string to be sent to the service
* @string AudioOutputFormatImpl.prototype.requestAudioFormatString
* @function
* @public
*/
public get requestAudioFormatString(): string {
return this.priRequestAudioFormatString;
}
}
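A brief usage sketch of the class above. The constants mirror the "riff-24khz-16bit-mono-pcm" case in the switch; that the header is the standard 44-byte RIFF layout with the data-chunk size at byte offset 40 is implied by updateHeader, not stated elsewhere in this commit.

// Sketch: resolve a wire format and patch its RIFF header once the final
// audio length is known.
const format = AudioOutputFormatImpl.fromSpeechSynthesisOutputFormatString("riff-24khz-16bit-mono-pcm");
// The service is asked for headerless audio; the SDK prepends the RIFF header:
// format.requestAudioFormatString === "raw-24khz-16bit-mono-pcm"
// format.hasHeader === true
const oneSecond: number = 48000;   // 24 kHz * 16 bit * mono = 48000 bytes/s
format.updateHeader(oneSecond);    // rewrites the data-chunk size field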


@ -1,23 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { createNoDashGuid } from "../../../src/common/Guid";
import {
ChunkedArrayBufferStream,
createNoDashGuid,
Deferred,
IAudioDestination,
IStreamChunk,
Promise,
PromiseHelper,
Stream,
StreamReader,
} from "../../common/Exports";
import { AudioStreamFormat } from "../Exports";
import { AudioStreamFormatImpl } from "./AudioStreamFormat";
export const bufferSize: number = 4096;
import {Contracts} from "../Contracts";
import {
AudioStreamFormat,
PushAudioOutputStreamCallback
} from "../Exports";
import { AudioOutputFormatImpl } from "./AudioOutputFormat";
/**
* Represents audio input stream used for custom audio input configurations.
* @class AudioInputStream
* Represents audio output stream used for custom audio output configurations.
* @class AudioOutputStream
*/
export abstract class AudioOutputStream {
@ -27,22 +30,27 @@ export abstract class AudioOutputStream {
*/
protected constructor() { }
/**
* Sets the format of the AudioOutputStream
* Note: the format is set by the synthesizer before writing. Do not set it before passing it to AudioConfig
* @member AudioOutputStream.prototype.format
*/
public abstract set format(format: AudioStreamFormat);
/**
* Creates a memory backed PullAudioOutputStream with the specified audio format.
* @member AudioInputStream.createPullStream
* @member AudioOutputStream.createPullStream
* @function
* @public
* @param {AudioStreamFormat} format - The audio data format in which audio will be
* written to the push audio stream's write() method (currently only support 16 kHz 16bit mono PCM).
* @returns {PullAudioOutputStream} The audio input stream being created.
* @returns {PullAudioOutputStream} The audio output stream being created.
*/
public static createPullStream(format?: AudioStreamFormat): PullAudioOutputStream {
return PullAudioOutputStream.create(format);
public static createPullStream(): PullAudioOutputStream {
return PullAudioOutputStream.create();
}
/**
* Explicitly frees any external resource attached to the object
* @member AudioInputStream.prototype.close
* @member AudioOutputStream.prototype.close
* @function
* @public
*/
@ -50,7 +58,7 @@ export abstract class AudioOutputStream {
}
/**
* Represents memory backed push audio input stream used for custom audio input configurations.
* Represents memory backed pull audio output stream used for custom audio output configurations.
* @class PullAudioOutputStream
*/
// tslint:disable-next-line:max-classes-per-file
@ -61,12 +69,10 @@ export abstract class PullAudioOutputStream extends AudioOutputStream {
* @member PullAudioOutputStream.create
* @function
* @public
* @param {AudioStreamFormat} format - The audio data format in which audio will be written to the
* push audio stream's write() method (currently only support 16 kHz 16bit mono PCM).
* @returns {PullAudioOutputStream} The push audio input stream being created.
* @returns {PullAudioOutputStream} The push audio output stream being created.
*/
public static create(format?: AudioStreamFormat): PullAudioOutputStream {
return new PullAudioOutputStreamImpl(bufferSize, format);
public static create(): PullAudioOutputStream {
return new PullAudioOutputStreamImpl();
}
/**
@ -74,9 +80,10 @@ export abstract class PullAudioOutputStream extends AudioOutputStream {
* @member PullAudioOutputStream.prototype.read
* @function
* @public
* @returns {Promise<ArrayBuffer>} Audio buffer data.
* @param {ArrayBuffer} dataBuffer - An ArrayBuffer to store the read data.
* @returns {Promise<number>} Audio buffer length has been read.
*/
public abstract read(): Promise<ArrayBuffer>;
public abstract read(dataBuffer: ArrayBuffer): Promise<number>;
/**
* Closes the stream.
@ -88,36 +95,40 @@ export abstract class PullAudioOutputStream extends AudioOutputStream {
}
/**
* Represents memory backed push audio input stream used for custom audio input configurations.
* Represents memory backed pull audio output stream used for custom audio output configurations.
* @private
* @class PullAudioOutputStreamImpl
*/
// tslint:disable-next-line:max-classes-per-file
export class PullAudioOutputStreamImpl extends PullAudioOutputStream {
private privFormat: AudioStreamFormatImpl;
export class PullAudioOutputStreamImpl extends PullAudioOutputStream implements IAudioDestination {
private privFormat: AudioOutputFormatImpl;
private privId: string;
private privStream: Stream<ArrayBuffer>;
private streamReader: StreamReader<ArrayBuffer>;
private privLastChunkView: Int8Array;
/**
* Creates and initalizes an instance with the given values.
* Creates and initializes an instance with the given values.
* @constructor
* @param {AudioStreamFormat} format - The audio stream format.
*/
public constructor(chunkSize: number, format?: AudioStreamFormat) {
public constructor() {
super();
if (format === undefined) {
this.privFormat = AudioStreamFormatImpl.getDefaultInputFormat();
} else {
this.privFormat = format as AudioStreamFormatImpl;
}
this.privId = createNoDashGuid();
this.privStream = new ChunkedArrayBufferStream(chunkSize);
this.privStream = new Stream<ArrayBuffer>();
this.streamReader = this.privStream.getReader();
}
/**
* Sets the format information to the stream. For internal use only.
* @param {AudioStreamFormat} format - the format to be set.
*/
public set format(format: AudioStreamFormat) {
if (format === undefined || format === null) {
this.privFormat = AudioOutputFormatImpl.getDefaultOutputFormat();
} else {
this.privFormat = format as AudioOutputFormatImpl;
}
}
/**
* Format information for the audio
*/
@ -141,22 +152,61 @@ export class PullAudioOutputStreamImpl extends PullAudioOutputStream {
* @property
* @public
*/
public get id(): string {
public id(): string {
return this.privId;
}
/**
* Reads data from the buffer
* Reads audio data from the internal buffer.
* @member PullAudioOutputStreamImpl.prototype.read
* @function
* @public
* @param {ArrayBuffer} dataBuffer - The audio buffer of which this function will make a copy.
* @param {ArrayBuffer} dataBuffer - An ArrayBuffer to store the read data.
* @returns {Promise<number>} - Audio buffer length has been read.
*/
public read(): Promise<ArrayBuffer> {
return this.streamReader.read()
.onSuccessContinueWithPromise<ArrayBuffer>((chunk: IStreamChunk<ArrayBuffer>) => {
return PromiseHelper.fromResult(chunk.buffer);
});
public read(dataBuffer: ArrayBuffer): Promise<number> {
const intView: Int8Array = new Int8Array(dataBuffer);
let totalBytes: number = 0;
if (this.privLastChunkView !== undefined) {
if (this.privLastChunkView.length > dataBuffer.byteLength) {
intView.set(this.privLastChunkView.slice(0, dataBuffer.byteLength));
this.privLastChunkView = this.privLastChunkView.slice(dataBuffer.byteLength);
return PromiseHelper.fromResult(dataBuffer.byteLength);
}
intView.set(this.privLastChunkView);
totalBytes = this.privLastChunkView.length;
this.privLastChunkView = undefined;
}
const deferred: Deferred<number> = new Deferred<number>();
// Keep reading chunks until the caller's buffer is full or the stream is exhausted.
const readUntilFilled: () => void = (): void => {
if (totalBytes < dataBuffer.byteLength && !this.streamReader.isClosed) {
this.streamReader.read()
.onSuccessContinueWith((chunk: IStreamChunk<ArrayBuffer>) => {
if (chunk !== undefined && !chunk.isEnd) {
let tmpBuffer: ArrayBuffer;
if (chunk.buffer.byteLength > dataBuffer.byteLength - totalBytes) {
tmpBuffer = chunk.buffer.slice(0, dataBuffer.byteLength - totalBytes);
this.privLastChunkView = new Int8Array(chunk.buffer.slice(dataBuffer.byteLength - totalBytes));
} else {
tmpBuffer = chunk.buffer;
}
intView.set(new Int8Array(tmpBuffer), totalBytes);
totalBytes += tmpBuffer.byteLength;
readUntilFilled();
} else {
this.streamReader.close();
deferred.resolve(totalBytes);
}
});
} else {
deferred.resolve(totalBytes);
}
};
readUntilFilled();
return deferred.promise();
}
/**
@ -167,6 +217,7 @@ export class PullAudioOutputStreamImpl extends PullAudioOutputStream {
* @param {ArrayBuffer} dataBuffer - The audio buffer of which this function will make a copy.
*/
public write(dataBuffer: ArrayBuffer): void {
Contracts.throwIfNullOrUndefined(this.privStream, "must set format before writing");
this.privStream.writeStreamChunk({
buffer: dataBuffer,
isEnd: false,
@ -184,3 +235,82 @@ export class PullAudioOutputStreamImpl extends PullAudioOutputStream {
this.privStream.close();
}
}
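A usage sketch for the pull stream above. The chunk size is arbitrary; the loop relies on read() resolving with 0 once the stream has been closed and fully drained, as implemented above.

// Sketch: drain a PullAudioOutputStream into memory chunk by chunk.
const stream: PullAudioOutputStream = PullAudioOutputStream.create();
const received: ArrayBuffer[] = [];
const pump = (): void => {
    const buffer = new ArrayBuffer(4096); // arbitrary chunk size
    stream.read(buffer).onSuccessContinueWith((bytesRead: number): void => {
        if (bytesRead > 0) {
            received.push(buffer.slice(0, bytesRead)); // keep only the filled part
            pump();
        }
        // bytesRead === 0: stream closed and fully consumed.
    });
};
pump();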
/**
* Represents push audio output stream used for custom audio output configurations.
* @class PushAudioOutputStream
*/
// tslint:disable-next-line:max-classes-per-file
export abstract class PushAudioOutputStream extends AudioOutputStream {
/**
* Creates and initializes an instance.
* @constructor
*/
protected constructor() { super(); }
/**
* Creates a PushAudioOutputStream that delegates to the specified callback interface for
* write() and close() methods.
* @member PushAudioOutputStream.create
* @function
* @public
* @param {PushAudioOutputStreamCallback} callback - The custom audio output object,
* derived from PushAudioOutputStreamCallback
* @returns {PushAudioOutputStream} The push audio output stream being created.
*/
public static create(callback: PushAudioOutputStreamCallback): PushAudioOutputStream {
return new PushAudioOutputStreamImpl(callback);
}
/**
* Explicitly frees any external resource attached to the object
* @member PushAudioOutputStream.prototype.close
* @function
* @public
*/
public abstract close(): void;
}
/**
* Represents audio output stream used for custom audio output configurations.
* @private
* @class PushAudioOutputStreamImpl
*/
// tslint:disable-next-line:max-classes-per-file
export class PushAudioOutputStreamImpl extends PushAudioOutputStream implements IAudioDestination {
private readonly privId: string;
private privCallback: PushAudioOutputStreamCallback;
/**
* Creates a PushAudioOutputStream that delegates to the specified callback interface for
* write() and close() methods.
* @constructor
* @param {PushAudioOutputStreamCallback} callback - The custom audio output object,
* derived from PushAudioOutputStreamCallback
*/
public constructor(callback: PushAudioOutputStreamCallback) {
super();
this.privId = createNoDashGuid();
this.privCallback = callback;
}
// tslint:disable-next-line:no-empty
public set format(format: AudioStreamFormat) {}
public write(buffer: ArrayBuffer): void {
if (!!this.privCallback.write) {
this.privCallback.write(buffer);
}
}
public close(): void {
if (!!this.privCallback.close) {
this.privCallback.close();
}
}
public id(): string {
return this.privId;
}
}


@ -48,7 +48,7 @@ export abstract class AudioStreamFormat {
*/
// tslint:disable-next-line:max-classes-per-file
export class AudioStreamFormatImpl extends AudioStreamFormat {
private privHeader: ArrayBuffer;
protected privHeader: ArrayBuffer;
/**
* Creates an instance with the given values.
@ -168,7 +168,7 @@ export class AudioStreamFormatImpl extends AudioStreamFormat {
return this.privHeader;
}
private setString = (view: DataView, offset: number, str: string): void => {
protected setString = (view: DataView, offset: number, str: string): void => {
for (let i = 0; i < str.length; i++) {
view.setUint8(offset + i, str.charCodeAt(i));
}


@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
/**
* An abstract base class that defines callback methods (write() and close()) for
* custom audio output streams.
* @class PushAudioOutputStreamCallback
*/
export abstract class PushAudioOutputStreamCallback {
/**
* Writes audio data into the data buffer.
* @member PushAudioOutputStreamCallback.prototype.write
* @function
* @public
* @param {ArrayBuffer} dataBuffer - The byte array that stores the audio data to write.
*/
public abstract write(dataBuffer: ArrayBuffer): void;
/**
* Closes the audio output stream.
* @member PushAudioOutputStreamCallback.prototype.close
* @function
* @public
*/
public abstract close(): void;
}
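A minimal sketch of implementing this callback interface; the class name and buffering behavior are illustrative, not part of the SDK.

// Sketch: collect synthesized audio in memory via a custom callback.
class BufferingCallback extends PushAudioOutputStreamCallback {
    private privChunks: ArrayBuffer[] = [];
    public write(dataBuffer: ArrayBuffer): void {
        this.privChunks.push(dataBuffer.slice(0)); // defensive copy
    }
    public close(): void {
        // privChunks now holds the complete audio stream.
    }
}
const pushStream = PushAudioOutputStream.create(new BufferingCallback());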


@ -2,7 +2,12 @@
// Licensed under the MIT license.
import { CancellationErrorCodePropertyName, EnumTranslation, SimpleSpeechPhrase } from "../common.speech/Exports";
import { CancellationErrorCode, CancellationReason, RecognitionResult } from "./Exports";
import {
CancellationErrorCode,
CancellationReason,
RecognitionResult,
SpeechSynthesisResult
} from "./Exports";
/**
* Contains detailed information about why a result was canceled.
@ -30,14 +35,14 @@ export class CancellationDetails {
* @member CancellationDetails.fromResult
* @function
* @public
* @param {RecognitionResult} result - The result that was canceled.
* @param {RecognitionResult | SpeechSynthesisResult} result - The result that was canceled.
* @returns {CancellationDetails} The cancellation details object being created.
*/
public static fromResult(result: RecognitionResult): CancellationDetails {
public static fromResult(result: RecognitionResult | SpeechSynthesisResult): CancellationDetails {
let reason = CancellationReason.Error;
let errorCode: CancellationErrorCode = CancellationErrorCode.NoError;
if (!!result.json) {
if (result instanceof RecognitionResult && !!result.json) {
const simpleSpeech: SimpleSpeechPhrase = SimpleSpeechPhrase.fromJSON(result.json);
reason = EnumTranslation.implTranslateCancelResult(simpleSpeech.RecognitionStatus);
}


@ -49,4 +49,10 @@ export class Contracts {
// TODO check for file existence.
}
public static throwIfNotUndefined(param: any, name: string): void {
if (param !== undefined) {
throw new Error("throwIfNotUndefined:" + name);
}
}
}


@ -4,9 +4,10 @@
export { AudioConfig } from "./Audio/AudioConfig";
export { AudioStreamFormat } from "./Audio/AudioStreamFormat";
export { AudioInputStream, PullAudioInputStream, PushAudioInputStream } from "./Audio/AudioInputStream";
export { AudioOutputStream, PullAudioOutputStream } from "./Audio/AudioOutputStream";
export { AudioOutputStream, PullAudioOutputStream, PushAudioOutputStream} from "./Audio/AudioOutputStream";
export { CancellationReason } from "./CancellationReason";
export { PullAudioInputStreamCallback } from "./Audio/PullAudioInputStreamCallback";
export { PushAudioOutputStreamCallback } from "./Audio/PushAudioOutputStreamCallback";
export { KeywordRecognitionModel } from "./KeywordRecognitionModel";
export { SessionEventArgs } from "./SessionEventArgs";
export { RecognitionEventArgs } from "./RecognitionEventArgs";
@ -63,3 +64,8 @@ export { Conversation,
ParticipantChangedReason,
User
} from "./Transcription/Exports";
export { SpeechSynthesisOutputFormat } from "./SpeechSynthesisOutputFormat";
export { SpeechSynthesizer } from "./SpeechSynthesizer";
export { SpeechSynthesisResult } from "./SpeechSynthesisResult";
export { SpeechSynthesisEventArgs } from "./SpeechSynthesisEventArgs";
export { SpeechSynthesisWordBoundaryEventArgs} from "./SpeechSynthesisWordBoundaryEventArgs";


@ -144,6 +144,24 @@ export enum PropertyId {
*/
Speech_SessionId,
/**
* The spoken language to be synthesized (e.g. en-US)
* @member PropertyId.SpeechServiceConnection_SynthLanguage
*/
SpeechServiceConnection_SynthLanguage,
/**
* The name of the TTS voice to be used for speech synthesis
* @member PropertyId.SpeechServiceConnection_SynthVoice
*/
SpeechServiceConnection_SynthVoice,
/**
* The string to specify TTS output audio format
* @member PropertyId.SpeechServiceConnection_SynthOutputFormat
*/
SpeechServiceConnection_SynthOutputFormat,
/**
* The requested Cognitive Services Speech Service response output format (simple or detailed). Under normal circumstances, you shouldn't have
* to use this property directly.


@ -70,4 +70,10 @@ export enum ResultReason {
* @member ResultReason.SynthesizingAudioCompleted
*/
SynthesizingAudioCompleted,
/**
* Indicates the speech synthesis is now started
* @member ResultReason.SynthesizingAudioStarted
*/
SynthesizingAudioStarted,
}


@ -6,14 +6,15 @@ import {
OutputFormatPropertyName,
ServicePropertiesPropertyName
} from "../common.speech/Exports";
import { IStringDictionary } from "../common/Exports";
import { Contracts } from "./Contracts";
import {IStringDictionary} from "../common/Exports";
import {Contracts} from "./Contracts";
import {
OutputFormat,
ProfanityOption,
PropertyCollection,
PropertyId,
ServicePropertyChannel
ServicePropertyChannel,
SpeechSynthesisOutputFormat,
} from "./Exports";
/**
@ -212,7 +213,9 @@ export abstract class SpeechConfig {
public abstract getProperty(name: string, def?: string): string;
/**
* Gets output format.
* Gets speech recognition output format (simple or detailed).
* Note: This output format is for speech recognition result, use [SpeechConfig.speechSynthesisOutputFormat] to
* get synthesized audio output format.
* @member SpeechConfig.prototype.outputFormat
* @function
* @public
@ -221,7 +224,9 @@ export abstract class SpeechConfig {
public abstract get outputFormat(): OutputFormat;
/**
* Gets/Sets the output format.
* Gets/Sets speech recognition output format (simple or detailed).
* Note: This output format is for speech recognition result, use [SpeechConfig.speechSynthesisOutputFormat] to
* set synthesized audio output format.
* @member SpeechConfig.prototype.outputFormat
* @function
* @public
@ -259,7 +264,7 @@ export abstract class SpeechConfig {
* @member SpeechConfig.prototype.subscriptionKey
* @function
* @public
* @return {SubscriptionKey} The subscription key set on the config.
* @return {string} The subscription key set on the config.
*/
public abstract get subscriptionKey(): string;
@ -319,6 +324,63 @@ export abstract class SpeechConfig {
* Added in version 1.7.0.
*/
public abstract enableDictation(): void;
/**
* Gets the language of the speech synthesizer.
* @member SpeechConfig.prototype.speechSynthesisLanguage
* @function
* @public
* @returns {string} Returns the speech synthesis language.
* Added in version 1.11.0.
*/
public abstract get speechSynthesisLanguage(): string;
/**
* Sets the language of the speech synthesizer.
* @member SpeechConfig.prototype.speechSynthesisLanguage
* @function
* @public
* Added in version 1.11.0.
*/
public abstract set speechSynthesisLanguage(language: string);
/**
* Gets the voice of the speech synthesizer.
* @member SpeechConfig.prototype.speechSynthesisVoiceName
* @function
* @public
* @returns {string} Returns the speech synthesis voice.
* Added in version 1.11.0.
*/
public abstract get speechSynthesisVoiceName(): string;
/**
* Sets the voice of the speech synthesizer. (see <a href="https://aka.ms/speech/tts-languages">available voices</a>).
* @member SpeechConfig.prototype.speechSynthesisVoiceName
* @function
* @public
* Added in version 1.11.0.
*/
public abstract set speechSynthesisVoiceName(voice: string);
/**
* Gets the speech synthesis output format.
* @member SpeechConfig.prototype.speechSynthesisOutputFormat
* @function
* @public
* @returns {SpeechSynthesisOutputFormat} Returns the speech synthesis output format
* Added in version 1.11.0.
*/
public abstract get speechSynthesisOutputFormat(): SpeechSynthesisOutputFormat;
/**
* Sets the speech synthesis output format (e.g. Riff16Khz16BitMonoPcm).
* @member SpeechConfig.prototype.speechSynthesisOutputFormat
* @function
* @public
* Added in version 1.11.0.
*/
public abstract set speechSynthesisOutputFormat(format: SpeechSynthesisOutputFormat);
}
/**
@ -432,4 +494,28 @@ export class SpeechConfigImpl extends SpeechConfig {
ret.privProperties = this.privProperties.clone();
return ret;
}
public get speechSynthesisLanguage(): string {
return this.privProperties.getProperty(PropertyId.SpeechServiceConnection_SynthLanguage);
}
public set speechSynthesisLanguage(language: string) {
this.privProperties.setProperty(PropertyId.SpeechServiceConnection_SynthLanguage, language);
}
public get speechSynthesisVoiceName(): string {
return this.privProperties.getProperty(PropertyId.SpeechServiceConnection_SynthVoice);
}
public set speechSynthesisVoiceName(voice: string) {
this.privProperties.setProperty(PropertyId.SpeechServiceConnection_SynthVoice, voice);
}
public get speechSynthesisOutputFormat(): SpeechSynthesisOutputFormat {
return (SpeechSynthesisOutputFormat as any)[this.privProperties.getProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, undefined)];
}
public set speechSynthesisOutputFormat(format: SpeechSynthesisOutputFormat) {
this.privProperties.setProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, SpeechSynthesisOutputFormat[format]);
}
}
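A sketch of configuring the synthesis properties added above; the key, region and voice strings are placeholders.

// Sketch: set synthesis language, voice and output format on a config.
const speechConfig = SpeechConfig.fromSubscription("<subscription-key>", "<region>");
speechConfig.speechSynthesisLanguage = "en-US";
speechConfig.speechSynthesisVoiceName = "Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)";
speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm;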


@ -0,0 +1,33 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { SpeechSynthesisResult } from "./Exports";
/**
* Defines contents of speech synthesis events.
* @class SpeechSynthesisEventArgs
* Added in version 1.11.0
*/
export class SpeechSynthesisEventArgs {
private readonly privResult: SpeechSynthesisResult;
/**
* Creates and initializes an instance of this class.
* @constructor
* @param {SpeechSynthesisResult} result - The speech synthesis result.
*/
public constructor(result: SpeechSynthesisResult) {
this.privResult = result;
}
/**
* Specifies the synthesis result.
* @member SpeechSynthesisEventArgs.prototype.result
* @function
* @public
* @returns {SpeechSynthesisResult} the synthesis result.
*/
public get result(): SpeechSynthesisResult {
return this.privResult;
}
}
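A handler sketch; it assumes a SpeechSynthesizer instance named synthesizer is in scope (the synthesisCompleted member is defined on SpeechSynthesizer later in this commit).

// Sketch: observe synthesis completion through SpeechSynthesisEventArgs.
synthesizer.synthesisCompleted = (sender: SpeechSynthesizer, e: SpeechSynthesisEventArgs): void => {
    // The completed result carries the full synthesized audio.
    console.log(`completed: ${e.result.audioData.byteLength} bytes`);
};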

Просмотреть файл

@ -0,0 +1,111 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
/**
* Define speech synthesis audio output formats.
* @enum SpeechSynthesisOutputFormat
* Added in version 1.11.0
*/
export enum SpeechSynthesisOutputFormat {
/**
* raw-8khz-8bit-mono-mulaw
* @member SpeechSynthesisOutputFormat.Raw8Khz8BitMonoMULaw,
*/
Raw8Khz8BitMonoMULaw,
/**
* riff-16khz-16kbps-mono-siren
* @member SpeechSynthesisOutputFormat.Riff16Khz16KbpsMonoSiren
*/
Riff16Khz16KbpsMonoSiren,
/**
* audio-16khz-16kbps-mono-siren
* @member SpeechSynthesisOutputFormat.Audio16Khz16KbpsMonoSiren
*/
Audio16Khz16KbpsMonoSiren,
/**
* audio-16khz-32kbitrate-mono-mp3
* @member SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
*/
Audio16Khz32KBitRateMonoMp3,
/**
* audio-16khz-128kbitrate-mono-mp3
* @member SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3
*/
Audio16Khz128KBitRateMonoMp3,
/**
* audio-16khz-64kbitrate-mono-mp3
* @member SpeechSynthesisOutputFormat.Audio16Khz64KBitRateMonoMp3
*/
Audio16Khz64KBitRateMonoMp3,
/**
* audio-24khz-48kbitrate-mono-mp3
* @member SpeechSynthesisOutputFormat.Audio24Khz48KBitRateMonoMp3
*/
Audio24Khz48KBitRateMonoMp3,
/**
* audio-24khz-96kbitrate-mono-mp3
* @member SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3
*/
Audio24Khz96KBitRateMonoMp3,
/**
* audio-24khz-160kbitrate-mono-mp3
* @member SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3
*/
Audio24Khz160KBitRateMonoMp3,
/**
* raw-16khz-16bit-mono-truesilk
* @member SpeechSynthesisOutputFormat.Raw16Khz16BitMonoTrueSilk
*/
Raw16Khz16BitMonoTrueSilk,
/**
* riff-16khz-16bit-mono-pcm
* @member SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm
*/
Riff16Khz16BitMonoPcm,
/**
* riff-8khz-16bit-mono-pcm
* @member SpeechSynthesisOutputFormat.Riff8Khz16BitMonoPcm
*/
Riff8Khz16BitMonoPcm,
/**
* riff-24khz-16bit-mono-pcm
* @member SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm
*/
Riff24Khz16BitMonoPcm,
/**
* riff-8khz-8bit-mono-mulaw
* @member SpeechSynthesisOutputFormat.Riff8Khz8BitMonoMULaw
*/
Riff8Khz8BitMonoMULaw,
/**
* raw-16khz-16bit-mono-pcm
* @member SpeechSynthesisOutputFormat.Raw16Khz16BitMonoPcm
*/
Raw16Khz16BitMonoPcm,
/**
* raw-24khz-16bit-mono-pcm
* @member SpeechSynthesisOutputFormat.Raw24Khz16BitMonoPcm
*/
Raw24Khz16BitMonoPcm,
/**
* raw-8khz-16bit-mono-pcm
* @member SpeechSynthesisOutputFormat.Raw8Khz16BitMonoPcm
*/
Raw8Khz16BitMonoPcm,
}
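How this enum travels through the property bag, as a sketch mirroring the (SpeechSynthesisOutputFormat as any)[...] pattern used by SpeechConfigImpl above.

// Sketch: the enum is stored by name and parsed back by reverse lookup.
const name: string = SpeechSynthesisOutputFormat[SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm];
// name === "Riff24Khz16BitMonoPcm"
const roundTripped: SpeechSynthesisOutputFormat = (SpeechSynthesisOutputFormat as any)[name];
// roundTripped === SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm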


@ -0,0 +1,92 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { PropertyCollection, ResultReason } from "./Exports";
/**
* Defines result of speech synthesis.
* @class SpeechSynthesisResult
* Added in version 1.11.0
*/
export class SpeechSynthesisResult {
private privResultId: string;
private privReason: ResultReason;
private privText: string;
private privAudioData: ArrayBuffer;
private privOffset: number;
private privErrorDetails: string;
private privProperties: PropertyCollection;
/**
* Creates and initializes an instance of this class.
* @constructor
* @param {string} resultId - The result id.
* @param {ResultReason} reason - The reason.
* @param {ArrayBuffer} audioData - The synthesized audio data.
* @param {string} errorDetails - Error details, if provided.
* @param {PropertyCollection} properties - Additional properties, if provided.
*/
constructor(resultId?: string, reason?: ResultReason, audioData?: ArrayBuffer,
errorDetails?: string, properties?: PropertyCollection) {
this.privResultId = resultId;
this.privReason = reason;
this.privAudioData = audioData;
this.privErrorDetails = errorDetails;
this.privProperties = properties;
}
/**
* Specifies the result identifier.
* @member SpeechSynthesisResult.prototype.resultId
* @function
* @public
* @returns {string} Specifies the result identifier.
*/
public get resultId(): string {
return this.privResultId;
}
/**
* Specifies status of the result.
* @member SpeechSynthesisResult.prototype.reason
* @function
* @public
* @returns {ResultReason} Specifies status of the result.
*/
public get reason(): ResultReason {
return this.privReason;
}
/**
* The synthesized audio data
* @member SpeechSynthesisResult.prototype.audioData
* @function
* @public
* @returns {ArrayBuffer} The synthesized audio data.
*/
public get audioData(): ArrayBuffer {
return this.privAudioData;
}
/**
* In case of an unsuccessful synthesis, provides details of the occurred error.
* @member SpeechSynthesisResult.prototype.errorDetails
* @function
* @public
* @returns {string} a brief description of an error.
*/
public get errorDetails(): string {
return this.privErrorDetails;
}
/**
* The set of properties exposed in the result.
* @member SpeechSynthesisResult.prototype.properties
* @function
* @public
* @returns {PropertyCollection} The set of properties exposed in the result.
*/
public get properties(): PropertyCollection {
return this.privProperties;
}
}
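A sketch of consuming a result; it assumes `result` arrived via a synthesizer callback.

// Sketch: branch on the result reason before touching the audio.
if (result.reason === ResultReason.SynthesizingAudioCompleted) {
    console.log(`synthesized ${result.audioData.byteLength} bytes`);
} else {
    console.log(`synthesis failed: ${result.errorDetails}`);
}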

Просмотреть файл

@ -0,0 +1,73 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
/**
* Defines contents of speech synthesis word boundary event.
* @class SpeechSynthesisWordBoundaryEventArgs
* Added in version 1.11.0
*/
export class SpeechSynthesisWordBoundaryEventArgs {
private privAudioOffset: number;
private privText: string;
private privWordLength: number;
private privTextOffset: number;
/**
* Creates and initializes an instance of this class.
* @constructor
* @param {number} audioOffset - The audio offset.
* @param {string} text - The text.
* @param {number} wordLength - The length of the word.
* @param {number} textOffset - The text offset.
*/
public constructor(audioOffset: number, text: string, wordLength: number, textOffset: number) {
this.privAudioOffset = audioOffset;
this.privText = text;
this.privWordLength = wordLength;
this.privTextOffset = textOffset;
}
/**
* Specifies the audio offset.
* @member SpeechSynthesisWordBoundaryEventArgs.prototype.audioOffset
* @function
* @public
* @returns {number} the audio offset.
*/
public get audioOffset(): number {
return this.privAudioOffset;
}
/**
* Specifies the text of the word boundary event.
* @member SpeechSynthesisWordBoundaryEventArgs.prototype.text
* @function
* @public
* @returns {string} the text.
*/
public get text(): string {
return this.privText;
}
/**
* Specifies the word length
* @member SpeechSynthesisWordBoundaryEventArgs.prototype.wordLength
* @function
* @public
* @returns {number} the word length
*/
public get wordLength(): number {
return this.privWordLength;
}
/**
* Specifies the text offset.
* @member SpeechSynthesisWordBoundaryEventArgs.prototype.textOffset
* @function
* @public
* @returns {number} the text offset.
*/
public get textOffset(): number {
return this.privTextOffset;
}
}
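A handler sketch; it assumes a SpeechSynthesizer instance named synthesizer, and treating audioOffset as 100-nanosecond ticks is an assumption carried over from the other Speech SDKs, not stated in this commit.

// Sketch: log each word boundary as it is reported.
synthesizer.wordBoundary = (sender: SpeechSynthesizer, e: SpeechSynthesisWordBoundaryEventArgs): void => {
    console.log(`"${e.text}" (${e.wordLength} chars) at text offset ${e.textOffset},` +
        ` audio offset ${e.audioOffset / 10000} ms`); // 100-ns ticks assumed
};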


@ -0,0 +1,435 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import { PathLike } from "fs";
import {
CognitiveSubscriptionKeyAuthentication,
CognitiveTokenAuthentication,
Context,
IAuthentication,
ISynthesisConnectionFactory,
OS,
SpeechServiceConfig,
SpeechSynthesisConnectionFactory,
SynthesisAdapterBase,
SynthesizerConfig,
} from "../common.speech/Exports";
import {
createNoDashGuid,
IAudioDestination, IStringDictionary,
Promise,
PromiseHelper,
Queue
} from "../common/Exports";
import { AudioOutputConfigImpl } from "./Audio/AudioConfig";
import { AudioFileWriter } from "./Audio/AudioFileWriter";
import {AudioOutputFormatImpl} from "./Audio/AudioOutputFormat";
import { PullAudioOutputStreamImpl, PushAudioOutputStreamImpl } from "./Audio/AudioOutputStream";
import { Contracts } from "./Contracts";
import {
AudioConfig,
AudioOutputStream,
PropertyCollection,
PropertyId,
PullAudioOutputStream,
PushAudioOutputStreamCallback,
SpeechSynthesisEventArgs,
SpeechSynthesisOutputFormat,
SpeechSynthesisResult,
SpeechSynthesisWordBoundaryEventArgs,
} from "./Exports";
import { SpeechConfig, SpeechConfigImpl } from "./SpeechConfig";
/**
* Defines the class SpeechSynthesizer for text to speech.
* Added in version 1.11.0
* @class SpeechSynthesizer
*/
export class SpeechSynthesizer {
protected audioConfig: AudioConfig;
protected privAdapter: SynthesisAdapterBase;
protected privProperties: PropertyCollection;
protected synthesisRequestQueue: Queue<SynthesisRequest>;
/**
* Defines event handler for synthesis start events.
* @member SpeechSynthesizer.prototype.synthesisStarted
* @function
* @public
*/
public synthesisStarted: (sender: SpeechSynthesizer, event: SpeechSynthesisEventArgs) => void;
/**
* Defines event handler for synthesizing events.
* @member SpeechSynthesizer.prototype.synthesizing
* @function
* @public
*/
public synthesizing: (sender: SpeechSynthesizer, event: SpeechSynthesisEventArgs) => void;
/**
* Defines event handler for synthesis completed events.
* @member SpeechSynthesizer.prototype.synthesisCompleted
* @function
* @public
*/
public synthesisCompleted: (sender: SpeechSynthesizer, event: SpeechSynthesisEventArgs) => void;
/**
* Defines event handler for synthesis canceled events.
* @member SpeechSynthesizer.prototype.SynthesisCanceled
* @function
* @public
*/
public SynthesisCanceled: (sender: SpeechSynthesizer, event: SpeechSynthesisEventArgs) => void;
/**
* Defines event handler for word boundary events
* @member SpeechSynthesizer.prototype.wordBoundary
* @function
* @public
*/
public wordBoundary: (sender: SpeechSynthesizer, event: SpeechSynthesisWordBoundaryEventArgs) => void;
/**
* Gets the authorization token used to communicate with the service.
* @member SpeechSynthesizer.prototype.authorizationToken
* @function
* @public
* @returns {string} Authorization token.
*/
public get authorizationToken(): string {
return this.properties.getProperty(PropertyId.SpeechServiceAuthorization_Token);
}
/**
* Gets/Sets the authorization token used to communicate with the service.
* @member SpeechSynthesizer.prototype.authorizationToken
* @function
* @public
* @param {string} token - Authorization token.
*/
public set authorizationToken(token: string) {
Contracts.throwIfNullOrWhitespace(token, "token");
this.properties.setProperty(PropertyId.SpeechServiceAuthorization_Token, token);
}
/**
* The collection of properties and their values defined for this SpeechSynthesizer.
* @member SpeechSynthesizer.prototype.properties
* @function
* @public
* @returns {PropertyCollection} The collection of properties and their values defined for this SpeechSynthesizer.
*/
public get properties(): PropertyCollection {
return this.privProperties;
}
private privDisposed: boolean;
private privConnectionFactory: ISynthesisConnectionFactory;
private privSynthesizing: boolean;
/**
* SpeechSynthesizer constructor.
* @constructor
* @param {SpeechConfig} speechConfig - A set of initial properties for this synthesizer.
* @param {AudioConfig} audioConfig - An optional audio configuration associated with the synthesizer
*/
public constructor(speechConfig: SpeechConfig, audioConfig?: AudioConfig) {
const speechConfigImpl: SpeechConfigImpl = speechConfig as SpeechConfigImpl;
Contracts.throwIfNull(speechConfigImpl, "speechConfig");
if (audioConfig !== null) {
this.audioConfig = (audioConfig !== undefined) ? audioConfig : AudioConfig.fromDefaultSpeakerOutput();
}
this.privProperties = speechConfigImpl.properties.clone();
this.privDisposed = false;
this.privSynthesizing = false;
this.privConnectionFactory = new SpeechSynthesisConnectionFactory();
this.synthesisRequestQueue = new Queue<SynthesisRequest>();
this.implCommonRSynthesizeSetup();
}
public static buildSsml(text: string, properties: PropertyCollection): string {
const languageToDefaultVoice: IStringDictionary<string> = {
["ar-EG"]: "Microsoft Server Speech Text to Speech Voice (ar-EG, Hoda)",
["ar-SA"]: "Microsoft Server Speech Text to Speech Voice (ar-SA, Naayf)",
["bg-BG"]: "Microsoft Server Speech Text to Speech Voice (bg-BG, Ivan)",
["ca-ES"]: "Microsoft Server Speech Text to Speech Voice (ca-ES, HerenaRUS)",
["cs-CZ"]: "Microsoft Server Speech Text to Speech Voice (cs-CZ, Jakub)",
["da-DK"]: "Microsoft Server Speech Text to Speech Voice (da-DK, HelleRUS)",
["de-AT"]: "Microsoft Server Speech Text to Speech Voice (de-AT, Michael)",
["de-CH"]: "Microsoft Server Speech Text to Speech Voice (de-CH, Karsten)",
["de-DE"]: "Microsoft Server Speech Text to Speech Voice (de-DE, HeddaRUS)",
["el-GR"]: "Microsoft Server Speech Text to Speech Voice (el-GR, Stefanos)",
["en-AU"]: "Microsoft Server Speech Text to Speech Voice (en-AU, HayleyRUS)",
["en-CA"]: "Microsoft Server Speech Text to Speech Voice (en-CA, HeatherRUS)",
["en-GB"]: "Microsoft Server Speech Text to Speech Voice (en-GB, HazelRUS)",
["en-IE"]: "Microsoft Server Speech Text to Speech Voice (en-IE, Sean)",
["en-IN"]: "Microsoft Server Speech Text to Speech Voice (en-IN, PriyaRUS)",
["en-US"]: "Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)",
["es-ES"]: "Microsoft Server Speech Text to Speech Voice (es-ES, HelenaRUS)",
["es-MX"]: "Microsoft Server Speech Text to Speech Voice (es-MX, HildaRUS)",
["fi-FI"]: "Microsoft Server Speech Text to Speech Voice (fi-FI, HeidiRUS)",
["fr-CA"]: "Microsoft Server Speech Text to Speech Voice (fr-CA, HarmonieRUS)",
["fr-CH"]: "Microsoft Server Speech Text to Speech Voice (fr-CH, Guillaume)",
["fr-FR"]: "Microsoft Server Speech Text to Speech Voice (fr-FR, HortenseRUS)",
["he-IL"]: "Microsoft Server Speech Text to Speech Voice (he-IL, Asaf)",
["hi-IN"]: "Microsoft Server Speech Text to Speech Voice (hi-IN, Kalpana)",
["hr-HR"]: "Microsoft Server Speech Text to Speech Voice (hr-HR, Matej)",
["hu-HU"]: "Microsoft Server Speech Text to Speech Voice (hu-HU, Szabolcs)",
["id-ID"]: "Microsoft Server Speech Text to Speech Voice (id-ID, Andika)",
["it-IT"]: "Microsoft Server Speech Text to Speech Voice (it-IT, LuciaRUS)",
["ja-JP"]: "Microsoft Server Speech Text to Speech Voice (ja-JP, HarukaRUS)",
["ko-KR"]: "Microsoft Server Speech Text to Speech Voice (ko-KR, HeamiRUS)",
["ms-MY"]: "Microsoft Server Speech Text to Speech Voice (ms-MY, Rizwan)",
["nb-NO"]: "Microsoft Server Speech Text to Speech Voice (nb-NO, HuldaRUS)",
["nl-NL"]: "Microsoft Server Speech Text to Speech Voice (nl-NL, HannaRUS)",
["pl-PL"]: "Microsoft Server Speech Text to Speech Voice (pl-PL, PaulinaRUS)",
["pt-BR"]: "Microsoft Server Speech Text to Speech Voice (pt-BR, HeloisaRUS)",
["pt-PT"]: "Microsoft Server Speech Text to Speech Voice (pt-PT, HeliaRUS)",
["ro-RO"]: "Microsoft Server Speech Text to Speech Voice (ro-RO, Andrei)",
["ru-RU"]: "Microsoft Server Speech Text to Speech Voice (ru-RU, EkaterinaRUS)",
["sk-SK"]: "Microsoft Server Speech Text to Speech Voice (sk-SK, Filip)",
["sl-SI"]: "Microsoft Server Speech Text to Speech Voice (sl-SI, Lado)",
["sv-SE"]: "Microsoft Server Speech Text to Speech Voice (sv-SE, HedvigRUS)",
["ta-IN"]: "Microsoft Server Speech Text to Speech Voice (ta-IN, Valluvar)",
["te-IN"]: "Microsoft Server Speech Text to Speech Voice (te-IN, Chitra)",
["th-TH"]: "Microsoft Server Speech Text to Speech Voice (th-TH, Pattara)",
["tr-TR"]: "Microsoft Server Speech Text to Speech Voice (tr-TR, SedaRUS)",
["vi-VN"]: "Microsoft Server Speech Text to Speech Voice (vi-VN, An)",
["zh-CN"]: "Microsoft Server Speech Text to Speech Voice (zh-CN, HuihuiRUS)",
["zh-HK"]: "Microsoft Server Speech Text to Speech Voice (zh-HK, TracyRUS)",
["zh-TW"]: "Microsoft Server Speech Text to Speech Voice (zh-TW, HanHanRUS)",
};
const language = properties.getProperty(PropertyId.SpeechServiceConnection_SynthLanguage, "en-US");
const voice = properties.getProperty(PropertyId.SpeechServiceConnection_SynthVoice, languageToDefaultVoice[language]);
return `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts' xmlns:emo='http://www.w3.org/2009/10/emotionml' xml:lang='${language}'><voice name='${voice}'>${this.XMLEncode(text)}</voice></speak>`;
}
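// Illustration (hypothetical input; output follows the template above, with
// line breaks added here for readability only):
//   buildSsml("Hello <world>", props)   // defaults resolve to en-US / JessaRUS
// returns:
//   <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis'
//    xmlns:mstts='http://www.w3.org/2001/mstts'
//    xmlns:emo='http://www.w3.org/2009/10/emotionml' xml:lang='en-US'>
//    <voice name='Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)'>
//    Hello &lt;world&gt;</voice></speak>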
/**
* Executes speech synthesis on plain text.
* The task returns the synthesis result.
* @member SpeechSynthesizer.prototype.speakTextAsync
* @function
* @public
* @param text - Text to be synthesized.
* @param cb - Callback that receives the SpeechSynthesisResult.
* @param err - Callback invoked in case of an error.
* @param stream - AudioOutputStream to receive the synthesized audio.
*/
public speakTextAsync(text: string, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, stream?: AudioOutputStream | PushAudioOutputStreamCallback | PathLike): void {
this.speakImpl(text, false, cb, err, stream);
}
/**
* Executes speech synthesis on SSML.
* The task returns the synthesis result.
* @member SpeechSynthesizer.prototype.speakSsmlAsync
* @function
* @public
* @param ssml - SSML to be synthesized.
* @param cb - Callback that receives the SpeechSynthesisResult.
* @param err - Callback invoked in case of an error.
* @param stream - AudioOutputStream to receive the synthesized audio.
*/
public speakSsmlAsync(ssml: string, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, stream?: AudioOutputStream | PushAudioOutputStreamCallback | PathLike): void {
this.speakImpl(ssml, true, cb, err, stream);
}
/**
* Dispose of associated resources.
* @member SpeechSynthesizer.prototype.close
* @function
* @public
*/
public close(): void {
Contracts.throwIfDisposed(this.privDisposed);
this.dispose(true);
}
/**
* @Internal
* Do not use externally, object returned will change without warning or notice.
*/
public get internalData(): object {
return this.privAdapter;
}
/**
* This method performs cleanup of resources.
* The Boolean parameter disposing indicates whether the method is called
* from Dispose (if disposing is true) or from the finalizer (if disposing is false).
* Derived classes should override this method to dispose resource if needed.
* @member SpeechSynthesizer.prototype.dispose
* @function
* @public
* @param {boolean} disposing - Flag to request disposal.
*/
protected dispose(disposing: boolean): void {
if (this.privDisposed) {
return;
}
if (disposing) {
if (this.privAdapter) {
this.privAdapter.dispose();
}
}
this.privDisposed = true;
}
//
// ################################################################################################################
// IMPLEMENTATION.
// Move to independent class
// ################################################################################################################
//
protected createSynthesizerConfig(speechConfig: SpeechServiceConfig): SynthesizerConfig {
return new SynthesizerConfig(
speechConfig,
this.privProperties);
}
// Creates the synthesis adapter
protected createSynthesisAdapter(
authentication: IAuthentication,
connectionFactory: ISynthesisConnectionFactory,
audioConfig: AudioConfig,
synthesizerConfig: SynthesizerConfig): SynthesisAdapterBase {
return new SynthesisAdapterBase(authentication, connectionFactory,
synthesizerConfig, this, this.audioConfig as AudioOutputConfigImpl);
}
protected implCommonRSynthesizeSetup(): void {
let osPlatform = (typeof window !== "undefined") ? "Browser" : "Node";
let osName = "unknown";
let osVersion = "unknown";
if (typeof navigator !== "undefined") {
osPlatform = osPlatform + "/" + navigator.platform;
osName = navigator.userAgent;
osVersion = navigator.appVersion;
}
const synthesizerConfig = this.createSynthesizerConfig(
new SpeechServiceConfig(
new Context(new OS(osPlatform, osName, osVersion))));
const subscriptionKey = this.privProperties.getProperty(PropertyId.SpeechServiceConnection_Key, undefined);
const authentication = (subscriptionKey && subscriptionKey !== "") ?
new CognitiveSubscriptionKeyAuthentication(subscriptionKey) :
new CognitiveTokenAuthentication(
(authFetchEventId: string): Promise<string> => {
const authorizationToken = this.privProperties.getProperty(PropertyId.SpeechServiceAuthorization_Token, undefined);
return PromiseHelper.fromResult(authorizationToken);
},
(authFetchEventId: string): Promise<string> => {
const authorizationToken = this.privProperties.getProperty(PropertyId.SpeechServiceAuthorization_Token, undefined);
return PromiseHelper.fromResult(authorizationToken);
});
this.privAdapter = this.createSynthesisAdapter(
authentication,
this.privConnectionFactory,
this.audioConfig,
synthesizerConfig);
this.privAdapter.audioOutputFormat = AudioOutputFormatImpl.fromSpeechSynthesisOutputFormat(
(SpeechSynthesisOutputFormat as any)[this.properties.getProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, undefined)]
);
}
protected speakImpl(text: string, isSsml: boolean, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, dataStream?: AudioOutputStream | PushAudioOutputStreamCallback | PathLike): void {
try {
Contracts.throwIfDisposed(this.privDisposed);
const requestId = createNoDashGuid();
let audioDestination: IAudioDestination;
if (dataStream instanceof PushAudioOutputStreamCallback) {
audioDestination = new PushAudioOutputStreamImpl(dataStream);
} else if (dataStream instanceof PullAudioOutputStream) {
audioDestination = dataStream as PullAudioOutputStreamImpl;
} else if (dataStream !== undefined) {
audioDestination = new AudioFileWriter(dataStream as PathLike);
} else {
audioDestination = undefined;
}
this.synthesisRequestQueue.enqueue(new SynthesisRequest(requestId, text, isSsml, (e: SpeechSynthesisResult): void => {
this.privSynthesizing = false;
if (!!cb) {
try {
cb(e);
} catch (callbackError) {
if (!!err) {
err(callbackError);
}
}
}
cb = undefined;
this.adapterSpeak();
}, (e: string): void => {
if (!!err) {
err(e);
}
}, audioDestination));
this.adapterSpeak();
} catch (error) {
if (!!err) {
if (error instanceof Error) {
const typedError: Error = error as Error;
err(typedError.name + ": " + typedError.message);
} else {
err(error);
}
}
// Destroy the synthesizer.
this.dispose(true);
}
}
protected adapterSpeak(): Promise<boolean> {
if (!this.privDisposed && !this.privSynthesizing) {
this.privSynthesizing = true;
return this.synthesisRequestQueue.dequeue().
onSuccessContinueWithPromise((request: SynthesisRequest): Promise<boolean> => {
return this.privAdapter.Speak(request.text, request.isSSML, request.requestId, request.cb, request.err, request.dataStream);
});
}
return PromiseHelper.fromResult(true);
}
private static XMLEncode(text: string): string {
return text.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;")
.replace(/"/g, "&quot;")
.replace(/'/g, "&apos;");
}
}
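An end-to-end sketch of the synthesizer above; the key, region and output path are placeholders, and fromAudioFileOutput is the file-output factory exercised by the tests in this commit.

// Sketch: synthesize a sentence straight to a wave file.
const config = SpeechConfig.fromSubscription("<subscription-key>", "<region>");
const synthesizer = new SpeechSynthesizer(config, AudioConfig.fromAudioFileOutput("test.wav"));
synthesizer.speakTextAsync("Hello, world!",
    (result: SpeechSynthesisResult): void => {
        if (result.reason === ResultReason.SynthesizingAudioCompleted) {
            console.log(`wrote ${result.audioData.byteLength} bytes`);
        } else {
            console.error(result.errorDetails);
        }
        synthesizer.close();
    },
    (error: string): void => {
        console.error(error);
        synthesizer.close();
    });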
// tslint:disable-next-line:max-classes-per-file
export class SynthesisRequest {
public requestId: string;
public text: string;
public isSSML: boolean;
public cb: (e: SpeechSynthesisResult) => void;
public err: (e: string) => void;
public dataStream: IAudioDestination;
constructor(requestId: string, text: string, isSSML: boolean, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, dataStream?: IAudioDestination) {
this.requestId = requestId;
this.text = text;
this.isSSML = isSSML;
this.cb = cb;
this.err = err;
this.dataStream = dataStream;
}
}


@ -14,7 +14,7 @@ import {
PropertyCollection,
PropertyId,
ServicePropertyChannel,
SpeechConfig,
SpeechConfig, SpeechSynthesisOutputFormat,
} from "./Exports";
/**
@ -439,4 +439,27 @@ export class SpeechTranslationConfigImpl extends SpeechTranslationConfig {
this.privSpeechProperties.setProperty(ForceDictationPropertyName, "true");
}
public get speechSynthesisLanguage(): string {
return this.privSpeechProperties.getProperty(PropertyId.SpeechServiceConnection_SynthLanguage);
}
public set speechSynthesisLanguage(language: string) {
this.privSpeechProperties.setProperty(PropertyId.SpeechServiceConnection_SynthLanguage, language);
}
public get speechSynthesisVoiceName(): string {
return this.privSpeechProperties.getProperty(PropertyId.SpeechServiceConnection_SynthVoice);
}
public set speechSynthesisVoiceName(voice: string) {
this.privSpeechProperties.setProperty(PropertyId.SpeechServiceConnection_SynthVoice, voice);
}
public get speechSynthesisOutputFormat(): SpeechSynthesisOutputFormat {
return (SpeechSynthesisOutputFormat as any)[this.privSpeechProperties.getProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, undefined)];
}
public set speechSynthesisOutputFormat(format: SpeechSynthesisOutputFormat) {
this.privSpeechProperties.setProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, SpeechSynthesisOutputFormat[format]);
}
}


@ -1,16 +1,12 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import {
IAudioStreamNode,
IStreamChunk,
} from "../src/common/Exports";
import {
bufferSize,
PullAudioOutputStreamImpl,
} from "../src/sdk/Audio/AudioOutputStream";
import { AudioOutputFormatImpl } from "../src/sdk/Audio/AudioOutputFormat";
import { PullAudioOutputStream, PullAudioOutputStreamImpl } from "../src/sdk/Audio/AudioOutputStream";
import { Settings } from "./Settings";
let objsToClose: any[];
beforeAll(() => {
// Override inputs, if necessary
Settings.LoadSettings();
@ -18,12 +14,50 @@ beforeAll(() => {
// Test cases are run linearly; the only other mechanism to demarcate them in the output is to put a console line in each case and
// report the name.
// tslint:disable-next-line:no-console
beforeEach(() => console.info("---------------------------------------Starting test case-----------------------------------"));
beforeEach(() => {
// tslint:disable-next-line:no-console
console.info("---------------------------------------Starting test case-----------------------------------");
objsToClose = [];
const used = process.memoryUsage().heapUsed / 1024 / 1024;
// tslint:disable-next-line:no-console
console.log(`Heap memory usage before test: ${Math.round(used * 100) / 100} MB`);
});
afterEach(() => {
// tslint:disable-next-line:no-console
console.info("End Time: " + new Date(Date.now()).toLocaleString());
objsToClose.forEach((value: any, index: number, array: any[]) => {
if (typeof value.close === "function") {
value.close();
}
});
const used = process.memoryUsage().heapUsed / 1024 / 1024;
// tslint:disable-next-line:no-console
console.log(`Heap memory usage after test: ${Math.round(used * 100) / 100} MB`);
});
const ReadPullAudioOutputStream: (stream: PullAudioOutputStream, length?: number, done?: () => void) => void =
(stream: PullAudioOutputStream, length?: number, done?: () => void): void => {
const audioBuffer = new ArrayBuffer(1024);
stream.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
if (bytesRead > 0) {
ReadPullAudioOutputStream(stream, length === undefined ? undefined : length - bytesRead, done);
} else {
if (length !== undefined) {
expect(length).toEqual(0);
}
if (!!done) {
done();
}
}
});
};
test("PullAudioOutputStreamImpl basic test", (done: jest.DoneCallback) => {
const size: number = 256;
const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl(size);
const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
objsToClose.push(ps);
ps.format = AudioOutputFormatImpl.getDefaultOutputFormat();
const ab: ArrayBuffer = new ArrayBuffer(size);
const abView: Uint8Array = new Uint8Array(ab);
@ -33,12 +67,13 @@ test("PullAudioOutputStreamImpl basic test", (done: jest.DoneCallback) => {
ps.write(abView);
let bytesRead: number = 0;
ps.read().onSuccessContinueWith((audioBuffer: ArrayBuffer) => {
const audioBuffer = new ArrayBuffer(size);
ps.read(audioBuffer).onSuccessContinueWith((readSize: number) => {
try {
expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(size);
expect(audioBuffer.byteLength).toBeLessThanOrEqual(size);
expect(readSize).toEqual(size);
const readView: Uint8Array = new Uint8Array(audioBuffer);
for (let i: number = 0; i < audioBuffer.byteLength; i++) {
for (let i: number = 0; i < readSize; i++) {
expect(readView[i]).toEqual(bytesRead++ % 256);
}
} catch (error) {
@ -49,8 +84,11 @@ test("PullAudioOutputStreamImpl basic test", (done: jest.DoneCallback) => {
});
test("PullAudioOutputStreamImpl multiple writes read after close", (done: jest.DoneCallback) => {
const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl(bufferSize);
const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
const format = AudioOutputFormatImpl.getDefaultOutputFormat();
ps.format = format;
const bufferSize = format.avgBytesPerSec / 10;
const ab: ArrayBuffer = new ArrayBuffer(bufferSize * 4);
const abView: Uint8Array = new Uint8Array(ab);
for (let i: number = 0; i < bufferSize * 4; i++) {
@ -63,27 +101,25 @@ test("PullAudioOutputStreamImpl multiple writes read after close", (done: jest.D
}
ps.close();
let bytesRead: number = 0;
let bytesReadTotal: number = 0;
const audioBuffer = new ArrayBuffer(bufferSize);
const readLoop = () => {
ps.read().onSuccessContinueWith((audioBuffer: ArrayBuffer) => {
ps.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
try {
if (audioBuffer == null) {
expect(bytesRead).toBeGreaterThanOrEqual(bufferSize * 4);
expect(bytesRead).toBeLessThanOrEqual(bufferSize * 4);
if (bytesRead === 0) {
expect(bytesReadTotal).toEqual(bufferSize * 4);
} else {
expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(bufferSize);
expect(audioBuffer.byteLength).toBeLessThanOrEqual(bufferSize);
const readView: Uint8Array = new Uint8Array(audioBuffer);
for (let i: number = 0; i < audioBuffer.byteLength; i++) {
expect(readView[i]).toEqual(bytesRead++ % 256);
for (let i: number = 0; i < bytesRead; i++) {
expect(readView[i]).toEqual(bytesReadTotal++ % 256);
}
}
} catch (error) {
done.fail(error);
}
if (audioBuffer != null) {
if (bytesRead > 0) {
readLoop();
} else {
done();
@ -95,7 +131,12 @@ test("PullAudioOutputStreamImpl multiple writes read after close", (done: jest.D
});
test("PullAudioOutputStreamImpl multiple writes and reads", (done: jest.DoneCallback) => {
const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl(bufferSize);
const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
objsToClose.push(ps);
const format = AudioOutputFormatImpl.getDefaultOutputFormat();
ps.format = format;
const bufferSize = format.avgBytesPerSec / 10;
const ab: ArrayBuffer = new ArrayBuffer(bufferSize * 4);
const abView: Uint8Array = new Uint8Array(ab);
@ -109,22 +150,22 @@ test("PullAudioOutputStreamImpl multiple writes and reads", (done: jest.DoneCall
}
ps.write(ab.slice(j));
let bytesRead: number = 0;
let bytesReadTotal: number = 0;
const audioBuffer = new ArrayBuffer(bufferSize);
const readLoop = () => {
ps.read().onSuccessContinueWith((audioBuffer: ArrayBuffer) => {
ps.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
try {
expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(bufferSize);
expect(audioBuffer.byteLength).toBeLessThanOrEqual(bufferSize);
expect(bytesRead).toBeLessThanOrEqual(bufferSize);
const readView: Uint8Array = new Uint8Array(audioBuffer);
for (let i: number = 0; i < audioBuffer.byteLength; i++) {
expect(readView[i]).toEqual(bytesRead++ % 256);
for (let i: number = 0; i < bytesRead; i++) {
expect(readView[i]).toEqual(bytesReadTotal++ % 256);
}
} catch (error) {
done.fail(error);
}
if (bytesRead < bufferSize * 4) {
if (bytesReadTotal < bufferSize * 4) {
readLoop();
} else {
done();
@ -134,3 +175,62 @@ test("PullAudioOutputStreamImpl multiple writes and reads", (done: jest.DoneCall
readLoop();
});
test("PullAudioOutputStreamImpl reads before writing", (done: jest.DoneCallback) => {
const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
objsToClose.push(ps);
const format = AudioOutputFormatImpl.getDefaultOutputFormat();
ps.format = format;
const bufferSize = format.avgBytesPerSec / 10;
setTimeout(() => {
ReadPullAudioOutputStream(ps, bufferSize * 4, done);
}, 0);
const ab: ArrayBuffer = new ArrayBuffer(bufferSize * 4);
const abView: Uint8Array = new Uint8Array(ab);
for (let i: number = 0; i < bufferSize * 4; i++) {
abView[i] = i % 256;
}
let j: number = 0;
for (j = 0; j < bufferSize * 4; j += 100) {
ps.write(ab.slice(j, j + 100));
}
ps.write(ab.slice(j));
ps.close();
});
test("PullAudioOutputStreamImpl read all audio data in single read", (done: jest.DoneCallback) => {
const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
const format = AudioOutputFormatImpl.getDefaultOutputFormat();
ps.format = format;
const bufferSize = format.avgBytesPerSec / 10;
const ab: ArrayBuffer = new ArrayBuffer(bufferSize * 4);
const abView: Uint8Array = new Uint8Array(ab);
for (let k: number = 0; k < 1500; k++) { // 10 minutes of data
for (let i: number = 0; i < bufferSize * 4; i++) {
abView[i] = (i + k * bufferSize * 4) % 256;
}
ps.write(ab);
}
ps.close();
const audioBuffer = new ArrayBuffer(bufferSize * 6000);
ps.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
expect(bytesRead).toEqual(bufferSize * 6000);
const readView: Uint8Array = new Uint8Array(audioBuffer);
for (let i: number = 0; i < bytesRead - 1000; i += 997) { // don't check every byte, to keep the test fast
expect(readView[i]).toEqual(i % 256);
}
done();
});
});


@ -362,12 +362,11 @@ describe.each([true, false])("Service-based tests", (forceNodeWebSocket: boolean
// }
// };
const audioBuffer = new ArrayBuffer(320);
const audioReadLoop = (audioStream: PullAudioOutputStream, done: jest.DoneCallback) => {
audioStream.read().on((audioBuffer: ArrayBuffer) => {
audioStream.read(audioBuffer).on((bytesRead: number) => {
try {
if (audioBuffer !== null) {
expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(1);
} else {
if (bytesRead === 0) {
PostDoneTest(done, 2000);
}
@ -375,11 +374,10 @@ describe.each([true, false])("Service-based tests", (forceNodeWebSocket: boolean
done.fail(error);
}
if (audioBuffer != null) {
if (bytesRead > 0) {
audioReadLoop(audioStream, done);
}
},
(error: string) => {
}, (error: string) => {
done.fail(error);
});
};
@ -445,26 +443,23 @@ describe.each([true, false])("Service-based tests", (forceNodeWebSocket: boolean
hypoCounter++;
};
const audioBuffer = new ArrayBuffer(320);
const audioReadLoop = (audioStream: PullAudioOutputStream, done: jest.DoneCallback) => {
audioStream.read().on((audioBuffer: ArrayBuffer) => {
audioStream.read(audioBuffer).on((bytesRead: number) => {
try {
if (audioBuffer !== null) {
expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(1);
} else {
if (bytesRead === 0) {
PostDoneTest(done, 2000);
}
} catch (error) {
done.fail(error);
}
if (audioBuffer != null) {
if (bytesRead > 0) {
audioReadLoop(audioStream, done);
}
},
(error: string) => {
done.fail(error);
});
}, (error: string) => {
done.fail(error);
});
};
connector.activityReceived = (sender: sdk.DialogServiceConnector, e: sdk.ActivityReceivedEventArgs) => {

View file

@ -28,7 +28,7 @@ beforeEach(() => {
// tslint:disable-next-line:no-console
console.info("---------------------------------------Starting test case-----------------------------------");
// tslint:disable-next-line:no-console
console.info("Sart Time: " + new Date(Date.now()).toLocaleString());
console.info("Start Time: " + new Date(Date.now()).toLocaleString());
});
afterEach(() => {

View file

@ -92,6 +92,16 @@ const BuildTranslationRecognizerFromWaveFile: (speechConfig: sdk.SpeechTranslati
return r;
};
const BuildSpeechSynthesizerToFileOutput: (speechConfig: sdk.SpeechConfig, fileName?: string) => sdk.SpeechSynthesizer =
(speechConfig?: sdk.SpeechConfig, fileName?: string): sdk.SpeechSynthesizer => {
const config: sdk.AudioConfig = fileName === undefined ? null : sdk.AudioConfig.fromAudioFileOutput(fileName);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, config);
expect(s).not.toBeUndefined();
return s;
};
test("Null Param Check, both.", () => {
expect(() => sdk.SpeechConfig.fromSubscription(null, null)).toThrowError();
});
@ -127,17 +137,19 @@ test.skip("From endpoint, invalid key format.", () => {
});
// TODO use an endpoint that we control so the subscription key is not leaked!
test("From endpoing, valid Params", () => {
test("From endpoint, valid Params", () => {
const s: sdk.SpeechConfig = sdk.SpeechConfig.fromEndpoint(new URL("http://www.example.com"), "Settings.SpeechSubscriptionKey");
expect(s).not.toBeUndefined();
s.close();
});
test("TypedParametersAccessableViaPropBag", () => {
test("TypedParametersAccessibleViaPropBag", () => {
const s: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
TestParam(() => s.authorizationToken, (val: string) => (s.authorizationToken = val), sdk.PropertyId.SpeechServiceAuthorization_Token, s);
TestParam(() => s.endpointId, (val: string) => (s.endpointId = val), sdk.PropertyId.SpeechServiceConnection_EndpointId, s);
TestParam(() => s.speechRecognitionLanguage, (val: string) => (s.speechRecognitionLanguage = val), sdk.PropertyId.SpeechServiceConnection_RecoLanguage, s);
TestParam(() => s.speechSynthesisLanguage, (val: string) => (s.speechSynthesisLanguage = val), sdk.PropertyId.SpeechServiceConnection_SynthLanguage, s);
TestParam(() => s.speechSynthesisVoiceName, (val: string) => (s.speechSynthesisVoiceName = val), sdk.PropertyId.SpeechServiceConnection_SynthVoice, s);
});
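// A quick sketch (placeholder key and region, not part of the diff) of the new
// synthesis accessors verified above; each is an alias over the shared property
// bag, so the typed property and the PropertyId spelling read and write the same
// underlying value.
const cfg: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription("<subscriptionKey>", "<region>");
cfg.speechSynthesisLanguage = "zh-CN";
cfg.speechSynthesisVoiceName = "zh-CN-HuihuiRUS";
cfg.setProperty(sdk.PropertyId[sdk.PropertyId.SpeechServiceConnection_SynthVoice], "zh-CN-HuihuiRUS");
cfg.close();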
const TestParam = (getAccess: () => string, setAccess: (val: string) => void, propEnum: sdk.PropertyId, config: sdk.SpeechConfig): void => {
@ -177,7 +189,7 @@ test("Create Recognizer", () => {
s.close();
});
test("Proeprties are passed to recognizer", () => {
test("Properties are passed to recognizer", () => {
const s: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
s.speechRecognitionLanguage = createNoDashGuid();
s.authorizationToken = createNoDashGuid();
@ -330,7 +342,7 @@ test("Translation Recognizer Null target languages throws", () => {
s.close();
});
test("Test Translation Recognizer emty target list throws", () => {
test("Test Translation Recognizer empty target list throws", () => {
const s: sdk.SpeechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
s.speechRecognitionLanguage = "en-EN";
s.setProperty(sdk.PropertyId[sdk.PropertyId.SpeechServiceConnection_TranslationToLanguages], "");
@ -350,7 +362,7 @@ test("Translation Null voice value throws", () => {
s.close();
});
test("Translition Recognizer success.", () => {
test("Translation Recognizer success.", () => {
const s: sdk.SpeechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
s.setProperty(sdk.PropertyId[sdk.PropertyId.SpeechServiceConnection_TranslationToLanguages], "en-US");
s.speechRecognitionLanguage = "en-EN";
@ -570,17 +582,28 @@ describe("Connection URL Tests", () => {
createMethod: (url: URL, key: string) => sdk.SpeechConfig | sdk.SpeechTranslationConfig,
hostName: string,
expectedHostName: string,
recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer,
recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer | sdk.SpeechSynthesizer,
done: jest.DoneCallback
): void {
const s: sdk.SpeechConfig | sdk.SpeechTranslationConfig = createMethod(new URL(hostName), "fakekey");
objsToClose.push(s);
const r: { recognizeOnceAsync: (cb?: (e: sdk.RecognitionResult) => void, err?: (e: string) => void) => void } = recognizerCreateMethod(s);
const r = recognizerCreateMethod(s);
objsToClose.push(r);
r.recognizeOnceAsync(
let recognizeOrSynthesizeOnceAsync: (cb: (p2: any) => void) => void;
if (r instanceof sdk.Recognizer) {
recognizeOrSynthesizeOnceAsync = (cb: (p2: any) => void): void => {
r.recognizeOnceAsync(cb);
};
} else if (r instanceof sdk.SpeechSynthesizer) {
recognizeOrSynthesizeOnceAsync = (cb: (p2: any) => void): void => {
r.speakTextAsync("", cb);
};
}
recognizeOrSynthesizeOnceAsync(
(p2: any): void => {
try {
expect(uri).not.toBeUndefined();
@ -605,9 +628,11 @@ describe("Connection URL Tests", () => {
describe.each([
[sdk.SpeechConfig.fromHost, BuildSpeechRecognizerFromWaveFile],
[sdk.SpeechTranslationConfig.fromHost, BuildTranslationRecognizerFromWaveFile],
[sdk.SpeechConfig.fromHost, BuildIntentRecognizerFromWaveFile]])("FromHost Tests",
[sdk.SpeechConfig.fromHost, BuildIntentRecognizerFromWaveFile],
[sdk.SpeechConfig.fromHost, BuildSpeechSynthesizerToFileOutput]])("FromHost Tests",
(createMethod: any,
recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer) => {
recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) =>
sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer | sdk.SpeechSynthesizer) => {
test("Simple Host and protocol", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
@ -635,7 +660,7 @@ describe("Connection URL Tests", () => {
function testUrlParameter(
createMethod: (url: URL, key: string) => sdk.SpeechConfig | sdk.SpeechTranslationConfig,
setMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => void,
recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer,
recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer | sdk.SpeechSynthesizer,
done: jest.DoneCallback,
...urlSubStrings: string[]
): void {
@ -645,10 +670,22 @@ describe("Connection URL Tests", () => {
setMethod(s);
const r: { recognizeOnceAsync: (cb?: (e: sdk.RecognitionResult) => void, err?: (e: string) => void) => void } = recognizerCreateMethod(s);
const r = recognizerCreateMethod(s);
objsToClose.push(r);
r.recognizeOnceAsync(
let recognizeOrSynthesizeOnceAsync: (cb: (p2: any) => void) => void;
if (r instanceof sdk.Recognizer) {
recognizeOrSynthesizeOnceAsync = (cb: (p2: any) => void): void => {
r.recognizeOnceAsync(cb);
};
} else if (r instanceof sdk.SpeechSynthesizer) {
recognizeOrSynthesizeOnceAsync = (cb: (p2: any) => void): void => {
r.speakTextAsync("", cb);
};
}
recognizeOrSynthesizeOnceAsync(
(p2: any): void => {
try {
expect(uri).not.toBeUndefined();
@ -675,7 +712,7 @@ describe("Connection URL Tests", () => {
[sdk.SpeechTranslationConfig.fromEndpoint, BuildTranslationRecognizerFromWaveFile],
[sdk.SpeechConfig.fromEndpoint, BuildIntentRecognizerFromWaveFile]])("Common URL Tests",
(createMethod: any,
recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer) => {
recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer | sdk.SpeechSynthesizer) => {
test("setServiceProperty (single)", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: setServiceProperty");

View file

@ -0,0 +1,555 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
import * as fs from "fs";
import * as sdk from "../microsoft.cognitiveservices.speech.sdk";
import { ConsoleLoggingListener, WebsocketMessageAdapter } from "../src/common.browser/Exports";
import {Events, EventType, InvalidOperationError} from "../src/common/Exports";
import { Settings } from "./Settings";
import WaitForCondition from "./Utilities";
let objsToClose: any[];
beforeAll(() => {
// override inputs, if necessary
Settings.LoadSettings();
Events.instance.attachListener(new ConsoleLoggingListener(EventType.Debug));
});
// Test cases are run linearly; the only other mechanism to demarcate them in the output is to put a console line in each case and
// report the name.
beforeEach(() => {
objsToClose = [];
// tslint:disable-next-line:no-console
console.info("---------------------------------------Starting test case-----------------------------------");
// tslint:disable-next-line:no-console
console.info("Start Time: " + new Date(Date.now()).toLocaleString());
});
afterEach(() => {
// tslint:disable-next-line:no-console
console.info("End Time: " + new Date(Date.now()).toLocaleString());
objsToClose.forEach((value: any, index: number, array: any[]) => {
if (typeof value.close === "function") {
value.close();
}
});
});
const BuildSpeechConfig: () => sdk.SpeechConfig = (): sdk.SpeechConfig => {
let s: sdk.SpeechConfig;
if (undefined === Settings.SpeechEndpoint) {
s = sdk.SpeechConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
} else {
s = sdk.SpeechConfig.fromEndpoint(new URL(Settings.SpeechEndpoint), Settings.SpeechSubscriptionKey);
}
if (undefined !== Settings.proxyServer) {
s.setProxy(Settings.proxyServer, Settings.proxyPort);
}
expect(s).not.toBeUndefined();
return s;
};
const CheckSynthesisResult: (result: sdk.SpeechSynthesisResult, reason: sdk.ResultReason) =>
void = (result: sdk.SpeechSynthesisResult, reason: sdk.ResultReason): void => {
expect(result).not.toBeUndefined();
expect(result.reason).toEqual(reason);
switch (reason) {
case sdk.ResultReason.SynthesizingAudio:
case sdk.ResultReason.SynthesizingAudioCompleted:
expect(result.audioData).not.toBeUndefined();
expect(result.audioData.byteLength).toBeGreaterThan(0);
break;
case sdk.ResultReason.SynthesizingAudioStarted:
expect(result.audioData).toBeUndefined();
break;
}
};
const CheckBinaryEqual: (arr1: ArrayBuffer, arr2: ArrayBuffer) => void =
(arr1: ArrayBuffer, arr2: ArrayBuffer): void => {
expect(arr1).not.toBeUndefined();
expect(arr2).not.toBeUndefined();
expect(arr1.byteLength).toEqual(arr2.byteLength);
const view1: Uint8Array = new Uint8Array(arr1);
const view2: Uint8Array = new Uint8Array(arr2);
for (let i: number = 0; i < arr1.byteLength; i++) {
expect(view1[i]).toEqual(view2[i]);
}
};
const ReadPullAudioOutputStream: (stream: sdk.PullAudioOutputStream, length?: number, done?: () => void, fc?: (e: string) => void) => void =
(stream: sdk.PullAudioOutputStream, length?: number, done?: () => void, fc?: (e: string) => void): void => {
const audioBuffer = new ArrayBuffer(1024);
stream.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
if (bytesRead > 0) {
ReadPullAudioOutputStream(stream, length === undefined ? undefined : length - bytesRead, done, fc);
} else {
if (length !== undefined) {
try {
expect(length).toEqual(0);
} catch (e) {
fc(e);
}
}
if (!!done) {
done();
}
}
});
};
class PushAudioOutputStreamTestCallback extends sdk.PushAudioOutputStreamCallback {
public length: number;
public isClosed: boolean = false;
constructor() {
super();
this.length = 0;
}
public write(dataBuffer: ArrayBuffer): void {
this.length += dataBuffer.byteLength;
}
public close(): void {
if (this.isClosed) {
throw new InvalidOperationError("PushAudioOutputStreamCallback already closed");
}
this.isClosed = true;
}
}
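// A hypothetical variant of the callback above (not part of this commit) that
// keeps the audio itself rather than just its length, merging the chunks on
// close() so the complete synthesized buffer can be inspected afterwards.
class BufferingAudioOutputStreamCallback extends sdk.PushAudioOutputStreamCallback {
    public audio: ArrayBuffer = new ArrayBuffer(0);
    private privChunks: Uint8Array[] = [];
    public write(dataBuffer: ArrayBuffer): void {
        this.privChunks.push(new Uint8Array(dataBuffer.slice(0))); // copy, the source buffer may be reused
    }
    public close(): void {
        let total: number = 0;
        for (const chunk of this.privChunks) {
            total += chunk.byteLength;
        }
        const merged: Uint8Array = new Uint8Array(total);
        let offset: number = 0;
        for (const chunk of this.privChunks) {
            merged.set(chunk, offset);
            offset += chunk.byteLength;
        }
        this.audio = merged.buffer;
    }
}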
test("testSpeechSynthesizer1", () => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizer1");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
const config: sdk.AudioConfig = sdk.AudioConfig.fromDefaultSpeakerOutput();
const r: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, config);
objsToClose.push(r);
expect(r).not.toBeUndefined();
expect(r instanceof sdk.SpeechSynthesizer).toEqual(true);
});
test("testSetAndGetParameters", () => {
// tslint:disable-next-line:no-console
console.info("Name: testSetAndGetParameters");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
speechConfig.speechSynthesisLanguage = "zh-CN";
speechConfig.speechSynthesisVoiceName = "zh-CN-HuihuiRUS";
speechConfig.speechSynthesisOutputFormat = sdk.SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3;
expect(speechConfig.speechSynthesisOutputFormat).toEqual(sdk.SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
objsToClose.push(s);
expect(s.properties).not.toBeUndefined();
expect(s.properties.getProperty(sdk.PropertyId.SpeechServiceConnection_SynthLanguage)).toEqual("zh-CN");
expect(s.properties.getProperty(sdk.PropertyId.SpeechServiceConnection_SynthVoice)).toEqual("zh-CN-HuihuiRUS");
expect(s.properties.getProperty(sdk.PropertyId.SpeechServiceConnection_SynthOutputFormat))
.toEqual(sdk.SpeechSynthesisOutputFormat[sdk.SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3]);
});
describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {
beforeAll(() => {
WebsocketMessageAdapter.forceNpmWebSocket = forceNodeWebSocket;
});
afterAll(() => {
WebsocketMessageAdapter.forceNpmWebSocket = false;
});
test("testSpeechSynthesizerEvent1", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizerEvent1");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, undefined);
objsToClose.push(s);
expect(s).not.toBeUndefined();
let audioLength: number = 0;
let startEventCount: number = 0;
let synthesisingEventCount: number = 0;
let completeEventCount: number = 0;
s.synthesisStarted = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
// tslint:disable-next-line:no-console
console.info("Synthesis started.");
try {
CheckSynthesisResult(e.result, sdk.ResultReason.SynthesizingAudioStarted);
} catch (e) {
done.fail(e);
}
startEventCount += 1;
};
s.synthesizing = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
// tslint:disable-next-line:no-console
console.info("Audio received with length of " + e.result.audioData.byteLength);
audioLength += e.result.audioData.byteLength - 44; // each synthesizing chunk carries its own 44-byte wav header
try {
CheckSynthesisResult(e.result, sdk.ResultReason.SynthesizingAudio);
} catch (e) {
done.fail(e);
}
synthesisingEventCount += 1;
};
s.synthesisCompleted = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
// tslint:disable-next-line:no-console
console.info("Audio received with length of " + e.result.audioData.byteLength);
try {
CheckSynthesisResult(e.result, sdk.ResultReason.SynthesizingAudioCompleted);
expect(e.result.audioData.byteLength - 44).toEqual(audioLength);
} catch (e) {
done.fail(e);
}
completeEventCount += 1;
};
s.wordBoundary = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisWordBoundaryEventArgs): void => {
try {
expect(e).not.toBeUndefined();
} catch (e) {
done.fail(e);
}
};
s.speakTextAsync("hello world.", undefined, (e: string): void => {
done.fail(e);
});
WaitForCondition( (): boolean => {
return completeEventCount !== 0;
}, (): void => {
expect(startEventCount).toEqual(1);
expect(synthesisingEventCount).toBeGreaterThan(0);
done();
});
});
test("testSpeechSynthesizerSpeakTwice", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizerSpeakTwice");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, undefined);
objsToClose.push(s);
expect(s).not.toBeUndefined();
s.speakTextAsync("hello world 1.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking finished, turn 1");
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
}, (e: string): void => {
done.fail(e);
});
s.speakTextAsync("hello world 2.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking finished, turn 2");
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
done();
}, (e: string): void => {
done.fail(e);
});
});
test("testSpeechSynthesizerToFile", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizerToFile");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
const audioConfig: sdk.AudioConfig = sdk.AudioConfig.fromAudioFileOutput("test.wav");
expect(audioConfig).not.toBeUndefined();
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
expect(s).not.toBeUndefined();
let audioLength: number = 0;
s.speakTextAsync("hello world 1.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking finished, turn 1");
audioLength += result.audioData.byteLength;
}, (e: string): void => {
done.fail(e);
});
s.speakTextAsync("hello world 2.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking finished, turn 2");
audioLength += result.audioData.byteLength;
s.close();
// wait 2 seconds before checking the file size, as the asynchronous file write may not have finished yet.
setTimeout( () => {
const fileLength = fs.statSync("test.wav").size;
expect(fileLength).toEqual(audioLength - 44);
done();
}, 2000);
}, (e: string): void => {
done.fail(e);
});
});
test("testSpeechSynthesizer: synthesis to file in turn.", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizer synthesis to file in turn.");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
speechConfig.speechSynthesisOutputFormat = sdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3;
objsToClose.push(speechConfig);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
expect(s).not.toBeUndefined();
objsToClose.push(s);
s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking finished.");
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
// wait 2 seconds before checking the file size, as the asynchronous file write may not have finished yet.
setTimeout( () => {
const fileLength = fs.statSync("test1.mp3").size;
expect(fileLength).toEqual(result.audioData.byteLength);
done();
}, 2000);
}, (e: string): void => {
done.fail(e);
}, "test1.mp3");
});
test("testSpeechSynthesizerWordBoundary", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizerWordBoundary");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
objsToClose.push(s);
expect(s).not.toBeUndefined();
let wordBoundaryCount: number = 0;
s.wordBoundary = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisWordBoundaryEventArgs): void => {
try {
expect(e).not.toBeUndefined();
expect(e.audioOffset).not.toBeUndefined();
expect(e.text).not.toBeUndefined();
expect(e.textOffset).not.toBeUndefined();
expect(e.wordLength).not.toBeUndefined();
} catch (e) {
done.fail(e);
}
wordBoundaryCount += 1;
};
s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
expect(wordBoundaryCount).toBeGreaterThan(0);
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
done();
}, (e: string): void => {
done.fail(e);
});
});
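// One practical use of the event above, sketched here with an assumed
// 100-nanosecond tick unit for audioOffset (divide by 10,000 for milliseconds):
// collect word boundaries into a simple caption timeline.
const attachCaptionCollector = (synth: sdk.SpeechSynthesizer): Array<{ ms: number; word: string }> => {
    const timeline: Array<{ ms: number; word: string }> = [];
    synth.wordBoundary = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisWordBoundaryEventArgs): void => {
        timeline.push({ ms: e.audioOffset / 10000, word: e.text });
    };
    return timeline;
};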
test("testSpeechSynthesizer: synthesis with SSML.", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizer synthesis with SSML.");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
expect(s).not.toBeUndefined();
objsToClose.push(s);
let r: sdk.SpeechSynthesisResult;
s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking text finished.");
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
r = result;
}, (e: string): void => {
done.fail(e);
});
const ssml: string =
`<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts'>
<voice name='Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)'>hello world.</voice></speak>`;
s.speakSsmlAsync(ssml, (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking ssml finished.");
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
CheckBinaryEqual(r.audioData, result.audioData);
done();
}, (e: string): void => {
done.fail(e);
});
});
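// A helper sketch (name assumed, not from this commit) making the equivalence
// checked above explicit: plain-text synthesis behaves like SSML that selects
// the service's default voice, so wrapping text this way should yield the same audio.
const wrapInSsml = (text: string, lang: string = "en-US", voiceName?: string): string => {
    const body: string = voiceName === undefined ? text : `<voice name='${voiceName}'>${text}</voice>`;
    return `<speak version='1.0' xml:lang='${lang}' xmlns='http://www.w3.org/2001/10/synthesis'>${body}</speak>`;
};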
test("testSpeechSynthesizer: synthesis with invalid key.", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizer synthesis with invalid key.");
const speechConfig: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription("invalidKey", Settings.SpeechRegion);
expect(speechConfig).not.toBeUndefined();
objsToClose.push(speechConfig);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
expect(s).not.toBeUndefined();
objsToClose.push(s);
s.SynthesisCanceled = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
try {
CheckSynthesisResult(e.result, sdk.ResultReason.Canceled);
expect(e.result.errorDetails).toContain("401");
const cancellationDetail: sdk.CancellationDetails = sdk.CancellationDetails.fromResult(e.result);
expect(cancellationDetail.ErrorCode).toEqual(sdk.CancellationErrorCode.ConnectionFailure);
expect(cancellationDetail.reason).toEqual(sdk.CancellationReason.Error);
expect(cancellationDetail.errorDetails).toEqual(e.result.errorDetails);
} catch (err) {
done.fail(err);
}
};
s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
CheckSynthesisResult(result, sdk.ResultReason.Canceled);
expect(result.errorDetails).toContain("401");
const cancellationDetail: sdk.CancellationDetails = sdk.CancellationDetails.fromResult(result);
expect(cancellationDetail.ErrorCode).toEqual(sdk.CancellationErrorCode.ConnectionFailure);
expect(cancellationDetail.reason).toEqual(sdk.CancellationReason.Error);
expect(cancellationDetail.errorDetails).toEqual(result.errorDetails);
done();
}, (e: string): void => {
done.fail(e);
});
});
test("testSpeechSynthesizer: synthesis with invalid voice name.", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizer synthesis with invalid voice name.");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
speechConfig.speechSynthesisVoiceName = "invalid";
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
expect(s).not.toBeUndefined();
objsToClose.push(s);
s.SynthesisCanceled = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
try {
CheckSynthesisResult(e.result, sdk.ResultReason.Canceled);
expect(e.result.errorDetails).toContain("voice");
const cancellationDetail: sdk.CancellationDetails = sdk.CancellationDetails.fromResult(e.result);
expect(cancellationDetail.ErrorCode).toEqual(sdk.CancellationErrorCode.BadRequestParameters);
expect(cancellationDetail.reason).toEqual(sdk.CancellationReason.Error);
expect(cancellationDetail.errorDetails).toEqual(e.result.errorDetails);
} catch (e) {
done.fail(e);
}
};
s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
CheckSynthesisResult(result, sdk.ResultReason.Canceled);
expect(result.errorDetails).toContain("voice");
const cancellationDetail: sdk.CancellationDetails = sdk.CancellationDetails.fromResult(result);
expect(cancellationDetail.ErrorCode).toEqual(sdk.CancellationErrorCode.BadRequestParameters);
expect(cancellationDetail.reason).toEqual(sdk.CancellationReason.Error);
expect(cancellationDetail.errorDetails).toEqual(result.errorDetails);
done();
}, (e: string): void => {
done.fail(e);
});
});
test("testSpeechSynthesizer: synthesis to pull audio output stream.", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizer synthesis to pull audio output stream.");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
const stream = sdk.AudioOutputStream.createPullStream();
const audioConfig: sdk.AudioConfig = sdk.AudioConfig.fromStreamOutput(stream);
expect(audioConfig).not.toBeUndefined();
setTimeout(() => {
ReadPullAudioOutputStream(stream, undefined, done, done.fail);
}, 0);
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
expect(s).not.toBeUndefined();
s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking text finished.");
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
s.close();
}, (e: string): void => {
done.fail(e);
});
});
test("testSpeechSynthesizer: synthesis to pull audio output stream 2.", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizer synthesis to pull audio output stream 2.");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
const stream = sdk.AudioOutputStream.createPullStream();
const audioConfig: sdk.AudioConfig = sdk.AudioConfig.fromStreamOutput(stream);
expect(audioConfig).not.toBeUndefined();
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
expect(s).not.toBeUndefined();
s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking text finished.");
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
s.close();
ReadPullAudioOutputStream(stream, result.audioData.byteLength - 44, done, done.fail);
}, (e: string): void => {
done.fail(e);
});
});
test("testSpeechSynthesizer: synthesis to push audio output stream.", (done: jest.DoneCallback) => {
// tslint:disable-next-line:no-console
console.info("Name: testSpeechSynthesizer synthesis to push audio output stream.");
const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
objsToClose.push(speechConfig);
const stream = new PushAudioOutputStreamTestCallback();
const audioConfig: sdk.AudioConfig = sdk.AudioConfig.fromStreamOutput(stream);
expect(audioConfig).not.toBeUndefined();
const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
expect(s).not.toBeUndefined();
s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
// tslint:disable-next-line:no-console
console.info("speaking text finished.");
CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
s.close();
expect(stream.length).toEqual(result.audioData.byteLength - 44);
expect(stream.isClosed).toEqual(true);
done();
}, (e: string): void => {
done.fail(e);
});
});
});
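// Putting it together: an end-to-end sketch of the Node-only scenario this commit
// enables, reusing the sdk import at the top of this file and assuming a placeholder
// key and region; it synthesizes one utterance straight to a wav file and then
// releases the synthesizer.
const speechCfg: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription("<subscriptionKey>", "<region>");
const fileOutput: sdk.AudioConfig = sdk.AudioConfig.fromAudioFileOutput("greeting.wav");
const synthesizer: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechCfg, fileOutput);
synthesizer.speakTextAsync("hello world.",
    (result: sdk.SpeechSynthesisResult): void => {
        if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
            synthesizer.close(); // the wav file now holds the complete audio
        }
    },
    (error: string): void => {
        synthesizer.close();
    });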