TTS implementation (#161)
Initial commit for TTS implementation, with Node.js support only.
Parent: b73c871047
Commit: 65d292ddf5
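For orientation, a minimal usage sketch of the surface this commit wires up (assuming the public SpeechSynthesizer entry point added elsewhere in this commit; the subscription key, region, and filename are placeholders):

// A minimal sketch, assuming the public SpeechSynthesizer entry point;
// key, region, and output filename are placeholders.
import {
    AudioConfig,
    SpeechConfig,
    SpeechSynthesisResult,
    SpeechSynthesizer,
} from "microsoft-cognitiveservices-speech-sdk";

const speechConfig = SpeechConfig.fromSubscription("<subscription-key>", "<region>");
// fromAudioFileOutput is added by this commit (Node.js only).
const audioConfig = AudioConfig.fromAudioFileOutput("test.wav");
const synthesizer = new SpeechSynthesizer(speechConfig, audioConfig);

synthesizer.speakTextAsync(
    "Hello, world!",
    (result: SpeechSynthesisResult): void => {
        // result.audioData carries the synthesized audio once the turn completes.
        synthesizer.close();
    },
    (error: string): void => {
        synthesizer.close();
    });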
@@ -13,3 +13,10 @@ test-javascript-junit.xml
coverage/*
*.tgz

# ignore audio files generated in tests
test*.wav
test*.mp3

# ignore files generated in tests
report[\.0-9]*json
junit.xml
@@ -568,9 +568,9 @@
"dev": true
},
"@types/node": {
"version": "12.12.14",
"resolved": "https://registry.npmjs.org/@types/node/-/node-12.12.14.tgz",
"integrity": "sha512-u/SJDyXwuihpwjXy7hOOghagLEV1KdAST6syfnOk6QZAMzZuWZqXy5aYYZbh8Jdpd4escVFP0MvftHNDb9pruA==",
"version": "12.12.30",
"resolved": "https://registry.npmjs.org/@types/node/-/node-12.12.30.tgz",
"integrity": "sha512-sz9MF/zk6qVr3pAnM0BSQvYIBK44tS75QC5N+VbWSE4DjCV/pJ+UzCW/F+vVnl7TkOPcuwQureKNtSSwjBTaMg==",
"dev": true
},
"@types/request": {
@@ -16,7 +16,8 @@
"js",
"browser",
"websocket",
"speechtotext"
"speechtotext",
"texttospeech"
],
"bugs": {
"url": "https://github.com/Microsoft/cognitive-services-speech-sdk-js/issues"
@@ -32,7 +33,8 @@
"distrib/lib/external/ocsp/ocsp": false,
"https-proxy-agent": false,
"simple-lru-cache": false,
"ws": false
"ws": false,
"fs": false
},
"main": "distrib/lib/microsoft.cognitiveservices.speech.sdk.js",
"module": "distrib/es2015/microsoft.cognitiveservices.speech.sdk.js",
@@ -46,7 +48,7 @@
],
"devDependencies": {
"@types/jest": "^24.0.23",
"@types/node": "^12.12.14",
"@types/node": "^12.12.30",
"@types/request": "^2.48.3",
"@types/ws": "^6.0.4",
"asn1.js": "^5.2.0",
@@ -10,3 +10,4 @@ export * from "./WebsocketConnection";
export * from "./WebsocketMessageAdapter";
export * from "./ReplayableAudioNode";
export * from "./ProxyInfo";
export * from "./SpeakerAudioDestination";
@@ -2,7 +2,7 @@
// Licensed under the MIT license.

import { RecognizerConfig } from "../common.speech/Exports";
import { PropertyId } from "../sdk/Exports";
import { PropertyCollection, PropertyId } from "../sdk/Exports";

export class ProxyInfo {
    private privProxyHostName: string;
@@ -17,11 +17,15 @@ export class ProxyInfo {
        this.privProxyPassword = proxyPassword;
    }

    public static fromParameters(parameters: PropertyCollection): ProxyInfo {
        return new ProxyInfo(parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyHostName),
            parseInt(parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyPort), 10),
            parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyUserName),
            parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyPassword));
    }

    public static fromRecognizerConfig(config: RecognizerConfig): ProxyInfo {
        return new ProxyInfo(config.parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyHostName),
            parseInt(config.parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyPort), 10),
            config.parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyUserName),
            config.parameters.getProperty(PropertyId.SpeechServiceConnection_ProxyPassword));
        return this.fromParameters(config.parameters);
    }

    public get HostName(): string {
@@ -0,0 +1,30 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { createNoDashGuid, IAudioDestination } from "../common/Exports";
import { AudioStreamFormat } from "../sdk/Exports";

/**
 * This is not implemented yet, just a placeholder.
 */
export class SpeakerAudioDestination implements IAudioDestination {
    private readonly privId: string;

    public constructor(audioDestinationId?: string) {
        this.privId = audioDestinationId ? audioDestinationId : createNoDashGuid();
    }

    public id(): string {
        return this.privId;
    }

    // tslint:disable-next-line:no-empty
    public write(buffer: ArrayBuffer): void {}

    // tslint:disable-next-line:no-empty
    public close(): void {}

    // tslint:disable-next-line:no-empty
    set format(format: AudioStreamFormat) {}

}
@@ -4,6 +4,7 @@
import {
    ArgumentNullError,
    ConnectionClosedEvent,
    ConnectionErrorEvent,
    ConnectionEstablishedEvent,
    ConnectionEvent,
    ConnectionMessage,
@@ -192,6 +193,7 @@ export class WebsocketMessageAdapter {
        };

        this.privWebsocketClient.onerror = (e: { error: any; message: string; type: string; target: WebSocket | ws }) => {
            this.onEvent(new ConnectionErrorEvent(this.privConnectionId, e.message, e.type));
            this.privLastErrorReceived = e.message;
        };

@@ -1,6 +1,7 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { AudioOutputFormatImpl } from "../sdk/Audio/AudioOutputFormat";
import { AudioOutputStream, PullAudioOutputStreamImpl } from "../sdk/Audio/AudioOutputStream";
import { DialogServiceTurnStateManager } from "./DialogServiceTurnStateManager";
import { ActivityPayloadResponse, MessageDataStreamType } from "./ServiceMessages/ActivityResponsePayload";
@@ -31,6 +32,7 @@ export class DialogServiceTurnState {
    public processActivityPayload(payload: ActivityPayloadResponse): PullAudioOutputStreamImpl {
        if (payload.messageDataStreamType === MessageDataStreamType.TextToSpeechAudio) {
            this.privAudioStream = AudioOutputStream.createPullStream() as PullAudioOutputStreamImpl;
            this.privAudioStream.format = AudioOutputFormatImpl.getDefaultOutputFormat();
            // tslint:disable-next-line:no-console
            // console.info("Audio start debugturn:" + this.privRequestId);
        }
@@ -7,6 +7,7 @@ export * from "./CognitiveSubscriptionKeyAuthentication";
export * from "./CognitiveTokenAuthentication";
export * from "./IAuthentication";
export * from "./IConnectionFactory";
export * from "./ISynthesisConnectionFactory";
export * from "./IntentConnectionFactory";
export * from "./RecognitionEvents";
export * from "./ServiceRecognizerBase";
@@ -15,6 +16,7 @@ export * from "./SpeechServiceInterfaces";
export * from "./WebsocketMessageFormatter";
export * from "./SpeechConnectionFactory";
export * from "./TranslationConnectionFactory";
export * from "./SpeechSynthesisConnectionFactory";
export * from "./EnumTranslation";
export * from "./ServiceMessages/Enums";
export * from "./ServiceMessages/TranslationSynthesisEnd";
@@ -36,6 +38,10 @@ export * from "./DynamicGrammarInterfaces";
export * from "./DialogServiceAdapter";
export * from "./AgentConfig";
export * from "./Transcription/Exports";
export * from "./ServiceMessages/SynthesisAudioMetadata";
export * from "./SynthesisTurn";
export * from "./SynthesisAdapterBase";
export * from "./SynthesizerConfig";

export const OutputFormatPropertyName: string = "OutputFormat";
export const CancellationErrorCodePropertyName: string = "CancellationErrorCode";
@@ -0,0 +1,13 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { IConnection } from "../common/Exports";
import { AuthInfo } from "./IAuthentication";
import { SynthesizerConfig } from "./SynthesizerConfig";

export interface ISynthesisConnectionFactory {
    create(
        config: SynthesizerConfig,
        authInfo: AuthInfo,
        connectionId?: string): IConnection;
}
@@ -0,0 +1,34 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

export interface ISynthesisMetadata {
    Type: string;
    Data: {
        Offset: number;
        text: {
            Text: string;
            Length: number;
        };
    };
}

// audio.metadata
export interface ISynthesisAudioMetadata {
    Metadata: ISynthesisMetadata[];
}

export class SynthesisAudioMetadata implements ISynthesisAudioMetadata {
    private privSynthesisAudioMetadata: ISynthesisAudioMetadata;

    private constructor(json: string) {
        this.privSynthesisAudioMetadata = JSON.parse(json);
    }

    public static fromJSON(json: string): SynthesisAudioMetadata {
        return new SynthesisAudioMetadata(json);
    }

    public get Metadata(): ISynthesisMetadata[] {
        return this.privSynthesisAudioMetadata.Metadata;
    }
}
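To illustrate the shape these interfaces describe, a hypothetical "audio.metadata" message body (the offset and text values are invented for illustration):

// A hypothetical "audio.metadata" payload in the ISynthesisAudioMetadata shape;
// values are invented for illustration only.
const sampleJson: string = JSON.stringify({
    Metadata: [{
        Data: {
            Offset: 500000, // presumably in 100-nanosecond ticks
            text: { Text: "Hello", Length: 5 },
        },
        Type: "WordBoundary",
    }],
});

const metadata: SynthesisAudioMetadata = SynthesisAudioMetadata.fromJSON(sampleJson);
// metadata.Metadata[0].Data.text.Text === "Hello"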
@@ -7,12 +7,14 @@ const PathHeaderName: string = "path";
const ContentTypeHeaderName: string = "content-type";
const RequestIdHeaderName: string = "x-requestid";
const RequestTimestampHeaderName: string = "x-timestamp";
const RequestStreamIdHeaderName: string = "x-streamid";

export class SpeechConnectionMessage extends ConnectionMessage {

    private privPath: string;
    private privRequestId: string;
    private privContentType: string;
    private privStreamId: string;
    private privAdditionalHeaders: IStringDictionary<string>;

    public constructor(
@@ -21,6 +23,7 @@ export class SpeechConnectionMessage extends ConnectionMessage {
        requestId: string,
        contentType: string,
        body: any,
        streamId?: string,
        additionalHeaders?: IStringDictionary<string>,
        id?: string) {

@@ -40,6 +43,10 @@ export class SpeechConnectionMessage extends ConnectionMessage {
            headers[ContentTypeHeaderName] = contentType;
        }

        if (streamId) {
            headers[RequestStreamIdHeaderName] = streamId;
        }

        if (additionalHeaders) {
            for (const headerName in additionalHeaders) {
                if (headerName) {
@@ -58,6 +65,7 @@ export class SpeechConnectionMessage extends ConnectionMessage {
        this.privPath = path;
        this.privRequestId = requestId;
        this.privContentType = contentType;
        this.privStreamId = streamId;
        this.privAdditionalHeaders = additionalHeaders;
    }

@@ -73,6 +81,10 @@ export class SpeechConnectionMessage extends ConnectionMessage {
        return this.privContentType;
    }

    public get streamId(): string {
        return this.privStreamId;
    }

    public get additionalHeaders(): IStringDictionary<string> {
        return this.privAdditionalHeaders;
    }
@@ -82,6 +94,7 @@ export class SpeechConnectionMessage extends ConnectionMessage {
        let requestId = null;
        let contentType = null;
        let requestTimestamp = null;
        let streamId = null;
        const additionalHeaders: IStringDictionary<string> = {};

        if (message.headers) {
@@ -95,6 +108,8 @@ export class SpeechConnectionMessage extends ConnectionMessage {
                requestTimestamp = message.headers[headerName];
            } else if (headerName.toLowerCase() === ContentTypeHeaderName.toLowerCase()) {
                contentType = message.headers[headerName];
            } else if (headerName.toLowerCase() === RequestStreamIdHeaderName.toLowerCase()) {
                streamId = message.headers[headerName];
            } else {
                additionalHeaders[headerName] = message.headers[headerName];
            }
@@ -108,6 +123,7 @@ export class SpeechConnectionMessage extends ConnectionMessage {
            requestId,
            contentType,
            message.body,
            streamId,
            additionalHeaders,
            message.id);
    }
@@ -0,0 +1,55 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import {
    ProxyInfo,
    WebsocketConnection, WebsocketMessageAdapter,
} from "../common.browser/Exports";
import {
    IConnection,
    IStringDictionary
} from "../common/Exports";
import { PropertyId } from "../sdk/Exports";
import {
    AuthInfo,
    SynthesizerConfig,
    WebsocketMessageFormatter
} from "./Exports";
import { ISynthesisConnectionFactory } from "./ISynthesisConnectionFactory";
import {
    QueryParameterNames
} from "./QueryParameterNames";

export class SpeechSynthesisConnectionFactory implements ISynthesisConnectionFactory {

    private readonly synthesisUri: string = "/cognitiveservices/websocket/v1";

    public create = (
        config: SynthesizerConfig,
        authInfo: AuthInfo,
        connectionId?: string): IConnection => {

        let endpoint: string = config.parameters.getProperty(PropertyId.SpeechServiceConnection_Endpoint, undefined);
        const region: string = config.parameters.getProperty(PropertyId.SpeechServiceConnection_Region, undefined);
        const hostSuffix = (region && region.toLowerCase().startsWith("china")) ? ".azure.cn" : ".microsoft.com";
        const host: string = config.parameters.getProperty(PropertyId.SpeechServiceConnection_Host, "wss://" + region + ".tts.speech" + hostSuffix);

        const queryParams: IStringDictionary<string> = {};

        if (!endpoint) {
            endpoint = host + this.synthesisUri;
        }

        const headers: IStringDictionary<string> = {};
        if (authInfo.token !== undefined && authInfo.token !== "") {
            headers[authInfo.headerName] = authInfo.token;
        }
        headers[QueryParameterNames.ConnectionIdHeader] = connectionId;

        config.parameters.setProperty(PropertyId.SpeechServiceConnection_Url, endpoint);

        // set forceNpmWebSocket to true as we need to pass the auth info in websocket headers.
        WebsocketMessageAdapter.forceNpmWebSocket = true;
        return new WebsocketConnection(endpoint, queryParams, headers, new WebsocketMessageFormatter(), ProxyInfo.fromParameters(config.parameters), connectionId);
    }
}
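For example, with region "westus" and no explicit Endpoint or Host override, the factory above connects to wss://westus.tts.speech.microsoft.com/cognitiveservices/websocket/v1.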
@@ -0,0 +1,625 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import {
    ArgumentNullError,
    ConnectionClosedEvent,
    ConnectionEvent,
    ConnectionMessage,
    ConnectionOpenResponse,
    ConnectionState,
    createNoDashGuid,
    EventSource,
    IAudioDestination,
    IConnection,
    IDisposable,
    MessageType,
    Promise,
    PromiseHelper,
    PromiseResult,
    ServiceEvent,
} from "../common/Exports";
import { AudioOutputFormatImpl } from "../sdk/Audio/AudioOutputFormat";
import {
    CancellationErrorCode,
    CancellationReason,
    PropertyCollection,
    PropertyId,
    ResultReason,
    SpeechSynthesisEventArgs,
    SpeechSynthesisResult,
    SpeechSynthesisWordBoundaryEventArgs,
    SpeechSynthesizer,
} from "../sdk/Exports";
import { Callback } from "../sdk/Transcription/IConversation";
import {
    AgentConfig,
    CancellationErrorCodePropertyName,
    DynamicGrammarBuilder,
    RequestSession,
    SpeechContext,
    SynthesisAudioMetadata,
    SynthesisTurn,
} from "./Exports";
import { AuthInfo, IAuthentication } from "./IAuthentication";
import { ISynthesisConnectionFactory } from "./ISynthesisConnectionFactory";
import { SpeechConnectionMessage } from "./SpeechConnectionMessage.Internal";
import { SynthesizerConfig } from "./SynthesizerConfig";

export class SynthesisAdapterBase implements IDisposable {
    protected privRequestSession: RequestSession;
    protected privSynthesisTurn: SynthesisTurn;
    protected privConnectionId: string;
    protected privSynthesizerConfig: SynthesizerConfig;
    protected privSpeechSynthesizer: SpeechSynthesizer;
    protected privSuccessCallback: (e: SpeechSynthesisResult) => void;
    protected privErrorCallback: (e: string) => void;

    public get synthesisContext(): SpeechContext {
        return this.privSpeechContext;
    }

    public get dynamicGrammar(): DynamicGrammarBuilder {
        return this.privDynamicGrammar;
    }

    public get agentConfig(): AgentConfig {
        return this.privAgentConfig;
    }

    public get connectionEvents(): EventSource<ConnectionEvent> {
        return this.privConnectionEvents;
    }

    public get serviceEvents(): EventSource<ServiceEvent> {
        return this.privServiceEvents;
    }

    protected speakOverride: (ssml: string, requestId: string, sc: (e: SpeechSynthesisResult) => void, ec: (e: string) => void) => any = undefined;

    // Called when telemetry data is sent to the service.
    // Used for testing Telemetry capture.
    public static telemetryData: (json: string) => void;
    public static telemetryDataEnabled: boolean = true;

    public set activityTemplate(messagePayload: string) { this.privActivityTemplate = messagePayload; }
    public get activityTemplate(): string { return this.privActivityTemplate; }

    protected receiveMessageOverride: () => any = undefined;

    protected connectImplOverride: (isUnAuthorized: boolean) => any = undefined;

    protected configConnectionOverride: () => any = undefined;

    protected fetchConnectionOverride: () => any = undefined;

    public set audioOutputFormat(format: AudioOutputFormatImpl) {
        this.privAudioOutputFormat = format;
        this.privSynthesisTurn.audioOutputFormat = format;
        if (this.privSessionAudioDestination !== undefined) {
            this.privSessionAudioDestination.format = format;
        }
    }
    private privAuthentication: IAuthentication;
    private privConnectionFactory: ISynthesisConnectionFactory;

    // A promise for a configured connection.
    // Do not consume directly, call fetchConnection instead.
    private privConnectionConfigurationPromise: Promise<IConnection>;

    // A promise for a connection, but one that has not had the speech context sent yet.
    // Do not consume directly, call fetchConnection instead.
    private privConnectionPromise: Promise<IConnection>;
    private privAuthFetchEventId: string;
    private privIsDisposed: boolean;
    private privConnectionEvents: EventSource<ConnectionEvent>;
    private privServiceEvents: EventSource<ServiceEvent>;
    private privSpeechContext: SpeechContext;
    private privDynamicGrammar: DynamicGrammarBuilder;
    private privAgentConfig: AgentConfig;
    private privServiceHasSentMessage: boolean;
    private privActivityTemplate: string;
    private privAudioOutputFormat: AudioOutputFormatImpl;
    private privSessionAudioDestination: IAudioDestination;

    public constructor(
        authentication: IAuthentication,
        connectionFactory: ISynthesisConnectionFactory,
        synthesizerConfig: SynthesizerConfig,
        speechSynthesizer: SpeechSynthesizer,
        audioDestination: IAudioDestination) {

        if (!authentication) {
            throw new ArgumentNullError("authentication");
        }

        if (!connectionFactory) {
            throw new ArgumentNullError("connectionFactory");
        }

        if (!synthesizerConfig) {
            throw new ArgumentNullError("synthesizerConfig");
        }

        this.privAuthentication = authentication;
        this.privConnectionFactory = connectionFactory;
        this.privSynthesizerConfig = synthesizerConfig;
        this.privIsDisposed = false;
        this.privSpeechSynthesizer = speechSynthesizer;
        this.privSessionAudioDestination = audioDestination;
        this.privSynthesisTurn = new SynthesisTurn();
        this.privConnectionEvents = new EventSource<ConnectionEvent>();
        this.privServiceEvents = new EventSource<ServiceEvent>();
        this.privDynamicGrammar = new DynamicGrammarBuilder();
        this.privSpeechContext = new SpeechContext(this.privDynamicGrammar);
        this.privAgentConfig = new AgentConfig();

        this.connectionEvents.attach((connectionEvent: ConnectionEvent): void => {
            if (connectionEvent.name === "ConnectionClosedEvent") {
                const connectionClosedEvent = connectionEvent as ConnectionClosedEvent;
                this.cancelSynthesisLocal(CancellationReason.Error,
                    connectionClosedEvent.statusCode === 1007 ? CancellationErrorCode.BadRequestParameters : CancellationErrorCode.ConnectionFailure,
                    connectionClosedEvent.reason + " websocket error code: " + connectionClosedEvent.statusCode);
            }
        });
    }

    public static addHeader(audio: ArrayBuffer, format: AudioOutputFormatImpl): ArrayBuffer {
        if (!format.hasHeader) {
            return audio;
        }
        format.updateHeader(audio.byteLength);
        const tmp = new Uint8Array(audio.byteLength + format.header.byteLength);
        tmp.set(new Uint8Array(format.header), 0);
        tmp.set(new Uint8Array(audio), format.header.byteLength);
        return tmp.buffer;
    }

    public isDisposed(): boolean {
        return this.privIsDisposed;
    }

    public dispose(reason?: string): void {
        this.privIsDisposed = true;
        if (this.privSessionAudioDestination !== undefined) {
            this.privSessionAudioDestination.close();
        }
        if (this.privConnectionConfigurationPromise) {
            this.privConnectionConfigurationPromise.onSuccessContinueWith((connection: IConnection) => {
                connection.dispose(reason);
            });
        }
    }

    public connect(): void {
        this.connectImpl().result();
    }

    public connectAsync(cb?: Callback, err?: Callback): void {
        this.connectImpl().continueWith((promiseResult: PromiseResult<IConnection>) => {
            try {
                if (promiseResult.isError) {
                    if (!!err) {
                        err(promiseResult.error);
                    }
                } else if (promiseResult.isCompleted) {
                    if (!!cb) {
                        cb();
                    }
                }
            } catch (e) {
                if (!!err) {
                    err(e);
                }
            }
        });
    }

    public Speak(
        text: string,
        isSSML: boolean,
        requestId: string,
        successCallback: (e: SpeechSynthesisResult) => void,
        errorCallBack: (e: string) => void,
        audioDestination: IAudioDestination,
    ): Promise<boolean> {

        let ssml: string;

        if (isSSML) {
            ssml = text;
        } else {
            ssml = SpeechSynthesizer.buildSsml(text, this.privSynthesizerConfig.parameters);
        }

        if (this.speakOverride !== undefined) {
            return this.speakOverride(ssml, requestId, successCallback, errorCallBack);
        }

        this.privSuccessCallback = successCallback;
        this.privErrorCallback = errorCallBack;

        this.privSynthesisTurn.startNewSynthesis(requestId, text, isSSML, audioDestination);

        return this.fetchConnection().continueWithPromise<boolean>((connection: PromiseResult<IConnection>) => {
            if (connection.isError) {
                this.cancelSynthesisLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, connection.error);
                return PromiseHelper.fromError(connection.error);
            }
            return this.sendSynthesisContext(connection.result).continueWithPromise<boolean>((result: PromiseResult<boolean>): Promise<boolean> => {
                if (result.isError) {
                    this.cancelSynthesisLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, result.error);
                    return PromiseHelper.fromError(result.error);
                }
                return this.sendSsmlMessage(connection.result, ssml, requestId).continueWithPromise<boolean>((result: PromiseResult<boolean>): Promise<boolean> => {
                    if (result.isError) {
                        this.cancelSynthesisLocal(CancellationReason.Error, CancellationErrorCode.ConnectionFailure, result.error);
                        return PromiseHelper.fromError(result.error);
                    }

                    const synthesisStartEventArgs: SpeechSynthesisEventArgs = new SpeechSynthesisEventArgs(
                        new SpeechSynthesisResult(
                            requestId,
                            ResultReason.SynthesizingAudioStarted,
                        )
                    );

                    if (!!this.privSpeechSynthesizer.synthesisStarted) {
                        this.privSpeechSynthesizer.synthesisStarted(this.privSpeechSynthesizer, synthesisStartEventArgs);
                    }

                    const messageRetrievalPromise = this.receiveMessage();
                    return PromiseHelper.fromResult(true);
                });
            });
        });
    }

    // Cancels synthesis.
    protected cancelSynthesis(
        requestId: string,
        cancellationReason: CancellationReason,
        errorCode: CancellationErrorCode,
        error: string): void {
        const properties: PropertyCollection = new PropertyCollection();
        properties.setProperty(CancellationErrorCodePropertyName, CancellationErrorCode[errorCode]);
        const result: SpeechSynthesisResult = new SpeechSynthesisResult(
            requestId,
            ResultReason.Canceled,
            undefined,
            error,
            properties
        );

        if (!!this.privSpeechSynthesizer.SynthesisCanceled) {
            const cancelEvent: SpeechSynthesisEventArgs = new SpeechSynthesisEventArgs(result);
            try {
                this.privSpeechSynthesizer.SynthesisCanceled(this.privSpeechSynthesizer, cancelEvent);
                /* tslint:disable:no-empty */
            } catch { }
        }

        if (!!this.privSuccessCallback) {
            try {
                this.privSuccessCallback(result);
                this.privSuccessCallback = undefined;
                /* tslint:disable:no-empty */
            } catch { }
        }
    }

    // Cancels synthesis.
    protected cancelSynthesisLocal(
        cancellationReason: CancellationReason,
        errorCode: CancellationErrorCode,
        error: string): void {

        if (!!this.privSynthesisTurn.isSynthesizing) {
            this.privSynthesisTurn.onStopSynthesizing();

            this.cancelSynthesis(
                this.privSynthesisTurn.requestId,
                cancellationReason,
                errorCode,
                error);
        }
    }

    protected processTypeSpecificMessages(
        connectionMessage: SpeechConnectionMessage,
        successCallback?: (e: SpeechSynthesisResult) => void,
        errorCallBack?: (e: string) => void): boolean {
        return true;
    }

    protected receiveMessage = (): Promise<IConnection> => {
        return this.fetchConnection().on((connection: IConnection): Promise<IConnection> => {
            return connection.read()
                .onSuccessContinueWithPromise((message: ConnectionMessage) => {

                    if (this.receiveMessageOverride !== undefined) {
                        return this.receiveMessageOverride();
                    }
                    if (this.privIsDisposed) {
                        // We're done.
                        return PromiseHelper.fromResult(undefined);
                    }

                    // indicates we are draining the queue and it came with no message;
                    if (!message) {
                        if (!this.privSynthesisTurn.isSynthesizing) {
                            return PromiseHelper.fromResult(true);
                        } else {
                            return this.receiveMessage();
                        }
                    }

                    this.privServiceHasSentMessage = true;

                    const connectionMessage = SpeechConnectionMessage.fromConnectionMessage(message);

                    if (connectionMessage.requestId.toLowerCase() === this.privSynthesisTurn.requestId.toLowerCase()) {
                        switch (connectionMessage.path.toLowerCase()) {
                            case "turn.start":
                                this.privSynthesisTurn.onServiceTurnStartResponse();
                                break;
                            case "response":
                                this.privSynthesisTurn.onServiceResponseMessage(connectionMessage.textBody);
                                break;
                            case "audio":
                                if (this.privSynthesisTurn.streamId.toLowerCase() === connectionMessage.streamId.toLowerCase()
                                    && !!connectionMessage.binaryBody) {
                                    this.privSynthesisTurn.onAudioChunkReceived(connectionMessage.binaryBody);
                                    if (!!this.privSpeechSynthesizer.synthesizing) {
                                        try {
                                            const audioWithHeader = SynthesisAdapterBase.addHeader(connectionMessage.binaryBody, this.privSynthesisTurn.audioOutputFormat);
                                            const ev: SpeechSynthesisEventArgs = new SpeechSynthesisEventArgs(
                                                new SpeechSynthesisResult(
                                                    this.privSynthesisTurn.requestId,
                                                    ResultReason.SynthesizingAudio,
                                                    audioWithHeader));
                                            this.privSpeechSynthesizer.synthesizing(this.privSpeechSynthesizer, ev);
                                        } catch (error) {
                                            // Not going to let errors in the event handler
                                            // trip things up.
                                        }
                                    }
                                    if (this.privSessionAudioDestination !== undefined) {
                                        this.privSessionAudioDestination.write(connectionMessage.binaryBody);
                                    }
                                }
                                break;
                            case "audio.metadata":
                                const metadataList = SynthesisAudioMetadata.fromJSON(connectionMessage.textBody).Metadata;
                                for (const metadata of metadataList) {
                                    if (metadata.Type.toLowerCase() === "WordBoundary".toLowerCase()) {

                                        this.privSynthesisTurn.onWordBoundaryEvent(metadata.Data.text.Text);

                                        const ev: SpeechSynthesisWordBoundaryEventArgs = new SpeechSynthesisWordBoundaryEventArgs(
                                            metadata.Data.Offset,
                                            metadata.Data.text.Text,
                                            metadata.Data.text.Length,
                                            this.privSynthesisTurn.currentTextOffset);

                                        if (!!this.privSpeechSynthesizer.wordBoundary) {
                                            try {
                                                this.privSpeechSynthesizer.wordBoundary(this.privSpeechSynthesizer, ev);
                                            } catch (error) {
                                                // Not going to let errors in the event handler
                                                // trip things up.
                                            }
                                        }
                                    }
                                }
                                break;
                            case "turn.end":
                                this.privSynthesisTurn.onServiceTurnEndResponse();
                                let result: SpeechSynthesisResult;
                                try {
                                    result = new SpeechSynthesisResult(
                                        this.privSynthesisTurn.requestId,
                                        ResultReason.SynthesizingAudioCompleted,
                                        this.privSynthesisTurn.allReceivedAudioWithHeader
                                    );
                                    if (!!this.privSuccessCallback) {
                                        this.privSuccessCallback(result);
                                    }
                                } catch (error) {
                                    if (!!this.privErrorCallback) {
                                        this.privErrorCallback(error);
                                    }
                                }
                                if (this.privSpeechSynthesizer.synthesisCompleted) {
                                    try {
                                        this.privSpeechSynthesizer.synthesisCompleted(
                                            this.privSpeechSynthesizer,
                                            new SpeechSynthesisEventArgs(result)
                                        );
                                    } catch (e) {
                                        // Not going to let errors in the event handler
                                        // trip things up.
                                    }
                                }
                                break;

                            default:

                                if (!this.processTypeSpecificMessages(connectionMessage)) {
                                    // here are some messages that the derived class has not processed, dispatch them to connect class
                                    if (!!this.privServiceEvents) {
                                        this.serviceEvents.onEvent(new ServiceEvent(connectionMessage.path.toLowerCase(), connectionMessage.textBody));
                                    }
                                }

                        }
                    }

                    return this.receiveMessage();
                });
        }, (error: string) => {
        });
    }

    protected sendSynthesisContext = (connection: IConnection): Promise<boolean> => {
        const synthesisContextJson = JSON.stringify(this.buildSynthesisContext());

        if (synthesisContextJson) {
            return connection.send(new SpeechConnectionMessage(
                MessageType.Text,
                "synthesis.context",
                this.privSynthesisTurn.requestId,
                "application/json",
                synthesisContextJson));
        }
        return PromiseHelper.fromResult(true);
    }

    // Establishes a websocket connection to the end point.
    protected connectImpl(isUnAuthorized: boolean = false): Promise<IConnection> {

        if (this.connectImplOverride !== undefined) {
            return this.connectImplOverride(isUnAuthorized);
        }

        if (this.privConnectionPromise) {
            if (this.privConnectionPromise.result().isCompleted &&
                (this.privConnectionPromise.result().isError
                    || this.privConnectionPromise.result().result.state() === ConnectionState.Disconnected) &&
                this.privServiceHasSentMessage === true) {
                this.privConnectionId = null;
                this.privConnectionPromise = null;
                this.privServiceHasSentMessage = false;
                return this.connectImpl();
            } else {
                return this.privConnectionPromise;
            }
        }

        this.privAuthFetchEventId = createNoDashGuid();
        this.privConnectionId = createNoDashGuid();

        this.privSynthesisTurn.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);

        const authPromise = isUnAuthorized ? this.privAuthentication.fetchOnExpiry(this.privAuthFetchEventId) : this.privAuthentication.fetch(this.privAuthFetchEventId);

        this.privConnectionPromise = authPromise
            .continueWithPromise((result: PromiseResult<AuthInfo>) => {
                if (result.isError) {
                    // this.privRequestSession.onAuthCompleted(true, result.error);
                    throw new Error(result.error);
                } else {
                    // this.privRequestSession.onAuthCompleted(false);
                }

                const connection: IConnection = this.privConnectionFactory.create(this.privSynthesizerConfig, result.result, this.privConnectionId);

                // Attach to the underlying event. No need to hold onto the detach pointers as in the event the connection goes away,
                // it'll stop sending events.
                connection.events.attach((event: ConnectionEvent) => {
                    this.connectionEvents.onEvent(event);
                });

                return connection.open().onSuccessContinueWithPromise((response: ConnectionOpenResponse): Promise<IConnection> => {
                    if (response.statusCode === 200) {
                        this.privSynthesisTurn.onPreConnectionStart(this.privAuthFetchEventId, this.privConnectionId);
                        this.privSynthesisTurn.onConnectionEstablishCompleted(response.statusCode);

                        return PromiseHelper.fromResult<IConnection>(connection);
                    } else if (response.statusCode === 403 && !isUnAuthorized) {
                        return this.connectImpl(true);
                    } else {
                        this.privSynthesisTurn.onConnectionEstablishCompleted(response.statusCode, response.reason);
                        return PromiseHelper.fromError<IConnection>(`Unable to contact server. StatusCode: ${response.statusCode}, ${this.privSynthesizerConfig.parameters.getProperty(PropertyId.SpeechServiceConnection_Endpoint)} Reason: ${response.reason}`);
                    }
                });
            });

        return this.privConnectionPromise;
    }

    protected sendSpeechServiceConfig = (connection: IConnection, SpeechServiceConfigJson: string): Promise<boolean> => {

        if (SpeechServiceConfigJson) {
            return connection.send(new SpeechConnectionMessage(
                MessageType.Text,
                "speech.config",
                this.privSynthesisTurn.requestId,
                "application/json",
                SpeechServiceConfigJson));
        }

        return PromiseHelper.fromResult(true);
    }

    protected sendSsmlMessage = (connection: IConnection, ssml: string, requestId: string): Promise<boolean> => {
        return connection.send(new SpeechConnectionMessage(
            MessageType.Text,
            "ssml",
            requestId,
            "application/ssml+xml",
            ssml));
    }

    private fetchConnection = (): Promise<IConnection> => {
        if (this.fetchConnectionOverride !== undefined) {
            return this.fetchConnectionOverride();
        }

        return this.configureConnection();
    }

    // Takes an established websocket connection to the endpoint and sends speech configuration information.
    private configureConnection(): Promise<IConnection> {
        if (this.configConnectionOverride !== undefined) {
            return this.configConnectionOverride();
        }

        if (this.privConnectionConfigurationPromise) {
            if (this.privConnectionConfigurationPromise.result().isCompleted &&
                (this.privConnectionConfigurationPromise.result().isError
                    || this.privConnectionConfigurationPromise.result().result.state() === ConnectionState.Disconnected)) {

                this.privConnectionConfigurationPromise = null;
                return this.configureConnection();
            } else {
                return this.privConnectionConfigurationPromise;
            }
        }

        this.privConnectionConfigurationPromise = this.connectImpl().onSuccessContinueWithPromise((connection: IConnection): Promise<IConnection> => {
            return this.sendSpeechServiceConfig(connection, this.privSynthesizerConfig.SpeechServiceConfig.serialize())
                .onSuccessContinueWith((_: boolean) => {
                    return connection;
                });
        });

        return this.privConnectionConfigurationPromise;
    }

    private buildSynthesisContext(): ISynthesisContext {
        return {
            synthesis: {
                audio: {
                    metadataOptions: {
                        sentenceBoundaryEnabled: false,
                        wordBoundaryEnabled: (!!this.privSpeechSynthesizer.wordBoundary),
                    },
                    outputFormat: this.privAudioOutputFormat.requestAudioFormatString,
                }
            }
        };
    }
}

interface ISynthesisContext {
    synthesis: {
        audio: {
            outputFormat: string,
            metadataOptions: {
                wordBoundaryEnabled: boolean,
                sentenceBoundaryEnabled: boolean,
            }
        }
    };
}
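In short, the adapter above drives one synthesis turn over the websocket: it sends speech.config, then synthesis.context, then the ssml message, and consumes turn.start, response (which carries the stream id), audio chunks, optional audio.metadata word-boundary events, and finally turn.end, at which point the accumulated audio is delivered through the success callback.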
@@ -0,0 +1,68 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { EventType, PlatformEvent } from "../common/Exports";

export class SpeechSynthesisEvent extends PlatformEvent {
    private privRequestId: string;

    constructor(eventName: string, requestId: string, eventType: EventType = EventType.Info) {
        super(eventName, eventType);

        this.privRequestId = requestId;
    }

    public get requestId(): string {
        return this.privRequestId;
    }
}

// tslint:disable-next-line:max-classes-per-file
export class SynthesisTriggeredEvent extends SpeechSynthesisEvent {
    private privSessionAudioDestinationId: string;
    private privTurnAudioDestinationId: string;

    constructor(requestId: string, sessionAudioDestinationId: string, turnAudioDestinationId: string) {
        super("SynthesisTriggeredEvent", requestId);

        this.privSessionAudioDestinationId = sessionAudioDestinationId;
        this.privTurnAudioDestinationId = turnAudioDestinationId;
    }

    public get audioSessionDestinationId(): string {
        return this.privSessionAudioDestinationId;
    }

    public get audioTurnDestinationId(): string {
        return this.privTurnAudioDestinationId;
    }
}

// tslint:disable-next-line:max-classes-per-file
export class ConnectingToSynthesisServiceEvent extends SpeechSynthesisEvent {
    private privAuthFetchEventId: string;

    constructor(requestId: string, authFetchEventId: string) {
        super("ConnectingToSynthesisServiceEvent", requestId);
        this.privAuthFetchEventId = authFetchEventId;
    }

    public get authFetchEventId(): string {
        return this.privAuthFetchEventId;
    }
}

// tslint:disable-next-line:max-classes-per-file
export class SynthesisStartedEvent extends SpeechSynthesisEvent {
    private privAuthFetchEventId: string;

    constructor(requestId: string, authFetchEventId: string) {
        super("SynthesisStartedEvent", requestId);

        this.privAuthFetchEventId = authFetchEventId;
    }

    public get authFetchEventId(): string {
        return this.privAuthFetchEventId;
    }
}
@@ -0,0 +1,250 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import {
    createNoDashGuid,
    Deferred,
    Events, IAudioDestination,
    Promise,
    PromiseState
} from "../common/Exports";
import { AudioOutputFormatImpl } from "../sdk/Audio/AudioOutputFormat";
import { PullAudioOutputStreamImpl } from "../sdk/Audio/AudioOutputStream";
import { SynthesisAdapterBase } from "./SynthesisAdapterBase";
import {
    ConnectingToSynthesisServiceEvent,
    SpeechSynthesisEvent,
    SynthesisStartedEvent,
    SynthesisTriggeredEvent,
} from "./SynthesisEvents";

export interface ISynthesisResponseContext {
    serviceTag: string;
}

export interface ISynthesisResponseAudio {
    type: string;
    streamId: string;
}

export interface ISynthesisResponse {
    context: ISynthesisResponseContext;
    audio: ISynthesisResponseAudio;
}

export class SynthesisTurn {

    public get requestId(): string {
        return this.privRequestId;
    }

    public get streamId(): string {
        return this.privStreamId;
    }

    public set streamId(value: string) {
        this.privStreamId = value;
    }

    public get audioOutputFormat(): AudioOutputFormatImpl {
        return this.privAudioOutputFormat;
    }

    public set audioOutputFormat(format: AudioOutputFormatImpl) {
        this.privAudioOutputFormat = format;
    }

    public get turnCompletionPromise(): Promise<boolean> {
        return this.privTurnDeferral.promise();
    }

    public get isSynthesisEnded(): boolean {
        return this.privIsSynthesisEnded;
    }

    public get isSynthesizing(): boolean {
        return this.privIsSynthesizing;
    }

    public get currentTextOffset(): number {
        return this.privTextOffset;
    }

    // The number of bytes received for the current turn
    public get bytesReceived(): number {
        return this.privBytesReceived;
    }

    public get allReceivedAudio(): ArrayBuffer {
        if (!!this.privReceivedAudio) {
            return this.privReceivedAudio;
        }
        if (!this.privIsSynthesisEnded) {
            return null;
        }
        this.readAllAudioFromStream();
        return this.allReceivedAudio;
    }

    public get allReceivedAudioWithHeader(): ArrayBuffer {
        if (!!this.privReceivedAudioWithHeader) {
            return this.privReceivedAudioWithHeader;
        }
        if (!this.privIsSynthesisEnded) {
            return null;
        }
        if (this.audioOutputFormat.hasHeader) {
            this.privReceivedAudioWithHeader = SynthesisAdapterBase.addHeader(this.allReceivedAudio, this.audioOutputFormat);
            return this.allReceivedAudioWithHeader;
        } else {
            return this.allReceivedAudio;
        }
    }
    private privIsDisposed: boolean = false;
    private privAudioNodeId: string;
    private privAuthFetchEventId: string;
    private privIsSynthesizing: boolean = false;
    private privIsSynthesisEnded: boolean = false;
    private privBytesReceived: number = 0;
    private privRequestId: string;
    private privStreamId: string;
    private privTurnDeferral: Deferred<boolean>;
    private privAudioOutputFormat: AudioOutputFormatImpl;
    private privAudioOutputStream: PullAudioOutputStreamImpl;
    private privReceivedAudio: ArrayBuffer;
    private privReceivedAudioWithHeader: ArrayBuffer;
    private privTextOffset: number = 0;
    private privRawText: string;
    private privIsSSML: boolean;
    private privTurnAudioDestination: IAudioDestination;

    constructor() {
        this.privRequestId = createNoDashGuid();
        this.privAudioNodeId = createNoDashGuid();
        this.privTurnDeferral = new Deferred<boolean>();

        // We're not in a turn, so resolve.
        this.privTurnDeferral.resolve(true);
    }

    public startNewSynthesis(requestId: string, rawText: string, isSSML: boolean, audioDestination?: IAudioDestination): void {
        this.privIsSynthesisEnded = false;
        this.privIsSynthesizing = true;
        this.privRequestId = requestId;
        this.privRawText = rawText;
        this.privIsSSML = isSSML;
        this.privAudioOutputStream = new PullAudioOutputStreamImpl();
        this.privAudioOutputStream.format = this.privAudioOutputFormat;
        this.privReceivedAudio = null;
        this.privReceivedAudioWithHeader = null;
        this.privBytesReceived = 0;
        if (audioDestination !== undefined) {
            this.privTurnAudioDestination = audioDestination;
            this.privTurnAudioDestination.format = this.privAudioOutputFormat;
        }
        this.onEvent(new SynthesisTriggeredEvent(this.requestId, undefined, audioDestination === undefined ? undefined : audioDestination.id()));
    }

    public onPreConnectionStart = (authFetchEventId: string, connectionId: string): void => {
        this.privAuthFetchEventId = authFetchEventId;
        this.onEvent(new ConnectingToSynthesisServiceEvent(this.privRequestId, this.privAuthFetchEventId));
    }

    public onAuthCompleted = (isError: boolean, error?: string): void => {
        if (isError) {
            this.onComplete();
        }
    }

    public onConnectionEstablishCompleted = (statusCode: number, reason?: string): void => {
        if (statusCode === 200) {
            this.onEvent(new SynthesisStartedEvent(this.requestId, this.privAuthFetchEventId));
            this.privBytesReceived = 0;
            return;
        } else if (statusCode === 403) {
            this.onComplete();
        }
    }

    public onServiceResponseMessage = (responseJson: string): void => {
        const response: ISynthesisResponse = JSON.parse(responseJson);
        this.streamId = response.audio.streamId;
    }

    public onServiceTurnEndResponse = (): void => {
        this.privTurnDeferral.resolve(true);
        this.onComplete();
    }

    public onServiceTurnStartResponse = (): void => {
        if (this.privTurnDeferral.state() === PromiseState.None) {
            // What? How are we starting a turn with another not done?
            this.privTurnDeferral.reject("Another turn started before current completed.");
        }

        this.privTurnDeferral = new Deferred<boolean>();
    }

    public onAudioChunkReceived(data: ArrayBuffer): void {
        if (this.isSynthesizing) {
            this.privAudioOutputStream.write(data);
            this.privBytesReceived += data.byteLength;
            if (this.privTurnAudioDestination !== undefined) {
                this.privTurnAudioDestination.write(data);
            }
        }
    }

    public onWordBoundaryEvent(text: string): void {
        this.updateTextOffset(text);
    }

    public dispose = (error?: string): void => {
        if (!this.privIsDisposed) {
            // we should have completed by now. If we did not, it's an unknown error.
            this.privIsDisposed = true;
        }
    }

    public onStopSynthesizing(): void {
        this.onComplete();
    }

    protected onEvent = (event: SpeechSynthesisEvent): void => {
        Events.instance.onEvent(event);
    }

    private updateTextOffset(text: string): void {
        if (this.privTextOffset >= 0) {
            this.privTextOffset = this.privRawText.indexOf(text, this.privTextOffset + this.privTextOffset > 0 ? 1 : 0);
            if (this.privIsSSML) {
                if (this.privRawText.indexOf("<", this.privTextOffset + 1) > this.privRawText.indexOf(">", this.privTextOffset + 1)) {
                    this.updateTextOffset(text);
                }
            }
        }
    }

    private onComplete = (): void => {
        if (this.privIsSynthesizing) {
            this.privIsSynthesizing = false;
            this.privIsSynthesisEnded = true;
            this.privAudioOutputStream.close();
            if (this.privTurnAudioDestination !== undefined) {
                this.privTurnAudioDestination.close();
                this.privTurnAudioDestination = undefined;
            }
        }
    }

    private readAllAudioFromStream(): void {
        if (this.privIsSynthesisEnded) {
            this.privReceivedAudio = new ArrayBuffer(this.bytesReceived);
            try {
                this.privAudioOutputStream.read(this.privReceivedAudio);
            } catch (e) {
                this.privReceivedAudio = new ArrayBuffer(0);
            }
        }
    }
}
@@ -0,0 +1,39 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { PropertyCollection } from "../sdk/Exports";
import { Context, SpeechServiceConfig } from "./Exports";

export enum SynthesisServiceType {
    Standard,
    Custom,
}

export class SynthesizerConfig {
    private privSynthesisServiceType: SynthesisServiceType = SynthesisServiceType.Standard;
    private privSpeechServiceConfig: SpeechServiceConfig;
    private privParameters: PropertyCollection;

    constructor(
        speechServiceConfig: SpeechServiceConfig,
        parameters: PropertyCollection) {
        this.privSpeechServiceConfig = speechServiceConfig ? speechServiceConfig : new SpeechServiceConfig(new Context(null));
        this.privParameters = parameters;
    }

    public get parameters(): PropertyCollection {
        return this.privParameters;
    }

    public get synthesisServiceType(): SynthesisServiceType {
        return this.privSynthesisServiceType;
    }

    public set synthesisServiceType(value: SynthesisServiceType) {
        this.privSynthesisServiceType = value;
    }

    public get SpeechServiceConfig(): SpeechServiceConfig {
        return this.privSpeechServiceConfig;
    }
}
@@ -79,6 +79,26 @@ export class ConnectionClosedEvent extends ConnectionEvent {
    }
}

// tslint:disable-next-line:max-classes-per-file
export class ConnectionErrorEvent extends ConnectionEvent {
    private readonly privMessage: string;
    private readonly privType: string;

    constructor(connectionId: string, message: string, type: string) {
        super("ConnectionErrorEvent", connectionId, EventType.Debug);
        this.privMessage = message;
        this.privType = type;
    }

    public get message(): string {
        return this.privMessage;
    }

    public get type(): string {
        return this.privType;
    }
}

// tslint:disable-next-line:max-classes-per-file
export class ConnectionEstablishErrorEvent extends ConnectionEvent {
    private privStatusCode: number;
@@ -26,3 +26,4 @@ export * from "./RiffPcmEncoder";
export * from "./Stream";
export { TranslationStatus } from "../common.speech/TranslationStatus";
export * from "./ChunkedArrayBufferStream";
export * from "./IAudioDestination";
@@ -0,0 +1,11 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { AudioStreamFormat } from "../sdk/Exports";

export interface IAudioDestination {
    id(): string;
    write(buffer: ArrayBuffer): void;
    format: AudioStreamFormat;
    close(): void;
}
@@ -1,8 +1,8 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { AudioStreamFormatImpl } from "../../src/sdk/Audio/AudioStreamFormat";
import { ISpeechConfigAudioDevice } from "../common.speech/Exports";
import { AudioStreamFormatImpl } from "../sdk/Audio/AudioStreamFormat";
import { AudioSourceEvent } from "./AudioSourceEvents";
import { EventSource } from "./EventSource";
import { IDetachable } from "./IDetachable";
@@ -1,17 +1,43 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { AudioStreamFormatImpl } from "../../../src/sdk/Audio/AudioStreamFormat";
import { FileAudioSource, MicAudioSource, PcmRecorder } from "../../common.browser/Exports";
import { PathLike } from "fs";
import {
    FileAudioSource,
    MicAudioSource,
    PcmRecorder,
    SpeakerAudioDestination
} from "../../common.browser/Exports";
import { ISpeechConfigAudioDevice } from "../../common.speech/Exports";
import { AudioSourceEvent, EventSource, IAudioSource, IAudioStreamNode, Promise } from "../../common/Exports";
import {
    AudioSourceEvent,
    EventSource,
    IAudioDestination,
    IAudioSource,
    IAudioStreamNode,
    Promise
} from "../../common/Exports";
import { Contracts } from "../Contracts";
import { AudioInputStream, PropertyCollection, PropertyId, PullAudioInputStreamCallback } from "../Exports";
import {
    AudioInputStream,
    AudioOutputStream,
    AudioStreamFormat,
    PropertyCollection,
    PropertyId,
    PullAudioInputStreamCallback,
    PullAudioOutputStream,
    PushAudioOutputStream,
    PushAudioOutputStreamCallback
} from "../Exports";
import { AudioFileWriter } from "./AudioFileWriter";
import { PullAudioInputStreamImpl, PushAudioInputStreamImpl } from "./AudioInputStream";
import { PullAudioOutputStreamImpl, PushAudioOutputStreamImpl } from "./AudioOutputStream";
import { AudioStreamFormatImpl } from "./AudioStreamFormat";

/**
 * Represents audio input configuration used for specifying what type of input to use (microphone, file, stream).
 * @class AudioConfig
 * Updated in version 1.11.0
 */
export abstract class AudioConfig {
    /**
@@ -75,6 +101,58 @@ export abstract class AudioConfig {
        throw new Error("Not Supported Type");
    }

    /**
     * Creates an AudioConfig object representing the default speaker.
     * Note: this is just a placeholder; not implemented yet.
     * @member AudioConfig.fromDefaultSpeakerOutput
     * @function
     * @public
     * @returns {AudioConfig} The audio output configuration being created.
     * Added in version 1.11.0
     */
    public static fromDefaultSpeakerOutput(): AudioConfig {
        return new AudioOutputConfigImpl(new SpeakerAudioDestination());
    }

    /**
     * Creates an AudioConfig object representing a specified output audio file
     * @member AudioConfig.fromAudioFileOutput
     * @function
     * @public
     * @param {PathLike} filename - the filename of the output audio file
     * @returns {AudioConfig} The audio output configuration being created.
     * Added in version 1.11.0
     */
    public static fromAudioFileOutput(filename: PathLike): AudioConfig {
        return new AudioOutputConfigImpl(new AudioFileWriter(filename));
    }

    /**
     * Creates an AudioConfig object representing a specified audio output stream
     * @member AudioConfig.fromStreamOutput
     * @function
     * @public
     * @param {AudioOutputStream | PushAudioOutputStreamCallback} audioStream - Specifies the custom audio output
     * stream.
     * @returns {AudioConfig} The audio output configuration being created.
     * Added in version 1.11.0
     */
    public static fromStreamOutput(audioStream: AudioOutputStream | PushAudioOutputStreamCallback): AudioConfig {
        if (audioStream instanceof PushAudioOutputStreamCallback) {
            return new AudioOutputConfigImpl(new PushAudioOutputStreamImpl(audioStream as PushAudioOutputStreamCallback));
        }

        if (audioStream instanceof PushAudioOutputStream) {
            return new AudioOutputConfigImpl(audioStream as PushAudioOutputStreamImpl);
        }

        if (audioStream instanceof PullAudioOutputStream) {
            return new AudioOutputConfigImpl(audioStream as PullAudioOutputStreamImpl);
        }

        throw new Error("Not Supported Type");
    }

    /**
     * Explicitly frees any external resource attached to the object
     * @member AudioConfig.prototype.close
@ -226,3 +304,42 @@ export class AudioConfigImpl extends AudioConfig implements IAudioSource {
|
|||
return this.privSource.deviceInfo;
|
||||
}
|
||||
}
|
||||
|
||||
// tslint:disable-next-line:max-classes-per-file
|
||||
export class AudioOutputConfigImpl extends AudioConfig implements IAudioDestination {
|
||||
private privDestination: IAudioDestination;
|
||||
|
||||
/**
|
||||
* Creates and initializes an instance of this class.
|
||||
* @constructor
|
||||
* @param {IAudioDestination} destination - An audio destination.
|
||||
*/
|
||||
public constructor(destination: IAudioDestination) {
|
||||
super();
|
||||
this.privDestination = destination;
|
||||
}
|
||||
|
||||
public set format(format: AudioStreamFormat) {
|
||||
this.privDestination.format = format;
|
||||
}
|
||||
|
||||
public write(buffer: ArrayBuffer): void {
|
||||
this.privDestination.write(buffer);
|
||||
}
|
||||
|
||||
public close(): void {
|
||||
this.privDestination.close();
|
||||
}
|
||||
|
||||
public id(): string {
|
||||
return this.privDestination.id();
|
||||
}
|
||||
|
||||
public setProperty(name: string, value: string): void {
|
||||
throw new Error("This AudioConfig instance does not support setting properties.");
|
||||
}
|
||||
|
||||
public getProperty(name: string, def?: string): string {
|
||||
throw new Error("This AudioConfig instance does not support getting properties.");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
import * as fs from "fs";
|
||||
import { IAudioDestination } from "../../common/Exports";
|
||||
import { Contracts } from "../Contracts";
|
||||
import { AudioStreamFormat } from "../Exports";
|
||||
import { AudioOutputFormatImpl } from "./AudioOutputFormat";
|
||||
|
||||
export class AudioFileWriter implements IAudioDestination {
|
||||
private privAudioFormat: AudioOutputFormatImpl;
|
||||
private privFd: number;
|
||||
private privId: string;
|
||||
private privWriteStream: fs.WriteStream;
|
||||
|
||||
public constructor(filename: fs.PathLike) {
|
||||
this.privFd = fs.openSync(filename, "w");
|
||||
}
|
||||
|
||||
public set format(format: AudioStreamFormat) {
|
||||
Contracts.throwIfNotUndefined(this.privAudioFormat, "format is already set");
|
||||
this.privAudioFormat = format as AudioOutputFormatImpl;
|
||||
let headerOffset: number = 0;
|
||||
if (this.privAudioFormat.hasHeader) {
|
||||
headerOffset = this.privAudioFormat.header.byteLength;
|
||||
}
|
||||
if (this.privFd !== undefined) {
|
||||
this.privWriteStream = fs.createWriteStream("", {fd: this.privFd, start: headerOffset, autoClose: false});
|
||||
}
|
||||
}
|
||||
|
||||
public write(buffer: ArrayBuffer): void {
|
||||
Contracts.throwIfNullOrUndefined(this.privAudioFormat, "must set format before writing.");
|
||||
if (this.privWriteStream !== undefined) {
|
||||
this.privWriteStream.write(new Uint8Array(buffer.slice(0)));
|
||||
}
|
||||
}
|
||||
|
||||
public close(): void {
|
||||
if (this.privFd !== undefined) {
|
||||
this.privWriteStream.on("finish", () => {
|
||||
if (this.privAudioFormat.hasHeader) {
|
||||
this.privAudioFormat.updateHeader(this.privWriteStream.bytesWritten);
|
||||
fs.writeSync(this.privFd,
|
||||
new Int8Array(this.privAudioFormat.header),
|
||||
0,
|
||||
this.privAudioFormat.header.byteLength,
|
||||
0);
|
||||
}
|
||||
fs.closeSync(this.privFd);
|
||||
this.privFd = undefined;
|
||||
});
|
||||
this.privWriteStream.end();
|
||||
}
|
||||
}
|
||||
|
||||
public id = (): string => {
|
||||
return this.privId;
|
||||
}
|
||||
}
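
AudioFileWriter handles the fact that a RIFF header cannot be written until the audio length is known: it starts the data stream past the reserved header space, then patches the header in at offset 0 once the stream finishes. A standalone sketch of that reserve-then-patch pattern; all names here are illustrative, not part of the diff:

    // Sketch only: reserve space for the header, write data, patch the header last.
    import * as fs from "fs";

    const HEADER_BYTES = 44;                       // standard RIFF/WAVE header size
    const fd = fs.openSync("sketch.wav", "w");
    // 1. Start the PCM data *after* the space reserved for the header.
    const data = fs.createWriteStream("", { fd, start: HEADER_BYTES, autoClose: false });
    data.write(new Uint8Array(16000));             // audio chunks arrive incrementally
    data.end();
    data.on("finish", () => {
        // 2. Only now is the length known; build the header and patch it in at offset 0.
        const header = Buffer.alloc(HEADER_BYTES); // would be filled with sizes, format, etc.
        header.write("RIFF", 0);
        header.writeUInt32LE(36 + data.bytesWritten, 4);
        fs.writeSync(fd, header, 0, HEADER_BYTES, 0);
        fs.closeSync(fd);
    });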

@@ -1,7 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { createNoDashGuid } from "../../../src/common/Guid";
import {
    connectivity,
    ISpeechConfigAudioDevice,

@@ -25,6 +24,7 @@ import {
    Stream,
    StreamReader,
} from "../../common/Exports";
import { createNoDashGuid } from "../../common/Guid";
import { AudioStreamFormat, PullAudioInputStreamCallback } from "../Exports";
import { AudioStreamFormatImpl } from "./AudioStreamFormat";

@@ -0,0 +1,339 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { INumberDictionary } from "../../common/Exports";
import { SpeechSynthesisOutputFormat } from "../SpeechSynthesisOutputFormat";
import { AudioStreamFormatImpl } from "./AudioStreamFormat";

export enum AudioFormatTag {
    PCM = 1,
    MuLaw,
    Siren,
    MP3,
    SILKSkype
}

/**
 * @private
 * @class AudioOutputFormatImpl
 * Added in version 1.11.0
 */
// tslint:disable-next-line:max-classes-per-file
export class AudioOutputFormatImpl extends AudioStreamFormatImpl {
    public static SpeechSynthesisOutputFormatToString: INumberDictionary<string> = {
        [SpeechSynthesisOutputFormat.Raw8Khz8BitMonoMULaw]: "raw-8khz-8bit-mono-mulaw",
        [SpeechSynthesisOutputFormat.Riff16Khz16KbpsMonoSiren]: "riff-16khz-16kbps-mono-siren",
        [SpeechSynthesisOutputFormat.Audio16Khz16KbpsMonoSiren]: "audio-16khz-16kbps-mono-siren",
        [SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3]: "audio-16khz-32kbitrate-mono-mp3",
        [SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3]: "audio-16khz-128kbitrate-mono-mp3",
        [SpeechSynthesisOutputFormat.Audio16Khz64KBitRateMonoMp3]: "audio-16khz-64kbitrate-mono-mp3",
        [SpeechSynthesisOutputFormat.Audio24Khz48KBitRateMonoMp3]: "audio-24khz-48kbitrate-mono-mp3",
        [SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3]: "audio-24khz-96kbitrate-mono-mp3",
        [SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3]: "audio-24khz-160kbitrate-mono-mp3",
        [SpeechSynthesisOutputFormat.Raw16Khz16BitMonoTrueSilk]: "raw-16khz-16bit-mono-truesilk",
        [SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm]: "riff-16khz-16bit-mono-pcm",
        [SpeechSynthesisOutputFormat.Riff8Khz16BitMonoPcm]: "riff-8khz-16bit-mono-pcm",
        [SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm]: "riff-24khz-16bit-mono-pcm",
        [SpeechSynthesisOutputFormat.Riff8Khz8BitMonoMULaw]: "riff-8khz-8bit-mono-mulaw",
        [SpeechSynthesisOutputFormat.Raw16Khz16BitMonoPcm]: "raw-16khz-16bit-mono-pcm",
        [SpeechSynthesisOutputFormat.Raw24Khz16BitMonoPcm]: "raw-24khz-16bit-mono-pcm",
        [SpeechSynthesisOutputFormat.Raw8Khz16BitMonoPcm]: "raw-8khz-16bit-mono-pcm",
    };
    private priAudioFormatString: string;
    /**
     * audio format string for the synthesis request, which may differ from priAudioFormatString.
     * e.g. for a riff format, we will request the corresponding raw format and add the header on the SDK side.
     */
    private readonly priRequestAudioFormatString: string;
    private readonly priHasHeader: boolean;

    /**
     * Creates an instance with the given values.
     * @constructor
     * @param formatTag
     * @param {number} samplesPerSec - Samples per second.
     * @param {number} bitsPerSample - Bits per sample.
     * @param {number} channels - Number of channels.
     * @param avgBytesPerSec
     * @param blockAlign
     * @param audioFormatString
     * @param requestAudioFormatString
     * @param hasHeader
     */
    public constructor(formatTag: AudioFormatTag,
                       channels: number,
                       samplesPerSec: number,
                       avgBytesPerSec: number,
                       blockAlign: number,
                       bitsPerSample: number,
                       audioFormatString: string,
                       requestAudioFormatString: string,
                       hasHeader: boolean) {
        super(samplesPerSec, bitsPerSample, channels);
        this.formatTag = formatTag;
        this.avgBytesPerSec = avgBytesPerSec;
        this.blockAlign = blockAlign;
        this.priAudioFormatString = audioFormatString;
        this.priRequestAudioFormatString = requestAudioFormatString;
        this.priHasHeader = hasHeader;
    }

    public static fromSpeechSynthesisOutputFormat(speechSynthesisOutputFormat: SpeechSynthesisOutputFormat): AudioOutputFormatImpl {
        return AudioOutputFormatImpl.fromSpeechSynthesisOutputFormatString(
            AudioOutputFormatImpl.SpeechSynthesisOutputFormatToString[speechSynthesisOutputFormat]);
    }

    public static fromSpeechSynthesisOutputFormatString(speechSynthesisOutputFormatString: string): AudioOutputFormatImpl {
        switch (speechSynthesisOutputFormatString) {
            case "raw-8khz-8bit-mono-mulaw":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.PCM,
                    1,
                    8000,
                    8000,
                    1,
                    8,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);
            case "riff-16khz-16kbps-mono-siren":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.Siren,
                    1,
                    16000,
                    2000,
                    40,
                    0,
                    speechSynthesisOutputFormatString,
                    "audio-16khz-16kbps-mono-siren",
                    true);
            case "audio-16khz-16kbps-mono-siren":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.Siren,
                    1,
                    16000,
                    2000,
                    40,
                    0,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);
            case "audio-16khz-32kbitrate-mono-mp3":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.MP3,
                    1,
                    16000,
                    32 << 7,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);
            case "audio-16khz-128kbitrate-mono-mp3":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.MP3,
                    1,
                    16000,
                    128 << 7,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);
            case "audio-16khz-64kbitrate-mono-mp3":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.MP3,
                    1,
                    16000,
                    64 << 7,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);
            case "audio-24khz-48kbitrate-mono-mp3":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.MP3,
                    1,
                    16000,
                    48 << 7,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);
            case "audio-24khz-96kbitrate-mono-mp3":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.MP3,
                    1,
                    16000,
                    96 << 7,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);
            case "audio-24khz-160kbitrate-mono-mp3":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.MP3,
                    1,
                    16000,
                    160 << 7,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);
            case "raw-16khz-16bit-mono-truesilk":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.SILKSkype,
                    1,
                    16000,
                    32000,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    speechSynthesisOutputFormatString,
                    false);

            case "riff-8khz-16bit-mono-pcm":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.PCM,
                    1,
                    8000,
                    16000,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    "raw-8khz-16bit-mono-pcm",
                    true);
            case "riff-24khz-16bit-mono-pcm":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.PCM,
                    1,
                    24000,
                    48000,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    "raw-24khz-16bit-mono-pcm",
                    true);
            case "riff-8khz-8bit-mono-mulaw":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.MuLaw,
                    1,
                    8000,
                    8000,
                    1,
                    8,
                    speechSynthesisOutputFormatString,
                    "raw-8khz-8bit-mono-mulaw",
                    true);
            case "raw-16khz-16bit-mono-pcm":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.PCM,
                    1,
                    16000,
                    32000,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    "raw-16khz-16bit-mono-pcm",
                    false);
            case "raw-24khz-16bit-mono-pcm":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.PCM,
                    1,
                    24000,
                    48000,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    "raw-24khz-16bit-mono-pcm",
                    false);
            case "raw-8khz-16bit-mono-pcm":
                return new AudioOutputFormatImpl(
                    AudioFormatTag.PCM,
                    1,
                    8000,
                    16000,
                    2,
                    16,
                    speechSynthesisOutputFormatString,
                    "raw-8khz-16bit-mono-pcm",
                    false);
            case "riff-16khz-16bit-mono-pcm":
            default:
                return new AudioOutputFormatImpl(
                    AudioFormatTag.PCM,
                    1,
                    16000,
                    32000,
                    2,
                    16,
                    "riff-16khz-16bit-mono-pcm",
                    "raw-16khz-16bit-mono-pcm",
                    true);
        }
    }

    public static getDefaultOutputFormat(): AudioOutputFormatImpl {
        return AudioOutputFormatImpl.fromSpeechSynthesisOutputFormatString("");
    }

    /**
     * The format tag of the audio
     * @AudioFormatTag AudioOutputFormatImpl.prototype.formatTag
     * @function
     * @public
     */
    public formatTag: AudioFormatTag;

    /**
     * Specifies if this audio output format has a header
     * @boolean AudioOutputFormatImpl.prototype.hasHeader
     * @function
     * @public
     */
    public get hasHeader(): boolean {
        return this.priHasHeader;
    }

    /**
     * Specifies the header of this format
     * @ArrayBuffer AudioOutputFormatImpl.prototype.header
     * @function
     * @public
     */
    public get header(): ArrayBuffer {
        if (this.hasHeader) {
            return this.privHeader;
        }
        return undefined;
    }

    /**
     * Updates the header based on the audio length
     * @member AudioOutputFormatImpl.updateHeader
     * @function
     * @public
     * @param {number} audioLength - the audio length
     */
    public updateHeader(audioLength: number): void {
        if (this.priHasHeader) {
            const view = new DataView(this.privHeader);
            view.setUint32(40, audioLength, true);
        }
    }

    /**
     * Specifies the audio format string to be sent to the service
     * @string AudioOutputFormatImpl.prototype.requestAudioFormatString
     * @function
     * @public
     */
    public get requestAudioFormatString(): string {
        return this.priRequestAudioFormatString;
    }

}
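
Note how the riff-* entries resolve: the SDK requests the matching raw-* format from the service and marks hasHeader, so the RIFF header is generated client-side. A hedged sketch of the mapping in use; AudioOutputFormatImpl is internal to the SDK, so this is illustrative rather than public API:

    // Sketch only: resolving an output format through the table above.
    const fmt = AudioOutputFormatImpl.fromSpeechSynthesisOutputFormat(
        SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm);
    // fmt.requestAudioFormatString -> "raw-24khz-16bit-mono-pcm" (service sends headerless audio)
    // fmt.hasHeader                -> true (the SDK writes the RIFF header itself)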

@@ -1,23 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { createNoDashGuid } from "../../../src/common/Guid";
import {
    ChunkedArrayBufferStream,
    createNoDashGuid,
    Deferred,
    IAudioDestination,
    IStreamChunk,
    Promise,
    PromiseHelper,
    Stream,
    StreamReader,
} from "../../common/Exports";
import { AudioStreamFormat } from "../Exports";
import { AudioStreamFormatImpl } from "./AudioStreamFormat";

export const bufferSize: number = 4096;
import {Contracts} from "../Contracts";
import {
    AudioStreamFormat,
    PushAudioOutputStreamCallback
} from "../Exports";
import { AudioOutputFormatImpl } from "./AudioOutputFormat";

/**
 * Represents audio input stream used for custom audio input configurations.
 * @class AudioInputStream
 * Represents audio output stream used for custom audio output configurations.
 * @class AudioOutputStream
 */
export abstract class AudioOutputStream {

@@ -27,22 +30,27 @@ export abstract class AudioOutputStream {
     */
    protected constructor() { }

    /**
     * Sets the format of the AudioOutputStream
     * Note: the format is set by the synthesizer before writing. Do not set it before passing it to AudioConfig
     * @member AudioOutputStream.prototype.format
     */
    public abstract set format(format: AudioStreamFormat);

    /**
     * Creates a memory backed PullAudioOutputStream with the specified audio format.
     * @member AudioInputStream.createPullStream
     * @member AudioOutputStream.createPullStream
     * @function
     * @public
     * @param {AudioStreamFormat} format - The audio data format in which audio will be
     * written to the push audio stream's write() method (currently only support 16 kHz 16bit mono PCM).
     * @returns {PullAudioOutputStream} The audio input stream being created.
     * @returns {PullAudioOutputStream} The audio output stream being created.
     */
    public static createPullStream(format?: AudioStreamFormat): PullAudioOutputStream {
        return PullAudioOutputStream.create(format);
    public static createPullStream(): PullAudioOutputStream {
        return PullAudioOutputStream.create();
    }

    /**
     * Explicitly frees any external resource attached to the object
     * @member AudioInputStream.prototype.close
     * @member AudioOutputStream.prototype.close
     * @function
     * @public
     */

@@ -50,7 +58,7 @@ export abstract class AudioOutputStream {
}

/**
 * Represents memory backed push audio input stream used for custom audio input configurations.
 * Represents memory backed push audio output stream used for custom audio output configurations.
 * @class PullAudioOutputStream
 */
// tslint:disable-next-line:max-classes-per-file

@@ -61,12 +69,10 @@ export abstract class PullAudioOutputStream extends AudioOutputStream {
     * @member PullAudioOutputStream.create
     * @function
     * @public
     * @param {AudioStreamFormat} format - The audio data format in which audio will be written to the
     * push audio stream's write() method (currently only support 16 kHz 16bit mono PCM).
     * @returns {PullAudioOutputStream} The push audio input stream being created.
     * @returns {PullAudioOutputStream} The push audio output stream being created.
     */
    public static create(format?: AudioStreamFormat): PullAudioOutputStream {
        return new PullAudioOutputStreamImpl(bufferSize, format);
    public static create(): PullAudioOutputStream {
        return new PullAudioOutputStreamImpl();
    }

    /**

@@ -74,9 +80,10 @@ export abstract class PullAudioOutputStream extends AudioOutputStream {
     * @member PullAudioOutputStream.prototype.read
     * @function
     * @public
     * @returns {Promise<ArrayBuffer>} Audio buffer data.
     * @param {ArrayBuffer} dataBuffer - An ArrayBuffer to store the read data.
     * @returns {Promise<number>} Audio buffer length has been read.
     */
    public abstract read(): Promise<ArrayBuffer>;
    public abstract read(dataBuffer: ArrayBuffer): Promise<number>;

    /**
     * Closes the stream.

@@ -88,36 +95,40 @@ export abstract class PullAudioOutputStream extends AudioOutputStream {
}

/**
 * Represents memory backed push audio input stream used for custom audio input configurations.
 * Represents memory backed push audio output stream used for custom audio output configurations.
 * @private
 * @class PullAudioOutputStreamImpl
 */
// tslint:disable-next-line:max-classes-per-file
export class PullAudioOutputStreamImpl extends PullAudioOutputStream {

    private privFormat: AudioStreamFormatImpl;
export class PullAudioOutputStreamImpl extends PullAudioOutputStream implements IAudioDestination {
    private privFormat: AudioOutputFormatImpl;
    private privId: string;
    private privStream: Stream<ArrayBuffer>;
    private streamReader: StreamReader<ArrayBuffer>;
    private privLastChunkView: Int8Array;

    /**
     * Creates and initalizes an instance with the given values.
     * Creates and initializes an instance with the given values.
     * @constructor
     * @param {AudioStreamFormat} format - The audio stream format.
     */
    public constructor(chunkSize: number, format?: AudioStreamFormat) {
    public constructor() {
        super();
        if (format === undefined) {
            this.privFormat = AudioStreamFormatImpl.getDefaultInputFormat();
        } else {
            this.privFormat = format as AudioStreamFormatImpl;
        }

        this.privId = createNoDashGuid();
        this.privStream = new ChunkedArrayBufferStream(chunkSize);
        this.privStream = new Stream<ArrayBuffer>();
        this.streamReader = this.privStream.getReader();
    }

    /**
     * Sets the format information to the stream. For internal use only.
     * @param {AudioStreamFormat} format - the format to be set.
     */
    public set format(format: AudioStreamFormat) {
        if (format === undefined || format === null) {
            this.privFormat = AudioOutputFormatImpl.getDefaultOutputFormat();
        }
        this.privFormat = format as AudioOutputFormatImpl;
    }

    /**
     * Format information for the audio
     */

@@ -141,22 +152,61 @@ export class PullAudioOutputStreamImpl extends PullAudioOutputStream {
     * @property
     * @public
     */
    public get id(): string {
    public id(): string {
        return this.privId;
    }

    /**
     * Reads data from the buffer
     * Reads audio data from the internal buffer.
     * @member PullAudioOutputStreamImpl.prototype.read
     * @function
     * @public
     * @param {ArrayBuffer} dataBuffer - The audio buffer of which this function will make a copy.
     * @param {ArrayBuffer} dataBuffer - An ArrayBuffer to store the read data.
     * @returns {Promise<number>} - Audio buffer length has been read.
     */
    public read(): Promise<ArrayBuffer> {
        return this.streamReader.read()
            .onSuccessContinueWithPromise<ArrayBuffer>((chunk: IStreamChunk<ArrayBuffer>) => {
                return PromiseHelper.fromResult(chunk.buffer);
            });
    public read(dataBuffer: ArrayBuffer): Promise<number> {
        const intView: Int8Array = new Int8Array(dataBuffer);
        let totalBytes: number = 0;

        if (this.privLastChunkView !== undefined) {
            if (this.privLastChunkView.length > dataBuffer.byteLength) {
                intView.set(this.privLastChunkView.slice(0, dataBuffer.byteLength));
                this.privLastChunkView = this.privLastChunkView.slice(dataBuffer.byteLength);
                return PromiseHelper.fromResult(dataBuffer.byteLength);
            }
            intView.set(this.privLastChunkView);
            totalBytes = this.privLastChunkView.length;
            this.privLastChunkView = undefined;
        }

        const deffer: Deferred<number> = new Deferred<number>();
        // Until we have the minimum number of bytes to send in a transmission, keep asking for more.
        const readUntilFilled: () => void = (): void => {
            if (totalBytes < dataBuffer.byteLength && !this.streamReader.isClosed) {
                this.streamReader.read()
                    .onSuccessContinueWith((chunk: IStreamChunk<ArrayBuffer>) => {
                        if (chunk !== undefined && !chunk.isEnd) {
                            let tmpBuffer: ArrayBuffer;
                            if (chunk.buffer.byteLength > dataBuffer.byteLength - totalBytes) {
                                tmpBuffer = chunk.buffer.slice(0, dataBuffer.byteLength - totalBytes);
                                this.privLastChunkView = new Int8Array(chunk.buffer.slice(dataBuffer.byteLength - totalBytes));
                            } else {
                                tmpBuffer = chunk.buffer;
                            }
                            intView.set(new Int8Array(tmpBuffer), totalBytes);
                            totalBytes += tmpBuffer.byteLength;
                            readUntilFilled();
                        } else {
                            this.streamReader.close();
                            deffer.resolve(totalBytes);
                        }
                    });
            } else {
                deffer.resolve(totalBytes);
            }
        };
        readUntilFilled();
        return deffer.promise();
    }
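
The new read() fills the caller's buffer from buffered chunks (stashing any overflow in privLastChunkView) and resolves with the byte count, so a consumer drains the stream by looping until a zero-length read. A minimal sketch, assuming the SDK's internal Promise type exposes on(success, error) callbacks:

    // Sketch only: draining a PullAudioOutputStream once synthesis has finished.
    const dataBuffer = new ArrayBuffer(4096);
    const pull = (stream: PullAudioOutputStream): void => {
        stream.read(dataBuffer).on((bytesRead: number) => {
            if (bytesRead > 0) {
                // consume dataBuffer.slice(0, bytesRead) here, then keep pulling
                pull(stream);
            }
        }, (error: string) => {
            // a failed read ends the loop
        });
    };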

    /**

@@ -167,6 +217,7 @@ export class PullAudioOutputStreamImpl extends PullAudioOutputStream {
     * @param {ArrayBuffer} dataBuffer - The audio buffer of which this function will make a copy.
     */
    public write(dataBuffer: ArrayBuffer): void {
        Contracts.throwIfNullOrUndefined(this.privStream, "must set format before writing");
        this.privStream.writeStreamChunk({
            buffer: dataBuffer,
            isEnd: false,

@@ -184,3 +235,82 @@ export class PullAudioOutputStreamImpl extends PullAudioOutputStream {
        this.privStream.close();
    }
}

/*
 * Represents audio output stream used for custom audio output configurations.
 * @class PushAudioOutputStream
 */
// tslint:disable-next-line:max-classes-per-file
export abstract class PushAudioOutputStream extends AudioOutputStream {
    /**
     * Creates and initializes an instance.
     * @constructor
     */
    protected constructor() { super(); }

    /**
     * Creates a PushAudioOutputStream that delegates to the specified callback interface for
     * write() and close() methods.
     * @member PushAudioOutputStream.create
     * @function
     * @public
     * @param {PushAudioOutputStreamCallback} callback - The custom audio output object,
     * derived from PushAudioOutputStreamCallback
     * @returns {PushAudioOutputStream} The push audio output stream being created.
     */
    public static create(callback: PushAudioOutputStreamCallback): PushAudioOutputStream {
        return new PushAudioOutputStreamImpl(callback);
    }

    /**
     * Explicitly frees any external resource attached to the object
     * @member PushAudioOutputStream.prototype.close
     * @function
     * @public
     */
    public abstract close(): void;

}

/**
 * Represents audio output stream used for custom audio output configurations.
 * @private
 * @class PushAudioOutputStreamImpl
 */
// tslint:disable-next-line:max-classes-per-file
export class PushAudioOutputStreamImpl extends PushAudioOutputStream implements IAudioDestination {
    private readonly privId: string;
    private privCallback: PushAudioOutputStreamCallback;

    /**
     * Creates a PushAudioOutputStream that delegates to the specified callback interface for
     * write() and close() methods.
     * @constructor
     * @param {PushAudioOutputStreamCallback} callback - The custom audio output object,
     * derived from PushAudioOutputStreamCallback
     */
    public constructor(callback: PushAudioOutputStreamCallback) {
        super();
        this.privId = createNoDashGuid();
        this.privCallback = callback;
    }

    // tslint:disable-next-line:no-empty
    public set format(format: AudioStreamFormat) {}

    public write(buffer: ArrayBuffer): void {
        if (!!this.privCallback.write) {
            this.privCallback.write(buffer);
        }
    }

    public close(): void {
        if (!!this.privCallback.close) {
            this.privCallback.close();
        }
    }

    public id(): string {
        return this.privId;
    }
}

@@ -48,7 +48,7 @@ export abstract class AudioStreamFormat {
 */
// tslint:disable-next-line:max-classes-per-file
export class AudioStreamFormatImpl extends AudioStreamFormat {
    private privHeader: ArrayBuffer;
    protected privHeader: ArrayBuffer;

    /**
     * Creates an instance with the given values.

@@ -168,7 +168,7 @@ export class AudioStreamFormatImpl extends AudioStreamFormat {
        return this.privHeader;
    }

    private setString = (view: DataView, offset: number, str: string): void => {
    protected setString = (view: DataView, offset: number, str: string): void => {
        for (let i = 0; i < str.length; i++) {
            view.setUint8(offset + i, str.charCodeAt(i));
        }

@@ -0,0 +1,26 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
/**
 * An abstract base class that defines callback methods (write() and close()) for
 * custom audio output streams.
 * @class PushAudioOutputStreamCallback
 */
export abstract class PushAudioOutputStreamCallback {

    /**
     * Writes audio data into the data buffer.
     * @member PushAudioOutputStreamCallback.prototype.write
     * @function
     * @public
     * @param {ArrayBuffer} dataBuffer - The byte array that stores the audio data to write.
     */
    public abstract write(dataBuffer: ArrayBuffer): void;

    /**
     * Closes the audio output stream.
     * @member PushAudioOutputStreamCallback.prototype.close
     * @function
     * @public
     */
    public abstract close(): void;
}
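
PushAudioOutputStreamCallback is the extension point for custom sinks: the synthesizer calls write() for each audio chunk and close() when synthesis finishes. A minimal sketch of a subclass that buffers chunks in memory; BufferingCallback is an illustrative name, not part of the SDK:

    // Sketch only: a callback that collects the synthesized audio chunks.
    class BufferingCallback extends PushAudioOutputStreamCallback {
        private chunks: ArrayBuffer[] = [];

        public write(dataBuffer: ArrayBuffer): void {
            this.chunks.push(dataBuffer.slice(0)); // copy; the SDK may reuse the buffer
        }

        public close(): void {
            const total = this.chunks.reduce((n, c) => n + c.byteLength, 0);
            // assemble this.chunks into one `total`-byte buffer and hand it off
        }
    }

    // Wire it up through the factory added earlier:
    const audioConfig = AudioConfig.fromStreamOutput(new BufferingCallback());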

@@ -2,7 +2,12 @@
// Licensed under the MIT license.

import { CancellationErrorCodePropertyName, EnumTranslation, SimpleSpeechPhrase } from "../common.speech/Exports";
import { CancellationErrorCode, CancellationReason, RecognitionResult } from "./Exports";
import {
    CancellationErrorCode,
    CancellationReason,
    RecognitionResult,
    SpeechSynthesisResult
} from "./Exports";

/**
 * Contains detailed information about why a result was canceled.

@@ -30,14 +35,14 @@ export class CancellationDetails {
     * @member CancellationDetails.fromResult
     * @function
     * @public
     * @param {RecognitionResult} result - The result that was canceled.
     * @param {RecognitionResult | SpeechSynthesisResult} result - The result that was canceled.
     * @returns {CancellationDetails} The cancellation details object being created.
     */
    public static fromResult(result: RecognitionResult): CancellationDetails {
    public static fromResult(result: RecognitionResult | SpeechSynthesisResult): CancellationDetails {
        let reason = CancellationReason.Error;
        let errorCode: CancellationErrorCode = CancellationErrorCode.NoError;

        if (!!result.json) {
        if (result instanceof RecognitionResult && !!result.json) {
            const simpleSpeech: SimpleSpeechPhrase = SimpleSpeechPhrase.fromJSON(result.json);
            reason = EnumTranslation.implTranslateCancelResult(simpleSpeech.RecognitionStatus);
        }

@@ -49,4 +49,10 @@ export class Contracts {

        // TODO check for file existence.
    }

    public static throwIfNotUndefined(param: any, name: string): void {
        if (param !== undefined) {
            throw new Error("throwIfNotUndefined:" + name);
        }
    }
}

@@ -4,9 +4,10 @@
export { AudioConfig } from "./Audio/AudioConfig";
export { AudioStreamFormat } from "./Audio/AudioStreamFormat";
export { AudioInputStream, PullAudioInputStream, PushAudioInputStream } from "./Audio/AudioInputStream";
export { AudioOutputStream, PullAudioOutputStream } from "./Audio/AudioOutputStream";
export { AudioOutputStream, PullAudioOutputStream, PushAudioOutputStream} from "./Audio/AudioOutputStream";
export { CancellationReason } from "./CancellationReason";
export { PullAudioInputStreamCallback } from "./Audio/PullAudioInputStreamCallback";
export { PushAudioOutputStreamCallback } from "./Audio/PushAudioOutputStreamCallback";
export { KeywordRecognitionModel } from "./KeywordRecognitionModel";
export { SessionEventArgs } from "./SessionEventArgs";
export { RecognitionEventArgs } from "./RecognitionEventArgs";

@@ -63,3 +64,8 @@ export { Conversation,
    ParticipantChangedReason,
    User
} from "./Transcription/Exports";
export { SpeechSynthesisOutputFormat } from "./SpeechSynthesisOutputFormat";
export { SpeechSynthesizer } from "./SpeechSynthesizer";
export { SpeechSynthesisResult } from "./SpeechSynthesisResult";
export { SpeechSynthesisEventArgs } from "./SpeechSynthesisEventArgs";
export { SpeechSynthesisWordBoundaryEventArgs} from "./SpeechSynthesisWordBoundaryEventArgs";

@@ -144,6 +144,24 @@ export enum PropertyId {
     */
    Speech_SessionId,

    /**
     * The spoken language to be synthesized (e.g. en-US)
     * @member PropertyId.SpeechServiceConnection_SynthLanguage
     */
    SpeechServiceConnection_SynthLanguage,

    /**
     * The name of the TTS voice to be used for speech synthesis
     * @member PropertyId.SpeechServiceConnection_SynthVoice
     */
    SpeechServiceConnection_SynthVoice,

    /**
     * The string to specify TTS output audio format
     * @member PropertyId.SpeechServiceConnection_SynthOutputFormat
     */
    SpeechServiceConnection_SynthOutputFormat,

    /**
     * The requested Cognitive Services Speech Service response output format (simple or detailed). Under normal circumstances, you shouldn't have
     * to use this property directly.

@@ -70,4 +70,10 @@ export enum ResultReason {
     * @member ResultReason.SynthesizingAudioCompleted
     */
    SynthesizingAudioCompleted,

    /**
     * Indicates the speech synthesis is now started
     * @member ResultReason.SynthesizingAudioStarted
     */
    SynthesizingAudioStarted,
}

@@ -6,14 +6,15 @@ import {
    OutputFormatPropertyName,
    ServicePropertiesPropertyName
} from "../common.speech/Exports";
import { IStringDictionary } from "../common/Exports";
import { Contracts } from "./Contracts";
import {IStringDictionary} from "../common/Exports";
import {Contracts} from "./Contracts";
import {
    OutputFormat,
    ProfanityOption,
    PropertyCollection,
    PropertyId,
    ServicePropertyChannel
    ServicePropertyChannel,
    SpeechSynthesisOutputFormat,
} from "./Exports";

/**

@@ -212,7 +213,9 @@ export abstract class SpeechConfig {
    public abstract getProperty(name: string, def?: string): string;

    /**
     * Gets output format.
     * Gets speech recognition output format (simple or detailed).
     * Note: This output format is for speech recognition result, use [SpeechConfig.speechSynthesisOutputFormat] to
     * get synthesized audio output format.
     * @member SpeechConfig.prototype.outputFormat
     * @function
     * @public

@@ -221,7 +224,9 @@ export abstract class SpeechConfig {
    public abstract get outputFormat(): OutputFormat;

    /**
     * Gets/Sets the output format.
     * Gets/Sets speech recognition output format (simple or detailed).
     * Note: This output format is for speech recognition result, use [SpeechConfig.speechSynthesisOutputFormat] to
     * set synthesized audio output format.
     * @member SpeechConfig.prototype.outputFormat
     * @function
     * @public

@@ -259,7 +264,7 @@ export abstract class SpeechConfig {
     * @member SpeechConfig.prototype.subscriptionKey
     * @function
     * @public
     * @return {SubscriptionKey} The subscription key set on the config.
     * @return {string} The subscription key set on the config.
     */
    public abstract get subscriptionKey(): string;

@@ -319,6 +324,63 @@ export abstract class SpeechConfig {
     * Added in version 1.7.0.
     */
    public abstract enableDictation(): void;

    /**
     * Gets the language of the speech synthesizer.
     * @member SpeechConfig.prototype.speechSynthesisLanguage
     * @function
     * @public
     * @returns {string} Returns the speech synthesis language.
     * Added in version 1.11.0.
     */
    public abstract get speechSynthesisLanguage(): string;

    /**
     * Sets the language of the speech synthesizer.
     * @member SpeechConfig.prototype.speechSynthesisLanguage
     * @function
     * @public
     * Added in version 1.11.0.
     */
    public abstract set speechSynthesisLanguage(language: string);

    /**
     * Gets the voice of the speech synthesizer.
     * @member SpeechConfig.prototype.speechSynthesisVoiceName
     * @function
     * @public
     * @returns {string} Returns the speech synthesis voice.
     * Added in version 1.11.0.
     */
    public abstract get speechSynthesisVoiceName(): string;

    /**
     * Sets the voice of the speech synthesizer. (see <a href="https://aka.ms/speech/tts-languages">available voices</a>).
     * @member SpeechConfig.prototype.speechSynthesisVoiceName
     * @function
     * @public
     * Added in version 1.11.0.
     */
    public abstract set speechSynthesisVoiceName(voice: string);

    /**
     * Gets the speech synthesis output format.
     * @member SpeechConfig.prototype.speechSynthesisOutputFormat
     * @function
     * @public
     * @returns {SpeechSynthesisOutputFormat} Returns the speech synthesis output format
     * Added in version 1.11.0.
     */
    public abstract get speechSynthesisOutputFormat(): SpeechSynthesisOutputFormat;

    /**
     * Sets the speech synthesis output format (e.g. Riff16Khz16BitMonoPcm).
     * @member SpeechConfig.prototype.speechSynthesisOutputFormat
     * @function
     * @public
     * Added in version 1.11.0.
     */
    public abstract set speechSynthesisOutputFormat(format: SpeechSynthesisOutputFormat);
}

/**

@@ -432,4 +494,28 @@ export class SpeechConfigImpl extends SpeechConfig {
        ret.privProperties = this.privProperties.clone();
        return ret;
    }

    public get speechSynthesisLanguage(): string {
        return this.privProperties.getProperty(PropertyId.SpeechServiceConnection_SynthLanguage);
    }

    public set speechSynthesisLanguage(language: string) {
        this.privProperties.setProperty(PropertyId.SpeechServiceConnection_SynthLanguage, language);
    }

    public get speechSynthesisVoiceName(): string {
        return this.privProperties.getProperty(PropertyId.SpeechServiceConnection_SynthVoice);
    }

    public set speechSynthesisVoiceName(voice: string) {
        this.privProperties.setProperty(PropertyId.SpeechServiceConnection_SynthVoice, voice);
    }

    public get speechSynthesisOutputFormat(): SpeechSynthesisOutputFormat {
        return (SpeechSynthesisOutputFormat as any)[this.privProperties.getProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, undefined)];
    }

    public set speechSynthesisOutputFormat(format: SpeechSynthesisOutputFormat) {
        this.privProperties.setProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, SpeechSynthesisOutputFormat[format]);
    }
}
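
With these additions, synthesis is configured entirely through SpeechConfig. A short sketch; the subscription key and region are placeholders:

    // Sketch only: configuring the new TTS properties.
    import {
        SpeechConfig,
        SpeechSynthesisOutputFormat
    } from "microsoft-cognitiveservices-speech-sdk";

    const config = SpeechConfig.fromSubscription("<your-key>", "<your-region>");
    config.speechSynthesisLanguage = "en-US";
    config.speechSynthesisVoiceName =
        "Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)";
    config.speechSynthesisOutputFormat =
        SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm;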

@@ -0,0 +1,33 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { SpeechSynthesisResult } from "./Exports";

/**
 * Defines contents of speech synthesis events.
 * @class SpeechSynthesisEventArgs
 * Added in version 1.11.0
 */
export class SpeechSynthesisEventArgs {
    private readonly privResult: SpeechSynthesisResult;

    /**
     * Creates and initializes an instance of this class.
     * @constructor
     * @param {SpeechSynthesisResult} result - The speech synthesis result.
     */
    public constructor(result: SpeechSynthesisResult) {
        this.privResult = result;
    }

    /**
     * Specifies the synthesis result.
     * @member SpeechSynthesisEventArgs.prototype.result
     * @function
     * @public
     * @returns {SpeechSynthesisResult} the synthesis result.
     */
    public get result(): SpeechSynthesisResult {
        return this.privResult;
    }
}

@@ -0,0 +1,111 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

/**
 * Define speech synthesis audio output formats.
 * @enum SpeechSynthesisOutputFormat
 * Added in version 1.11.0
 */
export enum SpeechSynthesisOutputFormat {
    /**
     * raw-8khz-8bit-mono-mulaw
     * @member SpeechSynthesisOutputFormat.Raw8Khz8BitMonoMULaw
     */
    Raw8Khz8BitMonoMULaw,

    /**
     * riff-16khz-16kbps-mono-siren
     * @member SpeechSynthesisOutputFormat.Riff16Khz16KbpsMonoSiren
     */
    Riff16Khz16KbpsMonoSiren,

    /**
     * audio-16khz-16kbps-mono-siren
     * @member SpeechSynthesisOutputFormat.Audio16Khz16KbpsMonoSiren
     */
    Audio16Khz16KbpsMonoSiren,

    /**
     * audio-16khz-32kbitrate-mono-mp3
     * @member SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
     */
    Audio16Khz32KBitRateMonoMp3,

    /**
     * audio-16khz-128kbitrate-mono-mp3
     * @member SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3
     */
    Audio16Khz128KBitRateMonoMp3,

    /**
     * audio-16khz-64kbitrate-mono-mp3
     * @member SpeechSynthesisOutputFormat.Audio16Khz64KBitRateMonoMp3
     */
    Audio16Khz64KBitRateMonoMp3,

    /**
     * audio-24khz-48kbitrate-mono-mp3
     * @member SpeechSynthesisOutputFormat.Audio24Khz48KBitRateMonoMp3
     */
    Audio24Khz48KBitRateMonoMp3,

    /**
     * audio-24khz-96kbitrate-mono-mp3
     * @member SpeechSynthesisOutputFormat.Audio24Khz96KBitRateMonoMp3
     */
    Audio24Khz96KBitRateMonoMp3,

    /**
     * audio-24khz-160kbitrate-mono-mp3
     * @member SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3
     */
    Audio24Khz160KBitRateMonoMp3,

    /**
     * raw-16khz-16bit-mono-truesilk
     * @member SpeechSynthesisOutputFormat.Raw16Khz16BitMonoTrueSilk
     */
    Raw16Khz16BitMonoTrueSilk,

    /**
     * riff-16khz-16bit-mono-pcm
     * @member SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm
     */
    Riff16Khz16BitMonoPcm,

    /**
     * riff-8khz-16bit-mono-pcm
     * @member SpeechSynthesisOutputFormat.Riff8Khz16BitMonoPcm
     */
    Riff8Khz16BitMonoPcm,

    /**
     * riff-24khz-16bit-mono-pcm
     * @member SpeechSynthesisOutputFormat.Riff24Khz16BitMonoPcm
     */
    Riff24Khz16BitMonoPcm,

    /**
     * riff-8khz-8bit-mono-mulaw
     * @member SpeechSynthesisOutputFormat.Riff8Khz8BitMonoMULaw
     */
    Riff8Khz8BitMonoMULaw,

    /**
     * raw-16khz-16bit-mono-pcm
     * @member SpeechSynthesisOutputFormat.Raw16Khz16BitMonoPcm
     */
    Raw16Khz16BitMonoPcm,

    /**
     * raw-24khz-16bit-mono-pcm
     * @member SpeechSynthesisOutputFormat.Raw24Khz16BitMonoPcm
     */
    Raw24Khz16BitMonoPcm,

    /**
     * raw-8khz-16bit-mono-pcm
     * @member SpeechSynthesisOutputFormat.Raw8Khz16BitMonoPcm
     */
    Raw8Khz16BitMonoPcm,
}

@@ -0,0 +1,92 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import { PropertyCollection, ResultReason } from "./Exports";

/**
 * Defines result of speech synthesis.
 * @class SpeechSynthesisResult
 * Added in version 1.11.0
 */
export class SpeechSynthesisResult {
    private privResultId: string;
    private privReason: ResultReason;
    private privText: string;
    private privAudioData: ArrayBuffer;
    private privOffset: number;
    private privErrorDetails: string;
    private privProperties: PropertyCollection;

    /**
     * Creates and initializes an instance of this class.
     * @constructor
     * @param {string} resultId - The result id.
     * @param {ResultReason} reason - The reason.
     * @param {ArrayBuffer} audioData - The synthesized audio data.
     * @param {string} errorDetails - Error details, if provided.
     * @param {PropertyCollection} properties - Additional properties, if provided.
     */
    constructor(resultId?: string, reason?: ResultReason, audioData?: ArrayBuffer,
                errorDetails?: string, properties?: PropertyCollection) {
        this.privResultId = resultId;
        this.privReason = reason;
        this.privAudioData = audioData;
        this.privErrorDetails = errorDetails;
        this.privProperties = properties;
    }

    /**
     * Specifies the result identifier.
     * @member SpeechSynthesisResult.prototype.resultId
     * @function
     * @public
     * @returns {string} Specifies the result identifier.
     */
    public get resultId(): string {
        return this.privResultId;
    }

    /**
     * Specifies status of the result.
     * @member SpeechSynthesisResult.prototype.reason
     * @function
     * @public
     * @returns {ResultReason} Specifies status of the result.
     */
    public get reason(): ResultReason {
        return this.privReason;
    }

    /**
     * The synthesized audio data
     * @member SpeechSynthesisResult.prototype.audioData
     * @function
     * @public
     * @returns {ArrayBuffer} The synthesized audio data.
     */
    public get audioData(): ArrayBuffer {
        return this.privAudioData;
    }

    /**
     * In case of an unsuccessful synthesis, provides details of the occurred error.
     * @member SpeechSynthesisResult.prototype.errorDetails
     * @function
     * @public
     * @returns {string} a brief description of an error.
     */
    public get errorDetails(): string {
        return this.privErrorDetails;
    }

    /**
     * The set of properties exposed in the result.
     * @member SpeechSynthesisResult.prototype.properties
     * @function
     * @public
     * @returns {PropertyCollection} The set of properties exposed in the result.
     */
    public get properties(): PropertyCollection {
        return this.privProperties;
    }
}
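
A result carries the complete synthesized clip in audioData once the reason is SynthesizingAudioCompleted. A minimal sketch of handling one, assuming the published package name:

    // Sketch only: inspecting a SpeechSynthesisResult delivered to a callback.
    import { ResultReason, SpeechSynthesisResult } from "microsoft-cognitiveservices-speech-sdk";

    function handleResult(result: SpeechSynthesisResult): void {
        if (result.reason === ResultReason.SynthesizingAudioCompleted) {
            // audioData holds the full clip (including the RIFF header for riff-* formats)
            console.log(`synthesis ${result.resultId}: ${result.audioData.byteLength} bytes`);
        } else {
            console.error(`synthesis failed: ${result.errorDetails}`);
        }
    }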

@@ -0,0 +1,73 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

/**
 * Defines contents of speech synthesis word boundary event.
 * @class SpeechSynthesisWordBoundaryEventArgs
 * Added in version 1.11.0
 */
export class SpeechSynthesisWordBoundaryEventArgs {
    private privAudioOffset: number;
    private privText: string;
    private privWordLength: number;
    private privTextOffset: number;

    /**
     * Creates and initializes an instance of this class.
     * @constructor
     * @param {number} audioOffset - The audio offset.
     * @param {string} text - The text.
     * @param {number} wordLength - The length of the word.
     * @param {number} textOffset - The text offset.
     */
    public constructor(audioOffset: number, text: string, wordLength: number, textOffset: number) {
        this.privAudioOffset = audioOffset;
        this.privText = text;
        this.privWordLength = wordLength;
        this.privTextOffset = textOffset;
    }

    /**
     * Specifies the audio offset.
     * @member SpeechSynthesisWordBoundaryEventArgs.prototype.audioOffset
     * @function
     * @public
     * @returns {number} the audio offset.
     */
    public get audioOffset(): number {
        return this.privAudioOffset;
    }

    /**
     * Specifies the text of the word boundary event.
     * @member SpeechSynthesisWordBoundaryEventArgs.prototype.text
     * @function
     * @public
     * @returns {string} the text.
     */
    public get text(): string {
        return this.privText;
    }

    /**
     * Specifies the word length
     * @member SpeechSynthesisWordBoundaryEventArgs.prototype.wordLength
     * @function
     * @public
     * @returns {number} the word length
     */
    public get wordLength(): number {
        return this.privWordLength;
    }

    /**
     * Specifies the text offset.
     * @member SpeechSynthesisWordBoundaryEventArgs.prototype.textOffset
     * @function
     * @public
     * @returns {number} the text offset.
     */
    public get textOffset(): number {
        return this.privTextOffset;
    }
}
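
The word boundary event lets callers align the input text with the audio timeline. A short sketch of a handler on a synthesizer instance; audioOffset is assumed to be in the service's usual 100-nanosecond ticks:

    // Sketch only: logging word boundaries as they arrive.
    synthesizer.wordBoundary = (sender: SpeechSynthesizer,
                                event: SpeechSynthesisWordBoundaryEventArgs): void => {
        console.log(`word "${event.text}" at ${event.audioOffset / 10000} ms, ` +
            `text offset ${event.textOffset}, length ${event.wordLength}`);
    };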
|
|
@ -0,0 +1,435 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT license.
|
||||
|
||||
import { PathLike } from "fs";
|
||||
import {
|
||||
CognitiveSubscriptionKeyAuthentication,
|
||||
CognitiveTokenAuthentication,
|
||||
Context,
|
||||
IAuthentication,
|
||||
ISynthesisConnectionFactory,
|
||||
OS,
|
||||
SpeechServiceConfig,
|
||||
SpeechSynthesisConnectionFactory,
|
||||
SynthesisAdapterBase,
|
||||
SynthesizerConfig,
|
||||
} from "../common.speech/Exports";
|
||||
import {
|
||||
createNoDashGuid,
|
||||
IAudioDestination, IStringDictionary,
|
||||
Promise,
|
||||
PromiseHelper,
|
||||
Queue
|
||||
} from "../common/Exports";
|
||||
import { AudioOutputConfigImpl } from "./Audio/AudioConfig";
|
||||
import { AudioFileWriter } from "./Audio/AudioFileWriter";
|
||||
import {AudioOutputFormatImpl} from "./Audio/AudioOutputFormat";
|
||||
import { PullAudioOutputStreamImpl, PushAudioOutputStreamImpl } from "./Audio/AudioOutputStream";
|
||||
import { Contracts } from "./Contracts";
|
||||
import {
|
||||
AudioConfig,
|
||||
AudioOutputStream,
|
||||
PropertyCollection,
|
||||
PropertyId,
|
||||
PullAudioOutputStream,
|
||||
PushAudioOutputStreamCallback,
|
||||
SpeechSynthesisEventArgs,
|
||||
SpeechSynthesisOutputFormat,
|
||||
SpeechSynthesisResult,
|
||||
SpeechSynthesisWordBoundaryEventArgs,
|
||||
} from "./Exports";
|
||||
import { SpeechConfig, SpeechConfigImpl } from "./SpeechConfig";
|
||||
|
||||
/**
|
||||
* Defines the class SpeechSynthesizer for text to speech.
|
||||
* Added in version 1.11.0
|
||||
* @class SpeechSynthesizer
|
||||
*/
|
||||
export class SpeechSynthesizer {
|
||||
protected audioConfig: AudioConfig;
|
||||
protected privAdapter: SynthesisAdapterBase;
|
||||
protected privProperties: PropertyCollection;
|
||||
protected synthesisRequestQueue: Queue<SynthesisRequest>;
|
||||
|
||||
/**
|
||||
* Defines event handler for synthesis start events.
|
||||
* @member SpeechSynthesizer.prototype.synthesisStarted
|
||||
* @function
|
||||
* @public
|
||||
*/
|
||||
public synthesisStarted: (sender: SpeechSynthesizer, event: SpeechSynthesisEventArgs) => void;
|
||||
|
||||
/**
|
||||
* Defines event handler for synthesizing events.
|
||||
* @member SpeechSynthesizer.prototype.synthesizing
|
||||
* @function
|
||||
* @public
|
||||
*/
|
||||
public synthesizing: (sender: SpeechSynthesizer, event: SpeechSynthesisEventArgs) => void;
|
||||
|
||||
/**
|
||||
* Defines event handler for synthesis completed events.
|
||||
* @member SpeechSynthesizer.prototype.synthesisCompleted
|
||||
* @function
|
||||
* @public
|
||||
*/
|
||||
public synthesisCompleted: (sender: SpeechSynthesizer, event: SpeechSynthesisEventArgs) => void;
|
||||
|
||||
/**
|
||||
* Defines event handler for synthesis cancelled events.
|
||||
* @member SpeechSynthesizer.prototype.SynthesisCanceled
|
||||
* @function
|
||||
* @public
|
||||
*/
|
||||
public SynthesisCanceled: (sender: SpeechSynthesizer, event: SpeechSynthesisEventArgs) => void;
|
||||
|
||||
/**
|
||||
* Defines event handler for word boundary events
|
||||
* @member SpeechSynthesizer.prototype.wordBoundary
|
||||
* @function
|
||||
* @public
|
||||
*/
|
||||
public wordBoundary: (sender: SpeechSynthesizer, event: SpeechSynthesisWordBoundaryEventArgs) => void;
|
||||
|
||||
/**
|
||||
* Gets the authorization token used to communicate with the service.
|
||||
* @member SpeechSynthesizer.prototype.authorizationToken
|
||||
* @function
|
||||
* @public
|
||||
* @returns {string} Authorization token.
|
||||
*/
|
||||
public get authorizationToken(): string {
|
||||
return this.properties.getProperty(PropertyId.SpeechServiceAuthorization_Token);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets/Sets the authorization token used to communicate with the service.
|
||||
* @member SpeechSynthesizer.prototype.authorizationToken
|
||||
* @function
|
||||
* @public
|
||||
* @param {string} token - Authorization token.
|
||||
*/
|
||||
public set authorizationToken(token: string) {
|
||||
Contracts.throwIfNullOrWhitespace(token, "token");
|
||||
this.properties.setProperty(PropertyId.SpeechServiceAuthorization_Token, token);
|
||||
}
|
||||
|
||||
/**
|
||||
* The collection of properties and their values defined for this SpeechSynthesizer.
|
||||
* @member SpeechSynthesizer.prototype.properties
|
||||
* @function
|
||||
* @public
|
||||
* @returns {PropertyCollection} The collection of properties and their values defined for this SpeechSynthesizer.
|
||||
*/
|
||||
public get properties(): PropertyCollection {
|
||||
return this.privProperties;
|
||||
}
|
||||
|
||||
private privDisposed: boolean;
|
||||
private privConnectionFactory: ISynthesisConnectionFactory;
|
||||
private privSynthesizing: boolean;
|
||||
|
||||
/**
|
||||
* SpeechSynthesizer constructor.
|
||||
* @constructor
|
||||
* @param {SpeechConfig} speechConfig - An set of initial properties for this synthesizer
|
||||
* @param {AudioConfig} audioConfig - An optional audio configuration associated with the synthesizer
|
||||
*/
|
||||
public constructor(speechConfig: SpeechConfig, audioConfig?: AudioConfig) {
|
||||
const speechConfigImpl: SpeechConfigImpl = speechConfig as SpeechConfigImpl;
|
||||
Contracts.throwIfNull(speechConfigImpl, "speechConfig");
|
||||
|
||||
if (audioConfig !== null) {
|
||||
this.audioConfig = (audioConfig !== undefined) ? audioConfig : AudioConfig.fromDefaultSpeakerOutput();
|
||||
}
|
||||
this.privProperties = speechConfigImpl.properties.clone();
|
||||
this.privDisposed = false;
|
||||
this.privSynthesizing = false;
|
||||
this.privConnectionFactory = new SpeechSynthesisConnectionFactory();
|
||||
this.synthesisRequestQueue = new Queue<SynthesisRequest>();
|
||||
this.implCommonRSynthesizeSetup();
|
||||
}
|
||||
|
||||
    public static buildSsml(text: string, properties: PropertyCollection): string {
        const languageToDefaultVoice: IStringDictionary<string> = {
            ["ar-EG"]: "Microsoft Server Speech Text to Speech Voice (ar-EG, Hoda)",
            ["ar-SA"]: "Microsoft Server Speech Text to Speech Voice (ar-SA, Naayf)",
            ["bg-BG"]: "Microsoft Server Speech Text to Speech Voice (bg-BG, Ivan)",
            ["ca-ES"]: "Microsoft Server Speech Text to Speech Voice (ca-ES, HerenaRUS)",
            ["cs-CZ"]: "Microsoft Server Speech Text to Speech Voice (cs-CZ, Jakub)",
            ["da-DK"]: "Microsoft Server Speech Text to Speech Voice (da-DK, HelleRUS)",
            ["de-AT"]: "Microsoft Server Speech Text to Speech Voice (de-AT, Michael)",
            ["de-CH"]: "Microsoft Server Speech Text to Speech Voice (de-CH, Karsten)",
            ["de-DE"]: "Microsoft Server Speech Text to Speech Voice (de-DE, HeddaRUS)",
            ["el-GR"]: "Microsoft Server Speech Text to Speech Voice (el-GR, Stefanos)",
            ["en-AU"]: "Microsoft Server Speech Text to Speech Voice (en-AU, HayleyRUS)",
            ["en-CA"]: "Microsoft Server Speech Text to Speech Voice (en-CA, HeatherRUS)",
            ["en-GB"]: "Microsoft Server Speech Text to Speech Voice (en-GB, HazelRUS)",
            ["en-IE"]: "Microsoft Server Speech Text to Speech Voice (en-IE, Sean)",
            ["en-IN"]: "Microsoft Server Speech Text to Speech Voice (en-IN, PriyaRUS)",
            ["en-US"]: "Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)",
            ["es-ES"]: "Microsoft Server Speech Text to Speech Voice (es-ES, HelenaRUS)",
            ["es-MX"]: "Microsoft Server Speech Text to Speech Voice (es-MX, HildaRUS)",
            ["fi-FI"]: "Microsoft Server Speech Text to Speech Voice (fi-FI, HeidiRUS)",
            ["fr-CA"]: "Microsoft Server Speech Text to Speech Voice (fr-CA, HarmonieRUS)",
            ["fr-CH"]: "Microsoft Server Speech Text to Speech Voice (fr-CH, Guillaume)",
            ["fr-FR"]: "Microsoft Server Speech Text to Speech Voice (fr-FR, HortenseRUS)",
            ["he-IL"]: "Microsoft Server Speech Text to Speech Voice (he-IL, Asaf)",
            ["hi-IN"]: "Microsoft Server Speech Text to Speech Voice (hi-IN, Kalpana)",
            ["hr-HR"]: "Microsoft Server Speech Text to Speech Voice (hr-HR, Matej)",
            ["hu-HU"]: "Microsoft Server Speech Text to Speech Voice (hu-HU, Szabolcs)",
            ["id-ID"]: "Microsoft Server Speech Text to Speech Voice (id-ID, Andika)",
            ["it-IT"]: "Microsoft Server Speech Text to Speech Voice (it-IT, LuciaRUS)",
            ["ja-JP"]: "Microsoft Server Speech Text to Speech Voice (ja-JP, HarukaRUS)",
            ["ko-KR"]: "Microsoft Server Speech Text to Speech Voice (ko-KR, HeamiRUS)",
            ["ms-MY"]: "Microsoft Server Speech Text to Speech Voice (ms-MY, Rizwan)",
            ["nb-NO"]: "Microsoft Server Speech Text to Speech Voice (nb-NO, HuldaRUS)",
            ["nl-NL"]: "Microsoft Server Speech Text to Speech Voice (nl-NL, HannaRUS)",
            ["pl-PL"]: "Microsoft Server Speech Text to Speech Voice (pl-PL, PaulinaRUS)",
            ["pt-BR"]: "Microsoft Server Speech Text to Speech Voice (pt-BR, HeloisaRUS)",
            ["pt-PT"]: "Microsoft Server Speech Text to Speech Voice (pt-PT, HeliaRUS)",
            ["ro-RO"]: "Microsoft Server Speech Text to Speech Voice (ro-RO, Andrei)",
            ["ru-RU"]: "Microsoft Server Speech Text to Speech Voice (ru-RU, EkaterinaRUS)",
            ["sk-SK"]: "Microsoft Server Speech Text to Speech Voice (sk-SK, Filip)",
            ["sl-SI"]: "Microsoft Server Speech Text to Speech Voice (sl-SI, Lado)",
            ["sv-SE"]: "Microsoft Server Speech Text to Speech Voice (sv-SE, HedvigRUS)",
            ["ta-IN"]: "Microsoft Server Speech Text to Speech Voice (ta-IN, Valluvar)",
            ["te-IN"]: "Microsoft Server Speech Text to Speech Voice (te-IN, Chitra)",
            ["th-TH"]: "Microsoft Server Speech Text to Speech Voice (th-TH, Pattara)",
            ["tr-TR"]: "Microsoft Server Speech Text to Speech Voice (tr-TR, SedaRUS)",
            ["vi-VN"]: "Microsoft Server Speech Text to Speech Voice (vi-VN, An)",
            ["zh-CN"]: "Microsoft Server Speech Text to Speech Voice (zh-CN, HuihuiRUS)",
            ["zh-HK"]: "Microsoft Server Speech Text to Speech Voice (zh-HK, TracyRUS)",
            ["zh-TW"]: "Microsoft Server Speech Text to Speech Voice (zh-TW, HanHanRUS)",
        };

        const language = properties.getProperty(PropertyId.SpeechServiceConnection_SynthLanguage, "en-US");
        const voice = properties.getProperty(PropertyId.SpeechServiceConnection_SynthVoice, languageToDefaultVoice[language]);

        return `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts' xmlns:emo='http://www.w3.org/2009/10/emotionml' xml:lang='${language}'><voice name='${voice}'>${this.XMLEncode(text)}</voice></speak>`;
    }
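
    // Illustrative note (added commentary, not in the original source): with the default
    // properties, buildSsml("Hello", properties) produces SSML of this shape:
    //   <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' ... xml:lang='en-US'>
    //     <voice name='Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)'>Hello</voice>
    //   </speak>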

    /**
     * Executes speech synthesis on plain text.
     * The task returns the synthesis result.
     * @member SpeechSynthesizer.prototype.speakTextAsync
     * @function
     * @public
     * @param text - Text to be synthesized.
     * @param cb - Callback that receives the SpeechSynthesisResult.
     * @param err - Callback invoked in case of an error.
     * @param stream - AudioOutputStream to receive the synthesized audio.
     */
    public speakTextAsync(text: string, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, stream?: AudioOutputStream | PushAudioOutputStreamCallback | PathLike): void {
        this.speakImpl(text, false, cb, err, stream);
    }
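
    // A minimal usage sketch (added commentary; the key and region strings are placeholders):
    //   const synthesizer = new SpeechSynthesizer(SpeechConfig.fromSubscription("<key>", "<region>"));
    //   synthesizer.speakTextAsync("Hello world",
    //       (result: SpeechSynthesisResult): void => { synthesizer.close(); },
    //       (error: string): void => { synthesizer.close(); });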

    /**
     * Executes speech synthesis on SSML.
     * The task returns the synthesis result.
     * @member SpeechSynthesizer.prototype.speakSsmlAsync
     * @function
     * @public
     * @param ssml - SSML to be synthesized.
     * @param cb - Callback that receives the SpeechSynthesisResult.
     * @param err - Callback invoked in case of an error.
     * @param stream - AudioOutputStream to receive the synthesized audio.
     */
    public speakSsmlAsync(ssml: string, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, stream?: AudioOutputStream | PushAudioOutputStreamCallback | PathLike): void {
        this.speakImpl(ssml, true, cb, err, stream);
    }

    /**
     * Dispose of associated resources.
     * @member SpeechSynthesizer.prototype.close
     * @function
     * @public
     */
    public close(): void {
        Contracts.throwIfDisposed(this.privDisposed);

        this.dispose(true);
    }

    /**
     * @Internal
     * Do not use externally; the object returned will change without warning or notice.
     */
    public get internalData(): object {
        return this.privAdapter;
    }

    /**
     * This method performs cleanup of resources.
     * The Boolean parameter disposing indicates whether the method is called
     * from Dispose (if disposing is true) or from the finalizer (if disposing is false).
     * Derived classes should override this method to dispose resources if needed.
     * @member SpeechSynthesizer.prototype.dispose
     * @function
     * @public
     * @param {boolean} disposing - Flag to request disposal.
     */
    protected dispose(disposing: boolean): void {
        if (this.privDisposed) {
            return;
        }

        if (disposing) {
            if (this.privAdapter) {
                this.privAdapter.dispose();
            }
        }

        this.privDisposed = true;
    }

    //
    // ################################################################################################################
    // IMPLEMENTATION.
    // Move to independent class
    // ################################################################################################################
    //
    protected createSynthesizerConfig(speechConfig: SpeechServiceConfig): SynthesizerConfig {
        return new SynthesizerConfig(
            speechConfig,
            this.privProperties);
    }

    // Creates the synthesis adapter
    protected createSynthesisAdapter(
        authentication: IAuthentication,
        connectionFactory: ISynthesisConnectionFactory,
        audioConfig: AudioConfig,
        synthesizerConfig: SynthesizerConfig): SynthesisAdapterBase {
        return new SynthesisAdapterBase(authentication, connectionFactory,
            synthesizerConfig, this, this.audioConfig as AudioOutputConfigImpl);
    }

    protected implCommonRSynthesizeSetup(): void {

        let osPlatform = (typeof window !== "undefined") ? "Browser" : "Node";
        let osName = "unknown";
        let osVersion = "unknown";

        if (typeof navigator !== "undefined") {
            osPlatform = osPlatform + "/" + navigator.platform;
            osName = navigator.userAgent;
            osVersion = navigator.appVersion;
        }

        const synthesizerConfig = this.createSynthesizerConfig(
            new SpeechServiceConfig(
                new Context(new OS(osPlatform, osName, osVersion))));

        const subscriptionKey = this.privProperties.getProperty(PropertyId.SpeechServiceConnection_Key, undefined);
        const authentication = (subscriptionKey && subscriptionKey !== "") ?
            new CognitiveSubscriptionKeyAuthentication(subscriptionKey) :
            new CognitiveTokenAuthentication(
                (authFetchEventId: string): Promise<string> => {
                    const authorizationToken = this.privProperties.getProperty(PropertyId.SpeechServiceAuthorization_Token, undefined);
                    return PromiseHelper.fromResult(authorizationToken);
                },
                (authFetchEventId: string): Promise<string> => {
                    const authorizationToken = this.privProperties.getProperty(PropertyId.SpeechServiceAuthorization_Token, undefined);
                    return PromiseHelper.fromResult(authorizationToken);
                });

        this.privAdapter = this.createSynthesisAdapter(
            authentication,
            this.privConnectionFactory,
            this.audioConfig,
            synthesizerConfig);

        this.privAdapter.audioOutputFormat = AudioOutputFormatImpl.fromSpeechSynthesisOutputFormat(
            (SpeechSynthesisOutputFormat as any)[this.properties.getProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, undefined)]
        );
    }
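
    // Note (added commentary): the setup above prefers a subscription key when one is
    // configured and otherwise falls back to token authentication; the token callbacks
    // re-read SpeechServiceAuthorization_Token on every fetch, so a token refreshed via
    // the authorizationToken setter is picked up automatically.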

    protected speakImpl(text: string, IsSsml: boolean, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, dataStream?: AudioOutputStream | PushAudioOutputStreamCallback | PathLike): void {
        try {
            Contracts.throwIfDisposed(this.privDisposed);
            const requestId = createNoDashGuid();
            let audioDestination;
            if (dataStream instanceof PushAudioOutputStreamCallback) {
                audioDestination = new PushAudioOutputStreamImpl(dataStream);
            } else if (dataStream instanceof PullAudioOutputStream) {
                audioDestination = dataStream as PullAudioOutputStreamImpl;
            } else if (dataStream !== undefined) {
                audioDestination = new AudioFileWriter(dataStream as PathLike);
            } else {
                audioDestination = undefined;
            }
            this.synthesisRequestQueue.enqueue(new SynthesisRequest(requestId, text, IsSsml, (e: SpeechSynthesisResult): void => {
                this.privSynthesizing = false;
                if (!!cb) {
                    try {
                        cb(e);
                    } catch (e) {
                        if (!!err) {
                            err(e);
                        }
                    }
                }
                cb = undefined;
                this.adapterSpeak();
            }, (e: string): void => {
                if (!!err) {
                    err(e);
                }
            }, audioDestination));

            this.adapterSpeak();

        } catch (error) {
            if (!!err) {
                if (error instanceof Error) {
                    const typedError: Error = error as Error;
                    err(typedError.name + ": " + typedError.message);
                } else {
                    err(error);
                }
            }

            // Destroy the synthesizer.
            this.dispose(true);
        }
    }

    protected adapterSpeak(): Promise<boolean> {
        if (!this.privDisposed && !this.privSynthesizing) {
            this.privSynthesizing = true;
            return this.synthesisRequestQueue.dequeue().
                onSuccessContinueWithPromise((request: SynthesisRequest): Promise<boolean> => {
                    return this.privAdapter.Speak(request.text, request.isSSML, request.requestId, request.cb, request.err, request.dataStream);
                });
        }
        return PromiseHelper.fromResult(true);
    }
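
    // Note (added commentary): privSynthesizing gates adapterSpeak so only one request
    // is in flight at a time; each request's completion callback clears the flag and
    // calls adapterSpeak() again, draining the queue in FIFO order.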

    private static XMLEncode(text: string): string {
        return text.replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&apos;");
    }
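
    // For illustration (added commentary): XMLEncode(`a < b & "c"`) returns
    // `a &lt; b &amp; &quot;c&quot;`, so user-supplied text cannot break out of the
    // surrounding <voice> element in the generated SSML.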
}

// tslint:disable-next-line:max-classes-per-file
export class SynthesisRequest {
    public requestId: string;
    public text: string;
    public isSSML: boolean;
    public cb: (e: SpeechSynthesisResult) => void;
    public err: (e: string) => void;
    public dataStream: IAudioDestination;

    constructor(requestId: string, text: string, isSSML: boolean, cb?: (e: SpeechSynthesisResult) => void, err?: (e: string) => void, dataStream?: IAudioDestination) {
        this.requestId = requestId;
        this.text = text;
        this.isSSML = isSSML;
        this.cb = cb;
        this.err = err;
        this.dataStream = dataStream;
    }
}

@ -14,7 +14,7 @@ import {
    PropertyCollection,
    PropertyId,
    ServicePropertyChannel,
    SpeechConfig,
    SpeechConfig, SpeechSynthesisOutputFormat,
} from "./Exports";

/**

@ -439,4 +439,27 @@ export class SpeechTranslationConfigImpl extends SpeechTranslationConfig {
        this.privSpeechProperties.setProperty(ForceDictationPropertyName, "true");
    }

    public get speechSynthesisLanguage(): string {
        return this.privSpeechProperties.getProperty(PropertyId.SpeechServiceConnection_SynthLanguage);
    }

    public set speechSynthesisLanguage(language: string) {
        this.privSpeechProperties.setProperty(PropertyId.SpeechServiceConnection_SynthLanguage, language);
    }

    public get speechSynthesisVoiceName(): string {
        return this.privSpeechProperties.getProperty(PropertyId.SpeechServiceConnection_SynthVoice);
    }

    public set speechSynthesisVoiceName(voice: string) {
        this.privSpeechProperties.setProperty(PropertyId.SpeechServiceConnection_SynthVoice, voice);
    }

    public get speechSynthesisOutputFormat(): SpeechSynthesisOutputFormat {
        return (SpeechSynthesisOutputFormat as any)[this.privSpeechProperties.getProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, undefined)];
    }

    public set speechSynthesisOutputFormat(format: SpeechSynthesisOutputFormat) {
        this.privSpeechProperties.setProperty(PropertyId.SpeechServiceConnection_SynthOutputFormat, SpeechSynthesisOutputFormat[format]);
    }
}
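
A quick usage sketch of the new synthesis accessors (illustrative only; the voice and format values below are examples, not defaults):

    const config: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription("<key>", "<region>");
    config.speechSynthesisLanguage = "en-US";
    config.speechSynthesisVoiceName = "Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)";
    config.speechSynthesisOutputFormat = sdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3;

Each accessor round-trips through the property bag, so the same values are also visible via PropertyId.SpeechServiceConnection_SynthLanguage, _SynthVoice, and _SynthOutputFormat.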

@ -1,16 +1,12 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import {
    IAudioStreamNode,
    IStreamChunk,
} from "../src/common/Exports";
import {
    bufferSize,
    PullAudioOutputStreamImpl,
} from "../src/sdk/Audio/AudioOutputStream";
import { AudioOutputFormatImpl } from "../src/sdk/Audio/AudioOutputFormat";
import { PullAudioOutputStream, PullAudioOutputStreamImpl } from "../src/sdk/Audio/AudioOutputStream";
import { Settings } from "./Settings";

let objsToClose: any[];

beforeAll(() => {
    // Override inputs, if necessary
    Settings.LoadSettings();

@ -18,12 +14,50 @@ beforeAll(() => {

// Test cases are run linearly; the only other mechanism to demarcate them in the output is to put a console line in each case and
// report the name.
// tslint:disable-next-line:no-console
beforeEach(() => console.info("---------------------------------------Starting test case-----------------------------------"));
beforeEach(() => {
    // tslint:disable-next-line:no-console
    console.info("---------------------------------------Starting test case-----------------------------------");
    objsToClose = [];
    const used = process.memoryUsage().heapUsed / 1024 / 1024;
    // tslint:disable-next-line:no-console
    console.log(`Heap memory usage before test: ${Math.round(used * 100) / 100} MB`);
});

afterEach(() => {
    // tslint:disable-next-line:no-console
    console.info("End Time: " + new Date(Date.now()).toLocaleString());
    objsToClose.forEach((value: any, index: number, array: any[]) => {
        if (typeof value.close === "function") {
            value.close();
        }
    });
    const used = process.memoryUsage().heapUsed / 1024 / 1024;
    // tslint:disable-next-line:no-console
    console.log(`Heap memory usage after test: ${Math.round(used * 100) / 100} MB`);
});

const ReadPullAudioOutputStream: (stream: PullAudioOutputStream, length?: number, done?: () => void) => void =
    (stream: PullAudioOutputStream, length?: number, done?: () => void): void => {
        const audioBuffer = new ArrayBuffer(1024);
        stream.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
            if (bytesRead > 0) {
                ReadPullAudioOutputStream(stream, length === undefined ? undefined : length - bytesRead, done);
            } else {
                if (length !== undefined) {
                    expect(length).toEqual(0);
                }
                if (!!done) {
                    done();
                }
            }
        });
    };
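
// Note (added commentary): this helper drains a PullAudioOutputStream with recursive
// fixed-size 1024-byte reads; when an expected byte count is passed, it asserts that
// exactly that many bytes arrived before the stream reported end-of-stream.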

test("PullAudioOutputStreamImpl basic test", (done: jest.DoneCallback) => {
    const size: number = 256;
    const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl(size);
    const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
    objsToClose.push(ps);
    ps.format = AudioOutputFormatImpl.getDefaultOutputFormat();
    const ab: ArrayBuffer = new ArrayBuffer(size);

    const abView: Uint8Array = new Uint8Array(ab);

@ -33,12 +67,13 @@ test("PullAudioOutputStreamImpl basic test", (done: jest.DoneCallback) => {
    ps.write(abView);

    let bytesRead: number = 0;
    ps.read().onSuccessContinueWith((audioBuffer: ArrayBuffer) => {
    const audioBuffer = new ArrayBuffer(size);

    ps.read(audioBuffer).onSuccessContinueWith((readSize: number) => {
        try {
            expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(size);
            expect(audioBuffer.byteLength).toBeLessThanOrEqual(size);
            expect(readSize).toEqual(size);
            const readView: Uint8Array = new Uint8Array(audioBuffer);
            for (let i: number = 0; i < audioBuffer.byteLength; i++) {
            for (let i: number = 0; i < readSize; i++) {
                expect(readView[i]).toEqual(bytesRead++ % 256);
            }
        } catch (error) {

@ -49,8 +84,11 @@ test("PullAudioOutputStreamImpl basic test", (done: jest.DoneCallback) => {
});

test("PullAudioOutputStreamImpl multiple writes read after close", (done: jest.DoneCallback) => {
    const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl(bufferSize);
    const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
    const format = AudioOutputFormatImpl.getDefaultOutputFormat();
    ps.format = format;

    const bufferSize = format.avgBytesPerSec / 10;
    const ab: ArrayBuffer = new ArrayBuffer(bufferSize * 4);
    const abView: Uint8Array = new Uint8Array(ab);
    for (let i: number = 0; i < bufferSize * 4; i++) {

@ -63,27 +101,25 @@ test("PullAudioOutputStreamImpl multiple writes read after close", (done: jest.D
    }
    ps.close();

    let bytesRead: number = 0;
    let bytesReadTotal: number = 0;
    const audioBuffer = new ArrayBuffer(bufferSize);

    const readLoop = () => {
        ps.read().onSuccessContinueWith((audioBuffer: ArrayBuffer) => {
        ps.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
            try {
                if (audioBuffer == null) {
                expect(bytesRead).toBeGreaterThanOrEqual(bufferSize * 4);
                expect(bytesRead).toBeLessThanOrEqual(bufferSize * 4);
                if (bytesRead === 0) {
                    expect(bytesReadTotal).toEqual(bufferSize * 4);
                } else {
                    expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(bufferSize);
                    expect(audioBuffer.byteLength).toBeLessThanOrEqual(bufferSize);
                    const readView: Uint8Array = new Uint8Array(audioBuffer);
                    for (let i: number = 0; i < audioBuffer.byteLength; i++) {
                        expect(readView[i]).toEqual(bytesRead++ % 256);
                    for (let i: number = 0; i < bytesRead; i++) {
                        expect(readView[i]).toEqual(bytesReadTotal++ % 256);
                    }
                }
            } catch (error) {
                done.fail(error);
            }

            if (audioBuffer != null) {
            if (bytesRead > 0) {
                readLoop();
            } else {
                done();

@ -95,7 +131,12 @@ test("PullAudioOutputStreamImpl multiple writes read after close", (done: jest.D
});

test("PullAudioOutputStreamImpl multiple writes and reads", (done: jest.DoneCallback) => {
    const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl(bufferSize);
    const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
    objsToClose.push(ps);
    const format = AudioOutputFormatImpl.getDefaultOutputFormat();
    ps.format = format;

    const bufferSize = format.avgBytesPerSec / 10;

    const ab: ArrayBuffer = new ArrayBuffer(bufferSize * 4);
    const abView: Uint8Array = new Uint8Array(ab);

@ -109,22 +150,22 @@ test("PullAudioOutputStreamImpl multiple writes and reads", (done: jest.DoneCall
    }
    ps.write(ab.slice(j));

    let bytesRead: number = 0;
    let bytesReadTotal: number = 0;
    const audioBuffer = new ArrayBuffer(bufferSize);

    const readLoop = () => {
        ps.read().onSuccessContinueWith((audioBuffer: ArrayBuffer) => {
        ps.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
            try {
                expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(bufferSize);
                expect(audioBuffer.byteLength).toBeLessThanOrEqual(bufferSize);
                expect(bytesRead).toBeLessThanOrEqual(bufferSize);
                const readView: Uint8Array = new Uint8Array(audioBuffer);
                for (let i: number = 0; i < audioBuffer.byteLength; i++) {
                    expect(readView[i]).toEqual(bytesRead++ % 256);
                for (let i: number = 0; i < bytesRead; i++) {
                    expect(readView[i]).toEqual(bytesReadTotal++ % 256);
                }
            } catch (error) {
                done.fail(error);
            }

            if (bytesRead < bufferSize * 4) {
            if (bytesReadTotal < bufferSize * 4) {
                readLoop();
            } else {
                done();

@ -134,3 +175,62 @@ test("PullAudioOutputStreamImpl multiple writes and reads", (done: jest.DoneCall

    readLoop();
});

test("PullAudioOutputStreamImpl reads before writing", (done: jest.DoneCallback) => {
    const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
    objsToClose.push(ps);

    const format = AudioOutputFormatImpl.getDefaultOutputFormat();
    ps.format = format;

    const bufferSize = format.avgBytesPerSec / 10;

    setTimeout(() => {
        ReadPullAudioOutputStream(ps, bufferSize * 4, done);
    }, 0);

    const ab: ArrayBuffer = new ArrayBuffer(bufferSize * 4);
    const abView: Uint8Array = new Uint8Array(ab);
    for (let i: number = 0; i < bufferSize * 4; i++) {
        abView[i] = i % 256;
    }

    let j: number = 0;
    for (j = 0; j < bufferSize * 4; j += 100) {
        ps.write(ab.slice(j, j + 100));
    }
    ps.write(ab.slice(j));

    ps.close();
});

test("PullAudioOutputStreamImpl read all audio data in single read", (done: jest.DoneCallback) => {
    const ps: PullAudioOutputStreamImpl = new PullAudioOutputStreamImpl();
    const format = AudioOutputFormatImpl.getDefaultOutputFormat();
    ps.format = format;

    const bufferSize = format.avgBytesPerSec / 10;
    const ab: ArrayBuffer = new ArrayBuffer(bufferSize * 4);
    const abView: Uint8Array = new Uint8Array(ab);
    for (let k: number = 0; k < 1500; k++) { // 10 minutes of data
        for (let i: number = 0; i < bufferSize * 4; i++) {
            abView[i] = (i + k * bufferSize * 4) % 256;
        }
        ps.write(ab);
    }

    ps.close();

    const audioBuffer = new ArrayBuffer(bufferSize * 6000);

    ps.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
        expect(bytesRead).toEqual(bufferSize * 6000);
        const readView: Uint8Array = new Uint8Array(audioBuffer);
        for (let i: number = 0; i < bytesRead - 1000; i += 997) { // don't check every byte, to avoid a long-running test
            expect(readView[i]).toEqual(i % 256);
        }
        done();
    });
});

@ -362,12 +362,11 @@ describe.each([true, false])("Service-based tests", (forceNodeWebSocket: boolean
    // }
    // };

    const audioBuffer = new ArrayBuffer(320);
    const audioReadLoop = (audioStream: PullAudioOutputStream, done: jest.DoneCallback) => {
        audioStream.read().on((audioBuffer: ArrayBuffer) => {
        audioStream.read(audioBuffer).on((bytesRead: number) => {
            try {
                if (audioBuffer !== null) {
                    expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(1);
                } else {
                if (bytesRead === 0) {
                    PostDoneTest(done, 2000);
                }

@ -375,11 +374,10 @@ describe.each([true, false])("Service-based tests", (forceNodeWebSocket: boolean
                done.fail(error);
            }

            if (audioBuffer != null) {
            if (bytesRead > 0) {
                audioReadLoop(audioStream, done);
            }
        },
        (error: string) => {
        }, (error: string) => {
            done.fail(error);
        });
    };

@ -445,26 +443,23 @@ describe.each([true, false])("Service-based tests", (forceNodeWebSocket: boolean
        hypoCounter++;
    };

    const audioBuffer = new ArrayBuffer(320);
    const audioReadLoop = (audioStream: PullAudioOutputStream, done: jest.DoneCallback) => {
        audioStream.read().on((audioBuffer: ArrayBuffer) => {
        audioStream.read(audioBuffer).on((bytesRead: number) => {
            try {
                if (audioBuffer !== null) {
                    expect(audioBuffer.byteLength).toBeGreaterThanOrEqual(1);
                } else {
                if (bytesRead === 0) {
                    PostDoneTest(done, 2000);
                }

            } catch (error) {
                done.fail(error);
            }

            if (audioBuffer != null) {
            if (bytesRead > 0) {
                audioReadLoop(audioStream, done);
            }
        },
        (error: string) => {
            done.fail(error);
        });
        }, (error: string) => {
            done.fail(error);
        });
    };

    connector.activityReceived = (sender: sdk.DialogServiceConnector, e: sdk.ActivityReceivedEventArgs) => {

@ -28,7 +28,7 @@ beforeEach(() => {
    // tslint:disable-next-line:no-console
    console.info("---------------------------------------Starting test case-----------------------------------");
    // tslint:disable-next-line:no-console
    console.info("Sart Time: " + new Date(Date.now()).toLocaleString());
    console.info("Start Time: " + new Date(Date.now()).toLocaleString());
});

afterEach(() => {

@ -92,6 +92,16 @@ const BuildTranslationRecognizerFromWaveFile: (speechConfig: sdk.SpeechTranslati
    return r;
};

const BuildSpeechSynthesizerToFileOutput: (speechConfig: sdk.SpeechConfig, fileName?: string) => sdk.SpeechSynthesizer =
    (speechConfig?: sdk.SpeechConfig, fileName?: string): sdk.SpeechSynthesizer => {
        const config: sdk.AudioConfig = fileName === undefined ? null : sdk.AudioConfig.fromAudioFileOutput(fileName);

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, config);
        expect(s).not.toBeUndefined();

        return s;
    };

test("Null Param Check, both.", () => {
    expect(() => sdk.SpeechConfig.fromSubscription(null, null)).toThrowError();
});

@ -127,17 +137,19 @@ test.skip("From endpoint, invalid key format.", () => {
});

// TODO use an endpoint that we control so the subscription key is not leaked!
test("From endpoing, valid Params", () => {
test("From endpoint, valid Params", () => {
    const s: sdk.SpeechConfig = sdk.SpeechConfig.fromEndpoint(new URL("http://www.example.com"), "Settings.SpeechSubscriptionKey");
    expect(s).not.toBeUndefined();
    s.close();
});

test("TypedParametersAccessableViaPropBag", () => {
test("TypedParametersAccessibleViaPropBag", () => {
    const s: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
    TestParam(() => s.authorizationToken, (val: string) => (s.authorizationToken = val), sdk.PropertyId.SpeechServiceAuthorization_Token, s);
    TestParam(() => s.endpointId, (val: string) => (s.endpointId = val), sdk.PropertyId.SpeechServiceConnection_EndpointId, s);
    TestParam(() => s.speechRecognitionLanguage, (val: string) => (s.speechRecognitionLanguage = val), sdk.PropertyId.SpeechServiceConnection_RecoLanguage, s);
    TestParam(() => s.speechSynthesisLanguage, (val: string) => (s.speechSynthesisLanguage = val), sdk.PropertyId.SpeechServiceConnection_SynthLanguage, s);
    TestParam(() => s.speechSynthesisVoiceName, (val: string) => (s.speechSynthesisVoiceName = val), sdk.PropertyId.SpeechServiceConnection_SynthVoice, s);
});

const TestParam = (getAccess: () => string, setAccess: (val: string) => void, propEnum: sdk.PropertyId, config: sdk.SpeechConfig): void => {

@ -177,7 +189,7 @@ test("Create Recognizer", () => {
    s.close();
});

test("Proeprties are passed to recognizer", () => {
test("Properties are passed to recognizer", () => {
    const s: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
    s.speechRecognitionLanguage = createNoDashGuid();
    s.authorizationToken = createNoDashGuid();

@ -330,7 +342,7 @@ test("Translation Recognizer Null target languages throws", () => {
    s.close();
});

test("Test Translation Recognizer emty target list throws", () => {
test("Test Translation Recognizer empty target list throws", () => {
    const s: sdk.SpeechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
    s.speechRecognitionLanguage = "en-EN";
    s.setProperty(sdk.PropertyId[sdk.PropertyId.SpeechServiceConnection_TranslationToLanguages], "");

@ -350,7 +362,7 @@ test("Translation Null voice value throws", () => {
    s.close();
});

test("Translition Recognizer success.", () => {
test("Translation Recognizer success.", () => {
    const s: sdk.SpeechTranslationConfig = sdk.SpeechTranslationConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
    s.setProperty(sdk.PropertyId[sdk.PropertyId.SpeechServiceConnection_TranslationToLanguages], "en-US");
    s.speechRecognitionLanguage = "en-EN";

@ -570,17 +582,28 @@ describe("Connection URL Tests", () => {
    createMethod: (url: URL, key: string) => sdk.SpeechConfig | sdk.SpeechTranslationConfig,
    hostName: string,
    expectedHostName: string,
    recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer,
    recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer | sdk.SpeechSynthesizer,
    done: jest.DoneCallback
): void {

    const s: sdk.SpeechConfig | sdk.SpeechTranslationConfig = createMethod(new URL(hostName), "fakekey");
    objsToClose.push(s);

    const r: { recognizeOnceAsync: (cb?: (e: sdk.RecognitionResult) => void, err?: (e: string) => void) => void } = recognizerCreateMethod(s);
    const r = recognizerCreateMethod(s);
    objsToClose.push(r);

    r.recognizeOnceAsync(
    let recognizeOrSynthesizeOnceAsync: (cb: (p2: any) => void) => void;

    if (r instanceof sdk.Recognizer) {
        recognizeOrSynthesizeOnceAsync = (cb: (p2: any) => void): void => {
            r.recognizeOnceAsync(cb);
        };
    } else if (r instanceof sdk.SpeechSynthesizer) {
        recognizeOrSynthesizeOnceAsync = (cb: (p2: any) => void): void => {
            r.speakTextAsync("", cb);
        };
    }
    recognizeOrSynthesizeOnceAsync(
        (p2: any): void => {
            try {
                expect(uri).not.toBeUndefined();

@ -605,9 +628,11 @@ describe("Connection URL Tests", () => {
describe.each([
    [sdk.SpeechConfig.fromHost, BuildSpeechRecognizerFromWaveFile],
    [sdk.SpeechTranslationConfig.fromHost, BuildTranslationRecognizerFromWaveFile],
    [sdk.SpeechConfig.fromHost, BuildIntentRecognizerFromWaveFile]])("FromHost Tests",
    [sdk.SpeechConfig.fromHost, BuildIntentRecognizerFromWaveFile],
    [sdk.SpeechConfig.fromHost, BuildSpeechSynthesizerToFileOutput]])("FromHost Tests",
    (createMethod: any,
    recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer) => {
    recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) =>
        sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer | sdk.SpeechSynthesizer) => {

    test("Simple Host and protocol", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console

@ -635,7 +660,7 @@ describe("Connection URL Tests", () => {
function testUrlParameter(
    createMethod: (url: URL, key: string) => sdk.SpeechConfig | sdk.SpeechTranslationConfig,
    setMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => void,
    recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer,
    recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer | sdk.SpeechSynthesizer,
    done: jest.DoneCallback,
    ...urlSubStrings: string[]
): void {

@ -645,10 +670,22 @@ describe("Connection URL Tests", () => {

    setMethod(s);

    const r: { recognizeOnceAsync: (cb?: (e: sdk.RecognitionResult) => void, err?: (e: string) => void) => void } = recognizerCreateMethod(s);
    const r = recognizerCreateMethod(s);
    objsToClose.push(r);

    r.recognizeOnceAsync(
    let recognizeOrSynthesizeOnceAsync: (cb: (p2: any) => void) => void;

    if (r instanceof sdk.Recognizer) {
        recognizeOrSynthesizeOnceAsync = (cb: (p2: any) => void): void => {
            r.recognizeOnceAsync(cb);
        };
    } else if (r instanceof sdk.SpeechSynthesizer) {
        recognizeOrSynthesizeOnceAsync = (cb: (p2: any) => void): void => {
            r.speakTextAsync("", cb);
        };
    }

    recognizeOrSynthesizeOnceAsync(
        (p2: any): void => {
            try {
                expect(uri).not.toBeUndefined();

@ -675,7 +712,7 @@ describe("Connection URL Tests", () => {
    [sdk.SpeechTranslationConfig.fromEndpoint, BuildTranslationRecognizerFromWaveFile],
    [sdk.SpeechConfig.fromEndpoint, BuildIntentRecognizerFromWaveFile]])("Common URL Tests",
    (createMethod: any,
    recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer) => {
    recognizerCreateMethod: (config: sdk.SpeechConfig | sdk.SpeechTranslationConfig) => sdk.SpeechRecognizer | sdk.TranslationRecognizer | sdk.IntentRecognizer | sdk.SpeechSynthesizer) => {
    test("setServiceProperty (single)", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: setServiceProperty");

@ -0,0 +1,555 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

import * as fs from "fs";
import * as sdk from "../microsoft.cognitiveservices.speech.sdk";
import { ConsoleLoggingListener, WebsocketMessageAdapter } from "../src/common.browser/Exports";
import { Events, EventType, InvalidOperationError } from "../src/common/Exports";
import { Settings } from "./Settings";
import WaitForCondition from "./Utilities";

let objsToClose: any[];

beforeAll(() => {
    // override inputs, if necessary
    Settings.LoadSettings();
    Events.instance.attachListener(new ConsoleLoggingListener(EventType.Debug));
});

// Test cases are run linearly; the only other mechanism to demarcate them in the output is to put a console line in each case and
// report the name.
beforeEach(() => {
    objsToClose = [];
    // tslint:disable-next-line:no-console
    console.info("---------------------------------------Starting test case-----------------------------------");
    // tslint:disable-next-line:no-console
    console.info("Start Time: " + new Date(Date.now()).toLocaleString());
});

afterEach(() => {
    // tslint:disable-next-line:no-console
    console.info("End Time: " + new Date(Date.now()).toLocaleString());
    objsToClose.forEach((value: any, index: number, array: any[]) => {
        if (typeof value.close === "function") {
            value.close();
        }
    });
});

const BuildSpeechConfig: () => sdk.SpeechConfig = (): sdk.SpeechConfig => {

    let s: sdk.SpeechConfig;
    if (undefined === Settings.SpeechEndpoint) {
        s = sdk.SpeechConfig.fromSubscription(Settings.SpeechSubscriptionKey, Settings.SpeechRegion);
    } else {
        s = sdk.SpeechConfig.fromEndpoint(new URL(Settings.SpeechEndpoint), Settings.SpeechSubscriptionKey);
    }

    if (undefined !== Settings.proxyServer) {
        s.setProxy(Settings.proxyServer, Settings.proxyPort);
    }

    expect(s).not.toBeUndefined();
    return s;
};

const CheckSynthesisResult: (result: sdk.SpeechSynthesisResult, reason: sdk.ResultReason) =>
    void = (result: sdk.SpeechSynthesisResult, reason: sdk.ResultReason): void => {
    expect(result).not.toBeUndefined();
    expect(result.reason).toEqual(reason);
    switch (reason) {
        case sdk.ResultReason.SynthesizingAudio:
        case sdk.ResultReason.SynthesizingAudioCompleted:
            expect(result.audioData).not.toBeUndefined();
            expect(result.audioData.byteLength).toBeGreaterThan(0);
            break;
        case sdk.ResultReason.SynthesizingAudioStarted:
            expect(result.audioData).toBeUndefined();
            break;
    }
};

const CheckBinaryEqual: (arr1: ArrayBuffer, arr2: ArrayBuffer) => void =
    (arr1: ArrayBuffer, arr2: ArrayBuffer): void => {
        expect(arr1).not.toBeUndefined();
        expect(arr2).not.toBeUndefined();
        expect(arr1.byteLength).toEqual(arr2.byteLength);
        const view1: Uint8Array = new Uint8Array(arr1);
        const view2: Uint8Array = new Uint8Array(arr2);
        for (let i: number = 0; i < arr1.byteLength; i++) {
            expect(view1[i]).toEqual(view2[i]);
        }
    };

const ReadPullAudioOutputStream: (stream: sdk.PullAudioOutputStream, length?: number, done?: () => void, fc?: (e: string) => void) => void =
    (stream: sdk.PullAudioOutputStream, length?: number, done?: () => void, fc?: (e: string) => void): void => {
        const audioBuffer = new ArrayBuffer(1024);
        stream.read(audioBuffer).onSuccessContinueWith((bytesRead: number) => {
            if (bytesRead > 0) {
                ReadPullAudioOutputStream(stream, length === undefined ? undefined : length - bytesRead, done, fc);
            } else {
                if (length !== undefined) {
                    try {
                        expect(length).toEqual(0);
                    } catch (e) {
                        fc(e);
                    }
                }
                if (!!done) {
                    done();
                }
            }
        });
    };

class PushAudioOutputStreamTestCallback extends sdk.PushAudioOutputStreamCallback {
    public length: number;
    public isClosed: boolean = false;

    constructor() {
        super();
        this.length = 0;
    }

    public write(dataBuffer: ArrayBuffer): void {
        this.length += dataBuffer.byteLength;
    }

    public close(): void {
        if (this.isClosed) {
            throw new InvalidOperationError("PushAudioOutputStreamCallback already closed");
        }
        this.isClosed = true;
    }
}

test("testSpeechSynthesizer1", () => {
    // tslint:disable-next-line:no-console
    console.info("Name: testSpeechSynthesizer1");
    const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();

    const config: sdk.AudioConfig = sdk.AudioConfig.fromDefaultSpeakerOutput();

    const r: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, config);
    objsToClose.push(r);

    expect(r).not.toBeUndefined();

    expect(r instanceof sdk.SpeechSynthesizer);
});

test("testSetAndGetParameters", () => {
    // tslint:disable-next-line:no-console
    console.info("Name: testSetAndGetParameters");
    const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
    speechConfig.speechSynthesisLanguage = "zh-CN";
    speechConfig.speechSynthesisVoiceName = "zh-CN-HuihuiRUS";
    speechConfig.speechSynthesisOutputFormat = sdk.SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3;
    expect(speechConfig.speechSynthesisOutputFormat).toEqual(sdk.SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3);
    const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
    objsToClose.push(s);

    expect(s.properties).not.toBeUndefined();

    expect(s.properties.getProperty(sdk.PropertyId.SpeechServiceConnection_SynthLanguage)).toEqual("zh-CN");
    expect(s.properties.getProperty(sdk.PropertyId.SpeechServiceConnection_SynthVoice)).toEqual("zh-CN-HuihuiRUS");
    expect(s.properties.getProperty(sdk.PropertyId.SpeechServiceConnection_SynthOutputFormat))
        .toEqual(sdk.SpeechSynthesisOutputFormat[sdk.SpeechSynthesisOutputFormat.Audio16Khz128KBitRateMonoMp3]);
});

describe.each([true])("Service based tests", (forceNodeWebSocket: boolean) => {

    beforeAll(() => {
        WebsocketMessageAdapter.forceNpmWebSocket = forceNodeWebSocket;
    });

    afterAll(() => {
        WebsocketMessageAdapter.forceNpmWebSocket = false;
    });

    test("testSpeechSynthesizerEvent1", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizerEvent1");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, undefined);
        objsToClose.push(s);

        expect(s).not.toBeUndefined();

        let audioLength: number = 0;
        let startEventCount: number = 0;
        let synthesisingEventCount: number = 0;
        let completeEventCount: number = 0;

        s.synthesisStarted = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
            // tslint:disable-next-line:no-console
            console.info("Synthesis started.");
            try {
                CheckSynthesisResult(e.result, sdk.ResultReason.SynthesizingAudioStarted);
            } catch (e) {
                done.fail(e);
            }
            startEventCount += 1;
        };

        s.synthesizing = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
            // tslint:disable-next-line:no-console
            console.info("Audio received with length of " + e.result.audioData.byteLength);
            audioLength += e.result.audioData.byteLength - 44;
            try {
                CheckSynthesisResult(e.result, sdk.ResultReason.SynthesizingAudio);
            } catch (e) {
                done.fail(e);
            }
            synthesisingEventCount += 1;
        };

        s.synthesisCompleted = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
            // tslint:disable-next-line:no-console
            console.info("Audio received with length of " + e.result.audioData.byteLength);
            try {
                CheckSynthesisResult(e.result, sdk.ResultReason.SynthesizingAudioCompleted);
                expect(e.result.audioData.byteLength - 44).toEqual(audioLength);
            } catch (e) {
                done.fail(e);
            }
            completeEventCount += 1;
        };

        s.wordBoundary = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisWordBoundaryEventArgs): void => {
            try {
                expect(e).not.toBeUndefined();
            } catch (e) {
                done.fail(e);
            }
        };

        s.speakTextAsync("hello world.", undefined, (e: string): void => {
            done.fail(e);
        });

        WaitForCondition((): boolean => {
            return completeEventCount !== 0;
        }, (): void => {
            expect(startEventCount).toEqual(1);
            expect(synthesisingEventCount).toBeGreaterThan(0);
            done();
        });
    });
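
    // Note (added commentary): the "- 44" adjustments above and in the tests that follow
    // are assumed to account for the RIFF/WAVE header that prefixes each synthesized
    // result, so the assertions compare raw PCM payload lengths only.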

    test("testSpeechSynthesizerSpeakTwice", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizerSpeakTwice");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, undefined);
        objsToClose.push(s);

        expect(s).not.toBeUndefined();

        s.speakTextAsync("hello world 1.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking finished, turn 1");
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
        }, (e: string): void => {
            done.fail(e);
        });

        s.speakTextAsync("hello world 2.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking finished, turn 2");
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
            done();
        }, (e: string): void => {
            done.fail(e);
        });
    });

    test("testSpeechSynthesizerToFile", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizerToFile");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);

        const audioConfig: sdk.AudioConfig = sdk.AudioConfig.fromAudioFileOutput("test.wav");
        expect(audioConfig).not.toBeUndefined();

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);

        expect(s).not.toBeUndefined();

        let audioLength: number = 0;

        s.speakTextAsync("hello world 1.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking finished, turn 1");
            audioLength += result.audioData.byteLength;
        }, (e: string): void => {
            done.fail(e);
        });

        s.speakTextAsync("hello world 2.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking finished, turn 2");
            audioLength += result.audioData.byteLength;
            s.close();
            // wait 2 seconds before checking the file size, as the async file write may not have finished yet.
            setTimeout(() => {
                const fileLength = fs.statSync("test.wav").size;
                expect(fileLength).toEqual(audioLength - 44);
                done();
            }, 2000);
        }, (e: string): void => {
            done.fail(e);
        });
    });

    test("testSpeechSynthesizer: synthesis to file in turn.", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizer synthesis to file in turn.");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        speechConfig.speechSynthesisOutputFormat = sdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3;
        objsToClose.push(speechConfig);

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
        expect(s).not.toBeUndefined();
        objsToClose.push(s);

        s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking finished.");
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
            // wait 2 seconds before checking the file size, as the async file write may not have finished yet.
            setTimeout(() => {
                const fileLength = fs.statSync("test1.mp3").size;
                expect(fileLength).toEqual(result.audioData.byteLength);
                done();
            }, 2000);
        }, (e: string): void => {
            done.fail(e);
        }, "test1.mp3");
    });

    test("testSpeechSynthesizerWordBoundary", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizerWordBoundary");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
        objsToClose.push(s);

        expect(s).not.toBeUndefined();

        let wordBoundaryCount: number = 0;

        s.wordBoundary = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisWordBoundaryEventArgs): void => {
            try {
                expect(e).not.toBeUndefined();
                expect(e.audioOffset).not.toBeUndefined();
                expect(e.text).not.toBeUndefined();
                expect(e.textOffset).not.toBeUndefined();
                expect(e.wordLength).not.toBeUndefined();
            } catch (e) {
                done.fail(e);
            }
            wordBoundaryCount += 1;
        };

        s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
            expect(wordBoundaryCount).toBeGreaterThan(0);
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
            done();
        }, (e: string): void => {
            done.fail(e);
        });
    });

    test("testSpeechSynthesizer: synthesis with SSML.", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizer synthesis with SSML.");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
        expect(s).not.toBeUndefined();
        objsToClose.push(s);

        let r: sdk.SpeechSynthesisResult;
        s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking text finished.");
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
            r = result;
        }, (e: string): void => {
            done.fail(e);
        });

        const ssml: string =
            `<speak version='1.0' xml:lang='en-US' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts'>
<voice name='Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)'>hello world.</voice></speak>`;
        s.speakSsmlAsync(ssml, (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking ssml finished.");
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
            CheckBinaryEqual(r.audioData, result.audioData);
            done();
        }, (e: string): void => {
            done.fail(e);
        });
    });

    test("testSpeechSynthesizer: synthesis with invalid key.", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizer synthesis with invalid key.");
        const speechConfig: sdk.SpeechConfig = sdk.SpeechConfig.fromSubscription("invalidKey", Settings.SpeechRegion);
        expect(speechConfig).not.toBeUndefined();
        objsToClose.push(speechConfig);

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
        expect(s).not.toBeUndefined();
        objsToClose.push(s);

        s.SynthesisCanceled = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
            try {
                CheckSynthesisResult(e.result, sdk.ResultReason.Canceled);
                expect(e.result.errorDetails).toContain("401");
                const cancellationDetail: sdk.CancellationDetails = sdk.CancellationDetails.fromResult(e.result);
                expect(cancellationDetail.ErrorCode).toEqual(sdk.CancellationErrorCode.ConnectionFailure);
                expect(cancellationDetail.reason).toEqual(sdk.CancellationReason.Error);
                expect(cancellationDetail.errorDetails).toEqual(e.result.errorDetails);
            } catch (err) {
                done.fail(err);
            }
        };

        s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
            CheckSynthesisResult(result, sdk.ResultReason.Canceled);
            expect(result.errorDetails).toContain("401");
            const cancellationDetail: sdk.CancellationDetails = sdk.CancellationDetails.fromResult(result);
            expect(cancellationDetail.ErrorCode).toEqual(sdk.CancellationErrorCode.ConnectionFailure);
            expect(cancellationDetail.reason).toEqual(sdk.CancellationReason.Error);
            expect(cancellationDetail.errorDetails).toEqual(result.errorDetails);
            done();
        }, (e: string): void => {
            done.fail(e);
        });
    });

    test("testSpeechSynthesizer: synthesis with invalid voice name.", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizer synthesis with invalid voice name.");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);
        speechConfig.speechSynthesisVoiceName = "invalid";

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, null);
        expect(s).not.toBeUndefined();
        objsToClose.push(s);

        s.SynthesisCanceled = (o: sdk.SpeechSynthesizer, e: sdk.SpeechSynthesisEventArgs): void => {
            try {
                CheckSynthesisResult(e.result, sdk.ResultReason.Canceled);
                expect(e.result.errorDetails).toContain("voice");
                const cancellationDetail: sdk.CancellationDetails = sdk.CancellationDetails.fromResult(e.result);
                expect(cancellationDetail.ErrorCode).toEqual(sdk.CancellationErrorCode.BadRequestParameters);
                expect(cancellationDetail.reason).toEqual(sdk.CancellationReason.Error);
                expect(cancellationDetail.errorDetails).toEqual(e.result.errorDetails);
            } catch (e) {
                done.fail(e);
            }
        };

        s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
            CheckSynthesisResult(result, sdk.ResultReason.Canceled);
            expect(result.errorDetails).toContain("voice");
            const cancellationDetail: sdk.CancellationDetails = sdk.CancellationDetails.fromResult(result);
            expect(cancellationDetail.ErrorCode).toEqual(sdk.CancellationErrorCode.BadRequestParameters);
            expect(cancellationDetail.reason).toEqual(sdk.CancellationReason.Error);
            expect(cancellationDetail.errorDetails).toEqual(result.errorDetails);
            done();
        }, (e: string): void => {
            done.fail(e);
        });
    });

    test("testSpeechSynthesizer: synthesis to pull audio output stream.", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizer synthesis to pull audio output stream.");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);

        const stream = sdk.AudioOutputStream.createPullStream();
        const audioConfig: sdk.AudioConfig = sdk.AudioConfig.fromStreamOutput(stream);
        expect(audioConfig).not.toBeUndefined();

        setTimeout(() => {
            ReadPullAudioOutputStream(stream, undefined, done, done.fail);
        }, 0);

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
        expect(s).not.toBeUndefined();

        s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking text finished.");
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
            s.close();
        }, (e: string): void => {
            done.fail(e);
        });
    });

    test("testSpeechSynthesizer: synthesis to pull audio output stream 2.", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizer synthesis to pull audio output stream 2.");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);

        const stream = sdk.AudioOutputStream.createPullStream();
        const audioConfig: sdk.AudioConfig = sdk.AudioConfig.fromStreamOutput(stream);
        expect(audioConfig).not.toBeUndefined();

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
        expect(s).not.toBeUndefined();

        s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking text finished.");
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
            s.close();
            ReadPullAudioOutputStream(stream, result.audioData.byteLength - 44, done, done.fail);
        }, (e: string): void => {
            done.fail(e);
        });
    });

    test("testSpeechSynthesizer: synthesis to push audio output stream.", (done: jest.DoneCallback) => {
        // tslint:disable-next-line:no-console
        console.info("Name: testSpeechSynthesizer synthesis to push audio output stream.");
        const speechConfig: sdk.SpeechConfig = BuildSpeechConfig();
        objsToClose.push(speechConfig);

        const stream = new PushAudioOutputStreamTestCallback();
        const audioConfig: sdk.AudioConfig = sdk.AudioConfig.fromStreamOutput(stream);
        expect(audioConfig).not.toBeUndefined();

        const s: sdk.SpeechSynthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);
        expect(s).not.toBeUndefined();

        s.speakTextAsync("hello world.", (result: sdk.SpeechSynthesisResult): void => {
            // tslint:disable-next-line:no-console
            console.info("speaking text finished.");
            CheckSynthesisResult(result, sdk.ResultReason.SynthesizingAudioCompleted);
            s.close();
            expect(stream.length).toEqual(result.audioData.byteLength - 44);
            expect(stream.isClosed).toEqual(true);
            done();
        }, (e: string): void => {
            done.fail(e);
        });
    });
});