This commit is contained in:
Marko Hietala 2023-07-24 16:26:06 -04:00
Родитель 62c942e744
Коммит 279d6f2ef6
4 изменённых файлов: 31 добавлений и 23 удалений

Просмотреть файл

@ -28,6 +28,7 @@ import { IAuthentication } from "./IAuthentication";
import { IConnectionFactory } from "./IConnectionFactory";
import { RecognizerConfig } from "./RecognizerConfig";
import { SpeechConnectionMessage } from "./SpeechConnectionMessage.Internal";
import { PhraseDetection, SpeakerDiarization } from "./ServiceRecognizerBase";
// eslint-disable-next-line max-classes-per-file
export class ConversationTranscriptionServiceRecognizer extends ServiceRecognizerBase {
@ -40,8 +41,21 @@ export class ConversationTranscriptionServiceRecognizer extends ServiceRecognize
audioSource: IAudioSource,
recognizerConfig: RecognizerConfig,
conversationTranscriber: ConversationTranscriber) {
super(authentication, connectionFactory, audioSource, recognizerConfig, conversationTranscriber, true);
super(authentication, connectionFactory, audioSource, recognizerConfig, conversationTranscriber);
this.privConversationTranscriber = conversationTranscriber;
this.setSpeakerDiarizationJson();
}
/**
 * Writes anonymous speaker-diarization settings into the "phraseDetection"
 * section of the speech context.
 *
 * Does nothing unless `privEnableSpeakerId` is set. Otherwise the current
 * "phraseDetection" section is fetched, its `speakerDiarization` field is
 * replaced, and the section is stored back under the same name.
 */
protected setSpeakerDiarizationJson(): void {
    // Guard: leave the speech context untouched when speaker id is disabled.
    if (!this.privEnableSpeakerId) {
        return;
    }

    const detectionSection = this.privSpeechContext.getSection("phraseDetection") as PhraseDetection;

    // Same three fields, in the same order, as the step-by-step assignments they replace.
    const diarizationSettings: SpeakerDiarization = {
        mode: "Anonymous",
        audioSessionId: this.privDiarizationSessionId,
        audioOffsetMs: 0,
    };

    detectionSection.speakerDiarization = diarizationSettings;
    this.privSpeechContext.setSection("phraseDetection", detectionSection);
}
protected async processTypeSpecificMessages(connectionMessage: SpeechConnectionMessage): Promise<boolean> {

Просмотреть файл

@ -23,6 +23,7 @@ export class RecognizerConfig {
private privRecognitionActivityTimeout: number;
private privParameters: PropertyCollection;
private privMaxRetryCount: number;
private privEnableSpeakerId: boolean;
public constructor(
speechServiceConfig: SpeechServiceConfig,
@ -31,6 +32,7 @@ export class RecognizerConfig {
this.privParameters = parameters;
this.privMaxRetryCount = parseInt(parameters.getProperty("SPEECH-Error-MaxRetryCount", "4"), 10);
this.privLanguageIdMode = parameters.getProperty(PropertyId.SpeechServiceConnection_LanguageIdMode, undefined);
this.privEnableSpeakerId = false;
}
public get parameters(): PropertyCollection {
@ -92,6 +94,14 @@ export class RecognizerConfig {
/**
 * Maximum retry count held by this config.
 *
 * The constructor parses the value (base 10) from the
 * "SPEECH-Error-MaxRetryCount" property, passing "4" as the fallback argument.
 */
public get maxRetryCount(): number {
return this.privMaxRetryCount;
}
/**
 * Whether speaker diarization is enabled on this config.
 *
 * Backed by `privEnableSpeakerId`, which the constructor initializes to false.
 */
public get isSpeakerDiarizationEnabled(): boolean {
return this.privEnableSpeakerId;
}
// Stores the new flag value in `privEnableSpeakerId`; no validation or other work is done here.
public set isSpeakerDiarizationEnabled(value: boolean) {
this.privEnableSpeakerId = value;
}
}
// The config is serialized and sent as the Speech.Config

Просмотреть файл

@ -57,7 +57,7 @@ interface CustomModel {
endpoint: string;
}
interface PhraseDetection {
export interface PhraseDetection {
customModels?: CustomModel[];
onInterim?: { action: string };
onSuccess?: { action: string };
@ -68,14 +68,14 @@ interface PhraseDetection {
speakerDiarization?: SpeakerDiarization;
}
interface SpeakerDiarization {
/**
 * Speaker-diarization settings stored in the optional `speakerDiarization`
 * field of `PhraseDetection`. Every field is optional.
 */
export interface SpeakerDiarization {
// Diarization mode; setSpeakerDiarizationJson sets it to "Anonymous".
mode?: string;
// Session identifier; setSpeakerDiarizationJson copies it from the recognizer's privDiarizationSessionId.
audioSessionId?: string;
// setSpeakerDiarizationJson sets this to 0. Presumably an audio offset in milliseconds - TODO confirm.
audioOffsetMs?: number;
// NOTE(review): not assigned anywhere in the visible code; meaning to be confirmed.
identityProvider?: string;
}
interface Segmentation {
export interface Segmentation {
segmentation: {
mode: "Custom";
segmentationSilenceTimeoutMs: number;
@ -121,8 +121,7 @@ export abstract class ServiceRecognizerBase implements IDisposable {
connectionFactory: IConnectionFactory,
audioSource: IAudioSource,
recognizerConfig: RecognizerConfig,
recognizer: Recognizer,
enableSpeakerId: boolean = false) {
recognizer: Recognizer) {
if (!authentication) {
throw new ArgumentNullError("authentication");
@ -140,7 +139,7 @@ export abstract class ServiceRecognizerBase implements IDisposable {
throw new ArgumentNullError("recognizerConfig");
}
this.privEnableSpeakerId = enableSpeakerId;
this.privEnableSpeakerId = recognizerConfig.isSpeakerDiarizationEnabled;
this.privMustReportEndOfStream = false;
this.privAuthentication = authentication;
this.privConnectionFactory = connectionFactory;
@ -176,12 +175,10 @@ export abstract class ServiceRecognizerBase implements IDisposable {
if (this.privEnableSpeakerId) {
this.privDiarizationSessionId = createNoDashGuid();
// this.setSpeakerDiarizationJson(phraseDetection);
}
this.setLanguageIdJson();
this.setOutputDetailLevelJson();
this.setSpeakerDiarizationJson();
}
protected setSpeechSegmentationTimeoutJson(): void{
@ -202,20 +199,6 @@ export abstract class ServiceRecognizerBase implements IDisposable {
}
}
/**
 * Adds anonymous speaker-diarization settings to the "phraseDetection"
 * section of the speech context.
 *
 * Does nothing unless `privEnableSpeakerId` is set. Otherwise the current
 * "phraseDetection" section is read, a `SpeakerDiarization` object carrying
 * mode "Anonymous", the recognizer's diarization session id and an audio
 * offset of 0 is attached to it, and the section is written back.
 *
 * The session id is deliberately not logged: library code should not write
 * debug output to the console.
 */
protected setSpeakerDiarizationJson(): void {
    if (this.privEnableSpeakerId) {
        const phraseDetection = this.privSpeechContext.getSection("phraseDetection") as PhraseDetection;
        const speakerDiarization: SpeakerDiarization = {};
        speakerDiarization.mode = "Anonymous";
        speakerDiarization.audioSessionId = this.privDiarizationSessionId;
        speakerDiarization.audioOffsetMs = 0;
        phraseDetection.speakerDiarization = speakerDiarization;
        // Store the updated section back so the diarization settings are part of the speech context.
        this.privSpeechContext.setSection("phraseDetection", phraseDetection);
    }
}
protected setLanguageIdJson(): void {
if (this.privRecognizerConfig.autoDetectSourceLanguages !== undefined) {
const phraseDetection = this.privSpeechContext.getSection("phraseDetection") as PhraseDetection;

Просмотреть файл

@ -235,6 +235,7 @@ export class ConversationTranscriber extends Recognizer {
audioConfig: AudioConfig,
recognizerConfig: RecognizerConfig): ServiceRecognizerBase {
const configImpl: AudioConfigImpl = audioConfig as AudioConfigImpl;
recognizerConfig.isSpeakerDiarizationEnabled = true;
return new ConversationTranscriptionServiceRecognizer(authentication, connectionFactory, configImpl, recognizerConfig, this);
}
}