From 4dcc4baf839b6a6fbee8ddab77e1b8b894d52818 Mon Sep 17 00:00:00 2001 From: Leo Leong Date: Thu, 28 Feb 2019 23:09:27 -0800 Subject: [PATCH] bugfixes Feb2019Update_Fabrikam_SpeechAPIv1.3-botv4_master (#81) * Retargeting projects to Win10 SDK Anniversary * bugfixes * fabrikam master bugfixes * fabrikam master bugfixes --- .../CallFabrikamCustomerService/App.config | 6 +- .../CallFabrikamCustomerService.csproj | 8 +- .../MainWindow.SpeechToText.xaml.cs | 148 ++++++------------ .../MainWindow.TextToSpeech.xaml.cs | 32 ++-- .../MainWindow.xaml.cs | 30 ++-- .../packages.config | 2 +- .../FabrikamCustomerServiceBot.csproj | 1 - .../FabrikamCustomerServiceBot/Web.config | 6 +- 8 files changed, 93 insertions(+), 140 deletions(-) diff --git a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/App.config b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/App.config index 04ccb71..3f439d2 100644 --- a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/App.config +++ b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/App.config @@ -5,11 +5,11 @@ - + - - + + diff --git a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/CallFabrikamCustomerService.csproj b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/CallFabrikamCustomerService.csproj index 07f7d4a..11badf4 100644 --- a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/CallFabrikamCustomerService.csproj +++ b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/CallFabrikamCustomerService.csproj @@ -95,8 +95,8 @@ ..\packages\Microsoft.Bot.Schema.4.2.2\lib\netstandard2.0\Microsoft.Bot.Schema.dll - - ..\packages\Microsoft.CognitiveServices.Speech.1.2.0\lib\net461\Microsoft.CognitiveServices.Speech.csharp.dll + + ..\packages\Microsoft.CognitiveServices.Speech.1.3.1\lib\net461\Microsoft.CognitiveServices.Speech.csharp.dll ..\packages\Microsoft.Extensions.Configuration.2.1.1\lib\netstandard2.0\Microsoft.Extensions.Configuration.dll @@ 
-275,13 +275,13 @@ This project references NuGet package(s) that are missing on this computer. Use NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. - + - + \ No newline at end of file diff --git a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.SpeechToText.xaml.cs b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.SpeechToText.xaml.cs index 16c5025..db06e4c 100644 --- a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.SpeechToText.xaml.cs +++ b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.SpeechToText.xaml.cs @@ -1,10 +1,8 @@ using Microsoft.CognitiveServices.Speech; using System; -using System.Collections.Generic; -using System.Linq; +using System.Diagnostics; using System.Media; using System.Reflection; -using System.Text; using System.Threading.Tasks; using System.Windows; @@ -12,41 +10,14 @@ namespace CallFabrikamCustomerService { public partial class MainWindow : Window { - //These are fields needed for using speech recognition client library aka. 
Project Oxford + //These are fields needed for using speech recognition client library private string DefaultLocale; - SoundPlayer thinking; - private TaskCompletionSource stopBaseRecognitionTaskCompletionSource; private SpeechRecognizer recognizer; - private void StartMicrophone() - { - stopBaseRecognitionTaskCompletionSource = new TaskCompletionSource(); - Task.Run(async () => { await CreateMicrophoneReco().ConfigureAwait(false); }); - } + SoundPlayer thinking; + - private void StopMicrophone() - { - //end mic recognition - recognizer.StopContinuousRecognitionAsync().Wait(); - - // unsubscribe from events - recognizer.Recognizing -= (sender, e) => RecognizingEventHandler(e); - recognizer.Recognized -= (sender, e) => RecognizedEventHandler(e); - recognizer.Canceled -= (sender, e) => CanceledEventHandler(e, stopBaseRecognitionTaskCompletionSource); - recognizer.SessionStarted -= (sender, e) => SessionStartedEventHandler(e, stopBaseRecognitionTaskCompletionSource); - recognizer.SessionStopped -= (sender, e) => SessionStoppedEventHandler(e, stopBaseRecognitionTaskCompletionSource); - recognizer.SpeechStartDetected -= (sender, e) => SpeechStartDetectedEventHandler(e); - recognizer.SpeechEndDetected -= (sender, e) => SpeechEndDetectedEventHandler(e); - - stopBaseRecognitionTaskCompletionSource.TrySetResult(0); - } - - /// - /// Creates Recognizer with English language and microphone - /// Creates a config with subscription key and selected region - /// Waits on RunRecognition - /// - private async Task CreateMicrophoneReco() + private void StartSpeechRecognition() { string path = Assembly.GetExecutingAssembly().Location; string path1 = System.IO.Path.Combine(System.IO.Path.GetDirectoryName(path), "Resources\\SpeechResponse_Thinking.wav"); @@ -54,116 +25,93 @@ namespace CallFabrikamCustomerService try { + //set the key, region, language and instantiate a recognizer var speechConfig = SpeechConfig.FromSubscription(this.MicrosoftSpeechApiKey, this.Region); 
speechConfig.SpeechRecognitionLanguage = this.DefaultLocale; - SpeechRecognizer basicRecognizer; + //instantiate new instance of speech recognizer and + //keeping it for the lifetime until the main window closes + recognizer = new SpeechRecognizer(speechConfig); - using (basicRecognizer = new SpeechRecognizer(speechConfig)) - { - await this.RunRecognizer(basicRecognizer, stopBaseRecognitionTaskCompletionSource).ConfigureAwait(false); - } + //wire up event handlers to speech events + recognizer.Recognizing += (sender, e) => RecognizingEventHandler(e); + recognizer.Recognized += (sender, e) => RecognizedEventHandler(e); + recognizer.Canceled += (sender, e) => CanceledEventHandler(e); + recognizer.SessionStarted += (sender, e) => SessionStartedEventHandler(e); + recognizer.SessionStopped += (sender, e) => SessionStoppedEventHandler(e); + recognizer.SpeechStartDetected += (sender, e) => SpeechStartDetectedEventHandler(e); + recognizer.SpeechEndDetected += (sender, e) => SpeechEndDetectedEventHandler(e); + + //start speech recognition + recognizer.StartContinuousRecognitionAsync(); } catch (Exception ex) { - WriteLine($"An exception occured:{ex}"); - Console.WriteLine($"An exception occured:{ex}"); + this.WriteLine($"An exception occured:{ex}"); + Debug.WriteLine($"An exception occured:{ex}"); } } - /// - /// Subscribes to Recognition Events - /// Starts the Recognition and waits until Final Result is received, then Stops recognition - /// - /// Recognizer object - /// Type of Recognizer - /// - /// Base if Baseline model; otherwise, Custom. 
- /// - private async Task RunRecognizer(SpeechRecognizer recogniz, TaskCompletionSource source) + private void StopSpeechRecognition() { - recognizer = recogniz; + //stop recognition + recognizer.StopContinuousRecognitionAsync().Wait(); - //subscribe to events - recognizer.Recognizing += (sender, e) => RecognizingEventHandler(e); - recognizer.Recognized += (sender, e) => RecognizedEventHandler(e); - recognizer.Canceled += (sender, e) => CanceledEventHandler(e, source); - recognizer.SessionStarted += (sender, e) => SessionStartedEventHandler(e, source); - recognizer.SessionStopped += (sender, e) => SessionStoppedEventHandler(e, source); - recognizer.SpeechStartDetected += (sender, e) => SpeechStartDetectedEventHandler(e); - recognizer.SpeechEndDetected += (sender, e) => SpeechEndDetectedEventHandler(e); - - //start,wait,stop recognition - await recognizer.StartContinuousRecognitionAsync().ConfigureAwait(false); - await source.Task.ConfigureAwait(false); - await recognizer.StopContinuousRecognitionAsync().ConfigureAwait(false); - - - // unsubscribe from events + //unsubscribe from events recognizer.Recognizing -= (sender, e) => RecognizingEventHandler(e); recognizer.Recognized -= (sender, e) => RecognizedEventHandler(e); - recognizer.Canceled -= (sender, e) => CanceledEventHandler(e, source); - recognizer.SessionStarted -= (sender, e) => SessionStartedEventHandler(e, source); - recognizer.SessionStopped -= (sender, e) => SessionStoppedEventHandler(e, source); + recognizer.Canceled -= (sender, e) => CanceledEventHandler(e); + recognizer.SessionStarted -= (sender, e) => SessionStartedEventHandler(e); + recognizer.SessionStopped -= (sender, e) => SessionStoppedEventHandler(e); recognizer.SpeechStartDetected -= (sender, e) => SpeechStartDetectedEventHandler(e); recognizer.SpeechEndDetected -= (sender, e) => SpeechEndDetectedEventHandler(e); } #region Recognition Event Handlers - /// - /// Logs Intermediate Recognition results - /// private void 
RecognizingEventHandler(SpeechRecognitionEventArgs e) { - recognizer.StopContinuousRecognitionAsync().Wait(); + //logs Intermediate Recognition results to Visual Studio Output Window + Debug.WriteLine("\n RecognizingEventHandler: {0}", e.Result); } - /// - /// Logs the Final result - /// private void RecognizedEventHandler(SpeechRecognitionEventArgs e) { - var result = e.Result; + //play the thinking sound to simulate processing transcription thinking.PlaySync(); - this.EchoResponseAsync(e).Wait(); - Console.WriteLine($"Reason: {result.Reason.ToString()}"); - if (result.Reason == ResultReason.RecognizedSpeech) + //display the result in the main window and send it to the bot + this.EchoResponse(e); + } + + private void CanceledEventHandler(SpeechRecognitionCanceledEventArgs e) + { + //show errors in main window + if (e.Reason == CancellationReason.Error) { - Console.WriteLine($"Final result: Text: {result.Text}."); + this.WriteLine($"Recognition Canceled. Reason: {e.Reason}, ErrorDetails: {e.ErrorDetails}"); } } - /// - /// Logs Cancel events - /// And sets the TaskCompletionSource to 0, in order to trigger Recognition Stop - /// - private void CanceledEventHandler(SpeechRecognitionCanceledEventArgs e, TaskCompletionSource source) + private void SessionStartedEventHandler(SessionEventArgs e) { - WriteLine($"\n Recognition Canceled. Reason: {e.Reason.ToString()}, CanceledReason: {e.Reason}"); - source.TrySetResult(0); - TransitionHangUpGui(); + //writing out to the label control to show progress/status + this.WriteLine("Session start detected. Please start speaking."); } - private void SessionStartedEventHandler(SessionEventArgs e, TaskCompletionSource source) + private void SessionStoppedEventHandler(SessionEventArgs e) { - WriteLine("Session start detected. 
Please start speaking."); - } - - private void SessionStoppedEventHandler(SessionEventArgs e, TaskCompletionSource source) - { - WriteLine("Session stop detected."); + this.WriteLine("Session stop detected."); } private void SpeechStartDetectedEventHandler(RecognitionEventArgs e) { - Console.WriteLine("\n Speech start detected."); + Debug.WriteLine("\n Speech start detected."); } private void SpeechEndDetectedEventHandler(RecognitionEventArgs e) { - Console.WriteLine("\n Speech end detected."); + Debug.WriteLine("\n Speech end detected."); } #endregion diff --git a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.TextToSpeech.xaml.cs b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.TextToSpeech.xaml.cs index 166ee9c..e15cfb5 100644 --- a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.TextToSpeech.xaml.cs +++ b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.TextToSpeech.xaml.cs @@ -21,7 +21,6 @@ namespace CallFabrikamCustomerService private SoundPlayer speech; - private string apiKey; private string accessToken; private Timer accessTokenRenewer; @@ -30,12 +29,12 @@ namespace CallFabrikamCustomerService private void CreateSpeechClient() { + //initialize cookie container, http client handler, http client and get an access token var cookieContainer = new CookieContainer(); httpHandler = new HttpClientHandler() { CookieContainer = new CookieContainer(), UseProxy = false }; httpClient = new HttpClient(httpHandler); - apiKey = MicrosoftSpeechApiKey; - accessToken = HttpPost(MicrosoftSpeechAccessTokenEndpoint, apiKey); + accessToken = HttpPost(MicrosoftSpeechAccessTokenEndpoint, MicrosoftSpeechApiKey); //This auto-renew the Speech API access token needed when doing a POST //The access token only last for 10min so we setup a timer to renew the it every 9min @@ -55,21 +54,26 @@ namespace CallFabrikamCustomerService //these are the minimum number of Speech API headers to 
include httpClient.DefaultRequestHeaders.TryAddWithoutValidation("Content-Type", "application/ssml+xml"); - httpClient.DefaultRequestHeaders.TryAddWithoutValidation("X-Microsoft-OutputFormat", "riff-24khz-16bit-mono-pcm"); + httpClient.DefaultRequestHeaders.TryAddWithoutValidation("X-Microsoft-OutputFormat", "riff-16khz-16bit-mono-pcm"); httpClient.DefaultRequestHeaders.TryAddWithoutValidation("User-Agent", "IntelligentApps/FabrikamInvestmentCustomerService"); httpClient.DefaultRequestHeaders.TryAddWithoutValidation("Authorization", "Bearer " + accessToken); - httpClient.DefaultRequestHeaders.Add("Connection", "Keep-Alive"); + httpClient.DefaultRequestHeaders.TryAddWithoutValidation("Host", "westus.tts.speech.microsoft.com"); + //initialize a new instance of http request message var request = new HttpRequestMessage(HttpMethod.Post, MicrosoftTextToSpeechEndpoint) { //we are making a few default assumptions here such as using English, Female & the speech voice to use - //for additional choices refer https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/supported-languages#text-to-speech + //for additional choices refer https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#text-to-speech Content = new StringContent(GenerateSsml("en-US", "Female", "Microsoft Server Speech Text to Speech Voice (en-US, ZiraRUS)", Text)) }; - var httpTask = httpClient.SendAsync(request); + //send the request, read the response stream and pass it to sound player to play the audio to speaker + Task httpTask = null; + Task saveTask = null; - var saveTask = httpTask.ContinueWith( + httpTask = httpClient.SendAsync(request); + + saveTask = httpTask.ContinueWith( async (responseMessage, token) => { try @@ -104,8 +108,10 @@ namespace CallFabrikamCustomerService //Helps generate SSML for posting to Text-to-Speech API private string GenerateSsml(string locale, string gender, string name, string text) { - var ssmlDoc = new XDocument( - new 
XElement("speak", + XDocument ssmlDoc = new XDocument(); + + //create SSML XML document that will be the payload for posting to speech api + ssmlDoc.Add(new XElement("speak", new XAttribute("version", "1.0"), new XAttribute(XNamespace.Xml + "lang", "en-US"), new XElement("voice", @@ -113,15 +119,17 @@ namespace CallFabrikamCustomerService new XAttribute(XNamespace.Xml + "gender", gender), new XAttribute("name", name), text))); + return ssmlDoc.ToString(); } //Callback method when the timer fires every 9min to renew speech token private void OnTokenExpiredCallback(object stateInfo) { + //do http post to get new token and assign new token to accessToken try { - string newAccessToken = HttpPost(MicrosoftSpeechAccessTokenEndpoint, apiKey); + string newAccessToken = HttpPost(MicrosoftSpeechAccessTokenEndpoint, MicrosoftSpeechApiKey); //swap the new token with old one //Note: the swap is thread unsafe accessToken = newAccessToken; @@ -175,4 +183,4 @@ namespace CallFabrikamCustomerService } } } -} +} \ No newline at end of file diff --git a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.xaml.cs b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.xaml.cs index 87eda48..cd1f14f 100644 --- a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.xaml.cs +++ b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/MainWindow.xaml.cs @@ -102,20 +102,17 @@ namespace CallFabrikamCustomerService //Event handler that will cleanup speech client when window is closed; essentially closing app private void OnMainWindowClosing(object sender, CancelEventArgs e) { - //cleanup tones + //cleanup resources dialTone.Dispose(); ringing.Dispose(); - //cleanup speech to text mic & thinking tone - if (this.recognizer != null) - { - this.recognizer.StopContinuousRecognitionAsync(); - recognizer.Dispose(); - - } + //check for null in case window closed without using recognizer if (this.thinking != null) thinking.Dispose(); + 
if (this.recognizer != null) + recognizer.Dispose(); + //cleanup text to speech http client, handler & speech audio if (this.httpClient != null) this.httpClient.Dispose(); @@ -140,6 +137,8 @@ namespace CallFabrikamCustomerService //we should wait until the dialing tone has been completed before continue dial.Wait(); + StartSpeechRecognition(); + //transition calling to connected GUI TransitionCallGui(); @@ -148,13 +147,13 @@ namespace CallFabrikamCustomerService var result = await this.GetBotReplyAsync("hi"); await PlaySpeechAudioAsync(result); - StartMicrophone(); + } //Handle the hang up button click private void keypadHangUpButton_Click(object sender, RoutedEventArgs e) { - StopMicrophone(); + StopSpeechRecognition(); //transition GUI back to ready to call TransitionHangUpGui(); @@ -214,7 +213,7 @@ namespace CallFabrikamCustomerService //Writes the response result. - private async Task EchoResponseAsync(SpeechRecognitionEventArgs e) + private void EchoResponse(SpeechRecognitionEventArgs e) { WriteLine("Speech To Text Result:"); //handle the case when there are no results. 
@@ -232,17 +231,16 @@ namespace CallFabrikamCustomerService e.Result.Text); WriteLine(); + var botReplyTask = this.GetBotReplyAsync(e.Result.Text); + string result = string.Empty; //send transcribed text to bot and get the response - result = await this.GetBotReplyAsync(e.Result.Text); + result = botReplyTask.Result; //Play audio from text to speech API - await PlaySpeechAudioAsync(result); + var speakTask = this.PlaySpeechAudioAsync(result); } - - //Start Microphone - StartMicrophone(); } //Creates a line break diff --git a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/packages.config b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/packages.config index e7a4c3a..5f30548 100644 --- a/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/packages.config +++ b/FabrikamInvestmentCustomerService/CallFabrikamCustomerService/packages.config @@ -11,7 +11,7 @@ - + diff --git a/FabrikamInvestmentCustomerService/FabrikamCustomerServiceBot/FabrikamCustomerServiceBot.csproj b/FabrikamInvestmentCustomerService/FabrikamCustomerServiceBot/FabrikamCustomerServiceBot.csproj index 947a6c8..49e30ef 100644 --- a/FabrikamInvestmentCustomerService/FabrikamCustomerServiceBot/FabrikamCustomerServiceBot.csproj +++ b/FabrikamInvestmentCustomerService/FabrikamCustomerServiceBot/FabrikamCustomerServiceBot.csproj @@ -298,7 +298,6 @@ Designer - Web.config diff --git a/FabrikamInvestmentCustomerService/FabrikamCustomerServiceBot/Web.config b/FabrikamInvestmentCustomerService/FabrikamCustomerServiceBot/Web.config index 7de2cf2..45922cb 100644 --- a/FabrikamInvestmentCustomerService/FabrikamCustomerServiceBot/Web.config +++ b/FabrikamInvestmentCustomerService/FabrikamCustomerServiceBot/Web.config @@ -10,9 +10,9 @@ - - - + + +