From 9e40aac1a1d71a363fa8ce4fb168e766db15f4ce Mon Sep 17 00:00:00 2001 From: Jean-Yves Avenard Date: Sat, 2 Dec 2017 09:13:35 +0100 Subject: [PATCH] Bug 1331696 - P3. Remove direct audio support from speech synth. r=eeejay MozReview-Commit-ID: E1UKOOvwUOe --HG-- extra : rebase_source : c955b0f33a86c7d84ce28d8ba7d838338b5c1cf4 --- .../cocoa/OSXSpeechSynthesizerService.mm | 9 +- .../synth/ipc/SpeechSynthesisChild.cpp | 16 +- .../synth/ipc/SpeechSynthesisChild.h | 8 +- .../synth/ipc/SpeechSynthesisParent.h | 14 +- .../webspeech/synth/nsISpeechService.idl | 46 +- dom/media/webspeech/synth/nsSpeechTask.cpp | 407 +----------------- dom/media/webspeech/synth/nsSpeechTask.h | 38 +- .../webspeech/synth/nsSynthVoiceRegistry.cpp | 16 +- .../webspeech/synth/nsSynthVoiceRegistry.h | 1 - .../synth/speechd/SpeechDispatcherService.cpp | 10 +- .../synth/test/nsFakeSynthServices.cpp | 133 ++---- .../synth/test/nsFakeSynthServices.h | 8 +- .../webspeech/synth/windows/SapiService.cpp | 11 +- 13 files changed, 62 insertions(+), 655 deletions(-) diff --git a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm index f089cc0bc145..36db7d33f7e8 100644 --- a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm +++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm @@ -439,7 +439,7 @@ OSXSpeechSynthesizerService::Speak(const nsAString& aText, } RefPtr callback = new SpeechTaskCallback(aTask, synth, offsets); - nsresult rv = aTask->Setup(callback, 0, 0, 0); + nsresult rv = aTask->Setup(callback); NS_ENSURE_SUCCESS(rv, rv); NSString* text = nsCocoaUtils::ToNSString(escapedText); @@ -452,13 +452,6 @@ OSXSpeechSynthesizerService::Speak(const nsAString& aText, NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT; } -NS_IMETHODIMP -OSXSpeechSynthesizerService::GetServiceType(SpeechServiceType* aServiceType) -{ - *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO; - return NS_OK; -} - NS_IMETHODIMP 
OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic, const char16_t* aData) diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp index 00577411cea5..889a74f1817e 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp @@ -166,21 +166,7 @@ SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsC } NS_IMETHODIMP -SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback, - uint32_t aChannels, uint32_t aRate, uint8_t argc) -{ - MOZ_CRASH("Should never be called from child"); -} - -NS_IMETHODIMP -SpeechTaskChild::SendAudio(JS::Handle aData, JS::Handle aLandmarks, - JSContext* aCx) -{ - MOZ_CRASH("Should never be called from child"); -} - -NS_IMETHODIMP -SpeechTaskChild::SendAudioNative(int16_t* aData, uint32_t aDataLen) +SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback) { MOZ_CRASH("Should never be called from child"); } diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h index 1d6f717e9181..dbc1210a32fb 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h @@ -85,13 +85,7 @@ public: explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome); - NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback, - uint32_t aChannels, uint32_t aRate, uint8_t argc) override; - - NS_IMETHOD SendAudio(JS::Handle aData, JS::Handle aLandmarks, - JSContext* aCx) override; - - NS_IMETHOD SendAudioNative(int16_t* aData, uint32_t aDataLen) override; + NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback) override; void Pause() override; diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h index 6bc9b3bd97ef..1dbebb1cfaf1 100644 --- 
a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h @@ -82,22 +82,22 @@ public: SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome) : nsSpeechTask(aVolume, aUtterance, aIsChrome) {} - nsresult DispatchStartImpl(const nsAString& aUri); + nsresult DispatchStartImpl(const nsAString& aUri) override; - nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex); + nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) override; - nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex); + nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) override; - nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex); + nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) override; - nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex); + nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) override; nsresult DispatchBoundaryImpl(const nsAString& aName, float aElapsedTime, uint32_t aCharIndex, - uint32_t aCharLength, uint8_t argc); + uint32_t aCharLength, uint8_t argc) override; nsresult DispatchMarkImpl(const nsAString& aName, - float aElapsedTime, uint32_t aCharIndex); + float aElapsedTime, uint32_t aCharIndex) override; private: SpeechSynthesisRequestParent* mActor; diff --git a/dom/media/webspeech/synth/nsISpeechService.idl b/dom/media/webspeech/synth/nsISpeechService.idl index 1f55a8caf721..879469ae2e71 100644 --- a/dom/media/webspeech/synth/nsISpeechService.idl +++ b/dom/media/webspeech/synth/nsISpeechService.idl @@ -5,12 +5,8 @@ #include "nsISupports.idl" -typedef unsigned short SpeechServiceType; - /** - * A callback is implemented by the service. For direct audio services, it is - * required to implement these, although it could be helpful to use the - * cancel method for shutting down the speech resources. + * A callback is implemented by the service. 
*/ [scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)] interface nsISpeechTaskCallback : nsISupports @@ -32,7 +28,6 @@ interface nsISpeechTaskCallback : nsISupports /** * The user or application has changed the volume of this speech. - * This is only used on indirect audio service type. */ void onVolumeChanged(in float aVolume); }; @@ -49,26 +44,8 @@ interface nsISpeechTask : nsISupports * Prepare browser for speech. * * @param aCallback callback object for mid-speech operations. - * @param aChannels number of audio channels. Only required - * in direct audio services - * @param aRate audio rate. Only required in direct audio services */ - [optional_argc] void setup(in nsISpeechTaskCallback aCallback, - [optional] in uint32_t aChannels, - [optional] in uint32_t aRate); - - /** - * Send audio data to browser. - * - * @param aData an Int16Array with PCM-16 audio data. - * @param aLandmarks an array of sample offset and landmark pairs. - * Used for emiting boundary and mark events. - */ - [implicit_jscontext] - void sendAudio(in jsval aData, in jsval aLandmarks); - - [noscript] - void sendAudioNative([array, size_is(aDataLen)] in short aData, in unsigned long aDataLen); + void setup(in nsISpeechTaskCallback aCallback); /** * Dispatch start event. @@ -132,15 +109,11 @@ interface nsISpeechTask : nsISupports /** * The main interface of a speech synthesis service. * - * A service's speak method could be implemented in two ways: - * 1. Indirect audio - the service is responsible for outputting audio. - * The service calls the nsISpeechTask.dispatch* methods directly. Starting - * with dispatchStart() and ending with dispatchEnd or dispatchError(). - * - * 2. Direct audio - the service provides us with PCM-16 data, and we output it. - * The service does not call the dispatch task methods directly. Instead, - * audio information is provided at setup(), and audio data is sent with - * sendAudio(). The utterance is terminated with an empty sendAudio(). 
+ * A service is responsible for outputting audio. + * The service dispatches events, starting with dispatchStart() and ending with + * dispatchEnd or dispatchError(). + * A service must also respond with the correct actions and events in response + * to implemented callback methods. */ [scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)] interface nsISpeechService : nsISupports @@ -161,11 +134,6 @@ interface nsISpeechService : nsISupports void speak(in DOMString aText, in DOMString aUri, in float aVolume, in float aRate, in float aPitch, in nsISpeechTask aTask); - - const SpeechServiceType SERVICETYPE_DIRECT_AUDIO = 1; - const SpeechServiceType SERVICETYPE_INDIRECT_AUDIO = 2; - - readonly attribute SpeechServiceType serviceType; }; %{C++ diff --git a/dom/media/webspeech/synth/nsSpeechTask.cpp b/dom/media/webspeech/synth/nsSpeechTask.cpp index b4fdffd45807..7c15a7475781 100644 --- a/dom/media/webspeech/synth/nsSpeechTask.cpp +++ b/dom/media/webspeech/synth/nsSpeechTask.cpp @@ -7,18 +7,11 @@ #include "AudioChannelAgent.h" #include "AudioChannelService.h" #include "AudioSegment.h" -#include "MediaStreamListener.h" #include "nsSpeechTask.h" #include "nsSynthVoiceRegistry.h" #include "SharedBuffer.h" #include "SpeechSynthesis.h" -// GetCurrentTime is defined in winbase.h as zero argument macro forwarding to -// GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime(). 
-#ifdef GetCurrentTime -#undef GetCurrentTime -#endif - #undef LOG extern mozilla::LogModule* GetSpeechSynthLog(); #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg) @@ -28,110 +21,6 @@ extern mozilla::LogModule* GetSpeechSynthLog(); namespace mozilla { namespace dom { -class SynthStreamListener : public MediaStreamListener -{ -public: - SynthStreamListener(nsSpeechTask* aSpeechTask, - MediaStream* aStream, - AbstractThread* aMainThread) - : mSpeechTask(aSpeechTask) - , mStream(aStream) - , mStarted(false) - { - } - - void DoNotifyStarted() - { - if (mSpeechTask) { - mSpeechTask->DispatchStartInner(); - } - } - - void DoNotifyFinished() - { - if (mSpeechTask) { - mSpeechTask->DispatchEndInner(mSpeechTask->GetCurrentTime(), - mSpeechTask->GetCurrentCharOffset()); - } - } - - void NotifyEvent(MediaStreamGraph* aGraph, - MediaStreamGraphEvent event) override - { - switch (event) { - case MediaStreamGraphEvent::EVENT_FINISHED: - { - RefPtr self = this; - if (!mStarted) { - mStarted = true; - aGraph->DispatchToMainThreadAfterStreamStateUpdate( - NS_NewRunnableFunction( - "dom::SynthStreamListener::NotifyEvent", - [self] { - // "start" event will be fired in DoNotifyStarted() which is - // not allowed in stable state, so we do it asynchronously in - // next run. - NS_DispatchToMainThread(NewRunnableMethod( - "dom::SynthStreamListener::DoNotifyStarted", - self, - &SynthStreamListener::DoNotifyStarted)); - })); - } - - aGraph->DispatchToMainThreadAfterStreamStateUpdate( - NS_NewRunnableFunction( - "dom::SynthStreamListener::NotifyEvent", - [self] { - // "end" event will be fired in DoNotifyFinished() which is - // not allowed in stable state, so we do it asynchronously in - // next run. 
- NS_DispatchToMainThread(NewRunnableMethod( - "dom::SynthStreamListener::DoNotifyFinished", - self, - &SynthStreamListener::DoNotifyFinished)); - })); - } - break; - case MediaStreamGraphEvent::EVENT_REMOVED: - mSpeechTask = nullptr; - // Dereference MediaStream to destroy safety - mStream = nullptr; - break; - default: - break; - } - } - - void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked) override - { - if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) { - mStarted = true; - RefPtr self = this; - aGraph->DispatchToMainThreadAfterStreamStateUpdate( - NS_NewRunnableFunction( - "dom::SynthStreamListener::NotifyBlockingChanged", - [self] { - // "start" event will be fired in DoNotifyStarted() which is - // not allowed in stable state, so we do it asynchronously in - // next run. - NS_DispatchToMainThread(NewRunnableMethod( - "dom::SynthStreamListener::DoNotifyStarted", - self, - &SynthStreamListener::DoNotifyStarted)); - })); - } - } - -private: - // Raw pointer; if we exist, the stream exists, - // and 'mSpeechTask' exclusively owns it and therefor exists as well. 
- nsSpeechTask* mSpeechTask; - // This is KungFuDeathGrip for MediaStream - RefPtr mStream; - - bool mStarted; -}; - // nsSpeechTask NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance, mCallback); @@ -152,7 +41,6 @@ nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome) , mPrePaused(false) , mPreCanceled(false) , mCallback(nullptr) - , mIndirectAudio(false) , mIsChrome(aIsChrome) { mText = aUtterance->mText; @@ -167,7 +55,6 @@ nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome , mPrePaused(false) , mPreCanceled(false) , mCallback(nullptr) - , mIndirectAudio(false) , mIsChrome(aIsChrome) { } @@ -175,38 +62,11 @@ nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome nsSpeechTask::~nsSpeechTask() { LOG(LogLevel::Debug, ("~nsSpeechTask")); - if (mStream) { - if (!mStream->IsDestroyed()) { - mStream->Destroy(); - } - - // This will finally destroyed by SynthStreamListener becasue - // MediaStream::Destroy() is async. - mStream = nullptr; - } - - if (mPort) { - mPort->Destroy(); - mPort = nullptr; - } } void -nsSpeechTask::InitDirectAudio() +nsSpeechTask::Init() { - // nullptr as final argument here means that this is not tied to a window. - // This is a global MSG. 
- mStream = MediaStreamGraph::GetInstance(MediaStreamGraph::AUDIO_THREAD_DRIVER, - nullptr)-> - CreateSourceStream(); - mIndirectAudio = false; - mInited = true; -} - -void -nsSpeechTask::InitIndirectAudio() -{ - mIndirectAudio = true; mInited = true; } @@ -217,8 +77,7 @@ nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri) } NS_IMETHODIMP -nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback, - uint32_t aChannels, uint32_t aRate, uint8_t argc) +nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback) { MOZ_ASSERT(XRE_IsParentProcess()); @@ -226,169 +85,11 @@ nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback, mCallback = aCallback; - if (mIndirectAudio) { - MOZ_ASSERT(!mStream); - if (argc > 0) { - NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services."); - } - return NS_OK; - } - - // mStream is set up in Init() that should be called before this. - MOZ_ASSERT(mStream); - - mStream->AddListener( - // Non DocGroup-version of AbstractThread::MainThread for the task in parent. 
- new SynthStreamListener(this, mStream, AbstractThread::MainThread())); - - // XXX: Support more than one channel - if(NS_WARN_IF(!(aChannels == 1))) { - return NS_ERROR_FAILURE; - } - - mChannels = aChannels; - - AudioSegment* segment = new AudioSegment(); - mStream->AddAudioTrack(AUDIO_TRACK, aRate, 0, segment); - mStream->AddAudioOutput(this); - mStream->SetAudioOutputVolume(this, mVolume); - return NS_OK; } -static RefPtr -makeSamples(int16_t* aData, uint32_t aDataLen) -{ - RefPtr samples = - SharedBuffer::Create(aDataLen * sizeof(int16_t)); - int16_t* frames = static_cast(samples->Data()); - - for (uint32_t i = 0; i < aDataLen; i++) { - frames[i] = aData[i]; - } - - return samples; -} - -NS_IMETHODIMP -nsSpeechTask::SendAudio(JS::Handle aData, JS::Handle aLandmarks, - JSContext* aCx) -{ - MOZ_ASSERT(XRE_IsParentProcess()); - - if(NS_WARN_IF(!(mStream))) { - return NS_ERROR_NOT_AVAILABLE; - } - if(NS_WARN_IF(mStream->IsDestroyed())) { - return NS_ERROR_NOT_AVAILABLE; - } - if(NS_WARN_IF(!(mChannels))) { - return NS_ERROR_FAILURE; - } - if(NS_WARN_IF(!(aData.isObject()))) { - return NS_ERROR_INVALID_ARG; - } - - if (mIndirectAudio) { - NS_WARNING("Can't call SendAudio from an indirect audio speech service."); - return NS_ERROR_FAILURE; - } - - JS::Rooted darray(aCx, &aData.toObject()); - JSAutoCompartment ac(aCx, darray); - - JS::Rooted tsrc(aCx, nullptr); - - // Allow either Int16Array or plain JS Array - if (JS_IsInt16Array(darray)) { - tsrc = darray; - } else { - bool isArray; - if (!JS_IsArrayObject(aCx, darray, &isArray)) { - return NS_ERROR_UNEXPECTED; - } - if (isArray) { - tsrc = JS_NewInt16ArrayFromArray(aCx, darray); - } - } - - if (!tsrc) { - return NS_ERROR_DOM_TYPE_MISMATCH_ERR; - } - - uint32_t dataLen = JS_GetTypedArrayLength(tsrc); - RefPtr samples; - { - JS::AutoCheckCannotGC nogc; - bool isShared; - int16_t* data = JS_GetInt16ArrayData(tsrc, &isShared, nogc); - if (isShared) { - // Must opt in to using shared data. 
- return NS_ERROR_DOM_TYPE_MISMATCH_ERR; - } - samples = makeSamples(data, dataLen); - } - SendAudioImpl(samples, dataLen); - - return NS_OK; -} - -NS_IMETHODIMP -nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen) -{ - MOZ_ASSERT(XRE_IsParentProcess()); - - if(NS_WARN_IF(!(mStream))) { - return NS_ERROR_NOT_AVAILABLE; - } - if(NS_WARN_IF(mStream->IsDestroyed())) { - return NS_ERROR_NOT_AVAILABLE; - } - if(NS_WARN_IF(!(mChannels))) { - return NS_ERROR_FAILURE; - } - - if (mIndirectAudio) { - NS_WARNING("Can't call SendAudio from an indirect audio speech service."); - return NS_ERROR_FAILURE; - } - - RefPtr samples = makeSamples(aData, aDataLen); - SendAudioImpl(samples, aDataLen); - - return NS_OK; -} - -void -nsSpeechTask::SendAudioImpl(RefPtr& aSamples, uint32_t aDataLen) -{ - if (aDataLen == 0) { - mStream->EndAllTrackAndFinish(); - return; - } - - AudioSegment segment; - AutoTArray channelData; - channelData.AppendElement(static_cast(aSamples->Data())); - segment.AppendFrames(aSamples.forget(), channelData, aDataLen, - PRINCIPAL_HANDLE_NONE); - mStream->AppendToTrack(1, &segment); - mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX); -} - NS_IMETHODIMP nsSpeechTask::DispatchStart() -{ - if (!mIndirectAudio) { - NS_WARNING("Can't call DispatchStart() from a direct audio speech service"); - return NS_ERROR_FAILURE; - } - - return DispatchStartInner(); -} - -nsresult -nsSpeechTask::DispatchStartInner() { nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true); return DispatchStartImpl(); @@ -403,7 +104,7 @@ nsSpeechTask::DispatchStartImpl() nsresult nsSpeechTask::DispatchStartImpl(const nsAString& aUri) { - LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStart")); + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl")); MOZ_ASSERT(mUtterance); if(NS_WARN_IF(!(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING))) { @@ -422,17 +123,6 @@ nsSpeechTask::DispatchStartImpl(const nsAString& aUri) NS_IMETHODIMP nsSpeechTask::DispatchEnd(float 
aElapsedTime, uint32_t aCharIndex) -{ - if (!mIndirectAudio) { - NS_WARNING("Can't call DispatchEnd() from a direct audio speech service"); - return NS_ERROR_FAILURE; - } - - return DispatchEndInner(aElapsedTime, aCharIndex); -} - -nsresult -nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex) { // After we end, no callback functions should go through. mCallback = nullptr; @@ -447,7 +137,7 @@ nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex) nsresult nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) { - LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEnd\n")); + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl")); DestroyAudioChannelAgent(); @@ -456,11 +146,6 @@ nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) return NS_ERROR_NOT_AVAILABLE; } - // XXX: This should not be here, but it prevents a crash in MSG. - if (mStream) { - mStream->Destroy(); - } - RefPtr utterance = mUtterance; if (mSpeechSynthesis) { @@ -482,18 +167,13 @@ nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) NS_IMETHODIMP nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) { - if (!mIndirectAudio) { - NS_WARNING("Can't call DispatchPause() from a direct audio speech service"); - return NS_ERROR_FAILURE; - } - return DispatchPauseImpl(aElapsedTime, aCharIndex); } nsresult nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) { - LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPause")); + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl")); MOZ_ASSERT(mUtterance); if(NS_WARN_IF(mUtterance->mPaused)) { return NS_ERROR_NOT_AVAILABLE; @@ -514,18 +194,13 @@ nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) NS_IMETHODIMP nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) { - if (!mIndirectAudio) { - NS_WARNING("Can't call DispatchResume() from a direct audio speech service"); - return NS_ERROR_FAILURE; - } - return 
DispatchResumeImpl(aElapsedTime, aCharIndex); } nsresult nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) { - LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResume")); + LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl")); MOZ_ASSERT(mUtterance); if(NS_WARN_IF(!(mUtterance->mPaused))) { return NS_ERROR_NOT_AVAILABLE; @@ -547,7 +222,7 @@ nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) void nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) { - DispatchErrorInner(aElapsedTime, aCharIndex); + DispatchError(aElapsedTime, aCharIndex); } NS_IMETHODIMP @@ -555,17 +230,6 @@ nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) { LOG(LogLevel::Debug, ("nsSpeechTask::DispatchError")); - if (!mIndirectAudio) { - NS_WARNING("Can't call DispatchError() from a direct audio speech service"); - return NS_ERROR_FAILURE; - } - - return DispatchErrorInner(aElapsedTime, aCharIndex); -} - -nsresult -nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex) -{ if (!mPreCanceled) { nsSynthVoiceRegistry::GetInstance()->SpeakNext(); } @@ -597,11 +261,6 @@ nsSpeechTask::DispatchBoundary(const nsAString& aName, float aElapsedTime, uint32_t aCharIndex, uint32_t aCharLength, uint8_t argc) { - if (!mIndirectAudio) { - NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service"); - return NS_ERROR_FAILURE; - } - return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength, argc); } @@ -626,11 +285,6 @@ NS_IMETHODIMP nsSpeechTask::DispatchMark(const nsAString& aName, float aElapsedTime, uint32_t aCharIndex) { - if (!mIndirectAudio) { - NS_WARNING("Can't call DispatchMark() from a direct audio speech service"); - return NS_ERROR_FAILURE; - } - return DispatchMarkImpl(aName, aElapsedTime, aCharIndex); } @@ -659,17 +313,9 @@ nsSpeechTask::Pause() NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback"); } - if (mStream) { - mStream->Suspend(); - } - 
if (!mInited) { mPrePaused = true; } - - if (!mIndirectAudio) { - DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset()); - } } void @@ -683,18 +329,10 @@ nsSpeechTask::Resume() "Unable to call onResume() callback"); } - if (mStream) { - mStream->Resume(); - } - if (mPrePaused) { mPrePaused = false; nsSynthVoiceRegistry::GetInstance()->ResumeQueue(); } - - if (!mIndirectAudio) { - DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset()); - } } void @@ -710,43 +348,19 @@ nsSpeechTask::Cancel() "Unable to call onCancel() callback"); } - if (mStream) { - mStream->Suspend(); - } - if (!mInited) { mPreCanceled = true; } - - if (!mIndirectAudio) { - DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset()); - } } void nsSpeechTask::ForceEnd() { - if (mStream) { - mStream->Suspend(); - } - if (!mInited) { mPreCanceled = true; } - DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset()); -} - -float -nsSpeechTask::GetCurrentTime() -{ - return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0; -} - -uint32_t -nsSpeechTask::GetCurrentCharOffset() -{ - return mStream && mStream->IsFinished() ? 
mText.Length() : 0; + DispatchEnd(0, 0); } void @@ -823,10 +437,7 @@ nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) void nsSpeechTask::SetAudioOutputVolume(float aVolume) { - if (mStream && !mStream->IsDestroyed()) { - mStream->SetAudioOutputVolume(this, aVolume); - } - if (mIndirectAudio && mCallback) { + if (mCallback) { mCallback->OnVolumeChanged(aVolume); } } diff --git a/dom/media/webspeech/synth/nsSpeechTask.h b/dom/media/webspeech/synth/nsSpeechTask.h index d43ec10c4319..76e32d57d3b7 100644 --- a/dom/media/webspeech/synth/nsSpeechTask.h +++ b/dom/media/webspeech/synth/nsSpeechTask.h @@ -7,7 +7,6 @@ #ifndef mozilla_dom_nsSpeechTask_h #define mozilla_dom_nsSpeechTask_h -#include "MediaStreamGraph.h" #include "SpeechSynthesisUtterance.h" #include "nsIAudioChannelAgent.h" #include "nsISpeechService.h" @@ -46,14 +45,9 @@ public: virtual void ForceEnd(); - float GetCurrentTime(); - - uint32_t GetCurrentCharOffset(); - void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis); - void InitDirectAudio(); - void InitIndirectAudio(); + void Init(); void SetChosenVoiceURI(const nsAString& aUri); @@ -61,20 +55,11 @@ public: void ForceError(float aElapsedTime, uint32_t aCharIndex); - bool IsPreCanceled() - { - return mPreCanceled; - }; + bool IsPreCanceled() { return mPreCanceled; }; - bool IsPrePaused() - { - return mPrePaused; - } + bool IsPrePaused() { return mPrePaused; } - bool IsChrome() - { - return mIsChrome; - } + bool IsChrome() { return mIsChrome; } protected: virtual ~nsSpeechTask(); @@ -115,31 +100,16 @@ protected: private: void End(); - void SendAudioImpl(RefPtr& aSamples, uint32_t aDataLen); - - nsresult DispatchStartInner(); - - nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex); - nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex); - void CreateAudioChannelAgent(); void DestroyAudioChannelAgent(); - RefPtr mStream; - - RefPtr mPort; - nsCOMPtr mCallback; nsCOMPtr mAudioChannelAgent; - uint32_t mChannels; - RefPtr 
mSpeechSynthesis; - bool mIndirectAudio; - nsString mChosenVoiceURI; bool mIsChrome; diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp index b4a454e82fef..c918f7317ac9 100644 --- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp @@ -824,23 +824,11 @@ nsSynthVoiceRegistry::SpeakImpl(VoiceData* aVoice, NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aVoice->mUri).get(), aRate, aPitch)); - SpeechServiceType serviceType; - - DebugOnly rv = aVoice->mService->GetServiceType(&serviceType); - NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to get speech service type"); - - if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) { - aTask->InitIndirectAudio(); - } else { - aTask->InitDirectAudio(); - } + aTask->Init(); if (NS_FAILED(aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate, aPitch, aTask))) { - if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) { - aTask->DispatchError(0, 0); - } - // XXX When using direct audio, no way to dispatch error + aTask->DispatchError(0, 0); } } diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h index e545d773366b..d72fe32994dd 100644 --- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h @@ -10,7 +10,6 @@ #include "nsISynthVoiceRegistry.h" #include "nsRefPtrHashtable.h" #include "nsTArray.h" -#include "MediaStreamGraph.h" class nsISpeechService; diff --git a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp index c5c6698a22c4..51f4ce7d25ff 100644 --- a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp +++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp @@ -513,8 +513,7 @@ SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri, // 
speech-dispatcher expects -100 to 100 with 0 being default. spd_set_voice_pitch(mSpeechdClient, static_cast((aPitch - 1) * 100)); - // The last three parameters don't matter for an indirect service - nsresult rv = aTask->Setup(callback, 0, 0, 0); + nsresult rv = aTask->Setup(callback); if (NS_FAILED(rv)) { return rv; @@ -549,13 +548,6 @@ SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri, return NS_OK; } -NS_IMETHODIMP -SpeechDispatcherService::GetServiceType(SpeechServiceType* aServiceType) -{ - *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO; - return NS_OK; -} - SpeechDispatcherService* SpeechDispatcherService::GetInstance(bool create) { diff --git a/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp index ed3e1402e5f4..b66daa14369a 100644 --- a/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp +++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp @@ -44,20 +44,17 @@ struct VoiceDetails uint32_t flags; }; -static const VoiceDetails sDirectVoices[] = { - {"urn:moz-tts:fake-direct:bob", "Bob Marley", "en-JM", true, 0}, - {"urn:moz-tts:fake-direct:amy", "Amy Winehouse", "en-GB", false, 0}, - {"urn:moz-tts:fake-direct:lenny", "Leonard Cohen", "en-CA", false, 0}, - {"urn:moz-tts:fake-direct:celine", "Celine Dion", "fr-CA", false, 0}, - {"urn:moz-tts:fake-direct:julie", "Julieta Venegas", "es-MX", false, }, -}; - -static const VoiceDetails sIndirectVoices[] = { - {"urn:moz-tts:fake-indirect:zanetta", "Zanetta Farussi", "it-IT", false, 0}, - {"urn:moz-tts:fake-indirect:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd}, - {"urn:moz-tts:fake-indirect:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd}, - {"urn:moz-tts:fake-indirect:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart}, - {"urn:moz-tts:fake-indirect:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail}, 
+static const VoiceDetails sVoices[] = { + {"urn:moz-tts:fake:bob", "Bob Marley", "en-JM", true, 0}, + {"urn:moz-tts:fake:amy", "Amy Winehouse", "en-GB", false, 0}, + {"urn:moz-tts:fake:lenny", "Leonard Cohen", "en-CA", false, 0}, + {"urn:moz-tts:fake:celine", "Celine Dion", "fr-CA", false, 0}, + {"urn:moz-tts:fake:julie", "Julieta Venegas", "es-MX", false, }, + {"urn:moz-tts:fake:zanetta", "Zanetta Farussi", "it-IT", false, 0}, + {"urn:moz-tts:fake:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd}, + {"urn:moz-tts:fake:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd}, + {"urn:moz-tts:fake:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart}, + {"urn:moz-tts:fake:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail}, }; // FakeSynthCallback @@ -116,90 +113,25 @@ NS_INTERFACE_MAP_END NS_IMPL_CYCLE_COLLECTING_ADDREF(FakeSynthCallback) NS_IMPL_CYCLE_COLLECTING_RELEASE(FakeSynthCallback) -// FakeDirectAudioSynth +// FakeSpeechSynth -class FakeDirectAudioSynth : public nsISpeechService +class FakeSpeechSynth : public nsISpeechService { public: - FakeDirectAudioSynth() { } + FakeSpeechSynth() {} NS_DECL_ISUPPORTS NS_DECL_NSISPEECHSERVICE private: - virtual ~FakeDirectAudioSynth() { } + virtual ~FakeSpeechSynth() { } }; -NS_IMPL_ISUPPORTS(FakeDirectAudioSynth, nsISpeechService) +NS_IMPL_ISUPPORTS(FakeSpeechSynth, nsISpeechService) NS_IMETHODIMP -FakeDirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri, - float aVolume, float aRate, float aPitch, - nsISpeechTask* aTask) -{ - class Runnable final : public mozilla::Runnable - { - public: - Runnable(nsISpeechTask* aTask, const nsAString& aText) - : mozilla::Runnable("Runnable") - , mTask(aTask) - , mText(aText) - { - } - - NS_IMETHOD Run() override - { - RefPtr cb = new FakeSynthCallback(nullptr); - mTask->Setup(cb, CHANNELS, SAMPLERATE, 2); - - // Just an arbitrary multiplier. 
Pretend that each character is - // synthesized to 40 frames. - uint32_t frames_length = 40 * mText.Length(); - auto frames = MakeUnique(frames_length); - mTask->SendAudioNative(frames.get(), frames_length); - - mTask->SendAudioNative(nullptr, 0); - - return NS_OK; - } - - private: - nsCOMPtr mTask; - nsString mText; - }; - - nsCOMPtr runnable = new Runnable(aTask, aText); - NS_DispatchToMainThread(runnable); - return NS_OK; -} - -NS_IMETHODIMP -FakeDirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType) -{ - *aServiceType = nsISpeechService::SERVICETYPE_DIRECT_AUDIO; - return NS_OK; -} - -// FakeDirectAudioSynth - -class FakeIndirectAudioSynth : public nsISpeechService -{ - -public: - FakeIndirectAudioSynth() {} - - NS_DECL_ISUPPORTS - NS_DECL_NSISPEECHSERVICE - -private: - virtual ~FakeIndirectAudioSynth() { } -}; - -NS_IMPL_ISUPPORTS(FakeIndirectAudioSynth, nsISpeechService) - -NS_IMETHODIMP -FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri, +FakeSpeechSynth::Speak(const nsAString& aText, const nsAString& aUri, float aVolume, float aRate, float aPitch, nsISpeechTask* aTask) { @@ -268,9 +200,10 @@ FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri, }; uint32_t flags = 0; - for (uint32_t i = 0; i < ArrayLength(sIndirectVoices); i++) { - if (aUri.EqualsASCII(sIndirectVoices[i].uri)) { - flags = sIndirectVoices[i].flags; + for (VoiceDetails voice : sVoices) { + if (aUri.EqualsASCII(voice.uri)) { + flags = voice.flags; + break; } } @@ -281,7 +214,7 @@ FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri, RefPtr cb = new FakeSynthCallback( (flags & eSuppressEvents) ? 
nullptr : aTask); - aTask->Setup(cb, 0, 0, 0); + aTask->Setup(cb); nsCOMPtr runnable = new DispatchStart(aTask); NS_DispatchToMainThread(runnable); @@ -297,13 +230,6 @@ FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri, return NS_OK; } -NS_IMETHODIMP -FakeIndirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType) -{ - *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO; - return NS_OK; -} - // nsFakeSynthService NS_INTERFACE_MAP_BEGIN(nsFakeSynthServices) @@ -314,14 +240,6 @@ NS_INTERFACE_MAP_END NS_IMPL_ADDREF(nsFakeSynthServices) NS_IMPL_RELEASE(nsFakeSynthServices) -nsFakeSynthServices::nsFakeSynthServices() -{ -} - -nsFakeSynthServices::~nsFakeSynthServices() -{ -} - static void AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, uint32_t aLength) { @@ -344,11 +262,8 @@ AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, uint32_t aLen void nsFakeSynthServices::Init() { - mDirectService = new FakeDirectAudioSynth(); - AddVoices(mDirectService, sDirectVoices, ArrayLength(sDirectVoices)); - - mIndirectService = new FakeIndirectAudioSynth(); - AddVoices(mIndirectService, sIndirectVoices, ArrayLength(sIndirectVoices)); + mSynthService = new FakeSpeechSynth(); + AddVoices(mSynthService, sVoices, ArrayLength(sVoices)); } // nsIObserver diff --git a/dom/media/webspeech/synth/test/nsFakeSynthServices.h b/dom/media/webspeech/synth/test/nsFakeSynthServices.h index bab93e779828..893d450972dd 100644 --- a/dom/media/webspeech/synth/test/nsFakeSynthServices.h +++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.h @@ -25,7 +25,7 @@ public: NS_DECL_ISUPPORTS NS_DECL_NSIOBSERVER - nsFakeSynthServices(); + nsFakeSynthServices() = default; static nsFakeSynthServices* GetInstance(); @@ -35,13 +35,11 @@ public: private: - virtual ~nsFakeSynthServices(); + virtual ~nsFakeSynthServices() = default; void Init(); - nsCOMPtr mDirectService; - - nsCOMPtr mIndirectService; + nsCOMPtr mSynthService; static 
StaticRefPtr sSingleton; }; diff --git a/dom/media/webspeech/synth/windows/SapiService.cpp b/dom/media/webspeech/synth/windows/SapiService.cpp index 2919d2f30ec6..828966e73a4c 100644 --- a/dom/media/webspeech/synth/windows/SapiService.cpp +++ b/dom/media/webspeech/synth/windows/SapiService.cpp @@ -402,14 +402,14 @@ SapiService::Speak(const nsAString& aText, const nsAString& aUri, new SapiCallback(aTask, spVoice, textOffset, aText.Length()); // The last three parameters doesn't matter for an indirect service - nsresult rv = aTask->Setup(callback, 0, 0, 0); + nsresult rv = aTask->Setup(callback); if (NS_FAILED(rv)) { return rv; } ULONG streamNum; if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) { - aTask->Setup(nullptr, 0, 0, 0); + aTask->Setup(nullptr); return NS_ERROR_FAILURE; } @@ -422,13 +422,6 @@ SapiService::Speak(const nsAString& aText, const nsAString& aUri, return NS_OK; } -NS_IMETHODIMP -SapiService::GetServiceType(SpeechServiceType* aServiceType) -{ - *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO; - return NS_OK; -} - NS_IMETHODIMP SapiService::Observe(nsISupports* aSubject, const char* aTopic, const char16_t* aData)