Bug 1331696 - P3. Remove direct audio support from speech synth. r=eeejay

MozReview-Commit-ID: E1UKOOvwUOe --HG-- extra : rebase_source : c955b0f33a86c7d84ce28d8ba7d838338b5c1cf4
2017-12-02 09:13:35 +01:00 · 2017-12-02 09:13:35 +01:00 · 9e40aac1a1
--- a/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
+++ b/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
@ -439,7 +439,7 @@ OSXSpeechSynthesizerService::Speak(const nsAString& aText,
  }

  RefPtr<SpeechTaskCallback> callback = new SpeechTaskCallback(aTask, synth, offsets);
-  nsresult rv = aTask->Setup(callback, 0, 0, 0);
+  nsresult rv = aTask->Setup(callback);
  NS_ENSURE_SUCCESS(rv, rv);

  NSString* text = nsCocoaUtils::ToNSString(escapedText);
@ -452,13 +452,6 @@ OSXSpeechSynthesizerService::Speak(const nsAString& aText,
  NS_OBJC_END_TRY_ABORT_BLOCK_NSRESULT;
 }

-NS_IMETHODIMP
-OSXSpeechSynthesizerService::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
-  return NS_OK;
-}
-
 NS_IMETHODIMP
 OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
                                     const char16_t* aData)
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@ -166,21 +166,7 @@ SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsC
 }

 NS_IMETHODIMP
-SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback,
-                       uint32_t aChannels, uint32_t aRate, uint8_t argc)
-{
-  MOZ_CRASH("Should never be called from child");
-}
-
-NS_IMETHODIMP
-SpeechTaskChild::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
-                           JSContext* aCx)
-{
-  MOZ_CRASH("Should never be called from child");
-}
-
-NS_IMETHODIMP
-SpeechTaskChild::SendAudioNative(int16_t* aData, uint32_t aDataLen)
+SpeechTaskChild::Setup(nsISpeechTaskCallback* aCallback)
 {
  MOZ_CRASH("Should never be called from child");
 }
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@ -85,13 +85,7 @@ public:

  explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome);

-  NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback,
-                   uint32_t aChannels, uint32_t aRate, uint8_t argc) override;
-
-  NS_IMETHOD SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
-                       JSContext* aCx) override;
-
-  NS_IMETHOD SendAudioNative(int16_t* aData, uint32_t aDataLen) override;
+  NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback) override;

  void Pause() override;

--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
@ -82,22 +82,22 @@ public:
  SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome)
    : nsSpeechTask(aVolume, aUtterance, aIsChrome) {}

-  nsresult DispatchStartImpl(const nsAString& aUri);
+  nsresult DispatchStartImpl(const nsAString& aUri) override;

-  nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex) override;

-  nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex) override;

-  nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) override;

-  nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex);
+  nsresult DispatchErrorImpl(float aElapsedTime, uint32_t aCharIndex) override;

  nsresult DispatchBoundaryImpl(const nsAString& aName,
                                float aElapsedTime, uint32_t aCharIndex,
-                                uint32_t aCharLength, uint8_t argc);
+                                uint32_t aCharLength, uint8_t argc) override;

  nsresult DispatchMarkImpl(const nsAString& aName,
-                            float aElapsedTime, uint32_t aCharIndex);
+                            float aElapsedTime, uint32_t aCharIndex) override;

 private:
  SpeechSynthesisRequestParent* mActor;
--- a/dom/media/webspeech/synth/nsISpeechService.idl
+++ b/dom/media/webspeech/synth/nsISpeechService.idl
@ -5,12 +5,8 @@

 #include "nsISupports.idl"

-typedef unsigned short SpeechServiceType;
-
 /**
- * A callback is implemented by the service. For direct audio services, it is
- * required to implement these, although it could be helpful to use the
- * cancel method for shutting down the speech resources.
+ * A callback is implemented by the service.
 */
 [scriptable, uuid(c576de0c-8a3d-4570-be7e-9876d3e5bed2)]
 interface nsISpeechTaskCallback : nsISupports
@ -32,7 +28,6 @@ interface nsISpeechTaskCallback : nsISupports

  /**
   * The user or application has changed the volume of this speech.
-   * This is only used on indirect audio service type.
   */
  void onVolumeChanged(in float aVolume);
 };
@ -49,26 +44,8 @@ interface nsISpeechTask : nsISupports
   * Prepare browser for speech.
   *
   * @param aCallback callback object for mid-speech operations.
-   * @param aChannels number of audio channels. Only required
-   *                    in direct audio services
-   * @param aRate     audio rate. Only required in direct audio services
   */
-  [optional_argc] void setup(in nsISpeechTaskCallback aCallback,
-                               [optional] in uint32_t aChannels,
-                               [optional] in uint32_t aRate);
-
-  /**
-   * Send audio data to browser.
-   *
-   * @param aData     an Int16Array with PCM-16 audio data.
-   * @param aLandmarks an array of sample offset and landmark pairs.
-   *                     Used for emiting boundary and mark events.
-   */
-  [implicit_jscontext]
-  void sendAudio(in jsval aData, in jsval aLandmarks);
-
-  [noscript]
-  void sendAudioNative([array, size_is(aDataLen)] in short aData, in unsigned long aDataLen);
+  void setup(in nsISpeechTaskCallback aCallback);

  /**
   * Dispatch start event.
@ -132,15 +109,11 @@ interface nsISpeechTask : nsISupports
 /**
 * The main interface of a speech synthesis service.
 *
- * A service's speak method could be implemented in two ways:
- *  1. Indirect audio - the service is responsible for outputting audio.
- *    The service calls the nsISpeechTask.dispatch* methods directly. Starting
- *    with dispatchStart() and ending with dispatchEnd or dispatchError().
- *
- *  2. Direct audio - the service provides us with PCM-16 data, and we output it.
- *    The service does not call the dispatch task methods directly. Instead,
- *    audio information is provided at setup(), and audio data is sent with
- *    sendAudio(). The utterance is terminated with an empty sendAudio().
+ * A service is responsible for outputting audio.
+ * The service dispatches events, starting with dispatchStart() and ending with
+ * dispatchEnd() or dispatchError().
+ * A service must also respond with the correct actions and events in response
+ * to implemented callback methods.
 */
 [scriptable, uuid(9b7d59db-88ff-43d0-b6ee-9f63d042d08f)]
 interface nsISpeechService : nsISupports
@ -161,11 +134,6 @@ interface nsISpeechService : nsISupports
  void speak(in DOMString aText, in DOMString aUri,
             in float aVolume, in float aRate, in float aPitch,
             in nsISpeechTask aTask);
-
-  const SpeechServiceType SERVICETYPE_DIRECT_AUDIO = 1;
-  const SpeechServiceType SERVICETYPE_INDIRECT_AUDIO = 2;
-
-  readonly attribute SpeechServiceType serviceType;
 };

 %{C++
--- a/dom/media/webspeech/synth/nsSpeechTask.cpp
+++ b/dom/media/webspeech/synth/nsSpeechTask.cpp
@ -7,18 +7,11 @@
 #include "AudioChannelAgent.h"
 #include "AudioChannelService.h"
 #include "AudioSegment.h"
-#include "MediaStreamListener.h"
 #include "nsSpeechTask.h"
 #include "nsSynthVoiceRegistry.h"
 #include "SharedBuffer.h"
 #include "SpeechSynthesis.h"

-// GetCurrentTime is defined in winbase.h as zero argument macro forwarding to
-// GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime().
-#ifdef GetCurrentTime
-#undef GetCurrentTime
-#endif
-
 #undef LOG
 extern mozilla::LogModule* GetSpeechSynthLog();
 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
@ -28,110 +21,6 @@ extern mozilla::LogModule* GetSpeechSynthLog();
 namespace mozilla {
 namespace dom {

-class SynthStreamListener : public MediaStreamListener
-{
-public:
-  SynthStreamListener(nsSpeechTask* aSpeechTask,
-                      MediaStream* aStream,
-                      AbstractThread* aMainThread)
-    : mSpeechTask(aSpeechTask)
-    , mStream(aStream)
-    , mStarted(false)
-  {
-  }
-
-  void DoNotifyStarted()
-  {
-    if (mSpeechTask) {
-      mSpeechTask->DispatchStartInner();
-    }
-  }
-
-  void DoNotifyFinished()
-  {
-    if (mSpeechTask) {
-      mSpeechTask->DispatchEndInner(mSpeechTask->GetCurrentTime(),
-                                    mSpeechTask->GetCurrentCharOffset());
-    }
-  }
-
-  void NotifyEvent(MediaStreamGraph* aGraph,
-                   MediaStreamGraphEvent event) override
-  {
-    switch (event) {
-      case MediaStreamGraphEvent::EVENT_FINISHED:
-        {
-          RefPtr<SynthStreamListener> self = this;
-          if (!mStarted) {
-            mStarted = true;
-            aGraph->DispatchToMainThreadAfterStreamStateUpdate(
-              NS_NewRunnableFunction(
-                "dom::SynthStreamListener::NotifyEvent",
-                [self] {
-                  // "start" event will be fired in DoNotifyStarted() which is
-                  // not allowed in stable state, so we do it asynchronously in
-                  // next run.
-                  NS_DispatchToMainThread(NewRunnableMethod(
-                    "dom::SynthStreamListener::DoNotifyStarted",
-                    self,
-                    &SynthStreamListener::DoNotifyStarted));
-                }));
-          }
-
-          aGraph->DispatchToMainThreadAfterStreamStateUpdate(
-            NS_NewRunnableFunction(
-              "dom::SynthStreamListener::NotifyEvent",
-              [self] {
-                // "end" event will be fired in DoNotifyFinished() which is
-                // not allowed in stable state, so we do it asynchronously in
-                // next run.
-                NS_DispatchToMainThread(NewRunnableMethod(
-                  "dom::SynthStreamListener::DoNotifyFinished",
-                  self,
-                  &SynthStreamListener::DoNotifyFinished));
-              }));
-        }
-        break;
-      case MediaStreamGraphEvent::EVENT_REMOVED:
-        mSpeechTask = nullptr;
-        // Dereference MediaStream to destroy safety
-        mStream = nullptr;
-        break;
-      default:
-        break;
-    }
-  }
-
-  void NotifyBlockingChanged(MediaStreamGraph* aGraph, Blocking aBlocked) override
-  {
-    if (aBlocked == MediaStreamListener::UNBLOCKED && !mStarted) {
-      mStarted = true;
-      RefPtr<SynthStreamListener> self = this;
-      aGraph->DispatchToMainThreadAfterStreamStateUpdate(
-        NS_NewRunnableFunction(
-          "dom::SynthStreamListener::NotifyBlockingChanged",
-          [self] {
-            // "start" event will be fired in DoNotifyStarted() which is
-            // not allowed in stable state, so we do it asynchronously in
-            // next run.
-            NS_DispatchToMainThread(NewRunnableMethod(
-              "dom::SynthStreamListener::DoNotifyStarted",
-              self,
-              &SynthStreamListener::DoNotifyStarted));
-          }));
-    }
-  }
-
-private:
-  // Raw pointer; if we exist, the stream exists,
-  // and 'mSpeechTask' exclusively owns it and therefor exists as well.
-  nsSpeechTask* mSpeechTask;
-  // This is KungFuDeathGrip for MediaStream
-  RefPtr<MediaStream> mStream;
-
-  bool mStarted;
-};
-
 // nsSpeechTask

 NS_IMPL_CYCLE_COLLECTION(nsSpeechTask, mSpeechSynthesis, mUtterance, mCallback);
@ -152,7 +41,6 @@ nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
  , mPrePaused(false)
  , mPreCanceled(false)
  , mCallback(nullptr)
-  , mIndirectAudio(false)
  , mIsChrome(aIsChrome)
 {
  mText = aUtterance->mText;
@ -167,7 +55,6 @@ nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome
  , mPrePaused(false)
  , mPreCanceled(false)
  , mCallback(nullptr)
-  , mIndirectAudio(false)
  , mIsChrome(aIsChrome)
 {
 }
@ -175,38 +62,11 @@ nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome
 nsSpeechTask::~nsSpeechTask()
 {
  LOG(LogLevel::Debug, ("~nsSpeechTask"));
-  if (mStream) {
-    if (!mStream->IsDestroyed()) {
-      mStream->Destroy();
-    }
-
-    // This will finally destroyed by SynthStreamListener becasue
-    // MediaStream::Destroy() is async.
-    mStream = nullptr;
-  }
-
-  if (mPort) {
-    mPort->Destroy();
-    mPort = nullptr;
-  }
 }

 void
-nsSpeechTask::InitDirectAudio()
+nsSpeechTask::Init()
 {
-  // nullptr as final argument here means that this is not tied to a window.
-  // This is a global MSG.
-  mStream = MediaStreamGraph::GetInstance(MediaStreamGraph::AUDIO_THREAD_DRIVER,
-                                          nullptr)->
-    CreateSourceStream();
-  mIndirectAudio = false;
-  mInited = true;
-}
-
-void
-nsSpeechTask::InitIndirectAudio()
-{
-  mIndirectAudio = true;
  mInited = true;
 }

@ -217,8 +77,7 @@ nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri)
 }

 NS_IMETHODIMP
-nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback,
-                    uint32_t aChannels, uint32_t aRate, uint8_t argc)
+nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback)
 {
  MOZ_ASSERT(XRE_IsParentProcess());

@ -226,169 +85,11 @@ nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback,

  mCallback = aCallback;

-  if (mIndirectAudio) {
-    MOZ_ASSERT(!mStream);
-    if (argc > 0) {
-      NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services.");
-    }
  return NS_OK;
 }

-  // mStream is set up in Init() that should be called before this.
-  MOZ_ASSERT(mStream);
-
-  mStream->AddListener(
-    // Non DocGroup-version of AbstractThread::MainThread for the task in parent.
-    new SynthStreamListener(this, mStream, AbstractThread::MainThread()));
-
-  // XXX: Support more than one channel
-  if(NS_WARN_IF(!(aChannels == 1))) {
-    return NS_ERROR_FAILURE;
-  }
-
-  mChannels = aChannels;
-
-  AudioSegment* segment = new AudioSegment();
-  mStream->AddAudioTrack(AUDIO_TRACK, aRate, 0, segment);
-  mStream->AddAudioOutput(this);
-  mStream->SetAudioOutputVolume(this, mVolume);
-
-  return NS_OK;
-}
-
-static RefPtr<mozilla::SharedBuffer>
-makeSamples(int16_t* aData, uint32_t aDataLen)
-{
-  RefPtr<mozilla::SharedBuffer> samples =
-    SharedBuffer::Create(aDataLen * sizeof(int16_t));
-  int16_t* frames = static_cast<int16_t*>(samples->Data());
-
-  for (uint32_t i = 0; i < aDataLen; i++) {
-    frames[i] = aData[i];
-  }
-
-  return samples;
-}
-
-NS_IMETHODIMP
-nsSpeechTask::SendAudio(JS::Handle<JS::Value> aData, JS::Handle<JS::Value> aLandmarks,
-                        JSContext* aCx)
-{
-  MOZ_ASSERT(XRE_IsParentProcess());
-
-  if(NS_WARN_IF(!(mStream))) {
-    return NS_ERROR_NOT_AVAILABLE;
-  }
-  if(NS_WARN_IF(mStream->IsDestroyed())) {
-    return NS_ERROR_NOT_AVAILABLE;
-  }
-  if(NS_WARN_IF(!(mChannels))) {
-    return NS_ERROR_FAILURE;
-  }
-  if(NS_WARN_IF(!(aData.isObject()))) {
-    return NS_ERROR_INVALID_ARG;
-  }
-
-  if (mIndirectAudio) {
-    NS_WARNING("Can't call SendAudio from an indirect audio speech service.");
-    return NS_ERROR_FAILURE;
-  }
-
-  JS::Rooted<JSObject*> darray(aCx, &aData.toObject());
-  JSAutoCompartment ac(aCx, darray);
-
-  JS::Rooted<JSObject*> tsrc(aCx, nullptr);
-
-  // Allow either Int16Array or plain JS Array
-  if (JS_IsInt16Array(darray)) {
-    tsrc = darray;
-  } else {
-    bool isArray;
-    if (!JS_IsArrayObject(aCx, darray, &isArray)) {
-      return NS_ERROR_UNEXPECTED;
-    }
-    if (isArray) {
-      tsrc = JS_NewInt16ArrayFromArray(aCx, darray);
-    }
-  }
-
-  if (!tsrc) {
-    return NS_ERROR_DOM_TYPE_MISMATCH_ERR;
-  }
-
-  uint32_t dataLen = JS_GetTypedArrayLength(tsrc);
-  RefPtr<mozilla::SharedBuffer> samples;
-  {
-    JS::AutoCheckCannotGC nogc;
-    bool isShared;
-    int16_t* data = JS_GetInt16ArrayData(tsrc, &isShared, nogc);
-    if (isShared) {
-      // Must opt in to using shared data.
-      return NS_ERROR_DOM_TYPE_MISMATCH_ERR;
-    }
-    samples = makeSamples(data, dataLen);
-  }
-  SendAudioImpl(samples, dataLen);
-
-  return NS_OK;
-}
-
-NS_IMETHODIMP
-nsSpeechTask::SendAudioNative(int16_t* aData, uint32_t aDataLen)
-{
-  MOZ_ASSERT(XRE_IsParentProcess());
-
-  if(NS_WARN_IF(!(mStream))) {
-    return NS_ERROR_NOT_AVAILABLE;
-  }
-  if(NS_WARN_IF(mStream->IsDestroyed())) {
-    return NS_ERROR_NOT_AVAILABLE;
-  }
-  if(NS_WARN_IF(!(mChannels))) {
-    return NS_ERROR_FAILURE;
-  }
-
-  if (mIndirectAudio) {
-    NS_WARNING("Can't call SendAudio from an indirect audio speech service.");
-    return NS_ERROR_FAILURE;
-  }
-
-  RefPtr<mozilla::SharedBuffer> samples = makeSamples(aData, aDataLen);
-  SendAudioImpl(samples, aDataLen);
-
-  return NS_OK;
-}
-
-void
-nsSpeechTask::SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen)
-{
-  if (aDataLen == 0) {
-    mStream->EndAllTrackAndFinish();
-    return;
-  }
-
-  AudioSegment segment;
-  AutoTArray<const int16_t*, 1> channelData;
-  channelData.AppendElement(static_cast<int16_t*>(aSamples->Data()));
-  segment.AppendFrames(aSamples.forget(), channelData, aDataLen,
-                       PRINCIPAL_HANDLE_NONE);
-  mStream->AppendToTrack(1, &segment);
-  mStream->AdvanceKnownTracksTime(STREAM_TIME_MAX);
-}
-
 NS_IMETHODIMP
 nsSpeechTask::DispatchStart()
-{
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchStart() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
-  return DispatchStartInner();
-}
-
-nsresult
-nsSpeechTask::DispatchStartInner()
 {
  nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
  return DispatchStartImpl();
@ -403,7 +104,7 @@ nsSpeechTask::DispatchStartImpl()
 nsresult
 nsSpeechTask::DispatchStartImpl(const nsAString& aUri)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStart"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl"));

  MOZ_ASSERT(mUtterance);
  if(NS_WARN_IF(!(mUtterance->mState == SpeechSynthesisUtterance::STATE_PENDING))) {
@ -422,17 +123,6 @@ nsSpeechTask::DispatchStartImpl(const nsAString& aUri)

 NS_IMETHODIMP
 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex)
-{
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchEnd() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
-  return DispatchEndInner(aElapsedTime, aCharIndex);
-}
-
-nsresult
-nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex)
 {
  // After we end, no callback functions should go through.
  mCallback = nullptr;
@ -447,7 +137,7 @@ nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex)
 nsresult
 nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEnd\n"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl"));

  DestroyAudioChannelAgent();

@ -456,11 +146,6 @@ nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex)
    return NS_ERROR_NOT_AVAILABLE;
  }

-  // XXX: This should not be here, but it prevents a crash in MSG.
-  if (mStream) {
-    mStream->Destroy();
-  }
-
  RefPtr<SpeechSynthesisUtterance> utterance = mUtterance;

  if (mSpeechSynthesis) {
@ -482,18 +167,13 @@ nsSpeechTask::DispatchEndImpl(float aElapsedTime, uint32_t aCharIndex)
 NS_IMETHODIMP
 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchPause() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
  return DispatchPauseImpl(aElapsedTime, aCharIndex);
 }

 nsresult
 nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPause"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl"));
  MOZ_ASSERT(mUtterance);
  if(NS_WARN_IF(mUtterance->mPaused)) {
    return NS_ERROR_NOT_AVAILABLE;
@ -514,18 +194,13 @@ nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex)
 NS_IMETHODIMP
 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchResume() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
  return DispatchResumeImpl(aElapsedTime, aCharIndex);
 }

 nsresult
 nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex)
 {
-  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResume"));
+  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl"));
  MOZ_ASSERT(mUtterance);
  if(NS_WARN_IF(!(mUtterance->mPaused))) {
    return NS_ERROR_NOT_AVAILABLE;
@ -547,7 +222,7 @@ nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex)
 void
 nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex)
 {
-  DispatchErrorInner(aElapsedTime, aCharIndex);
+  DispatchError(aElapsedTime, aCharIndex);
 }

 NS_IMETHODIMP
@ -555,17 +230,6 @@ nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex)
 {
  LOG(LogLevel::Debug, ("nsSpeechTask::DispatchError"));

-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchError() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
-  return DispatchErrorInner(aElapsedTime, aCharIndex);
-}
-
-nsresult
-nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex)
-{
  if (!mPreCanceled) {
    nsSynthVoiceRegistry::GetInstance()->SpeakNext();
  }
@ -597,11 +261,6 @@ nsSpeechTask::DispatchBoundary(const nsAString& aName,
                               float aElapsedTime, uint32_t aCharIndex,
                               uint32_t aCharLength, uint8_t argc)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchBoundary() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
  return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength, argc);
 }

@ -626,11 +285,6 @@ NS_IMETHODIMP
 nsSpeechTask::DispatchMark(const nsAString& aName,
                           float aElapsedTime, uint32_t aCharIndex)
 {
-  if (!mIndirectAudio) {
-    NS_WARNING("Can't call DispatchMark() from a direct audio speech service");
-    return NS_ERROR_FAILURE;
-  }
-
  return DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
 }

@ -659,17 +313,9 @@ nsSpeechTask::Pause()
    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
  }

-  if (mStream) {
-    mStream->Suspend();
-  }
-
  if (!mInited) {
    mPrePaused = true;
  }
-
-  if (!mIndirectAudio) {
-    DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset());
-  }
 }

 void
@ -683,18 +329,10 @@ nsSpeechTask::Resume()
                         "Unable to call onResume() callback");
  }

-  if (mStream) {
-    mStream->Resume();
-  }
-
  if (mPrePaused) {
    mPrePaused = false;
    nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
  }
-
-  if (!mIndirectAudio) {
-    DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset());
-  }
 }

 void
@ -710,43 +348,19 @@ nsSpeechTask::Cancel()
                         "Unable to call onCancel() callback");
  }

-  if (mStream) {
-    mStream->Suspend();
-  }
-
  if (!mInited) {
    mPreCanceled = true;
  }
-
-  if (!mIndirectAudio) {
-    DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset());
-  }
 }

 void
 nsSpeechTask::ForceEnd()
 {
-  if (mStream) {
-    mStream->Suspend();
-  }
-
  if (!mInited) {
    mPreCanceled = true;
  }

-  DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset());
-}
-
-float
-nsSpeechTask::GetCurrentTime()
-{
-  return mStream ? (float)(mStream->GetCurrentTime() / 1000000.0) : 0;
-}
-
-uint32_t
-nsSpeechTask::GetCurrentCharOffset()
-{
-  return mStream && mStream->IsFinished() ? mText.Length() : 0;
+  DispatchEnd(0, 0);
 }

 void
@ -823,10 +437,7 @@ nsSpeechTask::WindowAudioCaptureChanged(bool aCapture)
 void
 nsSpeechTask::SetAudioOutputVolume(float aVolume)
 {
-  if (mStream && !mStream->IsDestroyed()) {
-    mStream->SetAudioOutputVolume(this, aVolume);
-  }
-  if (mIndirectAudio && mCallback) {
+  if (mCallback) {
    mCallback->OnVolumeChanged(aVolume);
  }
 }
--- a/dom/media/webspeech/synth/nsSpeechTask.h
+++ b/dom/media/webspeech/synth/nsSpeechTask.h
@ -7,7 +7,6 @@
 #ifndef mozilla_dom_nsSpeechTask_h
 #define mozilla_dom_nsSpeechTask_h

-#include "MediaStreamGraph.h"
 #include "SpeechSynthesisUtterance.h"
 #include "nsIAudioChannelAgent.h"
 #include "nsISpeechService.h"
@ -46,14 +45,9 @@ public:

  virtual void ForceEnd();

-  float GetCurrentTime();
-
-  uint32_t GetCurrentCharOffset();
-
  void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis);

-  void InitDirectAudio();
-  void InitIndirectAudio();
+  void Init();

  void SetChosenVoiceURI(const nsAString& aUri);

@ -61,20 +55,11 @@ public:

  void ForceError(float aElapsedTime, uint32_t aCharIndex);

-  bool IsPreCanceled()
-  {
-    return mPreCanceled;
-  };
+  bool IsPreCanceled() { return mPreCanceled; };

-  bool IsPrePaused()
-  {
-    return mPrePaused;
-  }
+  bool IsPrePaused() { return mPrePaused; }

-  bool IsChrome()
-  {
-    return mIsChrome;
-  }
+  bool IsChrome() { return mIsChrome; }

 protected:
  virtual ~nsSpeechTask();
@ -115,31 +100,16 @@ protected:
 private:
  void End();

-  void SendAudioImpl(RefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen);
-
-  nsresult DispatchStartInner();
-
-  nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex);
-  nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex);
-
  void CreateAudioChannelAgent();

  void DestroyAudioChannelAgent();

-  RefPtr<SourceMediaStream> mStream;
-
-  RefPtr<MediaInputPort> mPort;
-
  nsCOMPtr<nsISpeechTaskCallback> mCallback;

  nsCOMPtr<nsIAudioChannelAgent> mAudioChannelAgent;

-  uint32_t mChannels;
-
  RefPtr<SpeechSynthesis> mSpeechSynthesis;

-  bool mIndirectAudio;
-
  nsString mChosenVoiceURI;

  bool mIsChrome;
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
@ -824,24 +824,12 @@ nsSynthVoiceRegistry::SpeakImpl(VoiceData* aVoice,
       NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aVoice->mUri).get(),
       aRate, aPitch));

-  SpeechServiceType serviceType;
-
-  DebugOnly<nsresult> rv = aVoice->mService->GetServiceType(&serviceType);
-  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to get speech service type");
-
-  if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) {
-    aTask->InitIndirectAudio();
-  } else {
-    aTask->InitDirectAudio();
-  }
+  aTask->Init();

  if (NS_FAILED(aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate,
                                        aPitch, aTask))) {
-    if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) {
    aTask->DispatchError(0, 0);
  }
-    // XXX When using direct audio, no way to dispatch error
-  }
 }

 } // namespace dom
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
@ -10,7 +10,6 @@
 #include "nsISynthVoiceRegistry.h"
 #include "nsRefPtrHashtable.h"
 #include "nsTArray.h"
-#include "MediaStreamGraph.h"

 class nsISpeechService;

--- a/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp
+++ b/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp
@ -513,8 +513,7 @@ SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri,
  // speech-dispatcher expects -100 to 100 with 0 being default.
  spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100));

-  // The last three parameters don't matter for an indirect service
-  nsresult rv = aTask->Setup(callback, 0, 0, 0);
+  nsresult rv = aTask->Setup(callback);

  if (NS_FAILED(rv)) {
    return rv;
@ -549,13 +548,6 @@ SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri,
  return NS_OK;
 }

-NS_IMETHODIMP
-SpeechDispatcherService::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
-  return NS_OK;
-}
-
 SpeechDispatcherService*
 SpeechDispatcherService::GetInstance(bool create)
 {
--- a/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
+++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.cpp
@ -44,20 +44,17 @@ struct VoiceDetails
  uint32_t flags;
 };

-static const VoiceDetails sDirectVoices[] = {
-  {"urn:moz-tts:fake-direct:bob", "Bob Marley", "en-JM", true, 0},
-  {"urn:moz-tts:fake-direct:amy", "Amy Winehouse", "en-GB", false, 0},
-  {"urn:moz-tts:fake-direct:lenny", "Leonard Cohen", "en-CA", false, 0},
-  {"urn:moz-tts:fake-direct:celine", "Celine Dion", "fr-CA", false, 0},
-  {"urn:moz-tts:fake-direct:julie", "Julieta Venegas", "es-MX", false, },
-};
-
-static const VoiceDetails sIndirectVoices[] = {
-  {"urn:moz-tts:fake-indirect:zanetta", "Zanetta Farussi", "it-IT", false, 0},
-  {"urn:moz-tts:fake-indirect:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd},
-  {"urn:moz-tts:fake-indirect:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd},
-  {"urn:moz-tts:fake-indirect:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart},
-  {"urn:moz-tts:fake-indirect:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail},
+static const VoiceDetails sVoices[] = {
+  {"urn:moz-tts:fake:bob", "Bob Marley", "en-JM", true, 0},
+  {"urn:moz-tts:fake:amy", "Amy Winehouse", "en-GB", false, 0},
+  {"urn:moz-tts:fake:lenny", "Leonard Cohen", "en-CA", false, 0},
+  {"urn:moz-tts:fake:celine", "Celine Dion", "fr-CA", false, 0},
+  {"urn:moz-tts:fake:julie", "Julieta Venegas", "es-MX", false, },
+  {"urn:moz-tts:fake:zanetta", "Zanetta Farussi", "it-IT", false, 0},
+  {"urn:moz-tts:fake:margherita", "Margherita Durastanti", "it-IT-noevents-noend", false, eSuppressEvents | eSuppressEnd},
+  {"urn:moz-tts:fake:teresa", "Teresa Cornelys", "it-IT-noend", false, eSuppressEnd},
+  {"urn:moz-tts:fake:cecilia", "Cecilia Bartoli", "it-IT-failatstart", false, eFailAtStart},
+  {"urn:moz-tts:fake:gottardo", "Gottardo Aldighieri", "it-IT-fail", false, eFail},
 };

 // FakeSynthCallback
@ -116,90 +113,25 @@ NS_INTERFACE_MAP_END
 NS_IMPL_CYCLE_COLLECTING_ADDREF(FakeSynthCallback)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(FakeSynthCallback)

-// FakeDirectAudioSynth
+// FakeSpeechSynth

-class FakeDirectAudioSynth : public nsISpeechService
+class FakeSpeechSynth : public nsISpeechService
 {

 public:
-  FakeDirectAudioSynth() { }
+  FakeSpeechSynth() {}

  NS_DECL_ISUPPORTS
  NS_DECL_NSISPEECHSERVICE

 private:
-  virtual ~FakeDirectAudioSynth() { }
+  virtual ~FakeSpeechSynth() { }
 };

-NS_IMPL_ISUPPORTS(FakeDirectAudioSynth, nsISpeechService)
+NS_IMPL_ISUPPORTS(FakeSpeechSynth, nsISpeechService)

 NS_IMETHODIMP
-FakeDirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
-                            float aVolume, float aRate, float aPitch,
-                            nsISpeechTask* aTask)
-{
-  class Runnable final : public mozilla::Runnable
-  {
-  public:
-    Runnable(nsISpeechTask* aTask, const nsAString& aText)
-      : mozilla::Runnable("Runnable")
-      , mTask(aTask)
-      , mText(aText)
-    {
-    }
-
-    NS_IMETHOD Run() override
-    {
-      RefPtr<FakeSynthCallback> cb = new FakeSynthCallback(nullptr);
-      mTask->Setup(cb, CHANNELS, SAMPLERATE, 2);
-
-      // Just an arbitrary multiplier. Pretend that each character is
-      // synthesized to 40 frames.
-      uint32_t frames_length = 40 * mText.Length();
-      auto frames = MakeUnique<int16_t[]>(frames_length);
-      mTask->SendAudioNative(frames.get(), frames_length);
-
-      mTask->SendAudioNative(nullptr, 0);
-
-      return NS_OK;
-    }
-
-  private:
-    nsCOMPtr<nsISpeechTask> mTask;
-    nsString mText;
-  };
-
-  nsCOMPtr<nsIRunnable> runnable = new Runnable(aTask, aText);
-  NS_DispatchToMainThread(runnable);
-  return NS_OK;
-}
-
-NS_IMETHODIMP
-FakeDirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_DIRECT_AUDIO;
-  return NS_OK;
-}
-
-// FakeDirectAudioSynth
-
-class FakeIndirectAudioSynth : public nsISpeechService
-{
-
-public:
-  FakeIndirectAudioSynth() {}
-
-  NS_DECL_ISUPPORTS
-  NS_DECL_NSISPEECHSERVICE
-
-private:
-  virtual ~FakeIndirectAudioSynth() { }
-};
-
-NS_IMPL_ISUPPORTS(FakeIndirectAudioSynth, nsISpeechService)
-
-NS_IMETHODIMP
-FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
+FakeSpeechSynth::Speak(const nsAString& aText, const nsAString& aUri,
                              float aVolume, float aRate, float aPitch,
                              nsISpeechTask* aTask)
 {
@ -268,9 +200,10 @ FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
  };

  uint32_t flags = 0;
-  for (uint32_t i = 0; i < ArrayLength(sIndirectVoices); i++) {
-    if (aUri.EqualsASCII(sIndirectVoices[i].uri)) {
-      flags = sIndirectVoices[i].flags;
+  for (const VoiceDetails& voice : sVoices) {
+    if (aUri.EqualsASCII(voice.uri)) {
+      flags = voice.flags;
+      break;
    }
  }

@ -281,7 +214,7 @@ FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
  RefPtr<FakeSynthCallback> cb = new FakeSynthCallback(
    (flags & eSuppressEvents) ? nullptr : aTask);

-  aTask->Setup(cb, 0, 0, 0);
+  aTask->Setup(cb);

  nsCOMPtr<nsIRunnable> runnable = new DispatchStart(aTask);
  NS_DispatchToMainThread(runnable);
@ -297,13 +230,6 @@ FakeIndirectAudioSynth::Speak(const nsAString& aText, const nsAString& aUri,
  return NS_OK;
 }

-NS_IMETHODIMP
-FakeIndirectAudioSynth::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
-  return NS_OK;
-}
-
 // nsFakeSynthService

 NS_INTERFACE_MAP_BEGIN(nsFakeSynthServices)
@ -314,14 +240,6 @@ NS_INTERFACE_MAP_END
 NS_IMPL_ADDREF(nsFakeSynthServices)
 NS_IMPL_RELEASE(nsFakeSynthServices)

-nsFakeSynthServices::nsFakeSynthServices()
-{
-}
-
-nsFakeSynthServices::~nsFakeSynthServices()
-{
-}
-
 static void
 AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, uint32_t aLength)
 {
@ -344,11 +262,8 @@ AddVoices(nsISpeechService* aService, const VoiceDetails* aVoices, uint32_t aLen
 void
 nsFakeSynthServices::Init()
 {
-  mDirectService = new FakeDirectAudioSynth();
-  AddVoices(mDirectService, sDirectVoices, ArrayLength(sDirectVoices));
-
-  mIndirectService = new FakeIndirectAudioSynth();
-  AddVoices(mIndirectService, sIndirectVoices, ArrayLength(sIndirectVoices));
+  mSynthService = new FakeSpeechSynth();
+  AddVoices(mSynthService, sVoices, ArrayLength(sVoices));
 }

 // nsIObserver
--- a/dom/media/webspeech/synth/test/nsFakeSynthServices.h
+++ b/dom/media/webspeech/synth/test/nsFakeSynthServices.h
@ -25,7 +25,7 @@ public:
  NS_DECL_ISUPPORTS
  NS_DECL_NSIOBSERVER

-  nsFakeSynthServices();
+  nsFakeSynthServices() = default;

  static nsFakeSynthServices* GetInstance();

@ -35,13 +35,11 @@ public:

 private:

-  virtual ~nsFakeSynthServices();
+  virtual ~nsFakeSynthServices() = default;

  void Init();

-  nsCOMPtr<nsISpeechService> mDirectService;
-
-  nsCOMPtr<nsISpeechService> mIndirectService;
+  nsCOMPtr<nsISpeechService> mSynthService;

  static StaticRefPtr<nsFakeSynthServices> sSingleton;
 };
--- a/dom/media/webspeech/synth/windows/SapiService.cpp
+++ b/dom/media/webspeech/synth/windows/SapiService.cpp
@ -402,14 +402,13 @ SapiService::Speak(const nsAString& aText, const nsAString& aUri,
    new SapiCallback(aTask, spVoice, textOffset, aText.Length());

-  // The last three parameters doesn't matter for an indirect service
-  nsresult rv = aTask->Setup(callback, 0, 0, 0);
+  nsresult rv = aTask->Setup(callback);
  if (NS_FAILED(rv)) {
    return rv;
  }

  ULONG streamNum;
  if (FAILED(spVoice->Speak(xml.get(), SPF_ASYNC, &streamNum))) {
-    aTask->Setup(nullptr, 0, 0, 0);
+    aTask->Setup(nullptr);
    return NS_ERROR_FAILURE;
  }

@ -422,13 +421,6 @ SapiService::Speak(const nsAString& aText, const nsAString& aUri,
  return NS_OK;
 }

-NS_IMETHODIMP
-SapiService::GetServiceType(SpeechServiceType* aServiceType)
-{
-  *aServiceType = nsISpeechService::SERVICETYPE_INDIRECT_AUDIO;
-  return NS_OK;
-}
-
 NS_IMETHODIMP
 SapiService::Observe(nsISupports* aSubject, const char* aTopic,
                      const char16_t* aData)