From ec978256f5322c8210801caa8c9ac32a820e3cdc Mon Sep 17 00:00:00 2001
From: Eitan Isaacson <eitan@monotonous.org>
Date: Wed, 5 Aug 2015 10:25:42 -0700
Subject: [PATCH] Bug 1188099 - (Part 2) Introduce global queue and track
 speaking state across windows. r=smaug r=kdavis

---
 dom/media/webspeech/synth/SpeechSynthesis.cpp |  33 ++-
 dom/media/webspeech/synth/SpeechSynthesis.h   |   2 +
 .../webspeech/synth/ipc/PSpeechSynthesis.ipdl |   8 +-
 .../synth/ipc/SpeechSynthesisChild.cpp        |   7 +
 .../synth/ipc/SpeechSynthesisChild.h          |   2 +
 .../synth/ipc/SpeechSynthesisParent.cpp       |  16 +-
 .../synth/ipc/SpeechSynthesisParent.h         |   7 +-
 dom/media/webspeech/synth/nsSpeechTask.cpp    |  86 ++++++--
 dom/media/webspeech/synth/nsSpeechTask.h      |  24 ++-
 .../webspeech/synth/nsSynthVoiceRegistry.cpp  | 193 ++++++++++++++++--
 .../webspeech/synth/nsSynthVoiceRegistry.h    |  33 ++-
 dom/webidl/SpeechSynthesis.webidl             |   3 +
 12 files changed, 360 insertions(+), 54 deletions(-)

diff --git a/dom/media/webspeech/synth/SpeechSynthesis.cpp b/dom/media/webspeech/synth/SpeechSynthesis.cpp
index b93c2878439a..c72600b42c23 100644
--- a/dom/media/webspeech/synth/SpeechSynthesis.cpp
+++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp
@@ -111,17 +111,19 @@ SpeechSynthesis::Pending() const
 bool
 SpeechSynthesis::Speaking() const
 {
-  if (mSpeechQueue.IsEmpty()) {
-    return false;
+  if (!mSpeechQueue.IsEmpty() &&
+      mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    return true;
   }
 
-  return mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING;
+  // Returns global speaking state if global queue is enabled. Or false.
+  return nsSynthVoiceRegistry::GetInstance()->IsSpeaking();
 }
 
 bool
 SpeechSynthesis::Paused() const
 {
-  return mHoldQueue ||
+  return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) ||
          (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused());
 }
 
@@ -178,15 +180,18 @@ SpeechSynthesis::AdvanceQueue()
 void
 SpeechSynthesis::Cancel()
 {
-  if (mCurrentTask) {
-    if (mSpeechQueue.Length() > 1) {
-      // Remove all queued utterances except for current one.
-      mSpeechQueue.RemoveElementsAt(1, mSpeechQueue.Length() - 1);
-    }
-    mCurrentTask->Cancel();
+  if (!mSpeechQueue.IsEmpty() &&
+      mSpeechQueue.ElementAt(0)->GetState() == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    // Remove all queued utterances except for current one, we will remove it
+    // in OnEnd
+    mSpeechQueue.RemoveElementsAt(1, mSpeechQueue.Length() - 1);
   } else {
     mSpeechQueue.Clear();
   }
+
+  if (mCurrentTask) {
+    mCurrentTask->Cancel();
+  }
 }
 
 void
@@ -269,5 +274,13 @@ SpeechSynthesis::GetVoices(nsTArray< nsRefPtr<SpeechSynthesisVoice> >& aResult)
   }
 }
 
+// For testing purposes, allows us to drop anything in the global queue from
+// content, and bring the browser to initial state.
+void
+SpeechSynthesis::DropGlobalQueue()
+{
+  nsSynthVoiceRegistry::GetInstance()->DropGlobalQueue();
+}
+
 } // namespace dom
 } // namespace mozilla
diff --git a/dom/media/webspeech/synth/SpeechSynthesis.h b/dom/media/webspeech/synth/SpeechSynthesis.h
index 2efab85bf80a..81b2275cb51d 100644
--- a/dom/media/webspeech/synth/SpeechSynthesis.h
+++ b/dom/media/webspeech/synth/SpeechSynthesis.h
@@ -54,6 +54,8 @@ public:
 
   void GetVoices(nsTArray< nsRefPtr<SpeechSynthesisVoice> >& aResult);
 
+  void DropGlobalQueue();
+
 private:
   virtual ~SpeechSynthesis();
 
diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
index 7e5daa7f4b93..7939e1abf922 100644
--- a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
+++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl
@@ -31,12 +31,18 @@ child:
 
     SetDefaultVoice(nsString aUri, bool aIsDefault);
 
+    IsSpeakingChanged(bool aIsSpeaking);
+
 parent:
     __delete__();
 
     PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang,
                             float aVolume, float aRate, float aPitch);
-    sync ReadVoiceList() returns (RemoteVoice[] aVoices, nsString[] aDefaults);
+
+    sync ReadVoicesAndState() returns (RemoteVoice[] aVoices,
+                                       nsString[] aDefaults, bool aIsSpeaking);
+
+    DropGlobalQueue();
 };
 
 } // namespace dom
diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
index 81d950f5dbdb..56b8aee1b847 100644
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp
@@ -40,6 +40,13 @@ SpeechSynthesisChild::RecvSetDefaultVoice(const nsString& aUri,
   return true;
 }
 
+bool
+SpeechSynthesisChild::RecvIsSpeakingChanged(const bool& aIsSpeaking)
+{
+  nsSynthVoiceRegistry::RecvIsSpeakingChanged(aIsSpeaking);
+  return true;
+}
+
 PSpeechSynthesisRequestChild*
 SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(const nsString& aText,
                                                         const nsString& aLang,
diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
index 9cdfafb245db..99b4284599bb 100644
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h
@@ -28,6 +28,8 @@ public:
 
   bool RecvSetDefaultVoice(const nsString& aUri, const bool& aIsDefault) override;
 
+  bool RecvIsSpeakingChanged(const bool& aIsSpeaking) override;
+
 protected:
   SpeechSynthesisChild();
   virtual ~SpeechSynthesisChild();
diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
index 1e62441a698d..24a7c0958aea 100644
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp
@@ -25,10 +25,20 @@ SpeechSynthesisParent::ActorDestroy(ActorDestroyReason aWhy)
 }
 
 bool
-SpeechSynthesisParent::RecvReadVoiceList(InfallibleTArray<RemoteVoice>* aVoices,
-                                         InfallibleTArray<nsString>* aDefaults)
+SpeechSynthesisParent::RecvReadVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                                              InfallibleTArray<nsString>* aDefaults,
+                                              bool* aIsSpeaking)
 {
-  nsSynthVoiceRegistry::GetInstance()->SendVoices(aVoices, aDefaults);
+  nsSynthVoiceRegistry::GetInstance()->SendVoicesAndState(aVoices, aDefaults,
+                                                          aIsSpeaking);
+  return true;
+}
+
+bool
+SpeechSynthesisParent::RecvDropGlobalQueue()
+{
+  nsSynthVoiceRegistry::GetInstance()->DropGlobalQueue();
+
   return true;
 }
 
diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
index 3044b19acf28..137119d7bf6b 100644
--- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
+++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h
@@ -24,8 +24,11 @@ class SpeechSynthesisParent : public PSpeechSynthesisParent
 public:
   virtual void ActorDestroy(ActorDestroyReason aWhy) override;
 
-  bool RecvReadVoiceList(InfallibleTArray<RemoteVoice>* aVoices,
-                         InfallibleTArray<nsString>* aDefaults) override;
+  bool RecvReadVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                              InfallibleTArray<nsString>* aDefaults,
+                              bool* aIsSpeaking) override;
+
+  bool RecvDropGlobalQueue() override;
 
 protected:
   SpeechSynthesisParent();
diff --git a/dom/media/webspeech/synth/nsSpeechTask.cpp b/dom/media/webspeech/synth/nsSpeechTask.cpp
index ff1089816df3..bfa3011d00a4 100644
--- a/dom/media/webspeech/synth/nsSpeechTask.cpp
+++ b/dom/media/webspeech/synth/nsSpeechTask.cpp
@@ -7,6 +7,7 @@
 #include "AudioSegment.h"
 #include "nsSpeechTask.h"
 #include "SpeechSynthesis.h"
+#include "nsSynthVoiceRegistry.h"
 
 // GetCurrentTime is defined in winbase.h as zero argument macro forwarding to
 // GetTickCount() and conflicts with nsSpeechTask::GetCurrentTime().
@@ -33,15 +34,15 @@ public:
   void DoNotifyStarted()
   {
     if (mSpeechTask) {
-      mSpeechTask->DispatchStartImpl();
+      mSpeechTask->DispatchStartInner();
     }
   }
 
   void DoNotifyFinished()
   {
     if (mSpeechTask) {
-      mSpeechTask->DispatchEndImpl(mSpeechTask->GetCurrentTime(),
-                                   mSpeechTask->GetCurrentCharOffset());
+      mSpeechTask->DispatchEndInner(mSpeechTask->GetCurrentTime(),
+                                    mSpeechTask->GetCurrentCharOffset());
     }
   }
 
@@ -96,6 +97,9 @@ NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
 
 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance)
   : mUtterance(aUtterance)
+  , mInited(false)
+  , mPrePaused(false)
+  , mPreCanceled(false)
   , mCallback(nullptr)
   , mIndirectAudio(false)
 {
@@ -107,6 +111,9 @@ nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText)
   : mUtterance(nullptr)
   , mVolume(aVolume)
   , mText(aText)
+  , mInited(false)
+  , mPrePaused(false)
+  , mPreCanceled(false)
   , mCallback(nullptr)
   , mIndirectAudio(false)
 {
@@ -130,10 +137,16 @@ nsSpeechTask::~nsSpeechTask()
 }
 
 void
-nsSpeechTask::BindStream(ProcessedMediaStream* aStream)
+nsSpeechTask::Init(ProcessedMediaStream* aStream)
 {
-  mStream = MediaStreamGraph::GetInstance()->CreateSourceStream(nullptr);
-  mPort = aStream->AllocateInputPort(mStream, 0);
+  if (aStream) {
+    mStream = MediaStreamGraph::GetInstance()->CreateSourceStream(nullptr);
+    mPort = aStream->AllocateInputPort(mStream, 0);
+    mIndirectAudio = false;
+  } else {
+    mIndirectAudio = true;
+  }
+  mInited = true;
 }
 
 void
@@ -153,13 +166,14 @@ nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback,
   mCallback = aCallback;
 
   if (mIndirectAudio) {
+    MOZ_ASSERT(!mStream);
     if (argc > 0) {
       NS_WARNING("Audio info arguments in Setup() are ignored for indirect audio services.");
     }
     return NS_OK;
   }
 
-  // mStream is set up in BindStream() that should be called before this.
+  // mStream is set up in Init() that should be called before this.
   MOZ_ASSERT(mStream);
 
   mStream->AddListener(new SynthStreamListener(this));
@@ -294,6 +308,13 @@ nsSpeechTask::DispatchStart()
     return NS_ERROR_FAILURE;
   }
 
+  return DispatchStartInner();
+}
+
+nsresult
+nsSpeechTask::DispatchStartInner()
+{
+  nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
   return DispatchStartImpl();
 }
 
@@ -329,6 +350,16 @@ nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex)
     return NS_ERROR_FAILURE;
   }
 
+  return DispatchEndInner(aElapsedTime, aCharIndex);
+}
+
+nsresult
+nsSpeechTask::DispatchEndInner(float aElapsedTime, uint32_t aCharIndex)
+{
+  if (!mPreCanceled) {
+    nsSynthVoiceRegistry::GetInstance()->SpeakNext();
+  }
+
   return DispatchEndImpl(aElapsedTime, aCharIndex);
 }
 
@@ -389,9 +420,11 @@ nsSpeechTask::DispatchPauseImpl(float aElapsedTime, uint32_t aCharIndex)
   }
 
   mUtterance->mPaused = true;
-  mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"),
-                                           aCharIndex, aElapsedTime,
-                                           EmptyString());
+  if (mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("pause"),
+                                             aCharIndex, aElapsedTime,
+                                             EmptyString());
+  }
   return NS_OK;
 }
 
@@ -419,9 +452,12 @@ nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex)
   }
 
   mUtterance->mPaused = false;
-  mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"),
-                                           aCharIndex, aElapsedTime,
-                                           EmptyString());
+  if (mUtterance->mState == SpeechSynthesisUtterance::STATE_SPEAKING) {
+    mUtterance->DispatchSpeechSynthesisEvent(NS_LITERAL_STRING("resume"),
+                                             aCharIndex, aElapsedTime,
+                                             EmptyString());
+  }
+
   return NS_OK;
 }
 
@@ -517,6 +553,13 @@ nsSpeechTask::Pause()
 
   if (mStream) {
     mStream->ChangeExplicitBlockerCount(1);
+  }
+
+  if (!mInited) {
+    mPrePaused = true;
+  }
+
+  if (!mIndirectAudio) {
     DispatchPauseImpl(GetCurrentTime(), GetCurrentCharOffset());
   }
 }
@@ -533,6 +576,14 @@ nsSpeechTask::Resume()
 
   if (mStream) {
     mStream->ChangeExplicitBlockerCount(-1);
+  }
+
+  if (mPrePaused) {
+    mPrePaused = false;
+    nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
+  }
+
+  if (!mIndirectAudio) {
     DispatchResumeImpl(GetCurrentTime(), GetCurrentCharOffset());
   }
 }
@@ -551,7 +602,14 @@ nsSpeechTask::Cancel()
 
   if (mStream) {
     mStream->ChangeExplicitBlockerCount(1);
-    DispatchEndImpl(GetCurrentTime(), GetCurrentCharOffset());
+  }
+
+  if (!mInited) {
+    mPreCanceled = true;
+  }
+
+  if (!mIndirectAudio) {
+    DispatchEndInner(GetCurrentTime(), GetCurrentCharOffset());
   }
 }
 
diff --git a/dom/media/webspeech/synth/nsSpeechTask.h b/dom/media/webspeech/synth/nsSpeechTask.h
index 3d2956f5c548..63a1e1aa9b5b 100644
--- a/dom/media/webspeech/synth/nsSpeechTask.h
+++ b/dom/media/webspeech/synth/nsSpeechTask.h
@@ -43,12 +43,20 @@ public:
 
   void SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis);
 
-  void SetIndirectAudio(bool aIndirectAudio) { mIndirectAudio = aIndirectAudio; }
-
-  void BindStream(ProcessedMediaStream* aStream);
+  void Init(ProcessedMediaStream* aStream);
 
   void SetChosenVoiceURI(const nsAString& aUri);
 
+  bool IsPreCanceled()
+  {
+    return mPreCanceled;
+  };
+
+  bool IsPrePaused()
+  {
+    return mPrePaused;
+  }
+
 protected:
   virtual ~nsSpeechTask();
 
@@ -77,11 +85,21 @@ protected:
 
   nsString mText;
 
+  bool mInited;
+
+  bool mPrePaused;
+
+  bool mPreCanceled;
+
 private:
   void End();
 
   void SendAudioImpl(nsRefPtr<mozilla::SharedBuffer>& aSamples, uint32_t aDataLen);
 
+  nsresult DispatchStartInner();
+
+  nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex);
+
   nsRefPtr<SourceMediaStream> mStream;
 
   nsRefPtr<MediaInputPort> mPort;
diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
index 7928e6f1470d..176f4aa0dd6c 100644
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp
@@ -97,6 +97,41 @@ public:
   bool mIsQueued;
 };
 
+// GlobalQueueItem
+
+class GlobalQueueItem final
+{
+private:
+  // Private destructor, to discourage deletion outside of Release():
+  ~GlobalQueueItem() {}
+
+public:
+  GlobalQueueItem(VoiceData* aVoice, nsSpeechTask* aTask, const nsAString& aText,
+                  const float& aVolume, const float& aRate, const float& aPitch)
+    : mVoice(aVoice)
+    , mTask(aTask)
+    , mText(aText)
+    , mVolume(aVolume)
+    , mRate(aRate)
+    , mPitch(aPitch) {}
+
+  NS_INLINE_DECL_REFCOUNTING(GlobalQueueItem)
+
+  nsRefPtr<VoiceData> mVoice;
+
+  nsRefPtr<nsSpeechTask> mTask;
+
+  nsString mText;
+
+  float mVolume;
+
+  float mRate;
+
+  float mPitch;
+
+  bool mIsLocal;
+};
+
 // nsSynthVoiceRegistry
 
 static StaticRefPtr<nsSynthVoiceRegistry> gSynthVoiceRegistry;
@@ -107,6 +142,7 @@ NS_IMPL_ISUPPORTS(nsSynthVoiceRegistry, nsISynthVoiceRegistry)
 nsSynthVoiceRegistry::nsSynthVoiceRegistry()
   : mSpeechSynthChild(nullptr)
   , mUseGlobalQueue(false)
+  , mIsSpeaking(false)
 {
   if (XRE_IsContentProcess()) {
 
@@ -115,8 +151,9 @@ nsSynthVoiceRegistry::nsSynthVoiceRegistry()
 
     InfallibleTArray<RemoteVoice> voices;
     InfallibleTArray<nsString> defaults;
+    bool isSpeaking;
 
-    mSpeechSynthChild->SendReadVoiceList(&voices, &defaults);
+    mSpeechSynthChild->SendReadVoicesAndState(&voices, &defaults, &isSpeaking);
 
     for (uint32_t i = 0; i < voices.Length(); ++i) {
       RemoteVoice voice = voices[i];
@@ -128,6 +165,8 @@ nsSynthVoiceRegistry::nsSynthVoiceRegistry()
     for (uint32_t i = 0; i < defaults.Length(); ++i) {
       SetDefaultVoice(defaults[i], true);
     }
+
+    mIsSpeaking = isSpeaking;
   }
 }
 
@@ -140,10 +179,10 @@ nsSynthVoiceRegistry::~nsSynthVoiceRegistry()
 
   if (mStream) {
     if (!mStream->IsDestroyed()) {
-     mStream->Destroy();
-   }
+      mStream->Destroy();
+    }
 
-   mStream = nullptr;
+    mStream = nullptr;
   }
 
   mUriVoiceMap.Clear();
@@ -175,13 +214,14 @@ void
 nsSynthVoiceRegistry::Shutdown()
 {
   LOG(LogLevel::Debug, ("[%s] nsSynthVoiceRegistry::Shutdown()",
-                     (XRE_IsContentProcess()) ? "Content" : "Default"));
+                        (XRE_IsContentProcess()) ? "Content" : "Default"));
   gSynthVoiceRegistry = nullptr;
 }
 
 void
-nsSynthVoiceRegistry::SendVoices(InfallibleTArray<RemoteVoice>* aVoices,
-                                 InfallibleTArray<nsString>* aDefaults)
+nsSynthVoiceRegistry::SendVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                                         InfallibleTArray<nsString>* aDefaults,
+                                         bool* aIsSpeaking)
 {
   for (uint32_t i=0; i < mVoices.Length(); ++i) {
     nsRefPtr<VoiceData> voice = mVoices[i];
@@ -193,6 +233,8 @@ nsSynthVoiceRegistry::SendVoices(InfallibleTArray<RemoteVoice>* aVoices,
   for (uint32_t i=0; i < mDefaultVoices.Length(); ++i) {
     aDefaults->AppendElement(mDefaultVoices[i]->mUri);
   }
+
+  *aIsSpeaking = IsSpeaking();
 }
 
 void
@@ -233,6 +275,18 @@ nsSynthVoiceRegistry::RecvSetDefaultVoice(const nsAString& aUri, bool aIsDefault
   gSynthVoiceRegistry->SetDefaultVoice(aUri, aIsDefault);
 }
 
+void
+nsSynthVoiceRegistry::RecvIsSpeakingChanged(bool aIsSpeaking)
+{
+  // If we dont have a local instance of the registry yet, we will get the
+  // speaking state on construction.
+  if(!gSynthVoiceRegistry) {
+    return;
+  }
+
+  gSynthVoiceRegistry->mIsSpeaking = aIsSpeaking;
+}
+
 NS_IMETHODIMP
 nsSynthVoiceRegistry::AddVoice(nsISpeechService* aService,
                                const nsAString& aUri,
@@ -603,10 +657,7 @@ nsSynthVoiceRegistry::Speak(const nsAString& aText,
                             const float& aPitch,
                             nsSpeechTask* aTask)
 {
-  LOG(LogLevel::Debug,
-      ("nsSynthVoiceRegistry::Speak text='%s' lang='%s' uri='%s' rate=%f pitch=%f",
-       NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aLang).get(),
-       NS_ConvertUTF16toUTF8(aUri).get(), aRate, aPitch));
+  MOZ_ASSERT(XRE_IsParentProcess());
 
   VoiceData* voice = FindBestMatch(aUri, aLang);
 
@@ -618,24 +669,132 @@ nsSynthVoiceRegistry::Speak(const nsAString& aText,
 
   aTask->SetChosenVoiceURI(voice->mUri);
 
-  LOG(LogLevel::Debug, ("nsSynthVoiceRegistry::Speak - Using voice URI: %s",
-                     NS_ConvertUTF16toUTF8(voice->mUri).get()));
+  if (mUseGlobalQueue || sForceGlobalQueue) {
+    LOG(LogLevel::Debug,
+        ("nsSynthVoiceRegistry::Speak queueing text='%s' lang='%s' uri='%s' rate=%f pitch=%f",
+         NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aLang).get(),
+         NS_ConvertUTF16toUTF8(aUri).get(), aRate, aPitch));
+    nsRefPtr<GlobalQueueItem> item = new GlobalQueueItem(voice, aTask, aText,
+                                                         aVolume, aRate, aPitch);
+    mGlobalQueue.AppendElement(item);
+
+    if (mGlobalQueue.Length() == 1) {
+      SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume, item->mRate,
+                item->mPitch);
+    }
+  } else {
+    SpeakImpl(voice, aTask, aText, aVolume, aRate, aPitch);
+  }
+}
+
+void
+nsSynthVoiceRegistry::SpeakNext()
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+
+  LOG(LogLevel::Debug,
+      ("nsSynthVoiceRegistry::SpeakNext %d", mGlobalQueue.IsEmpty()));
+
+  SetIsSpeaking(false);
+
+  if (mGlobalQueue.IsEmpty()) {
+    return;
+  }
+
+  mGlobalQueue.RemoveElementAt(0);
+
+  while (!mGlobalQueue.IsEmpty()) {
+    nsRefPtr<GlobalQueueItem> item = mGlobalQueue.ElementAt(0);
+    if (item->mTask->IsPreCanceled()) {
+      mGlobalQueue.RemoveElementAt(0);
+      continue;
+    }
+    if (!item->mTask->IsPrePaused()) {
+      SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume,
+                item->mRate, item->mPitch);
+    }
+    break;
+  }
+}
+
+void
+nsSynthVoiceRegistry::ResumeQueue()
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+  LOG(LogLevel::Debug,
+      ("nsSynthVoiceRegistry::ResumeQueue %d", mGlobalQueue.IsEmpty()));
+
+  if (mGlobalQueue.IsEmpty()) {
+    return;
+  }
+
+  nsRefPtr<GlobalQueueItem> item = mGlobalQueue.ElementAt(0);
+  if (!item->mTask->IsPrePaused()) {
+    SpeakImpl(item->mVoice, item->mTask, item->mText, item->mVolume,
+              item->mRate, item->mPitch);
+  }
+}
+
+bool
+nsSynthVoiceRegistry::IsSpeaking()
+{
+  return mIsSpeaking;
+}
+
+void
+nsSynthVoiceRegistry::SetIsSpeaking(bool aIsSpeaking)
+{
+  MOZ_ASSERT(XRE_IsParentProcess());
+
+  // Only set to 'true' if global queue is enabled.
+  mIsSpeaking = aIsSpeaking && (mUseGlobalQueue || sForceGlobalQueue);
+
+  nsTArray<SpeechSynthesisParent*> ssplist;
+  GetAllSpeechSynthActors(ssplist);
+  for (uint32_t i = 0; i < ssplist.Length(); ++i) {
+    unused << ssplist[i]->SendIsSpeakingChanged(aIsSpeaking);
+  }
+}
+
+void
+nsSynthVoiceRegistry::DropGlobalQueue()
+{
+  if (XRE_IsParentProcess()) {
+    mGlobalQueue.Clear();
+    SetIsSpeaking(false);
+  } else {
+    mSpeechSynthChild->SendDropGlobalQueue();
+  }
+}
+
+void
+nsSynthVoiceRegistry::SpeakImpl(VoiceData* aVoice,
+                                nsSpeechTask* aTask,
+                                const nsAString& aText,
+                                const float& aVolume,
+                                const float& aRate,
+                                const float& aPitch)
+{
+  LOG(LogLevel::Debug,
+      ("nsSynthVoiceRegistry::SpeakImpl queueing text='%s' uri='%s' rate=%f pitch=%f",
+       NS_ConvertUTF16toUTF8(aText).get(), NS_ConvertUTF16toUTF8(aVoice->mUri).get(),
+       aRate, aPitch));
 
   SpeechServiceType serviceType;
 
-  DebugOnly<nsresult> rv = voice->mService->GetServiceType(&serviceType);
+  DebugOnly<nsresult> rv = aVoice->mService->GetServiceType(&serviceType);
   NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "Failed to get speech service type");
 
   if (serviceType == nsISpeechService::SERVICETYPE_INDIRECT_AUDIO) {
-    aTask->SetIndirectAudio(true);
+    aTask->Init(nullptr);
   } else {
     if (!mStream) {
       mStream = MediaStreamGraph::GetInstance()->CreateTrackUnionStream(nullptr);
     }
-    aTask->BindStream(mStream);
+    aTask->Init(mStream);
   }
 
-  voice->mService->Speak(aText, voice->mUri, aVolume, aRate, aPitch, aTask);
+  aVoice->mService->Speak(aText, aVoice->mUri, aVolume, aRate, aPitch, aTask);
 }
 
 } // namespace dom
diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
index 98967e2362e9..94569bee183c 100644
--- a/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
+++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.h
@@ -23,6 +23,7 @@ class SpeechSynthesisUtterance;
 class SpeechSynthesisChild;
 class nsSpeechTask;
 class VoiceData;
+class GlobalQueueItem;
 
 class nsSynthVoiceRegistry final : public nsISynthVoiceRegistry
 {
@@ -39,8 +40,19 @@ public:
              const nsAString& aUri, const float& aVolume,  const float& aRate,
              const float& aPitch, nsSpeechTask* aTask);
 
-  void SendVoices(InfallibleTArray<RemoteVoice>* aVoices,
-                  InfallibleTArray<nsString>* aDefaults);
+  void SendVoicesAndState(InfallibleTArray<RemoteVoice>* aVoices,
+                          InfallibleTArray<nsString>* aDefaults,
+                          bool* aIsSpeaking);
+
+  void SpeakNext();
+
+  void ResumeQueue();
+
+  bool IsSpeaking();
+
+  void SetIsSpeaking(bool aIsSpeaking);
+
+  void DropGlobalQueue();
 
   static nsSynthVoiceRegistry* GetInstance();
 
@@ -52,6 +64,8 @@ public:
 
   static void RecvSetDefaultVoice(const nsAString& aUri, bool aIsDefault);
 
+  static void RecvIsSpeakingChanged(bool aIsSpeaking);
+
   static void Shutdown();
 
 private:
@@ -68,9 +82,16 @@ private:
                         bool aLocalService,
                         bool aQueuesUtterances);
 
-  nsTArray<nsRefPtr<VoiceData> > mVoices;
+  void SpeakImpl(VoiceData* aVoice,
+                 nsSpeechTask* aTask,
+                 const nsAString& aText,
+                 const float& aVolume,
+                 const float& aRate,
+                 const float& aPitch);
 
-  nsTArray<nsRefPtr<VoiceData> > mDefaultVoices;
+  nsTArray<nsRefPtr<VoiceData>> mVoices;
+
+  nsTArray<nsRefPtr<VoiceData>> mDefaultVoices;
 
   nsRefPtrHashtable<nsStringHashKey, VoiceData> mUriVoiceMap;
 
@@ -79,6 +100,10 @@ private:
   nsRefPtr<ProcessedMediaStream> mStream;
 
   bool mUseGlobalQueue;
+
+  nsTArray<nsRefPtr<GlobalQueueItem>> mGlobalQueue;
+
+  bool mIsSpeaking;
 };
 
 } // namespace dom
diff --git a/dom/webidl/SpeechSynthesis.webidl b/dom/webidl/SpeechSynthesis.webidl
index b48cf3d2f45a..f7a0275170ba 100644
--- a/dom/webidl/SpeechSynthesis.webidl
+++ b/dom/webidl/SpeechSynthesis.webidl
@@ -23,4 +23,7 @@ interface SpeechSynthesis {
   [UnsafeInPrerendering]
   void resume();
   sequence<SpeechSynthesisVoice> getVoices();
+
+  [ChromeOnly]
+  void dropGlobalQueue();
 };