From df4c4baa41bbbcdc3e31c5557a87168606c74093 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 20 Jul 2017 16:07:32 +0800 Subject: [PATCH] Bug 1333641 - Part 1: Making the speechSynthesis API unfingerprintable when 'privacy.resistFingerprinting' is true. r=arthuredelstein,smaug The patch will change the behavior of the speechSynthesis API when fingerprinting resistance is enabled. First, the speechSynthesis.getVoices() will always report an empty list and the speechSynthesis.onvoiceschanged event will be blocked. And it will immediately fail when using the speechSynthesis.speak() to speak an utterance. By doing so, websites can no longer fingerprint users through this speechSynthesis API. In addition, this only affects content, so the chrome can still use this API even when fingerprinting resistance is enabled. MozReview-Commit-ID: KxJX8fo30WS --HG-- extra : rebase_source : 12c14f3ae2d23dacb07796156b4f0fd233bff0f9 --- dom/media/webspeech/synth/SpeechSynthesis.cpp | 22 +++++++++++++++---- .../webspeech/synth/ipc/PSpeechSynthesis.ipdl | 2 +- .../synth/ipc/SpeechSynthesisChild.cpp | 7 +++--- .../synth/ipc/SpeechSynthesisChild.h | 5 +++-- .../synth/ipc/SpeechSynthesisParent.cpp | 8 ++++--- .../synth/ipc/SpeechSynthesisParent.h | 10 +++++---- dom/media/webspeech/synth/nsSpeechTask.cpp | 18 +++++++++++++-- dom/media/webspeech/synth/nsSpeechTask.h | 14 ++++++++++-- .../webspeech/synth/nsSynthVoiceRegistry.cpp | 19 ++++++++++++---- 9 files changed, 80 insertions(+), 25 deletions(-) diff --git a/dom/media/webspeech/synth/SpeechSynthesis.cpp b/dom/media/webspeech/synth/SpeechSynthesis.cpp index cf763dd39f22..af8f6b8d2762 100644 --- a/dom/media/webspeech/synth/SpeechSynthesis.cpp +++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp @@ -14,8 +14,10 @@ #include "mozilla/dom/SpeechSynthesisBinding.h" #include "SpeechSynthesis.h" +#include "nsContentUtils.h" #include "nsSynthVoiceRegistry.h" #include "nsIDocument.h" +#include "nsIDocShell.h" #undef LOG mozilla::LogModule* @@ -249,6 
+251,13 @@ SpeechSynthesis::GetVoices(nsTArray< RefPtr >& aResult) { aResult.Clear(); uint32_t voiceCount = 0; + nsCOMPtr window = GetOwner(); + nsCOMPtr docShell = window ? window->GetDocShell() : nullptr; + + + if (nsContentUtils::ShouldResistFingerprinting(docShell)) { + return; + } nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); if(NS_WARN_IF(NS_FAILED(rv))) { @@ -318,10 +327,15 @@ SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic, } } else if (strcmp(aTopic, "synth-voices-changed") == 0) { LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged")); - DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged")); - // If we have a pending item, and voices become available, speak it. - if (!mCurrentTask && !mHoldQueue && HasVoices()) { - AdvanceQueue(); + nsCOMPtr window = GetOwner(); + nsCOMPtr docShell = window ? window->GetDocShell() : nullptr; + + if (!nsContentUtils::ShouldResistFingerprinting(docShell)) { + DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged")); + // If we have a pending item, and voices become available, speak it. 
+ if (!mCurrentTask && !mHoldQueue && HasVoices()) { + AdvanceQueue(); + } } } diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl index 5a1737e358a4..fa9db91db981 100644 --- a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl +++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl @@ -42,7 +42,7 @@ parent: async __delete__(); async PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang, - float aVolume, float aRate, float aPitch); + float aVolume, float aRate, float aPitch, bool aIsChrome); }; } // namespace dom diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp index b170fc7457e3..00577411cea5 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp @@ -69,7 +69,8 @@ SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(const nsString& aText, const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) + const float& aPitch, + const bool& aIsChrome) { MOZ_CRASH("Caller is supposed to manually construct a request!"); } @@ -159,8 +160,8 @@ SpeechSynthesisRequestChild::RecvOnMark(const nsString& aName, // SpeechTaskChild -SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance) - : nsSpeechTask(aUtterance) +SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome) + : nsSpeechTask(aUtterance, aIsChrome) { } diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h index f830b23216be..1d6f717e9181 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h @@ -45,7 +45,8 @@ protected: const nsString& aText, const float& aVolume, const float& aPitch, - const float& aRate) override; + const float& aRate, + const bool& aIsChrome) override; bool 
DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor) override; }; @@ -82,7 +83,7 @@ class SpeechTaskChild : public nsSpeechTask friend class SpeechSynthesisRequestChild; public: - explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance); + explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome); NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback, uint32_t aChannels, uint32_t aRate, uint8_t argc) override; diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp index 9dab9c013b70..605590719a68 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp @@ -36,9 +36,10 @@ SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent(const nsString& aText, const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) + const float& aPitch, + const bool& aIsChrome) { - RefPtr task = new SpeechTaskParent(aVolume, aText); + RefPtr task = new SpeechTaskParent(aVolume, aText, aIsChrome); SpeechSynthesisRequestParent* actor = new SpeechSynthesisRequestParent(task); return actor; } @@ -57,7 +58,8 @@ SpeechSynthesisParent::RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRe const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) + const float& aPitch, + const bool& aIsChrome) { MOZ_ASSERT(aActor); SpeechSynthesisRequestParent* actor = diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h index 55854b1716d0..6bc9b3bd97ef 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h @@ -34,7 +34,8 @@ protected: const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) + const float& aPitch, + const bool& aIsChrome) override; bool 
DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor) override; @@ -45,7 +46,8 @@ protected: const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) override; + const float& aPitch, + const bool& aIsChrome) override; }; class SpeechSynthesisRequestParent : public PSpeechSynthesisRequestParent @@ -77,8 +79,8 @@ class SpeechTaskParent : public nsSpeechTask { friend class SpeechSynthesisRequestParent; public: - SpeechTaskParent(float aVolume, const nsAString& aUtterance) - : nsSpeechTask(aVolume, aUtterance) {} + SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome) + : nsSpeechTask(aVolume, aUtterance, aIsChrome) {} nsresult DispatchStartImpl(const nsAString& aUri); diff --git a/dom/media/webspeech/synth/nsSpeechTask.cpp b/dom/media/webspeech/synth/nsSpeechTask.cpp index d9056df9071c..24dfff3ebcb9 100644 --- a/dom/media/webspeech/synth/nsSpeechTask.cpp +++ b/dom/media/webspeech/synth/nsSpeechTask.cpp @@ -120,19 +120,20 @@ NS_INTERFACE_MAP_END NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask) NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask) -nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance) +nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome) : mUtterance(aUtterance) , mInited(false) , mPrePaused(false) , mPreCanceled(false) , mCallback(nullptr) , mIndirectAudio(false) + , mIsChrome(aIsChrome) { mText = aUtterance->mText; mVolume = aUtterance->Volume(); } -nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText) +nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome) : mUtterance(nullptr) , mVolume(aVolume) , mText(aText) @@ -141,6 +142,7 @@ nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText) , mPreCanceled(false) , mCallback(nullptr) , mIndirectAudio(false) + , mIsChrome(aIsChrome) { } @@ -513,6 +515,12 @@ nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) return NS_OK; } +void 
+nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) +{ + DispatchErrorInner(aElapsedTime, aCharIndex); +} + NS_IMETHODIMP nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) { @@ -523,6 +531,12 @@ nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) return NS_ERROR_FAILURE; } + return DispatchErrorInner(aElapsedTime, aCharIndex); +} + +nsresult +nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex) +{ if (!mPreCanceled) { nsSynthVoiceRegistry::GetInstance()->SpeakNext(); } diff --git a/dom/media/webspeech/synth/nsSpeechTask.h b/dom/media/webspeech/synth/nsSpeechTask.h index f627f27877e6..d43ec10c4319 100644 --- a/dom/media/webspeech/synth/nsSpeechTask.h +++ b/dom/media/webspeech/synth/nsSpeechTask.h @@ -35,8 +35,8 @@ public: NS_DECL_NSISPEECHTASK NS_DECL_NSIAUDIOCHANNELAGENTCALLBACK - explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance); - nsSpeechTask(float aVolume, const nsAString& aText); + explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome); + nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome); virtual void Pause(); @@ -59,6 +59,8 @@ public: virtual void SetAudioOutputVolume(float aVolume); + void ForceError(float aElapsedTime, uint32_t aCharIndex); + bool IsPreCanceled() { return mPreCanceled; @@ -69,6 +71,11 @@ public: return mPrePaused; } + bool IsChrome() + { + return mIsChrome; + } + protected: virtual ~nsSpeechTask(); @@ -112,6 +119,7 @@ private: nsresult DispatchStartInner(); + nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex); nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex); void CreateAudioChannelAgent(); @@ -133,6 +141,8 @@ private: bool mIndirectAudio; nsString mChosenVoiceURI; + + bool mIsChrome; }; } // namespace dom diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp index 9c728257806e..a0d4c40c6206 100644 --- 
a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp @@ -679,9 +679,14 @@ nsSynthVoiceRegistry::SpeakUtterance(SpeechSynthesisUtterance& aUtterance, } } + nsCOMPtr window = aUtterance.GetOwner(); + nsCOMPtr doc = window ? window->GetDoc() : nullptr; + + bool isChrome = nsContentUtils::IsChromeDoc(doc); + RefPtr task; if (XRE_IsContentProcess()) { - task = new SpeechTaskChild(&aUtterance); + task = new SpeechTaskChild(&aUtterance, isChrome); SpeechSynthesisRequestChild* actor = new SpeechSynthesisRequestChild(static_cast(task.get())); mSpeechSynthChild->SendPSpeechSynthesisRequestConstructor(actor, @@ -690,9 +695,10 @@ nsSynthVoiceRegistry::SpeakUtterance(SpeechSynthesisUtterance& aUtterance, uri, volume, aUtterance.Rate(), - aUtterance.Pitch()); + aUtterance.Pitch(), + isChrome); } else { - task = new nsSpeechTask(&aUtterance); + task = new nsSpeechTask(&aUtterance, isChrome); Speak(aUtterance.mText, lang, uri, volume, aUtterance.Rate(), aUtterance.Pitch(), task); } @@ -711,11 +717,16 @@ nsSynthVoiceRegistry::Speak(const nsAString& aText, { MOZ_ASSERT(XRE_IsParentProcess()); + if (!aTask->IsChrome() && nsContentUtils::ShouldResistFingerprinting()) { + aTask->ForceError(0, 0); + return; + } + VoiceData* voice = FindBestMatch(aUri, aLang); if (!voice) { NS_WARNING("No voices found."); - aTask->DispatchError(0, 0); + aTask->ForceError(0, 0); return; }