From df4c4baa41bbbcdc3e31c5557a87168606c74093 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 20 Jul 2017 16:07:32 +0800 Subject: [PATCH] Bug 1333641 - Part 1: Making the speechSynthesis API unfingerprintable when 'privacy.resistFingerprinting' is true. r=arthuredelstein,smaug The patch will change the behavior of the speechSynthesis API when fingerprinting resistance is enabled. First, the speechSynthesis.getVoices() will always report an empty list and the speechSynthesis.onvoiceschanged event will be blocked. And it will immediately fail when using the speechSynthesis.speak() to speak an utterance. By doing so, websites can no longer fingerprint users through this speechSynthesis API. In addition, this only affects content, so the chrome can still use this API even when fingerprinting resistance is enabled. MozReview-Commit-ID: KxJX8fo30WS --HG-- extra : rebase_source : 12c14f3ae2d23dacb07796156b4f0fd233bff0f9 --- dom/media/webspeech/synth/SpeechSynthesis.cpp | 22 +++++++++++++++---- .../webspeech/synth/ipc/PSpeechSynthesis.ipdl | 2 +- .../synth/ipc/SpeechSynthesisChild.cpp | 7 +++--- .../synth/ipc/SpeechSynthesisChild.h | 5 +++-- .../synth/ipc/SpeechSynthesisParent.cpp | 8 ++++--- .../synth/ipc/SpeechSynthesisParent.h | 10 +++++---- dom/media/webspeech/synth/nsSpeechTask.cpp | 18 +++++++++++++-- dom/media/webspeech/synth/nsSpeechTask.h | 14 ++++++++++-- .../webspeech/synth/nsSynthVoiceRegistry.cpp | 19 ++++++++++++---- 9 files changed, 80 insertions(+), 25 deletions(-) diff --git a/dom/media/webspeech/synth/SpeechSynthesis.cpp b/dom/media/webspeech/synth/SpeechSynthesis.cpp index cf763dd39f22..af8f6b8d2762 100644 --- a/dom/media/webspeech/synth/SpeechSynthesis.cpp +++ b/dom/media/webspeech/synth/SpeechSynthesis.cpp @@ -14,8 +14,10 @@ #include "mozilla/dom/SpeechSynthesisBinding.h" #include "SpeechSynthesis.h" +#include "nsContentUtils.h" #include "nsSynthVoiceRegistry.h" #include "nsIDocument.h" +#include "nsIDocShell.h" #undef LOG mozilla::LogModule* @@ -249,6 
+251,13 @@ SpeechSynthesis::GetVoices(nsTArray< RefPtr >& aResult) { aResult.Clear(); uint32_t voiceCount = 0; + nsCOMPtr window = GetOwner(); + nsCOMPtr docShell = window ? window->GetDocShell() : nullptr; + + + if (nsContentUtils::ShouldResistFingerprinting(docShell)) { + return; + } nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount); if(NS_WARN_IF(NS_FAILED(rv))) { @@ -318,10 +327,15 @@ SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic, } } else if (strcmp(aTopic, "synth-voices-changed") == 0) { LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged")); - DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged")); - // If we have a pending item, and voices become available, speak it. - if (!mCurrentTask && !mHoldQueue && HasVoices()) { - AdvanceQueue(); + nsCOMPtr window = GetOwner(); + nsCOMPtr docShell = window ? window->GetDocShell() : nullptr; + + if (!nsContentUtils::ShouldResistFingerprinting(docShell)) { + DispatchTrustedEvent(NS_LITERAL_STRING("voiceschanged")); + // If we have a pending item, and voices become available, speak it. 
+ if (!mCurrentTask && !mHoldQueue && HasVoices()) { + AdvanceQueue(); + } } } diff --git a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl index 5a1737e358a4..fa9db91db981 100644 --- a/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl +++ b/dom/media/webspeech/synth/ipc/PSpeechSynthesis.ipdl @@ -42,7 +42,7 @@ parent: async __delete__(); async PSpeechSynthesisRequest(nsString aText, nsString aUri, nsString aLang, - float aVolume, float aRate, float aPitch); + float aVolume, float aRate, float aPitch, bool aIsChrome); }; } // namespace dom diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp index b170fc7457e3..00577411cea5 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.cpp @@ -69,7 +69,8 @@ SpeechSynthesisChild::AllocPSpeechSynthesisRequestChild(const nsString& aText, const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) + const float& aPitch, + const bool& aIsChrome) { MOZ_CRASH("Caller is supposed to manually construct a request!"); } @@ -159,8 +160,8 @@ SpeechSynthesisRequestChild::RecvOnMark(const nsString& aName, // SpeechTaskChild -SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance) - : nsSpeechTask(aUtterance) +SpeechTaskChild::SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome) + : nsSpeechTask(aUtterance, aIsChrome) { } diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h index f830b23216be..1d6f717e9181 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisChild.h @@ -45,7 +45,8 @@ protected: const nsString& aText, const float& aVolume, const float& aPitch, - const float& aRate) override; + const float& aRate, + const bool& aIsChrome) override; bool 
DeallocPSpeechSynthesisRequestChild(PSpeechSynthesisRequestChild* aActor) override; }; @@ -82,7 +83,7 @@ class SpeechTaskChild : public nsSpeechTask friend class SpeechSynthesisRequestChild; public: - explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance); + explicit SpeechTaskChild(SpeechSynthesisUtterance* aUtterance, bool aIsChrome); NS_IMETHOD Setup(nsISpeechTaskCallback* aCallback, uint32_t aChannels, uint32_t aRate, uint8_t argc) override; diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp index 9dab9c013b70..605590719a68 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.cpp @@ -36,9 +36,10 @@ SpeechSynthesisParent::AllocPSpeechSynthesisRequestParent(const nsString& aText, const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) + const float& aPitch, + const bool& aIsChrome) { - RefPtr task = new SpeechTaskParent(aVolume, aText); + RefPtr task = new SpeechTaskParent(aVolume, aText, aIsChrome); SpeechSynthesisRequestParent* actor = new SpeechSynthesisRequestParent(task); return actor; } @@ -57,7 +58,8 @@ SpeechSynthesisParent::RecvPSpeechSynthesisRequestConstructor(PSpeechSynthesisRe const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) + const float& aPitch, + const bool& aIsChrome) { MOZ_ASSERT(aActor); SpeechSynthesisRequestParent* actor = diff --git a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h index 55854b1716d0..6bc9b3bd97ef 100644 --- a/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h +++ b/dom/media/webspeech/synth/ipc/SpeechSynthesisParent.h @@ -34,7 +34,8 @@ protected: const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) + const float& aPitch, + const bool& aIsChrome) override; bool 
DeallocPSpeechSynthesisRequestParent(PSpeechSynthesisRequestParent* aActor) override; @@ -45,7 +46,8 @@ protected: const nsString& aUri, const float& aVolume, const float& aRate, - const float& aPitch) override; + const float& aPitch, + const bool& aIsChrome) override; }; class SpeechSynthesisRequestParent : public PSpeechSynthesisRequestParent @@ -77,8 +79,8 @@ class SpeechTaskParent : public nsSpeechTask { friend class SpeechSynthesisRequestParent; public: - SpeechTaskParent(float aVolume, const nsAString& aUtterance) - : nsSpeechTask(aVolume, aUtterance) {} + SpeechTaskParent(float aVolume, const nsAString& aUtterance, bool aIsChrome) + : nsSpeechTask(aVolume, aUtterance, aIsChrome) {} nsresult DispatchStartImpl(const nsAString& aUri); diff --git a/dom/media/webspeech/synth/nsSpeechTask.cpp b/dom/media/webspeech/synth/nsSpeechTask.cpp index d9056df9071c..24dfff3ebcb9 100644 --- a/dom/media/webspeech/synth/nsSpeechTask.cpp +++ b/dom/media/webspeech/synth/nsSpeechTask.cpp @@ -120,19 +120,20 @@ NS_INTERFACE_MAP_END NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask) NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask) -nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance) +nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome) : mUtterance(aUtterance) , mInited(false) , mPrePaused(false) , mPreCanceled(false) , mCallback(nullptr) , mIndirectAudio(false) + , mIsChrome(aIsChrome) { mText = aUtterance->mText; mVolume = aUtterance->Volume(); } -nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText) +nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome) : mUtterance(nullptr) , mVolume(aVolume) , mText(aText) @@ -141,6 +142,7 @@ nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText) , mPreCanceled(false) , mCallback(nullptr) , mIndirectAudio(false) + , mIsChrome(aIsChrome) { } @@ -513,6 +515,12 @@ nsSpeechTask::DispatchResumeImpl(float aElapsedTime, uint32_t aCharIndex) return NS_OK; } +void 
+nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) +{ + DispatchErrorInner(aElapsedTime, aCharIndex); +} + NS_IMETHODIMP nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) { @@ -523,6 +531,12 @@ nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) return NS_ERROR_FAILURE; } + return DispatchErrorInner(aElapsedTime, aCharIndex); +} + +nsresult +nsSpeechTask::DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex) +{ if (!mPreCanceled) { nsSynthVoiceRegistry::GetInstance()->SpeakNext(); } diff --git a/dom/media/webspeech/synth/nsSpeechTask.h b/dom/media/webspeech/synth/nsSpeechTask.h index f627f27877e6..d43ec10c4319 100644 --- a/dom/media/webspeech/synth/nsSpeechTask.h +++ b/dom/media/webspeech/synth/nsSpeechTask.h @@ -35,8 +35,8 @@ public: NS_DECL_NSISPEECHTASK NS_DECL_NSIAUDIOCHANNELAGENTCALLBACK - explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance); - nsSpeechTask(float aVolume, const nsAString& aText); + explicit nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome); + nsSpeechTask(float aVolume, const nsAString& aText, bool aIsChrome); virtual void Pause(); @@ -59,6 +59,8 @@ public: virtual void SetAudioOutputVolume(float aVolume); + void ForceError(float aElapsedTime, uint32_t aCharIndex); + bool IsPreCanceled() { return mPreCanceled; @@ -69,6 +71,11 @@ public: return mPrePaused; } + bool IsChrome() + { + return mIsChrome; + } + protected: virtual ~nsSpeechTask(); @@ -112,6 +119,7 @@ private: nsresult DispatchStartInner(); + nsresult DispatchErrorInner(float aElapsedTime, uint32_t aCharIndex); nsresult DispatchEndInner(float aElapsedTime, uint32_t aCharIndex); void CreateAudioChannelAgent(); @@ -133,6 +141,8 @@ private: bool mIndirectAudio; nsString mChosenVoiceURI; + + bool mIsChrome; }; } // namespace dom diff --git a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp index 9c728257806e..a0d4c40c6206 100644 --- 
a/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp +++ b/dom/media/webspeech/synth/nsSynthVoiceRegistry.cpp @@ -679,9 +679,14 @@ nsSynthVoiceRegistry::SpeakUtterance(SpeechSynthesisUtterance& aUtterance, } } + nsCOMPtr window = aUtterance.GetOwner(); + nsCOMPtr doc = window ? window->GetDoc() : nullptr; + + bool isChrome = nsContentUtils::IsChromeDoc(doc); + RefPtr task; if (XRE_IsContentProcess()) { - task = new SpeechTaskChild(&aUtterance); + task = new SpeechTaskChild(&aUtterance, isChrome); SpeechSynthesisRequestChild* actor = new SpeechSynthesisRequestChild(static_cast(task.get())); mSpeechSynthChild->SendPSpeechSynthesisRequestConstructor(actor, @@ -690,9 +695,10 @@ nsSynthVoiceRegistry::SpeakUtterance(SpeechSynthesisUtterance& aUtterance, uri, volume, aUtterance.Rate(), - aUtterance.Pitch()); + aUtterance.Pitch(), + isChrome); } else { - task = new nsSpeechTask(&aUtterance); + task = new nsSpeechTask(&aUtterance, isChrome); Speak(aUtterance.mText, lang, uri, volume, aUtterance.Rate(), aUtterance.Pitch(), task); } @@ -711,11 +717,16 @@ nsSynthVoiceRegistry::Speak(const nsAString& aText, { MOZ_ASSERT(XRE_IsParentProcess()); + if (!aTask->IsChrome() && nsContentUtils::ShouldResistFingerprinting()) { + aTask->ForceError(0, 0); + return; + } + VoiceData* voice = FindBestMatch(aUri, aLang); if (!voice) { NS_WARNING("No voices found."); - aTask->DispatchError(0, 0); + aTask->ForceError(0, 0); return; }