Bug 1051148 - Part 1: Patch that introduces WebSpeech API implementation. r=smaug

IGNORE IDL --HG-- extra : histedit_source : d9d8b673d22c90cfc0e9c58e8599a2b34167c913
2015-05-27 21:48:00 -04:00 · 2015-05-27 21:48:00 -04:00 · bf300061b2
--- a/b2g/app/b2g.js
+++ b/b2g/app/b2g.js
@ -1003,6 +1003,10 @@ pref("network.proxy.browsing.app_origins", "app://system.gaiamobile.org");
 // Enable Web Speech synthesis API
 pref("media.webspeech.synth.enabled", true);

+// Enable Web Speech recognition API
+pref("media.webspeech.recognition.enable", true);
+pref("media.webspeech.service.default", "pocketsphinx");
+
 // Downloads API
 pref("dom.mozDownloads.enabled", true);
 pref("dom.downloads.max_retention_days", 7);
--- a/dom/events/test/test_all_synthetic_events.html
+++ b/dom/events/test/test_all_synthetic_events.html
@ -427,6 +427,10 @@ const kEventConstructors = {
                                                         return e;
                                                       },
                                             },
+  SpeechRecognitionError:                    { create: function (aName, aProps) {
+                                                         return new SpeechRecognitionError(aName, aProps);
+                                                       },
+                                             },
  SpeechRecognitionEvent:                    { create: function (aName, aProps) {
                                                         return new SpeechRecognitionEvent(aName, aProps);
                                                       },
--- a/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.cpp
+++ b/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.cpp
@ -0,0 +1,345 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsThreadUtils.h"
+#include "nsXPCOMCIDInternal.h"
+#include "PocketSphinxSpeechRecognitionService.h"
+#include "nsIFile.h"
+#include "SpeechGrammar.h"
+#include "SpeechRecognition.h"
+#include "SpeechRecognitionAlternative.h"
+#include "SpeechRecognitionResult.h"
+#include "SpeechRecognitionResultList.h"
+#include "nsIObserverService.h"
+#include "mozilla/Services.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsMemory.h"
+
+extern "C" {
+#include "pocketsphinx/pocketsphinx.h"
+#include "sphinxbase/sphinx_config.h"
+#include "sphinxbase/jsgf.h"
+}
+
+namespace mozilla {
+
+using namespace dom;
+
+/**
+ * Runnable that hands a finished PocketSphinx hypothesis back to the
+ * SpeechRecognition object.
+ *
+ * It must be constructed on the decoder worker thread (the constructor
+ * captures the current thread) and run on the main thread, where it dispatches
+ * the final-result event and then shuts the worker thread down.
+ */
+class DecodeResultTask : public nsRunnable
+{
+public:
+  /**
+   * @param hypstring   Recognized text to report as the transcript.
+   * @param recognition Weak reference to the SpeechRecognition to notify.
+   */
+  DecodeResultTask(const nsString& hypstring,
+                   WeakPtr<dom::SpeechRecognition> recognition)
+      : mResult(hypstring),
+        mRecognition(recognition),
+        mWorkerThread(do_GetCurrentThread())
+  {
+    MOZ_ASSERT(
+      !NS_IsMainThread()); // This should be running on the worker thread
+  }
+
+  NS_IMETHOD
+  Run()
+  {
+    MOZ_ASSERT(NS_IsMainThread()); // This method is supposed to run on the main
+                                   // thread!
+
+    // The recognition object is only weakly referenced and may have gone away
+    // while decoding was in progress; in that case there is nobody left to
+    // notify, but the worker thread below still has to be shut down.
+    if (mRecognition) {
+      nsRefPtr<SpeechEvent> event = new SpeechEvent(
+        mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT);
+
+      // Hold the freshly created objects in strong references from the start
+      // so none of them can leak if this code is changed to bail out early.
+      nsRefPtr<SpeechRecognitionResultList> resultList =
+        new SpeechRecognitionResultList(mRecognition);
+      nsRefPtr<SpeechRecognitionResult> result =
+        new SpeechRecognitionResult(mRecognition);
+      nsRefPtr<SpeechRecognitionAlternative> alternative =
+        new SpeechRecognitionAlternative(mRecognition);
+
+      alternative->mTranscript = mResult;
+      // The Web Speech API defines confidence as a value between 0 and 1. No
+      // real score is propagated from the decoder, so report the maximum.
+      alternative->mConfidence = 1.0f;
+
+      result->mItems.AppendElement(alternative);
+      resultList->mItems.AppendElement(result);
+
+      event->mRecognitionResultList = resultList;
+      NS_DispatchToMainThread(event);
+    }
+
+    // If we don't destroy the thread when we're done with it, it will hang
+    // around forever... bad!
+    // But thread->Shutdown must be called from the main thread, not from the
+    // thread itself.
+    return mWorkerThread->Shutdown();
+  }
+
+private:
+  /** Transcript to deliver. */
+  nsString mResult;
+  /** Recognition object that receives the result event (may be gone). */
+  WeakPtr<dom::SpeechRecognition> mRecognition;
+  /** Thread this task was created on; shut down from Run(). */
+  nsCOMPtr<nsIThread> mWorkerThread;
+};
+
+/**
+ * Runnable that feeds the buffered audio through the PocketSphinx decoder and
+ * then posts a DecodeResultTask carrying the hypothesis to the main thread.
+ *
+ * The audio samples are copied at construction time, so the task owns its own
+ * buffer. The decoder handle is borrowed, not owned.
+ */
+class DecodeTask : public nsRunnable
+{
+public:
+  /**
+   * @param recogntion  Weak reference to the SpeechRecognition to notify.
+   * @param audiovector Samples to decode (copied).
+   * @param ps          Decoder to run the utterance on.
+   */
+  DecodeTask(WeakPtr<dom::SpeechRecognition> recogntion,
+             const nsTArray<int16_t>& audiovector, ps_decoder_t* ps)
+      : mRecognition(recogntion), mAudiovector(audiovector), mPs(ps)
+  {
+  }
+
+  NS_IMETHOD
+  Run()
+  {
+    // Stays empty when decoding fails before a hypothesis can be requested.
+    nsAutoCString hypoValue;
+
+    if (ps_start_utt(mPs) >= 0) {
+      // Elements() is valid for an empty array, unlike indexing element 0.
+      const int processed =
+        ps_process_raw(mPs, mAudiovector.Elements(), mAudiovector.Length(),
+                       FALSE, FALSE);
+
+      // Always close an utterance that was opened, even if feeding the audio
+      // failed, so the decoder is not left mid-utterance.
+      const int ended = ps_end_utt(mPs);
+
+      if (processed >= 0 && ended >= 0) {
+        int32 score;
+        char const* hyp = ps_get_hyp(mPs, &score);
+        if (hyp == nullptr) {
+          hypoValue.Assign("ERROR");
+        } else {
+          hypoValue.Assign(hyp);
+        }
+      }
+    }
+
+    nsCOMPtr<nsIRunnable> resultrunnable =
+      new DecodeResultTask(NS_ConvertUTF8toUTF16(hypoValue), mRecognition);
+    return NS_DispatchToMainThread(resultrunnable);
+  }
+
+private:
+  /** Recognition object that will eventually receive the result. */
+  WeakPtr<dom::SpeechRecognition> mRecognition;
+  /** Private copy of the samples to decode. */
+  nsTArray<int16_t> mAudiovector;
+  /** Borrowed decoder handle. */
+  ps_decoder_t* mPs;
+};
+
+NS_IMPL_ISUPPORTS(PocketSphinxSpeechRecognitionService,
+                  nsISpeechRecognitionService, nsIObserver)
+
+/**
+ * Builds the PocketSphinx decoder from the acoustic model and dictionary found
+ * under <GRE dir>/models.
+ *
+ * Construction never fails outright: ISDecoderCreated records whether a usable
+ * decoder exists, and Initialize() / ValidateAndSetGrammarList() consult it
+ * before touching the decoder.
+ */
+PocketSphinxSpeechRecognitionService::PocketSphinxSpeechRecognitionService()
+{
+  // Give every raw member a defined value up front so the destructor is safe
+  // no matter which early exit below is taken.
+  mSpeexState = nullptr;
+  mPSHandle = nullptr;
+  mPSConfig = nullptr;
+  ISDecoderCreated = false;
+  ISGrammarCompiled = false;
+
+  nsAutoString aStringAMPath;   // acoustic model folder
+  nsAutoString aStringDictPath; // pronunciation dictionary file
+
+  // <GRE dir>/models/en-us-semi
+  nsCOMPtr<nsIFile> tmpFile;
+  nsresult rv = NS_GetSpecialDirectory(NS_GRE_DIR, getter_AddRefs(tmpFile));
+  if (NS_FAILED(rv) || !tmpFile) {
+    return;
+  }
+#if defined(XP_WIN) // for some reason, on windows NS_GRE_DIR is not bin root,
+                    // but bin/browser
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING(".."));
+#endif
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("models"));
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("en-us-semi"));
+  tmpFile->GetPath(aStringAMPath);
+
+  // <GRE dir>/models/dict/cmu07a.dic
+  rv = NS_GetSpecialDirectory(NS_GRE_DIR, getter_AddRefs(tmpFile));
+  if (NS_FAILED(rv) || !tmpFile) {
+    return;
+  }
+#if defined(XP_WIN) // for some reason, on windows NS_GRE_DIR is not bin root,
+                    // but bin/browser
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING(".."));
+#endif
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("models"));
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("dict"));
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("cmu07a.dic"));
+  tmpFile->GetPath(aStringDictPath);
+
+  // NOTE(review): the original author's note says the paths are hard-coded and
+  // that on B2G they need "/data" prepended, while on desktop they are
+  // "models/" relative to the root. Only the GRE-relative form is built here.
+
+  // Keep the UTF-8 copies in named locals: they stay alive across both
+  // cmd_ln_init() and ps_init() and are released automatically, unlike the
+  // heap buffers returned by ToNewUTF8String(), which nothing ever freed.
+  NS_ConvertUTF16toUTF8 amPath(aStringAMPath);
+  NS_ConvertUTF16toUTF8 dictPath(aStringDictPath);
+  mPSConfig = cmd_ln_init(nullptr, ps_args(), TRUE,
+                          "-hmm", amPath.get(), // acoustic model
+                          "-dict", dictPath.get(),
+                          nullptr);
+  if (mPSConfig == nullptr) {
+    return;
+  }
+
+  mPSHandle = ps_init(mPSConfig);
+  ISDecoderCreated = (mPSHandle != nullptr);
+}
+
+/**
+ * Releases the decoder, its configuration and any resampler still alive.
+ */
+PocketSphinxSpeechRecognitionService::~PocketSphinxSpeechRecognitionService()
+{
+  // ISDecoderCreated is only true when ps_init() returned a handle, so it
+  // guards against touching a handle that was never created.
+  // The decoder and the configuration are opaque library objects with their
+  // own release functions; plain free() would leak everything they own.
+  if (ISDecoderCreated && mPSHandle) {
+    ps_free(mPSHandle);
+  }
+  mPSHandle = nullptr;
+
+  if (mPSConfig) {
+    cmd_ln_free_r(mPSConfig);
+  }
+  mPSConfig = nullptr;
+
+  // A resampler is still alive if audio was processed but SoundEnd() was
+  // never reached.
+  if (mSpeexState) {
+    speex_resampler_destroy(mSpeexState);
+  }
+  mSpeexState = nullptr;
+}
+
+/**
+ * Prepares the service for a new recognition session. This is where a
+ * SpeechRecognition.start() call from script ends up.
+ *
+ * @param aSpeechRecognition Recognition object that will receive the result.
+ * @return NS_ERROR_NOT_INITIALIZED when the decoder was not created or no
+ *         grammar has been compiled yet; NS_ERROR_UNEXPECTED when the observer
+ *         service is unavailable; NS_OK otherwise.
+ */
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Initialize(
+    WeakPtr<SpeechRecognition> aSpeechRecognition)
+{
+  if (!ISDecoderCreated || !ISGrammarCompiled) {
+    return NS_ERROR_NOT_INITIALIZED;
+  }
+
+  nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
+  if (!obs) {
+    return NS_ERROR_UNEXPECTED;
+  }
+
+  // Start from an empty sample buffer.
+  mAudioVector.Clear();
+
+  // A resampler left over from a session that never reached SoundEnd() has to
+  // be destroyed, not just forgotten, or it leaks.
+  if (mSpeexState) {
+    speex_resampler_destroy(mSpeexState);
+    mSpeexState = nullptr;
+  }
+
+  mRecognition = aSpeechRecognition;
+
+  // Test hooks handled in Observe().
+  obs->AddObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC, false);
+  obs->AddObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC, false);
+  return NS_OK;
+}
+
+/**
+ * Resamples an incoming audio segment to 16 kHz and appends its samples to the
+ * buffer that SoundEnd() later hands to the decoder.
+ *
+ * @param aAudioSegment Segment to consume; it is resampled in place.
+ * @param aSampleRate   Sample rate of the incoming segment.
+ * @return NS_ERROR_OUT_OF_MEMORY if the resampler cannot be created, NS_OK
+ *         otherwise.
+ */
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::ProcessAudioSegment(
+  AudioSegment* aAudioSegment, int32_t aSampleRate)
+{
+  // The resampler is created lazily on the first segment of a session.
+  if (!mSpeexState) {
+    mSpeexState = speex_resampler_init(1, aSampleRate, 16000,
+                                       SPEEX_RESAMPLER_QUALITY_MAX, nullptr);
+    if (!mSpeexState) {
+      return NS_ERROR_OUT_OF_MEMORY;
+    }
+  }
+  aAudioSegment->ResampleChunks(mSpeexState, aSampleRate, 16000);
+
+  for (AudioSegment::ChunkIterator iterator(*aAudioSegment);
+       !iterator.IsEnded(); iterator.Next()) {
+    mozilla::AudioChunk& chunk = *(iterator);
+    MOZ_ASSERT(chunk.mBuffer);
+    // NOTE(review): only channel 0 is read and it is treated as 16-bit
+    // samples - confirm the chunks always use that format.
+    const int16_t* buf = static_cast<const int16_t*>(chunk.mChannelData[0]);
+
+    // Append the whole chunk in one go instead of sample by sample.
+    mAudioVector.AppendElements(buf, static_cast<size_t>(chunk.mDuration));
+  }
+  return NS_OK;
+}
+
+/**
+ * Called when the audio input has ended: tears down the resampler and starts
+ * decoding the buffered samples on a freshly created thread.
+ *
+ * @return NS_OK once the decode task has been dispatched, otherwise the
+ *         failure code of the step that failed.
+ */
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::SoundEnd()
+{
+  // No resampler exists if no audio segment was processed in this session.
+  if (mSpeexState) {
+    speex_resampler_destroy(mSpeexState);
+    mSpeexState = nullptr;
+  }
+
+  // To create a new thread, get the thread manager
+  nsresult rv;
+  nsCOMPtr<nsIThreadManager> tm =
+    do_GetService(NS_THREADMANAGER_CONTRACTID, &rv);
+  if (NS_FAILED(rv) || !tm) {
+    return NS_FAILED(rv) ? rv : NS_ERROR_UNEXPECTED;
+  }
+
+  nsCOMPtr<nsIThread> decodethread;
+  rv = tm->NewThread(0, 0, getter_AddRefs(decodethread));
+  if (NS_FAILED(rv)) {
+    // No decoding can happen without a thread; report the failure instead of
+    // pretending the request was accepted.
+    return rv;
+  }
+
+  // DecodeTask copies the sample buffer, so mAudioVector can be reused.
+  nsCOMPtr<nsIRunnable> r =
+    new DecodeTask(mRecognition, mAudioVector, mPSHandle);
+  rv = decodethread->Dispatch(r, nsIEventTarget::DISPATCH_NORMAL);
+  if (NS_FAILED(rv)) {
+    // Nothing will ever run on the thread, so the result task that normally
+    // shuts it down will not exist either; do it here.
+    decodethread->Shutdown();
+    return rv;
+  }
+
+  return NS_OK;
+}
+
+/**
+ * Compiles the JSGF source of the given grammar and makes it the decoder's
+ * active search.
+ *
+ * @param aSpeechGrammar Grammar whose source is compiled; may be null.
+ * @param aCallback      Currently unused.
+ * @return NS_OK if the grammar was compiled and activated,
+ *         NS_ERROR_NOT_INITIALIZED otherwise (no decoder, no grammar, or the
+ *         grammar was rejected).
+ */
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::ValidateAndSetGrammarList(
+  SpeechGrammar* aSpeechGrammar,
+  nsISpeechGrammarCompilationCallback* aCallback)
+{
+  // Assume failure; only a fully successful compile + activation flips this.
+  ISGrammarCompiled = false;
+
+  if (ISDecoderCreated && aSpeechGrammar) {
+    nsAutoString grammar;
+    ErrorResult rv;
+    aSpeechGrammar->GetSrc(grammar, rv);
+
+    // Only select the search once the grammar has actually been registered
+    // under that name, and treat a failed selection as a failed compile.
+    const bool compiled =
+      ps_set_jsgf_string(mPSHandle, "name",
+                         NS_ConvertUTF16toUTF8(grammar).get()) == 0;
+    if (compiled && ps_set_search(mPSHandle, "name") == 0) {
+      ISGrammarCompiled = true;
+    }
+  }
+
+  return ISGrammarCompiled ? NS_OK : NS_ERROR_NOT_INITIALIZED;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Abort()
+{
+  // Currently a no-op: nothing is cancelled or released here.
+  return NS_OK;
+}
+
+/**
+ * Observer-service hook used by tests to fake recognition service events.
+ *
+ * The test-end topic unregisters both observers. Any other notification is
+ * interpreted through aData, which names the event to simulate: either a
+ * service error or a mock final result. Unknown names are ignored.
+ */
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Observe(nsISupports* aSubject,
+                                              const char* aTopic,
+                                              const char16_t* aData)
+{
+  MOZ_ASSERT(mRecognition->mTestConfig.mFakeRecognitionService,
+             "Got request to fake recognition service event, "
+             "but " TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE " is not set");
+
+  // End of test: stop listening for further test notifications.
+  if (strcmp(aTopic, SPEECH_RECOGNITION_TEST_END_TOPIC) == 0) {
+    nsCOMPtr<nsIObserverService> observerService =
+      services::GetObserverService();
+    observerService->RemoveObserver(
+      this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC);
+    observerService->RemoveObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC);
+    return NS_OK;
+  }
+
+  const nsDependentString requestedEvent(aData);
+
+  // Simulated service failure.
+  if (requestedEvent.EqualsLiteral("EVENT_RECOGNITIONSERVICE_ERROR")) {
+    mRecognition->DispatchError(
+      SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
+      SpeechRecognitionErrorCode::Network, // TODO different codes?
+      NS_LITERAL_STRING("RECOGNITIONSERVICE_ERROR test event"));
+    return NS_OK;
+  }
+
+  // Anything other than a final-result request is silently ignored.
+  if (!requestedEvent.EqualsLiteral("EVENT_RECOGNITIONSERVICE_FINAL_RESULT")) {
+    return NS_OK;
+  }
+
+  // Simulated final result carrying the mock result list.
+  nsRefPtr<SpeechEvent> finalResult = new SpeechEvent(
+    mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT);
+  finalResult->mRecognitionResultList = BuildMockResultList();
+  NS_DispatchToMainThread(finalResult);
+
+  return NS_OK;
+}
+
+/**
+ * Creates a result list holding one result with one alternative whose
+ * transcript is the fixed text "Mock final result" and whose confidence is 0.
+ */
+SpeechRecognitionResultList*
+PocketSphinxSpeechRecognitionService::BuildMockResultList()
+{
+  // Build from the innermost object outwards.
+  SpeechRecognitionAlternative* mockAlternative =
+    new SpeechRecognitionAlternative(mRecognition);
+  mockAlternative->mConfidence = 0.0f;
+  mockAlternative->mTranscript = NS_LITERAL_STRING("Mock final result");
+
+  SpeechRecognitionResult* mockResult =
+    new SpeechRecognitionResult(mRecognition);
+  mockResult->mItems.AppendElement(mockAlternative);
+
+  SpeechRecognitionResultList* mockList =
+    new SpeechRecognitionResultList(mRecognition);
+  mockList->mItems.AppendElement(mockResult);
+
+  return mockList;
+}
+
+} // namespace mozilla
--- a/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.h
+++ b/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.h
@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_PocketSphinxRecognitionService_h
+#define mozilla_dom_PocketSphinxRecognitionService_h
+
+#include "nsCOMPtr.h"
+#include "nsTArray.h"
+#include "nsIObserver.h"
+#include "nsISpeechRecognitionService.h"
+#include "speex/speex_resampler.h"
+
+extern "C" {
+#include <pocketsphinx/pocketsphinx.h>
+#include <sphinxbase/sphinx_config.h>
+}
+
+// {0ff5ce56-5b09-4db8-adc6-8266af95f864}
+// The macro expands to a bare initializer with no trailing semicolon, so it
+// can be used wherever an initializer expression is expected; the statement
+// using it supplies its own terminator.
+#define NS_POCKETSPHINX_SPEECH_RECOGNITION_SERVICE_CID                         \
+  {                                                                            \
+    0x0ff5ce56, 0x5b09, 0x4db8, {                                              \
+      0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64                           \
+    }                                                                          \
+  }
+
+namespace mozilla {
+
+/**
+ * PocketSphinx implementation of the nsISpeechRecognitionService interface.
+ * It also implements nsIObserver so tests can request fake service events.
+ */
+class PocketSphinxSpeechRecognitionService : public nsISpeechRecognitionService,
+                                             public nsIObserver
+{
+public:
+  // Add XPCOM glue code
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSISPEECHRECOGNITIONSERVICE
+
+  // Add nsIObserver code
+  NS_DECL_NSIOBSERVER
+
+  /**
+   * Default constructs a PocketSphinxSpeechRecognitionService, loading the
+   * default acoustic model and dictionary files
+   */
+  PocketSphinxSpeechRecognitionService();
+
+private:
+  /**
+   * Private destructor to prevent bypassing of reference counting
+   */
+  virtual ~PocketSphinxSpeechRecognitionService();
+
+  /** The associated SpeechRecognition (weak; set by Initialize()) */
+  WeakPtr<dom::SpeechRecognition> mRecognition;
+
+  /**
+   * Builds a mock SpeechRecognitionResultList (used for the fake final-result
+   * test event handled in Observe())
+   */
+  dom::SpeechRecognitionResultList* BuildMockResultList();
+
+  /** Speex resampler state; null while no resampler exists */
+  SpeexResamplerState* mSpeexState;
+
+  /** PocketSphinx decoder */
+  ps_decoder_t* mPSHandle;
+
+  /** Sphinxbase parsed command line arguments */
+  cmd_ln_t* mPSConfig;
+
+  /** Flag to verify if decoder was created */
+  bool ISDecoderCreated;
+
+  /** Flag to verify if grammar was compiled */
+  bool ISGrammarCompiled;
+
+  /** Audio data: samples accumulated by ProcessAudioSegment() */
+  nsTArray<int16_t> mAudioVector;
+};
+
+} // namespace mozilla
+
+#endif
--- a/dom/media/webspeech/recognition/SpeechGrammar.cpp
+++ b/dom/media/webspeech/recognition/SpeechGrammar.cpp
@ -53,14 +53,14 @@ SpeechGrammar::WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto)
+// Copies the stored grammar source (mSrc) into aRetVal. aRv is never set by
+// this implementation.
 void
 SpeechGrammar::GetSrc(nsString& aRetVal, ErrorResult& aRv) const
 {
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  aRetVal = mSrc;
  return;
 }

+// Stores aArg as the grammar source (mSrc), replacing any previous value. aRv
+// is never set by this implementation.
 void
 SpeechGrammar::SetSrc(const nsAString& aArg, ErrorResult& aRv)
 {
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  mSrc = aArg;
  return;
 }

--- a/dom/media/webspeech/recognition/SpeechGrammar.h
+++ b/dom/media/webspeech/recognition/SpeechGrammar.h
@ -49,6 +49,8 @@ private:
  ~SpeechGrammar();

  nsCOMPtr<nsISupports> mParent;
+
+  nsString mSrc;
 };

 } // namespace dom
--- a/dom/media/webspeech/recognition/SpeechGrammarList.cpp
+++ b/dom/media/webspeech/recognition/SpeechGrammarList.cpp
@ -15,7 +15,7 @@
 namespace mozilla {
 namespace dom {

-NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechGrammarList, mParent)
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechGrammarList, mParent, mItems)
 NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechGrammarList)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechGrammarList)
 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechGrammarList)
@ -64,14 +64,14 @@ SpeechGrammarList::GetParentObject() const
 uint32_t
 SpeechGrammarList::Length() const
 {
-  return 0;
+  return mItems.Length();
 }

 already_AddRefed<SpeechGrammar>
 SpeechGrammarList::Item(uint32_t aIndex, ErrorResult& aRv)
 {
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
-  return nullptr;
+  // The index comes from the caller unchecked; reject anything past the end
+  // instead of reading outside the array.
+  if (aIndex >= mItems.Length()) {
+    aRv.Throw(NS_ERROR_DOM_INDEX_SIZE_ERR);
+    return nullptr;
+  }
+  nsRefPtr<SpeechGrammar> result = mItems.ElementAt(aIndex);
+  return result.forget();
 }

 void
@ -88,7 +88,10 @@ SpeechGrammarList::AddFromString(const nsAString& aString,
                                 const Optional<float>& aWeight,
                                 ErrorResult& aRv)
 {
-  mRecognitionService->ValidateAndSetGrammarList(this, nullptr);
+  SpeechGrammar* speechGrammar = new SpeechGrammar(mParent);
+  speechGrammar->SetSrc(aString, aRv);
+  mItems.AppendElement(speechGrammar);
+  mRecognitionService->ValidateAndSetGrammarList(speechGrammar, nullptr);
  return;
 }

@ -96,8 +99,13 @@ already_AddRefed<SpeechGrammar>
 SpeechGrammarList::IndexedGetter(uint32_t aIndex, bool& aPresent,
                                  ErrorResult& aRv)
 {
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  // Indexed access past the end reports "not present" rather than an error.
+  if (aIndex >= Length()) {
+    aPresent = false;
     return nullptr;
+  }
+  aPresent = true;
+  // Forward the caller's ErrorResult so a failure in Item() is not lost in a
+  // throw-away local.
+  return Item(aIndex, aRv);
 }
 } // namespace dom
 } // namespace mozilla
--- a/dom/media/webspeech/recognition/SpeechGrammarList.h
+++ b/dom/media/webspeech/recognition/SpeechGrammarList.h
@ -56,6 +56,8 @@ private:
  ~SpeechGrammarList();

  nsCOMPtr<nsISupports> mParent;
+
+  nsTArray<nsRefPtr<SpeechGrammar>> mItems;
 };

 } // namespace dom
--- a/dom/media/webspeech/recognition/SpeechRecognition.cpp
+++ b/dom/media/webspeech/recognition/SpeechRecognition.cpp
@ -34,7 +34,7 @@ namespace mozilla {
 namespace dom {

 #define PREFERENCE_DEFAULT_RECOGNITION_SERVICE "media.webspeech.service.default"
-#define DEFAULT_RECOGNITION_SERVICE "google"
+#define DEFAULT_RECOGNITION_SERVICE "pocketsphinx"

 #define PREFERENCE_ENDPOINTER_SILENCE_LENGTH "media.webspeech.silence_length"
 #define PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH "media.webspeech.long_silence_length"
@ -84,9 +84,9 @@ GetSpeechRecognitionService()
      NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "fake";
  }

-  nsresult aRv;
+  nsresult rv;
  nsCOMPtr<nsISpeechRecognitionService> recognitionService;
-  recognitionService = do_GetService(speechRecognitionServiceCID.get(), &aRv);
+  recognitionService = do_GetService(speechRecognitionServiceCID.get(), &rv);
  return recognitionService.forget();
 }

@ -477,7 +477,7 @@ SpeechRecognition::NotifyFinalResult(SpeechEvent* aEvent)
  init.mCancelable = false;
  // init.mResultIndex = 0;
  init.mResults = aEvent->mRecognitionResultList;
-  init.mInterpretation = NS_LITERAL_STRING("NOT_IMPLEMENTED");
+  init.mInterpretation = JS::NullValue();
  // init.mEmma = nullptr;

  nsRefPtr<SpeechRecognitionEvent> event =
@ -535,7 +535,9 @@ SpeechRecognition::StartRecording(DOMMediaStream* aDOMStream)
  // doesn't get Destroy()'ed
  mDOMStream = aDOMStream;

-  NS_ENSURE_STATE(mDOMStream->GetStream());
+  if (NS_WARN_IF(!mDOMStream->GetStream())) {
+    return NS_ERROR_UNEXPECTED;
+  }
  mSpeechListener = new SpeechStreamListener(this);
  mDOMStream->GetStream()->AddListener(mSpeechListener);

@ -698,11 +700,15 @@ SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream, Error
  }

  mRecognitionService = GetSpeechRecognitionService();
-  NS_ENSURE_TRUE_VOID(mRecognitionService);
+  if (NS_WARN_IF(!mRecognitionService)) {
+    return;
+  }

  nsresult rv;
  rv = mRecognitionService->Initialize(this);
-  NS_ENSURE_SUCCESS_VOID(rv);
+  if (NS_WARN_IF(NS_FAILED(rv))) {
+    return;
+  }

  MediaStreamConstraints constraints;
  constraints.mAudio.SetAsBoolean() = true;
@ -957,7 +963,7 @@ SpeechRecognition::GetUserMediaErrorCallback::OnError(nsISupports* aError)
  }
  SpeechRecognitionErrorCode errorCode;

-  nsString name;
+  nsAutoString name;
  error->GetName(name);
  if (name.EqualsLiteral("PERMISSION_DENIED")) {
    errorCode = SpeechRecognitionErrorCode::Not_allowed;
@ -965,7 +971,7 @@ SpeechRecognition::GetUserMediaErrorCallback::OnError(nsISupports* aError)
    errorCode = SpeechRecognitionErrorCode::Audio_capture;
  }

-  nsString message;
+  nsAutoString message;
  error->GetMessage(message);
  mRecognition->DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR, errorCode,
                              message);
--- a/dom/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
+++ b/dom/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
@ -22,8 +22,7 @@ NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionAlternative)
 NS_INTERFACE_MAP_END

 SpeechRecognitionAlternative::SpeechRecognitionAlternative(SpeechRecognition* aParent)
-  : mTranscript(NS_LITERAL_STRING(""))
-  , mConfidence(0)
+  : mConfidence(0)
  , mParent(aParent)
 {
 }
--- a/dom/media/webspeech/recognition/SpeechRecognitionResult.cpp
+++ b/dom/media/webspeech/recognition/SpeechRecognitionResult.cpp
@ -67,7 +67,7 @@ SpeechRecognitionResult::Item(uint32_t aIndex)
 }

 bool
-SpeechRecognitionResult::Final() const
+SpeechRecognitionResult::IsFinal() const
 {
  return true; // TODO
 }
--- a/dom/media/webspeech/recognition/SpeechRecognitionResult.h
+++ b/dom/media/webspeech/recognition/SpeechRecognitionResult.h
@ -38,11 +38,11 @@ public:

  already_AddRefed<SpeechRecognitionAlternative> Item(uint32_t aIndex);

-  bool Final() const;
+  bool IsFinal() const;

  already_AddRefed<SpeechRecognitionAlternative> IndexedGetter(uint32_t aIndex, bool& aPresent);

-  nsTArray<nsRefPtr<SpeechRecognitionAlternative> > mItems;
+  nsTArray<nsRefPtr<SpeechRecognitionAlternative>> mItems;

 private:
  ~SpeechRecognitionResult();
--- a/dom/media/webspeech/recognition/SpeechRecognitionResultList.h
+++ b/dom/media/webspeech/recognition/SpeechRecognitionResultList.h
@ -41,7 +41,7 @@ public:

  already_AddRefed<SpeechRecognitionResult> IndexedGetter(uint32_t aIndex, bool& aPresent);

-  nsTArray<nsRefPtr<SpeechRecognitionResult> > mItems;
+  nsTArray<nsRefPtr<SpeechRecognitionResult>> mItems;
 private:
  ~SpeechRecognitionResultList();

--- a/dom/media/webspeech/recognition/moz.build
+++ b/dom/media/webspeech/recognition/moz.build
@ -26,6 +26,11 @@ if CONFIG['MOZ_WEBSPEECH_TEST_BACKEND']:
        'test/FakeSpeechRecognitionService.h',
    ]

+if CONFIG['MOZ_WEBSPEECH_POCKETSPHINX']:
+    EXPORTS.mozilla.dom += [
+        'PocketSphinxSpeechRecognitionService.h',
+    ]
+
 UNIFIED_SOURCES += [
    'endpointer.cc',
    'energy_endpointer.cc',
@ -44,10 +49,21 @@ if CONFIG['MOZ_WEBSPEECH_TEST_BACKEND']:
        'test/FakeSpeechRecognitionService.cpp',
    ]

+if CONFIG['MOZ_WEBSPEECH_POCKETSPHINX']:
+    UNIFIED_SOURCES += [
+        'PocketSphinxSpeechRecognitionService.cpp',
+    ]
+
 LOCAL_INCLUDES += [
    '/dom/base',
 ]

+# The sphinxbase headers are only used by the PocketSphinx backend, so their
+# include path is gated on the same option as the backend's sources.
+if CONFIG['MOZ_WEBSPEECH_POCKETSPHINX']:
+    LOCAL_INCLUDES += [
+        '/media/pocketsphinx',
+        '/media/sphinxbase',
+    ]
+
 include('/ipc/chromium/chromium-config.mozbuild')

 FINAL_LIBRARY = 'xul'
--- a/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl
+++ b/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl
@ -21,8 +21,8 @@ class SpeechGrammar;

 native SpeechRecognitionWeakPtr(mozilla::WeakPtr<mozilla::dom::SpeechRecognition>);
 [ptr] native AudioSegmentPtr(mozilla::AudioSegment);
-[ptr] native SpeechGrammarListPtr(mozilla::dom::SpeechGrammarList);
 [ptr] native SpeechGrammarPtr(mozilla::dom::SpeechGrammar);
+[ptr] native SpeechGrammarListPtr(mozilla::dom::SpeechGrammarList);

 [uuid(374583f0-4507-11e4-a183-164230d1df67)]
 interface nsISpeechGrammarCompilationCallback : nsISupports {
@ -33,7 +33,7 @@ interface nsISpeechGrammarCompilationCallback : nsISupports {
 interface nsISpeechRecognitionService : nsISupports {
    void initialize(in SpeechRecognitionWeakPtr aSpeechRecognition);
    void processAudioSegment(in AudioSegmentPtr aAudioSegment, in long aSampleRate);
-    void validateAndSetGrammarList(in SpeechGrammarListPtr aSpeechGramarList, in nsISpeechGrammarCompilationCallback aCallback);
+    void validateAndSetGrammarList(in SpeechGrammarPtr aSpeechGrammar, in nsISpeechGrammarCompilationCallback aCallback);
    void soundEnd();
    void abort();
 };
--- a/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp
+++ b/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp
@ -52,7 +52,7 @@ FakeSpeechRecognitionService::SoundEnd()
 }

 NS_IMETHODIMP
-FakeSpeechRecognitionService::ValidateAndSetGrammarList(mozilla::dom::SpeechGrammarList*, nsISpeechGrammarCompilationCallback*)
+FakeSpeechRecognitionService::ValidateAndSetGrammarList(mozilla::dom::SpeechGrammar*, nsISpeechGrammarCompilationCallback*)
 {
  return NS_OK;
 }
--- a/dom/media/webspeech/recognition/test/head.js
+++ b/dom/media/webspeech/recognition/test/head.js
@ -38,7 +38,7 @@ function EventManager(sr) {

  var eventDependencies = {
    "speechend": "speechstart",
-    "soundent": "soundstart",
+    "soundend": "soundstart",
    "audioend": "audiostart"
  };

--- a/dom/tests/mochitest/general/test_interfaces.html
+++ b/dom/tests/mochitest/general/test_interfaces.html
@ -938,6 +938,22 @@ var interfaceNamesInGlobalScope =
    {name: "SourceBuffer", linux: false, release: false},
 // IMPORTANT: Do not change this list without review from a DOM peer!
    {name: "SourceBufferList", linux: false, release: false},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognition", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionError", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionAlternative", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionResult", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionResultList", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionEvent", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechGrammar", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechGrammarList", b2g: true, nightly: true},
 // IMPORTANT: Do not change this list without review from a DOM peer!
    {name: "SpeechSynthesisEvent", b2g: true},
 // IMPORTANT: Do not change this list without review from a DOM peer!
--- a/dom/webidl/SpeechRecognitionEvent.webidl
+++ b/dom/webidl/SpeechRecognitionEvent.webidl
@ -10,15 +10,15 @@ interface nsISupports;
 interface SpeechRecognitionEvent : Event
 {
  readonly attribute unsigned long resultIndex;
-  readonly attribute nsISupports? results;
-  readonly attribute DOMString? interpretation;
+  readonly attribute SpeechRecognitionResultList? results;
+  readonly attribute any interpretation;
  readonly attribute Document? emma;
 };

 dictionary SpeechRecognitionEventInit : EventInit
 {
  unsigned long resultIndex = 0;
-  nsISupports? results = null;
-  DOMString interpretation = "";
+  SpeechRecognitionResultList? results = null;
+  any interpretation = null;
  Document? emma = null;
 };
--- a/dom/webidl/SpeechRecognitionResult.webidl
+++ b/dom/webidl/SpeechRecognitionResult.webidl
@ -14,5 +14,5 @@
 interface SpeechRecognitionResult {
    readonly attribute unsigned long length;
    getter SpeechRecognitionAlternative item(unsigned long index);
-    readonly attribute boolean final;
+    readonly attribute boolean isFinal;
 };
--- a/layout/build/moz.build
+++ b/layout/build/moz.build
@ -119,6 +119,7 @@ if CONFIG['MOZ_B2G_BT']:

 if CONFIG['MOZ_WEBSPEECH']:
    LOCAL_INCLUDES += [
+        '/dom/media/webspeech/recognition',
        '/dom/media/webspeech/synth',
    ]

--- a/layout/build/nsLayoutModule.cpp
+++ b/layout/build/nsLayoutModule.cpp
@ -99,6 +99,9 @@
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
 #include "mozilla/dom/FakeSpeechRecognitionService.h"
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+#include "mozilla/dom/PocketSphinxSpeechRecognitionService.h"
+#endif
 #ifdef MOZ_WEBSPEECH
 #include "mozilla/dom/nsSynthVoiceRegistry.h"
 #endif
@ -635,6 +638,9 @@ NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(DataStoreService, DataStoreService::Get
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
 NS_GENERIC_FACTORY_CONSTRUCTOR(FakeSpeechRecognitionService)
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+NS_GENERIC_FACTORY_CONSTRUCTOR(PocketSphinxSpeechRecognitionService)
+#endif

 NS_GENERIC_FACTORY_CONSTRUCTOR(nsCSPContext)
 NS_GENERIC_FACTORY_CONSTRUCTOR(CSPService)
@ -831,6 +837,9 @@ NS_DEFINE_NAMED_CID(NS_GAMEPAD_TEST_CID);
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
 NS_DEFINE_NAMED_CID(NS_FAKE_SPEECH_RECOGNITION_SERVICE_CID);
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+NS_DEFINE_NAMED_CID(NS_POCKETSPHINX_SPEECH_RECOGNITION_SERVICE_CID);
+#endif
 #ifdef MOZ_WEBSPEECH
 NS_DEFINE_NAMED_CID(NS_SYNTHVOICEREGISTRY_CID);
 #endif
@ -1088,6 +1097,9 @@ static const mozilla::Module::CIDEntry kLayoutCIDs[] = {
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
  { &kNS_FAKE_SPEECH_RECOGNITION_SERVICE_CID, false, nullptr, FakeSpeechRecognitionServiceConstructor },
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+  { &kNS_POCKETSPHINX_SPEECH_RECOGNITION_SERVICE_CID, false, nullptr, PocketSphinxSpeechRecognitionServiceConstructor },
+#endif
 #ifdef MOZ_WEBSPEECH
  { &kNS_SYNTHVOICEREGISTRY_CID, true, nullptr, nsSynthVoiceRegistryConstructor },
 #endif
@ -1252,6 +1264,9 @@ static const mozilla::Module::ContractIDEntry kLayoutContracts[] = {
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
  { NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "fake", &kNS_FAKE_SPEECH_RECOGNITION_SERVICE_CID },
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+  { NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "pocketsphinx", &kNS_POCKETSPHINX_SPEECH_RECOGNITION_SERVICE_CID },
+#endif
 #ifdef MOZ_WEBSPEECH
  { NS_SYNTHVOICEREGISTRY_CONTRACTID, &kNS_SYNTHVOICEREGISTRY_CID },
 #endif