diff --git a/b2g/app/b2g.js b/b2g/app/b2g.js
index 3cf84cf0d10f..9d2761c47987 100644
--- a/b2g/app/b2g.js
+++ b/b2g/app/b2g.js
@@ -1003,6 +1003,10 @@ pref("network.proxy.browsing.app_origins", "app://system.gaiamobile.org");
 // Enable Web Speech synthesis API
 pref("media.webspeech.synth.enabled", true);
 
+// Enable Web Speech recognition API
+pref("media.webspeech.recognition.enable", true);
+pref("media.webspeech.service.default", "pocketsphinx");
+
 // Downloads API
 pref("dom.mozDownloads.enabled", true);
 pref("dom.downloads.max_retention_days", 7);
diff --git a/dom/events/test/test_all_synthetic_events.html b/dom/events/test/test_all_synthetic_events.html
index fc8bdef18bb3..9cf5fc6ac210 100644
--- a/dom/events/test/test_all_synthetic_events.html
+++ b/dom/events/test/test_all_synthetic_events.html
@@ -427,6 +427,10 @@ const kEventConstructors = {
       return e;
     },
   },
+  SpeechRecognitionError: { create: function (aName, aProps) {
+      return new SpeechRecognitionError(aName, aProps);
+    },
+  },
   SpeechRecognitionEvent: { create: function (aName, aProps) {
       return new SpeechRecognitionEvent(aName, aProps);
     },
diff --git a/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.cpp b/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.cpp
new file mode 100644
index 000000000000..6108cf1e9480
--- /dev/null
+++ b/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.cpp
@@ -0,0 +1,345 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsThreadUtils.h"
+#include "nsXPCOMCIDInternal.h"
+#include "PocketSphinxSpeechRecognitionService.h"
+#include "nsIFile.h"
+#include "SpeechGrammar.h"
+#include "SpeechRecognition.h"
+#include "SpeechRecognitionAlternative.h"
+#include "SpeechRecognitionResult.h"
+#include "SpeechRecognitionResultList.h"
+#include "nsIObserverService.h"
+#include "mozilla/Services.h"
+#include "nsDirectoryServiceDefs.h"
+#include "nsDirectoryServiceUtils.h"
+#include "nsMemory.h"
+
+extern "C" {
+#include "pocketsphinx/pocketsphinx.h"
+#include "sphinxbase/sphinx_config.h"
+#include "sphinxbase/jsgf.h"
+}
+
+namespace mozilla {
+
+using namespace dom;
+
+class DecodeResultTask : public nsRunnable
+{
+public:
+  DecodeResultTask(const nsString& hypstring,
+                   WeakPtr<dom::SpeechRecognition> recognition)
+      : mResult(hypstring),
+        mRecognition(recognition),
+        mWorkerThread(do_GetCurrentThread())
+  {
+    MOZ_ASSERT(
+      !NS_IsMainThread()); // This should be running on the worker thread
+  }
+
+  NS_IMETHOD
+  Run()
+  {
+    MOZ_ASSERT(NS_IsMainThread()); // This method is supposed to run on the main
+                                   // thread!
+
+    // Declare javascript result events
+    nsRefPtr<SpeechEvent> event = new SpeechEvent(
+      mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT);
+    SpeechRecognitionResultList* resultList =
+      new SpeechRecognitionResultList(mRecognition);
+    SpeechRecognitionResult* result = new SpeechRecognitionResult(mRecognition);
+    SpeechRecognitionAlternative* alternative =
+      new SpeechRecognitionAlternative(mRecognition);
+
+    alternative->mTranscript = mResult;
+    alternative->mConfidence = 100;
+
+    result->mItems.AppendElement(alternative);
+    resultList->mItems.AppendElement(result);
+
+    event->mRecognitionResultList = resultList;
+    NS_DispatchToMainThread(event);
+
+    // If we don't destroy the thread when we're done with it, it will hang
+    // around forever... bad!
+    // But thread->Shutdown must be called from the main thread, not from the
+    // thread itself.
+    return mWorkerThread->Shutdown();
+  }
+
+private:
+  nsString mResult;
+  WeakPtr<dom::SpeechRecognition> mRecognition;
+  nsCOMPtr<nsIThread> mWorkerThread;
+};
+
+class DecodeTask : public nsRunnable
+{
+public:
+  DecodeTask(WeakPtr<dom::SpeechRecognition> recognition,
+             const nsTArray<int16_t>& audiovector, ps_decoder_t* ps)
+      : mRecognition(recognition), mAudiovector(audiovector), mPs(ps)
+  {
+  }
+
+  NS_IMETHOD
+  Run()
+  {
+    char const* hyp;
+    int rv;
+    int32 score;
+    nsAutoCString hypoValue;
+
+    rv = ps_start_utt(mPs);
+    rv = ps_process_raw(mPs, &mAudiovector[0], mAudiovector.Length(), FALSE,
+                        FALSE);
+
+    rv = ps_end_utt(mPs);
+    if (rv >= 0) {
+      hyp = ps_get_hyp(mPs, &score);
+      if (hyp == nullptr) {
+        hypoValue.Assign("ERROR");
+      } else {
+        hypoValue.Assign(hyp);
+      }
+    }
+
+    nsCOMPtr<nsIRunnable> resultrunnable =
+      new DecodeResultTask(NS_ConvertUTF8toUTF16(hypoValue), mRecognition);
+    return NS_DispatchToMainThread(resultrunnable);
+  }
+
+private:
+  WeakPtr<dom::SpeechRecognition> mRecognition;
+  nsTArray<int16_t> mAudiovector;
+  ps_decoder_t* mPs;
+};
+
+NS_IMPL_ISUPPORTS(PocketSphinxSpeechRecognitionService,
+                  nsISpeechRecognitionService, nsIObserver)
+
+PocketSphinxSpeechRecognitionService::PocketSphinxSpeechRecognitionService()
+{
+  mSpeexState = nullptr;
+
+  // get root folder
+  nsCOMPtr<nsIFile> tmpFile;
+  nsAutoString aStringAMPath;   // am folder
+  nsAutoString aStringDictPath; // dict folder
+
+  NS_GetSpecialDirectory(NS_GRE_DIR, getter_AddRefs(tmpFile));
+#if defined(XP_WIN) // for some reason, on windows NS_GRE_DIR is not bin root,
+                    // but bin/browser
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING(".."));
+#endif
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("models"));
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("en-us-semi"));
+  tmpFile->GetPath(aStringAMPath);
+
+  NS_GetSpecialDirectory(NS_GRE_DIR, getter_AddRefs(tmpFile));
+#if defined(XP_WIN) // for some reason, on windows NS_GRE_DIR is not bin root,
+                    // but bin/browser
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING(".."));
+#endif
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("models"));     //
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("dict"));       //
+  tmpFile->AppendRelativePath(NS_LITERAL_STRING("cmu07a.dic")); //
+  tmpFile->GetPath(aStringDictPath);
+
+  // FOR B2G PATHS HARDCODED (APPEND /DATA ON THE BEGINNING, FOR DESKTOP, ONLY
+  // MODELS/ RELATIVE TO ROOT
+  mPSConfig = cmd_ln_init(nullptr, ps_args(), TRUE, "-hmm",
+                          ToNewUTF8String(aStringAMPath), // acoustic model
+                          "-dict", ToNewUTF8String(aStringDictPath), nullptr);
+  if (mPSConfig == nullptr) {
+    ISDecoderCreated = false;
+  } else {
+    mPSHandle = ps_init(mPSConfig);
+    if (mPSHandle == nullptr) {
+      ISDecoderCreated = false;
+    } else {
+      ISDecoderCreated = true;
+    }
+  }
+
+  ISGrammarCompiled = false;
+}
+
+PocketSphinxSpeechRecognitionService::~PocketSphinxSpeechRecognitionService()
+{
+  if (mPSConfig) {
+    free(mPSConfig);
+  }
+  if (mPSHandle) {
+    free(mPSHandle);
+  }
+
+  mSpeexState = nullptr;
+}
+
+// CALL START IN JS FALLS HERE
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Initialize(
+  WeakPtr<dom::SpeechRecognition> aSpeechRecognition)
+{
+  if (!ISDecoderCreated || !ISGrammarCompiled) {
+    return NS_ERROR_NOT_INITIALIZED;
+  } else {
+    mAudioVector.Clear();
+
+    if (mSpeexState) {
+      mSpeexState = nullptr;
+    }
+
+    mRecognition = aSpeechRecognition;
+    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
+    obs->AddObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC, false);
+    obs->AddObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC, false);
+    return NS_OK;
+  }
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::ProcessAudioSegment(
+  AudioSegment* aAudioSegment, int32_t aSampleRate)
+{
+  if (!mSpeexState) {
+    mSpeexState = speex_resampler_init(1, aSampleRate, 16000,
+                                       SPEEX_RESAMPLER_QUALITY_MAX, nullptr);
+  }
+  aAudioSegment->ResampleChunks(mSpeexState, aSampleRate, 16000);
+
+  AudioSegment::ChunkIterator iterator(*aAudioSegment);
+
+  while (!iterator.IsEnded()) {
+    mozilla::AudioChunk& chunk = *(iterator);
+    MOZ_ASSERT(chunk.mBuffer);
+    const int16_t* buf = static_cast<const int16_t*>(chunk.mChannelData[0]);
+
+    for (int i = 0; i < iterator->mDuration; i++) {
+      mAudioVector.AppendElement((int16_t)buf[i]);
+    }
+    iterator.Next();
+  }
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::SoundEnd()
+{
+  speex_resampler_destroy(mSpeexState);
+  mSpeexState = nullptr;
+
+  // To create a new thread, get the thread manager
+  nsCOMPtr<nsIThreadManager> tm = do_GetService(NS_THREADMANAGER_CONTRACTID);
+  nsCOMPtr<nsIThread> decodethread;
+  nsresult rv = tm->NewThread(0, 0, getter_AddRefs(decodethread));
+  if (NS_FAILED(rv)) {
+    // In case of failure, call back immediately with an empty string which
+    // indicates failure
+    return NS_OK;
+  }
+
+  nsCOMPtr<nsIRunnable> r =
+    new DecodeTask(mRecognition, mAudioVector, mPSHandle);
+  decodethread->Dispatch(r, nsIEventTarget::DISPATCH_NORMAL);
+
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::ValidateAndSetGrammarList(
+  SpeechGrammar* aSpeechGrammar,
+  nsISpeechGrammarCompilationCallback* aCallback)
+{
+  if (!ISDecoderCreated) {
+    ISGrammarCompiled = false;
+  } else if (aSpeechGrammar) {
+    nsAutoString grammar;
+    ErrorResult rv;
+    aSpeechGrammar->GetSrc(grammar, rv);
+
+    int result = ps_set_jsgf_string(mPSHandle, "name",
+                                    NS_ConvertUTF16toUTF8(grammar).get());
+
+    ps_set_search(mPSHandle, "name");
+
+    if (result != 0) {
+      ISGrammarCompiled = false;
+    } else {
+      ISGrammarCompiled = true;
+    }
+  } else {
+    ISGrammarCompiled = false;
+  }
+
+  return ISGrammarCompiled ? NS_OK : NS_ERROR_NOT_INITIALIZED;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Abort()
+{
+  return NS_OK;
+}
+
+NS_IMETHODIMP
+PocketSphinxSpeechRecognitionService::Observe(nsISupports* aSubject,
+                                              const char* aTopic,
+                                              const char16_t* aData)
+{
+  MOZ_ASSERT(mRecognition->mTestConfig.mFakeRecognitionService,
+             "Got request to fake recognition service event, "
+             "but " TEST_PREFERENCE_FAKE_RECOGNITION_SERVICE " is not set");
+
+  if (!strcmp(aTopic, SPEECH_RECOGNITION_TEST_END_TOPIC)) {
+    nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
+    obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC);
+    obs->RemoveObserver(this, SPEECH_RECOGNITION_TEST_END_TOPIC);
+
+    return NS_OK;
+  }
+
+  const nsDependentString eventName = nsDependentString(aData);
+
+  if (eventName.EqualsLiteral("EVENT_RECOGNITIONSERVICE_ERROR")) {
+    mRecognition->DispatchError(
+      SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
+      SpeechRecognitionErrorCode::Network, // TODO different codes?
+      NS_LITERAL_STRING("RECOGNITIONSERVICE_ERROR test event"));
+
+  } else if (eventName.EqualsLiteral("EVENT_RECOGNITIONSERVICE_FINAL_RESULT")) {
+    nsRefPtr<SpeechEvent> event = new SpeechEvent(
+      mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT);
+
+    event->mRecognitionResultList = BuildMockResultList();
+    NS_DispatchToMainThread(event);
+  }
+
+  return NS_OK;
+}
+
+SpeechRecognitionResultList*
+PocketSphinxSpeechRecognitionService::BuildMockResultList()
+{
+  SpeechRecognitionResultList* resultList =
+    new SpeechRecognitionResultList(mRecognition);
+  SpeechRecognitionResult* result = new SpeechRecognitionResult(mRecognition);
+  SpeechRecognitionAlternative* alternative =
+    new SpeechRecognitionAlternative(mRecognition);
+
+  alternative->mTranscript = NS_LITERAL_STRING("Mock final result");
+  alternative->mConfidence = 0.0f;
+
+  result->mItems.AppendElement(alternative);
+  resultList->mItems.AppendElement(result);
+
+  return resultList;
+}
+
+} // namespace mozilla
diff --git a/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.h b/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.h
new file mode 100644
index 000000000000..5b814b9a5578
--- /dev/null
+++ b/dom/media/webspeech/recognition/PocketSphinxSpeechRecognitionService.h
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef mozilla_dom_PocketSphinxRecognitionService_h
+#define mozilla_dom_PocketSphinxRecognitionService_h
+
+#include "nsCOMPtr.h"
+#include "nsTArray.h"
+#include "nsIObserver.h"
+#include "nsISpeechRecognitionService.h"
+#include "speex/speex_resampler.h"
+
+extern "C" {
+#include <pocketsphinx/pocketsphinx.h>
+#include <sphinxbase/sphinx_config.h>
+}
+
+#define NS_POCKETSPHINX_SPEECH_RECOGNITION_SERVICE_CID \
+  {                                                    \
+    0x0ff5ce56, 0x5b09, 0x4db8, {                      \
+      0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64   \
+    }                                                  \
+  };
+
+namespace mozilla {
+
+/**
+ * PocketSphinx implementation of the nsISpeechRecognitionService interface
+ */
+class PocketSphinxSpeechRecognitionService : public nsISpeechRecognitionService,
+                                             public nsIObserver
+{
+public:
+  // Add XPCOM glue code
+  NS_DECL_ISUPPORTS
+  NS_DECL_NSISPEECHRECOGNITIONSERVICE
+
+  // Add nsIObserver code
+  NS_DECL_NSIOBSERVER
+
+  /**
+   * Default constructs a PocketSphinxSpeechRecognitionService loading default
+   * files
+   */
+  PocketSphinxSpeechRecognitionService();
+
+private:
+  /**
+   * Private destructor to prevent bypassing of reference counting
+   */
+  virtual ~PocketSphinxSpeechRecognitionService();
+
+  /** The associated SpeechRecognition */
+  WeakPtr<dom::SpeechRecognition> mRecognition;
+
+  /**
+   * Builds a mock SpeechRecognitionResultList
+   */
+  dom::SpeechRecognitionResultList* BuildMockResultList();
+
+  /** Speex state */
+  SpeexResamplerState* mSpeexState;
+
+  /** PocketSphinx decoder */
+  ps_decoder_t* mPSHandle;
+
+  /** Sphinxbase parsed command line arguments */
+  cmd_ln_t* mPSConfig;
+
+  /** Flag to verify if decoder was created */
+  bool ISDecoderCreated;
+
+  /** Flag to verify if grammar was compiled */
+  bool ISGrammarCompiled;
+
+  /** Audio data */
+  nsTArray<int16_t> mAudioVector;
+};
+
+} // namespace mozilla
+
+#endif
diff --git a/dom/media/webspeech/recognition/SpeechGrammar.cpp b/dom/media/webspeech/recognition/SpeechGrammar.cpp
index a195e34a1485..dd2fb1a987e6 100644
--- a/dom/media/webspeech/recognition/SpeechGrammar.cpp
+++ b/dom/media/webspeech/recognition/SpeechGrammar.cpp
@@ -53,14 +53,14 @@ SpeechGrammar::WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto)
 void
 SpeechGrammar::GetSrc(nsString& aRetVal, ErrorResult& aRv) const
 {
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  aRetVal = mSrc;
   return;
 }
 
 void
 SpeechGrammar::SetSrc(const nsAString& aArg, ErrorResult& aRv)
 {
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
+  mSrc = aArg;
   return;
 }
diff --git a/dom/media/webspeech/recognition/SpeechGrammar.h b/dom/media/webspeech/recognition/SpeechGrammar.h
index 70453e513f79..5acc41410e5f 100644
--- a/dom/media/webspeech/recognition/SpeechGrammar.h
+++ b/dom/media/webspeech/recognition/SpeechGrammar.h
@@ -49,6 +49,8 @@ private:
   ~SpeechGrammar();
 
   nsCOMPtr<nsISupports> mParent;
+
+  nsString mSrc;
 };
 
 } // namespace dom
diff --git a/dom/media/webspeech/recognition/SpeechGrammarList.cpp b/dom/media/webspeech/recognition/SpeechGrammarList.cpp
index 490a1378e729..09657018348c 100644
--- a/dom/media/webspeech/recognition/SpeechGrammarList.cpp
+++ b/dom/media/webspeech/recognition/SpeechGrammarList.cpp
@@ -15,7 +15,7 @@
 namespace mozilla {
 namespace dom {
 
-NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechGrammarList, mParent)
+NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(SpeechGrammarList, mParent, mItems)
 NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechGrammarList)
 NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechGrammarList)
 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechGrammarList)
@@ -64,14 +64,14 @@ SpeechGrammarList::GetParentObject() const
 uint32_t
 SpeechGrammarList::Length() const
 {
-  return 0;
+  return mItems.Length();
 }
 
 already_AddRefed<SpeechGrammar>
 SpeechGrammarList::Item(uint32_t aIndex, ErrorResult& aRv)
 {
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
-  return nullptr;
+  nsRefPtr<SpeechGrammar> result = mItems.ElementAt(aIndex);
+  return result.forget();
 }
 
 void
@@ -88,7 +88,10 @@ SpeechGrammarList::AddFromString(const nsAString& aString,
                                  const Optional<float>& aWeight,
                                  ErrorResult& aRv)
 {
-  mRecognitionService->ValidateAndSetGrammarList(this, nullptr);
+  SpeechGrammar* speechGrammar = new SpeechGrammar(mParent);
+  speechGrammar->SetSrc(aString, aRv);
+  mItems.AppendElement(speechGrammar);
+  mRecognitionService->ValidateAndSetGrammarList(speechGrammar, nullptr);
   return;
 }
 
@@ -96,8 +99,13 @@ already_AddRefed<SpeechGrammar>
 SpeechGrammarList::IndexedGetter(uint32_t aIndex, bool& aPresent,
                                  ErrorResult& aRv)
 {
-  aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
-  return nullptr;
+  if (aIndex >= Length()) {
+    aPresent = false;
+    return nullptr;
+  }
+  ErrorResult rv;
+  aPresent = true;
+  return Item(aIndex, rv);
 }
 } // namespace dom
 } // namespace mozilla
diff --git a/dom/media/webspeech/recognition/SpeechGrammarList.h b/dom/media/webspeech/recognition/SpeechGrammarList.h
index a821ab9817d2..018913c2396c 100644
--- a/dom/media/webspeech/recognition/SpeechGrammarList.h
+++ b/dom/media/webspeech/recognition/SpeechGrammarList.h
@@ -56,6 +56,8 @@ private:
   ~SpeechGrammarList();
 
   nsCOMPtr<nsISupports> mParent;
+
+  nsTArray<nsRefPtr<SpeechGrammar>> mItems;
 };
 
 } // namespace dom
diff --git a/dom/media/webspeech/recognition/SpeechRecognition.cpp b/dom/media/webspeech/recognition/SpeechRecognition.cpp
index 0666153d999a..fbb9c44e1f0a 100644
--- a/dom/media/webspeech/recognition/SpeechRecognition.cpp
+++ b/dom/media/webspeech/recognition/SpeechRecognition.cpp
@@ -34,7 +34,7 @@ namespace mozilla {
 namespace dom {
 
 #define PREFERENCE_DEFAULT_RECOGNITION_SERVICE "media.webspeech.service.default"
-#define DEFAULT_RECOGNITION_SERVICE "google"
+#define DEFAULT_RECOGNITION_SERVICE "pocketsphinx"
 
 #define PREFERENCE_ENDPOINTER_SILENCE_LENGTH "media.webspeech.silence_length"
 #define PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH "media.webspeech.long_silence_length"
@@ -84,9 +84,9 @@ GetSpeechRecognitionService()
       NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "fake";
   }
 
-  nsresult aRv;
+  nsresult rv;
   nsCOMPtr<nsISpeechRecognitionService> recognitionService;
-  recognitionService = do_GetService(speechRecognitionServiceCID.get(), &aRv);
+  recognitionService = do_GetService(speechRecognitionServiceCID.get(), &rv);
   return recognitionService.forget();
 }
@@ -477,7 +477,7 @@ SpeechRecognition::NotifyFinalResult(SpeechEvent* aEvent)
   init.mCancelable = false;
   // init.mResultIndex = 0;
   init.mResults = aEvent->mRecognitionResultList;
-  init.mInterpretation = NS_LITERAL_STRING("NOT_IMPLEMENTED");
+  init.mInterpretation = JS::NullValue();
   // init.mEmma = nullptr;
 
   nsRefPtr<SpeechRecognitionEvent> event =
@@ -535,7 +535,9 @@ SpeechRecognition::StartRecording(DOMMediaStream* aDOMStream)
   // doesn't get Destroy()'ed
   mDOMStream = aDOMStream;
 
-  NS_ENSURE_STATE(mDOMStream->GetStream());
+  if (NS_WARN_IF(!mDOMStream->GetStream())) {
+    return NS_ERROR_UNEXPECTED;
+  }
   mSpeechListener = new SpeechStreamListener(this);
   mDOMStream->GetStream()->AddListener(mSpeechListener);
 
@@ -698,11 +700,15 @@ SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream, Error
   }
 
   mRecognitionService = GetSpeechRecognitionService();
-  NS_ENSURE_TRUE_VOID(mRecognitionService);
+  if (NS_WARN_IF(!mRecognitionService)) {
+    return;
+  }
 
   nsresult rv;
   rv = mRecognitionService->Initialize(this);
-  NS_ENSURE_SUCCESS_VOID(rv);
+  if (NS_WARN_IF(NS_FAILED(rv))) {
+    return;
+  }
 
   MediaStreamConstraints constraints;
   constraints.mAudio.SetAsBoolean() = true;
@@ -957,7 +963,7 @@ SpeechRecognition::GetUserMediaErrorCallback::OnError(nsISupports* aError)
 }
 
   SpeechRecognitionErrorCode errorCode;
-  nsString name;
+  nsAutoString name;
   error->GetName(name);
   if (name.EqualsLiteral("PERMISSION_DENIED")) {
     errorCode = SpeechRecognitionErrorCode::Not_allowed;
@@ -965,7 +971,7 @@ SpeechRecognition::GetUserMediaErrorCallback::OnError(nsISupports* aError)
     errorCode = SpeechRecognitionErrorCode::Audio_capture;
   }
 
-  nsString message;
+  nsAutoString message;
   error->GetMessage(message);
   mRecognition->DispatchError(SpeechRecognition::EVENT_AUDIO_ERROR, errorCode,
                               message);
diff --git a/dom/media/webspeech/recognition/SpeechRecognitionAlternative.cpp b/dom/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
index 9d19774fb4fd..0609c19ef788 100644
--- a/dom/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
+++ b/dom/media/webspeech/recognition/SpeechRecognitionAlternative.cpp
@@ -22,8 +22,7 @@ NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechRecognitionAlternative)
 NS_INTERFACE_MAP_END
 
 SpeechRecognitionAlternative::SpeechRecognitionAlternative(SpeechRecognition* aParent)
-  : mTranscript(NS_LITERAL_STRING(""))
-  , mConfidence(0)
+  : mConfidence(0)
   , mParent(aParent)
 {
 }
diff --git a/dom/media/webspeech/recognition/SpeechRecognitionResult.cpp b/dom/media/webspeech/recognition/SpeechRecognitionResult.cpp
index 215978d218ec..00bcec4396ee 100644
--- a/dom/media/webspeech/recognition/SpeechRecognitionResult.cpp
+++ b/dom/media/webspeech/recognition/SpeechRecognitionResult.cpp
@@ -67,7 +67,7 @@ SpeechRecognitionResult::Item(uint32_t aIndex)
 }
 
 bool
-SpeechRecognitionResult::Final() const
+SpeechRecognitionResult::IsFinal() const
 {
   return true; // TODO
 }
diff --git a/dom/media/webspeech/recognition/SpeechRecognitionResult.h b/dom/media/webspeech/recognition/SpeechRecognitionResult.h
index cf1545721b2b..2a0b5641818f 100644
--- a/dom/media/webspeech/recognition/SpeechRecognitionResult.h
+++ b/dom/media/webspeech/recognition/SpeechRecognitionResult.h
@@ -38,11 +38,11 @@ public:
 
   already_AddRefed<SpeechRecognitionAlternative> Item(uint32_t aIndex);
 
-  bool Final() const;
+  bool IsFinal() const;
 
   already_AddRefed<SpeechRecognitionAlternative> IndexedGetter(uint32_t aIndex, bool& aPresent);
 
-  nsTArray<nsRefPtr<SpeechRecognitionAlternative> > mItems;
+  nsTArray<nsRefPtr<SpeechRecognitionAlternative>> mItems;
 
 private:
   ~SpeechRecognitionResult();
diff --git a/dom/media/webspeech/recognition/SpeechRecognitionResultList.h b/dom/media/webspeech/recognition/SpeechRecognitionResultList.h
index 861974956ca7..2c43af3ffafd 100644
--- a/dom/media/webspeech/recognition/SpeechRecognitionResultList.h
+++ b/dom/media/webspeech/recognition/SpeechRecognitionResultList.h
@@ -41,7 +41,7 @@ public:
 
   already_AddRefed<SpeechRecognitionResult> IndexedGetter(uint32_t aIndex, bool& aPresent);
 
-  nsTArray<nsRefPtr<SpeechRecognitionResult> > mItems;
+  nsTArray<nsRefPtr<SpeechRecognitionResult>> mItems;
 
 private:
   ~SpeechRecognitionResultList();
diff --git a/dom/media/webspeech/recognition/moz.build b/dom/media/webspeech/recognition/moz.build
index 14cb3752431c..8002a2ffabec 100644
--- a/dom/media/webspeech/recognition/moz.build
+++ b/dom/media/webspeech/recognition/moz.build
@@ -26,6 +26,11 @@ if CONFIG['MOZ_WEBSPEECH_TEST_BACKEND']:
         'test/FakeSpeechRecognitionService.h',
     ]
 
+if CONFIG['MOZ_WEBSPEECH_POCKETSPHINX']:
+    EXPORTS.mozilla.dom += [
+        'PocketSphinxSpeechRecognitionService.h',
+    ]
+
 UNIFIED_SOURCES += [
     'endpointer.cc',
     'energy_endpointer.cc',
@@ -44,10 +49,21 @@ if CONFIG['MOZ_WEBSPEECH_TEST_BACKEND']:
         'test/FakeSpeechRecognitionService.cpp',
     ]
 
+if CONFIG['MOZ_WEBSPEECH_POCKETSPHINX']:
+    UNIFIED_SOURCES += [
+        'PocketSphinxSpeechRecognitionService.cpp',
+    ]
+
 LOCAL_INCLUDES += [
     '/dom/base',
+    '/media/sphinxbase',
 ]
 
+if CONFIG['MOZ_WEBSPEECH_POCKETSPHINX']:
+    LOCAL_INCLUDES += [
+        '/media/pocketsphinx',
+    ]
+
 include('/ipc/chromium/chromium-config.mozbuild')
 
 FINAL_LIBRARY = 'xul'
diff --git a/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl b/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl
index 7bd3b6173b1d..fa125d766030 100644
--- a/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl
+++ b/dom/media/webspeech/recognition/nsISpeechRecognitionService.idl
@@ -21,8 +21,8 @@ class SpeechGrammar;
 
 native SpeechRecognitionWeakPtr(mozilla::WeakPtr<mozilla::dom::SpeechRecognition>);
 [ptr] native AudioSegmentPtr(mozilla::AudioSegment);
-[ptr] native SpeechGrammarListPtr(mozilla::dom::SpeechGrammarList);
 [ptr] native SpeechGrammarPtr(mozilla::dom::SpeechGrammar);
+[ptr] native SpeechGrammarListPtr(mozilla::dom::SpeechGrammarList);
 
 [uuid(374583f0-4507-11e4-a183-164230d1df67)]
 interface nsISpeechGrammarCompilationCallback : nsISupports {
@@ -33,7 +33,7 @@ interface nsISpeechGrammarCompilationCallback : nsISupports {
 interface nsISpeechRecognitionService : nsISupports {
     void initialize(in SpeechRecognitionWeakPtr aSpeechRecognition);
     void processAudioSegment(in AudioSegmentPtr aAudioSegment, in long aSampleRate);
-    void validateAndSetGrammarList(in SpeechGrammarListPtr aSpeechGramarList, in nsISpeechGrammarCompilationCallback aCallback);
+    void validateAndSetGrammarList(in SpeechGrammarPtr aSpeechGrammar, in nsISpeechGrammarCompilationCallback aCallback);
     void soundEnd();
     void abort();
 };
diff --git a/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp b/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp
index 58a0ac3e2987..ff36134aa81c 100644
--- a/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp
+++ b/dom/media/webspeech/recognition/test/FakeSpeechRecognitionService.cpp
@@ -52,7 +52,7 @@ FakeSpeechRecognitionService::SoundEnd()
 }
 
 NS_IMETHODIMP
-FakeSpeechRecognitionService::ValidateAndSetGrammarList(mozilla::dom::SpeechGrammarList*, nsISpeechGrammarCompilationCallback*)
+FakeSpeechRecognitionService::ValidateAndSetGrammarList(mozilla::dom::SpeechGrammar*, nsISpeechGrammarCompilationCallback*)
 {
   return NS_OK;
 }
diff --git a/dom/media/webspeech/recognition/test/head.js b/dom/media/webspeech/recognition/test/head.js
index 85478c99eb50..8ff7d1569e38 100644
--- a/dom/media/webspeech/recognition/test/head.js
+++ b/dom/media/webspeech/recognition/test/head.js
@@ -38,7 +38,7 @@ function EventManager(sr) {
 
   var eventDependencies = {
     "speechend": "speechstart",
-    "soundent": "soundstart",
+    "soundend": "soundstart",
     "audioend": "audiostart"
   };
diff --git a/dom/tests/mochitest/general/test_interfaces.html b/dom/tests/mochitest/general/test_interfaces.html
index f998207f4c60..cf6674fb063c 100644
--- a/dom/tests/mochitest/general/test_interfaces.html
+++ b/dom/tests/mochitest/general/test_interfaces.html
@@ -938,6 +938,22 @@ var interfaceNamesInGlobalScope =
     {name: "SourceBuffer", linux: false, release: false},
// IMPORTANT: Do not change this list without review from a DOM peer!
     {name: "SourceBufferList", linux: false, release: false},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognition", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionError", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionAlternative", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionResult", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionResultList", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechRecognitionEvent", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechGrammar", b2g: true, nightly: true},
+// IMPORTANT: Do not change this list without review from a DOM peer!
+    {name: "SpeechGrammarList", b2g: true, nightly: true},
// IMPORTANT: Do not change this list without review from a DOM peer!
     {name: "SpeechSynthesisEvent", b2g: true},
// IMPORTANT: Do not change this list without review from a DOM peer!
diff --git a/dom/webidl/SpeechRecognitionEvent.webidl b/dom/webidl/SpeechRecognitionEvent.webidl
index 62003392ac72..a464fcc70d29 100644
--- a/dom/webidl/SpeechRecognitionEvent.webidl
+++ b/dom/webidl/SpeechRecognitionEvent.webidl
@@ -10,15 +10,15 @@ interface nsISupports;
 interface SpeechRecognitionEvent : Event
 {
   readonly attribute unsigned long resultIndex;
-  readonly attribute nsISupports? results;
-  readonly attribute DOMString? interpretation;
+  readonly attribute SpeechRecognitionResultList? results;
+  readonly attribute any interpretation;
   readonly attribute Document? emma;
 };
 
 dictionary SpeechRecognitionEventInit : EventInit
 {
   unsigned long resultIndex = 0;
-  nsISupports? results = null;
-  DOMString interpretation = "";
+  SpeechRecognitionResultList? results = null;
+  any interpretation = null;
   Document? emma = null;
 };
diff --git a/dom/webidl/SpeechRecognitionResult.webidl b/dom/webidl/SpeechRecognitionResult.webidl
index 73e1bb62065f..9fb7bdc6b738 100644
--- a/dom/webidl/SpeechRecognitionResult.webidl
+++ b/dom/webidl/SpeechRecognitionResult.webidl
@@ -14,5 +14,5 @@ interface SpeechRecognitionResult {
   readonly attribute unsigned long length;
   getter SpeechRecognitionAlternative item(unsigned long index);
 
-  readonly attribute boolean final;
+  readonly attribute boolean isFinal;
 };
diff --git a/layout/build/moz.build b/layout/build/moz.build
index e144a964cd84..abe45694ce72 100644
--- a/layout/build/moz.build
+++ b/layout/build/moz.build
@@ -119,6 +119,7 @@ if CONFIG['MOZ_B2G_BT']:
 
 if CONFIG['MOZ_WEBSPEECH']:
     LOCAL_INCLUDES += [
+        '/dom/media/webspeech/recognition',
        '/dom/media/webspeech/synth',
    ]
 
diff --git a/layout/build/nsLayoutModule.cpp b/layout/build/nsLayoutModule.cpp
index 6d81b4d9ef2d..81a2275a47d8 100644
--- a/layout/build/nsLayoutModule.cpp
+++ b/layout/build/nsLayoutModule.cpp
@@ -99,6 +99,9 @@
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
 #include "mozilla/dom/FakeSpeechRecognitionService.h"
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+#include "mozilla/dom/PocketSphinxSpeechRecognitionService.h"
+#endif
 #ifdef MOZ_WEBSPEECH
 #include "mozilla/dom/nsSynthVoiceRegistry.h"
 #endif
@@ -635,6 +638,9 @@ NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(DataStoreService, DataStoreService::Get
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
 NS_GENERIC_FACTORY_CONSTRUCTOR(FakeSpeechRecognitionService)
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+NS_GENERIC_FACTORY_CONSTRUCTOR(PocketSphinxSpeechRecognitionService)
+#endif
 
 NS_GENERIC_FACTORY_CONSTRUCTOR(nsCSPContext)
 NS_GENERIC_FACTORY_CONSTRUCTOR(CSPService)
@@ -831,6 +837,9 @@ NS_DEFINE_NAMED_CID(NS_GAMEPAD_TEST_CID);
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
 NS_DEFINE_NAMED_CID(NS_FAKE_SPEECH_RECOGNITION_SERVICE_CID);
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+NS_DEFINE_NAMED_CID(NS_POCKETSPHINX_SPEECH_RECOGNITION_SERVICE_CID);
+#endif
 #ifdef MOZ_WEBSPEECH
 NS_DEFINE_NAMED_CID(NS_SYNTHVOICEREGISTRY_CID);
 #endif
@@ -1088,6 +1097,9 @@ static const mozilla::Module::CIDEntry kLayoutCIDs[] = {
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
   { &kNS_FAKE_SPEECH_RECOGNITION_SERVICE_CID, false, nullptr, FakeSpeechRecognitionServiceConstructor },
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+  { &kNS_POCKETSPHINX_SPEECH_RECOGNITION_SERVICE_CID, false, nullptr, PocketSphinxSpeechRecognitionServiceConstructor },
+#endif
 #ifdef MOZ_WEBSPEECH
   { &kNS_SYNTHVOICEREGISTRY_CID, true, nullptr, nsSynthVoiceRegistryConstructor },
 #endif
@@ -1252,6 +1264,9 @@ static const mozilla::Module::ContractIDEntry kLayoutContracts[] = {
 #ifdef MOZ_WEBSPEECH_TEST_BACKEND
   { NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "fake", &kNS_FAKE_SPEECH_RECOGNITION_SERVICE_CID },
 #endif
+#ifdef MOZ_WEBSPEECH_POCKETSPHINX
+  { NS_SPEECH_RECOGNITION_SERVICE_CONTRACTID_PREFIX "pocketsphinx", &kNS_POCKETSPHINX_SPEECH_RECOGNITION_SERVICE_CID },
+#endif
 #ifdef MOZ_WEBSPEECH
   { NS_SYNTHVOICEREGISTRY_CONTRACTID, &kNS_SYNTHVOICEREGISTRY_CID },
 #endif
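
For reference, a minimal content-JS sketch of the API surface this patch wires up. The grammar string, the handler body, and the use of the `grammars` attribute are illustrative only (not taken from the patch), and it assumes the two prefs added to b2g.js above are enabled:

// Hypothetical usage sketch of the recognition API enabled by this patch.
var grammar = "#JSGF V1.0; grammar commands; public <command> = yes | no;";

var recognition = new SpeechRecognition();
var grammarList = new SpeechGrammarList();
grammarList.addFromString(grammar, 1.0); // compiled by the PocketSphinx backend
recognition.grammars = grammarList;      // illustrative; not exercised by this patch

recognition.onresult = function (event) {
  var result = event.results[0];         // SpeechRecognitionResultList getter
  if (result.isFinal) {                  // renamed from `final` by this patch
    console.log(result[0].transcript, result[0].confidence);
  }
};

recognition.start();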