Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1248897 - Introducing an online speech recognition service for Web Speech API r=smaug,pehrsons,padenot
This patch introduces a Speech Recognition Service which interfaces with Mozilla's remote STT endpoint which is currently being used by multiple services Differential Revision: https://phabricator.services.mozilla.com/D26047 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Parent
1fd2626e6a
Commit
20834f4fb9
|
@ -0,0 +1,473 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim:set ts=2 sw=2 sts=2 et cindent: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsThreadUtils.h"
|
||||
#include "nsXPCOMCIDInternal.h"
|
||||
#include "OnlineSpeechRecognitionService.h"
|
||||
#include "nsIFile.h"
|
||||
#include "SpeechGrammar.h"
|
||||
#include "SpeechRecognition.h"
|
||||
#include "SpeechRecognitionAlternative.h"
|
||||
#include "SpeechRecognitionResult.h"
|
||||
#include "SpeechRecognitionResultList.h"
|
||||
#include "nsIObserverService.h"
|
||||
#include "mozilla/StaticPrefs_media.h"
|
||||
#include "mozilla/Services.h"
|
||||
#include "nsDirectoryServiceDefs.h"
|
||||
#include "nsDirectoryServiceUtils.h"
|
||||
#include "nsMemory.h"
|
||||
#include "nsNetUtil.h"
|
||||
#include "nsContentUtils.h"
|
||||
#include "nsIPrincipal.h"
|
||||
#include "nsIStreamListener.h"
|
||||
#include "nsIUploadChannel2.h"
|
||||
#include "mozilla/dom/ClientIPCTypes.h"
|
||||
#include "nsStringStream.h"
|
||||
#include "nsIOutputStream.h"
|
||||
#include "nsStreamUtils.h"
|
||||
#include "OpusTrackEncoder.h"
|
||||
#include "OggWriter.h"
|
||||
#include "nsIClassOfService.h"
|
||||
#include <json/json.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
using namespace dom;
|
||||
using namespace std;
|
||||
|
||||
#define PREFERENCE_DEFAULT_RECOGNITION_ENDPOINT \
|
||||
"media.webspeech.service.endpoint"
|
||||
#define DEFAULT_RECOGNITION_ENDPOINT "https://speaktome-2.services.mozilla.com/"
|
||||
#define MAX_LISTENING_TIME_MS 10000
|
||||
|
||||
NS_IMPL_ISUPPORTS(OnlineSpeechRecognitionService, nsISpeechRecognitionService,
|
||||
nsIStreamListener)
|
||||
|
||||
NS_IMETHODIMP
// Stream listener callback: nothing to do when the STT response starts;
// the payload is accumulated in OnDataAvailable and consumed in
// OnStopRequest.
OnlineSpeechRecognitionService::OnStartRequest(nsIRequest* aRequest) {
  MOZ_ASSERT(NS_IsMainThread());
  return NS_OK;
}
|
||||
|
||||
// ReadSegments callback: appends the raw response bytes of one segment to
// the nsCString passed through aClosure. Always consumes the full segment.
static nsresult AssignResponseToBuffer(nsIInputStream* aIn, void* aClosure,
                                       const char* aFromRawSegment,
                                       uint32_t aToOffset, uint32_t aCount,
                                       uint32_t* aWriteCount) {
  auto* responseBuffer = static_cast<nsCString*>(aClosure);
  responseBuffer->Append(aFromRawSegment, aCount);
  // Report the whole segment as consumed so ReadSegments keeps going.
  *aWriteCount = aCount;
  return NS_OK;
}
|
||||
|
||||
NS_IMETHODIMP
// Stream listener callback: drains the available chunk of the server
// response into mBuf. The accumulated JSON is parsed in OnStopRequest.
OnlineSpeechRecognitionService::OnDataAvailable(nsIRequest* aRequest,
                                                nsIInputStream* aInputStream,
                                                uint64_t aOffset,
                                                uint32_t aCount) {
  MOZ_ASSERT(NS_IsMainThread());
  uint32_t bytesRead = 0;
  nsresult rv = aInputStream->ReadSegments(AssignResponseToBuffer, &mBuf,
                                           aCount, &bytesRead);
  NS_ENSURE_SUCCESS(rv, rv);
  return NS_OK;
}
|
||||
|
||||
NS_IMETHODIMP
// Stream listener callback: the STT request finished. Parses the JSON
// response accumulated in mBuf and either dispatches a final result event
// or an error event to the associated SpeechRecognition.
//
// Expected response shape (per the visible parsing logic):
//   { "status": "ok", "data": [ { "text": ..., "confidence": ... } ] }
// or
//   { "status": <other>, "message": <error text> }
OnlineSpeechRecognitionService::OnStopRequest(nsIRequest* aRequest,
                                              nsresult aStatusCode) {
  MOZ_ASSERT(NS_IsMainThread());

  // mBuf is single-use: clear it on every exit path so a later request
  // starts from an empty buffer.
  auto clearBuf = MakeScopeExit([&] { mBuf.Truncate(); });

  if (mAborted) {
    // The user aborted; silently drop the response.
    return NS_OK;
  }

  bool success;
  float confidence = 0;
  Json::Value root;
  Json::CharReaderBuilder builder;
  bool parsingSuccessful;
  nsAutoCString result;
  nsAutoCString hypoValue;
  nsAutoString errorMsg;
  // Note: errorCode is only read when success is false, and every
  // success=false path below assigns it first.
  SpeechRecognitionErrorCode errorCode;

  SR_LOG("STT Result: %s", mBuf.get());

  if (NS_FAILED(aStatusCode)) {
    // The network request itself failed (no usable body).
    success = false;
    errorMsg.Assign(NS_LITERAL_STRING("Error connecting to the service."));
    errorCode = SpeechRecognitionErrorCode::Network;
  } else {
    success = true;
    UniquePtr<Json::CharReader> const reader(builder.newCharReader());
    parsingSuccessful =
        reader->parse(mBuf.BeginReading(), mBuf.EndReading(), &root, nullptr);
    if (!parsingSuccessful) {
      // Body was not valid JSON — treat as an internal server error.
      success = false;
      errorMsg.Assign(NS_LITERAL_STRING("Internal server error"));
      errorCode = SpeechRecognitionErrorCode::Network;
    } else {
      result.Assign(root.get("status", "error").asString().c_str());
      if (result.EqualsLiteral("ok")) {
        // Status ok: pull the first hypothesis out of the "data" array.
        if (!root["data"].empty()) {
          hypoValue.Assign(root["data"][0].get("text", "").asString().c_str());
          confidence = root["data"][0].get("confidence", "0").asFloat();
        } else {
          // "ok" but no data — report it as unreadable result data.
          success = false;
          errorMsg.Assign(NS_LITERAL_STRING("Error reading result data."));
          errorCode = SpeechRecognitionErrorCode::Network;
        }
      } else {
        // Server-reported failure: surface its "message" to the page.
        success = false;
        NS_ConvertUTF8toUTF16 error(root.get("message", "").asString().c_str());
        errorMsg.Assign(error);
        errorCode = SpeechRecognitionErrorCode::No_speech;
      }
    }
  }

  if (!success) {
    mRecognition->DispatchError(
        SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR, errorCode, errorMsg);
  } else {
    // Build the DOM result objects and dispatch the final-result event.
    RefPtr<SpeechEvent> event = new SpeechEvent(
        mRecognition, SpeechRecognition::EVENT_RECOGNITIONSERVICE_FINAL_RESULT);
    SpeechRecognitionResultList* resultList =
        new SpeechRecognitionResultList(mRecognition);
    SpeechRecognitionResult* result = new SpeechRecognitionResult(mRecognition);

    // The backend returns a single hypothesis; only attach it if the page
    // asked for at least one alternative.
    if (mRecognition->MaxAlternatives() > 0) {
      SpeechRecognitionAlternative* alternative =
          new SpeechRecognitionAlternative(mRecognition);

      alternative->mTranscript = NS_ConvertUTF8toUTF16(hypoValue);
      alternative->mConfidence = confidence;

      result->mItems.AppendElement(alternative);
    }
    resultList->mItems.AppendElement(result);

    event->mRecognitionResultList = resultList;
    NS_DispatchToMainThread(event);
  }

  return NS_OK;
}
|
||||
|
||||
// Construction/destruction is trivial; all real setup happens in
// Initialize() and teardown in SoundEnd()/Abort().
OnlineSpeechRecognitionService::OnlineSpeechRecognitionService() = default;
OnlineSpeechRecognitionService::~OnlineSpeechRecognitionService() = default;
|
||||
|
||||
NS_IMETHODIMP
// Binds this service to a SpeechRecognition instance: creates the Ogg
// muxer and grabs the recognition's encoding task queue. Main thread only.
OnlineSpeechRecognitionService::Initialize(
    WeakPtr<SpeechRecognition> aSpeechRecognition) {
  MOZ_ASSERT(NS_IsMainThread());
  mWriter = MakeUnique<OggWriter>();
  // SpeechRecognition is a DOM object; hold it through a main-thread-only
  // handle since this service also runs code on the encoder task queue.
  mRecognition = new nsMainThreadPtrHolder<SpeechRecognition>(
      "OnlineSpeechRecognitionService::mRecognition", aSpeechRecognition);
  mEncodeTaskQueue = mRecognition->GetTaskQueueForEncoding();
  MOZ_ASSERT(mEncodeTaskQueue);
  return NS_OK;
}
|
||||
|
||||
void OnlineSpeechRecognitionService::EncoderDataAvailable() {
|
||||
MOZ_ASSERT(!NS_IsMainThread());
|
||||
nsresult rv;
|
||||
AutoTArray<RefPtr<EncodedFrame>, 4> container;
|
||||
rv = mAudioEncoder->GetEncodedTrack(container);
|
||||
if (NS_WARN_IF(NS_FAILED(rv))) {
|
||||
MOZ_ASSERT_UNREACHABLE();
|
||||
}
|
||||
|
||||
rv = mWriter->WriteEncodedTrack(
|
||||
container,
|
||||
mAudioEncoder->IsEncodingComplete() ? ContainerWriter::END_OF_STREAM : 0);
|
||||
if (NS_WARN_IF(NS_FAILED(rv))) {
|
||||
MOZ_ASSERT_UNREACHABLE();
|
||||
}
|
||||
|
||||
mWriter->GetContainerData(&mEncodedData, mAudioEncoder->IsEncodingComplete()
|
||||
? ContainerWriter::FLUSH_NEEDED
|
||||
: 0);
|
||||
|
||||
if (mAudioEncoder->IsEncodingComplete()) {
|
||||
NS_DispatchToMainThread(
|
||||
NewRunnableMethod("OnlineSpeechRecognitionService::DoSTT", this,
|
||||
&OnlineSpeechRecognitionService::DoSTT));
|
||||
}
|
||||
}
|
||||
|
||||
void OnlineSpeechRecognitionService::EncoderInitialized() {
|
||||
MOZ_ASSERT(!NS_IsMainThread());
|
||||
AutoTArray<RefPtr<TrackMetadataBase>, 1> metadata;
|
||||
metadata.AppendElement(mAudioEncoder->GetMetadata());
|
||||
if (metadata[0]->GetKind() != TrackMetadataBase::METADATA_OPUS) {
|
||||
SR_LOG("wrong meta data type!");
|
||||
MOZ_ASSERT_UNREACHABLE();
|
||||
}
|
||||
|
||||
nsresult rv = mWriter->SetMetadata(metadata);
|
||||
MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
|
||||
|
||||
rv = mWriter->GetContainerData(&mEncodedData, ContainerWriter::GET_HEADER);
|
||||
MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
|
||||
}
|
||||
|
||||
// Called on the encoder thread when the Opus encoder reports an error.
// Drops any partially encoded audio and reports an audio-capture error to
// the DOM object on the main thread.
void OnlineSpeechRecognitionService::EncoderError() {
  MOZ_ASSERT(!NS_IsMainThread());
  SR_LOG("Error encoding frames.");
  mEncodedData.Clear();
  NS_DispatchToMainThread(NS_NewRunnableFunction(
      "SpeechRecognition::DispatchError",
      [this, self = RefPtr<OnlineSpeechRecognitionService>(this)]() {
        if (!mRecognition) {
          // The recognition object may be gone by the time this runnable
          // reaches the main thread.
          return;
        }
        mRecognition->DispatchError(
            SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
            SpeechRecognitionErrorCode::Audio_capture,
            NS_LITERAL_STRING("Encoder error"));
      }));
}
|
||||
|
||||
NS_IMETHODIMP
/**
 * Feeds a chunk of captured audio into the Opus encoder. Runs on the
 * encoding task queue (never the main thread). Lazily creates the encoder
 * on the first non-empty segment, and stops the recognition once
 * MAX_LISTENING_TIME_MS of wall-clock listening time has elapsed.
 *
 * @param aAudioSegment  the captured audio; its contents are moved into
 *                       the encoder.
 * @param aSampleRate    sample rate of the captured audio.
 */
OnlineSpeechRecognitionService::ProcessAudioSegment(AudioSegment* aAudioSegment,
                                                    int32_t aSampleRate) {
  MOZ_ASSERT(!NS_IsMainThread());
  int64_t duration = aAudioSegment->GetDuration();
  if (duration <= 0) {
    return NS_OK;
  }

  if (!mAudioEncoder) {
    // First audio chunk: now that the sample rate is known, create the
    // encoder and register for its callbacks on this (encoder) thread.
    mSpeechEncoderListener = new SpeechEncoderListener(this);
    mAudioEncoder = MakeAndAddRef<OpusTrackEncoder>(aSampleRate);
    // Local variable — renamed from "mEncoderThread", which wrongly used
    // the member-prefix naming convention.
    RefPtr<AbstractThread> encoderThread = AbstractThread::GetCurrent();
    mAudioEncoder->SetWorkerThread(encoderThread);
    mAudioEncoder->RegisterListener(mSpeechEncoderListener);
  }

  mAudioEncoder->AppendAudioSegment(std::move(*aAudioSegment));

  TimeStamp now = TimeStamp::Now();
  if (mFirstIteration.IsNull()) {
    mFirstIteration = now;
  }

  // Cap the listening time; once the ceiling is hit, stop the recognition
  // from the main thread (SpeechRecognition is a DOM object).
  if ((now - mFirstIteration).ToMilliseconds() >= MAX_LISTENING_TIME_MS) {
    NS_DispatchToMainThread(NS_NewRunnableFunction(
        "SpeechRecognition::Stop",
        [this, self = RefPtr<OnlineSpeechRecognitionService>(this)]() {
          if (!mRecognition) {
            return;
          }
          mRecognition->Stop();
        }));

    return NS_OK;
  }

  return NS_OK;
}
|
||||
|
||||
void OnlineSpeechRecognitionService::DoSTT() {
|
||||
MOZ_ASSERT(NS_IsMainThread());
|
||||
|
||||
if (mAborted) {
|
||||
return;
|
||||
}
|
||||
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsIChannel> chan;
|
||||
nsCOMPtr<nsIURI> uri;
|
||||
nsAutoCString speechRecognitionEndpoint;
|
||||
nsAutoCString prefEndpoint;
|
||||
nsAutoString language;
|
||||
|
||||
Preferences::GetCString(PREFERENCE_DEFAULT_RECOGNITION_ENDPOINT,
|
||||
prefEndpoint);
|
||||
|
||||
if (!prefEndpoint.IsEmpty()) {
|
||||
speechRecognitionEndpoint = prefEndpoint;
|
||||
} else {
|
||||
speechRecognitionEndpoint = DEFAULT_RECOGNITION_ENDPOINT;
|
||||
}
|
||||
|
||||
rv = NS_NewURI(getter_AddRefs(uri), speechRecognitionEndpoint, nullptr,
|
||||
nullptr);
|
||||
if (NS_WARN_IF(NS_FAILED(rv))) {
|
||||
mRecognition->DispatchError(
|
||||
SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
|
||||
SpeechRecognitionErrorCode::Network, NS_LITERAL_STRING("Unknown URI"));
|
||||
return;
|
||||
}
|
||||
|
||||
nsSecurityFlags secFlags = nsILoadInfo::SEC_REQUIRE_CORS_DATA_INHERITS;
|
||||
nsLoadFlags loadFlags =
|
||||
nsIRequest::LOAD_NORMAL | nsIChannel::LOAD_BYPASS_SERVICE_WORKER;
|
||||
nsContentPolicyType contentPolicy =
|
||||
nsContentUtils::InternalContentPolicyTypeToExternal(
|
||||
nsIContentPolicy::TYPE_OTHER);
|
||||
|
||||
nsPIDOMWindowInner* window = mRecognition->GetOwner();
|
||||
if (NS_WARN_IF(!window)) {
|
||||
mRecognition->DispatchError(
|
||||
SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
|
||||
SpeechRecognitionErrorCode::Aborted, NS_LITERAL_STRING("No window"));
|
||||
return;
|
||||
}
|
||||
|
||||
Document* doc = window->GetExtantDoc();
|
||||
if (NS_WARN_IF(!doc)) {
|
||||
mRecognition->DispatchError(
|
||||
SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
|
||||
SpeechRecognitionErrorCode::Aborted, NS_LITERAL_STRING("No document"));
|
||||
}
|
||||
rv = NS_NewChannel(getter_AddRefs(chan), uri, doc->NodePrincipal(), secFlags,
|
||||
contentPolicy, nullptr, nullptr, nullptr, nullptr,
|
||||
loadFlags);
|
||||
if (NS_WARN_IF(NS_FAILED(rv))) {
|
||||
mRecognition->DispatchError(
|
||||
SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
|
||||
SpeechRecognitionErrorCode::Network,
|
||||
NS_LITERAL_STRING("Failed to open channel"));
|
||||
return;
|
||||
}
|
||||
|
||||
nsCOMPtr<nsIHttpChannel> httpChan = do_QueryInterface(chan);
|
||||
if (httpChan) {
|
||||
rv = httpChan->SetRequestMethod(NS_LITERAL_CSTRING("POST"));
|
||||
MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv));
|
||||
}
|
||||
|
||||
if (httpChan) {
|
||||
mRecognition->GetLang(language);
|
||||
// Accept-Language-STT is a custom header of our backend server used to set
|
||||
// the language of the speech sample being submitted by the client
|
||||
rv = httpChan->SetRequestHeader(NS_LITERAL_CSTRING("Accept-Language-STT"),
|
||||
NS_ConvertUTF16toUTF8(language), false);
|
||||
MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv));
|
||||
// Tell the server to not store the transcription by default
|
||||
rv = httpChan->SetRequestHeader(NS_LITERAL_CSTRING("Store-Transcription"),
|
||||
NS_LITERAL_CSTRING("0"), false);
|
||||
MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv));
|
||||
// Tell the server to not store the sample by default
|
||||
rv = httpChan->SetRequestHeader(NS_LITERAL_CSTRING("Store-Sample"),
|
||||
NS_LITERAL_CSTRING("0"), false);
|
||||
MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv));
|
||||
// Set the product tag as teh web speech api
|
||||
rv = httpChan->SetRequestHeader(NS_LITERAL_CSTRING("Product-Tag"),
|
||||
NS_LITERAL_CSTRING("wsa"), false);
|
||||
MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv));
|
||||
}
|
||||
|
||||
nsCOMPtr<nsIClassOfService> cos(do_QueryInterface(chan));
|
||||
if (cos) {
|
||||
cos->AddClassFlags(nsIClassOfService::UrgentStart);
|
||||
}
|
||||
|
||||
nsCOMPtr<nsIUploadChannel2> uploadChan = do_QueryInterface(chan);
|
||||
if (uploadChan) {
|
||||
nsCOMPtr<nsIInputStream> bodyStream;
|
||||
uint32_t length = 0;
|
||||
for (const nsTArray<uint8_t>& chunk : mEncodedData) {
|
||||
length += chunk.Length();
|
||||
}
|
||||
|
||||
nsTArray<uint8_t> audio;
|
||||
if (!audio.SetCapacity(length, fallible)) {
|
||||
mRecognition->DispatchError(
|
||||
SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
|
||||
SpeechRecognitionErrorCode::Audio_capture,
|
||||
NS_LITERAL_STRING("Allocation error"));
|
||||
return;
|
||||
}
|
||||
|
||||
for (const nsTArray<uint8_t>& chunk : mEncodedData) {
|
||||
audio.AppendElements(chunk);
|
||||
}
|
||||
|
||||
mEncodedData.Clear();
|
||||
|
||||
rv = NS_NewByteInputStream(getter_AddRefs(bodyStream), std::move(audio));
|
||||
if (NS_WARN_IF(NS_FAILED(rv))) {
|
||||
mRecognition->DispatchError(
|
||||
SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
|
||||
SpeechRecognitionErrorCode::Network,
|
||||
NS_LITERAL_STRING("Failed to open stream"));
|
||||
return;
|
||||
}
|
||||
if (bodyStream) {
|
||||
rv = uploadChan->ExplicitSetUploadStream(
|
||||
bodyStream, NS_LITERAL_CSTRING("audio/ogg"), length,
|
||||
NS_LITERAL_CSTRING("POST"), false);
|
||||
MOZ_RELEASE_ASSERT(NS_SUCCEEDED(rv));
|
||||
}
|
||||
}
|
||||
|
||||
rv = chan->AsyncOpen(this);
|
||||
if (NS_WARN_IF(NS_FAILED(rv))) {
|
||||
mRecognition->DispatchError(
|
||||
SpeechRecognition::EVENT_RECOGNITIONSERVICE_ERROR,
|
||||
SpeechRecognitionErrorCode::Network,
|
||||
NS_LITERAL_STRING("Internal server error"));
|
||||
}
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
// The capture is done. Tears the encoder down from the encoding task
// queue (notifying end-of-stream first, which lets the encoder emit its
// final frames before being released), then drops our reference to the
// queue. Main thread only.
OnlineSpeechRecognitionService::SoundEnd() {
  MOZ_ASSERT(NS_IsMainThread());

  if (!mEncodeTaskQueue) {
    // Not initialized
    return NS_OK;
  }

  nsresult rv = mEncodeTaskQueue->Dispatch(NS_NewRunnableFunction(
      "OnlineSpeechRecognitionService::SoundEnd",
      [this, self = RefPtr<OnlineSpeechRecognitionService>(this)]() {
        if (mAudioEncoder) {
          // Order matters: flush the encoder before unhooking the
          // listener so the final frames are still observed.
          mAudioEncoder->NotifyEndOfStream();
          mAudioEncoder->UnregisterListener(mSpeechEncoderListener);
          mSpeechEncoderListener = nullptr;
          mAudioEncoder = nullptr;
        }
      }));
  MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
  Unused << rv;

  // No more audio will be dispatched to the queue after this point.
  mEncodeTaskQueue = nullptr;

  return NS_OK;
}
|
||||
|
||||
NS_IMETHODIMP
// Deliberate no-op: grammars are not used by this backend. The callback
// is intentionally never invoked.
OnlineSpeechRecognitionService::ValidateAndSetGrammarList(
    SpeechGrammar* aSpeechGrammar,
    nsISpeechGrammarCompilationCallback* aCallback) {
  // This is an online LVCSR (STT) service,
  // so we don't need to set a grammar
  return NS_OK;
}
|
||||
|
||||
NS_IMETHODIMP
// Aborts the recognition. Idempotent: a second call is a no-op. Setting
// mAborted makes the pending encoder/network callbacks bail out, and
// SoundEnd() tears down the encoder.
OnlineSpeechRecognitionService::Abort() {
  MOZ_ASSERT(NS_IsMainThread());
  if (!mAborted) {
    mAborted = true;
    return SoundEnd();
  }
  return NS_OK;
}
|
||||
} // namespace mozilla
|
|
@ -0,0 +1,133 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim:set ts=2 sw=2 sts=2 et cindent: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef mozilla_dom_OnlineRecognitionService_h
|
||||
#define mozilla_dom_OnlineRecognitionService_h
|
||||
|
||||
#include "nsCOMPtr.h"
|
||||
#include "nsTArray.h"
|
||||
#include "nsISpeechRecognitionService.h"
|
||||
#include "speex/speex_resampler.h"
|
||||
#include "nsIStreamListener.h"
|
||||
#include "OpusTrackEncoder.h"
|
||||
#include "ContainerWriter.h"
|
||||
|
||||
#define NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID \
|
||||
{0x0ff5ce56, \
|
||||
0x5b09, \
|
||||
0x4db8, \
|
||||
{0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64}};
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
namespace ipc {
|
||||
class PrincipalInfo;
|
||||
} // namespace ipc
|
||||
|
||||
/**
|
||||
* Online implementation of the nsISpeechRecognitionService interface
|
||||
*/
|
||||
class OnlineSpeechRecognitionService : public nsISpeechRecognitionService,
                                       public nsIStreamListener {
 public:
  // Add XPCOM glue code
  NS_DECL_THREADSAFE_ISUPPORTS
  NS_DECL_NSISPEECHRECOGNITIONSERVICE
  NS_DECL_NSIREQUESTOBSERVER
  NS_DECL_NSISTREAMLISTENER

  /**
   * Listener responsible for handling the events raised by the TrackEncoder
   */
  class SpeechEncoderListener : public TrackEncoderListener {
   public:
    // Captures the current AbstractThread so the callbacks below can
    // assert they run on the thread the listener was created on.
    explicit SpeechEncoderListener(OnlineSpeechRecognitionService* aService)
        : mService(aService), mOwningThread(AbstractThread::GetCurrent()) {}

    // Encoder metadata is ready; forward to the service.
    void Initialized(TrackEncoder* aEncoder) override {
      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
      mService->EncoderInitialized();
    }

    // Encoded frames are available; forward to the service.
    void DataAvailable(TrackEncoder* aEncoder) override {
      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
      mService->EncoderDataAvailable();
    }

    // The encoder hit an error; forward to the service.
    void Error(TrackEncoder* aEncoder) override {
      MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
      mService->EncoderError();
    }

   private:
    const RefPtr<OnlineSpeechRecognitionService> mService;
    const RefPtr<AbstractThread> mOwningThread;
  };

  /**
   * Default constructs a OnlineSpeechRecognitionService
   */
  OnlineSpeechRecognitionService();

  /**
   * Called by SpeechEncoderListener when the AudioTrackEncoder has been
   * initialized.
   */
  void EncoderInitialized();

  /**
   * Called by SpeechEncoderListener when the AudioTrackEncoder has encoded
   * some data for us to pass along.
   */
  void EncoderDataAvailable();

  /**
   * Called by SpeechEncoderListener when the AudioTrackEncoder has
   * encountered an error.
   */
  void EncoderError();

 private:
  /**
   * Private destructor to prevent bypassing of reference counting
   */
  virtual ~OnlineSpeechRecognitionService();

  /** The associated SpeechRecognition */
  nsMainThreadPtrHandle<dom::SpeechRecognition> mRecognition;

  /**
   * Builds a mock SpeechRecognitionResultList
   */
  // NOTE(review): no definition is visible in the accompanying .cpp —
  // possibly a dead declaration; confirm before relying on it.
  dom::SpeechRecognitionResultList* BuildMockResultList();

  /**
   * Method responsible for uploading the audio to the remote endpoint
   */
  void DoSTT();

  // Encoded and packaged ogg audio data
  nsTArray<nsTArray<uint8_t>> mEncodedData;
  // Member responsible for holding a reference to the TrackEncoderListener
  RefPtr<SpeechEncoderListener> mSpeechEncoderListener;
  // Encoder responsible for encoding the frames from pcm to opus which is the
  // format supported by our backend
  RefPtr<AudioTrackEncoder> mAudioEncoder;
  // Object responsible for wrapping the opus frames into an ogg container
  UniquePtr<ContainerWriter> mWriter;
  // Member responsible for storing the json string returned by the endpoint
  nsCString mBuf;
  // Used to calculate a ceiling on the time spent listening.
  TimeStamp mFirstIteration;
  // flag responsible to control if the user choose to abort
  bool mAborted = false;
  // reference to the audio encoder queue
  RefPtr<TaskQueue> mEncodeTaskQueue;
};
|
||||
|
||||
} // namespace mozilla
|
||||
|
||||
#endif
|
|
@ -36,6 +36,11 @@ class SpeechGrammar final : public nsISupports, public nsWrapperCache {
|
|||
static already_AddRefed<SpeechGrammar> Constructor(
|
||||
const GlobalObject& aGlobal);
|
||||
|
||||
// Prefixed (webkit) variant: forwards straight to the unprefixed
// Constructor(). aRv is accepted for signature compatibility but never
// set here.
static already_AddRefed<SpeechGrammar> WebkitSpeechGrammar(
    const GlobalObject& aGlobal, ErrorResult& aRv) {
  return Constructor(aGlobal);
}
|
||||
|
||||
void GetSrc(nsString& aRetVal, ErrorResult& aRv) const;
|
||||
|
||||
void SetSrc(const nsAString& aArg, ErrorResult& aRv);
|
||||
|
|
|
@ -35,6 +35,11 @@ class SpeechGrammarList final : public nsISupports, public nsWrapperCache {
|
|||
static already_AddRefed<SpeechGrammarList> Constructor(
|
||||
const GlobalObject& aGlobal);
|
||||
|
||||
// Prefixed (webkit) variant: forwards straight to the unprefixed
// Constructor(). aRv is accepted for signature compatibility but never
// set here.
static already_AddRefed<SpeechGrammarList> WebkitSpeechGrammarList(
    const GlobalObject& aGlobal, ErrorResult& aRv) {
  return Constructor(aGlobal);
}
|
||||
|
||||
nsISupports* GetParentObject() const;
|
||||
|
||||
JSObject* WrapObject(JSContext* aCx,
|
||||
|
|
|
@ -19,7 +19,8 @@
|
|||
#include "mozilla/Preferences.h"
|
||||
#include "mozilla/Services.h"
|
||||
#include "mozilla/StaticPrefs_media.h"
|
||||
|
||||
#include "mozilla/AbstractThread.h"
|
||||
#include "VideoUtils.h"
|
||||
#include "AudioSegment.h"
|
||||
#include "MediaEnginePrefs.h"
|
||||
#include "endpointer.h"
|
||||
|
@ -46,17 +47,17 @@ namespace mozilla {
|
|||
namespace dom {
|
||||
|
||||
#define PREFERENCE_DEFAULT_RECOGNITION_SERVICE "media.webspeech.service.default"
|
||||
#define DEFAULT_RECOGNITION_SERVICE_PREFIX "pocketsphinx-"
|
||||
#define DEFAULT_RECOGNITION_SERVICE "pocketsphinx-en-US"
|
||||
#define DEFAULT_RECOGNITION_SERVICE "online"
|
||||
|
||||
#define PREFERENCE_ENDPOINTER_SILENCE_LENGTH "media.webspeech.silence_length"
|
||||
#define PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH \
|
||||
"media.webspeech.long_silence_length"
|
||||
#define PREFERENCE_ENDPOINTER_LONG_SPEECH_LENGTH \
|
||||
"media.webspeech.long_speech_length"
|
||||
#define PREFERENCE_SPEECH_DETECTION_TIMEOUT_MS \
|
||||
"media.webspeech.recognition.timeout"
|
||||
|
||||
static const uint32_t kSAMPLE_RATE = 16000;
|
||||
static const uint32_t kSPEECH_DETECTION_TIMEOUT_MS = 10000;
|
||||
|
||||
// number of frames corresponding to 300ms of audio to send to endpointer while
|
||||
// it's in environment estimation mode
|
||||
|
@ -70,19 +71,39 @@ LogModule* GetSpeechRecognitionLog() {
|
|||
#define SR_LOG(...) \
|
||||
MOZ_LOG(GetSpeechRecognitionLog(), mozilla::LogLevel::Debug, (__VA_ARGS__))
|
||||
|
||||
already_AddRefed<nsISpeechRecognitionService> GetSpeechRecognitionService(
|
||||
const nsAString& aLang) {
|
||||
namespace {
|
||||
// Holds browser shutdown until the associated SpeechRecognition has been
// aborted, giving in-flight capture/encode/network work a chance to wind
// down cleanly.
class SpeechRecognitionShutdownBlocker : public media::ShutdownBlocker {
 public:
  SpeechRecognitionShutdownBlocker(SpeechRecognition* aRecognition,
                                   const nsString& aName)
      : media::ShutdownBlocker(aName), mRecognition(aRecognition) {}

  NS_IMETHOD BlockShutdown(nsIAsyncShutdownClient*) override {
    MOZ_ASSERT(NS_IsMainThread());
    // AbortSilently will eventually clear the blocker.
    mRecognition->Abort();
    return NS_OK;
  }

 private:
  const RefPtr<SpeechRecognition> mRecognition;
};
|
||||
|
||||
enum class ServiceCreationError {
|
||||
ServiceNotFound,
|
||||
};
|
||||
|
||||
Result<nsCOMPtr<nsISpeechRecognitionService>, ServiceCreationError>
|
||||
CreateSpeechRecognitionService(nsPIDOMWindowInner* aWindow,
|
||||
SpeechRecognition* aRecognition,
|
||||
const nsAString& aLang) {
|
||||
nsAutoCString speechRecognitionServiceCID;
|
||||
|
||||
nsAutoCString prefValue;
|
||||
Preferences::GetCString(PREFERENCE_DEFAULT_RECOGNITION_SERVICE, prefValue);
|
||||
nsAutoCString speechRecognitionService;
|
||||
|
||||
if (!aLang.IsEmpty()) {
|
||||
speechRecognitionService =
|
||||
NS_LITERAL_CSTRING(DEFAULT_RECOGNITION_SERVICE_PREFIX) +
|
||||
NS_ConvertUTF16toUTF8(aLang);
|
||||
} else if (!prefValue.IsEmpty()) {
|
||||
if (!prefValue.IsEmpty()) {
|
||||
speechRecognitionService = prefValue;
|
||||
} else {
|
||||
speechRecognitionService = DEFAULT_RECOGNITION_SERVICE;
|
||||
|
@ -99,27 +120,15 @@ already_AddRefed<nsISpeechRecognitionService> GetSpeechRecognitionService(
|
|||
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsISpeechRecognitionService> recognitionService;
|
||||
recognitionService = do_GetService(speechRecognitionServiceCID.get(), &rv);
|
||||
return recognitionService.forget();
|
||||
}
|
||||
|
||||
class SpeechRecognitionShutdownBlocker : public media::ShutdownBlocker {
|
||||
public:
|
||||
explicit SpeechRecognitionShutdownBlocker(SpeechRecognition* aRecognition)
|
||||
: media::ShutdownBlocker(NS_LITERAL_STRING("SpeechRecognition shutdown")),
|
||||
mRecognition(aRecognition) {}
|
||||
|
||||
NS_IMETHOD BlockShutdown(nsIAsyncShutdownClient*) override {
|
||||
MOZ_ASSERT(NS_IsMainThread());
|
||||
|
||||
// AbortSilently will eventually clear the blocker.
|
||||
mRecognition->Abort();
|
||||
return NS_OK;
|
||||
recognitionService =
|
||||
do_CreateInstance(speechRecognitionServiceCID.get(), &rv);
|
||||
if (!recognitionService) {
|
||||
return Err(ServiceCreationError::ServiceNotFound);
|
||||
}
|
||||
|
||||
private:
|
||||
const RefPtr<SpeechRecognition> mRecognition;
|
||||
};
|
||||
return recognitionService;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
NS_IMPL_CYCLE_COLLECTION_INHERITED(SpeechRecognition, DOMEventTargetHelper,
|
||||
mStream, mTrack, mRecognitionService,
|
||||
|
@ -137,7 +146,8 @@ SpeechRecognition::SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow)
|
|||
mEndpointer(kSAMPLE_RATE),
|
||||
mAudioSamplesPerChunk(mEndpointer.FrameSize()),
|
||||
mSpeechDetectionTimer(NS_NewTimer()),
|
||||
mSpeechGrammarList(new SpeechGrammarList(GetParentObject())),
|
||||
mSpeechGrammarList(new SpeechGrammarList(GetOwner())),
|
||||
mContinuous(false),
|
||||
mInterimResults(false),
|
||||
mMaxAlternatives(1) {
|
||||
SR_LOG("created SpeechRecognition");
|
||||
|
@ -154,6 +164,10 @@ SpeechRecognition::SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow)
|
|||
Preferences::GetInt(PREFERENCE_ENDPOINTER_LONG_SILENCE_LENGTH, 2500000));
|
||||
mEndpointer.set_long_speech_length(
|
||||
Preferences::GetInt(PREFERENCE_ENDPOINTER_SILENCE_LENGTH, 3 * 1000000));
|
||||
|
||||
mSpeechDetectionTimeoutMs =
|
||||
Preferences::GetInt(PREFERENCE_SPEECH_DETECTION_TIMEOUT_MS, 10000);
|
||||
|
||||
Reset();
|
||||
}
|
||||
|
||||
|
@ -211,8 +225,6 @@ already_AddRefed<SpeechRecognition> SpeechRecognition::Constructor(
|
|||
return object.forget();
|
||||
}
|
||||
|
||||
// Wrapper-cache parent: the owning window.
nsISupports* SpeechRecognition::GetParentObject() const { return GetOwner(); }
|
||||
|
||||
void SpeechRecognition::ProcessEvent(SpeechEvent* aEvent) {
|
||||
SR_LOG("Processing %s, current state is %s", GetName(aEvent),
|
||||
GetName(mCurrentState));
|
||||
|
@ -245,8 +257,8 @@ void SpeechRecognition::Transition(SpeechEvent* aEvent) {
|
|||
case EVENT_RECOGNITIONSERVICE_ERROR:
|
||||
AbortError(aEvent);
|
||||
break;
|
||||
case EVENT_COUNT:
|
||||
MOZ_CRASH("Invalid event EVENT_COUNT");
|
||||
default:
|
||||
MOZ_CRASH("Invalid event");
|
||||
}
|
||||
break;
|
||||
case STATE_STARTING:
|
||||
|
@ -262,7 +274,7 @@ void SpeechRecognition::Transition(SpeechEvent* aEvent) {
|
|||
AbortSilently(aEvent);
|
||||
break;
|
||||
case EVENT_STOP:
|
||||
Reset();
|
||||
ResetAndEnd();
|
||||
break;
|
||||
case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
|
||||
case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
|
||||
|
@ -271,8 +283,8 @@ void SpeechRecognition::Transition(SpeechEvent* aEvent) {
|
|||
case EVENT_START:
|
||||
SR_LOG("STATE_STARTING: Unhandled event %s", GetName(aEvent));
|
||||
MOZ_CRASH();
|
||||
case EVENT_COUNT:
|
||||
MOZ_CRASH("Invalid event EVENT_COUNT");
|
||||
default:
|
||||
MOZ_CRASH("Invalid event");
|
||||
}
|
||||
break;
|
||||
case STATE_ESTIMATING:
|
||||
|
@ -297,8 +309,8 @@ void SpeechRecognition::Transition(SpeechEvent* aEvent) {
|
|||
case EVENT_START:
|
||||
SR_LOG("STATE_ESTIMATING: Unhandled event %d", aEvent->mType);
|
||||
MOZ_CRASH();
|
||||
case EVENT_COUNT:
|
||||
MOZ_CRASH("Invalid event EVENT_COUNT");
|
||||
default:
|
||||
MOZ_CRASH("Invalid event");
|
||||
}
|
||||
break;
|
||||
case STATE_WAITING_FOR_SPEECH:
|
||||
|
@ -323,8 +335,8 @@ void SpeechRecognition::Transition(SpeechEvent* aEvent) {
|
|||
case EVENT_START:
|
||||
SR_LOG("STATE_STARTING: Unhandled event %s", GetName(aEvent));
|
||||
MOZ_CRASH();
|
||||
case EVENT_COUNT:
|
||||
MOZ_CRASH("Invalid event EVENT_COUNT");
|
||||
default:
|
||||
MOZ_CRASH("Invalid event");
|
||||
}
|
||||
break;
|
||||
case STATE_RECOGNIZING:
|
||||
|
@ -349,8 +361,8 @@ void SpeechRecognition::Transition(SpeechEvent* aEvent) {
|
|||
case EVENT_START:
|
||||
SR_LOG("STATE_RECOGNIZING: Unhandled aEvent %s", GetName(aEvent));
|
||||
MOZ_CRASH();
|
||||
case EVENT_COUNT:
|
||||
MOZ_CRASH("Invalid event EVENT_COUNT");
|
||||
default:
|
||||
MOZ_CRASH("Invalid event");
|
||||
}
|
||||
break;
|
||||
case STATE_WAITING_FOR_RESULT:
|
||||
|
@ -376,12 +388,30 @@ void SpeechRecognition::Transition(SpeechEvent* aEvent) {
|
|||
SR_LOG("STATE_WAITING_FOR_RESULT: Unhandled aEvent %s",
|
||||
GetName(aEvent));
|
||||
MOZ_CRASH();
|
||||
case EVENT_COUNT:
|
||||
MOZ_CRASH("Invalid event EVENT_COUNT");
|
||||
default:
|
||||
MOZ_CRASH("Invalid event");
|
||||
}
|
||||
break;
|
||||
case STATE_COUNT:
|
||||
MOZ_CRASH("Invalid state STATE_COUNT");
|
||||
case STATE_ABORTING:
|
||||
switch (aEvent->mType) {
|
||||
case EVENT_STOP:
|
||||
case EVENT_ABORT:
|
||||
case EVENT_AUDIO_DATA:
|
||||
case EVENT_AUDIO_ERROR:
|
||||
case EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT:
|
||||
case EVENT_RECOGNITIONSERVICE_FINAL_RESULT:
|
||||
case EVENT_RECOGNITIONSERVICE_ERROR:
|
||||
DoNothing(aEvent);
|
||||
break;
|
||||
case EVENT_START:
|
||||
SR_LOG("STATE_ABORTING: Unhandled aEvent %s", GetName(aEvent));
|
||||
MOZ_CRASH();
|
||||
default:
|
||||
MOZ_CRASH("Invalid event");
|
||||
}
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("Invalid state");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -400,7 +430,17 @@ uint32_t SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment,
|
|||
iterator.Next();
|
||||
}
|
||||
|
||||
mRecognitionService->ProcessAudioSegment(aSegment, aTrackRate);
|
||||
// we need to call the nsISpeechRecognitionService::ProcessAudioSegment
|
||||
// in a separate thread so that any eventual encoding or pre-processing
|
||||
// of the audio does not block the main thread
|
||||
nsresult rv = mEncodeTaskQueue->Dispatch(
|
||||
NewRunnableMethod<StoreCopyPassByPtr<AudioSegment>, TrackRate>(
|
||||
"nsISpeechRecognitionService::ProcessAudioSegment",
|
||||
mRecognitionService,
|
||||
&nsISpeechRecognitionService::ProcessAudioSegment,
|
||||
std::move(*aSegment), aTrackRate));
|
||||
MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
|
||||
Unused << rv;
|
||||
return samples;
|
||||
}
|
||||
|
||||
|
@ -421,7 +461,19 @@ uint32_t SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment,
|
|||
|
||||
void SpeechRecognition::Reset() {
|
||||
SetState(STATE_IDLE);
|
||||
|
||||
// This breaks potential ref-cycles.
|
||||
mRecognitionService = nullptr;
|
||||
|
||||
++mStreamGeneration;
|
||||
if (mStream) {
|
||||
mStream->UnregisterTrackListener(this);
|
||||
mStream = nullptr;
|
||||
}
|
||||
mTrack = nullptr;
|
||||
mTrackIsOwned = false;
|
||||
mStopRecordingPromise = nullptr;
|
||||
mEncodeTaskQueue = nullptr;
|
||||
mEstimationSamples = 0;
|
||||
mBufferedSamples = 0;
|
||||
mSpeechDetectionTimer->Cancel();
|
||||
|
@ -454,7 +506,12 @@ void SpeechRecognition::StopRecordingAndRecognize(SpeechEvent* aEvent) {
|
|||
SetState(STATE_WAITING_FOR_RESULT);
|
||||
|
||||
MOZ_ASSERT(mRecognitionService, "Service deleted before recording done");
|
||||
mRecognitionService->SoundEnd();
|
||||
|
||||
// This will run SoundEnd on the service just before StopRecording begins
|
||||
// shutting the encode thread down.
|
||||
mSpeechListener->mRemovedPromise->Then(
|
||||
GetCurrentThreadSerialEventTarget(), __func__,
|
||||
[service = mRecognitionService] { service->SoundEnd(); });
|
||||
|
||||
StopRecording();
|
||||
}
|
||||
|
@ -518,14 +575,23 @@ void SpeechRecognition::DoNothing(SpeechEvent* aEvent) {}
|
|||
|
||||
void SpeechRecognition::AbortSilently(SpeechEvent* aEvent) {
|
||||
if (mRecognitionService) {
|
||||
mRecognitionService->Abort();
|
||||
if (mTrack) {
|
||||
// This will run Abort on the service just before StopRecording begins
|
||||
// shutting the encode thread down.
|
||||
mSpeechListener->mRemovedPromise->Then(
|
||||
GetCurrentThreadSerialEventTarget(), __func__,
|
||||
[service = mRecognitionService] { service->Abort(); });
|
||||
} else {
|
||||
// Recording hasn't started yet. We can just call Abort().
|
||||
mRecognitionService->Abort();
|
||||
}
|
||||
}
|
||||
|
||||
if (mTrack) {
|
||||
StopRecording();
|
||||
}
|
||||
StopRecording()->Then(
|
||||
GetCurrentThreadSerialEventTarget(), __func__,
|
||||
[self = RefPtr<SpeechRecognition>(this), this] { ResetAndEnd(); });
|
||||
|
||||
ResetAndEnd();
|
||||
SetState(STATE_ABORTING);
|
||||
}
|
||||
|
||||
void SpeechRecognition::AbortError(SpeechEvent* aEvent) {
|
||||
|
@ -544,54 +610,83 @@ void SpeechRecognition::NotifyError(SpeechEvent* aEvent) {
|
|||
**************************************/
|
||||
NS_IMETHODIMP
|
||||
SpeechRecognition::StartRecording(RefPtr<AudioStreamTrack>& aTrack) {
|
||||
// hold a reference so that the underlying track
|
||||
// doesn't get Destroy()'ed
|
||||
// hold a reference so that the underlying track doesn't get collected.
|
||||
mTrack = aTrack;
|
||||
MOZ_ASSERT(!mTrack->Ended());
|
||||
|
||||
if (NS_WARN_IF(mTrack->Ended())) {
|
||||
return NS_ERROR_UNEXPECTED;
|
||||
}
|
||||
mSpeechListener = new SpeechTrackListener(this);
|
||||
mTrack->AddListener(mSpeechListener);
|
||||
|
||||
mShutdownBlocker = MakeAndAddRef<SpeechRecognitionShutdownBlocker>(this);
|
||||
nsString blockerName;
|
||||
blockerName.AppendPrintf("SpeechRecognition %p shutdown", this);
|
||||
mShutdownBlocker =
|
||||
MakeAndAddRef<SpeechRecognitionShutdownBlocker>(this, blockerName);
|
||||
RefPtr<nsIAsyncShutdownClient> shutdown = media::GetShutdownBarrier();
|
||||
shutdown->AddBlocker(mShutdownBlocker, NS_LITERAL_STRING(__FILE__), __LINE__,
|
||||
NS_LITERAL_STRING("SpeechRecognition shutdown"));
|
||||
|
||||
mEndpointer.StartSession();
|
||||
|
||||
return mSpeechDetectionTimer->Init(this, kSPEECH_DETECTION_TIMEOUT_MS,
|
||||
return mSpeechDetectionTimer->Init(this, mSpeechDetectionTimeoutMs,
|
||||
nsITimer::TYPE_ONE_SHOT);
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
SpeechRecognition::StopRecording() {
|
||||
if (mShutdownBlocker) {
|
||||
// Block shutdown until the speech track listener has been removed from the
|
||||
// MTG, as it holds a reference to us, and we reference the world, which we
|
||||
// don't want to leak.
|
||||
mSpeechListener->mRemovedPromise->Then(
|
||||
GetCurrentThreadSerialEventTarget(), __func__,
|
||||
[blocker = std::move(mShutdownBlocker)] {
|
||||
RefPtr<nsIAsyncShutdownClient> shutdown = media::GetShutdownBarrier();
|
||||
nsresult rv = shutdown->RemoveBlocker(blocker);
|
||||
MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
|
||||
Unused << rv;
|
||||
});
|
||||
RefPtr<GenericNonExclusivePromise> SpeechRecognition::StopRecording() {
|
||||
if (!mTrack) {
|
||||
// Recording wasn't started, or has already been stopped.
|
||||
if (mStream) {
|
||||
// Ensure we don't start recording because a track became available
|
||||
// before we get reset.
|
||||
mStream->UnregisterTrackListener(this);
|
||||
}
|
||||
return GenericNonExclusivePromise::CreateAndResolve(true, __func__);
|
||||
}
|
||||
|
||||
if (mStopRecordingPromise) {
|
||||
return mStopRecordingPromise;
|
||||
}
|
||||
MOZ_ASSERT(!mShutdownBlocker);
|
||||
|
||||
mStream->UnregisterTrackListener(this);
|
||||
mTrack->RemoveListener(mSpeechListener);
|
||||
mStream = nullptr;
|
||||
mSpeechListener = nullptr;
|
||||
mTrack = nullptr;
|
||||
if (mTrackIsOwned) {
|
||||
mTrack->Stop();
|
||||
}
|
||||
|
||||
mEndpointer.EndSession();
|
||||
DispatchTrustedEvent(NS_LITERAL_STRING("audioend"));
|
||||
|
||||
return NS_OK;
|
||||
// Block shutdown until the speech track listener has been removed from the
|
||||
// MSG, as it holds a reference to us, and we reference the world, which we
|
||||
// don't want to leak.
|
||||
mStopRecordingPromise =
|
||||
mSpeechListener->mRemovedPromise
|
||||
->Then(
|
||||
GetCurrentThreadSerialEventTarget(), __func__,
|
||||
[self = RefPtr<SpeechRecognition>(this), this] {
|
||||
SR_LOG("Shutting down encoding thread");
|
||||
return mEncodeTaskQueue->BeginShutdown();
|
||||
},
|
||||
[] {
|
||||
MOZ_CRASH("Unexpected rejection");
|
||||
return ShutdownPromise::CreateAndResolve(false, __func__);
|
||||
})
|
||||
->Then(
|
||||
GetCurrentThreadSerialEventTarget(), __func__,
|
||||
[self = RefPtr<SpeechRecognition>(this), this] {
|
||||
RefPtr<nsIAsyncShutdownClient> shutdown =
|
||||
media::GetShutdownBarrier();
|
||||
shutdown->RemoveBlocker(mShutdownBlocker);
|
||||
mShutdownBlocker = nullptr;
|
||||
|
||||
MOZ_DIAGNOSTIC_ASSERT(mCurrentState != STATE_IDLE);
|
||||
return GenericNonExclusivePromise::CreateAndResolve(true,
|
||||
__func__);
|
||||
},
|
||||
[] {
|
||||
MOZ_CRASH("Unexpected rejection");
|
||||
return GenericNonExclusivePromise::CreateAndResolve(false,
|
||||
__func__);
|
||||
});
|
||||
return mStopRecordingPromise;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
|
@ -648,12 +743,11 @@ void SpeechRecognition::GetLang(nsString& aRetVal) const { aRetVal = mLang; }
|
|||
void SpeechRecognition::SetLang(const nsAString& aArg) { mLang = aArg; }
|
||||
|
||||
bool SpeechRecognition::GetContinuous(ErrorResult& aRv) const {
|
||||
aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
|
||||
return false;
|
||||
return mContinuous;
|
||||
}
|
||||
|
||||
void SpeechRecognition::SetContinuous(bool aArg, ErrorResult& aRv) {
|
||||
aRv.Throw(NS_ERROR_NOT_IMPLEMENTED);
|
||||
mContinuous = aArg;
|
||||
}
|
||||
|
||||
bool SpeechRecognition::InterimResults() const { return mInterimResults; }
|
||||
|
@ -690,6 +784,10 @@ void SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream,
|
|||
return;
|
||||
}
|
||||
|
||||
mEncodeTaskQueue = MakeAndAddRef<TaskQueue>(
|
||||
GetMediaThreadPool(MediaThreadType::WEBRTC_DECODER),
|
||||
"WebSpeechEncoderThread");
|
||||
|
||||
nsresult rv;
|
||||
rv = mRecognitionService->Initialize(this);
|
||||
if (NS_WARN_IF(NS_FAILED(rv))) {
|
||||
|
@ -701,6 +799,7 @@ void SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream,
|
|||
|
||||
if (aStream.WasPassed()) {
|
||||
mStream = &aStream.Value();
|
||||
mTrackIsOwned = false;
|
||||
mStream->RegisterTrackListener(this);
|
||||
nsTArray<RefPtr<AudioStreamTrack>> tracks;
|
||||
mStream->GetAudioTracks(tracks);
|
||||
|
@ -711,24 +810,40 @@ void SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream,
|
|||
}
|
||||
}
|
||||
} else {
|
||||
mTrackIsOwned = true;
|
||||
AutoNoJSAPI nojsapi;
|
||||
RefPtr<SpeechRecognition> self(this);
|
||||
MediaManager::Get()
|
||||
->GetUserMedia(GetOwner(), constraints, aCallerType)
|
||||
->Then(
|
||||
GetCurrentThreadSerialEventTarget(), __func__,
|
||||
[this, self](RefPtr<DOMMediaStream>&& aStream) {
|
||||
[this, self,
|
||||
generation = mStreamGeneration](RefPtr<DOMMediaStream>&& aStream) {
|
||||
nsTArray<RefPtr<AudioStreamTrack>> tracks;
|
||||
aStream->GetAudioTracks(tracks);
|
||||
if (mAborted || mCurrentState != STATE_STARTING ||
|
||||
mStreamGeneration != generation) {
|
||||
// We were probably aborted. Exit early.
|
||||
for (const RefPtr<AudioStreamTrack>& track : tracks) {
|
||||
track->Stop();
|
||||
}
|
||||
return;
|
||||
}
|
||||
mStream = std::move(aStream);
|
||||
mStream->RegisterTrackListener(this);
|
||||
nsTArray<RefPtr<AudioStreamTrack>> tracks;
|
||||
mStream->GetAudioTracks(tracks);
|
||||
for (const RefPtr<AudioStreamTrack>& track : tracks) {
|
||||
if (!track->Ended()) {
|
||||
NotifyTrackAdded(track);
|
||||
}
|
||||
}
|
||||
},
|
||||
[this, self](RefPtr<MediaMgrError>&& error) {
|
||||
[this, self,
|
||||
generation = mStreamGeneration](RefPtr<MediaMgrError>&& error) {
|
||||
if (mAborted || mCurrentState != STATE_STARTING ||
|
||||
mStreamGeneration != generation) {
|
||||
// We were probably aborted. Exit early.
|
||||
return;
|
||||
}
|
||||
SpeechRecognitionErrorCode errorCode;
|
||||
|
||||
if (error->mName == MediaMgrError::Name::NotAllowedError) {
|
||||
|
@ -746,44 +861,47 @@ void SpeechRecognition::Start(const Optional<NonNull<DOMMediaStream>>& aStream,
|
|||
}
|
||||
|
||||
bool SpeechRecognition::SetRecognitionService(ErrorResult& aRv) {
|
||||
if (!GetOwner()) {
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
return false;
|
||||
}
|
||||
|
||||
// See:
|
||||
// https://dvcs.w3.org/hg/speech-api/raw-file/tip/webspeechapi.html#dfn-lang
|
||||
nsAutoString lang;
|
||||
if (!mLang.IsEmpty()) {
|
||||
mRecognitionService = GetSpeechRecognitionService(mLang);
|
||||
|
||||
if (!mRecognitionService) {
|
||||
lang = mLang;
|
||||
} else {
|
||||
nsCOMPtr<Document> document = GetOwner()->GetExtantDoc();
|
||||
if (!document) {
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
return false;
|
||||
}
|
||||
nsCOMPtr<Element> element = document->GetRootElement();
|
||||
if (!element) {
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
nsAutoString lang;
|
||||
element->GetLang(lang);
|
||||
}
|
||||
|
||||
nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
|
||||
if (!window) {
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
return false;
|
||||
}
|
||||
nsCOMPtr<Document> document = window->GetExtantDoc();
|
||||
if (!document) {
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
return false;
|
||||
}
|
||||
nsCOMPtr<Element> element = document->GetRootElement();
|
||||
if (!element) {
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
return false;
|
||||
}
|
||||
|
||||
nsAutoString lang;
|
||||
element->GetLang(lang);
|
||||
mRecognitionService = GetSpeechRecognitionService(lang);
|
||||
|
||||
if (!mRecognitionService) {
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
auto result = CreateSpeechRecognitionService(GetOwner(), this, lang);
|
||||
|
||||
if (result.isErr()) {
|
||||
switch (result.unwrapErr()) {
|
||||
case ServiceCreationError::ServiceNotFound:
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
break;
|
||||
default:
|
||||
MOZ_CRASH("Unknown error");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
mRecognitionService = result.unwrap();
|
||||
MOZ_DIAGNOSTIC_ASSERT(mRecognitionService);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -794,11 +912,6 @@ bool SpeechRecognition::ValidateAndSetGrammarList(ErrorResult& aRv) {
|
|||
}
|
||||
|
||||
uint32_t grammarListLength = mSpeechGrammarList->Length();
|
||||
if (0 == grammarListLength) {
|
||||
aRv.Throw(NS_ERROR_DOM_INVALID_STATE_ERR);
|
||||
return false;
|
||||
}
|
||||
|
||||
for (uint32_t count = 0; count < grammarListLength; ++count) {
|
||||
RefPtr<SpeechGrammar> speechGrammar = mSpeechGrammarList->Item(count, aRv);
|
||||
if (aRv.Failed()) {
|
||||
|
@ -825,6 +938,7 @@ void SpeechRecognition::Abort() {
|
|||
}
|
||||
|
||||
mAborted = true;
|
||||
|
||||
RefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_ABORT);
|
||||
NS_DispatchToMainThread(event);
|
||||
}
|
||||
|
@ -874,14 +988,13 @@ void SpeechRecognition::DispatchError(EventType aErrorType,
|
|||
uint32_t SpeechRecognition::FillSamplesBuffer(const int16_t* aSamples,
|
||||
uint32_t aSampleCount) {
|
||||
MOZ_ASSERT(mBufferedSamples < mAudioSamplesPerChunk);
|
||||
MOZ_ASSERT(mAudioSamplesBuffer.get());
|
||||
MOZ_ASSERT(mAudioSamplesBuffer);
|
||||
|
||||
int16_t* samplesBuffer = static_cast<int16_t*>(mAudioSamplesBuffer->Data());
|
||||
size_t samplesToCopy =
|
||||
std::min(aSampleCount, mAudioSamplesPerChunk - mBufferedSamples);
|
||||
|
||||
memcpy(samplesBuffer + mBufferedSamples, aSamples,
|
||||
samplesToCopy * sizeof(int16_t));
|
||||
PodCopy(samplesBuffer + mBufferedSamples, aSamples, samplesToCopy);
|
||||
|
||||
mBufferedSamples += samplesToCopy;
|
||||
return samplesToCopy;
|
||||
|
@ -903,8 +1016,8 @@ uint32_t SpeechRecognition::SplitSamplesBuffer(
|
|||
RefPtr<SharedBuffer> chunk =
|
||||
SharedBuffer::Create(mAudioSamplesPerChunk * sizeof(int16_t));
|
||||
|
||||
memcpy(chunk->Data(), aSamplesBuffer + chunkStart,
|
||||
mAudioSamplesPerChunk * sizeof(int16_t));
|
||||
PodCopy(static_cast<short*>(chunk->Data()), aSamplesBuffer + chunkStart,
|
||||
mAudioSamplesPerChunk);
|
||||
|
||||
aResult.AppendElement(chunk.forget());
|
||||
chunkStart += mAudioSamplesPerChunk;
|
||||
|
@ -987,6 +1100,7 @@ const char* SpeechRecognition::GetName(FSMState aId) {
|
|||
"STATE_IDLE", "STATE_STARTING",
|
||||
"STATE_ESTIMATING", "STATE_WAITING_FOR_SPEECH",
|
||||
"STATE_RECOGNIZING", "STATE_WAITING_FOR_RESULT",
|
||||
"STATE_ABORTING",
|
||||
};
|
||||
|
||||
MOZ_ASSERT(aId < STATE_COUNT);
|
||||
|
@ -1009,6 +1123,11 @@ const char* SpeechRecognition::GetName(SpeechEvent* aEvent) {
|
|||
return names[aEvent->mType];
|
||||
}
|
||||
|
||||
TaskQueue* SpeechRecognition::GetTaskQueueForEncoding() const {
|
||||
MOZ_ASSERT(NS_IsMainThread());
|
||||
return mEncodeTaskQueue;
|
||||
}
|
||||
|
||||
SpeechEvent::SpeechEvent(SpeechRecognition* aRecognition,
|
||||
SpeechRecognition::EventType aType)
|
||||
: Runnable("dom::SpeechEvent"),
|
||||
|
|
|
@ -32,6 +32,10 @@
|
|||
|
||||
namespace mozilla {
|
||||
|
||||
namespace media {
|
||||
class ShutdownBlocker;
|
||||
}
|
||||
|
||||
namespace dom {
|
||||
|
||||
#define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC \
|
||||
|
@ -40,7 +44,6 @@ namespace dom {
|
|||
|
||||
class GlobalObject;
|
||||
class AudioStreamTrack;
|
||||
class SpeechRecognitionShutdownBlocker;
|
||||
class SpeechEvent;
|
||||
class SpeechTrackListener;
|
||||
|
||||
|
@ -62,8 +65,6 @@ class SpeechRecognition final : public DOMEventTargetHelper,
|
|||
|
||||
NS_DECL_NSIOBSERVER
|
||||
|
||||
nsISupports* GetParentObject() const;
|
||||
|
||||
JSObject* WrapObject(JSContext* aCx,
|
||||
JS::Handle<JSObject*> aGivenProto) override;
|
||||
|
||||
|
@ -72,6 +73,11 @@ class SpeechRecognition final : public DOMEventTargetHelper,
|
|||
static already_AddRefed<SpeechRecognition> Constructor(
|
||||
const GlobalObject& aGlobal, ErrorResult& aRv);
|
||||
|
||||
static already_AddRefed<SpeechRecognition> WebkitSpeechRecognition(
|
||||
const GlobalObject& aGlobal, ErrorResult& aRv) {
|
||||
return Constructor(aGlobal, aRv);
|
||||
}
|
||||
|
||||
already_AddRefed<SpeechGrammarList> Grammars() const;
|
||||
|
||||
void SetGrammars(mozilla::dom::SpeechGrammarList& aArg);
|
||||
|
@ -90,6 +96,8 @@ class SpeechRecognition final : public DOMEventTargetHelper,
|
|||
|
||||
uint32_t MaxAlternatives() const;
|
||||
|
||||
TaskQueue* GetTaskQueueForEncoding() const;
|
||||
|
||||
void SetMaxAlternatives(uint32_t aArg);
|
||||
|
||||
void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;
|
||||
|
@ -153,6 +161,7 @@ class SpeechRecognition final : public DOMEventTargetHelper,
|
|||
STATE_WAITING_FOR_SPEECH,
|
||||
STATE_RECOGNIZING,
|
||||
STATE_WAITING_FOR_RESULT,
|
||||
STATE_ABORTING,
|
||||
STATE_COUNT
|
||||
};
|
||||
|
||||
|
@ -163,7 +172,7 @@ class SpeechRecognition final : public DOMEventTargetHelper,
|
|||
bool ValidateAndSetGrammarList(ErrorResult& aRv);
|
||||
|
||||
NS_IMETHOD StartRecording(RefPtr<AudioStreamTrack>& aDOMStream);
|
||||
NS_IMETHOD StopRecording();
|
||||
RefPtr<GenericNonExclusivePromise> StopRecording();
|
||||
|
||||
uint32_t ProcessAudioSegment(AudioSegment* aSegment, TrackRate aTrackRate);
|
||||
void NotifyError(SpeechEvent* aEvent);
|
||||
|
@ -186,9 +195,19 @@ class SpeechRecognition final : public DOMEventTargetHelper,
|
|||
|
||||
RefPtr<DOMMediaStream> mStream;
|
||||
RefPtr<AudioStreamTrack> mTrack;
|
||||
bool mTrackIsOwned = false;
|
||||
RefPtr<GenericNonExclusivePromise> mStopRecordingPromise;
|
||||
RefPtr<SpeechTrackListener> mSpeechListener;
|
||||
RefPtr<SpeechRecognitionShutdownBlocker> mShutdownBlocker;
|
||||
nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;
|
||||
RefPtr<media::ShutdownBlocker> mShutdownBlocker;
|
||||
// TaskQueue responsible for pre-processing the samples by the service
|
||||
// it runs in a separate thread from the main thread
|
||||
RefPtr<TaskQueue> mEncodeTaskQueue;
|
||||
|
||||
// A generation ID of the MediaStream a started session is for, so that
|
||||
// a gUM request that resolves after the session has stopped, and a new
|
||||
// one has started, can exit early. Main thread only. Can wrap.
|
||||
uint8_t mStreamGeneration = 0;
|
||||
|
||||
FSMState mCurrentState;
|
||||
|
||||
|
@ -197,6 +216,10 @@ class SpeechRecognition final : public DOMEventTargetHelper,
|
|||
|
||||
uint32_t mAudioSamplesPerChunk;
|
||||
|
||||
// maximum amount of seconds the engine will wait for voice
|
||||
// until returning a 'no speech detected' error
|
||||
uint32_t mSpeechDetectionTimeoutMs;
|
||||
|
||||
// buffer holds one chunk of mAudioSamplesPerChunk
|
||||
// samples before feeding it to mEndpointer
|
||||
RefPtr<SharedBuffer> mAudioSamplesBuffer;
|
||||
|
@ -209,6 +232,10 @@ class SpeechRecognition final : public DOMEventTargetHelper,
|
|||
|
||||
RefPtr<SpeechGrammarList> mSpeechGrammarList;
|
||||
|
||||
// private flag used to hold if the user called the setContinuous() method
|
||||
// of the API
|
||||
bool mContinuous;
|
||||
|
||||
// WebSpeechAPI (http://bit.ly/1gIl7DC) states:
|
||||
//
|
||||
// 1. Default value MUST be false
|
||||
|
|
|
@ -12,6 +12,7 @@ XPIDL_SOURCES = [
|
|||
]
|
||||
|
||||
EXPORTS.mozilla.dom += [
|
||||
'OnlineSpeechRecognitionService.h',
|
||||
'SpeechGrammar.h',
|
||||
'SpeechGrammarList.h',
|
||||
'SpeechRecognition.h',
|
||||
|
@ -21,6 +22,12 @@ EXPORTS.mozilla.dom += [
|
|||
'SpeechTrackListener.h',
|
||||
]
|
||||
|
||||
EXPORTS += [
|
||||
'endpointer.h',
|
||||
'energy_endpointer.h',
|
||||
'energy_endpointer_params.h',
|
||||
]
|
||||
|
||||
if CONFIG['MOZ_WEBSPEECH_TEST_BACKEND']:
|
||||
EXPORTS.mozilla.dom += [
|
||||
'test/FakeSpeechRecognitionService.h',
|
||||
|
@ -30,6 +37,7 @@ UNIFIED_SOURCES += [
|
|||
'endpointer.cc',
|
||||
'energy_endpointer.cc',
|
||||
'energy_endpointer_params.cc',
|
||||
'OnlineSpeechRecognitionService.cpp',
|
||||
'SpeechGrammar.cpp',
|
||||
'SpeechGrammarList.cpp',
|
||||
'SpeechRecognition.cpp',
|
||||
|
@ -44,8 +52,13 @@ if CONFIG['MOZ_WEBSPEECH_TEST_BACKEND']:
|
|||
'test/FakeSpeechRecognitionService.cpp',
|
||||
]
|
||||
|
||||
USE_LIBS += [
|
||||
'jsoncpp',
|
||||
]
|
||||
|
||||
LOCAL_INCLUDES += [
|
||||
'/dom/base',
|
||||
'/toolkit/components/jsoncpp/include',
|
||||
]
|
||||
|
||||
include('/ipc/chromium/chromium-config.mozbuild')
|
||||
|
|
|
@ -30,6 +30,7 @@ FakeSpeechRecognitionService::~FakeSpeechRecognitionService() = default;
|
|||
NS_IMETHODIMP
|
||||
FakeSpeechRecognitionService::Initialize(
|
||||
WeakPtr<SpeechRecognition> aSpeechRecognition) {
|
||||
MOZ_ASSERT(NS_IsMainThread());
|
||||
mRecognition = aSpeechRecognition;
|
||||
nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
|
||||
obs->AddObserver(this, SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC, false);
|
||||
|
@ -40,11 +41,15 @@ FakeSpeechRecognitionService::Initialize(
|
|||
NS_IMETHODIMP
|
||||
FakeSpeechRecognitionService::ProcessAudioSegment(AudioSegment* aAudioSegment,
|
||||
int32_t aSampleRate) {
|
||||
MOZ_ASSERT(!NS_IsMainThread());
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
FakeSpeechRecognitionService::SoundEnd() { return NS_OK; }
|
||||
FakeSpeechRecognitionService::SoundEnd() {
|
||||
MOZ_ASSERT(NS_IsMainThread());
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
FakeSpeechRecognitionService::ValidateAndSetGrammarList(
|
||||
|
@ -53,7 +58,10 @@ FakeSpeechRecognitionService::ValidateAndSetGrammarList(
|
|||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
FakeSpeechRecognitionService::Abort() { return NS_OK; }
|
||||
FakeSpeechRecognitionService::Abort() {
|
||||
MOZ_ASSERT(NS_IsMainThread());
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
NS_IMETHODIMP
|
||||
FakeSpeechRecognitionService::Observe(nsISupports* aSubject, const char* aTopic,
|
||||
|
@ -85,7 +93,6 @@ FakeSpeechRecognitionService::Observe(nsISupports* aSubject, const char* aTopic,
|
|||
event->mRecognitionResultList = BuildMockResultList();
|
||||
NS_DispatchToMainThread(event);
|
||||
}
|
||||
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ namespace mozilla {
|
|||
class FakeSpeechRecognitionService : public nsISpeechRecognitionService,
|
||||
public nsIObserver {
|
||||
public:
|
||||
NS_DECL_ISUPPORTS
|
||||
NS_DECL_THREADSAFE_ISUPPORTS
|
||||
NS_DECL_NSISPEECHRECOGNITIONSERVICE
|
||||
NS_DECL_NSIOBSERVER
|
||||
|
||||
|
|
|
@ -163,7 +163,16 @@ function performTest(options) {
|
|||
);
|
||||
|
||||
SpecialPowers.pushPrefEnv({ set: prefs }, function() {
|
||||
var sr = new SpeechRecognition();
|
||||
var sr;
|
||||
if (!options.webkit) {
|
||||
sr = new SpeechRecognition();
|
||||
} else {
|
||||
sr = new webkitSpeechRecognition();
|
||||
var grammar = new webkitSpeechGrammar();
|
||||
var speechrecognitionlist = new webkitSpeechGrammarList();
|
||||
speechrecognitionlist.addFromString("", 1);
|
||||
sr.grammars = speechrecognitionlist;
|
||||
}
|
||||
var em = new EventManager(sr);
|
||||
|
||||
for (var eventName in options.expectedEvents) {
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
const CC = Components.Constructor;
|
||||
|
||||
// Context structure - we need to set this up properly to pass to setObjectState
|
||||
const ctx = {
|
||||
QueryInterface: function(iid) {
|
||||
if (iid.equals(Components.interfaces.nsISupports))
|
||||
return this;
|
||||
throw Components.results.NS_ERROR_NO_INTERFACE;
|
||||
}
|
||||
};
|
||||
|
||||
function setRequest(request) {
|
||||
setObjectState(key, request);
|
||||
}
|
||||
function getRequest() {
|
||||
let request;
|
||||
getObjectState(v => { request = v });
|
||||
return request;
|
||||
}
|
||||
|
||||
function handleRequest(request, response) {
|
||||
response.processAsync();
|
||||
if (request.queryString == "save") {
|
||||
// Get the context structure and finish the old request
|
||||
getObjectState("context", function(obj) {
|
||||
savedCtx = obj.wrappedJSObject;
|
||||
request = savedCtx.request;
|
||||
|
||||
response.setHeader("Content-Type", "application/octet-stream", false);
|
||||
response.setHeader("Access-Control-Allow-Origin", "*", false);
|
||||
response.setHeader("Cache-Control", "no-cache", false);
|
||||
response.setStatusLine(request.httpVersion, 200, "OK");
|
||||
|
||||
const input = request.bodyInputStream;
|
||||
const output = response.bodyOutputStream;
|
||||
let bodyAvail;
|
||||
while ((bodyAvail = input.available()) > 0) {
|
||||
output.writeFrom(input, bodyAvail);
|
||||
}
|
||||
response.finish();
|
||||
});
|
||||
return;
|
||||
} else if (request.queryString == "malformedresult=1" || request.queryString == "emptyresult=1") {
|
||||
jsonOK = request.queryString == "malformedresult=1" ? '{"status":"ok","dat' : '{"status":"ok","data":[]}'
|
||||
response.setHeader("Content-Length", String(jsonOK.length), false);
|
||||
response.setHeader("Content-Type", "application/json", false);
|
||||
response.setHeader("Access-Control-Allow-Origin", "*", false);
|
||||
response.setHeader("Cache-Control", "no-cache", false);
|
||||
response.setStatusLine(request.httpVersion, 200, "OK");
|
||||
response.write(jsonOK, jsonOK.length);
|
||||
response.finish();
|
||||
} else if (request.queryString == "hangup=1") {
|
||||
response.finish();
|
||||
} else if (request.queryString == "return400=1") {
|
||||
jsonOK = "{'message':'Bad header:accept-language-stt'}";
|
||||
response.setHeader("Content-Length", String(jsonOK.length), false);
|
||||
response.setHeader("Content-Type", "application/json", false);
|
||||
response.setHeader("Access-Control-Allow-Origin", "*", false);
|
||||
response.setHeader("Cache-Control", "no-cache", false);
|
||||
response.setStatusLine(request.httpVersion, 400, "Bad Request");
|
||||
response.write(jsonOK, jsonOK.length);
|
||||
response.finish();
|
||||
}
|
||||
else {
|
||||
ctx.wrappedJSObject = ctx;
|
||||
ctx.request = request;
|
||||
setObjectState("context", ctx);
|
||||
jsonOK = '{"status":"ok","data":[{"confidence":0.9085610,"text":"hello"}]}';
|
||||
response.setHeader("Content-Length", String(jsonOK.length), false);
|
||||
response.setHeader("Content-Type", "application/json", false);
|
||||
response.setHeader("Access-Control-Allow-Origin", "*", false);
|
||||
response.setHeader("Cache-Control", "no-cache", false);
|
||||
response.setStatusLine(request.httpVersion, 200, "OK");
|
||||
response.write(jsonOK, jsonOK.length);
|
||||
response.finish();
|
||||
}
|
||||
}
|
|
@ -5,6 +5,9 @@ support-files =
|
|||
head.js
|
||||
hello.ogg
|
||||
hello.ogg^headers^
|
||||
http_requesthandler.sjs
|
||||
sinoid+hello.ogg
|
||||
sinoid+hello.ogg^headers^
|
||||
silence.ogg
|
||||
silence.ogg^headers^
|
||||
[test_abort.html]
|
||||
|
@ -16,6 +19,12 @@ tags=capturestream
|
|||
skip-if = (os == "win" && processor == "aarch64") # aarch64 due to 1538363
|
||||
[test_nested_eventloop.html]
|
||||
skip-if = toolkit == 'android'
|
||||
[test_online_400_response.html]
|
||||
[test_online_hangup.html]
|
||||
[test_online_http.html]
|
||||
[test_online_http_webkit.html]
|
||||
[test_online_malformed_result_handling.html]
|
||||
[test_online_empty_result_handling.html]
|
||||
[test_preference_enable.html]
|
||||
[test_recognition_service_error.html]
|
||||
skip-if = (os == "win" && processor == "aarch64") # aarch64 due to 1538360
|
||||
|
|
Двоичный файл не отображается.
|
@ -0,0 +1 @@
|
|||
Cache-Control: no-store
|
|
@ -60,7 +60,9 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=650295
|
|||
eventsToRequest: [],
|
||||
expectedEvents,
|
||||
doneFunc: (nextEventIdx < eventsToAbortOn.length) ? doNextTest : SimpleTest.finish,
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true], ["media.webspeech.test.fake_recognition_service", true]]
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true],
|
||||
["media.webspeech.test.fake_recognition_service", true],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -32,7 +32,9 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=650295
|
|||
'end': null
|
||||
},
|
||||
doneFunc: SimpleTest.finish,
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true], ["media.webspeech.test.fake_recognition_service", true]]
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true],
|
||||
["media.webspeech.test.fake_recognition_service", true],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
</script>
|
||||
</pre>
|
||||
|
|
|
@ -91,7 +91,9 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=650295
|
|||
'result': buildResultCallback("Mock final result"),
|
||||
'end': endHandler,
|
||||
},
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true], ["media.webspeech.test.fake_recognition_service", true]]
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true],
|
||||
["media.webspeech.test.fake_recognition_service", true],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
|
|
|
@ -72,7 +72,8 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=650295
|
|||
},
|
||||
doneFunc,
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true],
|
||||
["media.webspeech.test.fake_recognition_service", true]]
|
||||
["media.webspeech.test.fake_recognition_service", true],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=1248897
|
||||
The intent of this file is to test the speech recognition service behavior
|
||||
whenever the server returns a 400 error
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Test for Bug 1248897 -- Online speech service</title>
|
||||
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
<script type="application/javascript" src="head.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
|
||||
</div>
|
||||
<pre id="test">
|
||||
<script type="text/javascript">
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
|
||||
performTest({
|
||||
eventsToRequest: [],
|
||||
expectedEvents: {
|
||||
"start": null,
|
||||
"audiostart": null,
|
||||
"audioend": null,
|
||||
"end": null,
|
||||
'error': buildErrorCallback(errorCodes.NETWORK),
|
||||
"speechstart": null,
|
||||
"speechend": null
|
||||
},
|
||||
doneFunc: SimpleTest.finish,
|
||||
prefs: [["media.webspeech.recognition.enable", true],
|
||||
["media.webspeech.recognition.force_enable", true],
|
||||
["media.webspeech.service.endpoint",
|
||||
"http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs?return400=1"],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,48 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=1248897
|
||||
The intent of this file is to test the speech recognition service behavior
|
||||
whenever the server returns a valid json object, but without any transcription
|
||||
results on it, for example: `{"status":"ok","data":[]}`
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Test for Bug 1248897 -- Online speech service</title>
|
||||
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
<script type="application/javascript" src="head.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
|
||||
</div>
|
||||
<pre id="test">
|
||||
<script type="text/javascript">
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
|
||||
performTest({
|
||||
eventsToRequest: [],
|
||||
expectedEvents: {
|
||||
"start": null,
|
||||
"audiostart": null,
|
||||
"audioend": null,
|
||||
"end": null,
|
||||
'error': buildErrorCallback(errorCodes.NETWORK),
|
||||
"speechstart": null,
|
||||
"speechend": null
|
||||
},
|
||||
doneFunc: SimpleTest.finish,
|
||||
prefs: [["media.webspeech.recognition.enable", true],
|
||||
["media.webspeech.recognition.force_enable", true],
|
||||
["media.webspeech.service.endpoint",
|
||||
"http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs?emptyresult=1"],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,47 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=1248897
|
||||
The intent of this file is to test the speech recognition service behavior
|
||||
whenever the server hangups the connection without sending any response
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Test for Bug 1248897 -- Online speech service</title>
|
||||
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
<script type="application/javascript" src="head.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
|
||||
</div>
|
||||
<pre id="test">
|
||||
<script type="text/javascript">
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
|
||||
performTest({
|
||||
eventsToRequest: [],
|
||||
expectedEvents: {
|
||||
"start": null,
|
||||
"audiostart": null,
|
||||
"audioend": null,
|
||||
"end": null,
|
||||
'error': buildErrorCallback(errorCodes.NETWORK),
|
||||
"speechstart": null,
|
||||
"speechend": null
|
||||
},
|
||||
doneFunc: SimpleTest.finish,
|
||||
prefs: [["media.webspeech.recognition.enable", true],
|
||||
["media.webspeech.recognition.force_enable", true],
|
||||
["media.webspeech.service.endpoint",
|
||||
"http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs?hangup=1"],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,89 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=1248897
|
||||
The intent of this file is to test a successfull speech recognition request and
|
||||
that audio is being properly encoded
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Test for Bug 1248897 -- Online speech service</title>
|
||||
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
<script type="application/javascript" src="head.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
|
||||
</div>
|
||||
<pre id="test">
|
||||
<script type="text/javascript">
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
|
||||
async function validateRawAudio(buffer) {
|
||||
const ac = new AudioContext();
|
||||
const decodedData = await ac.decodeAudioData(buffer);
|
||||
const source = ac.createBufferSource();
|
||||
source.buffer = decodedData;
|
||||
source.loop = true;
|
||||
const analyser = ac.createAnalyser();
|
||||
analyser.smoothingTimeConstant = 0.2;
|
||||
analyser.fftSize = 1024;
|
||||
source.connect(analyser);
|
||||
const binIndexForFrequency = frequency =>
|
||||
1 + Math.round(frequency * analyser.fftSize / ac.sampleRate);
|
||||
source.start();
|
||||
const data = new Uint8Array(analyser.frequencyBinCount);
|
||||
const start = performance.now();
|
||||
while (true) {
|
||||
if (performance.now() - start > 10000) {
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
analyser.getByteFrequencyData(data);
|
||||
if (data[binIndexForFrequency(200)] < 50 &&
|
||||
data[binIndexForFrequency(440)] > 180 &&
|
||||
data[binIndexForFrequency(1000)] < 50) {
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
await new Promise(r => requestAnimationFrame(r));
|
||||
}
|
||||
}
|
||||
|
||||
async function verifyEncodedAudio(requestUrl) {
|
||||
try {
|
||||
const response = await fetch(requestUrl);
|
||||
const buffer = await response.arrayBuffer();
|
||||
ok(await validateRawAudio(buffer), "Audio encoding is valid");
|
||||
} catch(e) {
|
||||
ok(false, e);
|
||||
} finally {
|
||||
SimpleTest.finish();
|
||||
}
|
||||
}
|
||||
|
||||
performTest({
|
||||
eventsToRequest: {},
|
||||
expectedEvents: {
|
||||
"start": null,
|
||||
"audiostart": null,
|
||||
"audioend": null,
|
||||
"end": null,
|
||||
"result": () => verifyEncodedAudio("http_requesthandler.sjs?save"),
|
||||
"speechstart": null,
|
||||
"speechend": null
|
||||
},
|
||||
audioSampleFile: "sinoid+hello.ogg",
|
||||
prefs: [["media.webspeech.recognition.enable", true],
|
||||
["media.webspeech.recognition.force_enable", true],
|
||||
["media.webspeech.service.endpoint",
|
||||
"http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs"],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,90 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=1248897
|
||||
The intent of this file is to test a successfull speech recognition request and
|
||||
that audio is being properly encoded
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Test for Bug 1248897 -- Online speech service</title>
|
||||
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
<script type="application/javascript" src="head.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
|
||||
</div>
|
||||
<pre id="test">
|
||||
<script type="text/javascript">
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
|
||||
async function validateRawAudio(buffer) {
|
||||
const ac = new AudioContext();
|
||||
const decodedData = await ac.decodeAudioData(buffer);
|
||||
const source = ac.createBufferSource();
|
||||
source.buffer = decodedData;
|
||||
source.loop = true;
|
||||
const analyser = ac.createAnalyser();
|
||||
analyser.smoothingTimeConstant = 0.2;
|
||||
analyser.fftSize = 1024;
|
||||
source.connect(analyser);
|
||||
const binIndexForFrequency = frequency =>
|
||||
1 + Math.round(frequency * analyser.fftSize / ac.sampleRate);
|
||||
source.start();
|
||||
const data = new Uint8Array(analyser.frequencyBinCount);
|
||||
const start = performance.now();
|
||||
while (true) {
|
||||
if (performance.now() - start > 10000) {
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
analyser.getByteFrequencyData(data);
|
||||
if (data[binIndexForFrequency(200)] < 50 &&
|
||||
data[binIndexForFrequency(440)] > 180 &&
|
||||
data[binIndexForFrequency(1000)] < 50) {
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
await new Promise(r => requestAnimationFrame(r));
|
||||
}
|
||||
}
|
||||
|
||||
async function verifyEncodedAudio(requestUrl) {
|
||||
try {
|
||||
const response = await fetch(requestUrl);
|
||||
const buffer = await response.arrayBuffer();
|
||||
ok(await validateRawAudio(buffer), "Audio encoding is valid");
|
||||
} catch(e) {
|
||||
ok(false, e);
|
||||
} finally {
|
||||
SimpleTest.finish();
|
||||
}
|
||||
}
|
||||
|
||||
performTest({
|
||||
eventsToRequest: {},
|
||||
expectedEvents: {
|
||||
"start": null,
|
||||
"audiostart": null,
|
||||
"audioend": null,
|
||||
"end": null,
|
||||
"result": () => verifyEncodedAudio("http_requesthandler.sjs?save"),
|
||||
"speechstart": null,
|
||||
"speechend": null
|
||||
},
|
||||
audioSampleFile: "sinoid+hello.ogg",
|
||||
prefs: [["media.webspeech.recognition.enable", true],
|
||||
["media.webspeech.recognition.force_enable", true],
|
||||
["media.webspeech.service.endpoint",
|
||||
"http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs"],
|
||||
["media.webspeech.recognition.timeout", 100000]],
|
||||
webkit: true
|
||||
});
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,48 @@
|
|||
<!DOCTYPE HTML>
|
||||
<html>
|
||||
<!--
|
||||
https://bugzilla.mozilla.org/show_bug.cgi?id=1248897
|
||||
The intent of this file is to test the speech recognition service behavior
|
||||
whenever the server returns an invalid/corrupted json object, for example:
|
||||
`{"status":"ok","dat`
|
||||
-->
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Test for Bug 1248897 -- Online speech service</title>
|
||||
<script type="application/javascript" src="/tests/SimpleTest/SimpleTest.js"></script>
|
||||
<link rel="stylesheet" type="text/css" href="/tests/SimpleTest/test.css"/>
|
||||
<script type="application/javascript" src="head.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<a target="_blank" href="https://bugzilla.mozilla.org/show_bug.cgi?id=1248897">Mozilla Bug 1248897</a>
|
||||
<p id="display"></p>
|
||||
<div id="content" style="display: none">
|
||||
|
||||
</div>
|
||||
<pre id="test">
|
||||
<script type="text/javascript">
|
||||
SimpleTest.waitForExplicitFinish();
|
||||
|
||||
performTest({
|
||||
eventsToRequest: [],
|
||||
expectedEvents: {
|
||||
"start": null,
|
||||
"audiostart": null,
|
||||
"audioend": null,
|
||||
"end": null,
|
||||
'error': buildErrorCallback(errorCodes.NETWORK),
|
||||
"speechstart": null,
|
||||
"speechend": null
|
||||
},
|
||||
doneFunc: SimpleTest.finish,
|
||||
prefs: [["media.webspeech.recognition.enable", true],
|
||||
["media.webspeech.recognition.force_enable", true],
|
||||
["media.webspeech.service.endpoint",
|
||||
"http://mochi.test:8888/tests/dom/media/webspeech/recognition/test/http_requesthandler.sjs?malformedresult=1"],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
|
@ -34,7 +34,9 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=650295
|
|||
'end': null
|
||||
},
|
||||
doneFunc: SimpleTest.finish,
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true], ["media.webspeech.test.fake_recognition_service", true]]
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true],
|
||||
["media.webspeech.test.fake_recognition_service", true],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
|
|
|
@ -34,7 +34,9 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=650295
|
|||
'end': null
|
||||
},
|
||||
doneFunc:SimpleTest.finish,
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true], ["media.webspeech.test.fake_recognition_service", true]]
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true],
|
||||
["media.webspeech.test.fake_recognition_service", true],
|
||||
["media.webspeech.recognition.timeout", 100000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
|
|
|
@ -31,7 +31,9 @@ https://bugzilla.mozilla.org/show_bug.cgi?id=650295
|
|||
},
|
||||
doneFunc: SimpleTest.finish,
|
||||
audioSampleFile: "silence.ogg",
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true], ["media.webspeech.test.fake_recognition_service", true]]
|
||||
prefs: [["media.webspeech.test.fake_fsm_events", true],
|
||||
["media.webspeech.test.fake_recognition_service", true],
|
||||
["media.webspeech.recognition.timeout", 1000]]
|
||||
});
|
||||
|
||||
</script>
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
*/
|
||||
|
||||
[Pref="media.webspeech.recognition.enable",
|
||||
NamedConstructor=webkitSpeechGrammar,
|
||||
Func="SpeechRecognition::IsAuthorized",
|
||||
Exposed=Window]
|
||||
interface SpeechGrammar {
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
*/
|
||||
|
||||
[Pref="media.webspeech.recognition.enable",
|
||||
NamedConstructor=webkitSpeechGrammarList,
|
||||
Func="SpeechRecognition::IsAuthorized",
|
||||
Exposed=Window]
|
||||
interface SpeechGrammarList {
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
*/
|
||||
|
||||
[Pref="media.webspeech.recognition.enable",
|
||||
NamedConstructor=webkitSpeechRecognition,
|
||||
Func="SpeechRecognition::IsAuthorized",
|
||||
Exposed=Window]
|
||||
interface SpeechRecognition : EventTarget {
|
||||
|
|
|
@ -438,6 +438,12 @@ if defined('MOZ_WEBSPEECH'):
|
|||
'headers': ['mozilla/dom/nsSynthVoiceRegistry.h'],
|
||||
'constructor': 'mozilla::dom::nsSynthVoiceRegistry::GetInstanceForService',
|
||||
},
|
||||
{
|
||||
'cid': '{0ff5ce56-5b09-4db8-adc6-8266af95f864}',
|
||||
'contract_ids': ['@mozilla.org/webspeech/service;1?name=online'],
|
||||
'type': 'mozilla::OnlineSpeechRecognitionService',
|
||||
'headers': ['mozilla/dom/OnlineSpeechRecognitionService.h'],
|
||||
},
|
||||
]
|
||||
|
||||
if defined('MOZ_WEBSPEECH_TEST_BACKEND'):
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
|
||||
#ifdef MOZ_WEBSPEECH
|
||||
# include "mozilla/dom/nsSynthVoiceRegistry.h"
|
||||
# include "mozilla/dom/OnlineSpeechRecognitionService.h"
|
||||
#endif
|
||||
|
||||
#include "mozilla/dom/PushNotifier.h"
|
||||
|
|
Загрузка…
Ссылка в новой задаче