Bug 1051118 - Pass the sample rate of captured audio to speech recognition services. r=smaug

This commit is contained in:
Andre Natal 2014-09-11 10:47:00 -04:00
Parent ac745b44f1
Commit 7be3deda65
6 changed files with 26 additions and 20 deletions

View file

@ -313,7 +313,7 @@ SpeechRecognition::Transition(SpeechEvent* aEvent)
* Returns the number of samples that were processed.
*/
uint32_t
SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment)
SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment, TrackRate aTrackRate)
{
AudioSegment::ChunkIterator iterator(*aSegment);
uint32_t samples = 0;
@ -324,7 +324,7 @@ SpeechRecognition::ProcessAudioSegment(AudioSegment* aSegment)
iterator.Next();
}
mRecognitionService->ProcessAudioSegment(aSegment);
mRecognitionService->ProcessAudioSegment(aSegment, aTrackRate);
return samples;
}
@ -400,7 +400,7 @@ SpeechRecognition::StartedAudioCapture(SpeechEvent* aEvent)
SetState(STATE_ESTIMATING);
mEndpointer.SetEnvironmentEstimationMode();
mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment);
mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
DispatchTrustedEvent(NS_LITERAL_STRING("audiostart"));
if (mCurrentState == STATE_ESTIMATING) {
@ -424,7 +424,7 @@ SpeechRecognition::WaitForEstimation(SpeechEvent* aEvent)
{
SetState(STATE_ESTIMATING);
mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment);
mEstimationSamples += ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
if (mEstimationSamples > kESTIMATION_SAMPLES) {
mEndpointer.SetUserInputMode();
SetState(STATE_WAITING_FOR_SPEECH);
@ -436,7 +436,7 @@ SpeechRecognition::DetectSpeech(SpeechEvent* aEvent)
{
SetState(STATE_WAITING_FOR_SPEECH);
ProcessAudioSegment(aEvent->mAudioSegment);
ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
if (mEndpointer.DidStartReceivingSpeech()) {
mSpeechDetectionTimer->Cancel();
SetState(STATE_RECOGNIZING);
@ -449,7 +449,7 @@ SpeechRecognition::WaitForSpeechEnd(SpeechEvent* aEvent)
{
SetState(STATE_RECOGNIZING);
ProcessAudioSegment(aEvent->mAudioSegment);
ProcessAudioSegment(aEvent->mAudioSegment, aEvent->mTrackRate);
if (mEndpointer.speech_input_complete()) {
DispatchTrustedEvent(NS_LITERAL_STRING("speechend"));
@ -828,7 +828,7 @@ SpeechRecognition::CreateAudioSegment(nsTArray<nsRefPtr<SharedBuffer>>& aChunks)
void
SpeechRecognition::FeedAudioData(already_AddRefed<SharedBuffer> aSamples,
uint32_t aDuration,
MediaStreamListener* aProvider)
MediaStreamListener* aProvider, TrackRate aTrackRate)
{
NS_ASSERTION(!NS_IsMainThread(),
"FeedAudioData should not be called in the main thread");
@ -876,6 +876,7 @@ SpeechRecognition::FeedAudioData(already_AddRefed<SharedBuffer> aSamples,
nsRefPtr<SpeechEvent> event = new SpeechEvent(this, EVENT_AUDIO_DATA);
event->mAudioSegment = segment;
event->mProvider = aProvider;
event->mTrackRate = aTrackRate;
NS_DispatchToMainThread(event);
return;

View file

@ -130,7 +130,7 @@ public:
uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount);
uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray<nsRefPtr<SharedBuffer>>& aResult);
AudioSegment* CreateAudioSegment(nsTArray<nsRefPtr<SharedBuffer>>& aChunks);
void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider);
void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider, TrackRate aTrackRate);
static struct TestConfig
{
@ -211,7 +211,7 @@ private:
NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream);
NS_IMETHOD StopRecording();
uint32_t ProcessAudioSegment(AudioSegment* aSegment);
uint32_t ProcessAudioSegment(AudioSegment* aSegment, TrackRate aTrackRate);
void NotifyError(SpeechEvent* aEvent);
void ProcessEvent(SpeechEvent* aEvent);
@ -266,6 +266,7 @@ public:
, mError(0)
, mRecognition(aRecognition)
, mType(aType)
, mTrackRate(0)
{
}
@ -286,6 +287,7 @@ private:
// event gets processed.
nsRefPtr<MediaStreamListener> mProvider;
SpeechRecognition::EventType mType;
TrackRate mTrackRate;
};
} // namespace dom

View file

@ -50,18 +50,20 @@ SpeechStreamListener::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph,
if (iterator->IsNull()) {
nsTArray<int16_t> nullData;
PodZero(nullData.AppendElements(duration), duration);
ConvertAndDispatchAudioChunk(duration, iterator->mVolume, nullData.Elements());
ConvertAndDispatchAudioChunk(duration, iterator->mVolume, nullData.Elements(), aTrackRate);
} else {
AudioSampleFormat format = iterator->mBufferFormat;
MOZ_ASSERT(format == AUDIO_FORMAT_S16 || format == AUDIO_FORMAT_FLOAT32);
if (format == AUDIO_FORMAT_S16) {
ConvertAndDispatchAudioChunk(duration, iterator->mVolume,
static_cast<const int16_t*>(iterator->mChannelData[0]));
ConvertAndDispatchAudioChunk(duration,iterator->mVolume,
static_cast<const int16_t*>(iterator->mChannelData[0]),
aTrackRate);
} else if (format == AUDIO_FORMAT_FLOAT32) {
ConvertAndDispatchAudioChunk(duration, iterator->mVolume,
static_cast<const float*>(iterator->mChannelData[0]));
ConvertAndDispatchAudioChunk(duration,iterator->mVolume,
static_cast<const float*>(iterator->mChannelData[0]),
aTrackRate);
}
}
@ -71,7 +73,8 @@ SpeechStreamListener::NotifyQueuedTrackChanges(MediaStreamGraph* aGraph,
template<typename SampleFormatType> void
SpeechStreamListener::ConvertAndDispatchAudioChunk(int aDuration, float aVolume,
SampleFormatType* aData)
SampleFormatType* aData,
TrackRate aTrackRate)
{
nsRefPtr<SharedBuffer> samples(SharedBuffer::Create(aDuration *
1 * // channel
@ -80,7 +83,7 @@ SpeechStreamListener::ConvertAndDispatchAudioChunk(int aDuration, float aVolume,
int16_t* to = static_cast<int16_t*>(samples->Data());
ConvertAudioSamplesWithScale(aData, to, aDuration, aVolume);
mRecognition->FeedAudioData(samples.forget(), aDuration, this);
mRecognition->FeedAudioData(samples.forget(), aDuration, this, aTrackRate);
}
void

View file

@ -35,7 +35,7 @@ public:
private:
template<typename SampleFormatType>
void ConvertAndDispatchAudioChunk(int aDuration, float aVolume, SampleFormatType* aData);
void ConvertAndDispatchAudioChunk(int aDuration, float aVolume, SampleFormatType* aData, TrackRate aTrackRate);
nsRefPtr<SpeechRecognition> mRecognition;
};

View file

@ -12,10 +12,10 @@
native SpeechRecognitionWeakPtr(mozilla::WeakPtr<mozilla::dom::SpeechRecognition>);
[ptr] native AudioSegmentPtr(mozilla::AudioSegment);
// NOTE(review): this span is a diff rendering with the +/- markers stripped --
// of the two [uuid] lines below, the first is the pre-change IID and the
// second is its replacement; the IID is revved because the interface's
// method signature changed (standard XPIDL practice for binary-compat).
[uuid(cb98d929-81cd-4a51-a214-80d3e6281d24)]
[uuid(857f3fa2-a980-4d3e-a959-a2f53af74232)]
interface nsISpeechRecognitionService : nsISupports {
// Bind the service to the owning SpeechRecognition via a WeakPtr.
void initialize(in SpeechRecognitionWeakPtr aSpeechRecognition);
// Feed one captured audio segment to the recognition backend.
// NOTE(review): diff rendering -- the first processAudioSegment line is the
// old declaration, the second is the new one adding aSampleRate (the sample
// rate of the captured audio, per the commit message).
void processAudioSegment(in AudioSegmentPtr aAudioSegment);
void processAudioSegment(in AudioSegmentPtr aAudioSegment, in long aSampleRate);
// End-of-utterance notification.
void soundEnd();
// Cancel any in-progress recognition.
void abort();
};

View file

@ -40,7 +40,7 @@ FakeSpeechRecognitionService::Initialize(WeakPtr<SpeechRecognition> aSpeechRecog
}
NS_IMETHODIMP
// NOTE(review): diff rendering with +/- markers stripped -- the next two
// lines are the old and new signatures of the same method; the new one adds
// aSampleRate (rate of the captured audio track, per the commit message).
FakeSpeechRecognitionService::ProcessAudioSegment(AudioSegment* aAudioSegment)
FakeSpeechRecognitionService::ProcessAudioSegment(AudioSegment* aAudioSegment, int32_t aSampleRate)
{
// Test fake: discards the audio (and its sample rate) and reports success.
return NS_OK;
}