diff --git a/dom/media/MediaTrackGraphImpl.h b/dom/media/MediaTrackGraphImpl.h index ef2a2d6dc179..d0c3ac84998d 100644 --- a/dom/media/MediaTrackGraphImpl.h +++ b/dom/media/MediaTrackGraphImpl.h @@ -163,7 +163,7 @@ class MediaTrackGraphImpl : public MediaTrackGraph, * Append a ControlMessage to the message queue. This queue is drained * during RunInStableState; the messages will run on the graph thread. */ - void AppendMessage(UniquePtr aMessage); + virtual void AppendMessage(UniquePtr aMessage); /** * Dispatches a runnable from any thread to the correct main thread for this diff --git a/dom/media/gtest/TestAudioDecoderInputTrack.cpp b/dom/media/gtest/TestAudioDecoderInputTrack.cpp new file mode 100644 index 000000000000..bee1336a0fc9 --- /dev/null +++ b/dom/media/gtest/TestAudioDecoderInputTrack.cpp @@ -0,0 +1,413 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim:set ts=2 sw=2 sts=2 et cindent: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this file, + * You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include + +#include "AudioDecoderInputTrack.h" +#include "gmock/gmock.h" +#include "GraphDriver.h" +#include "gtest/gtest.h" +#include "MediaInfo.h" +#include "MediaTrackGraphImpl.h" +#include "nsThreadUtils.h" +#include "VideoUtils.h" +#include "WaitFor.h" + +using namespace mozilla; +using namespace mozilla::media; +using testing::AssertionResult; +using testing::NiceMock; +using testing::Return; + +constexpr uint32_t kNoFlags = 0; +constexpr TrackRate kRate = 44100; +constexpr uint32_t kChannels = 2; + +class MockTestGraph : public MediaTrackGraphImpl { + public: + MockTestGraph(TrackRate aRate, uint32_t aChannels) + : MediaTrackGraphImpl(OFFLINE_THREAD_DRIVER, DIRECT_DRIVER, aRate, + aChannels, nullptr, NS_GetCurrentThread()) { + ON_CALL(*this, OnGraphThread).WillByDefault(Return(true)); + // We have to call `Destroy()` manually in order to break the reference. + // The reason we don't assign a null driver is because we would add a track + // to the graph, then it would trigger graph's `EnsureNextIteration()` that + // requires a non-null driver. 
+ SetCurrentDriver(new NiceMock()); + } + + MOCK_CONST_METHOD0(OnGraphThread, bool()); + MOCK_METHOD1(AppendMessage, void(UniquePtr)); + + protected: + ~MockTestGraph() = default; + + class MockDriver : public GraphDriver { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MockDriver, override); + + MockDriver() : GraphDriver(nullptr, nullptr, 0) { + ON_CALL(*this, OnThread).WillByDefault(Return(true)); + ON_CALL(*this, ThreadRunning).WillByDefault(Return(true)); + } + + MOCK_METHOD0(Start, void()); + MOCK_METHOD0(Shutdown, void()); + MOCK_METHOD0(IterationDuration, uint32_t()); + MOCK_METHOD0(EnsureNextIteration, void()); + MOCK_CONST_METHOD0(OnThread, bool()); + MOCK_CONST_METHOD0(ThreadRunning, bool()); + + protected: + ~MockDriver() = default; + }; + + bool mEnableFakeAppend = false; +}; + +AudioData* CreateAudioDataFromInfo(uint32_t aFrames, const AudioInfo& aInfo) { + AlignedAudioBuffer samples(aFrames * aInfo.mChannels); + return new AudioData(0, TimeUnit::Zero(), std::move(samples), aInfo.mChannels, + aInfo.mRate); +} + +AudioDecoderInputTrack* CreateTrack(MediaTrackGraph* aGraph, + nsISerialEventTarget* aThread, + const AudioInfo& aInfo, + float aPlaybackRate = 1.0, + float aVolume = 1.0, + bool aPreservesPitch = true) { + return AudioDecoderInputTrack::Create(aGraph, aThread, aInfo, aPlaybackRate, + aVolume, aPreservesPitch); +} + +class TestAudioDecoderInputTrack : public testing::Test { + protected: + void SetUp() override { + mGraph = MakeRefPtr>(kRate, kChannels); + + mInfo.mRate = kRate; + mInfo.mChannels = kChannels; + mTrack = CreateTrack(mGraph, NS_GetCurrentThread(), mInfo); + EXPECT_FALSE(mTrack->Ended()); + } + + void TearDown() override { + // This simulates the normal usage where `Close()` is always called + // before `Destroy()`. + mTrack->Close(); + mTrack->Destroy(); + // Remove the reference of the track from the mock graph, and then release + // the self-reference of mock graph. 
+ mGraph->RemoveTrackGraphThread(mTrack); + mGraph->Destroy(); + } + + AudioData* CreateAudioData(uint32_t aFrames) { + return CreateAudioDataFromInfo(aFrames, mInfo); + } + + AudioSegment* GetTrackSegment() { return mTrack->GetData(); } + + AssertionResult ExpectSegmentNonSilence(const char* aStartExpr, + const char* aEndExpr, + TrackTime aStart, TrackTime aEnd) { + AudioSegment checkedRange; + checkedRange.AppendSlice(*mTrack->GetData(), aStart, aEnd); + if (!checkedRange.IsNull()) { + return testing::AssertionSuccess(); + } + return testing::AssertionFailure() + << "segment [" << aStart << ":" << aEnd << "] should be non-silence"; + } + + AssertionResult ExpectSegmentSilence(const char* aStartExpr, + const char* aEndExpr, TrackTime aStart, + TrackTime aEnd) { + AudioSegment checkedRange; + checkedRange.AppendSlice(*mTrack->GetData(), aStart, aEnd); + if (checkedRange.IsNull()) { + return testing::AssertionSuccess(); + } + return testing::AssertionFailure() + << "segment [" << aStart << ":" << aEnd << "] should be silence"; + } + + RefPtr mGraph; + RefPtr mTrack; + AudioInfo mInfo; +}; + +TEST_F(TestAudioDecoderInputTrack, BasicAppendData) { + // Start from [0:10] and each time we move the time by 10 frames. 
+ // Expected: outputDuration=10, outputFrames=0, outputSilence=10 + TrackTime start = 0; + TrackTime end = 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_EQ(mTrack->GetEnd(), end); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end); + + // Expected: outputDuration=20, outputFrames=5, outputSilence=15 + RefPtr audio1 = CreateAudioData(5); + mTrack->AppendData(audio1, nullptr); + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_EQ(mTrack->GetEnd(), end); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, start + audio1->Frames()); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start + audio1->Frames(), end); + + // Expected: outputDuration=30, outputFrames=15, outputSilence=15 + RefPtr audio2 = CreateAudioData(10); + mTrack->AppendData(audio2, nullptr); + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + EXPECT_EQ(mTrack->GetEnd(), end); + + // Expected: all data has been sent, so the track fills silence in this + // iteration and ends in the next one. + mTrack->NotifyEndOfStream(); + start = end; + end += 10; + mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end); + EXPECT_EQ(mTrack->GetEnd(), end); + EXPECT_FALSE(mTrack->Ended()); + + // Expected : track ended + start = end; + end += 10; + mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END); + EXPECT_EQ(mTrack->WrittenFrames(), audio1->Frames() + audio2->Frames()); +} + +TEST_F(TestAudioDecoderInputTrack, ClearFuture) { + // Start from [0:10] and each time we move the time by 10 frames. 
+ // Expected: appended=30, expected duration=10 + RefPtr audio1 = CreateAudioData(30); + mTrack->AppendData(audio1, nullptr); + TrackTime start = 0; + TrackTime end = 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + + // In next iteration [10:20], we would consume the remaining data that was + // appended in the previous iteration. + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + + // Clear future data which is the remaining 10 frames so the track would + // only output silence. + mTrack->ClearFutureData(); + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end); + + // Test appending data again, to see if we can append data correctly after + // calling `ClearFutureData()`. + RefPtr audio2 = CreateAudioData(10); + mTrack->AppendData(audio2, nullptr); + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + + // Run another iteration that should only contain silence because the data + // we appended was only enough for one iteration. + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end); + + // Clearing future data also removes the EOS. + mTrack->NotifyEndOfStream(); + mTrack->ClearFutureData(); + start = end; + end += 10; + mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end); + EXPECT_FALSE(mTrack->Ended()); + + // As EOS has been removed, in next iteration the track would still be + // running. 
+ start = end; + end += 10; + mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end); + EXPECT_FALSE(mTrack->Ended()); + EXPECT_EQ(mTrack->WrittenFrames(), + (audio1->Frames() - 10 /* got clear */) + audio2->Frames()); +} + +TEST_F(TestAudioDecoderInputTrack, InputRateChange) { + // Start from [0:10] and each time we move the time by 10 frames. + // Expected: appended=10, expected duration=10 + RefPtr audio1 = CreateAudioData(10); + mTrack->AppendData(audio1, nullptr); + TrackTime start = 0; + TrackTime end = 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + + // Change input sample rate to the half, input data should be resampled and + // its duration would become longer. + // Expected: appended=10 + 5, + // expected duration=10 + 5*2 (resampled) + mInfo.mRate = kRate / 2; + RefPtr audioHalfSampleRate = CreateAudioData(5); + mTrack->AppendData(audioHalfSampleRate, nullptr); + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + + // Change input sample rate to the double, input data should be resampled and + // its duration would become shorter. 
+ // Expected: appended=10 + 10 + 10, + // expected duration=10 + 10 + 10/2(resampled) + 5(silence) + mInfo.mRate = kRate * 2; + RefPtr audioDoubleSampleRate = CreateAudioData(10); + TrackTime expectedDuration = audioDoubleSampleRate->Frames() / 2; + mTrack->AppendData(audioDoubleSampleRate, nullptr); + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, start + expectedDuration); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start + expectedDuration, end); + EXPECT_EQ(mTrack->WrittenFrames(), audio1->Frames() + + audioHalfSampleRate->Frames() * 2 + + audioDoubleSampleRate->Frames() / 2); +} + +TEST_F(TestAudioDecoderInputTrack, ChannelChange) { + // Start from [0:10] and each time we move the time by 10 frames. + // Track was initialized in stereo. + EXPECT_EQ(mTrack->NumberOfChannels(), uint32_t(2)); + + // But first audio data is mono, so the `NumberOfChannels()` changes to + // reflect the maximum channel in the audio segment. + mInfo.mChannels = 1; + RefPtr audioMono = CreateAudioData(10); + mTrack->AppendData(audioMono, nullptr); + TrackTime start = 0; + TrackTime end = 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + EXPECT_EQ(mTrack->NumberOfChannels(), audioMono->mChannels); + + // Then append audio data with 5 channels. + mInfo.mChannels = 5; + RefPtr audioWithFiveChannels = CreateAudioData(10); + mTrack->AppendData(audioWithFiveChannels, nullptr); + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + EXPECT_EQ(mTrack->NumberOfChannels(), audioWithFiveChannels->mChannels); + EXPECT_EQ(mTrack->WrittenFrames(), + audioMono->Frames() + audioWithFiveChannels->Frames()); +} + +TEST_F(TestAudioDecoderInputTrack, VolumeChange) { + // To run the volume change directly without a real graph, expect two 
+ // messages: one sets the track's volume, another is for track destruction. + EXPECT_CALL(*mGraph, AppendMessage) + .Times(2) + .WillOnce([](UniquePtr aMessage) { aMessage->Run(); }) + .WillOnce([](UniquePtr aMessage) {}); + + // The default volume is 1.0. + float expectedVolume = 1.0; + RefPtr audio = CreateAudioData(20); + TrackTime start = 0; + TrackTime end = 10; + mTrack->AppendData(audio, nullptr); + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + EXPECT_TRUE(GetTrackSegment()->GetLastChunk()->mVolume == expectedVolume); + + // After setting volume on the track, the data in the output chunk should be + // changed as well. + expectedVolume = 0.1; + mTrack->SetVolume(expectedVolume); + SpinEventLoopUntil( + [&] { return mTrack->Volume() == expectedVolume; }); + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + EXPECT_TRUE(GetTrackSegment()->GetLastChunk()->mVolume == expectedVolume); +} + +TEST_F(TestAudioDecoderInputTrack, BatchedData) { + uint32_t appendedFrames = 0; + RefPtr audio = CreateAudioData(10); + for (size_t idx = 0; idx < 50; idx++) { + mTrack->AppendData(audio, nullptr); + appendedFrames += audio->Frames(); + } + + // First we need to call `ProcessInput` at least once to drain the track's + // SPSC queue, otherwise we're not able to push the batched data later. + TrackTime start = 0; + TrackTime end = 10; + uint32_t expectedFrames = end - start; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + + // The batched data would be pushed to the graph thread in around 10ms after + // the track first time started to batch data, which we can't control here. + // Therefore, we need to wait until the batched data gets cleared. 
+ SpinEventLoopUntil( + [&] { return !mTrack->HasBatchedData(); }); + + // Check that we received all the remaining data previously appended. + start = end; + end = start + (appendedFrames - expectedFrames); + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end); + + // Check that we received no more data than previously appended. + start = end; + end += 10; + mTrack->ProcessInput(start, end, kNoFlags); + EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end); + EXPECT_EQ(mTrack->WrittenFrames(), appendedFrames); +} + +TEST_F(TestAudioDecoderInputTrack, OutputAndEndEvent) { + // Append an audio and EOS, the output event should notify the amount of + // frames that is equal to the amount of audio we appended. + RefPtr audio = CreateAudioData(10); + MozPromiseHolder holder; + RefPtr p = holder.Ensure(__func__); + MediaEventListener outputListener = + mTrack->OnOutput().Connect(NS_GetCurrentThread(), [&](TrackTime aFrame) { + EXPECT_EQ(aFrame, audio->Frames()); + holder.Resolve(true, __func__); + }); + mTrack->AppendData(audio, nullptr); + mTrack->NotifyEndOfStream(); + TrackTime start = 0; + TrackTime end = 10; + mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END); + Unused << WaitFor(p); + + // Track should end in this iteration, so the end event should be notified. 
+ p = holder.Ensure(__func__); + MediaEventListener endListener = mTrack->OnEnd().Connect( + NS_GetCurrentThread(), [&]() { holder.Resolve(true, __func__); }); + start = end; + end += 10; + mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END); + Unused << WaitFor(p); + outputListener.Disconnect(); + endListener.Disconnect(); +} diff --git a/dom/media/gtest/moz.build b/dom/media/gtest/moz.build index d00d1ef5889f..aaa5e45972ee 100644 --- a/dom/media/gtest/moz.build +++ b/dom/media/gtest/moz.build @@ -9,6 +9,7 @@ include("/dom/media/webrtc/third_party_build/webrtc.mozbuild") DEFINES["ENABLE_SET_CUBEB_BACKEND"] = True LOCAL_INCLUDES += [ + "/dom/media/mediasink", "/dom/media/webrtc/common/", "/third_party/libwebrtc", "/third_party/libwebrtc/webrtc", @@ -20,6 +21,7 @@ UNIFIED_SOURCES += [ "TestAudioBuffers.cpp", "TestAudioCallbackDriver.cpp", "TestAudioCompactor.cpp", + "TestAudioDecoderInputTrack.cpp", "TestAudioDriftCorrection.cpp", "TestAudioMixer.cpp", "TestAudioPacketizer.cpp", diff --git a/dom/media/mediasink/AudioDecoderInputTrack.h b/dom/media/mediasink/AudioDecoderInputTrack.h index 8eb776e6c270..c5e37eb94b53 100644 --- a/dom/media/mediasink/AudioDecoderInputTrack.h +++ b/dom/media/mediasink/AudioDecoderInputTrack.h @@ -94,6 +94,7 @@ class AudioDecoderInputTrack final : public ProcessedMediaTrack { void SetPreservesPitch(bool aPreservesPitch); // After calling this, the track are not expected to receive any new data. void Close(); + bool HasBatchedData() const; MediaEventSource& OnOutput() { return mOnOutput; } MediaEventSource& OnEnd() { return mOnEnd; } @@ -103,6 +104,16 @@ class AudioDecoderInputTrack final : public ProcessedMediaTrack { void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override; uint32_t NumberOfChannels() const override; + // The functions below are only used for testing. 
+ TrackTime WrittenFrames() const { + AssertOnGraphThread(); + return mWrittenFrames; + } + float Volume() const { + AssertOnGraphThread(); + return mVolume; + } + protected: ~AudioDecoderInputTrack(); @@ -123,7 +134,6 @@ class AudioDecoderInputTrack final : public ProcessedMediaTrack { bool HasSentAllData() const; bool ShouldBatchData() const; - bool HasBatchedData() const; void BatchData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle); void DispatchPushBatchedDataIfNeeded(); void PushBatchedDataIfNeeded();