diff --git a/dom/media/MediaTrackGraphImpl.h b/dom/media/MediaTrackGraphImpl.h
index ef2a2d6dc179..d0c3ac84998d 100644
--- a/dom/media/MediaTrackGraphImpl.h
+++ b/dom/media/MediaTrackGraphImpl.h
@@ -163,7 +163,7 @@ class MediaTrackGraphImpl : public MediaTrackGraph,
    * Append a ControlMessage to the message queue. This queue is drained
    * during RunInStableState; the messages will run on the graph thread.
    */
-  void AppendMessage(UniquePtr<ControlMessage> aMessage);
+  virtual void AppendMessage(UniquePtr<ControlMessage> aMessage);
 
   /**
    * Dispatches a runnable from any thread to the correct main thread for this
diff --git a/dom/media/gtest/TestAudioDecoderInputTrack.cpp b/dom/media/gtest/TestAudioDecoderInputTrack.cpp
new file mode 100644
index 000000000000..bee1336a0fc9
--- /dev/null
+++ b/dom/media/gtest/TestAudioDecoderInputTrack.cpp
@@ -0,0 +1,413 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <utility>
+
+#include "AudioDecoderInputTrack.h"
+#include "gmock/gmock.h"
+#include "GraphDriver.h"
+#include "gtest/gtest.h"
+#include "MediaInfo.h"
+#include "MediaTrackGraphImpl.h"
+#include "nsThreadUtils.h"
+#include "VideoUtils.h"
+#include "WaitFor.h"
+
+using namespace mozilla;
+using namespace mozilla::media;
+using testing::AssertionResult;
+using testing::NiceMock;
+using testing::Return;
+
+constexpr uint32_t kNoFlags = 0;  // ProcessInput flags without ALLOW_END.
+constexpr TrackRate kRate = 44100;  // Rate for the mock graph and `mInfo`.
+constexpr uint32_t kChannels = 2;  // Channels for the mock graph and `mInfo`.
+
+class MockTestGraph : public MediaTrackGraphImpl {
+ public:
+  MockTestGraph(TrackRate aRate, uint32_t aChannels)
+      : MediaTrackGraphImpl(OFFLINE_THREAD_DRIVER, DIRECT_DRIVER, aRate,
+                            aChannels, nullptr, NS_GetCurrentThread()) {
+    ON_CALL(*this, OnGraphThread).WillByDefault(Return(true));
+    // Install a mock driver instead of a null one: adding a track to the graph
+    // triggers the graph's `EnsureNextIteration()`, which requires a non-null
+    // driver. `Destroy()` has to be called manually on this graph in order to
+    // break the reference.
+    SetCurrentDriver(new NiceMock<MockDriver>());
+  }
+
+  MOCK_CONST_METHOD0(OnGraphThread, bool());
+  MOCK_METHOD1(AppendMessage, void(UniquePtr<ControlMessage>));
+
+ protected:
+  ~MockTestGraph() = default;
+
+  class MockDriver : public GraphDriver {
+    NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MockDriver, override);
+
+    MockDriver() : GraphDriver(nullptr, nullptr, 0) {
+      ON_CALL(*this, OnThread).WillByDefault(Return(true));
+      ON_CALL(*this, ThreadRunning).WillByDefault(Return(true));
+    }
+
+    MOCK_METHOD0(Start, void());
+    MOCK_METHOD0(Shutdown, void());
+    MOCK_METHOD0(IterationDuration, uint32_t());
+    MOCK_METHOD0(EnsureNextIteration, void());
+    MOCK_CONST_METHOD0(OnThread, bool());
+    MOCK_CONST_METHOD0(ThreadRunning, bool());
+
+   protected:
+    ~MockDriver() = default;
+  };
+
+  bool mEnableFakeAppend = false;  // NOTE(review): unused in this file.
+};
+
+AudioData* CreateAudioDataFromInfo(uint32_t aFrames, const AudioInfo& aInfo) {
+  AlignedAudioBuffer samples(aFrames * aInfo.mChannels);  // frames x channels
+  return new AudioData(0, TimeUnit::Zero(), std::move(samples), aInfo.mChannels,
+                       aInfo.mRate);  // Raw pointer; callers use RefPtr.
+}
+
+AudioDecoderInputTrack* CreateTrack(MediaTrackGraph* aGraph,
+                                    nsISerialEventTarget* aThread,
+                                    const AudioInfo& aInfo,
+                                    float aPlaybackRate = 1.0,
+                                    float aVolume = 1.0,  // Default volume.
+                                    bool aPreservesPitch = true) {
+  return AudioDecoderInputTrack::Create(aGraph, aThread, aInfo, aPlaybackRate,
+                                        aVolume, aPreservesPitch);
+}
+
+class TestAudioDecoderInputTrack : public testing::Test {
+ protected:
+  void SetUp() override {
+    mGraph = MakeRefPtr<NiceMock<MockTestGraph>>(kRate, kChannels);
+
+    mInfo.mRate = kRate;
+    mInfo.mChannels = kChannels;
+    mTrack = CreateTrack(mGraph, NS_GetCurrentThread(), mInfo);
+    EXPECT_FALSE(mTrack->Ended());
+  }
+
+  void TearDown() override {
+    // This simulates the normal usage, where `Close()` is always called
+    // before `Destroy()`.
+    mTrack->Close();
+    mTrack->Destroy();
+    // Remove the track's reference from the mock graph, then release the
+    // mock graph's self-reference.
+    mGraph->RemoveTrackGraphThread(mTrack);
+    mGraph->Destroy();
+  }
+
+  AudioData* CreateAudioData(uint32_t aFrames) {
+    return CreateAudioDataFromInfo(aFrames, mInfo);
+  }
+
+  AudioSegment* GetTrackSegment() { return mTrack->GetData<AudioSegment>(); }
+
+  AssertionResult ExpectSegmentNonSilence(const char* aStartExpr,
+                                          const char* aEndExpr,
+                                          TrackTime aStart, TrackTime aEnd) {
+    AudioSegment checkedRange;
+    checkedRange.AppendSlice(*mTrack->GetData(), aStart, aEnd);
+    if (!checkedRange.IsNull()) {
+      return testing::AssertionSuccess();
+    }
+    return testing::AssertionFailure()
+           << "segment [" << aStart << ":" << aEnd << "] should be non-silence";
+  }
+
+  AssertionResult ExpectSegmentSilence(const char* aStartExpr,
+                                       const char* aEndExpr, TrackTime aStart,
+                                       TrackTime aEnd) {
+    AudioSegment checkedRange;
+    checkedRange.AppendSlice(*mTrack->GetData(), aStart, aEnd);
+    if (checkedRange.IsNull()) {
+      return testing::AssertionSuccess();
+    }
+    return testing::AssertionFailure()
+           << "segment [" << aStart << ":" << aEnd << "] should be silence";
+  }
+
+  RefPtr<MockTestGraph> mGraph;
+  RefPtr<AudioDecoderInputTrack> mTrack;
+  AudioInfo mInfo;
+};
+
+TEST_F(TestAudioDecoderInputTrack, BasicAppendData) {
+  // Start from [0:10]; each iteration advances the time by 10 frames.
+  // Expected: outputDuration=10, outputFrames=0, outputSilence=10
+  TrackTime start = 0;
+  TrackTime end = 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_EQ(mTrack->GetEnd(), end);
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
+
+  // Expected: outputDuration=20, outputFrames=5, outputSilence=15
+  RefPtr<AudioData> audio1 = CreateAudioData(5);
+  mTrack->AppendData(audio1, nullptr);
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_EQ(mTrack->GetEnd(), end);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, start + audio1->Frames());
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start + audio1->Frames(), end);
+
+  // Expected: outputDuration=30, outputFrames=15, outputSilence=15
+  RefPtr<AudioData> audio2 = CreateAudioData(10);
+  mTrack->AppendData(audio2, nullptr);
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+  EXPECT_EQ(mTrack->GetEnd(), end);
+
+  // Expected: all data has been sent, so this iteration is filled with
+  // silence and the track should end in the next iteration.
+  mTrack->NotifyEndOfStream();
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
+  EXPECT_EQ(mTrack->GetEnd(), end);
+  EXPECT_FALSE(mTrack->Ended());
+
+  // Expected: track ended. NOTE(review): `Ended()` is not asserted here.
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
+  EXPECT_EQ(mTrack->WrittenFrames(), audio1->Frames() + audio2->Frames());
+}
+
+TEST_F(TestAudioDecoderInputTrack, ClearFuture) {
+  // Start from [0:10]; each iteration advances the time by 10 frames.
+  // Expected: appended=30, expected duration=10
+  RefPtr<AudioData> audio1 = CreateAudioData(30);
+  mTrack->AppendData(audio1, nullptr);
+  TrackTime start = 0;
+  TrackTime end = 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+
+  // In the next iteration [10:20], we consume more of the data that was
+  // appended in the previous iteration.
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+
+  // Clear the future data, which is the remaining 10 frames, so the track
+  // would only output silence.
+  mTrack->ClearFutureData();
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
+
+  // Test appending data again, to see if we can append data correctly after
+  // calling `ClearFutureData()`.
+  RefPtr<AudioData> audio2 = CreateAudioData(10);
+  mTrack->AppendData(audio2, nullptr);
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+
+  // Run another iteration that should only contain silence, because the data
+  // we appended was only enough for one iteration.
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
+
+  // Clearing future data should also remove the EOS.
+  mTrack->NotifyEndOfStream();
+  mTrack->ClearFutureData();
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
+  EXPECT_FALSE(mTrack->Ended());
+
+  // As the EOS has been removed, the track should still be running in the
+  // next iteration.
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
+  EXPECT_FALSE(mTrack->Ended());
+  EXPECT_EQ(mTrack->WrittenFrames(),
+            (audio1->Frames() - 10 /* got clear */) + audio2->Frames());
+}
+
+TEST_F(TestAudioDecoderInputTrack, InputRateChange) {
+  // Start from [0:10]; each iteration advances the time by 10 frames.
+  // Expected: appended=10, expected duration=10
+  RefPtr<AudioData> audio1 = CreateAudioData(10);
+  mTrack->AppendData(audio1, nullptr);
+  TrackTime start = 0;
+  TrackTime end = 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+
+  // Halve the input sample rate; the input data should be resampled and
+  // its duration would become longer.
+  // Expected: appended=10 + 5,
+  //           expected duration=10 + 5*2 (resampled)
+  mInfo.mRate = kRate / 2;
+  RefPtr<AudioData> audioHalfSampleRate = CreateAudioData(5);
+  mTrack->AppendData(audioHalfSampleRate, nullptr);
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+
+  // Double the input sample rate; the input data should be resampled and
+  // its duration would become shorter.
+  // Expected: appended=10 + 5 + 10,
+  //           expected duration=10 + 10 + 10/2(resampled) + 5(silence)
+  mInfo.mRate = kRate * 2;
+  RefPtr<AudioData> audioDoubleSampleRate = CreateAudioData(10);
+  TrackTime expectedDuration = audioDoubleSampleRate->Frames() / 2;
+  mTrack->AppendData(audioDoubleSampleRate, nullptr);
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, start + expectedDuration);
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start + expectedDuration, end);
+  EXPECT_EQ(mTrack->WrittenFrames(), audio1->Frames() +
+                                         audioHalfSampleRate->Frames() * 2 +
+                                         audioDoubleSampleRate->Frames() / 2);
+}
+
+TEST_F(TestAudioDecoderInputTrack, ChannelChange) {
+  // Start from [0:10]; each iteration advances the time by 10 frames.
+  // The track was initialized in stereo.
+  EXPECT_EQ(mTrack->NumberOfChannels(), uint32_t(2));
+
+  // But the first audio data is mono, so `NumberOfChannels()` changes to
+  // reflect the maximum channel count in the audio segment.
+  mInfo.mChannels = 1;
+  RefPtr<AudioData> audioMono = CreateAudioData(10);
+  mTrack->AppendData(audioMono, nullptr);
+  TrackTime start = 0;
+  TrackTime end = 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+  EXPECT_EQ(mTrack->NumberOfChannels(), audioMono->mChannels);
+
+  // Then append audio data with 5 channels.
+  mInfo.mChannels = 5;
+  RefPtr<AudioData> audioWithFiveChannels = CreateAudioData(10);
+  mTrack->AppendData(audioWithFiveChannels, nullptr);
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+  EXPECT_EQ(mTrack->NumberOfChannels(), audioWithFiveChannels->mChannels);
+  EXPECT_EQ(mTrack->WrittenFrames(),
+            audioMono->Frames() + audioWithFiveChannels->Frames());
+}
+
+TEST_F(TestAudioDecoderInputTrack, VolumeChange) {
+  // Run the set-volume message inline rather than through a real graph. Two
+  // messages are expected: the volume change and the track destruction.
+  EXPECT_CALL(*mGraph, AppendMessage)
+      .Times(2)
+      .WillOnce([](UniquePtr<ControlMessage> aMessage) { aMessage->Run(); })
+      .WillOnce([](UniquePtr<ControlMessage> aMessage) {});
+
+  // The default volume is 1.0.
+  float expectedVolume = 1.0;
+  RefPtr<AudioData> audio = CreateAudioData(20);
+  TrackTime start = 0;
+  TrackTime end = 10;
+  mTrack->AppendData(audio, nullptr);
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+  EXPECT_TRUE(GetTrackSegment()->GetLastChunk()->mVolume == expectedVolume);
+
+  // After setting the volume on the track, the volume of the last output
+  // chunk should change as well.
+  expectedVolume = 0.1;
+  mTrack->SetVolume(expectedVolume);
+  SpinEventLoopUntil<ProcessFailureBehavior::IgnoreAndContinue>(
+      [&] { return mTrack->Volume() == expectedVolume; });
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+  EXPECT_TRUE(GetTrackSegment()->GetLastChunk()->mVolume == expectedVolume);
+}
+
+TEST_F(TestAudioDecoderInputTrack, BatchedData) {
+  uint32_t appendedFrames = 0;
+  RefPtr<AudioData> audio = CreateAudioData(10);
+  for (size_t idx = 0; idx < 50; idx++) {
+    mTrack->AppendData(audio, nullptr);
+    appendedFrames += audio->Frames();
+  }
+
+  // First we need to call `ProcessInput` at least once to drain the track's
+  // SPSC queue, otherwise we're not able to push the batched data later.
+  TrackTime start = 0;
+  TrackTime end = 10;
+  uint32_t expectedFrames = end - start;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+
+  // The batched data would be pushed to the graph thread around 10ms after
+  // the track first started to batch data, which we can't control here.
+  // Therefore, we need to wait until the batched data gets cleared.
+  SpinEventLoopUntil<ProcessFailureBehavior::IgnoreAndContinue>(
+      [&] { return !mTrack->HasBatchedData(); });
+
+  // Check that we received all the remaining data previously appended.
+  start = end;
+  end = start + (appendedFrames - expectedFrames);
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentNonSilence, start, end);
+
+  // Check that we received no more data than previously appended.
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, kNoFlags);
+  EXPECT_PRED_FORMAT2(ExpectSegmentSilence, start, end);
+  EXPECT_EQ(mTrack->WrittenFrames(), appendedFrames);
+}
+
+TEST_F(TestAudioDecoderInputTrack, OutputAndEndEvent) {
+  // Append audio data and EOS; the output event should report a frame count
+  // that is equal to the amount of audio we appended.
+  RefPtr<AudioData> audio = CreateAudioData(10);
+  MozPromiseHolder<GenericPromise> holder;
+  RefPtr<GenericPromise> p = holder.Ensure(__func__);
+  MediaEventListener outputListener =
+      mTrack->OnOutput().Connect(NS_GetCurrentThread(), [&](TrackTime aFrame) {
+        EXPECT_EQ(aFrame, audio->Frames());
+        holder.Resolve(true, __func__);
+      });
+  mTrack->AppendData(audio, nullptr);
+  mTrack->NotifyEndOfStream();
+  TrackTime start = 0;
+  TrackTime end = 10;
+  mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
+  Unused << WaitFor(p);
+
+  // The track should end in this iteration, so the end event is awaited.
+  p = holder.Ensure(__func__);
+  MediaEventListener endListener = mTrack->OnEnd().Connect(
+      NS_GetCurrentThread(), [&]() { holder.Resolve(true, __func__); });
+  start = end;
+  end += 10;
+  mTrack->ProcessInput(start, end, ProcessedMediaTrack::ALLOW_END);
+  Unused << WaitFor(p);
+  outputListener.Disconnect();
+  endListener.Disconnect();
+}
diff --git a/dom/media/gtest/moz.build b/dom/media/gtest/moz.build
index d00d1ef5889f..aaa5e45972ee 100644
--- a/dom/media/gtest/moz.build
+++ b/dom/media/gtest/moz.build
@@ -9,6 +9,7 @@ include("/dom/media/webrtc/third_party_build/webrtc.mozbuild")
 DEFINES["ENABLE_SET_CUBEB_BACKEND"] = True
 
 LOCAL_INCLUDES += [
+    "/dom/media/mediasink",
     "/dom/media/webrtc/common/",
     "/third_party/libwebrtc",
     "/third_party/libwebrtc/webrtc",
@@ -20,6 +21,7 @@ UNIFIED_SOURCES += [
     "TestAudioBuffers.cpp",
     "TestAudioCallbackDriver.cpp",
     "TestAudioCompactor.cpp",
+    "TestAudioDecoderInputTrack.cpp",
     "TestAudioDriftCorrection.cpp",
     "TestAudioMixer.cpp",
     "TestAudioPacketizer.cpp",
diff --git a/dom/media/mediasink/AudioDecoderInputTrack.h b/dom/media/mediasink/AudioDecoderInputTrack.h
index 8eb776e6c270..c5e37eb94b53 100644
--- a/dom/media/mediasink/AudioDecoderInputTrack.h
+++ b/dom/media/mediasink/AudioDecoderInputTrack.h
@@ -94,6 +94,7 @@ class AudioDecoderInputTrack final : public ProcessedMediaTrack {
   void SetPreservesPitch(bool aPreservesPitch);
   // After calling this, the track are not expected to receive any new data.
   void Close();
+  bool HasBatchedData() const;
 
   MediaEventSource<int64_t>& OnOutput() { return mOnOutput; }
   MediaEventSource<void>& OnEnd() { return mOnEnd; }
@@ -103,6 +104,16 @@ class AudioDecoderInputTrack final : public ProcessedMediaTrack {
   void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
   uint32_t NumberOfChannels() const override;
 
+  // Test-only accessors. Each calls AssertOnGraphThread() before reading.
+  TrackTime WrittenFrames() const {
+    AssertOnGraphThread();
+    return mWrittenFrames;
+  }
+  float Volume() const {
+    AssertOnGraphThread();
+    return mVolume;
+  }
+
  protected:
   ~AudioDecoderInputTrack();
 
@@ -123,7 +134,6 @@ class AudioDecoderInputTrack final : public ProcessedMediaTrack {
   bool HasSentAllData() const;
 
   bool ShouldBatchData() const;
-  bool HasBatchedData() const;
   void BatchData(AudioData* aAudio, const PrincipalHandle& aPrincipalHandle);
   void DispatchPushBatchedDataIfNeeded();
   void PushBatchedDataIfNeeded();