diff --git a/dom/media/AudioConverter.cpp b/dom/media/AudioConverter.cpp
index c9660d3555f9..162e7c730258 100644
--- a/dom/media/AudioConverter.cpp
+++ b/dom/media/AudioConverter.cpp
@@ -33,19 +33,7 @@ AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut)
   MOZ_DIAGNOSTIC_ASSERT(aOut.Interleaved(), "planar audio format not supported");
   mIn.Layout().MappingTable(mOut.Layout(), mChannelOrderMap);
   if (aIn.Rate() != aOut.Rate()) {
-    int error;
-    mResampler = speex_resampler_init(aOut.Channels(),
-                                      aIn.Rate(),
-                                      aOut.Rate(),
-                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
-                                      &error);
-
-    if (error == RESAMPLER_ERR_SUCCESS) {
-      speex_resampler_skip_zeros(mResampler);
-    } else {
-      NS_WARNING("Failed to initialize resampler.");
-      mResampler = nullptr;
-    }
+    RecreateResampler();
   }
 }
 
@@ -282,6 +270,46 @@ AudioConverter::ResampleAudio(void* aOut, const void* aIn, size_t aFrames)
   return outframes;
 }
 
+// Replaces mResampler with a new instance built from the mIn/mOut settings.
+void
+AudioConverter::RecreateResampler()
+{
+  if (mResampler) {
+    // Release the previous instance before its pointer is overwritten.
+    speex_resampler_destroy(mResampler);
+  }
+  int status;
+  mResampler = speex_resampler_init(mOut.Channels(), mIn.Rate(), mOut.Rate(),
+                                    SPEEX_RESAMPLER_QUALITY_DEFAULT, &status);
+
+  if (status != RESAMPLER_ERR_SUCCESS) {
+    // On failure the converter is left without a resampler.
+    NS_WARNING("Failed to initialize resampler.");
+    mResampler = nullptr;
+    return;
+  }
+  speex_resampler_skip_zeros(mResampler);
+}
+
+// Drains the resampler into aOut; returns what ResampleAudio reports (0 if none).
+size_t
+AudioConverter::DrainResampler(void* aOut)
+{
+  if (!mResampler) {
+    return 0;
+  }
+  const int latency = speex_resampler_get_input_latency(mResampler);
+  AlignedByteBuffer padding(FramesOutToSamples(latency) *
+                            AudioConfig::SampleSize(mOut.Format()));
+  if (!padding) {
+    return 0; // OOM
+  }
+  const int drained = ResampleAudio(aOut, padding.Data(), latency);
+  // Tear down the resampler as it's easier than handling follow-up.
+  RecreateResampler();
+  return drained;
+}
+
 size_t
 AudioConverter::UpmixAudio(void* aOut, const void* aIn, size_t aFrames) const
 {
@@ -327,7 +355,13 @@ AudioConverter::UpmixAudio(void* aOut, const void* aIn, size_t aFrames) const
 size_t
 AudioConverter::ResampleRecipientFrames(size_t aFrames) const
 {
-  return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
+  if (!aFrames && mIn.Rate() != mOut.Rate()) {
+    // Draining: report the frames currently buffered in the resampler.
+    // mResampler is null when its initialization failed; nothing to drain.
+    return mResampler ? speex_resampler_get_output_latency(mResampler) : 0;
+  }
+  // Regular case: input frames scaled by the out/in rate ratio, plus one.
+  return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1;
 }
 
 size_t
diff --git a/dom/media/AudioConverter.h b/dom/media/AudioConverter.h
index e544eaeaf7ae..637ffcdc2f76 100644
--- a/dom/media/AudioConverter.h
+++ b/dom/media/AudioConverter.h
@@ -123,6 +123,8 @@ public:
   // Convert the AudioDataBuffer.
   // Conversion will be done in place if possible. Otherwise a new buffer will
   // be returned.
+  // If an empty buffer is provided and resampling is expected, the resampler
+  // will be drained.
   template <AudioConfig::SampleFormat Format, typename Value>
   AudioDataBuffer<Format, Value> Process(AudioDataBuffer<Format, Value>&& aBuffer)
   {
@@ -152,7 +154,7 @@ public:
       return AudioDataBuffer<Format, Value>(Move(temp1));
     }
     frames = ProcessInternal(temp1.Data(), aBuffer.Data(), frames);
-    if (!frames || mIn.Rate() == mOut.Rate()) {
+    if (mIn.Rate() == mOut.Rate()) {
       temp1.SetLength(FramesOutToSamples(frames));
       return AudioDataBuffer<Format, Value>(Move(temp1));
     }
@@ -161,13 +163,17 @@ public:
     // If we are downsampling we can re-use it.
     AlignedBuffer<Value>* outputBuffer = &temp1;
     AlignedBuffer<Value> temp2;
-    if (mOut.Rate() > mIn.Rate()) {
-      // We are upsampling, we can't work in place. Allocate another temporary
-      // buffer where the upsampling will occur.
+    if (!frames || mOut.Rate() > mIn.Rate()) {
+      // We are upsampling or about to drain, we can't work in place.
+      // Allocate another temporary buffer where the upsampling will occur.
       temp2.SetLength(FramesOutToSamples(ResampleRecipientFrames(frames)));
       outputBuffer = &temp2;
     }
-    frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames);
+    if (!frames) {
+      frames = DrainResampler(outputBuffer->Data());
+    } else {
+      frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames);
+    }
     outputBuffer->SetLength(FramesOutToSamples(frames));
     return AudioDataBuffer<Format, Value>(Move(*outputBuffer));
   }
@@ -223,6 +229,8 @@ private:
   SpeexResamplerState* mResampler;
   size_t ResampleAudio(void* aOut, const void* aIn, size_t aFrames);
   size_t ResampleRecipientFrames(size_t aFrames) const;
+  void RecreateResampler();
+  size_t DrainResampler(void* aOut);
 };
 
 } // namespace mozilla
diff --git a/dom/media/mediasink/DecodedAudioDataSink.cpp b/dom/media/mediasink/DecodedAudioDataSink.cpp
index bb5b83a8019e..9ad133aed7df 100644
--- a/dom/media/mediasink/DecodedAudioDataSink.cpp
+++ b/dom/media/mediasink/DecodedAudioDataSink.cpp
@@ -321,6 +321,12 @@ DecodedAudioDataSink::NotifyAudioNeeded()
     return;
   }
 
+  if (AudioQueue().IsFinished() && !AudioQueue().GetSize()) {
+    // We have reached the end of the data, drain the resampler.
+    DrainConverter();
+    return;
+  }
+
   // Always ensure we have two processed frames pending to allow for processing
   // latency.
   while (AudioQueue().GetSize() && mProcessedQueue.GetSize() < 2) {
@@ -339,6 +345,8 @@ DecodedAudioDataSink::NotifyAudioNeeded()
                  mConverter ? mConverter->InputConfig().Rate() : 0,
                  data->mChannels, data->mRate);
 
+      DrainConverter();
+
       // mFramesParsed indicates the current playtime in frames at the current
       // input sampling rate. Recalculate it per the new sampling rate.
       if (mFramesParsed) {
@@ -385,10 +393,15 @@ DecodedAudioDataSink::NotifyAudioNeeded()
       // time.
       missingFrames = std::min<int64_t>(INT32_MAX, missingFrames.value());
       mFramesParsed += missingFrames.value();
-      AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
-      RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
-      if (silence) {
-        mProcessedQueue.Push(silence);
+      // We need to insert silence, first use drained frames if any.
+      missingFrames -= DrainConverter(missingFrames.value());
+      // Insert silence if still needed.
+      if (missingFrames.value()) {
+        AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
+        RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
+        if (silence) {
+          mProcessedQueue.Push(silence);
+        }
       }
     }
 
@@ -404,6 +417,7 @@ DecodedAudioDataSink::NotifyAudioNeeded()
       }
     }
     mProcessedQueue.Push(data);
+    mLastProcessedPacket = Some(data);
   }
 }
 
@@ -432,5 +446,36 @@ DecodedAudioDataSink::CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
   return data.forget();
 }
 
+// Drains mConverter into mProcessedQueue, keeping at most aMaxFrames frames.
+uint32_t
+DecodedAudioDataSink::DrainConverter(uint32_t aMaxFrames)
+{
+  MOZ_ASSERT(mProcessingThread->IsCurrentThreadIn());
+  if (!(mConverter && mLastProcessedPacket)) {
+    return 0; // Nothing to drain.
+  }
+
+  // An empty input packet is how the audio converter is asked to drain.
+  AlignedAudioBuffer drained =
+    mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();
+
+  // Clamp the drained output to the number of frames the caller accepts.
+  const uint32_t available = drained.Length() / mOutputChannels;
+  const uint32_t kept = std::min(available, aMaxFrames);
+  drained.SetLength(kept * mOutputChannels);
+
+  // mLastProcessedPacket is the reference packet: we assume the drained data
+  // starts just before the end of the previous packet. The exact start time
+  // matters little, but the gap detection in PopFrames must not be triggered.
+  RefPtr<AudioData> flushed =
+    CreateAudioFromBuffer(Move(drained), mLastProcessedPacket.ref());
+  mLastProcessedPacket.reset();
+  if (!flushed) {
+    return 0;
+  }
+  mProcessedQueue.Push(flushed);
+  return flushed->mFrames;
+}
+
 } // namespace media
 } // namespace mozilla
diff --git a/dom/media/mediasink/DecodedAudioDataSink.h b/dom/media/mediasink/DecodedAudioDataSink.h
index aa85a874fae9..e58349e19675 100644
--- a/dom/media/mediasink/DecodedAudioDataSink.h
+++ b/dom/media/mediasink/DecodedAudioDataSink.h
@@ -115,6 +115,9 @@ private:
   void OnAudioPopped(const RefPtr<MediaData>& aSample);
   void OnAudioPushed(const RefPtr<MediaData>& aSample);
   void NotifyAudioNeeded();
+  // Drain the converter and add the output to the processed audio queue.
+  // A maximum of aMaxFrames will be added.
+  uint32_t DrainConverter(uint32_t aMaxFrames = UINT32_MAX);
   already_AddRefed<AudioData> CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
                                                     AudioData* aReference);
   UniquePtr<AudioConverter> mConverter;
@@ -125,6 +128,7 @@ private:
   // the input stream. It indicates the time in frames since playback started
   // at the current input framerate.
   int64_t mFramesParsed;
+  Maybe<RefPtr<AudioData>> mLastProcessedPacket;
   int64_t mLastEndTime;
   // Never modifed after construction.
   uint32_t mOutputRate;