From 87b5e2c9efc327bcfaac6b5ee1001ac31dd3d0dc Mon Sep 17 00:00:00 2001 From: "Timothy B. Terriberry" Date: Thu, 31 May 2012 11:13:17 -0700 Subject: [PATCH] Bug 759612 - Update granule position accounting for Opus, r=kinetik --- content/media/ogg/nsOggCodecState.cpp | 310 ++++++++++++++++++-------- content/media/ogg/nsOggCodecState.h | 32 ++- content/media/ogg/nsOggReader.cpp | 57 ++--- 3 files changed, 274 insertions(+), 125 deletions(-) diff --git a/content/media/ogg/nsOggCodecState.cpp b/content/media/ogg/nsOggCodecState.cpp index 532919720bf2..7b72eab3ad7a 100644 --- a/content/media/ogg/nsOggCodecState.cpp +++ b/content/media/ogg/nsOggCodecState.cpp @@ -260,6 +260,7 @@ bool nsTheoraState::Init() { bool nsTheoraState::DecodeHeader(ogg_packet* aPacket) { + nsAutoRef autoRelease(aPacket); mPacketCount++; int ret = th_decode_headerin(&mInfo, &mComment, @@ -283,14 +284,14 @@ nsTheoraState::DecodeHeader(ogg_packet* aPacket) bool isSetupHeader = aPacket->bytes > 0 && aPacket->packet[0] == 0x82; if (ret < 0 || mPacketCount > 3) { // We've received an error, or the first three packets weren't valid - // header packets, assume bad input, and don't activate the bitstream. - mDoneReadingHeaders = true; + // header packets. Assume bad input. + // Our caller will deactivate the bitstream. + return false; } else if (ret > 0 && isSetupHeader && mPacketCount == 3) { // Successfully read the three header packets. mDoneReadingHeaders = true; - mActive = true; } - return mDoneReadingHeaders; + return true; } PRInt64 @@ -521,6 +522,7 @@ nsVorbisState::~nsVorbisState() { } bool nsVorbisState::DecodeHeader(ogg_packet* aPacket) { + nsAutoRef autoRelease(aPacket); mPacketCount++; int ret = vorbis_synthesis_headerin(&mInfo, &mComment, @@ -545,15 +547,15 @@ bool nsVorbisState::DecodeHeader(ogg_packet* aPacket) { if (ret < 0 || mPacketCount > 3) { // We've received an error, or the first three packets weren't valid - // header packets, assume bad input, and deactivate the bitstream. - mDoneReadingHeaders = true; - mActive = false; + // header packets. Assume bad input. Our caller will deactivate the + // bitstream. + return false; } else if (ret == 0 && isSetupHeader && mPacketCount == 3) { // Successfully read the three header packets. // The bitstream remains active. mDoneReadingHeaders = true; } - return mDoneReadingHeaders; + return true; } bool nsVorbisState::Init() @@ -759,7 +761,9 @@ nsOpusState::nsOpusState(ogg_page* aBosPage) : mChannelMapping(0), mStreams(0), mDecoder(NULL), - mSkip(0) + mSkip(0), + mPrevPacketGranulepos(0), + mPrevPageGranulepos(0) { MOZ_COUNT_CTOR(nsOpusState); } @@ -775,13 +779,23 @@ nsOpusState::~nsOpusState() { } nsresult nsOpusState::Reset() +{ + return Reset(false); +} + +nsresult nsOpusState::Reset(bool aStart) { nsresult res = NS_OK; if (mActive && mDecoder) { // Reset the decoder. opus_decoder_ctl(mDecoder, OPUS_RESET_STATE); - mSkip = 0; // Let the seek logic handle this. + // Let the seek logic handle pre-roll if we're not seeking to the start. + mSkip = aStart ? mPreSkip : 0; + // This lets us distinguish the first page being the last page vs. just + // not having processed the previous page when we encounter the last page. + mPrevPageGranulepos = aStart ? 0 : -1; + mPrevPacketGranulepos = aStart ? 0 : -1; } // Clear queued data. @@ -796,6 +810,9 @@ nsresult nsOpusState::Reset() bool nsOpusState::Init(void) { + if (!mActive) + return false; + int error; NS_ASSERTION(mDecoder == NULL, "leaking OpusDecoder"); @@ -810,65 +827,109 @@ bool nsOpusState::Init(void) bool nsOpusState::DecodeHeader(ogg_packet* aPacket) { - // Minimum length of any header is 16 bytes. - if (aPacket->bytes < 16) { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: header too short")); - mActive = false; - return true; - } + nsAutoRef autoRelease(aPacket); + switch(mPacketCount++) { + // Parse the id header. + case 0: { + if (aPacket->bytes < 19 || memcmp(aPacket->packet, "OpusHead", 8)) { + LOG(PR_LOG_DEBUG, ("Invalid Opus file: unrecognized header")); + return false; + } - // Try parsing as the metadata header. - if (!memcmp(aPacket->packet, "OpusTags", 8)) { - mDoneReadingHeaders = true; // This is the last Opus header. - mActive = true; - return true; - } + mRate = 48000; // The Opus decoder runs at 48 kHz regardless. - // Otherwise, parse as the id header. - if (aPacket->bytes < 19 || memcmp(aPacket->packet, "OpusHead", 8)) { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: unrecognized header")); - mActive = false; - return true; - } + int version = aPacket->packet[8]; + // Accept file format versions 0.x. + if ((version & 0xf0) != 0) { + LOG(PR_LOG_DEBUG, ("Rejecting unknown Opus file version %d", version)); + return false; + } - mRate = 48000; // The Opus decoder runs at 48 kHz regardless. + mChannels= aPacket->packet[9]; + mPreSkip = LEUint16(aPacket->packet + 10); + mNominalRate = LEUint32(aPacket->packet + 12); + mGain = (float)LEUint16(aPacket->packet + 16) / 256.0; + mChannelMapping = aPacket->packet[18]; - int version = aPacket->packet[8]; - // Accept file format versions 0.x. - if ((version & 0xf0) != 0) { - LOG(PR_LOG_DEBUG, ("Rejecting unknown Opus file version %d", version)); - mActive = false; - return true; - } - - mChannels= aPacket->packet[9]; - mPreSkip = LEUint16(aPacket->packet + 10); - mNominalRate = LEUint32(aPacket->packet + 12); - mGain = (float)LEUint16(aPacket->packet + 16) / 256.0; - mChannelMapping = aPacket->packet[18]; - - if (mChannelMapping == 0) { - mStreams = 1; - } else if (aPacket->bytes > 19) { - mStreams = aPacket->packet[19]; - } else { - LOG(PR_LOG_DEBUG, ("Invalid Opus file: channel mapping %d," - " but no channel mapping table", mChannelMapping)); - mActive = false; - return true; - } + if (mChannelMapping == 0) { + mStreams = 1; + } else if (aPacket->bytes > 19) { + mStreams = aPacket->packet[19]; + } else { + LOG(PR_LOG_DEBUG, ("Invalid Opus file: channel mapping %d," + " but no channel mapping table", mChannelMapping)); + return false; + } #ifdef DEBUG - LOG(PR_LOG_DEBUG, ("Opus stream header:")); - LOG(PR_LOG_DEBUG, (" channels: %d", mChannels)); - LOG(PR_LOG_DEBUG, (" preskip: %d", mPreSkip)); - LOG(PR_LOG_DEBUG, (" original: %d Hz", mNominalRate)); - LOG(PR_LOG_DEBUG, (" gain: %.2f dB", mGain)); - LOG(PR_LOG_DEBUG, ("Channel Mapping:")); - LOG(PR_LOG_DEBUG, (" family: %d", mChannelMapping)); - LOG(PR_LOG_DEBUG, (" streams: %d", mStreams)); + LOG(PR_LOG_DEBUG, ("Opus stream header:")); + LOG(PR_LOG_DEBUG, (" channels: %d", mChannels)); + LOG(PR_LOG_DEBUG, (" preskip: %d", mPreSkip)); + LOG(PR_LOG_DEBUG, (" original: %d Hz", mNominalRate)); + LOG(PR_LOG_DEBUG, (" gain: %.2f dB", mGain)); + LOG(PR_LOG_DEBUG, ("Channel Mapping:")); + LOG(PR_LOG_DEBUG, (" family: %d", mChannelMapping)); + LOG(PR_LOG_DEBUG, (" streams: %d", mStreams)); #endif + } + break; + // Parse the metadata header. + case 1: { + if (aPacket->bytes < 16 || memcmp(aPacket->packet, "OpusTags", 8)) + return false; + + // We don't actually need any of the data here, but validating the + // contents helps reduce the propagation of broken files. + // This only checks for actual malicious content: too little data, too + // many comments, or comments that are too long. + // It does not ensure they are valid UTF-8, nor does it validate the + // required ASCII_TAG=value format of the user comments. + const unsigned char *buf = aPacket->packet + 8; + PRUint32 bytes = aPacket->bytes - 8; + PRUint32 len; + // Skip the vendor string. + len = LEUint32(buf); + buf += 4; + bytes -= 4; + if (len > bytes) + return false; + buf += len; + bytes -= len; + // Skip the user comments. + if (bytes < 4) + return false; + PRUint32 ncomments = LEUint32(buf); + buf += 4; + bytes -= 4; + // If there are so many comments even their length fields won't fit in + // the packet, stop reading now. + if (ncomments > (bytes>>2)) + return false; + PRUint32 i; + for (i = 0; i < ncomments; i++) { + if (bytes < 4) + return false; + len = LEUint32(buf); + buf += 4; + bytes -= 4; + if (len > bytes) + return false; + buf += len; + bytes -= len; + } + } + break; + + // We made it to the first data packet (which includes reconstructing + // timestamps for it in PageIn). Success! + default: { + mDoneReadingHeaders = true; + // Put it back on the queue so we can decode it. + mPackets.PushFront(autoRelease.disown()); + } + break; + } return true; } @@ -909,9 +970,10 @@ nsresult nsOpusState::PageIn(ogg_page* aPage) bool haveGranulepos; nsresult rv = PacketOutUntilGranulepos(haveGranulepos); - if (NS_FAILED(rv) || !haveGranulepos || !mDoneReadingHeaders) + if (NS_FAILED(rv) || !haveGranulepos || mPacketCount < 2) return rv; - ReconstructGranulepos(); + if(!ReconstructOpusGranulepos()) + return NS_ERROR_FAILURE; for (PRUint32 i = 0; i < mUnstamped.Length(); i++) { ogg_packet* packet = mUnstamped[i]; NS_ASSERTION(!IsHeader(packet), "Don't try to play a header packet"); @@ -922,30 +984,108 @@ nsresult nsOpusState::PageIn(ogg_page* aPage) return NS_OK; } -void nsOpusState::ReconstructGranulepos(void) +// Helper method to return the change in granule position due to an Opus packet +// (as distinct from the number of samples in the packet, which depends on the +// decoder rate). It should work with a multistream Opus file, and continue to +// work should we ever allow the decoder to decode at a rate other than 48 kHz. +// It even works before we've created the actual Opus decoder. +static int GetOpusDeltaGP(ogg_packet* packet) +{ + int nframes; + nframes = opus_packet_get_nb_frames(packet->packet, packet->bytes); + if (nframes > 0) { + return nframes*opus_packet_get_samples_per_frame(packet->packet, 48000); + } + NS_WARNING("Invalid Opus packet."); + return nframes; +} + +bool nsOpusState::ReconstructOpusGranulepos(void) { NS_ASSERTION(mUnstamped.Length() > 0, "Must have unstamped packets"); - DebugOnly last = mUnstamped[mUnstamped.Length()-1]; + ogg_packet* last = mUnstamped[mUnstamped.Length()-1]; NS_ASSERTION(last->e_o_s || last->granulepos > 0, "Must know last granulepos!"); + PRInt64 gp; + // If this is the last page, and we've seen at least one previous page (or + // this is the first page)... + if (last->e_o_s) { + if (mPrevPageGranulepos != -1) { + // If this file only has one page and the final granule position is + // smaller than the pre-skip amount, we MUST reject the stream. + if (!mDoneReadingHeaders && last->granulepos < mPreSkip) + return false; + PRInt64 last_gp = last->granulepos; + gp = mPrevPageGranulepos; + // Loop through the packets forwards, adding the current packet's + // duration to the previous granulepos to get the value for the + // current packet. + for (PRUint32 i = 0; i < mUnstamped.Length() - 1; ++i) { + ogg_packet* packet = mUnstamped[i]; + int offset = GetOpusDeltaGP(packet); + // Check for error (negative offset) and overflow. + if (offset >= 0 && gp <= PR_INT64_MAX - offset) { + gp += offset; + if (gp >= last_gp) { + NS_WARNING("Opus end trimming removed more than a full packet."); + // We were asked to remove a full packet's worth of data or more. + // Encoders SHOULD NOT produce streams like this, but we'll handle + // it for them anyway. + gp = last_gp; + for (PRUint32 j = i+1; j < mUnstamped.Length(); ++j) { + nsOggCodecState::ReleasePacket(mUnstamped[j]); + } + mUnstamped.RemoveElementsAt(i+1, mUnstamped.Length() - (i+1)); + last = packet; + last->e_o_s = 1; + } + } + packet->granulepos = gp; + } + mPrevPageGranulepos = last_gp; + return true; + } else { + NS_WARNING("No previous granule position to use for Opus end trimming."); + // If we don't have a previous granule position, fall through. + // We simply won't trim any samples from the end. + // TODO: Are we guaranteed to have seen a previous page if there is one? + } + } + gp = last->granulepos; // Loop through the packets backwards, subtracting the next // packet's duration from its granulepos to get the value // for the current packet. for (PRUint32 i = mUnstamped.Length() - 1; i > 0; i--) { - ogg_packet* next = mUnstamped[i]; - int offset = opus_decoder_get_nb_samples(mDecoder, - next->packet, - next->bytes); + int offset = GetOpusDeltaGP(mUnstamped[i]); // Check for error (negative offset) and overflow. - if (offset >= 0 && offset <= next->granulepos) { - mUnstamped[i - 1]->granulepos = next->granulepos - offset; - } else { - if (offset > next->granulepos) + if (offset >= 0) { + if (offset <= gp) { + gp -= offset; + } else { + // If the granule position of the first data page is smaller than the + // number of decodable audio samples on that page, then we MUST reject + // the stream. + if (!mDoneReadingHeaders) + return false; + // It's too late to reject the stream. + // If we get here, this almost certainly means the file has screwed-up + // timestamps somewhere after the first page. NS_WARNING("Clamping negative Opus granulepos to zero."); - mUnstamped[i - 1]->granulepos = 0; + gp = 0; + } } + mUnstamped[i - 1]->granulepos = gp; } + + // Check to make sure the first granule position is at least as large as the + // total number of samples decodable from the first page with completed + // packets. This requires looking at the duration of the first packet, too. + // We MUST reject such streams. + if (!mDoneReadingHeaders && GetOpusDeltaGP(mUnstamped[0]) > gp) + return false; + mPrevPageGranulepos = last->granulepos; + return true; } #endif /* MOZ_OPUS */ @@ -1238,6 +1378,7 @@ nsresult nsSkeletonState::GetDuration(const nsTArray& aTracks, bool nsSkeletonState::DecodeHeader(ogg_packet* aPacket) { + nsAutoRef autoRelease(aPacket); if (IsSkeletonBOS(aPacket)) { PRUint16 verMajor = LEUint16(aPacket->packet + SKELETON_VERSION_MAJOR_OFFSET); PRUint16 verMinor = LEUint16(aPacket->packet + SKELETON_VERSION_MINOR_OFFSET); @@ -1249,32 +1390,25 @@ bool nsSkeletonState::DecodeHeader(ogg_packet* aPacket) mPresentationTime = d == 0 ? 0 : (static_cast(n) / static_cast(d)) * USECS_PER_S; mVersion = SKELETON_VERSION(verMajor, verMinor); + // We can only care to parse Skeleton version 4.0+. if (mVersion < SKELETON_VERSION(4,0) || mVersion >= SKELETON_VERSION(5,0) || aPacket->bytes < SKELETON_4_0_MIN_HEADER_LEN) - { - // We can only care to parse Skeleton version 4.0+. - mActive = false; - return mDoneReadingHeaders = true; - } + return false; // Extract the segment length. mLength = LEInt64(aPacket->packet + SKELETON_FILE_LENGTH_OFFSET); LOG(PR_LOG_DEBUG, ("Skeleton segment length: %lld", mLength)); - // Initialize the serianlno-to-index map. + // Initialize the serialno-to-index map. mIndex.Init(); - mActive = true; + return true; } else if (IsSkeletonIndex(aPacket) && mVersion >= SKELETON_VERSION(4,0)) { - if (!DecodeIndex(aPacket)) { - // Failed to parse index, or invalid/hostile index. DecodeIndex() will - // have deactivated the track. - return mDoneReadingHeaders = true; - } - + return DecodeIndex(aPacket); } else if (aPacket->e_o_s) { mDoneReadingHeaders = true; + return true; } - return mDoneReadingHeaders; + return false; } diff --git a/content/media/ogg/nsOggCodecState.h b/content/media/ogg/nsOggCodecState.h index c3694a338e27..37317a0a2a03 100644 --- a/content/media/ogg/nsOggCodecState.h +++ b/content/media/ogg/nsOggCodecState.h @@ -16,6 +16,7 @@ #ifdef MOZ_OPUS #include #endif +#include #include #include #include @@ -83,8 +84,10 @@ public: static nsOggCodecState* Create(ogg_page* aPage); virtual CodecType GetType() { return TYPE_UNKNOWN; } - + // Reads a header packet. Returns true when last header has been read. + // This function takes ownership of the packet and is responsible for + // releasing it or queuing it for later processing. virtual bool DecodeHeader(ogg_packet* aPacket) { return (mDoneReadingHeaders = true); } @@ -301,6 +304,7 @@ public: PRInt64 Time(PRInt64 aGranulepos); bool Init(); nsresult Reset(); + nsresult Reset(bool aStart); bool IsHeader(ogg_packet* aPacket); nsresult PageIn(ogg_page* aPage); @@ -309,15 +313,18 @@ public: // Various fields from the Ogg Opus header. int mRate; // Sample rate the decoder uses (always 48 kHz). - int mNominalRate; // Original sample rate of the data (informational). + PRUint32 mNominalRate; // Original sample rate of the data (informational). int mChannels; // Number of channels the stream encodes. - int mPreSkip; // Number of samples to strip after decoder reset. + PRUint16 mPreSkip; // Number of samples to strip after decoder reset. float mGain; // Gain (dB) to apply to decoder output. int mChannelMapping; // Channel mapping family. int mStreams; // Number of packed streams in each packet. OpusDecoder *mDecoder; int mSkip; // Number of samples left to trim before playback. + // Granule position (end sample) of the last decoded Opus packet. This is + // used to calculate the amount we should trim from the last packet. + PRInt64 mPrevPacketGranulepos; private: @@ -326,7 +333,13 @@ private: // the stream, with the last packet having a known granulepos. Using this // known granulepos, and the known frame numbers, we recover the granulepos // of all frames in the array. This enables us to determine their timestamps. - void ReconstructGranulepos(); + bool ReconstructOpusGranulepos(); + + // Granule position (end sample) of the last decoded Opus page. This is + // used to calculate the Opus per-packet granule positions on the last page, + // where we may need to trim some samples from the end. + PRInt64 mPrevPageGranulepos; + #endif /* MOZ_OPUS */ }; @@ -464,4 +477,15 @@ private: nsClassHashtable mIndex; }; +// This allows the use of nsAutoRefs for an ogg_packet that properly free the +// contents of the packet. +template <> +class nsAutoRefTraits : public nsPointerRefTraits +{ +public: + static void Release(ogg_packet* aPacket) { + nsOggCodecState::ReleasePacket(aPacket); + } +}; + #endif diff --git a/content/media/ogg/nsOggReader.cpp b/content/media/ogg/nsOggReader.cpp index f948741d96cf..380582ea32d6 100644 --- a/content/media/ogg/nsOggReader.cpp +++ b/content/media/ogg/nsOggReader.cpp @@ -65,16 +65,6 @@ PageSync(MediaResource* aResource, // is about 4300 bytes, so we read the file in chunks larger than that. static const int PAGE_STEP = 8192; -class nsAutoReleasePacket { -public: - nsAutoReleasePacket(ogg_packet* aPacket) : mPacket(aPacket) { } - ~nsAutoReleasePacket() { - nsOggCodecState::ReleasePacket(mPacket); - } -private: - ogg_packet* mPacket; -}; - nsOggReader::nsOggReader(nsBuiltinDecoder* aDecoder) : nsBuiltinDecoderReader(aDecoder), mTheoraState(nsnull), @@ -124,17 +114,8 @@ nsresult nsOggReader::ResetDecode(bool start) if (mVorbisState && NS_FAILED(mVorbisState->Reset())) { res = NS_ERROR_FAILURE; } - if (mOpusState) { - if (NS_FAILED(mOpusState->Reset())) { - res = NS_ERROR_FAILURE; - } - else if (start) { - // Reset the skip frame counter as if - // we're starting playback fresh. - mOpusState->mSkip = mOpusState->mPreSkip; - LOG(PR_LOG_DEBUG, ("Seek to start: asking opus decoder to skip %d", - mOpusState->mSkip)); - } + if (mOpusState && NS_FAILED(mOpusState->Reset(start))) { + res = NS_ERROR_FAILURE; } if (mTheoraState && NS_FAILED(mTheoraState->Reset())) { res = NS_ERROR_FAILURE; @@ -147,11 +128,10 @@ bool nsOggReader::ReadHeaders(nsOggCodecState* aState) { while (!aState->DoneReadingHeaders()) { ogg_packet* packet = NextOggPacket(aState); - nsAutoReleasePacket autoRelease(packet); - if (!packet || !aState->IsHeader(packet)) { + // DecodeHeader is responsible for releasing packet. + if (!packet || !aState->DecodeHeader(packet)) { aState->Deactivate(); - } else { - aState->DecodeHeader(packet); + return false; } } return aState->Init(); @@ -433,9 +413,16 @@ nsresult nsOggReader::DecodeOpus(ogg_packet* aPacket) { NS_ASSERTION(ret == frames, "Opus decoded too few audio samples"); PRInt64 endFrame = aPacket->granulepos; - PRInt64 endTime = mOpusState->Time(endFrame); - PRInt64 startTime = mOpusState->Time(endFrame - frames); - PRInt64 duration = endTime - startTime; + PRInt64 startFrame; + // If this is the last packet, perform end trimming. + if (aPacket->e_o_s && mOpusState->mPrevPacketGranulepos != -1) { + startFrame = mOpusState->mPrevPacketGranulepos; + frames = static_cast(NS_MAX(static_cast(0), + NS_MIN(endFrame - startFrame, + static_cast(frames)))); + } else { + startFrame = endFrame - frames; + } // Trim the initial frames while the decoder is settling. if (mOpusState->mSkip > 0) { @@ -453,19 +440,23 @@ nsresult nsOggReader::DecodeOpus(ogg_packet* aPacket) { for (int i = 0; i < samples; i++) trimBuffer[i] = buffer[skipFrames*channels + i]; - startTime = mOpusState->Time(endFrame - keepFrames); - duration = endTime - startTime; + startFrame = endFrame - keepFrames; frames = keepFrames; buffer = trimBuffer; mOpusState->mSkip -= skipFrames; LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames", skipFrames)); } + // Save this packet's granule position in case we need to perform end + // trimming on the next packet. + mOpusState->mPrevPacketGranulepos = endFrame; LOG(PR_LOG_DEBUG, ("Opus decoder pushing %d frames", frames)); + PRInt64 startTime = mOpusState->Time(startFrame); + PRInt64 endTime = mOpusState->Time(endFrame); mAudioQueue.Push(new AudioData(mPageOffset, startTime, - duration, + endTime - startTime, frames, buffer.forget(), channels)); @@ -500,7 +491,7 @@ bool nsOggReader::DecodeAudioData() NS_ASSERTION(packet && packet->granulepos != -1, "Must have packet with known granulepos"); - nsAutoReleasePacket autoRelease(packet); + nsAutoRef autoRelease(packet); if (mVorbisState) { DecodeVorbis(packet); #ifdef MOZ_OPUS @@ -607,7 +598,7 @@ bool nsOggReader::DecodeVideoFrame(bool &aKeyframeSkip, mVideoQueue.Finish(); return false; } - nsAutoReleasePacket autoRelease(packet); + nsAutoRef autoRelease(packet); parsed++; NS_ASSERTION(packet && packet->granulepos != -1,