/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim:set ts=2 sw=2 sts=2 et cindent: */ /* ***** BEGIN LICENSE BLOCK ***** * Version: ML 1.1/GPL 2.0/LGPL 2.1 * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License * for the specific language governing rights and limitations under the * License. * * The Original Code is Mozilla code. * * The Initial Developer of the Original Code is the Mozilla Corporation. * Portions created by the Initial Developer are Copyright (C) 2010 * the Initial Developer. All Rights Reserved. * * Contributor(s): * Chris Double * Chris Pearce * * Alternatively, the contents of this file may be used under the terms of * either the GNU General Public License Version 2 or later (the "GPL"), or * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), * in which case the provisions of the GPL or the LGPL are applicable instead * of those above. If you wish to allow use of your version of this file only * under the terms of either the GPL or the LGPL, and not to allow others to * use your version of this file under the terms of the MPL, indicate your * decision by deleting the provisions above and replace them with the notice * and other provisions required by the GPL or the LGPL. If you do not delete * the provisions above, a recipient may use your version of this file under * the terms of any one of the MPL, the GPL or the LGPL. * * ***** END LICENSE BLOCK ***** */ #if !defined(nsOggCodecState_h_) #define nsOggCodecState_h_ #include #include #ifdef MOZ_TREMOR #include #else #include #endif #ifdef MOZ_OPUS #include #endif #include #include #include #include "VideoUtils.h" #include "mozilla/StandardInteger.h" // Uncomment the following to validate that we're predicting the number // of Vorbis samples in each packet correctly. #define VALIDATE_VORBIS_SAMPLE_CALCULATION #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION #include #endif // Deallocates a packet, used in nsPacketQueue below. class OggPacketDeallocator : public nsDequeFunctor { virtual void* operator() (void* aPacket) { ogg_packet* p = static_cast(aPacket); delete [] p->packet; delete p; return nsnull; } }; // A queue of ogg_packets. When we read a page, we extract the page's packets // and buffer them in the owning stream's nsOggCodecState. This is because // if we're skipping up to the next keyframe in very large frame sized videos, // there may be several megabytes of data between keyframes, and the // ogg_stream_state would end up resizing its buffer every time we added a // new 4KB page to the bitstream, which kills performance on Windows. This // also gives us the option to timestamp packets rather than decoded // frames/samples, reducing the amount of frames/samples we must decode to // determine start-time at a particular offset, and gives us finer control // over memory usage. class nsPacketQueue : private nsDeque { public: nsPacketQueue() : nsDeque(new OggPacketDeallocator()) {} ~nsPacketQueue() { Erase(); } bool IsEmpty() { return nsDeque::GetSize() == 0; } void Append(ogg_packet* aPacket); ogg_packet* PopFront() { return static_cast(nsDeque::PopFront()); } ogg_packet* PeekFront() { return static_cast(nsDeque::PeekFront()); } void PushFront(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } void PushBack(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); } void Erase() { nsDeque::Erase(); } }; // Encapsulates the data required for decoding an ogg bitstream and for // converting granulepos to timestamps. class nsOggCodecState { public: // Ogg types we know about enum CodecType { TYPE_VORBIS=0, TYPE_THEORA=1, TYPE_OPUS=2, TYPE_SKELETON=3, TYPE_UNKNOWN=4 }; virtual ~nsOggCodecState(); // Factory for creating nsCodecStates. Use instead of constructor. // aPage should be a beginning-of-stream page. static nsOggCodecState* Create(ogg_page* aPage); virtual CodecType GetType() { return TYPE_UNKNOWN; } // Reads a header packet. Returns true when last header has been read. virtual bool DecodeHeader(ogg_packet* aPacket) { return (mDoneReadingHeaders = true); } // Returns the end time that a granulepos represents. virtual PRInt64 Time(PRInt64 granulepos) { return -1; } // Returns the start time that a granulepos represents. virtual PRInt64 StartTime(PRInt64 granulepos) { return -1; } // Initializes the codec state. virtual bool Init(); // Returns true when this bitstream has finished reading all its // header packets. bool DoneReadingHeaders() { return mDoneReadingHeaders; } // Deactivates the bitstream. Only the primary video and audio bitstreams // should be active. void Deactivate() { mActive = false; mDoneReadingHeaders = true; Reset(); } // Resets decoding state. virtual nsresult Reset(); // Returns true if the nsOggCodecState thinks this packet is a header // packet. Note this does not verify the validity of the header packet, // it just guarantees that the packet is marked as a header packet (i.e. // it is definintely not a data packet). Do not use this to identify // streams, use it to filter header packets from data packets while // decoding. virtual bool IsHeader(ogg_packet* aPacket) { return false; } // Returns the next packet in the stream, or nsnull if there are no more // packets buffered in the packet queue. More packets can be buffered by // inserting one or more pages into the stream by calling PageIn(). The // caller is responsible for deleting returned packet's using // nsOggCodecState::ReleasePacket(). The packet will have a valid granulepos. ogg_packet* PacketOut(); // Releases the memory used by a cloned packet. Every packet returned by // PacketOut() must be free'd using this function. static void ReleasePacket(ogg_packet* aPacket); // Extracts all packets from the page, and inserts them into the packet // queue. They can be extracted by calling PacketOut(). Packets from an // inactive stream are not buffered, i.e. this call has no effect for // inactive streams. Multiple pages may need to be inserted before // PacketOut() starts to return packets, as granulepos may need to be // captured. virtual nsresult PageIn(ogg_page* aPage); // Number of packets read. PRUint64 mPacketCount; // Serial number of the bitstream. PRUint32 mSerial; // Ogg specific state. ogg_stream_state mState; // Queue of as yet undecoded packets. Packets are guaranteed to have // a valid granulepos. nsPacketQueue mPackets; // Is the bitstream active; whether we're decoding and playing this bitstream. bool mActive; // True when all headers packets have been read. bool mDoneReadingHeaders; protected: // Constructs a new nsOggCodecState. aActive denotes whether the stream is // active. For streams of unsupported or unknown types, aActive should be // false. nsOggCodecState(ogg_page* aBosPage, bool aActive); // Deallocates all packets stored in mUnstamped, and clears the array. void ClearUnstamped(); // Extracts packets out of mState until a data packet with a non -1 // granulepos is encountered, or no more packets are readable. Header // packets are pushed into the packet queue immediately, and data packets // are buffered in mUnstamped. Once a non -1 granulepos packet is read // the granulepos of the packets in mUnstamped can be inferred, and they // can be pushed over to mPackets. Used by PageIn() implementations in // subclasses. nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos); // Temporary buffer in which to store packets while we're reading packets // in order to capture granulepos. nsTArray mUnstamped; }; class nsVorbisState : public nsOggCodecState { public: nsVorbisState(ogg_page* aBosPage); virtual ~nsVorbisState(); CodecType GetType() { return TYPE_VORBIS; } bool DecodeHeader(ogg_packet* aPacket); PRInt64 Time(PRInt64 granulepos); bool Init(); nsresult Reset(); bool IsHeader(ogg_packet* aPacket); nsresult PageIn(ogg_page* aPage); // Returns the end time that a granulepos represents. static PRInt64 Time(vorbis_info* aInfo, PRInt64 aGranulePos); vorbis_info mInfo; vorbis_comment mComment; vorbis_dsp_state mDsp; vorbis_block mBlock; private: // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped // array. nsresult ReconstructVorbisGranulepos(); // The "block size" of the previously decoded Vorbis packet, or 0 if we've // not yet decoded anything. This is used to calculate the number of samples // in a Vorbis packet, since each Vorbis packet depends on the previous // packet while being decoded. long mPrevVorbisBlockSize; // Granulepos (end sample) of the last decoded Vorbis packet. This is used // to calculate the Vorbis granulepos when we don't find a granulepos to // back-propagate from. PRInt64 mGranulepos; #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION // When validating that we've correctly predicted Vorbis packets' number // of samples, we store each packet's predicted number of samples in this // map, and verify we decode the predicted number of samples. std::map mVorbisPacketSamples; #endif // Records that aPacket is predicted to have aSamples samples. // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION // is not defined. void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples); // Verifies that aPacket has had its number of samples predicted. // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION // is not defined. void AssertHasRecordedPacketSamples(ogg_packet* aPacket); public: // Asserts that the number of samples predicted for aPacket is aSamples. // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION // is not defined. void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples); }; // Returns 1 if the Theora info struct is decoding a media of Theora // version (maj,min,sub) or later, otherwise returns 0. int TheoraVersion(th_info* info, unsigned char maj, unsigned char min, unsigned char sub); class nsTheoraState : public nsOggCodecState { public: nsTheoraState(ogg_page* aBosPage); virtual ~nsTheoraState(); CodecType GetType() { return TYPE_THEORA; } bool DecodeHeader(ogg_packet* aPacket); PRInt64 Time(PRInt64 granulepos); PRInt64 StartTime(PRInt64 granulepos); bool Init(); bool IsHeader(ogg_packet* aPacket); nsresult PageIn(ogg_page* aPage); // Returns the maximum number of microseconds which a keyframe can be offset // from any given interframe. PRInt64 MaxKeyframeOffset(); // Returns the end time that a granulepos represents. static PRInt64 Time(th_info* aInfo, PRInt64 aGranulePos); th_info mInfo; th_comment mComment; th_setup_info *mSetup; th_dec_ctx* mCtx; float mPixelAspectRatio; private: // Reconstructs the granulepos of Theora packets stored in the // mUnstamped array. mUnstamped must be filled with consecutive packets from // the stream, with the last packet having a known granulepos. Using this // known granulepos, and the known frame numbers, we recover the granulepos // of all frames in the array. This enables us to determine their timestamps. void ReconstructTheoraGranulepos(); }; class nsOpusState : public nsOggCodecState { #ifdef MOZ_OPUS public: nsOpusState(ogg_page* aBosPage); virtual ~nsOpusState(); CodecType GetType() { return TYPE_OPUS; } bool DecodeHeader(ogg_packet* aPacket); PRInt64 Time(PRInt64 granulepos); bool Init(); nsresult Reset(); bool IsHeader(ogg_packet* aPacket); nsresult PageIn(ogg_page* aPage); // Various fields from the Ogg Opus header. int mRate; // Sample rate the decoder uses (always 48 kHz). int mNominalRate; // Original sample rate of the data (informational). int mChannels; // Number of channels the stream encodes. int mPreSkip; // Number of samples to strip after decoder reset. float mGain; // Gain (dB) to apply to decoder output. int mChannelMapping; // Channel mapping family. int mStreams; // Number of packed streams in each packet. OpusDecoder *mDecoder; private: // Reconstructs the granulepos of Opus packets stored in the // mUnstamped array. mUnstamped must be filled with consecutive packets from // the stream, with the last packet having a known granulepos. Using this // known granulepos, and the known frame numbers, we recover the granulepos // of all frames in the array. This enables us to determine their timestamps. void ReconstructGranulepos(); #endif /* MOZ_OPUS */ }; // Constructs a 32bit version number out of two 16 bit major,minor // version numbers. #define SKELETON_VERSION(major, minor) (((major)<<16)|(minor)) class nsSkeletonState : public nsOggCodecState { public: nsSkeletonState(ogg_page* aBosPage); ~nsSkeletonState(); CodecType GetType() { return TYPE_SKELETON; } bool DecodeHeader(ogg_packet* aPacket); PRInt64 Time(PRInt64 granulepos) { return -1; } bool Init() { return true; } bool IsHeader(ogg_packet* aPacket) { return true; } // Return true if the given time (in milliseconds) is within // the presentation time defined in the skeleton track. bool IsPresentable(PRInt64 aTime) { return aTime >= mPresentationTime; } // Stores the offset of the page on which a keyframe starts, // and its presentation time. class nsKeyPoint { public: nsKeyPoint() : mOffset(INT64_MAX), mTime(INT64_MAX) {} nsKeyPoint(PRInt64 aOffset, PRInt64 aTime) : mOffset(aOffset), mTime(aTime) {} // Offset from start of segment/link-in-the-chain in bytes. PRInt64 mOffset; // Presentation time in usecs. PRInt64 mTime; bool IsNull() { return mOffset == INT64_MAX && mTime == INT64_MAX; } }; // Stores a keyframe's byte-offset, presentation time and the serialno // of the stream it belongs to. class nsSeekTarget { public: nsSeekTarget() : mSerial(0) {} nsKeyPoint mKeyPoint; PRUint32 mSerial; bool IsNull() { return mKeyPoint.IsNull() && mSerial == 0; } }; // Determines from the seek index the keyframe which you must seek back to // in order to get all keyframes required to render all streams with // serialnos in aTracks, at time aTarget. nsresult IndexedSeekTarget(PRInt64 aTarget, nsTArray& aTracks, nsSeekTarget& aResult); bool HasIndex() const { return mIndex.IsInitialized() && mIndex.Count() > 0; } // Returns the duration of the active tracks in the media, if we have // an index. aTracks must be filled with the serialnos of the active tracks. // The duration is calculated as the greatest end time of all active tracks, // minus the smalled start time of all the active tracks. nsresult GetDuration(const nsTArray& aTracks, PRInt64& aDuration); private: // Decodes an index packet. Returns false on failure. bool DecodeIndex(ogg_packet* aPacket); // Gets the keypoint you must seek to in order to get the keyframe required // to render the stream at time aTarget on stream with serial aSerialno. nsresult IndexedSeekTargetForTrack(PRUint32 aSerialno, PRInt64 aTarget, nsKeyPoint& aResult); // Version of the decoded skeleton track, as per the SKELETON_VERSION macro. PRUint32 mVersion; // Presentation time of the resource in milliseconds PRInt64 mPresentationTime; // Length of the resource in bytes. PRInt64 mLength; // Stores the keyframe index and duration information for a particular // stream. class nsKeyFrameIndex { public: nsKeyFrameIndex(PRInt64 aStartTime, PRInt64 aEndTime) : mStartTime(aStartTime), mEndTime(aEndTime) { MOZ_COUNT_CTOR(nsKeyFrameIndex); } ~nsKeyFrameIndex() { MOZ_COUNT_DTOR(nsKeyFrameIndex); } void Add(PRInt64 aOffset, PRInt64 aTimeMs) { mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs)); } const nsKeyPoint& Get(PRUint32 aIndex) const { return mKeyPoints[aIndex]; } PRUint32 Length() const { return mKeyPoints.Length(); } // Presentation time of the first sample in this stream in usecs. const PRInt64 mStartTime; // End time of the last sample in this stream in usecs. const PRInt64 mEndTime; private: nsTArray mKeyPoints; }; // Maps Ogg serialnos to the index-keypoint list. nsClassHashtable mIndex; }; #endif