gecko-dev/dom/media/mp4/MoofParser.h

365 строки
10 KiB
C++

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef MOOF_PARSER_H_
#define MOOF_PARSER_H_
#include <cstdint>

#include "mozilla/ResultExtensions.h"
#include "mozilla/Variant.h"
#include "Atom.h"
#include "AtomType.h"
#include "SinfParser.h"
#include "ByteStream.h"
#include "MP4Interval.h"
#include "MediaResource.h"
namespace mozilla {
// Signed 64-bit time value expressed in microseconds (see
// Mvhd::ToMicroseconds, which produces values of this type).
using Microseconds = int64_t;

// Forward declarations of types that are only used by reference or pointer
// before their definitions are seen.
class Box;
class BoxContext;
class BoxReader;
class Moof;
// Records the CTS end time of the last sample of a track in the preceding
// Moof, so that the tracks' timestamps can be smoothed across Moofs.
struct TrackEndCts {
  // Stores both arguments verbatim.
  TrackEndCts(uint32_t aTrackId, Microseconds aCtsEndTime)
      : mTrackId{aTrackId}, mCtsEndTime{aCtsEndTime} {}

  // Id of the track this entry describes.
  uint32_t mTrackId;
  // CTS end time of that track's last sample.
  Microseconds mCtsEndTime;
};
// Header atom holding creation/modification times, a timescale and a
// duration (presumably the ISO BMFF 'mvhd' movie header box -- confirm
// against the out-of-line Parse()). Also serves as the base class for Tkhd
// and Mdhd.
class Mvhd : public Atom {
 public:
  // Creates a header with every field zeroed. A zero timescale makes
  // ToMicroseconds() fail until real values are filled in.
  Mvhd()
      : mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {}
  // Constructs from aBox; defined out of line.
  explicit Mvhd(Box& aBox);

  // Converts aTimescaleUnits, a count of 1/mTimescale-second ticks, to
  // microseconds.
  //
  // Returns Err(NS_ERROR_FAILURE) when mTimescale is zero or when the result
  // would not fit in a Microseconds value. The inputs come from the media
  // file, so the multiplication has to be guarded: signed overflow is
  // undefined behaviour.
  Result<Microseconds, nsresult> ToMicroseconds(int64_t aTimescaleUnits) {
    if (!mTimescale) {
      NS_WARNING("invalid mTimescale");
      return Err(NS_ERROR_FAILURE);
    }
    const int64_t kUsPerSecond = 1000000ll;
    // Split into whole seconds and a sub-second remainder so that the
    // remainder can be scaled without losing precision.
    int64_t major = aTimescaleUnits / mTimescale;
    int64_t remainder = aTimescaleUnits % mTimescale;
    // |remainder| < mTimescale <= UINT32_MAX, so remainder * kUsPerSecond
    // always fits, and the fractional term below is smaller than
    // kUsPerSecond in magnitude. Keeping one second of headroom on |major|
    // therefore guarantees that neither the product nor the sum overflows.
    const int64_t kMaxMajor = INT64_MAX / kUsPerSecond - 1;
    if (major > kMaxMajor || major < -kMaxMajor) {
      NS_WARNING("timestamp does not fit in Microseconds");
      return Err(NS_ERROR_FAILURE);
    }
    return major * kUsPerSecond + remainder * kUsPerSecond / mTimescale;
  }

  uint64_t mCreationTime;
  uint64_t mModificationTime;
  // Ticks per second; divisor used by ToMicroseconds().
  uint32_t mTimescale;
  uint64_t mDuration;

 protected:
  // Defined out of line; presumably fills the fields above from aBox.
  Result<Ok, nsresult> Parse(Box& aBox);
};
// Header atom that extends Mvhd with a track id (presumably the ISO BMFF
// 'tkhd' track header box -- confirm against the out-of-line Parse()).
class Tkhd : public Mvhd {
 public:
  // Leaves the Mvhd fields zeroed and the track id at 0.
  Tkhd() = default;
  // Constructs from aBox; defined out of line.
  explicit Tkhd(Box& aBox);

  uint32_t mTrackId = 0;

 protected:
  // Defined out of line; hides Mvhd::Parse for this type.
  Result<Ok, nsresult> Parse(Box& aBox);
};
// Header atom that reuses Mvhd's fields and ToMicroseconds() unchanged
// (presumably the ISO BMFF 'mdhd' media header box -- TODO confirm).
class Mdhd : public Mvhd {
public:
// Default construction defers to Mvhd(), which zeroes every field.
Mdhd() = default;
// Constructs from aBox; defined out of line.
explicit Mdhd(Box& aBox);
};
// Per-track default sample values (presumably the ISO BMFF 'trex' track
// extends box -- confirm against the out-of-line Parse()). Also the base
// class of Tfhd.
class Trex : public Atom {
 public:
  // Creates an entry for aTrackId with all flags and defaults set to zero.
  explicit Trex(uint32_t aTrackId) : mTrackId(aTrackId) {}
  // Constructs from aBox; defined out of line.
  explicit Trex(Box& aBox);

  uint32_t mFlags = 0;
  uint32_t mTrackId = 0;
  uint32_t mDefaultSampleDescriptionIndex = 0;
  uint32_t mDefaultSampleDuration = 0;
  uint32_t mDefaultSampleSize = 0;
  uint32_t mDefaultSampleFlags = 0;

 protected:
  // Defined out of line.
  Result<Ok, nsresult> Parse(Box& aBox);
};
// Track fragment header (tfhd). Starts out as a copy of a Trex and adds a
// base data offset.
class Tfhd : public Trex {
 public:
  // Copies every field of aTrex and takes over its validity; the base data
  // offset starts at zero.
  explicit Tfhd(Trex& aTrex) : Trex(aTrex) { mValid = aTrex.IsValid(); }
  // Constructs from aBox using aTrex; defined out of line.
  Tfhd(Box& aBox, Trex& aTrex);

  uint64_t mBaseDataOffset = 0;

 protected:
  // Defined out of line; hides Trex::Parse for this type.
  Result<Ok, nsresult> Parse(Box& aBox);
};
// Atom carrying a base media decode time (presumably the ISO BMFF 'tfdt'
// track fragment decode time box -- confirm against the out-of-line Parse()).
class Tfdt : public Atom {
 public:
  // Base media decode time starts at zero.
  Tfdt() = default;
  // Constructs from aBox; defined out of line.
  explicit Tfdt(Box& aBox);

  uint64_t mBaseMediaDecodeTime = 0;

 protected:
  // Defined out of line.
  Result<Ok, nsresult> Parse(Box& aBox);
};
class Edts : public Atom {
public:
Edts() : mMediaStart(0), mEmptyOffset(0) {}
explicit Edts(Box& aBox);
virtual bool IsValid() override {
// edts is optional
return true;
}
int64_t mMediaStart;
int64_t mEmptyOffset;
protected:
Result<Ok, nsresult> Parse(Box& aBox);
};
// One entry of a fragment's sample index (see Moof::mIndex).
//
// The scalar members carry default initializers so that a default-constructed
// Sample never exposes indeterminate values; the struct remains an aggregate.
struct Sample {
  // Byte range of the sample data.
  mozilla::MediaByteRange mByteRange;
  // NOTE(review): presumably the byte range of the sample's common-encryption
  // auxiliary information -- confirm against Moof::ProcessCencAuxInfo.
  mozilla::MediaByteRange mCencRange;
  Microseconds mDecodeTime = 0;
  MP4Interval<Microseconds> mCompositionRange;
  // NOTE(review): presumably true for sync (key) samples -- confirm.
  bool mSync = false;
};
// Atom holding per-sample auxiliary information sizes (presumably the ISO
// BMFF 'saiz' box -- TODO confirm against the out-of-line Parse()).
class Saiz final : public Atom {
public:
// Constructs from aBox; defined out of line. NOTE(review): aDefaultType is
// presumably used for mAuxInfoType when the box does not carry one -- confirm.
Saiz(Box& aBox, AtomType aDefaultType);
AtomType mAuxInfoType;
uint32_t mAuxInfoTypeParameter;
// One byte-sized entry per sample; fallible array, so growth can fail.
FallibleTArray<uint8_t> mSampleInfoSize;
protected:
// Defined out of line.
Result<Ok, nsresult> Parse(Box& aBox);
};
// Atom holding auxiliary information offsets (presumably the ISO BMFF 'saio'
// box -- TODO confirm against the out-of-line Parse()).
class Saio final : public Atom {
public:
// Constructs from aBox; defined out of line. NOTE(review): aDefaultType is
// presumably used for mAuxInfoType when the box does not carry one -- confirm.
Saio(Box& aBox, AtomType aDefaultType);
AtomType mAuxInfoType;
uint32_t mAuxInfoTypeParameter;
// 64-bit offsets; fallible array, so growth can fail.
FallibleTArray<uint64_t> mOffsets;
protected:
// Defined out of line.
Result<Ok, nsresult> Parse(Box& aBox);
};
// Entry of a SampleToGroup box (see Sbgp::mEntries): a sample count paired
// with a group description index.
struct SampleToGroupEntry {
  // Bases for group description indices. NOTE(review): the names suggest that
  // indices starting at 0x10000 refer to fragment-level descriptions and
  // lower ones to track-level descriptions -- confirm against the users.
  static const uint32_t kTrackGroupDescriptionIndexBase = 0;
  static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000;

  // Stores both arguments verbatim.
  SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex)
      : mSampleCount{aSampleCount},
        mGroupDescriptionIndex{aGroupDescriptionIndex} {}

  uint32_t mSampleCount;
  uint32_t mGroupDescriptionIndex;
};
// Holds the grouping type, its parameter and the list of sample-to-group
// entries of one box.
class Sbgp final : public Atom // SampleToGroup box.
{
public:
// Constructs from aBox; defined out of line.
explicit Sbgp(Box& aBox);
AtomType mGroupingType;
uint32_t mGroupingTypeParam;
// Fallible array, so growth can fail.
FallibleTArray<SampleToGroupEntry> mEntries;
protected:
// Defined out of line.
Result<Ok, nsresult> Parse(Box& aBox);
};
// Stores information from CencSampleEncryptionInformationGroupEntry (seig).
// Cenc here refers to the common encryption standard, rather than the specific
// cenc scheme from that standard. This structure is used for all encryption
// schemes, i.e. it is used for both cenc and cbcs, not just cenc.
struct CencSampleEncryptionInfoEntry final {
public:
// Every member starts from its in-class default below.
CencSampleEncryptionInfoEntry() {}
// Defined out of line; presumably fills the members from aReader -- confirm.
Result<Ok, nsresult> Init(BoxReader& aReader);
bool mIsEncrypted = false;
uint8_t mIVSize = 0;
nsTArray<uint8_t> mKeyId;
uint8_t mCryptByteBlock = 0;
uint8_t mSkipByteBlock = 0;
// NOTE(review): the name is presumably a misspelling of "mConstantIV"; it is
// a public member, so renaming it would need every user updated too.
nsTArray<uint8_t> mConsantIV;
};
// Holds the grouping type and the encryption info entries of one box.
class Sgpd final : public Atom // SampleGroupDescription box.
{
public:
// Constructs from aBox; defined out of line.
explicit Sgpd(Box& aBox);
AtomType mGroupingType;
// Fallible array, so growth can fail.
FallibleTArray<CencSampleEncryptionInfoEntry> mEntries;
protected:
// Defined out of line.
Result<Ok, nsresult> Parse(Box& aBox);
};
// Audio/video entry from the sample description box (stsd). Only whether the
// entry is encrypted needs to be stored, so no specialized classes for audio
// and video data are required. Most of the parsing of these entries is
// currently done by mp4parse-rust, but the moof parser needs to know which
// of them are encrypted when parsing the track fragment header (tfhd).
struct SampleDescriptionEntry {
  // False until the entry is marked as encrypted.
  bool mIsEncryptedEntry{false};
};
// Tag type used in variants to indicate that all tracks should be parsed.
struct ParseAllTracks {};

// Either ParseAllTracks, or the id of the single track to parse.
using TrackParseMode = Variant<ParseAllTracks, uint32_t>;
// Atom describing one moof box: its byte ranges, time range, sample index
// and the encryption-related data collected for it. All member functions are
// defined out of line.
class Moof final : public Atom {
public:
// Builds the object from aBox. aDecodeTime is an in/out pointer (see
// ParseTraf/ParseTrun below, which update it). NOTE(review): aTracksEndCts is
// presumably read and updated to smooth timestamps across Moofs, as described
// on TrackEndCts -- confirm.
Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
uint64_t* aDecodeTime, bool aIsAudio,
nsTArray<TrackEndCts>& aTracksEndCts);
// Writes byte ranges for auxiliary info of type aType into aByteRanges.
// NOTE(review): presumably returns false on failure -- confirm.
bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges);
// NOTE(review): presumably adjusts this fragment's times relative to aMoof
// within mMaxRoundingError -- confirm against the implementation.
void FixRounding(const Moof& aMoof);
mozilla::MediaByteRange mRange;
mozilla::MediaByteRange mMdatRange;
MP4Interval<Microseconds> mTimeRange;
// Samples of this fragment.
FallibleTArray<Sample> mIndex;
// Fragment-level counterparts of MoofParser's track-level arrays.
FallibleTArray<CencSampleEncryptionInfoEntry>
mFragmentSampleEncryptionInfoEntries;
FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
Tfhd mTfhd;
FallibleTArray<Saiz> mSaizs;
FallibleTArray<Saio> mSaios;
// Raw byte blobs, one per entry (presumably 'pssh' boxes -- confirm).
nsTArray<nsTArray<uint8_t>> mPsshes;
private:
// aDecodeTime is updated to the end of the parsed TRAF on return.
void ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
uint64_t* aDecodeTime, bool aIsAudio);
// aDecodeTime is updated to the end of the parsed TRUN on return.
Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
Edts& aEdts, uint64_t* aDecodeTime,
bool aIsAudio);
// Process the sample auxiliary information used by common encryption.
// aScheme is used to select the appropriate auxiliary information and should
// be set based on the encryption scheme used by the track being processed.
// Note, the term cenc here refers to the standard, not the specific scheme
// from that standard, i.e. this function is used to handle auxiliary
// information from both the cenc and cbcs schemes.
bool ProcessCencAuxInfo(AtomType aScheme);
// Not initialized in this header; expected to be set by the out-of-line
// constructor.
uint64_t mMaxRoundingError;
};
DDLoggedTypeDeclName(MoofParser);
// Parser that reads from a ByteStream and keeps the header atoms and the list
// of Moofs it has found. Apart from the constructor and Moofs(), all member
// functions are defined out of line.
class MoofParser : public DecoderDoctorLifeLogger<MoofParser> {
public:
// aSource is the stream to parse. aTrackParseMode selects either a single
// track id or all tracks; when it holds a track id, mTrex is created with
// that id, otherwise with id 0.
MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode,
bool aIsAudio)
: mSource(aSource),
mOffset(0),
mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>()
: 0),
mIsAudio(aIsAudio),
mLastDecodeTime(0),
mTrackParseMode(aTrackParseMode) {
// NOTE(review): the original comment here referred to a member named
// mIsMultitrackParser, which is not declared in this header; it presumably
// now applies to the ParseAllTracks mode of mTrackParseMode.
// Parsing multiple tracks is a nasty workaround for calculating the
// composition range for MSE. Ideally we'd store an array of tracks with
// different metadata for each.
DDLINKCHILD("source", aSource);
}
bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges);
// If *aCanEvict is set to true, then all moofs already parsed are removed
// from the index before it is rebuilt. *aCanEvict is set to true upon return
// if some moofs were removed.
bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges,
bool* aCanEvict);
bool RebuildFragmentedIndex(BoxContext& aContext);
MP4Interval<Microseconds> GetCompositionRange(
const mozilla::MediaByteRangeSet& aByteRanges);
bool ReachedEnd();
// Per-box parsing entry points, one per box type named in the function.
void ParseMoov(Box& aBox);
void ParseTrak(Box& aBox);
void ParseMdia(Box& aBox);
void ParseMvex(Box& aBox);
void ParseMinf(Box& aBox);
void ParseStbl(Box& aBox);
void ParseStsd(Box& aBox);
void ParseEncrypted(Box& aBox);
bool BlockingReadNextMoof();
already_AddRefed<mozilla::MediaByteBuffer> Metadata();
MediaByteRange FirstCompleteMediaSegment();
MediaByteRange FirstCompleteMediaHeader();
mozilla::MediaByteRange mInitRange;
// Stream being parsed; held by strong reference.
RefPtr<ByteStream> mSource;
uint64_t mOffset;
// Header atoms; default-constructed here and presumably filled in by the
// Parse* functions above -- confirm.
Mvhd mMvhd;
Mdhd mMdhd;
Trex mTrex;
Tfdt mTfdt;
Edts mEdts;
Sinf mSinf;
// Track-level counterparts of Moof's fragment-level arrays.
FallibleTArray<CencSampleEncryptionInfoEntry>
mTrackSampleEncryptionInfoEntries;
FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries;
FallibleTArray<SampleDescriptionEntry> mSampleDescriptions;
// Mutable access to the list of parsed Moofs.
nsTArray<Moof>& Moofs() { return mMoofs; }
private:
void ScanForMetadata(mozilla::MediaByteRange& aMoov);
nsTArray<Moof> mMoofs;
nsTArray<MediaByteRange> mMediaRanges;
// Passed by reference to each Moof constructor (see Moof's aTracksEndCts).
nsTArray<TrackEndCts> mTracksEndCts;
bool mIsAudio;
uint64_t mLastDecodeTime;
// Either a ParseAllTracks if in multitrack mode, or an integer representing
// the track_id for the track being parsed. If parsing a specific track, mTrex
// should have an id matching mTrackParseMode.as<uint32_t>(). In this case 0
// is a valid track id -- this is not allowed in the spec, but such mp4s
// appear in the wild. In the ParseAllTracks case, mTrex can have an arbitrary
// id based on the tracks being parsed.
const TrackParseMode mTrackParseMode;
};
} // namespace mozilla
#endif