зеркало из https://github.com/mozilla/gecko-dev.git
365 строки
10 KiB
C++
365 строки
10 KiB
C++
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#ifndef MOOF_PARSER_H_
|
|
#define MOOF_PARSER_H_
|
|
|
|
#include <stdint.h>

#include "mozilla/ResultExtensions.h"
#include "mozilla/Variant.h"
#include "Atom.h"
#include "AtomType.h"
#include "SinfParser.h"
#include "ByteStream.h"
#include "MP4Interval.h"
#include "MediaResource.h"
|
|
|
|
namespace mozilla {
|
|
|
|
// Signed 64-bit quantity used throughout this header for values expressed in
// microseconds.
using Microseconds = int64_t;
|
|
|
|
// Forward declarations: these types are only used by reference or pointer in
// the declarations below.
class Box;
class BoxContext;
class BoxReader;
class Moof;
|
|
|
|
// Used to track the CTS end time of the last sample of a track
|
|
// in the preceeding Moof, so that we can smooth tracks' timestamps
|
|
// across Moofs.
|
|
struct TrackEndCts {
|
|
TrackEndCts(uint32_t aTrackId, Microseconds aCtsEndTime)
|
|
: mTrackId(aTrackId), mCtsEndTime(aCtsEndTime) {}
|
|
uint32_t mTrackId;
|
|
Microseconds mCtsEndTime;
|
|
};
|
|
|
|
class Mvhd : public Atom {
|
|
public:
|
|
Mvhd()
|
|
: mCreationTime(0), mModificationTime(0), mTimescale(0), mDuration(0) {}
|
|
explicit Mvhd(Box& aBox);
|
|
|
|
Result<Microseconds, nsresult> ToMicroseconds(int64_t aTimescaleUnits) {
|
|
if (!mTimescale) {
|
|
NS_WARNING("invalid mTimescale");
|
|
return Err(NS_ERROR_FAILURE);
|
|
}
|
|
int64_t major = aTimescaleUnits / mTimescale;
|
|
int64_t remainder = aTimescaleUnits % mTimescale;
|
|
return major * 1000000ll + remainder * 1000000ll / mTimescale;
|
|
}
|
|
|
|
uint64_t mCreationTime;
|
|
uint64_t mModificationTime;
|
|
uint32_t mTimescale;
|
|
uint64_t mDuration;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
class Tkhd : public Mvhd {
|
|
public:
|
|
Tkhd() : mTrackId(0) {}
|
|
explicit Tkhd(Box& aBox);
|
|
|
|
uint32_t mTrackId;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
class Mdhd : public Mvhd {
|
|
public:
|
|
Mdhd() = default;
|
|
explicit Mdhd(Box& aBox);
|
|
};
|
|
|
|
class Trex : public Atom {
|
|
public:
|
|
explicit Trex(uint32_t aTrackId)
|
|
: mFlags(0),
|
|
mTrackId(aTrackId),
|
|
mDefaultSampleDescriptionIndex(0),
|
|
mDefaultSampleDuration(0),
|
|
mDefaultSampleSize(0),
|
|
mDefaultSampleFlags(0) {}
|
|
|
|
explicit Trex(Box& aBox);
|
|
|
|
uint32_t mFlags;
|
|
uint32_t mTrackId;
|
|
uint32_t mDefaultSampleDescriptionIndex;
|
|
uint32_t mDefaultSampleDuration;
|
|
uint32_t mDefaultSampleSize;
|
|
uint32_t mDefaultSampleFlags;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
class Tfhd : public Trex {
|
|
public:
|
|
explicit Tfhd(Trex& aTrex) : Trex(aTrex), mBaseDataOffset(0) {
|
|
mValid = aTrex.IsValid();
|
|
}
|
|
Tfhd(Box& aBox, Trex& aTrex);
|
|
|
|
uint64_t mBaseDataOffset;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
class Tfdt : public Atom {
|
|
public:
|
|
Tfdt() : mBaseMediaDecodeTime(0) {}
|
|
explicit Tfdt(Box& aBox);
|
|
|
|
uint64_t mBaseMediaDecodeTime;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
class Edts : public Atom {
|
|
public:
|
|
Edts() : mMediaStart(0), mEmptyOffset(0) {}
|
|
explicit Edts(Box& aBox);
|
|
virtual bool IsValid() override {
|
|
// edts is optional
|
|
return true;
|
|
}
|
|
|
|
int64_t mMediaStart;
|
|
int64_t mEmptyOffset;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
struct Sample {
|
|
mozilla::MediaByteRange mByteRange;
|
|
mozilla::MediaByteRange mCencRange;
|
|
Microseconds mDecodeTime;
|
|
MP4Interval<Microseconds> mCompositionRange;
|
|
bool mSync;
|
|
};
|
|
|
|
class Saiz final : public Atom {
|
|
public:
|
|
Saiz(Box& aBox, AtomType aDefaultType);
|
|
|
|
AtomType mAuxInfoType;
|
|
uint32_t mAuxInfoTypeParameter;
|
|
FallibleTArray<uint8_t> mSampleInfoSize;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
class Saio final : public Atom {
|
|
public:
|
|
Saio(Box& aBox, AtomType aDefaultType);
|
|
|
|
AtomType mAuxInfoType;
|
|
uint32_t mAuxInfoTypeParameter;
|
|
FallibleTArray<uint64_t> mOffsets;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
// One (sample count, group description index) pair, as stored in the
// sample-to-group entry lists of Sbgp, Moof and MoofParser.
struct SampleToGroupEntry {
  // Index bases for group descriptions.
  // NOTE(review): judging by the names, indices above the fragment base refer
  // to fragment-level descriptions and the rest to track-level ones - confirm.
  static const uint32_t kTrackGroupDescriptionIndexBase = 0;
  static const uint32_t kFragmentGroupDescriptionIndexBase = 0x10000;

  SampleToGroupEntry(uint32_t aSampleCount, uint32_t aGroupDescriptionIndex)
      : mSampleCount{aSampleCount},
        mGroupDescriptionIndex{aGroupDescriptionIndex} {}

  uint32_t mSampleCount;
  uint32_t mGroupDescriptionIndex;
};
|
|
|
|
class Sbgp final : public Atom // SampleToGroup box.
|
|
{
|
|
public:
|
|
explicit Sbgp(Box& aBox);
|
|
|
|
AtomType mGroupingType;
|
|
uint32_t mGroupingTypeParam;
|
|
FallibleTArray<SampleToGroupEntry> mEntries;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
// Stores information form CencSampleEncryptionInformationGroupEntry (seig).
|
|
// Cenc here refers to the common encryption standard, rather than the specific
|
|
// cenc scheme from that standard. This structure is used for all encryption
|
|
// schemes. I.e. it is used for both cenc and cbcs, not just cenc.
|
|
struct CencSampleEncryptionInfoEntry final {
|
|
public:
|
|
CencSampleEncryptionInfoEntry() {}
|
|
|
|
Result<Ok, nsresult> Init(BoxReader& aReader);
|
|
|
|
bool mIsEncrypted = false;
|
|
uint8_t mIVSize = 0;
|
|
nsTArray<uint8_t> mKeyId;
|
|
uint8_t mCryptByteBlock = 0;
|
|
uint8_t mSkipByteBlock = 0;
|
|
nsTArray<uint8_t> mConsantIV;
|
|
};
|
|
|
|
class Sgpd final : public Atom // SampleGroupDescription box.
|
|
{
|
|
public:
|
|
explicit Sgpd(Box& aBox);
|
|
|
|
AtomType mGroupingType;
|
|
FallibleTArray<CencSampleEncryptionInfoEntry> mEntries;
|
|
|
|
protected:
|
|
Result<Ok, nsresult> Parse(Box& aBox);
|
|
};
|
|
|
|
// An audio/video entry from the sample description box (stsd). Only the
// encrypted-or-not state is stored, so audio and video do not need separate
// specialized classes. Most of the parsing of these entries is currently done
// by mp4parse-rust, but the moof parser has to know which entries are
// encrypted when it parses the track fragment header (tfhd).
struct SampleDescriptionEntry {
  bool mIsEncryptedEntry{false};
};
|
|
|
|
// Empty tag type: the variant alternative that means "parse every track".
struct ParseAllTracks {};
|
|
|
|
// Either ParseAllTracks, or the uint32_t id of the single track to parse.
using TrackParseMode = Variant<ParseAllTracks, uint32_t>;
|
|
|
|
class Moof final : public Atom {
|
|
public:
|
|
Moof(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
|
|
Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
|
|
uint64_t* aDecodeTime, bool aIsAudio,
|
|
nsTArray<TrackEndCts>& aTracksEndCts);
|
|
bool GetAuxInfo(AtomType aType, FallibleTArray<MediaByteRange>* aByteRanges);
|
|
void FixRounding(const Moof& aMoof);
|
|
|
|
mozilla::MediaByteRange mRange;
|
|
mozilla::MediaByteRange mMdatRange;
|
|
MP4Interval<Microseconds> mTimeRange;
|
|
FallibleTArray<Sample> mIndex;
|
|
|
|
FallibleTArray<CencSampleEncryptionInfoEntry>
|
|
mFragmentSampleEncryptionInfoEntries;
|
|
FallibleTArray<SampleToGroupEntry> mFragmentSampleToGroupEntries;
|
|
|
|
Tfhd mTfhd;
|
|
FallibleTArray<Saiz> mSaizs;
|
|
FallibleTArray<Saio> mSaios;
|
|
nsTArray<nsTArray<uint8_t>> mPsshes;
|
|
|
|
private:
|
|
// aDecodeTime is updated to the end of the parsed TRAF on return.
|
|
void ParseTraf(Box& aBox, const TrackParseMode& aTrackParseMode, Trex& aTrex,
|
|
Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf,
|
|
uint64_t* aDecodeTime, bool aIsAudio);
|
|
// aDecodeTime is updated to the end of the parsed TRUN on return.
|
|
Result<Ok, nsresult> ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd,
|
|
Edts& aEdts, uint64_t* aDecodeTime,
|
|
bool aIsAudio);
|
|
// Process the sample auxiliary information used by common encryption.
|
|
// aScheme is used to select the appropriate auxiliary information and should
|
|
// be set based on the encryption scheme used by the track being processed.
|
|
// Note, the term cenc here refers to the standard, not the specific scheme
|
|
// from that standard. I.e. this function is used to handle up auxiliary
|
|
// information from the cenc and cbcs schemes.
|
|
bool ProcessCencAuxInfo(AtomType aScheme);
|
|
uint64_t mMaxRoundingError;
|
|
};
|
|
|
|
// Declares MoofParser to the DecoderDoctor logging machinery, which the class
// below uses through DecoderDoctorLifeLogger<MoofParser>.
// NOTE(review): the macro is defined elsewhere - confirm its exact effect.
DDLoggedTypeDeclName(MoofParser);
|
|
|
|
// Parses the metadata atoms and movie fragments of a ByteStream, and keeps
// the resulting Moofs together with the track-level state needed to parse
// them.
class MoofParser : public DecoderDoctorLifeLogger<MoofParser> {
 public:
  // aTrackParseMode selects either a single track id or all tracks. When a
  // single track id is given, mTrex is created with that id; otherwise mTrex
  // starts out with id 0.
  MoofParser(ByteStream* aSource, const TrackParseMode& aTrackParseMode,
             bool aIsAudio)
      : mSource(aSource),
        mOffset(0),
        mTrex(aTrackParseMode.is<uint32_t>() ? aTrackParseMode.as<uint32_t>()
                                             : 0),
        mIsAudio(aIsAudio),
        mLastDecodeTime(0),
        mTrackParseMode(aTrackParseMode) {
    // Parsing multiple tracks with one parser is a workaround for calculating
    // the composition range for MSE. Ideally we would store an array of
    // tracks, each with its own metadata.
    DDLINKCHILD("source", aSource);
  }

  bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges);

  // If *aCanEvict is true on entry, all moofs already parsed are removed from
  // the index before it is rebuilt. On return, *aCanEvict is true if some
  // moofs were removed.
  bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges,
                              bool* aCanEvict);

  bool RebuildFragmentedIndex(BoxContext& aContext);

  MP4Interval<Microseconds> GetCompositionRange(
      const mozilla::MediaByteRangeSet& aByteRanges);

  bool ReachedEnd();

  // Parsers for the individual container boxes, each named after the box it
  // handles.
  void ParseMoov(Box& aBox);
  void ParseTrak(Box& aBox);
  void ParseMdia(Box& aBox);
  void ParseMvex(Box& aBox);

  void ParseMinf(Box& aBox);
  void ParseStbl(Box& aBox);
  void ParseStsd(Box& aBox);
  void ParseEncrypted(Box& aBox);

  bool BlockingReadNextMoof();

  already_AddRefed<mozilla::MediaByteBuffer> Metadata();
  MediaByteRange FirstCompleteMediaSegment();
  MediaByteRange FirstCompleteMediaHeader();

  mozilla::MediaByteRange mInitRange;
  RefPtr<ByteStream> mSource;
  uint64_t mOffset;
  Mvhd mMvhd;
  Mdhd mMdhd;
  Trex mTrex;
  Tfdt mTfdt;
  Edts mEdts;
  Sinf mSinf;

  FallibleTArray<CencSampleEncryptionInfoEntry>
      mTrackSampleEncryptionInfoEntries;
  FallibleTArray<SampleToGroupEntry> mTrackSampleToGroupEntries;
  FallibleTArray<SampleDescriptionEntry> mSampleDescriptions;

  // Mutable access to the list of parsed Moofs.
  nsTArray<Moof>& Moofs() { return mMoofs; }

 private:
  void ScanForMetadata(mozilla::MediaByteRange& aMoov);

  nsTArray<Moof> mMoofs;
  nsTArray<MediaByteRange> mMediaRanges;
  nsTArray<TrackEndCts> mTracksEndCts;
  bool mIsAudio;
  uint64_t mLastDecodeTime;

  // Holds ParseAllTracks in multitrack mode, otherwise the track_id of the
  // one track being parsed. When a specific track is parsed, mTrex should
  // carry an id equal to mTrackParseMode.as<uint32_t>(); 0 is accepted as a
  // track id in that case, because although the spec forbids it such mp4s
  // exist in the wild. In the ParseAllTracks case mTrex may carry an
  // arbitrary id, depending on the tracks being parsed.
  const TrackParseMode mTrackParseMode;
};
|
|
} // namespace mozilla
|
|
|
|
#endif
|