зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1168040: Part2. Properly handle MP4 time offset in MoofParser. r=kentuckyfriedtakahe
This commit is contained in:
Родитель
34ab4962db
Коммит
9a7876feb4
|
@ -228,7 +228,7 @@ var gPlayTests = [
|
|||
{ name:"gizmo.mp4", type:"video/mp4", duration:5.56 },
|
||||
// Test playback of a MP4 file with a non-zero start time (and audio starting
|
||||
// a second later).
|
||||
{ name:"bipbop-lateaudio.mp4", type:"video/mp4", duration:2.401 },
|
||||
{ name:"bipbop-lateaudio.mp4", type:"video/mp4" },
|
||||
|
||||
{ name:"small-shot.m4a", type:"audio/mp4", duration:0.29 },
|
||||
{ name:"small-shot.mp3", type:"audio/mpeg", duration:0.27 },
|
||||
|
|
|
@ -68,7 +68,8 @@ private:
|
|||
|
||||
static inline bool
|
||||
ConvertIndex(FallibleTArray<Index::Indice>& aDest,
|
||||
const stagefright::Vector<stagefright::MediaSource::Indice>& aIndex)
|
||||
const stagefright::Vector<stagefright::MediaSource::Indice>& aIndex,
|
||||
int64_t aMediaTime)
|
||||
{
|
||||
if (!aDest.SetCapacity(aIndex.size())) {
|
||||
return false;
|
||||
|
@ -78,8 +79,8 @@ ConvertIndex(FallibleTArray<Index::Indice>& aDest,
|
|||
const stagefright::MediaSource::Indice& s_indice = aIndex[i];
|
||||
indice.start_offset = s_indice.start_offset;
|
||||
indice.end_offset = s_indice.end_offset;
|
||||
indice.start_composition = s_indice.start_composition;
|
||||
indice.end_composition = s_indice.end_composition;
|
||||
indice.start_composition = s_indice.start_composition - aMediaTime;
|
||||
indice.end_composition = s_indice.end_composition - aMediaTime;
|
||||
indice.sync = s_indice.sync;
|
||||
MOZ_ALWAYS_TRUE(aDest.AppendElement(indice));
|
||||
}
|
||||
|
@ -248,7 +249,13 @@ MP4Metadata::ReadTrackIndex(FallibleTArray<Index::Indice>& aDest, mozilla::Track
|
|||
if (!track.get() || track->start() != OK) {
|
||||
return false;
|
||||
}
|
||||
bool rv = ConvertIndex(aDest, track->exportIndex());
|
||||
sp<MetaData> metadata =
|
||||
mPrivate->mMetadataExtractor->getTrackMetaData(trackNumber);
|
||||
int64_t mediaTime;
|
||||
if (!metadata->findInt64(kKeyMediaTime, &mediaTime)) {
|
||||
mediaTime = 0;
|
||||
}
|
||||
bool rv = ConvertIndex(aDest, track->exportIndex(), mediaTime);
|
||||
|
||||
track->stop();
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ MoofParser::RebuildFragmentedIndex(BoxContext& aContext)
|
|||
mInitRange = MediaByteRange(0, box.Range().mEnd);
|
||||
ParseMoov(box);
|
||||
} else if (box.IsType("moof")) {
|
||||
Moof moof(box, mTrex, mMdhd, mEdts, mSinf, mIsAudio);
|
||||
Moof moof(box, mTrex, mMvhd, mMdhd, mEdts, mSinf, mIsAudio);
|
||||
|
||||
if (!moof.IsValid() && !box.Next().IsAvailable()) {
|
||||
// Moof isn't valid abort search for now.
|
||||
|
@ -171,7 +171,9 @@ void
|
|||
MoofParser::ParseMoov(Box& aBox)
|
||||
{
|
||||
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
|
||||
if (box.IsType("trak")) {
|
||||
if (box.IsType("mvhd")) {
|
||||
mMvhd = Mvhd(box);
|
||||
} else if (box.IsType("trak")) {
|
||||
ParseTrak(box);
|
||||
} else if (box.IsType("mvex")) {
|
||||
ParseMvex(box);
|
||||
|
@ -190,7 +192,8 @@ MoofParser::ParseTrak(Box& aBox)
|
|||
if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) {
|
||||
ParseMdia(box, tkhd);
|
||||
}
|
||||
} else if (box.IsType("edts")) {
|
||||
} else if (box.IsType("edts") &&
|
||||
(!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId)) {
|
||||
mEdts = Edts(box);
|
||||
}
|
||||
}
|
||||
|
@ -268,13 +271,13 @@ MoofParser::ParseEncrypted(Box& aBox)
|
|||
}
|
||||
}
|
||||
|
||||
Moof::Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
|
||||
Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
|
||||
: mRange(aBox.Range())
|
||||
, mMaxRoundingError(35000)
|
||||
{
|
||||
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
|
||||
if (box.IsType("traf")) {
|
||||
ParseTraf(box, aTrex, aMdhd, aEdts, aSinf, aIsAudio);
|
||||
ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aIsAudio);
|
||||
}
|
||||
}
|
||||
if (IsValid()) {
|
||||
|
@ -347,7 +350,7 @@ Moof::ProcessCenc()
|
|||
}
|
||||
|
||||
void
|
||||
Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
|
||||
Moof::ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
|
||||
{
|
||||
Tfhd tfhd(aTrex);
|
||||
Tfdt tfdt;
|
||||
|
@ -375,7 +378,7 @@ Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, b
|
|||
uint64_t decodeTime = tfdt.mBaseMediaDecodeTime;
|
||||
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
|
||||
if (box.IsType("trun")) {
|
||||
if (ParseTrun(box, tfhd, aMdhd, aEdts, &decodeTime, aIsAudio)) {
|
||||
if (ParseTrun(box, tfhd, aMvhd, aMdhd, aEdts, &decodeTime, aIsAudio)) {
|
||||
mValid = true;
|
||||
} else {
|
||||
mValid = false;
|
||||
|
@ -408,11 +411,12 @@ public:
|
|||
};
|
||||
|
||||
bool
|
||||
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio)
|
||||
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio)
|
||||
{
|
||||
if (!aTfhd.IsValid() || !aMdhd.IsValid() || !aEdts.IsValid()) {
|
||||
LOG(Moof, "Invalid dependencies: aTfhd(%d) aMdhd(%d) aEdts(%d)",
|
||||
aTfhd.IsValid(), aMdhd.IsValid(), !aEdts.IsValid());
|
||||
if (!aTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() ||
|
||||
!aEdts.IsValid()) {
|
||||
LOG(Moof, "Invalid dependencies: aTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)",
|
||||
aTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), !aEdts.IsValid());
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -484,8 +488,8 @@ Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDec
|
|||
|
||||
sample.mDecodeTime = aMdhd.ToMicroseconds(decodeTime);
|
||||
sample.mCompositionRange = Interval<Microseconds>(
|
||||
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset - aEdts.mMediaStart),
|
||||
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart));
|
||||
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset),
|
||||
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset));
|
||||
decodeTime += sampleDuration;
|
||||
|
||||
// Sometimes audio streams don't properly mark their samples as keyframes,
|
||||
|
@ -551,7 +555,7 @@ Tkhd::Tkhd(Box& aBox)
|
|||
mValid = true;
|
||||
}
|
||||
|
||||
Mdhd::Mdhd(Box& aBox)
|
||||
Mvhd::Mvhd(Box& aBox)
|
||||
{
|
||||
BoxReader reader(aBox);
|
||||
if (!reader->CanReadType<uint32_t>()) {
|
||||
|
@ -561,9 +565,9 @@ Mdhd::Mdhd(Box& aBox)
|
|||
uint32_t flags = reader->ReadU32();
|
||||
uint8_t version = flags >> 24;
|
||||
size_t need =
|
||||
3*(version ? sizeof(int64_t) : sizeof(int32_t)) + 2*sizeof(uint32_t);
|
||||
3*(version ? sizeof(int64_t) : sizeof(int32_t)) + sizeof(uint32_t);
|
||||
if (reader->Remaining() < need) {
|
||||
LOG(Mdhd, "Incomplete Box (have:%lld need:%lld)",
|
||||
LOG(Mvhd, "Incomplete Box (have:%lld need:%lld)",
|
||||
(uint64_t)reader->Remaining(), (uint64_t)need);
|
||||
return;
|
||||
}
|
||||
|
@ -578,12 +582,18 @@ Mdhd::Mdhd(Box& aBox)
|
|||
mModificationTime = reader->ReadU64();
|
||||
mTimescale = reader->ReadU32();
|
||||
mDuration = reader->ReadU64();
|
||||
} else {
|
||||
reader->DiscardRemaining();
|
||||
return;
|
||||
}
|
||||
// language and pre_defined=0
|
||||
reader->ReadU32();
|
||||
if (mTimescale) {
|
||||
mValid = true;
|
||||
}
|
||||
// More stuff that we don't care about
|
||||
reader->DiscardRemaining();
|
||||
mValid = true;
|
||||
}
|
||||
|
||||
// Constructs an Mdhd from aBox by forwarding the box to the Mvhd base-class
// constructor; this constructor adds no parsing or state of its own.
Mdhd::Mdhd(Box& aBox)
|
||||
: Mvhd(aBox)
|
||||
{
|
||||
}
|
||||
|
||||
Trex::Trex(Box& aBox)
|
||||
|
@ -672,6 +682,7 @@ Tfdt::Tfdt(Box& aBox)
|
|||
|
||||
Edts::Edts(Box& aBox)
|
||||
: mMediaStart(0)
|
||||
, mEmptyOffset(0)
|
||||
{
|
||||
Box child = aBox.FirstChild();
|
||||
if (!child.IsType("elst")) {
|
||||
|
@ -692,22 +703,31 @@ Edts::Edts(Box& aBox)
|
|||
(uint64_t)reader->Remaining(), (uint64_t)need);
|
||||
return;
|
||||
}
|
||||
bool emptyEntry = false;
|
||||
uint32_t entryCount = reader->ReadU32();
|
||||
NS_ASSERTION(entryCount == 1, "Can't handle videos with multiple edits");
|
||||
if (entryCount != 1) {
|
||||
reader->DiscardRemaining();
|
||||
return;
|
||||
for (uint32_t i = 0; i < entryCount; i++) {
|
||||
uint64_t segment_duration;
|
||||
int64_t media_time;
|
||||
if (version == 1) {
|
||||
segment_duration = reader->ReadU64();
|
||||
media_time = reader->Read64();
|
||||
} else {
|
||||
segment_duration = reader->ReadU32();
|
||||
media_time = reader->Read32();
|
||||
}
|
||||
if (media_time == -1 && i) {
|
||||
LOG(Edts, "Multiple empty edit, not handled");
|
||||
} else if (media_time == -1) {
|
||||
mEmptyOffset = segment_duration;
|
||||
emptyEntry = true;
|
||||
} else if (i > 1 || (i > 0 && !emptyEntry)) {
|
||||
LOG(Edts, "More than one edit entry, not handled. A/V sync will be wrong");
|
||||
break;
|
||||
} else {
|
||||
mMediaStart = media_time;
|
||||
}
|
||||
reader->ReadU32(); // media_rate_integer and media_rate_fraction
|
||||
}
|
||||
|
||||
uint64_t segment_duration;
|
||||
if (version == 1) {
|
||||
segment_duration = reader->ReadU64();
|
||||
mMediaStart = reader->Read64();
|
||||
} else {
|
||||
segment_duration = reader->ReadU32();
|
||||
mMediaStart = reader->Read32();
|
||||
}
|
||||
reader->DiscardRemaining();
|
||||
}
|
||||
|
||||
Saiz::Saiz(Box& aBox, AtomType aDefaultType)
|
||||
|
|
|
@ -18,35 +18,17 @@ class Box;
|
|||
class BoxContext;
|
||||
class Moof;
|
||||
|
||||
class Tkhd : public Atom
|
||||
class Mvhd : public Atom
|
||||
{
|
||||
public:
|
||||
Tkhd()
|
||||
: mCreationTime(0)
|
||||
, mModificationTime(0)
|
||||
, mTrackId(0)
|
||||
, mDuration(0)
|
||||
{
|
||||
}
|
||||
explicit Tkhd(Box& aBox);
|
||||
|
||||
uint64_t mCreationTime;
|
||||
uint64_t mModificationTime;
|
||||
uint32_t mTrackId;
|
||||
uint64_t mDuration;
|
||||
};
|
||||
|
||||
class Mdhd : public Atom
|
||||
{
|
||||
public:
|
||||
Mdhd()
|
||||
Mvhd()
|
||||
: mCreationTime(0)
|
||||
, mModificationTime(0)
|
||||
, mTimescale(0)
|
||||
, mDuration(0)
|
||||
{
|
||||
}
|
||||
explicit Mdhd(Box& aBox);
|
||||
explicit Mvhd(Box& aBox);
|
||||
|
||||
Microseconds ToMicroseconds(int64_t aTimescaleUnits)
|
||||
{
|
||||
|
@ -59,6 +41,25 @@ public:
|
|||
uint64_t mDuration;
|
||||
};
|
||||
|
||||
// Atom for a "tkhd" box. Derives from Mvhd, so it carries the base class's
// members in addition to the track id declared here.
// The default constructor zero-initializes mTrackId; the Box constructor is
// defined out of line.
class Tkhd : public Mvhd
|
||||
{
|
||||
public:
|
||||
Tkhd()
|
||||
: mTrackId(0)
|
||||
{
|
||||
}
|
||||
explicit Tkhd(Box& aBox);
|
||||
|
||||
// Compared against Trex::mTrackId by MoofParser::ParseTrak to decide whether
// this track's boxes should be parsed.
uint32_t mTrackId;
|
||||
};
|
||||
|
||||
// Atom for an "mdhd" box. Declares no members of its own: all state and
// helpers (e.g. ToMicroseconds) come from the Mvhd base class, and the Box
// constructor simply forwards to Mvhd(aBox).
class Mdhd : public Mvhd
|
||||
{
|
||||
public:
|
||||
Mdhd() = default;
|
||||
explicit Mdhd(Box& aBox);
|
||||
};
|
||||
|
||||
class Trex : public Atom
|
||||
{
|
||||
public:
|
||||
|
@ -113,6 +114,7 @@ class Edts : public Atom
|
|||
public:
|
||||
Edts()
|
||||
: mMediaStart(0)
|
||||
, mEmptyOffset(0)
|
||||
{
|
||||
}
|
||||
explicit Edts(Box& aBox);
|
||||
|
@ -123,6 +125,7 @@ public:
|
|||
}
|
||||
|
||||
int64_t mMediaStart;
|
||||
int64_t mEmptyOffset;
|
||||
};
|
||||
|
||||
struct Sample
|
||||
|
@ -168,7 +171,7 @@ private:
|
|||
class Moof : public Atom
|
||||
{
|
||||
public:
|
||||
Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
|
||||
Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
|
||||
bool GetAuxInfo(AtomType aType, nsTArray<MediaByteRange>* aByteRanges);
|
||||
void FixRounding(const Moof& aMoof);
|
||||
|
||||
|
@ -181,9 +184,9 @@ public:
|
|||
nsTArray<Saio> mSaios;
|
||||
|
||||
private:
|
||||
void ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
|
||||
void ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
|
||||
// aDecodeTime is updated to the end of the parsed TRUN on return.
|
||||
bool ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio);
|
||||
bool ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio);
|
||||
void ParseSaiz(Box& aBox);
|
||||
void ParseSaio(Box& aBox);
|
||||
bool ProcessCenc();
|
||||
|
@ -227,6 +230,7 @@ public:
|
|||
nsRefPtr<Stream> mSource;
|
||||
uint64_t mOffset;
|
||||
nsTArray<uint64_t> mMoofOffsets;
|
||||
Mvhd mMvhd;
|
||||
Mdhd mMdhd;
|
||||
Trex mTrex;
|
||||
Tfdt mTfdt;
|
||||
|
|
Загрузка…
Ссылка в новой задаче