Bug 1168040: Part2. Properly handle MP4 time offset in MoofParser. r=kentuckyfriedtakahe

This commit is contained in:
Jean-Yves Avenard 2015-06-10 19:38:13 +10:00
Родитель 34ab4962db
Коммит 9a7876feb4
4 изменённых файлов: 95 добавлений и 64 удалений

Просмотреть файл

@ -228,7 +228,7 @@ var gPlayTests = [
{ name:"gizmo.mp4", type:"video/mp4", duration:5.56 }, { name:"gizmo.mp4", type:"video/mp4", duration:5.56 },
// Test playback of a MP4 file with a non-zero start time (and audio starting // Test playback of a MP4 file with a non-zero start time (and audio starting
// a second later). // a second later).
{ name:"bipbop-lateaudio.mp4", type:"video/mp4", duration:2.401 }, { name:"bipbop-lateaudio.mp4", type:"video/mp4" },
{ name:"small-shot.m4a", type:"audio/mp4", duration:0.29 }, { name:"small-shot.m4a", type:"audio/mp4", duration:0.29 },
{ name:"small-shot.mp3", type:"audio/mpeg", duration:0.27 }, { name:"small-shot.mp3", type:"audio/mpeg", duration:0.27 },

Просмотреть файл

@ -68,7 +68,8 @@ private:
static inline bool static inline bool
ConvertIndex(FallibleTArray<Index::Indice>& aDest, ConvertIndex(FallibleTArray<Index::Indice>& aDest,
const stagefright::Vector<stagefright::MediaSource::Indice>& aIndex) const stagefright::Vector<stagefright::MediaSource::Indice>& aIndex,
int64_t aMediaTime)
{ {
if (!aDest.SetCapacity(aIndex.size())) { if (!aDest.SetCapacity(aIndex.size())) {
return false; return false;
@ -78,8 +79,8 @@ ConvertIndex(FallibleTArray<Index::Indice>& aDest,
const stagefright::MediaSource::Indice& s_indice = aIndex[i]; const stagefright::MediaSource::Indice& s_indice = aIndex[i];
indice.start_offset = s_indice.start_offset; indice.start_offset = s_indice.start_offset;
indice.end_offset = s_indice.end_offset; indice.end_offset = s_indice.end_offset;
indice.start_composition = s_indice.start_composition; indice.start_composition = s_indice.start_composition - aMediaTime;
indice.end_composition = s_indice.end_composition; indice.end_composition = s_indice.end_composition - aMediaTime;
indice.sync = s_indice.sync; indice.sync = s_indice.sync;
MOZ_ALWAYS_TRUE(aDest.AppendElement(indice)); MOZ_ALWAYS_TRUE(aDest.AppendElement(indice));
} }
@ -248,7 +249,13 @@ MP4Metadata::ReadTrackIndex(FallibleTArray<Index::Indice>& aDest, mozilla::Track
if (!track.get() || track->start() != OK) { if (!track.get() || track->start() != OK) {
return false; return false;
} }
bool rv = ConvertIndex(aDest, track->exportIndex()); sp<MetaData> metadata =
mPrivate->mMetadataExtractor->getTrackMetaData(trackNumber);
int64_t mediaTime;
if (!metadata->findInt64(kKeyMediaTime, &mediaTime)) {
mediaTime = 0;
}
bool rv = ConvertIndex(aDest, track->exportIndex(), mediaTime);
track->stop(); track->stop();

Просмотреть файл

@ -48,7 +48,7 @@ MoofParser::RebuildFragmentedIndex(BoxContext& aContext)
mInitRange = MediaByteRange(0, box.Range().mEnd); mInitRange = MediaByteRange(0, box.Range().mEnd);
ParseMoov(box); ParseMoov(box);
} else if (box.IsType("moof")) { } else if (box.IsType("moof")) {
Moof moof(box, mTrex, mMdhd, mEdts, mSinf, mIsAudio); Moof moof(box, mTrex, mMvhd, mMdhd, mEdts, mSinf, mIsAudio);
if (!moof.IsValid() && !box.Next().IsAvailable()) { if (!moof.IsValid() && !box.Next().IsAvailable()) {
// Moof isn't valid abort search for now. // Moof isn't valid abort search for now.
@ -171,7 +171,9 @@ void
MoofParser::ParseMoov(Box& aBox) MoofParser::ParseMoov(Box& aBox)
{ {
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("trak")) { if (box.IsType("mvhd")) {
mMvhd = Mvhd(box);
} else if (box.IsType("trak")) {
ParseTrak(box); ParseTrak(box);
} else if (box.IsType("mvex")) { } else if (box.IsType("mvex")) {
ParseMvex(box); ParseMvex(box);
@ -190,7 +192,8 @@ MoofParser::ParseTrak(Box& aBox)
if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) { if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) {
ParseMdia(box, tkhd); ParseMdia(box, tkhd);
} }
} else if (box.IsType("edts")) { } else if (box.IsType("edts") &&
(!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId)) {
mEdts = Edts(box); mEdts = Edts(box);
} }
} }
@ -268,13 +271,13 @@ MoofParser::ParseEncrypted(Box& aBox)
} }
} }
Moof::Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio) Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
: mRange(aBox.Range()) : mRange(aBox.Range())
, mMaxRoundingError(35000) , mMaxRoundingError(35000)
{ {
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("traf")) { if (box.IsType("traf")) {
ParseTraf(box, aTrex, aMdhd, aEdts, aSinf, aIsAudio); ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aIsAudio);
} }
} }
if (IsValid()) { if (IsValid()) {
@ -347,7 +350,7 @@ Moof::ProcessCenc()
} }
void void
Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio) Moof::ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
{ {
Tfhd tfhd(aTrex); Tfhd tfhd(aTrex);
Tfdt tfdt; Tfdt tfdt;
@ -375,7 +378,7 @@ Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, b
uint64_t decodeTime = tfdt.mBaseMediaDecodeTime; uint64_t decodeTime = tfdt.mBaseMediaDecodeTime;
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("trun")) { if (box.IsType("trun")) {
if (ParseTrun(box, tfhd, aMdhd, aEdts, &decodeTime, aIsAudio)) { if (ParseTrun(box, tfhd, aMvhd, aMdhd, aEdts, &decodeTime, aIsAudio)) {
mValid = true; mValid = true;
} else { } else {
mValid = false; mValid = false;
@ -408,11 +411,12 @@ public:
}; };
bool bool
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio) Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio)
{ {
if (!aTfhd.IsValid() || !aMdhd.IsValid() || !aEdts.IsValid()) { if (!aTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() ||
LOG(Moof, "Invalid dependencies: aTfhd(%d) aMdhd(%d) aEdts(%d)", !aEdts.IsValid()) {
aTfhd.IsValid(), aMdhd.IsValid(), !aEdts.IsValid()); LOG(Moof, "Invalid dependencies: aTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)",
aTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), !aEdts.IsValid());
return false; return false;
} }
@ -484,8 +488,8 @@ Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDec
sample.mDecodeTime = aMdhd.ToMicroseconds(decodeTime); sample.mDecodeTime = aMdhd.ToMicroseconds(decodeTime);
sample.mCompositionRange = Interval<Microseconds>( sample.mCompositionRange = Interval<Microseconds>(
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset - aEdts.mMediaStart), aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset),
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart)); aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset));
decodeTime += sampleDuration; decodeTime += sampleDuration;
// Sometimes audio streams don't properly mark their samples as keyframes, // Sometimes audio streams don't properly mark their samples as keyframes,
@ -551,7 +555,7 @@ Tkhd::Tkhd(Box& aBox)
mValid = true; mValid = true;
} }
Mdhd::Mdhd(Box& aBox) Mvhd::Mvhd(Box& aBox)
{ {
BoxReader reader(aBox); BoxReader reader(aBox);
if (!reader->CanReadType<uint32_t>()) { if (!reader->CanReadType<uint32_t>()) {
@ -561,9 +565,9 @@ Mdhd::Mdhd(Box& aBox)
uint32_t flags = reader->ReadU32(); uint32_t flags = reader->ReadU32();
uint8_t version = flags >> 24; uint8_t version = flags >> 24;
size_t need = size_t need =
3*(version ? sizeof(int64_t) : sizeof(int32_t)) + 2*sizeof(uint32_t); 3*(version ? sizeof(int64_t) : sizeof(int32_t)) + sizeof(uint32_t);
if (reader->Remaining() < need) { if (reader->Remaining() < need) {
LOG(Mdhd, "Incomplete Box (have:%lld need:%lld)", LOG(Mvhd, "Incomplete Box (have:%lld need:%lld)",
(uint64_t)reader->Remaining(), (uint64_t)need); (uint64_t)reader->Remaining(), (uint64_t)need);
return; return;
} }
@ -578,12 +582,18 @@ Mdhd::Mdhd(Box& aBox)
mModificationTime = reader->ReadU64(); mModificationTime = reader->ReadU64();
mTimescale = reader->ReadU32(); mTimescale = reader->ReadU32();
mDuration = reader->ReadU64(); mDuration = reader->ReadU64();
} else {
reader->DiscardRemaining();
return;
} }
// language and pre_defined=0 // More stuff that we don't care about
reader->ReadU32(); reader->DiscardRemaining();
if (mTimescale) { mValid = true;
mValid = true; }
}
Mdhd::Mdhd(Box& aBox)
: Mvhd(aBox)
{
} }
Trex::Trex(Box& aBox) Trex::Trex(Box& aBox)
@ -672,6 +682,7 @@ Tfdt::Tfdt(Box& aBox)
Edts::Edts(Box& aBox) Edts::Edts(Box& aBox)
: mMediaStart(0) : mMediaStart(0)
, mEmptyOffset(0)
{ {
Box child = aBox.FirstChild(); Box child = aBox.FirstChild();
if (!child.IsType("elst")) { if (!child.IsType("elst")) {
@ -692,22 +703,31 @@ Edts::Edts(Box& aBox)
(uint64_t)reader->Remaining(), (uint64_t)need); (uint64_t)reader->Remaining(), (uint64_t)need);
return; return;
} }
bool emptyEntry = false;
uint32_t entryCount = reader->ReadU32(); uint32_t entryCount = reader->ReadU32();
NS_ASSERTION(entryCount == 1, "Can't handle videos with multiple edits"); for (uint32_t i = 0; i < entryCount; i++) {
if (entryCount != 1) { uint64_t segment_duration;
reader->DiscardRemaining(); int64_t media_time;
return; if (version == 1) {
segment_duration = reader->ReadU64();
media_time = reader->Read64();
} else {
segment_duration = reader->ReadU32();
media_time = reader->Read32();
}
if (media_time == -1 && i) {
LOG(Edts, "Multiple empty edit, not handled");
} else if (media_time == -1) {
mEmptyOffset = segment_duration;
emptyEntry = true;
} else if (i > 1 || (i > 0 && !emptyEntry)) {
LOG(Edts, "More than one edit entry, not handled. A/V sync will be wrong");
break;
} else {
mMediaStart = media_time;
}
reader->ReadU32(); // media_rate_integer and media_rate_fraction
} }
uint64_t segment_duration;
if (version == 1) {
segment_duration = reader->ReadU64();
mMediaStart = reader->Read64();
} else {
segment_duration = reader->ReadU32();
mMediaStart = reader->Read32();
}
reader->DiscardRemaining();
} }
Saiz::Saiz(Box& aBox, AtomType aDefaultType) Saiz::Saiz(Box& aBox, AtomType aDefaultType)

Просмотреть файл

@ -18,35 +18,17 @@ class Box;
class BoxContext; class BoxContext;
class Moof; class Moof;
class Tkhd : public Atom class Mvhd : public Atom
{ {
public: public:
Tkhd() Mvhd()
: mCreationTime(0)
, mModificationTime(0)
, mTrackId(0)
, mDuration(0)
{
}
explicit Tkhd(Box& aBox);
uint64_t mCreationTime;
uint64_t mModificationTime;
uint32_t mTrackId;
uint64_t mDuration;
};
class Mdhd : public Atom
{
public:
Mdhd()
: mCreationTime(0) : mCreationTime(0)
, mModificationTime(0) , mModificationTime(0)
, mTimescale(0) , mTimescale(0)
, mDuration(0) , mDuration(0)
{ {
} }
explicit Mdhd(Box& aBox); explicit Mvhd(Box& aBox);
Microseconds ToMicroseconds(int64_t aTimescaleUnits) Microseconds ToMicroseconds(int64_t aTimescaleUnits)
{ {
@ -59,6 +41,25 @@ public:
uint64_t mDuration; uint64_t mDuration;
}; };
// Header atom that adds a track ID to the fields inherited from Mvhd
// (creation time, modification time, timescale and duration).
// Going by the class name this models the MP4 'tkhd' box.
class Tkhd : public Mvhd
{
public:
  // Builds an atom with a zero track ID; the inherited fields come from
  // Mvhd's default constructor.
  Tkhd() = default;

  // Builds the atom from aBox. Defined out of line.
  explicit Tkhd(Box& aBox);

  // ID of the track this header belongs to. The default member initializer
  // guarantees a zero value on every construction path, including a Box
  // constructor that returns before assigning it.
  uint32_t mTrackId = 0;
};
// Header atom that exposes exactly the fields and helpers of Mvhd under its
// own type. It adds no members of its own.
// Going by the class name this models the MP4 'mdhd' box.
class Mdhd : public Mvhd
{
public:
  // Default state is whatever Mvhd's default constructor produces.
  Mdhd() = default;

  // Builds the atom from aBox. Defined out of line.
  explicit Mdhd(Box& aBox);
};
class Trex : public Atom class Trex : public Atom
{ {
public: public:
@ -113,6 +114,7 @@ class Edts : public Atom
public: public:
Edts() Edts()
: mMediaStart(0) : mMediaStart(0)
, mEmptyOffset(0)
{ {
} }
explicit Edts(Box& aBox); explicit Edts(Box& aBox);
@ -123,6 +125,7 @@ public:
} }
int64_t mMediaStart; int64_t mMediaStart;
int64_t mEmptyOffset;
}; };
struct Sample struct Sample
@ -168,7 +171,7 @@ private:
class Moof : public Atom class Moof : public Atom
{ {
public: public:
Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio); Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
bool GetAuxInfo(AtomType aType, nsTArray<MediaByteRange>* aByteRanges); bool GetAuxInfo(AtomType aType, nsTArray<MediaByteRange>* aByteRanges);
void FixRounding(const Moof& aMoof); void FixRounding(const Moof& aMoof);
@ -181,9 +184,9 @@ public:
nsTArray<Saio> mSaios; nsTArray<Saio> mSaios;
private: private:
void ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio); void ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
// aDecodeTime is updated to the end of the parsed TRUN on return. // aDecodeTime is updated to the end of the parsed TRUN on return.
bool ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio); bool ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio);
void ParseSaiz(Box& aBox); void ParseSaiz(Box& aBox);
void ParseSaio(Box& aBox); void ParseSaio(Box& aBox);
bool ProcessCenc(); bool ProcessCenc();
@ -227,6 +230,7 @@ public:
nsRefPtr<Stream> mSource; nsRefPtr<Stream> mSource;
uint64_t mOffset; uint64_t mOffset;
nsTArray<uint64_t> mMoofOffsets; nsTArray<uint64_t> mMoofOffsets;
Mvhd mMvhd;
Mdhd mMdhd; Mdhd mMdhd;
Trex mTrex; Trex mTrex;
Tfdt mTfdt; Tfdt mTfdt;