Bug 1168040: Part2. Properly handle MP4 time offset in MoofParser. r=kentuckyfriedtakahe

This commit is contained in:
Jean-Yves Avenard 2015-06-10 19:38:13 +10:00
Родитель 34ab4962db
Коммит 9a7876feb4
4 изменённых файлов: 95 добавлений и 64 удалений

Просмотреть файл

@ -228,7 +228,7 @@ var gPlayTests = [
{ name:"gizmo.mp4", type:"video/mp4", duration:5.56 },
// Test playback of an MP4 file with a non-zero start time (and audio starting
// a second later).
{ name:"bipbop-lateaudio.mp4", type:"video/mp4", duration:2.401 },
{ name:"bipbop-lateaudio.mp4", type:"video/mp4" },
{ name:"small-shot.m4a", type:"audio/mp4", duration:0.29 },
{ name:"small-shot.mp3", type:"audio/mpeg", duration:0.27 },

Просмотреть файл

@ -68,7 +68,8 @@ private:
static inline bool
ConvertIndex(FallibleTArray<Index::Indice>& aDest,
const stagefright::Vector<stagefright::MediaSource::Indice>& aIndex)
const stagefright::Vector<stagefright::MediaSource::Indice>& aIndex,
int64_t aMediaTime)
{
if (!aDest.SetCapacity(aIndex.size())) {
return false;
@ -78,8 +79,8 @@ ConvertIndex(FallibleTArray<Index::Indice>& aDest,
const stagefright::MediaSource::Indice& s_indice = aIndex[i];
indice.start_offset = s_indice.start_offset;
indice.end_offset = s_indice.end_offset;
indice.start_composition = s_indice.start_composition;
indice.end_composition = s_indice.end_composition;
indice.start_composition = s_indice.start_composition - aMediaTime;
indice.end_composition = s_indice.end_composition - aMediaTime;
indice.sync = s_indice.sync;
MOZ_ALWAYS_TRUE(aDest.AppendElement(indice));
}
@ -248,7 +249,13 @@ MP4Metadata::ReadTrackIndex(FallibleTArray<Index::Indice>& aDest, mozilla::Track
if (!track.get() || track->start() != OK) {
return false;
}
bool rv = ConvertIndex(aDest, track->exportIndex());
sp<MetaData> metadata =
mPrivate->mMetadataExtractor->getTrackMetaData(trackNumber);
int64_t mediaTime;
if (!metadata->findInt64(kKeyMediaTime, &mediaTime)) {
mediaTime = 0;
}
bool rv = ConvertIndex(aDest, track->exportIndex(), mediaTime);
track->stop();

Просмотреть файл

@ -48,7 +48,7 @@ MoofParser::RebuildFragmentedIndex(BoxContext& aContext)
mInitRange = MediaByteRange(0, box.Range().mEnd);
ParseMoov(box);
} else if (box.IsType("moof")) {
Moof moof(box, mTrex, mMdhd, mEdts, mSinf, mIsAudio);
Moof moof(box, mTrex, mMvhd, mMdhd, mEdts, mSinf, mIsAudio);
if (!moof.IsValid() && !box.Next().IsAvailable()) {
// Moof isn't valid abort search for now.
@ -171,7 +171,9 @@ void
MoofParser::ParseMoov(Box& aBox)
{
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("trak")) {
if (box.IsType("mvhd")) {
mMvhd = Mvhd(box);
} else if (box.IsType("trak")) {
ParseTrak(box);
} else if (box.IsType("mvex")) {
ParseMvex(box);
@ -190,7 +192,8 @@ MoofParser::ParseTrak(Box& aBox)
if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) {
ParseMdia(box, tkhd);
}
} else if (box.IsType("edts")) {
} else if (box.IsType("edts") &&
(!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId)) {
mEdts = Edts(box);
}
}
@ -268,13 +271,13 @@ MoofParser::ParseEncrypted(Box& aBox)
}
}
Moof::Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
: mRange(aBox.Range())
, mMaxRoundingError(35000)
{
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("traf")) {
ParseTraf(box, aTrex, aMdhd, aEdts, aSinf, aIsAudio);
ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aIsAudio);
}
}
if (IsValid()) {
@ -347,7 +350,7 @@ Moof::ProcessCenc()
}
void
Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
Moof::ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio)
{
Tfhd tfhd(aTrex);
Tfdt tfdt;
@ -375,7 +378,7 @@ Moof::ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, b
uint64_t decodeTime = tfdt.mBaseMediaDecodeTime;
for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) {
if (box.IsType("trun")) {
if (ParseTrun(box, tfhd, aMdhd, aEdts, &decodeTime, aIsAudio)) {
if (ParseTrun(box, tfhd, aMvhd, aMdhd, aEdts, &decodeTime, aIsAudio)) {
mValid = true;
} else {
mValid = false;
@ -408,11 +411,12 @@ public:
};
bool
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio)
Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio)
{
if (!aTfhd.IsValid() || !aMdhd.IsValid() || !aEdts.IsValid()) {
LOG(Moof, "Invalid dependencies: aTfhd(%d) aMdhd(%d) aEdts(%d)",
aTfhd.IsValid(), aMdhd.IsValid(), !aEdts.IsValid());
if (!aTfhd.IsValid() || !aMvhd.IsValid() || !aMdhd.IsValid() ||
!aEdts.IsValid()) {
LOG(Moof, "Invalid dependencies: aTfhd(%d) aMvhd(%d) aMdhd(%d) aEdts(%d)",
aTfhd.IsValid(), aMvhd.IsValid(), aMdhd.IsValid(), !aEdts.IsValid());
return false;
}
@ -484,8 +488,8 @@ Moof::ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDec
sample.mDecodeTime = aMdhd.ToMicroseconds(decodeTime);
sample.mCompositionRange = Interval<Microseconds>(
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset - aEdts.mMediaStart),
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart));
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset),
aMdhd.ToMicroseconds((int64_t)decodeTime + ctsOffset + sampleDuration - aEdts.mMediaStart) + aMvhd.ToMicroseconds(aEdts.mEmptyOffset));
decodeTime += sampleDuration;
// Sometimes audio streams don't properly mark their samples as keyframes,
@ -551,7 +555,7 @@ Tkhd::Tkhd(Box& aBox)
mValid = true;
}
Mdhd::Mdhd(Box& aBox)
Mvhd::Mvhd(Box& aBox)
{
BoxReader reader(aBox);
if (!reader->CanReadType<uint32_t>()) {
@ -561,9 +565,9 @@ Mdhd::Mdhd(Box& aBox)
uint32_t flags = reader->ReadU32();
uint8_t version = flags >> 24;
size_t need =
3*(version ? sizeof(int64_t) : sizeof(int32_t)) + 2*sizeof(uint32_t);
3*(version ? sizeof(int64_t) : sizeof(int32_t)) + sizeof(uint32_t);
if (reader->Remaining() < need) {
LOG(Mdhd, "Incomplete Box (have:%lld need:%lld)",
LOG(Mvhd, "Incomplete Box (have:%lld need:%lld)",
(uint64_t)reader->Remaining(), (uint64_t)need);
return;
}
@ -578,12 +582,18 @@ Mdhd::Mdhd(Box& aBox)
mModificationTime = reader->ReadU64();
mTimescale = reader->ReadU32();
mDuration = reader->ReadU64();
} else {
reader->DiscardRemaining();
return;
}
// language and pre_defined=0
reader->ReadU32();
if (mTimescale) {
mValid = true;
}
// More stuff that we don't care about
reader->DiscardRemaining();
mValid = true;
}
// Constructs an Mdhd from aBox by delegating entirely to the Mvhd(Box&)
// constructor; the body is empty, so no additional fields are read or set here.
Mdhd::Mdhd(Box& aBox)
: Mvhd(aBox)
{
}
Trex::Trex(Box& aBox)
@ -672,6 +682,7 @@ Tfdt::Tfdt(Box& aBox)
Edts::Edts(Box& aBox)
: mMediaStart(0)
, mEmptyOffset(0)
{
Box child = aBox.FirstChild();
if (!child.IsType("elst")) {
@ -692,22 +703,31 @@ Edts::Edts(Box& aBox)
(uint64_t)reader->Remaining(), (uint64_t)need);
return;
}
bool emptyEntry = false;
uint32_t entryCount = reader->ReadU32();
NS_ASSERTION(entryCount == 1, "Can't handle videos with multiple edits");
if (entryCount != 1) {
reader->DiscardRemaining();
return;
for (uint32_t i = 0; i < entryCount; i++) {
uint64_t segment_duration;
int64_t media_time;
if (version == 1) {
segment_duration = reader->ReadU64();
media_time = reader->Read64();
} else {
segment_duration = reader->ReadU32();
media_time = reader->Read32();
}
if (media_time == -1 && i) {
LOG(Edts, "Multiple empty edit, not handled");
} else if (media_time == -1) {
mEmptyOffset = segment_duration;
emptyEntry = true;
} else if (i > 1 || (i > 0 && !emptyEntry)) {
LOG(Edts, "More than one edit entry, not handled. A/V sync will be wrong");
break;
} else {
mMediaStart = media_time;
}
reader->ReadU32(); // media_rate_integer and media_rate_fraction
}
uint64_t segment_duration;
if (version == 1) {
segment_duration = reader->ReadU64();
mMediaStart = reader->Read64();
} else {
segment_duration = reader->ReadU32();
mMediaStart = reader->Read32();
}
reader->DiscardRemaining();
}
Saiz::Saiz(Box& aBox, AtomType aDefaultType)

Просмотреть файл

@ -18,35 +18,17 @@ class Box;
class BoxContext;
class Moof;
class Tkhd : public Atom
class Mvhd : public Atom
{
public:
Tkhd()
: mCreationTime(0)
, mModificationTime(0)
, mTrackId(0)
, mDuration(0)
{
}
explicit Tkhd(Box& aBox);
uint64_t mCreationTime;
uint64_t mModificationTime;
uint32_t mTrackId;
uint64_t mDuration;
};
class Mdhd : public Atom
{
public:
Mdhd()
Mvhd()
: mCreationTime(0)
, mModificationTime(0)
, mTimescale(0)
, mDuration(0)
{
}
explicit Mdhd(Box& aBox);
explicit Mvhd(Box& aBox);
Microseconds ToMicroseconds(int64_t aTimescaleUnits)
{
@ -59,6 +41,25 @@ public:
uint64_t mDuration;
};
// Atom that carries a track id in addition to the members inherited from Mvhd.
// MoofParser::ParseTrak compares mTrackId against mTrex.mTrackId to decide
// which trak's mdia/edts children get parsed.
class Tkhd : public Mvhd
{
public:
// Default-constructs with a track id of 0.
Tkhd()
: mTrackId(0)
{
}
// Constructs from a box; defined out of line.
explicit Tkhd(Box& aBox);
// Track identifier; 0 after default construction.
// NOTE(review): presumably filled in from the tkhd box by Tkhd(Box&) — confirm.
uint32_t mTrackId;
};
// Distinct type derived from Mvhd that declares no members of its own.
// Moof::ParseTrun calls ToMicroseconds() on an Mdhd to convert sample decode
// and composition times.
class Mdhd : public Mvhd
{
public:
// Relies on Mvhd's default constructor for all member initialization.
Mdhd() = default;
// Constructs from a box; defined out of line.
explicit Mdhd(Box& aBox);
};
class Trex : public Atom
{
public:
@ -113,6 +114,7 @@ class Edts : public Atom
public:
Edts()
: mMediaStart(0)
, mEmptyOffset(0)
{
}
explicit Edts(Box& aBox);
@ -123,6 +125,7 @@ public:
}
int64_t mMediaStart;
int64_t mEmptyOffset;
};
struct Sample
@ -168,7 +171,7 @@ private:
class Moof : public Atom
{
public:
Moof(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
bool GetAuxInfo(AtomType aType, nsTArray<MediaByteRange>* aByteRanges);
void FixRounding(const Moof& aMoof);
@ -181,9 +184,9 @@ public:
nsTArray<Saio> mSaios;
private:
void ParseTraf(Box& aBox, Trex& aTrex, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
void ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, bool aIsAudio);
// aDecodeTime is updated to the end of the parsed TRUN on return.
bool ParseTrun(Box& aBox, Tfhd& aTfhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio);
bool ParseTrun(Box& aBox, Tfhd& aTfhd, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, bool aIsAudio);
void ParseSaiz(Box& aBox);
void ParseSaio(Box& aBox);
bool ProcessCenc();
@ -227,6 +230,7 @@ public:
nsRefPtr<Stream> mSource;
uint64_t mOffset;
nsTArray<uint64_t> mMoofOffsets;
Mvhd mMvhd;
Mdhd mMdhd;
Trex mTrex;
Tfdt mTfdt;