Bug 519897 - Supported indexed Ogg files. r=doublec

This commit is contained in:
Chris Pearce 2010-08-20 10:50:37 +12:00
Родитель 055a322685
Коммит c83f221aa0
13 изменённых файлов: 825 добавлений и 119 удалений

Просмотреть файл

@ -376,7 +376,7 @@ public:
mTimeEnd(aTimeEnd)
{}
PRBool IsNull() {
PRBool IsNull() const {
return mOffsetStart == 0 &&
mOffsetEnd == 0 &&
mTimeStart == 0 &&
@ -438,8 +438,12 @@ public:
virtual PRInt64 FindEndTime(PRInt64 aEndOffset);
// Moves the decode head to aTime milliseconds. aStartTime and aEndTime
// denote the start and end times of the media.
virtual nsresult Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime) = 0;
// denote the start and end times of the media in ms, and aCurrentTime
// is the current playback position in ms.
virtual nsresult Seek(PRInt64 aTime,
PRInt64 aStartTime,
PRInt64 aEndTime,
PRInt64 aCurrentTime) = 0;
// Gets presentation info required for playback.
const nsVideoInfo& GetInfo() {

Просмотреть файл

@ -939,7 +939,10 @@ nsresult nsBuiltinDecoderStateMachine::Run()
MonitorAutoExit exitMon(mDecoder->GetMonitor());
// Now perform the seek. We must not hold the state machine monitor
// while we seek, since the seek decodes.
res = mReader->Seek(seekTime, mStartTime, mEndTime);
res = mReader->Seek(seekTime,
mStartTime,
mEndTime,
mCurrentFrameTime + mStartTime);
}
if (NS_SUCCEEDED(res)){
PRInt64 audioTime = seekTime;

Просмотреть файл

@ -43,6 +43,13 @@
#include "nsTraceRefcnt.h"
#include "VideoUtils.h"
#ifdef PR_LOGGING
extern PRLogModuleInfo* gBuiltinDecoderLog;
#define LOG(type, msg) PR_LOG(gBuiltinDecoderLog, type, msg)
#else
#define LOG(type, msg)
#endif
/*
The maximum height and width of the video. Used for
sanitizing the memory allocation of the RGB buffer.
@ -364,7 +371,8 @@ PRBool nsVorbisState::Init()
return PR_TRUE;
}
PRInt64 nsVorbisState::Time(PRInt64 granulepos) {
PRInt64 nsVorbisState::Time(PRInt64 granulepos)
{
if (granulepos == -1 || !mActive || mDsp.vi->rate == 0) {
return -1;
}
@ -374,20 +382,344 @@ PRInt64 nsVorbisState::Time(PRInt64 granulepos) {
}
nsSkeletonState::nsSkeletonState(ogg_page* aBosPage)
: nsOggCodecState(aBosPage)
: nsOggCodecState(aBosPage),
mVersion(0),
mLength(0)
{
MOZ_COUNT_CTOR(nsSkeletonState);
}
// Destructor. Its only work is to record the destruction via MOZ_COUNT_DTOR,
// pairing with the MOZ_COUNT_CTOR call made in the constructor.
nsSkeletonState::~nsSkeletonState()
{
MOZ_COUNT_DTOR(nsSkeletonState);
}
// Support for Ogg Skeleton 4.0, as per specification at:
// http://wiki.xiph.org/Ogg_Skeleton_4
// Minimum length in bytes of a Skeleton 4.0 header packet.
#define SKELETON_4_0_MIN_HEADER_LEN 80
// Minimum length in bytes of a Skeleton 4.0 index packet.
#define SKELETON_4_0_MIN_INDEX_LEN 42
// Minimum possible size of a compressed index keypoint.
#define MIN_KEY_POINT_SIZE 2
// Byte offset of the major and minor version numbers in the
// Ogg Skeleton 4.0 header packet.
#define SKELETON_VERSION_MAJOR_OFFSET 8
#define SKELETON_VERSION_MINOR_OFFSET 10
// Byte-offsets of the length of file field in the Skeleton 4.0 header packet.
#define SKELETON_FILE_LENGTH_OFFSET 64
// Byte-offsets of the fields in the Skeleton index packet.
#define INDEX_SERIALNO_OFFSET 6
#define INDEX_NUM_KEYPOINTS_OFFSET 10
#define INDEX_TIME_DENOM_OFFSET 18
#define INDEX_FIRST_NUMER_OFFSET 26
#define INDEX_LAST_NUMER_OFFSET 34
#define INDEX_KEYPOINT_OFFSET 42
// Returns true if aPacket is at least SKELETON_4_0_MIN_HEADER_LEN bytes long
// and its first 8 bytes match "fishead" including the terminating NUL.
static PRBool IsSkeletonBOS(ogg_packet* aPacket)
{
  const bool longEnough = aPacket->bytes >= SKELETON_4_0_MIN_HEADER_LEN;
  // The length test short-circuits, so the payload is only inspected when
  // the packet is large enough.
  return longEnough &&
         memcmp(reinterpret_cast<char*>(aPacket->packet), "fishead", 8) == 0;
}
// Returns true if aPacket is at least SKELETON_4_0_MIN_INDEX_LEN bytes long
// and its first 5 bytes are "index".
static PRBool IsSkeletonIndex(ogg_packet* aPacket)
{
  const bool longEnough = aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN;
  // The length test short-circuits, so the payload is only inspected when
  // the packet is large enough.
  return longEnough &&
         memcmp(reinterpret_cast<char*>(aPacket->packet), "index", 5) == 0;
}
// Reads a little-endian encoded unsigned 32bit integer at p.
// The caller must guarantee that at least 4 bytes are readable at p.
static PRUint32 LEUint32(const unsigned char* p)
{
  // Widen each byte to an unsigned 32bit value before shifting. Shifting the
  // int-promoted byte directly would move p[3]'s top bit into the sign bit of
  // a signed int, which is not portable.
  return static_cast<PRUint32>(p[0]) |
         (static_cast<PRUint32>(p[1]) << 8) |
         (static_cast<PRUint32>(p[2]) << 16) |
         (static_cast<PRUint32>(p[3]) << 24);
}
// Reads a little-endian encoded 64bit integer at p.
// The caller must guarantee that at least 8 bytes are readable at p.
static PRInt64 LEInt64(const unsigned char* p)
{
  // Assemble the value in unsigned arithmetic and convert once at the end.
  // Left-shifting a signed 64bit value whose bit 31 is set would overflow the
  // signed type.
  const PRUint64 lo = LEUint32(p);
  const PRUint64 hi = LEUint32(p + 4);
  return static_cast<PRInt64>(lo | (hi << 32));
}
// Decodes the two bytes at p as a little-endian unsigned 16bit integer:
// p[0] is the low byte, p[1] the high byte.
static PRUint16 LEUint16(const unsigned char* p)
{
  const unsigned int lowByte = p[0];
  const unsigned int highByte = p[1];
  return static_cast<PRUint16>(lowByte | (highByte << 8));
}
// Reads a variable length encoded integer at p. Will not read
// past aLimit. Returns pointer to character after end of integer.
static const unsigned char* ReadVariableLengthInt(const unsigned char* p,
const unsigned char* aLimit,
PRInt64& n)
{
int shift = 0;
PRInt64 byte = 0;
n = 0;
while (p < aLimit &&
(byte & 0x80) != 0x80 &&
shift < 57)
{
byte = static_cast<PRInt64>(*p);
n |= ((byte & 0x7f) << shift);
shift += 7;
p++;
}
return p;
}
// Parses a Skeleton index packet. On success the decoded keypoints are
// stored in mIndex under the stream serialno read from the packet (nothing is
// stored if zero keypoints were decoded) and PR_TRUE is returned.
// Returns PR_FALSE if the skeleton track is not active, or if any validation
// step fails; every validation failure also sets mActive to PR_FALSE.
// The caller must have checked that the packet is at least
// SKELETON_4_0_MIN_INDEX_LEN bytes long (asserted below).
// MulOverflow()/AddOverflow() are treated here as returning false on failure.
PRBool nsSkeletonState::DecodeIndex(ogg_packet* aPacket)
{
NS_ASSERTION(aPacket->bytes >= SKELETON_4_0_MIN_INDEX_LEN,
"Index must be at least minimum size");
if (!mActive) {
return PR_FALSE;
}
// Fixed-position header fields of the index packet.
PRUint32 serialno = LEUint32(aPacket->packet + INDEX_SERIALNO_OFFSET);
PRInt64 numKeyPoints = LEInt64(aPacket->packet + INDEX_NUM_KEYPOINTS_OFFSET);
PRInt64 n = 0;
PRInt64 endTime = 0, startTime = 0;
const unsigned char* p = aPacket->packet;
PRInt64 timeDenom = LEInt64(aPacket->packet + INDEX_TIME_DENOM_OFFSET);
if (timeDenom == 0) {
// A zero denominator would make the divisions below invalid.
LOG(PR_LOG_DEBUG, ("Ogg Skeleton Index packet for stream %u has 0 "
"timestamp denominator.", serialno));
return (mActive = PR_FALSE);
}
// Extract the start time.
n = LEInt64(p + INDEX_FIRST_NUMER_OFFSET);
PRInt64 t;
if (!MulOverflow(n, 1000, t)) {
return (mActive = PR_FALSE);
} else {
// numerator * 1000 / denominator: start time in ms.
startTime = t / timeDenom;
}
// Extract the end time.
n = LEInt64(p + INDEX_LAST_NUMER_OFFSET);
if (!MulOverflow(n, 1000, t)) {
return (mActive = PR_FALSE);
} else {
// numerator * 1000 / denominator: end time in ms.
endTime = t / timeDenom;
}
// Check the numKeyPoints value read, ensure we're not going to run out of
// memory while trying to decode the index packet.
PRInt64 minPacketSize;
if (!MulOverflow(numKeyPoints, MIN_KEY_POINT_SIZE, minPacketSize) ||
!AddOverflow(INDEX_KEYPOINT_OFFSET, minPacketSize, minPacketSize))
{
return (mActive = PR_FALSE);
}
PRInt64 sizeofIndex = aPacket->bytes - INDEX_KEYPOINT_OFFSET;
PRInt64 maxNumKeyPoints = sizeofIndex / MIN_KEY_POINT_SIZE;
if (aPacket->bytes < minPacketSize ||
numKeyPoints > maxNumKeyPoints ||
numKeyPoints < 0)
{
// Packet size is less than the theoretical minimum size, or the packet is
// claiming to store more keypoints than it's capable of storing. This means
// that the numKeyPoints field is too large or small for the packet to
// possibly contain as many packets as it claims to, so the numKeyPoints
// field is possibly malicious. Don't try decoding this index, we may run
// out of memory.
LOG(PR_LOG_DEBUG, ("Possibly malicious number of key points reported "
"(%lld) in index packet for stream %u.",
numKeyPoints,
serialno));
return (mActive = PR_FALSE);
}
// keyPoints owns the new index until it is handed to mIndex below; on any
// early return the nsAutoPtr releases it.
nsAutoPtr<nsKeyFrameIndex> keyPoints(new nsKeyFrameIndex(startTime, endTime));
p = aPacket->packet + INDEX_KEYPOINT_OFFSET;
const unsigned char* limit = aPacket->packet + aPacket->bytes;
PRInt64 numKeyPointsRead = 0;
// Running totals: each keypoint stores its offset and time as deltas from
// the previous keypoint.
PRInt64 offset = 0;
PRInt64 time = 0;
while (p < limit &&
numKeyPointsRead < numKeyPoints)
{
PRInt64 delta = 0;
p = ReadVariableLengthInt(p, limit, delta);
// p == limit here means no bytes remain for this keypoint's time delta.
if (p == limit ||
!AddOverflow(offset, delta, offset) ||
offset > mLength ||
offset < 0)
{
return (mActive = PR_FALSE);
}
p = ReadVariableLengthInt(p, limit, delta);
// NOTE(review): |time| is still the raw accumulated numerator at this point,
// while startTime/endTime were already scaled to ms above -- confirm the
// comparison units are intended.
if (!AddOverflow(time, delta, time) ||
time > endTime ||
time < startTime)
{
return (mActive = PR_FALSE);
}
// Convert the accumulated timestamp to ms before storing it.
PRInt64 timeMs = 0;
if (!MulOverflow(time, 1000, timeMs))
return mActive = PR_FALSE;
timeMs /= timeDenom;
keyPoints->Add(offset, timeMs);
numKeyPointsRead++;
}
PRInt32 keyPointsRead = keyPoints->Length();
if (keyPointsRead > 0) {
// Hand the index over to the serialno-to-index map.
mIndex.Put(serialno, keyPoints.forget());
}
LOG(PR_LOG_DEBUG, ("Loaded %d keypoints for Skeleton on stream %u",
keyPointsRead, serialno));
return PR_TRUE;
}
// Looks up the index stored for stream aSerialno and writes to aResult the
// keypoint chosen by a binary search for aTarget (ms). Fails with
// NS_ERROR_FAILURE when the stream has no index, the index is empty, or
// aTarget lies outside the index's [mStartTime, mEndTime] range.
nsresult nsSkeletonState::IndexedSeekTargetForTrack(PRUint32 aSerialno,
                                                    PRInt64 aTarget,
                                                    nsKeyPoint& aResult)
{
  nsKeyFrameIndex* index = nsnull;
  mIndex.Get(aSerialno, &index);

  if (!index || index->Length() == 0) {
    return NS_ERROR_FAILURE;
  }
  if (aTarget < index->mStartTime || aTarget > index->mEndTime) {
    return NS_ERROR_FAILURE;
  }

  // Binary search for the last key point whose time does not exceed the
  // target. The probe is rounded upwards so that |lo = probe| always makes
  // progress.
  int lo = 0;
  int hi = index->Length() - 1;
  while (hi > lo) {
    const int probe = lo + ((hi - lo + 1) >> 1);
    const PRInt64 probeTime = index->Get(probe).mTime;
    if (probeTime > aTarget) {
      hi = probe - 1;
      continue;
    }
    lo = probe;
    if (probeTime == aTarget) {
      // Exact hit, no need to narrow further.
      break;
    }
  }

  aResult = index->Get(lo);
  NS_ASSERTION(aResult.mTime <= aTarget, "Result should have time <= target");
  return NS_OK;
}
// Computes the seek target for time aTarget across the streams whose
// serialnos are listed in aTracks. Each track's keypoint for aTarget is
// looked up, and the one with the lowest byte offset wins: seeking there
// guarantees that decoding forwards passes a keyframe on every track before
// reaching the target. Returns NS_ERROR_FAILURE if the skeleton is inactive,
// older than version 4.0, or no keypoint was selected; otherwise
// stores the winner in aResult and returns NS_OK.
nsresult nsSkeletonState::IndexedSeekTarget(PRInt64 aTarget,
                                            nsTArray<PRUint32>& aTracks,
                                            nsSeekTarget& aResult)
{
  if (!mActive || mVersion < SKELETON_VERSION(4,0)) {
    return NS_ERROR_FAILURE;
  }

  // A default-constructed target has the maximum possible offset, so any
  // keypoint with a smaller offset replaces it.
  nsSeekTarget best;
  const PRUint32 trackCount = aTracks.Length();
  for (PRUint32 t = 0; t < trackCount; ++t) {
    nsKeyPoint candidate;
    if (!NS_SUCCEEDED(IndexedSeekTargetForTrack(aTracks[t], aTarget, candidate))) {
      // Tracks without a usable keypoint are ignored.
      continue;
    }
    if (candidate.mOffset < best.mKeyPoint.mOffset) {
      best.mKeyPoint = candidate;
      best.mSerial = aTracks[t];
    }
  }

  if (best.IsNull()) {
    return NS_ERROR_FAILURE;
  }

  LOG(PR_LOG_DEBUG, ("Indexed seek target for time %lld is offset %lld",
                     aTarget, best.mKeyPoint.mOffset));
  aResult = best;
  return NS_OK;
}
// Computes the duration covered by the indexes of the streams listed in
// aTracks: the greatest end time minus the smallest start time. Fails with
// NS_ERROR_FAILURE if the skeleton is inactive or older than version 4.0,
// there is no index, aTracks is empty, any listed track lacks an index, or
// the subtraction is rejected by AddOverflow().
nsresult nsSkeletonState::GetDuration(const nsTArray<PRUint32>& aTracks,
                                      PRInt64& aDuration)
{
  if (!mActive || mVersion < SKELETON_VERSION(4,0)) {
    return NS_ERROR_FAILURE;
  }
  if (!HasIndex() || aTracks.Length() == 0) {
    return NS_ERROR_FAILURE;
  }

  PRInt64 startTime = PR_INT64_MAX;
  PRInt64 endTime = PR_INT64_MIN;
  for (PRUint32 t = 0; t < aTracks.Length(); ++t) {
    nsKeyFrameIndex* trackIndex = nsnull;
    mIndex.Get(aTracks[t], &trackIndex);
    if (!trackIndex) {
      // Every requested track must have timestamps available.
      return NS_ERROR_FAILURE;
    }
    if (trackIndex->mStartTime < startTime) {
      startTime = trackIndex->mStartTime;
    }
    if (trackIndex->mEndTime > endTime) {
      endTime = trackIndex->mEndTime;
    }
  }

  NS_ASSERTION(endTime > startTime, "Duration must be positive");
  if (!AddOverflow(endTime, -startTime, aDuration)) {
    return NS_ERROR_FAILURE;
  }
  return NS_OK;
}
PRBool nsSkeletonState::DecodeHeader(ogg_packet* aPacket)
{
if (aPacket->e_o_s) {
if (IsSkeletonBOS(aPacket)) {
PRUint16 verMajor = LEUint16(aPacket->packet + SKELETON_VERSION_MAJOR_OFFSET);
PRUint16 verMinor = LEUint16(aPacket->packet + SKELETON_VERSION_MINOR_OFFSET);
mVersion = SKELETON_VERSION(verMajor, verMinor);
if (mVersion < SKELETON_VERSION(4,0) ||
mVersion >= SKELETON_VERSION(5,0) ||
aPacket->bytes < SKELETON_4_0_MIN_HEADER_LEN)
{
// We can only care to parse Skeleton version 4.0+.
mActive = PR_FALSE;
return mDoneReadingHeaders = PR_TRUE;
}
// Extract the segment length.
mLength = LEInt64(aPacket->packet + SKELETON_FILE_LENGTH_OFFSET);
LOG(PR_LOG_DEBUG, ("Skeleton segment length: %lld", mLength));
// Initialize the serialno-to-index map.
PRBool init = mIndex.Init();
if (!init) {
NS_WARNING("Failed to initialize Ogg skeleton serialno-to-index map");
mActive = PR_FALSE;
return mDoneReadingHeaders = PR_TRUE;
}
mActive = PR_TRUE;
} else if (IsSkeletonIndex(aPacket) && mVersion >= SKELETON_VERSION(4,0)) {
if (!DecodeIndex(aPacket)) {
// Failed to parse index, or invalid/hostile index. DecodeIndex() will
// have deactivated the track.
return mDoneReadingHeaders = PR_TRUE;
}
} else if (aPacket->e_o_s) {
mDoneReadingHeaders = PR_TRUE;
}
return mDoneReadingHeaders;

Просмотреть файл

@ -43,6 +43,9 @@
#include <theora/theoradec.h>
#include <vorbis/codec.h>
#include <nsDeque.h>
#include <nsTArray.h>
#include <nsClassHashtable.h>
#include "VideoUtils.h"
class OggPageDeallocator : public nsDequeFunctor {
virtual void* operator() (void* aPage) {
@ -126,8 +129,6 @@ class nsOggCodecState {
// into the bitstream.
PRBool PageInFromBuffer();
public:
// Number of packets read.
PRUint64 mPacketCount;
@ -193,6 +194,10 @@ public:
float mPixelAspectRatio;
};
// Constructs a 32bit version number out of two 16 bit major,minor
// version numbers.
#define SKELETON_VERSION(major, minor) (((major)<<16)|(minor))
class nsSkeletonState : public nsOggCodecState {
public:
nsSkeletonState(ogg_page* aBosPage);
@ -201,6 +206,119 @@ public:
virtual PRBool DecodeHeader(ogg_packet* aPacket);
virtual PRInt64 Time(PRInt64 granulepos) { return -1; }
virtual PRBool Init() { return PR_TRUE; }
// Stores the offset of the page on which a keyframe starts,
// and its presentation time. A default-constructed keypoint holds
// PR_INT64_MAX in both fields, which IsNull() reports as "null".
class nsKeyPoint {
public:
  nsKeyPoint()
    : mOffset(PR_INT64_MAX),
      mTime(PR_INT64_MAX) {}

  nsKeyPoint(PRInt64 aOffset, PRInt64 aTime)
    : mOffset(aOffset),
      mTime(aTime) {}

  // Offset from start of segment/link-in-the-chain in bytes.
  PRInt64 mOffset;

  // Presentation time in ms.
  PRInt64 mTime;

  // Returns true if both fields still hold the default-constructed sentinel
  // value. Declared const so it can be called on const keypoints, such as
  // the const references returned by a keyframe index.
  PRBool IsNull() const {
    return mOffset == PR_INT64_MAX &&
           mTime == PR_INT64_MAX;
  }
};
// Pairs a keypoint (byte offset and presentation time of a keyframe) with
// the serialno of the stream the keyframe belongs to. A default-constructed
// target has a null keypoint and serialno 0.
class nsSeekTarget {
public:
  nsSeekTarget()
    : mSerial(0)
  {}

  nsKeyPoint mKeyPoint;
  PRUint32 mSerial;

  // Returns true only while the target is still in its default-constructed
  // state: serialno 0 and a null keypoint.
  PRBool IsNull() {
    return mSerial == 0 &&
           mKeyPoint.IsNull();
  }
};
// Determines from the seek index the keyframe which you must seek back to
// in order to get all keyframes required to render all streams with
// serialnos in aTracks, at time aTarget.
nsresult IndexedSeekTarget(PRInt64 aTarget,
nsTArray<PRUint32>& aTracks,
nsSeekTarget& aResult);
// Returns true if the serialno-to-index map holds at least one entry.
PRBool HasIndex() const {
  return mIndex.Count() != 0;
}
// Returns the duration of the active tracks in the media, if we have
// an index. aTracks must be filled with the serialnos of the active tracks.
// The duration is calculated as the greatest end time of all active tracks,
// minus the smallest start time of all the active tracks.
nsresult GetDuration(const nsTArray<PRUint32>& aTracks, PRInt64& aDuration);
private:
// Decodes an index packet. Returns PR_FALSE on failure.
PRBool DecodeIndex(ogg_packet* aPacket);
// Gets the keypoint you must seek to in order to get the keyframe required
// to render the stream at time aTarget on stream with serial aSerialno.
nsresult IndexedSeekTargetForTrack(PRUint32 aSerialno,
PRInt64 aTarget,
nsKeyPoint& aResult);
// Version of the decoded skeleton track, as per the SKELETON_VERSION macro.
PRUint32 mVersion;
// Length of the resource in bytes.
PRInt64 mLength;
// Stores the keyframe index and duration information for a particular
// stream.
class nsKeyFrameIndex {
public:
nsKeyFrameIndex(PRInt64 aStartTime, PRInt64 aEndTime)
: mStartTime(aStartTime),
mEndTime(aEndTime)
{
MOZ_COUNT_CTOR(nsKeyFrameIndex);
}
~nsKeyFrameIndex() {
MOZ_COUNT_DTOR(nsKeyFrameIndex);
}
void Add(PRInt64 aOffset, PRInt64 aTimeMs) {
mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs));
}
const nsKeyPoint& Get(PRUint32 aIndex) const {
return mKeyPoints[aIndex];
}
PRUint32 Length() const {
return mKeyPoints.Length();
}
// Presentation time of the first sample in this stream in ms.
const PRInt64 mStartTime;
// End time of the last sample in this stream in ms.
const PRInt64 mEndTime;
private:
nsTArray<nsKeyPoint> mKeyPoints;
};
// Maps Ogg serialnos to the index-keypoint list.
nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex;
};
#endif

Просмотреть файл

@ -62,6 +62,37 @@ extern PRLogModuleInfo* gBuiltinDecoderLog;
#define SEEK_LOG(type, msg)
#endif
// If we don't have a Theora video stream, then during seeking, if a seek
// target is less than SEEK_DECODE_MARGIN ahead of the current playback
// position, we'll just decode forwards rather than performing a bisection
// search. If we have Theora video we use the maximum keyframe interval as
// this value, rather than SEEK_DECODE_MARGIN. This makes small seeks faster.
#define SEEK_DECODE_MARGIN 2000
// The number of milliseconds of "fuzz" we use in a bisection search over
// HTTP. When we're seeking with fuzz, we'll stop the search if a bisection
// lands between the seek target and SEEK_FUZZ_MS milliseconds before the
// seek target. This is because it's usually quicker to just keep downloading
// from an existing connection than to do another bisection inside that
// small range, which would open a new HTTP connection.
#define SEEK_FUZZ_MS 500
// Outcome reported by PageSync().
enum PageSyncResult {
  PAGE_SYNC_ERROR        = 1,
  PAGE_SYNC_END_OF_RANGE = 2,
  PAGE_SYNC_OK           = 3
};
// Reads a page from the media stream.
static PageSyncResult
PageSync(nsMediaStream* aStream,
ogg_sync_state* aState,
PRBool aCachedDataOnly,
PRInt64 aOffset,
PRInt64 aEndOffset,
ogg_page* aPage,
int& aSkippedBytes);
// Chunk size to read when reading Ogg files. Average Ogg page length
// is about 4300 bytes, so we read the file in chunks larger than that.
static const int PAGE_STEP = 8192;
@ -70,6 +101,7 @@ nsOggReader::nsOggReader(nsBuiltinDecoder* aDecoder)
: nsBuiltinDecoderReader(aDecoder),
mTheoraState(nsnull),
mVorbisState(nsnull),
mSkeletonState(nsnull),
mPageOffset(0),
mTheoraGranulepos(-1),
mVorbisGranulepos(-1)
@ -188,6 +220,12 @@ nsresult nsOggReader::ReadMetadata()
// bitstreams will be ignored.
mTheoraState = static_cast<nsTheoraState*>(codecState);
}
if (codecState &&
codecState->GetType() == nsOggCodecState::TYPE_SKELETON &&
!mSkeletonState)
{
mSkeletonState = static_cast<nsSkeletonState*>(codecState);
}
} else {
// We've encountered a non Beginning Of Stream page. No more
// BOS pages can follow in this Ogg segment, so there will be no other
@ -237,7 +275,7 @@ nsresult nsOggReader::ReadMetadata()
// Deactivate any non-primary bitstreams.
for (PRUint32 i = 0; i < bitstreams.Length(); i++) {
nsOggCodecState* s = bitstreams[i];
if (s != mVorbisState && s != mTheoraState) {
if (s != mVorbisState && s != mTheoraState && s != mSkeletonState) {
s->Deactivate();
}
}
@ -259,6 +297,12 @@ nsresult nsOggReader::ReadMetadata()
mVorbisState->Init();
}
if (!HasAudio() && !HasVideo() && mSkeletonState) {
// We have a skeleton track, but no audio or video, may as well disable
// the skeleton, we can't do anything useful with this media.
mSkeletonState->Deactivate();
}
mInfo.mHasAudio = HasAudio();
mInfo.mHasVideo = HasVideo();
if (HasAudio()) {
@ -276,6 +320,25 @@ nsresult nsOggReader::ReadMetadata()
}
mInfo.mDataOffset = mDataOffset;
if (mSkeletonState && mSkeletonState->HasIndex()) {
// Extract the duration info out of the index, so we don't need to seek to
// the end of stream to get it.
nsAutoTArray<PRUint32, 2> tracks;
if (HasVideo()) {
tracks.AppendElement(mTheoraState->mSerial);
}
if (HasAudio()) {
tracks.AppendElement(mVorbisState->mSerial);
}
PRInt64 duration = 0;
if (NS_SUCCEEDED(mSkeletonState->GetDuration(tracks, duration))) {
MonitorAutoExit exitReaderMon(mMonitor);
MonitorAutoEnter decoderMon(mDecoder->GetMonitor());
mDecoder->GetStateMachine()->SetDuration(duration);
LOG(PR_LOG_DEBUG, ("Got duration from Skeleton index %lld", duration));
}
}
LOG(PR_LOG_DEBUG, ("Done loading headers, data offset %lld", mDataOffset));
return NS_OK;
@ -939,129 +1002,262 @@ PRInt64 nsOggReader::FindEndTime(PRInt64 aEndOffset,
return endTime;
}
nsresult nsOggReader::Seek(PRInt64 aTarget, PRInt64 aStartTime, PRInt64 aEndTime)
// Abandons an index-assisted seek: deactivates the skeleton state and moves
// the stream's read position back to aOffset. Returns SEEK_INDEX_FAIL once
// the position has been restored, or SEEK_FATAL_ERROR if there is no current
// stream or the stream seek itself fails.
nsOggReader::IndexedSeekResult nsOggReader::RollbackIndexedSeek(PRInt64 aOffset)
{
// Deactivate first, so the skeleton is disabled even if the restore fails.
mSkeletonState->Deactivate();
nsMediaStream* stream = mDecoder->GetCurrentStream();
NS_ENSURE_TRUE(stream != nsnull, SEEK_FATAL_ERROR);
nsresult res = stream->Seek(nsISeekableStream::NS_SEEK_SET, aOffset);
NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR);
return SEEK_INDEX_FAIL;
}
// Attempts to seek to the keyframe required for aTarget (ms) using the
// Skeleton index.
// Returns:
//   SEEK_OK          - the stream is positioned just past the indexed page.
//   SEEK_INDEX_FAIL  - there is no usable index, no keypoint was found, or
//                      the index did not match the stream contents (in the
//                      latter case the seek is rolled back via
//                      RollbackIndexedSeek()).
//   SEEK_FATAL_ERROR - there is no stream, or a stream seek, decode reset or
//                      page sync reported an error.
nsOggReader::IndexedSeekResult nsOggReader::SeekToKeyframeUsingIndex(PRInt64 aTarget)
{
nsMediaStream* stream = mDecoder->GetCurrentStream();
NS_ENSURE_TRUE(stream != nsnull, SEEK_FATAL_ERROR);
if (!HasSkeleton() || !mSkeletonState->HasIndex()) {
return SEEK_INDEX_FAIL;
}
// We have an index from the Skeleton track, try to use it to seek.
// Collect the serialnos of the streams we are actually decoding.
nsAutoTArray<PRUint32, 2> tracks;
if (HasVideo()) {
tracks.AppendElement(mTheoraState->mSerial);
}
if (HasAudio()) {
tracks.AppendElement(mVorbisState->mSerial);
}
nsSkeletonState::nsSeekTarget keyframe;
if (NS_FAILED(mSkeletonState->IndexedSeekTarget(aTarget,
tracks,
keyframe)))
{
// Could not locate a keypoint for the target in the index.
return SEEK_INDEX_FAIL;
}
// Remember original stream read cursor position so we can rollback on failure.
PRInt64 tell = stream->Tell();
// Seek to the keypoint returned by the index.
if (keyframe.mKeyPoint.mOffset > stream->GetLength() ||
keyframe.mKeyPoint.mOffset < 0)
{
// Index must be invalid.
return RollbackIndexedSeek(tell);
}
LOG(PR_LOG_DEBUG, ("Seeking using index to keyframe at offset %lld\n",
keyframe.mKeyPoint.mOffset));
nsresult res = stream->Seek(nsISeekableStream::NS_SEEK_SET,
keyframe.mKeyPoint.mOffset);
NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR);
mPageOffset = keyframe.mKeyPoint.mOffset;
// We've moved the stream's read position, so reset decode.
res = ResetDecode();
NS_ENSURE_SUCCESS(res, SEEK_FATAL_ERROR);
// Check that the page the index thinks is exactly here is actually exactly
// here. If not, the index is invalid.
ogg_page page;
int skippedBytes = 0;
PageSyncResult syncres = PageSync(stream,
&mOggState,
PR_FALSE,
mPageOffset,
stream->GetLength(),
&page,
skippedBytes);
NS_ENSURE_TRUE(syncres != PAGE_SYNC_ERROR, SEEK_FATAL_ERROR);
// Any skipped bytes mean the page does not start exactly at the indexed
// offset.
if (syncres != PAGE_SYNC_OK || skippedBytes != 0) {
LOG(PR_LOG_DEBUG, ("Indexed-seek failure: Ogg Skeleton Index is invalid "
"or sync error after seek"));
return RollbackIndexedSeek(tell);
}
PRUint32 serial = ogg_page_serialno(&page);
if (serial != keyframe.mSerial) {
// Serialno of page at offset isn't what the index told us to expect.
// Assume the index is invalid.
return RollbackIndexedSeek(tell);
}
nsOggCodecState* codecState = nsnull;
mCodecStates.Get(serial, &codecState);
// NOTE(review): this condition only rolls back when an active codec state
// rejects the page. If codecState is null or inactive, the page is not
// inserted and SEEK_OK is still returned -- confirm that is intended.
if (codecState &&
codecState->mActive &&
ogg_stream_pagein(&codecState->mState, &page) != 0)
{
// Couldn't insert page into the ogg stream, or somehow the stream
// is no longer active.
return RollbackIndexedSeek(tell);
}
// Advance the recorded page offset past the page we just synced.
mPageOffset = keyframe.mKeyPoint.mOffset + page.header_len + page.body_len;
return SEEK_OK;
}
// Seeks to aTarget (ms) inside aRange using a bisection search with zero
// fuzz. When there is video, frames are then decoded until one is queued
// (or DecodeVideoFrame() returns false); if the first queued frame is not a
// keyframe, a second bisection (with SEEK_FUZZ_MS of fuzz) seeks back to
// that frame's keyframe.
//   aTarget              - seek target in ms.
//   aStartTime, aEndTime - passed to GetSeekRange() for the second bisection.
//   aRanges              - byte ranges passed to GetSeekRange().
//   aRange               - the range the first bisection searches.
// Returns the result of the last bisection performed, or NS_ERROR_FAILURE
// if the decoder is found to be shutting down while decoding.
nsresult nsOggReader::SeekInBufferedRange(PRInt64 aTarget,
PRInt64 aStartTime,
PRInt64 aEndTime,
const nsTArray<ByteRange>& aRanges,
const ByteRange& aRange)
{
LOG(PR_LOG_DEBUG, ("%p Seeking in buffered data to %lldms using bisection search", mDecoder, aTarget));
// We know the exact byte range in which the target must lie. It must
// be buffered in the media cache. Seek there.
nsresult res = SeekBisection(aTarget, aRange, 0);
if (NS_FAILED(res) || !HasVideo()) {
return res;
}
// We have an active Theora bitstream. Decode the next Theora frame, and
// extract its keyframe's time.
PRBool eof;
do {
PRBool skip = PR_FALSE;
eof = !DecodeVideoFrame(skip, 0);
{
// Swap the reader monitor for the decoder monitor while checking the
// decode state; both are restored when this scope ends.
MonitorAutoExit exitReaderMon(mMonitor);
MonitorAutoEnter decoderMon(mDecoder->GetMonitor());
if (mDecoder->GetDecodeState() == nsBuiltinDecoderStateMachine::DECODER_STATE_SHUTDOWN) {
return NS_ERROR_FAILURE;
}
}
} while (!eof &&
mVideoQueue.GetSize() == 0);
VideoData* video = mVideoQueue.PeekFront();
if (video && !video->mKeyframe) {
// First decoded frame isn't a keyframe, seek back to previous keyframe,
// otherwise we'll get visual artifacts.
NS_ASSERTION(video->mTimecode != -1, "Must have a granulepos");
// Clear the low |shift| bits of the granulepos to obtain the granulepos
// of the frame's keyframe.
int shift = mTheoraState->mInfo.keyframe_granule_shift;
PRInt64 keyframeGranulepos = (video->mTimecode >> shift) << shift;
PRInt64 keyframeTime = mTheoraState->StartTime(keyframeGranulepos);
SEEK_LOG(PR_LOG_DEBUG, ("Keyframe for %lld is at %lld, seeking back to it",
video->mTime, keyframeTime));
ByteRange k = GetSeekRange(aRanges,
keyframeTime,
aStartTime,
aEndTime,
PR_FALSE);
res = SeekBisection(keyframeTime, k, SEEK_FUZZ_MS);
NS_ASSERTION(mTheoraGranulepos == -1, "SeekBisection must reset Theora decode");
NS_ASSERTION(mVorbisGranulepos == -1, "SeekBisection must reset Vorbis decode");
}
return res;
}
// Returns true when aTarget (ms) is at or ahead of aCurrentTime (ms) by less
// than the decode margin, in which case decoding forwards is preferred over
// seeking. The margin is the Theora maximum keyframe offset when there is
// video, and SEEK_DECODE_MARGIN otherwise.
PRBool nsOggReader::CanDecodeToTarget(PRInt64 aTarget,
                                      PRInt64 aCurrentTime)
{
  PRInt64 margin;
  if (HasVideo()) {
    margin = mTheoraState->MaxKeyframeOffset();
  } else {
    margin = SEEK_DECODE_MARGIN;
  }

  // Only forward seeks qualify; the distance is computed only for those.
  const bool isAhead = aTarget >= aCurrentTime;
  return isAhead && (aTarget - aCurrentTime) < margin;
}
// Seeks towards aTarget (ms) with a single bisection search using
// SEEK_FUZZ_MS of fuzz. aStartTime/aEndTime and aRanges are handed to
// GetSeekRange() to narrow the search space. Returns the result of the
// bisection.
nsresult nsOggReader::SeekInUnbuffered(PRInt64 aTarget,
                                       PRInt64 aStartTime,
                                       PRInt64 aEndTime,
                                       const nsTArray<ByteRange>& aRanges)
{
  LOG(PR_LOG_DEBUG, ("%p Seeking in unbuffered data to %lldms using bisection search", mDecoder, aTarget));

  // With an active Theora bitstream, work out the furthest a keyframe could
  // lie before any given interframe and pull the seek target back by that
  // amount. After seeking to the adjusted target we decode forwards to the
  // real target, and should meet a keyframe on the way. This replaces two
  // bisections (one for the target frame, one for its keyframe) with one;
  // downloading the extra data is usually faster than a second bisection.
  // The adjustment is not applied when seeking in a buffered range, where
  // the extra decoding is noticeably slower than bisecting exactly to the
  // keyframe.
  const PRInt64 keyframeOffsetMs =
    (HasVideo() && mTheoraState) ? mTheoraState->MaxKeyframeOffset() : 0;
  // Never seek to before the start of the media.
  const PRInt64 seekTarget = NS_MAX(aStartTime, aTarget - keyframeOffsetMs);

  // Use the known timestamps of the buffered ranges to shrink the bisection
  // search space.
  ByteRange k = GetSeekRange(aRanges, seekTarget, aStartTime, aEndTime, PR_FALSE);
  nsresult res = SeekBisection(seekTarget, k, SEEK_FUZZ_MS);
  NS_ASSERTION(mTheoraGranulepos == -1, "SeekBisection must reset Theora decode");
  NS_ASSERTION(mVorbisGranulepos == -1, "SeekBisection must reset Vorbis decode");
  return res;
}
nsresult nsOggReader::Seek(PRInt64 aTarget,
PRInt64 aStartTime,
PRInt64 aEndTime,
PRInt64 aCurrentTime)
{
MonitorAutoEnter mon(mMonitor);
nsresult res;
NS_ASSERTION(mDecoder->OnStateMachineThread(),
"Should be on state machine thread.");
LOG(PR_LOG_DEBUG, ("%p About to seek to %lldms", mDecoder, aTarget));
nsresult res;
nsMediaStream* stream = mDecoder->GetCurrentStream();
NS_ENSURE_TRUE(stream != nsnull, NS_ERROR_FAILURE);
if (NS_FAILED(ResetDecode())) {
return NS_ERROR_FAILURE;
}
if (aTarget == aStartTime) {
// We've seeked to the media start. Just seek to the offset of the first
// content page.
res = stream->Seek(nsISeekableStream::NS_SEEK_SET, mDataOffset);
NS_ENSURE_SUCCESS(res, res);
NS_ENSURE_SUCCESS(res,res);
mPageOffset = mDataOffset;
res = ResetDecode();
NS_ENSURE_SUCCESS(res,res);
NS_ASSERTION(aStartTime != -1, "mStartTime should be known");
{
MonitorAutoExit exitReaderMon(mMonitor);
MonitorAutoEnter decoderMon(mDecoder->GetMonitor());
mDecoder->UpdatePlaybackPosition(aStartTime);
}
} else if (CanDecodeToTarget(aTarget, aCurrentTime)) {
LOG(PR_LOG_DEBUG, ("%p Seek target (%lld) is close to current time (%lld), "
"will just decode to it", mDecoder, aCurrentTime, aTarget));
} else {
IndexedSeekResult sres = SeekToKeyframeUsingIndex(aTarget);
NS_ENSURE_TRUE(sres != SEEK_FATAL_ERROR, NS_ERROR_FAILURE);
if (sres == SEEK_INDEX_FAIL) {
// No index or other non-fatal index-related failure. Try to seek
// using a bisection search. Determine the already downloaded data
// in the media cache, so we can try to seek in the cached data first.
nsAutoTArray<ByteRange, 16> ranges;
res = GetBufferedBytes(ranges);
NS_ENSURE_SUCCESS(res,res);
// Determine the already downloaded data in the media cache.
nsAutoTArray<ByteRange, 16> ranges;
stream->Pin();
if (NS_FAILED(GetBufferedBytes(ranges))) {
stream->Unpin();
return NS_ERROR_FAILURE;
}
// Figure out if the seek target lies in a buffered range.
ByteRange r = GetSeekRange(ranges, aTarget, aStartTime, aEndTime, PR_TRUE);
// Try to seek in the cached data ranges first, before falling back to
// seeking over the network. This makes seeking in buffered ranges almost
// instantaneous.
ByteRange r = GetSeekRange(ranges, aTarget, aStartTime, aEndTime, PR_TRUE);
res = NS_ERROR_FAILURE;
if (!r.IsNull()) {
// The frame should be in this buffered range. Seek exactly there.
res = SeekBisection(aTarget, r, 0);
if (NS_SUCCEEDED(res) && HasVideo()) {
// We have an active Theora bitstream. Decode the next Theora frame, and
// extract its keyframe's time.
PRBool eof;
do {
PRBool skip = PR_FALSE;
eof = !DecodeVideoFrame(skip, 0);
{
MonitorAutoExit exitReaderMon(mMonitor);
MonitorAutoEnter decoderMon(mDecoder->GetMonitor());
if (mDecoder->GetDecodeState() == nsBuiltinDecoderStateMachine::DECODER_STATE_SHUTDOWN) {
stream->Unpin();
return NS_ERROR_FAILURE;
}
}
} while (!eof &&
mVideoQueue.GetSize() == 0);
VideoData* video = mVideoQueue.PeekFront();
if (video && !video->mKeyframe) {
// First decoded frame isn't a keyframe, seek back to previous keyframe,
// otherwise we'll get visual artifacts.
NS_ASSERTION(video->mTimecode != -1, "Must have a granulepos");
int shift = mTheoraState->mInfo.keyframe_granule_shift;
PRInt64 keyframeGranulepos = (video->mTimecode >> shift) << shift;
PRInt64 keyframeTime = mTheoraState->StartTime(keyframeGranulepos);
SEEK_LOG(PR_LOG_DEBUG, ("Keyframe for %lld is at %lld, seeking back to it",
video->mTime, keyframeTime));
ByteRange k = GetSeekRange(ranges,
keyframeTime,
aStartTime,
aEndTime,
PR_FALSE);
res = SeekBisection(keyframeTime, k, 500);
NS_ASSERTION(mTheoraGranulepos == -1, "SeekBisection must reset Theora decode");
NS_ASSERTION(mVorbisGranulepos == -1, "SeekBisection must reset Vorbis decode");
}
if (!r.IsNull()) {
// We know the buffered range in which the seek target lies, do a
// bisection search in that buffered range.
res = SeekInBufferedRange(aTarget, aStartTime, aEndTime, ranges, r);
NS_ENSURE_SUCCESS(res,res);
} else {
// The target doesn't lie in a buffered range. Perform a bisection
// search over the whole media, using the known buffered ranges to
// reduce the search space.
res = SeekInUnbuffered(aTarget, aStartTime, aEndTime, ranges);
NS_ENSURE_SUCCESS(res,res);
}
}
stream->Unpin();
if (NS_FAILED(res)) {
// We failed to find the seek target (or perhaps its keyframe, somehow?)
// in a buffered range. Minimize the bisection search space using the
// buffered ranges, and perform a bisection search.
// If we've got an active Theora bitstream, determine the maximum possible
// time in ms which a keyframe could be before a given interframe. We
// subtract this from our seek target, seek to the new target, and then
// decode forwards to the original seek target. We should encounter a
// keyframe in that interval. This prevents us from needing to run two
// bisections; one for the seek target frame, and another to find its
// keyframe. It's usually faster to just download this extra data, rather
// tham perform two bisections to find the seek target's keyframe. We
// don't do this offsetting when seeking in a buffered ranges (above),
// as the extra decoding causes a noticeable speed hit when all the data
// is buffered.
PRInt64 keyframeOffsetMs = 0;
if (HasVideo() && mTheoraState) {
keyframeOffsetMs = mTheoraState->MaxKeyframeOffset();
}
PRInt64 seekTarget = NS_MAX(aStartTime, aTarget - keyframeOffsetMs);
ByteRange k = GetSeekRange(ranges, seekTarget, aStartTime, aEndTime, PR_FALSE);
res = SeekBisection(seekTarget, k, 500);
NS_ENSURE_SUCCESS(res, res);
NS_ASSERTION(mTheoraGranulepos == -1, "SeekBisection must reset Theora decode");
NS_ASSERTION(mVorbisGranulepos == -1, "SeekBisection must reset Vorbis decode");
}
}
// The decode position must now be either close to the seek target, or
// we've seeked to before the keyframe before the seek target. Decode
// forward to the seek target frame.
return DecodeToTarget(aTarget);
}
enum PageSyncResult {
PAGE_SYNC_ERROR = 1,
PAGE_SYNC_END_OF_RANGE= 2,
PAGE_SYNC_OK = 3
};
// Reads a page from the media stream.
static PageSyncResult
PageSync(nsMediaStream* aStream,
@ -1140,7 +1336,7 @@ nsresult nsOggReader::SeekBisection(PRInt64 aTarget,
return NS_ERROR_FAILURE;
}
res = stream->Seek(nsISeekableStream::NS_SEEK_SET, mDataOffset);
NS_ENSURE_SUCCESS(res, res);
NS_ENSURE_SUCCESS(res,res);
mPageOffset = mDataOffset;
return NS_OK;
}
@ -1222,9 +1418,7 @@ nsresult nsOggReader::SeekBisection(PRInt64 aTarget,
endOffset,
&page,
skippedBytes);
if (res == PAGE_SYNC_ERROR) {
return NS_ERROR_FAILURE;
}
NS_ENSURE_TRUE(res != PAGE_SYNC_ERROR, NS_ERROR_FAILURE);
// We've located a page of length |ret| at |guess + skippedBytes|.
// Remember where the page is located.
@ -1304,7 +1498,7 @@ nsresult nsOggReader::SeekBisection(PRInt64 aTarget,
SEEK_LOG(PR_LOG_DEBUG, ("Seek loop (interval == 0) break"));
NS_ASSERTION(startTime < aTarget, "Start time must always be less than target");
res = stream->Seek(nsISeekableStream::NS_SEEK_SET, startOffset);
NS_ENSURE_SUCCESS(res, res);
NS_ENSURE_SUCCESS(res,res);
mPageOffset = startOffset;
if (NS_FAILED(ResetDecode())) {
return NS_ERROR_FAILURE;

Просмотреть файл

@ -44,6 +44,9 @@
#include <vorbis/codec.h>
#include "nsBuiltinDecoderReader.h"
#include "nsOggCodecState.h"
#include "VideoUtils.h"
using namespace mozilla;
class nsMediaDecoder;
class nsHTMLTimeRanges;
@ -84,11 +87,58 @@ public:
}
virtual nsresult ReadMetadata();
virtual nsresult Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime);
virtual nsresult Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime, PRInt64 aCurrentTime);
virtual nsresult GetBuffered(nsHTMLTimeRanges* aBuffered, PRInt64 aStartTime);
private:
// Reports whether a Skeleton decode state object exists and is flagged
// active. mMonitor is entered for the duration of the check.
PRBool HasSkeleton()
{
  MonitorAutoEnter lock(mMonitor);
  // Only dereference the state pointer once we know it is non-null.
  const bool haveState = (mSkeletonState != 0);
  return haveState && mSkeletonState->mActive;
}
// Returns PR_TRUE if we should decode up to the seek target rather than
// seeking to the target using a bisection search or index-assisted seek.
// We should do this if the seek target (aTarget, in ms) lies not too far
// ahead of the current playback position (aCurrentTime, in ms).
PRBool CanDecodeToTarget(PRInt64 aTarget,
PRInt64 aCurrentTime);
// Outcome of an index-assisted seek attempt. This is the return type of
// SeekToKeyframeUsingIndex() and RollbackIndexedSeek().
enum IndexedSeekResult {
SEEK_OK, // Success.
SEEK_INDEX_FAIL, // Failure due to no index, or invalid index.
SEEK_FATAL_ERROR // Error returned by a stream operation.
};
// Seeks to the keyframe preceding the target time (aTarget) using the
// available keyframe indexes. The result reports whether the seek
// succeeded, the index could not be used, or a stream operation failed.
IndexedSeekResult SeekToKeyframeUsingIndex(PRInt64 aTarget);
// Rolls back a seek-using-index attempt, returning a failure error code.
IndexedSeekResult RollbackIndexedSeek(PRInt64 aOffset);
// Seeks to aTarget ms in the buffered range aRange using bisection search,
// or to the keyframe prior to aTarget if we have video. aStartTime must be
// the presentation time at the start of media, and aEndTime the time at
// end of media. aRanges must be the time/byte ranges buffered in the media
// cache as per GetBufferedBytes().
nsresult SeekInBufferedRange(PRInt64 aTarget,
PRInt64 aStartTime,
PRInt64 aEndTime,
const nsTArray<ByteRange>& aRanges,
const ByteRange& aRange);
// Seeks to before aTarget ms in media using bisection search. If the media
// has video, this will seek to before the keyframe required to render the
// media at aTarget. Will use aRanges in order to narrow the bisection
// search space. aStartTime must be the presentation time at the start of
// media, and aEndTime the time at end of media. aRanges must be the time/byte
// ranges buffered in the media cache as per GetBufferedBytes().
nsresult SeekInUnbuffered(PRInt64 aTarget,
PRInt64 aStartTime,
PRInt64 aEndTime,
const nsTArray<ByteRange>& aRanges);
// Get the end time of aEndOffset. This is the playback position we'd reach
// after playback finished at aEndOffset. If PRBool aCachedDataOnly is
// PR_TRUE, then we'll only read from data which is cached in the media cache,
@ -137,6 +187,9 @@ private:
// Decode state of the Vorbis bitstream we're decoding, if we have audio.
nsVorbisState* mVorbisState;
// Decode state of the Skeleton bitstream.
nsSkeletonState* mSkeletonState;
// Ogg decoding state.
ogg_sync_state mOggState;

Просмотреть файл

@ -242,7 +242,7 @@ PRBool nsRawReader::DecodeVideoFrame(PRBool &aKeyframeSkip,
return PR_TRUE;
}
nsresult nsRawReader::Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime)
nsresult nsRawReader::Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime, PRInt64 aCurrentTime)
{
mozilla::MonitorAutoEnter autoEnter(mMonitor);
NS_ASSERTION(mDecoder->OnStateMachineThread(),

Просмотреть файл

@ -111,7 +111,7 @@ public:
}
virtual nsresult ReadMetadata();
virtual nsresult Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime);
virtual nsresult Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime, PRInt64 aCurrentTime);
virtual PRInt64 FindEndTime(PRInt64 aEndOffset);
virtual nsresult GetBuffered(nsHTMLTimeRanges* aBuffered, PRInt64 aStartTime);

Просмотреть файл

@ -187,6 +187,7 @@ _TEST_FILES += \
bug504843.ogv \
bug506094.ogv \
bug516323.ogv \
bug516323.indexed.ogv \
bug520493.ogg \
bug520500.ogg \
bug520908.ogv \

Двоичные данные
content/media/test/bug516323.indexed.ogv Normal file

Двоичный файл не отображается.

Просмотреть файл

@ -203,6 +203,7 @@ var gSeekTests = [
{ name:"seek.ogv", type:"video/ogg", duration:3.966 },
{ name:"320x240.ogv", type:"video/ogg", duration:0.233 },
{ name:"seek.webm", type:"video/webm", duration:3.966 },
{ name:"bug516323.indexed.ogv", type:"video/ogg", duration:4.208 },
{ name:"bogus.duh", type:"bogus/duh", duration:123 }
];

Просмотреть файл

@ -675,7 +675,7 @@ PRBool nsWebMReader::DecodeVideoFrame(PRBool &aKeyframeSkip,
return PR_TRUE;
}
nsresult nsWebMReader::Seek(PRInt64 aTarget, PRInt64 aStartTime, PRInt64 aEndTime)
nsresult nsWebMReader::Seek(PRInt64 aTarget, PRInt64 aStartTime, PRInt64 aEndTime, PRInt64 aCurrentTime)
{
MonitorAutoEnter mon(mMonitor);
NS_ASSERTION(mDecoder->OnStateMachineThread(),

Просмотреть файл

@ -124,7 +124,7 @@ public:
}
virtual nsresult ReadMetadata();
virtual nsresult Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime);
virtual nsresult Seek(PRInt64 aTime, PRInt64 aStartTime, PRInt64 aEndTime, PRInt64 aCurrentTime);
virtual nsresult GetBuffered(nsHTMLTimeRanges* aBuffered, PRInt64 aStartTime);
private: