Bug 938686 - Support Opus in WebM. r=kinetik

Support the Opus audio codec in the WebM (Matroska) container.
This is part of the "WebM 2" proposed spec, which also includes
the new VP9 video codec. Alas we weren't able to get concensus
to change the doctype of filename extension to mark the revision
allowing the new codecs.
This commit is contained in:
Jan Gerber 2013-11-22 14:07:00 -08:00
Родитель 748f641e19
Коммит 33d36a07dc
3 изменённых файлов: 280 добавлений и 60 удалений

Просмотреть файл

@ -171,10 +171,11 @@ static const char* const gWebMTypes[3] = {
nullptr
};
static char const *const gWebMCodecs[4] = {
static char const *const gWebMCodecs[5] = {
"vp8",
"vp8.0",
"vorbis",
"opus",
nullptr
};

Просмотреть файл

@ -16,6 +16,8 @@
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
#include "OggReader.h"
using mozilla::NesteggPacketHolder;
template <>
@ -141,6 +143,11 @@ WebMReader::WebMReader(AbstractMediaDecoder* aDecoder)
mContext(nullptr),
mPacketCount(0),
mChannels(0),
#ifdef MOZ_OPUS
mOpusParser(nullptr),
mOpusDecoder(nullptr),
mSkip(0),
#endif
mVideoTrack(0),
mAudioTrack(0),
mAudioStartUsec(-1),
@ -177,6 +184,11 @@ WebMReader::~WebMReader()
vorbis_info_clear(&mVorbisInfo);
vorbis_comment_clear(&mVorbisComment);
if (mOpusDecoder) {
opus_multistream_decoder_destroy(mOpusDecoder);
mOpusDecoder = nullptr;
}
MOZ_COUNT_DTOR(WebMReader);
}
@ -338,51 +350,83 @@ nsresult WebMReader::ReadMetadata(MediaInfo* aInfo,
mAudioTrack = track;
mHasAudio = true;
mInfo.mAudio.mHasAudio = true;
mAudioCodec = nestegg_track_codec_id(mContext, track);
mCodecDelay = params.codec_delay;
// Get the Vorbis header data
unsigned int nheaders = 0;
r = nestegg_track_codec_data_count(mContext, track, &nheaders);
if (r == -1 || nheaders != 3) {
Cleanup();
return NS_ERROR_FAILURE;
}
if (mAudioCodec == NESTEGG_CODEC_VORBIS) {
// Get the Vorbis header data
unsigned int nheaders = 0;
r = nestegg_track_codec_data_count(mContext, track, &nheaders);
if (r == -1 || nheaders != 3) {
Cleanup();
return NS_ERROR_FAILURE;
}
for (uint32_t header = 0; header < nheaders; ++header) {
for (uint32_t header = 0; header < nheaders; ++header) {
unsigned char* data = 0;
size_t length = 0;
r = nestegg_track_codec_data(mContext, track, header, &data, &length);
if (r == -1) {
Cleanup();
return NS_ERROR_FAILURE;
}
ogg_packet opacket = InitOggPacket(data, length, header == 0, false, 0);
r = vorbis_synthesis_headerin(&mVorbisInfo,
&mVorbisComment,
&opacket);
if (r != 0) {
Cleanup();
return NS_ERROR_FAILURE;
}
}
r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo);
if (r != 0) {
Cleanup();
return NS_ERROR_FAILURE;
}
r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock);
if (r != 0) {
Cleanup();
return NS_ERROR_FAILURE;
}
mInfo.mAudio.mRate = mVorbisDsp.vi->rate;
mInfo.mAudio.mChannels = mVorbisDsp.vi->channels;
mChannels = mInfo.mAudio.mChannels;
#ifdef MOZ_OPUS
} else if (mAudioCodec == NESTEGG_CODEC_OPUS) {
unsigned char* data = 0;
size_t length = 0;
r = nestegg_track_codec_data(mContext, track, header, &data, &length);
r = nestegg_track_codec_data(mContext, track, 0, &data, &length);
if (r == -1) {
Cleanup();
return NS_ERROR_FAILURE;
}
ogg_packet opacket = InitOggPacket(data, length, header == 0, false, 0);
r = vorbis_synthesis_headerin(&mVorbisInfo,
&mVorbisComment,
&opacket);
if (r != 0) {
mOpusParser = new OpusParser;
if (!mOpusParser->DecodeHeader(data, length)) {
Cleanup();
return NS_ERROR_FAILURE;
}
}
r = vorbis_synthesis_init(&mVorbisDsp, &mVorbisInfo);
if (r != 0) {
if (!InitOpusDecoder()) {
Cleanup();
return NS_ERROR_FAILURE;
}
mInfo.mAudio.mRate = mOpusParser->mRate;
mInfo.mAudio.mChannels = mOpusParser->mChannels;
mInfo.mAudio.mChannels = mInfo.mAudio.mChannels > 2 ? 2 : mInfo.mAudio.mChannels;
#endif
} else {
Cleanup();
return NS_ERROR_FAILURE;
}
r = vorbis_block_init(&mVorbisDsp, &mVorbisBlock);
if (r != 0) {
Cleanup();
return NS_ERROR_FAILURE;
}
mInfo.mAudio.mRate = mVorbisDsp.vi->rate;
mInfo.mAudio.mChannels = mVorbisDsp.vi->channels;
mChannels = mInfo.mAudio.mChannels;
}
}
@ -396,6 +440,25 @@ nsresult WebMReader::ReadMetadata(MediaInfo* aInfo,
return NS_OK;
}
#ifdef MOZ_OPUS
bool WebMReader::InitOpusDecoder()
{
int r;
NS_ASSERTION(mOpusDecoder == nullptr, "leaking OpusDecoder");
mOpusDecoder = opus_multistream_decoder_create(mOpusParser->mRate,
mOpusParser->mChannels,
mOpusParser->mStreams,
mOpusParser->mCoupledStreams,
mOpusParser->mMappingTable,
&r);
mSkip = mOpusParser->mPreSkip;
return r == OPUS_OK;
}
#endif
ogg_packet WebMReader::InitOggPacket(unsigned char* aData,
size_t aLength,
bool aBOS,
@ -429,7 +492,7 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
return false;
}
const uint32_t rate = mVorbisDsp.vi->rate;
const uint32_t rate = mInfo.mAudio.mRate;
uint64_t tstamp_usecs = tstamp / NS_PER_USEC;
if (mAudioStartUsec == -1) {
// This is the first audio chunk. Assume the start time of our decode
@ -471,27 +534,164 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
if (r == -1) {
return false;
}
if (mAudioCodec == NESTEGG_CODEC_VORBIS) {
ogg_packet opacket = InitOggPacket(data, length, false, false, -1);
ogg_packet opacket = InitOggPacket(data, length, false, false, -1);
if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) {
return false;
}
if (vorbis_synthesis(&mVorbisBlock, &opacket) != 0) {
return false;
}
if (vorbis_synthesis_blockin(&mVorbisDsp,
&mVorbisBlock) != 0) {
return false;
}
if (vorbis_synthesis_blockin(&mVorbisDsp,
&mVorbisBlock) != 0) {
return false;
}
VorbisPCMValue** pcm = 0;
int32_t frames = 0;
while ((frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm)) > 0) {
nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]);
for (uint32_t j = 0; j < mChannels; ++j) {
VorbisPCMValue* channel = pcm[j];
for (uint32_t i = 0; i < uint32_t(frames); ++i) {
buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
VorbisPCMValue** pcm = 0;
int32_t frames = 0;
while ((frames = vorbis_synthesis_pcmout(&mVorbisDsp, &pcm)) > 0) {
nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * mChannels]);
for (uint32_t j = 0; j < mChannels; ++j) {
VorbisPCMValue* channel = pcm[j];
for (uint32_t i = 0; i < uint32_t(frames); ++i) {
buffer[i*mChannels + j] = MOZ_CONVERT_VORBIS_SAMPLE(channel[i]);
}
}
CheckedInt64 duration = FramesToUsecs(frames, rate);
if (!duration.isValid()) {
NS_WARNING("Int overflow converting WebM audio duration");
return false;
}
CheckedInt64 total_duration = FramesToUsecs(total_frames, rate);
if (!total_duration.isValid()) {
NS_WARNING("Int overflow converting WebM audio total_duration");
return false;
}
CheckedInt64 time = total_duration + tstamp_usecs;
if (!time.isValid()) {
NS_WARNING("Int overflow adding total_duration and tstamp_usecs");
nestegg_free_packet(aPacket);
return false;
};
total_frames += frames;
AudioQueue().Push(new AudioData(aOffset,
time.value(),
duration.value(),
frames,
buffer.forget(),
mChannels));
mAudioFrames += frames;
if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) {
return false;
}
}
} else if (mAudioCodec == NESTEGG_CODEC_OPUS) {
#ifdef MOZ_OPUS
uint32_t channels = mOpusParser->mChannels;
// Maximum value is 63*2880, so there's no chance of overflow.
int32_t frames_number = opus_packet_get_nb_frames(data, length);
if (frames_number <= 0)
return false; // Invalid packet header.
int32_t samples = opus_packet_get_samples_per_frame(data,
(opus_int32) rate);
int32_t frames = frames_number*samples;
// A valid Opus packet must be between 2.5 and 120 ms long.
if (frames < 120 || frames > 5760)
return false;
nsAutoArrayPtr<AudioDataValue> buffer(new AudioDataValue[frames * channels]);
// Decode to the appropriate sample type.
#ifdef MOZ_SAMPLE_TYPE_FLOAT32
int ret = opus_multistream_decode_float(mOpusDecoder,
data, length,
buffer, frames, false);
#else
int ret = opus_multistream_decode(mOpusDecoder,
data, length,
buffer, frames, false);
#endif
if (ret < 0)
return false;
NS_ASSERTION(ret == frames, "Opus decoded too few audio samples");
// Trim the initial frames while the decoder is settling.
if (mSkip > 0) {
int32_t skipFrames = std::min(mSkip, frames);
if (skipFrames == frames) {
// discard the whole packet
mSkip -= frames;
LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames"
" (whole packet)", frames));
return true;
}
int32_t keepFrames = frames - skipFrames;
int samples = keepFrames * channels;
nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]);
for (int i = 0; i < samples; i++)
trimBuffer[i] = buffer[skipFrames*channels + i];
frames = keepFrames;
buffer = trimBuffer;
mSkip -= skipFrames;
LOG(PR_LOG_DEBUG, ("Opus decoder skipping %d frames", skipFrames));
}
int64_t discardPadding = 0;
r = nestegg_packet_discard_padding(aPacket, &discardPadding);
if (discardPadding > 0) {
CheckedInt64 discardFrames = UsecsToFrames(discardPadding * NS_PER_USEC, rate);
if (!discardFrames.isValid()) {
NS_WARNING("Int overflow in DiscardPadding");
return false;
}
int32_t keepFrames = frames - discardFrames.value();
if (keepFrames > 0) {
int samples = keepFrames * channels;
nsAutoArrayPtr<AudioDataValue> trimBuffer(new AudioDataValue[samples]);
for (int i = 0; i < samples; i++)
trimBuffer[i] = buffer[i];
frames = keepFrames;
buffer = trimBuffer;
} else {
LOG(PR_LOG_DEBUG, ("Opus decoder discarding whole packet"
" ( %d frames) as padding", frames));
return true;
}
}
// Apply the header gain if one was specified.
#ifdef MOZ_SAMPLE_TYPE_FLOAT32
if (mOpusParser->mGain != 1.0f) {
float gain = mOpusParser->mGain;
int samples = frames * channels;
for (int i = 0; i < samples; i++) {
buffer[i] *= gain;
}
}
#else
if (mOpusParser->mGain_Q16 != 65536) {
int64_t gain_Q16 = mOpusParser->mGain_Q16;
int samples = frames * channels;
for (int i = 0; i < samples; i++) {
int32_t val = static_cast<int32_t>((gain_Q16*buffer[i] + 32768)>>16);
buffer[i] = static_cast<AudioDataValue>(MOZ_CLIP_TO_15(val));
}
}
#endif
// More than 2 decoded channels must be downmixed to stereo.
if (channels > 2) {
// Opus doesn't provide a channel mapping for more than 8 channels,
// so we can't downmix more than that.
if (channels > 8)
return false;
OggReader::DownmixToStereo(buffer, channels, frames);
}
CheckedInt64 duration = FramesToUsecs(frames, rate);
@ -499,30 +699,25 @@ bool WebMReader::DecodeAudioPacket(nestegg_packet* aPacket, int64_t aOffset)
NS_WARNING("Int overflow converting WebM audio duration");
return false;
}
CheckedInt64 total_duration = FramesToUsecs(total_frames, rate);
if (!total_duration.isValid()) {
NS_WARNING("Int overflow converting WebM audio total_duration");
return false;
}
CheckedInt64 time = total_duration + tstamp_usecs;
CheckedInt64 time = tstamp_usecs;
if (!time.isValid()) {
NS_WARNING("Int overflow adding total_duration and tstamp_usecs");
nestegg_free_packet(aPacket);
return false;
};
total_frames += frames;
AudioQueue().Push(new AudioData(aOffset,
AudioQueue().Push(new AudioData(mDecoder->GetResource()->Tell(),
time.value(),
duration.value(),
frames,
buffer.forget(),
mChannels));
mAudioFrames += frames;
if (vorbis_synthesis_read(&mVorbisDsp, frames) != 0) {
return false;
}
#else
return false;
#endif /* MOZ_OPUS */
}
}

Просмотреть файл

@ -22,6 +22,10 @@
#include "vorbis/codec.h"
#endif
#ifdef MOZ_OPUS
#include "OpusParser.h"
#endif
namespace mozilla {
class WebMBufferedState;
@ -154,6 +158,11 @@ protected:
bool aEOS,
int64_t aGranulepos);
#ifdef MOZ_OPUS
// Setup opus decoder
bool InitOpusDecoder();
#endif
// Decode a nestegg packet of audio data. Push the audio data on the
// audio queue. Returns true when there's more audio to decode,
// false if the audio is finished, end of file has been reached,
@ -182,6 +191,14 @@ private:
uint32_t mPacketCount;
uint32_t mChannels;
#ifdef MOZ_OPUS
// Opus decoder state
nsAutoPtr<OpusParser> mOpusParser;
OpusMSDecoder *mOpusDecoder;
int mSkip; // Number of samples left to trim before playback.
#endif
// Queue of video and audio packets that have been read but not decoded. These
// must only be accessed from the state machine thread.
WebMPacketQueue mVideoPackets;
@ -197,6 +214,9 @@ private:
// Number of audio frames we've decoded since decoding began at mAudioStartMs.
uint64_t mAudioFrames;
// Number of nanoseconds that must be discarded from the start of the Stream.
uint64_t mCodecDelay;
// Parser state and computed offset-time mappings. Shared by multiple
// readers when decoder has been cloned. Main thread only.
nsRefPtr<WebMBufferedState> mBufferedState;
@ -211,6 +231,10 @@ private:
// Booleans to indicate if we have audio and/or video data
bool mHasVideo;
bool mHasAudio;
// Codec ID of audio track
int mAudioCodec;
};
} // namespace mozilla