Bug 831224: Added MP3 frame parser, r=padenot

MP3 streams consist of small frames, with each frame containing the audio data of a few hundred milliseconds. The actual duration of the encoded audio can vary among frames. Each frame consists of a 4-byte frame header, some optional extra information, and the audio data. The MP3 frame parser walks over the content of an MP3 stream, computes the duration of each frame from the frame header, and sums them up to the stream's complete duration. The MP3 frame parser does not decode the actual audio data. --HG-- extra : rebase_source : 1b101d8f9bf73e62672933d0f5d20253d7b25491
2013-05-03 09:44:02 +02:00 · 2013-05-03 09:44:02 +02:00 · 4574d05406
--- a/content/media/omx/MP3FrameParser.cpp
+++ b/content/media/omx/MP3FrameParser.cpp
@ -0,0 +1,487 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include <algorithm>
 #include "nsMemory.h"
 #include "MP3FrameParser.h"
 namespace mozilla {
 // An ID3Buffer contains data of an ID3v2 header. The supplied buffer must
 // point to an ID3 header and be at least ID3_HEADER_LENGTH bytes long. Run
 // the Parse method to read in the header's values.
 class ID3Buffer
 {
 public:
  enum {
    // Number of bytes Parse() needs to see; shorter buffers are rejected.
    ID3_HEADER_LENGTH = 10
  };
  // Wraps aLength bytes starting at aBuffer. Only the pointer is stored,
  // the data itself is not copied. A null aBuffer is only accepted
  // together with a zero aLength.
  ID3Buffer(const uint8_t* aBuffer, uint32_t aLength)
  : mBuffer(aBuffer),
    mLength(aLength),
    mSize(0)
  {
    MOZ_ASSERT(mBuffer || !mLength);
  }
  // Validates the header and decodes the size stored in it. Returns NS_OK
  // on success and NS_ERROR_INVALID_ARG otherwise.
  nsresult Parse();
  // Returns ID3_HEADER_LENGTH plus the size decoded by Parse(), i.e. the
  // offset (relative to the start of the header) at which the data after
  // the tag is expected. The size is 0 until Parse() has succeeded.
  int64_t GetMP3Offset() const {
    return ID3_HEADER_LENGTH + mSize;
  }
 private:
  // Borrowed pointer to the header bytes and the number of valid bytes.
  const uint8_t* mBuffer;
  uint32_t       mLength;
  // Size value decoded from header bytes 6..9; written by Parse().
  uint32_t       mSize;
 };
 // Validates the 10-byte ID3v2 header at mBuffer and computes how many bytes
 // of tag data follow it.
 //
 // Returns NS_ERROR_INVALID_ARG if there is no buffer, if it is shorter than
 // ID3_HEADER_LENGTH, or if it does not start with "ID3" followed by four
 // size bytes that all have their top bit clear. Returns NS_OK otherwise;
 // GetMP3Offset() then reports the offset of the first byte after the tag.
 nsresult ID3Buffer::Parse()
 {
  NS_ENSURE_TRUE(mBuffer && mLength >= ID3_HEADER_LENGTH, NS_ERROR_INVALID_ARG);

  // Header layout: 'I' 'D' '3', major version, revision, flags, 4 size bytes.
  enum {
    ID3_OFFSET_MAJOR_VERSION = 3,
    ID3_OFFSET_FLAGS         = 5,
    ID3_FLAG_FOOTER          = 0x10,
    ID3_FOOTER_LENGTH        = 10
  };

  if ((mBuffer[0] != 'I') ||
      (mBuffer[1] != 'D') ||
      (mBuffer[2] != '3') ||
      (mBuffer[6] & 0x80) ||
      (mBuffer[7] & 0x80) ||
      (mBuffer[8] & 0x80) ||
      (mBuffer[9] & 0x80)) {
    return NS_ERROR_INVALID_ARG;
  }

  // The size is stored as four bytes of 7 significant bits each, most
  // significant byte first.
  mSize = ((static_cast<uint32_t>(mBuffer[6])<<21) |
           (static_cast<uint32_t>(mBuffer[7])<<14) |
           (static_cast<uint32_t>(mBuffer[8])<<7)  |
            static_cast<uint32_t>(mBuffer[9]));

  // Since ID3v2.4 a tag may be followed by a 10-byte footer that is not
  // part of the stored size. Account for it, otherwise the reported MP3
  // offset would point into the footer instead of the first audio frame.
  if ((mBuffer[ID3_OFFSET_MAJOR_VERSION] >= 4) &&
      (mBuffer[ID3_OFFSET_FLAGS] & ID3_FLAG_FOOTER)) {
    mSize += ID3_FOOTER_LENGTH;
  }

  return NS_OK;
 }
 // The MP3Buffer contains MP3 frame data. The supplied buffer must point
 // to a frame header. Call the method Parse to extract information from
 // the MP3 frame headers in the supplied buffer.
 class MP3Buffer
 {
 public:
  enum {
    // Size of an MP3 frame header in bytes.
    MP3_HEADER_LENGTH   = 4,
    // frame size in bytes = MP3_FRAMESIZE_CONST * bitRate / sampleRate,
    // plus one byte if the header's padding bit is set.
    MP3_FRAMESIZE_CONST = 144000,
    // duration in microseconds = MP3_DURATION_CONST * bytes / bitRate
    MP3_DURATION_CONST  = 8000
  };

  // Wraps aLength bytes starting at aBuffer. Only the pointer is stored,
  // the data itself is not copied. A null aBuffer is only accepted
  // together with a zero aLength.
  MP3Buffer(const uint8_t* aBuffer, uint32_t aLength)
  : mBuffer(aBuffer),
    mLength(aLength),
    mDurationUs(0),
    mNumFrames(0),
    mBitRateSum(0),
    mFrameSizeSum(0),
    mTrailing(0)
  {
    MOZ_ASSERT(mBuffer || !mLength);
  }

  // Scans aBuffer for the first position that holds a valid frame header
  // and returns a pointer to it. If there is none, the returned pointer
  // refers to the first position with fewer than MP3_HEADER_LENGTH bytes
  // left.
  static const uint8_t* FindNextHeader(const uint8_t* aBuffer, uint32_t aLength);

  // Walks over all frames in the buffer and accumulates their statistics.
  // Fails with the decoder's error code as soon as a position that should
  // hold a frame header does not contain a valid one.
  nsresult Parse();

  // Accumulated duration of all parsed frames in microseconds.
  int64_t GetDuration() const {
    return mDurationUs;
  }
  // Number of frames that have been parsed.
  int64_t GetNumberOfFrames() const {
    return mNumFrames;
  }
  // Sum of the bit rates of all parsed frames.
  int64_t GetBitRateSum() const {
    return mBitRateSum;
  }
  // Sum of the sizes of all parsed frames in bytes. A frame that starts in
  // the buffer but extends beyond its end is counted with its full size.
  int64_t GetFrameSizeSum() const {
    return mFrameSizeSum;
  }
  // Number of bytes at the end of the buffer that were too few to hold a
  // frame header.
  int64_t GetTrailing() const {
    return mTrailing;
  }

 private:
  // Identifies a bit field within the 32-bit frame header.
  enum MP3FrameHeaderField {
    MP3_HDR_FIELD_SYNC,
    MP3_HDR_FIELD_VERSION,
    MP3_HDR_FIELD_LAYER,
    MP3_HDR_FIELD_BITRATE,
    MP3_HDR_FIELD_SAMPLERATE,
    MP3_HDR_FIELD_PADDING,
    MP3_HDR_FIELDS // Must be last enumerator value
  };

  // Field values a header must carry to be accepted as a frame header.
  enum {
    MP3_HDR_CONST_FRAMESYNC = 0x7ff,
    MP3_HDR_CONST_VERSION   = 3,
    MP3_HDR_CONST_LAYER     = 1
  };

  static uint32_t ExtractBits(uint32_t aValue, uint32_t aOffset,
                              uint32_t aBits);
  static uint32_t ExtractFrameHeaderField(uint32_t aHeader,
                                          enum MP3FrameHeaderField aField);
  static uint32_t ExtractFrameHeader(const uint8_t* aBuffer);
  // aFrameSize is a uint32_t* to match the definition and its caller; a
  // size_t* here only compiles where size_t happens to be uint32_t.
  static nsresult DecodeFrameHeader(const uint8_t* aBuffer,
                                    uint32_t* aFrameSize,
                                    uint32_t* aBitRate,
                                    uint64_t* aDuration);

  // Lookup tables indexed by the header's bit-rate and sample-rate fields.
  static const uint16_t sBitRate[16];
  static const uint16_t sSampleRate[4];

  // Borrowed pointer to the frame data and the number of valid bytes.
  const uint8_t* mBuffer;
  uint32_t       mLength;

  // The duration of this parser's data in microseconds.
  int64_t mDurationUs;

  // The number of frames in the range.
  int64_t mNumFrames;

  // The sum of all frames' bit rates.
  int64_t mBitRateSum;

  // The sum of all frames' sizes in bytes. Kept as wide as its getter so
  // that the sum cannot wrap for large buffers.
  int64_t mFrameSizeSum;

  // The number of trailing bytes.
  int32_t mTrailing;
 };
 // Bit rate for each value of the header's 4-bit bit-rate field. A zero
 // entry makes ExtractFrameHeader reject the header.
 // NOTE(review): values look like the MPEG-1 Layer III table in kbit/s,
 // which is what the frame-size and duration formulas assume - confirm.
 const uint16_t MP3Buffer::sBitRate[16] = {
  0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0
 };
 // Sample rate for each value of the header's 2-bit sample-rate field. A
 // zero entry makes ExtractFrameHeader reject the header.
 // NOTE(review): presumably in Hz (MPEG-1 rates) - confirm.
 const uint16_t MP3Buffer::sSampleRate[4] = {
  44100, 48000, 32000, 0
 };
 // Returns the aBits-wide bit field of aValue whose lowest bit sits at bit
 // position aOffset (bit 0 being the least significant bit of aValue).
 uint32_t MP3Buffer::ExtractBits(uint32_t aValue, uint32_t aOffset, uint32_t aBits)
 {
  // A mask with the lowest aBits bits set.
  const unsigned long fieldMask = (0x1ul << aBits) - 1;
  const uint32_t shifted = aValue >> aOffset;
  return shifted & fieldMask;
 }
 uint32_t MP3Buffer::ExtractFrameHeaderField(uint32_t aHeader, enum MP3FrameHeaderField aField)
 {
  static const uint8_t sField[MP3_HDR_FIELDS][2] = {
    {21, 11}, {19, 2}, {17, 2}, {12, 4}, {10, 2}, {9, 1}
  };
  MOZ_ASSERT(aField < MP3_HDR_FIELDS);
  return ExtractBits(aHeader, sField[aField][0], sField[aField][1]);
 }
 uint32_t MP3Buffer::ExtractFrameHeader(const uint8_t* aBuffer)
 {
  MOZ_ASSERT(aBuffer);
  uint32_t header = (static_cast<uint32_t>(aBuffer[0])<<24) |
                    (static_cast<uint32_t>(aBuffer[1])<<16) |
                    (static_cast<uint32_t>(aBuffer[2])<<8)  |
                     static_cast<uint32_t>(aBuffer[3]);
  uint32_t frameSync = ExtractFrameHeaderField(header, MP3_HDR_FIELD_SYNC);
  uint32_t version = ExtractFrameHeaderField(header, MP3_HDR_FIELD_VERSION);
  uint32_t layer = ExtractFrameHeaderField(header, MP3_HDR_FIELD_LAYER);
  uint32_t bitRate = sBitRate[ExtractFrameHeaderField(header, MP3_HDR_FIELD_BITRATE)];
  uint32_t sampleRate = sSampleRate[ExtractFrameHeaderField(header, MP3_HDR_FIELD_SAMPLERATE)];
  // branch-less implementation of
  //
  //  if (fields-are-valid)
  //    return header;
  //  else
  //    return 0;
  //
  return (frameSync == uint32_t(MP3_HDR_CONST_FRAMESYNC)) *
         (version == uint32_t(MP3_HDR_CONST_VERSION)) *
         (layer == uint32_t(MP3_HDR_CONST_LAYER)) * !!bitRate * !!sampleRate * header;
 }
 // Searches aBuffer byte by byte for a valid MP3 frame header. Returns a
 // pointer to the first one found. If there is none, the returned pointer
 // refers to the first position that has fewer than MP3_HEADER_LENGTH bytes
 // left (which is aBuffer itself for very short buffers).
 const uint8_t* MP3Buffer::FindNextHeader(const uint8_t* aBuffer, uint32_t aLength)
 {
  MOZ_ASSERT(aBuffer || !aLength);

  // Only positions that leave room for a complete header are candidates.
  for (; aLength >= MP3_HEADER_LENGTH; ++aBuffer, --aLength) {
    if (ExtractFrameHeader(aBuffer)) {
      return aBuffer;
    }
  }
  return aBuffer;
 }
 // Decodes the frame header at aBuffer, which must provide at least
 // MP3_HEADER_LENGTH readable bytes.
 //
 //  aFrameSize  receives the size of the frame in bytes
 //  aBitRate    receives the frame's bit rate as stored in sBitRate
 //  aDuration   receives the frame's duration in microseconds
 //
 // Returns NS_ERROR_INVALID_ARG, leaving the out-parameters untouched, if
 // there is no valid header at aBuffer; NS_OK otherwise.
 nsresult MP3Buffer::DecodeFrameHeader(const uint8_t* aBuffer,
                                      uint32_t* aFrameSize,
                                      uint32_t* aBitRate,
                                      uint64_t* aDuration)
 {
  const uint32_t header = ExtractFrameHeader(aBuffer);
  if (header == 0) {
    return NS_ERROR_INVALID_ARG;
  }

  const uint32_t bitRateIndex = ExtractFrameHeaderField(header, MP3_HDR_FIELD_BITRATE);
  const uint32_t sampleRateIndex = ExtractFrameHeaderField(header, MP3_HDR_FIELD_SAMPLERATE);
  const uint32_t padding = ExtractFrameHeaderField(header, MP3_HDR_FIELD_PADDING);

  // ExtractFrameHeader only accepts headers whose table entries are
  // non-zero, so neither division below can divide by zero.
  const uint32_t bitRate = sBitRate[bitRateIndex];
  const uint32_t sampleRate = sSampleRate[sampleRateIndex];

  const uint64_t unpaddedSize = (uint64_t(MP3_FRAMESIZE_CONST) * bitRate) / sampleRate;
  const uint32_t frameSize = static_cast<uint32_t>(unpaddedSize + padding);

  MOZ_ASSERT(aBitRate);
  *aBitRate = bitRate;
  MOZ_ASSERT(aFrameSize);
  *aFrameSize = frameSize;
  MOZ_ASSERT(aDuration);
  *aDuration = (uint64_t(MP3_DURATION_CONST) * frameSize) / bitRate;
  return NS_OK;
 }
 // Steps through the buffer frame by frame, starting at its first byte, and
 // adds each frame's bit rate, size and duration to the running totals.
 //
 // Parsing stops when fewer than MP3_HEADER_LENGTH bytes are left; their
 // number is stored as the trailing byte count. A frame that extends beyond
 // the end of the buffer is counted in full and leaves no trailing bytes.
 // If a position that should hold a header does not contain a valid one,
 // the decoder's error code is returned; totals gathered so far remain in
 // the object.
 nsresult MP3Buffer::Parse()
 {
  const uint8_t* cursor = mBuffer;
  uint32_t remaining = mLength;

  while (remaining >= MP3_HEADER_LENGTH) {
    uint32_t frameSize;
    uint32_t bitRate;
    uint64_t duration;
    nsresult rv = DecodeFrameHeader(cursor, &frameSize, &bitRate, &duration);
    NS_ENSURE_SUCCESS(rv, rv);

    ++mNumFrames;
    mDurationUs += duration;
    mBitRateSum += bitRate;
    mFrameSizeSum += frameSize;

    // Move to the next frame; a frame that is cut off by the end of the
    // buffer consumes everything that is left.
    cursor += frameSize;
    remaining = (frameSize > remaining) ? 0 : remaining - frameSize;
  }

  mTrailing = remaining;
  return NS_OK;
 }
 // Creates a parser with empty statistics positioned at stream offset 0.
 // aLength initializes mLength (the header declares -1 as its default);
 // the parsing code raises mLength whenever the parsed position moves
 // beyond it. The stream is assumed to be MP3 until parsing shows
 // otherwise. The contents of mBuffer are left uninitialized, which is
 // fine because mBufferLength starts at 0.
 MP3FrameParser::MP3FrameParser(int64_t aLength)
 : mBufferLength(0),
  mLock("MP3FrameParser.mLock"),
  mDurationUs(0),
  mBitRateSum(0),
  mNumFrames(0),
  mOffset(0),
  mUnhandled(0),
  mLength(aLength),
  mTrailing(0),
  mIsMP3(true)
 { }
 // Tries to make progress with the small internal buffer, which holds
 // bytes from the beginning of the stream or left over from an earlier
 // call, by appending the first bytes of the new fragment to it.
 //
 //  aBuffer  the new fragment's bytes
 //  aLength  number of bytes in aBuffer
 //  aOffset  stream position of aBuffer[0]
 //
 // Returns the number of bytes of aBuffer that were moved into the internal
 // buffer and are still waiting there unparsed; the caller skips them.
 // Returns 0 if nothing was copied, or if the internal buffer was parsed
 // successfully (mOffset then tells the caller where to continue).
 size_t MP3FrameParser::ParseInternalBuffer(const uint8_t* aBuffer, uint32_t aLength, int64_t aOffset)
 {
  if (mOffset != aOffset) {
    // If we don't append, we throw away our temporary buffer.
    mBufferLength = 0;
    return 0;
  }
  size_t copyLength = 0;
  if (mBufferLength || !mOffset) {
    // We have some data in our temporary buffer and append to it, or
    // we are at the beginning of the stream. In both cases, we append
    // some data to our temporary buffer and try to parse it.
    copyLength = std::min<size_t>(NS_ARRAY_LENGTH(mBuffer)-mBufferLength, aLength);
    memcpy(mBuffer+mBufferLength, aBuffer, copyLength*sizeof(*mBuffer));
    mBufferLength += copyLength;
  }
  if ((mBufferLength >= ID3Buffer::ID3_HEADER_LENGTH) && (mOffset < ID3Buffer::ID3_HEADER_LENGTH)) {
    // There might be an ID3 header at the very beginning of the stream.
    ID3Buffer id3Buffer(mBuffer, mBufferLength);
    nsresult rv = id3Buffer.Parse();
    if (rv == NS_OK) {
      // Skip the whole tag. The bytes that were in the temporary buffer
      // before this call (mBufferLength-copyLength) are subtracted
      // because the advance is computed relative to the buffer's start.
      mOffset += id3Buffer.GetMP3Offset()-(mBufferLength-copyLength);
      mBufferLength = 0;
    }
  }
  if (mBufferLength >= MP3Buffer::MP3_HEADER_LENGTH) {
    // Or there could be a regular frame header somewhere
    // in the stream.
    MP3Buffer mp3Buffer(mBuffer, mBufferLength);
    nsresult rv = mp3Buffer.Parse();
    if (rv == NS_OK) {
      mDurationUs += mp3Buffer.GetDuration();
      mBitRateSum += mp3Buffer.GetBitRateSum();
      mNumFrames  += mp3Buffer.GetNumberOfFrames();
      // Same correction as for the ID3 header above: advance by the
      // parsed frame sizes minus the previously buffered bytes.
      mOffset     += mp3Buffer.GetFrameSizeSum()-(mBufferLength-copyLength);
      mBufferLength = 0;
    }
  }
  if (mBufferLength) {
    // We have not been able to successfully parse the
    // content of the temporary buffer. If the buffer is
    // full already, the stream does not contain MP3.
    mOffset += copyLength;
    mIsMP3   = (mBufferLength < NS_ARRAY_LENGTH(mBuffer));
  } else {
    // We parsed the temporary buffer. The parser code
    // will update the input data.
    copyLength = 0;
  }
  // Keep the known stream length at least as large as the position.
  if (mOffset > mLength) {
    mLength = mOffset;
  }
  return copyLength;
 }
 // Feeds a fragment of the stream into the parser.
 //
 //  aBuffer  the fragment's bytes
 //  aLength  number of bytes in aBuffer
 //  aOffset  stream position of aBuffer[0]
 //
 // Data in front of the current stream position is ignored. If the fragment
 // starts beyond the current position, the skipped range is recorded as
 // unhandled and the parser searches the fragment for the next frame
 // header. If no frame can be parsed, the stream is flagged as not being
 // MP3.
 void MP3FrameParser::Parse(const uint8_t* aBuffer, uint32_t aLength, int64_t aOffset)
 {
  MutexAutoLock mon(mLock);

  // We first try to parse the remaining data from the last call that
  // is stored in an internal buffer.
  size_t bufferIncr = ParseInternalBuffer(aBuffer, aLength, aOffset);
  aBuffer += bufferIncr;
  aLength -= bufferIncr;
  aOffset += bufferIncr;

  // The number of attempts to parse the data. This should be 1 if we
  // append to the end of the existing data.
  int retries = 1;

  if (aOffset+aLength <= mOffset) {
    // We already processed this fragment.
    return;
  } else if (aOffset < mOffset) {
    // mOffset is within the new fragment, shorten range.
    aLength -= mOffset-aOffset;
    aBuffer += mOffset-aOffset;
    aOffset  = mOffset;
  } else if (aOffset > mOffset) {
    // Fragment comes after current position, store difference and move
    // the current position to the fragment's start. Without the latter,
    // a successful first parse attempt would add the frame sizes to the
    // stale position and the skipped range would be counted twice.
    mUnhandled += aOffset-mOffset;
    mOffset     = aOffset;

    // We might start in the middle of a frame and have to find the next
    // frame header. As our detection heuristics might return false
    // positives, we simply try multiple times. The current value comes
    // from experimentation with MP3 files. If you encounter false positives
    // and incorrectly parsed MP3 files, try incrementing this value.
    retries = 5;
  }

  uint32_t trailing = 0;

  while (retries) {
    MP3Buffer mp3Buffer(aBuffer, aLength);
    nsresult rv = mp3Buffer.Parse();

    if (rv != NS_OK) {
      --retries;

      if (!retries) {
        // Give up, but still fall through to the length update below so
        // that mLength >= mOffset keeps holding for GetDuration().
        mIsMP3 = false;
        break;
      }

      // We might be in the middle of a frame, find next frame header.
      // aLength is at least MP3_HEADER_LENGTH here, because MP3Buffer's
      // parser can only fail after it has seen a complete header.
      const uint8_t *buffer = MP3Buffer::FindNextHeader(aBuffer+1, aLength-1);
      const uint32_t skipped = static_cast<uint32_t>(buffer-aBuffer);

      // Advance the fragment's offset together with its buffer so that
      // the position stays correct if more than one retry is needed.
      mUnhandled += skipped;
      aOffset    += skipped;
      mOffset     = aOffset;
      aLength    -= skipped;
      aBuffer     = buffer;
    } else {
      mDurationUs += mp3Buffer.GetDuration();
      mBitRateSum += mp3Buffer.GetBitRateSum();
      mNumFrames  += mp3Buffer.GetNumberOfFrames();
      mOffset     += mp3Buffer.GetFrameSizeSum();

      trailing = mp3Buffer.GetTrailing();
      retries = 0;
    }
  }

  if (trailing) {
    // Store trailing bytes in temporary buffer.
    // NOTE(review): mOffset is not advanced past these bytes, so on the
    // next in-order call ParseInternalBuffer sees mOffset != aOffset and
    // appears to discard them again - confirm whether that is intended.
    MOZ_ASSERT(trailing < (NS_ARRAY_LENGTH(mBuffer)*sizeof(*mBuffer)));
    memcpy(mBuffer, aBuffer+(aLength-trailing), trailing);
    mBufferLength = trailing;
  }

  if (mOffset > mLength) {
    mLength = mOffset;
  }
 }
 // Convenience entry point for callers that hold the data as chars; it
 // reinterprets the bytes as unsigned and hands them to Parse() unchanged.
 void MP3FrameParser::NotifyDataArrived(const char* aBuffer, uint32_t aLength, int64_t aOffset)
 {
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(aBuffer);
  Parse(bytes, aLength, aOffset);
 }
 // Returns the duration of the stream in microseconds, or -1 if no frame
 // has been parsed yet. The result is the summed duration of all parsed
 // frames plus an estimate for the bytes that have not been parsed (ranges
 // that were skipped and the rest of the stream up to mLength), derived
 // from the average bit rate of the parsed frames.
 int64_t MP3FrameParser::GetDuration()
 {
  MutexAutoLock mon(mLock);

  if (!mNumFrames) {
    return -1; // Not a single frame decoded yet
  }

  // Compute the duration of the unhandled fragments from
  // the average bitrate.
  int64_t avgBitRate = mBitRateSum / mNumFrames;
  NS_ENSURE_TRUE(avgBitRate > 0, mDurationUs);

  MOZ_ASSERT(mLength >= mOffset);
  // Parse() can return early after moving mOffset beyond mLength. Clamp
  // the remainder so that a negative value is never converted to a huge
  // unsigned number in builds where the assertion is compiled out.
  int64_t remaining = std::max<int64_t>(mLength-mOffset, 0);
  int64_t unhandled = mUnhandled + remaining;

  return mDurationUs + (uint64_t(MP3Buffer::MP3_DURATION_CONST) * unhandled) / avgBitRate;
 }
 }
--- a/content/media/omx/MP3FrameParser.h
+++ b/content/media/omx/MP3FrameParser.h
@ -0,0 +1,76 @@
 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim:set ts=2 sw=2 sts=2 et cindent: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include <stdint.h>
 #include "mozilla/Mutex.h"
 namespace mozilla {
 // A description of the MP3 format and its extensions is available at
 //
 //  http://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header
 //
 // The data in MP3 streams is split into small frames, with each frame
 // containing a fixed number of samples. The duration of a frame depends
 // on the frame's bit rate and sample rate. Both values can vary among
 // frames, so it is necessary to examine each individual frame of an MP3
 // stream to calculate the stream's overall duration.
 //
 // The MP3 frame parser extracts information from an MP3 data stream. It
 // accepts a range of frames of an MP3 stream as input, and parses all
 // frames for their duration. Callers can query the stream's overall
 // duration from the parser.
 //
 // Call the methods NotifyDataArrived or Parse to add new data. If you added
 // information for a certain stream position, you cannot go back to previous
 // positions. The parser will simply ignore the input. If you skip stream
 // positions, the duration of the related MP3 frames will be estimated from
 // the stream's average.
 //
 // The method GetDuration returns the calculated duration of the stream in
 // microseconds, including estimates for skipped ranges, or -1 if no frame
 // has been parsed yet.
 //
 // All public methods are thread-safe.
 class MP3FrameParser
 {
 public:
  // aLength initializes the parser's notion of the stream length; it is
  // raised automatically when parsing moves beyond it.
  // NOTE(review): the default of -1 presumably means "length unknown" -
  // confirm against callers.
  // The constructor is explicit so that an integer is never silently
  // converted into a parser.
  explicit MP3FrameParser(int64_t aLength=-1);

  // Returns false once the parser has concluded that the data it was
  // given is not an MP3 stream.
  bool IsMP3() {
    MutexAutoLock mon(mLock);
    return mIsMP3;
  }

  // Adds the aLength bytes at aBuffer, located at stream position aOffset.
  void Parse(const uint8_t* aBuffer, uint32_t aLength, int64_t aOffset);

  // Same as Parse, for callers that hold the data as chars.
  void NotifyDataArrived(const char* aBuffer, uint32_t aLength, int64_t aOffset);

  // Returns the stream's duration in microseconds, or -1 if no frame has
  // been parsed yet.
  int64_t GetDuration();

 private:
  // Parses the internal buffer after appending data from aBuffer to it.
  // Expects mLock to be held by the caller (it is only called from Parse,
  // which takes the lock first).
  size_t ParseInternalBuffer(const uint8_t* aBuffer, uint32_t aLength, int64_t aOffset);

  // Temporary storage for bytes that could not be parsed yet, and the
  // number of valid bytes in it. Only accessed with mLock held.
  uint8_t  mBuffer[10];
  uint32_t mBufferLength;

  // A low-contention lock for protecting the parser results
  Mutex mLock;

  // All fields below are protected by mLock

  // Summed duration of all parsed frames in microseconds.
  uint64_t mDurationUs;
  // Summed bit rates and number of all parsed frames; their quotient is
  // the average bit rate used for estimates.
  uint64_t mBitRateSum;
  uint64_t mNumFrames;
  // Current position in the stream.
  int64_t  mOffset;
  // Number of bytes that were skipped instead of being parsed.
  int64_t  mUnhandled;
  // Length of the stream as far as it is known.
  int64_t  mLength;
  // NOTE(review): initialized by the constructor but not read anywhere in
  // the sources visible here.
  uint32_t mTrailing;

  // Contains the state of the MP3 detection
  bool mIsMP3;
 };
 }
--- a/content/media/omx/moz.build
+++ b/content/media/omx/moz.build
@ -14,6 +14,7 @@ EXPORTS += [
 CPP_SOURCES += [
    'MediaOmxDecoder.cpp',
    'MediaOmxReader.cpp',
    'MP3FrameParser.cpp',
    'OmxDecoder.cpp',
    'OMXCodecProxy.cpp',
 ]