Merge mozilla-central to autoland. a=merge CLOSED TREE

This commit is contained in:
Margareta Eliza Balazs 2018-11-01 11:38:01 +02:00
Parents 63f8b9a61d 39cb1e96cf
Commit 1970272fed
222 changed files with 42134 additions and 1526 deletions

View file

@ -427,7 +427,7 @@ class RemoteAutomation(Automation):
if stagedShutdown:
# Trigger an ANR report with "kill -3" (SIGQUIT)
try:
self.device.pkill(self.procName, sig=3, attempts=1)
self.device.pkill(self.procName, sig=3, attempts=1, root=True)
except ADBTimeoutError:
raise
except: # NOQA: E722
@ -435,7 +435,7 @@ class RemoteAutomation(Automation):
time.sleep(3)
# Trigger a breakpad dump with "kill -6" (SIGABRT)
try:
self.device.pkill(self.procName, sig=6, attempts=1)
self.device.pkill(self.procName, sig=6, attempts=1, root=True)
except ADBTimeoutError:
raise
except: # NOQA: E722
@ -447,15 +447,30 @@ class RemoteAutomation(Automation):
print("%s still alive after SIGABRT: waiting..." % self.procName)
time.sleep(5)
else:
return
break
retries += 1
try:
self.device.pkill(self.procName, sig=9, attempts=1)
except ADBTimeoutError:
raise
except: # NOQA: E722
print("%s still alive after SIGKILL!" % self.procName)
if self.device.process_exist(self.procName):
try:
self.device.pkill(self.procName, sig=9, attempts=1, root=True)
except ADBTimeoutError:
raise
except: # NOQA: E722
print("%s still alive after SIGKILL!" % self.procName)
if self.device.process_exist(self.procName):
self.device.stop_application(self.procName)
else:
self.device.stop_application(self.procName)
# Test harnesses use the MOZ_CRASHREPORTER environment variables to suppress
# the interactive crash reporter, but that may not always be effective;
# check for and cleanup errant crashreporters.
crashreporter = "%s.CrashReporter" % self.procName
if self.device.process_exist(crashreporter):
print("Warning: %s unexpectedly found running. Killing..." % crashreporter)
try:
self.device.pkill(crashreporter, root=True)
except ADBTimeoutError:
raise
except: # NOQA: E722
pass
if self.device.process_exist(crashreporter):
print("ERROR: %s still running!!" % crashreporter)

config/external/moz.build vendored (3 lines changed)
View file

@ -42,6 +42,9 @@ if CONFIG['MOZ_AV1']:
if not CONFIG['MOZ_SYSTEM_PNG']:
external_dirs += ['media/libpng']
if not CONFIG['MOZ_SYSTEM_WEBP']:
external_dirs += ['media/libwebp']
if CONFIG['CPU_ARCH'] == 'arm':
external_dirs += ['media/openmax_dl/dl']

View file

@ -1255,6 +1255,14 @@ if CONFIG['MOZ_SYSTEM_PNG']:
'png.h',
]
if CONFIG['MOZ_SYSTEM_WEBP']:
system_headers += [
'webp/decode.h',
'webp/demux.h',
'webp/mux_types.h',
'webp/types.h',
]
if CONFIG['MOZ_SYSTEM_ZLIB']:
system_headers += [
'zlib.h',

View file

@ -1070,6 +1070,9 @@ CreateInterfaceObjects(JSContext* cx, JS::Handle<JSObject*> global,
isChrome ? chromeOnlyProperties : nullptr,
unscopableNames, toStringTag, isGlobal);
if (!proto) {
if (name && !strcmp(name, "Document")) {
MOZ_CRASH("Bug 1405521/1488480: CreateInterfacePrototypeObject failed for Document.prototype");
}
return;
}
@ -1088,6 +1091,9 @@ CreateInterfaceObjects(JSContext* cx, JS::Handle<JSObject*> global,
isChrome,
defineOnGlobal);
if (!interface) {
if (name && !strcmp(name, "Document")) {
MOZ_CRASH("Bug 1405521/1488480: CreateInterfaceObject failed for Document");
}
if (protoCache) {
// If we fail we need to make sure to clear the value of protoCache we
// set above.

View file

@ -3113,6 +3113,9 @@ class CGCreateInterfaceObjectsMethod(CGAbstractMethod):
${name}, aDefineOnGlobal,
${unscopableNames},
${isGlobal});
if (protoCache && !*protoCache) {
$*{maybeCrash}
}
""",
protoClass=protoClass,
parentProto=parentProto,
@ -3127,7 +3130,8 @@ class CGCreateInterfaceObjectsMethod(CGAbstractMethod):
chromeProperties=chromeProperties,
name='"' + self.descriptor.interface.identifier.name + '"' if needInterfaceObject else "nullptr",
unscopableNames="unscopableNames" if self.haveUnscopables else "nullptr",
isGlobal=toStringBool(isGlobal))
isGlobal=toStringBool(isGlobal),
maybeCrash=maybecrash("dom::CreateInterfaceObjects failed for Document"))
# If we fail after here, we must clear interface and prototype caches
# using this code: intermediate failure must not expose the interface in

View file

@ -709,19 +709,19 @@ skip-if = true # bug 475110 - disabled since we don't play Wave files standalone
[test_autoplay_contentEditable.html]
skip-if = android_version == '17' || android_version == '22' # android(bug 1232305, bug 1232318, bug 1372457)
[test_autoplay_policy.html]
skip-if = android_version == '23' # bug 1424903
skip-if = android_version >= '23' # bug 1424903
[test_autoplay_policy_activation.html]
skip-if = android_version == '23' # bug 1424903
skip-if = android_version >= '23' # bug 1424903
[test_autoplay_policy_eventdown_activation.html]
skip-if = android_version == '23' # bug 1424903
skip-if = android_version >= '23' # bug 1424903
[test_autoplay_policy_key_blacklist.html]
skip-if = android_version == '23' || (verify && debug && (os == 'win')) # bug 1424903
skip-if = android_version >= '23' || (verify && debug && (os == 'win')) # bug 1424903
[test_autoplay_policy_unmute_pauses.html]
skip-if = android_version == '23' # bug 1424903
skip-if = android_version >= '23' # bug 1424903
[test_autoplay_policy_play_before_loadedmetadata.html]
skip-if = android_version == '23' # bug 1424903
skip-if = android_version >= '23' # bug 1424903
[test_autoplay_policy_permission.html]
skip-if = android_version == '23' # bug 1424903
skip-if = android_version >= '23' # bug 1424903
[test_buffered.html]
skip-if = android_version == '22' # bug 1308388, android(bug 1232305)
[test_bug448534.html]
@ -757,7 +757,7 @@ skip-if = (android_version == '23' && debug) || (android_version == '25' && debu
skip-if = (android_version == '23' && debug) || (android_version == '25' && debug) # android(bug 1232305)
[test_bug1018933.html]
[test_bug1113600.html]
skip-if = android_version == '19' || android_version == '22' # bug 1198168, android(bug 1232305)
skip-if = android_version >= '19' # bug 1198168, android(bug 1232305)
tags=capturestream
[test_bug1242338.html]
skip-if = toolkit == 'android' # bug 1306916, bug 1329566, android(bug 1232305)
@ -1088,7 +1088,7 @@ skip-if = toolkit == 'android' || (verify && debug && os == 'win') # android(bug
[test_video_dimensions.html]
skip-if = toolkit == 'android' # bug 1298238, bug 1304535, android(bug 1232305)
[test_resolution_change.html]
skip-if = android_version == '19' # bug 1393866
skip-if = android_version >= '19' # bug 1393866
tags=capturestream
[test_resume.html]
skip-if = true # bug 1021673
@ -1286,7 +1286,7 @@ tags = suspend
[test_temporary_file_blob_video_plays.html]
skip-if = toolkit == 'android' # android(bug 1232305)
[test_videoPlaybackQuality_totalFrames.html]
skip-if = (os == 'win' || android_version == '19') # bug 1374189
skip-if = (os == 'win' || android_version >= '19') # bug 1374189
[test_video_gzip_encoding.html]

View file

@ -2026,10 +2026,6 @@ RuntimeService::Cleanup()
{
AssertIsOnMainThread();
if (!mShuttingDown) {
Shutdown();
}
nsCOMPtr<nsIObserverService> obs = services::GetObserverService();
NS_WARNING_ASSERTION(obs, "Failed to get observer service?!");

View file

@ -276,14 +276,28 @@ public:
mDoSmoothScroll = aOther.mDoSmoothScroll;
}
void ApplyRelativeScrollUpdateFrom(const FrameMetrics& aOther)
/**
* Applies the relative scroll offset update contained in aOther to the
* scroll offset contained in this. The scroll delta is clamped to the
* scrollable region.
*
* @returns The clamped scroll offset delta that was applied
*/
CSSPoint ApplyRelativeScrollUpdateFrom(const FrameMetrics& aOther)
{
MOZ_ASSERT(aOther.IsRelative());
CSSPoint origin = mScrollOffset;
CSSPoint delta = (aOther.mScrollOffset - aOther.mBaseScrollOffset);
ClampAndSetScrollOffset(mScrollOffset + delta);
mScrollGeneration = aOther.mScrollGeneration;
return mScrollOffset - origin;
}
/**
* Applies the relative scroll offset update contained in aOther to the
* smooth scroll destination offset contained in this. The scroll delta is
* clamped to the scrollable region.
*/
void ApplyRelativeSmoothScrollUpdateFrom(const FrameMetrics& aOther)
{
MOZ_ASSERT(aOther.IsRelative());

View file

@ -31,6 +31,19 @@ public:
virtual bool DoSample(FrameMetrics& aFrameMetrics,
const TimeDuration& aDelta) = 0;
/**
* Attempt to apply a translation to the animation in response to content
* providing a relative scroll offset update.
*
* @param aShiftDelta the amount to translate the animation in app units
* @returns Whether the animation was able to translate. If false, the
* animation must be canceled.
*/
virtual bool ApplyContentShift(const CSSPoint& aShiftDelta)
{
return false;
}
bool Sample(FrameMetrics& aFrameMetrics,
const TimeDuration& aDelta) {
// In some situations, particularly when handoff is involved, it's possible
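ApplyContentShift gives each animation type a chance to absorb a content-driven scroll shift instead of being cancelled: the default declines, an animation that works purely in screen space can accept the shift unchanged, and one that targets a content-space destination can translate that destination. A minimal sketch of the pattern; the class names are hypothetical stand-ins for the concrete animations touched later in this commit.

struct Delta { double x = 0, y = 0; };

class AnimationSketch {
public:
  virtual ~AnimationSketch() = default;
  // Default: the shift cannot be absorbed, so the caller cancels the animation.
  virtual bool ApplyContentShift(const Delta&) { return false; }
};

class ScreenSpaceAnimationSketch : public AnimationSketch {
public:
  // Screen-space animations are unaffected by a content-space shift.
  bool ApplyContentShift(const Delta&) override { return true; }
};

class DestinationAnimationSketch : public AnimationSketch {
  Delta mDestination;
public:
  // Translate the scroll destination by the shift and keep animating.
  bool ApplyContentShift(const Delta& aShift) override {
    mDestination.x += aShift.x;
    mDestination.y += aShift.y;
    return true;
  }
};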

View file

@ -4380,6 +4380,7 @@ void AsyncPanZoomController::NotifyLayersUpdated(const ScrollMetadata& aScrollMe
// becomes incorrect for the purposes of calculating the LD transform. To
// correct this we need to update mExpectedGeckoMetrics to be the
// last thing we know was painted by Gecko.
Maybe<CSSPoint> relativeDelta;
if (gfxPrefs::APZRelativeUpdate() && aLayerMetrics.IsRelative()) {
APZC_LOG("%p relative updating scroll offset from %s by %s\n", this,
ToString(Metrics().GetScrollOffset()).c_str(),
@ -4395,7 +4396,7 @@ void AsyncPanZoomController::NotifyLayersUpdated(const ScrollMetadata& aScrollMe
userAction = true;
}
Metrics().ApplyRelativeScrollUpdateFrom(aLayerMetrics);
relativeDelta = Some(Metrics().ApplyRelativeScrollUpdateFrom(aLayerMetrics));
} else {
APZC_LOG("%p updating scroll offset from %s to %s\n", this,
ToString(Metrics().GetScrollOffset()).c_str(),
@ -4408,10 +4409,17 @@ void AsyncPanZoomController::NotifyLayersUpdated(const ScrollMetadata& aScrollMe
mCompositedScrollOffset = Metrics().GetScrollOffset();
mExpectedGeckoMetrics = aLayerMetrics;
// Cancel the animation (which might also trigger a repaint request)
// after we update the scroll offset above. Otherwise we can be left
// in a state where things are out of sync.
CancelAnimation();
// If we have applied a relative scroll update and a scroll animation is
// happening, attempt to apply a content shift and preserve the
// animation.
if (!mAnimation ||
relativeDelta.isNothing() ||
!mAnimation->ApplyContentShift(relativeDelta.value())) {
// Cancel the animation (which might also trigger a repaint request)
// after we update the scroll offset above. Otherwise we can be left
// in a state where things are out of sync.
CancelAnimation();
}
// Since the scroll offset has changed, we need to recompute the
// displayport margins and send them to layout. Otherwise there might be
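The new cancellation rule above reduces to: keep the animation only when a relative delta was applied and the animation agreed to absorb it. A self-contained sketch of that decision, with std::optional and std::unique_ptr standing in for mozilla::Maybe and the APZC's animation member:

#include <memory>
#include <optional>

struct Delta { double x = 0, y = 0; };

struct AnimationSketch {
  virtual ~AnimationSketch() = default;
  virtual bool ApplyContentShift(const Delta&) { return false; }
};

// Cancel unless a relative delta exists and the animation can absorb it.
static void MaybeCancelAnimation(std::unique_ptr<AnimationSketch>& aAnimation,
                                 const std::optional<Delta>& aRelativeDelta) {
  if (!aAnimation || !aRelativeDelta ||
      !aAnimation->ApplyContentShift(*aRelativeDelta)) {
    aAnimation.reset();  // the real code also triggers a repaint request here
  }
}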

View file

@ -22,6 +22,13 @@ public:
bool DoSample(FrameMetrics& aFrameMetrics, const TimeDuration& aDelta) override;
bool ApplyContentShift(const CSSPoint& aShiftDelta) override
{
// Autoscroll works using screen space coordinates, so there's no work we
// need to do to handle a content shift
return true;
}
void Cancel(CancelAnimationFlags aFlags) override;
private:
AsyncPanZoomController& mApzc;

View file

@ -109,5 +109,12 @@ GenericScrollAnimation::DoSample(FrameMetrics& aFrameMetrics, const TimeDuration
return !finished;
}
bool
GenericScrollAnimation::ApplyContentShift(const CSSPoint& aShiftDelta)
{
mAnimationPhysics->ApplyContentShift(aShiftDelta);
return true;
}
} // namespace layers
} // namespace mozilla

View file

@ -28,6 +28,8 @@ public:
bool DoSample(FrameMetrics& aFrameMetrics, const TimeDuration& aDelta) override;
bool ApplyContentShift(const CSSPoint& aShiftDelta) override;
void UpdateDelta(TimeStamp aTime,
const nsPoint& aDelta,
const nsSize& aCurrentVelocity);

View file

@ -570,6 +570,7 @@ private:
DECL_GFX_PREF(Live, "image.mem.volatile.min_threshold_kb", ImageMemVolatileMinThresholdKB, int32_t, -1);
DECL_GFX_PREF(Once, "image.multithreaded_decoding.limit", ImageMTDecodingLimit, int32_t, -1);
DECL_GFX_PREF(Once, "image.multithreaded_decoding.idle_timeout", ImageMTDecodingIdleTimeout, int32_t, -1);
DECL_GFX_PREF(Live, "image.webp.enabled", ImageWebPEnabled, bool, false);
DECL_GFX_PREF(Once, "layers.acceleration.disabled", LayersAccelerationDisabledDoNotUseDirectly, bool, false);
DECL_GFX_PREF(Live, "layers.acceleration.draw-fps", LayersDrawFPS, bool, false);

View file

@ -5,6 +5,7 @@
#include "DecoderFactory.h"
#include "gfxPrefs.h"
#include "nsMimeTypes.h"
#include "mozilla/RefPtr.h"
@ -19,6 +20,7 @@
#include "nsBMPDecoder.h"
#include "nsICODecoder.h"
#include "nsIconDecoder.h"
#include "nsWebPDecoder.h"
namespace mozilla {
@ -67,6 +69,11 @@ DecoderFactory::GetDecoderType(const char* aMimeType)
// Icon
} else if (!strcmp(aMimeType, IMAGE_ICON_MS)) {
type = DecoderType::ICON;
// WebP
} else if (!strcmp(aMimeType, IMAGE_WEBP) &&
gfxPrefs::ImageWebPEnabled()) {
type = DecoderType::WEBP;
}
return type;
@ -102,6 +109,9 @@ DecoderFactory::GetDecoder(DecoderType aType,
case DecoderType::ICON:
decoder = new nsIconDecoder(aImage);
break;
case DecoderType::WEBP:
decoder = new nsWebPDecoder(aImage);
break;
default:
MOZ_ASSERT_UNREACHABLE("Unknown decoder type");
}
@ -182,7 +192,8 @@ DecoderFactory::CreateAnimationDecoder(DecoderType aType,
return NS_ERROR_INVALID_ARG;
}
MOZ_ASSERT(aType == DecoderType::GIF || aType == DecoderType::PNG,
MOZ_ASSERT(aType == DecoderType::GIF || aType == DecoderType::PNG ||
aType == DecoderType::WEBP,
"Calling CreateAnimationDecoder for non-animating DecoderType");
// Create an anonymous decoder. Interaction with the SurfaceCache and the

View file

@ -37,6 +37,7 @@ enum class DecoderType
BMP,
ICO,
ICON,
WEBP,
UNKNOWN
};

View file

@ -175,6 +175,14 @@ public:
return mState == READY ? mData.mIterating.mNextReadLength : 0;
}
/// If we're ready to read, returns whether or not everything available thus
/// far has been in the same contiguous buffer.
bool IsContiguous() const
{
MOZ_ASSERT(mState == READY, "Calling IsContiguous() in the wrong state");
return mState == READY ? mData.mIterating.mChunk == 0 : false;
}
/// @return a count of the chunks we've advanced through.
uint32_t ChunkCount() const { return mChunkCount; }
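IsContiguous lets a consumer choose between a zero-copy path (everything so far is in the first chunk, so a pointer into the source buffer stays valid while the iterator is held) and a buffered path (chunks must be copied into one growable buffer), which is the decision the new WebP decoder's DoDecode makes further down in this commit. A standalone sketch of that choice; ChunkView and CoalesceChunks are hypothetical names.

#include <cstddef>
#include <cstdint>
#include <vector>

struct ChunkView { const uint8_t* data; size_t length; };

// Return one contiguous view of the input: the first chunk directly when it
// is the only one (zero-copy), otherwise a concatenated copy in aStorage.
static ChunkView CoalesceChunks(const std::vector<ChunkView>& aChunks,
                                std::vector<uint8_t>& aStorage) {
  if (aChunks.size() == 1) {
    return aChunks[0];
  }
  aStorage.clear();
  for (const ChunkView& c : aChunks) {
    aStorage.insert(aStorage.end(), c.data, c.data + c.length);
  }
  return { aStorage.data(), aStorage.size() };
}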

View file

@ -82,6 +82,7 @@ static const mozilla::Module::CategoryEntry kImageCategories[] = {
{ "Gecko-Content-Viewers", IMAGE_PNG, "@mozilla.org/content/document-loader-factory;1" },
{ "Gecko-Content-Viewers", IMAGE_APNG, "@mozilla.org/content/document-loader-factory;1" },
{ "Gecko-Content-Viewers", IMAGE_X_PNG, "@mozilla.org/content/document-loader-factory;1" },
{ "Gecko-Content-Viewers", IMAGE_WEBP, "@mozilla.org/content/document-loader-factory;1" },
{ "content-sniffing-services", "@mozilla.org/image/loader;1", "@mozilla.org/image/loader;1" },
{ nullptr }
};

View file

@ -28,6 +28,7 @@ UNIFIED_SOURCES += [
'nsIconDecoder.cpp',
'nsJPEGDecoder.cpp',
'nsPNGDecoder.cpp',
'nsWebPDecoder.cpp',
]
include('/ipc/chromium/chromium-config.mozbuild')

View file

@ -0,0 +1,557 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "ImageLogging.h" // Must appear first
#include "nsWebPDecoder.h"
#include "RasterImage.h"
#include "SurfacePipeFactory.h"
using namespace mozilla::gfx;
namespace mozilla {
namespace image {
static LazyLogModule sWebPLog("WebPDecoder");
nsWebPDecoder::nsWebPDecoder(RasterImage* aImage)
: Decoder(aImage)
, mDecoder(nullptr)
, mBlend(BlendMethod::OVER)
, mDisposal(DisposalMethod::KEEP)
, mTimeout(FrameTimeout::Forever())
, mFormat(SurfaceFormat::B8G8R8X8)
, mLastRow(0)
, mCurrentFrame(0)
, mData(nullptr)
, mLength(0)
, mIteratorComplete(false)
, mNeedDemuxer(true)
, mGotColorProfile(false)
, mInProfile(nullptr)
, mTransform(nullptr)
{
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::nsWebPDecoder", this));
}
nsWebPDecoder::~nsWebPDecoder()
{
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::~nsWebPDecoder", this));
if (mDecoder) {
WebPIDelete(mDecoder);
WebPFreeDecBuffer(&mBuffer);
}
if (mInProfile) {
// mTransform belongs to us only if mInProfile is non-null
if (mTransform) {
qcms_transform_release(mTransform);
}
qcms_profile_release(mInProfile);
}
}
LexerResult
nsWebPDecoder::ReadData()
{
MOZ_ASSERT(mData);
MOZ_ASSERT(mLength > 0);
WebPDemuxer* demuxer = nullptr;
bool complete = mIteratorComplete;
if (mNeedDemuxer) {
WebPDemuxState state;
WebPData fragment;
fragment.bytes = mData;
fragment.size = mLength;
demuxer = WebPDemuxPartial(&fragment, &state);
if (state == WEBP_DEMUX_PARSE_ERROR) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::ReadData -- demux parse error\n", this));
WebPDemuxDelete(demuxer);
return LexerResult(TerminalState::FAILURE);
}
if (state == WEBP_DEMUX_PARSING_HEADER) {
WebPDemuxDelete(demuxer);
return LexerResult(Yield::NEED_MORE_DATA);
}
if (!demuxer) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::ReadData -- no demuxer\n", this));
return LexerResult(TerminalState::FAILURE);
}
complete = complete || state == WEBP_DEMUX_DONE;
}
LexerResult rv(TerminalState::FAILURE);
if (!HasSize()) {
rv = ReadHeader(demuxer, complete);
} else {
rv = ReadPayload(demuxer, complete);
}
WebPDemuxDelete(demuxer);
return rv;
}
LexerResult
nsWebPDecoder::DoDecode(SourceBufferIterator& aIterator, IResumable* aOnResume)
{
MOZ_ASSERT(!HasError(), "Shouldn't call DoDecode after error!");
SourceBufferIterator::State state = SourceBufferIterator::COMPLETE;
if (!mIteratorComplete) {
state = aIterator.Advance(SIZE_MAX);
// We need to remember since we can't advance a complete iterator.
mIteratorComplete = state == SourceBufferIterator::COMPLETE;
}
switch (state) {
case SourceBufferIterator::READY:
if (!aIterator.IsContiguous()) {
// We need to buffer. This should be rare, but expensive.
break;
}
if (!mData) {
// For as long as we hold onto an iterator, we know the data pointers
// to the chunks cannot change underneath us, so save the pointer to
// the first block.
MOZ_ASSERT(mLength == 0);
mData = reinterpret_cast<const uint8_t*>(aIterator.Data());
}
mLength += aIterator.Length();
return ReadData();
case SourceBufferIterator::COMPLETE:
return ReadData();
default:
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::DoDecode -- bad state\n", this));
return LexerResult(TerminalState::FAILURE);
}
// We need to buffer. If we have no data buffered, we need to get everything
// from the first chunk of the source buffer before appending the new data.
if (mBufferedData.empty()) {
MOZ_ASSERT(mData);
MOZ_ASSERT(mLength > 0);
if (!mBufferedData.append(mData, mLength)) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::DoDecode -- oom, initialize %zu\n",
this, mLength));
return LexerResult(TerminalState::FAILURE);
}
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::DoDecode -- buffered %zu bytes\n",
this, mLength));
}
// Append the incremental data from the iterator.
if (!mBufferedData.append(aIterator.Data(), aIterator.Length())) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::DoDecode -- oom, append %zu on %zu\n",
this, aIterator.Length(), mBufferedData.length()));
return LexerResult(TerminalState::FAILURE);
}
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::DoDecode -- buffered %zu -> %zu bytes\n",
this, aIterator.Length(), mBufferedData.length()));
mData = mBufferedData.begin();
mLength = mBufferedData.length();
return ReadData();
}
nsresult
nsWebPDecoder::CreateFrame(const nsIntRect& aFrameRect)
{
MOZ_ASSERT(HasSize());
MOZ_ASSERT(!mDecoder);
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::CreateFrame -- frame %u, %d x %d\n",
this, mCurrentFrame, aFrameRect.width, aFrameRect.height));
// If this is our first frame in an animation and it doesn't cover the
// full frame, then we are transparent even if there is no alpha
if (mCurrentFrame == 0 && !aFrameRect.IsEqualEdges(FullFrame())) {
MOZ_ASSERT(HasAnimation());
mFormat = SurfaceFormat::B8G8R8A8;
PostHasTransparency();
}
WebPInitDecBuffer(&mBuffer);
mBuffer.colorspace = MODE_RGBA;
mDecoder = WebPINewDecoder(&mBuffer);
if (!mDecoder) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::CreateFrame -- create decoder error\n",
this));
return NS_ERROR_FAILURE;
}
SurfacePipeFlags pipeFlags = SurfacePipeFlags();
if (ShouldBlendAnimation()) {
pipeFlags |= SurfacePipeFlags::BLEND_ANIMATION;
}
AnimationParams animParams {
aFrameRect, mTimeout, mCurrentFrame, mBlend, mDisposal
};
Maybe<SurfacePipe> pipe = SurfacePipeFactory::CreateSurfacePipe(this,
Size(), OutputSize(), aFrameRect, mFormat, Some(animParams), pipeFlags);
if (!pipe) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::CreateFrame -- no pipe\n", this));
return NS_ERROR_FAILURE;
}
mPipe = std::move(*pipe);
return NS_OK;
}
void
nsWebPDecoder::EndFrame()
{
MOZ_ASSERT(HasSize());
MOZ_ASSERT(mDecoder);
auto opacity = mFormat == SurfaceFormat::B8G8R8A8
? Opacity::SOME_TRANSPARENCY : Opacity::FULLY_OPAQUE;
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::EndFrame -- frame %u, opacity %d, "
"disposal %d, timeout %d, blend %d\n",
this, mCurrentFrame, (int)opacity, (int)mDisposal,
mTimeout.AsEncodedValueDeprecated(), (int)mBlend));
PostFrameStop(opacity);
WebPIDelete(mDecoder);
WebPFreeDecBuffer(&mBuffer);
mDecoder = nullptr;
mLastRow = 0;
++mCurrentFrame;
}
void
nsWebPDecoder::ApplyColorProfile(const char* aProfile, size_t aLength)
{
MOZ_ASSERT(!mGotColorProfile);
mGotColorProfile = true;
if (GetSurfaceFlags() & SurfaceFlags::NO_COLORSPACE_CONVERSION) {
return;
}
auto mode = gfxPlatform::GetCMSMode();
if (mode == eCMSMode_Off || (mode == eCMSMode_TaggedOnly && !aProfile)) {
return;
}
if (!aProfile || !gfxPlatform::GetCMSOutputProfile()) {
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::ApplyColorProfile -- not tagged or no output "
"profile , use sRGB transform\n", this));
mTransform = gfxPlatform::GetCMSRGBATransform();
return;
}
mInProfile = qcms_profile_from_memory(aProfile, aLength);
if (!mInProfile) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::ApplyColorProfile -- bad color profile\n",
this));
return;
}
// Calculate rendering intent.
int intent = gfxPlatform::GetRenderingIntent();
if (intent == -1) {
intent = qcms_profile_get_rendering_intent(mInProfile);
}
// Create the color management transform.
mTransform = qcms_transform_create(mInProfile,
QCMS_DATA_RGBA_8,
gfxPlatform::GetCMSOutputProfile(),
QCMS_DATA_RGBA_8,
(qcms_intent)intent);
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::ApplyColorProfile -- use tagged "
"transform\n", this));
}
LexerResult
nsWebPDecoder::ReadHeader(WebPDemuxer* aDemuxer,
bool aIsComplete)
{
MOZ_ASSERT(aDemuxer);
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::ReadHeader -- %zu bytes\n", this, mLength));
uint32_t flags = WebPDemuxGetI(aDemuxer, WEBP_FF_FORMAT_FLAGS);
if (!IsMetadataDecode() && !mGotColorProfile) {
if (flags & WebPFeatureFlags::ICCP_FLAG) {
WebPChunkIterator iter;
if (!WebPDemuxGetChunk(aDemuxer, "ICCP", 1, &iter)) {
return aIsComplete ? LexerResult(TerminalState::FAILURE)
: LexerResult(Yield::NEED_MORE_DATA);
}
ApplyColorProfile(reinterpret_cast<const char*>(iter.chunk.bytes),
iter.chunk.size);
WebPDemuxReleaseChunkIterator(&iter);
} else {
ApplyColorProfile(nullptr, 0);
}
}
if (flags & WebPFeatureFlags::ANIMATION_FLAG) {
// A metadata decode expects to get the correct first frame timeout which
// sadly is not provided by the normal WebP header parsing.
WebPIterator iter;
if (!WebPDemuxGetFrame(aDemuxer, 1, &iter)) {
return aIsComplete ? LexerResult(TerminalState::FAILURE)
: LexerResult(Yield::NEED_MORE_DATA);
}
PostIsAnimated(FrameTimeout::FromRawMilliseconds(iter.duration));
WebPDemuxReleaseIterator(&iter);
} else {
// Single frames don't need a demuxer to be created.
mNeedDemuxer = false;
}
uint32_t width = WebPDemuxGetI(aDemuxer, WEBP_FF_CANVAS_WIDTH);
uint32_t height = WebPDemuxGetI(aDemuxer, WEBP_FF_CANVAS_HEIGHT);
if (width > INT32_MAX || height > INT32_MAX) {
return LexerResult(TerminalState::FAILURE);
}
PostSize(width, height);
bool alpha = flags & WebPFeatureFlags::ALPHA_FLAG;
if (alpha) {
mFormat = SurfaceFormat::B8G8R8A8;
PostHasTransparency();
}
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::ReadHeader -- %u x %u, alpha %d, "
"animation %d, metadata decode %d, first frame decode %d\n",
this, width, height, alpha, HasAnimation(),
IsMetadataDecode(), IsFirstFrameDecode()));
if (IsMetadataDecode()) {
return LexerResult(TerminalState::SUCCESS);
}
return ReadPayload(aDemuxer, aIsComplete);
}
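ReadHeader above goes through the demuxer because data may still be arriving and the animation flag, first-frame timeout and ICC chunk all live there. For a buffer that is already complete, libwebp can answer the basic metadata questions in a single call; the following is a minimal sketch using WebPGetFeatures from webp/decode.h (error handling collapsed to a bool), separate from the code in this patch.

#include <cstddef>
#include <cstdint>
#include "webp/decode.h"

// Probe a complete WebP buffer for its basic metadata.
static bool ProbeWebP(const uint8_t* aData, size_t aLength,
                      int* aWidth, int* aHeight,
                      bool* aHasAlpha, bool* aIsAnimated) {
  WebPBitstreamFeatures features;
  if (WebPGetFeatures(aData, aLength, &features) != VP8_STATUS_OK) {
    return false;
  }
  *aWidth = features.width;
  *aHeight = features.height;
  *aHasAlpha = !!features.has_alpha;
  *aIsAnimated = !!features.has_animation;
  return true;
}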
LexerResult
nsWebPDecoder::ReadPayload(WebPDemuxer* aDemuxer,
bool aIsComplete)
{
if (!HasAnimation()) {
auto rv = ReadSingle(mData, mLength, FullFrame());
if (rv.is<TerminalState>() &&
rv.as<TerminalState>() == TerminalState::SUCCESS) {
PostDecodeDone();
}
return rv;
}
return ReadMultiple(aDemuxer, aIsComplete);
}
LexerResult
nsWebPDecoder::ReadSingle(const uint8_t* aData, size_t aLength, const IntRect& aFrameRect)
{
MOZ_ASSERT(!IsMetadataDecode());
MOZ_ASSERT(aData);
MOZ_ASSERT(aLength > 0);
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::ReadSingle -- %zu bytes\n", this, aLength));
if (!mDecoder && NS_FAILED(CreateFrame(aFrameRect))) {
return LexerResult(TerminalState::FAILURE);
}
bool complete;
VP8StatusCode status = WebPIUpdate(mDecoder, aData, aLength);
switch (status) {
case VP8_STATUS_OK:
complete = true;
break;
case VP8_STATUS_SUSPENDED:
complete = false;
break;
default:
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::ReadSingle -- append error %d\n",
this, status));
return LexerResult(TerminalState::FAILURE);
}
int lastRow = -1;
int width = 0;
int height = 0;
int stride = 0;
uint8_t* rowStart = WebPIDecGetRGB(mDecoder, &lastRow, &width, &height, &stride);
if (!rowStart || lastRow == -1) {
return LexerResult(Yield::NEED_MORE_DATA);
}
if (width <= 0 || height <= 0 || stride <= 0) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::ReadSingle -- bad (w,h,s) = (%d, %d, %d)\n",
this, width, height, stride));
return LexerResult(TerminalState::FAILURE);
}
const bool noPremultiply =
bool(GetSurfaceFlags() & SurfaceFlags::NO_PREMULTIPLY_ALPHA);
for (int row = mLastRow; row < lastRow; row++) {
uint8_t* src = rowStart + row * stride;
if (mTransform) {
qcms_transform_data(mTransform, src, src, width);
}
WriteState result;
if (noPremultiply) {
result = mPipe.WritePixelsToRow<uint32_t>([&]() -> NextPixel<uint32_t> {
MOZ_ASSERT(mFormat == SurfaceFormat::B8G8R8A8 || src[3] == 0xFF);
const uint32_t pixel =
gfxPackedPixelNoPreMultiply(src[3], src[0], src[1], src[2]);
src += 4;
return AsVariant(pixel);
});
} else {
result = mPipe.WritePixelsToRow<uint32_t>([&]() -> NextPixel<uint32_t> {
MOZ_ASSERT(mFormat == SurfaceFormat::B8G8R8A8 || src[3] == 0xFF);
const uint32_t pixel = gfxPackedPixel(src[3], src[0], src[1], src[2]);
src += 4;
return AsVariant(pixel);
});
}
MOZ_ASSERT(result != WriteState::FAILURE);
MOZ_ASSERT_IF(result == WriteState::FINISHED, complete && row == lastRow - 1);
if (result == WriteState::FAILURE) {
MOZ_LOG(sWebPLog, LogLevel::Error,
("[this=%p] nsWebPDecoder::ReadSingle -- write pixels error\n",
this));
return LexerResult(TerminalState::FAILURE);
}
}
if (mLastRow != lastRow) {
mLastRow = lastRow;
Maybe<SurfaceInvalidRect> invalidRect = mPipe.TakeInvalidRect();
if (invalidRect) {
PostInvalidation(invalidRect->mInputSpaceRect,
Some(invalidRect->mOutputSpaceRect));
}
}
if (!complete) {
return LexerResult(Yield::NEED_MORE_DATA);
}
EndFrame();
return LexerResult(TerminalState::SUCCESS);
}
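ReadSingle above drives libwebp's incremental decoder and copies each newly finished row into the SurfacePipe, applying the qcms transform and optional premultiplication per pixel. Outside Gecko the same incremental API looks like the condensed sketch below; DecodeChunk is a hypothetical helper, and note the patch uses WebPIUpdate (which takes all data seen so far) whereas this sketch uses WebPIAppend (which takes only the new bytes).

#include <cstddef>
#include <cstdint>
#include "webp/decode.h"

// Feed one chunk of a WebP stream to an incremental decoder and report how
// many rows are fully decoded so far. Returns false on a hard decode error.
static bool DecodeChunk(WebPIDecoder* aDecoder, const uint8_t* aChunk,
                        size_t aLength, int* aRowsReady) {
  VP8StatusCode status = WebPIAppend(aDecoder, aChunk, aLength);
  if (status != VP8_STATUS_OK && status != VP8_STATUS_SUSPENDED) {
    return false;
  }
  int lastRow = 0, width = 0, height = 0, stride = 0;
  // Rows [0, lastRow) are complete; a real consumer converts and copies them
  // out here, as ReadSingle does row by row above.
  uint8_t* rows = WebPIDecGetRGB(aDecoder, &lastRow, &width, &height, &stride);
  *aRowsReady = rows ? lastRow : 0;
  return true;
}

// Usage: WebPIDecoder* dec = WebPINewRGB(MODE_RGBA, nullptr, 0, 0);
// call DecodeChunk() for each chunk, then WebPIDelete(dec).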
LexerResult
nsWebPDecoder::ReadMultiple(WebPDemuxer* aDemuxer, bool aIsComplete)
{
MOZ_ASSERT(!IsMetadataDecode());
MOZ_ASSERT(aDemuxer);
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::ReadMultiple\n", this));
bool complete = aIsComplete;
WebPIterator iter;
auto rv = LexerResult(Yield::NEED_MORE_DATA);
if (WebPDemuxGetFrame(aDemuxer, mCurrentFrame + 1, &iter)) {
switch (iter.blend_method) {
case WEBP_MUX_BLEND:
mBlend = BlendMethod::OVER;
break;
case WEBP_MUX_NO_BLEND:
mBlend = BlendMethod::SOURCE;
break;
default:
MOZ_ASSERT_UNREACHABLE("Unhandled blend method");
break;
}
switch (iter.dispose_method) {
case WEBP_MUX_DISPOSE_NONE:
mDisposal = DisposalMethod::KEEP;
break;
case WEBP_MUX_DISPOSE_BACKGROUND:
mDisposal = DisposalMethod::CLEAR;
break;
default:
MOZ_ASSERT_UNREACHABLE("Unhandled dispose method");
break;
}
mFormat = iter.has_alpha ? SurfaceFormat::B8G8R8A8 : SurfaceFormat::B8G8R8X8;
mTimeout = FrameTimeout::FromRawMilliseconds(iter.duration);
nsIntRect frameRect(iter.x_offset, iter.y_offset, iter.width, iter.height);
rv = ReadSingle(iter.fragment.bytes, iter.fragment.size, frameRect);
complete = complete && !WebPDemuxNextFrame(&iter);
WebPDemuxReleaseIterator(&iter);
}
if (rv.is<TerminalState>() &&
rv.as<TerminalState>() == TerminalState::SUCCESS) {
// If we extracted one frame, and it is not the last, we need to yield to
// the lexer to allow the upper layers to acknowledge the frame.
if (!complete && !IsFirstFrameDecode()) {
rv = LexerResult(Yield::OUTPUT_AVAILABLE);
} else {
uint32_t loopCount = WebPDemuxGetI(aDemuxer, WEBP_FF_LOOP_COUNT);
MOZ_LOG(sWebPLog, LogLevel::Debug,
("[this=%p] nsWebPDecoder::ReadMultiple -- loop count %u\n",
this, loopCount));
PostDecodeDone(loopCount - 1);
}
}
return rv;
}
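ReadMultiple above walks the animation with a demuxer iterator, picking up the blend and dispose methods, frame timeout and frame rectangle before handing the frame payload to ReadSingle. Below is a stripped-down sketch of the same walk over a complete buffer with the webp/demux.h API (frame indices are 1-based; DumpFrames is a hypothetical helper, not part of the patch).

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include "webp/demux.h"

// Iterate the frames of a complete animated WebP buffer.
static void DumpFrames(const uint8_t* aData, size_t aLength) {
  WebPData data = { aData, aLength };
  WebPDemuxer* demux = WebPDemux(&data);
  if (!demux) {
    return;
  }
  WebPIterator iter;
  if (WebPDemuxGetFrame(demux, 1, &iter)) {
    do {
      // iter.fragment.bytes / iter.fragment.size hold the frame payload.
      printf("frame %d: %dx%d at (%d,%d), %d ms\n",
             iter.frame_num, iter.width, iter.height,
             iter.x_offset, iter.y_offset, iter.duration);
    } while (WebPDemuxNextFrame(&iter));
    WebPDemuxReleaseIterator(&iter);
  }
  printf("loop count: %u\n",
         (unsigned)WebPDemuxGetI(demux, WEBP_FF_LOOP_COUNT));
  WebPDemuxDelete(demux);
}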
Maybe<Telemetry::HistogramID>
nsWebPDecoder::SpeedHistogram() const
{
return Some(Telemetry::IMAGE_DECODE_SPEED_WEBP);
}
} // namespace image
} // namespace mozilla

View file

@ -0,0 +1,111 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef mozilla_image_decoders_nsWebPDecoder_h
#define mozilla_image_decoders_nsWebPDecoder_h
#include "Decoder.h"
#include "webp/demux.h"
#include "StreamingLexer.h"
#include "SurfacePipe.h"
namespace mozilla {
namespace image {
class RasterImage;
class nsWebPDecoder final : public Decoder
{
public:
virtual ~nsWebPDecoder();
protected:
LexerResult DoDecode(SourceBufferIterator& aIterator,
IResumable* aOnResume) override;
Maybe<Telemetry::HistogramID> SpeedHistogram() const override;
private:
friend class DecoderFactory;
// Decoders should only be instantiated via DecoderFactory.
explicit nsWebPDecoder(RasterImage* aImage);
enum class State
{
WEBP_DATA,
FINISHED_WEBP_DATA
};
void ApplyColorProfile(const char* aProfile, size_t aLength);
LexerResult ReadData();
LexerResult ReadHeader(WebPDemuxer* aDemuxer, bool aIsComplete);
LexerResult ReadPayload(WebPDemuxer* aDemuxer, bool aIsComplete);
nsresult CreateFrame(const nsIntRect& aFrameRect);
void EndFrame();
LexerResult ReadSingle(const uint8_t* aData, size_t aLength,
const IntRect& aFrameRect);
LexerResult ReadMultiple(WebPDemuxer* aDemuxer, bool aIsComplete);
/// The SurfacePipe used to write to the output surface.
SurfacePipe mPipe;
/// The buffer used to accumulate data until the complete WebP header is
/// received, if and only if the iterator is discontiguous.
Vector<uint8_t> mBufferedData;
/// The libwebp output buffer descriptor pointing to the decoded data.
WebPDecBuffer mBuffer;
/// The libwebp incremental decoder descriptor, wraps mBuffer.
WebPIDecoder* mDecoder;
/// Blend method for the current frame.
BlendMethod mBlend;
/// Disposal method for the current frame.
DisposalMethod mDisposal;
/// Frame timeout for the current frame.
FrameTimeout mTimeout;
/// Surface format for the current frame.
gfx::SurfaceFormat mFormat;
/// The last row of decoded pixels written to mPipe.
int mLastRow;
/// Number of decoded frames.
uint32_t mCurrentFrame;
/// Pointer to the start of the contiguous encoded image data.
const uint8_t* mData;
/// Length of data pointed to by mData.
size_t mLength;
/// True if the iterator has reached its end.
bool mIteratorComplete;
/// True if this decoding pass requires a WebPDemuxer.
bool mNeedDemuxer;
/// True if we have setup the color profile for the image.
bool mGotColorProfile;
/// Color management profile from the ICCP chunk in the image.
qcms_profile* mInProfile;
/// Color management transform to apply to image data.
qcms_transform* mTransform;
};
} // namespace image
} // namespace mozilla
#endif // mozilla_image_decoders_nsWebPDecoder_h

View file

@ -38,7 +38,12 @@ AutoInitializeImageLib::AutoInitializeImageLib()
sImageLibInitialized = true;
// Force sRGB to be consistent with reftests.
Preferences::SetBool("gfx.color_management.force_srgb", true);
nsresult rv = Preferences::SetBool("gfx.color_management.force_srgb", true);
EXPECT_TRUE(rv == NS_OK);
// Ensure WebP is enabled to run decoder tests.
rv = Preferences::SetBool("image.webp.enabled", true);
EXPECT_TRUE(rv == NS_OK);
// Ensure that ImageLib services are initialized.
nsCOMPtr<imgITools> imgTools = do_CreateInstance("@mozilla.org/image/tools;1");
@ -551,6 +556,16 @@ ImageTestCase GreenIconTestCase()
TEST_CASE_IS_TRANSPARENT);
}
ImageTestCase GreenWebPTestCase()
{
return ImageTestCase("green.webp", "image/webp", IntSize(100, 100));
}
ImageTestCase GreenWebPIccSrgbTestCase()
{
return ImageTestCase("green.icc_srgb.webp", "image/webp", IntSize(100, 100));
}
ImageTestCase GreenFirstFrameAnimatedGIFTestCase()
{
return ImageTestCase("first-frame-green.gif", "image/gif", IntSize(100, 100),
@ -563,6 +578,12 @@ ImageTestCase GreenFirstFrameAnimatedPNGTestCase()
TEST_CASE_IS_TRANSPARENT | TEST_CASE_IS_ANIMATED);
}
ImageTestCase GreenFirstFrameAnimatedWebPTestCase()
{
return ImageTestCase("first-frame-green.webp", "image/webp", IntSize(100, 100),
TEST_CASE_IS_ANIMATED);
}
ImageTestCase CorruptTestCase()
{
return ImageTestCase("corrupt.jpg", "image/jpeg", IntSize(100, 100),
@ -699,6 +720,12 @@ ImageTestCase DownscaledIconTestCase()
IntSize(20, 20), TEST_CASE_IS_TRANSPARENT);
}
ImageTestCase DownscaledWebPTestCase()
{
return ImageTestCase("downscaled.webp", "image/webp", IntSize(100, 100),
IntSize(20, 20));
}
ImageTestCase DownscaledTransparentICOWithANDMaskTestCase()
{
// This test case is an ICO with AND mask transparency. We want to ensure that

View file

@ -433,9 +433,13 @@ ImageTestCase GreenJPGTestCase();
ImageTestCase GreenBMPTestCase();
ImageTestCase GreenICOTestCase();
ImageTestCase GreenIconTestCase();
ImageTestCase GreenWebPTestCase();
ImageTestCase GreenWebPIccSrgbTestCase();
ImageTestCase GreenFirstFrameAnimatedGIFTestCase();
ImageTestCase GreenFirstFrameAnimatedPNGTestCase();
ImageTestCase GreenFirstFrameAnimatedWebPTestCase();
ImageTestCase CorruptTestCase();
ImageTestCase CorruptBMPWithTruncatedHeader();
@ -460,6 +464,7 @@ ImageTestCase DownscaledJPGTestCase();
ImageTestCase DownscaledBMPTestCase();
ImageTestCase DownscaledICOTestCase();
ImageTestCase DownscaledIconTestCase();
ImageTestCase DownscaledWebPTestCase();
ImageTestCase DownscaledTransparentICOWithANDMaskTestCase();
ImageTestCase TruncatedSmallGIFTestCase();

View file

@ -125,6 +125,7 @@ TEST_F(ImageDecodeToSurface, JPG) { RunDecodeToSurface(GreenJPGTestCase()); }
TEST_F(ImageDecodeToSurface, BMP) { RunDecodeToSurface(GreenBMPTestCase()); }
TEST_F(ImageDecodeToSurface, ICO) { RunDecodeToSurface(GreenICOTestCase()); }
TEST_F(ImageDecodeToSurface, Icon) { RunDecodeToSurface(GreenIconTestCase()); }
TEST_F(ImageDecodeToSurface, WebP) { RunDecodeToSurface(GreenWebPTestCase()); }
TEST_F(ImageDecodeToSurface, AnimatedGIF)
{

View file

@ -329,6 +329,247 @@ CheckAnimationDecoderSingleChunk(const ImageTestCase& aTestCase)
});
}
static void
CheckDecoderFrameFirst(const ImageTestCase& aTestCase)
{
// Verify that we can decode this test case and retrieve the first frame using
// imgIContainer::FRAME_FIRST. This ensures that we correctly trigger a
// single-frame decode rather than an animated decode when
// imgIContainer::FRAME_FIRST is requested.
// Create an image.
RefPtr<Image> image =
ImageFactory::CreateAnonymousImage(nsDependentCString(aTestCase.mMimeType));
ASSERT_TRUE(!image->HasError());
nsCOMPtr<nsIInputStream> inputStream = LoadFile(aTestCase.mPath);
ASSERT_TRUE(inputStream);
// Figure out how much data we have.
uint64_t length;
nsresult rv = inputStream->Available(&length);
ASSERT_TRUE(NS_SUCCEEDED(rv));
// Write the data into the image.
rv = image->OnImageDataAvailable(nullptr, nullptr, inputStream, 0,
static_cast<uint32_t>(length));
ASSERT_TRUE(NS_SUCCEEDED(rv));
// Let the image know we've sent all the data.
rv = image->OnImageDataComplete(nullptr, nullptr, NS_OK, true);
ASSERT_TRUE(NS_SUCCEEDED(rv));
RefPtr<ProgressTracker> tracker = image->GetProgressTracker();
tracker->SyncNotifyProgress(FLAG_LOAD_COMPLETE);
// Lock the image so its surfaces don't disappear during the test.
image->LockImage();
auto unlock = mozilla::MakeScopeExit([&] {
image->UnlockImage();
});
// Use GetFrame() to force a sync decode of the image, specifying FRAME_FIRST
// to ensure that we don't get an animated decode.
RefPtr<SourceSurface> surface =
image->GetFrame(imgIContainer::FRAME_FIRST,
imgIContainer::FLAG_SYNC_DECODE);
// Ensure that the image's metadata meets our expectations.
IntSize imageSize(0, 0);
rv = image->GetWidth(&imageSize.width);
EXPECT_TRUE(NS_SUCCEEDED(rv));
rv = image->GetHeight(&imageSize.height);
EXPECT_TRUE(NS_SUCCEEDED(rv));
EXPECT_EQ(aTestCase.mSize.width, imageSize.width);
EXPECT_EQ(aTestCase.mSize.height, imageSize.height);
Progress imageProgress = tracker->GetProgress();
EXPECT_TRUE(bool(imageProgress & FLAG_HAS_TRANSPARENCY) == false);
EXPECT_TRUE(bool(imageProgress & FLAG_IS_ANIMATED) == true);
// Ensure that we decoded the static version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eStatic),
/* aMarkUsed = */ false);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(bool(result.Surface()));
}
// Ensure that we didn't decode the animated version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eAnimated),
/* aMarkUsed = */ false);
ASSERT_EQ(MatchType::NOT_FOUND, result.Type());
}
// Use GetFrame() to force a sync decode of the image, this time specifying
// FRAME_CURRENT to ensure that we get an animated decode.
RefPtr<SourceSurface> animatedSurface =
image->GetFrame(imgIContainer::FRAME_CURRENT,
imgIContainer::FLAG_SYNC_DECODE);
// Ensure that we decoded both frames of the animated version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eAnimated),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(NS_SUCCEEDED(result.Surface().Seek(0)));
EXPECT_TRUE(bool(result.Surface()));
RefPtr<imgFrame> partialFrame = result.Surface().GetFrame(1);
EXPECT_TRUE(bool(partialFrame));
}
// Ensure that the static version is still around.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eStatic),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(bool(result.Surface()));
}
}
static void
CheckDecoderFrameCurrent(const ImageTestCase& aTestCase)
{
// Verify that we can decode this test case and retrieve the entire sequence
// of frames using imgIContainer::FRAME_CURRENT. This ensures that we
// correctly trigger an animated decode rather than a single-frame decode when
// imgIContainer::FRAME_CURRENT is requested.
// Create an image.
RefPtr<Image> image =
ImageFactory::CreateAnonymousImage(nsDependentCString(aTestCase.mMimeType));
ASSERT_TRUE(!image->HasError());
nsCOMPtr<nsIInputStream> inputStream = LoadFile(aTestCase.mPath);
ASSERT_TRUE(inputStream);
// Figure out how much data we have.
uint64_t length;
nsresult rv = inputStream->Available(&length);
ASSERT_TRUE(NS_SUCCEEDED(rv));
// Write the data into the image.
rv = image->OnImageDataAvailable(nullptr, nullptr, inputStream, 0,
static_cast<uint32_t>(length));
ASSERT_TRUE(NS_SUCCEEDED(rv));
// Let the image know we've sent all the data.
rv = image->OnImageDataComplete(nullptr, nullptr, NS_OK, true);
ASSERT_TRUE(NS_SUCCEEDED(rv));
RefPtr<ProgressTracker> tracker = image->GetProgressTracker();
tracker->SyncNotifyProgress(FLAG_LOAD_COMPLETE);
// Lock the image so its surfaces don't disappear during the test.
image->LockImage();
// Use GetFrame() to force a sync decode of the image, specifying
// FRAME_CURRENT to ensure we get an animated decode.
RefPtr<SourceSurface> surface =
image->GetFrame(imgIContainer::FRAME_CURRENT,
imgIContainer::FLAG_SYNC_DECODE);
// Ensure that the image's metadata meets our expectations.
IntSize imageSize(0, 0);
rv = image->GetWidth(&imageSize.width);
EXPECT_TRUE(NS_SUCCEEDED(rv));
rv = image->GetHeight(&imageSize.height);
EXPECT_TRUE(NS_SUCCEEDED(rv));
EXPECT_EQ(aTestCase.mSize.width, imageSize.width);
EXPECT_EQ(aTestCase.mSize.height, imageSize.height);
Progress imageProgress = tracker->GetProgress();
EXPECT_TRUE(bool(imageProgress & FLAG_HAS_TRANSPARENCY) == false);
EXPECT_TRUE(bool(imageProgress & FLAG_IS_ANIMATED) == true);
// Ensure that we decoded both frames of the animated version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eAnimated),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(NS_SUCCEEDED(result.Surface().Seek(0)));
EXPECT_TRUE(bool(result.Surface()));
RefPtr<imgFrame> partialFrame = result.Surface().GetFrame(1);
EXPECT_TRUE(bool(partialFrame));
}
// Ensure that we didn't decode the static version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eStatic),
/* aMarkUsed = */ false);
ASSERT_EQ(MatchType::NOT_FOUND, result.Type());
}
// Use GetFrame() to force a sync decode of the image, this time specifying
// FRAME_FIRST to ensure that we get a single-frame decode.
RefPtr<SourceSurface> animatedSurface =
image->GetFrame(imgIContainer::FRAME_FIRST,
imgIContainer::FLAG_SYNC_DECODE);
// Ensure that we decoded the static version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eStatic),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(bool(result.Surface()));
}
// Ensure that both frames of the animated version are still around.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eAnimated),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(NS_SUCCEEDED(result.Surface().Seek(0)));
EXPECT_TRUE(bool(result.Surface()));
RefPtr<imgFrame> partialFrame = result.Surface().GetFrame(1);
EXPECT_TRUE(bool(partialFrame));
}
}
class ImageDecoders : public ::testing::Test
{
protected:
@ -430,6 +671,26 @@ TEST_F(ImageDecoders, IconDownscaleDuringDecode)
CheckDownscaleDuringDecode(DownscaledIconTestCase());
}
TEST_F(ImageDecoders, WebPSingleChunk)
{
CheckDecoderSingleChunk(GreenWebPTestCase());
}
TEST_F(ImageDecoders, WebPMultiChunk)
{
CheckDecoderMultiChunk(GreenWebPTestCase());
}
TEST_F(ImageDecoders, WebPDownscaleDuringDecode)
{
CheckDownscaleDuringDecode(DownscaledWebPTestCase());
}
TEST_F(ImageDecoders, WebPIccSrgbMultiChunk)
{
CheckDecoderMultiChunk(GreenWebPIccSrgbTestCase());
}
TEST_F(ImageDecoders, AnimatedGIFSingleChunk)
{
CheckDecoderSingleChunk(GreenFirstFrameAnimatedGIFTestCase());
@ -460,6 +721,21 @@ TEST_F(ImageDecoders, AnimatedPNGWithBlendedFrames)
CheckAnimationDecoderSingleChunk(GreenFirstFrameAnimatedPNGTestCase());
}
TEST_F(ImageDecoders, AnimatedWebPSingleChunk)
{
CheckDecoderSingleChunk(GreenFirstFrameAnimatedWebPTestCase());
}
TEST_F(ImageDecoders, AnimatedWebPMultiChunk)
{
CheckDecoderMultiChunk(GreenFirstFrameAnimatedWebPTestCase());
}
TEST_F(ImageDecoders, AnimatedWebPWithBlendedFrames)
{
CheckAnimationDecoderSingleChunk(GreenFirstFrameAnimatedWebPTestCase());
}
TEST_F(ImageDecoders, CorruptSingleChunk)
{
CheckDecoderSingleChunk(CorruptTestCase());
@ -507,245 +783,12 @@ TEST_F(ImageDecoders, CorruptICOWithBadBppSingleChunk)
TEST_F(ImageDecoders, AnimatedGIFWithFRAME_FIRST)
{
ImageTestCase testCase = GreenFirstFrameAnimatedGIFTestCase();
// Verify that we can decode this test case and retrieve the first frame using
// imgIContainer::FRAME_FIRST. This ensures that we correctly trigger a
// single-frame decode rather than an animated decode when
// imgIContainer::FRAME_FIRST is requested.
// Create an image.
RefPtr<Image> image =
ImageFactory::CreateAnonymousImage(nsDependentCString(testCase.mMimeType));
ASSERT_TRUE(!image->HasError());
nsCOMPtr<nsIInputStream> inputStream = LoadFile(testCase.mPath);
ASSERT_TRUE(inputStream);
// Figure out how much data we have.
uint64_t length;
nsresult rv = inputStream->Available(&length);
ASSERT_TRUE(NS_SUCCEEDED(rv));
// Write the data into the image.
rv = image->OnImageDataAvailable(nullptr, nullptr, inputStream, 0,
static_cast<uint32_t>(length));
ASSERT_TRUE(NS_SUCCEEDED(rv));
// Let the image know we've sent all the data.
rv = image->OnImageDataComplete(nullptr, nullptr, NS_OK, true);
ASSERT_TRUE(NS_SUCCEEDED(rv));
RefPtr<ProgressTracker> tracker = image->GetProgressTracker();
tracker->SyncNotifyProgress(FLAG_LOAD_COMPLETE);
// Lock the image so its surfaces don't disappear during the test.
image->LockImage();
auto unlock = mozilla::MakeScopeExit([&] {
image->UnlockImage();
});
// Use GetFrame() to force a sync decode of the image, specifying FRAME_FIRST
// to ensure that we don't get an animated decode.
RefPtr<SourceSurface> surface =
image->GetFrame(imgIContainer::FRAME_FIRST,
imgIContainer::FLAG_SYNC_DECODE);
// Ensure that the image's metadata meets our expectations.
IntSize imageSize(0, 0);
rv = image->GetWidth(&imageSize.width);
EXPECT_TRUE(NS_SUCCEEDED(rv));
rv = image->GetHeight(&imageSize.height);
EXPECT_TRUE(NS_SUCCEEDED(rv));
EXPECT_EQ(testCase.mSize.width, imageSize.width);
EXPECT_EQ(testCase.mSize.height, imageSize.height);
Progress imageProgress = tracker->GetProgress();
EXPECT_TRUE(bool(imageProgress & FLAG_HAS_TRANSPARENCY) == false);
EXPECT_TRUE(bool(imageProgress & FLAG_IS_ANIMATED) == true);
// Ensure that we decoded the static version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eStatic),
/* aMarkUsed = */ false);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(bool(result.Surface()));
}
// Ensure that we didn't decode the animated version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eAnimated),
/* aMarkUsed = */ false);
ASSERT_EQ(MatchType::NOT_FOUND, result.Type());
}
// Use GetFrame() to force a sync decode of the image, this time specifying
// FRAME_CURRENT to ensure that we get an animated decode.
RefPtr<SourceSurface> animatedSurface =
image->GetFrame(imgIContainer::FRAME_CURRENT,
imgIContainer::FLAG_SYNC_DECODE);
// Ensure that we decoded both frames of the animated version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eAnimated),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(NS_SUCCEEDED(result.Surface().Seek(0)));
EXPECT_TRUE(bool(result.Surface()));
RefPtr<imgFrame> partialFrame = result.Surface().GetFrame(1);
EXPECT_TRUE(bool(partialFrame));
}
// Ensure that the static version is still around.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eStatic),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(bool(result.Surface()));
}
CheckDecoderFrameFirst(GreenFirstFrameAnimatedGIFTestCase());
}
TEST_F(ImageDecoders, AnimatedGIFWithFRAME_CURRENT)
{
ImageTestCase testCase = GreenFirstFrameAnimatedGIFTestCase();
// Verify that we can decode this test case and retrieve the entire sequence
// of frames using imgIContainer::FRAME_CURRENT. This ensures that we
// correctly trigger an animated decode rather than a single-frame decode when
// imgIContainer::FRAME_CURRENT is requested.
// Create an image.
RefPtr<Image> image =
ImageFactory::CreateAnonymousImage(nsDependentCString(testCase.mMimeType));
ASSERT_TRUE(!image->HasError());
nsCOMPtr<nsIInputStream> inputStream = LoadFile(testCase.mPath);
ASSERT_TRUE(inputStream);
// Figure out how much data we have.
uint64_t length;
nsresult rv = inputStream->Available(&length);
ASSERT_TRUE(NS_SUCCEEDED(rv));
// Write the data into the image.
rv = image->OnImageDataAvailable(nullptr, nullptr, inputStream, 0,
static_cast<uint32_t>(length));
ASSERT_TRUE(NS_SUCCEEDED(rv));
// Let the image know we've sent all the data.
rv = image->OnImageDataComplete(nullptr, nullptr, NS_OK, true);
ASSERT_TRUE(NS_SUCCEEDED(rv));
RefPtr<ProgressTracker> tracker = image->GetProgressTracker();
tracker->SyncNotifyProgress(FLAG_LOAD_COMPLETE);
// Lock the image so its surfaces don't disappear during the test.
image->LockImage();
// Use GetFrame() to force a sync decode of the image, specifying
// FRAME_CURRENT to ensure we get an animated decode.
RefPtr<SourceSurface> surface =
image->GetFrame(imgIContainer::FRAME_CURRENT,
imgIContainer::FLAG_SYNC_DECODE);
// Ensure that the image's metadata meets our expectations.
IntSize imageSize(0, 0);
rv = image->GetWidth(&imageSize.width);
EXPECT_TRUE(NS_SUCCEEDED(rv));
rv = image->GetHeight(&imageSize.height);
EXPECT_TRUE(NS_SUCCEEDED(rv));
EXPECT_EQ(testCase.mSize.width, imageSize.width);
EXPECT_EQ(testCase.mSize.height, imageSize.height);
Progress imageProgress = tracker->GetProgress();
EXPECT_TRUE(bool(imageProgress & FLAG_HAS_TRANSPARENCY) == false);
EXPECT_TRUE(bool(imageProgress & FLAG_IS_ANIMATED) == true);
// Ensure that we decoded both frames of the animated version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eAnimated),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(NS_SUCCEEDED(result.Surface().Seek(0)));
EXPECT_TRUE(bool(result.Surface()));
RefPtr<imgFrame> partialFrame = result.Surface().GetFrame(1);
EXPECT_TRUE(bool(partialFrame));
}
// Ensure that we didn't decode the static version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eStatic),
/* aMarkUsed = */ false);
ASSERT_EQ(MatchType::NOT_FOUND, result.Type());
}
// Use GetFrame() to force a sync decode of the image, this time specifying
// FRAME_FIRST to ensure that we get a single-frame decode.
RefPtr<SourceSurface> animatedSurface =
image->GetFrame(imgIContainer::FRAME_FIRST,
imgIContainer::FLAG_SYNC_DECODE);
// Ensure that we decoded the static version of the image.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eStatic),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(bool(result.Surface()));
}
// Ensure that both frames of the animated version are still around.
{
LookupResult result =
SurfaceCache::Lookup(ImageKey(image.get()),
RasterSurfaceKey(imageSize,
DefaultSurfaceFlags(),
PlaybackType::eAnimated),
/* aMarkUsed = */ true);
ASSERT_EQ(MatchType::EXACT, result.Type());
EXPECT_TRUE(NS_SUCCEEDED(result.Surface().Seek(0)));
EXPECT_TRUE(bool(result.Surface()));
RefPtr<imgFrame> partialFrame = result.Surface().GetFrame(1);
EXPECT_TRUE(bool(partialFrame));
}
CheckDecoderFrameCurrent(GreenFirstFrameAnimatedGIFTestCase());
}
TEST_F(ImageDecoders, AnimatedGIFWithExtraImageSubBlocks)
@ -817,6 +860,16 @@ TEST_F(ImageDecoders, AnimatedGIFWithExtraImageSubBlocks)
EXPECT_TRUE(bool(partialFrame));
}
TEST_F(ImageDecoders, AnimatedWebPWithFRAME_FIRST)
{
CheckDecoderFrameFirst(GreenFirstFrameAnimatedWebPTestCase());
}
TEST_F(ImageDecoders, AnimatedWebPWithFRAME_CURRENT)
{
CheckDecoderFrameCurrent(GreenFirstFrameAnimatedWebPTestCase());
}
TEST_F(ImageDecoders, TruncatedSmallGIFSingleChunk)
{
CheckDecoderSingleChunk(TruncatedSmallGIFTestCase());

View file

@ -153,6 +153,7 @@ TEST_F(ImageDecoderMetadata, JPG) { CheckMetadata(GreenJPGTestCase()); }
TEST_F(ImageDecoderMetadata, BMP) { CheckMetadata(GreenBMPTestCase()); }
TEST_F(ImageDecoderMetadata, ICO) { CheckMetadata(GreenICOTestCase()); }
TEST_F(ImageDecoderMetadata, Icon) { CheckMetadata(GreenIconTestCase()); }
TEST_F(ImageDecoderMetadata, WebP) { CheckMetadata(GreenWebPTestCase()); }
TEST_F(ImageDecoderMetadata, AnimatedGIF)
{

Binary data: image/test/gtest/downscaled.webp (new file, 56 B; binary file not shown)

Binary data: image/test/gtest/first-frame-green.webp (new file, 154 B; binary file not shown)

Binary data: image/test/gtest/green.icc_srgb.webp (new file, 3.0 KiB; binary file not shown)

Binary data: image/test/gtest/green.webp (new file, 42 B; binary file not shown)

View file

@ -48,8 +48,10 @@ TEST_HARNESS_FILES.gtest += [
'downscaled.icon',
'downscaled.jpg',
'downscaled.png',
'downscaled.webp',
'first-frame-green.gif',
'first-frame-green.png',
'first-frame-green.webp',
'first-frame-padding.gif',
'green-1x1-truncated.gif',
'green-large-bmp.ico',
@ -57,10 +59,12 @@ TEST_HARNESS_FILES.gtest += [
'green-multiple-sizes.ico',
'green.bmp',
'green.gif',
'green.icc_srgb.webp',
'green.ico',
'green.icon',
'green.jpg',
'green.png',
'green.webp',
'invalid-truncated-metadata.bmp',
'no-frame-delay.gif',
'rle4.bmp',

js/src/frontend/moz.build (new file, 84 added lines)
View file

@ -0,0 +1,84 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
FINAL_LIBRARY = 'js'
FILES_PER_UNIFIED_FILE = 6
# Includes should be relative to parent path
LOCAL_INCLUDES += [
'!..',
'..'
]
include('../js-config.mozbuild')
include('../js-cxxflags.mozbuild')
# Generate frontend/ReservedWordsGenerated.h from frontend/ReservedWords.h
GENERATED_FILES += ['ReservedWordsGenerated.h']
ReservedWordsGenerated = GENERATED_FILES['ReservedWordsGenerated.h']
ReservedWordsGenerated.script = 'GenerateReservedWords.py'
ReservedWordsGenerated.inputs += ['ReservedWords.h']
UNIFIED_SOURCES += [
'BytecodeCompiler.cpp',
'BytecodeControlStructures.cpp',
'BytecodeEmitter.cpp',
'CallOrNewEmitter.cpp',
'CForEmitter.cpp',
'DoWhileEmitter.cpp',
'ElemOpEmitter.cpp',
'EmitterScope.cpp',
'ExpressionStatementEmitter.cpp',
'FoldConstants.cpp',
'ForInEmitter.cpp',
'ForOfEmitter.cpp',
'ForOfLoopControl.cpp',
'IfEmitter.cpp',
'JumpList.cpp',
'NameFunctions.cpp',
'NameOpEmitter.cpp',
'ParseNode.cpp',
'PropOpEmitter.cpp',
'SwitchEmitter.cpp',
'TDZCheckCache.cpp',
'TokenStream.cpp',
'TryEmitter.cpp',
'WhileEmitter.cpp',
]
# Parser.cpp cannot be built in unified mode because of explicit
# template instantiations.
SOURCES += [
'Parser.cpp',
]
if CONFIG['JS_BUILD_BINAST']:
# Using SOURCES, as UNIFIED_SOURCES causes mysterious bugs on 32-bit platforms.
# These parts of BinAST are designed only to test evolutions of the
# specification.
SOURCES += ['BinTokenReaderTester.cpp']
# These parts of BinAST should eventually move to release.
SOURCES += [
'BinSource-auto.cpp',
'BinSource.cpp',
'BinSourceRuntimeSupport.cpp',
'BinToken.cpp',
'BinTokenReaderBase.cpp',
'BinTokenReaderMultipart.cpp',
]
# Instrument BinAST files for fuzzing as we have a fuzzing target for BinAST.
if CONFIG['FUZZING_INTERFACES'] and CONFIG['LIBFUZZER']:
include('/tools/fuzzing/libfuzzer-flags.mozbuild')
SOURCES['BinSource-auto.cpp'].flags += libfuzzer_flags
SOURCES['BinSource.cpp'].flags += libfuzzer_flags
SOURCES['BinToken.cpp'].flags += libfuzzer_flags
SOURCES['BinTokenReaderBase.cpp'].flags += libfuzzer_flags
SOURCES['BinTokenReaderMultipart.cpp'].flags += libfuzzer_flags

View file

@ -796,7 +796,7 @@ BaselineScript::computeYieldAndAwaitNativeOffsets(JSScript* script)
return nativeCode;
};
mozilla::Span<uint32_t> pcOffsets = script->yieldAndAwaitOffsets();
mozilla::Span<const uint32_t> pcOffsets = script->yieldAndAwaitOffsets();
uint8_t** nativeOffsets = yieldEntryList();
std::transform(pcOffsets.begin(), pcOffsets.end(), nativeOffsets, computeNative);
}
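
For context, the change above only marks the bytecode-offset span as const; the surrounding logic is a plain std::transform from read-only offsets into a parallel array of native-code pointers. A minimal self-contained sketch of that shape (hypothetical offsets and code buffer, not the actual BaselineScript fields):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Read-only bytecode offsets, playing the role of Span<const uint32_t>.
    const std::vector<uint32_t> pcOffsets = {4, 12, 27};

    // Hypothetical base address of the generated native code.
    static uint8_t nativeCode[64];
    uint8_t* nativeBase = nativeCode;

    // Fill a parallel array of native-code pointers, one per bytecode offset,
    // which is the shape of the std::transform call in the hunk above.
    std::vector<uint8_t*> nativeOffsets(pcOffsets.size());
    std::transform(pcOffsets.begin(), pcOffsets.end(), nativeOffsets.begin(),
                   [&](uint32_t pcOffset) { return nativeBase + pcOffset; });

    printf("first entry is %td bytes past the code base\n",
           nativeOffsets[0] - nativeBase);
    return 0;
}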

Просмотреть файл

@ -210,30 +210,6 @@ UNIFIED_SOURCES += [
'ds/Bitmap.cpp',
'ds/LifoAlloc.cpp',
'ds/MemoryProtectionExceptionHandler.cpp',
'frontend/BytecodeCompiler.cpp',
'frontend/BytecodeControlStructures.cpp',
'frontend/BytecodeEmitter.cpp',
'frontend/CallOrNewEmitter.cpp',
'frontend/CForEmitter.cpp',
'frontend/DoWhileEmitter.cpp',
'frontend/ElemOpEmitter.cpp',
'frontend/EmitterScope.cpp',
'frontend/ExpressionStatementEmitter.cpp',
'frontend/FoldConstants.cpp',
'frontend/ForInEmitter.cpp',
'frontend/ForOfEmitter.cpp',
'frontend/ForOfLoopControl.cpp',
'frontend/IfEmitter.cpp',
'frontend/JumpList.cpp',
'frontend/NameFunctions.cpp',
'frontend/NameOpEmitter.cpp',
'frontend/ParseNode.cpp',
'frontend/PropOpEmitter.cpp',
'frontend/SwitchEmitter.cpp',
'frontend/TDZCheckCache.cpp',
'frontend/TokenStream.cpp',
'frontend/TryEmitter.cpp',
'frontend/WhileEmitter.cpp',
'gc/Allocator.cpp',
'gc/AtomMarking.cpp',
'gc/Barrier.cpp',
@ -373,8 +349,6 @@ UNIFIED_SOURCES += [
# builtin/RegExp.cpp cannot be built in unified mode because it causes huge
# win32 test slowdowns
# frontend/Parser.cpp cannot be built in unified mode because of explicit
# template instantiations.
# jsmath.cpp cannot be built in unified mode because it needs to re-#define the
# RtlGenRandom declaration's calling convention in <ntsecapi.h> on Windows.
# jsutil.cpp cannot be built in unified mode because it is needed for
@ -391,7 +365,6 @@ UNIFIED_SOURCES += [
# files unlucky enough to be unified with it.
SOURCES += [
'builtin/RegExp.cpp',
'frontend/Parser.cpp',
'gc/StoreBuffer.cpp',
'jsmath.cpp',
'jsutil.cpp',
@ -465,15 +438,9 @@ else:
'perf/pm_stub.cpp'
]
GENERATED_FILES += ['frontend/ReservedWordsGenerated.h']
ReservedWordsGenerated = GENERATED_FILES['frontend/ReservedWordsGenerated.h']
ReservedWordsGenerated.script = 'frontend/GenerateReservedWords.py'
ReservedWordsGenerated.inputs += [
'frontend/ReservedWords.h'
]
DIRS += [
'build',
'frontend',
'jit',
]
@ -485,29 +452,6 @@ if CONFIG['ENABLE_WASM_CRANELIFT']:
'wasm/WasmCraneliftCompile.cpp',
]
if CONFIG['JS_BUILD_BINAST']:
# Using SOURCES, as UNIFIED_SOURCES causes mysterious bugs on 32-bit platforms.
# These parts of BinAST are designed only to test evolutions of the
# specification.
SOURCES += ['frontend/BinTokenReaderTester.cpp']
# These parts of BinAST should eventually move to release.
SOURCES += [
'frontend/BinSource-auto.cpp',
'frontend/BinSource.cpp',
'frontend/BinSourceRuntimeSupport.cpp',
'frontend/BinToken.cpp',
'frontend/BinTokenReaderBase.cpp',
'frontend/BinTokenReaderMultipart.cpp',
]
# Instrument BinAST files for fuzzing as we have a fuzzing target for BinAST.
if CONFIG['FUZZING_INTERFACES'] and CONFIG['LIBFUZZER']:
SOURCES['frontend/BinSource-auto.cpp'].flags += libfuzzer_flags
SOURCES['frontend/BinSource.cpp'].flags += libfuzzer_flags
SOURCES['frontend/BinToken.cpp'].flags += libfuzzer_flags
SOURCES['frontend/BinTokenReaderBase.cpp'].flags += libfuzzer_flags
SOURCES['frontend/BinTokenReaderMultipart.cpp'].flags += libfuzzer_flags
# Prepare self-hosted JS code for embedding
GENERATED_FILES += [('selfhosted.out.h', 'selfhosted.js')]
selfhosted = GENERATED_FILES[('selfhosted.out.h', 'selfhosted.js')]

Просмотреть файл

@ -414,13 +414,13 @@ js::XDRScript(XDRState<mode>* xdr, HandleScope scriptEnclosingScope,
nsrcnotes = script->numNotes();
nscopes = script->scopes().size();
if (script->hasConsts()) {
nconsts = script->consts().size();
}
if (script->hasObjects()) {
nobjects = script->objects().size();
}
nscopes = script->scopes().size();
if (script->hasTrynotes()) {
ntrynotes = script->trynotes().size();
}
@ -732,9 +732,11 @@ js::XDRScript(XDRState<mode>* xdr, HandleScope scriptEnclosingScope,
}
}
js::PrivateScriptData* data = script->data_;
if (nconsts) {
RootedValue val(cx);
for (GCPtrValue& elem : script->consts()) {
for (GCPtrValue& elem : data->consts()) {
if (mode == XDR_ENCODE) {
val = elem.get();
}
@ -747,7 +749,7 @@ js::XDRScript(XDRState<mode>* xdr, HandleScope scriptEnclosingScope,
{
MOZ_ASSERT(nscopes != 0);
GCPtrScope* vector = script->scopes().data();
GCPtrScope* vector = data->scopes().data();
RootedScope scope(cx);
RootedScope enclosing(cx);
ScopeKind scopeKind;
@ -844,7 +846,7 @@ js::XDRScript(XDRState<mode>* xdr, HandleScope scriptEnclosingScope,
* after the enclosing block has been XDR'd.
*/
if (nobjects) {
for (GCPtrObject& elem : script->objects()) {
for (GCPtrObject& elem : data->objects()) {
XDRClassKind classk;
if (mode == XDR_ENCODE) {
@ -939,7 +941,7 @@ js::XDRScript(XDRState<mode>* xdr, HandleScope scriptEnclosingScope,
MOZ_TRY(xdr->codeMarker(0xF83B989A));
if (ntrynotes) {
for (JSTryNote& elem : script->trynotes()) {
for (JSTryNote& elem : data->tryNotes()) {
MOZ_TRY(xdr->codeUint8(&elem.kind));
MOZ_TRY(xdr->codeUint32(&elem.stackDepth));
MOZ_TRY(xdr->codeUint32(&elem.start));
@ -948,7 +950,7 @@ js::XDRScript(XDRState<mode>* xdr, HandleScope scriptEnclosingScope,
}
if (nscopenotes) {
for (ScopeNote& elem : script->scopeNotes()) {
for (ScopeNote& elem : data->scopeNotes()) {
MOZ_TRY(xdr->codeUint32(&elem.index));
MOZ_TRY(xdr->codeUint32(&elem.start));
MOZ_TRY(xdr->codeUint32(&elem.length));
@ -957,7 +959,7 @@ js::XDRScript(XDRState<mode>* xdr, HandleScope scriptEnclosingScope,
}
if (nyieldoffsets) {
for (uint32_t& elem : script->yieldAndAwaitOffsets()) {
for (uint32_t& elem : data->yieldAndAwaitOffsets()) {
MOZ_TRY(xdr->codeUint32(&elem));
}
}
@ -3036,123 +3038,207 @@ js::FreeScriptData(JSRuntime* rt)
table.clear();
}
/*
* [SMDOC] JSScript data layout (unshared)
*
* JSScript::data and SharedScriptData::data have complex,
* manually-controlled, memory layouts.
*
* JSScript::data begins with some optional array headers. They are optional
* because they often aren't needed, i.e. the corresponding arrays often have
* zero elements. Each header has a bit in JSScript::hasArrayBits that
* indicates if it's present within |data|; from this the offset of each
* present array header can be computed. Each header has an accessor function
* in JSScript that encapsulates this offset computation.
*
* Array type Array elements Accessor
* ---------- -------------- --------
* ConstArray Consts consts()
* ObjectArray Objects objects()
* ObjectArray Regexps regexps()
* TryNoteArray Try notes trynotes()
* ScopeNoteArray Scope notes scopeNotes()
*
* Then are the elements of several arrays.
* - Most of these arrays have headers listed above (if present). For each of
* these, the array pointer and the array length is stored in the header.
* - The remaining arrays have pointers and lengths that are stored directly in
* JSScript. This is because, unlike the others, they are nearly always
* non-zero length and so the optional-header space optimization isn't
* worthwhile.
*
* Array elements Pointed to by Length
* -------------- ------------- ------
* Consts consts()->vector consts()->length
* Objects objects()->vector objects()->length
* Regexps regexps()->vector regexps()->length
* Try notes trynotes()->vector trynotes()->length
* Scope notes scopeNotes()->vector scopeNotes()->length
*
* IMPORTANT: This layout has two key properties.
* - It ensures that everything has sufficient alignment; in particular, the
* consts() elements need Value alignment.
* - It ensures there are no gaps between elements, which saves space and makes
* manual layout easy. In particular, in the second part, arrays with larger
* elements precede arrays with smaller elements.
*
* The following static assertions check JSScript::data's alignment properties.
*/
template<class T>
constexpr bool
KeepsValueAlignment() {
return alignof(JS::Value) % alignof(T) == 0 &&
sizeof(T) % sizeof(JS::Value) == 0;
}
template<class T>
constexpr bool
HasValueAlignment() {
return alignof(JS::Value) == alignof(T) &&
sizeof(T) == sizeof(JS::Value);
}
template<class T1, class T2>
constexpr bool
NoPaddingBetweenEntries() {
return alignof(T1) % alignof(T2) == 0;
}
/*
* These assertions ensure that there is no padding between the array headers,
* and also that the consts() elements (which follow immediately afterward) are
* Value-aligned. (There is an assumption that |data| itself is Value-aligned;
* we check this below).
*/
JS_STATIC_ASSERT(KeepsValueAlignment<ConstArray>());
JS_STATIC_ASSERT(KeepsValueAlignment<ObjectArray>()); /* there are two of these */
JS_STATIC_ASSERT(KeepsValueAlignment<TryNoteArray>());
JS_STATIC_ASSERT(KeepsValueAlignment<ScopeNoteArray>());
/* These assertions ensure there is no padding required between array elements. */
JS_STATIC_ASSERT(HasValueAlignment<GCPtrValue>());
JS_STATIC_ASSERT((NoPaddingBetweenEntries<GCPtrValue, GCPtrObject>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<GCPtrObject, GCPtrObject>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<GCPtrObject, JSTryNote>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<JSTryNote, uint32_t>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<uint32_t, uint32_t>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<GCPtrValue, ScopeNote>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<ScopeNote, ScopeNote>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<JSTryNote, ScopeNote>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<GCPtrObject, ScopeNote>()));
JS_STATIC_ASSERT((NoPaddingBetweenEntries<ScopeNote, uint32_t>()));
static inline size_t
ScriptDataSize(uint32_t nscopes, uint32_t nconsts, uint32_t nobjects,
uint32_t ntrynotes, uint32_t nscopenotes, uint32_t nyieldoffsets)
// Placement-new elements of an array. This should optimize away for types with
// trivial default initialization.
template <typename T>
static void
DefaultInitializeElements(void* arrayPtr, size_t length)
{
size_t size = 0;
uintptr_t elem = reinterpret_cast<uintptr_t>(arrayPtr);
MOZ_ASSERT(elem % alignof(T) == 0);
MOZ_ASSERT(nscopes != 0);
size += sizeof(ScopeArray) + nscopes * sizeof(Scope*);
if (nconsts != 0) {
size += sizeof(ConstArray) + nconsts * sizeof(Value);
for (size_t i = 0; i < length; ++i) {
new (reinterpret_cast<T*>(elem)) T;
elem += sizeof(T);
}
if (nobjects != 0) {
size += sizeof(ObjectArray) + nobjects * sizeof(NativeObject*);
}
/* static */ size_t
PrivateScriptData::AllocationSize(uint32_t nscopes, uint32_t nconsts, uint32_t nobjects,
uint32_t ntrynotes, uint32_t nscopenotes, uint32_t nyieldoffsets)
{
size_t size = sizeof(PrivateScriptData);
if (nconsts) { size += sizeof(PackedSpan); }
if (nobjects) { size += sizeof(PackedSpan); }
if (ntrynotes) { size += sizeof(PackedSpan); }
if (nscopenotes) { size += sizeof(PackedSpan); }
if (nyieldoffsets) { size += sizeof(PackedSpan); }
size += nscopes * sizeof(GCPtrScope);
if (nconsts) {
// The scope array doesn't maintain Value alignment, so compute the
// padding needed to remedy this.
size = JS_ROUNDUP(size, alignof(GCPtrValue));
size += nconsts * sizeof(GCPtrValue);
}
if (ntrynotes != 0) {
size += sizeof(TryNoteArray) + ntrynotes * sizeof(JSTryNote);
if (nobjects) {
size += nobjects * sizeof(GCPtrObject);
}
if (nscopenotes != 0) {
size += sizeof(ScopeNoteArray) + nscopenotes * sizeof(ScopeNote);
if (ntrynotes) {
size += ntrynotes * sizeof(JSTryNote);
}
if (nyieldoffsets != 0) {
size += sizeof(YieldAndAwaitOffsetArray) + nyieldoffsets * sizeof(uint32_t);
if (nscopenotes) {
size += nscopenotes * sizeof(ScopeNote);
}
if (nyieldoffsets) {
size += nyieldoffsets * sizeof(uint32_t);
}
return size;
return size;
}
// Placement-new elements of an array. This should optimize away for types with
// trivial default initialization.
template <typename T>
void
PrivateScriptData::initElements(size_t offset, size_t length)
{
uintptr_t base = reinterpret_cast<uintptr_t>(this);
DefaultInitializeElements<T>(reinterpret_cast<void*>(base + offset), length);
}
template <typename T>
void
PrivateScriptData::initSpan(size_t* cursor, uint32_t scaledSpanOffset, size_t length)
{
// PackedSpans are elided when arrays are empty
if (scaledSpanOffset == 0) {
MOZ_ASSERT(length == 0);
return;
}
// Placement-new the PackedSpan
PackedSpan* span = packedOffsetToPointer<PackedSpan>(scaledSpanOffset);
span = new (span) PackedSpan { uint32_t(*cursor), uint32_t(length) };
// Placement-new the elements
initElements<T>(*cursor, length);
// Advance cursor
(*cursor) += length * sizeof(T);
}
// Initialize PackedSpans and placement-new the trailing arrays.
PrivateScriptData::PrivateScriptData(uint32_t nscopes_, uint32_t nconsts, uint32_t nobjects,
uint32_t ntrynotes, uint32_t nscopenotes, uint32_t nyieldoffsets)
: nscopes(nscopes_)
{
// Convert cursor position to a packed offset.
auto ToPackedOffset = [](size_t cursor) {
MOZ_ASSERT(cursor % PackedOffsets::SCALE == 0);
return cursor / PackedOffsets::SCALE;
};
// Helper to allocate a PackedSpan from the variable length data.
auto TakeSpan = [=](size_t* cursor) {
size_t packedOffset = ToPackedOffset(*cursor);
MOZ_ASSERT(packedOffset <= PackedOffsets::MAX_OFFSET);
(*cursor) += sizeof(PackedSpan);
return packedOffset;
};
// Variable-length data begins immediately after PrivateScriptData itself.
// NOTE: Alignment is computed using cursor/offset so the alignment of
// PrivateScriptData must be stricter than any trailing array type.
size_t cursor = sizeof(*this);
// Layout PackedSpan structures and initialize packedOffsets fields.
static_assert(alignof(PrivateScriptData) >= alignof(PackedSpan),
"Incompatible alignment");
if (nconsts) { packedOffsets.constsSpanOffset = TakeSpan(&cursor); }
if (nobjects) { packedOffsets.objectsSpanOffset = TakeSpan(&cursor); }
if (ntrynotes) { packedOffsets.tryNotesSpanOffset = TakeSpan(&cursor); }
if (nscopenotes) { packedOffsets.scopeNotesSpanOffset = TakeSpan(&cursor); }
if (nyieldoffsets) { packedOffsets.yieldOffsetsSpanOffset = TakeSpan(&cursor); }
// Layout and initialize the scopes array. Manually insert padding so that
// the subsequent |consts| array is aligned.
{
MOZ_ASSERT(nscopes > 0);
static_assert(alignof(PackedSpan) >= alignof(GCPtrScope),
"Incompatible alignment");
initElements<GCPtrScope>(cursor, nscopes);
packedOffsets.scopesOffset = ToPackedOffset(cursor);
cursor += nscopes * sizeof(GCPtrScope);
}
if (nconsts) {
// Pad to required alignment if we are emitting constant array.
cursor = JS_ROUNDUP(cursor, alignof(GCPtrValue));
static_assert(alignof(PrivateScriptData) >= alignof(GCPtrValue),
"Incompatible alignment");
initSpan<GCPtrValue>(&cursor, packedOffsets.constsSpanOffset, nconsts);
}
// Layout arrays, initialize PackedSpans and placement-new the elements.
static_assert(alignof(GCPtrValue) >= alignof(GCPtrObject),
"Incompatible alignment");
static_assert(alignof(GCPtrScope) >= alignof(GCPtrObject),
"Incompatible alignment");
initSpan<GCPtrObject>(&cursor, packedOffsets.objectsSpanOffset, nobjects);
static_assert(alignof(GCPtrObject) >= alignof(JSTryNote),
"Incompatible alignment");
initSpan<JSTryNote>(&cursor, packedOffsets.tryNotesSpanOffset, ntrynotes);
static_assert(alignof(JSTryNote) >= alignof(ScopeNote),
"Incompatible alignment");
initSpan<ScopeNote>(&cursor, packedOffsets.scopeNotesSpanOffset, nscopenotes);
static_assert(alignof(ScopeNote) >= alignof(uint32_t),
"Incompatible alignment");
initSpan<uint32_t>(&cursor, packedOffsets.yieldOffsetsSpanOffset, nyieldoffsets);
// Sanity check
MOZ_ASSERT(AllocationSize(nscopes_, nconsts, nobjects,
ntrynotes, nscopenotes, nyieldoffsets) == cursor);
}
/* static */ PrivateScriptData*
PrivateScriptData::new_(JSContext* cx,
uint32_t nscopes, uint32_t nconsts, uint32_t nobjects,
uint32_t ntrynotes, uint32_t nscopenotes, uint32_t nyieldoffsets,
uint32_t* dataSize)
{
// Compute size including trailing arrays
size_t size = AllocationSize(nscopes, nconsts, nobjects,
ntrynotes, nscopenotes, nyieldoffsets);
// Allocate contiguous raw buffer
void* raw = cx->pod_malloc<uint8_t>(size);
MOZ_ASSERT(uintptr_t(raw) % alignof(PrivateScriptData) == 0);
if (!raw) {
return nullptr;
}
if (dataSize) {
*dataSize = size;
}
// Construct the PrivateScriptData. Trailing arrays are uninitialized but
// GCPtrs are put into a safe state.
return new (raw) PrivateScriptData(nscopes, nconsts, nobjects,
ntrynotes, nscopenotes, nyieldoffsets);
}
void
PrivateScriptData::traceChildren(JSTracer* trc)
{
auto scopearray = scopes();
TraceRange(trc, scopearray.size(), scopearray.data(), "scopes");
if (hasConsts()) {
auto constarray = consts();
TraceRange(trc, constarray.size(), constarray.data(), "consts");
}
if (hasObjects()) {
auto objarray = objects();
TraceRange(trc, objarray.size(), objarray.data(), "objects");
}
}
JSScript::JSScript(JS::Realm* realm, uint8_t* stubEntry, const ReadOnlyCompileOptions& options,
@ -3288,92 +3374,16 @@ JSScript::partiallyInit(JSContext* cx, HandleScript script, uint32_t nscopes,
{
cx->check(script);
size_t size = ScriptDataSize(nscopes, nconsts, nobjects, ntrynotes,
nscopenotes, nyieldoffsets);
script->data = AllocScriptData(cx, size);
if (size && !script->data) {
uint32_t dataSize;
PrivateScriptData* data = PrivateScriptData::new_(cx, nscopes, nconsts, nobjects, ntrynotes,
nscopenotes, nyieldoffsets, &dataSize);
if (!data) {
return false;
}
script->dataSize_ = size;
uint8_t* cursor = script->data;
// There must always be at least 1 scope, the body scope.
MOZ_ASSERT(nscopes != 0);
cursor += sizeof(ScopeArray);
if (nconsts != 0) {
script->setHasArray(CONSTS);
cursor += sizeof(ConstArray);
}
if (nobjects != 0) {
script->setHasArray(OBJECTS);
cursor += sizeof(ObjectArray);
}
if (ntrynotes != 0) {
script->setHasArray(TRYNOTES);
cursor += sizeof(TryNoteArray);
}
if (nscopenotes != 0) {
script->setHasArray(SCOPENOTES);
cursor += sizeof(ScopeNoteArray);
}
YieldAndAwaitOffsetArray* yieldAndAwaitOffsets = nullptr;
if (nyieldoffsets != 0) {
yieldAndAwaitOffsets = reinterpret_cast<YieldAndAwaitOffsetArray*>(cursor);
cursor += sizeof(YieldAndAwaitOffsetArray);
}
if (nconsts != 0) {
MOZ_ASSERT(reinterpret_cast<uintptr_t>(cursor) % sizeof(JS::Value) == 0);
script->constsRaw()->length = nconsts;
script->constsRaw()->vector = (GCPtrValue*)cursor;
cursor += nconsts * sizeof(script->constsRaw()->vector[0]);
}
script->scopesRaw()->length = nscopes;
script->scopesRaw()->vector = (GCPtrScope*)cursor;
cursor += nscopes * sizeof(script->scopesRaw()->vector[0]);
if (nobjects != 0) {
script->objectsRaw()->length = nobjects;
script->objectsRaw()->vector = (GCPtrObject*)cursor;
cursor += nobjects * sizeof(script->objectsRaw()->vector[0]);
}
if (ntrynotes != 0) {
script->trynotesRaw()->length = ntrynotes;
script->trynotesRaw()->vector = reinterpret_cast<JSTryNote*>(cursor);
size_t vectorSize = ntrynotes * sizeof(script->trynotesRaw()->vector[0]);
#ifdef DEBUG
memset(cursor, 0, vectorSize);
#endif
cursor += vectorSize;
}
if (nscopenotes != 0) {
script->scopeNotesRaw()->length = nscopenotes;
script->scopeNotesRaw()->vector = reinterpret_cast<ScopeNote*>(cursor);
size_t vectorSize = nscopenotes * sizeof(script->scopeNotesRaw()->vector[0]);
#ifdef DEBUG
memset(cursor, 0, vectorSize);
#endif
cursor += vectorSize;
}
if (nyieldoffsets != 0) {
yieldAndAwaitOffsets->init(reinterpret_cast<uint32_t*>(cursor), nyieldoffsets);
size_t vectorSize = nyieldoffsets * sizeof(script->yieldAndAwaitOffsetsRaw()[0]);
#ifdef DEBUG
memset(cursor, 0, vectorSize);
#endif
cursor += vectorSize;
}
MOZ_ASSERT(cursor == script->data + size);
script->data_ = data;
script->dataSize_ = dataSize;
return true;
}
@ -3401,7 +3411,9 @@ JSScript::initFunctionPrototype(JSContext* cx, Handle<JSScript*> script,
if (!functionProtoScope) {
return false;
}
script->scopesRaw()->vector[0].init(functionProtoScope);
js::PrivateScriptData* data = script->data_;
data->scopes()[0].init(functionProtoScope);
uint32_t codeLength = 1;
uint32_t srcNotesLength = 1;
@ -3539,21 +3551,23 @@ JSScript::fullyInitFromEmitter(JSContext* cx, HandleScript script, frontend::Byt
return false;
}
js::PrivateScriptData* data = script->data_;
if (bce->numberList.length() != 0) {
bce->numberList.finish(script->consts());
bce->numberList.finish(data->consts());
}
if (bce->objectList.length != 0) {
bce->objectList.finish(script->objects());
bce->objectList.finish(data->objects());
}
if (bce->scopeList.length() != 0) {
bce->scopeList.finish(script->scopes());
bce->scopeList.finish(data->scopes());
}
if (bce->tryNoteList.length() != 0) {
bce->tryNoteList.finish(script->trynotes(), prologueLength);
bce->tryNoteList.finish(data->tryNotes(), prologueLength);
}
if (bce->scopeNoteList.length() != 0) {
bce->scopeNoteList.finish(script->scopeNotes(), prologueLength);
bce->scopeNoteList.finish(data->scopeNotes(), prologueLength);
}
script->bitFields_.strict_ = bce->sc->strict();
script->bitFields_.explicitUseStrict_ = bce->sc->hasExplicitUseStrict();
script->bitFields_.bindingsAccessedDynamically_ = bce->sc->bindingsAccessedDynamically();
@ -3577,7 +3591,7 @@ JSScript::fullyInitFromEmitter(JSContext* cx, HandleScript script, frontend::Byt
// Copy yield offsets last, as the generator kind is set in
// initFromFunctionBox.
if (bce->yieldAndAwaitOffsetList.length() != 0) {
bce->yieldAndAwaitOffsetList.finish(script->yieldAndAwaitOffsets(), prologueLength);
bce->yieldAndAwaitOffsetList.finish(data->yieldAndAwaitOffsets(), prologueLength);
}
#ifdef DEBUG
@ -3658,7 +3672,7 @@ JSScript::computedSizeOfData() const
size_t
JSScript::sizeOfData(mozilla::MallocSizeOf mallocSizeOf) const
{
return mallocSizeOf(data);
return mallocSizeOf(data_);
}
size_t
@ -3700,9 +3714,9 @@ JSScript::finalize(FreeOp* fop)
destroyScriptCounts();
destroyDebugScript(fop);
if (data) {
JS_POISON(data, 0xdb, computedSizeOfData(), MemCheckKind::MakeNoAccess);
fop->free_(data);
if (data_) {
JS_POISON(data_, 0xdb, computedSizeOfData(), MemCheckKind::MakeNoAccess);
fop->free_(data_);
}
if (scriptData_) {
@ -3954,14 +3968,6 @@ js::DescribeScriptedCallerForCompilation(JSContext* cx, MutableHandleScript mayb
}
}
template <class T>
static inline T*
Rebase(JSScript* dst, JSScript* src, T* srcp)
{
size_t off = reinterpret_cast<uint8_t*>(srcp) - src->data;
return reinterpret_cast<T*>(dst->data + off);
}
static JSObject*
CloneInnerInterpretedFunction(JSContext* cx, HandleScope enclosingScope, HandleFunction srcFun)
{
@ -4024,18 +4030,17 @@ js::detail::CopyScript(JSContext* cx, HandleScript src, HandleScript dst,
/* Some embeddings are not careful to use ExposeObjectToActiveJS as needed. */
MOZ_ASSERT(!src->sourceObject()->isMarkedGray());
uint32_t nconsts = src->hasConsts() ? src->consts().size() : 0;
uint32_t nobjects = src->hasObjects() ? src->objects().size() : 0;
uint32_t nscopes = src->scopes().size();
uint32_t ntrynotes = src->hasTrynotes() ? src->trynotes().size() : 0;
uint32_t nscopenotes = src->hasScopeNotes() ? src->scopeNotes().size() : 0;
uint32_t nyieldoffsets = src->hasYieldAndAwaitOffsets() ? src->yieldAndAwaitOffsets().size() : 0;
#ifdef DEBUG
uint32_t nconsts = src->hasConsts() ? src->consts().size() : 0;
#endif
uint32_t nobjects = src->hasObjects() ? src->objects().size() : 0;
/* Script data */
size_t size = src->dataSize();
UniquePtr<uint8_t, JS::FreePolicy> data(AllocScriptData(cx, size));
if (size && !data) {
if (!data) {
return false;
}
@ -4101,13 +4106,9 @@ js::detail::CopyScript(JSContext* cx, HandleScript src, HandleScript dst,
}
}
/* This assignment must occur before all the Rebase calls. */
dst->data = data.release();
dst->data_ = reinterpret_cast<js::PrivateScriptData*>(data.release());
dst->dataSize_ = size;
MOZ_ASSERT(bool(dst->data) == bool(src->data));
if (dst->data) {
memcpy(dst->data, src->data, size);
}
memcpy(dst->data_, src->data_, size);
if (cx->zone() != src->zoneFromAnyThread()) {
for (size_t i = 0; i < src->scriptData()->natoms(); i++) {
@ -4134,7 +4135,6 @@ js::detail::CopyScript(JSContext* cx, HandleScript src, HandleScript dst,
dst->bitFields_.hasMappedArgsObj_ = src->hasMappedArgsObj();
dst->bitFields_.functionHasThisBinding_ = src->functionHasThisBinding();
dst->bitFields_.functionHasExtraBodyVarScope_ = src->functionHasExtraBodyVarScope();
dst->cloneHasArray(src);
dst->bitFields_.strict_ = src->strict();
dst->bitFields_.explicitUseStrict_ = src->explicitUseStrict();
dst->bitFields_.hasNonSyntacticScope_ = scopes[0]->hasOnChain(ScopeKind::NonSyntactic);
@ -4152,36 +4152,26 @@ js::detail::CopyScript(JSContext* cx, HandleScript src, HandleScript dst,
dst->bitFields_.hasRest_ = src->bitFields_.hasRest_;
dst->bitFields_.hideScriptFromDebugger_ = src->bitFields_.hideScriptFromDebugger_;
if (nconsts != 0) {
GCPtrValue* vector = Rebase<GCPtrValue>(dst, src, src->constsRaw()->vector);
dst->constsRaw()->vector = vector;
for (unsigned i = 0; i < nconsts; ++i) {
MOZ_ASSERT_IF(vector[i].isGCThing(), vector[i].toString()->isAtom());
}
}
if (nobjects != 0) {
GCPtrObject* vector = Rebase<GCPtrObject>(dst, src, src->objectsRaw()->vector);
dst->objectsRaw()->vector = vector;
for (unsigned i = 0; i < nobjects; ++i) {
vector[i].init(&objects[i]->as<NativeObject>());
}
}
{
GCPtrScope* vector = Rebase<GCPtrScope>(dst, src, src->scopesRaw()->vector);
dst->scopesRaw()->vector = vector;
auto array = dst->data_->scopes();
for (uint32_t i = 0; i < nscopes; ++i) {
vector[i].init(scopes[i]);
array[i].init(scopes[i]);
}
}
if (ntrynotes != 0) {
dst->trynotesRaw()->vector = Rebase<JSTryNote>(dst, src, src->trynotesRaw()->vector);
#ifdef DEBUG
if (nconsts) {
auto array = dst->data_->consts();
for (unsigned i = 0; i < nconsts; ++i) {
// We don't support GCThings here and thus don't need to call |init|.
MOZ_ASSERT(!array[i].isGCThing());
}
}
if (nscopenotes != 0) {
dst->scopeNotesRaw()->vector = Rebase<ScopeNote>(dst, src, src->scopeNotesRaw()->vector);
}
if (nyieldoffsets != 0) {
dst->yieldAndAwaitOffsetsRaw().vector_ =
Rebase<uint32_t>(dst, src, src->yieldAndAwaitOffsetsRaw().vector_);
#endif
if (nobjects) {
auto array = dst->data_->objects();
for (unsigned i = 0; i < nobjects; ++i) {
array[i].init(objects[i]);
}
}
return true;
@ -4526,25 +4516,14 @@ JSScript::traceChildren(JSTracer* trc)
GCMarker::fromTracer(trc)->shouldCheckCompartments(),
zone()->isCollecting());
if (data_) {
data_->traceChildren(trc);
}
if (scriptData()) {
scriptData()->traceChildren(trc);
}
if (data) {
auto array = scopes();
TraceRange(trc, array.size(), array.data(), "scopes");
}
if (hasConsts()) {
auto array = consts();
TraceRange(trc, array.size(), array.data(), "consts");
}
if (hasObjects()) {
auto array = objects();
TraceRange(trc, array.size(), array.data(), "objects");
}
MOZ_ASSERT_IF(sourceObject(), MaybeForwarded(sourceObject())->compartment() == compartment());
TraceNullableEdge(trc, &sourceObject_, "sourceObject");
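
A stripped-down, self-contained sketch of the allocation idiom used by PrivateScriptData::new_ above: compute one size covering the header plus every trailing array, allocate once, placement-new the header, and let its constructor placement-new each trailing element behind a moving cursor. The types below (TrailingData, Note) are hypothetical stand-ins, not the real SpiderMonkey classes, and error handling is reduced to the bare minimum.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <new>

// Hypothetical element type for the second trailing array.
struct Note { uint32_t start; uint32_t length; };

class alignas(double) TrailingData {
    uint32_t numValues_;
    uint32_t numNotes_;

    uintptr_t base() const { return reinterpret_cast<uintptr_t>(this); }

    TrailingData(uint32_t numValues, uint32_t numNotes)
      : numValues_(numValues), numNotes_(numNotes)
    {
        // Trailing arrays start immediately after the header object.
        size_t cursor = sizeof(*this);

        // Placement-new every element of the first trailing array...
        for (uint32_t i = 0; i < numValues_; i++) {
            new (reinterpret_cast<void*>(base() + cursor)) double(0.0);
            cursor += sizeof(double);
        }
        // ...then the second array, packed directly after the first.
        for (uint32_t i = 0; i < numNotes_; i++) {
            new (reinterpret_cast<void*>(base() + cursor)) Note{0, 0};
            cursor += sizeof(Note);
        }

        // Same sanity check as the PrivateScriptData constructor.
        assert(cursor == AllocationSize(numValues_, numNotes_));
    }

  public:
    static size_t AllocationSize(uint32_t numValues, uint32_t numNotes) {
        return sizeof(TrailingData)
             + numValues * sizeof(double)
             + numNotes * sizeof(Note);
    }

    // A single allocation covers the header and all trailing arrays.
    static TrailingData* New(uint32_t numValues, uint32_t numNotes) {
        void* raw = std::malloc(AllocationSize(numValues, numNotes));
        if (!raw) {
            return nullptr;
        }
        return new (raw) TrailingData(numValues, numNotes);
    }

    double* values() {
        return reinterpret_cast<double*>(base() + sizeof(*this));
    }
    Note* notes() {
        return reinterpret_cast<Note*>(base() + sizeof(*this) +
                                       numValues_ * sizeof(double));
    }
};

int main() {
    TrailingData* data = TrailingData::New(3, 2);
    data->values()[0] = 1.5;
    data->notes()[1].length = 7;
    std::free(data);
    return 0;
}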

Просмотреть файл

@ -147,53 +147,6 @@ struct ScopeNote {
uint32_t parent; // Index of parent block scope in notes, or NoScopeNote.
};
struct ConstArray {
js::GCPtrValue* vector; // array of indexed constant values
uint32_t length;
};
struct ObjectArray {
js::GCPtrObject* vector; // Array of indexed objects.
uint32_t length; // Count of indexed objects.
};
struct ScopeArray {
js::GCPtrScope* vector; // Array of indexed scopes.
uint32_t length; // Count of indexed scopes.
};
struct TryNoteArray {
JSTryNote* vector; // Array of indexed try notes.
uint32_t length; // Count of indexed try notes.
};
struct ScopeNoteArray {
ScopeNote* vector; // Array of indexed ScopeNote records.
uint32_t length; // Count of indexed try notes.
};
class YieldAndAwaitOffsetArray {
friend bool
detail::CopyScript(JSContext* cx, HandleScript src, HandleScript dst,
MutableHandle<GCVector<Scope*>> scopes);
uint32_t* vector_; // Array of bytecode offsets.
uint32_t length_; // Count of bytecode offsets.
public:
void init(uint32_t* vector, uint32_t length) {
vector_ = vector;
length_ = length;
}
uint32_t& operator[](uint32_t index) {
MOZ_ASSERT(index < length_);
return vector_[index];
}
uint32_t length() const {
return length_;
}
};
class ScriptCounts
{
public:
@ -1274,6 +1227,182 @@ template<XDRMode mode>
XDRResult
XDRScriptConst(XDRState<mode>* xdr, MutableHandleValue vp);
// [SMDOC] - JSScript data layout (unshared)
//
// PrivateScriptData stores variable-length data associated with a script.
// Abstractly a PrivateScriptData consists of all these arrays:
//
// * A non-empty array of GCPtrScope in scopes()
// * A possibly-empty array of GCPtrValue in consts()
// * A possibly-empty array of JSObject* in objects()
// * A possibly-empty array of JSTryNote in tryNotes()
// * A possibly-empty array of ScopeNote in scopeNotes()
// * A possibly-empty array of uint32_t in yieldAndAwaitOffsets()
//
// Accessing any of these arrays just requires calling the appropriate public
// Span-computing function.
//
// Under the hood, PrivateScriptData is a small class followed by a memory
// layout that compactly encodes all these arrays, in this manner (only
// explicit padding, "--" separators for readability only):
//
// <PrivateScriptData itself>
// --
// (OPTIONAL) PackedSpan for consts()
// (OPTIONAL) PackedSpan for objects()
// (OPTIONAL) PackedSpan for tryNotes()
// (OPTIONAL) PackedSpan for scopeNotes()
// (OPTIONAL) PackedSpan for yieldAndAwaitOffsets()
// --
// (REQUIRED) All the GCPtrScopes that constitute scopes()
// --
// (OPTIONAL) If there are consts, padding needed for space so far to be
// GCPtrValue-aligned
// (OPTIONAL) All the GCPtrValues that constitute consts()
// --
// (OPTIONAL) All the GCPtrObjects that constitute objects()
// --
// (OPTIONAL) All the JSTryNotes that constitute tryNotes()
// --
// (OPTIONAL) All the ScopeNotes that constitute scopeNotes()
// --
// (OPTIONAL) All the uint32_t's that constitute yieldAndAwaitOffsets()
//
// The contents of PrivateScriptData indicate which optional items are present.
// PrivateScriptData::packedOffsets contains bit-fields, one per array.
// Multiply each packed offset by sizeof(uint32_t) to compute a *real* offset.
//
// PrivateScriptData::scopesOffset indicates where scopes() begins. The bound
// of five PackedSpans ensures we can encode this offset compactly.
// PrivateScriptData::nscopes indicates the number of GCPtrScopes in scopes().
//
// The other PrivateScriptData::*Offset fields indicate where a potential
// corresponding PackedSpan resides. If the packed offset is 0, there is no
// PackedSpan, and the array is empty. Otherwise the PackedSpan's uint32_t
// offset and length fields store: 1) a *non-packed* offset (a literal count of
// bytes offset from the *start* of PrivateScriptData struct) to the
// corresponding array, and 2) the number of elements in the array,
// respectively.
//
// PrivateScriptData and PackedSpan are 64-bit-aligned, so manual alignment in
// trailing fields is only necessary before the first trailing fields with
// increased alignment -- before GCPtrValues for consts(), on 32-bit, where the
// preceding GCPtrScopes as pointers are only 32-bit-aligned.
class alignas(JS::Value) PrivateScriptData final
{
struct PackedOffsets
{
static constexpr size_t SCALE = sizeof(uint32_t);
static constexpr size_t MAX_OFFSET = 0b1111;
// (Scaled) offset to Scopes
uint32_t scopesOffset : 8;
// (Scaled) offset to Spans. These are set to 0 if they don't exist.
uint32_t constsSpanOffset : 4;
uint32_t objectsSpanOffset : 4;
uint32_t tryNotesSpanOffset : 4;
uint32_t scopeNotesSpanOffset : 4;
uint32_t yieldOffsetsSpanOffset : 4;
};
// Detect accidental size regressions.
static_assert(sizeof(PackedOffsets) == sizeof(uint32_t),
"unexpected bit-field packing");
// A span describes base offset and length of one variable length array in
// the private data.
struct alignas(uintptr_t) PackedSpan
{
uint32_t offset;
uint32_t length;
};
// Concrete Fields
PackedOffsets packedOffsets = {}; // zeroes
uint32_t nscopes;
// Translate an offset into a concrete pointer.
template <typename T>
T* offsetToPointer(size_t offset)
{
uintptr_t base = reinterpret_cast<uintptr_t>(this);
uintptr_t elem = base + offset;
return reinterpret_cast<T*>(elem);
}
// Translate a PackedOffsets member into a pointer.
template <typename T>
T* packedOffsetToPointer(size_t packedOffset)
{
return offsetToPointer<T>(packedOffset * PackedOffsets::SCALE);
}
// Translates a PackedOffsets member into a PackedSpan* and then unpacks
// that to a mozilla::Span.
template <typename T>
mozilla::Span<T> packedOffsetToSpan(size_t scaledSpanOffset)
{
PackedSpan* span = packedOffsetToPointer<PackedSpan>(scaledSpanOffset);
T* base = offsetToPointer<T>(span->offset);
return mozilla::MakeSpan(base, span->length);
}
// Helpers for creating and initializing trailing data
template <typename T>
void initSpan(size_t* cursor, uint32_t scaledSpanOffset, size_t length);
template <typename T>
void initElements(size_t offset, size_t length);
// Size to allocate
static size_t AllocationSize(uint32_t nscopes, uint32_t nconsts, uint32_t nobjects,
uint32_t ntrynotes, uint32_t nscopenotes, uint32_t nyieldoffsets);
// Initialize header and PackedSpans
PrivateScriptData(uint32_t nscopes_, uint32_t nconsts, uint32_t nobjects,
uint32_t ntrynotes, uint32_t nscopenotes, uint32_t nyieldoffsets);
public:
// Accessors for typed array spans.
mozilla::Span<GCPtrScope> scopes() {
GCPtrScope* base = packedOffsetToPointer<GCPtrScope>(packedOffsets.scopesOffset);
return mozilla::MakeSpan(base, nscopes);
}
mozilla::Span<GCPtrValue> consts() {
return packedOffsetToSpan<GCPtrValue>(packedOffsets.constsSpanOffset);
}
mozilla::Span<GCPtrObject> objects() {
return packedOffsetToSpan<GCPtrObject>(packedOffsets.objectsSpanOffset);
}
mozilla::Span<JSTryNote> tryNotes() {
return packedOffsetToSpan<JSTryNote>(packedOffsets.tryNotesSpanOffset);
}
mozilla::Span<ScopeNote> scopeNotes() {
return packedOffsetToSpan<ScopeNote>(packedOffsets.scopeNotesSpanOffset);
}
mozilla::Span<uint32_t> yieldAndAwaitOffsets() {
return packedOffsetToSpan<uint32_t>(packedOffsets.yieldOffsetsSpanOffset);
}
// Fast tests for if array exists
bool hasConsts() const { return packedOffsets.constsSpanOffset != 0; }
bool hasObjects() const { return packedOffsets.objectsSpanOffset != 0; }
bool hasTryNotes() const { return packedOffsets.tryNotesSpanOffset != 0; }
bool hasScopeNotes() const { return packedOffsets.scopeNotesSpanOffset != 0; }
bool hasYieldOffsets() const { return packedOffsets.yieldOffsetsSpanOffset != 0; }
// Allocate a new PrivateScriptData. Headers and GCPtrs are initialized.
// The size of allocation is returned as an out parameter.
static PrivateScriptData* new_(JSContext* cx,
uint32_t nscopes, uint32_t nconsts, uint32_t nobjects,
uint32_t ntrynotes, uint32_t nscopenotes, uint32_t nyieldoffsets,
uint32_t* dataSize);
void traceChildren(JSTracer* trc);
};
/*
* Common data that can be shared between many scripts in a single runtime.
*/
@ -1404,13 +1533,13 @@ class JSScript : public js::gc::TenuredCell
uint8_t* jitCodeRaw_ = nullptr;
uint8_t* jitCodeSkipArgCheck_ = nullptr;
// Shareable script data
js::SharedScriptData* scriptData_ = nullptr;
public:
// Pointer to variable-length data array (see comment above Create() for
// details).
uint8_t* data = nullptr;
// Unshared variable-length data
js::PrivateScriptData* data_ = nullptr;
public:
JS::Realm* realm_ = nullptr;
private:
@ -1539,14 +1668,6 @@ class JSScript : public js::gc::TenuredCell
* custom-assign particular bit-fields in the constructor body.
*/
// The bits in this field indicate the presence/non-presence of several
// optional arrays in |data|. See the comments above Create() for details.
uint8_t hasArrayBits_ : ARRAY_KIND_BITS;
/*
* All remaining bit-fields are single-bit bools.
*/
// No need for result value of last expression statement.
bool noScriptRval_ : 1;
@ -2392,99 +2513,43 @@ class JSScript : public js::gc::TenuredCell
size_t sizeOfData(mozilla::MallocSizeOf mallocSizeOf) const;
size_t sizeOfTypeScript(mozilla::MallocSizeOf mallocSizeOf) const;
bool hasArray(ArrayKind kind) const {
return bitFields_.hasArrayBits_ & (1 << kind);
}
void setHasArray(ArrayKind kind) { bitFields_.hasArrayBits_ |= (1 << kind); }
void cloneHasArray(JSScript* script) {
bitFields_.hasArrayBits_ = script->bitFields_.hasArrayBits_;
}
bool hasConsts() const { return hasArray(CONSTS); }
bool hasObjects() const { return hasArray(OBJECTS); }
bool hasTrynotes() const { return hasArray(TRYNOTES); }
bool hasScopeNotes() const { return hasArray(SCOPENOTES); }
bool hasYieldAndAwaitOffsets() const {
return isGenerator() || isAsync();
}
#define OFF(fooOff, hasFoo, t) (fooOff() + (hasFoo() ? sizeof(t) : 0))
size_t scopesOffset() const { return 0; }
size_t constsOffset() const { return scopesOffset() + sizeof(js::ScopeArray); }
size_t objectsOffset() const { return OFF(constsOffset, hasConsts, js::ConstArray); }
size_t trynotesOffset() const { return OFF(objectsOffset, hasObjects, js::ObjectArray); }
size_t scopeNotesOffset() const { return OFF(trynotesOffset, hasTrynotes, js::TryNoteArray); }
size_t yieldAndAwaitOffsetsOffset() const {
return OFF(scopeNotesOffset, hasScopeNotes, js::ScopeNoteArray);
}
#undef OFF
size_t dataSize() const { return dataSize_; }
private:
bool hasConsts() const { return data_->hasConsts(); }
bool hasObjects() const { return data_->hasObjects(); }
bool hasTrynotes() const { return data_->hasTryNotes(); }
bool hasScopeNotes() const { return data_->hasScopeNotes(); }
bool hasYieldAndAwaitOffsets() const {
return data_->hasYieldOffsets();
}
js::ConstArray* constsRaw() const {
mozilla::Span<const js::GCPtrScope> scopes() const {
return data_->scopes();
}
mozilla::Span<const js::GCPtrValue> consts() const {
MOZ_ASSERT(hasConsts());
return reinterpret_cast<js::ConstArray*>(data + constsOffset());
return data_->consts();
}
js::ObjectArray* objectsRaw() const {
mozilla::Span<const js::GCPtrObject> objects() const {
MOZ_ASSERT(hasObjects());
return reinterpret_cast<js::ObjectArray*>(data + objectsOffset());
return data_->objects();
}
js::ScopeArray* scopesRaw() const {
return reinterpret_cast<js::ScopeArray*>(data + scopesOffset());
}
js::TryNoteArray* trynotesRaw() const {
mozilla::Span<const JSTryNote> trynotes() const {
MOZ_ASSERT(hasTrynotes());
return reinterpret_cast<js::TryNoteArray*>(data + trynotesOffset());
return data_->tryNotes();
}
js::ScopeNoteArray* scopeNotesRaw() const {
mozilla::Span<const js::ScopeNote> scopeNotes() const {
MOZ_ASSERT(hasScopeNotes());
return reinterpret_cast<js::ScopeNoteArray*>(data + scopeNotesOffset());
return data_->scopeNotes();
}
js::YieldAndAwaitOffsetArray& yieldAndAwaitOffsetsRaw() const {
mozilla::Span<const uint32_t> yieldAndAwaitOffsets() const {
MOZ_ASSERT(hasYieldAndAwaitOffsets());
return *reinterpret_cast<js::YieldAndAwaitOffsetArray*>(data +
yieldAndAwaitOffsetsOffset());
}
public:
mozilla::Span<js::GCPtrValue> consts() const {
js::ConstArray* array = constsRaw();
return mozilla::MakeSpan(array->vector, array->length);
}
mozilla::Span<js::GCPtrObject> objects() const {
js::ObjectArray* array = objectsRaw();
return mozilla::MakeSpan(array->vector, array->length);
}
mozilla::Span<js::GCPtrScope> scopes() const {
js::ScopeArray* array = scopesRaw();
return mozilla::MakeSpan(array->vector, array->length);
}
mozilla::Span<JSTryNote> trynotes() const {
js::TryNoteArray* array = trynotesRaw();
return mozilla::MakeSpan(array->vector, array->length);
}
mozilla::Span<js::ScopeNote> scopeNotes() const {
js::ScopeNoteArray* array = scopeNotesRaw();
return mozilla::MakeSpan(array->vector, array->length);
}
mozilla::Span<uint32_t> yieldAndAwaitOffsets() const {
js::YieldAndAwaitOffsetArray& array = yieldAndAwaitOffsetsRaw();
return mozilla::MakeSpan(&array[0], array.length());
return data_->yieldAndAwaitOffsets();
}
bool hasLoops();
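
A small standalone illustration of the scaled-offset encoding documented above: because every offset into the allocation is a multiple of sizeof(uint32_t), it can be stored divided by 4 in a 4-bit bit-field, with 0 reserved to mean the array is absent. The struct below borrows one field name for flavor but is otherwise a hypothetical sketch, not the full PrivateScriptData machinery.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Offsets into the allocation are always multiples of sizeof(uint32_t), so
// they are stored divided by 4, and 0 is reserved to mean "no such array".
struct PackedOffsets {
    static constexpr size_t SCALE = sizeof(uint32_t);
    static constexpr size_t MAX_OFFSET = 0b1111;  // largest value a 4-bit field holds

    uint32_t constsSpanOffset : 4;
};

uint32_t PackOffset(size_t byteOffset) {
    assert(byteOffset % PackedOffsets::SCALE == 0);
    size_t scaled = byteOffset / PackedOffsets::SCALE;
    assert(scaled <= PackedOffsets::MAX_OFFSET);
    return static_cast<uint32_t>(scaled);
}

size_t UnpackOffset(uint32_t scaled) {
    return scaled * PackedOffsets::SCALE;
}

int main() {
    PackedOffsets offsets = {};

    // A PackedSpan sitting 8 bytes past the start of the allocation packs
    // into the 4-bit field as the value 2.
    offsets.constsSpanOffset = PackOffset(8);
    printf("scaled = %u, byte offset = %zu\n",
           static_cast<unsigned>(offsets.constsSpanOffset),
           UnpackOffset(offsets.constsSpanOffset));
    return 0;
}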

Просмотреть файл

@ -53,6 +53,14 @@ ScrollAnimationBezierPhysics::Update(const TimeStamp& aTime,
mIsFirstIteration = false;
}
void
ScrollAnimationBezierPhysics::ApplyContentShift(const CSSPoint& aShiftDelta)
{
nsPoint shiftDelta = CSSPoint::ToAppUnits(aShiftDelta);
mStartPos += shiftDelta;
mDestination += shiftDelta;
}
TimeDuration
ScrollAnimationBezierPhysics::ComputeDuration(const TimeStamp& aTime)
{

Просмотреть файл

@ -35,6 +35,8 @@ public:
const nsPoint& aDestination,
const nsSize& aCurrentVelocity) override;
void ApplyContentShift(const CSSPoint& aShiftDelta) override;
// Get the velocity at a point in time in nscoords/sec.
nsSize VelocityAt(const TimeStamp& aTime) override;

Просмотреть файл

@ -48,6 +48,14 @@ ScrollAnimationMSDPhysics::Update(const TimeStamp& aTime,
mIsFirstIteration = false;
}
void
ScrollAnimationMSDPhysics::ApplyContentShift(const CSSPoint& aShiftDelta)
{
nsPoint shiftDelta = CSSPoint::ToAppUnits(aShiftDelta);
mStartPos += shiftDelta;
mDestination += shiftDelta;
}
double
ScrollAnimationMSDPhysics::ComputeSpringConstant(const TimeStamp& aTime)
{

Просмотреть файл

@ -25,6 +25,8 @@ public:
const nsPoint& aDestination,
const nsSize& aCurrentVelocity) override;
void ApplyContentShift(const CSSPoint& aShiftDelta) override;
// Get the velocity at a point in time in nscoords/sec.
nsSize VelocityAt(const TimeStamp& aTime) override;

Просмотреть файл

@ -19,6 +19,8 @@ public:
const nsPoint& aDestination,
const nsSize& aCurrentVelocity) = 0;
virtual void ApplyContentShift(const CSSPoint& aShiftDelta) = 0;
// Get the velocity at a point in time in nscoords/sec.
virtual nsSize VelocityAt(const TimeStamp& aTime) = 0;
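
The three physics classes patched above share one contract for the new ApplyContentShift hook: when the scrolled content moves underneath a running animation (say, rows inserted above the scroll position), both the animation's start position and its destination are shifted by the same delta, so the animation still finishes on the same content rather than at a stale coordinate. A toy sketch of that idea with simplified types (assumed names, not the real layout classes):

#include <cstdio>

// Simplified coordinate type; the real interface uses CSSPoint/nsPoint in
// app units.
struct Point { float x; float y; };

// Toy model of the ApplyContentShift contract: translate both endpoints of a
// running scroll animation by the content shift.
class ScrollAnimationSketch {
    Point mStartPos;
    Point mDestination;

  public:
    ScrollAnimationSketch(Point start, Point dest)
      : mStartPos(start), mDestination(dest) {}

    void ApplyContentShift(const Point& aShiftDelta) {
        mStartPos.x += aShiftDelta.x;
        mStartPos.y += aShiftDelta.y;
        mDestination.x += aShiftDelta.x;
        mDestination.y += aShiftDelta.y;
    }

    void Dump() const {
        printf("start=(%.0f,%.0f) dest=(%.0f,%.0f)\n",
               mStartPos.x, mStartPos.y, mDestination.x, mDestination.y);
    }
};

int main() {
    ScrollAnimationSketch anim({0, 100}, {0, 600});
    // 50 CSS pixels of content inserted above the viewport: both endpoints
    // move down by the same amount.
    anim.ApplyContentShift({0, 50});
    anim.Dump();  // prints start=(0,150) dest=(0,650)
    return 0;
}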

Просмотреть файл

@ -0,0 +1,32 @@
<!DOCTYPE HTML>
<!--
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/licenses/publicdomain/
-->
<html reftest-zoom="2" class="reftest-wait">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>test image-rendering auto</title>
<style>
canvas { position:absolute;left:0px;top:0px; }
</style>
<script type="text/javascript">
document.addEventListener("MozReftestInvalidate", draw);
function draw() {
var canvas = document.getElementById("canvas");
var ctx = canvas.getContext("2d");
ctx.fillStyle = "rgb(255,0,0)";
ctx.fillRect(25,25,100,100);
ctx.fillStyle = "rgb(0,255,0)";
ctx.fillRect(25,25,50,50);
document.documentElement.removeAttribute('class');
}
</script>
</head>
<body>
<canvas style="image-rendering: auto;" id="canvas" width="300" height="300"></canvas>
</body>
</html>

Просмотреть файл

@ -0,0 +1,38 @@
<!DOCTYPE HTML>
<!--
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/licenses/publicdomain/
-->
<html reftest-zoom="2" class="reftest-wait">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>test image-rendering script change</title>
<style>
canvas { position:absolute;left:0px;top:0px; }
</style>
<script type="text/javascript">
document.addEventListener("MozReftestInvalidate", updateImageRendering);
function updateImageRendering() {
var canvas = document.getElementById("canvas");
canvas.style.imageRendering = '-moz-crisp-edges';
document.documentElement.removeAttribute('class');
}
</script>
</head>
<body>
<canvas style="image-rendering: auto;" id="canvas" width="300" height="300">
</canvas>
<script type="text/javascript">
var canvas = document.getElementById("canvas");
var ctx = canvas.getContext("2d");
ctx.fillStyle = "rgb(255,0,0)";
ctx.fillRect(25,25,100,100);
ctx.fillStyle = "rgb(0,255,0)";
ctx.fillRect(25,25,50,50);
</script>
</body>
</html>

Просмотреть файл

@ -4,6 +4,9 @@ fuzzy-if(Android,0-8,0-1000) == size-1.html size-1-ref.html
== empty-transaction-1.html empty-transaction-1-ref.html
== image-rendering-test.html image-rendering-ref.html
== image-rendering-script.html image-rendering-ref.html
!= image-rendering-auto.html image-rendering-script.html
== image-shadow.html image-shadow-ref.html
asserts-if(cocoaWidget,0-2) == size-change-1.html size-change-1-ref.html

Просмотреть файл

@ -0,0 +1,28 @@
<!DOCTYPE HTML>
<!--
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/licenses/publicdomain/
-->
<html reftest-zoom="2">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>test list-style-image image-rendering css auto</title>
<style type="text/css">
div {
display: list-item;
list-style-image: url(data:image/gif;base64,R0lGODlhHAAcAMQAAAAAAP///8DP/8bU/8zZ/9Le/9jj/97o/+Tt/Ory9vD48PX96vv/5P//wP//xv//zP//0v//2P//3v///wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAEAABMALAAAAAAcABwAAAWNYCOOZGmeaKquYxSxcGMY8RoJwlujBk7vphsuByz1hr9iQzgkFiHHpkGXekQWCgOhyR0ODAiFhOoodM9oQcExKqffajbJDe+uT/Q67o7Kw/kpfmiAKkxpEjELdQoxCXUHMVFpBTEDXARmXDAOTQQMIgxbQ1QpQgMKciOhOJ8rC6epJq+MNrF9iEq5ug0hADs=);
height: 50px;
border: 1px solid black;
list-style-position: inside;
image-rendering: auto;
}
</style>
</head>
<body>
<div>
</div>
</body>
</html>

Просмотреть файл

@ -0,0 +1,28 @@
<!DOCTYPE HTML>
<!--
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/licenses/publicdomain/
-->
<html reftest-zoom="2">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>test list-style-image image-rendering css -moz-crisp-edges</title>
<style type="text/css">
div {
display: list-item;
list-style-image: url(data:image/gif;base64,R0lGODlhHAAcAMQAAAAAAP///8DP/8bU/8zZ/9Le/9jj/97o/+Tt/Ory9vD48PX96vv/5P//wP//xv//zP//0v//2P//3v///wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAEAABMALAAAAAAcABwAAAWNYCOOZGmeaKquYxSxcGMY8RoJwlujBk7vphsuByz1hr9iQzgkFiHHpkGXekQWCgOhyR0ODAiFhOoodM9oQcExKqffajbJDe+uT/Q67o7Kw/kpfmiAKkxpEjELdQoxCXUHMVFpBTEDXARmXDAOTQQMIgxbQ1QpQgMKciOhOJ8rC6epJq+MNrF9iEq5ug0hADs=);
height: 50px;
border: 1px solid black;
list-style-position: inside;
image-rendering: -moz-crisp-edges;
}
</style>
</head>
<body>
<div>
</div>
</body>
</html>

Просмотреть файл

@ -0,0 +1,27 @@
<!DOCTYPE HTML>
<!--
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/licenses/publicdomain/
-->
<html reftest-zoom="2">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>reference list-style-image image-rendering</title>
<style type="text/css">
div {
height: 50px;
border: 1px solid black;
image-rendering: -moz-crisp-edges;
}
</style>
</head>
<body>
<div>
<img src=" data:image/gif;base64,R0lGODlhHAAcAMQAAAAAAP///8DP/8bU/8zZ/9Le/9jj/97o/+Tt/Ory9vD48PX96vv/5P//wP//xv//zP//0v//2P//3v///wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAEAABMALAAAAAAcABwAAAWNYCOOZGmeaKquYxSxcGMY8RoJwlujBk7vphsuByz1hr9iQzgkFiHHpkGXekQWCgOhyR0ODAiFhOoodM9oQcExKqffajbJDe+uT/Q67o7Kw/kpfmiAKkxpEjELdQoxCXUHMVFpBTEDXARmXDAOTQQMIgxbQ1QpQgMKciOhOJ8rC6epJq+MNrF9iEq5ug0hADs="
>
</div>
</body>
</html>

Просмотреть файл

@ -0,0 +1,37 @@
<!DOCTYPE HTML>
<!--
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/licenses/publicdomain/
-->
<html reftest-zoom="2" class="reftest-wait">
<head>
<meta http-equiv="content-type" content="text/html; charset=UTF-8">
<title>test list-style-image image-rendering script change</title>
<style type="text/css">
div {
display: list-item;
list-style-image: url(data:image/gif;base64,R0lGODlhHAAcAMQAAAAAAP///8DP/8bU/8zZ/9Le/9jj/97o/+Tt/Ory9vD48PX96vv/5P//wP//xv//zP//0v//2P//3v///wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAEAABMALAAAAAAcABwAAAWNYCOOZGmeaKquYxSxcGMY8RoJwlujBk7vphsuByz1hr9iQzgkFiHHpkGXekQWCgOhyR0ODAiFhOoodM9oQcExKqffajbJDe+uT/Q67o7Kw/kpfmiAKkxpEjELdQoxCXUHMVFpBTEDXARmXDAOTQQMIgxbQ1QpQgMKciOhOJ8rC6epJq+MNrF9iEq5ug0hADs=);
height: 50px;
border: 1px solid black;
list-style-position: inside;
image-rendering: auto;
}
</style>
<script type="text/javascript">
document.addEventListener("MozReftestInvalidate", updateImageRendering);
function updateImageRendering() {
var div = document.getElementById("d1");
div.style.imageRendering = '-moz-crisp-edges';
document.documentElement.removeAttribute("class");
}
</script>
</head>
<body>
<div id="d1">
</div>
</body>
</html>

Просмотреть файл

@ -13,3 +13,6 @@ asserts(1) == ol-reversed-1b.html ol-reversed-1-ref.html # bug 478135
== bullet-intrinsic-isize-1.html bullet-intrinsic-isize-1-ref.html
== bullet-intrinsic-isize-2.html bullet-intrinsic-isize-2-ref.html
== bullet-justify-1.html bullet-justify-1-ref.html
== image-rendering-css.html image-rendering-ref.html
== image-rendering-script.html image-rendering-ref.html
!= image-rendering-css.html image-rendering-css-auto.html

39
media/libwebp/AUTHORS Normal file
Просмотреть файл

@ -0,0 +1,39 @@
Contributors:
- Charles Munger (clm at google dot com)
- Christian Duvivier (cduvivier at google dot com)
- Djordje Pesut (djordje dot pesut at imgtec dot com)
- Hui Su (huisu at google dot com)
- James Zern (jzern at google dot com)
- Jan Engelhardt (jengelh at medozas dot de)
- Jehan (jehan at girinstud dot io)
- Johann (johann dot koenig at duck dot com)
- Jovan Zelincevic (jovan dot zelincevic at imgtec dot com)
- Jyrki Alakuijala (jyrki at google dot com)
- Lode Vandevenne (lode at google dot com)
- Lou Quillio (louquillio at google dot com)
- Mans Rullgard (mans at mansr dot com)
- Marcin Kowalczyk (qrczak at google dot com)
- Martin Olsson (mnemo at minimum dot se)
- Mikołaj Zalewski (mikolajz at google dot com)
- Mislav Bradac (mislavm at google dot com)
- Nico Weber (thakis at chromium dot org)
- Noel Chromium (noel at chromium dot org)
- Owen Rodley (orodley at google dot com)
- Parag Salasakar (img dot mips1 at gmail dot com)
- Pascal Massimino (pascal dot massimino at gmail dot com)
- Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
- Pierre Joye (pierre dot php at gmail dot com)
- Sam Clegg (sbc at chromium dot org)
- Scott Hancher (seh at google dot com)
- Scott LaVarnway (slavarnway at google dot com)
- Scott Talbot (s at chikachow dot org)
- Slobodan Prijic (slobodan dot prijic at imgtec dot com)
- Somnath Banerjee (somnath dot banerjee at gmail dot com)
- Sriraman Tallam (tmsriram at google dot com)
- Tamar Levy (tamar dot levy at intel dot com)
- Timothy Gu (timothygu99 at gmail dot com)
- Urvang Joshi (urvang at google dot com)
- Vikas Arora (vikasa at google dot com)
- Vincent Rabaud (vrabaud at google dot com)
- Vlad Tsyrklevich (vtsyrklevich at chromium dot org)
- Yang Zhang (yang dot zhang at arm dot com)

30
media/libwebp/COPYING Normal file
Просмотреть файл

@ -0,0 +1,30 @@
Copyright (c) 2010, Google Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Google nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

3
media/libwebp/MOZCHANGES Normal file
Просмотреть файл

@ -0,0 +1,3 @@
Changes made to pristine libwebp source by mozilla.org developers.
2018/10/04 -- Synced with libwebp-1.0.0 (bug #1294490).

199
media/libwebp/NEWS Normal file
Просмотреть файл

@ -0,0 +1,199 @@
- 4/2/2018: version 1.0.0
This is a binary compatible release.
* lossy encoder improvements to avoid chroma shifts in various circumstances
(issues #308, #340)
* big-endian fixes for decode, RGBA import and WebPPictureDistortion
Tool updates:
gif2webp, anim_diff - default duration behavior (<= 10ms) changed to match
web browsers, transcoding tools (issue #379)
img2webp, webpmux - allow options to be passed in via a file (issue #355)
- 11/24/2017: version 0.6.1
This is a binary compatible release.
* lossless performance and compression improvements + a new 'cruncher' mode
(-m 6 -q 100)
* ARM performance improvements with clang (15-20% w/ndk r15c, issue #339)
* webp-js: emscripten/webassembly based javascript decoder
* miscellaneous bug & build fixes (issue #329, #332, #343, #353, #360, #361,
#363)
Tool updates / additions:
added webpinfo - prints file format information (issue #330)
gif2webp - loop behavior modified to match Chrome M63+ (crbug.com/649264);
'-loop_compatibility' can be used for the old behavior
- 1/26/2017: version 0.6.0
* lossless performance and compression improvements
* miscellaneous performance improvements (SSE2, NEON, MSA)
* webpmux gained a -duration option allowing for frame timing modification
* new img2webp utility allowing a sequence of images to be converted to
animated webp
* API changes:
- libwebp:
WebPPictureSharpARGBToYUVA
WebPPlaneDistortion
- libwebpmux / gif2webp:
WebPAnimEncoderOptions: kmax <= 0 now disables keyframes, kmax == 1
forces all keyframes. See mux.h and the gif2webp
manpage for details.
- 12/13/2016: version 0.5.2
This is a binary compatible release.
This release covers CVE-2016-8888 and CVE-2016-9085.
* further security related hardening in the tools; fixes to
gif2webp/AnimEncoder (issues #310, #314, #316, #322), cwebp/libwebp (issue
#312)
* full libwebp (encoder & decoder) iOS framework; libwebpdecoder
WebP.framework renamed to WebPDecoder.framework (issue #307)
* CMake support for Android Studio (2.2)
* miscellaneous build related fixes (issue #306, #313)
* miscellaneous documentation improvements (issue #225)
* minor lossy encoder fixes and improvements
- 6/14/2016: version 0.5.1
This is a binary compatible release.
* miscellaneous bug fixes (issues #280, #289)
* reverted alpha plane encoding with color cache for compatibility with
libwebp 0.4.0->0.4.3 (issues #291, #298)
* lossless encoding performance improvements
* memory reduction in both lossless encoding and decoding
* force mux output to be in the extended format (VP8X) when undefined chunks
are present (issue #294)
* gradle, cmake build support
* workaround for compiler bug causing 64-bit decode failures on android
devices using clang-3.8 in the r11c NDK
* various WebPAnimEncoder improvements
- 12/17/2015: version 0.5.0
* miscellaneous bug & build fixes (issues #234, #258, #274, #275, #278)
* encoder & decoder speed-ups on x86/ARM/MIPS for lossy & lossless
- note! YUV->RGB conversion was sped-up, but the results will be slightly
different from previous releases
* various lossless encoder improvements
* gif2webp improvements, -min_size option added
* tools fully support input from stdin and output to stdout (issue #168)
* New WebPAnimEncoder API for creating animations
* New WebPAnimDecoder API for decoding animations
* other API changes:
- libwebp:
WebPPictureSmartARGBToYUVA() (-pre 4 in cwebp)
WebPConfig::exact (-exact in cwebp; -alpha_cleanup is now the default)
WebPConfig::near_lossless (-near_lossless in cwebp)
WebPFree() (free'ing webp allocated memory in other languages)
WebPConfigLosslessPreset()
WebPMemoryWriterClear()
- libwebpdemux: removed experimental fragment related fields and functions
- libwebpmux: WebPMuxSetCanvasSize()
* new libwebpextras library with some uncommon import functions:
WebPImportGray/WebPImportRGB565/WebPImportRGB4444
- 10/15/15: version 0.4.4
This is a binary compatible release.
* rescaling out-of-bounds read fix (issue #254)
* various build fixes and improvements (issues #253, #259, #262, #267, #268)
* container documentation update
* gif2webp transparency fix (issue #245)
- 3/3/15: version 0.4.3
This is a binary compatible release.
* Android / gcc / iOS / MSVS build fixes and improvements
* lossless decode fix (issue #239 -- since 0.4.0)
* documentation / vwebp updates for animation
* multi-threading fix (issue #234)
- 10/13/14: version 0.4.2
This is a binary compatible release.
* Android / gcc build fixes
* (Windows) fix reading from stdin and writing to stdout
* gif2webp: miscellaneous fixes
* fix 'alpha-leak' with lossy compression (issue #220)
* the lossless bitstream spec has been amended to reflect the current code
- 7/24/14: version 0.4.1
This is a binary compatible release.
* AArch64 (arm64) & MIPS support/optimizations
* NEON assembly additions:
- ~25% faster lossy decode / encode (-m 4)
- ~10% faster lossless decode
- ~5-10% faster lossless encode (-m 3/4)
* dwebp/vwebp can read from stdin
* cwebp/gif2webp can write to stdout
* cwebp can read webp files; useful if storing sources as webp lossless
- 12/19/13: version 0.4.0
* improved gif2webp tool
* numerous fixes, compression improvement and speed-up
* dither option added to decoder (dwebp -dither 50 ...)
* improved multi-threaded modes (-mt option)
* improved filtering strength determination
* New function: WebPMuxGetCanvasSize
* BMP and TIFF format output added to 'dwebp'
* Significant memory reduction for decoding lossy images with alpha.
* Intertwined decoding of RGB and alpha for a shorter
time-to-first-decoded-pixel.
* WebPIterator has a new member 'has_alpha' denoting whether the frame
contains transparency.
* Container spec amended with new 'blending method' for animation.
- 6/13/13: version 0.3.1
This is a binary compatible release.
* Add incremental decoding support for images containing ALPH and ICCP chunks.
* Python bindings via swig for the simple encode/decode interfaces similar to
Java.
- 3/20/13: version 0.3.0
This is a binary compatible release.
* WebPINewRGB/WebPINewYUVA accept being passed a NULL output buffer
and will perform auto-allocation.
* default filter option is now '-strong -f 60'
* encoding speed-up for lossy methods 3 to 6
* alpha encoding can be done in parallel to lossy using 'cwebp -mt ...'
* color profile, metadata (XMP/EXIF) and animation support finalized in the
container.
* various NEON assembly additions
Tool updates / additions:
* gif2webp added
* vwebp given color profile & animation support
* cwebp can preserve color profile / metadata with '-metadata'
- 10/30/12: version 0.2.1
* Various security related fixes
* cwebp.exe: fix import errors on Windows XP
* enable DLL builds for mingw targets
- 8/3/12: version 0.2.0
* Add support for ARGB -> YUVA conversion for lossless decoder
New functions: WebPINewYUVA, WebPIDecGetYUVA
* Add stats for lossless and alpha encoding
* Security related hardening: allocation and size checks
* Add PAM output support to dwebp
- 7/19/12: version 0.1.99
* This is a pre-release of 0.2.0, not an rc to allow for further
incompatible changes based on user feedback.
* Alpha channel encode/decode support.
* Lossless encoder/decoder.
* Add TIFF input support to cwebp.
Incompatible changes:
* The encode ABI has been modified to support alpha encoding.
* Deprecated function WebPINew() has been removed.
* Decode function signatures have changed to consistently use size_t over
int/uint32_t.
* decode_vp8.h is no longer installed system-wide.
* cwebp will encode the alpha channel if present.
- 9/19/11: version 0.1.3
* Advanced decoding APIs.
* On-the-fly cropping and rescaling of images.
* SSE2 instructions for decoding performance optimizations on x86 based
platforms.
* Support Multi-threaded decoding.
* 40% improvement in Decoding performance.
* Add support for RGB565, RGBA4444 & ARGB image colorspace.
* Better handling of large picture encoding.
- 3/25/11: version 0.1.2
* Incremental decoding: picture can be decoded byte-by-byte if needs be.
* lot of bug-fixes, consolidation and stabilization
- 2/23/11: initial release of version 0.1, with the new encoder
- 9/30/10: initial release version with only the lightweight decoder

media/libwebp/PATENTS Normal file
@@ -0,0 +1,23 @@
Additional IP Rights Grant (Patents)
------------------------------------
"These implementations" means the copyrightable works that implement the WebM
codecs distributed by Google as part of the WebM Project.
Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and otherwise
run, modify and propagate the contents of these implementations of WebM, where
such license applies only to those patent claims, both currently owned by
Google and acquired in the future, licensable by Google that are necessarily
infringed by these implementations of WebM. This grant does not include claims
that would be infringed only as a consequence of further modification of these
implementations. If you or your agent or exclusive licensee institute or order
or agree to the institution of patent litigation or any other patent
enforcement activity against any entity (including a cross-claim or
counterclaim in a lawsuit) alleging that any of these implementations of WebM
or any code incorporated within any of these implementations of WebM
constitute direct or contributory patent infringement, or inducement of
patent infringement, then any patent rights granted to you under this License
for these implementations of WebM shall terminate as of the date such
litigation is filed.

media/libwebp/README Normal file
@@ -0,0 +1,782 @@
__ __ ____ ____ ____
/ \\/ \/ _ \/ _ )/ _ \
\ / __/ _ \ __/
\__\__/\____/\_____/__/ ____ ___
/ _/ / \ \ / _ \/ _/
/ \_/ / / \ \ __/ \__
\____/____/\_____/_____/____/v1.0.0
Description:
============
WebP codec: library to encode and decode images in WebP format. This package
contains the library that can be used in other programs to add WebP support,
as well as the command line tools 'cwebp' and 'dwebp'.
See http://developers.google.com/speed/webp
The latest source tree is available at
https://chromium.googlesource.com/webm/libwebp
It is released under the same license as the WebM project.
See http://www.webmproject.org/license/software/ or the
"COPYING" file for details. An additional intellectual
property rights grant can be found in the file PATENTS.
Building:
=========
Windows build:
--------------
By running:
nmake /f Makefile.vc CFG=release-static RTLIBCFG=static OBJDIR=output
the directory output\release-static\(x64|x86)\bin will contain the tools
cwebp.exe and dwebp.exe. The directory output\release-static\(x64|x86)\lib will
contain the libwebp static library.
The target architecture (x86/x64) is detected by Makefile.vc from the Visual
Studio compiler (cl.exe) available in the system path.
Unix build using makefile.unix:
-------------------------------
On platforms with GNU tools installed (gcc and make), running
make -f makefile.unix
will build the binaries examples/cwebp and examples/dwebp, along
with the static library src/libwebp.a. No system-wide installation
is supplied, as this is a simple alternative to the full installation
system based on the autoconf tools (see below).
Please refer to makefile.unix for additional details and customizations.
Using autoconf tools:
---------------------
Prerequisites:
A compiler (e.g., gcc), make, autoconf, automake, libtool.
On a Debian-like system the following should install everything you need for a
minimal build:
$ sudo apt-get install gcc make autoconf automake libtool
When building from git sources, you will need to run autogen.sh to generate the
configure script.
./configure
make
make install
should be all you need to have the following files
/usr/local/include/webp/decode.h
/usr/local/include/webp/encode.h
/usr/local/include/webp/types.h
/usr/local/lib/libwebp.*
/usr/local/bin/cwebp
/usr/local/bin/dwebp
installed.
Note: A decode-only library, libwebpdecoder, is available using the
'--enable-libwebpdecoder' flag. The encode library is built separately and can
be installed independently using a minor modification in the corresponding
Makefile.am configure files (see comments there). See './configure --help' for
more options.
Building for MIPS Linux:
------------------------
MIPS Linux toolchain stable available releases can be found at:
https://community.imgtec.com/developers/mips/tools/codescape-mips-sdk/available-releases/
# Add toolchain to PATH
export PATH=$PATH:/path/to/toolchain/bin
# 32-bit build for mips32r5 (p5600)
HOST=mips-mti-linux-gnu
MIPS_CFLAGS="-O3 -mips32r5 -mabi=32 -mtune=p5600 -mmsa -mfp64 \
-msched-weight -mload-store-pairs -fPIE"
MIPS_LDFLAGS="-mips32r5 -mabi=32 -mmsa -mfp64 -pie"
# 64-bit build for mips64r6 (i6400)
HOST=mips-img-linux-gnu
MIPS_CFLAGS="-O3 -mips64r6 -mabi=64 -mtune=i6400 -mmsa -mfp64 \
-msched-weight -mload-store-pairs -fPIE"
MIPS_LDFLAGS="-mips64r6 -mabi=64 -mmsa -mfp64 -pie"
./configure --host=${HOST} --build=`config.guess` \
CC="${HOST}-gcc -EL" \
CFLAGS="$MIPS_CFLAGS" \
LDFLAGS="$MIPS_LDFLAGS"
make
make install
CMake:
------
With CMake, you can compile libwebp, cwebp, dwebp, gif2webp, img2webp, webpinfo
and the JS bindings.
Prerequisites:
A compiler (e.g., gcc with autotools) and CMake.
On a Debian-like system the following should install everything you need for a
minimal build:
$ sudo apt-get install build-essential cmake
When building from git sources, you will need to run cmake to generate the
makefiles.
mkdir build && cd build && cmake ../
make
make install
If you also want any of the executables, you will need to enable them through
CMake, e.g.:
cmake -DWEBP_BUILD_CWEBP=ON -DWEBP_BUILD_DWEBP=ON ../
or through your favorite interface (like ccmake or cmake-qt-gui).
Finally, once installed, you can also use WebP in your CMake project by doing:
find_package(WebP)
which will define the CMake variables WebP_INCLUDE_DIRS and WebP_LIBRARIES.
Gradle:
-------
The support for Gradle is minimal: it only helps you compile libwebp, cwebp and
dwebp and webpmux_example.
Prerequisites:
A compiler (e.g., gcc with autotools) and gradle.
On a Debian-like system the following should install everything you need for a
minimal build:
$ sudo apt-get install build-essential gradle
When building from git sources, you will need to run the Gradle wrapper with the
appropriate target, e.g. :
./gradlew buildAllExecutables
SWIG bindings:
--------------
To generate language bindings from swig/libwebp.swig at least swig-1.3
(http://www.swig.org) is required.
Currently the following functions are mapped:
Decode:
WebPGetDecoderVersion
WebPGetInfo
WebPDecodeRGBA
WebPDecodeARGB
WebPDecodeBGRA
WebPDecodeBGR
WebPDecodeRGB
Encode:
WebPGetEncoderVersion
WebPEncodeRGBA
WebPEncodeBGRA
WebPEncodeRGB
WebPEncodeBGR
WebPEncodeLosslessRGBA
WebPEncodeLosslessBGRA
WebPEncodeLosslessRGB
WebPEncodeLosslessBGR
See swig/README for more detailed build instructions.
Java bindings:
To build the swig-generated JNI wrapper code at least JDK-1.5 (or equivalent)
is necessary for enum support. The output is intended to be a shared object /
DLL that can be loaded via System.loadLibrary("webp_jni").
Python bindings:
To build the swig-generated Python extension code at least Python 2.6 is
required. Python < 2.6 may build with some minor changes to libwebp.swig or the
generated code, but is untested.
Encoding tool:
==============
The examples/ directory contains tools for encoding (cwebp) and
decoding (dwebp) images.
The easiest use should look like:
cwebp input.png -q 80 -o output.webp
which will convert the input file to a WebP file using a quality factor of 80
on a 0->100 scale (0 being the lowest quality, 100 being the best; the default
value is 75).
You might want to try the -lossless flag too, which will compress the source
(in RGBA format) without any loss. The -q quality parameter will in this case
control the amount of processing time spent trying to make the output file as
small as possible.
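For instance, a lossless conversion of the same (placeholder) input would be:
   cwebp -lossless input.png -o output_lossless.webp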
A longer list of options is available using the -longhelp command line flag:
> cwebp -longhelp
Usage:
cwebp [-preset <...>] [options] in_file [-o out_file]
If input size (-s) for an image is not specified, it is
assumed to be a PNG, JPEG, TIFF or WebP file.
Options:
-h / -help ............. short help
-H / -longhelp ......... long help
-q <float> ............. quality factor (0:small..100:big), default=75
-alpha_q <int> ......... transparency-compression quality (0..100),
default=100
-preset <string> ....... preset setting, one of:
default, photo, picture,
drawing, icon, text
-preset must come first, as it overwrites other parameters
-z <int> ............... activates lossless preset with given
level in [0:fast, ..., 9:slowest]
-m <int> ............... compression method (0=fast, 6=slowest), default=4
-segments <int> ........ number of segments to use (1..4), default=4
-size <int> ............ target size (in bytes)
-psnr <float> .......... target PSNR (in dB. typically: 42)
-s <int> <int> ......... input size (width x height) for YUV
-sns <int> ............. spatial noise shaping (0:off, 100:max), default=50
-f <int> ............... filter strength (0=off..100), default=60
-sharpness <int> ....... filter sharpness (0:most .. 7:least sharp), default=0
-strong ................ use strong filter instead of simple (default)
-nostrong .............. use simple filter instead of strong
-sharp_yuv ............. use sharper (and slower) RGB->YUV conversion
-partition_limit <int> . limit quality to fit the 512k limit on
the first partition (0=no degradation ... 100=full)
-pass <int> ............ analysis pass number (1..10)
-crop <x> <y> <w> <h> .. crop picture with the given rectangle
-resize <w> <h> ........ resize picture (after any cropping)
-mt .................... use multi-threading if available
-low_memory ............ reduce memory usage (slower encoding)
-map <int> ............. print map of extra info
-print_psnr ............ prints averaged PSNR distortion
-print_ssim ............ prints averaged SSIM distortion
-print_lsim ............ prints local-similarity distortion
-d <file.pgm> .......... dump the compressed output (PGM file)
-alpha_method <int> .... transparency-compression method (0..1), default=1
-alpha_filter <string> . predictive filtering for alpha plane,
one of: none, fast (default) or best
-exact ................. preserve RGB values in transparent area, default=off
-blend_alpha <hex> ..... blend colors against background color
expressed as RGB values written in
hexadecimal, e.g. 0xc0e0d0 for red=0xc0
green=0xe0 and blue=0xd0
-noalpha ............... discard any transparency information
-lossless .............. encode image losslessly, default=off
-near_lossless <int> ... use near-lossless image
preprocessing (0..100=off), default=100
-hint <string> ......... specify image characteristics hint,
one of: photo, picture or graph
-metadata <string> ..... comma separated list of metadata to
copy from the input to the output if present.
Valid values: all, none (default), exif, icc, xmp
-short ................. condense printed message
-quiet ................. don't print anything
-version ............... print version number and exit
-noasm ................. disable all assembly optimizations
-v ..................... verbose, e.g. print encoding/decoding times
-progress .............. report encoding progress
Experimental Options:
-jpeg_like ............. roughly match expected JPEG size
-af .................... auto-adjust filter strength
-pre <int> ............. pre-processing filter
The main options you might want to try in order to further tune the
visual quality are:
-preset
-sns
-f
-m
Namely:
* 'preset' will set up a default encoding configuration targeting a
particular type of input. It should appear first in the list of options,
so that subsequent options can take effect on top of this preset.
Default value is 'default'.
* 'sns' will progressively turn on (when going from 0 to 100) some additional
visual optimizations (like: segmentation map re-enforcement). This option
will balance the bit allocation differently. It tries to take bits from the
"easy" parts of the picture and use them in the "difficult" ones instead.
Usually, raising the sns value (at fixed -q value) leads to larger files,
but with better quality.
Typical value is around '75'.
* 'f' option directly links to the filtering strength used by the codec's
in-loop processing. The higher the value, the smoother the
highly-compressed area will look. This is particularly useful when aiming
at very small files. Typical values are around 20-30. Note that using the
option -strong/-nostrong will change the type of filtering. Use "-f 0" to
turn filtering off.
* 'm' controls the trade-off between encoding speed and quality. Default is 4.
You can try -m 5 or -m 6 to explore more (time-consuming) encoding
possibilities. A lower value will result in faster encoding at the expense
of quality.
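As an illustration (file names are placeholders), these tuning options can be
combined on one command line, with the preset given first:
   cwebp -preset photo -sns 75 -f 30 -m 6 -q 80 input.jpg -o output.webp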
Decoding tool:
==============
There is a decoding sample in examples/dwebp.c which will take
a .webp file and decode it to a PNG image file (amongst other formats).
This is simply to demonstrate the use of the API. You can verify that the
file test.webp decodes to exactly the same image as test_ref.ppm by using:
cd examples
./dwebp test.webp -ppm -o test.ppm
diff test.ppm test_ref.ppm
The full list of options is available using -h:
> dwebp -h
Usage: dwebp in_file [options] [-o out_file]
Decodes the WebP image file to PNG format [Default]
Use following options to convert into alternate image formats:
-pam ......... save the raw RGBA samples as a color PAM
-ppm ......... save the raw RGB samples as a color PPM
-bmp ......... save as uncompressed BMP format
-tiff ........ save as uncompressed TIFF format
-pgm ......... save the raw YUV samples as a grayscale PGM
file with IMC4 layout
-yuv ......... save the raw YUV samples in flat layout
Other options are:
-version ..... print version number and exit
-nofancy ..... don't use the fancy YUV420 upscaler
-nofilter .... disable in-loop filtering
-nodither .... disable dithering
-dither <d> .. dithering strength (in 0..100)
-alpha_dither use alpha-plane dithering if needed
-mt .......... use multi-threading
-crop <x> <y> <w> <h> ... crop output with the given rectangle
-resize <w> <h> ......... scale the output (*after* any cropping)
-flip ........ flip the output vertically
-alpha ....... only save the alpha plane
-incremental . use incremental decoding (useful for tests)
-h ........... this help message
-v ........... verbose (e.g. print encoding/decoding times)
-quiet ....... quiet mode, don't print anything
-noasm ....... disable all assembly optimizations
WebP file analysis tool:
========================
'webpinfo' can be used to print out the chunk level structure and bitstream
header information of WebP files. It can also check if the files are of valid
WebP format.
Usage: webpinfo [options] in_files
Note: there could be multiple input files;
options must come before input files.
Options:
-version ........... Print version number and exit.
-quiet ............. Do not show chunk parsing information.
-diag .............. Show parsing error diagnosis.
-summary ........... Show chunk stats summary.
-bitstream_info .... Parse bitstream header.
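For example (with a placeholder file name):
   webpinfo -summary -bitstream_info image.webp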
Visualization tool:
===================
There's a little self-serve visualization tool called 'vwebp' under the
examples/ directory. It uses OpenGL to open a simple drawing window and show
a decoded WebP file. It's not yet integrated in the automake build system, but
you can try to manually compile it using the recommendations below.
Usage: vwebp in_file [options]
Decodes the WebP image file and visualize it using OpenGL
Options are:
-version ..... print version number and exit
-noicc ....... don't use the icc profile if present
-nofancy ..... don't use the fancy YUV420 upscaler
-nofilter .... disable in-loop filtering
-dither <int> dithering strength (0..100), default=50
-noalphadither disable alpha plane dithering
-mt .......... use multi-threading
-info ........ print info
-h ........... this help message
Keyboard shortcuts:
'c' ................ toggle use of color profile
'i' ................ overlay file information
'd' ................ disable blending & disposal (debug)
'q' / 'Q' / ESC .... quit
Building:
---------
Prerequisites:
1) OpenGL & OpenGL Utility Toolkit (GLUT)
Linux:
$ sudo apt-get install freeglut3-dev mesa-common-dev
Mac + XCode:
- These libraries should be available in the OpenGL / GLUT frameworks.
Windows:
http://freeglut.sourceforge.net/index.php#download
2) (Optional) qcms (Quick Color Management System)
i. Download qcms from Mozilla / Chromium:
http://hg.mozilla.org/mozilla-central/file/0e7639e3bdfb/gfx/qcms
http://src.chromium.org/viewvc/chrome/trunk/src/third_party/qcms
ii. Build and archive the source files as libqcms.a / qcms.lib
iii. Update makefile.unix / Makefile.vc
a) Define WEBP_HAVE_QCMS
b) Update include / library paths to reference the qcms directory.
Build using makefile.unix / Makefile.vc:
$ make -f makefile.unix examples/vwebp
> nmake /f Makefile.vc CFG=release-static \
../obj/x64/release-static/bin/vwebp.exe
Animation creation tool:
========================
The utility 'img2webp' can turn a sequence of input images (PNG, JPEG, ...)
into an animated WebP file. It offers fine control over duration, encoding
modes, etc.
Usage:
img2webp [file-level options] [image files...] [per-frame options...]
File-level options (only used at the start of compression):
-min_size ............ minimize size
-loop <int> .......... loop count (default: 0, = infinite loop)
-kmax <int> .......... maximum number of frames between key-frames
(0=only keyframes)
-kmin <int> .......... minimum number of frames between key-frames
(0=disable key-frames altogether)
-mixed ............... use mixed lossy/lossless automatic mode
-v ................... verbose mode
-h ................... this help
-version ............. print version number and exit
Per-frame options (only used for subsequent images input):
-d <int> ............. frame duration in ms (default: 100)
-lossless ............ use lossless mode (default)
-lossy ............... use lossy mode
-q <float> ........... quality
-m <int> ............. method to use
example: img2webp -loop 2 in0.png -lossy in1.jpg
-d 80 in2.tiff -o out.webp
Animated GIF conversion:
========================
Animated GIF files can be converted to WebP files with animation using the
gif2webp utility available under examples/. The files can then be viewed using
vwebp.
Usage:
gif2webp [options] gif_file -o webp_file
Options:
-h / -help ............. this help
-lossy ................. encode image using lossy compression
-mixed ................. for each frame in the image, pick lossy
or lossless compression heuristically
-q <float> ............. quality factor (0:small..100:big)
-m <int> ............... compression method (0=fast, 6=slowest)
-min_size .............. minimize output size (default:off)
lossless compression by default; can be
combined with -q, -m, -lossy or -mixed
options
-kmin <int> ............ min distance between key frames
-kmax <int> ............ max distance between key frames
-f <int> ............... filter strength (0=off..100)
-metadata <string> ..... comma separated list of metadata to
copy from the input to the output if present
Valid values: all, none, icc, xmp (default)
-loop_compatibility .... use compatibility mode for Chrome
versions prior to M62 (inclusive)
-mt .................... use multi-threading if available
-version ............... print version number and exit
-v ..................... verbose
-quiet ................. don't print anything
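A typical conversion (with placeholder file names) might be:
   gif2webp -q 70 -m 6 animation.gif -o animation.webp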
Building:
---------
With the libgif development files installed, gif2webp can be built using
makefile.unix:
$ make -f makefile.unix examples/gif2webp
or using autoconf:
$ ./configure --enable-everything
$ make
Comparison of animated images:
==============================
Test utility anim_diff under examples/ can be used to compare two animated
images (each can be GIF or WebP).
Usage: anim_diff <image1> <image2> [options]
Options:
-dump_frames <folder> dump decoded frames in PAM format
-min_psnr <float> ... minimum per-frame PSNR
-raw_comparison ..... if this flag is not used, RGB is
premultiplied before comparison
-max_diff <int> ..... maximum allowed difference per channel
between corresponding pixels in subsequent
frames
-h .................. this help
-version ............ print version number and exit
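For example (with placeholder file names):
   anim_diff original.gif converted.webp -min_psnr 35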
Building:
---------
With the libgif development files and a C++ compiler installed, anim_diff can
be built using makefile.unix:
$ make -f makefile.unix examples/anim_diff
or using autoconf:
$ ./configure --enable-everything
$ make
Encoding API:
=============
The main encoding functions are available in the header src/webp/encode.h
The ready-to-use ones are:
size_t WebPEncodeRGB(const uint8_t* rgb, int width, int height, int stride,
float quality_factor, uint8_t** output);
size_t WebPEncodeBGR(const uint8_t* bgr, int width, int height, int stride,
float quality_factor, uint8_t** output);
size_t WebPEncodeRGBA(const uint8_t* rgba, int width, int height, int stride,
float quality_factor, uint8_t** output);
size_t WebPEncodeBGRA(const uint8_t* bgra, int width, int height, int stride,
float quality_factor, uint8_t** output);
They will convert raw RGB samples to a WebP data. The only control supplied
is the quality factor.
There are some variants for using the lossless format:
size_t WebPEncodeLosslessRGB(const uint8_t* rgb, int width, int height,
int stride, uint8_t** output);
size_t WebPEncodeLosslessBGR(const uint8_t* bgr, int width, int height,
int stride, uint8_t** output);
size_t WebPEncodeLosslessRGBA(const uint8_t* rgba, int width, int height,
int stride, uint8_t** output);
size_t WebPEncodeLosslessBGRA(const uint8_t* bgra, int width, int height,
int stride, uint8_t** output);
Of course in this case, no quality factor is needed since the compression
occurs without loss of the input values, at the expense of larger output sizes.
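As a minimal sketch (not part of the upstream API text), assuming the caller
already owns a packed RGB buffer 'rgb' of width x height pixels with a stride
of 3 * width, a call could look like:
   #include "webp/encode.h"
   uint8_t* webp_bytes = NULL;
   const size_t webp_size =
       WebPEncodeRGB(rgb, width, height, 3 * width, 80.0f, &webp_bytes);
   if (webp_size == 0) {
     // ... (encoding error)
   } else {
     // ... (write 'webp_size' bytes from 'webp_bytes' to a file)
   }
   WebPFree(webp_bytes);   // release the buffer allocated by the encoder
The output buffer is allocated by the library and, as noted in the 0.5.0
release notes above, is released with WebPFree().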
Advanced encoding API:
----------------------
A more advanced API is based on the WebPConfig and WebPPicture structures.
WebPConfig contains the encoding settings and is not tied to a particular
picture.
WebPPicture contains input data, on which some WebPConfig will be used for
compression.
The encoding flow looks like:
-------------------------------------- BEGIN PSEUDO EXAMPLE
#include <webp/encode.h>
// Setup a config, starting from a preset and tuning some additional
// parameters
WebPConfig config;
if (!WebPConfigPreset(&config, WEBP_PRESET_PHOTO, quality_factor)) {
  return 0;   // version error
}
// ... additional tuning
config.sns_strength = 90;
config.filter_sharpness = 6;
config_error = WebPValidateConfig(&config); // not mandatory, but useful
// Setup the input data
WebPPicture pic;
if (!WebPPictureInit(&pic)) {
return 0; // version error
}
pic.width = width;
pic.height = height;
// allocated picture of dimension width x height
if (!WebPPictureAllocate(&pic)) {
return 0; // memory error
}
// at this point, 'pic' has been initialized as a container,
// and can receive the Y/U/V samples.
// Alternatively, one could use ready-made import functions like
// WebPPictureImportRGB(), which will take care of memory allocation.
// In any case, past this point, one will have to call
// WebPPictureFree(&pic) to reclaim memory.
// Set up a byte-output write method. WebPMemoryWriter, for instance.
WebPMemoryWriter wrt;
WebPMemoryWriterInit(&wrt); // initialize 'wrt'
pic.writer = WebPMemoryWrite;   // in-memory writer callback from encode.h
pic.custom_ptr = &wrt;          // so the compressed output lands in 'wrt'
// Compress!
int ok = WebPEncode(&config, &pic); // ok = 0 => error occurred!
WebPPictureFree(&pic); // must be called independently of the 'ok' result.
// output data should have been handled by the writer at that point.
// -> compressed data is the memory buffer described by wrt.mem / wrt.size
// deallocate the memory used by compressed data
WebPMemoryWriterClear(&wrt);
-------------------------------------- END PSEUDO EXAMPLE
Decoding API:
=============
This is mainly just one function to call:
#include "webp/decode.h"
uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
int* width, int* height);
Please have a look at the file src/webp/decode.h for the details.
There are variants for decoding in BGR/RGBA/ARGB/BGRA order, along with
decoding to raw Y'CbCr samples. One can also decode the image directly into a
pre-allocated buffer.
To detect a WebP file and gather the picture's dimensions, the function:
int WebPGetInfo(const uint8_t* data, size_t data_size,
int* width, int* height);
is supplied. No decoding is involved when using it.
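A minimal sketch (assuming 'data'/'data_size' hold the contents of a WebP file
already read into memory by the caller):
   #include "webp/decode.h"
   int width = 0, height = 0;
   if (!WebPGetInfo(data, data_size, &width, &height)) {
     // ... (not a WebP bitstream, or header not complete)
   }
   uint8_t* const rgb = WebPDecodeRGB(data, data_size, &width, &height);
   if (rgb == NULL) {
     // ... (decoding error)
   }
   // ... (use the width x height RGB samples in 'rgb')
   WebPFree(rgb);   // release the buffer allocated by the decoder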
Incremental decoding API:
=========================
In the case when data is being progressively transmitted, pictures can still
be incrementally decoded using a slightly more complicated API. Decoder state
is stored into an instance of the WebPIDecoder object. This object can be
created with the purpose of decoding either RGB or Y'CbCr samples.
For instance:
WebPDecBuffer buffer;
WebPInitDecBuffer(&buffer);
buffer.colorspace = MODE_BGR;
...
WebPIDecoder* idec = WebPINewDecoder(&buffer);
As data is made progressively available, this incremental-decoder object
can be used to decode the picture further. There are two (mutually exclusive)
ways to pass freshly arrived data:
either by appending the fresh bytes:
WebPIAppend(idec, fresh_data, size_of_fresh_data);
or by just mentioning the new size of the transmitted data:
WebPIUpdate(idec, buffer, size_of_transmitted_buffer);
Note that 'buffer' can be modified between each call to WebPIUpdate, in
particular when the buffer is resized to accommodate larger data.
These functions will return the decoding status: either VP8_STATUS_SUSPENDED if
decoding is not finished yet or VP8_STATUS_OK when decoding is done. Any other
status is an error condition.
The 'idec' object must always be released (even upon an error condition) by
calling: WebPIDelete(idec).
To retrieve partially decoded picture samples, one must use the corresponding
method: WebPIDecGetRGB or WebPIDecGetYUVA.
It will return the last displayable pixel row.
Lastly, note that decoding can also be performed into a pre-allocated pixel
buffer. This buffer must be passed when creating a WebPIDecoder, calling
WebPINewRGB() or WebPINewYUVA().
Please have a look at the src/webp/decode.h header for further details.
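Putting the pieces above together, a minimal sketch of the byte-appending
variant follows; read_more() is a hypothetical helper that returns the next
received chunk, or NULL at end of stream:
   #include "webp/decode.h"
   WebPDecBuffer buffer;
   WebPInitDecBuffer(&buffer);
   buffer.colorspace = MODE_BGR;
   WebPIDecoder* const idec = WebPINewDecoder(&buffer);
   VP8StatusCode status = VP8_STATUS_SUSPENDED;
   size_t chunk_size;
   const uint8_t* chunk;
   while (status == VP8_STATUS_SUSPENDED &&
          (chunk = read_more(&chunk_size)) != NULL) {
     status = WebPIAppend(idec, chunk, chunk_size);
     int last_row = 0, w = 0, h = 0, stride = 0;
     const uint8_t* const bgr =
         WebPIDecGetRGB(idec, &last_row, &w, &h, &stride);
     if (bgr != NULL) {
       // ... (rows [0, last_row) of 'bgr' are displayable at this point)
     }
   }
   WebPIDelete(idec);            // always release the decoder
   WebPFreeDecBuffer(&buffer);   // release the decoded samples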
Advanced Decoding API:
======================
WebP decoding supports an advanced API which provides on-the-fly cropping and
rescaling, which is especially useful in memory-constrained environments like
mobile phones. Basically, the memory usage will scale with the output's size,
not the input's, when one only needs a quick preview or a zoomed in portion of
an otherwise too-large picture. Some CPU can be saved too, incidentally.
-------------------------------------- BEGIN PSEUDO EXAMPLE
// A) Init a configuration object
WebPDecoderConfig config;
CHECK(WebPInitDecoderConfig(&config));
// B) optional: retrieve the bitstream's features.
CHECK(WebPGetFeatures(data, data_size, &config.input) == VP8_STATUS_OK);
// C) Adjust 'config' options, if needed
config.options.no_fancy_upsampling = 1;
config.options.use_scaling = 1;
config.options.scaled_width = scaledWidth();
config.options.scaled_height = scaledHeight();
// etc.
// D) Specify 'config' output options for specifying output colorspace.
// Optionally the external image decode buffer can also be specified.
config.output.colorspace = MODE_BGRA;
// Optionally, the config.output can be pointed to an external buffer as
// well for decoding the image. This externally supplied memory buffer
// should be big enough to store the decoded picture.
config.output.u.RGBA.rgba = (uint8_t*) memory_buffer;
config.output.u.RGBA.stride = scanline_stride;
config.output.u.RGBA.size = total_size_of_the_memory_buffer;
config.output.is_external_memory = 1;
// E) Decode the WebP image. There are two variants w.r.t decoding image.
// The first one (E.1) decodes the full image and the second one (E.2) is
// used to incrementally decode the image using small input buffers.
// Any one of these steps can be used to decode the WebP image.
// E.1) Decode full image.
CHECK(WebPDecode(data, data_size, &config) == VP8_STATUS_OK);
// E.2) Decode image incrementally.
WebPIDecoder* const idec = WebPIDecode(NULL, NULL, &config);
CHECK(idec != NULL);
while (bytes_remaining > 0) {
VP8StatusCode status = WebPIAppend(idec, input, bytes_read);
if (status == VP8_STATUS_OK || status == VP8_STATUS_SUSPENDED) {
bytes_remaining -= bytes_read;
} else {
break;
}
}
WebPIDelete(idec);
// F) Decoded image is now in config.output (and config.output.u.RGBA).
// It can be saved, displayed or otherwise processed.
// G) Reclaim memory allocated in config's object. It's safe to call
// this function even if the memory is external and wasn't allocated
// by WebPDecode().
WebPFreeDecBuffer(&config.output);
-------------------------------------- END PSEUDO EXAMPLE
Bugs:
=====
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/
Discuss:
========
Email: webp-discuss@webmproject.org
Web: http://groups.google.com/a/webmproject.org/group/webp-discuss

media/libwebp/README.mux Normal file
@@ -0,0 +1,227 @@
 __ __ ____ ____ ____ __ __ _ __ __
/ \\/ \/ _ \/ _ \/ _ \/ \ \/ \___/_ / _\
\ / __/ _ \ __/ / / (_/ /__
\__\__/\_____/_____/__/ \__//_/\_____/__/___/v1.0.0
Description:
============
WebPMux: set of two libraries 'Mux' and 'Demux' for creation, extraction and
manipulation of an extended format WebP file, which can have features like
color profile, metadata and animation. Reference command-line tools 'webpmux'
and 'vwebp' as well as the WebP container specification
'doc/webp-container-spec.txt' are also provided in this package.
WebP Mux tool:
==============
The examples/ directory contains a tool (webpmux) for manipulating WebP
files. The webpmux tool can be used to create an extended format WebP file and
also to extract or strip relevant data from such a file.
A list of options is available using the -help command line flag:
> webpmux -help
Usage: webpmux -get GET_OPTIONS INPUT -o OUTPUT
webpmux -set SET_OPTIONS INPUT -o OUTPUT
webpmux -duration DURATION_OPTIONS [-duration ...]
INPUT -o OUTPUT
webpmux -strip STRIP_OPTIONS INPUT -o OUTPUT
webpmux -frame FRAME_OPTIONS [-frame...] [-loop LOOP_COUNT]
[-bgcolor BACKGROUND_COLOR] -o OUTPUT
webpmux -info INPUT
webpmux [-h|-help]
webpmux -version
webpmux argument_file_name
GET_OPTIONS:
Extract relevant data:
icc get ICC profile
exif get EXIF metadata
xmp get XMP metadata
frame n get nth frame
SET_OPTIONS:
Set color profile/metadata:
icc file.icc set ICC profile
exif file.exif set EXIF metadata
xmp file.xmp set XMP metadata
where: 'file.icc' contains the ICC profile to be set,
'file.exif' contains the EXIF metadata to be set
'file.xmp' contains the XMP metadata to be set
DURATION_OPTIONS:
Set duration of selected frames:
duration set duration for each frame
duration,frame set duration of a particular frame
duration,start,end set duration of frames in the
interval [start,end])
where: 'duration' is the duration in milliseconds
'start' is the start frame index
'end' is the inclusive end frame index
The special 'end' value '0' means: last frame.
STRIP_OPTIONS:
Strip color profile/metadata:
icc strip ICC profile
exif strip EXIF metadata
xmp strip XMP metadata
FRAME_OPTIONS(i):
Create animation:
file_i +di+[xi+yi[+mi[bi]]]
where: 'file_i' is the i'th animation frame (WebP format),
'di' is the pause duration before next frame,
'xi','yi' specify the image offset for this frame,
'mi' is the dispose method for this frame (0 or 1),
'bi' is the blending method for this frame (+b or -b)
LOOP_COUNT:
Number of times to repeat the animation.
Valid range is 0 to 65535 [Default: 0 (infinite)].
BACKGROUND_COLOR:
Background color of the canvas.
A,R,G,B
where: 'A', 'R', 'G' and 'B' are integers in the range 0 to 255 specifying
the Alpha, Red, Green and Blue component values respectively
[Default: 255,255,255,255]
INPUT & OUTPUT are in WebP format.
Note: The nature of EXIF, XMP and ICC data is not checked and is assumed to be
valid.
Note: if a single file name is passed as the argument, the arguments will be
tokenized from this file. The file name must not start with the character '-'.
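For instance (with placeholder file names), setting an ICC profile, extracting
a frame and inspecting a file could look like:
   webpmux -set icc image_profile.icc in.webp -o with_icc.webp
   webpmux -get frame 2 anim.webp -o frame_2.webp
   webpmux -info anim.webp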
Visualization tool:
===================
The examples/ directory also contains a tool (vwebp) for viewing WebP files.
It decodes the image and visualizes it using OpenGL. See the libwebp README
for details on building and running this program.
Mux API:
========
The Mux API contains methods for adding data to and reading data from WebP
files. This API currently supports XMP/EXIF metadata, ICC profile and animation.
Other features may be added in subsequent releases.
Example#1 (pseudo code): Creating a WebPMux object with image data, color
profile and XMP metadata.
int copy_data = 0;
WebPMux* mux = WebPMuxNew();
// ... (Prepare image data).
WebPMuxSetImage(mux, &image, copy_data);
// ... (Prepare ICC profile data).
WebPMuxSetChunk(mux, "ICCP", &icc_profile, copy_data);
// ... (Prepare XMP metadata).
WebPMuxSetChunk(mux, "XMP ", &xmp, copy_data);
// Get data from mux in WebP RIFF format.
WebPMuxAssemble(mux, &output_data);
WebPMuxDelete(mux);
// ... (Consume output_data; e.g. write output_data.bytes to file).
WebPDataClear(&output_data);
Example#2 (pseudo code): Get image and color profile data from a WebP file.
int copy_data = 0;
// ... (Read data from file).
WebPMux* mux = WebPMuxCreate(&data, copy_data);
WebPMuxGetFrame(mux, 1, &image);
// ... (Consume image; e.g. call WebPDecode() to decode the data).
WebPMuxGetChunk(mux, "ICCP", &icc_profile);
// ... (Consume icc_profile).
WebPMuxDelete(mux);
free(data);
For a detailed Mux API reference, please refer to the header file
(src/webp/mux.h).
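Both examples above leave the WebPData handling implicit; a minimal sketch of
how such an object is typically filled (using the helpers from
src/webp/mux_types.h, with 'file_bytes'/'file_size' as placeholders for a
buffer read by the caller):
   WebPData data;
   WebPDataInit(&data);        // zero-initializes the structure
   data.bytes = file_bytes;    // caller-owned buffer holding the WebP file
   data.size = file_size;
   // ... (e.g. pass '&data' to WebPMuxCreate(), as in Example#2 above).
   // With copy_data == 0 the mux keeps referencing this buffer, so it must
   // remain valid for the lifetime of the mux object.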
Demux API:
==========
The Demux API enables extraction of images and extended format data from
WebP files. This API currently supports reading of XMP/EXIF metadata, ICC
profile and animated images. Other features may be added in subsequent
releases.
Code example: Demuxing WebP data to extract all the frames, ICC profile
and EXIF/XMP metadata.
WebPDemuxer* demux = WebPDemux(&webp_data);
uint32_t width = WebPDemuxGetI(demux, WEBP_FF_CANVAS_WIDTH);
uint32_t height = WebPDemuxGetI(demux, WEBP_FF_CANVAS_HEIGHT);
// ... (Get information about the features present in the WebP file).
uint32_t flags = WebPDemuxGetI(demux, WEBP_FF_FORMAT_FLAGS);
// ... (Iterate over all frames).
WebPIterator iter;
if (WebPDemuxGetFrame(demux, 1, &iter)) {
do {
// ... (Consume 'iter'; e.g. Decode 'iter.fragment' with WebPDecode(),
// ... and get other frame properties like width, height, offsets etc.
// ... see 'struct WebPIterator' below for more info).
} while (WebPDemuxNextFrame(&iter));
WebPDemuxReleaseIterator(&iter);
}
// ... (Extract metadata).
WebPChunkIterator chunk_iter;
if (flags & ICCP_FLAG) WebPDemuxGetChunk(demux, "ICCP", 1, &chunk_iter);
// ... (Consume the ICC profile in 'chunk_iter.chunk').
WebPDemuxReleaseChunkIterator(&chunk_iter);
if (flags & EXIF_FLAG) WebPDemuxGetChunk(demux, "EXIF", 1, &chunk_iter);
// ... (Consume the EXIF metadata in 'chunk_iter.chunk').
WebPDemuxReleaseChunkIterator(&chunk_iter);
if (flags & XMP_FLAG) WebPDemuxGetChunk(demux, "XMP ", 1, &chunk_iter);
// ... (Consume the XMP metadata in 'chunk_iter.chunk').
WebPDemuxReleaseChunkIterator(&chunk_iter);
WebPDemuxDelete(demux);
For a detailed Demux API reference, please refer to the header file
(src/webp/demux.h).
AnimEncoder API:
================
The AnimEncoder API can be used to create animated WebP images.
Code example:
WebPAnimEncoderOptions enc_options;
WebPAnimEncoderOptionsInit(&enc_options);
// ... (Tune 'enc_options' as needed).
WebPAnimEncoder* enc = WebPAnimEncoderNew(width, height, &enc_options);
while(<there are more frames>) {
WebPConfig config;
WebPConfigInit(&config);
// ... (Tune 'config' as needed).
WebPAnimEncoderAdd(enc, frame, duration, &config);
}
WebPAnimEncoderAssemble(enc, webp_data);
WebPAnimEncoderDelete(enc);
// ... (Write the 'webp_data' to a file, or re-mux it further).
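Note that, per the documentation in src/webp/mux.h, the last call to
WebPAnimEncoderAdd() is normally made with a NULL frame (and no config) to
flush the encoder before WebPAnimEncoderAssemble(), and the assembled
'webp_data' is released with WebPDataClear() once written out. A sketch of
those two extra lines, with 'end_ts_ms' a hypothetical timestamp at which the
last frame ends:
   WebPAnimEncoderAdd(enc, NULL, end_ts_ms, NULL);   // flush: no more frames
   ...
   WebPDataClear(webp_data);                         // after writing it out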
For a detailed AnimEncoder API reference, please refer to the header file
(src/webp/mux.h).
Bugs:
=====
Please report all bugs to the issue tracker:
https://bugs.chromium.org/p/webp
Patches welcome! See this page to get started:
http://www.webmproject.org/code/contribute/submitting-patches/
Discuss:
========
Email: webp-discuss@webmproject.org
Web: http://groups.google.com/a/webmproject.org/group/webp-discuss

media/libwebp/moz.build Normal file
@@ -0,0 +1,28 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
with Files('**'):
BUG_COMPONENT = ('Core', 'ImageLib')
EXPORTS.webp += [
'src/webp/decode.h',
'src/webp/demux.h',
'src/webp/mux_types.h',
'src/webp/types.h',
]
DIRS += [
'src/dec',
'src/demux',
'src/dsp',
'src/moz',
'src/utils',
]
FINAL_LIBRARY = 'gkmedias'
# We allow warnings for third-party code that can be updated from upstream.
AllowCompilerWarnings()

@@ -0,0 +1,232 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Alpha-plane decompression.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "src/dec/alphai_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dsp/dsp.h"
#include "src/utils/quant_levels_dec_utils.h"
#include "src/utils/utils.h"
#include "src/webp/format_constants.h"
//------------------------------------------------------------------------------
// ALPHDecoder object.
// Allocates a new alpha decoder instance.
static ALPHDecoder* ALPHNew(void) {
ALPHDecoder* const dec = (ALPHDecoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
return dec;
}
// Clears and deallocates an alpha decoder instance.
static void ALPHDelete(ALPHDecoder* const dec) {
if (dec != NULL) {
VP8LDelete(dec->vp8l_dec_);
dec->vp8l_dec_ = NULL;
WebPSafeFree(dec);
}
}
//------------------------------------------------------------------------------
// Decoding.
// Initialize alpha decoding by parsing the alpha header and decoding the image
// header for alpha data stored using lossless compression.
// Returns false in case of error in alpha header (data too short, invalid
// compression method or filter, error in lossless header data etc).
static int ALPHInit(ALPHDecoder* const dec, const uint8_t* data,
size_t data_size, const VP8Io* const src_io,
uint8_t* output) {
int ok = 0;
const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
int rsrv;
VP8Io* const io = &dec->io_;
assert(data != NULL && output != NULL && src_io != NULL);
VP8FiltersInit();
dec->output_ = output;
dec->width_ = src_io->width;
dec->height_ = src_io->height;
assert(dec->width_ > 0 && dec->height_ > 0);
if (data_size <= ALPHA_HEADER_LEN) {
return 0;
}
dec->method_ = (data[0] >> 0) & 0x03;
dec->filter_ = (WEBP_FILTER_TYPE)((data[0] >> 2) & 0x03);
dec->pre_processing_ = (data[0] >> 4) & 0x03;
rsrv = (data[0] >> 6) & 0x03;
if (dec->method_ < ALPHA_NO_COMPRESSION ||
dec->method_ > ALPHA_LOSSLESS_COMPRESSION ||
dec->filter_ >= WEBP_FILTER_LAST ||
dec->pre_processing_ > ALPHA_PREPROCESSED_LEVELS ||
rsrv != 0) {
return 0;
}
// Copy the necessary parameters from src_io to io
VP8InitIo(io);
WebPInitCustomIo(NULL, io);
io->opaque = dec;
io->width = src_io->width;
io->height = src_io->height;
io->use_cropping = src_io->use_cropping;
io->crop_left = src_io->crop_left;
io->crop_right = src_io->crop_right;
io->crop_top = src_io->crop_top;
io->crop_bottom = src_io->crop_bottom;
// No need to copy the scaling parameters.
if (dec->method_ == ALPHA_NO_COMPRESSION) {
const size_t alpha_decoded_size = dec->width_ * dec->height_;
ok = (alpha_data_size >= alpha_decoded_size);
} else {
assert(dec->method_ == ALPHA_LOSSLESS_COMPRESSION);
ok = VP8LDecodeAlphaHeader(dec, alpha_data, alpha_data_size);
}
return ok;
}
// Decodes, unfilters and dequantizes *at least* 'num_rows' rows of alpha
// starting from row number 'row'. It assumes that rows up to (row - 1) have
// already been decoded.
// Returns false in case of bitstream error.
static int ALPHDecode(VP8Decoder* const dec, int row, int num_rows) {
ALPHDecoder* const alph_dec = dec->alph_dec_;
const int width = alph_dec->width_;
const int height = alph_dec->io_.crop_bottom;
if (alph_dec->method_ == ALPHA_NO_COMPRESSION) {
int y;
const uint8_t* prev_line = dec->alpha_prev_line_;
const uint8_t* deltas = dec->alpha_data_ + ALPHA_HEADER_LEN + row * width;
uint8_t* dst = dec->alpha_plane_ + row * width;
assert(deltas <= &dec->alpha_data_[dec->alpha_data_size_]);
if (alph_dec->filter_ != WEBP_FILTER_NONE) {
assert(WebPUnfilters[alph_dec->filter_] != NULL);
for (y = 0; y < num_rows; ++y) {
WebPUnfilters[alph_dec->filter_](prev_line, deltas, dst, width);
prev_line = dst;
dst += width;
deltas += width;
}
} else {
for (y = 0; y < num_rows; ++y) {
memcpy(dst, deltas, width * sizeof(*dst));
prev_line = dst;
dst += width;
deltas += width;
}
}
dec->alpha_prev_line_ = prev_line;
} else { // alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION
assert(alph_dec->vp8l_dec_ != NULL);
if (!VP8LDecodeAlphaImageStream(alph_dec, row + num_rows)) {
return 0;
}
}
if (row + num_rows >= height) {
dec->is_alpha_decoded_ = 1;
}
return 1;
}
static int AllocateAlphaPlane(VP8Decoder* const dec, const VP8Io* const io) {
const int stride = io->width;
const int height = io->crop_bottom;
const uint64_t alpha_size = (uint64_t)stride * height;
assert(dec->alpha_plane_mem_ == NULL);
dec->alpha_plane_mem_ =
(uint8_t*)WebPSafeMalloc(alpha_size, sizeof(*dec->alpha_plane_));
if (dec->alpha_plane_mem_ == NULL) {
return 0;
}
dec->alpha_plane_ = dec->alpha_plane_mem_;
dec->alpha_prev_line_ = NULL;
return 1;
}
void WebPDeallocateAlphaMemory(VP8Decoder* const dec) {
assert(dec != NULL);
WebPSafeFree(dec->alpha_plane_mem_);
dec->alpha_plane_mem_ = NULL;
dec->alpha_plane_ = NULL;
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
}
//------------------------------------------------------------------------------
// Main entry point.
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
const VP8Io* const io,
int row, int num_rows) {
const int width = io->width;
const int height = io->crop_bottom;
assert(dec != NULL && io != NULL);
if (row < 0 || num_rows <= 0 || row + num_rows > height) {
return NULL; // sanity check.
}
if (!dec->is_alpha_decoded_) {
if (dec->alph_dec_ == NULL) { // Initialize decoder.
dec->alph_dec_ = ALPHNew();
if (dec->alph_dec_ == NULL) return NULL;
if (!AllocateAlphaPlane(dec, io)) goto Error;
if (!ALPHInit(dec->alph_dec_, dec->alpha_data_, dec->alpha_data_size_,
io, dec->alpha_plane_)) {
goto Error;
}
// if we allowed use of alpha dithering, check whether it's needed at all
if (dec->alph_dec_->pre_processing_ != ALPHA_PREPROCESSED_LEVELS) {
dec->alpha_dithering_ = 0; // disable dithering
} else {
num_rows = height - row; // decode everything in one pass
}
}
assert(dec->alph_dec_ != NULL);
assert(row + num_rows <= height);
if (!ALPHDecode(dec, row, num_rows)) goto Error;
if (dec->is_alpha_decoded_) { // finished?
ALPHDelete(dec->alph_dec_);
dec->alph_dec_ = NULL;
if (dec->alpha_dithering_ > 0) {
uint8_t* const alpha = dec->alpha_plane_ + io->crop_top * width
+ io->crop_left;
if (!WebPDequantizeLevels(alpha,
io->crop_right - io->crop_left,
io->crop_bottom - io->crop_top,
width, dec->alpha_dithering_)) {
goto Error;
}
}
}
}
// Return a pointer to the current decoded row.
return dec->alpha_plane_ + row * width;
Error:
WebPDeallocateAlphaMemory(dec);
return NULL;
}

@@ -0,0 +1,54 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Alpha decoder: internal header.
//
// Author: Urvang (urvang@google.com)
#ifndef WEBP_DEC_ALPHAI_DEC_H_
#define WEBP_DEC_ALPHAI_DEC_H_
#include "src/dec/webpi_dec.h"
#include "src/utils/filters_utils.h"
#ifdef __cplusplus
extern "C" {
#endif
struct VP8LDecoder; // Defined in dec/vp8li.h.
typedef struct ALPHDecoder ALPHDecoder;
struct ALPHDecoder {
int width_;
int height_;
int method_;
WEBP_FILTER_TYPE filter_;
int pre_processing_;
struct VP8LDecoder* vp8l_dec_;
VP8Io io_;
int use_8b_decode_; // Although alpha channel requires only 1 byte per
// pixel, sometimes VP8LDecoder may need to allocate
// 4 bytes per pixel internally during decode.
uint8_t* output_;
const uint8_t* prev_line_; // last output row (or NULL)
};
//------------------------------------------------------------------------------
// internal functions. Not public.
// Deallocate memory associated to dec->alpha_plane_ decoding
void WebPDeallocateAlphaMemory(VP8Decoder* const dec);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* WEBP_DEC_ALPHAI_DEC_H_ */

@@ -0,0 +1,311 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Everything about WebPDecBuffer
//
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "src/dec/vp8i_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
// WebPDecBuffer
// Number of bytes per pixel for the different color-spaces.
static const uint8_t kModeBpp[MODE_LAST] = {
3, 4, 3, 4, 4, 2, 2,
4, 4, 4, 2, // pre-multiplied modes
1, 1 };
// Check that webp_csp_mode is within the bounds of WEBP_CSP_MODE.
// Convert to an integer to handle both the unsigned/signed enum cases
// without the need for casting to remove type limit warnings.
static int IsValidColorspace(int webp_csp_mode) {
return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST);
}
// strictly speaking, the very last (or first, if flipped) row
// doesn't require padding.
#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \
((uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH))
static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) {
int ok = 1;
const WEBP_CSP_MODE mode = buffer->colorspace;
const int width = buffer->width;
const int height = buffer->height;
if (!IsValidColorspace(mode)) {
ok = 0;
} else if (!WebPIsRGBMode(mode)) { // YUV checks
const WebPYUVABuffer* const buf = &buffer->u.YUVA;
const int uv_width = (width + 1) / 2;
const int uv_height = (height + 1) / 2;
const int y_stride = abs(buf->y_stride);
const int u_stride = abs(buf->u_stride);
const int v_stride = abs(buf->v_stride);
const int a_stride = abs(buf->a_stride);
const uint64_t y_size = MIN_BUFFER_SIZE(width, height, y_stride);
const uint64_t u_size = MIN_BUFFER_SIZE(uv_width, uv_height, u_stride);
const uint64_t v_size = MIN_BUFFER_SIZE(uv_width, uv_height, v_stride);
const uint64_t a_size = MIN_BUFFER_SIZE(width, height, a_stride);
ok &= (y_size <= buf->y_size);
ok &= (u_size <= buf->u_size);
ok &= (v_size <= buf->v_size);
ok &= (y_stride >= width);
ok &= (u_stride >= uv_width);
ok &= (v_stride >= uv_width);
ok &= (buf->y != NULL);
ok &= (buf->u != NULL);
ok &= (buf->v != NULL);
if (mode == MODE_YUVA) {
ok &= (a_stride >= width);
ok &= (a_size <= buf->a_size);
ok &= (buf->a != NULL);
}
} else { // RGB checks
const WebPRGBABuffer* const buf = &buffer->u.RGBA;
const int stride = abs(buf->stride);
const uint64_t size = MIN_BUFFER_SIZE(width, height, stride);
ok &= (size <= buf->size);
ok &= (stride >= width * kModeBpp[mode]);
ok &= (buf->rgba != NULL);
}
return ok ? VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM;
}
#undef MIN_BUFFER_SIZE
static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) {
const int w = buffer->width;
const int h = buffer->height;
const WEBP_CSP_MODE mode = buffer->colorspace;
if (w <= 0 || h <= 0 || !IsValidColorspace(mode)) {
return VP8_STATUS_INVALID_PARAM;
}
if (buffer->is_external_memory <= 0 && buffer->private_memory == NULL) {
uint8_t* output;
int uv_stride = 0, a_stride = 0;
uint64_t uv_size = 0, a_size = 0, total_size;
// We need memory and it hasn't been allocated yet.
// => initialize output buffer, now that dimensions are known.
int stride;
uint64_t size;
if ((uint64_t)w * kModeBpp[mode] >= (1ull << 32)) {
return VP8_STATUS_INVALID_PARAM;
}
stride = w * kModeBpp[mode];
size = (uint64_t)stride * h;
if (!WebPIsRGBMode(mode)) {
uv_stride = (w + 1) / 2;
uv_size = (uint64_t)uv_stride * ((h + 1) / 2);
if (mode == MODE_YUVA) {
a_stride = w;
a_size = (uint64_t)a_stride * h;
}
}
total_size = size + 2 * uv_size + a_size;
// Security/sanity checks
output = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*output));
if (output == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
buffer->private_memory = output;
if (!WebPIsRGBMode(mode)) { // YUVA initialization
WebPYUVABuffer* const buf = &buffer->u.YUVA;
buf->y = output;
buf->y_stride = stride;
buf->y_size = (size_t)size;
buf->u = output + size;
buf->u_stride = uv_stride;
buf->u_size = (size_t)uv_size;
buf->v = output + size + uv_size;
buf->v_stride = uv_stride;
buf->v_size = (size_t)uv_size;
if (mode == MODE_YUVA) {
buf->a = output + size + 2 * uv_size;
}
buf->a_size = (size_t)a_size;
buf->a_stride = a_stride;
} else { // RGBA initialization
WebPRGBABuffer* const buf = &buffer->u.RGBA;
buf->rgba = output;
buf->stride = stride;
buf->size = (size_t)size;
}
}
return CheckDecBuffer(buffer);
}
VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer) {
if (buffer == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
if (WebPIsRGBMode(buffer->colorspace)) {
WebPRGBABuffer* const buf = &buffer->u.RGBA;
buf->rgba += (buffer->height - 1) * buf->stride;
buf->stride = -buf->stride;
} else {
WebPYUVABuffer* const buf = &buffer->u.YUVA;
const int H = buffer->height;
buf->y += (H - 1) * buf->y_stride;
buf->y_stride = -buf->y_stride;
buf->u += ((H - 1) >> 1) * buf->u_stride;
buf->u_stride = -buf->u_stride;
buf->v += ((H - 1) >> 1) * buf->v_stride;
buf->v_stride = -buf->v_stride;
if (buf->a != NULL) {
buf->a += (H - 1) * buf->a_stride;
buf->a_stride = -buf->a_stride;
}
}
return VP8_STATUS_OK;
}
VP8StatusCode WebPAllocateDecBuffer(int width, int height,
const WebPDecoderOptions* const options,
WebPDecBuffer* const buffer) {
VP8StatusCode status;
if (buffer == NULL || width <= 0 || height <= 0) {
return VP8_STATUS_INVALID_PARAM;
}
if (options != NULL) { // First, apply options if there is any.
if (options->use_cropping) {
const int cw = options->crop_width;
const int ch = options->crop_height;
const int x = options->crop_left & ~1;
const int y = options->crop_top & ~1;
if (x < 0 || y < 0 || cw <= 0 || ch <= 0 ||
x + cw > width || y + ch > height) {
return VP8_STATUS_INVALID_PARAM; // out of frame boundary.
}
width = cw;
height = ch;
}
if (options->use_scaling) {
#if !defined(WEBP_REDUCE_SIZE)
int scaled_width = options->scaled_width;
int scaled_height = options->scaled_height;
if (!WebPRescalerGetScaledDimensions(
width, height, &scaled_width, &scaled_height)) {
return VP8_STATUS_INVALID_PARAM;
}
width = scaled_width;
height = scaled_height;
#else
return VP8_STATUS_INVALID_PARAM; // rescaling not supported
#endif
}
}
buffer->width = width;
buffer->height = height;
// Then, allocate buffer for real.
status = AllocateBuffer(buffer);
if (status != VP8_STATUS_OK) return status;
// Use the stride trick if vertical flip is needed.
if (options != NULL && options->flip) {
status = WebPFlipBuffer(buffer);
}
return status;
}
//------------------------------------------------------------------------------
// constructors / destructors
int WebPInitDecBufferInternal(WebPDecBuffer* buffer, int version) {
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return 0; // version mismatch
}
if (buffer == NULL) return 0;
memset(buffer, 0, sizeof(*buffer));
return 1;
}
void WebPFreeDecBuffer(WebPDecBuffer* buffer) {
if (buffer != NULL) {
if (buffer->is_external_memory <= 0) {
WebPSafeFree(buffer->private_memory);
}
buffer->private_memory = NULL;
}
}
void WebPCopyDecBuffer(const WebPDecBuffer* const src,
WebPDecBuffer* const dst) {
if (src != NULL && dst != NULL) {
*dst = *src;
if (src->private_memory != NULL) {
dst->is_external_memory = 1; // dst buffer doesn't own the memory.
dst->private_memory = NULL;
}
}
}
// Copy and transfer ownership from src to dst (beware of parameter order!)
void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst) {
if (src != NULL && dst != NULL) {
*dst = *src;
if (src->private_memory != NULL) {
src->is_external_memory = 1; // src relinquishes ownership
src->private_memory = NULL;
}
}
}
VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src_buf,
WebPDecBuffer* const dst_buf) {
assert(src_buf != NULL && dst_buf != NULL);
assert(src_buf->colorspace == dst_buf->colorspace);
dst_buf->width = src_buf->width;
dst_buf->height = src_buf->height;
if (CheckDecBuffer(dst_buf) != VP8_STATUS_OK) {
return VP8_STATUS_INVALID_PARAM;
}
if (WebPIsRGBMode(src_buf->colorspace)) {
const WebPRGBABuffer* const src = &src_buf->u.RGBA;
const WebPRGBABuffer* const dst = &dst_buf->u.RGBA;
WebPCopyPlane(src->rgba, src->stride, dst->rgba, dst->stride,
src_buf->width * kModeBpp[src_buf->colorspace],
src_buf->height);
} else {
const WebPYUVABuffer* const src = &src_buf->u.YUVA;
const WebPYUVABuffer* const dst = &dst_buf->u.YUVA;
WebPCopyPlane(src->y, src->y_stride, dst->y, dst->y_stride,
src_buf->width, src_buf->height);
WebPCopyPlane(src->u, src->u_stride, dst->u, dst->u_stride,
(src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
WebPCopyPlane(src->v, src->v_stride, dst->v, dst->v_stride,
(src_buf->width + 1) / 2, (src_buf->height + 1) / 2);
if (WebPIsAlphaMode(src_buf->colorspace)) {
WebPCopyPlane(src->a, src->a_stride, dst->a, dst->a_stride,
src_buf->width, src_buf->height);
}
}
return VP8_STATUS_OK;
}
int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
const WebPBitstreamFeatures* const features) {
assert(output != NULL);
return (output->is_external_memory >= 2) &&
WebPIsPremultipliedMode(output->colorspace) &&
(features != NULL && features->has_alpha);
}
//------------------------------------------------------------------------------
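For reference, here is a minimal usage sketch of the decoder-buffer helpers defined above. It is an editorial illustration only, not part of the imported sources; it assumes the in-tree include paths used elsewhere in this import (src/webp/decode.h, src/dec/webpi_dec.h) and that the internal WebPAllocateDecBuffer() declaration is visible, and the function name is made up for illustration.

// Editorial sketch only -- not part of the imported libwebp sources.
#include <string.h>
#include "src/webp/decode.h"     // WebPDecBuffer, WebPDecoderOptions, MODE_RGBA
#include "src/dec/webpi_dec.h"   // WebPAllocateDecBuffer() (internal API)

// Allocates an RGBA buffer for the top-left quadrant of a width x height
// frame, vertically flipped via the negative-stride trick of WebPFlipBuffer().
static VP8StatusCode PrepareCroppedRGBABuffer(int width, int height,
                                              WebPDecBuffer* const buffer) {
  WebPDecoderOptions options;
  memset(&options, 0, sizeof(options));
  options.use_cropping = 1;          // crop offsets are snapped to even values
  options.crop_left = 0;
  options.crop_top = 0;
  options.crop_width = width / 2;    // must be > 0 and within the frame
  options.crop_height = height / 2;
  options.flip = 1;
  if (!WebPInitDecBuffer(buffer)) return VP8_STATUS_INVALID_PARAM;
  buffer->colorspace = MODE_RGBA;
  // The caller must eventually release the memory with WebPFreeDecBuffer().
  return WebPAllocateDecBuffer(width, height, &options, buffer);
}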

@@ -0,0 +1,54 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Definitions and macros common to encoding and decoding
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DEC_COMMON_DEC_H_
#define WEBP_DEC_COMMON_DEC_H_
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes
B_TM_PRED = 1,
B_VE_PRED = 2,
B_HE_PRED = 3,
B_RD_PRED = 4,
B_VR_PRED = 5,
B_LD_PRED = 6,
B_VL_PRED = 7,
B_HD_PRED = 8,
B_HU_PRED = 9,
NUM_BMODES = B_HU_PRED + 1 - B_DC_PRED, // = 10
// Luma16 or UV modes
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
B_PRED = NUM_BMODES, // refined I4x4 mode
NUM_PRED_MODES = 4,
// special modes
B_DC_PRED_NOTOP = 4,
B_DC_PRED_NOLEFT = 5,
B_DC_PRED_NOTOPLEFT = 6,
NUM_B_DC_MODES = 7 };
enum { MB_FEATURE_TREE_PROBS = 3,
NUM_MB_SEGMENTS = 4,
NUM_REF_LF_DELTAS = 4,
NUM_MODE_LF_DELTAS = 4, // I4x4, ZERO, *, SPLIT
MAX_NUM_PARTITIONS = 8,
// Probabilities
NUM_TYPES = 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC
NUM_BANDS = 8,
NUM_CTX = 3,
NUM_PROBAS = 11
};
#endif // WEBP_DEC_COMMON_DEC_H_
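Editorial note (not part of the imported header): these constants also fix the size of the coefficient probability tables used by the decoder, NUM_TYPES * NUM_BANDS * NUM_CTX * NUM_PROBAS = 4 * 8 * 3 * 11 = 1056 byte-probabilities, and NUM_BMODES evaluates to 10 as the inline comment states. A compile-time sanity check, for illustration only:

// Editorial compile-time checks only -- not in the imported header.
typedef char kEditorialCheckBModes[(NUM_BMODES == 10) ? 1 : -1];
typedef char kEditorialCheckProbas[
    (NUM_TYPES * NUM_BANDS * NUM_CTX * NUM_PROBAS == 1056) ? 1 : -1];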

@@ -0,0 +1,810 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Frame-reconstruction function. Memory allocation.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
// Main reconstruction function.
static const uint16_t kScan[16] = {
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS
};
static int CheckMode(int mb_x, int mb_y, int mode) {
if (mode == B_DC_PRED) {
if (mb_x == 0) {
return (mb_y == 0) ? B_DC_PRED_NOTOPLEFT : B_DC_PRED_NOLEFT;
} else {
return (mb_y == 0) ? B_DC_PRED_NOTOP : B_DC_PRED;
}
}
return mode;
}
static void Copy32b(uint8_t* const dst, const uint8_t* const src) {
memcpy(dst, src, 4);
}
static WEBP_INLINE void DoTransform(uint32_t bits, const int16_t* const src,
uint8_t* const dst) {
switch (bits >> 30) {
case 3:
VP8Transform(src, dst, 0);
break;
case 2:
VP8TransformAC3(src, dst);
break;
case 1:
VP8TransformDC(src, dst);
break;
default:
break;
}
}
static void DoUVTransform(uint32_t bits, const int16_t* const src,
uint8_t* const dst) {
if (bits & 0xff) { // any non-zero coeff at all?
if (bits & 0xaa) { // any non-zero AC coefficient?
VP8TransformUV(src, dst); // note we don't use the AC3 variant for U/V
} else {
VP8TransformDCUV(src, dst);
}
}
}
static void ReconstructRow(const VP8Decoder* const dec,
const VP8ThreadContext* ctx) {
int j;
int mb_x;
const int mb_y = ctx->mb_y_;
const int cache_id = ctx->id_;
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
// Initialize left-most block.
for (j = 0; j < 16; ++j) {
y_dst[j * BPS - 1] = 129;
}
for (j = 0; j < 8; ++j) {
u_dst[j * BPS - 1] = 129;
v_dst[j * BPS - 1] = 129;
}
// Init top-left sample on left column too.
if (mb_y > 0) {
y_dst[-1 - BPS] = u_dst[-1 - BPS] = v_dst[-1 - BPS] = 129;
} else {
// we only need to do this init once at block (0,0).
// Afterward, it remains valid for the whole topmost row.
memset(y_dst - BPS - 1, 127, 16 + 4 + 1);
memset(u_dst - BPS - 1, 127, 8 + 1);
memset(v_dst - BPS - 1, 127, 8 + 1);
}
// Reconstruct one row.
for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
const VP8MBData* const block = ctx->mb_data_ + mb_x;
// Rotate in the left samples from the previously decoded block. We move four
// pixels at a time for alignment reasons, and because of the in-loop filter.
if (mb_x > 0) {
for (j = -1; j < 16; ++j) {
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
}
for (j = -1; j < 8; ++j) {
Copy32b(&u_dst[j * BPS - 4], &u_dst[j * BPS + 4]);
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
}
}
{
// bring top samples into the cache
VP8TopSamples* const top_yuv = dec->yuv_t_ + mb_x;
const int16_t* const coeffs = block->coeffs_;
uint32_t bits = block->non_zero_y_;
int n;
if (mb_y > 0) {
memcpy(y_dst - BPS, top_yuv[0].y, 16);
memcpy(u_dst - BPS, top_yuv[0].u, 8);
memcpy(v_dst - BPS, top_yuv[0].v, 8);
}
// predict and add residuals
if (block->is_i4x4_) { // 4x4
uint32_t* const top_right = (uint32_t*)(y_dst - BPS + 16);
if (mb_y > 0) {
if (mb_x >= dec->mb_w_ - 1) { // on rightmost border
memset(top_right, top_yuv[0].y[15], sizeof(*top_right));
} else {
memcpy(top_right, top_yuv[1].y, sizeof(*top_right));
}
}
// replicate the top-right pixels below
top_right[BPS] = top_right[2 * BPS] = top_right[3 * BPS] = top_right[0];
// predict and add residuals for all 4x4 blocks in turn.
for (n = 0; n < 16; ++n, bits <<= 2) {
uint8_t* const dst = y_dst + kScan[n];
VP8PredLuma4[block->imodes_[n]](dst);
DoTransform(bits, coeffs + n * 16, dst);
}
} else { // 16x16
const int pred_func = CheckMode(mb_x, mb_y, block->imodes_[0]);
VP8PredLuma16[pred_func](y_dst);
if (bits != 0) {
for (n = 0; n < 16; ++n, bits <<= 2) {
DoTransform(bits, coeffs + n * 16, y_dst + kScan[n]);
}
}
}
{
// Chroma
const uint32_t bits_uv = block->non_zero_uv_;
const int pred_func = CheckMode(mb_x, mb_y, block->uvmode_);
VP8PredChroma8[pred_func](u_dst);
VP8PredChroma8[pred_func](v_dst);
DoUVTransform(bits_uv >> 0, coeffs + 16 * 16, u_dst);
DoUVTransform(bits_uv >> 8, coeffs + 20 * 16, v_dst);
}
// stash away top samples for next block
if (mb_y < dec->mb_h_ - 1) {
memcpy(top_yuv[0].y, y_dst + 15 * BPS, 16);
memcpy(top_yuv[0].u, u_dst + 7 * BPS, 8);
memcpy(top_yuv[0].v, v_dst + 7 * BPS, 8);
}
}
// Transfer reconstructed samples from yuv_b_ cache to final destination.
{
const int y_offset = cache_id * 16 * dec->cache_y_stride_;
const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
uint8_t* const y_out = dec->cache_y_ + mb_x * 16 + y_offset;
uint8_t* const u_out = dec->cache_u_ + mb_x * 8 + uv_offset;
uint8_t* const v_out = dec->cache_v_ + mb_x * 8 + uv_offset;
for (j = 0; j < 16; ++j) {
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
}
for (j = 0; j < 8; ++j) {
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
}
}
}
}
//------------------------------------------------------------------------------
// Filtering
// kFilterExtraRows[] = How many extra lines are needed on the MB boundary
// for caching, given a filtering level.
// Simple filter: up to 2 luma samples are read and 1 is written.
// Complex filter: up to 4 luma samples are read and 3 are written. Same for
// U/V, so it's 8 samples total (because of the 2x upsampling).
static const uint8_t kFilterExtraRows[3] = { 0, 2, 8 };
static void DoFilter(const VP8Decoder* const dec, int mb_x, int mb_y) {
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int cache_id = ctx->id_;
const int y_bps = dec->cache_y_stride_;
const VP8FInfo* const f_info = ctx->f_info_ + mb_x;
uint8_t* const y_dst = dec->cache_y_ + cache_id * 16 * y_bps + mb_x * 16;
const int ilevel = f_info->f_ilevel_;
const int limit = f_info->f_limit_;
if (limit == 0) {
return;
}
assert(limit >= 3);
if (dec->filter_type_ == 1) { // simple
if (mb_x > 0) {
VP8SimpleHFilter16(y_dst, y_bps, limit + 4);
}
if (f_info->f_inner_) {
VP8SimpleHFilter16i(y_dst, y_bps, limit);
}
if (mb_y > 0) {
VP8SimpleVFilter16(y_dst, y_bps, limit + 4);
}
if (f_info->f_inner_) {
VP8SimpleVFilter16i(y_dst, y_bps, limit);
}
} else { // complex
const int uv_bps = dec->cache_uv_stride_;
uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
const int hev_thresh = f_info->hev_thresh_;
if (mb_x > 0) {
VP8HFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
VP8HFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
}
if (f_info->f_inner_) {
VP8HFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
VP8HFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
}
if (mb_y > 0) {
VP8VFilter16(y_dst, y_bps, limit + 4, ilevel, hev_thresh);
VP8VFilter8(u_dst, v_dst, uv_bps, limit + 4, ilevel, hev_thresh);
}
if (f_info->f_inner_) {
VP8VFilter16i(y_dst, y_bps, limit, ilevel, hev_thresh);
VP8VFilter8i(u_dst, v_dst, uv_bps, limit, ilevel, hev_thresh);
}
}
}
// Filter the decoded macroblock row (if needed)
static void FilterRow(const VP8Decoder* const dec) {
int mb_x;
const int mb_y = dec->thread_ctx_.mb_y_;
assert(dec->thread_ctx_.filter_row_);
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
DoFilter(dec, mb_x, mb_y);
}
}
//------------------------------------------------------------------------------
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
if (dec->filter_type_ > 0) {
int s;
const VP8FilterHeader* const hdr = &dec->filter_hdr_;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
int i4x4;
// First, compute the initial level
int base_level;
if (dec->segment_hdr_.use_segment_) {
base_level = dec->segment_hdr_.filter_strength_[s];
if (!dec->segment_hdr_.absolute_delta_) {
base_level += hdr->level_;
}
} else {
base_level = hdr->level_;
}
for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
int level = base_level;
if (hdr->use_lf_delta_) {
level += hdr->ref_lf_delta_[0];
if (i4x4) {
level += hdr->mode_lf_delta_[0];
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
if (level > 0) {
int ilevel = level;
if (hdr->sharpness_ > 0) {
if (hdr->sharpness_ > 4) {
ilevel >>= 2;
} else {
ilevel >>= 1;
}
if (ilevel > 9 - hdr->sharpness_) {
ilevel = 9 - hdr->sharpness_;
}
}
if (ilevel < 1) ilevel = 1;
info->f_ilevel_ = ilevel;
info->f_limit_ = 2 * level + ilevel;
info->hev_thresh_ = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
} else {
info->f_limit_ = 0; // no filtering
}
info->f_inner_ = i4x4;
}
}
}
}
//------------------------------------------------------------------------------
// Dithering
// minimal amp that will provide a non-zero dithering effect
#define MIN_DITHER_AMP 4
#define DITHER_AMP_TAB_SIZE 12
static const uint8_t kQuantToDitherAmp[DITHER_AMP_TAB_SIZE] = {
// roughly, it's dqm->uv_mat_[1]
8, 7, 6, 4, 4, 2, 2, 2, 1, 1, 1, 1
};
void VP8InitDithering(const WebPDecoderOptions* const options,
VP8Decoder* const dec) {
assert(dec != NULL);
if (options != NULL) {
const int d = options->dithering_strength;
const int max_amp = (1 << VP8_RANDOM_DITHER_FIX) - 1;
const int f = (d < 0) ? 0 : (d > 100) ? max_amp : (d * max_amp / 100);
if (f > 0) {
int s;
int all_amp = 0;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
VP8QuantMatrix* const dqm = &dec->dqm_[s];
if (dqm->uv_quant_ < DITHER_AMP_TAB_SIZE) {
// TODO(skal): should we specially dither more for uv_quant_ < 0?
const int idx = (dqm->uv_quant_ < 0) ? 0 : dqm->uv_quant_;
dqm->dither_ = (f * kQuantToDitherAmp[idx]) >> 3;
}
all_amp |= dqm->dither_;
}
if (all_amp != 0) {
VP8InitRandom(&dec->dithering_rg_, 1.0f);
dec->dither_ = 1;
}
}
// potentially allow alpha dithering
dec->alpha_dithering_ = options->alpha_dithering_strength;
if (dec->alpha_dithering_ > 100) {
dec->alpha_dithering_ = 100;
} else if (dec->alpha_dithering_ < 0) {
dec->alpha_dithering_ = 0;
}
}
}
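// Editorial worked example (not part of the imported sources): with
// dithering_strength d == 100, f equals max_amp; a segment with uv_quant_ == 0
// then looks up kQuantToDitherAmp[0] == 8, giving dither_ = (f * 8) >> 3 = f,
// i.e. the maximum amplitude. With d == 50, f is max_amp / 2 (rounded down)
// and the amplitude halves accordingly. Segments whose uv_quant_ is at or
// above DITHER_AMP_TAB_SIZE are left untouched by the loop above.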
// Convert to range: [-2,2] for dither=50, [-4,4] for dither=100
static void Dither8x8(VP8Random* const rg, uint8_t* dst, int bps, int amp) {
uint8_t dither[64];
int i;
for (i = 0; i < 8 * 8; ++i) {
dither[i] = VP8RandomBits2(rg, VP8_DITHER_AMP_BITS + 1, amp);
}
VP8DitherCombine8x8(dither, dst, bps);
}
static void DitherRow(VP8Decoder* const dec) {
int mb_x;
assert(dec->dither_);
for (mb_x = dec->tl_mb_x_; mb_x < dec->br_mb_x_; ++mb_x) {
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const VP8MBData* const data = ctx->mb_data_ + mb_x;
const int cache_id = ctx->id_;
const int uv_bps = dec->cache_uv_stride_;
if (data->dither_ >= MIN_DITHER_AMP) {
uint8_t* const u_dst = dec->cache_u_ + cache_id * 8 * uv_bps + mb_x * 8;
uint8_t* const v_dst = dec->cache_v_ + cache_id * 8 * uv_bps + mb_x * 8;
Dither8x8(&dec->dithering_rg_, u_dst, uv_bps, data->dither_);
Dither8x8(&dec->dithering_rg_, v_dst, uv_bps, data->dither_);
}
}
}
//------------------------------------------------------------------------------
// This function is called after a row of macroblocks is finished decoding.
// It also takes into account the following restrictions:
// * In case of in-loop filtering, we must hold off sending some of the bottom
//   pixels, as they are not yet filtered; they will be once the next macroblock
//   row is decoded. Meanwhile, we must preserve them by rotating them in the
//   cache area. This doesn't hold for the very bottom row of the uncropped
//   picture, of course.
// * we must clip the remaining pixels against the cropping area. The VP8Io
// struct must have the following fields set correctly before calling put():
#define MACROBLOCK_VPOS(mb_y) ((mb_y) * 16) // vertical position of a MB
// Finalize and transmit a complete row. Return false in case of user-abort.
static int FinishRow(void* arg1, void* arg2) {
VP8Decoder* const dec = (VP8Decoder*)arg1;
VP8Io* const io = (VP8Io*)arg2;
int ok = 1;
const VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int cache_id = ctx->id_;
const int extra_y_rows = kFilterExtraRows[dec->filter_type_];
const int ysize = extra_y_rows * dec->cache_y_stride_;
const int uvsize = (extra_y_rows / 2) * dec->cache_uv_stride_;
const int y_offset = cache_id * 16 * dec->cache_y_stride_;
const int uv_offset = cache_id * 8 * dec->cache_uv_stride_;
uint8_t* const ydst = dec->cache_y_ - ysize + y_offset;
uint8_t* const udst = dec->cache_u_ - uvsize + uv_offset;
uint8_t* const vdst = dec->cache_v_ - uvsize + uv_offset;
const int mb_y = ctx->mb_y_;
const int is_first_row = (mb_y == 0);
const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
if (dec->mt_method_ == 2) {
ReconstructRow(dec, ctx);
}
if (ctx->filter_row_) {
FilterRow(dec);
}
if (dec->dither_) {
DitherRow(dec);
}
if (io->put != NULL) {
int y_start = MACROBLOCK_VPOS(mb_y);
int y_end = MACROBLOCK_VPOS(mb_y + 1);
if (!is_first_row) {
y_start -= extra_y_rows;
io->y = ydst;
io->u = udst;
io->v = vdst;
} else {
io->y = dec->cache_y_ + y_offset;
io->u = dec->cache_u_ + uv_offset;
io->v = dec->cache_v_ + uv_offset;
}
if (!is_last_row) {
y_end -= extra_y_rows;
}
if (y_end > io->crop_bottom) {
y_end = io->crop_bottom; // make sure we don't overflow on last row.
}
// If dec->alpha_data_ is not NULL, we have some alpha plane present.
io->a = NULL;
if (dec->alpha_data_ != NULL && y_start < y_end) {
io->a = VP8DecompressAlphaRows(dec, io, y_start, y_end - y_start);
if (io->a == NULL) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Could not decode alpha data.");
}
}
if (y_start < io->crop_top) {
const int delta_y = io->crop_top - y_start;
y_start = io->crop_top;
assert(!(delta_y & 1));
io->y += dec->cache_y_stride_ * delta_y;
io->u += dec->cache_uv_stride_ * (delta_y >> 1);
io->v += dec->cache_uv_stride_ * (delta_y >> 1);
if (io->a != NULL) {
io->a += io->width * delta_y;
}
}
if (y_start < y_end) {
io->y += io->crop_left;
io->u += io->crop_left >> 1;
io->v += io->crop_left >> 1;
if (io->a != NULL) {
io->a += io->crop_left;
}
io->mb_y = y_start - io->crop_top;
io->mb_w = io->crop_right - io->crop_left;
io->mb_h = y_end - y_start;
ok = io->put(io);
}
}
// rotate top samples if needed
if (cache_id + 1 == dec->num_caches_) {
if (!is_last_row) {
memcpy(dec->cache_y_ - ysize, ydst + 16 * dec->cache_y_stride_, ysize);
memcpy(dec->cache_u_ - uvsize, udst + 8 * dec->cache_uv_stride_, uvsize);
memcpy(dec->cache_v_ - uvsize, vdst + 8 * dec->cache_uv_stride_, uvsize);
}
}
return ok;
}
#undef MACROBLOCK_VPOS
//------------------------------------------------------------------------------
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
int ok = 1;
VP8ThreadContext* const ctx = &dec->thread_ctx_;
const int filter_row =
(dec->filter_type_ > 0) &&
(dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
if (dec->mt_method_ == 0) {
// ctx->id_ and ctx->f_info_ are already set
ctx->mb_y_ = dec->mb_y_;
ctx->filter_row_ = filter_row;
ReconstructRow(dec, ctx);
ok = FinishRow(dec, io);
} else {
WebPWorker* const worker = &dec->worker_;
// Finish previous job *before* updating context
ok &= WebPGetWorkerInterface()->Sync(worker);
assert(worker->status_ == OK);
if (ok) { // spawn a new deblocking/output job
ctx->io_ = *io;
ctx->id_ = dec->cache_id_;
ctx->mb_y_ = dec->mb_y_;
ctx->filter_row_ = filter_row;
if (dec->mt_method_ == 2) { // swap macroblock data
VP8MBData* const tmp = ctx->mb_data_;
ctx->mb_data_ = dec->mb_data_;
dec->mb_data_ = tmp;
} else {
// perform reconstruction directly in main thread
ReconstructRow(dec, ctx);
}
if (filter_row) { // swap filter info
VP8FInfo* const tmp = ctx->f_info_;
ctx->f_info_ = dec->f_info_;
dec->f_info_ = tmp;
}
// (reconstruct)+filter in parallel
WebPGetWorkerInterface()->Launch(worker);
if (++dec->cache_id_ == dec->num_caches_) {
dec->cache_id_ = 0;
}
}
}
return ok;
}
//------------------------------------------------------------------------------
// Finish setting up the decoding parameter once user's setup() is called.
VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
// Call setup() first. This may trigger additional decoding features on 'io'.
// Note: Afterward, we must call teardown() no matter what.
if (io->setup != NULL && !io->setup(io)) {
VP8SetError(dec, VP8_STATUS_USER_ABORT, "Frame setup failed");
return dec->status_;
}
// Disable filtering per user request
if (io->bypass_filtering) {
dec->filter_type_ = 0;
}
// Define the area where we can skip in-loop filtering, in case of cropping.
//
// 'Simple' filter reads two luma samples outside of the macroblock
// and filters one. It doesn't filter the chroma samples. Hence, we can
// avoid doing the in-loop filtering before crop_top/crop_left position.
// For the 'Complex' filter, 3 samples are read and up to 3 are filtered.
// This means there's a dependency chain that goes all the way up to the
// top-left corner of the picture (MB #0), so we must filter all the previous
// macroblocks.
{
const int extra_pixels = kFilterExtraRows[dec->filter_type_];
if (dec->filter_type_ == 2) {
// For complex filter, we need to preserve the dependency chain.
dec->tl_mb_x_ = 0;
dec->tl_mb_y_ = 0;
} else {
// For simple filter, we can filter only the cropped region.
// We include 'extra_pixels' on the other side of the boundary, since
// vertical or horizontal filtering of the previous macroblock can
// modify some abutting pixels.
dec->tl_mb_x_ = (io->crop_left - extra_pixels) >> 4;
dec->tl_mb_y_ = (io->crop_top - extra_pixels) >> 4;
if (dec->tl_mb_x_ < 0) dec->tl_mb_x_ = 0;
if (dec->tl_mb_y_ < 0) dec->tl_mb_y_ = 0;
}
// We need some 'extra' pixels on the right/bottom.
dec->br_mb_y_ = (io->crop_bottom + 15 + extra_pixels) >> 4;
dec->br_mb_x_ = (io->crop_right + 15 + extra_pixels) >> 4;
if (dec->br_mb_x_ > dec->mb_w_) {
dec->br_mb_x_ = dec->mb_w_;
}
if (dec->br_mb_y_ > dec->mb_h_) {
dec->br_mb_y_ = dec->mb_h_;
}
}
PrecomputeFilterStrengths(dec);
return VP8_STATUS_OK;
}
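// Editorial worked example (not part of the imported sources): with the simple
// filter, extra_pixels == 2. For crop_left == 35, the filtering start column is
// tl_mb_x_ = (35 - 2) >> 4 = 2, i.e. two macroblocks into the row; for
// crop_right == 100, the end column is br_mb_x_ = (100 + 15 + 2) >> 4 = 7,
// clamped to mb_w_ if needed. The complex filter instead forces
// tl_mb_x_ = tl_mb_y_ = 0 because of the dependency chain described above.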
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
int ok = 1;
if (dec->mt_method_ > 0) {
ok = WebPGetWorkerInterface()->Sync(&dec->worker_);
}
if (io->teardown != NULL) {
io->teardown(io);
}
return ok;
}
//------------------------------------------------------------------------------
// For multi-threaded decoding we need to use 3 rows of 16 pixels as delay line.
//
// Reason is: the deblocking filter cannot deblock the bottom horizontal edges
// immediately, and needs to wait for first few rows of the next macroblock to
// be decoded. Hence, deblocking is lagging behind by 4 or 8 pixels (depending
// on strength).
// With two threads, the vertical positions of the rows being decoded are:
// Decode: [ 0..15][16..31][32..47][48..63][64..79][...
// Deblock: [ 0..11][12..27][28..43][44..59][...
// If we use two threads and two caches of 16 pixels, the sequence would be:
// Decode: [ 0..15][16..31][ 0..15!!][16..31][ 0..15][...
// Deblock: [ 0..11][12..27!!][-4..11][12..27][...
// The problem occurs during rows [12..15!!], which both the decoding and
// deblocking threads write to simultaneously.
// With 3 cache lines, one gets a safe write pattern:
// Decode: [ 0..15][16..31][32..47][ 0..15][16..31][32..47][0..
// Deblock: [ 0..11][12..27][28..43][-4..11][12..27][28...
// Note that multi-threaded output _without_ deblocking can make use of only two
// cache lines of 16 pixels, since there is no lag. The decoding
// and output processes never write concurrently:
// Decode: [ 0..15][16..31][ 0..15][16..31][...
// io->put: [ 0..15][16..31][ 0..15][...
#define MT_CACHE_LINES 3
#define ST_CACHE_LINES 1 // 1 cache row only for single-threaded case
// Initialize multi/single-thread worker
static int InitThreadContext(VP8Decoder* const dec) {
dec->cache_id_ = 0;
if (dec->mt_method_ > 0) {
WebPWorker* const worker = &dec->worker_;
if (!WebPGetWorkerInterface()->Reset(worker)) {
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
"thread initialization failed.");
}
worker->data1 = dec;
worker->data2 = (void*)&dec->thread_ctx_.io_;
worker->hook = FinishRow;
dec->num_caches_ =
(dec->filter_type_ > 0) ? MT_CACHE_LINES : MT_CACHE_LINES - 1;
} else {
dec->num_caches_ = ST_CACHE_LINES;
}
return 1;
}
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
const WebPHeaderStructure* const headers,
int width, int height) {
if (options == NULL || options->use_threads == 0) {
return 0;
}
(void)headers;
(void)width;
(void)height;
assert(headers == NULL || !headers->is_lossless);
#if defined(WEBP_USE_THREAD)
if (width < MIN_WIDTH_FOR_THREADS) return 0;
// TODO(skal): tune the heuristic further
#if 0
if (height < 2 * width) return 2;
#endif
return 2;
#else // !WEBP_USE_THREAD
return 0;
#endif
}
#undef MT_CACHE_LINES
#undef ST_CACHE_LINES
//------------------------------------------------------------------------------
// Memory setup
static int AllocateMemory(VP8Decoder* const dec) {
const int num_caches = dec->num_caches_;
const int mb_w = dec->mb_w_;
// Note: we use 'size_t' when there's no overflow risk, uint64_t otherwise.
const size_t intra_pred_mode_size = 4 * mb_w * sizeof(uint8_t);
const size_t top_size = sizeof(VP8TopSamples) * mb_w;
const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
const size_t f_info_size =
(dec->filter_type_ > 0) ?
mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)
: 0;
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
const size_t mb_data_size =
(dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
const size_t cache_height = (16 * num_caches
+ kFilterExtraRows[dec->filter_type_]) * 3 / 2;
const size_t cache_size = top_size * cache_height;
// alpha_size is the only one that scales as width x height.
const uint64_t alpha_size = (dec->alpha_data_ != NULL) ?
(uint64_t)dec->pic_hdr_.width_ * dec->pic_hdr_.height_ : 0ULL;
const uint64_t needed = (uint64_t)intra_pred_mode_size
+ top_size + mb_info_size + f_info_size
+ yuv_size + mb_data_size
+ cache_size + alpha_size + WEBP_ALIGN_CST;
uint8_t* mem;
if (needed != (size_t)needed) return 0; // check for overflow
if (needed > dec->mem_size_) {
WebPSafeFree(dec->mem_);
dec->mem_size_ = 0;
dec->mem_ = WebPSafeMalloc(needed, sizeof(uint8_t));
if (dec->mem_ == NULL) {
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
"no memory during frame initialization.");
}
// down-cast is ok, thanks to WebPSafeMalloc() above.
dec->mem_size_ = (size_t)needed;
}
mem = (uint8_t*)dec->mem_;
dec->intra_t_ = mem;
mem += intra_pred_mode_size;
dec->yuv_t_ = (VP8TopSamples*)mem;
mem += top_size;
dec->mb_info_ = ((VP8MB*)mem) + 1;
mem += mb_info_size;
dec->f_info_ = f_info_size ? (VP8FInfo*)mem : NULL;
mem += f_info_size;
dec->thread_ctx_.id_ = 0;
dec->thread_ctx_.f_info_ = dec->f_info_;
if (dec->mt_method_ > 0) {
// secondary cache line. The deblocking process needs to make use of the
// filtering strengths from the previous macroblock row, while the new ones
// are being decoded in parallel. We'll just swap the pointers.
dec->thread_ctx_.f_info_ += mb_w;
}
mem = (uint8_t*)WEBP_ALIGN(mem);
assert((yuv_size & WEBP_ALIGN_CST) == 0);
dec->yuv_b_ = mem;
mem += yuv_size;
dec->mb_data_ = (VP8MBData*)mem;
dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
if (dec->mt_method_ == 2) {
dec->thread_ctx_.mb_data_ += mb_w;
}
mem += mb_data_size;
dec->cache_y_stride_ = 16 * mb_w;
dec->cache_uv_stride_ = 8 * mb_w;
{
const int extra_rows = kFilterExtraRows[dec->filter_type_];
const int extra_y = extra_rows * dec->cache_y_stride_;
const int extra_uv = (extra_rows / 2) * dec->cache_uv_stride_;
dec->cache_y_ = mem + extra_y;
dec->cache_u_ = dec->cache_y_
+ 16 * num_caches * dec->cache_y_stride_ + extra_uv;
dec->cache_v_ = dec->cache_u_
+ 8 * num_caches * dec->cache_uv_stride_ + extra_uv;
dec->cache_id_ = 0;
}
mem += cache_size;
// alpha plane
dec->alpha_plane_ = alpha_size ? mem : NULL;
mem += alpha_size;
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
// note: left/top-info is initialized once and for all.
memset(dec->mb_info_ - 1, 0, mb_info_size);
VP8InitScanline(dec); // initialize left too.
// initialize top
memset(dec->intra_t_, B_DC_PRED, intra_pred_mode_size);
return 1;
}
static void InitIo(VP8Decoder* const dec, VP8Io* io) {
// prepare 'io'
io->mb_y = 0;
io->y = dec->cache_y_;
io->u = dec->cache_u_;
io->v = dec->cache_v_;
io->y_stride = dec->cache_y_stride_;
io->uv_stride = dec->cache_uv_stride_;
io->a = NULL;
}
int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io) {
if (!InitThreadContext(dec)) return 0; // call first. Sets dec->num_caches_.
if (!AllocateMemory(dec)) return 0;
InitIo(dec, io);
VP8DspInit(); // Init critical function pointers and look-up tables.
return 1;
}
//------------------------------------------------------------------------------
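As a reading aid for PrecomputeFilterStrengths() above, the mapping from a clamped filter level to (ilevel, limit, hev_thresh) can be restated as a standalone function. This is an editorial sketch only; FilterParams and ComputeFilterParams are made-up names, not part of the imported sources.

// Editorial sketch only -- mirrors the per-level computation in
// PrecomputeFilterStrengths(); the names are illustrative, not upstream API.
typedef struct { int ilevel, limit, hev_thresh; } FilterParams;

static FilterParams ComputeFilterParams(int level, int sharpness) {
  FilterParams p = { 0, 0, 0 };   // limit == 0 means "no filtering"
  if (level < 0) level = 0;
  if (level > 63) level = 63;
  if (level > 0) {
    int ilevel = level;
    if (sharpness > 0) {
      ilevel >>= (sharpness > 4) ? 2 : 1;
      if (ilevel > 9 - sharpness) ilevel = 9 - sharpness;
    }
    if (ilevel < 1) ilevel = 1;
    p.ilevel = ilevel;
    p.limit = 2 * level + ilevel;
    p.hev_thresh = (level >= 40) ? 2 : (level >= 15) ? 1 : 0;
  }
  return p;
}

For example, level 30 with sharpness 5 yields ilevel = 4 (30 >> 2 = 7, capped at 9 - 5), limit = 64 and hev_thresh = 1.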

@@ -0,0 +1,892 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Incremental decoding
//
// Author: somnath@google.com (Somnath Banerjee)
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include "src/dec/alphai_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
// In append mode, buffer allocations increase as multiples of this value.
// Needs to be a power of 2.
#define CHUNK_SIZE 4096
#define MAX_MB_SIZE 4096
//------------------------------------------------------------------------------
// Data structures for memory and states
// Decoding states. State normally flows as:
// WEBP_HEADER->VP8_HEADER->VP8_PARTS0->VP8_DATA->DONE for a lossy image, and
// WEBP_HEADER->VP8L_HEADER->VP8L_DATA->DONE for a lossless image.
// If there is any error the decoder goes into state ERROR.
typedef enum {
STATE_WEBP_HEADER, // All the data before that of the VP8/VP8L chunk.
STATE_VP8_HEADER, // The VP8 Frame header (within the VP8 chunk).
STATE_VP8_PARTS0,
STATE_VP8_DATA,
STATE_VP8L_HEADER,
STATE_VP8L_DATA,
STATE_DONE,
STATE_ERROR
} DecState;
// Operating state for the MemBuffer
typedef enum {
MEM_MODE_NONE = 0,
MEM_MODE_APPEND,
MEM_MODE_MAP
} MemBufferMode;
// storage for partition #0 and partial data (in a rolling fashion)
typedef struct {
MemBufferMode mode_; // Operation mode
size_t start_; // start location of the data to be decoded
size_t end_; // end location
size_t buf_size_; // size of the allocated buffer
uint8_t* buf_; // We don't own this buffer in case WebPIUpdate()
size_t part0_size_; // size of partition #0
const uint8_t* part0_buf_; // buffer to store partition #0
} MemBuffer;
struct WebPIDecoder {
DecState state_; // current decoding state
WebPDecParams params_; // Params to store output info
int is_lossless_; // for down-casting 'dec_'.
void* dec_; // either a VP8Decoder or a VP8LDecoder instance
VP8Io io_;
MemBuffer mem_; // input memory buffer.
WebPDecBuffer output_; // output buffer (when no external one is supplied,
// or if the external one has slow-memory)
WebPDecBuffer* final_output_; // Slow-memory output to copy to eventually.
size_t chunk_size_; // Compressed VP8/VP8L size extracted from Header.
int last_mb_y_; // last row reached for intra-mode decoding
};
// MB context to restore in case VP8DecodeMB() fails
typedef struct {
VP8MB left_;
VP8MB info_;
VP8BitReader token_br_;
} MBContext;
//------------------------------------------------------------------------------
// MemBuffer: incoming data handling
static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
return (mem->end_ - mem->start_);
}
// Check if we need to preserve the compressed alpha data, as it may not have
// been decoded yet.
static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
if (idec->state_ == STATE_WEBP_HEADER) {
// We haven't parsed the headers yet, so we don't know whether the image is
// lossy or lossless. This also means that we haven't parsed the ALPH chunk.
return 0;
}
if (idec->is_lossless_) {
return 0; // ALPH chunk is not present for lossless images.
} else {
const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
assert(dec != NULL); // Must be true as idec->state_ != STATE_WEBP_HEADER.
return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_;
}
}
static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
MemBuffer* const mem = &idec->mem_;
const uint8_t* const new_base = mem->buf_ + mem->start_;
// note: for VP8, setting up idec->io_ is only really needed at the beginning
// of the decoding, till partition #0 is complete.
idec->io_.data = new_base;
idec->io_.data_size = MemDataSize(mem);
if (idec->dec_ != NULL) {
if (!idec->is_lossless_) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
const uint32_t last_part = dec->num_parts_minus_one_;
if (offset != 0) {
uint32_t p;
for (p = 0; p <= last_part; ++p) {
VP8RemapBitReader(dec->parts_ + p, offset);
}
// Remap partition #0 data pointer to new offset, but only in MAP
// mode (in APPEND mode, partition #0 is copied into a fixed memory).
if (mem->mode_ == MEM_MODE_MAP) {
VP8RemapBitReader(&dec->br_, offset);
}
}
{
const uint8_t* const last_start = dec->parts_[last_part].buf_;
VP8BitReaderSetBuffer(&dec->parts_[last_part], last_start,
mem->buf_ + mem->end_ - last_start);
}
if (NeedCompressedAlpha(idec)) {
ALPHDecoder* const alph_dec = dec->alph_dec_;
dec->alpha_data_ += offset;
if (alph_dec != NULL) {
if (alph_dec->method_ == ALPHA_LOSSLESS_COMPRESSION) {
VP8LDecoder* const alph_vp8l_dec = alph_dec->vp8l_dec_;
assert(alph_vp8l_dec != NULL);
assert(dec->alpha_data_size_ >= ALPHA_HEADER_LEN);
VP8LBitReaderSetBuffer(&alph_vp8l_dec->br_,
dec->alpha_data_ + ALPHA_HEADER_LEN,
dec->alpha_data_size_ - ALPHA_HEADER_LEN);
} else { // alph_dec->method_ == ALPHA_NO_COMPRESSION
// Nothing special to do in this case.
}
}
}
} else { // Resize lossless bitreader
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
}
}
}
// Appends data to the end of MemBuffer->buf_. It expands the allocated memory
// size if required and also updates VP8BitReader's if new memory is allocated.
static int AppendToMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data, size_t data_size) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
MemBuffer* const mem = &idec->mem_;
const int need_compressed_alpha = NeedCompressedAlpha(idec);
const uint8_t* const old_start = mem->buf_ + mem->start_;
const uint8_t* const old_base =
need_compressed_alpha ? dec->alpha_data_ : old_start;
assert(mem->mode_ == MEM_MODE_APPEND);
if (data_size > MAX_CHUNK_PAYLOAD) {
// security safeguard: trying to allocate more than what the format
// allows for a chunk should be considered a smoke smell.
return 0;
}
if (mem->end_ + data_size > mem->buf_size_) { // Need some free memory
const size_t new_mem_start = old_start - old_base;
const size_t current_size = MemDataSize(mem) + new_mem_start;
const uint64_t new_size = (uint64_t)current_size + data_size;
const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
uint8_t* const new_buf =
(uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
if (new_buf == NULL) return 0;
memcpy(new_buf, old_base, current_size);
WebPSafeFree(mem->buf_);
mem->buf_ = new_buf;
mem->buf_size_ = (size_t)extra_size;
mem->start_ = new_mem_start;
mem->end_ = current_size;
}
memcpy(mem->buf_ + mem->end_, data, data_size);
mem->end_ += data_size;
assert(mem->end_ <= mem->buf_size_);
DoRemap(idec, mem->buf_ + mem->start_ - old_start);
return 1;
}
static int RemapMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data, size_t data_size) {
MemBuffer* const mem = &idec->mem_;
const uint8_t* const old_buf = mem->buf_;
const uint8_t* const old_start = old_buf + mem->start_;
assert(mem->mode_ == MEM_MODE_MAP);
if (data_size < mem->buf_size_) return 0; // can't remap to a shorter buffer!
mem->buf_ = (uint8_t*)data;
mem->end_ = mem->buf_size_ = data_size;
DoRemap(idec, mem->buf_ + mem->start_ - old_start);
return 1;
}
static void InitMemBuffer(MemBuffer* const mem) {
mem->mode_ = MEM_MODE_NONE;
mem->buf_ = NULL;
mem->buf_size_ = 0;
mem->part0_buf_ = NULL;
mem->part0_size_ = 0;
}
static void ClearMemBuffer(MemBuffer* const mem) {
assert(mem);
if (mem->mode_ == MEM_MODE_APPEND) {
WebPSafeFree(mem->buf_);
WebPSafeFree((void*)mem->part0_buf_);
}
}
static int CheckMemBufferMode(MemBuffer* const mem, MemBufferMode expected) {
if (mem->mode_ == MEM_MODE_NONE) {
mem->mode_ = expected; // switch to the expected mode
} else if (mem->mode_ != expected) {
return 0; // we mixed the modes => error
}
assert(mem->mode_ == expected); // mode is ok
return 1;
}
// To be called last.
static VP8StatusCode FinishDecoding(WebPIDecoder* const idec) {
const WebPDecoderOptions* const options = idec->params_.options;
WebPDecBuffer* const output = idec->params_.output;
idec->state_ = STATE_DONE;
if (options != NULL && options->flip) {
const VP8StatusCode status = WebPFlipBuffer(output);
if (status != VP8_STATUS_OK) return status;
}
if (idec->final_output_ != NULL) {
WebPCopyDecBufferPixels(output, idec->final_output_); // do the slow-copy
WebPFreeDecBuffer(&idec->output_);
*output = *idec->final_output_;
idec->final_output_ = NULL;
}
return VP8_STATUS_OK;
}
//------------------------------------------------------------------------------
// Macroblock-decoding contexts
static void SaveContext(const VP8Decoder* dec, const VP8BitReader* token_br,
MBContext* const context) {
context->left_ = dec->mb_info_[-1];
context->info_ = dec->mb_info_[dec->mb_x_];
context->token_br_ = *token_br;
}
static void RestoreContext(const MBContext* context, VP8Decoder* const dec,
VP8BitReader* const token_br) {
dec->mb_info_[-1] = context->left_;
dec->mb_info_[dec->mb_x_] = context->info_;
*token_br = context->token_br_;
}
//------------------------------------------------------------------------------
static VP8StatusCode IDecError(WebPIDecoder* const idec, VP8StatusCode error) {
if (idec->state_ == STATE_VP8_DATA) {
VP8Io* const io = &idec->io_;
if (io->teardown != NULL) {
io->teardown(io);
}
}
idec->state_ = STATE_ERROR;
return error;
}
static void ChangeState(WebPIDecoder* const idec, DecState new_state,
size_t consumed_bytes) {
MemBuffer* const mem = &idec->mem_;
idec->state_ = new_state;
mem->start_ += consumed_bytes;
assert(mem->start_ <= mem->end_);
idec->io_.data = mem->buf_ + mem->start_;
idec->io_.data_size = MemDataSize(mem);
}
// Headers
static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
MemBuffer* const mem = &idec->mem_;
const uint8_t* data = mem->buf_ + mem->start_;
size_t curr_size = MemDataSize(mem);
VP8StatusCode status;
WebPHeaderStructure headers;
headers.data = data;
headers.data_size = curr_size;
headers.have_all_data = 0;
status = WebPParseHeaders(&headers);
if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
return VP8_STATUS_SUSPENDED; // We haven't found a VP8 chunk yet.
} else if (status != VP8_STATUS_OK) {
return IDecError(idec, status);
}
idec->chunk_size_ = headers.compressed_size;
idec->is_lossless_ = headers.is_lossless;
if (!idec->is_lossless_) {
VP8Decoder* const dec = VP8New();
if (dec == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
idec->dec_ = dec;
dec->alpha_data_ = headers.alpha_data;
dec->alpha_data_size_ = headers.alpha_data_size;
ChangeState(idec, STATE_VP8_HEADER, headers.offset);
} else {
VP8LDecoder* const dec = VP8LNew();
if (dec == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
idec->dec_ = dec;
ChangeState(idec, STATE_VP8L_HEADER, headers.offset);
}
return VP8_STATUS_OK;
}
static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
const size_t curr_size = MemDataSize(&idec->mem_);
int width, height;
uint32_t bits;
if (curr_size < VP8_FRAME_HEADER_SIZE) {
// Not enough data bytes to extract VP8 Frame Header.
return VP8_STATUS_SUSPENDED;
}
if (!VP8GetInfo(data, curr_size, idec->chunk_size_, &width, &height)) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
bits = data[0] | (data[1] << 8) | (data[2] << 16);
idec->mem_.part0_size_ = (bits >> 5) + VP8_FRAME_HEADER_SIZE;
idec->io_.data = data;
idec->io_.data_size = curr_size;
idec->state_ = STATE_VP8_PARTS0;
return VP8_STATUS_OK;
}
// Partition #0
static VP8StatusCode CopyParts0Data(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8BitReader* const br = &dec->br_;
const size_t part_size = br->buf_end_ - br->buf_;
MemBuffer* const mem = &idec->mem_;
assert(!idec->is_lossless_);
assert(mem->part0_buf_ == NULL);
// the following is a format limitation, no need for runtime check:
assert(part_size <= mem->part0_size_);
if (part_size == 0) { // can't have zero-size partition #0
return VP8_STATUS_BITSTREAM_ERROR;
}
if (mem->mode_ == MEM_MODE_APPEND) {
// We copy and grab ownership of the partition #0 data.
uint8_t* const part0_buf = (uint8_t*)WebPSafeMalloc(1ULL, part_size);
if (part0_buf == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
memcpy(part0_buf, br->buf_, part_size);
mem->part0_buf_ = part0_buf;
VP8BitReaderSetBuffer(br, part0_buf, part_size);
} else {
// Else: just keep pointers to the partition #0's data in dec_->br_.
}
mem->start_ += part_size;
return VP8_STATUS_OK;
}
static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8Io* const io = &idec->io_;
const WebPDecParams* const params = &idec->params_;
WebPDecBuffer* const output = params->output;
// Wait till we have enough data for the whole partition #0
if (MemDataSize(&idec->mem_) < idec->mem_.part0_size_) {
return VP8_STATUS_SUSPENDED;
}
if (!VP8GetHeaders(dec, io)) {
const VP8StatusCode status = dec->status_;
if (status == VP8_STATUS_SUSPENDED ||
status == VP8_STATUS_NOT_ENOUGH_DATA) {
// treating NOT_ENOUGH_DATA as SUSPENDED state
return VP8_STATUS_SUSPENDED;
}
return IDecError(idec, status);
}
// Allocate/Verify output buffer now
dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
output);
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
}
// This change must be done before calling VP8InitFrame()
dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
io->width, io->height);
VP8InitDithering(params->options, dec);
dec->status_ = CopyParts0Data(idec);
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
}
// Finish setting up the decoding parameters. Will call io->setup().
if (VP8EnterCritical(dec, io) != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
}
// Note: past this point, teardown() must always be called
// in case of error.
idec->state_ = STATE_VP8_DATA;
// Allocate memory and prepare everything.
if (!VP8InitFrame(dec, io)) {
return IDecError(idec, dec->status_);
}
return VP8_STATUS_OK;
}
// Remaining partitions
static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
VP8Io* const io = &idec->io_;
assert(dec->ready_);
for (; dec->mb_y_ < dec->mb_h_; ++dec->mb_y_) {
if (idec->last_mb_y_ != dec->mb_y_) {
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
// note: normally, an error shouldn't occur since we already have the whole
// partition #0 available here in DecodeRemaining(). Reaching EOF while
// reading intra modes really means a BITSTREAM_ERROR.
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
idec->last_mb_y_ = dec->mb_y_;
}
for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
MBContext context;
SaveContext(dec, token_br, &context);
if (!VP8DecodeMB(dec, token_br)) {
// We shouldn't fail when more than MAX_MB_SIZE bytes of data were available
if (dec->num_parts_minus_one_ == 0 &&
MemDataSize(&idec->mem_) > MAX_MB_SIZE) {
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
}
RestoreContext(&context, dec, token_br);
return VP8_STATUS_SUSPENDED;
}
// Release buffer only if there is only one partition
if (dec->num_parts_minus_one_ == 0) {
idec->mem_.start_ = token_br->buf_ - idec->mem_.buf_;
assert(idec->mem_.start_ <= idec->mem_.end_);
}
}
VP8InitScanline(dec); // Prepare for next scanline
// Reconstruct, filter and emit the row.
if (!VP8ProcessRow(dec, io)) {
return IDecError(idec, VP8_STATUS_USER_ABORT);
}
}
// Synchronize the thread and check for errors.
if (!VP8ExitCritical(dec, io)) {
return IDecError(idec, VP8_STATUS_USER_ABORT);
}
dec->ready_ = 0;
return FinishDecoding(idec);
}
static VP8StatusCode ErrorStatusLossless(WebPIDecoder* const idec,
VP8StatusCode status) {
if (status == VP8_STATUS_SUSPENDED || status == VP8_STATUS_NOT_ENOUGH_DATA) {
return VP8_STATUS_SUSPENDED;
}
return IDecError(idec, status);
}
static VP8StatusCode DecodeVP8LHeader(WebPIDecoder* const idec) {
VP8Io* const io = &idec->io_;
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
const WebPDecParams* const params = &idec->params_;
WebPDecBuffer* const output = params->output;
size_t curr_size = MemDataSize(&idec->mem_);
assert(idec->is_lossless_);
// Wait until there's enough data for decoding header.
if (curr_size < (idec->chunk_size_ >> 3)) {
dec->status_ = VP8_STATUS_SUSPENDED;
return ErrorStatusLossless(idec, dec->status_);
}
if (!VP8LDecodeHeader(dec, io)) {
if (dec->status_ == VP8_STATUS_BITSTREAM_ERROR &&
curr_size < idec->chunk_size_) {
dec->status_ = VP8_STATUS_SUSPENDED;
}
return ErrorStatusLossless(idec, dec->status_);
}
// Allocate/verify output buffer now.
dec->status_ = WebPAllocateDecBuffer(io->width, io->height, params->options,
output);
if (dec->status_ != VP8_STATUS_OK) {
return IDecError(idec, dec->status_);
}
idec->state_ = STATE_VP8L_DATA;
return VP8_STATUS_OK;
}
static VP8StatusCode DecodeVP8LData(WebPIDecoder* const idec) {
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
const size_t curr_size = MemDataSize(&idec->mem_);
assert(idec->is_lossless_);
// Switch to incremental decoding if we don't have all the bytes available.
dec->incremental_ = (curr_size < idec->chunk_size_);
if (!VP8LDecodeImage(dec)) {
return ErrorStatusLossless(idec, dec->status_);
}
assert(dec->status_ == VP8_STATUS_OK || dec->status_ == VP8_STATUS_SUSPENDED);
return (dec->status_ == VP8_STATUS_SUSPENDED) ? dec->status_
: FinishDecoding(idec);
}
// Main decoding loop
static VP8StatusCode IDecode(WebPIDecoder* idec) {
VP8StatusCode status = VP8_STATUS_SUSPENDED;
if (idec->state_ == STATE_WEBP_HEADER) {
status = DecodeWebPHeaders(idec);
} else {
if (idec->dec_ == NULL) {
return VP8_STATUS_SUSPENDED; // can't continue if we have no decoder.
}
}
if (idec->state_ == STATE_VP8_HEADER) {
status = DecodeVP8FrameHeader(idec);
}
if (idec->state_ == STATE_VP8_PARTS0) {
status = DecodePartition0(idec);
}
if (idec->state_ == STATE_VP8_DATA) {
status = DecodeRemaining(idec);
}
if (idec->state_ == STATE_VP8L_HEADER) {
status = DecodeVP8LHeader(idec);
}
if (idec->state_ == STATE_VP8L_DATA) {
status = DecodeVP8LData(idec);
}
return status;
}
//------------------------------------------------------------------------------
// Internal constructor
static WebPIDecoder* NewDecoder(WebPDecBuffer* const output_buffer,
const WebPBitstreamFeatures* const features) {
WebPIDecoder* idec = (WebPIDecoder*)WebPSafeCalloc(1ULL, sizeof(*idec));
if (idec == NULL) {
return NULL;
}
idec->state_ = STATE_WEBP_HEADER;
idec->chunk_size_ = 0;
idec->last_mb_y_ = -1;
InitMemBuffer(&idec->mem_);
WebPInitDecBuffer(&idec->output_);
VP8InitIo(&idec->io_);
WebPResetDecParams(&idec->params_);
if (output_buffer == NULL || WebPAvoidSlowMemory(output_buffer, features)) {
idec->params_.output = &idec->output_;
idec->final_output_ = output_buffer;
if (output_buffer != NULL) {
idec->params_.output->colorspace = output_buffer->colorspace;
}
} else {
idec->params_.output = output_buffer;
idec->final_output_ = NULL;
}
WebPInitCustomIo(&idec->params_, &idec->io_); // Plug the I/O functions.
return idec;
}
//------------------------------------------------------------------------------
// Public functions
WebPIDecoder* WebPINewDecoder(WebPDecBuffer* output_buffer) {
return NewDecoder(output_buffer, NULL);
}
WebPIDecoder* WebPIDecode(const uint8_t* data, size_t data_size,
WebPDecoderConfig* config) {
WebPIDecoder* idec;
WebPBitstreamFeatures tmp_features;
WebPBitstreamFeatures* const features =
(config == NULL) ? &tmp_features : &config->input;
memset(&tmp_features, 0, sizeof(tmp_features));
// Parse the bitstream's features, if requested:
if (data != NULL && data_size > 0) {
if (WebPGetFeatures(data, data_size, features) != VP8_STATUS_OK) {
return NULL;
}
}
// Create an instance of the incremental decoder
idec = (config != NULL) ? NewDecoder(&config->output, features)
: NewDecoder(NULL, features);
if (idec == NULL) {
return NULL;
}
// Finish initialization
if (config != NULL) {
idec->params_.options = &config->options;
}
return idec;
}
void WebPIDelete(WebPIDecoder* idec) {
if (idec == NULL) return;
if (idec->dec_ != NULL) {
if (!idec->is_lossless_) {
if (idec->state_ == STATE_VP8_DATA) {
// Synchronize the thread, clean-up and check for errors.
VP8ExitCritical((VP8Decoder*)idec->dec_, &idec->io_);
}
VP8Delete((VP8Decoder*)idec->dec_);
} else {
VP8LDelete((VP8LDecoder*)idec->dec_);
}
}
ClearMemBuffer(&idec->mem_);
WebPFreeDecBuffer(&idec->output_);
WebPSafeFree(idec);
}
//------------------------------------------------------------------------------
// Wrapper toward WebPINewDecoder
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE csp, uint8_t* output_buffer,
size_t output_buffer_size, int output_stride) {
const int is_external_memory = (output_buffer != NULL) ? 1 : 0;
WebPIDecoder* idec;
if (csp >= MODE_YUV) return NULL;
if (is_external_memory == 0) { // Overwrite parameters to sane values.
output_buffer_size = 0;
output_stride = 0;
} else { // A buffer was passed. Validate the other params.
if (output_stride == 0 || output_buffer_size == 0) {
return NULL; // invalid parameter.
}
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = csp;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.RGBA.rgba = output_buffer;
idec->output_.u.RGBA.stride = output_stride;
idec->output_.u.RGBA.size = output_buffer_size;
return idec;
}
WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride,
uint8_t* a, size_t a_size, int a_stride) {
const int is_external_memory = (luma != NULL) ? 1 : 0;
WebPIDecoder* idec;
WEBP_CSP_MODE colorspace;
if (is_external_memory == 0) { // Overwrite parameters to sane values.
luma_size = u_size = v_size = a_size = 0;
luma_stride = u_stride = v_stride = a_stride = 0;
u = v = a = NULL;
colorspace = MODE_YUVA;
} else { // A luma buffer was passed. Validate the other parameters.
if (u == NULL || v == NULL) return NULL;
if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL;
if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL;
if (a != NULL) {
if (a_size == 0 || a_stride == 0) return NULL;
}
colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = colorspace;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.YUVA.y = luma;
idec->output_.u.YUVA.y_stride = luma_stride;
idec->output_.u.YUVA.y_size = luma_size;
idec->output_.u.YUVA.u = u;
idec->output_.u.YUVA.u_stride = u_stride;
idec->output_.u.YUVA.u_size = u_size;
idec->output_.u.YUVA.v = v;
idec->output_.u.YUVA.v_stride = v_stride;
idec->output_.u.YUVA.v_size = v_size;
idec->output_.u.YUVA.a = a;
idec->output_.u.YUVA.a_stride = a_stride;
idec->output_.u.YUVA.a_size = a_size;
return idec;
}
WebPIDecoder* WebPINewYUV(uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride) {
return WebPINewYUVA(luma, luma_size, luma_stride,
u, u_size, u_stride,
v, v_size, v_stride,
NULL, 0, 0);
}
//------------------------------------------------------------------------------
static VP8StatusCode IDecCheckStatus(const WebPIDecoder* const idec) {
assert(idec);
if (idec->state_ == STATE_ERROR) {
return VP8_STATUS_BITSTREAM_ERROR;
}
if (idec->state_ == STATE_DONE) {
return VP8_STATUS_OK;
}
return VP8_STATUS_SUSPENDED;
}
VP8StatusCode WebPIAppend(WebPIDecoder* idec,
const uint8_t* data, size_t data_size) {
VP8StatusCode status;
if (idec == NULL || data == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
status = IDecCheckStatus(idec);
if (status != VP8_STATUS_SUSPENDED) {
return status;
}
// Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_APPEND)) {
return VP8_STATUS_INVALID_PARAM;
}
// Append data to memory buffer
if (!AppendToMemBuffer(idec, data, data_size)) {
return VP8_STATUS_OUT_OF_MEMORY;
}
return IDecode(idec);
}
VP8StatusCode WebPIUpdate(WebPIDecoder* idec,
const uint8_t* data, size_t data_size) {
VP8StatusCode status;
if (idec == NULL || data == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
status = IDecCheckStatus(idec);
if (status != VP8_STATUS_SUSPENDED) {
return status;
}
// Check mixed calls between RemapMemBuffer and AppendToMemBuffer.
if (!CheckMemBufferMode(&idec->mem_, MEM_MODE_MAP)) {
return VP8_STATUS_INVALID_PARAM;
}
// Make the memory buffer point to the new buffer
if (!RemapMemBuffer(idec, data, data_size)) {
return VP8_STATUS_INVALID_PARAM;
}
return IDecode(idec);
}
//------------------------------------------------------------------------------
static const WebPDecBuffer* GetOutputBuffer(const WebPIDecoder* const idec) {
if (idec == NULL || idec->dec_ == NULL) {
return NULL;
}
if (idec->state_ <= STATE_VP8_PARTS0) {
return NULL;
}
if (idec->final_output_ != NULL) {
return NULL; // not yet slow-copied
}
return idec->params_.output;
}
const WebPDecBuffer* WebPIDecodedArea(const WebPIDecoder* idec,
int* left, int* top,
int* width, int* height) {
const WebPDecBuffer* const src = GetOutputBuffer(idec);
if (left != NULL) *left = 0;
if (top != NULL) *top = 0;
if (src != NULL) {
if (width != NULL) *width = src->width;
if (height != NULL) *height = idec->params_.last_y;
} else {
if (width != NULL) *width = 0;
if (height != NULL) *height = 0;
}
return src;
}
uint8_t* WebPIDecGetRGB(const WebPIDecoder* idec, int* last_y,
int* width, int* height, int* stride) {
const WebPDecBuffer* const src = GetOutputBuffer(idec);
if (src == NULL) return NULL;
if (src->colorspace >= MODE_YUV) {
return NULL;
}
if (last_y != NULL) *last_y = idec->params_.last_y;
if (width != NULL) *width = src->width;
if (height != NULL) *height = src->height;
if (stride != NULL) *stride = src->u.RGBA.stride;
return src->u.RGBA.rgba;
}
uint8_t* WebPIDecGetYUVA(const WebPIDecoder* idec, int* last_y,
uint8_t** u, uint8_t** v, uint8_t** a,
int* width, int* height,
int* stride, int* uv_stride, int* a_stride) {
const WebPDecBuffer* const src = GetOutputBuffer(idec);
if (src == NULL) return NULL;
if (src->colorspace < MODE_YUV) {
return NULL;
}
if (last_y != NULL) *last_y = idec->params_.last_y;
if (u != NULL) *u = src->u.YUVA.u;
if (v != NULL) *v = src->u.YUVA.v;
if (a != NULL) *a = src->u.YUVA.a;
if (width != NULL) *width = src->width;
if (height != NULL) *height = src->height;
if (stride != NULL) *stride = src->u.YUVA.y_stride;
if (uv_stride != NULL) *uv_stride = src->u.YUVA.u_stride;
if (a_stride != NULL) *a_stride = src->u.YUVA.a_stride;
return src->u.YUVA.y;
}
int WebPISetIOHooks(WebPIDecoder* const idec,
VP8IoPutHook put,
VP8IoSetupHook setup,
VP8IoTeardownHook teardown,
void* user_data) {
if (idec == NULL || idec->state_ > STATE_WEBP_HEADER) {
return 0;
}
idec->io_.put = put;
idec->io_.setup = setup;
idec->io_.teardown = teardown;
idec->io_.opaque = user_data;
return 1;
}
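To make the public entry points above concrete, here is a minimal incremental-decoding sketch. It is an editorial illustration, not part of the imported sources: it drives the state flow described at the top of this file by feeding chunks through WebPIAppend() and reading the result with WebPIDecGetRGB(); read_chunk() is a hypothetical data source, and the include path follows the in-tree convention used in this import.

// Editorial sketch only -- not part of the imported libwebp sources.
#include <stddef.h>
#include <stdint.h>
#include "src/webp/decode.h"

// read_chunk() is a hypothetical callback returning the next chunk of the
// bitstream (0 when the stream is exhausted).
extern size_t read_chunk(uint8_t* buf, size_t max_size);

static int DecodeIncrementally(void) {
  uint8_t chunk[4096];
  int ok = 0;
  WebPIDecoder* const idec = WebPINewRGB(MODE_RGBA, NULL, 0, 0);
  if (idec == NULL) return 0;
  for (;;) {
    const size_t n = read_chunk(chunk, sizeof(chunk));
    VP8StatusCode status;
    if (n == 0) break;                       // ran out of input before DONE
    status = WebPIAppend(idec, chunk, n);
    if (status == VP8_STATUS_OK) {           // the whole image is decoded
      int last_y, width, height, stride;
      const uint8_t* const rgba =
          WebPIDecGetRGB(idec, &last_y, &width, &height, &stride);
      ok = (rgba != NULL);                   // pixel memory is owned by 'idec'
      break;
    } else if (status != VP8_STATUS_SUSPENDED) {
      break;                                 // bitstream or memory error
    }
    // VP8_STATUS_SUSPENDED: more data is needed; loop and append again.
  }
  WebPIDelete(idec);                         // also releases the pixel memory
  return ok;
}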

@@ -0,0 +1,649 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// functions for sample output.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include "src/dec/vp8i_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/dsp/dsp.h"
#include "src/dsp/yuv.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
// Main YUV<->RGB conversion functions
static int EmitYUV(const VP8Io* const io, WebPDecParams* const p) {
WebPDecBuffer* output = p->output;
const WebPYUVABuffer* const buf = &output->u.YUVA;
uint8_t* const y_dst = buf->y + io->mb_y * buf->y_stride;
uint8_t* const u_dst = buf->u + (io->mb_y >> 1) * buf->u_stride;
uint8_t* const v_dst = buf->v + (io->mb_y >> 1) * buf->v_stride;
const int mb_w = io->mb_w;
const int mb_h = io->mb_h;
const int uv_w = (mb_w + 1) / 2;
const int uv_h = (mb_h + 1) / 2;
int j;
for (j = 0; j < mb_h; ++j) {
memcpy(y_dst + j * buf->y_stride, io->y + j * io->y_stride, mb_w);
}
for (j = 0; j < uv_h; ++j) {
memcpy(u_dst + j * buf->u_stride, io->u + j * io->uv_stride, uv_w);
memcpy(v_dst + j * buf->v_stride, io->v + j * io->uv_stride, uv_w);
}
return io->mb_h;
}
// Point-sampling U/V sampler.
static int EmitSampledRGB(const VP8Io* const io, WebPDecParams* const p) {
WebPDecBuffer* const output = p->output;
WebPRGBABuffer* const buf = &output->u.RGBA;
uint8_t* const dst = buf->rgba + io->mb_y * buf->stride;
WebPSamplerProcessPlane(io->y, io->y_stride,
io->u, io->v, io->uv_stride,
dst, buf->stride, io->mb_w, io->mb_h,
WebPSamplers[output->colorspace]);
return io->mb_h;
}
//------------------------------------------------------------------------------
// Fancy upsampling
#ifdef FANCY_UPSAMPLING
static int EmitFancyRGB(const VP8Io* const io, WebPDecParams* const p) {
int num_lines_out = io->mb_h; // a priori guess
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* dst = buf->rgba + io->mb_y * buf->stride;
WebPUpsampleLinePairFunc upsample = WebPUpsamplers[p->output->colorspace];
const uint8_t* cur_y = io->y;
const uint8_t* cur_u = io->u;
const uint8_t* cur_v = io->v;
const uint8_t* top_u = p->tmp_u;
const uint8_t* top_v = p->tmp_v;
int y = io->mb_y;
const int y_end = io->mb_y + io->mb_h;
const int mb_w = io->mb_w;
const int uv_w = (mb_w + 1) / 2;
if (y == 0) {
// First line is special cased. We mirror the u/v samples at boundary.
upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v, dst, NULL, mb_w);
} else {
// We can finish the left-over line from previous call.
upsample(p->tmp_y, cur_y, top_u, top_v, cur_u, cur_v,
dst - buf->stride, dst, mb_w);
++num_lines_out;
}
// Loop over each pair of output rows.
for (; y + 2 < y_end; y += 2) {
top_u = cur_u;
top_v = cur_v;
cur_u += io->uv_stride;
cur_v += io->uv_stride;
dst += 2 * buf->stride;
cur_y += 2 * io->y_stride;
upsample(cur_y - io->y_stride, cur_y,
top_u, top_v, cur_u, cur_v,
dst - buf->stride, dst, mb_w);
}
// move to last row
cur_y += io->y_stride;
if (io->crop_top + y_end < io->crop_bottom) {
// Save the unfinished samples for next call (as we're not done yet).
memcpy(p->tmp_y, cur_y, mb_w * sizeof(*p->tmp_y));
memcpy(p->tmp_u, cur_u, uv_w * sizeof(*p->tmp_u));
memcpy(p->tmp_v, cur_v, uv_w * sizeof(*p->tmp_v));
// The fancy upsampler leaves a row unfinished behind
// (except for the very last row)
num_lines_out--;
} else {
// Process the very last row of even-sized picture
if (!(y_end & 1)) {
upsample(cur_y, NULL, cur_u, cur_v, cur_u, cur_v,
dst + buf->stride, NULL, mb_w);
}
}
return num_lines_out;
}
#endif /* FANCY_UPSAMPLING */
//------------------------------------------------------------------------------
static void FillAlphaPlane(uint8_t* dst, int w, int h, int stride) {
int j;
for (j = 0; j < h; ++j) {
memset(dst, 0xff, w * sizeof(*dst));
dst += stride;
}
}
static int EmitAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
const uint8_t* alpha = io->a;
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
const int mb_w = io->mb_w;
const int mb_h = io->mb_h;
uint8_t* dst = buf->a + io->mb_y * buf->a_stride;
int j;
(void)expected_num_lines_out;
assert(expected_num_lines_out == mb_h);
if (alpha != NULL) {
for (j = 0; j < mb_h; ++j) {
memcpy(dst, alpha, mb_w * sizeof(*dst));
alpha += io->width;
dst += buf->a_stride;
}
} else if (buf->a != NULL) {
// the user requested alpha, but there is none, set it to opaque.
FillAlphaPlane(dst, mb_w, mb_h, buf->a_stride);
}
return 0;
}
static int GetAlphaSourceRow(const VP8Io* const io,
const uint8_t** alpha, int* const num_rows) {
int start_y = io->mb_y;
*num_rows = io->mb_h;
// Compensate for the 1-line delay of the fancy upscaler.
// This is similar to EmitFancyRGB().
if (io->fancy_upsampling) {
if (start_y == 0) {
// We don't process the last row yet. It'll be done during the next call.
--*num_rows;
} else {
--start_y;
// Fortunately, *alpha data is persistent, so we can go back
// one row and finish alpha blending, now that the fancy upscaler
// completed the YUV->RGB interpolation.
*alpha -= io->width;
}
if (io->crop_top + io->mb_y + io->mb_h == io->crop_bottom) {
// If it's the very last call, we process all the remaining rows!
*num_rows = io->crop_bottom - io->crop_top - start_y;
}
}
return start_y;
}
static int EmitAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
const uint8_t* alpha = io->a;
if (alpha != NULL) {
const int mb_w = io->mb_w;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const int alpha_first =
(colorspace == MODE_ARGB || colorspace == MODE_Argb);
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
int num_rows;
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
uint8_t* const dst = base_rgba + (alpha_first ? 0 : 3);
const int has_alpha = WebPDispatchAlpha(alpha, io->width, mb_w,
num_rows, dst, buf->stride);
(void)expected_num_lines_out;
assert(expected_num_lines_out == num_rows);
// has_alpha is true if there's non-trivial alpha to premultiply with.
if (has_alpha && WebPIsPremultipliedMode(colorspace)) {
WebPApplyAlphaMultiply(base_rgba, alpha_first,
mb_w, num_rows, buf->stride);
}
}
return 0;
}
static int EmitAlphaRGBA4444(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
const uint8_t* alpha = io->a;
if (alpha != NULL) {
const int mb_w = io->mb_w;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
int num_rows;
const int start_y = GetAlphaSourceRow(io, &alpha, &num_rows);
uint8_t* const base_rgba = buf->rgba + start_y * buf->stride;
#if (WEBP_SWAP_16BIT_CSP == 1)
uint8_t* alpha_dst = base_rgba;
#else
uint8_t* alpha_dst = base_rgba + 1;
#endif
uint32_t alpha_mask = 0x0f;
int i, j;
for (j = 0; j < num_rows; ++j) {
for (i = 0; i < mb_w; ++i) {
// Fill in the alpha value (converted to 4 bits).
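// e.g. an 8-bit alpha of 0x9c keeps only its high nibble (0x9), which
// replaces the low 4 bits of the byte holding the A channel.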
const uint32_t alpha_value = alpha[i] >> 4;
alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
alpha_mask &= alpha_value;
}
alpha += io->width;
alpha_dst += buf->stride;
}
(void)expected_num_lines_out;
assert(expected_num_lines_out == num_rows);
if (alpha_mask != 0x0f && WebPIsPremultipliedMode(colorspace)) {
WebPApplyAlphaMultiply4444(base_rgba, mb_w, num_rows, buf->stride);
}
}
return 0;
}
//------------------------------------------------------------------------------
// YUV rescaling (no final RGB conversion needed)
#if !defined(WEBP_REDUCE_SIZE)
static int Rescale(const uint8_t* src, int src_stride,
int new_lines, WebPRescaler* const wrk) {
int num_lines_out = 0;
while (new_lines > 0) { // import new contributions of source rows.
const int lines_in = WebPRescalerImport(wrk, new_lines, src, src_stride);
src += lines_in * src_stride;
new_lines -= lines_in;
num_lines_out += WebPRescalerExport(wrk); // emit output row(s)
}
return num_lines_out;
}
static int EmitRescaledYUV(const VP8Io* const io, WebPDecParams* const p) {
const int mb_h = io->mb_h;
const int uv_mb_h = (mb_h + 1) >> 1;
WebPRescaler* const scaler = p->scaler_y;
int num_lines_out = 0;
if (WebPIsAlphaMode(p->output->colorspace) && io->a != NULL) {
// Before rescaling, we premultiply the luma directly into the io->y
// internal buffer. This is OK since these samples are not used for
// intra-prediction (the top samples are saved in cache_y_/u_/v_).
// We need to cast the const away, though.
WebPMultRows((uint8_t*)io->y, io->y_stride,
io->a, io->width, io->mb_w, mb_h, 0);
}
num_lines_out = Rescale(io->y, io->y_stride, mb_h, scaler);
Rescale(io->u, io->uv_stride, uv_mb_h, p->scaler_u);
Rescale(io->v, io->uv_stride, uv_mb_h, p->scaler_v);
return num_lines_out;
}
static int EmitRescaledAlphaYUV(const VP8Io* const io, WebPDecParams* const p,
int expected_num_lines_out) {
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
uint8_t* const dst_a = buf->a + p->last_y * buf->a_stride;
if (io->a != NULL) {
uint8_t* const dst_y = buf->y + p->last_y * buf->y_stride;
const int num_lines_out = Rescale(io->a, io->width, io->mb_h, p->scaler_a);
assert(expected_num_lines_out == num_lines_out);
if (num_lines_out > 0) { // unmultiply the Y
WebPMultRows(dst_y, buf->y_stride, dst_a, buf->a_stride,
p->scaler_a->dst_width, num_lines_out, 1);
}
} else if (buf->a != NULL) {
// the user requested alpha, but there is none, set it to opaque.
assert(p->last_y + expected_num_lines_out <= io->scaled_height);
FillAlphaPlane(dst_a, io->scaled_width, expected_num_lines_out,
buf->a_stride);
}
return 0;
}
static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) {
const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
const WebPYUVABuffer* const buf = &p->output->u.YUVA;
const int out_width = io->scaled_width;
const int out_height = io->scaled_height;
const int uv_out_width = (out_width + 1) >> 1;
const int uv_out_height = (out_height + 1) >> 1;
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_height = (io->mb_h + 1) >> 1;
const size_t work_size = 2 * out_width; // scratch memory for luma rescaler
const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones
size_t tmp_size, rescaler_size;
rescaler_t* work;
WebPRescaler* scalers;
const int num_rescalers = has_alpha ? 4 : 3;
tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work);
if (has_alpha) {
tmp_size += work_size * sizeof(*work);
}
rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST;
p->memory = WebPSafeMalloc(1ULL, tmp_size + rescaler_size);
if (p->memory == NULL) {
return 0; // memory error
}
work = (rescaler_t*)p->memory;
scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + tmp_size);
p->scaler_y = &scalers[0];
p->scaler_u = &scalers[1];
p->scaler_v = &scalers[2];
p->scaler_a = has_alpha ? &scalers[3] : NULL;
WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
buf->y, out_width, out_height, buf->y_stride, 1,
work);
WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
buf->u, uv_out_width, uv_out_height, buf->u_stride, 1,
work + work_size);
WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
buf->v, uv_out_width, uv_out_height, buf->v_stride, 1,
work + work_size + uv_work_size);
p->emit = EmitRescaledYUV;
if (has_alpha) {
WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
buf->a, out_width, out_height, buf->a_stride, 1,
work + work_size + 2 * uv_work_size);
p->emit_alpha = EmitRescaledAlphaYUV;
WebPInitAlphaProcessing();
}
return 1;
}
//------------------------------------------------------------------------------
// RGBA rescaling
static int ExportRGB(WebPDecParams* const p, int y_pos) {
const WebPYUV444Converter convert =
WebPYUV444Converters[p->output->colorspace];
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* dst = buf->rgba + y_pos * buf->stride;
int num_lines_out = 0;
// For RGB rescaling, because of the YUV 4:2:0 subsampling, the current U/V
// scan position can be +1/-1 line relative to the Y one. Hence the double test.
while (WebPRescalerHasPendingOutput(p->scaler_y) &&
WebPRescalerHasPendingOutput(p->scaler_u)) {
assert(y_pos + num_lines_out < p->output->height);
assert(p->scaler_u->y_accum == p->scaler_v->y_accum);
WebPRescalerExportRow(p->scaler_y);
WebPRescalerExportRow(p->scaler_u);
WebPRescalerExportRow(p->scaler_v);
convert(p->scaler_y->dst, p->scaler_u->dst, p->scaler_v->dst,
dst, p->scaler_y->dst_width);
dst += buf->stride;
++num_lines_out;
}
return num_lines_out;
}
static int EmitRescaledRGB(const VP8Io* const io, WebPDecParams* const p) {
const int mb_h = io->mb_h;
const int uv_mb_h = (mb_h + 1) >> 1;
int j = 0, uv_j = 0;
int num_lines_out = 0;
while (j < mb_h) {
const int y_lines_in =
WebPRescalerImport(p->scaler_y, mb_h - j,
io->y + j * io->y_stride, io->y_stride);
j += y_lines_in;
if (WebPRescaleNeededLines(p->scaler_u, uv_mb_h - uv_j)) {
const int u_lines_in =
WebPRescalerImport(p->scaler_u, uv_mb_h - uv_j,
io->u + uv_j * io->uv_stride, io->uv_stride);
const int v_lines_in =
WebPRescalerImport(p->scaler_v, uv_mb_h - uv_j,
io->v + uv_j * io->uv_stride, io->uv_stride);
(void)v_lines_in; // remove a gcc warning
assert(u_lines_in == v_lines_in);
uv_j += u_lines_in;
}
num_lines_out += ExportRGB(p, p->last_y + num_lines_out);
}
return num_lines_out;
}
static int ExportAlpha(WebPDecParams* const p, int y_pos, int max_lines_out) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const int alpha_first =
(colorspace == MODE_ARGB || colorspace == MODE_Argb);
uint8_t* dst = base_rgba + (alpha_first ? 0 : 3);
int num_lines_out = 0;
const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
uint32_t non_opaque = 0;
const int width = p->scaler_a->dst_width;
while (WebPRescalerHasPendingOutput(p->scaler_a) &&
num_lines_out < max_lines_out) {
assert(y_pos + num_lines_out < p->output->height);
WebPRescalerExportRow(p->scaler_a);
non_opaque |= WebPDispatchAlpha(p->scaler_a->dst, 0, width, 1, dst, 0);
dst += buf->stride;
++num_lines_out;
}
if (is_premult_alpha && non_opaque) {
WebPApplyAlphaMultiply(base_rgba, alpha_first,
width, num_lines_out, buf->stride);
}
return num_lines_out;
}
static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos,
int max_lines_out) {
const WebPRGBABuffer* const buf = &p->output->u.RGBA;
uint8_t* const base_rgba = buf->rgba + y_pos * buf->stride;
#if (WEBP_SWAP_16BIT_CSP == 1)
uint8_t* alpha_dst = base_rgba;
#else
uint8_t* alpha_dst = base_rgba + 1;
#endif
int num_lines_out = 0;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const int width = p->scaler_a->dst_width;
const int is_premult_alpha = WebPIsPremultipliedMode(colorspace);
uint32_t alpha_mask = 0x0f;
while (WebPRescalerHasPendingOutput(p->scaler_a) &&
num_lines_out < max_lines_out) {
int i;
assert(y_pos + num_lines_out < p->output->height);
WebPRescalerExportRow(p->scaler_a);
for (i = 0; i < width; ++i) {
// Fill in the alpha value (converted to 4 bits).
const uint32_t alpha_value = p->scaler_a->dst[i] >> 4;
alpha_dst[2 * i] = (alpha_dst[2 * i] & 0xf0) | alpha_value;
alpha_mask &= alpha_value;
}
alpha_dst += buf->stride;
++num_lines_out;
}
if (is_premult_alpha && alpha_mask != 0x0f) {
WebPApplyAlphaMultiply4444(base_rgba, width, num_lines_out, buf->stride);
}
return num_lines_out;
}
static int EmitRescaledAlphaRGB(const VP8Io* const io, WebPDecParams* const p,
int expected_num_out_lines) {
if (io->a != NULL) {
WebPRescaler* const scaler = p->scaler_a;
int lines_left = expected_num_out_lines;
const int y_end = p->last_y + lines_left;
while (lines_left > 0) {
const int row_offset = scaler->src_y - io->mb_y;
WebPRescalerImport(scaler, io->mb_h + io->mb_y - scaler->src_y,
io->a + row_offset * io->width, io->width);
lines_left -= p->emit_alpha_row(p, y_end - lines_left, lines_left);
}
}
return 0;
}
static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) {
const int has_alpha = WebPIsAlphaMode(p->output->colorspace);
const int out_width = io->scaled_width;
const int out_height = io->scaled_height;
const int uv_in_width = (io->mb_w + 1) >> 1;
const int uv_in_height = (io->mb_h + 1) >> 1;
const size_t work_size = 2 * out_width; // scratch memory for one rescaler
rescaler_t* work; // rescalers work area
uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion
size_t tmp_size1, tmp_size2, total_size, rescaler_size;
WebPRescaler* scalers;
const int num_rescalers = has_alpha ? 4 : 3;
tmp_size1 = 3 * work_size;
tmp_size2 = 3 * out_width;
if (has_alpha) {
tmp_size1 += work_size;
tmp_size2 += out_width;
}
total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp);
rescaler_size = num_rescalers * sizeof(*p->scaler_y) + WEBP_ALIGN_CST;
p->memory = WebPSafeMalloc(1ULL, total_size + rescaler_size);
if (p->memory == NULL) {
return 0; // memory error
}
work = (rescaler_t*)p->memory;
tmp = (uint8_t*)(work + tmp_size1);
scalers = (WebPRescaler*)WEBP_ALIGN((const uint8_t*)work + total_size);
p->scaler_y = &scalers[0];
p->scaler_u = &scalers[1];
p->scaler_v = &scalers[2];
p->scaler_a = has_alpha ? &scalers[3] : NULL;
WebPRescalerInit(p->scaler_y, io->mb_w, io->mb_h,
tmp + 0 * out_width, out_width, out_height, 0, 1,
work + 0 * work_size);
WebPRescalerInit(p->scaler_u, uv_in_width, uv_in_height,
tmp + 1 * out_width, out_width, out_height, 0, 1,
work + 1 * work_size);
WebPRescalerInit(p->scaler_v, uv_in_width, uv_in_height,
tmp + 2 * out_width, out_width, out_height, 0, 1,
work + 2 * work_size);
p->emit = EmitRescaledRGB;
WebPInitYUV444Converters();
if (has_alpha) {
WebPRescalerInit(p->scaler_a, io->mb_w, io->mb_h,
tmp + 3 * out_width, out_width, out_height, 0, 1,
work + 3 * work_size);
p->emit_alpha = EmitRescaledAlphaRGB;
if (p->output->colorspace == MODE_RGBA_4444 ||
p->output->colorspace == MODE_rgbA_4444) {
p->emit_alpha_row = ExportAlphaRGBA4444;
} else {
p->emit_alpha_row = ExportAlpha;
}
WebPInitAlphaProcessing();
}
return 1;
}
#endif // WEBP_REDUCE_SIZE
//------------------------------------------------------------------------------
// Default custom functions
static int CustomSetup(VP8Io* io) {
WebPDecParams* const p = (WebPDecParams*)io->opaque;
const WEBP_CSP_MODE colorspace = p->output->colorspace;
const int is_rgb = WebPIsRGBMode(colorspace);
const int is_alpha = WebPIsAlphaMode(colorspace);
p->memory = NULL;
p->emit = NULL;
p->emit_alpha = NULL;
p->emit_alpha_row = NULL;
if (!WebPIoInitFromOptions(p->options, io, is_alpha ? MODE_YUV : MODE_YUVA)) {
return 0;
}
if (is_alpha && WebPIsPremultipliedMode(colorspace)) {
WebPInitUpsamplers();
}
if (io->use_scaling) {
#if !defined(WEBP_REDUCE_SIZE)
const int ok = is_rgb ? InitRGBRescaler(io, p) : InitYUVRescaler(io, p);
if (!ok) {
return 0; // memory error
}
#else
return 0; // rescaling support not compiled
#endif
} else {
if (is_rgb) {
WebPInitSamplers();
p->emit = EmitSampledRGB; // default
if (io->fancy_upsampling) {
#ifdef FANCY_UPSAMPLING
const int uv_width = (io->mb_w + 1) >> 1;
p->memory = WebPSafeMalloc(1ULL, (size_t)(io->mb_w + 2 * uv_width));
if (p->memory == NULL) {
return 0; // memory error.
}
p->tmp_y = (uint8_t*)p->memory;
p->tmp_u = p->tmp_y + io->mb_w;
p->tmp_v = p->tmp_u + uv_width;
p->emit = EmitFancyRGB;
WebPInitUpsamplers();
#endif
}
} else {
p->emit = EmitYUV;
}
if (is_alpha) { // need transparency output
p->emit_alpha =
(colorspace == MODE_RGBA_4444 || colorspace == MODE_rgbA_4444) ?
EmitAlphaRGBA4444
: is_rgb ? EmitAlphaRGB
: EmitAlphaYUV;
if (is_rgb) {
WebPInitAlphaProcessing();
}
}
}
return 1;
}
//------------------------------------------------------------------------------
static int CustomPut(const VP8Io* io) {
WebPDecParams* const p = (WebPDecParams*)io->opaque;
const int mb_w = io->mb_w;
const int mb_h = io->mb_h;
int num_lines_out;
assert(!(io->mb_y & 1));
if (mb_w <= 0 || mb_h <= 0) {
return 0;
}
num_lines_out = p->emit(io, p);
if (p->emit_alpha != NULL) {
p->emit_alpha(io, p, num_lines_out);
}
p->last_y += num_lines_out;
return 1;
}
//------------------------------------------------------------------------------
static void CustomTeardown(const VP8Io* io) {
WebPDecParams* const p = (WebPDecParams*)io->opaque;
WebPSafeFree(p->memory);
p->memory = NULL;
}
//------------------------------------------------------------------------------
// Main entry point
void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io) {
io->put = CustomPut;
io->setup = CustomSetup;
io->teardown = CustomTeardown;
io->opaque = params;
}
//------------------------------------------------------------------------------

Просмотреть файл

@@ -0,0 +1,30 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
with Files('**'):
BUG_COMPONENT = ('Core', 'ImageLib')
SOURCES += [
'alpha_dec.c',
'buffer_dec.c',
'frame_dec.c',
'idec_dec.c',
'io_dec.c',
'quant_dec.c',
'tree_dec.c',
'vp8_dec.c',
'vp8l_dec.c',
'webp_dec.c',
]
LOCAL_INCLUDES += [
'/media/libwebp',
]
FINAL_LIBRARY = 'gkmedias'
# We allow warnings for third-party code that can be updated from upstream.
AllowCompilerWarnings()

Просмотреть файл

@@ -0,0 +1,110 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Quantizer initialization
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dec/vp8i_dec.h"
static WEBP_INLINE int clip(int v, int M) {
return v < 0 ? 0 : v > M ? M : v;
}
// Paragraph 14.1
static const uint8_t kDcTable[128] = {
4, 5, 6, 7, 8, 9, 10, 10,
11, 12, 13, 14, 15, 16, 17, 17,
18, 19, 20, 20, 21, 21, 22, 22,
23, 23, 24, 25, 25, 26, 27, 28,
29, 30, 31, 32, 33, 34, 35, 36,
37, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66,
67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89,
91, 93, 95, 96, 98, 100, 101, 102,
104, 106, 108, 110, 112, 114, 116, 118,
122, 124, 126, 128, 130, 132, 134, 136,
138, 140, 143, 145, 148, 151, 154, 157
};
static const uint16_t kAcTable[128] = {
4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35,
36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 60,
62, 64, 66, 68, 70, 72, 74, 76,
78, 80, 82, 84, 86, 88, 90, 92,
94, 96, 98, 100, 102, 104, 106, 108,
110, 112, 114, 116, 119, 122, 125, 128,
131, 134, 137, 140, 143, 146, 149, 152,
155, 158, 161, 164, 167, 170, 173, 177,
181, 185, 189, 193, 197, 201, 205, 209,
213, 217, 221, 225, 229, 234, 239, 245,
249, 254, 259, 264, 269, 274, 279, 284
};
//------------------------------------------------------------------------------
// Paragraph 9.6
void VP8ParseQuant(VP8Decoder* const dec) {
VP8BitReader* const br = &dec->br_;
const int base_q0 = VP8GetValue(br, 7);
const int dqy1_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
const int dqy2_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
const int dqy2_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
const int dquv_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
const int dquv_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0;
const VP8SegmentHeader* const hdr = &dec->segment_hdr_;
int i;
for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
int q;
if (hdr->use_segment_) {
q = hdr->quantizer_[i];
if (!hdr->absolute_delta_) {
q += base_q0;
}
} else {
if (i > 0) {
dec->dqm_[i] = dec->dqm_[0];
continue;
} else {
q = base_q0;
}
}
{
VP8QuantMatrix* const m = &dec->dqm_[i];
m->y1_mat_[0] = kDcTable[clip(q + dqy1_dc, 127)];
m->y1_mat_[1] = kAcTable[clip(q + 0, 127)];
m->y2_mat_[0] = kDcTable[clip(q + dqy2_dc, 127)] * 2;
// For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16.
// The smallest precision for that is '(x*6349) >> 12' but 16 is a good
// word size.
m->y2_mat_[1] = (kAcTable[clip(q + dqy2_ac, 127)] * 101581) >> 16;
if (m->y2_mat_[1] < 8) m->y2_mat_[1] = 8;
m->uv_mat_[0] = kDcTable[clip(q + dquv_dc, 117)];
m->uv_mat_[1] = kAcTable[clip(q + dquv_ac, 127)];
m->uv_quant_ = q + dquv_ac; // for dithering strength evaluation
}
}
}
//------------------------------------------------------------------------------
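
// A small standalone check (not part of libwebp; compile it separately)
// verifying the fixed-point identity relied upon in VP8ParseQuant() above:
// for every x in [0..284] (284 is the largest entry of kAcTable[]),
// (x * 101581) >> 16 equals x * 155 / 100.

#include <assert.h>

int main(void) {
  int x;
  for (x = 0; x <= 284; ++x) {
    assert(((x * 101581) >> 16) == (x * 155) / 100);
  }
  return 0;
}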

Просмотреть файл

@@ -0,0 +1,532 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Coding trees and probas
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dec/vp8i_dec.h"
#include "src/utils/bit_reader_inl_utils.h"
#if !defined(USE_GENERIC_TREE)
#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__)
// Using a table is ~1-2% slower on ARM, so the hard-coded tree approach is preferred there.
#define USE_GENERIC_TREE 1 // ALTERNATE_CODE
#else
#define USE_GENERIC_TREE 0
#endif
#endif // USE_GENERIC_TREE
#if (USE_GENERIC_TREE == 1)
static const int8_t kYModesIntra4[18] = {
-B_DC_PRED, 1,
-B_TM_PRED, 2,
-B_VE_PRED, 3,
4, 6,
-B_HE_PRED, 5,
-B_RD_PRED, -B_VR_PRED,
-B_LD_PRED, 7,
-B_VL_PRED, 8,
-B_HD_PRED, -B_HU_PRED
};
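// Each entry pair {2*i, 2*i + 1} of kYModesIntra4 encodes one node of the
// intra-4x4 mode tree: a negative value is a leaf (the mode, negated) and a
// positive value is the index of the next node, as walked in ParseIntraMode()
// below.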
#endif
//------------------------------------------------------------------------------
// Default probabilities
// Paragraph 13.5
static const uint8_t
CoeffsProba0[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
{ { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 },
{ 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 },
{ 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }
},
{ { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 },
{ 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 },
{ 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 },
},
{ { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 },
{ 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 },
{ 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 },
},
{ { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 },
{ 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 },
{ 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }
},
{ { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 },
{ 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 },
{ 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }
},
{ { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 },
{ 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 },
{ 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }
},
{ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
}
},
{ { { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 },
{ 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 },
{ 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }
},
{ { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 },
{ 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 },
{ 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }
},
{ { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 },
{ 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 },
{ 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }
},
{ { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 },
{ 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 },
{ 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }
},
{ { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 },
{ 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 },
{ 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }
},
{ { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 },
{ 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 },
{ 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }
},
{ { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 },
{ 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 },
{ 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }
},
{ { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 },
{ 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }
}
},
{ { { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 },
{ 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 },
{ 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }
},
{ { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 },
{ 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 },
{ 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }
},
{ { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 },
{ 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 },
{ 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }
},
{ { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 },
{ 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }
},
{ { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 },
{ 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 },
{ 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
},
{ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }
}
},
{ { { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 },
{ 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 },
{ 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }
},
{ { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 },
{ 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 },
{ 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }
},
{ { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 },
{ 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 },
{ 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }
},
{ { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 },
{ 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 },
{ 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }
},
{ { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 },
{ 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 },
{ 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }
},
{ { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 },
{ 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 },
{ 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }
},
{ { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 },
{ 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 },
{ 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }
},
{ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 },
{ 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }
}
}
};
// Paragraph 11.5
static const uint8_t kBModesProba[NUM_BMODES][NUM_BMODES][NUM_BMODES - 1] = {
{ { 231, 120, 48, 89, 115, 113, 120, 152, 112 },
{ 152, 179, 64, 126, 170, 118, 46, 70, 95 },
{ 175, 69, 143, 80, 85, 82, 72, 155, 103 },
{ 56, 58, 10, 171, 218, 189, 17, 13, 152 },
{ 114, 26, 17, 163, 44, 195, 21, 10, 173 },
{ 121, 24, 80, 195, 26, 62, 44, 64, 85 },
{ 144, 71, 10, 38, 171, 213, 144, 34, 26 },
{ 170, 46, 55, 19, 136, 160, 33, 206, 71 },
{ 63, 20, 8, 114, 114, 208, 12, 9, 226 },
{ 81, 40, 11, 96, 182, 84, 29, 16, 36 } },
{ { 134, 183, 89, 137, 98, 101, 106, 165, 148 },
{ 72, 187, 100, 130, 157, 111, 32, 75, 80 },
{ 66, 102, 167, 99, 74, 62, 40, 234, 128 },
{ 41, 53, 9, 178, 241, 141, 26, 8, 107 },
{ 74, 43, 26, 146, 73, 166, 49, 23, 157 },
{ 65, 38, 105, 160, 51, 52, 31, 115, 128 },
{ 104, 79, 12, 27, 217, 255, 87, 17, 7 },
{ 87, 68, 71, 44, 114, 51, 15, 186, 23 },
{ 47, 41, 14, 110, 182, 183, 21, 17, 194 },
{ 66, 45, 25, 102, 197, 189, 23, 18, 22 } },
{ { 88, 88, 147, 150, 42, 46, 45, 196, 205 },
{ 43, 97, 183, 117, 85, 38, 35, 179, 61 },
{ 39, 53, 200, 87, 26, 21, 43, 232, 171 },
{ 56, 34, 51, 104, 114, 102, 29, 93, 77 },
{ 39, 28, 85, 171, 58, 165, 90, 98, 64 },
{ 34, 22, 116, 206, 23, 34, 43, 166, 73 },
{ 107, 54, 32, 26, 51, 1, 81, 43, 31 },
{ 68, 25, 106, 22, 64, 171, 36, 225, 114 },
{ 34, 19, 21, 102, 132, 188, 16, 76, 124 },
{ 62, 18, 78, 95, 85, 57, 50, 48, 51 } },
{ { 193, 101, 35, 159, 215, 111, 89, 46, 111 },
{ 60, 148, 31, 172, 219, 228, 21, 18, 111 },
{ 112, 113, 77, 85, 179, 255, 38, 120, 114 },
{ 40, 42, 1, 196, 245, 209, 10, 25, 109 },
{ 88, 43, 29, 140, 166, 213, 37, 43, 154 },
{ 61, 63, 30, 155, 67, 45, 68, 1, 209 },
{ 100, 80, 8, 43, 154, 1, 51, 26, 71 },
{ 142, 78, 78, 16, 255, 128, 34, 197, 171 },
{ 41, 40, 5, 102, 211, 183, 4, 1, 221 },
{ 51, 50, 17, 168, 209, 192, 23, 25, 82 } },
{ { 138, 31, 36, 171, 27, 166, 38, 44, 229 },
{ 67, 87, 58, 169, 82, 115, 26, 59, 179 },
{ 63, 59, 90, 180, 59, 166, 93, 73, 154 },
{ 40, 40, 21, 116, 143, 209, 34, 39, 175 },
{ 47, 15, 16, 183, 34, 223, 49, 45, 183 },
{ 46, 17, 33, 183, 6, 98, 15, 32, 183 },
{ 57, 46, 22, 24, 128, 1, 54, 17, 37 },
{ 65, 32, 73, 115, 28, 128, 23, 128, 205 },
{ 40, 3, 9, 115, 51, 192, 18, 6, 223 },
{ 87, 37, 9, 115, 59, 77, 64, 21, 47 } },
{ { 104, 55, 44, 218, 9, 54, 53, 130, 226 },
{ 64, 90, 70, 205, 40, 41, 23, 26, 57 },
{ 54, 57, 112, 184, 5, 41, 38, 166, 213 },
{ 30, 34, 26, 133, 152, 116, 10, 32, 134 },
{ 39, 19, 53, 221, 26, 114, 32, 73, 255 },
{ 31, 9, 65, 234, 2, 15, 1, 118, 73 },
{ 75, 32, 12, 51, 192, 255, 160, 43, 51 },
{ 88, 31, 35, 67, 102, 85, 55, 186, 85 },
{ 56, 21, 23, 111, 59, 205, 45, 37, 192 },
{ 55, 38, 70, 124, 73, 102, 1, 34, 98 } },
{ { 125, 98, 42, 88, 104, 85, 117, 175, 82 },
{ 95, 84, 53, 89, 128, 100, 113, 101, 45 },
{ 75, 79, 123, 47, 51, 128, 81, 171, 1 },
{ 57, 17, 5, 71, 102, 57, 53, 41, 49 },
{ 38, 33, 13, 121, 57, 73, 26, 1, 85 },
{ 41, 10, 67, 138, 77, 110, 90, 47, 114 },
{ 115, 21, 2, 10, 102, 255, 166, 23, 6 },
{ 101, 29, 16, 10, 85, 128, 101, 196, 26 },
{ 57, 18, 10, 102, 102, 213, 34, 20, 43 },
{ 117, 20, 15, 36, 163, 128, 68, 1, 26 } },
{ { 102, 61, 71, 37, 34, 53, 31, 243, 192 },
{ 69, 60, 71, 38, 73, 119, 28, 222, 37 },
{ 68, 45, 128, 34, 1, 47, 11, 245, 171 },
{ 62, 17, 19, 70, 146, 85, 55, 62, 70 },
{ 37, 43, 37, 154, 100, 163, 85, 160, 1 },
{ 63, 9, 92, 136, 28, 64, 32, 201, 85 },
{ 75, 15, 9, 9, 64, 255, 184, 119, 16 },
{ 86, 6, 28, 5, 64, 255, 25, 248, 1 },
{ 56, 8, 17, 132, 137, 255, 55, 116, 128 },
{ 58, 15, 20, 82, 135, 57, 26, 121, 40 } },
{ { 164, 50, 31, 137, 154, 133, 25, 35, 218 },
{ 51, 103, 44, 131, 131, 123, 31, 6, 158 },
{ 86, 40, 64, 135, 148, 224, 45, 183, 128 },
{ 22, 26, 17, 131, 240, 154, 14, 1, 209 },
{ 45, 16, 21, 91, 64, 222, 7, 1, 197 },
{ 56, 21, 39, 155, 60, 138, 23, 102, 213 },
{ 83, 12, 13, 54, 192, 255, 68, 47, 28 },
{ 85, 26, 85, 85, 128, 128, 32, 146, 171 },
{ 18, 11, 7, 63, 144, 171, 4, 4, 246 },
{ 35, 27, 10, 146, 174, 171, 12, 26, 128 } },
{ { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
{ 85, 126, 47, 87, 176, 51, 41, 20, 32 },
{ 101, 75, 128, 139, 118, 146, 116, 128, 85 },
{ 56, 41, 15, 176, 236, 85, 37, 9, 62 },
{ 71, 30, 17, 119, 118, 255, 17, 18, 138 },
{ 101, 38, 60, 138, 55, 70, 43, 26, 142 },
{ 146, 36, 19, 30, 171, 255, 97, 27, 20 },
{ 138, 45, 61, 62, 219, 1, 81, 188, 64 },
{ 32, 41, 20, 117, 151, 142, 20, 21, 163 },
{ 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
};
void VP8ResetProba(VP8Proba* const proba) {
memset(proba->segments_, 255u, sizeof(proba->segments_));
// proba->bands_[][] is initialized later
}
static void ParseIntraMode(VP8BitReader* const br,
VP8Decoder* const dec, int mb_x) {
uint8_t* const top = dec->intra_t_ + 4 * mb_x;
uint8_t* const left = dec->intra_l_;
VP8MBData* const block = dec->mb_data_ + mb_x;
// Note: we don't save the segment map (yet), as we don't expect
// to decode more than one keyframe.
if (dec->segment_hdr_.update_map_) {
// Hardcoded tree parsing
block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0])
? VP8GetBit(br, dec->proba_.segments_[1])
: 2 + VP8GetBit(br, dec->proba_.segments_[2]);
} else {
block->segment_ = 0; // default for intra
}
if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_);
block->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first
if (!block->is_i4x4_) {
// Hardcoded 16x16 intra-mode decision tree.
const int ymode =
VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED)
: (VP8GetBit(br, 163) ? V_PRED : DC_PRED);
block->imodes_[0] = ymode;
memset(top, ymode, 4 * sizeof(*top));
memset(left, ymode, 4 * sizeof(*left));
} else {
uint8_t* modes = block->imodes_;
int y;
for (y = 0; y < 4; ++y) {
int ymode = left[y];
int x;
for (x = 0; x < 4; ++x) {
const uint8_t* const prob = kBModesProba[top[x]][ymode];
#if (USE_GENERIC_TREE == 1)
// Generic tree-parsing
int i = kYModesIntra4[VP8GetBit(br, prob[0])];
while (i > 0) {
i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])];
}
ymode = -i;
#else
// Hardcoded tree parsing
ymode = !VP8GetBit(br, prob[0]) ? B_DC_PRED :
!VP8GetBit(br, prob[1]) ? B_TM_PRED :
!VP8GetBit(br, prob[2]) ? B_VE_PRED :
!VP8GetBit(br, prob[3]) ?
(!VP8GetBit(br, prob[4]) ? B_HE_PRED :
(!VP8GetBit(br, prob[5]) ? B_RD_PRED : B_VR_PRED)) :
(!VP8GetBit(br, prob[6]) ? B_LD_PRED :
(!VP8GetBit(br, prob[7]) ? B_VL_PRED :
(!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED)));
#endif // USE_GENERIC_TREE
top[x] = ymode;
}
memcpy(modes, top, 4 * sizeof(*top));
modes += 4;
left[y] = ymode;
}
}
// Hardcoded UVMode decision tree
block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED
: !VP8GetBit(br, 114) ? V_PRED
: VP8GetBit(br, 183) ? TM_PRED : H_PRED;
}
int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
int mb_x;
for (mb_x = 0; mb_x < dec->mb_w_; ++mb_x) {
ParseIntraMode(br, dec, mb_x);
}
return !dec->br_.eof_;
}
//------------------------------------------------------------------------------
// Paragraph 13
static const uint8_t
CoeffsUpdateProba[NUM_TYPES][NUM_BANDS][NUM_CTX][NUM_PROBAS] = {
{ { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
}
},
{ { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 },
{ 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }
},
{ { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
}
},
{ { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }
},
{ { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
}
},
{ { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 },
{ 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
},
{ { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
{ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }
}
}
};
// Paragraph 9.9
static const uint8_t kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel
};
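// kBands[n] maps the coefficient position n to its probability band. The
// 17th entry is a sentinel (aliasing band 0) so that the coefficient parser
// can always dereference prob[n + 1], even at the last position n == 15.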
void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
VP8Proba* const proba = &dec->proba_;
int t, b, c, p;
for (t = 0; t < NUM_TYPES; ++t) {
for (b = 0; b < NUM_BANDS; ++b) {
for (c = 0; c < NUM_CTX; ++c) {
for (p = 0; p < NUM_PROBAS; ++p) {
const int v = VP8GetBit(br, CoeffsUpdateProba[t][b][c][p]) ?
VP8GetValue(br, 8) : CoeffsProba0[t][b][c][p];
proba->bands_[t][b].probas_[c][p] = v;
}
}
}
for (b = 0; b < 16 + 1; ++b) {
proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]];
}
}
dec->use_skip_proba_ = VP8Get(br);
if (dec->use_skip_proba_) {
dec->skip_p_ = VP8GetValue(br, 8);
}
}

Просмотреть файл

@@ -0,0 +1,721 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// main entry for the decoder
//
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "src/dec/alphai_dec.h"
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/bit_reader_inl_utils.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
int WebPGetDecoderVersion(void) {
return (DEC_MAJ_VERSION << 16) | (DEC_MIN_VERSION << 8) | DEC_REV_VERSION;
}
//------------------------------------------------------------------------------
// Signature and pointer-to-function for GetCoeffs() variants below.
typedef int (*GetCoeffsFunc)(VP8BitReader* const br,
const VP8BandProbas* const prob[],
int ctx, const quant_t dq, int n, int16_t* out);
static volatile GetCoeffsFunc GetCoeffs = NULL;
static void InitGetCoeffs(void);
//------------------------------------------------------------------------------
// VP8Decoder
static void SetOk(VP8Decoder* const dec) {
dec->status_ = VP8_STATUS_OK;
dec->error_msg_ = "OK";
}
int VP8InitIoInternal(VP8Io* const io, int version) {
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return 0; // mismatch error
}
if (io != NULL) {
memset(io, 0, sizeof(*io));
}
return 1;
}
VP8Decoder* VP8New(void) {
VP8Decoder* const dec = (VP8Decoder*)WebPSafeCalloc(1ULL, sizeof(*dec));
if (dec != NULL) {
SetOk(dec);
WebPGetWorkerInterface()->Init(&dec->worker_);
dec->ready_ = 0;
dec->num_parts_minus_one_ = 0;
InitGetCoeffs();
}
return dec;
}
VP8StatusCode VP8Status(VP8Decoder* const dec) {
if (!dec) return VP8_STATUS_INVALID_PARAM;
return dec->status_;
}
const char* VP8StatusMessage(VP8Decoder* const dec) {
if (dec == NULL) return "no object";
if (!dec->error_msg_) return "OK";
return dec->error_msg_;
}
void VP8Delete(VP8Decoder* const dec) {
if (dec != NULL) {
VP8Clear(dec);
WebPSafeFree(dec);
}
}
int VP8SetError(VP8Decoder* const dec,
VP8StatusCode error, const char* const msg) {
// The oldest error reported takes precedence over the new one.
if (dec->status_ == VP8_STATUS_OK) {
dec->status_ = error;
dec->error_msg_ = msg;
dec->ready_ = 0;
}
return 0;
}
//------------------------------------------------------------------------------
int VP8CheckSignature(const uint8_t* const data, size_t data_size) {
return (data_size >= 3 &&
data[0] == 0x9d && data[1] == 0x01 && data[2] == 0x2a);
}
int VP8GetInfo(const uint8_t* data, size_t data_size, size_t chunk_size,
int* const width, int* const height) {
if (data == NULL || data_size < VP8_FRAME_HEADER_SIZE) {
return 0; // not enough data
}
// check signature
if (!VP8CheckSignature(data + 3, data_size - 3)) {
return 0; // Wrong signature.
} else {
const uint32_t bits = data[0] | (data[1] << 8) | (data[2] << 16);
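// Frame tag layout (RFC 6386, paragraph 9.1), starting from bit 0:
//   bit 0     : frame type (0 = key frame)
//   bits 1..3 : profile
//   bit 4     : show_frame flag
//   bits 5..23: size of the first partition, in bytes
// e.g. bytes 0x50 0x42 0x00 give bits = 0x004250: a visible key frame,
// profile 0, with a 530-byte first partition.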
const int key_frame = !(bits & 1);
const int w = ((data[7] << 8) | data[6]) & 0x3fff;
const int h = ((data[9] << 8) | data[8]) & 0x3fff;
if (!key_frame) { // Not a keyframe.
return 0;
}
if (((bits >> 1) & 7) > 3) {
return 0; // unknown profile
}
if (!((bits >> 4) & 1)) {
return 0; // first frame is invisible!
}
if ((bits >> 5) >= chunk_size) { // partition_length
return 0; // inconsistent size information.
}
if (w == 0 || h == 0) {
return 0; // A zero width or height is not supported.
}
if (width) {
*width = w;
}
if (height) {
*height = h;
}
return 1;
}
}
//------------------------------------------------------------------------------
// Header parsing
static void ResetSegmentHeader(VP8SegmentHeader* const hdr) {
assert(hdr != NULL);
hdr->use_segment_ = 0;
hdr->update_map_ = 0;
hdr->absolute_delta_ = 1;
memset(hdr->quantizer_, 0, sizeof(hdr->quantizer_));
memset(hdr->filter_strength_, 0, sizeof(hdr->filter_strength_));
}
// Paragraph 9.3
static int ParseSegmentHeader(VP8BitReader* br,
VP8SegmentHeader* hdr, VP8Proba* proba) {
assert(br != NULL);
assert(hdr != NULL);
hdr->use_segment_ = VP8Get(br);
if (hdr->use_segment_) {
hdr->update_map_ = VP8Get(br);
if (VP8Get(br)) { // update data
int s;
hdr->absolute_delta_ = VP8Get(br);
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
hdr->quantizer_[s] = VP8Get(br) ? VP8GetSignedValue(br, 7) : 0;
}
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
hdr->filter_strength_[s] = VP8Get(br) ? VP8GetSignedValue(br, 6) : 0;
}
}
if (hdr->update_map_) {
int s;
for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) {
proba->segments_[s] = VP8Get(br) ? VP8GetValue(br, 8) : 255u;
}
}
} else {
hdr->update_map_ = 0;
}
return !br->eof_;
}
// Paragraph 9.5
// This function returns VP8_STATUS_SUSPENDED if we don't have all the
// necessary data in 'buf'.
// This case is not necessarily an error (for incremental decoding).
// Still, no bitreader is ever initialized to make it possible to read
// unavailable memory.
// If we don't even have the partitions' sizes, then VP8_STATUS_NOT_ENOUGH_DATA
// is returned, and this is an unrecoverable error.
// If the partitions were positioned ok, VP8_STATUS_OK is returned.
static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
const uint8_t* buf, size_t size) {
VP8BitReader* const br = &dec->br_;
const uint8_t* sz = buf;
const uint8_t* buf_end = buf + size;
const uint8_t* part_start;
size_t size_left = size;
size_t last_part;
size_t p;
dec->num_parts_minus_one_ = (1 << VP8GetValue(br, 2)) - 1;
last_part = dec->num_parts_minus_one_;
if (size < 3 * last_part) {
// we can't even read the sizes with sz[]! That's a failure.
return VP8_STATUS_NOT_ENOUGH_DATA;
}
part_start = buf + last_part * 3;
size_left -= last_part * 3;
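// Each of the first 'last_part' partition sizes is stored as a 3-byte
// little-endian value; the final partition implicitly spans whatever data
// remains after the last explicitly-sized one.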
for (p = 0; p < last_part; ++p) {
size_t psize = sz[0] | (sz[1] << 8) | (sz[2] << 16);
if (psize > size_left) psize = size_left;
VP8InitBitReader(dec->parts_ + p, part_start, psize);
part_start += psize;
size_left -= psize;
sz += 3;
}
VP8InitBitReader(dec->parts_ + last_part, part_start, size_left);
return (part_start < buf_end) ? VP8_STATUS_OK :
VP8_STATUS_SUSPENDED; // Init is ok, but there's not enough data
}
// Paragraph 9.4
static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
VP8FilterHeader* const hdr = &dec->filter_hdr_;
hdr->simple_ = VP8Get(br);
hdr->level_ = VP8GetValue(br, 6);
hdr->sharpness_ = VP8GetValue(br, 3);
hdr->use_lf_delta_ = VP8Get(br);
if (hdr->use_lf_delta_) {
if (VP8Get(br)) { // update lf-delta?
int i;
for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
if (VP8Get(br)) {
hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6);
}
}
for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) {
if (VP8Get(br)) {
hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6);
}
}
}
}
dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
return !br->eof_;
}
// Topmost call
int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
const uint8_t* buf;
size_t buf_size;
VP8FrameHeader* frm_hdr;
VP8PictureHeader* pic_hdr;
VP8BitReader* br;
VP8StatusCode status;
if (dec == NULL) {
return 0;
}
SetOk(dec);
if (io == NULL) {
return VP8SetError(dec, VP8_STATUS_INVALID_PARAM,
"null VP8Io passed to VP8GetHeaders()");
}
buf = io->data;
buf_size = io->data_size;
if (buf_size < 4) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Truncated header.");
}
// Paragraph 9.1
{
const uint32_t bits = buf[0] | (buf[1] << 8) | (buf[2] << 16);
frm_hdr = &dec->frm_hdr_;
frm_hdr->key_frame_ = !(bits & 1);
frm_hdr->profile_ = (bits >> 1) & 7;
frm_hdr->show_ = (bits >> 4) & 1;
frm_hdr->partition_length_ = (bits >> 5);
if (frm_hdr->profile_ > 3) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Incorrect keyframe parameters.");
}
if (!frm_hdr->show_) {
return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
"Frame not displayable.");
}
buf += 3;
buf_size -= 3;
}
pic_hdr = &dec->pic_hdr_;
if (frm_hdr->key_frame_) {
// Paragraph 9.2
if (buf_size < 7) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"cannot parse picture header");
}
if (!VP8CheckSignature(buf, buf_size)) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"Bad code word");
}
pic_hdr->width_ = ((buf[4] << 8) | buf[3]) & 0x3fff;
pic_hdr->xscale_ = buf[4] >> 6; // ratio: 1, 5/4, 5/3 or 2
pic_hdr->height_ = ((buf[6] << 8) | buf[5]) & 0x3fff;
pic_hdr->yscale_ = buf[6] >> 6;
buf += 7;
buf_size -= 7;
dec->mb_w_ = (pic_hdr->width_ + 15) >> 4;
dec->mb_h_ = (pic_hdr->height_ + 15) >> 4;
// Setup default output area (can be later modified during io->setup())
io->width = pic_hdr->width_;
io->height = pic_hdr->height_;
// IMPORTANT! use some sane dimensions in crop_* and scaled_* fields.
// So they can be used interchangeably without always testing for
// 'use_cropping'.
io->use_cropping = 0;
io->crop_top = 0;
io->crop_left = 0;
io->crop_right = io->width;
io->crop_bottom = io->height;
io->use_scaling = 0;
io->scaled_width = io->width;
io->scaled_height = io->height;
io->mb_w = io->width; // sanity check
io->mb_h = io->height; // ditto
VP8ResetProba(&dec->proba_);
ResetSegmentHeader(&dec->segment_hdr_);
}
// Check if we have all the partition #0 available, and initialize dec->br_
// to read this partition (and this partition only).
if (frm_hdr->partition_length_ > buf_size) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"bad partition length");
}
br = &dec->br_;
VP8InitBitReader(br, buf, frm_hdr->partition_length_);
buf += frm_hdr->partition_length_;
buf_size -= frm_hdr->partition_length_;
if (frm_hdr->key_frame_) {
pic_hdr->colorspace_ = VP8Get(br);
pic_hdr->clamp_type_ = VP8Get(br);
}
if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"cannot parse segment header");
}
// Filter specs
if (!ParseFilterHeader(br, dec)) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
"cannot parse filter header");
}
status = ParsePartitions(dec, buf, buf_size);
if (status != VP8_STATUS_OK) {
return VP8SetError(dec, status, "cannot parse partitions");
}
// quantizer change
VP8ParseQuant(dec);
// Frame buffer marking
if (!frm_hdr->key_frame_) {
return VP8SetError(dec, VP8_STATUS_UNSUPPORTED_FEATURE,
"Not a key frame.");
}
VP8Get(br); // ignore the value of update_proba_
VP8ParseProba(br, dec);
// sanitized state
dec->ready_ = 1;
return 1;
}
//------------------------------------------------------------------------------
// Residual decoding (Paragraph 13.2 / 13.3)
static const uint8_t kCat3[] = { 173, 148, 140, 0 };
static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 };
static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 };
static const uint8_t kCat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
static const uint8_t* const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 };
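// kCat3..kCat6 hold the probabilities of the extra bits for the large-value
// token categories; combined with the 'v += 3 + (8 << cat)' offset in
// GetLargeValue() below, they decode coefficient magnitudes starting at
// 11, 19, 35 and 67 respectively.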
static const uint8_t kZigzag[16] = {
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
int v;
if (!VP8GetBit(br, p[3])) {
if (!VP8GetBit(br, p[4])) {
v = 2;
} else {
v = 3 + VP8GetBit(br, p[5]);
}
} else {
if (!VP8GetBit(br, p[6])) {
if (!VP8GetBit(br, p[7])) {
v = 5 + VP8GetBit(br, 159);
} else {
v = 7 + 2 * VP8GetBit(br, 165);
v += VP8GetBit(br, 145);
}
} else {
const uint8_t* tab;
const int bit1 = VP8GetBit(br, p[8]);
const int bit0 = VP8GetBit(br, p[9 + bit1]);
const int cat = 2 * bit1 + bit0;
v = 0;
for (tab = kCat3456[cat]; *tab; ++tab) {
v += v + VP8GetBit(br, *tab);
}
v += 3 + (8 << cat);
}
}
return v;
}
// Returns the position of the last non-zero coeff plus one
static int GetCoeffsFast(VP8BitReader* const br,
const VP8BandProbas* const prob[],
int ctx, const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas_[ctx];
for (; n < 16; ++n) {
if (!VP8GetBit(br, p[0])) {
return n; // previous coeff was last non-zero coeff
}
while (!VP8GetBit(br, p[1])) { // sequence of zero coeffs
p = prob[++n]->probas_[0];
if (n == 16) return 16;
}
{ // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
int v;
if (!VP8GetBit(br, p[2])) {
v = 1;
p = p_ctx[1];
} else {
v = GetLargeValue(br, p);
p = p_ctx[2];
}
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
}
}
return 16;
}
// This version of GetCoeffs() uses VP8GetBitAlt() which is an alternate
// version of VP8GetBit() targeting specific platforms.
static int GetCoeffsAlt(VP8BitReader* const br,
const VP8BandProbas* const prob[],
int ctx, const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas_[ctx];
for (; n < 16; ++n) {
if (!VP8GetBitAlt(br, p[0])) {
return n; // previous coeff was last non-zero coeff
}
while (!VP8GetBitAlt(br, p[1])) { // sequence of zero coeffs
p = prob[++n]->probas_[0];
if (n == 16) return 16;
}
{ // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
int v;
if (!VP8GetBitAlt(br, p[2])) {
v = 1;
p = p_ctx[1];
} else {
v = GetLargeValue(br, p);
p = p_ctx[2];
}
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
}
}
return 16;
}
static WEBP_TSAN_IGNORE_FUNCTION void InitGetCoeffs(void) {
if (GetCoeffs == NULL) {
if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) {
GetCoeffs = GetCoeffsAlt;
} else {
GetCoeffs = GetCoeffsFast;
}
}
}
static WEBP_INLINE uint32_t NzCodeBits(uint32_t nz_coeffs, int nz, int dc_nz) {
nz_coeffs <<= 2;
nz_coeffs |= (nz > 3) ? 3 : (nz > 1) ? 2 : dc_nz;
return nz_coeffs;
}
static int ParseResiduals(VP8Decoder* const dec,
VP8MB* const mb, VP8BitReader* const token_br) {
const VP8BandProbas* (* const bands)[16 + 1] = dec->proba_.bands_ptr_;
const VP8BandProbas* const * ac_proba;
VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
const VP8QuantMatrix* const q = &dec->dqm_[block->segment_];
int16_t* dst = block->coeffs_;
VP8MB* const left_mb = dec->mb_info_ - 1;
uint8_t tnz, lnz;
uint32_t non_zero_y = 0;
uint32_t non_zero_uv = 0;
int x, y, ch;
uint32_t out_t_nz, out_l_nz;
int first;
memset(dst, 0, 384 * sizeof(*dst));
if (!block->is_i4x4_) { // parse DC
int16_t dc[16] = { 0 };
const int ctx = mb->nz_dc_ + left_mb->nz_dc_;
const int nz = GetCoeffs(token_br, bands[1], ctx, q->y2_mat_, 0, dc);
mb->nz_dc_ = left_mb->nz_dc_ = (nz > 0);
if (nz > 1) { // more than just the DC -> perform the full transform
VP8TransformWHT(dc, dst);
} else { // only DC is non-zero -> inlined simplified transform
int i;
const int dc0 = (dc[0] + 3) >> 3;
for (i = 0; i < 16 * 16; i += 16) dst[i] = dc0;
}
first = 1;
ac_proba = bands[0];
} else {
first = 0;
ac_proba = bands[3];
}
tnz = mb->nz_ & 0x0f;
lnz = left_mb->nz_ & 0x0f;
for (y = 0; y < 4; ++y) {
int l = lnz & 1;
uint32_t nz_coeffs = 0;
for (x = 0; x < 4; ++x) {
const int ctx = l + (tnz & 1);
const int nz = GetCoeffs(token_br, ac_proba, ctx, q->y1_mat_, first, dst);
l = (nz > first);
tnz = (tnz >> 1) | (l << 7);
nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
dst += 16;
}
tnz >>= 4;
lnz = (lnz >> 1) | (l << 7);
non_zero_y = (non_zero_y << 8) | nz_coeffs;
}
out_t_nz = tnz;
out_l_nz = lnz >> 4;
for (ch = 0; ch < 4; ch += 2) {
uint32_t nz_coeffs = 0;
tnz = mb->nz_ >> (4 + ch);
lnz = left_mb->nz_ >> (4 + ch);
for (y = 0; y < 2; ++y) {
int l = lnz & 1;
for (x = 0; x < 2; ++x) {
const int ctx = l + (tnz & 1);
const int nz = GetCoeffs(token_br, bands[2], ctx, q->uv_mat_, 0, dst);
l = (nz > 0);
tnz = (tnz >> 1) | (l << 3);
nz_coeffs = NzCodeBits(nz_coeffs, nz, dst[0] != 0);
dst += 16;
}
tnz >>= 2;
lnz = (lnz >> 1) | (l << 5);
}
// Note: we don't really need the per-4x4 details for U/V blocks.
non_zero_uv |= nz_coeffs << (4 * ch);
out_t_nz |= (tnz << 4) << ch;
out_l_nz |= (lnz & 0xf0) << ch;
}
mb->nz_ = out_t_nz;
left_mb->nz_ = out_l_nz;
block->non_zero_y_ = non_zero_y;
block->non_zero_uv_ = non_zero_uv;
// We look at the mode-code of each block and check if some blocks have less
// than three non-zero coeffs (code < 2). This is to avoid dithering flat and
// empty blocks.
block->dither_ = (non_zero_uv & 0xaaaa) ? 0 : q->dither_;
return !(non_zero_y | non_zero_uv); // will be used for further optimization
}
//------------------------------------------------------------------------------
// Main loop
int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
VP8MB* const left = dec->mb_info_ - 1;
VP8MB* const mb = dec->mb_info_ + dec->mb_x_;
VP8MBData* const block = dec->mb_data_ + dec->mb_x_;
int skip = dec->use_skip_proba_ ? block->skip_ : 0;
if (!skip) {
skip = ParseResiduals(dec, mb, token_br);
} else {
left->nz_ = mb->nz_ = 0;
if (!block->is_i4x4_) {
left->nz_dc_ = mb->nz_dc_ = 0;
}
block->non_zero_y_ = 0;
block->non_zero_uv_ = 0;
block->dither_ = 0;
}
if (dec->filter_type_ > 0) { // store filter info
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
*finfo = dec->fstrengths_[block->segment_][block->is_i4x4_];
finfo->f_inner_ |= !skip;
}
return !token_br->eof_;
}
void VP8InitScanline(VP8Decoder* const dec) {
VP8MB* const left = dec->mb_info_ - 1;
left->nz_ = 0;
left->nz_dc_ = 0;
memset(dec->intra_l_, B_DC_PRED, sizeof(dec->intra_l_));
dec->mb_x_ = 0;
}
static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
for (dec->mb_y_ = 0; dec->mb_y_ < dec->br_mb_y_; ++dec->mb_y_) {
// Parse bitstream for this row.
VP8BitReader* const token_br =
&dec->parts_[dec->mb_y_ & dec->num_parts_minus_one_];
if (!VP8ParseIntraModeRow(&dec->br_, dec)) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-partition0 encountered.");
}
for (; dec->mb_x_ < dec->mb_w_; ++dec->mb_x_) {
if (!VP8DecodeMB(dec, token_br)) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-file encountered.");
}
}
VP8InitScanline(dec); // Prepare for next scanline
// Reconstruct, filter and emit the row.
if (!VP8ProcessRow(dec, io)) {
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
}
}
if (dec->mt_method_ > 0) {
if (!WebPGetWorkerInterface()->Sync(&dec->worker_)) return 0;
}
return 1;
}
// Main entry point
int VP8Decode(VP8Decoder* const dec, VP8Io* const io) {
int ok = 0;
if (dec == NULL) {
return 0;
}
if (io == NULL) {
return VP8SetError(dec, VP8_STATUS_INVALID_PARAM,
"NULL VP8Io parameter in VP8Decode().");
}
if (!dec->ready_) {
if (!VP8GetHeaders(dec, io)) {
return 0;
}
}
assert(dec->ready_);
// Finish setting up the decoding parameter. Will call io->setup().
ok = (VP8EnterCritical(dec, io) == VP8_STATUS_OK);
if (ok) { // good to go.
// Will allocate memory and prepare everything.
if (ok) ok = VP8InitFrame(dec, io);
// Main decoding loop
if (ok) ok = ParseFrame(dec, io);
// Exit.
ok &= VP8ExitCritical(dec, io);
}
if (!ok) {
VP8Clear(dec);
return 0;
}
dec->ready_ = 0;
return ok;
}
void VP8Clear(VP8Decoder* const dec) {
if (dec == NULL) {
return;
}
WebPGetWorkerInterface()->End(&dec->worker_);
WebPDeallocateAlphaMemory(dec);
WebPSafeFree(dec->mem_);
dec->mem_ = NULL;
dec->mem_size_ = 0;
memset(&dec->br_, 0, sizeof(dec->br_));
dec->ready_ = 0;
}
//------------------------------------------------------------------------------

View file

@@ -0,0 +1,185 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Low-level API for VP8 decoder
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DEC_VP8_DEC_H_
#define WEBP_DEC_VP8_DEC_H_
#include "src/webp/decode.h"
#ifdef __cplusplus
extern "C" {
#endif
//------------------------------------------------------------------------------
// Lower-level API
//
// These functions provide fine-grained control of the decoding process.
// The call flow should resemble:
//
// VP8Io io;
// VP8InitIo(&io);
// io.data = data;
// io.data_size = size;
// /* customize io's functions (setup()/put()/teardown()) if needed. */
//
// VP8Decoder* dec = VP8New();
// int ok = VP8Decode(dec, &io);
// if (!ok) printf("Error: %s\n", VP8StatusMessage(dec));
// VP8Delete(dec);
// return ok;
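//
// As an illustrative sketch only (not part of the API), a hypothetical custom
// put() hook for the flow above could look like:
//
//    static int MyPut(const VP8Io* io) {
//      // io->y / io->u / io->v point to io->mb_h freshly decoded rows of
//      // YUV420 samples, io->mb_w pixels wide, starting at output row io->mb_y.
//      return 1;   // returning 0 aborts decoding
//    }
//
// and would be installed with "io.put = MyPut;" before calling VP8Decode().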
// Input / Output
typedef struct VP8Io VP8Io;
typedef int (*VP8IoPutHook)(const VP8Io* io);
typedef int (*VP8IoSetupHook)(VP8Io* io);
typedef void (*VP8IoTeardownHook)(const VP8Io* io);
struct VP8Io {
// set by VP8GetHeaders()
int width, height; // picture dimensions, in pixels (invariable).
// These are the original, uncropped dimensions.
// The actual area passed to put() is stored
// in mb_w / mb_h fields.
// set before calling put()
int mb_y; // position of the current rows (in pixels)
int mb_w; // number of columns in the sample
int mb_h; // number of rows in the sample
const uint8_t* y, *u, *v; // rows to copy (in yuv420 format)
int y_stride; // row stride for luma
int uv_stride; // row stride for chroma
void* opaque; // user data
// called when fresh samples are available. Currently, samples are in
// YUV420 format, and can be up to width x 24 in size (depending on the
// in-loop filtering level, e.g.). Should return false in case of error
// or abort request. The actual size of the area to update is mb_w x mb_h
// in size, taking cropping into account.
VP8IoPutHook put;
// called just before starting to decode the blocks.
// Must return false in case of setup error, true otherwise. If false is
// returned, teardown() will NOT be called. But if the setup succeeded
// and true is returned, then teardown() will always be called afterward.
VP8IoSetupHook setup;
// Called just after block decoding is finished (or when an error occurred
// during put()). Is NOT called if setup() failed.
VP8IoTeardownHook teardown;
// this is a recommendation for the user-side yuv->rgb converter. This flag
// is set when calling setup() hook and can be overwritten by it. It then
// can be taken into consideration during the put() method.
int fancy_upsampling;
// Input buffer.
size_t data_size;
const uint8_t* data;
// If true, in-loop filtering will not be performed even if present in the
// bitstream. Switching off filtering may speed up decoding at the expense
// of more visible blocking. Note that output will also be non-compliant
// with the VP8 specifications.
int bypass_filtering;
// Cropping parameters.
int use_cropping;
int crop_left, crop_right, crop_top, crop_bottom;
// Scaling parameters.
int use_scaling;
int scaled_width, scaled_height;
// If non NULL, pointer to the alpha data (if present) corresponding to the
// start of the current row (That is: it is pre-offset by mb_y and takes
// cropping into account).
const uint8_t* a;
};
// Internal, version-checked, entry point
int VP8InitIoInternal(VP8Io* const, int);
// Set the custom IO function pointers and user-data. The setter for IO hooks
// should be called before initiating incremental decoding. Returns true if
// WebPIDecoder object is successfully modified, false otherwise.
int WebPISetIOHooks(WebPIDecoder* const idec,
VP8IoPutHook put,
VP8IoSetupHook setup,
VP8IoTeardownHook teardown,
void* user_data);
// Main decoding object. This is an opaque structure.
typedef struct VP8Decoder VP8Decoder;
// Create a new decoder object.
VP8Decoder* VP8New(void);
// Must be called to make sure 'io' is initialized properly.
// Returns false in case of version mismatch. Upon such failure, no other
// decoding function should be called (VP8Decode, VP8GetHeaders, ...)
static WEBP_INLINE int VP8InitIo(VP8Io* const io) {
return VP8InitIoInternal(io, WEBP_DECODER_ABI_VERSION);
}
// Decode the VP8 frame header. Returns true if ok.
// Note: 'io->data' must be pointing to the start of the VP8 frame header.
int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io);
// Decode a picture. Will call VP8GetHeaders() if it wasn't done already.
// Returns false in case of error.
int VP8Decode(VP8Decoder* const dec, VP8Io* const io);
// Return current status of the decoder:
VP8StatusCode VP8Status(VP8Decoder* const dec);
// return readable string corresponding to the last status.
const char* VP8StatusMessage(VP8Decoder* const dec);
// Resets the decoder in its initial state, reclaiming memory.
// Not a mandatory call between calls to VP8Decode().
void VP8Clear(VP8Decoder* const dec);
// Destroy the decoder object.
void VP8Delete(VP8Decoder* const dec);
//------------------------------------------------------------------------------
// Miscellaneous VP8/VP8L bitstream probing functions.
// Returns true if the next 3 bytes in data contain the VP8 signature.
WEBP_EXTERN int VP8CheckSignature(const uint8_t* const data, size_t data_size);
// Validates the VP8 data-header and retrieves basic header information viz
// width and height. Returns 0 in case of formatting error. *width/*height
// can be passed NULL.
WEBP_EXTERN int VP8GetInfo(
const uint8_t* data,
size_t data_size, // data available so far
size_t chunk_size, // total data size expected in the chunk
int* const width, int* const height);
// Returns true if the next byte(s) in data is a VP8L signature.
WEBP_EXTERN int VP8LCheckSignature(const uint8_t* const data, size_t size);
// Validates the VP8L data-header and retrieves basic header information viz
// width, height and alpha. Returns 0 in case of formatting error.
// width/height/has_alpha can be passed NULL.
WEBP_EXTERN int VP8LGetInfo(
const uint8_t* data, size_t data_size, // data available so far
int* const width, int* const height, int* const has_alpha);
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* WEBP_DEC_VP8_DEC_H_ */

View file

@@ -0,0 +1,319 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// VP8 decoder: internal header.
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DEC_VP8I_DEC_H_
#define WEBP_DEC_VP8I_DEC_H_
#include <string.h> // for memcpy()
#include "src/dec/common_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/random_utils.h"
#include "src/utils/thread_utils.h"
#include "src/dsp/dsp.h"
#ifdef __cplusplus
extern "C" {
#endif
//------------------------------------------------------------------------------
// Various defines and enums
// version numbers
#define DEC_MAJ_VERSION 1
#define DEC_MIN_VERSION 0
#define DEC_REV_VERSION 0
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// Constraints are: We need to store one 16x16 block of luma samples (y),
// and two 8x8 chroma blocks (u/v). These are better be 16-bytes aligned,
// in order to be SIMD-friendly. We also need to store the top, left and
// top-left samples (from previously decoded blocks), along with four
// extra top-right samples for luma (intra4x4 prediction only).
// One possible layout is, using 32 * (17 + 9) bytes:
//
// .+------ <- only 1 pixel high
// .|yyyyt.
// .|yyyyt.
// .|yyyyt.
// .|yyyy..
// .+--.+-- <- only 1 pixel high
// .|uu.|vv
// .|uu.|vv
//
// Every character is a 4x4 block, with legend:
// '.' = unused
// 'y' = y-samples  'u' = u-samples  'v' = v-samples
// '|' = left sample, '-' = top sample, '+' = top-left sample
// 't' = extra top-right sample for 4x4 modes
#define YUV_SIZE (BPS * 17 + BPS * 9)
#define Y_OFF (BPS * 1 + 8)
#define U_OFF (Y_OFF + BPS * 16 + BPS)
#define V_OFF (U_OFF + 16)
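// For reference (illustrative arithmetic only, assuming the usual BPS value of
// 32 from dsp.h), these constants work out to:
//   YUV_SIZE = 32 * 17 + 32 * 9 = 832 bytes
//   Y_OFF    = 32 + 8           = 40
//   U_OFF    = 40 + 32 * 17     = 584
//   V_OFF    = 584 + 16         = 600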
// minimal width under which lossy multi-threading is always disabled
#define MIN_WIDTH_FOR_THREADS 512
//------------------------------------------------------------------------------
// Headers
typedef struct {
uint8_t key_frame_;
uint8_t profile_;
uint8_t show_;
uint32_t partition_length_;
} VP8FrameHeader;
typedef struct {
uint16_t width_;
uint16_t height_;
uint8_t xscale_;
uint8_t yscale_;
uint8_t colorspace_; // 0 = YCbCr
uint8_t clamp_type_;
} VP8PictureHeader;
// segment features
typedef struct {
int use_segment_;
int update_map_; // whether to update the segment map or not
int absolute_delta_; // absolute or delta values for quantizer and filter
int8_t quantizer_[NUM_MB_SEGMENTS]; // quantization changes
int8_t filter_strength_[NUM_MB_SEGMENTS]; // filter strength for segments
} VP8SegmentHeader;
// probas associated to one of the contexts
typedef uint8_t VP8ProbaArray[NUM_PROBAS];
typedef struct { // all the probas associated to one band
VP8ProbaArray probas_[NUM_CTX];
} VP8BandProbas;
// Struct collecting all frame-persistent probabilities.
typedef struct {
uint8_t segments_[MB_FEATURE_TREE_PROBS];
// Type: 0:Intra16-AC 1:Intra16-DC 2:Chroma 3:Intra4
VP8BandProbas bands_[NUM_TYPES][NUM_BANDS];
const VP8BandProbas* bands_ptr_[NUM_TYPES][16 + 1];
} VP8Proba;
// Filter parameters
typedef struct {
int simple_; // 0=complex, 1=simple
int level_; // [0..63]
int sharpness_; // [0..7]
int use_lf_delta_;
int ref_lf_delta_[NUM_REF_LF_DELTAS];
int mode_lf_delta_[NUM_MODE_LF_DELTAS];
} VP8FilterHeader;
//------------------------------------------------------------------------------
// Information about the macroblocks.
typedef struct { // filter specs
uint8_t f_limit_; // filter limit in [3..189], or 0 if no filtering
uint8_t f_ilevel_; // inner limit in [1..63]
uint8_t f_inner_; // do inner filtering?
uint8_t hev_thresh_; // high edge variance threshold in [0..2]
} VP8FInfo;
typedef struct { // Top/Left Contexts used for syntax-parsing
uint8_t nz_; // non-zero AC/DC coeffs (4bit for luma + 4bit for chroma)
uint8_t nz_dc_; // non-zero DC coeff (1bit)
} VP8MB;
// Dequantization matrices
typedef int quant_t[2]; // [DC / AC]. Can be 'uint16_t[2]' too (~slower).
typedef struct {
quant_t y1_mat_, y2_mat_, uv_mat_;
int uv_quant_; // U/V quantizer value
int dither_; // dithering amplitude (0 = off, max=255)
} VP8QuantMatrix;
// Data needed to reconstruct a macroblock
typedef struct {
int16_t coeffs_[384]; // 384 coeffs = (16+4+4) * 4*4
uint8_t is_i4x4_; // true if intra4x4
uint8_t imodes_[16]; // one 16x16 mode (#0) or sixteen 4x4 modes
uint8_t uvmode_; // chroma prediction mode
// bit-wise info about the content of each sub-4x4 block (in decoding order).
// Each of the 4x4 blocks for y/u/v is associated with a 2b code according to:
// code=0 -> no coefficient
// code=1 -> only DC
// code=2 -> first three coefficients are non-zero
// code=3 -> more than three coefficients are non-zero
// This allows calling specialized transform functions (see the note after
// this struct).
uint32_t non_zero_y_;
uint32_t non_zero_uv_;
uint8_t dither_; // local dithering strength (deduced from non_zero_*)
uint8_t skip_;
uint8_t segment_;
} VP8MBData;
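// Note (illustrative, not used by the code): with the bit packing produced by
// the residual parser in vp8_dec.c, the 2-bit code of the n-th luma sub-block
// (n = 4 * y + x, in decoding order) can be recovered as
//   (non_zero_y_ >> (30 - 2 * n)) & 3
// i.e. sub-block #0 sits in the two most-significant bits.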
// Persistent information needed by the parallel processing
typedef struct {
int id_; // cache row to process (in [0..2])
int mb_y_; // macroblock position of the row
int filter_row_; // true if row-filtering is needed
VP8FInfo* f_info_; // filter strengths (swapped with dec->f_info_)
VP8MBData* mb_data_; // reconstruction data (swapped with dec->mb_data_)
VP8Io io_; // copy of the VP8Io to pass to put()
} VP8ThreadContext;
// Saved top samples, per macroblock. Fits into a cache-line.
typedef struct {
uint8_t y[16], u[8], v[8];
} VP8TopSamples;
//------------------------------------------------------------------------------
// VP8Decoder: the main opaque structure handed over to user
struct VP8Decoder {
VP8StatusCode status_;
int ready_; // true if ready to decode a picture with VP8Decode()
const char* error_msg_; // set when status_ is not OK.
// Main data source
VP8BitReader br_;
// headers
VP8FrameHeader frm_hdr_;
VP8PictureHeader pic_hdr_;
VP8FilterHeader filter_hdr_;
VP8SegmentHeader segment_hdr_;
// Worker
WebPWorker worker_;
int mt_method_; // multi-thread method: 0=off, 1=[parse+recon][filter]
// 2=[parse][recon+filter]
int cache_id_; // current cache row
int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3)
VP8ThreadContext thread_ctx_; // Thread context
// dimension, in macroblock units.
int mb_w_, mb_h_;
// Macroblock to process/filter, depending on cropping and filter_type.
int tl_mb_x_, tl_mb_y_; // top-left MB that must be in-loop filtered
int br_mb_x_, br_mb_y_; // last bottom-right MB that must be decoded
// number of partitions minus one.
uint32_t num_parts_minus_one_;
// per-partition boolean decoders.
VP8BitReader parts_[MAX_NUM_PARTITIONS];
// Dithering strength, deduced from decoding options
int dither_; // whether to use dithering or not
VP8Random dithering_rg_; // random generator for dithering
// dequantization (one set of DC/AC dequant factor per segment)
VP8QuantMatrix dqm_[NUM_MB_SEGMENTS];
// probabilities
VP8Proba proba_;
int use_skip_proba_;
uint8_t skip_p_;
// Boundary data cache and persistent buffers.
uint8_t* intra_t_; // top intra modes values: 4 * mb_w_
uint8_t intra_l_[4]; // left intra modes values
VP8TopSamples* yuv_t_; // top y/u/v samples
VP8MB* mb_info_; // contextual macroblock info (mb_w_ + 1)
VP8FInfo* f_info_; // filter strength info
uint8_t* yuv_b_; // main block for Y/U/V (size = YUV_SIZE)
uint8_t* cache_y_; // macroblock row for storing unfiltered samples
uint8_t* cache_u_;
uint8_t* cache_v_;
int cache_y_stride_;
int cache_uv_stride_;
// main memory chunk for the above data. Persistent.
void* mem_;
size_t mem_size_;
// Per macroblock non-persistent infos.
int mb_x_, mb_y_; // current position, in macroblock units
VP8MBData* mb_data_; // parsed reconstruction data
// Filtering side-info
int filter_type_; // 0=off, 1=simple, 2=complex
VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
// Alpha
struct ALPHDecoder* alph_dec_; // alpha-plane decoder object
const uint8_t* alpha_data_; // compressed alpha data (if present)
size_t alpha_data_size_;
int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_
uint8_t* alpha_plane_mem_; // memory allocated for alpha_plane_
uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
const uint8_t* alpha_prev_line_; // last decoded alpha row (or NULL)
int alpha_dithering_; // derived from decoding options (0=off, 100=full)
};
//------------------------------------------------------------------------------
// internal functions. Not public.
// in vp8.c
int VP8SetError(VP8Decoder* const dec,
VP8StatusCode error, const char* const msg);
// in tree.c
void VP8ResetProba(VP8Proba* const proba);
void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec);
// parses one row of intra mode data in partition 0, returns !eof
int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec);
// in quant.c
void VP8ParseQuant(VP8Decoder* const dec);
// in frame.c
int VP8InitFrame(VP8Decoder* const dec, VP8Io* const io);
// Call io->setup() and finish setting up scan parameters.
// After this call returns, one must always call VP8ExitCritical() with the
// same parameters. Both functions should be used in pair. Returns VP8_STATUS_OK
// if ok, otherwise sets and returns the error status on *dec.
VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
// Must always be called in pair with VP8EnterCritical().
// Returns false in case of error.
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
// Return the multi-threading method to use (0=off), depending
// on options and bitstream size. Only for lossy decoding.
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
const WebPHeaderStructure* const headers,
int width, int height);
// Initialize dithering post-process if needed.
void VP8InitDithering(const WebPDecoderOptions* const options,
VP8Decoder* const dec);
// Process the last decoded row (filtering + output).
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
// To be called at the start of a new scanline, to initialize predictors.
void VP8InitScanline(VP8Decoder* const dec);
// Decode one macroblock. Returns false if there is not enough data.
int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br);
// in alpha.c
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
const VP8Io* const io,
int row, int num_rows);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* WEBP_DEC_VP8I_DEC_H_ */

The diff for this file is not shown because of its large size.

View file

@@ -0,0 +1,135 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Lossless decoder: internal header.
//
// Author: Skal (pascal.massimino@gmail.com)
// Vikas Arora(vikaas.arora@gmail.com)
#ifndef WEBP_DEC_VP8LI_DEC_H_
#define WEBP_DEC_VP8LI_DEC_H_
#include <string.h> // for memcpy()
#include "src/dec/webpi_dec.h"
#include "src/utils/bit_reader_utils.h"
#include "src/utils/color_cache_utils.h"
#include "src/utils/huffman_utils.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
READ_DATA = 0,
READ_HDR = 1,
READ_DIM = 2
} VP8LDecodeState;
typedef struct VP8LTransform VP8LTransform;
struct VP8LTransform {
VP8LImageTransformType type_; // transform type.
int bits_; // subsampling bits defining transform window.
int xsize_; // transform window X index.
int ysize_; // transform window Y index.
uint32_t *data_; // transform data.
};
typedef struct {
int color_cache_size_;
VP8LColorCache color_cache_;
VP8LColorCache saved_color_cache_; // for incremental
int huffman_mask_;
int huffman_subsample_bits_;
int huffman_xsize_;
uint32_t *huffman_image_;
int num_htree_groups_;
HTreeGroup *htree_groups_;
HuffmanCode *huffman_tables_;
} VP8LMetadata;
typedef struct VP8LDecoder VP8LDecoder;
struct VP8LDecoder {
VP8StatusCode status_;
VP8LDecodeState state_;
VP8Io *io_;
const WebPDecBuffer *output_; // shortcut to io->opaque->output
uint32_t *pixels_; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA.
uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage.
VP8LBitReader br_;
int incremental_; // if true, incremental decoding is expected
VP8LBitReader saved_br_; // note: could be local variables too
int saved_last_pixel_;
int width_;
int height_;
int last_row_; // last input row decoded so far.
int last_pixel_; // last pixel decoded so far. However, it may
// not be transformed, scaled and
// color-converted yet.
int last_out_row_; // last row output so far.
VP8LMetadata hdr_;
int next_transform_;
VP8LTransform transforms_[NUM_TRANSFORMS];
// or'd bitset storing the transforms types.
uint32_t transforms_seen_;
uint8_t *rescaler_memory; // Working memory for rescaling work.
WebPRescaler *rescaler; // Common rescaler for all channels.
};
//------------------------------------------------------------------------------
// internal functions. Not public.
struct ALPHDecoder; // Defined in dec/alphai.h.
// in vp8l.c
// Decodes image header for alpha data stored using lossless compression.
// Returns false in case of error.
int VP8LDecodeAlphaHeader(struct ALPHDecoder* const alph_dec,
const uint8_t* const data, size_t data_size);
// Decodes *at least* 'last_row' rows of alpha. If some of the initial rows are
// already decoded in previous call(s), it will resume decoding from where it
// was paused.
// Returns false in case of bitstream error.
int VP8LDecodeAlphaImageStream(struct ALPHDecoder* const alph_dec,
int last_row);
// Allocates and initializes a new lossless decoder instance.
VP8LDecoder* VP8LNew(void);
// Decodes the image header. Returns false in case of error.
int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io);
// Decodes an image. It's required to decode the lossless header before calling
// this function. Returns false in case of error, with updated dec->status_.
int VP8LDecodeImage(VP8LDecoder* const dec);
// Resets the decoder in its initial state, reclaiming memory.
// Preserves the dec->status_ value.
void VP8LClear(VP8LDecoder* const dec);
// Clears and deallocates a lossless decoder instance.
void VP8LDelete(VP8LDecoder* const dec);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* WEBP_DEC_VP8LI_DEC_H_ */

View file

@@ -0,0 +1,845 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Main decoding functions for WEBP images.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <stdlib.h>
#include "src/dec/vp8i_dec.h"
#include "src/dec/vp8li_dec.h"
#include "src/dec/webpi_dec.h"
#include "src/utils/utils.h"
#include "src/webp/mux_types.h" // ALPHA_FLAG
//------------------------------------------------------------------------------
// RIFF layout is:
// Offset tag
// 0...3 "RIFF" 4-byte tag
// 4...7 size of image data (including metadata) starting at offset 8
// 8...11 "WEBP" our form-type signature
// The RIFF container (12 bytes) is followed by appropriate chunks:
// 12..15 "VP8 ": 4-bytes tags, signaling the use of VP8 video format
// 16..19 size of the raw VP8 image data, starting at offset 20
// 20.... the VP8 bytes
// Or,
// 12..15 "VP8L": 4-bytes tags, signaling the use of VP8L lossless format
// 16..19 size of the raw VP8L image data, starting at offset 20
// 20.... the VP8L bytes
// Or,
// 12..15 "VP8X": 4-bytes tags, describing the extended-VP8 chunk.
// 16..19 size of the VP8X chunk starting at offset 20.
// 20..23 VP8X flags bit-map corresponding to the chunk-types present.
// 24..26 Width of the Canvas Image.
// 27..29 Height of the Canvas Image.
// There can be extra chunks after the "VP8X" chunk (ICCP, ANMF, VP8, VP8L,
// XMP, EXIF ...)
// All sizes are in little-endian order.
// Note: chunk data size must be padded to multiple of 2 when written.
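// As a purely illustrative example, the first 20 bytes of a minimal lossy file
// following this layout would be:
//   'R' 'I' 'F' 'F' <4-byte LE size> 'W' 'E' 'B' 'P'
//   'V' 'P' '8' ' ' <4-byte LE size> <raw VP8 key-frame data ...>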
// Validates the RIFF container (if detected) and skips over it.
// If a RIFF container is detected, returns:
// VP8_STATUS_BITSTREAM_ERROR for invalid header,
// VP8_STATUS_NOT_ENOUGH_DATA for truncated data if have_all_data is true,
// and VP8_STATUS_OK otherwise.
// In case there are not enough bytes (partial RIFF container), return 0 for
// *riff_size. Else return the RIFF size extracted from the header.
static VP8StatusCode ParseRIFF(const uint8_t** const data,
size_t* const data_size, int have_all_data,
size_t* const riff_size) {
assert(data != NULL);
assert(data_size != NULL);
assert(riff_size != NULL);
*riff_size = 0; // Default: no RIFF present.
if (*data_size >= RIFF_HEADER_SIZE && !memcmp(*data, "RIFF", TAG_SIZE)) {
if (memcmp(*data + 8, "WEBP", TAG_SIZE)) {
return VP8_STATUS_BITSTREAM_ERROR; // Wrong image file signature.
} else {
const uint32_t size = GetLE32(*data + TAG_SIZE);
// Check that we have at least one chunk (i.e "WEBP" + "VP8?nnnn").
if (size < TAG_SIZE + CHUNK_HEADER_SIZE) {
return VP8_STATUS_BITSTREAM_ERROR;
}
if (size > MAX_CHUNK_PAYLOAD) {
return VP8_STATUS_BITSTREAM_ERROR;
}
if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) {
return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream.
}
// We have a RIFF container. Skip it.
*riff_size = size;
*data += RIFF_HEADER_SIZE;
*data_size -= RIFF_HEADER_SIZE;
}
}
return VP8_STATUS_OK;
}
// Validates the VP8X header and skips over it.
// Returns VP8_STATUS_BITSTREAM_ERROR for invalid VP8X header,
// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
// VP8_STATUS_OK otherwise.
// If a VP8X chunk is found, found_vp8x is set to true and *width_ptr,
// *height_ptr and *flags_ptr are set to the corresponding values extracted
// from the VP8X chunk.
static VP8StatusCode ParseVP8X(const uint8_t** const data,
size_t* const data_size,
int* const found_vp8x,
int* const width_ptr, int* const height_ptr,
uint32_t* const flags_ptr) {
const uint32_t vp8x_size = CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE;
assert(data != NULL);
assert(data_size != NULL);
assert(found_vp8x != NULL);
*found_vp8x = 0;
if (*data_size < CHUNK_HEADER_SIZE) {
return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data.
}
if (!memcmp(*data, "VP8X", TAG_SIZE)) {
int width, height;
uint32_t flags;
const uint32_t chunk_size = GetLE32(*data + TAG_SIZE);
if (chunk_size != VP8X_CHUNK_SIZE) {
return VP8_STATUS_BITSTREAM_ERROR; // Wrong chunk size.
}
// Verify if enough data is available to validate the VP8X chunk.
if (*data_size < vp8x_size) {
return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data.
}
flags = GetLE32(*data + 8);
width = 1 + GetLE24(*data + 12);
height = 1 + GetLE24(*data + 15);
if (width * (uint64_t)height >= MAX_IMAGE_AREA) {
return VP8_STATUS_BITSTREAM_ERROR; // image is too large
}
if (flags_ptr != NULL) *flags_ptr = flags;
if (width_ptr != NULL) *width_ptr = width;
if (height_ptr != NULL) *height_ptr = height;
// Skip over VP8X header bytes.
*data += vp8x_size;
*data_size -= vp8x_size;
*found_vp8x = 1;
}
return VP8_STATUS_OK;
}
// Skips to the next VP8/VP8L chunk header in the data given the size of the
// RIFF chunk 'riff_size'.
// Returns VP8_STATUS_BITSTREAM_ERROR if any invalid chunk size is encountered,
// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
// VP8_STATUS_OK otherwise.
// If an alpha chunk is found, *alpha_data and *alpha_size are set
// appropriately.
static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
size_t* const data_size,
size_t const riff_size,
const uint8_t** const alpha_data,
size_t* const alpha_size) {
const uint8_t* buf;
size_t buf_size;
uint32_t total_size = TAG_SIZE + // "WEBP".
CHUNK_HEADER_SIZE + // "VP8Xnnnn".
VP8X_CHUNK_SIZE; // data.
assert(data != NULL);
assert(data_size != NULL);
buf = *data;
buf_size = *data_size;
assert(alpha_data != NULL);
assert(alpha_size != NULL);
*alpha_data = NULL;
*alpha_size = 0;
while (1) {
uint32_t chunk_size;
uint32_t disk_chunk_size; // chunk_size with padding
*data = buf;
*data_size = buf_size;
if (buf_size < CHUNK_HEADER_SIZE) { // Insufficient data.
return VP8_STATUS_NOT_ENOUGH_DATA;
}
chunk_size = GetLE32(buf + TAG_SIZE);
if (chunk_size > MAX_CHUNK_PAYLOAD) {
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
}
// For odd-sized chunk-payload, there's one byte padding at the end.
disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1;
total_size += disk_chunk_size;
// Check that total bytes skipped so far does not exceed riff_size.
if (riff_size > 0 && (total_size > riff_size)) {
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
}
// Start of a (possibly incomplete) VP8/VP8L chunk implies that we have
// parsed all the optional chunks.
// Note: This check must occur before the check 'buf_size < disk_chunk_size'
// below to allow incomplete VP8/VP8L chunks.
if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
!memcmp(buf, "VP8L", TAG_SIZE)) {
return VP8_STATUS_OK;
}
if (buf_size < disk_chunk_size) { // Insufficient data.
return VP8_STATUS_NOT_ENOUGH_DATA;
}
if (!memcmp(buf, "ALPH", TAG_SIZE)) { // A valid ALPH header.
*alpha_data = buf + CHUNK_HEADER_SIZE;
*alpha_size = chunk_size;
}
// We have a full and valid chunk; skip it.
buf += disk_chunk_size;
buf_size -= disk_chunk_size;
}
}
// Validates the VP8/VP8L Header ("VP8 nnnn" or "VP8L nnnn") and skips over it.
// Returns VP8_STATUS_BITSTREAM_ERROR for invalid (chunk larger than
// riff_size) VP8/VP8L header,
// VP8_STATUS_NOT_ENOUGH_DATA in case of insufficient data, and
// VP8_STATUS_OK otherwise.
// If a VP8/VP8L chunk is found, *chunk_size is set to the total number of bytes
// extracted from the VP8/VP8L chunk header.
// The flag '*is_lossless' is set to 1 in case of VP8L chunk / raw VP8L data.
static VP8StatusCode ParseVP8Header(const uint8_t** const data_ptr,
size_t* const data_size, int have_all_data,
size_t riff_size, size_t* const chunk_size,
int* const is_lossless) {
const uint8_t* const data = *data_ptr;
const int is_vp8 = !memcmp(data, "VP8 ", TAG_SIZE);
const int is_vp8l = !memcmp(data, "VP8L", TAG_SIZE);
const uint32_t minimal_size =
TAG_SIZE + CHUNK_HEADER_SIZE; // "WEBP" + "VP8 nnnn" OR
// "WEBP" + "VP8Lnnnn"
assert(data != NULL);
assert(data_size != NULL);
assert(chunk_size != NULL);
assert(is_lossless != NULL);
if (*data_size < CHUNK_HEADER_SIZE) {
return VP8_STATUS_NOT_ENOUGH_DATA; // Insufficient data.
}
if (is_vp8 || is_vp8l) {
// Bitstream contains VP8/VP8L header.
const uint32_t size = GetLE32(data + TAG_SIZE);
if ((riff_size >= minimal_size) && (size > riff_size - minimal_size)) {
return VP8_STATUS_BITSTREAM_ERROR; // Inconsistent size information.
}
if (have_all_data && (size > *data_size - CHUNK_HEADER_SIZE)) {
return VP8_STATUS_NOT_ENOUGH_DATA; // Truncated bitstream.
}
// Skip over CHUNK_HEADER_SIZE bytes from VP8/VP8L Header.
*chunk_size = size;
*data_ptr += CHUNK_HEADER_SIZE;
*data_size -= CHUNK_HEADER_SIZE;
*is_lossless = is_vp8l;
} else {
// Raw VP8/VP8L bitstream (no header).
*is_lossless = VP8LCheckSignature(data, *data_size);
*chunk_size = *data_size;
}
return VP8_STATUS_OK;
}
//------------------------------------------------------------------------------
// Fetch '*width', '*height', '*has_alpha' and fill out 'headers' based on
// 'data'. All the output parameters may be NULL. If 'headers' is NULL only the
// minimal amount will be read to fetch the remaining parameters.
// If 'headers' is non-NULL this function will attempt to locate both alpha
// data (with or without a VP8X chunk) and the bitstream chunk (VP8/VP8L).
// Note: The following chunk sequences (before the raw VP8/VP8L data) are
// considered valid by this function:
// RIFF + VP8(L)
// RIFF + VP8X + (optional chunks) + VP8(L)
// ALPH + VP8 <-- Not a valid WebP format: only allowed for internal purpose.
// VP8(L) <-- Not a valid WebP format: only allowed for internal purpose.
static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
size_t data_size,
int* const width,
int* const height,
int* const has_alpha,
int* const has_animation,
int* const format,
WebPHeaderStructure* const headers) {
int canvas_width = 0;
int canvas_height = 0;
int image_width = 0;
int image_height = 0;
int found_riff = 0;
int found_vp8x = 0;
int animation_present = 0;
const int have_all_data = (headers != NULL) ? headers->have_all_data : 0;
VP8StatusCode status;
WebPHeaderStructure hdrs;
if (data == NULL || data_size < RIFF_HEADER_SIZE) {
return VP8_STATUS_NOT_ENOUGH_DATA;
}
memset(&hdrs, 0, sizeof(hdrs));
hdrs.data = data;
hdrs.data_size = data_size;
// Skip over RIFF header.
status = ParseRIFF(&data, &data_size, have_all_data, &hdrs.riff_size);
if (status != VP8_STATUS_OK) {
return status; // Wrong RIFF header / insufficient data.
}
found_riff = (hdrs.riff_size > 0);
// Skip over VP8X.
{
uint32_t flags = 0;
status = ParseVP8X(&data, &data_size, &found_vp8x,
&canvas_width, &canvas_height, &flags);
if (status != VP8_STATUS_OK) {
return status; // Wrong VP8X / insufficient data.
}
animation_present = !!(flags & ANIMATION_FLAG);
if (!found_riff && found_vp8x) {
// Note: This restriction may be removed in the future, if it becomes
// necessary to send VP8X chunk to the decoder.
return VP8_STATUS_BITSTREAM_ERROR;
}
if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
if (has_animation != NULL) *has_animation = animation_present;
if (format != NULL) *format = 0; // default = undefined
image_width = canvas_width;
image_height = canvas_height;
if (found_vp8x && animation_present && headers == NULL) {
status = VP8_STATUS_OK;
goto ReturnWidthHeight; // Just return features from VP8X header.
}
}
if (data_size < TAG_SIZE) {
status = VP8_STATUS_NOT_ENOUGH_DATA;
goto ReturnWidthHeight;
}
// Skip over optional chunks if data started with "RIFF + VP8X" or "ALPH".
if ((found_riff && found_vp8x) ||
(!found_riff && !found_vp8x && !memcmp(data, "ALPH", TAG_SIZE))) {
status = ParseOptionalChunks(&data, &data_size, hdrs.riff_size,
&hdrs.alpha_data, &hdrs.alpha_data_size);
if (status != VP8_STATUS_OK) {
goto ReturnWidthHeight; // Invalid chunk size / insufficient data.
}
}
// Skip over VP8/VP8L header.
status = ParseVP8Header(&data, &data_size, have_all_data, hdrs.riff_size,
&hdrs.compressed_size, &hdrs.is_lossless);
if (status != VP8_STATUS_OK) {
goto ReturnWidthHeight; // Wrong VP8/VP8L chunk-header / insufficient data.
}
if (hdrs.compressed_size > MAX_CHUNK_PAYLOAD) {
return VP8_STATUS_BITSTREAM_ERROR;
}
if (format != NULL && !animation_present) {
*format = hdrs.is_lossless ? 2 : 1;
}
if (!hdrs.is_lossless) {
if (data_size < VP8_FRAME_HEADER_SIZE) {
status = VP8_STATUS_NOT_ENOUGH_DATA;
goto ReturnWidthHeight;
}
// Validates raw VP8 data.
if (!VP8GetInfo(data, data_size, (uint32_t)hdrs.compressed_size,
&image_width, &image_height)) {
return VP8_STATUS_BITSTREAM_ERROR;
}
} else {
if (data_size < VP8L_FRAME_HEADER_SIZE) {
status = VP8_STATUS_NOT_ENOUGH_DATA;
goto ReturnWidthHeight;
}
// Validates raw VP8L data.
if (!VP8LGetInfo(data, data_size, &image_width, &image_height, has_alpha)) {
return VP8_STATUS_BITSTREAM_ERROR;
}
}
// Validates image size coherency.
if (found_vp8x) {
if (canvas_width != image_width || canvas_height != image_height) {
return VP8_STATUS_BITSTREAM_ERROR;
}
}
if (headers != NULL) {
*headers = hdrs;
headers->offset = data - headers->data;
assert((uint64_t)(data - headers->data) < MAX_CHUNK_PAYLOAD);
assert(headers->offset == headers->data_size - data_size);
}
ReturnWidthHeight:
if (status == VP8_STATUS_OK ||
(status == VP8_STATUS_NOT_ENOUGH_DATA && found_vp8x && headers == NULL)) {
if (has_alpha != NULL) {
// If the data did not contain a VP8X/VP8L chunk the only definitive way
// to set this is by looking for alpha data (from an ALPH chunk).
*has_alpha |= (hdrs.alpha_data != NULL);
}
if (width != NULL) *width = image_width;
if (height != NULL) *height = image_height;
return VP8_STATUS_OK;
} else {
return status;
}
}
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
// status is marked volatile as a workaround for a clang-3.8 (aarch64) bug
volatile VP8StatusCode status;
int has_animation = 0;
assert(headers != NULL);
// fill out headers, ignore width/height/has_alpha.
status = ParseHeadersInternal(headers->data, headers->data_size,
NULL, NULL, NULL, &has_animation,
NULL, headers);
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
// The WebPDemux API + libwebp can be used to decode individual
// uncomposited frames or the WebPAnimDecoder can be used to fully
// reconstruct them (see webp/demux.h).
if (has_animation) {
status = VP8_STATUS_UNSUPPORTED_FEATURE;
}
}
return status;
}
//------------------------------------------------------------------------------
// WebPDecParams
void WebPResetDecParams(WebPDecParams* const params) {
if (params != NULL) {
memset(params, 0, sizeof(*params));
}
}
//------------------------------------------------------------------------------
// "Into" decoding variants
// Main flow
static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
WebPDecParams* const params) {
VP8StatusCode status;
VP8Io io;
WebPHeaderStructure headers;
headers.data = data;
headers.data_size = data_size;
headers.have_all_data = 1;
status = WebPParseHeaders(&headers); // Process Pre-VP8 chunks.
if (status != VP8_STATUS_OK) {
return status;
}
assert(params != NULL);
VP8InitIo(&io);
io.data = headers.data + headers.offset;
io.data_size = headers.data_size - headers.offset;
WebPInitCustomIo(params, &io); // Plug the I/O functions.
if (!headers.is_lossless) {
VP8Decoder* const dec = VP8New();
if (dec == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
dec->alpha_data_ = headers.alpha_data;
dec->alpha_data_size_ = headers.alpha_data_size;
// Decode bitstream header, update io->width/io->height.
if (!VP8GetHeaders(dec, &io)) {
status = dec->status_; // An error occurred. Grab error status.
} else {
// Allocate/check output buffers.
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
params->output);
if (status == VP8_STATUS_OK) { // Decode
// This change must be done before calling VP8Decode()
dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
io.width, io.height);
VP8InitDithering(params->options, dec);
if (!VP8Decode(dec, &io)) {
status = dec->status_;
}
}
}
VP8Delete(dec);
} else {
VP8LDecoder* const dec = VP8LNew();
if (dec == NULL) {
return VP8_STATUS_OUT_OF_MEMORY;
}
if (!VP8LDecodeHeader(dec, &io)) {
status = dec->status_; // An error occurred. Grab error status.
} else {
// Allocate/check output buffers.
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
params->output);
if (status == VP8_STATUS_OK) { // Decode
if (!VP8LDecodeImage(dec)) {
status = dec->status_;
}
}
}
VP8LDelete(dec);
}
if (status != VP8_STATUS_OK) {
WebPFreeDecBuffer(params->output);
} else {
if (params->options != NULL && params->options->flip) {
// This restores the original stride values if options->flip was used
// during the call to WebPAllocateDecBuffer above.
status = WebPFlipBuffer(params->output);
}
}
return status;
}
// Helpers
static uint8_t* DecodeIntoRGBABuffer(WEBP_CSP_MODE colorspace,
const uint8_t* const data,
size_t data_size,
uint8_t* const rgba,
int stride, size_t size) {
WebPDecParams params;
WebPDecBuffer buf;
if (rgba == NULL) {
return NULL;
}
WebPInitDecBuffer(&buf);
WebPResetDecParams(&params);
params.output = &buf;
buf.colorspace = colorspace;
buf.u.RGBA.rgba = rgba;
buf.u.RGBA.stride = stride;
buf.u.RGBA.size = size;
buf.is_external_memory = 1;
if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
return NULL;
}
return rgba;
}
uint8_t* WebPDecodeRGBInto(const uint8_t* data, size_t data_size,
uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_RGB, data, data_size, output, stride, size);
}
uint8_t* WebPDecodeRGBAInto(const uint8_t* data, size_t data_size,
uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_RGBA, data, data_size, output, stride, size);
}
uint8_t* WebPDecodeARGBInto(const uint8_t* data, size_t data_size,
uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_ARGB, data, data_size, output, stride, size);
}
uint8_t* WebPDecodeBGRInto(const uint8_t* data, size_t data_size,
uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_BGR, data, data_size, output, stride, size);
}
uint8_t* WebPDecodeBGRAInto(const uint8_t* data, size_t data_size,
uint8_t* output, size_t size, int stride) {
return DecodeIntoRGBABuffer(MODE_BGRA, data, data_size, output, stride, size);
}
uint8_t* WebPDecodeYUVInto(const uint8_t* data, size_t data_size,
uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride) {
WebPDecParams params;
WebPDecBuffer output;
if (luma == NULL) return NULL;
WebPInitDecBuffer(&output);
WebPResetDecParams(&params);
params.output = &output;
output.colorspace = MODE_YUV;
output.u.YUVA.y = luma;
output.u.YUVA.y_stride = luma_stride;
output.u.YUVA.y_size = luma_size;
output.u.YUVA.u = u;
output.u.YUVA.u_stride = u_stride;
output.u.YUVA.u_size = u_size;
output.u.YUVA.v = v;
output.u.YUVA.v_stride = v_stride;
output.u.YUVA.v_size = v_size;
output.is_external_memory = 1;
if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
return NULL;
}
return luma;
}
//------------------------------------------------------------------------------
static uint8_t* Decode(WEBP_CSP_MODE mode, const uint8_t* const data,
size_t data_size, int* const width, int* const height,
WebPDecBuffer* const keep_info) {
WebPDecParams params;
WebPDecBuffer output;
WebPInitDecBuffer(&output);
WebPResetDecParams(&params);
params.output = &output;
output.colorspace = mode;
// Retrieve (and report back) the required dimensions from bitstream.
if (!WebPGetInfo(data, data_size, &output.width, &output.height)) {
return NULL;
}
if (width != NULL) *width = output.width;
if (height != NULL) *height = output.height;
// Decode
if (DecodeInto(data, data_size, &params) != VP8_STATUS_OK) {
return NULL;
}
if (keep_info != NULL) { // keep track of the side-info
WebPCopyDecBuffer(&output, keep_info);
}
// return decoded samples (don't clear 'output'!)
return WebPIsRGBMode(mode) ? output.u.RGBA.rgba : output.u.YUVA.y;
}
uint8_t* WebPDecodeRGB(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_RGB, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeRGBA(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_RGBA, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeARGB(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_ARGB, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_BGR, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size,
int* width, int* height) {
return Decode(MODE_BGRA, data, data_size, width, height, NULL);
}
uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
int* width, int* height, uint8_t** u, uint8_t** v,
int* stride, int* uv_stride) {
WebPDecBuffer output; // only to preserve the side-infos
uint8_t* const out = Decode(MODE_YUV, data, data_size,
width, height, &output);
if (out != NULL) {
const WebPYUVABuffer* const buf = &output.u.YUVA;
*u = buf->u;
*v = buf->v;
*stride = buf->y_stride;
*uv_stride = buf->u_stride;
assert(buf->u_stride == buf->v_stride);
}
return out;
}
static void DefaultFeatures(WebPBitstreamFeatures* const features) {
assert(features != NULL);
memset(features, 0, sizeof(*features));
}
static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
WebPBitstreamFeatures* const features) {
if (features == NULL || data == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
DefaultFeatures(features);
// Only parse enough of the data to retrieve the features.
return ParseHeadersInternal(data, data_size,
&features->width, &features->height,
&features->has_alpha, &features->has_animation,
&features->format, NULL);
}
//------------------------------------------------------------------------------
// WebPGetInfo()
int WebPGetInfo(const uint8_t* data, size_t data_size,
int* width, int* height) {
WebPBitstreamFeatures features;
if (GetFeatures(data, data_size, &features) != VP8_STATUS_OK) {
return 0;
}
if (width != NULL) {
*width = features.width;
}
if (height != NULL) {
*height = features.height;
}
return 1;
}
//------------------------------------------------------------------------------
// Advanced decoding API
int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
int version) {
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return 0; // version mismatch
}
if (config == NULL) {
return 0;
}
memset(config, 0, sizeof(*config));
DefaultFeatures(&config->input);
WebPInitDecBuffer(&config->output);
return 1;
}
VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
WebPBitstreamFeatures* features,
int version) {
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return VP8_STATUS_INVALID_PARAM; // version mismatch
}
if (features == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
return GetFeatures(data, data_size, features);
}
VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,
WebPDecoderConfig* config) {
WebPDecParams params;
VP8StatusCode status;
if (config == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
status = GetFeatures(data, data_size, &config->input);
if (status != VP8_STATUS_OK) {
if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
return VP8_STATUS_BITSTREAM_ERROR; // Not-enough-data treated as error.
}
return status;
}
WebPResetDecParams(&params);
params.options = &config->options;
params.output = &config->output;
if (WebPAvoidSlowMemory(params.output, &config->input)) {
// decoding to slow memory: use a temporary in-mem buffer to decode into.
WebPDecBuffer in_mem_buffer;
WebPInitDecBuffer(&in_mem_buffer);
in_mem_buffer.colorspace = config->output.colorspace;
in_mem_buffer.width = config->input.width;
in_mem_buffer.height = config->input.height;
params.output = &in_mem_buffer;
status = DecodeInto(data, data_size, &params);
if (status == VP8_STATUS_OK) { // do the slow-copy
status = WebPCopyDecBufferPixels(&in_mem_buffer, &config->output);
}
WebPFreeDecBuffer(&in_mem_buffer);
} else {
status = DecodeInto(data, data_size, &params);
}
return status;
}
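// Typical use of the advanced API above (illustrative sketch only; error paths
// trimmed, and 'data'/'data_size' are assumed to hold the complete file):
//
//   WebPDecoderConfig config;
//   if (!WebPInitDecoderConfig(&config)) return 0;   // ABI version check
//   if (WebPGetFeatures(data, data_size, &config.input) != VP8_STATUS_OK) {
//     return 0;
//   }
//   config.options.use_scaling = 1;
//   config.options.scaled_width = config.input.width / 2;
//   config.options.scaled_height = config.input.height / 2;
//   config.output.colorspace = MODE_RGBA;
//   if (WebPDecode(data, data_size, &config) != VP8_STATUS_OK) return 0;
//   // ... use config.output.u.RGBA.rgba / .stride ...
//   WebPFreeDecBuffer(&config.output);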
//------------------------------------------------------------------------------
// Cropping and rescaling.
int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
VP8Io* const io, WEBP_CSP_MODE src_colorspace) {
const int W = io->width;
const int H = io->height;
int x = 0, y = 0, w = W, h = H;
// Cropping
io->use_cropping = (options != NULL) && (options->use_cropping > 0);
if (io->use_cropping) {
w = options->crop_width;
h = options->crop_height;
x = options->crop_left;
y = options->crop_top;
if (!WebPIsRGBMode(src_colorspace)) { // only snap for YUV420
x &= ~1;
y &= ~1;
}
if (x < 0 || y < 0 || w <= 0 || h <= 0 || x + w > W || y + h > H) {
return 0; // out of frame boundary error
}
}
io->crop_left = x;
io->crop_top = y;
io->crop_right = x + w;
io->crop_bottom = y + h;
io->mb_w = w;
io->mb_h = h;
// Scaling
io->use_scaling = (options != NULL) && (options->use_scaling > 0);
if (io->use_scaling) {
int scaled_width = options->scaled_width;
int scaled_height = options->scaled_height;
if (!WebPRescalerGetScaledDimensions(w, h, &scaled_width, &scaled_height)) {
return 0;
}
io->scaled_width = scaled_width;
io->scaled_height = scaled_height;
}
// Filter
io->bypass_filtering = (options != NULL) && options->bypass_filtering;
// Fancy upsampler
#ifdef FANCY_UPSAMPLING
io->fancy_upsampling = (options == NULL) || (!options->no_fancy_upsampling);
#endif
if (io->use_scaling) {
// disable filter (only for large downscaling ratio).
io->bypass_filtering = (io->scaled_width < W * 3 / 4) &&
(io->scaled_height < H * 3 / 4);
io->fancy_upsampling = 0;
}
return 1;
}
//------------------------------------------------------------------------------

View file

@@ -0,0 +1,133 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Internal header: WebP decoding parameters and custom IO on buffer
//
// Author: somnath@google.com (Somnath Banerjee)
#ifndef WEBP_DEC_WEBPI_DEC_H_
#define WEBP_DEC_WEBPI_DEC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "src/utils/rescaler_utils.h"
#include "src/dec/vp8_dec.h"
//------------------------------------------------------------------------------
// WebPDecParams: Decoding output parameters. Transient internal object.
typedef struct WebPDecParams WebPDecParams;
typedef int (*OutputFunc)(const VP8Io* const io, WebPDecParams* const p);
typedef int (*OutputAlphaFunc)(const VP8Io* const io, WebPDecParams* const p,
int expected_num_out_lines);
typedef int (*OutputRowFunc)(WebPDecParams* const p, int y_pos,
int max_out_lines);
struct WebPDecParams {
WebPDecBuffer* output; // output buffer.
uint8_t* tmp_y, *tmp_u, *tmp_v; // cache for the fancy upsampler
// or used for tmp rescaling
int last_y; // coordinate of the line that was last output
const WebPDecoderOptions* options; // if not NULL, use alt decoding features
WebPRescaler* scaler_y, *scaler_u, *scaler_v, *scaler_a; // rescalers
void* memory; // overall scratch memory for the output work.
OutputFunc emit; // output RGB or YUV samples
OutputAlphaFunc emit_alpha; // output alpha channel
OutputRowFunc emit_alpha_row; // output one line of rescaled alpha values
};
// Should be called first, before any use of the WebPDecParams object.
void WebPResetDecParams(WebPDecParams* const params);
//------------------------------------------------------------------------------
// Header parsing helpers
// Structure storing a description of the RIFF headers.
typedef struct {
const uint8_t* data; // input buffer
size_t data_size; // input buffer size
int have_all_data; // true if all data is known to be available
size_t offset; // offset to main data chunk (VP8 or VP8L)
const uint8_t* alpha_data; // points to alpha chunk (if present)
size_t alpha_data_size; // alpha chunk size
size_t compressed_size; // VP8/VP8L compressed data size
size_t riff_size; // size of the riff payload (or 0 if absent)
int is_lossless; // true if a VP8L chunk is present
} WebPHeaderStructure;
// Skips over all valid chunks prior to the first VP8/VP8L frame header.
// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk),
// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE
// in the case of non-decodable features (animation for instance).
// In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless
// fields are updated appropriately upon success.
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);
//------------------------------------------------------------------------------
// Misc utils
// Initializes VP8Io with custom setup, io and teardown functions. The default
// hooks will use the supplied 'params' as io->opaque handle.
void WebPInitCustomIo(WebPDecParams* const params, VP8Io* const io);
// Setup crop_xxx fields, mb_w and mb_h in io. 'src_colorspace' refers
// to the *compressed* format, not the output one.
int WebPIoInitFromOptions(const WebPDecoderOptions* const options,
VP8Io* const io, WEBP_CSP_MODE src_colorspace);
//------------------------------------------------------------------------------
// Internal functions regarding WebPDecBuffer memory (in buffer.c).
// Don't really need to be externally visible for now.
// Prepare 'buffer' with the requested initial dimensions width/height.
// If no external storage is supplied, initializes buffer by allocating output
// memory and setting up the stride information. Validates the parameters
// and returns an error code in case of problems (no memory, or invalid
// stride / size / dimension / etc.). If *options is not NULL, also verifies
// that the options' parameters are valid and applies them to the
// width/height dimensions of the
// output buffer. This takes cropping / scaling / rotation into account.
// Also incorporates the options->flip flag to flip the buffer parameters if
// needed.
VP8StatusCode WebPAllocateDecBuffer(int width, int height,
const WebPDecoderOptions* const options,
WebPDecBuffer* const buffer);
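// An illustrative sketch (not part of upstream libwebp): allocating a scaled
// RGBA output buffer through this helper. WebPInitDecBuffer(), MODE_RGBA and
// WebPFreeDecBuffer() are public decode.h entry points; the MakeScaledRGBA()
// name and the 200x100 target size are arbitrary.
#if 0  // illustrative only
static VP8StatusCode MakeScaledRGBA(int src_width, int src_height,
                                    WebPDecBuffer* const out) {
  WebPDecoderOptions options = { 0 };   // zero == every optional feature off
  options.use_scaling = 1;
  options.scaled_width = 200;
  options.scaled_height = 100;
  if (!WebPInitDecBuffer(out)) return VP8_STATUS_INVALID_PARAM;
  out->colorspace = MODE_RGBA;
  // On success out->u.RGBA holds the pixels; release with WebPFreeDecBuffer().
  return WebPAllocateDecBuffer(src_width, src_height, &options, out);
}
#endif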
// Flip buffer vertically by negating the various strides.
VP8StatusCode WebPFlipBuffer(WebPDecBuffer* const buffer);
// Copy 'src' into 'dst' buffer, making sure 'dst' is not marked as owner of the
// memory (still held by 'src'). No pixels are copied.
void WebPCopyDecBuffer(const WebPDecBuffer* const src,
WebPDecBuffer* const dst);
// Copy and transfer ownership from src to dst (beware of parameter order!)
void WebPGrabDecBuffer(WebPDecBuffer* const src, WebPDecBuffer* const dst);
// Copy pixels from 'src' into a *preallocated* 'dst' buffer. Returns
// VP8_STATUS_INVALID_PARAM if the 'dst' is not set up correctly for the copy.
VP8StatusCode WebPCopyDecBufferPixels(const WebPDecBuffer* const src,
WebPDecBuffer* const dst);
// Returns true if decoding will be slow with the current configuration
// and bitstream features.
int WebPAvoidSlowMemory(const WebPDecBuffer* const output,
const WebPBitstreamFeatures* const features);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* WEBP_DEC_WEBPI_DEC_H_ */

Просмотреть файл

@ -0,0 +1,967 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// WebP container demux.
//
#ifdef HAVE_CONFIG_H
#include "src/webp/config.h"
#endif
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "src/utils/utils.h"
#include "src/webp/decode.h" // WebPGetFeatures
#include "src/webp/demux.h"
#include "src/webp/format_constants.h"
#define DMUX_MAJ_VERSION 1
#define DMUX_MIN_VERSION 0
#define DMUX_REV_VERSION 0
typedef struct {
size_t start_; // start location of the data
size_t end_; // end location
size_t riff_end_; // riff chunk end location, can be > end_.
size_t buf_size_; // size of the buffer
const uint8_t* buf_;
} MemBuffer;
typedef struct {
size_t offset_;
size_t size_;
} ChunkData;
typedef struct Frame {
int x_offset_, y_offset_;
int width_, height_;
int has_alpha_;
int duration_;
WebPMuxAnimDispose dispose_method_;
WebPMuxAnimBlend blend_method_;
int frame_num_;
int complete_; // img_components_ contains a full image.
ChunkData img_components_[2]; // 0=VP8{,L} 1=ALPH
struct Frame* next_;
} Frame;
typedef struct Chunk {
ChunkData data_;
struct Chunk* next_;
} Chunk;
struct WebPDemuxer {
MemBuffer mem_;
WebPDemuxState state_;
int is_ext_format_;
uint32_t feature_flags_;
int canvas_width_, canvas_height_;
int loop_count_;
uint32_t bgcolor_;
int num_frames_;
Frame* frames_;
Frame** frames_tail_;
Chunk* chunks_; // non-image chunks
Chunk** chunks_tail_;
};
typedef enum {
PARSE_OK,
PARSE_NEED_MORE_DATA,
PARSE_ERROR
} ParseStatus;
typedef struct ChunkParser {
uint8_t id[4];
ParseStatus (*parse)(WebPDemuxer* const dmux);
int (*valid)(const WebPDemuxer* const dmux);
} ChunkParser;
static ParseStatus ParseSingleImage(WebPDemuxer* const dmux);
static ParseStatus ParseVP8X(WebPDemuxer* const dmux);
static int IsValidSimpleFormat(const WebPDemuxer* const dmux);
static int IsValidExtendedFormat(const WebPDemuxer* const dmux);
static const ChunkParser kMasterChunks[] = {
{ { 'V', 'P', '8', ' ' }, ParseSingleImage, IsValidSimpleFormat },
{ { 'V', 'P', '8', 'L' }, ParseSingleImage, IsValidSimpleFormat },
{ { 'V', 'P', '8', 'X' }, ParseVP8X, IsValidExtendedFormat },
{ { '0', '0', '0', '0' }, NULL, NULL },
};
//------------------------------------------------------------------------------
int WebPGetDemuxVersion(void) {
return (DMUX_MAJ_VERSION << 16) | (DMUX_MIN_VERSION << 8) | DMUX_REV_VERSION;
}
// -----------------------------------------------------------------------------
// MemBuffer
static int RemapMemBuffer(MemBuffer* const mem,
const uint8_t* data, size_t size) {
if (size < mem->buf_size_) return 0; // can't remap to a shorter buffer!
mem->buf_ = data;
mem->end_ = mem->buf_size_ = size;
return 1;
}
static int InitMemBuffer(MemBuffer* const mem,
const uint8_t* data, size_t size) {
memset(mem, 0, sizeof(*mem));
return RemapMemBuffer(mem, data, size);
}
// Return the remaining data size available in 'mem'.
static WEBP_INLINE size_t MemDataSize(const MemBuffer* const mem) {
return (mem->end_ - mem->start_);
}
// Return true if 'size' exceeds the end of the RIFF chunk.
static WEBP_INLINE int SizeIsInvalid(const MemBuffer* const mem, size_t size) {
return (size > mem->riff_end_ - mem->start_);
}
static WEBP_INLINE void Skip(MemBuffer* const mem, size_t size) {
mem->start_ += size;
}
static WEBP_INLINE void Rewind(MemBuffer* const mem, size_t size) {
mem->start_ -= size;
}
static WEBP_INLINE const uint8_t* GetBuffer(MemBuffer* const mem) {
return mem->buf_ + mem->start_;
}
// Read from 'mem' and skip the read bytes.
static WEBP_INLINE uint8_t ReadByte(MemBuffer* const mem) {
const uint8_t byte = mem->buf_[mem->start_];
Skip(mem, 1);
return byte;
}
static WEBP_INLINE int ReadLE16s(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const int val = GetLE16(data);
Skip(mem, 2);
return val;
}
static WEBP_INLINE int ReadLE24s(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const int val = GetLE24(data);
Skip(mem, 3);
return val;
}
static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const uint32_t val = GetLE32(data);
Skip(mem, 4);
return val;
}
// -----------------------------------------------------------------------------
// Secondary chunk parsing
static void AddChunk(WebPDemuxer* const dmux, Chunk* const chunk) {
*dmux->chunks_tail_ = chunk;
chunk->next_ = NULL;
dmux->chunks_tail_ = &chunk->next_;
}
// Add a frame to the end of the list, ensuring the last frame is complete.
// Returns true on success, false otherwise.
static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
const Frame* const last_frame = *dmux->frames_tail_;
if (last_frame != NULL && !last_frame->complete_) return 0;
*dmux->frames_tail_ = frame;
frame->next_ = NULL;
dmux->frames_tail_ = &frame->next_;
return 1;
}
static void SetFrameInfo(size_t start_offset, size_t size,
int frame_num, int complete,
const WebPBitstreamFeatures* const features,
Frame* const frame) {
frame->img_components_[0].offset_ = start_offset;
frame->img_components_[0].size_ = size;
frame->width_ = features->width;
frame->height_ = features->height;
frame->has_alpha_ |= features->has_alpha;
frame->frame_num_ = frame_num;
frame->complete_ = complete;
}
// Store image bearing chunks to 'frame'. 'min_size' is an optional size
// requirement; it may be zero.
static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
MemBuffer* const mem, Frame* const frame) {
int alpha_chunks = 0;
int image_chunks = 0;
int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE ||
MemDataSize(mem) < min_size);
ParseStatus status = PARSE_OK;
if (done) return PARSE_NEED_MORE_DATA;
do {
const size_t chunk_start_offset = mem->start_;
const uint32_t fourcc = ReadLE32(mem);
const uint32_t payload_size = ReadLE32(mem);
const uint32_t payload_size_padded = payload_size + (payload_size & 1);
const size_t payload_available = (payload_size_padded > MemDataSize(mem))
? MemDataSize(mem) : payload_size_padded;
const size_t chunk_size = CHUNK_HEADER_SIZE + payload_available;
if (payload_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
if (SizeIsInvalid(mem, payload_size_padded)) return PARSE_ERROR;
if (payload_size_padded > MemDataSize(mem)) status = PARSE_NEED_MORE_DATA;
switch (fourcc) {
case MKFOURCC('A', 'L', 'P', 'H'):
if (alpha_chunks == 0) {
++alpha_chunks;
frame->img_components_[1].offset_ = chunk_start_offset;
frame->img_components_[1].size_ = chunk_size;
frame->has_alpha_ = 1;
frame->frame_num_ = frame_num;
Skip(mem, payload_available);
} else {
goto Done;
}
break;
case MKFOURCC('V', 'P', '8', 'L'):
if (alpha_chunks > 0) return PARSE_ERROR; // VP8L has its own alpha
// fall through
case MKFOURCC('V', 'P', '8', ' '):
if (image_chunks == 0) {
// Extract the bitstream features, tolerating failures when the data
// is incomplete.
WebPBitstreamFeatures features;
const VP8StatusCode vp8_status =
WebPGetFeatures(mem->buf_ + chunk_start_offset, chunk_size,
&features);
if (status == PARSE_NEED_MORE_DATA &&
vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
return PARSE_NEED_MORE_DATA;
} else if (vp8_status != VP8_STATUS_OK) {
// We have enough data, and yet WebPGetFeatures() failed.
return PARSE_ERROR;
}
++image_chunks;
SetFrameInfo(chunk_start_offset, chunk_size, frame_num,
status == PARSE_OK, &features, frame);
Skip(mem, payload_available);
} else {
goto Done;
}
break;
Done:
default:
// Restore fourcc/size when moving up one level in parsing.
Rewind(mem, CHUNK_HEADER_SIZE);
done = 1;
break;
}
if (mem->start_ == mem->riff_end_) {
done = 1;
} else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
status = PARSE_NEED_MORE_DATA;
}
} while (!done && status == PARSE_OK);
return status;
}
// Creates a new Frame if 'actual_size' is within bounds and 'mem' contains
// enough data ('min_size') to parse the payload.
// Returns PARSE_OK on success with *frame pointing to the new Frame.
// Returns PARSE_NEED_MORE_DATA with insufficient data, PARSE_ERROR otherwise.
static ParseStatus NewFrame(const MemBuffer* const mem,
uint32_t min_size, uint32_t actual_size,
Frame** frame) {
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
if (actual_size < min_size) return PARSE_ERROR;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
*frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(**frame));
return (*frame == NULL) ? PARSE_ERROR : PARSE_OK;
}
// Parse an 'ANMF' chunk and any image bearing chunks that immediately follow.
// 'frame_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseAnimationFrame(
WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
int added_frame = 0;
int bits;
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status =
NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
if (status != PARSE_OK) return status;
frame->x_offset_ = 2 * ReadLE24s(mem);
frame->y_offset_ = 2 * ReadLE24s(mem);
frame->width_ = 1 + ReadLE24s(mem);
frame->height_ = 1 + ReadLE24s(mem);
frame->duration_ = ReadLE24s(mem);
bits = ReadByte(mem);
frame->dispose_method_ =
(bits & 1) ? WEBP_MUX_DISPOSE_BACKGROUND : WEBP_MUX_DISPOSE_NONE;
frame->blend_method_ = (bits & 2) ? WEBP_MUX_NO_BLEND : WEBP_MUX_BLEND;
if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
WebPSafeFree(frame);
return PARSE_ERROR;
}
  // Store a frame only if the animation flag is set and some data for this
  // frame is available.
status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame);
if (status != PARSE_ERROR && is_animation && frame->frame_num_ > 0) {
added_frame = AddFrame(dmux, frame);
if (added_frame) {
++dmux->num_frames_;
} else {
status = PARSE_ERROR;
}
}
if (!added_frame) WebPSafeFree(frame);
return status;
}
// General chunk storage, starting with the header at 'start_offset', allowing
// the user to request the payload via a fourcc string. 'size' includes the
// header and the unpadded payload size.
// Returns true on success, false otherwise.
static int StoreChunk(WebPDemuxer* const dmux,
size_t start_offset, uint32_t size) {
Chunk* const chunk = (Chunk*)WebPSafeCalloc(1ULL, sizeof(*chunk));
if (chunk == NULL) return 0;
chunk->data_.offset_ = start_offset;
chunk->data_.size_ = size;
AddChunk(dmux, chunk);
return 1;
}
// -----------------------------------------------------------------------------
// Primary chunk parsing
static ParseStatus ReadHeader(MemBuffer* const mem) {
const size_t min_size = RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE;
uint32_t riff_size;
// Basic file level validation.
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
if (memcmp(GetBuffer(mem), "RIFF", CHUNK_SIZE_BYTES) ||
memcmp(GetBuffer(mem) + CHUNK_HEADER_SIZE, "WEBP", CHUNK_SIZE_BYTES)) {
return PARSE_ERROR;
}
riff_size = GetLE32(GetBuffer(mem) + TAG_SIZE);
if (riff_size < CHUNK_HEADER_SIZE) return PARSE_ERROR;
if (riff_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
// There's no point in reading past the end of the RIFF chunk
mem->riff_end_ = riff_size + CHUNK_HEADER_SIZE;
if (mem->buf_size_ > mem->riff_end_) {
mem->buf_size_ = mem->end_ = mem->riff_end_;
}
Skip(mem, RIFF_HEADER_SIZE);
return PARSE_OK;
}
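// For reference, an illustrative sketch (derived from the checks in
// ReadHeader() above, not a normative description) of the container prefix
// that was just consumed:
//   offset  0: 'R','I','F','F'
//   offset  4: uint32 riff_size, little-endian; it counts everything after
//              this field, so the RIFF payload ends at byte
//              riff_size + CHUNK_HEADER_SIZE (8) from the start of the file.
//   offset  8: 'W','E','B','P'
//   offset 12: first chunk header -- a 'VP8 ', 'VP8L' or 'VP8X' fourcc
//              followed by its own little-endian uint32 payload size.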
static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
const size_t min_size = CHUNK_HEADER_SIZE;
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status;
int image_added = 0;
if (dmux->frames_ != NULL) return PARSE_ERROR;
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
if (frame == NULL) return PARSE_ERROR;
// For the single image case we allow parsing of a partial frame, so no
// minimum size is imposed here.
status = StoreFrame(1, 0, &dmux->mem_, frame);
if (status != PARSE_ERROR) {
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
// Clear any alpha when the alpha flag is missing.
if (!has_alpha && frame->img_components_[1].size_ > 0) {
frame->img_components_[1].offset_ = 0;
frame->img_components_[1].size_ = 0;
frame->has_alpha_ = 0;
}
// Use the frame width/height as the canvas values for non-vp8x files.
// Also, set ALPHA_FLAG if this is a lossless image with alpha.
if (!dmux->is_ext_format_ && frame->width_ > 0 && frame->height_ > 0) {
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
dmux->canvas_width_ = frame->width_;
dmux->canvas_height_ = frame->height_;
dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
}
if (!AddFrame(dmux, frame)) {
status = PARSE_ERROR; // last frame was left incomplete
} else {
image_added = 1;
dmux->num_frames_ = 1;
}
}
if (!image_added) WebPSafeFree(frame);
return status;
}
static ParseStatus ParseVP8XChunks(WebPDemuxer* const dmux) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
MemBuffer* const mem = &dmux->mem_;
int anim_chunks = 0;
ParseStatus status = PARSE_OK;
do {
int store_chunk = 1;
const size_t chunk_start_offset = mem->start_;
const uint32_t fourcc = ReadLE32(mem);
const uint32_t chunk_size = ReadLE32(mem);
const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1);
if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
if (SizeIsInvalid(mem, chunk_size_padded)) return PARSE_ERROR;
switch (fourcc) {
case MKFOURCC('V', 'P', '8', 'X'): {
return PARSE_ERROR;
}
case MKFOURCC('A', 'L', 'P', 'H'):
case MKFOURCC('V', 'P', '8', ' '):
case MKFOURCC('V', 'P', '8', 'L'): {
// check that this isn't an animation (all frames should be in an ANMF).
if (anim_chunks > 0 || is_animation) return PARSE_ERROR;
Rewind(mem, CHUNK_HEADER_SIZE);
status = ParseSingleImage(dmux);
break;
}
case MKFOURCC('A', 'N', 'I', 'M'): {
if (chunk_size_padded < ANIM_CHUNK_SIZE) return PARSE_ERROR;
if (MemDataSize(mem) < chunk_size_padded) {
status = PARSE_NEED_MORE_DATA;
} else if (anim_chunks == 0) {
++anim_chunks;
dmux->bgcolor_ = ReadLE32(mem);
dmux->loop_count_ = ReadLE16s(mem);
Skip(mem, chunk_size_padded - ANIM_CHUNK_SIZE);
} else {
store_chunk = 0;
goto Skip;
}
break;
}
case MKFOURCC('A', 'N', 'M', 'F'): {
if (anim_chunks == 0) return PARSE_ERROR; // 'ANIM' precedes frames.
status = ParseAnimationFrame(dmux, chunk_size_padded);
break;
}
case MKFOURCC('I', 'C', 'C', 'P'): {
store_chunk = !!(dmux->feature_flags_ & ICCP_FLAG);
goto Skip;
}
case MKFOURCC('E', 'X', 'I', 'F'): {
store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
goto Skip;
}
case MKFOURCC('X', 'M', 'P', ' '): {
store_chunk = !!(dmux->feature_flags_ & XMP_FLAG);
goto Skip;
}
Skip:
default: {
if (chunk_size_padded <= MemDataSize(mem)) {
if (store_chunk) {
// Store only the chunk header and unpadded size as only the payload
// will be returned to the user.
if (!StoreChunk(dmux, chunk_start_offset,
CHUNK_HEADER_SIZE + chunk_size)) {
return PARSE_ERROR;
}
}
Skip(mem, chunk_size_padded);
} else {
status = PARSE_NEED_MORE_DATA;
}
}
}
if (mem->start_ == mem->riff_end_) {
break;
} else if (MemDataSize(mem) < CHUNK_HEADER_SIZE) {
status = PARSE_NEED_MORE_DATA;
}
} while (status == PARSE_OK);
return status;
}
static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
uint32_t vp8x_size;
if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
dmux->is_ext_format_ = 1;
Skip(mem, TAG_SIZE); // VP8X
vp8x_size = ReadLE32(mem);
if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
if (vp8x_size < VP8X_CHUNK_SIZE) return PARSE_ERROR;
vp8x_size += vp8x_size & 1;
if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;
dmux->feature_flags_ = ReadByte(mem);
Skip(mem, 3); // Reserved.
dmux->canvas_width_ = 1 + ReadLE24s(mem);
dmux->canvas_height_ = 1 + ReadLE24s(mem);
if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
return PARSE_ERROR; // image final dimension is too large
}
Skip(mem, vp8x_size - VP8X_CHUNK_SIZE); // skip any trailing data.
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
if (SizeIsInvalid(mem, CHUNK_HEADER_SIZE)) return PARSE_ERROR;
if (MemDataSize(mem) < CHUNK_HEADER_SIZE) return PARSE_NEED_MORE_DATA;
return ParseVP8XChunks(dmux);
}
// -----------------------------------------------------------------------------
// Format validation
static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
const Frame* const frame = dmux->frames_;
if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;
if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
if (dmux->state_ == WEBP_DEMUX_DONE && frame == NULL) return 0;
if (frame->width_ <= 0 || frame->height_ <= 0) return 0;
return 1;
}
// If 'exact' is true, check that the image resolution matches the canvas.
// If 'exact' is false, check that the x/y offsets do not exceed the canvas.
static int CheckFrameBounds(const Frame* const frame, int exact,
int canvas_width, int canvas_height) {
if (exact) {
if (frame->x_offset_ != 0 || frame->y_offset_ != 0) {
return 0;
}
if (frame->width_ != canvas_width || frame->height_ != canvas_height) {
return 0;
}
} else {
if (frame->x_offset_ < 0 || frame->y_offset_ < 0) return 0;
if (frame->width_ + frame->x_offset_ > canvas_width) return 0;
if (frame->height_ + frame->y_offset_ > canvas_height) return 0;
}
return 1;
}
static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
const int is_animation = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const Frame* f = dmux->frames_;
if (dmux->state_ == WEBP_DEMUX_PARSING_HEADER) return 1;
if (dmux->canvas_width_ <= 0 || dmux->canvas_height_ <= 0) return 0;
if (dmux->loop_count_ < 0) return 0;
if (dmux->state_ == WEBP_DEMUX_DONE && dmux->frames_ == NULL) return 0;
if (dmux->feature_flags_ & ~ALL_VALID_FLAGS) return 0; // invalid bitstream
while (f != NULL) {
const int cur_frame_set = f->frame_num_;
int frame_count = 0;
// Check frame properties.
for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
const ChunkData* const image = f->img_components_;
const ChunkData* const alpha = f->img_components_ + 1;
if (!is_animation && f->frame_num_ > 1) return 0;
if (f->complete_) {
if (alpha->size_ == 0 && image->size_ == 0) return 0;
// Ensure alpha precedes image bitstream.
if (alpha->size_ > 0 && alpha->offset_ > image->offset_) {
return 0;
}
if (f->width_ <= 0 || f->height_ <= 0) return 0;
} else {
// There shouldn't be a partial frame in a complete file.
if (dmux->state_ == WEBP_DEMUX_DONE) return 0;
// Ensure alpha precedes image bitstream.
if (alpha->size_ > 0 && image->size_ > 0 &&
alpha->offset_ > image->offset_) {
return 0;
}
// There shouldn't be any frames after an incomplete one.
if (f->next_ != NULL) return 0;
}
if (f->width_ > 0 && f->height_ > 0 &&
!CheckFrameBounds(f, !is_animation,
dmux->canvas_width_, dmux->canvas_height_)) {
return 0;
}
++frame_count;
}
}
return 1;
}
// -----------------------------------------------------------------------------
// WebPDemuxer object
static void InitDemux(WebPDemuxer* const dmux, const MemBuffer* const mem) {
dmux->state_ = WEBP_DEMUX_PARSING_HEADER;
dmux->loop_count_ = 1;
dmux->bgcolor_ = 0xFFFFFFFF; // White background by default.
dmux->canvas_width_ = -1;
dmux->canvas_height_ = -1;
dmux->frames_tail_ = &dmux->frames_;
dmux->chunks_tail_ = &dmux->chunks_;
dmux->mem_ = *mem;
}
static ParseStatus CreateRawImageDemuxer(MemBuffer* const mem,
WebPDemuxer** demuxer) {
WebPBitstreamFeatures features;
const VP8StatusCode status =
WebPGetFeatures(mem->buf_, mem->buf_size_, &features);
*demuxer = NULL;
if (status != VP8_STATUS_OK) {
return (status == VP8_STATUS_NOT_ENOUGH_DATA) ? PARSE_NEED_MORE_DATA
: PARSE_ERROR;
}
{
WebPDemuxer* const dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
Frame* const frame = (Frame*)WebPSafeCalloc(1ULL, sizeof(*frame));
if (dmux == NULL || frame == NULL) goto Error;
InitDemux(dmux, mem);
SetFrameInfo(0, mem->buf_size_, 1 /*frame_num*/, 1 /*complete*/, &features,
frame);
if (!AddFrame(dmux, frame)) goto Error;
dmux->state_ = WEBP_DEMUX_DONE;
dmux->canvas_width_ = frame->width_;
dmux->canvas_height_ = frame->height_;
dmux->feature_flags_ |= frame->has_alpha_ ? ALPHA_FLAG : 0;
dmux->num_frames_ = 1;
assert(IsValidSimpleFormat(dmux));
*demuxer = dmux;
return PARSE_OK;
Error:
WebPSafeFree(dmux);
WebPSafeFree(frame);
return PARSE_ERROR;
}
}
WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
WebPDemuxState* state, int version) {
const ChunkParser* parser;
int partial;
ParseStatus status = PARSE_ERROR;
MemBuffer mem;
WebPDemuxer* dmux;
if (state != NULL) *state = WEBP_DEMUX_PARSE_ERROR;
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DEMUX_ABI_VERSION)) return NULL;
if (data == NULL || data->bytes == NULL || data->size == 0) return NULL;
if (!InitMemBuffer(&mem, data->bytes, data->size)) return NULL;
status = ReadHeader(&mem);
if (status != PARSE_OK) {
    // If parsing of the webp file header fails, attempt to handle a raw
// VP8/VP8L frame. Note 'allow_partial' is ignored in this case.
if (status == PARSE_ERROR) {
status = CreateRawImageDemuxer(&mem, &dmux);
if (status == PARSE_OK) {
if (state != NULL) *state = WEBP_DEMUX_DONE;
return dmux;
}
}
if (state != NULL) {
*state = (status == PARSE_NEED_MORE_DATA) ? WEBP_DEMUX_PARSING_HEADER
: WEBP_DEMUX_PARSE_ERROR;
}
return NULL;
}
partial = (mem.buf_size_ < mem.riff_end_);
if (!allow_partial && partial) return NULL;
dmux = (WebPDemuxer*)WebPSafeCalloc(1ULL, sizeof(*dmux));
if (dmux == NULL) return NULL;
InitDemux(dmux, &mem);
status = PARSE_ERROR;
for (parser = kMasterChunks; parser->parse != NULL; ++parser) {
if (!memcmp(parser->id, GetBuffer(&dmux->mem_), TAG_SIZE)) {
status = parser->parse(dmux);
if (status == PARSE_OK) dmux->state_ = WEBP_DEMUX_DONE;
if (status == PARSE_NEED_MORE_DATA && !partial) status = PARSE_ERROR;
if (status != PARSE_ERROR && !parser->valid(dmux)) status = PARSE_ERROR;
if (status == PARSE_ERROR) dmux->state_ = WEBP_DEMUX_PARSE_ERROR;
break;
}
}
if (state != NULL) *state = dmux->state_;
if (status == PARSE_ERROR) {
WebPDemuxDelete(dmux);
return NULL;
}
return dmux;
}
void WebPDemuxDelete(WebPDemuxer* dmux) {
Chunk* c;
Frame* f;
if (dmux == NULL) return;
for (f = dmux->frames_; f != NULL;) {
Frame* const cur_frame = f;
f = f->next_;
WebPSafeFree(cur_frame);
}
for (c = dmux->chunks_; c != NULL;) {
Chunk* const cur_chunk = c;
c = c->next_;
WebPSafeFree(cur_chunk);
}
WebPSafeFree(dmux);
}
// -----------------------------------------------------------------------------
uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
if (dmux == NULL) return 0;
switch (feature) {
case WEBP_FF_FORMAT_FLAGS: return dmux->feature_flags_;
case WEBP_FF_CANVAS_WIDTH: return (uint32_t)dmux->canvas_width_;
case WEBP_FF_CANVAS_HEIGHT: return (uint32_t)dmux->canvas_height_;
case WEBP_FF_LOOP_COUNT: return (uint32_t)dmux->loop_count_;
case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor_;
case WEBP_FF_FRAME_COUNT: return (uint32_t)dmux->num_frames_;
}
return 0;
}
// -----------------------------------------------------------------------------
// Frame iteration
static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
const Frame* f;
for (f = dmux->frames_; f != NULL; f = f->next_) {
if (frame_num == f->frame_num_) break;
}
return f;
}
static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
const Frame* const frame,
size_t* const data_size) {
*data_size = 0;
if (frame != NULL) {
const ChunkData* const image = frame->img_components_;
const ChunkData* const alpha = frame->img_components_ + 1;
size_t start_offset = image->offset_;
*data_size = image->size_;
    // If alpha exists it precedes the image; update the size to allow for
    // intervening chunks.
if (alpha->size_ > 0) {
const size_t inter_size = (image->offset_ > 0)
? image->offset_ - (alpha->offset_ + alpha->size_)
: 0;
start_offset = alpha->offset_;
*data_size += alpha->size_ + inter_size;
}
return mem_buf + start_offset;
}
return NULL;
}
// Create a whole 'frame' from VP8 (+ alpha) or lossless.
static int SynthesizeFrame(const WebPDemuxer* const dmux,
const Frame* const frame,
WebPIterator* const iter) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
size_t payload_size = 0;
const uint8_t* const payload = GetFramePayload(mem_buf, frame, &payload_size);
if (payload == NULL) return 0;
assert(frame != NULL);
iter->frame_num = frame->frame_num_;
iter->num_frames = dmux->num_frames_;
iter->x_offset = frame->x_offset_;
iter->y_offset = frame->y_offset_;
iter->width = frame->width_;
iter->height = frame->height_;
iter->has_alpha = frame->has_alpha_;
iter->duration = frame->duration_;
iter->dispose_method = frame->dispose_method_;
iter->blend_method = frame->blend_method_;
iter->complete = frame->complete_;
iter->fragment.bytes = payload;
iter->fragment.size = payload_size;
return 1;
}
static int SetFrame(int frame_num, WebPIterator* const iter) {
const Frame* frame;
const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
if (dmux == NULL || frame_num < 0) return 0;
if (frame_num > dmux->num_frames_) return 0;
if (frame_num == 0) frame_num = dmux->num_frames_;
frame = GetFrame(dmux, frame_num);
if (frame == NULL) return 0;
return SynthesizeFrame(dmux, frame, iter);
}
int WebPDemuxGetFrame(const WebPDemuxer* dmux, int frame, WebPIterator* iter) {
if (iter == NULL) return 0;
memset(iter, 0, sizeof(*iter));
iter->private_ = (void*)dmux;
return SetFrame(frame, iter);
}
int WebPDemuxNextFrame(WebPIterator* iter) {
if (iter == NULL) return 0;
return SetFrame(iter->frame_num + 1, iter);
}
int WebPDemuxPrevFrame(WebPIterator* iter) {
if (iter == NULL) return 0;
if (iter->frame_num <= 1) return 0;
return SetFrame(iter->frame_num - 1, iter);
}
void WebPDemuxReleaseIterator(WebPIterator* iter) {
(void)iter;
}
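// An illustrative sketch (not part of upstream libwebp) of the typical
// public-API walk over all frames using the iterator functions above. The
// CountFrames() helper and its arguments are hypothetical; WebPDemux() is the
// demux.h wrapper around WebPDemuxInternal() above.
#if 0  // illustrative only
static int CountFrames(const uint8_t* const file_data, size_t file_size) {
  WebPData data;
  WebPDemuxer* dmux;
  WebPIterator iter;
  int count = 0;
  data.bytes = file_data;
  data.size = file_size;
  dmux = WebPDemux(&data);
  if (dmux == NULL) return 0;
  // Global features come from the VP8X/ANIM chunks (or the frame itself).
  (void)WebPDemuxGetI(dmux, WEBP_FF_CANVAS_WIDTH);
  (void)WebPDemuxGetI(dmux, WEBP_FF_LOOP_COUNT);
  if (WebPDemuxGetFrame(dmux, 1, &iter)) {
    do {
      // iter.fragment points at the frame's VP8/VP8L (+ ALPH) payload and can
      // be handed to the regular decoding API.
      ++count;
    } while (WebPDemuxNextFrame(&iter));
    WebPDemuxReleaseIterator(&iter);
  }
  WebPDemuxDelete(dmux);
  return count;
}
#endif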
// -----------------------------------------------------------------------------
// Chunk iteration
static int ChunkCount(const WebPDemuxer* const dmux, const char fourcc[4]) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const Chunk* c;
int count = 0;
for (c = dmux->chunks_; c != NULL; c = c->next_) {
const uint8_t* const header = mem_buf + c->data_.offset_;
if (!memcmp(header, fourcc, TAG_SIZE)) ++count;
}
return count;
}
static const Chunk* GetChunk(const WebPDemuxer* const dmux,
const char fourcc[4], int chunk_num) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const Chunk* c;
int count = 0;
for (c = dmux->chunks_; c != NULL; c = c->next_) {
const uint8_t* const header = mem_buf + c->data_.offset_;
if (!memcmp(header, fourcc, TAG_SIZE)) ++count;
if (count == chunk_num) break;
}
return c;
}
static int SetChunk(const char fourcc[4], int chunk_num,
WebPChunkIterator* const iter) {
const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
int count;
if (dmux == NULL || fourcc == NULL || chunk_num < 0) return 0;
count = ChunkCount(dmux, fourcc);
if (count == 0) return 0;
if (chunk_num == 0) chunk_num = count;
if (chunk_num <= count) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const Chunk* const chunk = GetChunk(dmux, fourcc, chunk_num);
iter->chunk.bytes = mem_buf + chunk->data_.offset_ + CHUNK_HEADER_SIZE;
iter->chunk.size = chunk->data_.size_ - CHUNK_HEADER_SIZE;
iter->num_chunks = count;
iter->chunk_num = chunk_num;
return 1;
}
return 0;
}
int WebPDemuxGetChunk(const WebPDemuxer* dmux,
const char fourcc[4], int chunk_num,
WebPChunkIterator* iter) {
if (iter == NULL) return 0;
memset(iter, 0, sizeof(*iter));
iter->private_ = (void*)dmux;
return SetChunk(fourcc, chunk_num, iter);
}
int WebPDemuxNextChunk(WebPChunkIterator* iter) {
if (iter != NULL) {
const char* const fourcc =
(const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num + 1, iter);
}
return 0;
}
int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
if (iter != NULL && iter->chunk_num > 1) {
const char* const fourcc =
(const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num - 1, iter);
}
return 0;
}
void WebPDemuxReleaseChunkIterator(WebPChunkIterator* iter) {
(void)iter;
}

Просмотреть файл

@ -0,0 +1,21 @@
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
with Files('**'):
BUG_COMPONENT = ('Core', 'ImageLib')
SOURCES += [
'demux.c',
]
LOCAL_INCLUDES += [
'/media/libwebp',
]
FINAL_LIBRARY = 'gkmedias'
# We allow warnings for third-party code that can be updated from upstream.
AllowCompilerWarnings()

Просмотреть файл

@ -0,0 +1,472 @@
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include "src/dsp/dsp.h"
// Tables can be faster on some platforms but incur some extra binary size (~2k).
#if !defined(USE_TABLES_FOR_ALPHA_MULT)
#define USE_TABLES_FOR_ALPHA_MULT 0 // ALTERNATE_CODE
#endif
// -----------------------------------------------------------------------------
#define MFIX 24 // 24bit fixed-point arithmetic
#define HALF ((1u << MFIX) >> 1)
#define KINV_255 ((1u << MFIX) / 255u)
static uint32_t Mult(uint8_t x, uint32_t mult) {
const uint32_t v = (x * mult + HALF) >> MFIX;
assert(v <= 255); // <- 24bit precision is enough to ensure that.
return v;
}
#if (USE_TABLES_FOR_ALPHA_MULT == 1)
static const uint32_t kMultTables[2][256] = {
{ // (255u << MFIX) / alpha
0x00000000, 0xff000000, 0x7f800000, 0x55000000, 0x3fc00000, 0x33000000,
0x2a800000, 0x246db6db, 0x1fe00000, 0x1c555555, 0x19800000, 0x172e8ba2,
0x15400000, 0x139d89d8, 0x1236db6d, 0x11000000, 0x0ff00000, 0x0f000000,
0x0e2aaaaa, 0x0d6bca1a, 0x0cc00000, 0x0c249249, 0x0b9745d1, 0x0b1642c8,
0x0aa00000, 0x0a333333, 0x09cec4ec, 0x0971c71c, 0x091b6db6, 0x08cb08d3,
0x08800000, 0x0839ce73, 0x07f80000, 0x07ba2e8b, 0x07800000, 0x07492492,
0x07155555, 0x06e45306, 0x06b5e50d, 0x0689d89d, 0x06600000, 0x063831f3,
0x06124924, 0x05ee23b8, 0x05cba2e8, 0x05aaaaaa, 0x058b2164, 0x056cefa8,
0x05500000, 0x05343eb1, 0x05199999, 0x05000000, 0x04e76276, 0x04cfb2b7,
0x04b8e38e, 0x04a2e8ba, 0x048db6db, 0x0479435e, 0x04658469, 0x045270d0,
0x04400000, 0x042e29f7, 0x041ce739, 0x040c30c3, 0x03fc0000, 0x03ec4ec4,
0x03dd1745, 0x03ce540f, 0x03c00000, 0x03b21642, 0x03a49249, 0x03976fc6,
0x038aaaaa, 0x037e3f1f, 0x03722983, 0x03666666, 0x035af286, 0x034fcace,
0x0344ec4e, 0x033a5440, 0x03300000, 0x0325ed09, 0x031c18f9, 0x0312818a,
0x03092492, 0x03000000, 0x02f711dc, 0x02ee5846, 0x02e5d174, 0x02dd7baf,
0x02d55555, 0x02cd5cd5, 0x02c590b2, 0x02bdef7b, 0x02b677d4, 0x02af286b,
0x02a80000, 0x02a0fd5c, 0x029a1f58, 0x029364d9, 0x028ccccc, 0x0286562d,
0x02800000, 0x0279c952, 0x0273b13b, 0x026db6db, 0x0267d95b, 0x026217ec,
0x025c71c7, 0x0256e62a, 0x0251745d, 0x024c1bac, 0x0246db6d, 0x0241b2f9,
0x023ca1af, 0x0237a6f4, 0x0232c234, 0x022df2df, 0x02293868, 0x02249249,
0x02200000, 0x021b810e, 0x021714fb, 0x0212bb51, 0x020e739c, 0x020a3d70,
0x02061861, 0x02020408, 0x01fe0000, 0x01fa0be8, 0x01f62762, 0x01f25213,
0x01ee8ba2, 0x01ead3ba, 0x01e72a07, 0x01e38e38, 0x01e00000, 0x01dc7f10,
0x01d90b21, 0x01d5a3e9, 0x01d24924, 0x01cefa8d, 0x01cbb7e3, 0x01c880e5,
0x01c55555, 0x01c234f7, 0x01bf1f8f, 0x01bc14e5, 0x01b914c1, 0x01b61eed,
0x01b33333, 0x01b05160, 0x01ad7943, 0x01aaaaaa, 0x01a7e567, 0x01a5294a,
0x01a27627, 0x019fcbd2, 0x019d2a20, 0x019a90e7, 0x01980000, 0x01957741,
0x0192f684, 0x01907da4, 0x018e0c7c, 0x018ba2e8, 0x018940c5, 0x0186e5f0,
0x01849249, 0x018245ae, 0x01800000, 0x017dc11f, 0x017b88ee, 0x0179574e,
0x01772c23, 0x01750750, 0x0172e8ba, 0x0170d045, 0x016ebdd7, 0x016cb157,
0x016aaaaa, 0x0168a9b9, 0x0166ae6a, 0x0164b8a7, 0x0162c859, 0x0160dd67,
0x015ef7bd, 0x015d1745, 0x015b3bea, 0x01596596, 0x01579435, 0x0155c7b4,
0x01540000, 0x01523d03, 0x01507eae, 0x014ec4ec, 0x014d0fac, 0x014b5edc,
0x0149b26c, 0x01480a4a, 0x01466666, 0x0144c6af, 0x01432b16, 0x0141938b,
0x01400000, 0x013e7063, 0x013ce4a9, 0x013b5cc0, 0x0139d89d, 0x01385830,
0x0136db6d, 0x01356246, 0x0133ecad, 0x01327a97, 0x01310bf6, 0x012fa0be,
0x012e38e3, 0x012cd459, 0x012b7315, 0x012a150a, 0x0128ba2e, 0x01276276,
0x01260dd6, 0x0124bc44, 0x01236db6, 0x01222222, 0x0120d97c, 0x011f93bc,
0x011e50d7, 0x011d10c4, 0x011bd37a, 0x011a98ef, 0x0119611a, 0x01182bf2,
0x0116f96f, 0x0115c988, 0x01149c34, 0x0113716a, 0x01124924, 0x01112358,
0x01100000, 0x010edf12, 0x010dc087, 0x010ca458, 0x010b8a7d, 0x010a72f0,
0x01095da8, 0x01084a9f, 0x010739ce, 0x01062b2e, 0x01051eb8, 0x01041465,
0x01030c30, 0x01020612, 0x01010204, 0x01000000 },
{ // alpha * KINV_255
0x00000000, 0x00010101, 0x00020202, 0x00030303, 0x00040404, 0x00050505,
0x00060606, 0x00070707, 0x00080808, 0x00090909, 0x000a0a0a, 0x000b0b0b,
0x000c0c0c, 0x000d0d0d, 0x000e0e0e, 0x000f0f0f, 0x00101010, 0x00111111,
0x00121212, 0x00131313, 0x00141414, 0x00151515, 0x00161616, 0x00171717,
0x00181818, 0x00191919, 0x001a1a1a, 0x001b1b1b, 0x001c1c1c, 0x001d1d1d,
0x001e1e1e, 0x001f1f1f, 0x00202020, 0x00212121, 0x00222222, 0x00232323,
0x00242424, 0x00252525, 0x00262626, 0x00272727, 0x00282828, 0x00292929,
0x002a2a2a, 0x002b2b2b, 0x002c2c2c, 0x002d2d2d, 0x002e2e2e, 0x002f2f2f,
0x00303030, 0x00313131, 0x00323232, 0x00333333, 0x00343434, 0x00353535,
0x00363636, 0x00373737, 0x00383838, 0x00393939, 0x003a3a3a, 0x003b3b3b,
0x003c3c3c, 0x003d3d3d, 0x003e3e3e, 0x003f3f3f, 0x00404040, 0x00414141,
0x00424242, 0x00434343, 0x00444444, 0x00454545, 0x00464646, 0x00474747,
0x00484848, 0x00494949, 0x004a4a4a, 0x004b4b4b, 0x004c4c4c, 0x004d4d4d,
0x004e4e4e, 0x004f4f4f, 0x00505050, 0x00515151, 0x00525252, 0x00535353,
0x00545454, 0x00555555, 0x00565656, 0x00575757, 0x00585858, 0x00595959,
0x005a5a5a, 0x005b5b5b, 0x005c5c5c, 0x005d5d5d, 0x005e5e5e, 0x005f5f5f,
0x00606060, 0x00616161, 0x00626262, 0x00636363, 0x00646464, 0x00656565,
0x00666666, 0x00676767, 0x00686868, 0x00696969, 0x006a6a6a, 0x006b6b6b,
0x006c6c6c, 0x006d6d6d, 0x006e6e6e, 0x006f6f6f, 0x00707070, 0x00717171,
0x00727272, 0x00737373, 0x00747474, 0x00757575, 0x00767676, 0x00777777,
0x00787878, 0x00797979, 0x007a7a7a, 0x007b7b7b, 0x007c7c7c, 0x007d7d7d,
0x007e7e7e, 0x007f7f7f, 0x00808080, 0x00818181, 0x00828282, 0x00838383,
0x00848484, 0x00858585, 0x00868686, 0x00878787, 0x00888888, 0x00898989,
0x008a8a8a, 0x008b8b8b, 0x008c8c8c, 0x008d8d8d, 0x008e8e8e, 0x008f8f8f,
0x00909090, 0x00919191, 0x00929292, 0x00939393, 0x00949494, 0x00959595,
0x00969696, 0x00979797, 0x00989898, 0x00999999, 0x009a9a9a, 0x009b9b9b,
0x009c9c9c, 0x009d9d9d, 0x009e9e9e, 0x009f9f9f, 0x00a0a0a0, 0x00a1a1a1,
0x00a2a2a2, 0x00a3a3a3, 0x00a4a4a4, 0x00a5a5a5, 0x00a6a6a6, 0x00a7a7a7,
0x00a8a8a8, 0x00a9a9a9, 0x00aaaaaa, 0x00ababab, 0x00acacac, 0x00adadad,
0x00aeaeae, 0x00afafaf, 0x00b0b0b0, 0x00b1b1b1, 0x00b2b2b2, 0x00b3b3b3,
0x00b4b4b4, 0x00b5b5b5, 0x00b6b6b6, 0x00b7b7b7, 0x00b8b8b8, 0x00b9b9b9,
0x00bababa, 0x00bbbbbb, 0x00bcbcbc, 0x00bdbdbd, 0x00bebebe, 0x00bfbfbf,
0x00c0c0c0, 0x00c1c1c1, 0x00c2c2c2, 0x00c3c3c3, 0x00c4c4c4, 0x00c5c5c5,
0x00c6c6c6, 0x00c7c7c7, 0x00c8c8c8, 0x00c9c9c9, 0x00cacaca, 0x00cbcbcb,
0x00cccccc, 0x00cdcdcd, 0x00cecece, 0x00cfcfcf, 0x00d0d0d0, 0x00d1d1d1,
0x00d2d2d2, 0x00d3d3d3, 0x00d4d4d4, 0x00d5d5d5, 0x00d6d6d6, 0x00d7d7d7,
0x00d8d8d8, 0x00d9d9d9, 0x00dadada, 0x00dbdbdb, 0x00dcdcdc, 0x00dddddd,
0x00dedede, 0x00dfdfdf, 0x00e0e0e0, 0x00e1e1e1, 0x00e2e2e2, 0x00e3e3e3,
0x00e4e4e4, 0x00e5e5e5, 0x00e6e6e6, 0x00e7e7e7, 0x00e8e8e8, 0x00e9e9e9,
0x00eaeaea, 0x00ebebeb, 0x00ececec, 0x00ededed, 0x00eeeeee, 0x00efefef,
0x00f0f0f0, 0x00f1f1f1, 0x00f2f2f2, 0x00f3f3f3, 0x00f4f4f4, 0x00f5f5f5,
0x00f6f6f6, 0x00f7f7f7, 0x00f8f8f8, 0x00f9f9f9, 0x00fafafa, 0x00fbfbfb,
0x00fcfcfc, 0x00fdfdfd, 0x00fefefe, 0x00ffffff }
};
static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
return kMultTables[!inverse][a];
}
#else
static WEBP_INLINE uint32_t GetScale(uint32_t a, int inverse) {
return inverse ? (255u << MFIX) / a : a * KINV_255;
}
#endif // USE_TABLES_FOR_ALPHA_MULT
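// An illustrative worked example (not from upstream): with MFIX == 24, Mult()
// combined with GetScale() performs rounded (un)premultiplication. For
// x == 50 and a == 128:
//   Mult(50, GetScale(128, 0)) == 25    // ~ 50 * 128 / 255 (premultiply)
//   Mult(50, GetScale(128, 1)) == 100   // ~ 50 * 255 / 128 (unpremultiply)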
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse) {
int x;
for (x = 0; x < width; ++x) {
const uint32_t argb = ptr[x];
if (argb < 0xff000000u) { // alpha < 255
if (argb <= 0x00ffffffu) { // alpha == 0
ptr[x] = 0;
} else {
const uint32_t alpha = (argb >> 24) & 0xff;
const uint32_t scale = GetScale(alpha, inverse);
uint32_t out = argb & 0xff000000u;
out |= Mult(argb >> 0, scale) << 0;
out |= Mult(argb >> 8, scale) << 8;
out |= Mult(argb >> 16, scale) << 16;
ptr[x] = out;
}
}
}
}
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse) {
int x;
for (x = 0; x < width; ++x) {
const uint32_t a = alpha[x];
if (a != 255) {
if (a == 0) {
ptr[x] = 0;
} else {
const uint32_t scale = GetScale(a, inverse);
ptr[x] = Mult(ptr[x], scale);
}
}
}
}
#undef KINV_255
#undef HALF
#undef MFIX
void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse);
//------------------------------------------------------------------------------
// Generic per-plane calls
void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
int inverse) {
int n;
for (n = 0; n < num_rows; ++n) {
WebPMultARGBRow((uint32_t*)ptr, width, inverse);
ptr += stride;
}
}
void WebPMultRows(uint8_t* ptr, int stride,
const uint8_t* alpha, int alpha_stride,
int width, int num_rows, int inverse) {
int n;
for (n = 0; n < num_rows; ++n) {
WebPMultRow(ptr, alpha, width, inverse);
ptr += stride;
alpha += alpha_stride;
}
}
//------------------------------------------------------------------------------
// Premultiplied modes
// non dithered-modes
// (x * a * 32897) >> 23 is bit-wise equivalent to (int)(x * a / 255.)
// for all 8bit x or a. For bit-wise equivalence to (int)(x * a / 255. + .5),
// one can use instead: (x * a * 65793 + (1 << 23)) >> 24
#if 1 // (int)(x * a / 255.)
#define MULTIPLIER(a) ((a) * 32897U)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
#else // (int)(x * a / 255. + .5)
#define MULTIPLIER(a) ((a) * 65793U)
#define PREMULTIPLY(x, m) (((x) * (m) + (1U << 23)) >> 24)
#endif
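// An illustrative, throwaway check (not part of upstream libwebp) of the
// bit-exactness claims above, exhaustive over all 8-bit inputs; the rounded
// variant is compared against (v + 127) / 255, which equals
// (int)(v / 255. + .5) for these inputs.
#if 0  // illustrative only
static void CheckPremultiplyIdentities(void) {
  int x, a;
  for (x = 0; x < 256; ++x) {
    for (a = 0; a < 256; ++a) {
      const unsigned v = (unsigned)(x * a);
      assert(((v * 32897U) >> 23) == v / 255U);                        // truncating
      assert(((v * 65793U + (1U << 23)) >> 24) == (v + 127U) / 255U);  // rounded
    }
  }
}
#endif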
#if !WEBP_NEON_OMIT_C_CODE
static void ApplyAlphaMultiply_C(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
while (h-- > 0) {
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
int i;
for (i = 0; i < w; ++i) {
const uint32_t a = alpha[4 * i];
if (a != 0xff) {
const uint32_t mult = MULTIPLIER(a);
rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
}
}
rgba += stride;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
#undef MULTIPLIER
#undef PREMULTIPLY
// rgbA4444
#define MULTIPLIER(a) ((a) * 0x1111) // 0x1111 ~= (1 << 16) / 15
static WEBP_INLINE uint8_t dither_hi(uint8_t x) {
return (x & 0xf0) | (x >> 4);
}
static WEBP_INLINE uint8_t dither_lo(uint8_t x) {
return (x & 0x0f) | (x << 4);
}
static WEBP_INLINE uint8_t multiply(uint8_t x, uint32_t m) {
return (x * m) >> 16;
}
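// Illustrative note (not from upstream): 0x1111 == 4369 while (1 << 16) / 15
// is ~4369.07, so multiply(x, MULTIPLIER(a)) computes approximately
// x * a / 15, i.e. an 8-bit (dithered) channel scaled by a 4-bit alpha. For
// example x == 0x88 (dither_hi of nibble 8) and a == 0x0f give 0x87, whose
// high nibble is still 8 after the '& 0xf0' repack below.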
static WEBP_INLINE void ApplyAlphaMultiply4444_C(uint8_t* rgba4444,
int w, int h, int stride,
int rg_byte_pos /* 0 or 1 */) {
while (h-- > 0) {
int i;
for (i = 0; i < w; ++i) {
const uint32_t rg = rgba4444[2 * i + rg_byte_pos];
const uint32_t ba = rgba4444[2 * i + (rg_byte_pos ^ 1)];
const uint8_t a = ba & 0x0f;
const uint32_t mult = MULTIPLIER(a);
const uint8_t r = multiply(dither_hi(rg), mult);
const uint8_t g = multiply(dither_lo(rg), mult);
const uint8_t b = multiply(dither_hi(ba), mult);
rgba4444[2 * i + rg_byte_pos] = (r & 0xf0) | ((g >> 4) & 0x0f);
rgba4444[2 * i + (rg_byte_pos ^ 1)] = (b & 0xf0) | a;
}
rgba4444 += stride;
}
}
#undef MULTIPLIER
static void ApplyAlphaMultiply_16b_C(uint8_t* rgba4444,
int w, int h, int stride) {
#if (WEBP_SWAP_16BIT_CSP == 1)
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 1);
#else
ApplyAlphaMultiply4444_C(rgba4444, w, h, stride, 0);
#endif
}
#if !WEBP_NEON_OMIT_C_CODE
static int DispatchAlpha_C(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
uint32_t alpha_mask = 0xff;
int i, j;
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
const uint32_t alpha_value = alpha[i];
dst[4 * i] = alpha_value;
alpha_mask &= alpha_value;
}
alpha += alpha_stride;
dst += dst_stride;
}
return (alpha_mask != 0xff);
}
static void DispatchAlphaToGreen_C(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride) {
int i, j;
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
dst[i] = alpha[i] << 8; // leave A/R/B channels zero'd.
}
alpha += alpha_stride;
dst += dst_stride;
}
}
static int ExtractAlpha_C(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
uint8_t alpha_mask = 0xff;
int i, j;
for (j = 0; j < height; ++j) {
for (i = 0; i < width; ++i) {
const uint8_t alpha_value = argb[4 * i];
alpha[i] = alpha_value;
alpha_mask &= alpha_value;
}
argb += argb_stride;
alpha += alpha_stride;
}
return (alpha_mask == 0xff);
}
static void ExtractGreen_C(const uint32_t* argb, uint8_t* alpha, int size) {
int i;
for (i = 0; i < size; ++i) alpha[i] = argb[i] >> 8;
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
static int HasAlpha8b_C(const uint8_t* src, int length) {
while (length-- > 0) if (*src++ != 0xff) return 1;
return 0;
}
static int HasAlpha32b_C(const uint8_t* src, int length) {
int x;
for (x = 0; length-- > 0; x += 4) if (src[x] != 0xff) return 1;
return 0;
}
//------------------------------------------------------------------------------
// Simple channel manipulations.
static WEBP_INLINE uint32_t MakeARGB32(int a, int r, int g, int b) {
return (((uint32_t)a << 24) | (r << 16) | (g << 8) | b);
}
#ifdef WORDS_BIGENDIAN
static void PackARGB_C(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, uint32_t* out) {
int i;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(a[4 * i], r[4 * i], g[4 * i], b[4 * i]);
}
}
#endif
static void PackRGB_C(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out) {
int i, offset = 0;
for (i = 0; i < len; ++i) {
out[i] = MakeARGB32(0xff, r[offset], g[offset], b[offset]);
offset += step;
}
}
void (*WebPApplyAlphaMultiply)(uint8_t*, int, int, int, int);
void (*WebPApplyAlphaMultiply4444)(uint8_t*, int, int, int);
int (*WebPDispatchAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
void (*WebPDispatchAlphaToGreen)(const uint8_t*, int, int, int, uint32_t*, int);
int (*WebPExtractAlpha)(const uint8_t*, int, int, int, uint8_t*, int);
void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
#ifdef WORDS_BIGENDIAN
void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r, const uint8_t* g,
const uint8_t* b, int, uint32_t*);
#endif
void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out);
int (*WebPHasAlpha8b)(const uint8_t* src, int length);
int (*WebPHasAlpha32b)(const uint8_t* src, int length);
//------------------------------------------------------------------------------
// Init function
extern void WebPInitAlphaProcessingMIPSdspR2(void);
extern void WebPInitAlphaProcessingSSE2(void);
extern void WebPInitAlphaProcessingSSE41(void);
extern void WebPInitAlphaProcessingNEON(void);
WEBP_DSP_INIT_FUNC(WebPInitAlphaProcessing) {
WebPMultARGBRow = WebPMultARGBRow_C;
WebPMultRow = WebPMultRow_C;
WebPApplyAlphaMultiply4444 = ApplyAlphaMultiply_16b_C;
#ifdef WORDS_BIGENDIAN
WebPPackARGB = PackARGB_C;
#endif
WebPPackRGB = PackRGB_C;
#if !WEBP_NEON_OMIT_C_CODE
WebPApplyAlphaMultiply = ApplyAlphaMultiply_C;
WebPDispatchAlpha = DispatchAlpha_C;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_C;
WebPExtractAlpha = ExtractAlpha_C;
WebPExtractGreen = ExtractGreen_C;
#endif
WebPHasAlpha8b = HasAlpha8b_C;
WebPHasAlpha32b = HasAlpha32b_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
WebPInitAlphaProcessingSSE2();
#if defined(WEBP_USE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
WebPInitAlphaProcessingSSE41();
}
#endif
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
WebPInitAlphaProcessingMIPSdspR2();
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
WebPInitAlphaProcessingNEON();
}
#endif
assert(WebPMultARGBRow != NULL);
assert(WebPMultRow != NULL);
assert(WebPApplyAlphaMultiply != NULL);
assert(WebPApplyAlphaMultiply4444 != NULL);
assert(WebPDispatchAlpha != NULL);
assert(WebPDispatchAlphaToGreen != NULL);
assert(WebPExtractAlpha != NULL);
assert(WebPExtractGreen != NULL);
#ifdef WORDS_BIGENDIAN
assert(WebPPackARGB != NULL);
#endif
assert(WebPPackRGB != NULL);
assert(WebPHasAlpha8b != NULL);
assert(WebPHasAlpha32b != NULL);
}
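// An illustrative sketch (not part of upstream libwebp) of the intended call
// pattern for this module: initialize once (the WEBP_DSP_INIT_FUNC wrapper
// guards against repeated initialization), then call through the function
// pointers. The FillAlphaIntoRGBA() name and its arguments are hypothetical.
#if 0  // illustrative only
static int FillAlphaIntoRGBA(const uint8_t* alpha, int alpha_stride,
                             uint8_t* rgba, int rgba_stride,
                             int width, int height) {
  WebPInitAlphaProcessing();
  // Writes alpha[] into the first byte of every 4-byte pixel; returns 1 if
  // any value differed from 0xff (i.e. the plane is not fully opaque).
  return WebPDispatchAlpha(alpha, alpha_stride, width, height,
                           rgba, rgba_stride);
}
#endif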

Просмотреть файл

@ -0,0 +1,228 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
// Djordje Pesut (djordje.pesut@imgtec.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
uint32_t alpha_mask = 0xffffffff;
int i, j, temp0;
for (j = 0; j < height; ++j) {
uint8_t* pdst = dst;
const uint8_t* palpha = alpha;
for (i = 0; i < (width >> 2); ++i) {
int temp1, temp2, temp3;
__asm__ volatile (
"ulw %[temp0], 0(%[palpha]) \n\t"
"addiu %[palpha], %[palpha], 4 \n\t"
"addiu %[pdst], %[pdst], 16 \n\t"
"srl %[temp1], %[temp0], 8 \n\t"
"srl %[temp2], %[temp0], 16 \n\t"
"srl %[temp3], %[temp0], 24 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"sb %[temp0], -16(%[pdst]) \n\t"
"sb %[temp1], -12(%[pdst]) \n\t"
"sb %[temp2], -8(%[pdst]) \n\t"
"sb %[temp3], -4(%[pdst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
[alpha_mask]"+r"(alpha_mask)
:
: "memory"
);
}
for (i = 0; i < (width & 3); ++i) {
__asm__ volatile (
"lbu %[temp0], 0(%[palpha]) \n\t"
"addiu %[palpha], %[palpha], 1 \n\t"
"sb %[temp0], 0(%[pdst]) \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"addiu %[pdst], %[pdst], 4 \n\t"
: [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
[alpha_mask]"+r"(alpha_mask)
:
: "memory"
);
}
alpha += alpha_stride;
dst += dst_stride;
}
__asm__ volatile (
"ext %[temp0], %[alpha_mask], 0, 16 \n\t"
"srl %[alpha_mask], %[alpha_mask], 16 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
"ext %[temp0], %[alpha_mask], 0, 8 \n\t"
"srl %[alpha_mask], %[alpha_mask], 8 \n\t"
"and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
: [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
:
);
return (alpha_mask != 0xff);
}
static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
int inverse) {
int x;
const uint32_t c_00ffffff = 0x00ffffffu;
const uint32_t c_ff000000 = 0xff000000u;
const uint32_t c_8000000 = 0x00800000u;
const uint32_t c_8000080 = 0x00800080u;
for (x = 0; x < width; ++x) {
const uint32_t argb = ptr[x];
if (argb < 0xff000000u) { // alpha < 255
if (argb <= 0x00ffffffu) { // alpha == 0
ptr[x] = 0;
} else {
int temp0, temp1, temp2, temp3, alpha;
__asm__ volatile (
"srl %[alpha], %[argb], 24 \n\t"
"replv.qb %[temp0], %[alpha] \n\t"
"and %[temp0], %[temp0], %[c_00ffffff] \n\t"
"beqz %[inverse], 0f \n\t"
"divu $zero, %[c_ff000000], %[alpha] \n\t"
"mflo %[temp0] \n\t"
"0: \n\t"
"andi %[temp1], %[argb], 0xff \n\t"
"ext %[temp2], %[argb], 8, 8 \n\t"
"ext %[temp3], %[argb], 16, 8 \n\t"
"mul %[temp1], %[temp1], %[temp0] \n\t"
"mul %[temp2], %[temp2], %[temp0] \n\t"
"mul %[temp3], %[temp3], %[temp0] \n\t"
"precrq.ph.w %[temp1], %[temp2], %[temp1] \n\t"
"addu %[temp3], %[temp3], %[c_8000000] \n\t"
"addu %[temp1], %[temp1], %[c_8000080] \n\t"
"precrq.ph.w %[temp3], %[argb], %[temp3] \n\t"
"precrq.qb.ph %[temp1], %[temp3], %[temp1] \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
: [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
[c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
[c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
: "memory", "hi", "lo"
);
ptr[x] = temp1;
}
}
}
}
#ifdef WORDS_BIGENDIAN
static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
const uint8_t* g, const uint8_t* b, int len,
uint32_t* out) {
int temp0, temp1, temp2, temp3, offset;
const int rest = len & 1;
const uint32_t* const loop_end = out + len - rest;
const int step = 4;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[a]) \n\t"
"lbux %[temp1], %[offset](%[r]) \n\t"
"lbux %[temp2], %[offset](%[g]) \n\t"
"lbux %[temp3], %[offset](%[b]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
#endif // WORDS_BIGENDIAN
static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
const uint8_t* b, int len, int step,
uint32_t* out) {
int temp0, temp1, temp2, offset;
const int rest = len & 1;
const int a = 0xff;
const uint32_t* const loop_end = out + len - rest;
__asm__ volatile (
"xor %[offset], %[offset], %[offset] \n\t"
"beq %[loop_end], %[out], 0f \n\t"
"2: \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"addiu %[out], %[out], 4 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], -4(%[out]) \n\t"
"addu %[offset], %[offset], %[step] \n\t"
"bne %[loop_end], %[out], 2b \n\t"
"0: \n\t"
"beq %[rest], $zero, 1f \n\t"
"lbux %[temp0], %[offset](%[r]) \n\t"
"lbux %[temp1], %[offset](%[g]) \n\t"
"lbux %[temp2], %[offset](%[b]) \n\t"
"ins %[temp0], %[a], 16, 16 \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
"sw %[temp0], 0(%[out]) \n\t"
"1: \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[offset]"=&r"(offset), [out]"+&r"(out)
: [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
[loop_end]"r"(loop_end), [rest]"r"(rest)
: "memory"
);
}
//------------------------------------------------------------------------------
// Entry point
extern void WebPInitAlphaProcessingMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
WebPMultARGBRow = MultARGBRow_MIPSdspR2;
#ifdef WORDS_BIGENDIAN
WebPPackARGB = PackARGB_MIPSdspR2;
#endif
WebPPackRGB = PackRGB_MIPSdspR2;
}
#else // !WEBP_USE_MIPS_DSP_R2
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2

Просмотреть файл

@ -0,0 +1,191 @@
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel, NEON version.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_NEON)
#include "src/dsp/neon.h"
//------------------------------------------------------------------------------
#define MULTIPLIER(a) ((a) * 0x8081)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
#define MULTIPLY_BY_ALPHA(V, ALPHA, OTHER) do { \
const uint8x8_t alpha = (V).val[(ALPHA)]; \
const uint16x8_t r1 = vmull_u8((V).val[1], alpha); \
const uint16x8_t g1 = vmull_u8((V).val[2], alpha); \
const uint16x8_t b1 = vmull_u8((V).val[(OTHER)], alpha); \
/* we use: v / 255 = (v + 1 + (v >> 8)) >> 8 */ \
const uint16x8_t r2 = vsraq_n_u16(r1, r1, 8); \
const uint16x8_t g2 = vsraq_n_u16(g1, g1, 8); \
const uint16x8_t b2 = vsraq_n_u16(b1, b1, 8); \
const uint16x8_t r3 = vaddq_u16(r2, kOne); \
const uint16x8_t g3 = vaddq_u16(g2, kOne); \
const uint16x8_t b3 = vaddq_u16(b2, kOne); \
(V).val[1] = vshrn_n_u16(r3, 8); \
(V).val[2] = vshrn_n_u16(g3, 8); \
(V).val[(OTHER)] = vshrn_n_u16(b3, 8); \
} while (0)
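// An illustrative scalar form (not from upstream) of the division-by-255
// approximation used in the macro above; it computes v / 255 exactly for any
// product of two 8-bit values (v <= 255 * 255). The DivideBy255() name is
// hypothetical.
#if 0  // illustrative only
static uint8_t DivideBy255(uint16_t v) {   // v = channel * alpha
  return (uint8_t)((v + 1 + (v >> 8)) >> 8);
}
#endif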
static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
const uint16x8_t kOne = vdupq_n_u16(1u);
while (h-- > 0) {
uint32_t* const rgbx = (uint32_t*)rgba;
int i = 0;
if (alpha_first) {
for (; i + 8 <= w; i += 8) {
// load aaaa...|rrrr...|gggg...|bbbb...
uint8x8x4_t RGBX = vld4_u8((const uint8_t*)(rgbx + i));
MULTIPLY_BY_ALPHA(RGBX, 0, 3);
vst4_u8((uint8_t*)(rgbx + i), RGBX);
}
} else {
for (; i + 8 <= w; i += 8) {
uint8x8x4_t RGBX = vld4_u8((const uint8_t*)(rgbx + i));
MULTIPLY_BY_ALPHA(RGBX, 3, 0);
vst4_u8((uint8_t*)(rgbx + i), RGBX);
}
}
// Finish with left-overs.
for (; i < w; ++i) {
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
const uint32_t a = alpha[4 * i];
if (a != 0xff) {
const uint32_t mult = MULTIPLIER(a);
rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
}
}
rgba += stride;
}
}
#undef MULTIPLY_BY_ALPHA
#undef MULTIPLIER
#undef PREMULTIPLY
//------------------------------------------------------------------------------
static int DispatchAlpha_NEON(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
uint32_t alpha_mask = 0xffffffffu;
uint8x8_t mask8 = vdup_n_u8(0xff);
uint32_t tmp[2];
int i, j;
for (j = 0; j < height; ++j) {
// We don't know if alpha is first or last in dst[] (depending on rgbA/Argb
// mode). So we must be sure dst[4*i + 8 - 1] is writable for the store.
// Hence the test with 'width - 1' instead of just 'width'.
for (i = 0; i + 8 <= width - 1; i += 8) {
uint8x8x4_t rgbX = vld4_u8((const uint8_t*)(dst + 4 * i));
const uint8x8_t alphas = vld1_u8(alpha + i);
rgbX.val[0] = alphas;
vst4_u8((uint8_t*)(dst + 4 * i), rgbX);
mask8 = vand_u8(mask8, alphas);
}
for (; i < width; ++i) {
const uint32_t alpha_value = alpha[i];
dst[4 * i] = alpha_value;
alpha_mask &= alpha_value;
}
alpha += alpha_stride;
dst += dst_stride;
}
vst1_u8((uint8_t*)tmp, mask8);
alpha_mask &= tmp[0];
alpha_mask &= tmp[1];
return (alpha_mask != 0xffffffffu);
}
static void DispatchAlphaToGreen_NEON(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride) {
int i, j;
uint8x8x4_t greens; // leave A/R/B channels zero'd.
greens.val[0] = vdup_n_u8(0);
greens.val[2] = vdup_n_u8(0);
greens.val[3] = vdup_n_u8(0);
for (j = 0; j < height; ++j) {
for (i = 0; i + 8 <= width; i += 8) {
greens.val[1] = vld1_u8(alpha + i);
vst4_u8((uint8_t*)(dst + i), greens);
}
for (; i < width; ++i) dst[i] = alpha[i] << 8;
alpha += alpha_stride;
dst += dst_stride;
}
}
static int ExtractAlpha_NEON(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
uint32_t alpha_mask = 0xffffffffu;
uint8x8_t mask8 = vdup_n_u8(0xff);
uint32_t tmp[2];
int i, j;
for (j = 0; j < height; ++j) {
// We don't know if alpha is first or last in dst[] (depending on rgbA/Argb
// mode). So we must be sure dst[4*i + 8 - 1] is writable for the store.
// Hence the test with 'width - 1' instead of just 'width'.
for (i = 0; i + 8 <= width - 1; i += 8) {
const uint8x8x4_t rgbX = vld4_u8((const uint8_t*)(argb + 4 * i));
const uint8x8_t alphas = rgbX.val[0];
vst1_u8((uint8_t*)(alpha + i), alphas);
mask8 = vand_u8(mask8, alphas);
}
for (; i < width; ++i) {
alpha[i] = argb[4 * i];
alpha_mask &= alpha[i];
}
argb += argb_stride;
alpha += alpha_stride;
}
vst1_u8((uint8_t*)tmp, mask8);
alpha_mask &= tmp[0];
alpha_mask &= tmp[1];
return (alpha_mask == 0xffffffffu);
}
static void ExtractGreen_NEON(const uint32_t* argb,
uint8_t* alpha, int size) {
int i;
for (i = 0; i + 16 <= size; i += 16) {
const uint8x16x4_t rgbX = vld4q_u8((const uint8_t*)(argb + i));
const uint8x16_t greens = rgbX.val[1];
vst1q_u8(alpha + i, greens);
}
for (; i < size; ++i) alpha[i] = (argb[i] >> 8) & 0xff;
}
//------------------------------------------------------------------------------
extern void WebPInitAlphaProcessingNEON(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingNEON(void) {
WebPApplyAlphaMultiply = ApplyAlphaMultiply_NEON;
WebPDispatchAlpha = DispatchAlpha_NEON;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_NEON;
WebPExtractAlpha = ExtractAlpha_NEON;
WebPExtractGreen = ExtractGreen_NEON;
}
#else // !WEBP_USE_NEON
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingNEON)
#endif // WEBP_USE_NEON


@@ -0,0 +1,343 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
//------------------------------------------------------------------------------
static int DispatchAlpha_SSE2(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
int i, j;
const __m128i zero = _mm_setzero_si128();
const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u); // to preserve RGB
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
// 'dst[4 * width - 4]', because we don't know if alpha is the first or the
// last byte of the quadruplet.
const int limit = (width - 1) & ~7;
for (j = 0; j < height; ++j) {
__m128i* out = (__m128i*)dst;
for (i = 0; i < limit; i += 8) {
// load 8 alpha bytes
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&alpha[i]);
const __m128i a1 = _mm_unpacklo_epi8(a0, zero);
const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
// load 8 dst pixels (32 bytes)
const __m128i b0_lo = _mm_loadu_si128(out + 0);
const __m128i b0_hi = _mm_loadu_si128(out + 1);
// mask dst alpha values
const __m128i b1_lo = _mm_and_si128(b0_lo, rgb_mask);
const __m128i b1_hi = _mm_and_si128(b0_hi, rgb_mask);
// combine
const __m128i b2_lo = _mm_or_si128(b1_lo, a2_lo);
const __m128i b2_hi = _mm_or_si128(b1_hi, a2_hi);
// store
_mm_storeu_si128(out + 0, b2_lo);
_mm_storeu_si128(out + 1, b2_hi);
// accumulate eight alpha 'and' in parallel
all_alphas = _mm_and_si128(all_alphas, a0);
out += 2;
}
for (; i < width; ++i) {
const uint32_t alpha_value = alpha[i];
dst[4 * i] = alpha_value;
alpha_and &= alpha_value;
}
alpha += alpha_stride;
dst += dst_stride;
}
// Combine the eight alpha 'and' into an 8-bit mask.
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
return (alpha_and != 0xff);
}
static void DispatchAlphaToGreen_SSE2(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride) {
int i, j;
const __m128i zero = _mm_setzero_si128();
const int limit = width & ~15;
for (j = 0; j < height; ++j) {
for (i = 0; i < limit; i += 16) { // process 16 alpha bytes
const __m128i a0 = _mm_loadu_si128((const __m128i*)&alpha[i]);
const __m128i a1 = _mm_unpacklo_epi8(zero, a0); // note the 'zero' first!
const __m128i b1 = _mm_unpackhi_epi8(zero, a0);
const __m128i a2_lo = _mm_unpacklo_epi16(a1, zero);
const __m128i b2_lo = _mm_unpacklo_epi16(b1, zero);
const __m128i a2_hi = _mm_unpackhi_epi16(a1, zero);
const __m128i b2_hi = _mm_unpackhi_epi16(b1, zero);
_mm_storeu_si128((__m128i*)&dst[i + 0], a2_lo);
_mm_storeu_si128((__m128i*)&dst[i + 4], a2_hi);
_mm_storeu_si128((__m128i*)&dst[i + 8], b2_lo);
_mm_storeu_si128((__m128i*)&dst[i + 12], b2_hi);
}
for (; i < width; ++i) dst[i] = alpha[i] << 8;
alpha += alpha_stride;
dst += dst_stride;
}
}
static int ExtractAlpha_SSE2(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
int i, j;
const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha
const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
// 'src[4 * width - 4]', because we don't know if alpha is the first or the
// last byte of the quadruplet.
const int limit = (width - 1) & ~7;
for (j = 0; j < height; ++j) {
const __m128i* src = (const __m128i*)argb;
for (i = 0; i < limit; i += 8) {
// load 32 argb bytes
const __m128i a0 = _mm_loadu_si128(src + 0);
const __m128i a1 = _mm_loadu_si128(src + 1);
const __m128i b0 = _mm_and_si128(a0, a_mask);
const __m128i b1 = _mm_and_si128(a1, a_mask);
const __m128i c0 = _mm_packs_epi32(b0, b1);
const __m128i d0 = _mm_packus_epi16(c0, c0);
// store
_mm_storel_epi64((__m128i*)&alpha[i], d0);
// accumulate eight alpha 'and' in parallel
all_alphas = _mm_and_si128(all_alphas, d0);
src += 2;
}
for (; i < width; ++i) {
const uint32_t alpha_value = argb[4 * i];
alpha[i] = alpha_value;
alpha_and &= alpha_value;
}
argb += argb_stride;
alpha += alpha_stride;
}
// Combine the eight alpha 'and' into an 8-bit mask.
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
return (alpha_and == 0xff);
}
//------------------------------------------------------------------------------
// Non-dither premultiplied modes
#define MULTIPLIER(a) ((a) * 0x8081)
#define PREMULTIPLY(x, m) (((x) * (m)) >> 23)
// We can't use a 'const int' for the SHUFFLE value, because it has to be an
// immediate in the _mm_shufflexx_epi16() instruction. We really need a macro.
// We use: v / 255 = (v * 0x8081) >> 23, where v = alpha * {r,g,b} is a 16bit
// value.
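// A worked instance: alpha = 128 and channel = 255 give v = 32640, and
// (32640 * 0x8081) >> 23 = 1073758080 >> 23 = 128, which matches 32640 / 255.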
#define APPLY_ALPHA(RGBX, SHUFFLE) do { \
const __m128i argb0 = _mm_loadu_si128((const __m128i*)&(RGBX)); \
const __m128i argb1_lo = _mm_unpacklo_epi8(argb0, zero); \
const __m128i argb1_hi = _mm_unpackhi_epi8(argb0, zero); \
const __m128i alpha0_lo = _mm_or_si128(argb1_lo, kMask); \
const __m128i alpha0_hi = _mm_or_si128(argb1_hi, kMask); \
const __m128i alpha1_lo = _mm_shufflelo_epi16(alpha0_lo, SHUFFLE); \
const __m128i alpha1_hi = _mm_shufflelo_epi16(alpha0_hi, SHUFFLE); \
const __m128i alpha2_lo = _mm_shufflehi_epi16(alpha1_lo, SHUFFLE); \
const __m128i alpha2_hi = _mm_shufflehi_epi16(alpha1_hi, SHUFFLE); \
/* alpha2 = [ff a0 a0 a0][ff a1 a1 a1] */ \
const __m128i A0_lo = _mm_mullo_epi16(alpha2_lo, argb1_lo); \
const __m128i A0_hi = _mm_mullo_epi16(alpha2_hi, argb1_hi); \
const __m128i A1_lo = _mm_mulhi_epu16(A0_lo, kMult); \
const __m128i A1_hi = _mm_mulhi_epu16(A0_hi, kMult); \
const __m128i A2_lo = _mm_srli_epi16(A1_lo, 7); \
const __m128i A2_hi = _mm_srli_epi16(A1_hi, 7); \
const __m128i A3 = _mm_packus_epi16(A2_lo, A2_hi); \
_mm_storeu_si128((__m128i*)&(RGBX), A3); \
} while (0)
static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
int w, int h, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i kMult = _mm_set1_epi16(0x8081u);
const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0);
const int kSpan = 4;
while (h-- > 0) {
uint32_t* const rgbx = (uint32_t*)rgba;
int i;
if (!alpha_first) {
for (i = 0; i + kSpan <= w; i += kSpan) {
APPLY_ALPHA(rgbx[i], _MM_SHUFFLE(2, 3, 3, 3));
}
} else {
for (i = 0; i + kSpan <= w; i += kSpan) {
APPLY_ALPHA(rgbx[i], _MM_SHUFFLE(0, 0, 0, 1));
}
}
// Finish with left-overs.
for (; i < w; ++i) {
uint8_t* const rgb = rgba + (alpha_first ? 1 : 0);
const uint8_t* const alpha = rgba + (alpha_first ? 0 : 3);
const uint32_t a = alpha[4 * i];
if (a != 0xff) {
const uint32_t mult = MULTIPLIER(a);
rgb[4 * i + 0] = PREMULTIPLY(rgb[4 * i + 0], mult);
rgb[4 * i + 1] = PREMULTIPLY(rgb[4 * i + 1], mult);
rgb[4 * i + 2] = PREMULTIPLY(rgb[4 * i + 2], mult);
}
}
rgba += stride;
}
}
#undef MULTIPLIER
#undef PREMULTIPLY
//------------------------------------------------------------------------------
// Alpha detection
static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
const __m128i all_0xff = _mm_set1_epi8(0xff);
int i = 0;
for (; i + 16 <= length; i += 16) {
const __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
const __m128i bits = _mm_cmpeq_epi8(v, all_0xff);
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i < length; ++i) if (src[i] != 0xff) return 1;
return 0;
}
static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
const __m128i alpha_mask = _mm_set1_epi32(0xff);
const __m128i all_0xff = _mm_set1_epi8(0xff);
int i = 0;
// We don't know if we can access the last 3 bytes after the last alpha
// value 'src[4 * length - 4]' (because we don't know if alpha is the first
// or the last byte of the quadruplet). Hence the '-3' protection below.
length = length * 4 - 3; // size in bytes
for (; i + 64 <= length; i += 64) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
const __m128i a2 = _mm_loadu_si128((const __m128i*)(src + i + 32));
const __m128i a3 = _mm_loadu_si128((const __m128i*)(src + i + 48));
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
const __m128i b2 = _mm_and_si128(a2, alpha_mask);
const __m128i b3 = _mm_and_si128(a3, alpha_mask);
const __m128i c0 = _mm_packs_epi32(b0, b1);
const __m128i c1 = _mm_packs_epi32(b2, b3);
const __m128i d = _mm_packus_epi16(c0, c1);
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i + 32 <= length; i += 32) {
const __m128i a0 = _mm_loadu_si128((const __m128i*)(src + i + 0));
const __m128i a1 = _mm_loadu_si128((const __m128i*)(src + i + 16));
const __m128i b0 = _mm_and_si128(a0, alpha_mask);
const __m128i b1 = _mm_and_si128(a1, alpha_mask);
const __m128i c = _mm_packs_epi32(b0, b1);
const __m128i d = _mm_packus_epi16(c, c);
const __m128i bits = _mm_cmpeq_epi8(d, all_0xff);
const int mask = _mm_movemask_epi8(bits);
if (mask != 0xffff) return 1;
}
for (; i <= length; i += 4) if (src[i] != 0xff) return 1;
return 0;
}
// -----------------------------------------------------------------------------
// Apply alpha value to rows
static void MultARGBRow_SSE2(uint32_t* const ptr, int width, int inverse) {
int x = 0;
if (!inverse) {
const int kSpan = 2;
const __m128i zero = _mm_setzero_si128();
const __m128i k128 = _mm_set1_epi16(128);
const __m128i kMult = _mm_set1_epi16(0x0101);
const __m128i kMask = _mm_set_epi16(0, 0xff, 0, 0, 0, 0xff, 0, 0);
for (x = 0; x + kSpan <= width; x += kSpan) {
// To compute 'result = (int)(a * x / 255. + .5)', we use:
// tmp = a * v + 128, result = (tmp * 0x0101u) >> 16
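// For instance, a = 128 and v = 255: tmp = 128 * 255 + 128 = 32768, and
// (32768 * 0x0101u) >> 16 = 8421376 >> 16 = 128 = (int)(128 * 255 / 255. + .5).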
const __m128i A0 = _mm_loadl_epi64((const __m128i*)&ptr[x]);
const __m128i A1 = _mm_unpacklo_epi8(A0, zero);
const __m128i A2 = _mm_or_si128(A1, kMask);
const __m128i A3 = _mm_shufflelo_epi16(A2, _MM_SHUFFLE(2, 3, 3, 3));
const __m128i A4 = _mm_shufflehi_epi16(A3, _MM_SHUFFLE(2, 3, 3, 3));
// here, A4 = [ff a0 a0 a0][ff a1 a1 a1]
const __m128i A5 = _mm_mullo_epi16(A4, A1);
const __m128i A6 = _mm_add_epi16(A5, k128);
const __m128i A7 = _mm_mulhi_epu16(A6, kMult);
const __m128i A10 = _mm_packus_epi16(A7, zero);
_mm_storel_epi64((__m128i*)&ptr[x], A10);
}
}
width -= x;
if (width > 0) WebPMultARGBRow_C(ptr + x, width, inverse);
}
static void MultRow_SSE2(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse) {
int x = 0;
if (!inverse) {
const __m128i zero = _mm_setzero_si128();
const __m128i k128 = _mm_set1_epi16(128);
const __m128i kMult = _mm_set1_epi16(0x0101);
for (x = 0; x + 8 <= width; x += 8) {
const __m128i v0 = _mm_loadl_epi64((__m128i*)&ptr[x]);
const __m128i a0 = _mm_loadl_epi64((const __m128i*)&alpha[x]);
const __m128i v1 = _mm_unpacklo_epi8(v0, zero);
const __m128i a1 = _mm_unpacklo_epi8(a0, zero);
const __m128i v2 = _mm_mullo_epi16(v1, a1);
const __m128i v3 = _mm_add_epi16(v2, k128);
const __m128i v4 = _mm_mulhi_epu16(v3, kMult);
const __m128i v5 = _mm_packus_epi16(v4, zero);
_mm_storel_epi64((__m128i*)&ptr[x], v5);
}
}
width -= x;
if (width > 0) WebPMultRow_C(ptr + x, alpha + x, width, inverse);
}
//------------------------------------------------------------------------------
// Entry point
extern void WebPInitAlphaProcessingSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE2(void) {
WebPMultARGBRow = MultARGBRow_SSE2;
WebPMultRow = MultRow_SSE2;
WebPApplyAlphaMultiply = ApplyAlphaMultiply_SSE2;
WebPDispatchAlpha = DispatchAlpha_SSE2;
WebPDispatchAlphaToGreen = DispatchAlphaToGreen_SSE2;
WebPExtractAlpha = ExtractAlpha_SSE2;
WebPHasAlpha8b = HasAlpha8b_SSE2;
WebPHasAlpha32b = HasAlpha32b_SSE2;
}
#else // !WEBP_USE_SSE2
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingSSE2)
#endif // WEBP_USE_SSE2


@@ -0,0 +1,92 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel, SSE4.1 variant.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
//------------------------------------------------------------------------------
static int ExtractAlpha_SSE41(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride) {
// alpha_and stores an 'and' operation of all the alpha[] values. The final
// value is not 0xff if any of the alpha[] is not equal to 0xff.
uint32_t alpha_and = 0xff;
int i, j;
const __m128i all_0xff = _mm_set1_epi32(~0u);
__m128i all_alphas = all_0xff;
// We must be able to access 3 extra bytes after the last written byte
// 'src[4 * width - 4]', because we don't know if alpha is the first or the
// last byte of the quadruplet.
const int limit = (width - 1) & ~15;
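// Each shuffle constant below gathers the four alpha bytes (offsets 0, 4, 8, 12)
// of one 16-byte group into a different quarter of the result, zeroing the rest,
// so OR-ing the four shuffled registers packs sixteen alpha values per iteration.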
const __m128i kCstAlpha0 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 12, 8, 4, 0);
const __m128i kCstAlpha1 = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
12, 8, 4, 0, -1, -1, -1, -1);
const __m128i kCstAlpha2 = _mm_set_epi8(-1, -1, -1, -1, 12, 8, 4, 0,
-1, -1, -1, -1, -1, -1, -1, -1);
const __m128i kCstAlpha3 = _mm_set_epi8(12, 8, 4, 0, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1);
for (j = 0; j < height; ++j) {
const __m128i* src = (const __m128i*)argb;
for (i = 0; i < limit; i += 16) {
// load 64 argb bytes
const __m128i a0 = _mm_loadu_si128(src + 0);
const __m128i a1 = _mm_loadu_si128(src + 1);
const __m128i a2 = _mm_loadu_si128(src + 2);
const __m128i a3 = _mm_loadu_si128(src + 3);
const __m128i b0 = _mm_shuffle_epi8(a0, kCstAlpha0);
const __m128i b1 = _mm_shuffle_epi8(a1, kCstAlpha1);
const __m128i b2 = _mm_shuffle_epi8(a2, kCstAlpha2);
const __m128i b3 = _mm_shuffle_epi8(a3, kCstAlpha3);
const __m128i c0 = _mm_or_si128(b0, b1);
const __m128i c1 = _mm_or_si128(b2, b3);
const __m128i d0 = _mm_or_si128(c0, c1);
// store
_mm_storeu_si128((__m128i*)&alpha[i], d0);
// accumulate sixteen alpha 'and' in parallel
all_alphas = _mm_and_si128(all_alphas, d0);
src += 4;
}
for (; i < width; ++i) {
const uint32_t alpha_value = argb[4 * i];
alpha[i] = alpha_value;
alpha_and &= alpha_value;
}
argb += argb_stride;
alpha += alpha_stride;
}
// Combine the sixteen alpha 'and' into a 16-bit mask.
alpha_and |= 0xff00u; // pretend the upper bits [8..15] were tested ok.
alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff));
return (alpha_and == 0xffffu);
}
//------------------------------------------------------------------------------
// Entry point
extern void WebPInitAlphaProcessingSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingSSE41(void) {
WebPExtractAlpha = ExtractAlpha_SSE41;
}
#else // !WEBP_USE_SSE41
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingSSE41)
#endif // WEBP_USE_SSE41


@@ -0,0 +1,194 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 code common to several files.
//
// Author: Vincent Rabaud (vrabaud@google.com)
#ifndef WEBP_DSP_COMMON_SSE2_H_
#define WEBP_DSP_COMMON_SSE2_H_
#ifdef __cplusplus
extern "C" {
#endif
#if defined(WEBP_USE_SSE2)
#include <emmintrin.h>
//------------------------------------------------------------------------------
// Quite useful macro for debugging. Left here for convenience.
#if 0
#include <stdio.h>
static WEBP_INLINE void PrintReg(const __m128i r, const char* const name,
int size) {
int n;
union {
__m128i r;
uint8_t i8[16];
uint16_t i16[8];
uint32_t i32[4];
uint64_t i64[2];
} tmp;
tmp.r = r;
fprintf(stderr, "%s\t: ", name);
if (size == 8) {
for (n = 0; n < 16; ++n) fprintf(stderr, "%.2x ", tmp.i8[n]);
} else if (size == 16) {
for (n = 0; n < 8; ++n) fprintf(stderr, "%.4x ", tmp.i16[n]);
} else if (size == 32) {
for (n = 0; n < 4; ++n) fprintf(stderr, "%.8x ", tmp.i32[n]);
} else {
for (n = 0; n < 2; ++n) fprintf(stderr, "%.16lx ", tmp.i64[n]);
}
fprintf(stderr, "\n");
}
#endif
//------------------------------------------------------------------------------
// Math functions.
// Return the sum of all the 8b in the register.
static WEBP_INLINE int VP8HorizontalAdd8b(const __m128i* const a) {
const __m128i zero = _mm_setzero_si128();
const __m128i sad8x2 = _mm_sad_epu8(*a, zero);
// sum the two sads: sad8x2[0:1] + sad8x2[8:9]
const __m128i sum = _mm_add_epi32(sad8x2, _mm_shuffle_epi32(sad8x2, 2));
return _mm_cvtsi128_si32(sum);
}
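// Small usage sketch (disabled, for illustration only): with every byte of the
// register set to 1, VP8HorizontalAdd8b() returns 16.
#if 0
static WEBP_INLINE int SumOfSixteenOnes(void) {
  const __m128i ones = _mm_set1_epi8(1);
  return VP8HorizontalAdd8b(&ones);   // 8 + 8 from the two SAD halves = 16
}
#endif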
// Transpose two 4x4 16b matrices horizontally stored in registers.
static WEBP_INLINE void VP8Transpose_2_4x4_16b(
const __m128i* const in0, const __m128i* const in1,
const __m128i* const in2, const __m128i* const in3, __m128i* const out0,
__m128i* const out1, __m128i* const out2, __m128i* const out3) {
// Transpose the two 4x4.
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
// a30 a31 a32 a33 b30 b31 b32 b33
const __m128i transpose0_0 = _mm_unpacklo_epi16(*in0, *in1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(*in2, *in3);
const __m128i transpose0_2 = _mm_unpackhi_epi16(*in0, *in1);
const __m128i transpose0_3 = _mm_unpackhi_epi16(*in2, *in3);
// a00 a10 a01 a11 a02 a12 a03 a13
// a20 a30 a21 a31 a22 a32 a23 a33
// b00 b10 b01 b11 b02 b12 b03 b13
// b20 b30 b21 b31 b22 b32 b23 b33
const __m128i transpose1_0 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_1 = _mm_unpacklo_epi32(transpose0_2, transpose0_3);
const __m128i transpose1_2 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
const __m128i transpose1_3 = _mm_unpackhi_epi32(transpose0_2, transpose0_3);
// a00 a10 a20 a30 a01 a11 a21 a31
// b00 b10 b20 b30 b01 b11 b21 b31
// a02 a12 a22 a32 a03 a13 a23 a33
// b02 b12 b22 b32 b03 b13 b23 b33
*out0 = _mm_unpacklo_epi64(transpose1_0, transpose1_1);
*out1 = _mm_unpackhi_epi64(transpose1_0, transpose1_1);
*out2 = _mm_unpacklo_epi64(transpose1_2, transpose1_3);
*out3 = _mm_unpackhi_epi64(transpose1_2, transpose1_3);
// a00 a10 a20 a30 b00 b10 b20 b30
// a01 a11 a21 a31 b01 b11 b21 b31
// a02 a12 a22 a32 b02 b12 b22 b32
// a03 a13 a23 a33 b03 b13 b23 b33
}
//------------------------------------------------------------------------------
// Channel mixing.
// Function used several times in VP8PlanarTo24b.
// It samples the in buffer as follows: one every two unsigned char is stored
// at the beginning of the buffer, while the other half is stored at the end.
#define VP8PlanarTo24bHelper(IN, OUT) \
do { \
const __m128i v_mask = _mm_set1_epi16(0x00ff); \
/* Take one every two upper 8b values.*/ \
(OUT##0) = _mm_packus_epi16(_mm_and_si128((IN##0), v_mask), \
_mm_and_si128((IN##1), v_mask)); \
(OUT##1) = _mm_packus_epi16(_mm_and_si128((IN##2), v_mask), \
_mm_and_si128((IN##3), v_mask)); \
(OUT##2) = _mm_packus_epi16(_mm_and_si128((IN##4), v_mask), \
_mm_and_si128((IN##5), v_mask)); \
/* Take one every two lower 8b values.*/ \
(OUT##3) = _mm_packus_epi16(_mm_srli_epi16((IN##0), 8), \
_mm_srli_epi16((IN##1), 8)); \
(OUT##4) = _mm_packus_epi16(_mm_srli_epi16((IN##2), 8), \
_mm_srli_epi16((IN##3), 8)); \
(OUT##5) = _mm_packus_epi16(_mm_srli_epi16((IN##4), 8), \
_mm_srli_epi16((IN##5), 8)); \
} while (0)
// Pack the planar buffers
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
static WEBP_INLINE void VP8PlanarTo24b_SSE2(
__m128i* const in0, __m128i* const in1, __m128i* const in2,
__m128i* const in3, __m128i* const in4, __m128i* const in5) {
// The input is 6 registers of sixteen 8b but for the sake of explanation,
// let's take 6 registers of four 8b values.
// To pack, we will keep taking one every two 8b integer and move it
// around as follows:
// Input:
// r0r1r2r3 | r4r5r6r7 | g0g1g2g3 | g4g5g6g7 | b0b1b2b3 | b4b5b6b7
// Split the 6 registers in two sets of 3 registers: the first set as the even
// 8b bytes, the second the odd ones:
// r0r2r4r6 | g0g2g4g6 | b0b2b4b6 | r1r3r5r7 | g1g3g5g7 | b1b3b5b7
// Repeat the same permutations twice more:
// r0r4g0g4 | b0b4r1r5 | g1g5b1b5 | r2r6g2g6 | b2b6r3r7 | g3g7b3b7
// r0g0b0r1 | g1b1r2g2 | b2r3g3b3 | r4g4b4r5 | g5b5r6g6 | b6r7g7b7
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
VP8PlanarTo24bHelper(*in, tmp);
VP8PlanarTo24bHelper(tmp, *in);
VP8PlanarTo24bHelper(*in, tmp);
// We need to do it two more times than the example as we have sixteen bytes.
{
__m128i out0, out1, out2, out3, out4, out5;
VP8PlanarTo24bHelper(tmp, out);
VP8PlanarTo24bHelper(out, *in);
}
}
#undef VP8PlanarTo24bHelper
// Convert four packed four-channel buffers like argbargbargbargb... into the
// split channels aaaaa ... rrrr ... gggg .... bbbbb ......
static WEBP_INLINE void VP8L32bToPlanar_SSE2(__m128i* const in0,
__m128i* const in1,
__m128i* const in2,
__m128i* const in3) {
// Column-wise transpose.
const __m128i A0 = _mm_unpacklo_epi8(*in0, *in1);
const __m128i A1 = _mm_unpackhi_epi8(*in0, *in1);
const __m128i A2 = _mm_unpacklo_epi8(*in2, *in3);
const __m128i A3 = _mm_unpackhi_epi8(*in2, *in3);
const __m128i B0 = _mm_unpacklo_epi8(A0, A1);
const __m128i B1 = _mm_unpackhi_epi8(A0, A1);
const __m128i B2 = _mm_unpacklo_epi8(A2, A3);
const __m128i B3 = _mm_unpackhi_epi8(A2, A3);
// C0 = g7 g6 ... g1 g0 | b7 b6 ... b1 b0
// C1 = a7 a6 ... a1 a0 | r7 r6 ... r1 r0
const __m128i C0 = _mm_unpacklo_epi8(B0, B1);
const __m128i C1 = _mm_unpackhi_epi8(B0, B1);
const __m128i C2 = _mm_unpacklo_epi8(B2, B3);
const __m128i C3 = _mm_unpackhi_epi8(B2, B3);
// Gather the channels.
*in0 = _mm_unpackhi_epi64(C1, C3);
*in1 = _mm_unpacklo_epi64(C1, C3);
*in2 = _mm_unpackhi_epi64(C0, C2);
*in3 = _mm_unpacklo_epi64(C0, C2);
}
#endif // WEBP_USE_SSE2
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_DSP_COMMON_SSE2_H_


@@ -0,0 +1,132 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE4 code common to several files.
//
// Author: Vincent Rabaud (vrabaud@google.com)
#ifndef WEBP_DSP_COMMON_SSE41_H_
#define WEBP_DSP_COMMON_SSE41_H_
#ifdef __cplusplus
extern "C" {
#endif
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
//------------------------------------------------------------------------------
// Channel mixing.
// Shuffles the input buffer as A0 0 0 A1 0 0 A2 ...
#define WEBP_SSE41_SHUFF(OUT, IN0, IN1) \
OUT##0 = _mm_shuffle_epi8(*IN0, shuff0); \
OUT##1 = _mm_shuffle_epi8(*IN0, shuff1); \
OUT##2 = _mm_shuffle_epi8(*IN0, shuff2); \
OUT##3 = _mm_shuffle_epi8(*IN1, shuff0); \
OUT##4 = _mm_shuffle_epi8(*IN1, shuff1); \
OUT##5 = _mm_shuffle_epi8(*IN1, shuff2);
// Pack the planar buffers
// rrrr... rrrr... gggg... gggg... bbbb... bbbb....
// triplet by triplet in the output buffer rgb as rgbrgbrgbrgb ...
static WEBP_INLINE void VP8PlanarTo24b_SSE41(
__m128i* const in0, __m128i* const in1, __m128i* const in2,
__m128i* const in3, __m128i* const in4, __m128i* const in5) {
__m128i R0, R1, R2, R3, R4, R5;
__m128i G0, G1, G2, G3, G4, G5;
__m128i B0, B1, B2, B3, B4, B5;
// Process R.
{
const __m128i shuff0 = _mm_set_epi8(
5, -1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0);
const __m128i shuff1 = _mm_set_epi8(
-1, 10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1);
const __m128i shuff2 = _mm_set_epi8(
-1, -1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1);
WEBP_SSE41_SHUFF(R, in0, in1)
}
// Process G.
{
// Same as before, just shifted to the left by one and including the right
// padding.
const __m128i shuff0 = _mm_set_epi8(
-1, -1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1);
const __m128i shuff1 = _mm_set_epi8(
10, -1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5);
const __m128i shuff2 = _mm_set_epi8(
-1, 15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1);
WEBP_SSE41_SHUFF(G, in2, in3)
}
// Process B.
{
const __m128i shuff0 = _mm_set_epi8(
-1, 4, -1, -1, 3, -1, -1, 2, -1, -1, 1, -1, -1, 0, -1, -1);
const __m128i shuff1 = _mm_set_epi8(
-1, -1, 9, -1, -1, 8, -1, -1, 7, -1, -1, 6, -1, -1, 5, -1);
const __m128i shuff2 = _mm_set_epi8(
15, -1, -1, 14, -1, -1, 13, -1, -1, 12, -1, -1, 11, -1, -1, 10);
WEBP_SSE41_SHUFF(B, in4, in5)
}
// OR the different channels.
{
const __m128i RG0 = _mm_or_si128(R0, G0);
const __m128i RG1 = _mm_or_si128(R1, G1);
const __m128i RG2 = _mm_or_si128(R2, G2);
const __m128i RG3 = _mm_or_si128(R3, G3);
const __m128i RG4 = _mm_or_si128(R4, G4);
const __m128i RG5 = _mm_or_si128(R5, G5);
*in0 = _mm_or_si128(RG0, B0);
*in1 = _mm_or_si128(RG1, B1);
*in2 = _mm_or_si128(RG2, B2);
*in3 = _mm_or_si128(RG3, B3);
*in4 = _mm_or_si128(RG4, B4);
*in5 = _mm_or_si128(RG5, B5);
}
}
#undef WEBP_SSE41_SHUFF
// Convert four packed four-channel buffers like argbargbargbargb... into the
// split channels aaaaa ... rrrr ... gggg .... bbbbb ......
static WEBP_INLINE void VP8L32bToPlanar_SSE41(__m128i* const in0,
__m128i* const in1,
__m128i* const in2,
__m128i* const in3) {
// aaaarrrrggggbbbb
const __m128i shuff0 =
_mm_set_epi8(15, 11, 7, 3, 14, 10, 6, 2, 13, 9, 5, 1, 12, 8, 4, 0);
const __m128i A0 = _mm_shuffle_epi8(*in0, shuff0);
const __m128i A1 = _mm_shuffle_epi8(*in1, shuff0);
const __m128i A2 = _mm_shuffle_epi8(*in2, shuff0);
const __m128i A3 = _mm_shuffle_epi8(*in3, shuff0);
// A0A1R0R1
// G0G1B0B1
// A2A3R2R3
// G0G1B0B1
const __m128i B0 = _mm_unpacklo_epi32(A0, A1);
const __m128i B1 = _mm_unpackhi_epi32(A0, A1);
const __m128i B2 = _mm_unpacklo_epi32(A2, A3);
const __m128i B3 = _mm_unpackhi_epi32(A2, A3);
*in3 = _mm_unpacklo_epi64(B0, B2);
*in2 = _mm_unpackhi_epi64(B0, B2);
*in1 = _mm_unpacklo_epi64(B1, B3);
*in0 = _mm_unpackhi_epi64(B1, B3);
}
#endif // WEBP_USE_SSE41
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_DSP_COMMON_SSE41_H_

887 media/libwebp/src/dsp/dec.c Normal file

@@ -0,0 +1,887 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical decoding functions, default plain-C implementations.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include "src/dsp/dsp.h"
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
//------------------------------------------------------------------------------
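// clip_8b() clamps v to [0, 255]: (v & ~0xff) is zero exactly when v already
// fits in eight bits, so only out-of-range values take the saturating branch.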
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
}
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
#define STORE(x, y, v) \
dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))
#define STORE2(y, dc, d, c) do { \
const int DC = (dc); \
STORE(0, y, DC + (d)); \
STORE(1, y, DC + (c)); \
STORE(2, y, DC - (c)); \
STORE(3, y, DC - (d)); \
} while (0)
#define MUL1(a) ((((a) * 20091) >> 16) + (a))
#define MUL2(a) (((a) * 35468) >> 16)
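// MUL1(a) approximates a * sqrt(2) * cos(pi / 8) (20091 / 65536 ~= 0.3066) and
// MUL2(a) approximates a * sqrt(2) * sin(pi / 8) (35468 / 65536 ~= 0.5412),
// the fixed-point rotation constants of the VP8 inverse transform.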
#if !WEBP_NEON_OMIT_C_CODE
static void TransformOne_C(const int16_t* in, uint8_t* dst) {
int C[4 * 4], *tmp;
int i;
tmp = C;
for (i = 0; i < 4; ++i) { // vertical pass
const int a = in[0] + in[8]; // [-4096, 4094]
const int b = in[0] - in[8]; // [-4095, 4095]
const int c = MUL2(in[4]) - MUL1(in[12]); // [-3783, 3783]
const int d = MUL1(in[4]) + MUL2(in[12]); // [-3785, 3781]
tmp[0] = a + d; // [-7881, 7875]
tmp[1] = b + c; // [-7878, 7878]
tmp[2] = b - c; // [-7878, 7878]
tmp[3] = a - d; // [-7877, 7879]
tmp += 4;
in++;
}
// Each pass is expanding the dynamic range by ~3.85 (upper bound).
// The exact value is (2. + (20091 + 35468) / 65536).
// After the second pass, maximum interval is [-3794, 3794], assuming
// an input in [-2048, 2047] interval. We then need to add a dst value
// in the [0, 255] range.
// In the worst case scenario, the input to clip_8b() can be as large as
// [-60713, 60968].
tmp = C;
for (i = 0; i < 4; ++i) { // horizontal pass
const int dc = tmp[0] + 4;
const int a = dc + tmp[8];
const int b = dc - tmp[8];
const int c = MUL2(tmp[4]) - MUL1(tmp[12]);
const int d = MUL1(tmp[4]) + MUL2(tmp[12]);
STORE(0, 0, a + d);
STORE(1, 0, b + c);
STORE(2, 0, b - c);
STORE(3, 0, a - d);
tmp++;
dst += BPS;
}
}
// Simplified transform when only in[0], in[1] and in[4] are non-zero
static void TransformAC3_C(const int16_t* in, uint8_t* dst) {
const int a = in[0] + 4;
const int c4 = MUL2(in[4]);
const int d4 = MUL1(in[4]);
const int c1 = MUL2(in[1]);
const int d1 = MUL1(in[1]);
STORE2(0, a + d4, d1, c1);
STORE2(1, a + c4, d1, c1);
STORE2(2, a - c4, d1, c1);
STORE2(3, a - d4, d1, c1);
}
#undef MUL1
#undef MUL2
#undef STORE2
static void TransformTwo_C(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne_C(in, dst);
if (do_two) {
TransformOne_C(in + 16, dst + 4);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void TransformUV_C(const int16_t* in, uint8_t* dst) {
VP8Transform(in + 0 * 16, dst, 1);
VP8Transform(in + 2 * 16, dst + 4 * BPS, 1);
}
#if !WEBP_NEON_OMIT_C_CODE
static void TransformDC_C(const int16_t* in, uint8_t* dst) {
const int DC = in[0] + 4;
int i, j;
for (j = 0; j < 4; ++j) {
for (i = 0; i < 4; ++i) {
STORE(i, j, DC);
}
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void TransformDCUV_C(const int16_t* in, uint8_t* dst) {
if (in[0 * 16]) VP8TransformDC(in + 0 * 16, dst);
if (in[1 * 16]) VP8TransformDC(in + 1 * 16, dst + 4);
if (in[2 * 16]) VP8TransformDC(in + 2 * 16, dst + 4 * BPS);
if (in[3 * 16]) VP8TransformDC(in + 3 * 16, dst + 4 * BPS + 4);
}
#undef STORE
//------------------------------------------------------------------------------
// Paragraph 14.3
#if !WEBP_NEON_OMIT_C_CODE
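// Inverse Walsh-Hadamard transform of the sixteen DC levels; each result lands
// in the DC slot of one 4x4 coefficient block, hence the stride-16 stores and
// the out += 64 advance of four blocks per row.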
static void TransformWHT_C(const int16_t* in, int16_t* out) {
int tmp[16];
int i;
for (i = 0; i < 4; ++i) {
const int a0 = in[0 + i] + in[12 + i];
const int a1 = in[4 + i] + in[ 8 + i];
const int a2 = in[4 + i] - in[ 8 + i];
const int a3 = in[0 + i] - in[12 + i];
tmp[0 + i] = a0 + a1;
tmp[8 + i] = a0 - a1;
tmp[4 + i] = a3 + a2;
tmp[12 + i] = a3 - a2;
}
for (i = 0; i < 4; ++i) {
const int dc = tmp[0 + i * 4] + 3; // w/ rounder
const int a0 = dc + tmp[3 + i * 4];
const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
const int a3 = dc - tmp[3 + i * 4];
out[ 0] = (a0 + a1) >> 3;
out[16] = (a3 + a2) >> 3;
out[32] = (a0 - a1) >> 3;
out[48] = (a3 - a2) >> 3;
out += 64;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
//------------------------------------------------------------------------------
// Intra predictions
#define DST(x, y) dst[(x) + (y) * BPS]
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE void TrueMotion(uint8_t* dst, int size) {
const uint8_t* top = dst - BPS;
const uint8_t* const clip0 = VP8kclip1 - top[-1];
int y;
for (y = 0; y < size; ++y) {
const uint8_t* const clip = clip0 + dst[-1];
int x;
for (x = 0; x < size; ++x) {
dst[x] = clip[top[x]];
}
dst += BPS;
}
}
static void TM4_C(uint8_t* dst) { TrueMotion(dst, 4); }
static void TM8uv_C(uint8_t* dst) { TrueMotion(dst, 8); }
static void TM16_C(uint8_t* dst) { TrueMotion(dst, 16); }
//------------------------------------------------------------------------------
// 16x16
static void VE16_C(uint8_t* dst) { // vertical
int j;
for (j = 0; j < 16; ++j) {
memcpy(dst + j * BPS, dst - BPS, 16);
}
}
static void HE16_C(uint8_t* dst) { // horizontal
int j;
for (j = 16; j > 0; --j) {
memset(dst, dst[-1], 16);
dst += BPS;
}
}
static WEBP_INLINE void Put16(int v, uint8_t* dst) {
int j;
for (j = 0; j < 16; ++j) {
memset(dst + j * BPS, v, 16);
}
}
static void DC16_C(uint8_t* dst) { // DC
int DC = 16;
int j;
for (j = 0; j < 16; ++j) {
DC += dst[-1 + j * BPS] + dst[j - BPS];
}
Put16(DC >> 5, dst);
}
static void DC16NoTop_C(uint8_t* dst) { // DC with top samples not available
int DC = 8;
int j;
for (j = 0; j < 16; ++j) {
DC += dst[-1 + j * BPS];
}
Put16(DC >> 4, dst);
}
static void DC16NoLeft_C(uint8_t* dst) { // DC with left samples not available
int DC = 8;
int i;
for (i = 0; i < 16; ++i) {
DC += dst[i - BPS];
}
Put16(DC >> 4, dst);
}
static void DC16NoTopLeft_C(uint8_t* dst) { // DC with no top and left samples
Put16(0x80, dst);
}
#endif // !WEBP_NEON_OMIT_C_CODE
VP8PredFunc VP8PredLuma16[NUM_B_DC_MODES];
//------------------------------------------------------------------------------
// 4x4
#define AVG3(a, b, c) ((uint8_t)(((a) + 2 * (b) + (c) + 2) >> 2))
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
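// AVG3 is a rounded average that weights the middle sample twice (e.g.
// AVG3(4, 8, 4) = 6); AVG2 is the plain rounded average of two samples.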
#if !WEBP_NEON_OMIT_C_CODE
static void VE4_C(uint8_t* dst) { // vertical
const uint8_t* top = dst - BPS;
const uint8_t vals[4] = {
AVG3(top[-1], top[0], top[1]),
AVG3(top[ 0], top[1], top[2]),
AVG3(top[ 1], top[2], top[3]),
AVG3(top[ 2], top[3], top[4])
};
int i;
for (i = 0; i < 4; ++i) {
memcpy(dst + i * BPS, vals, sizeof(vals));
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void HE4_C(uint8_t* dst) { // horizontal
const int A = dst[-1 - BPS];
const int B = dst[-1];
const int C = dst[-1 + BPS];
const int D = dst[-1 + 2 * BPS];
const int E = dst[-1 + 3 * BPS];
WebPUint32ToMem(dst + 0 * BPS, 0x01010101U * AVG3(A, B, C));
WebPUint32ToMem(dst + 1 * BPS, 0x01010101U * AVG3(B, C, D));
WebPUint32ToMem(dst + 2 * BPS, 0x01010101U * AVG3(C, D, E));
WebPUint32ToMem(dst + 3 * BPS, 0x01010101U * AVG3(D, E, E));
}
#if !WEBP_NEON_OMIT_C_CODE
static void DC4_C(uint8_t* dst) { // DC
uint32_t dc = 4;
int i;
for (i = 0; i < 4; ++i) dc += dst[i - BPS] + dst[-1 + i * BPS];
dc >>= 3;
for (i = 0; i < 4; ++i) memset(dst + i * BPS, dc, 4);
}
static void RD4_C(uint8_t* dst) { // Down-right
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
const int L = dst[-1 + 3 * BPS];
const int X = dst[-1 - BPS];
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
const int D = dst[3 - BPS];
DST(0, 3) = AVG3(J, K, L);
DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
DST(3, 0) = AVG3(D, C, B);
}
static void LD4_C(uint8_t* dst) { // Down-Left
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
const int D = dst[3 - BPS];
const int E = dst[4 - BPS];
const int F = dst[5 - BPS];
const int G = dst[6 - BPS];
const int H = dst[7 - BPS];
DST(0, 0) = AVG3(A, B, C);
DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
DST(3, 3) = AVG3(G, H, H);
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void VR4_C(uint8_t* dst) { // Vertical-Right
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
const int X = dst[-1 - BPS];
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
const int D = dst[3 - BPS];
DST(0, 0) = DST(1, 2) = AVG2(X, A);
DST(1, 0) = DST(2, 2) = AVG2(A, B);
DST(2, 0) = DST(3, 2) = AVG2(B, C);
DST(3, 0) = AVG2(C, D);
DST(0, 3) = AVG3(K, J, I);
DST(0, 2) = AVG3(J, I, X);
DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
DST(3, 1) = AVG3(B, C, D);
}
static void VL4_C(uint8_t* dst) { // Vertical-Left
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
const int D = dst[3 - BPS];
const int E = dst[4 - BPS];
const int F = dst[5 - BPS];
const int G = dst[6 - BPS];
const int H = dst[7 - BPS];
DST(0, 0) = AVG2(A, B);
DST(1, 0) = DST(0, 2) = AVG2(B, C);
DST(2, 0) = DST(1, 2) = AVG2(C, D);
DST(3, 0) = DST(2, 2) = AVG2(D, E);
DST(0, 1) = AVG3(A, B, C);
DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
DST(3, 2) = AVG3(E, F, G);
DST(3, 3) = AVG3(F, G, H);
}
static void HU4_C(uint8_t* dst) { // Horizontal-Up
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
const int L = dst[-1 + 3 * BPS];
DST(0, 0) = AVG2(I, J);
DST(2, 0) = DST(0, 1) = AVG2(J, K);
DST(2, 1) = DST(0, 2) = AVG2(K, L);
DST(1, 0) = AVG3(I, J, K);
DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
DST(3, 2) = DST(2, 2) =
DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
}
static void HD4_C(uint8_t* dst) { // Horizontal-Down
const int I = dst[-1 + 0 * BPS];
const int J = dst[-1 + 1 * BPS];
const int K = dst[-1 + 2 * BPS];
const int L = dst[-1 + 3 * BPS];
const int X = dst[-1 - BPS];
const int A = dst[0 - BPS];
const int B = dst[1 - BPS];
const int C = dst[2 - BPS];
DST(0, 0) = DST(2, 1) = AVG2(I, X);
DST(0, 1) = DST(2, 2) = AVG2(J, I);
DST(0, 2) = DST(2, 3) = AVG2(K, J);
DST(0, 3) = AVG2(L, K);
DST(3, 0) = AVG3(A, B, C);
DST(2, 0) = AVG3(X, A, B);
DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
DST(1, 3) = AVG3(L, K, J);
}
#undef DST
#undef AVG3
#undef AVG2
VP8PredFunc VP8PredLuma4[NUM_BMODES];
//------------------------------------------------------------------------------
// Chroma
#if !WEBP_NEON_OMIT_C_CODE
static void VE8uv_C(uint8_t* dst) { // vertical
int j;
for (j = 0; j < 8; ++j) {
memcpy(dst + j * BPS, dst - BPS, 8);
}
}
static void HE8uv_C(uint8_t* dst) { // horizontal
int j;
for (j = 0; j < 8; ++j) {
memset(dst, dst[-1], 8);
dst += BPS;
}
}
// helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
int j;
for (j = 0; j < 8; ++j) {
memset(dst + j * BPS, value, 8);
}
}
static void DC8uv_C(uint8_t* dst) { // DC
int dc0 = 8;
int i;
for (i = 0; i < 8; ++i) {
dc0 += dst[i - BPS] + dst[-1 + i * BPS];
}
Put8x8uv(dc0 >> 4, dst);
}
static void DC8uvNoLeft_C(uint8_t* dst) { // DC with no left samples
int dc0 = 4;
int i;
for (i = 0; i < 8; ++i) {
dc0 += dst[i - BPS];
}
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTop_C(uint8_t* dst) { // DC with no top samples
int dc0 = 4;
int i;
for (i = 0; i < 8; ++i) {
dc0 += dst[-1 + i * BPS];
}
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTopLeft_C(uint8_t* dst) { // DC with nothing
Put8x8uv(0x80, dst);
}
#endif // !WEBP_NEON_OMIT_C_CODE
VP8PredFunc VP8PredChroma8[NUM_B_DC_MODES];
//------------------------------------------------------------------------------
// Edge filtering functions
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
// 4 pixels in, 2 pixels out
static WEBP_INLINE void DoFilter2_C(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1]; // in [-893,892]
const int a1 = VP8ksclip2[(a + 4) >> 3]; // in [-16,15]
const int a2 = VP8ksclip2[(a + 3) >> 3];
p[-step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
}
// 4 pixels in, 4 pixels out
static WEBP_INLINE void DoFilter4_C(uint8_t* p, int step) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0);
const int a1 = VP8ksclip2[(a + 4) >> 3];
const int a2 = VP8ksclip2[(a + 3) >> 3];
const int a3 = (a1 + 1) >> 1;
p[-2*step] = VP8kclip1[p1 + a3];
p[- step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a3];
}
// 6 pixels in, 6 pixels out
static WEBP_INLINE void DoFilter6_C(uint8_t* p, int step) {
const int p2 = p[-3*step], p1 = p[-2*step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2*step];
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[-3*step] = VP8kclip1[p2 + a3];
p[-2*step] = VP8kclip1[p1 + a2];
p[- step] = VP8kclip1[p0 + a1];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a2];
p[ 2*step] = VP8kclip1[q2 - a3];
}
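// Hev() tests for high edge variance; when it fires, the filter loops below
// fall back to the 2-pixel DoFilter2_C instead of the wider 4- or 6-pixel filters.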
static WEBP_INLINE int Hev(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2*step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (VP8kabs0[p1 - p0] > thresh) || (VP8kabs0[q1 - q0] > thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE int NeedsFilter_C(const uint8_t* p, int step, int t) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
return ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) <= t);
}
#endif // !WEBP_NEON_OMIT_C_CODE
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static WEBP_INLINE int NeedsFilter2_C(const uint8_t* p,
int step, int t, int it) {
const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step];
const int p0 = p[-step], q0 = p[0];
const int q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
if ((4 * VP8kabs0[p0 - q0] + VP8kabs0[p1 - q1]) > t) return 0;
return VP8kabs0[p3 - p2] <= it && VP8kabs0[p2 - p1] <= it &&
VP8kabs0[p1 - p0] <= it && VP8kabs0[q3 - q2] <= it &&
VP8kabs0[q2 - q1] <= it && VP8kabs0[q1 - q0] <= it;
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
#if !WEBP_NEON_OMIT_C_CODE
static void SimpleVFilter16_C(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (NeedsFilter_C(p + i, stride, thresh2)) {
DoFilter2_C(p + i, stride);
}
}
}
static void SimpleHFilter16_C(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (NeedsFilter_C(p + i * stride, 1, thresh2)) {
DoFilter2_C(p + i * stride, 1);
}
}
}
static void SimpleVFilter16i_C(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
SimpleVFilter16_C(p, stride, thresh);
}
}
static void SimpleHFilter16i_C(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
SimpleHFilter16_C(p, stride, thresh);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Complex In-loop filtering (Paragraph 15.3)
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static WEBP_INLINE void FilterLoop26_C(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh,
int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
if (Hev(p, hstride, hev_thresh)) {
DoFilter2_C(p, hstride);
} else {
DoFilter6_C(p, hstride);
}
}
p += vstride;
}
}
static WEBP_INLINE void FilterLoop24_C(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh,
int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (NeedsFilter2_C(p, hstride, thresh2, ithresh)) {
if (Hev(p, hstride, hev_thresh)) {
DoFilter2_C(p, hstride);
} else {
DoFilter4_C(p, hstride);
}
}
p += vstride;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
#if !WEBP_NEON_OMIT_C_CODE
// on macroblock edges
static void VFilter16_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
static void HFilter16_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
// on three inner edges
static void VFilter16i_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
FilterLoop24_C(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static void HFilter16i_C(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
FilterLoop24_C(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
#if !WEBP_NEON_OMIT_C_CODE
// 8-pixels wide variant, for chroma filtering
static void VFilter8_C(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26_C(u, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop26_C(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static void HFilter8_C(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26_C(u, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop26_C(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
#if !WEBP_NEON_OMIT_C_CODE
static void VFilter8i_C(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24_C(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop24_C(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
static void HFilter8i_C(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24_C(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop24_C(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#endif // !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
//------------------------------------------------------------------------------
static void DitherCombine8x8_C(const uint8_t* dither, uint8_t* dst,
int dst_stride) {
int i, j;
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) {
const int delta0 = dither[i] - VP8_DITHER_AMP_CENTER;
const int delta1 =
(delta0 + VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE;
dst[i] = clip_8b((int)dst[i] + delta1);
}
dst += dst_stride;
dither += 8;
}
}
//------------------------------------------------------------------------------
VP8DecIdct2 VP8Transform;
VP8DecIdct VP8TransformAC3;
VP8DecIdct VP8TransformUV;
VP8DecIdct VP8TransformDC;
VP8DecIdct VP8TransformDCUV;
VP8LumaFilterFunc VP8VFilter16;
VP8LumaFilterFunc VP8HFilter16;
VP8ChromaFilterFunc VP8VFilter8;
VP8ChromaFilterFunc VP8HFilter8;
VP8LumaFilterFunc VP8VFilter16i;
VP8LumaFilterFunc VP8HFilter16i;
VP8ChromaFilterFunc VP8VFilter8i;
VP8ChromaFilterFunc VP8HFilter8i;
VP8SimpleFilterFunc VP8SimpleVFilter16;
VP8SimpleFilterFunc VP8SimpleHFilter16;
VP8SimpleFilterFunc VP8SimpleVFilter16i;
VP8SimpleFilterFunc VP8SimpleHFilter16i;
void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
int dst_stride);
extern void VP8DspInitSSE2(void);
extern void VP8DspInitSSE41(void);
extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
extern void VP8DspInitMIPSdspR2(void);
extern void VP8DspInitMSA(void);
WEBP_DSP_INIT_FUNC(VP8DspInit) {
VP8InitClipTables();
#if !WEBP_NEON_OMIT_C_CODE
VP8TransformWHT = TransformWHT_C;
VP8Transform = TransformTwo_C;
VP8TransformDC = TransformDC_C;
VP8TransformAC3 = TransformAC3_C;
#endif
VP8TransformUV = TransformUV_C;
VP8TransformDCUV = TransformDCUV_C;
#if !WEBP_NEON_OMIT_C_CODE
VP8VFilter16 = VFilter16_C;
VP8VFilter16i = VFilter16i_C;
VP8HFilter16 = HFilter16_C;
VP8VFilter8 = VFilter8_C;
VP8VFilter8i = VFilter8i_C;
VP8SimpleVFilter16 = SimpleVFilter16_C;
VP8SimpleHFilter16 = SimpleHFilter16_C;
VP8SimpleVFilter16i = SimpleVFilter16i_C;
VP8SimpleHFilter16i = SimpleHFilter16i_C;
#endif
#if !WEBP_NEON_OMIT_C_CODE || WEBP_NEON_WORK_AROUND_GCC
VP8HFilter16i = HFilter16i_C;
VP8HFilter8 = HFilter8_C;
VP8HFilter8i = HFilter8i_C;
#endif
#if !WEBP_NEON_OMIT_C_CODE
VP8PredLuma4[0] = DC4_C;
VP8PredLuma4[1] = TM4_C;
VP8PredLuma4[2] = VE4_C;
VP8PredLuma4[4] = RD4_C;
VP8PredLuma4[6] = LD4_C;
#endif
VP8PredLuma4[3] = HE4_C;
VP8PredLuma4[5] = VR4_C;
VP8PredLuma4[7] = VL4_C;
VP8PredLuma4[8] = HD4_C;
VP8PredLuma4[9] = HU4_C;
#if !WEBP_NEON_OMIT_C_CODE
VP8PredLuma16[0] = DC16_C;
VP8PredLuma16[1] = TM16_C;
VP8PredLuma16[2] = VE16_C;
VP8PredLuma16[3] = HE16_C;
VP8PredLuma16[4] = DC16NoTop_C;
VP8PredLuma16[5] = DC16NoLeft_C;
VP8PredLuma16[6] = DC16NoTopLeft_C;
VP8PredChroma8[0] = DC8uv_C;
VP8PredChroma8[1] = TM8uv_C;
VP8PredChroma8[2] = VE8uv_C;
VP8PredChroma8[3] = HE8uv_C;
VP8PredChroma8[4] = DC8uvNoTop_C;
VP8PredChroma8[5] = DC8uvNoLeft_C;
VP8PredChroma8[6] = DC8uvNoTopLeft_C;
#endif
VP8DitherCombine8x8 = DitherCombine8x8_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8DspInitSSE2();
#if defined(WEBP_USE_SSE41)
if (VP8GetCPUInfo(kSSE4_1)) {
VP8DspInitSSE41();
}
#endif
}
#endif
#if defined(WEBP_USE_MIPS32)
if (VP8GetCPUInfo(kMIPS32)) {
VP8DspInitMIPS32();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8DspInitMIPSdspR2();
}
#endif
#if defined(WEBP_USE_MSA)
if (VP8GetCPUInfo(kMSA)) {
VP8DspInitMSA();
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
VP8DspInitNEON();
}
#endif
assert(VP8TransformWHT != NULL);
assert(VP8Transform != NULL);
assert(VP8TransformDC != NULL);
assert(VP8TransformAC3 != NULL);
assert(VP8TransformUV != NULL);
assert(VP8TransformDCUV != NULL);
assert(VP8VFilter16 != NULL);
assert(VP8HFilter16 != NULL);
assert(VP8VFilter8 != NULL);
assert(VP8HFilter8 != NULL);
assert(VP8VFilter16i != NULL);
assert(VP8HFilter16i != NULL);
assert(VP8VFilter8i != NULL);
assert(VP8HFilter8i != NULL);
assert(VP8SimpleVFilter16 != NULL);
assert(VP8SimpleHFilter16 != NULL);
assert(VP8SimpleVFilter16i != NULL);
assert(VP8SimpleHFilter16i != NULL);
assert(VP8PredLuma4[0] != NULL);
assert(VP8PredLuma4[1] != NULL);
assert(VP8PredLuma4[2] != NULL);
assert(VP8PredLuma4[3] != NULL);
assert(VP8PredLuma4[4] != NULL);
assert(VP8PredLuma4[5] != NULL);
assert(VP8PredLuma4[6] != NULL);
assert(VP8PredLuma4[7] != NULL);
assert(VP8PredLuma4[8] != NULL);
assert(VP8PredLuma4[9] != NULL);
assert(VP8PredLuma16[0] != NULL);
assert(VP8PredLuma16[1] != NULL);
assert(VP8PredLuma16[2] != NULL);
assert(VP8PredLuma16[3] != NULL);
assert(VP8PredLuma16[4] != NULL);
assert(VP8PredLuma16[5] != NULL);
assert(VP8PredLuma16[6] != NULL);
assert(VP8PredChroma8[0] != NULL);
assert(VP8PredChroma8[1] != NULL);
assert(VP8PredChroma8[2] != NULL);
assert(VP8PredChroma8[3] != NULL);
assert(VP8PredChroma8[4] != NULL);
assert(VP8PredChroma8[5] != NULL);
assert(VP8PredChroma8[6] != NULL);
assert(VP8DitherCombine8x8 != NULL);
}


@@ -0,0 +1,369 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Clipping tables for filtering
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/dsp.h"
// define to 0 to have run-time table initialization
#if !defined(USE_STATIC_TABLES)
#define USE_STATIC_TABLES 1 // ALTERNATE_CODE
#endif
#if (USE_STATIC_TABLES == 1)
static const uint8_t abs0[255 + 255 + 1] = {
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8, 0xf7, 0xf6, 0xf5, 0xf4,
0xf3, 0xf2, 0xf1, 0xf0, 0xef, 0xee, 0xed, 0xec, 0xeb, 0xea, 0xe9, 0xe8,
0xe7, 0xe6, 0xe5, 0xe4, 0xe3, 0xe2, 0xe1, 0xe0, 0xdf, 0xde, 0xdd, 0xdc,
0xdb, 0xda, 0xd9, 0xd8, 0xd7, 0xd6, 0xd5, 0xd4, 0xd3, 0xd2, 0xd1, 0xd0,
0xcf, 0xce, 0xcd, 0xcc, 0xcb, 0xca, 0xc9, 0xc8, 0xc7, 0xc6, 0xc5, 0xc4,
0xc3, 0xc2, 0xc1, 0xc0, 0xbf, 0xbe, 0xbd, 0xbc, 0xbb, 0xba, 0xb9, 0xb8,
0xb7, 0xb6, 0xb5, 0xb4, 0xb3, 0xb2, 0xb1, 0xb0, 0xaf, 0xae, 0xad, 0xac,
0xab, 0xaa, 0xa9, 0xa8, 0xa7, 0xa6, 0xa5, 0xa4, 0xa3, 0xa2, 0xa1, 0xa0,
0x9f, 0x9e, 0x9d, 0x9c, 0x9b, 0x9a, 0x99, 0x98, 0x97, 0x96, 0x95, 0x94,
0x93, 0x92, 0x91, 0x90, 0x8f, 0x8e, 0x8d, 0x8c, 0x8b, 0x8a, 0x89, 0x88,
0x87, 0x86, 0x85, 0x84, 0x83, 0x82, 0x81, 0x80, 0x7f, 0x7e, 0x7d, 0x7c,
0x7b, 0x7a, 0x79, 0x78, 0x77, 0x76, 0x75, 0x74, 0x73, 0x72, 0x71, 0x70,
0x6f, 0x6e, 0x6d, 0x6c, 0x6b, 0x6a, 0x69, 0x68, 0x67, 0x66, 0x65, 0x64,
0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, 0x5a, 0x59, 0x58,
0x57, 0x56, 0x55, 0x54, 0x53, 0x52, 0x51, 0x50, 0x4f, 0x4e, 0x4d, 0x4c,
0x4b, 0x4a, 0x49, 0x48, 0x47, 0x46, 0x45, 0x44, 0x43, 0x42, 0x41, 0x40,
0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x35, 0x34,
0x33, 0x32, 0x31, 0x30, 0x2f, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28,
0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, 0x1f, 0x1e, 0x1d, 0x1c,
0x1b, 0x1a, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10,
0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04,
0x03, 0x02, 0x01, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
static const uint8_t sclip1[1020 + 1020 + 1] = {
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93,
0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab,
0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3,
0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23,
0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53,
0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f
};
static const uint8_t sclip2[112 + 112 + 1] = {
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f
};
static const uint8_t clip1[255 + 511 + 1] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c,
0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44,
0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c,
0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80,
0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c,
0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc,
0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8,
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4,
0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0,
0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec,
0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
#else
// uninitialized tables
static uint8_t abs0[255 + 255 + 1];
static int8_t sclip1[1020 + 1020 + 1];
static int8_t sclip2[112 + 112 + 1];
static uint8_t clip1[255 + 511 + 1];
// We declare this variable 'volatile' to prevent instruction reordering
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
#endif // USE_STATIC_TABLES
const int8_t* const VP8ksclip1 = (const int8_t*)&sclip1[1020];
const int8_t* const VP8ksclip2 = (const int8_t*)&sclip2[112];
const uint8_t* const VP8kclip1 = &clip1[255];
const uint8_t* const VP8kabs0 = &abs0[255];
WEBP_TSAN_IGNORE_FUNCTION void VP8InitClipTables(void) {
#if (USE_STATIC_TABLES == 0)
int i;
if (!tables_ok) {
for (i = -255; i <= 255; ++i) {
abs0[255 + i] = (i < 0) ? -i : i;
}
for (i = -1020; i <= 1020; ++i) {
sclip1[1020 + i] = (i < -128) ? -128 : (i > 127) ? 127 : i;
}
for (i = -112; i <= 112; ++i) {
sclip2[112 + i] = (i < -16) ? -16 : (i > 15) ? 15 : i;
}
for (i = -255; i <= 255 + 255; ++i) {
clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
}
tables_ok = 1;
}
#endif // USE_STATIC_TABLES
}
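
Whichever branch is compiled, the exported pointers end up offset into the middle of their tables so they can be indexed with signed values directly. The following is a stand-alone sketch of the resulting semantics (not part of the diff); it assumes the extern declarations for these pointers and the VP8InitClipTables() prototype come from src/dsp/dsp.h, as the include above suggests.

#include <assert.h>
#include "src/dsp/dsp.h"

int main(void) {
  int i;
  VP8InitClipTables();  /* a no-op when the static tables are compiled in */
  for (i = -255; i <= 255; ++i)
    assert(VP8kabs0[i] == (i < 0 ? -i : i));                        /* |i| */
  for (i = -1020; i <= 1020; ++i)
    assert(VP8ksclip1[i] == (i < -128 ? -128 : i > 127 ? 127 : i)); /* clamp to int8 */
  for (i = -112; i <= 112; ++i)
    assert(VP8ksclip2[i] == (i < -16 ? -16 : i > 15 ? 15 : i));     /* clamp to [-16,15] */
  for (i = -255; i <= 255 + 255; ++i)
    assert(VP8kclip1[i] == (i < 0 ? 0 : i > 255 ? 255 : i));        /* clamp to uint8 */
  return 0;
}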

View file

@@ -0,0 +1,587 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of dsp functions
//
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS32)
#include "src/dsp/mips_macro.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
static WEBP_INLINE int abs_mips32(int x) {
const int sign = x >> 31;
return (x ^ sign) - sign;
}
// 4 pixels in, 2 pixels out
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];
const int a1 = VP8ksclip2[(a + 4) >> 3];
const int a2 = VP8ksclip2[(a + 3) >> 3];
p[-step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
}
// 4 pixels in, 4 pixels out
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
const int a = 3 * (q0 - p0);
const int a1 = VP8ksclip2[(a + 4) >> 3];
const int a2 = VP8ksclip2[(a + 3) >> 3];
const int a3 = (a1 + 1) >> 1;
p[-2 * step] = VP8kclip1[p1 + a3];
p[- step] = VP8kclip1[p0 + a2];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a3];
}
// 6 pixels in, 6 pixels out
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
p[-3 * step] = VP8kclip1[p2 + a3];
p[-2 * step] = VP8kclip1[p1 + a2];
p[- step] = VP8kclip1[p0 + a1];
p[ 0] = VP8kclip1[q0 - a1];
p[ step] = VP8kclip1[q1 - a2];
p[ 2 * step] = VP8kclip1[q2 - a3];
}
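
The a1/a2/a3 scalings above are exact rewrites of the forms quoted in their comments, since 9 * (k * a + 7) = 9k * a + 63 for k = 3, 2, 1. A tiny stand-alone check (not part of the diff):

#include <assert.h>

int main(void) {
  int a;
  for (a = -128; a <= 127; ++a) {   /* the full range VP8ksclip1 can return */
    assert(((27 * a + 63) >> 7) == (((3 * a + 7) * 9) >> 7));
    assert(((18 * a + 63) >> 7) == (((2 * a + 7) * 9) >> 7));
    assert((( 9 * a + 63) >> 7) == (((1 * a + 7) * 9) >> 7));
  }
  return 0;
}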
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
return (abs_mips32(p1 - p0) > thresh) || (abs_mips32(q1 - q0) > thresh);
}
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
return ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);
}
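
needs_filter() fires only on "quiet" edges: filtering is applied where the step across the edge is small relative to the threshold, while large steps (presumably real image edges) are left untouched. A small stand-alone illustration (not part of the diff) using the t = 2 * thresh + 1 value that the simple-filter loops below pass in:

#include <assert.h>
#include <stdlib.h>

/* Same decision rule as needs_filter() above, restated stand-alone. */
static int quiet_edge(int p1, int p0, int q0, int q1, int t) {
  return 4 * abs(p0 - q0) + abs(p1 - q1) <= t;
}

int main(void) {
  const int t = 2 * 2 + 1;                     /* thresh = 2  ->  t = 5 */
  assert(quiet_edge(10, 12, 13, 11, t) == 1);  /* 4*1 + 1 = 5 <= 5: filter */
  assert(quiet_edge(10, 12, 17, 11, t) == 0);  /* 4*5 + 1 = 21 > 5: skip   */
  return 0;
}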
static WEBP_INLINE int needs_filter2(const uint8_t* p,
int step, int t, int it) {
const int p3 = p[-4 * step], p2 = p[-3 * step];
const int p1 = p[-2 * step], p0 = p[-step];
const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
if ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) > t) {
return 0;
}
return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&
abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
}
static WEBP_INLINE void FilterLoop26(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
do_filter2(p, hstride);
} else {
do_filter6(p, hstride);
}
}
p += vstride;
}
}
static WEBP_INLINE void FilterLoop24(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
while (size-- > 0) {
if (needs_filter2(p, hstride, thresh2, ithresh)) {
if (hev(p, hstride, hev_thresh)) {
do_filter2(p, hstride);
} else {
do_filter4(p, hstride);
}
}
p += vstride;
}
}
// on macroblock edges
static void VFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
static void HFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
// 8-pixels wide variant, for chroma filtering
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
// on three inner edges
static void VFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
}
static void HFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
}
//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (needs_filter(p + i, stride, thresh2)) {
do_filter2(p + i, stride);
}
}
}
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
for (i = 0; i < 16; ++i) {
if (needs_filter(p + i * stride, 1, thresh2)) {
do_filter2(p + i * stride, 1);
}
}
}
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
SimpleVFilter16(p, stride, thresh);
}
}
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
SimpleHFilter16(p, stride, thresh);
}
}
static void TransformOne(const int16_t* in, uint8_t* dst) {
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14;
int temp15, temp16, temp17, temp18;
int16_t* p_in = (int16_t*)in;
// loops unrolled and merged to avoid usage of tmp buffer
// and to reduce number of stalls. MUL macro is written
// in assembler and inlined
__asm__ volatile(
"lh %[temp0], 0(%[in]) \n\t"
"lh %[temp8], 16(%[in]) \n\t"
"lh %[temp4], 8(%[in]) \n\t"
"lh %[temp12], 24(%[in]) \n\t"
"addu %[temp16], %[temp0], %[temp8] \n\t"
"subu %[temp0], %[temp0], %[temp8] \n\t"
"mul %[temp8], %[temp4], %[kC2] \n\t"
"mul %[temp17], %[temp12], %[kC1] \n\t"
"mul %[temp4], %[temp4], %[kC1] \n\t"
"mul %[temp12], %[temp12], %[kC2] \n\t"
"lh %[temp1], 2(%[in]) \n\t"
"lh %[temp5], 10(%[in]) \n\t"
"lh %[temp9], 18(%[in]) \n\t"
"lh %[temp13], 26(%[in]) \n\t"
"sra %[temp8], %[temp8], 16 \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"sra %[temp4], %[temp4], 16 \n\t"
"sra %[temp12], %[temp12], 16 \n\t"
"lh %[temp2], 4(%[in]) \n\t"
"lh %[temp6], 12(%[in]) \n\t"
"lh %[temp10], 20(%[in]) \n\t"
"lh %[temp14], 28(%[in]) \n\t"
"subu %[temp17], %[temp8], %[temp17] \n\t"
"addu %[temp4], %[temp4], %[temp12] \n\t"
"addu %[temp8], %[temp16], %[temp4] \n\t"
"subu %[temp4], %[temp16], %[temp4] \n\t"
"addu %[temp16], %[temp1], %[temp9] \n\t"
"subu %[temp1], %[temp1], %[temp9] \n\t"
"lh %[temp3], 6(%[in]) \n\t"
"lh %[temp7], 14(%[in]) \n\t"
"lh %[temp11], 22(%[in]) \n\t"
"lh %[temp15], 30(%[in]) \n\t"
"addu %[temp12], %[temp0], %[temp17] \n\t"
"subu %[temp0], %[temp0], %[temp17] \n\t"
"mul %[temp9], %[temp5], %[kC2] \n\t"
"mul %[temp17], %[temp13], %[kC1] \n\t"
"mul %[temp5], %[temp5], %[kC1] \n\t"
"mul %[temp13], %[temp13], %[kC2] \n\t"
"sra %[temp9], %[temp9], 16 \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"subu %[temp17], %[temp9], %[temp17] \n\t"
"sra %[temp5], %[temp5], 16 \n\t"
"sra %[temp13], %[temp13], 16 \n\t"
"addu %[temp5], %[temp5], %[temp13] \n\t"
"addu %[temp13], %[temp1], %[temp17] \n\t"
"subu %[temp1], %[temp1], %[temp17] \n\t"
"mul %[temp17], %[temp14], %[kC1] \n\t"
"mul %[temp14], %[temp14], %[kC2] \n\t"
"addu %[temp9], %[temp16], %[temp5] \n\t"
"subu %[temp5], %[temp16], %[temp5] \n\t"
"addu %[temp16], %[temp2], %[temp10] \n\t"
"subu %[temp2], %[temp2], %[temp10] \n\t"
"mul %[temp10], %[temp6], %[kC2] \n\t"
"mul %[temp6], %[temp6], %[kC1] \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"sra %[temp14], %[temp14], 16 \n\t"
"sra %[temp10], %[temp10], 16 \n\t"
"sra %[temp6], %[temp6], 16 \n\t"
"subu %[temp17], %[temp10], %[temp17] \n\t"
"addu %[temp6], %[temp6], %[temp14] \n\t"
"addu %[temp10], %[temp16], %[temp6] \n\t"
"subu %[temp6], %[temp16], %[temp6] \n\t"
"addu %[temp14], %[temp2], %[temp17] \n\t"
"subu %[temp2], %[temp2], %[temp17] \n\t"
"mul %[temp17], %[temp15], %[kC1] \n\t"
"mul %[temp15], %[temp15], %[kC2] \n\t"
"addu %[temp16], %[temp3], %[temp11] \n\t"
"subu %[temp3], %[temp3], %[temp11] \n\t"
"mul %[temp11], %[temp7], %[kC2] \n\t"
"mul %[temp7], %[temp7], %[kC1] \n\t"
"addiu %[temp8], %[temp8], 4 \n\t"
"addiu %[temp12], %[temp12], 4 \n\t"
"addiu %[temp0], %[temp0], 4 \n\t"
"addiu %[temp4], %[temp4], 4 \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"sra %[temp15], %[temp15], 16 \n\t"
"sra %[temp11], %[temp11], 16 \n\t"
"sra %[temp7], %[temp7], 16 \n\t"
"subu %[temp17], %[temp11], %[temp17] \n\t"
"addu %[temp7], %[temp7], %[temp15] \n\t"
"addu %[temp15], %[temp3], %[temp17] \n\t"
"subu %[temp3], %[temp3], %[temp17] \n\t"
"addu %[temp11], %[temp16], %[temp7] \n\t"
"subu %[temp7], %[temp16], %[temp7] \n\t"
"addu %[temp16], %[temp8], %[temp10] \n\t"
"subu %[temp8], %[temp8], %[temp10] \n\t"
"mul %[temp10], %[temp9], %[kC2] \n\t"
"mul %[temp17], %[temp11], %[kC1] \n\t"
"mul %[temp9], %[temp9], %[kC1] \n\t"
"mul %[temp11], %[temp11], %[kC2] \n\t"
"sra %[temp10], %[temp10], 16 \n\t"
"sra %[temp17], %[temp17], 16 \n\t"
"sra %[temp9], %[temp9], 16 \n\t"
"sra %[temp11], %[temp11], 16 \n\t"
"subu %[temp17], %[temp10], %[temp17] \n\t"
"addu %[temp11], %[temp9], %[temp11] \n\t"
"addu %[temp10], %[temp12], %[temp14] \n\t"
"subu %[temp12], %[temp12], %[temp14] \n\t"
"mul %[temp14], %[temp13], %[kC2] \n\t"
"mul %[temp9], %[temp15], %[kC1] \n\t"
"mul %[temp13], %[temp13], %[kC1] \n\t"
"mul %[temp15], %[temp15], %[kC2] \n\t"
"sra %[temp14], %[temp14], 16 \n\t"
"sra %[temp9], %[temp9], 16 \n\t"
"sra %[temp13], %[temp13], 16 \n\t"
"sra %[temp15], %[temp15], 16 \n\t"
"subu %[temp9], %[temp14], %[temp9] \n\t"
"addu %[temp15], %[temp13], %[temp15] \n\t"
"addu %[temp14], %[temp0], %[temp2] \n\t"
"subu %[temp0], %[temp0], %[temp2] \n\t"
"mul %[temp2], %[temp1], %[kC2] \n\t"
"mul %[temp13], %[temp3], %[kC1] \n\t"
"mul %[temp1], %[temp1], %[kC1] \n\t"
"mul %[temp3], %[temp3], %[kC2] \n\t"
"sra %[temp2], %[temp2], 16 \n\t"
"sra %[temp13], %[temp13], 16 \n\t"
"sra %[temp1], %[temp1], 16 \n\t"
"sra %[temp3], %[temp3], 16 \n\t"
"subu %[temp13], %[temp2], %[temp13] \n\t"
"addu %[temp3], %[temp1], %[temp3] \n\t"
"addu %[temp2], %[temp4], %[temp6] \n\t"
"subu %[temp4], %[temp4], %[temp6] \n\t"
"mul %[temp6], %[temp5], %[kC2] \n\t"
"mul %[temp1], %[temp7], %[kC1] \n\t"
"mul %[temp5], %[temp5], %[kC1] \n\t"
"mul %[temp7], %[temp7], %[kC2] \n\t"
"sra %[temp6], %[temp6], 16 \n\t"
"sra %[temp1], %[temp1], 16 \n\t"
"sra %[temp5], %[temp5], 16 \n\t"
"sra %[temp7], %[temp7], 16 \n\t"
"subu %[temp1], %[temp6], %[temp1] \n\t"
"addu %[temp7], %[temp5], %[temp7] \n\t"
"addu %[temp5], %[temp16], %[temp11] \n\t"
"subu %[temp16], %[temp16], %[temp11] \n\t"
"addu %[temp11], %[temp8], %[temp17] \n\t"
"subu %[temp8], %[temp8], %[temp17] \n\t"
"sra %[temp5], %[temp5], 3 \n\t"
"sra %[temp16], %[temp16], 3 \n\t"
"sra %[temp11], %[temp11], 3 \n\t"
"sra %[temp8], %[temp8], 3 \n\t"
"addu %[temp17], %[temp10], %[temp15] \n\t"
"subu %[temp10], %[temp10], %[temp15] \n\t"
"addu %[temp15], %[temp12], %[temp9] \n\t"
"subu %[temp12], %[temp12], %[temp9] \n\t"
"sra %[temp17], %[temp17], 3 \n\t"
"sra %[temp10], %[temp10], 3 \n\t"
"sra %[temp15], %[temp15], 3 \n\t"
"sra %[temp12], %[temp12], 3 \n\t"
"addu %[temp9], %[temp14], %[temp3] \n\t"
"subu %[temp14], %[temp14], %[temp3] \n\t"
"addu %[temp3], %[temp0], %[temp13] \n\t"
"subu %[temp0], %[temp0], %[temp13] \n\t"
"sra %[temp9], %[temp9], 3 \n\t"
"sra %[temp14], %[temp14], 3 \n\t"
"sra %[temp3], %[temp3], 3 \n\t"
"sra %[temp0], %[temp0], 3 \n\t"
"addu %[temp13], %[temp2], %[temp7] \n\t"
"subu %[temp2], %[temp2], %[temp7] \n\t"
"addu %[temp7], %[temp4], %[temp1] \n\t"
"subu %[temp4], %[temp4], %[temp1] \n\t"
"sra %[temp13], %[temp13], 3 \n\t"
"sra %[temp2], %[temp2], 3 \n\t"
"sra %[temp7], %[temp7], 3 \n\t"
"sra %[temp4], %[temp4], 3 \n\t"
"addiu %[temp6], $zero, 255 \n\t"
"lbu %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp1], %[temp1], %[temp5] \n\t"
"sra %[temp5], %[temp1], 8 \n\t"
"sra %[temp18], %[temp1], 31 \n\t"
"beqz %[temp5], 1f \n\t"
"xor %[temp1], %[temp1], %[temp1] \n\t"
"movz %[temp1], %[temp6], %[temp18] \n\t"
"1: \n\t"
"lbu %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp18], %[temp18], %[temp11] \n\t"
"sra %[temp11], %[temp18], 8 \n\t"
"sra %[temp1], %[temp18], 31 \n\t"
"beqz %[temp11], 2f \n\t"
"xor %[temp18], %[temp18], %[temp18] \n\t"
"movz %[temp18], %[temp6], %[temp1] \n\t"
"2: \n\t"
"lbu %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp1], %[temp1], %[temp8] \n\t"
"sra %[temp8], %[temp1], 8 \n\t"
"sra %[temp18], %[temp1], 31 \n\t"
"beqz %[temp8], 3f \n\t"
"xor %[temp1], %[temp1], %[temp1] \n\t"
"movz %[temp1], %[temp6], %[temp18] \n\t"
"3: \n\t"
"lbu %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp18], %[temp18], %[temp16] \n\t"
"sra %[temp16], %[temp18], 8 \n\t"
"sra %[temp1], %[temp18], 31 \n\t"
"beqz %[temp16], 4f \n\t"
"xor %[temp18], %[temp18], %[temp18] \n\t"
"movz %[temp18], %[temp6], %[temp1] \n\t"
"4: \n\t"
"sb %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp17] \n\t"
"addu %[temp8], %[temp8], %[temp15] \n\t"
"addu %[temp11], %[temp11], %[temp12] \n\t"
"addu %[temp16], %[temp16], %[temp10] \n\t"
"sra %[temp18], %[temp5], 8 \n\t"
"sra %[temp1], %[temp5], 31 \n\t"
"beqz %[temp18], 5f \n\t"
"xor %[temp5], %[temp5], %[temp5] \n\t"
"movz %[temp5], %[temp6], %[temp1] \n\t"
"5: \n\t"
"sra %[temp18], %[temp8], 8 \n\t"
"sra %[temp1], %[temp8], 31 \n\t"
"beqz %[temp18], 6f \n\t"
"xor %[temp8], %[temp8], %[temp8] \n\t"
"movz %[temp8], %[temp6], %[temp1] \n\t"
"6: \n\t"
"sra %[temp18], %[temp11], 8 \n\t"
"sra %[temp1], %[temp11], 31 \n\t"
"sra %[temp17], %[temp16], 8 \n\t"
"sra %[temp15], %[temp16], 31 \n\t"
"beqz %[temp18], 7f \n\t"
"xor %[temp11], %[temp11], %[temp11] \n\t"
"movz %[temp11], %[temp6], %[temp1] \n\t"
"7: \n\t"
"beqz %[temp17], 8f \n\t"
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp15] \n\t"
"8: \n\t"
"sb %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp9] \n\t"
"addu %[temp8], %[temp8], %[temp3] \n\t"
"addu %[temp11], %[temp11], %[temp0] \n\t"
"addu %[temp16], %[temp16], %[temp14] \n\t"
"sra %[temp18], %[temp5], 8 \n\t"
"sra %[temp1], %[temp5], 31 \n\t"
"sra %[temp17], %[temp8], 8 \n\t"
"sra %[temp15], %[temp8], 31 \n\t"
"sra %[temp12], %[temp11], 8 \n\t"
"sra %[temp10], %[temp11], 31 \n\t"
"sra %[temp9], %[temp16], 8 \n\t"
"sra %[temp3], %[temp16], 31 \n\t"
"beqz %[temp18], 9f \n\t"
"xor %[temp5], %[temp5], %[temp5] \n\t"
"movz %[temp5], %[temp6], %[temp1] \n\t"
"9: \n\t"
"beqz %[temp17], 10f \n\t"
"xor %[temp8], %[temp8], %[temp8] \n\t"
"movz %[temp8], %[temp6], %[temp15] \n\t"
"10: \n\t"
"beqz %[temp12], 11f \n\t"
"xor %[temp11], %[temp11], %[temp11] \n\t"
"movz %[temp11], %[temp6], %[temp10] \n\t"
"11: \n\t"
"beqz %[temp9], 12f \n\t"
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp3] \n\t"
"12: \n\t"
"sb %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
"lbu %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
"addu %[temp5], %[temp5], %[temp13] \n\t"
"addu %[temp8], %[temp8], %[temp7] \n\t"
"addu %[temp11], %[temp11], %[temp4] \n\t"
"addu %[temp16], %[temp16], %[temp2] \n\t"
"sra %[temp18], %[temp5], 8 \n\t"
"sra %[temp1], %[temp5], 31 \n\t"
"sra %[temp17], %[temp8], 8 \n\t"
"sra %[temp15], %[temp8], 31 \n\t"
"sra %[temp12], %[temp11], 8 \n\t"
"sra %[temp10], %[temp11], 31 \n\t"
"sra %[temp9], %[temp16], 8 \n\t"
"sra %[temp3], %[temp16], 31 \n\t"
"beqz %[temp18], 13f \n\t"
"xor %[temp5], %[temp5], %[temp5] \n\t"
"movz %[temp5], %[temp6], %[temp1] \n\t"
"13: \n\t"
"beqz %[temp17], 14f \n\t"
"xor %[temp8], %[temp8], %[temp8] \n\t"
"movz %[temp8], %[temp6], %[temp15] \n\t"
"14: \n\t"
"beqz %[temp12], 15f \n\t"
"xor %[temp11], %[temp11], %[temp11] \n\t"
"movz %[temp11], %[temp6], %[temp10] \n\t"
"15: \n\t"
"beqz %[temp9], 16f \n\t"
"xor %[temp16], %[temp16], %[temp16] \n\t"
"movz %[temp16], %[temp6], %[temp3] \n\t"
"16: \n\t"
"sb %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
"sb %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
[temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
[temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
[temp18]"=&r"(temp18)
: [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
: "memory", "hi", "lo"
);
}
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);
}
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8DspInitMIPS32(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPS32(void) {
VP8InitClipTables();
VP8Transform = TransformTwo;
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
VP8VFilter8 = VFilter8;
VP8HFilter8 = HFilter8;
VP8VFilter16i = VFilter16i;
VP8HFilter16i = HFilter16i;
VP8VFilter8i = VFilter8i;
VP8HFilter8i = HFilter8i;
VP8SimpleVFilter16 = SimpleVFilter16;
VP8SimpleHFilter16 = SimpleHFilter16;
VP8SimpleVFilter16i = SimpleVFilter16i;
VP8SimpleHFilter16i = SimpleHFilter16i;
}
#else // !WEBP_USE_MIPS32
WEBP_DSP_INIT_STUB(VP8DspInitMIPS32)
#endif // WEBP_USE_MIPS32

View file

@@ -0,0 +1,994 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS version of dsp functions
//
// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "src/dsp/mips_macro.h"
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
#define MUL(a, b) (((a) * (b)) >> 16)
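
MUL() is a Q16 fixed-point multiply: (a * b) >> 16 approximates a * (b / 65536). With the constants above, kC2 / 65536 is about 0.5412 and kC1 / 65536 about 1.3066, matching (up to rounding) the sqrt(2)*sin(pi/8) and sqrt(2)*cos(pi/8) factors of the VP8 inverse transform. A stand-alone sketch of that reading (not part of the diff); like the code above, it relies on arithmetic right shift for negative values:

#include <assert.h>
#include <stdio.h>

#define MUL(a, b) (((a) * (b)) >> 16)
static const int kC1 = 20091 + (1 << 16);   /* 85627 / 65536 ~= 1.3066 */
static const int kC2 = 35468;               /* 35468 / 65536 ~= 0.5412 */

int main(void) {
  int a;
  for (a = -2048; a <= 2047; ++a) {
    /* MUL(a, k) == floor(a * k / 65536), i.e. within one unit below the
     * real-valued product. */
    const double c1 = a * (kC1 / 65536.0);
    const double c2 = a * (kC2 / 65536.0);
    assert(MUL(a, kC1) <= c1 && MUL(a, kC1) > c1 - 1.0);
    assert(MUL(a, kC2) <= c2 && MUL(a, kC2) > c2 - 1.0);
  }
  printf("kC1 ~= %.4f, kC2 ~= %.4f\n", kC1 / 65536.0, kC2 / 65536.0);
  return 0;
}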
static void TransformDC(const int16_t* in, uint8_t* dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10;
__asm__ volatile (
LOAD_WITH_OFFSET_X4(temp1, temp2, temp3, temp4, dst,
0, 0, 0, 0,
0, 1, 2, 3,
BPS)
"lh %[temp5], 0(%[in]) \n\t"
"addiu %[temp5], %[temp5], 4 \n\t"
"ins %[temp5], %[temp5], 16, 16 \n\t"
"shra.ph %[temp5], %[temp5], 3 \n\t"
CONVERT_2_BYTES_TO_HALF(temp6, temp7, temp8, temp9, temp10, temp1, temp2,
temp3, temp1, temp2, temp3, temp4)
STORE_SAT_SUM_X2(temp6, temp7, temp8, temp9, temp10, temp1, temp2, temp3,
temp5, temp5, temp5, temp5, temp5, temp5, temp5, temp5,
dst, 0, 1, 2, 3, BPS)
OUTPUT_EARLY_CLOBBER_REGS_10()
: [in]"r"(in), [dst]"r"(dst)
: "memory"
);
}
static void TransformAC3(const int16_t* in, uint8_t* dst) {
const int a = in[0] + 4;
int c4 = MUL(in[4], kC2);
const int d4 = MUL(in[4], kC1);
const int c1 = MUL(in[1], kC2);
const int d1 = MUL(in[1], kC1);
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
__asm__ volatile (
"ins %[c4], %[d4], 16, 16 \n\t"
"replv.ph %[temp1], %[a] \n\t"
"replv.ph %[temp4], %[d1] \n\t"
ADD_SUB_HALVES(temp2, temp3, temp1, c4)
"replv.ph %[temp5], %[c1] \n\t"
SHIFT_R_SUM_X2(temp1, temp6, temp7, temp8, temp2, temp9, temp10, temp4,
temp2, temp2, temp3, temp3, temp4, temp5, temp4, temp5)
LOAD_WITH_OFFSET_X4(temp3, temp5, temp11, temp12, dst,
0, 0, 0, 0,
0, 1, 2, 3,
BPS)
CONVERT_2_BYTES_TO_HALF(temp13, temp14, temp3, temp15, temp5, temp16,
temp11, temp17, temp3, temp5, temp11, temp12)
PACK_2_HALVES_TO_WORD(temp12, temp18, temp7, temp6, temp1, temp8, temp2,
temp4, temp7, temp6, temp10, temp9)
STORE_SAT_SUM_X2(temp13, temp14, temp3, temp15, temp5, temp16, temp11,
temp17, temp12, temp18, temp1, temp8, temp2, temp4,
temp7, temp6, dst, 0, 1, 2, 3, BPS)
OUTPUT_EARLY_CLOBBER_REGS_18(),
[c4]"+&r"(c4)
: [dst]"r"(dst), [a]"r"(a), [d1]"r"(d1), [d4]"r"(d4), [c1]"r"(c1)
: "memory"
);
}
static void TransformOne(const int16_t* in, uint8_t* dst) {
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17, temp18;
__asm__ volatile (
"ulw %[temp1], 0(%[in]) \n\t"
"ulw %[temp2], 16(%[in]) \n\t"
LOAD_IN_X2(temp5, temp6, 24, 26)
ADD_SUB_HALVES(temp3, temp4, temp1, temp2)
LOAD_IN_X2(temp1, temp2, 8, 10)
MUL_SHIFT_SUM(temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14,
temp10, temp8, temp9, temp7, temp1, temp2, temp5, temp6,
temp13, temp11, temp14, temp12)
INSERT_HALF_X2(temp8, temp7, temp10, temp9)
"ulw %[temp17], 4(%[in]) \n\t"
"ulw %[temp18], 20(%[in]) \n\t"
ADD_SUB_HALVES(temp1, temp2, temp3, temp8)
ADD_SUB_HALVES(temp5, temp6, temp4, temp7)
ADD_SUB_HALVES(temp7, temp8, temp17, temp18)
LOAD_IN_X2(temp17, temp18, 12, 14)
LOAD_IN_X2(temp9, temp10, 28, 30)
MUL_SHIFT_SUM(temp11, temp12, temp13, temp14, temp15, temp16, temp4, temp17,
temp12, temp14, temp11, temp13, temp17, temp18, temp9, temp10,
temp15, temp4, temp16, temp17)
INSERT_HALF_X2(temp11, temp12, temp13, temp14)
ADD_SUB_HALVES(temp17, temp8, temp8, temp11)
ADD_SUB_HALVES(temp3, temp4, temp7, temp12)
// horizontal
SRA_16(temp9, temp10, temp11, temp12, temp1, temp2, temp5, temp6)
INSERT_HALF_X2(temp1, temp6, temp5, temp2)
SRA_16(temp13, temp14, temp15, temp16, temp3, temp4, temp17, temp8)
"repl.ph %[temp2], 0x4 \n\t"
INSERT_HALF_X2(temp3, temp8, temp17, temp4)
"addq.ph %[temp1], %[temp1], %[temp2] \n\t"
"addq.ph %[temp6], %[temp6], %[temp2] \n\t"
ADD_SUB_HALVES(temp2, temp4, temp1, temp3)
ADD_SUB_HALVES(temp5, temp7, temp6, temp8)
MUL_SHIFT_SUM(temp1, temp3, temp6, temp8, temp9, temp13, temp17, temp18,
temp3, temp13, temp1, temp9, temp9, temp13, temp11, temp15,
temp6, temp17, temp8, temp18)
MUL_SHIFT_SUM(temp6, temp8, temp18, temp17, temp11, temp15, temp12, temp16,
temp8, temp15, temp6, temp11, temp12, temp16, temp10, temp14,
temp18, temp12, temp17, temp16)
INSERT_HALF_X2(temp1, temp3, temp9, temp13)
INSERT_HALF_X2(temp6, temp8, temp11, temp15)
SHIFT_R_SUM_X2(temp9, temp10, temp11, temp12, temp13, temp14, temp15,
temp16, temp2, temp4, temp5, temp7, temp3, temp1, temp8,
temp6)
PACK_2_HALVES_TO_WORD(temp1, temp2, temp3, temp4, temp9, temp12, temp13,
temp16, temp11, temp10, temp15, temp14)
LOAD_WITH_OFFSET_X4(temp10, temp11, temp14, temp15, dst,
0, 0, 0, 0,
0, 1, 2, 3,
BPS)
CONVERT_2_BYTES_TO_HALF(temp5, temp6, temp7, temp8, temp17, temp18, temp10,
temp11, temp10, temp11, temp14, temp15)
STORE_SAT_SUM_X2(temp5, temp6, temp7, temp8, temp17, temp18, temp10, temp11,
temp9, temp12, temp1, temp2, temp13, temp16, temp3, temp4,
dst, 0, 1, 2, 3, BPS)
OUTPUT_EARLY_CLOBBER_REGS_18()
: [dst]"r"(dst), [in]"r"(in), [kC1]"r"(kC1), [kC2]"r"(kC2)
: "memory", "hi", "lo"
);
}
static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
TransformOne(in, dst);
if (do_two) {
TransformOne(in + 16, dst + 4);
}
}
static WEBP_INLINE void FilterLoop26(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
const int thresh2 = 2 * thresh + 1;
int temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9;
int temp10, temp11, temp12, temp13, temp14, temp15;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"negu %[temp1], %[hstride] \n\t"
"addiu %[size], %[size], -1 \n\t"
"sll %[temp2], %[hstride], 1 \n\t"
"sll %[temp3], %[temp1], 1 \n\t"
"addu %[temp4], %[temp2], %[hstride] \n\t"
"addu %[temp5], %[temp3], %[temp1] \n\t"
"lbu %[temp7], 0(%[p]) \n\t"
"sll %[temp6], %[temp3], 1 \n\t"
"lbux %[temp8], %[temp5](%[p]) \n\t"
"lbux %[temp9], %[temp3](%[p]) \n\t"
"lbux %[temp10], %[temp1](%[p]) \n\t"
"lbux %[temp11], %[temp6](%[p]) \n\t"
"lbux %[temp12], %[hstride](%[p]) \n\t"
"lbux %[temp13], %[temp2](%[p]) \n\t"
"lbux %[temp14], %[temp4](%[p]) \n\t"
"subu %[temp1], %[temp10], %[temp7] \n\t"
"subu %[temp2], %[temp9], %[temp12] \n\t"
"absq_s.w %[temp3], %[temp1] \n\t"
"absq_s.w %[temp4], %[temp2] \n\t"
"negu %[temp1], %[temp1] \n\t"
"sll %[temp3], %[temp3], 2 \n\t"
"addu %[temp15], %[temp3], %[temp4] \n\t"
"subu %[temp3], %[temp15], %[thresh2] \n\t"
"sll %[temp6], %[temp1], 1 \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp4], %[temp11], %[temp8] \n\t"
"absq_s.w %[temp4], %[temp4] \n\t"
"shll_s.w %[temp2], %[temp2], 24 \n\t"
"subu %[temp4], %[temp4], %[ithresh] \n\t"
"bgtz %[temp4], 3f \n\t"
" subu %[temp3], %[temp8], %[temp9] \n\t"
"absq_s.w %[temp3], %[temp3] \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp5], %[temp9], %[temp10] \n\t"
"absq_s.w %[temp3], %[temp5] \n\t"
"absq_s.w %[temp5], %[temp5] \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp3], %[temp14], %[temp13] \n\t"
"absq_s.w %[temp3], %[temp3] \n\t"
"slt %[temp5], %[hev_thresh], %[temp5] \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp3], %[temp13], %[temp12] \n\t"
"absq_s.w %[temp3], %[temp3] \n\t"
"sra %[temp4], %[temp2], 24 \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" subu %[temp15], %[temp12], %[temp7] \n\t"
"absq_s.w %[temp3], %[temp15] \n\t"
"absq_s.w %[temp15], %[temp15] \n\t"
"subu %[temp3], %[temp3], %[ithresh] \n\t"
"bgtz %[temp3], 3f \n\t"
" slt %[temp15], %[hev_thresh], %[temp15] \n\t"
"addu %[temp3], %[temp6], %[temp1] \n\t"
"or %[temp2], %[temp5], %[temp15] \n\t"
"addu %[temp5], %[temp4], %[temp3] \n\t"
"beqz %[temp2], 4f \n\t"
" shra_r.w %[temp1], %[temp5], 3 \n\t"
"addiu %[temp2], %[temp5], 3 \n\t"
"sra %[temp2], %[temp2], 3 \n\t"
"shll_s.w %[temp1], %[temp1], 27 \n\t"
"shll_s.w %[temp2], %[temp2], 27 \n\t"
"subu %[temp3], %[p], %[hstride] \n\t"
"sra %[temp1], %[temp1], 27 \n\t"
"sra %[temp2], %[temp2], 27 \n\t"
"subu %[temp1], %[temp7], %[temp1] \n\t"
"addu %[temp2], %[temp10], %[temp2] \n\t"
"lbux %[temp2], %[temp2](%[VP8kclip1]) \n\t"
"lbux %[temp1], %[temp1](%[VP8kclip1]) \n\t"
"sb %[temp2], 0(%[temp3]) \n\t"
"j 3f \n\t"
" sb %[temp1], 0(%[p]) \n\t"
"4: \n\t"
"shll_s.w %[temp5], %[temp5], 24 \n\t"
"subu %[temp14], %[p], %[hstride] \n\t"
"subu %[temp11], %[temp14], %[hstride] \n\t"
"sra %[temp6], %[temp5], 24 \n\t"
"sll %[temp1], %[temp6], 3 \n\t"
"subu %[temp15], %[temp11], %[hstride] \n\t"
"addu %[temp2], %[temp6], %[temp1] \n\t"
"sll %[temp3], %[temp2], 1 \n\t"
"addu %[temp4], %[temp3], %[temp2] \n\t"
"addiu %[temp2], %[temp2], 63 \n\t"
"addiu %[temp3], %[temp3], 63 \n\t"
"addiu %[temp4], %[temp4], 63 \n\t"
"sra %[temp2], %[temp2], 7 \n\t"
"sra %[temp3], %[temp3], 7 \n\t"
"sra %[temp4], %[temp4], 7 \n\t"
"addu %[temp1], %[temp8], %[temp2] \n\t"
"addu %[temp5], %[temp9], %[temp3] \n\t"
"addu %[temp6], %[temp10], %[temp4] \n\t"
"subu %[temp8], %[temp7], %[temp4] \n\t"
"subu %[temp7], %[temp12], %[temp3] \n\t"
"addu %[temp10], %[p], %[hstride] \n\t"
"subu %[temp9], %[temp13], %[temp2] \n\t"
"addu %[temp12], %[temp10], %[hstride] \n\t"
"lbux %[temp2], %[temp1](%[VP8kclip1]) \n\t"
"lbux %[temp3], %[temp5](%[VP8kclip1]) \n\t"
"lbux %[temp4], %[temp6](%[VP8kclip1]) \n\t"
"lbux %[temp5], %[temp8](%[VP8kclip1]) \n\t"
"lbux %[temp6], %[temp7](%[VP8kclip1]) \n\t"
"lbux %[temp8], %[temp9](%[VP8kclip1]) \n\t"
"sb %[temp2], 0(%[temp15]) \n\t"
"sb %[temp3], 0(%[temp11]) \n\t"
"sb %[temp4], 0(%[temp14]) \n\t"
"sb %[temp5], 0(%[p]) \n\t"
"sb %[temp6], 0(%[temp10]) \n\t"
"sb %[temp8], 0(%[temp12]) \n\t"
"3: \n\t"
"bgtz %[size], 1b \n\t"
" addu %[p], %[p], %[vstride] \n\t"
".set pop \n\t"
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),[temp3]"=&r"(temp3),
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
[temp7]"=&r"(temp7),[temp8]"=&r"(temp8),[temp9]"=&r"(temp9),
[temp10]"=&r"(temp10),[temp11]"=&r"(temp11),[temp12]"=&r"(temp12),
[temp13]"=&r"(temp13),[temp14]"=&r"(temp14),[temp15]"=&r"(temp15),
[size]"+&r"(size), [p]"+&r"(p)
: [hstride]"r"(hstride), [thresh2]"r"(thresh2),
[ithresh]"r"(ithresh),[vstride]"r"(vstride), [hev_thresh]"r"(hev_thresh),
[VP8kclip1]"r"(VP8kclip1)
: "memory"
);
}
static WEBP_INLINE void FilterLoop24(uint8_t* p,
int hstride, int vstride, int size,
int thresh, int ithresh, int hev_thresh) {
int p0, q0, p1, q1, p2, q2, p3, q3;
int step1, step2, temp1, temp2, temp3, temp4;
uint8_t* pTemp0;
uint8_t* pTemp1;
const int thresh2 = 2 * thresh + 1;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"bltz %[size], 3f \n\t"
" nop \n\t"
"2: \n\t"
"negu %[step1], %[hstride] \n\t"
"lbu %[q0], 0(%[p]) \n\t"
"lbux %[p0], %[step1](%[p]) \n\t"
"subu %[step1], %[step1], %[hstride] \n\t"
"lbux %[q1], %[hstride](%[p]) \n\t"
"subu %[temp1], %[p0], %[q0] \n\t"
"lbux %[p1], %[step1](%[p]) \n\t"
"addu %[step2], %[hstride], %[hstride] \n\t"
"absq_s.w %[temp2], %[temp1] \n\t"
"subu %[temp3], %[p1], %[q1] \n\t"
"absq_s.w %[temp4], %[temp3] \n\t"
"sll %[temp2], %[temp2], 2 \n\t"
"addu %[temp2], %[temp2], %[temp4] \n\t"
"subu %[temp4], %[temp2], %[thresh2] \n\t"
"subu %[step1], %[step1], %[hstride] \n\t"
"bgtz %[temp4], 0f \n\t"
" lbux %[p2], %[step1](%[p]) \n\t"
"subu %[step1], %[step1], %[hstride] \n\t"
"lbux %[q2], %[step2](%[p]) \n\t"
"lbux %[p3], %[step1](%[p]) \n\t"
"subu %[temp4], %[p2], %[p1] \n\t"
"addu %[step2], %[step2], %[hstride] \n\t"
"subu %[temp2], %[p3], %[p2] \n\t"
"absq_s.w %[temp4], %[temp4] \n\t"
"absq_s.w %[temp2], %[temp2] \n\t"
"lbux %[q3], %[step2](%[p]) \n\t"
"subu %[temp4], %[temp4], %[ithresh] \n\t"
"negu %[temp1], %[temp1] \n\t"
"bgtz %[temp4], 0f \n\t"
" subu %[temp2], %[temp2], %[ithresh] \n\t"
"subu %[p3], %[p1], %[p0] \n\t"
"bgtz %[temp2], 0f \n\t"
" absq_s.w %[p3], %[p3] \n\t"
"subu %[temp4], %[q3], %[q2] \n\t"
"subu %[pTemp0], %[p], %[hstride] \n\t"
"absq_s.w %[temp4], %[temp4] \n\t"
"subu %[temp2], %[p3], %[ithresh] \n\t"
"sll %[step1], %[temp1], 1 \n\t"
"bgtz %[temp2], 0f \n\t"
" subu %[temp4], %[temp4], %[ithresh] \n\t"
"subu %[temp2], %[q2], %[q1] \n\t"
"bgtz %[temp4], 0f \n\t"
" absq_s.w %[temp2], %[temp2] \n\t"
"subu %[q3], %[q1], %[q0] \n\t"
"absq_s.w %[q3], %[q3] \n\t"
"subu %[temp2], %[temp2], %[ithresh] \n\t"
"addu %[temp1], %[temp1], %[step1] \n\t"
"bgtz %[temp2], 0f \n\t"
" subu %[temp4], %[q3], %[ithresh] \n\t"
"slt %[p3], %[hev_thresh], %[p3] \n\t"
"bgtz %[temp4], 0f \n\t"
" slt %[q3], %[hev_thresh], %[q3] \n\t"
"or %[q3], %[q3], %[p3] \n\t"
"bgtz %[q3], 1f \n\t"
" shra_r.w %[temp2], %[temp1], 3 \n\t"
"addiu %[temp1], %[temp1], 3 \n\t"
"sra %[temp1], %[temp1], 3 \n\t"
"shll_s.w %[temp2], %[temp2], 27 \n\t"
"shll_s.w %[temp1], %[temp1], 27 \n\t"
"addu %[pTemp1], %[p], %[hstride] \n\t"
"sra %[temp2], %[temp2], 27 \n\t"
"sra %[temp1], %[temp1], 27 \n\t"
"addiu %[step1], %[temp2], 1 \n\t"
"sra %[step1], %[step1], 1 \n\t"
"addu %[p0], %[p0], %[temp1] \n\t"
"addu %[p1], %[p1], %[step1] \n\t"
"subu %[q0], %[q0], %[temp2] \n\t"
"subu %[q1], %[q1], %[step1] \n\t"
"lbux %[temp2], %[p0](%[VP8kclip1]) \n\t"
"lbux %[temp3], %[q0](%[VP8kclip1]) \n\t"
"lbux %[temp4], %[q1](%[VP8kclip1]) \n\t"
"sb %[temp2], 0(%[pTemp0]) \n\t"
"lbux %[temp1], %[p1](%[VP8kclip1]) \n\t"
"subu %[pTemp0], %[pTemp0], %[hstride] \n\t"
"sb %[temp3], 0(%[p]) \n\t"
"sb %[temp4], 0(%[pTemp1]) \n\t"
"j 0f \n\t"
" sb %[temp1], 0(%[pTemp0]) \n\t"
"1: \n\t"
"shll_s.w %[temp3], %[temp3], 24 \n\t"
"sra %[temp3], %[temp3], 24 \n\t"
"addu %[temp1], %[temp1], %[temp3] \n\t"
"shra_r.w %[temp2], %[temp1], 3 \n\t"
"addiu %[temp1], %[temp1], 3 \n\t"
"shll_s.w %[temp2], %[temp2], 27 \n\t"
"sra %[temp1], %[temp1], 3 \n\t"
"shll_s.w %[temp1], %[temp1], 27 \n\t"
"sra %[temp2], %[temp2], 27 \n\t"
"sra %[temp1], %[temp1], 27 \n\t"
"addu %[p0], %[p0], %[temp1] \n\t"
"subu %[q0], %[q0], %[temp2] \n\t"
"lbux %[temp1], %[p0](%[VP8kclip1]) \n\t"
"lbux %[temp2], %[q0](%[VP8kclip1]) \n\t"
"sb %[temp2], 0(%[p]) \n\t"
"sb %[temp1], 0(%[pTemp0]) \n\t"
"0: \n\t"
"subu %[size], %[size], 1 \n\t"
"bgtz %[size], 2b \n\t"
" addu %[p], %[p], %[vstride] \n\t"
"3: \n\t"
".set pop \n\t"
: [p0]"=&r"(p0), [q0]"=&r"(q0), [p1]"=&r"(p1), [q1]"=&r"(q1),
[p2]"=&r"(p2), [q2]"=&r"(q2), [p3]"=&r"(p3), [q3]"=&r"(q3),
[step2]"=&r"(step2), [step1]"=&r"(step1), [temp1]"=&r"(temp1),
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
[pTemp0]"=&r"(pTemp0), [pTemp1]"=&r"(pTemp1), [p]"+&r"(p),
[size]"+&r"(size)
: [vstride]"r"(vstride), [ithresh]"r"(ithresh),
[hev_thresh]"r"(hev_thresh), [hstride]"r"(hstride),
[VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
: "memory"
);
}
// on macroblock edges
static void VFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
static void HFilter16(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
// 8-pixels wide variant, for chroma filtering
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
}
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
// on three inner edges
static void VFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
}
static void HFilter16i(uint8_t* p, int stride,
int thresh, int ithresh, int hev_thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
}
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_thresh) {
FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
#undef MUL
//------------------------------------------------------------------------------
// Simple In-loop filtering (Paragraph 15.2)
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
uint8_t* p1 = p - stride;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"li %[i], 16 \n\t"
"0: \n\t"
"negu %[temp4], %[stride] \n\t"
"sll %[temp5], %[temp4], 1 \n\t"
"lbu %[temp2], 0(%[p]) \n\t"
"lbux %[temp3], %[stride](%[p]) \n\t"
"lbux %[temp1], %[temp4](%[p]) \n\t"
"lbux %[temp0], %[temp5](%[p]) \n\t"
"subu %[temp7], %[temp1], %[temp2] \n\t"
"subu %[temp6], %[temp0], %[temp3] \n\t"
"absq_s.w %[temp4], %[temp7] \n\t"
"absq_s.w %[temp5], %[temp6] \n\t"
"sll %[temp4], %[temp4], 2 \n\t"
"subu %[temp5], %[temp5], %[thresh2] \n\t"
"addu %[temp5], %[temp4], %[temp5] \n\t"
"negu %[temp8], %[temp7] \n\t"
"bgtz %[temp5], 1f \n\t"
" addiu %[i], %[i], -1 \n\t"
"sll %[temp4], %[temp8], 1 \n\t"
"shll_s.w %[temp5], %[temp6], 24 \n\t"
"addu %[temp3], %[temp4], %[temp8] \n\t"
"sra %[temp5], %[temp5], 24 \n\t"
"addu %[temp3], %[temp3], %[temp5] \n\t"
"addiu %[temp7], %[temp3], 3 \n\t"
"sra %[temp7], %[temp7], 3 \n\t"
"shra_r.w %[temp8], %[temp3], 3 \n\t"
"shll_s.w %[temp0], %[temp7], 27 \n\t"
"shll_s.w %[temp4], %[temp8], 27 \n\t"
"sra %[temp0], %[temp0], 27 \n\t"
"sra %[temp4], %[temp4], 27 \n\t"
"addu %[temp7], %[temp1], %[temp0] \n\t"
"subu %[temp2], %[temp2], %[temp4] \n\t"
"lbux %[temp3], %[temp7](%[VP8kclip1]) \n\t"
"lbux %[temp4], %[temp2](%[VP8kclip1]) \n\t"
"sb %[temp3], 0(%[p1]) \n\t"
"sb %[temp4], 0(%[p]) \n\t"
"1: \n\t"
"addiu %[p1], %[p1], 1 \n\t"
"bgtz %[i], 0b \n\t"
" addiu %[p], %[p], 1 \n\t"
" .set pop \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[p]"+&r"(p), [i]"=&r"(i), [p1]"+&r"(p1)
: [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
: "memory"
);
}
// TEMP0 = SRC[A + A1 * BPS]
// TEMP1 = SRC[B + B1 * BPS]
// TEMP2 = SRC[C + C1 * BPS]
// TEMP3 = SRC[D + D1 * BPS]
#define LOAD_4_BYTES(TEMP0, TEMP1, TEMP2, TEMP3, \
A, A1, B, B1, C, C1, D, D1, SRC) \
"lbu %[" #TEMP0 "], " #A "+" #A1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP1 "], " #B "+" #B1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP2 "], " #C "+" #C1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"lbu %[" #TEMP3 "], " #D "+" #D1 "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
int i;
const int thresh2 = 2 * thresh + 1;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8;
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
"li %[i], 16 \n\t"
"0: \n\t"
LOAD_4_BYTES(temp0, temp1, temp2, temp3, -2, 0, -1, 0, 0, 0, 1, 0, p)
"subu %[temp7], %[temp1], %[temp2] \n\t"
"subu %[temp6], %[temp0], %[temp3] \n\t"
"absq_s.w %[temp4], %[temp7] \n\t"
"absq_s.w %[temp5], %[temp6] \n\t"
"sll %[temp4], %[temp4], 2 \n\t"
"addu %[temp5], %[temp4], %[temp5] \n\t"
"subu %[temp5], %[temp5], %[thresh2] \n\t"
"negu %[temp8], %[temp7] \n\t"
"bgtz %[temp5], 1f \n\t"
" addiu %[i], %[i], -1 \n\t"
"sll %[temp4], %[temp8], 1 \n\t"
"shll_s.w %[temp5], %[temp6], 24 \n\t"
"addu %[temp3], %[temp4], %[temp8] \n\t"
"sra %[temp5], %[temp5], 24 \n\t"
"addu %[temp3], %[temp3], %[temp5] \n\t"
"addiu %[temp7], %[temp3], 3 \n\t"
"sra %[temp7], %[temp7], 3 \n\t"
"shra_r.w %[temp8], %[temp3], 3 \n\t"
"shll_s.w %[temp0], %[temp7], 27 \n\t"
"shll_s.w %[temp4], %[temp8], 27 \n\t"
"sra %[temp0], %[temp0], 27 \n\t"
"sra %[temp4], %[temp4], 27 \n\t"
"addu %[temp7], %[temp1], %[temp0] \n\t"
"subu %[temp2], %[temp2], %[temp4] \n\t"
"lbux %[temp3], %[temp7](%[VP8kclip1]) \n\t"
"lbux %[temp4], %[temp2](%[VP8kclip1]) \n\t"
"sb %[temp3], -1(%[p]) \n\t"
"sb %[temp4], 0(%[p]) \n\t"
"1: \n\t"
"bgtz %[i], 0b \n\t"
" addu %[p], %[p], %[stride] \n\t"
".set pop \n\t"
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[p]"+&r"(p), [i]"=&r"(i)
: [stride]"r"(stride), [VP8kclip1]"r"(VP8kclip1), [thresh2]"r"(thresh2)
: "memory"
);
}
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4 * stride;
SimpleVFilter16(p, stride, thresh);
}
}
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
int k;
for (k = 3; k > 0; --k) {
p += 4;
SimpleHFilter16(p, stride, thresh);
}
}
// DST[A * BPS] = TEMP0
// DST[B + C * BPS] = TEMP1
#define STORE_8_BYTES(TEMP0, TEMP1, A, B, C, DST) \
"usw %[" #TEMP0 "], " #A "*" XSTR(BPS) "(%[" #DST "]) \n\t" \
"usw %[" #TEMP1 "], " #B "+" #C "*" XSTR(BPS) "(%[" #DST "]) \n\t"
static void VE4(uint8_t* dst) { // vertical
const uint8_t* top = dst - BPS;
int temp0, temp1, temp2, temp3, temp4, temp5, temp6;
__asm__ volatile (
"ulw %[temp0], -1(%[top]) \n\t"
"ulh %[temp1], 3(%[top]) \n\t"
"preceu.ph.qbr %[temp2], %[temp0] \n\t"
"preceu.ph.qbl %[temp3], %[temp0] \n\t"
"preceu.ph.qbr %[temp4], %[temp1] \n\t"
"packrl.ph %[temp5], %[temp3], %[temp2] \n\t"
"packrl.ph %[temp6], %[temp4], %[temp3] \n\t"
"shll.ph %[temp5], %[temp5], 1 \n\t"
"shll.ph %[temp6], %[temp6], 1 \n\t"
"addq.ph %[temp2], %[temp5], %[temp2] \n\t"
"addq.ph %[temp6], %[temp6], %[temp4] \n\t"
"addq.ph %[temp2], %[temp2], %[temp3] \n\t"
"addq.ph %[temp6], %[temp6], %[temp3] \n\t"
"shra_r.ph %[temp2], %[temp2], 2 \n\t"
"shra_r.ph %[temp6], %[temp6], 2 \n\t"
"precr.qb.ph %[temp4], %[temp6], %[temp2] \n\t"
STORE_8_BYTES(temp4, temp4, 0, 0, 1, dst)
STORE_8_BYTES(temp4, temp4, 2, 0, 3, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6)
: [top]"r"(top), [dst]"r"(dst)
: "memory"
);
}
static void DC4(uint8_t* dst) { // DC
int temp0, temp1, temp2, temp3, temp4;
__asm__ volatile (
"ulw %[temp0], -1*" XSTR(BPS) "(%[dst]) \n\t"
LOAD_4_BYTES(temp1, temp2, temp3, temp4, -1, 0, -1, 1, -1, 2, -1, 3, dst)
"ins %[temp1], %[temp2], 8, 8 \n\t"
"ins %[temp1], %[temp3], 16, 8 \n\t"
"ins %[temp1], %[temp4], 24, 8 \n\t"
"raddu.w.qb %[temp0], %[temp0] \n\t"
"raddu.w.qb %[temp1], %[temp1] \n\t"
"addu %[temp0], %[temp0], %[temp1] \n\t"
"shra_r.w %[temp0], %[temp0], 3 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
STORE_8_BYTES(temp0, temp0, 0, 0, 1, dst)
STORE_8_BYTES(temp0, temp0, 2, 0, 3, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4)
: [dst]"r"(dst)
: "memory"
);
}
static void RD4(uint8_t* dst) { // Down-right
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8;
__asm__ volatile (
LOAD_4_BYTES(temp0, temp1, temp2, temp3, -1, 0, -1, 1, -1, 2, -1, 3, dst)
"ulw %[temp7], -1-" XSTR(BPS) "(%[dst]) \n\t"
"ins %[temp1], %[temp0], 16, 16 \n\t"
"preceu.ph.qbr %[temp5], %[temp7] \n\t"
"ins %[temp2], %[temp1], 16, 16 \n\t"
"preceu.ph.qbl %[temp4], %[temp7] \n\t"
"ins %[temp3], %[temp2], 16, 16 \n\t"
"shll.ph %[temp2], %[temp2], 1 \n\t"
"addq.ph %[temp3], %[temp3], %[temp1] \n\t"
"packrl.ph %[temp6], %[temp5], %[temp1] \n\t"
"addq.ph %[temp3], %[temp3], %[temp2] \n\t"
"addq.ph %[temp1], %[temp1], %[temp5] \n\t"
"shll.ph %[temp6], %[temp6], 1 \n\t"
"addq.ph %[temp1], %[temp1], %[temp6] \n\t"
"packrl.ph %[temp0], %[temp4], %[temp5] \n\t"
"addq.ph %[temp8], %[temp5], %[temp4] \n\t"
"shra_r.ph %[temp3], %[temp3], 2 \n\t"
"shll.ph %[temp0], %[temp0], 1 \n\t"
"shra_r.ph %[temp1], %[temp1], 2 \n\t"
"addq.ph %[temp8], %[temp0], %[temp8] \n\t"
"lbu %[temp5], 3-" XSTR(BPS) "(%[dst]) \n\t"
"precrq.ph.w %[temp7], %[temp7], %[temp7] \n\t"
"shra_r.ph %[temp8], %[temp8], 2 \n\t"
"ins %[temp7], %[temp5], 0, 8 \n\t"
"precr.qb.ph %[temp2], %[temp1], %[temp3] \n\t"
"raddu.w.qb %[temp4], %[temp7] \n\t"
"precr.qb.ph %[temp6], %[temp8], %[temp1] \n\t"
"shra_r.w %[temp4], %[temp4], 2 \n\t"
STORE_8_BYTES(temp2, temp6, 3, 0, 1, dst)
"prepend %[temp2], %[temp8], 8 \n\t"
"prepend %[temp6], %[temp4], 8 \n\t"
STORE_8_BYTES(temp2, temp6, 2, 0, 0, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
: [dst]"r"(dst)
: "memory"
);
}
// TEMP0 = SRC[A * BPS]
// TEMP1 = SRC[B + C * BPS]
#define LOAD_8_BYTES(TEMP0, TEMP1, A, B, C, SRC) \
"ulw %[" #TEMP0 "], " #A "*" XSTR(BPS) "(%[" #SRC "]) \n\t" \
"ulw %[" #TEMP1 "], " #B "+" #C "*" XSTR(BPS) "(%[" #SRC "]) \n\t"
static void LD4(uint8_t* dst) { // Down-Left
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
"preceu.ph.qbl %[temp2], %[temp0] \n\t"
"preceu.ph.qbr %[temp3], %[temp0] \n\t"
"preceu.ph.qbr %[temp4], %[temp1] \n\t"
"preceu.ph.qbl %[temp5], %[temp1] \n\t"
"packrl.ph %[temp6], %[temp2], %[temp3] \n\t"
"packrl.ph %[temp7], %[temp4], %[temp2] \n\t"
"packrl.ph %[temp8], %[temp5], %[temp4] \n\t"
"shll.ph %[temp6], %[temp6], 1 \n\t"
"addq.ph %[temp9], %[temp2], %[temp6] \n\t"
"shll.ph %[temp7], %[temp7], 1 \n\t"
"addq.ph %[temp9], %[temp9], %[temp3] \n\t"
"shll.ph %[temp8], %[temp8], 1 \n\t"
"shra_r.ph %[temp9], %[temp9], 2 \n\t"
"addq.ph %[temp3], %[temp4], %[temp7] \n\t"
"addq.ph %[temp0], %[temp5], %[temp8] \n\t"
"addq.ph %[temp3], %[temp3], %[temp2] \n\t"
"addq.ph %[temp0], %[temp0], %[temp4] \n\t"
"shra_r.ph %[temp3], %[temp3], 2 \n\t"
"shra_r.ph %[temp0], %[temp0], 2 \n\t"
"srl %[temp1], %[temp1], 24 \n\t"
"sll %[temp1], %[temp1], 1 \n\t"
"raddu.w.qb %[temp5], %[temp5] \n\t"
"precr.qb.ph %[temp9], %[temp3], %[temp9] \n\t"
"precr.qb.ph %[temp3], %[temp0], %[temp3] \n\t"
"addu %[temp1], %[temp1], %[temp5] \n\t"
"shra_r.w %[temp1], %[temp1], 2 \n\t"
STORE_8_BYTES(temp9, temp3, 0, 0, 2, dst)
"prepend %[temp9], %[temp0], 8 \n\t"
"prepend %[temp3], %[temp1], 8 \n\t"
STORE_8_BYTES(temp9, temp3, 1, 0, 3, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9)
: [dst]"r"(dst)
: "memory"
);
}
//------------------------------------------------------------------------------
// Chroma
static void DC8uv(uint8_t* dst) { // DC
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8, temp9;
__asm__ volatile (
LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)
LOAD_4_BYTES(temp6, temp7, temp8, temp9, -1, 4, -1, 5, -1, 6, -1, 7, dst)
"raddu.w.qb %[temp0], %[temp0] \n\t"
"raddu.w.qb %[temp1], %[temp1] \n\t"
"addu %[temp2], %[temp2], %[temp3] \n\t"
"addu %[temp4], %[temp4], %[temp5] \n\t"
"addu %[temp6], %[temp6], %[temp7] \n\t"
"addu %[temp8], %[temp8], %[temp9] \n\t"
"addu %[temp0], %[temp0], %[temp1] \n\t"
"addu %[temp2], %[temp2], %[temp4] \n\t"
"addu %[temp6], %[temp6], %[temp8] \n\t"
"addu %[temp0], %[temp0], %[temp2] \n\t"
"addu %[temp0], %[temp0], %[temp6] \n\t"
"shra_r.w %[temp0], %[temp0], 4 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
[temp9]"=&r"(temp9)
: [dst]"r"(dst)
: "memory"
);
}
static void DC8uvNoLeft(uint8_t* dst) { // DC with no left samples
int temp0, temp1;
__asm__ volatile (
LOAD_8_BYTES(temp0, temp1, -1, 4, -1, dst)
"raddu.w.qb %[temp0], %[temp0] \n\t"
"raddu.w.qb %[temp1], %[temp1] \n\t"
"addu %[temp0], %[temp0], %[temp1] \n\t"
"shra_r.w %[temp0], %[temp0], 3 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
: [dst]"r"(dst)
: "memory"
);
}
static void DC8uvNoTop(uint8_t* dst) { // DC with no top samples
int temp0, temp1, temp2, temp3, temp4;
int temp5, temp6, temp7, temp8;
__asm__ volatile (
LOAD_4_BYTES(temp2, temp3, temp4, temp5, -1, 0, -1, 1, -1, 2, -1, 3, dst)
LOAD_4_BYTES(temp6, temp7, temp8, temp1, -1, 4, -1, 5, -1, 6, -1, 7, dst)
"addu %[temp2], %[temp2], %[temp3] \n\t"
"addu %[temp4], %[temp4], %[temp5] \n\t"
"addu %[temp6], %[temp6], %[temp7] \n\t"
"addu %[temp8], %[temp8], %[temp1] \n\t"
"addu %[temp2], %[temp2], %[temp4] \n\t"
"addu %[temp6], %[temp6], %[temp8] \n\t"
"addu %[temp0], %[temp6], %[temp2] \n\t"
"shra_r.w %[temp0], %[temp0], 3 \n\t"
"replv.qb %[temp0], %[temp0] \n\t"
STORE_8_BYTES(temp0, temp0, 0, 4, 0, dst)
STORE_8_BYTES(temp0, temp0, 1, 4, 1, dst)
STORE_8_BYTES(temp0, temp0, 2, 4, 2, dst)
STORE_8_BYTES(temp0, temp0, 3, 4, 3, dst)
STORE_8_BYTES(temp0, temp0, 4, 4, 4, dst)
STORE_8_BYTES(temp0, temp0, 5, 4, 5, dst)
STORE_8_BYTES(temp0, temp0, 6, 4, 6, dst)
STORE_8_BYTES(temp0, temp0, 7, 4, 7, dst)
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8)
: [dst]"r"(dst)
: "memory"
);
}
#undef LOAD_8_BYTES
#undef STORE_8_BYTES
#undef LOAD_4_BYTES
#define CLIPPING(SIZE) \
"preceu.ph.qbl %[temp2], %[temp0] \n\t" \
"preceu.ph.qbr %[temp0], %[temp0] \n\t" \
".if " #SIZE " == 8 \n\t" \
"preceu.ph.qbl %[temp3], %[temp1] \n\t" \
"preceu.ph.qbr %[temp1], %[temp1] \n\t" \
".endif \n\t" \
"addu.ph %[temp2], %[temp2], %[dst_1] \n\t" \
"addu.ph %[temp0], %[temp0], %[dst_1] \n\t" \
".if " #SIZE " == 8 \n\t" \
"addu.ph %[temp3], %[temp3], %[dst_1] \n\t" \
"addu.ph %[temp1], %[temp1], %[dst_1] \n\t" \
".endif \n\t" \
"shll_s.ph %[temp2], %[temp2], 7 \n\t" \
"shll_s.ph %[temp0], %[temp0], 7 \n\t" \
".if " #SIZE " == 8 \n\t" \
"shll_s.ph %[temp3], %[temp3], 7 \n\t" \
"shll_s.ph %[temp1], %[temp1], 7 \n\t" \
".endif \n\t" \
"precrqu_s.qb.ph %[temp0], %[temp2], %[temp0] \n\t" \
".if " #SIZE " == 8 \n\t" \
"precrqu_s.qb.ph %[temp1], %[temp3], %[temp1] \n\t" \
".endif \n\t"
#define CLIP_8B_TO_DST(DST, TOP, SIZE) do { \
int dst_1 = ((int)(DST)[-1] << 16) + (DST)[-1]; \
int temp0, temp1, temp2, temp3; \
__asm__ volatile ( \
".if " #SIZE " < 8 \n\t" \
"ulw %[temp0], 0(%[top]) \n\t" \
"subu.ph %[dst_1], %[dst_1], %[top_1] \n\t" \
CLIPPING(4) \
"usw %[temp0], 0(%[dst]) \n\t" \
".else \n\t" \
"ulw %[temp0], 0(%[top]) \n\t" \
"ulw %[temp1], 4(%[top]) \n\t" \
"subu.ph %[dst_1], %[dst_1], %[top_1] \n\t" \
CLIPPING(8) \
"usw %[temp0], 0(%[dst]) \n\t" \
"usw %[temp1], 4(%[dst]) \n\t" \
".if " #SIZE " == 16 \n\t" \
"ulw %[temp0], 8(%[top]) \n\t" \
"ulw %[temp1], 12(%[top]) \n\t" \
CLIPPING(8) \
"usw %[temp0], 8(%[dst]) \n\t" \
"usw %[temp1], 12(%[dst]) \n\t" \
".endif \n\t" \
".endif \n\t" \
: [dst_1]"+&r"(dst_1), [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), \
[temp2]"=&r"(temp2), [temp3]"=&r"(temp3) \
: [top_1]"r"(top_1), [top]"r"((TOP)), [dst]"r"((DST)) \
: "memory" \
); \
} while (0)
#define CLIP_TO_DST(DST, SIZE) do { \
int y; \
const uint8_t* top = (DST) - BPS; \
const int top_1 = ((int)top[-1] << 16) + top[-1]; \
for (y = 0; y < (SIZE); ++y) { \
CLIP_8B_TO_DST((DST), top, (SIZE)); \
(DST) += BPS; \
} \
} while (0)
#define TRUE_MOTION(DST, SIZE) \
static void TrueMotion##SIZE(uint8_t* (DST)) { \
CLIP_TO_DST((DST), (SIZE)); \
}
TRUE_MOTION(dst, 4)
TRUE_MOTION(dst, 8)
TRUE_MOTION(dst, 16)
#undef TRUE_MOTION
#undef CLIP_TO_DST
#undef CLIP_8B_TO_DST
#undef CLIPPING
//------------------------------------------------------------------------------
// Entry point
extern void VP8DspInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPSdspR2(void) {
VP8TransformDC = TransformDC;
VP8TransformAC3 = TransformAC3;
VP8Transform = TransformTwo;
VP8VFilter16 = VFilter16;
VP8HFilter16 = HFilter16;
VP8VFilter8 = VFilter8;
VP8HFilter8 = HFilter8;
VP8VFilter16i = VFilter16i;
VP8HFilter16i = HFilter16i;
VP8VFilter8i = VFilter8i;
VP8HFilter8i = HFilter8i;
VP8SimpleVFilter16 = SimpleVFilter16;
VP8SimpleHFilter16 = SimpleHFilter16;
VP8SimpleVFilter16i = SimpleVFilter16i;
VP8SimpleHFilter16i = SimpleHFilter16i;
VP8PredLuma4[0] = DC4;
VP8PredLuma4[1] = TrueMotion4;
VP8PredLuma4[2] = VE4;
VP8PredLuma4[4] = RD4;
VP8PredLuma4[6] = LD4;
VP8PredChroma8[0] = DC8uv;
VP8PredChroma8[1] = TrueMotion8;
VP8PredChroma8[4] = DC8uvNoTop;
VP8PredChroma8[5] = DC8uvNoLeft;
VP8PredLuma16[1] = TrueMotion16;
}
#else // !WEBP_USE_MIPS_DSP_R2
WEBP_DSP_INIT_STUB(VP8DspInitMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2

The diff for this file is not shown because it is too large.
 
The diff for this file is not shown because it is too large.
 
The diff for this file is not shown because it is too large.

View file

@@ -0,0 +1,46 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE4 version of some decoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE41)
#include <smmintrin.h>
#include "src/dec/vp8i_dec.h"
#include "src/utils/utils.h"
static void HE16_SSE41(uint8_t* dst) { // horizontal
int j;
const __m128i kShuffle3 = _mm_set1_epi8(3);
for (j = 16; j > 0; --j) {
const __m128i in = _mm_cvtsi32_si128(WebPMemToUint32(dst - 4));
const __m128i values = _mm_shuffle_epi8(in, kShuffle3);
_mm_storeu_si128((__m128i*)dst, values);
dst += BPS;
}
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8DspInitSSE41(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitSSE41(void) {
VP8PredLuma16[3] = HE16_SSE41;
}
#else // !WEBP_USE_SSE41
WEBP_DSP_INIT_STUB(VP8DspInitSSE41)
#endif // WEBP_USE_SSE41

682 media/libwebp/src/dsp/dsp.h Normal file
View file

@@ -0,0 +1,682 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical functions.
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DSP_DSP_H_
#define WEBP_DSP_DSP_H_
#ifdef HAVE_CONFIG_H
#include "src/webp/config.h"
#endif
#include "src/webp/types.h"
#ifdef __cplusplus
extern "C" {
#endif
#define BPS 32 // this is the common stride for enc/dec
//------------------------------------------------------------------------------
// CPU detection
#if defined(__GNUC__)
# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
# define LOCAL_GCC_PREREQ(maj, min) \
(LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_GCC_VERSION 0
# define LOCAL_GCC_PREREQ(maj, min) 0
#endif
#if defined(__clang__)
# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
# define LOCAL_CLANG_PREREQ(maj, min) \
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
#else
# define LOCAL_CLANG_VERSION 0
# define LOCAL_CLANG_PREREQ(maj, min) 0
#endif
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
// for now, none of the optimizations below are available in emscripten
#if !defined(EMSCRIPTEN)
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
(defined(_M_X64) || defined(_M_IX86))
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
#endif
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
// files without intrinsics, allowing the corresponding Init() to be called.
// Files containing intrinsics will need to be built targeting the instruction
// set so should succeed on one of the earlier tests.
#if defined(__SSE2__) || defined(WEBP_MSC_SSE2) || defined(WEBP_HAVE_SSE2)
#define WEBP_USE_SSE2
#endif
#if defined(__SSE4_1__) || defined(WEBP_MSC_SSE41) || defined(WEBP_HAVE_SSE41)
#define WEBP_USE_SSE41
#endif
#if defined(__AVX2__) || defined(WEBP_HAVE_AVX2)
#define WEBP_USE_AVX2
#endif
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
// inline assembly would need to be modified for use with Native Client.
#if (defined(__ARM_NEON__) || \
defined(__aarch64__) || defined(WEBP_HAVE_NEON)) && \
!defined(__native_client__)
#define WEBP_USE_NEON
#endif
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
#define WEBP_ANDROID_NEON // Android targets that may have NEON
#define WEBP_USE_NEON
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
#define WEBP_USE_NEON
#define WEBP_USE_INTRINSICS
#endif
#if defined(__mips__) && !defined(__mips64) && \
defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
#define WEBP_USE_MIPS32
#if (__mips_isa_rev >= 2)
#define WEBP_USE_MIPS32_R2
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
#define WEBP_USE_MIPS_DSP_R2
#endif
#endif
#endif
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
#define WEBP_USE_MSA
#endif
#endif /* EMSCRIPTEN */
#ifndef WEBP_DSP_OMIT_C_CODE
#define WEBP_DSP_OMIT_C_CODE 1
#endif
#if (defined(__aarch64__) || defined(__ARM_NEON__)) && WEBP_DSP_OMIT_C_CODE
#define WEBP_NEON_OMIT_C_CODE 1
#else
#define WEBP_NEON_OMIT_C_CODE 0
#endif
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
#define WEBP_NEON_WORK_AROUND_GCC 1
#else
#define WEBP_NEON_WORK_AROUND_GCC 0
#endif
// This macro prevents thread_sanitizer from reporting known concurrent writes.
#define WEBP_TSAN_IGNORE_FUNCTION
#if defined(__has_feature)
#if __has_feature(thread_sanitizer)
#undef WEBP_TSAN_IGNORE_FUNCTION
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
#endif
#endif
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
#include <pthread.h> // NOLINT
#define WEBP_DSP_INIT(func) do { \
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
if (pthread_mutex_lock(&func ## _lock)) break; \
if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
(void)pthread_mutex_unlock(&func ## _lock); \
} while (0)
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
#define WEBP_DSP_INIT(func) do { \
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \
func(); \
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
} while (0)
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
// Defines an Init + helper function that controls multiple initialization of
// function pointers / tables.
/* Usage:
WEBP_DSP_INIT_FUNC(InitFunc) {
...function body
}
*/
#define WEBP_DSP_INIT_FUNC(name) \
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
WEBP_TSAN_IGNORE_FUNCTION void name(void) { \
WEBP_DSP_INIT(name ## _body); \
} \
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
#define WEBP_UBSAN_IGNORE_UNDEF
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
#if defined(__clang__) && defined(__has_attribute)
#if __has_attribute(no_sanitize)
// This macro prevents the undefined behavior sanitizer from reporting
// failures. This is only meant to silence unaligned loads on platforms that
// are known to support them.
#undef WEBP_UBSAN_IGNORE_UNDEF
#define WEBP_UBSAN_IGNORE_UNDEF \
__attribute__((no_sanitize("undefined")))
// This macro prevents the undefined behavior sanitizer from reporting
// failures related to unsigned integer overflows. This is only meant to
// silence cases where this well defined behavior is expected.
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
__attribute__((no_sanitize("unsigned-integer-overflow")))
#endif
#endif
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
#if !defined(WEBP_SWAP_16BIT_CSP)
#define WEBP_SWAP_16BIT_CSP 0
#endif
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
#if !defined(WORDS_BIGENDIAN) && \
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
#define WORDS_BIGENDIAN
#endif
typedef enum {
kSSE2,
kSSE3,
kSlowSSSE3, // special feature for slow SSSE3 architectures
kSSE4_1,
kAVX,
kAVX2,
kNEON,
kMIPS32,
kMIPSdspR2,
kMSA
} CPUFeature;
// returns true if the CPU supports the feature.
typedef int (*VP8CPUInfo)(CPUFeature feature);
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
//------------------------------------------------------------------------------
// Init stub generator
// Defines an init function stub to ensure each module exposes a symbol,
// avoiding a compiler warning.
#define WEBP_DSP_INIT_STUB(func) \
extern void func(void); \
void func(void) {}
//------------------------------------------------------------------------------
// Encoding
// Transforms
// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
typedef void (*VP8Idct)(const uint8_t* ref, const int16_t* in, uint8_t* dst,
int do_two);
typedef void (*VP8Fdct)(const uint8_t* src, const uint8_t* ref, int16_t* out);
typedef void (*VP8WHT)(const int16_t* in, int16_t* out);
extern VP8Idct VP8ITransform;
extern VP8Fdct VP8FTransform;
extern VP8Fdct VP8FTransform2; // performs two transforms at a time
extern VP8WHT VP8FTransformWHT;
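// Illustrative sketch (not part of libwebp): what the 'do_two' convention of
// VP8Idct above means for a caller. One call with do_two == 1 covers two
// horizontally adjacent 4x4 blocks; the equivalent pair of single-block calls
// is shown in the comments. Assumes VP8EncDspInit() has already set the
// function pointer.
static WEBP_INLINE void IdctTwoBlocks_Sketch(const uint8_t* ref,
                                             const int16_t* in, uint8_t* dst) {
  VP8ITransform(ref, in, dst, 1);
  // ...which is the same as the two single-block calls:
  //   VP8ITransform(ref,     in,      dst,     0);
  //   VP8ITransform(ref + 4, in + 16, dst + 4, 0);
}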
// Predictions
// *dst is the destination block. *top and *left can be NULL.
typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8;
typedef int (*VP8Metric)(const uint8_t* pix, const uint8_t* ref);
extern VP8Metric VP8SSE16x16, VP8SSE16x8, VP8SSE8x8, VP8SSE4x4;
typedef int (*VP8WMetric)(const uint8_t* pix, const uint8_t* ref,
const uint16_t* const weights);
// The weights for VP8TDisto4x4 and VP8TDisto16x16 contain a row-major
// 4 by 4 symmetric matrix.
extern VP8WMetric VP8TDisto4x4, VP8TDisto16x16;
// Compute the average (DC) of four 4x4 blocks.
// Each sub-4x4 block #i sum is stored in dc[i].
typedef void (*VP8MeanMetric)(const uint8_t* ref, uint32_t dc[4]);
extern VP8MeanMetric VP8Mean16x4;
typedef void (*VP8BlockCopy)(const uint8_t* src, uint8_t* dst);
extern VP8BlockCopy VP8Copy4x4;
extern VP8BlockCopy VP8Copy16x8;
// Quantization
struct VP8Matrix; // forward declaration
typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
const struct VP8Matrix* const mtx);
// Same as VP8QuantizeBlock, but quantizes two consecutive blocks.
typedef int (*VP8Quantize2Blocks)(int16_t in[32], int16_t out[32],
const struct VP8Matrix* const mtx);
extern VP8QuantizeBlock VP8EncQuantizeBlock;
extern VP8Quantize2Blocks VP8EncQuantize2Blocks;
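// Illustrative sketch (not part of libwebp): the two-block variant expressed
// in terms of the single-block one. Packing the per-block "has non-zero
// coefficients" flags into bits 0 and 1 of the return value is an assumption
// made for this sketch. Assumes VP8EncDspInit() has been called.
static WEBP_INLINE int Quantize2Blocks_Sketch(
    int16_t in[32], int16_t out[32], const struct VP8Matrix* const mtx) {
  int nz;
  nz  = VP8EncQuantizeBlock(in + 0 * 16, out + 0 * 16, mtx) << 0;
  nz |= VP8EncQuantizeBlock(in + 1 * 16, out + 1 * 16, mtx) << 1;
  return nz;
}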
// specific to 2nd transform:
typedef int (*VP8QuantizeBlockWHT)(int16_t in[16], int16_t out[16],
const struct VP8Matrix* const mtx);
extern VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT;
extern const int VP8DspScan[16 + 4 + 4];
// Collect histogram for susceptibility calculation.
#define MAX_COEFF_THRESH 31 // size of histogram used by CollectHistogram.
typedef struct {
// We only need to store max_value and last_non_zero, not the distribution.
int max_value;
int last_non_zero;
} VP8Histogram;
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo);
extern VP8CHisto VP8CollectHistogram;
// General-purpose util function to help VP8CollectHistogram().
void VP8SetHistogramData(const int distribution[MAX_COEFF_THRESH + 1],
VP8Histogram* const histo);
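// Illustrative sketch (not part of libwebp): deriving the two VP8Histogram
// fields from a full distribution[], i.e. what a helper in the spirit of
// VP8SetHistogramData() has to compute. The fallback value of 1 for
// 'last_non_zero' on an empty distribution is an assumption of this sketch.
static WEBP_INLINE void SetHistogramData_Sketch(
    const int distribution[MAX_COEFF_THRESH + 1], VP8Histogram* const histo) {
  int max_value = 0, last_non_zero = 1;
  int k;
  for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
    const int value = distribution[k];
    if (value > 0) {
      if (value > max_value) max_value = value;
      last_non_zero = k;
    }
  }
  histo->max_value = max_value;
  histo->last_non_zero = last_non_zero;
}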
// must be called before using any of the above
void VP8EncDspInit(void);
//------------------------------------------------------------------------------
// cost functions (encoding)
extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
// approximate cost per level:
extern const uint16_t VP8LevelFixedCosts[2047 /*MAX_LEVEL*/ + 1];
extern const uint8_t VP8EncBands[16 + 1];
struct VP8Residual;
typedef void (*VP8SetResidualCoeffsFunc)(const int16_t* const coeffs,
struct VP8Residual* const res);
extern VP8SetResidualCoeffsFunc VP8SetResidualCoeffs;
// Cost calculation function.
typedef int (*VP8GetResidualCostFunc)(int ctx0,
const struct VP8Residual* const res);
extern VP8GetResidualCostFunc VP8GetResidualCost;
// must be called before anything using the above
void VP8EncDspCostInit(void);
//------------------------------------------------------------------------------
// SSIM / PSNR utils
// struct for accumulating statistical moments
typedef struct {
uint32_t w; // sum(w_i) : sum of weights
uint32_t xm, ym; // sum(w_i * x_i), sum(w_i * y_i)
uint32_t xxm, xym, yym; // sum(w_i * x_i * x_i), etc.
} VP8DistoStats;
// Compute the final SSIM value
// The non-clipped version assumes stats->w = (2 * VP8_SSIM_KERNEL + 1)^2.
double VP8SSIMFromStats(const VP8DistoStats* const stats);
double VP8SSIMFromStatsClipped(const VP8DistoStats* const stats);
#define VP8_SSIM_KERNEL 3 // total size of the kernel: 2 * VP8_SSIM_KERNEL + 1
typedef double (*VP8SSIMGetClippedFunc)(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2,
int xo, int yo, // center position
int W, int H); // plane dimension
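// Illustrative sketch (not part of libwebp): accumulating the VP8DistoStats
// moments over a (2 * VP8_SSIM_KERNEL + 1)^2 window with unit weights
// (w_i == 1), matching the note on the non-clipped version above. 'src1' and
// 'src2' are assumed to point at the window centers with every offset in the
// window addressable (the "clipped" variant exists for plane edges).
static WEBP_INLINE void AccumulateSSIMStats_Sketch(
    const uint8_t* src1, int stride1, const uint8_t* src2, int stride2,
    VP8DistoStats* const stats) {
  int x, y;
  for (y = -VP8_SSIM_KERNEL; y <= VP8_SSIM_KERNEL; ++y) {
    for (x = -VP8_SSIM_KERNEL; x <= VP8_SSIM_KERNEL; ++x) {
      const uint32_t s1 = src1[x + y * stride1];
      const uint32_t s2 = src2[x + y * stride2];
      stats->w   += 1;
      stats->xm  += s1;
      stats->ym  += s2;
      stats->xxm += s1 * s1;
      stats->xym += s1 * s2;
      stats->yym += s2 * s2;
    }
  }
}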
#if !defined(WEBP_REDUCE_SIZE)
// This version is called with the guarantee that you can load 8 bytes and
// 8 rows at offset src1 and src2
typedef double (*VP8SSIMGetFunc)(const uint8_t* src1, int stride1,
const uint8_t* src2, int stride2);
extern VP8SSIMGetFunc VP8SSIMGet; // unclipped / unchecked
extern VP8SSIMGetClippedFunc VP8SSIMGetClipped; // with clipping
#endif
#if !defined(WEBP_DISABLE_STATS)
typedef uint32_t (*VP8AccumulateSSEFunc)(const uint8_t* src1,
const uint8_t* src2, int len);
extern VP8AccumulateSSEFunc VP8AccumulateSSE;
#endif
// must be called before using any of the above directly
void VP8SSIMDspInit(void);
//------------------------------------------------------------------------------
// Decoding
typedef void (*VP8DecIdct)(const int16_t* coeffs, uint8_t* dst);
// when doing two transforms, coeffs is actually int16_t[2][16].
typedef void (*VP8DecIdct2)(const int16_t* coeffs, uint8_t* dst, int do_two);
extern VP8DecIdct2 VP8Transform;
extern VP8DecIdct VP8TransformAC3;
extern VP8DecIdct VP8TransformUV;
extern VP8DecIdct VP8TransformDC;
extern VP8DecIdct VP8TransformDCUV;
extern VP8WHT VP8TransformWHT;
// *dst is the destination block, with stride BPS. Boundary samples are
// assumed accessible when needed.
typedef void (*VP8PredFunc)(uint8_t* dst);
extern VP8PredFunc VP8PredLuma16[/* NUM_B_DC_MODES */];
extern VP8PredFunc VP8PredChroma8[/* NUM_B_DC_MODES */];
extern VP8PredFunc VP8PredLuma4[/* NUM_BMODES */];
// clipping tables (for filtering)
extern const int8_t* const VP8ksclip1; // clips [-1020, 1020] to [-128, 127]
extern const int8_t* const VP8ksclip2; // clips [-112, 112] to [-16, 15]
extern const uint8_t* const VP8kclip1; // clips [-255,511] to [0,255]
extern const uint8_t* const VP8kabs0; // abs(x) for x in [-255,255]
// must be called first
void VP8InitClipTables(void);
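// Illustrative sketch (not part of libwebp): how the clipping tables above are
// meant to be used. Indexing with negative values over the stated input ranges
// is assumed to be valid here (the exported pointers are taken to land inside
// larger backing tables). Assumes VP8InitClipTables() has been called.
static WEBP_INLINE uint8_t ClipTo8b_Sketch(int v) {  // v in [-255, 511]
  return VP8kclip1[v];
}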
// simple filter (only for luma)
typedef void (*VP8SimpleFilterFunc)(uint8_t* p, int stride, int thresh);
extern VP8SimpleFilterFunc VP8SimpleVFilter16;
extern VP8SimpleFilterFunc VP8SimpleHFilter16;
extern VP8SimpleFilterFunc VP8SimpleVFilter16i; // filter 3 inner edges
extern VP8SimpleFilterFunc VP8SimpleHFilter16i;
// regular filter (on both macroblock edges and inner edges)
typedef void (*VP8LumaFilterFunc)(uint8_t* luma, int stride,
int thresh, int ithresh, int hev_t);
typedef void (*VP8ChromaFilterFunc)(uint8_t* u, uint8_t* v, int stride,
int thresh, int ithresh, int hev_t);
// on outer edge
extern VP8LumaFilterFunc VP8VFilter16;
extern VP8LumaFilterFunc VP8HFilter16;
extern VP8ChromaFilterFunc VP8VFilter8;
extern VP8ChromaFilterFunc VP8HFilter8;
// on inner edge
extern VP8LumaFilterFunc VP8VFilter16i; // filtering 3 inner edges altogether
extern VP8LumaFilterFunc VP8HFilter16i;
extern VP8ChromaFilterFunc VP8VFilter8i; // filtering u and v altogether
extern VP8ChromaFilterFunc VP8HFilter8i;
// Dithering. Combines dithering values (centered around 128) with dst[],
// according to: dst[] = clip(dst[] + (((dither[]-128) + 8) >> 4))
#define VP8_DITHER_DESCALE 4
#define VP8_DITHER_DESCALE_ROUNDER (1 << (VP8_DITHER_DESCALE - 1))
#define VP8_DITHER_AMP_BITS 7
#define VP8_DITHER_AMP_CENTER (1 << VP8_DITHER_AMP_BITS)
extern void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst,
int dst_stride);
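// Illustrative sketch (not part of libwebp): a plain scalar version of the
// 8x8 dither-combine formula above, spelled out with the VP8_DITHER_*
// constants (VP8_DITHER_AMP_CENTER == 128, rounder == 8, descale == 4).
// The 8-byte stride of the dither[] rows is an assumption of this sketch.
static WEBP_INLINE void DitherCombine8x8_Sketch(const uint8_t* dither,
                                                uint8_t* dst, int dst_stride) {
  int i, j;
  for (j = 0; j < 8; ++j) {
    for (i = 0; i < 8; ++i) {
      const int delta = ((dither[i] - VP8_DITHER_AMP_CENTER) +
                         VP8_DITHER_DESCALE_ROUNDER) >> VP8_DITHER_DESCALE;
      const int v = dst[i] + delta;
      dst[i] = (v < 0) ? 0 : (v > 255) ? 255 : (uint8_t)v;   // clip to 8bit
    }
    dst += dst_stride;
    dither += 8;
  }
}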
// must be called before anything using the above
void VP8DspInit(void);
//------------------------------------------------------------------------------
// WebP I/O
#define FANCY_UPSAMPLING // undefined to remove fancy upsampling support
// Convert a pair of y/u/v lines together to the output rgb/a colorspace.
// bottom_y can be NULL if only one line of output is needed (at top/bottom).
typedef void (*WebPUpsampleLinePairFunc)(
const uint8_t* top_y, const uint8_t* bottom_y,
const uint8_t* top_u, const uint8_t* top_v,
const uint8_t* cur_u, const uint8_t* cur_v,
uint8_t* top_dst, uint8_t* bottom_dst, int len);
#ifdef FANCY_UPSAMPLING
// Fancy upsampling functions to convert YUV to RGB(A) modes
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
#endif // FANCY_UPSAMPLING
// Per-row point-sampling methods.
typedef void (*WebPSamplerRowFunc)(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len);
// Generic function to apply 'WebPSamplerRowFunc' to the whole plane:
void WebPSamplerProcessPlane(const uint8_t* y, int y_stride,
const uint8_t* u, const uint8_t* v, int uv_stride,
uint8_t* dst, int dst_stride,
int width, int height, WebPSamplerRowFunc func);
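// Illustrative sketch (not part of libwebp): the kind of loop the generic
// helper above runs, with 4:2:0 chroma assumed (one u/v row is shared by two
// luma rows, so u/v only advance on odd rows).
static WEBP_INLINE void SamplerProcessPlane_Sketch(
    const uint8_t* y, int y_stride, const uint8_t* u, const uint8_t* v,
    int uv_stride, uint8_t* dst, int dst_stride, int width, int height,
    WebPSamplerRowFunc func) {
  int j;
  for (j = 0; j < height; ++j) {
    func(y, u, v, dst, width);
    y += y_stride;
    if (j & 1) {        // advance the subsampled chroma rows every other line
      u += uv_stride;
      v += uv_stride;
    }
    dst += dst_stride;
  }
}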
// Sampling functions to convert rows of YUV to RGB(A)
extern WebPSamplerRowFunc WebPSamplers[/* MODE_LAST */];
// General function for converting two lines of ARGB or RGBA.
// 'alpha_is_last' should be true if 0xff000000 is stored in memory
// as 0x00, 0x00, 0x00, 0xff (little endian).
WebPUpsampleLinePairFunc WebPGetLinePairConverter(int alpha_is_last);
// YUV444->RGB converters
typedef void (*WebPYUV444Converter)(const uint8_t* y,
const uint8_t* u, const uint8_t* v,
uint8_t* dst, int len);
extern WebPYUV444Converter WebPYUV444Converters[/* MODE_LAST */];
// Must be called before using the WebPUpsamplers[] (and for premultiplied
// colorspaces like rgbA, rgbA4444, etc)
void WebPInitUpsamplers(void);
// Must be called before using WebPSamplers[]
void WebPInitSamplers(void);
// Must be called before using WebPYUV444Converters[]
void WebPInitYUV444Converters(void);
//------------------------------------------------------------------------------
// ARGB -> YUV converters
// Convert ARGB samples to luma Y.
extern void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
// Convert ARGB samples to U/V with downsampling. do_store should be '1' for
// even lines and '0' for odd ones. 'src_width' is the original width, not
// the U/V one.
extern void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
int src_width, int do_store);
// Convert a row of accumulated (four-sample) rgba32 values to U/V
extern void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width);
// Convert RGB or BGR to Y
extern void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
extern void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
// used for plain-C fallback.
extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
int src_width, int do_store);
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
uint8_t* u, uint8_t* v, int width);
// utilities for accurate RGB->YUV conversion
extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
uint16_t* dst, int len);
extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
int16_t* dst, int len);
extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
int len,
const uint16_t* best_y, uint16_t* out);
// Must be called before using the above.
void WebPInitConvertARGBToYUV(void);
//------------------------------------------------------------------------------
// Rescaler
struct WebPRescaler;
// Import a row of data and save its contribution in the rescaler.
// 'channel' denotes the channel number to be imported. 'Expand' corresponds to
// the wrk->x_expand case. Otherwise, 'Shrink' is to be used.
typedef void (*WebPRescalerImportRowFunc)(struct WebPRescaler* const wrk,
const uint8_t* src);
extern WebPRescalerImportRowFunc WebPRescalerImportRowExpand;
extern WebPRescalerImportRowFunc WebPRescalerImportRowShrink;
// Export one row (starting at x_out position) from rescaler.
// 'Expand' corresponds to the wrk->y_expand case.
// Otherwise 'Shrink' is to be used
typedef void (*WebPRescalerExportRowFunc)(struct WebPRescaler* const wrk);
extern WebPRescalerExportRowFunc WebPRescalerExportRowExpand;
extern WebPRescalerExportRowFunc WebPRescalerExportRowShrink;
// Plain-C implementation, as fall-back.
extern void WebPRescalerImportRowExpand_C(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerImportRowShrink_C(struct WebPRescaler* const wrk,
const uint8_t* src);
extern void WebPRescalerExportRowExpand_C(struct WebPRescaler* const wrk);
extern void WebPRescalerExportRowShrink_C(struct WebPRescaler* const wrk);
// Main entry calls:
extern void WebPRescalerImportRow(struct WebPRescaler* const wrk,
const uint8_t* src);
// Export one row (starting at x_out position) from rescaler.
extern void WebPRescalerExportRow(struct WebPRescaler* const wrk);
// Must be called first before using the above.
void WebPRescalerDspInit(void);
//------------------------------------------------------------------------------
// Utilities for processing transparent channel.
// Apply alpha pre-multiply on an rgba, bgra or argb plane of size w * h.
// alpha_first should be 0 for argb, 1 for rgba or bgra (where alpha is last).
extern void (*WebPApplyAlphaMultiply)(
uint8_t* rgba, int alpha_first, int w, int h, int stride);
// Same, but specifically for the RGBA4444 format
extern void (*WebPApplyAlphaMultiply4444)(
uint8_t* rgba4444, int w, int h, int stride);
// Dispatch the values from alpha[] plane to the ARGB destination 'dst'.
// Returns true if alpha[] plane has non-trivial values different from 0xff.
extern int (*WebPDispatchAlpha)(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint8_t* dst, int dst_stride);
// Transfer packed 8b alpha[] values to green channel in dst[], zeroing the
// A/R/B values. 'dst_stride' is the stride for dst[] in uint32_t units.
extern void (*WebPDispatchAlphaToGreen)(const uint8_t* alpha, int alpha_stride,
int width, int height,
uint32_t* dst, int dst_stride);
// Extract the alpha values from 32b values in argb[] and pack them into alpha[]
// (this is the opposite of WebPDispatchAlpha).
// Returns true if there's only trivial 0xff alpha values.
extern int (*WebPExtractAlpha)(const uint8_t* argb, int argb_stride,
int width, int height,
uint8_t* alpha, int alpha_stride);
// Extract the green values from 32b values in argb[] and pack them into alpha[]
// (this is the opposite of WebPDispatchAlphaToGreen).
extern void (*WebPExtractGreen)(const uint32_t* argb, uint8_t* alpha, int size);
// Pre-Multiply operation transforms x into x * A / 255 (where x=Y,R,G or B).
// Un-Multiply operation transforms x into x * 255 / A.
// Pre-Multiply or Un-Multiply (if 'inverse' is true) argb values in a row.
extern void (*WebPMultARGBRow)(uint32_t* const ptr, int width, int inverse);
// Same as WebPMultARGBRow(), but for several rows.
void WebPMultARGBRows(uint8_t* ptr, int stride, int width, int num_rows,
int inverse);
// Same for a row of single values, with side alpha values.
extern void (*WebPMultRow)(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse);
// Same as WebPMultRow(), but for several 'num_rows' rows.
void WebPMultRows(uint8_t* ptr, int stride,
const uint8_t* alpha, int alpha_stride,
int width, int num_rows, int inverse);
// Plain-C versions, used as fallback by some implementations.
void WebPMultRow_C(uint8_t* const ptr, const uint8_t* const alpha,
int width, int inverse);
void WebPMultARGBRow_C(uint32_t* const ptr, int width, int inverse);
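// Illustrative sketch (not part of libwebp): the per-sample arithmetic behind
// the row helpers above, for one value 'x' with alpha 'a'. The rounding and
// the a == 0 handling are assumptions made for this sketch only; the
// dispatched implementations are free to use their own (fixed-point) scheme.
static WEBP_INLINE uint8_t MultiplySample_Sketch(uint8_t x, uint8_t a,
                                                 int inverse) {
  if (inverse) {                                  // un-multiply: x * 255 / A
    const int v = (a == 0) ? 0 : (x * 255 + a / 2) / a;
    return (v > 255) ? 255 : (uint8_t)v;
  }
  return (uint8_t)((x * a + 127) / 255);          // pre-multiply: x * A / 255
}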
#ifdef WORDS_BIGENDIAN
// ARGB packing function: a/r/g/b input is rgba or bgra order.
extern void (*WebPPackARGB)(const uint8_t* a, const uint8_t* r,
const uint8_t* g, const uint8_t* b, int len,
uint32_t* out);
#endif
// RGB packing function. 'step' can be 3 or 4. r/g/b input is rgb or bgr order.
extern void (*WebPPackRGB)(const uint8_t* r, const uint8_t* g, const uint8_t* b,
int len, int step, uint32_t* out);
// This function returns true if src[i] contains a value different from 0xff.
extern int (*WebPHasAlpha8b)(const uint8_t* src, int length);
// This function returns true if src[4*i] contains a value different from 0xff.
extern int (*WebPHasAlpha32b)(const uint8_t* src, int length);
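// Illustrative sketch (not part of libwebp): the scalar behaviour described
// above for the 8-bit alpha check (the dispatched pointer may use a SIMD
// variant instead).
static WEBP_INLINE int HasAlpha8b_Sketch(const uint8_t* src, int length) {
  while (length-- > 0) {
    if (*src++ != 0xff) return 1;
  }
  return 0;
}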
// To be called first before using the above.
void WebPInitAlphaProcessing(void);
//------------------------------------------------------------------------------
// Filter functions
typedef enum { // Filter types.
WEBP_FILTER_NONE = 0,
WEBP_FILTER_HORIZONTAL,
WEBP_FILTER_VERTICAL,
WEBP_FILTER_GRADIENT,
WEBP_FILTER_LAST = WEBP_FILTER_GRADIENT + 1, // end marker
WEBP_FILTER_BEST, // meta-types
WEBP_FILTER_FAST
} WEBP_FILTER_TYPE;
typedef void (*WebPFilterFunc)(const uint8_t* in, int width, int height,
int stride, uint8_t* out);
// In-place un-filtering.
// Warning! 'prev_line' pointer can be equal to 'cur_line' or 'preds'.
typedef void (*WebPUnfilterFunc)(const uint8_t* prev_line, const uint8_t* preds,
uint8_t* cur_line, int width);
// Filter the given data using the given predictor.
// 'in' corresponds to a 2-dimensional pixel array of size (stride * height)
// in raster order.
// 'stride' is number of bytes per scan line (with possible padding).
// 'out' should be pre-allocated.
extern WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
// In-place reconstruct the original data from the given filtered data.
// The reconstruction will be done for 'num_rows' rows starting from 'row'
// (assuming rows up to 'row - 1' are already reconstructed).
extern WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
// To be called first before using the above.
void VP8FiltersInit(void);
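// Illustrative sketch (not part of libwebp): filtering a plane with the
// horizontal predictor and then reconstructing it in place, row by row, using
// the arrays above. Assumes VP8FiltersInit() has been called and that 'tmp'
// holds stride * height bytes.
static WEBP_INLINE void FilterRoundTrip_Sketch(const uint8_t* in, uint8_t* tmp,
                                               int width, int height,
                                               int stride) {
  int row;
  WebPFilters[WEBP_FILTER_HORIZONTAL](in, width, height, stride, tmp);
  for (row = 0; row < height; ++row) {
    const uint8_t* const prev = (row == 0) ? NULL : tmp + (row - 1) * stride;
    // Reconstruct in place: the filtered row is overwritten with its output.
    WebPUnfilters[WEBP_FILTER_HORIZONTAL](prev, tmp + row * stride,
                                          tmp + row * stride, width);
  }
}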
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* WEBP_DSP_DSP_H_ */

View file

@@ -0,0 +1,287 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Spatial prediction using various filters
//
// Author: Urvang (urvang@google.com)
#include "src/dsp/dsp.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
//------------------------------------------------------------------------------
// Helpful macro.
# define SANITY_CHECK(in, out) \
assert((in) != NULL); \
assert((out) != NULL); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
(void)height; // Silence unused warning.
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length, int inverse) {
int i;
if (inverse) {
for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i];
} else {
for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i];
}
}
//------------------------------------------------------------------------------
// Horizontal filter.
static WEBP_INLINE void DoHorizontalFilter_C(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
row = 1;
preds += stride;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
// Leftmost pixel is predicted from above.
PredictLine_C(in, preds - stride, out, 1, inverse);
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
++row;
preds += stride;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Vertical filter.
static WEBP_INLINE void DoVerticalFilter_C(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
if (row == 0) {
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
row = 1;
in += stride;
out += stride;
} else {
// We are starting from in-between. Make sure 'preds' points to prev row.
preds -= stride;
}
// Filter line-by-line.
while (row < last_row) {
PredictLine_C(in, preds, out, width, inverse);
++row;
preds += stride;
in += stride;
out += stride;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
// Gradient filter.
static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
const int g = a + b - c;
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
}
#if !WEBP_NEON_OMIT_C_CODE
static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
int inverse, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = inverse ? out : in;
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLine_C(in + 1, preds, out + 1, width - 1, inverse);
row = 1;
preds += stride;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
int w;
// leftmost pixel: predict from above.
PredictLine_C(in, preds - stride, out, 1, inverse);
for (w = 1; w < width; ++w) {
const int pred = GradientPredictor_C(preds[w - 1],
preds[w - stride],
preds[w - stride - 1]);
out[w] = in[w] + (inverse ? pred : -pred);
}
++row;
preds += stride;
in += stride;
out += stride;
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
#undef SANITY_CHECK
//------------------------------------------------------------------------------
#if !WEBP_NEON_OMIT_C_CODE
static void HorizontalFilter_C(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter_C(data, width, height, stride, 0, height, 0,
filtered_data);
}
static void VerticalFilter_C(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
}
static void GradientFilter_C(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter_C(data, width, height, stride, 0, height, 0, filtered_data);
}
#endif // !WEBP_NEON_OMIT_C_CODE
//------------------------------------------------------------------------------
static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
uint8_t pred = (prev == NULL) ? 0 : prev[0];
int i;
for (i = 0; i < width; ++i) {
out[i] = pred + in[i];
pred = out[i];
}
}
#if !WEBP_NEON_OMIT_C_CODE
static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_C(NULL, in, out, width);
} else {
int i;
for (i = 0; i < width; ++i) out[i] = prev[i] + in[i];
}
}
#endif // !WEBP_NEON_OMIT_C_CODE
static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_C(NULL, in, out, width);
} else {
uint8_t top = prev[0], top_left = top, left = top;
int i;
for (i = 0; i < width; ++i) {
top = prev[i]; // need to read this first, in case prev==out
left = in[i] + GradientPredictor_C(left, top, top_left);
top_left = top;
out[i] = left;
}
}
}
//------------------------------------------------------------------------------
// Init function
WebPFilterFunc WebPFilters[WEBP_FILTER_LAST];
WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST];
extern void VP8FiltersInitMIPSdspR2(void);
extern void VP8FiltersInitMSA(void);
extern void VP8FiltersInitNEON(void);
extern void VP8FiltersInitSSE2(void);
WEBP_DSP_INIT_FUNC(VP8FiltersInit) {
WebPUnfilters[WEBP_FILTER_NONE] = NULL;
#if !WEBP_NEON_OMIT_C_CODE
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_C;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_C;
#endif
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_C;
WebPFilters[WEBP_FILTER_NONE] = NULL;
#if !WEBP_NEON_OMIT_C_CODE
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_C;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_C;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_C;
#endif
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8FiltersInitSSE2();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8FiltersInitMIPSdspR2();
}
#endif
#if defined(WEBP_USE_MSA)
if (VP8GetCPUInfo(kMSA)) {
VP8FiltersInitMSA();
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
VP8FiltersInitNEON();
}
#endif
assert(WebPUnfilters[WEBP_FILTER_HORIZONTAL] != NULL);
assert(WebPUnfilters[WEBP_FILTER_VERTICAL] != NULL);
assert(WebPUnfilters[WEBP_FILTER_GRADIENT] != NULL);
assert(WebPFilters[WEBP_FILTER_HORIZONTAL] != NULL);
assert(WebPFilters[WEBP_FILTER_VERTICAL] != NULL);
assert(WebPFilters[WEBP_FILTER_GRADIENT] != NULL);
}

View file

@@ -0,0 +1,402 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Spatial prediction using various filters
//
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
// Djordje Pesut (djordje.pesut@imgtec.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MIPS_DSP_R2)
#include "src/dsp/dsp.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
//------------------------------------------------------------------------------
// Helpful macro.
# define SANITY_CHECK(in, out) \
assert(in != NULL); \
assert(out != NULL); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
(void)height; // Silence unused warning.
#define DO_PREDICT_LINE(SRC, DST, LENGTH, INVERSE) do { \
const uint8_t* psrc = (uint8_t*)(SRC); \
uint8_t* pdst = (uint8_t*)(DST); \
const int ilength = (int)(LENGTH); \
int temp0, temp1, temp2, temp3, temp4, temp5, temp6; \
__asm__ volatile ( \
".set push \n\t" \
".set noreorder \n\t" \
"srl %[temp0], %[length], 2 \n\t" \
"beqz %[temp0], 4f \n\t" \
" andi %[temp6], %[length], 3 \n\t" \
".if " #INVERSE " \n\t" \
"1: \n\t" \
"lbu %[temp1], -1(%[dst]) \n\t" \
"lbu %[temp2], 0(%[src]) \n\t" \
"lbu %[temp3], 1(%[src]) \n\t" \
"lbu %[temp4], 2(%[src]) \n\t" \
"lbu %[temp5], 3(%[src]) \n\t" \
"addu %[temp1], %[temp1], %[temp2] \n\t" \
"addu %[temp2], %[temp1], %[temp3] \n\t" \
"addu %[temp3], %[temp2], %[temp4] \n\t" \
"addu %[temp4], %[temp3], %[temp5] \n\t" \
"sb %[temp1], 0(%[dst]) \n\t" \
"sb %[temp2], 1(%[dst]) \n\t" \
"sb %[temp3], 2(%[dst]) \n\t" \
"sb %[temp4], 3(%[dst]) \n\t" \
"addiu %[src], %[src], 4 \n\t" \
"addiu %[temp0], %[temp0], -1 \n\t" \
"bnez %[temp0], 1b \n\t" \
" addiu %[dst], %[dst], 4 \n\t" \
".else \n\t" \
"1: \n\t" \
"ulw %[temp1], -1(%[src]) \n\t" \
"ulw %[temp2], 0(%[src]) \n\t" \
"addiu %[src], %[src], 4 \n\t" \
"addiu %[temp0], %[temp0], -1 \n\t" \
"subu.qb %[temp3], %[temp2], %[temp1] \n\t" \
"usw %[temp3], 0(%[dst]) \n\t" \
"bnez %[temp0], 1b \n\t" \
" addiu %[dst], %[dst], 4 \n\t" \
".endif \n\t" \
"4: \n\t" \
"beqz %[temp6], 3f \n\t" \
" nop \n\t" \
"2: \n\t" \
"lbu %[temp2], 0(%[src]) \n\t" \
".if " #INVERSE " \n\t" \
"lbu %[temp1], -1(%[dst]) \n\t" \
"addu %[temp3], %[temp1], %[temp2] \n\t" \
".else \n\t" \
"lbu %[temp1], -1(%[src]) \n\t" \
"subu %[temp3], %[temp1], %[temp2] \n\t" \
".endif \n\t" \
"addiu %[src], %[src], 1 \n\t" \
"sb %[temp3], 0(%[dst]) \n\t" \
"addiu %[temp6], %[temp6], -1 \n\t" \
"bnez %[temp6], 2b \n\t" \
" addiu %[dst], %[dst], 1 \n\t" \
"3: \n\t" \
".set pop \n\t" \
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), \
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), \
[temp6]"=&r"(temp6), [dst]"+&r"(pdst), [src]"+&r"(psrc) \
: [length]"r"(ilength) \
: "memory" \
); \
} while (0)
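// In short: with INVERSE == 0 the block above computes dst[i] = src[i] - src[i - 1]
// four bytes at a time via subu.qb (the forward left-prediction filter); with
// INVERSE == 1 it accumulates a running byte-wise sum dst[i] = dst[i - 1] + src[i]
// (the inverse, i.e. the unfilter). temp6 drains the length % 4 tail one byte at a time.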
static WEBP_INLINE void PredictLine_MIPSdspR2(const uint8_t* src, uint8_t* dst,
int length) {
DO_PREDICT_LINE(src, dst, length, 0);
}
#define DO_PREDICT_LINE_VERTICAL(SRC, PRED, DST, LENGTH, INVERSE) do { \
const uint8_t* psrc = (uint8_t*)(SRC); \
const uint8_t* ppred = (uint8_t*)(PRED); \
uint8_t* pdst = (uint8_t*)(DST); \
const int ilength = (int)(LENGTH); \
int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; \
__asm__ volatile ( \
".set push \n\t" \
".set noreorder \n\t" \
"srl %[temp0], %[length], 0x3 \n\t" \
"beqz %[temp0], 4f \n\t" \
" andi %[temp7], %[length], 0x7 \n\t" \
"1: \n\t" \
"ulw %[temp1], 0(%[src]) \n\t" \
"ulw %[temp2], 0(%[pred]) \n\t" \
"ulw %[temp3], 4(%[src]) \n\t" \
"ulw %[temp4], 4(%[pred]) \n\t" \
"addiu %[src], %[src], 8 \n\t" \
".if " #INVERSE " \n\t" \
"addu.qb %[temp5], %[temp1], %[temp2] \n\t" \
"addu.qb %[temp6], %[temp3], %[temp4] \n\t" \
".else \n\t" \
"subu.qb %[temp5], %[temp1], %[temp2] \n\t" \
"subu.qb %[temp6], %[temp3], %[temp4] \n\t" \
".endif \n\t" \
"addiu %[pred], %[pred], 8 \n\t" \
"usw %[temp5], 0(%[dst]) \n\t" \
"usw %[temp6], 4(%[dst]) \n\t" \
"addiu %[temp0], %[temp0], -1 \n\t" \
"bnez %[temp0], 1b \n\t" \
" addiu %[dst], %[dst], 8 \n\t" \
"4: \n\t" \
"beqz %[temp7], 3f \n\t" \
" nop \n\t" \
"2: \n\t" \
"lbu %[temp1], 0(%[src]) \n\t" \
"lbu %[temp2], 0(%[pred]) \n\t" \
"addiu %[src], %[src], 1 \n\t" \
"addiu %[pred], %[pred], 1 \n\t" \
".if " #INVERSE " \n\t" \
"addu %[temp3], %[temp1], %[temp2] \n\t" \
".else \n\t" \
"subu %[temp3], %[temp1], %[temp2] \n\t" \
".endif \n\t" \
"sb %[temp3], 0(%[dst]) \n\t" \
"addiu %[temp7], %[temp7], -1 \n\t" \
"bnez %[temp7], 2b \n\t" \
" addiu %[dst], %[dst], 1 \n\t" \
"3: \n\t" \
".set pop \n\t" \
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), \
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), \
[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [pred]"+&r"(ppred), \
[dst]"+&r"(pdst), [src]"+&r"(psrc) \
: [length]"r"(ilength) \
: "memory" \
); \
} while (0)
#define PREDICT_LINE_ONE_PASS(SRC, PRED, DST) do { \
int temp1, temp2, temp3; \
__asm__ volatile ( \
"lbu %[temp1], 0(%[src]) \n\t" \
"lbu %[temp2], 0(%[pred]) \n\t" \
"subu %[temp3], %[temp1], %[temp2] \n\t" \
"sb %[temp3], 0(%[dst]) \n\t" \
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3) \
: [pred]"r"((PRED)), [dst]"r"((DST)), [src]"r"((SRC)) \
: "memory" \
); \
} while (0)
//------------------------------------------------------------------------------
// Horizontal filter.
#define FILTER_LINE_BY_LINE do { \
while (row < last_row) { \
PREDICT_LINE_ONE_PASS(in, preds - stride, out); \
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 0); \
++row; \
preds += stride; \
in += stride; \
out += stride; \
} \
} while (0)
static WEBP_INLINE void DoHorizontalFilter_MIPSdspR2(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = in;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
row = 1;
preds += stride;
in += stride;
out += stride;
}
// Filter line-by-line.
FILTER_LINE_BY_LINE;
}
#undef FILTER_LINE_BY_LINE
static void HorizontalFilter_MIPSdspR2(const uint8_t* data,
int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter_MIPSdspR2(data, width, height, stride, 0, height,
filtered_data);
}
//------------------------------------------------------------------------------
// Vertical filter.
#define FILTER_LINE_BY_LINE do { \
while (row < last_row) { \
DO_PREDICT_LINE_VERTICAL(in, preds, out, width, 0); \
++row; \
preds += stride; \
in += stride; \
out += stride; \
} \
} while (0)
static WEBP_INLINE void DoVerticalFilter_MIPSdspR2(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = in;
if (row == 0) {
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
} else {
// We are starting from in-between. Make sure 'preds' points to prev row.
preds -= stride;
}
// Filter line-by-line.
FILTER_LINE_BY_LINE;
}
#undef FILTER_LINE_BY_LINE
static void VerticalFilter_MIPSdspR2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter_MIPSdspR2(data, width, height, stride, 0, height,
filtered_data);
}
//------------------------------------------------------------------------------
// Gradient filter.
static int GradientPredictor_MIPSdspR2(uint8_t a, uint8_t b, uint8_t c) {
int temp0;
__asm__ volatile (
"addu %[temp0], %[a], %[b] \n\t"
"subu %[temp0], %[temp0], %[c] \n\t"
"shll_s.w %[temp0], %[temp0], 23 \n\t"
"precrqu_s.qb.ph %[temp0], %[temp0], $zero \n\t"
"srl %[temp0], %[temp0], 24 \n\t"
: [temp0]"=&r"(temp0)
: [a]"r"(a),[b]"r"(b),[c]"r"(c)
);
return temp0;
}
#define FILTER_LINE_BY_LINE(PREDS, OPERATION) do { \
while (row < last_row) { \
int w; \
PREDICT_LINE_ONE_PASS(in, PREDS - stride, out); \
for (w = 1; w < width; ++w) { \
const int pred = GradientPredictor_MIPSdspR2(PREDS[w - 1], \
PREDS[w - stride], \
PREDS[w - stride - 1]); \
out[w] = in[w] OPERATION pred; \
} \
++row; \
in += stride; \
out += stride; \
} \
} while (0)
static void DoGradientFilter_MIPSdspR2(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows, uint8_t* out) {
const uint8_t* preds;
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
preds = in;
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLine_MIPSdspR2(in + 1, out + 1, width - 1);
row = 1;
preds += stride;
in += stride;
out += stride;
}
// Filter line-by-line.
FILTER_LINE_BY_LINE(in, -);
}
#undef FILTER_LINE_BY_LINE
static void GradientFilter_MIPSdspR2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter_MIPSdspR2(data, width, height, stride, 0, height,
filtered_data);
}
//------------------------------------------------------------------------------
static void HorizontalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
DO_PREDICT_LINE(in + 1, out + 1, width - 1, 1);
}
static void VerticalUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
} else {
DO_PREDICT_LINE_VERTICAL(in, prev, out, width, 1);
}
}
static void GradientUnfilter_MIPSdspR2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_MIPSdspR2(NULL, in, out, width);
} else {
uint8_t top = prev[0], top_left = top, left = top;
int i;
for (i = 0; i < width; ++i) {
top = prev[i]; // need to read this first, in case prev==out
left = in[i] + GradientPredictor_MIPSdspR2(left, top, top_left);
top_left = top;
out[i] = left;
}
}
}
#undef DO_PREDICT_LINE_VERTICAL
#undef PREDICT_LINE_ONE_PASS
#undef DO_PREDICT_LINE
#undef SANITY_CHECK
//------------------------------------------------------------------------------
// Entry point
extern void VP8FiltersInitMIPSdspR2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMIPSdspR2(void) {
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_MIPSdspR2;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_MIPSdspR2;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_MIPSdspR2;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MIPSdspR2;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MIPSdspR2;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MIPSdspR2;
}
#else // !WEBP_USE_MIPS_DSP_R2
WEBP_DSP_INIT_STUB(VP8FiltersInitMIPSdspR2)
#endif // WEBP_USE_MIPS_DSP_R2

View file

@ -0,0 +1,202 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MSA variant of alpha filters
//
// Author: Prashant Patil (prashant.patil@imgtec.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MSA)
#include "src/dsp/msa_macro.h"
#include <assert.h>
static WEBP_INLINE void PredictLineInverse0(const uint8_t* src,
const uint8_t* pred,
uint8_t* dst, int length) {
v16u8 src0, pred0, dst0;
assert(length >= 0);
while (length >= 32) {
v16u8 src1, pred1, dst1;
LD_UB2(src, 16, src0, src1);
LD_UB2(pred, 16, pred0, pred1);
SUB2(src0, pred0, src1, pred1, dst0, dst1);
ST_UB2(dst0, dst1, dst, 16);
src += 32;
pred += 32;
dst += 32;
length -= 32;
}
if (length > 0) {
int i;
if (length >= 16) {
src0 = LD_UB(src);
pred0 = LD_UB(pred);
dst0 = src0 - pred0;
ST_UB(dst0, dst);
src += 16;
pred += 16;
dst += 16;
length -= 16;
}
for (i = 0; i < length; i++) {
dst[i] = src[i] - pred[i];
}
}
}
//------------------------------------------------------------------------------
// Helpful macro.
#define SANITY_CHECK(in, out) \
assert(in != NULL); \
assert(out != NULL); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width);
//------------------------------------------------------------------------------
// Horizontal filter
static void HorizontalFilter_MSA(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
const uint8_t* preds = data;
const uint8_t* in = data;
uint8_t* out = filtered_data;
int row = 1;
SANITY_CHECK(in, out);
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLineInverse0(in + 1, preds, out + 1, width - 1);
preds += stride;
in += stride;
out += stride;
// Filter line-by-line.
while (row < height) {
// Leftmost pixel is predicted from above.
PredictLineInverse0(in, preds - stride, out, 1);
PredictLineInverse0(in + 1, preds, out + 1, width - 1);
++row;
preds += stride;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Gradient filter
static WEBP_INLINE void PredictLineGradient(const uint8_t* pinput,
const uint8_t* ppred,
uint8_t* poutput, int stride,
int size) {
int w;
const v16i8 zero = { 0 };
while (size >= 16) {
v16u8 pred0, dst0;
v8i16 a0, a1, b0, b1, c0, c1;
const v16u8 tmp0 = LD_UB(ppred - 1);
const v16u8 tmp1 = LD_UB(ppred - stride);
const v16u8 tmp2 = LD_UB(ppred - stride - 1);
const v16u8 src0 = LD_UB(pinput);
ILVRL_B2_SH(zero, tmp0, a0, a1);
ILVRL_B2_SH(zero, tmp1, b0, b1);
ILVRL_B2_SH(zero, tmp2, c0, c1);
ADD2(a0, b0, a1, b1, a0, a1);
SUB2(a0, c0, a1, c1, a0, a1);
CLIP_SH2_0_255(a0, a1);
pred0 = (v16u8)__msa_pckev_b((v16i8)a1, (v16i8)a0);
dst0 = src0 - pred0;
ST_UB(dst0, poutput);
ppred += 16;
pinput += 16;
poutput += 16;
size -= 16;
}
for (w = 0; w < size; ++w) {
const int pred = ppred[w - 1] + ppred[w - stride] - ppred[w - stride - 1];
poutput[w] = pinput[w] - (pred < 0 ? 0 : pred > 255 ? 255 : pred);
}
}
static void GradientFilter_MSA(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
const uint8_t* in = data;
const uint8_t* preds = data;
uint8_t* out = filtered_data;
int row = 1;
SANITY_CHECK(in, out);
// left prediction for top scan-line
out[0] = in[0];
PredictLineInverse0(in + 1, preds, out + 1, width - 1);
preds += stride;
in += stride;
out += stride;
// Filter line-by-line.
while (row < height) {
out[0] = in[0] - preds[-stride];
PredictLineGradient(preds + 1, in + 1, out + 1, stride, width - 1);
++row;
preds += stride;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Vertical filter
static void VerticalFilter_MSA(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
const uint8_t* in = data;
const uint8_t* preds = data;
uint8_t* out = filtered_data;
int row = 1;
SANITY_CHECK(in, out);
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLineInverse0(in + 1, preds, out + 1, width - 1);
in += stride;
out += stride;
// Filter line-by-line.
while (row < height) {
PredictLineInverse0(in, preds, out, width);
++row;
preds += stride;
in += stride;
out += stride;
}
}
#undef SANITY_CHECK
//------------------------------------------------------------------------------
// Entry point
extern void VP8FiltersInitMSA(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitMSA(void) {
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_MSA;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_MSA;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_MSA;
}
#else // !WEBP_USE_MSA
WEBP_DSP_INIT_STUB(VP8FiltersInitMSA)
#endif // WEBP_USE_MSA

View file

@ -0,0 +1,329 @@
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// NEON variant of alpha filters
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_NEON)
#include <assert.h>
#include "src/dsp/neon.h"
//------------------------------------------------------------------------------
// Helpful macros.
# define SANITY_CHECK(in, out) \
assert(in != NULL); \
assert(out != NULL); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
(void)height; // Silence unused warning.
// load eight u8 and widen to s16
#define U8_TO_S16(A) vreinterpretq_s16_u16(vmovl_u8(A))
#define LOAD_U8_TO_S16(A) U8_TO_S16(vld1_u8(A))
// shift left or right by N byte, inserting zeros
#define SHIFT_RIGHT_N_Q(A, N) vextq_u8((A), zero, (N))
#define SHIFT_LEFT_N_Q(A, N) vextq_u8(zero, (A), (16 - (N)) % 16)
// rotate left by N bytes
#define ROTATE_LEFT_N(A, N) vext_u8((A), (A), (N))
// rotate right by N bytes
#define ROTATE_RIGHT_N(A, N) vext_u8((A), (A), (8 - (N)) % 8)
static void PredictLine_NEON(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length) {
int i;
assert(length >= 0);
for (i = 0; i + 16 <= length; i += 16) {
const uint8x16_t A = vld1q_u8(&src[i]);
const uint8x16_t B = vld1q_u8(&pred[i]);
const uint8x16_t C = vsubq_u8(A, B);
vst1q_u8(&dst[i], C);
}
for (; i < length; ++i) dst[i] = src[i] - pred[i];
}
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
static void PredictLineLeft_NEON(const uint8_t* src, uint8_t* dst, int length) {
PredictLine_NEON(src, src - 1, dst, length);
}
//------------------------------------------------------------------------------
// Horizontal filter.
static WEBP_INLINE void DoHorizontalFilter_NEON(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
// Leftmost pixel is predicted from above.
out[0] = in[0] - in[-stride];
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
++row;
in += stride;
out += stride;
}
}
static void HorizontalFilter_NEON(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter_NEON(data, width, height, stride, 0, height,
filtered_data);
}
//------------------------------------------------------------------------------
// Vertical filter.
static WEBP_INLINE void DoVerticalFilter_NEON(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
if (row == 0) {
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
PredictLine_NEON(in, in - stride, out, width);
++row;
in += stride;
out += stride;
}
}
static void VerticalFilter_NEON(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter_NEON(data, width, height, stride, 0, height,
filtered_data);
}
//------------------------------------------------------------------------------
// Gradient filter.
static WEBP_INLINE int GradientPredictor_C(uint8_t a, uint8_t b, uint8_t c) {
const int g = a + b - c;
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
}
static void GradientPredictDirect_NEON(const uint8_t* const row,
const uint8_t* const top,
uint8_t* const out, int length) {
int i;
for (i = 0; i + 8 <= length; i += 8) {
const uint8x8_t A = vld1_u8(&row[i - 1]);
const uint8x8_t B = vld1_u8(&top[i + 0]);
const int16x8_t C = vreinterpretq_s16_u16(vaddl_u8(A, B));
const int16x8_t D = LOAD_U8_TO_S16(&top[i - 1]);
const uint8x8_t E = vqmovun_s16(vsubq_s16(C, D));
const uint8x8_t F = vld1_u8(&row[i + 0]);
vst1_u8(&out[i], vsub_u8(F, E));
}
for (; i < length; ++i) {
out[i] = row[i] - GradientPredictor_C(row[i - 1], top[i], top[i - 1]);
}
}
static WEBP_INLINE void DoGradientFilter_NEON(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLineLeft_NEON(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
out[0] = in[0] - in[-stride];
GradientPredictDirect_NEON(in + 1, in + 1 - stride, out + 1, width - 1);
++row;
in += stride;
out += stride;
}
}
static void GradientFilter_NEON(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter_NEON(data, width, height, stride, 0, height,
filtered_data);
}
#undef SANITY_CHECK
//------------------------------------------------------------------------------
// Inverse transforms
static void HorizontalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
int i;
const uint8x16_t zero = vdupq_n_u8(0);
uint8x16_t last;
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
if (width <= 1) return;
last = vsetq_lane_u8(out[0], zero, 0);
for (i = 1; i + 16 <= width; i += 16) {
const uint8x16_t A0 = vld1q_u8(&in[i]);
const uint8x16_t A1 = vaddq_u8(A0, last);
const uint8x16_t A2 = SHIFT_LEFT_N_Q(A1, 1);
const uint8x16_t A3 = vaddq_u8(A1, A2);
const uint8x16_t A4 = SHIFT_LEFT_N_Q(A3, 2);
const uint8x16_t A5 = vaddq_u8(A3, A4);
const uint8x16_t A6 = SHIFT_LEFT_N_Q(A5, 4);
const uint8x16_t A7 = vaddq_u8(A5, A6);
const uint8x16_t A8 = SHIFT_LEFT_N_Q(A7, 8);
const uint8x16_t A9 = vaddq_u8(A7, A8);
vst1q_u8(&out[i], A9);
last = SHIFT_RIGHT_N_Q(A9, 15);
}
for (; i < width; ++i) out[i] = in[i] + out[i - 1];
}
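// The loop above is a log-step inclusive prefix sum (Hillis-Steele scan) over the
// 16 lanes: adding copies of the vector shifted by 1, 2, 4 and 8 bytes yields
// out[i] = carry + in[i0] + ... + in[i], all modulo 256, which is exactly the
// left unfilter. 'last' then carries lane 15 of the result into the next chunk.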
static void VerticalUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_NEON(NULL, in, out, width);
} else {
int i;
assert(width >= 0);
for (i = 0; i + 16 <= width; i += 16) {
const uint8x16_t A = vld1q_u8(&in[i]);
const uint8x16_t B = vld1q_u8(&prev[i]);
const uint8x16_t C = vaddq_u8(A, B);
vst1q_u8(&out[i], C);
}
for (; i < width; ++i) out[i] = in[i] + prev[i];
}
}
// GradientUnfilter_NEON is correct but slower than the C-version,
// at least on ARM64. For armv7, it's a wash.
// So best is to disable it for now, but keep the idea around...
#if !defined(USE_GRADIENT_UNFILTER)
#define USE_GRADIENT_UNFILTER 0 // ALTERNATE_CODE
#endif
#if (USE_GRADIENT_UNFILTER == 1)
#define GRAD_PROCESS_LANE(L) do { \
const uint8x8_t tmp1 = ROTATE_RIGHT_N(pred, 1); /* rotate predictor in */ \
const int16x8_t tmp2 = vaddq_s16(BC, U8_TO_S16(tmp1)); \
const uint8x8_t delta = vqmovun_s16(tmp2); \
pred = vadd_u8(D, delta); \
out = vext_u8(out, ROTATE_LEFT_N(pred, (L)), 1); \
} while (0)
static void GradientPredictInverse_NEON(const uint8_t* const in,
const uint8_t* const top,
uint8_t* const row, int length) {
if (length > 0) {
int i;
uint8x8_t pred = vdup_n_u8(row[-1]); // left sample
uint8x8_t out = vdup_n_u8(0);
for (i = 0; i + 8 <= length; i += 8) {
const int16x8_t B = LOAD_U8_TO_S16(&top[i + 0]);
const int16x8_t C = LOAD_U8_TO_S16(&top[i - 1]);
const int16x8_t BC = vsubq_s16(B, C); // unclipped gradient basis B - C
const uint8x8_t D = vld1_u8(&in[i]); // base input
GRAD_PROCESS_LANE(0);
GRAD_PROCESS_LANE(1);
GRAD_PROCESS_LANE(2);
GRAD_PROCESS_LANE(3);
GRAD_PROCESS_LANE(4);
GRAD_PROCESS_LANE(5);
GRAD_PROCESS_LANE(6);
GRAD_PROCESS_LANE(7);
vst1_u8(&row[i], out);
}
for (; i < length; ++i) {
row[i] = in[i] + GradientPredictor_C(row[i - 1], top[i], top[i - 1]);
}
}
}
#undef GRAD_PROCESS_LANE
static void GradientUnfilter_NEON(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_NEON(NULL, in, out, width);
} else {
out[0] = in[0] + prev[0]; // predict from above
GradientPredictInverse_NEON(in + 1, prev + 1, out + 1, width - 1);
}
}
#endif // USE_GRADIENT_UNFILTER
//------------------------------------------------------------------------------
// Entry point
extern void VP8FiltersInitNEON(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitNEON(void) {
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_NEON;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_NEON;
#if (USE_GRADIENT_UNFILTER == 1)
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_NEON;
#endif
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_NEON;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_NEON;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_NEON;
}
#else // !WEBP_USE_NEON
WEBP_DSP_INIT_STUB(VP8FiltersInitNEON)
#endif // WEBP_USE_NEON

View file

@ -0,0 +1,333 @@
// Copyright 2015 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 variant of alpha filters
//
// Author: Skal (pascal.massimino@gmail.com)
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_SSE2)
#include <assert.h>
#include <emmintrin.h>
#include <stdlib.h>
#include <string.h>
//------------------------------------------------------------------------------
// Helpful macro.
# define SANITY_CHECK(in, out) \
assert((in) != NULL); \
assert((out) != NULL); \
assert(width > 0); \
assert(height > 0); \
assert(stride >= width); \
assert(row >= 0 && num_rows > 0 && row + num_rows <= height); \
(void)height; // Silence unused warning.
static void PredictLineTop_SSE2(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length) {
int i;
const int max_pos = length & ~31;
assert(length >= 0);
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)&src[i + 0]);
const __m128i A1 = _mm_loadu_si128((const __m128i*)&src[i + 16]);
const __m128i B0 = _mm_loadu_si128((const __m128i*)&pred[i + 0]);
const __m128i B1 = _mm_loadu_si128((const __m128i*)&pred[i + 16]);
const __m128i C0 = _mm_sub_epi8(A0, B0);
const __m128i C1 = _mm_sub_epi8(A1, B1);
_mm_storeu_si128((__m128i*)&dst[i + 0], C0);
_mm_storeu_si128((__m128i*)&dst[i + 16], C1);
}
for (; i < length; ++i) dst[i] = src[i] - pred[i];
}
// Special case for left-based prediction (when preds==dst-1 or preds==src-1).
static void PredictLineLeft_SSE2(const uint8_t* src, uint8_t* dst, int length) {
int i;
const int max_pos = length & ~31;
assert(length >= 0);
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)(src + i + 0 ));
const __m128i B0 = _mm_loadu_si128((const __m128i*)(src + i + 0 - 1));
const __m128i A1 = _mm_loadu_si128((const __m128i*)(src + i + 16 ));
const __m128i B1 = _mm_loadu_si128((const __m128i*)(src + i + 16 - 1));
const __m128i C0 = _mm_sub_epi8(A0, B0);
const __m128i C1 = _mm_sub_epi8(A1, B1);
_mm_storeu_si128((__m128i*)(dst + i + 0), C0);
_mm_storeu_si128((__m128i*)(dst + i + 16), C1);
}
for (; i < length; ++i) dst[i] = src[i] - src[i - 1];
}
//------------------------------------------------------------------------------
// Horizontal filter.
static WEBP_INLINE void DoHorizontalFilter_SSE2(const uint8_t* in,
int width, int height,
int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
if (row == 0) {
// Leftmost pixel is the same as input for topmost scanline.
out[0] = in[0];
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
// Leftmost pixel is predicted from above.
out[0] = in[0] - in[-stride];
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
++row;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Vertical filter.
static WEBP_INLINE void DoVerticalFilter_SSE2(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
if (row == 0) {
// Very first top-left pixel is copied.
out[0] = in[0];
// Rest of top scan-line is left-predicted.
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
PredictLineTop_SSE2(in, in - stride, out, width);
++row;
in += stride;
out += stride;
}
}
//------------------------------------------------------------------------------
// Gradient filter.
static WEBP_INLINE int GradientPredictor_SSE2(uint8_t a, uint8_t b, uint8_t c) {
const int g = a + b - c;
return ((g & ~0xff) == 0) ? g : (g < 0) ? 0 : 255; // clip to 8bit
}
static void GradientPredictDirect_SSE2(const uint8_t* const row,
const uint8_t* const top,
uint8_t* const out, int length) {
const int max_pos = length & ~7;
int i;
const __m128i zero = _mm_setzero_si128();
for (i = 0; i < max_pos; i += 8) {
const __m128i A0 = _mm_loadl_epi64((const __m128i*)&row[i - 1]);
const __m128i B0 = _mm_loadl_epi64((const __m128i*)&top[i]);
const __m128i C0 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);
const __m128i D = _mm_loadl_epi64((const __m128i*)&row[i]);
const __m128i A1 = _mm_unpacklo_epi8(A0, zero);
const __m128i B1 = _mm_unpacklo_epi8(B0, zero);
const __m128i C1 = _mm_unpacklo_epi8(C0, zero);
const __m128i E = _mm_add_epi16(A1, B1);
const __m128i F = _mm_sub_epi16(E, C1);
const __m128i G = _mm_packus_epi16(F, zero);
const __m128i H = _mm_sub_epi8(D, G);
_mm_storel_epi64((__m128i*)(out + i), H);
}
for (; i < length; ++i) {
out[i] = row[i] - GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
}
}
static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
int width, int height, int stride,
int row, int num_rows,
uint8_t* out) {
const size_t start_offset = row * stride;
const int last_row = row + num_rows;
SANITY_CHECK(in, out);
in += start_offset;
out += start_offset;
// left prediction for top scan-line
if (row == 0) {
out[0] = in[0];
PredictLineLeft_SSE2(in + 1, out + 1, width - 1);
row = 1;
in += stride;
out += stride;
}
// Filter line-by-line.
while (row < last_row) {
out[0] = in[0] - in[-stride];
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
++row;
in += stride;
out += stride;
}
}
#undef SANITY_CHECK
//------------------------------------------------------------------------------
static void HorizontalFilter_SSE2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoHorizontalFilter_SSE2(data, width, height, stride, 0, height,
filtered_data);
}
static void VerticalFilter_SSE2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoVerticalFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
}
static void GradientFilter_SSE2(const uint8_t* data, int width, int height,
int stride, uint8_t* filtered_data) {
DoGradientFilter_SSE2(data, width, height, stride, 0, height, filtered_data);
}
//------------------------------------------------------------------------------
// Inverse transforms
static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
int i;
__m128i last;
out[0] = in[0] + (prev == NULL ? 0 : prev[0]);
if (width <= 1) return;
last = _mm_set_epi32(0, 0, 0, out[0]);
for (i = 1; i + 8 <= width; i += 8) {
const __m128i A0 = _mm_loadl_epi64((const __m128i*)(in + i));
const __m128i A1 = _mm_add_epi8(A0, last);
const __m128i A2 = _mm_slli_si128(A1, 1);
const __m128i A3 = _mm_add_epi8(A1, A2);
const __m128i A4 = _mm_slli_si128(A3, 2);
const __m128i A5 = _mm_add_epi8(A3, A4);
const __m128i A6 = _mm_slli_si128(A5, 4);
const __m128i A7 = _mm_add_epi8(A5, A6);
_mm_storel_epi64((__m128i*)(out + i), A7);
last = _mm_srli_epi64(A7, 56);
}
for (; i < width; ++i) out[i] = in[i] + out[i - 1];
}
static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_SSE2(NULL, in, out, width);
} else {
int i;
const int max_pos = width & ~31;
assert(width >= 0);
for (i = 0; i < max_pos; i += 32) {
const __m128i A0 = _mm_loadu_si128((const __m128i*)&in[i + 0]);
const __m128i A1 = _mm_loadu_si128((const __m128i*)&in[i + 16]);
const __m128i B0 = _mm_loadu_si128((const __m128i*)&prev[i + 0]);
const __m128i B1 = _mm_loadu_si128((const __m128i*)&prev[i + 16]);
const __m128i C0 = _mm_add_epi8(A0, B0);
const __m128i C1 = _mm_add_epi8(A1, B1);
_mm_storeu_si128((__m128i*)&out[i + 0], C0);
_mm_storeu_si128((__m128i*)&out[i + 16], C1);
}
for (; i < width; ++i) out[i] = in[i] + prev[i];
}
}
static void GradientPredictInverse_SSE2(const uint8_t* const in,
const uint8_t* const top,
uint8_t* const row, int length) {
if (length > 0) {
int i;
const int max_pos = length & ~7;
const __m128i zero = _mm_setzero_si128();
__m128i A = _mm_set_epi32(0, 0, 0, row[-1]); // left sample
for (i = 0; i < max_pos; i += 8) {
const __m128i tmp0 = _mm_loadl_epi64((const __m128i*)&top[i]);
const __m128i tmp1 = _mm_loadl_epi64((const __m128i*)&top[i - 1]);
const __m128i B = _mm_unpacklo_epi8(tmp0, zero);
const __m128i C = _mm_unpacklo_epi8(tmp1, zero);
const __m128i D = _mm_loadl_epi64((const __m128i*)&in[i]); // base input
const __m128i E = _mm_sub_epi16(B, C); // unclipped gradient basis B - C
__m128i out = zero; // accumulator for output
__m128i mask_hi = _mm_set_epi32(0, 0, 0, 0xff);
int k = 8;
while (1) {
const __m128i tmp3 = _mm_add_epi16(A, E); // delta = A + B - C
const __m128i tmp4 = _mm_packus_epi16(tmp3, zero); // saturate delta
const __m128i tmp5 = _mm_add_epi8(tmp4, D); // add to in[]
A = _mm_and_si128(tmp5, mask_hi); // 1-complement clip
out = _mm_or_si128(out, A); // accumulate output
if (--k == 0) break;
A = _mm_slli_si128(A, 1); // rotate left sample
mask_hi = _mm_slli_si128(mask_hi, 1); // rotate mask
A = _mm_unpacklo_epi8(A, zero); // convert 8b->16b
}
A = _mm_srli_si128(A, 7); // prepare left sample for next iteration
_mm_storel_epi64((__m128i*)&row[i], out);
}
for (; i < length; ++i) {
row[i] = in[i] + GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
}
}
}
static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) {
if (prev == NULL) {
HorizontalUnfilter_SSE2(NULL, in, out, width);
} else {
out[0] = in[0] + prev[0]; // predict from above
GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1);
}
}
//------------------------------------------------------------------------------
// Entry point
extern void VP8FiltersInitSSE2(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8FiltersInitSSE2(void) {
WebPUnfilters[WEBP_FILTER_HORIZONTAL] = HorizontalUnfilter_SSE2;
WebPUnfilters[WEBP_FILTER_VERTICAL] = VerticalUnfilter_SSE2;
WebPUnfilters[WEBP_FILTER_GRADIENT] = GradientUnfilter_SSE2;
WebPFilters[WEBP_FILTER_HORIZONTAL] = HorizontalFilter_SSE2;
WebPFilters[WEBP_FILTER_VERTICAL] = VerticalFilter_SSE2;
WebPFilters[WEBP_FILTER_GRADIENT] = GradientFilter_SSE2;
}
#else // !WEBP_USE_SSE2
WEBP_DSP_INIT_STUB(VP8FiltersInitSSE2)
#endif // WEBP_USE_SSE2

View file

@ -0,0 +1,659 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Authors: Vikas Arora (vikaas.arora@gmail.com)
// Jyrki Alakuijala (jyrki@google.com)
// Urvang Joshi (urvang@google.com)
#include "src/dsp/dsp.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include "src/dec/vp8li_dec.h"
#include "src/utils/endian_inl_utils.h"
#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"
#define MAX_DIFF_COST (1e30f)
//------------------------------------------------------------------------------
// Image transforms.
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
return (((a0 ^ a1) & 0xfefefefeu) >> 1) + (a0 & a1);
}
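// Average2() relies on the identity a + b == (a ^ b) + 2 * (a & b), so the
// per-byte average (a + b) / 2 equals (a & b) + ((a ^ b) >> 1). Masking the xor
// with 0xfefefefe clears each byte's low bit before the shift, so nothing leaks
// into the byte below, and each byte of the result stays <= 255, so the final
// add produces no inter-byte carries. E.g. for one channel: a=0xC8, b=0x64 ->
// (0xAC >> 1) + 0x40 = 0x56 + 0x40 = 0x96 = (0xC8 + 0x64) / 2.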
static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
return Average2(Average2(a0, a2), a1);
}
static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
uint32_t a2, uint32_t a3) {
return Average2(Average2(a0, a1), Average2(a2, a3));
}
static WEBP_INLINE uint32_t Clip255(uint32_t a) {
if (a < 256) {
return a;
}
// Return 0 when 'a' holds a wrapped negative value, and 255 when it is 256 or
// more (values already in [0, 255] were returned above).
return ~a >> 24;
}
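// Worked examples: (uint32_t)-5 == 0xfffffffb -> ~a == 4 -> result 0;
// a == 300 -> ~a == 0xfffffed3 -> result 255.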
static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
return Clip255(a + b - c);
}
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
uint32_t c2) {
const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
(c1 >> 16) & 0xff,
(c2 >> 16) & 0xff);
const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
(c1 >> 8) & 0xff,
(c2 >> 8) & 0xff);
const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
}
static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
return Clip255(a + (a - b) / 2);
}
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
uint32_t c2) {
const uint32_t ave = Average2(c0, c1);
const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
return ((uint32_t)a << 24) | (r << 16) | (g << 8) | b;
}
// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
// inlined.
#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
# define LOCAL_INLINE __attribute__ ((noinline))
#else
# define LOCAL_INLINE WEBP_INLINE
#endif
static LOCAL_INLINE int Sub3(int a, int b, int c) {
const int pb = b - c;
const int pa = a - c;
return abs(pb) - abs(pa);
}
#undef LOCAL_INLINE
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
const int pa_minus_pb =
Sub3((a >> 24) , (b >> 24) , (c >> 24) ) +
Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
return (pa_minus_pb <= 0) ? a : b;
}
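// Select() is the gradient-guided choice between the two neighbours: with the
// per-channel estimate p = a + b - c, |p - a| == |b - c| and |p - b| == |a - c|,
// so pa_minus_pb sums |p - a| - |p - b| over the four channels and the function
// returns whichever of 'a' or 'b' is closer, in summed absolute difference, to
// that estimate (in Predictor11_C: top vs. left, with c the top-left pixel).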
//------------------------------------------------------------------------------
// Predictors
static uint32_t Predictor0_C(uint32_t left, const uint32_t* const top) {
(void)top;
(void)left;
return ARGB_BLACK;
}
static uint32_t Predictor1_C(uint32_t left, const uint32_t* const top) {
(void)top;
return left;
}
static uint32_t Predictor2_C(uint32_t left, const uint32_t* const top) {
(void)left;
return top[0];
}
static uint32_t Predictor3_C(uint32_t left, const uint32_t* const top) {
(void)left;
return top[1];
}
static uint32_t Predictor4_C(uint32_t left, const uint32_t* const top) {
(void)left;
return top[-1];
}
static uint32_t Predictor5_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average3(left, top[0], top[1]);
return pred;
}
static uint32_t Predictor6_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(left, top[-1]);
return pred;
}
static uint32_t Predictor7_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(left, top[0]);
return pred;
}
static uint32_t Predictor8_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(top[-1], top[0]);
(void)left;
return pred;
}
static uint32_t Predictor9_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(top[0], top[1]);
(void)left;
return pred;
}
static uint32_t Predictor10_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
return pred;
}
static uint32_t Predictor11_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Select(top[0], left, top[-1]);
return pred;
}
static uint32_t Predictor12_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
return pred;
}
static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
return pred;
}
GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
uint32_t left = out[-1];
for (i = 0; i < num_pixels; ++i) {
out[i] = left = VP8LAddPixels(in[i], left);
}
(void)upper;
}
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
GENERATE_PREDICTOR_ADD(Predictor4_C, PredictorAdd4_C)
GENERATE_PREDICTOR_ADD(Predictor5_C, PredictorAdd5_C)
GENERATE_PREDICTOR_ADD(Predictor6_C, PredictorAdd6_C)
GENERATE_PREDICTOR_ADD(Predictor7_C, PredictorAdd7_C)
GENERATE_PREDICTOR_ADD(Predictor8_C, PredictorAdd8_C)
GENERATE_PREDICTOR_ADD(Predictor9_C, PredictorAdd9_C)
GENERATE_PREDICTOR_ADD(Predictor10_C, PredictorAdd10_C)
GENERATE_PREDICTOR_ADD(Predictor11_C, PredictorAdd11_C)
GENERATE_PREDICTOR_ADD(Predictor12_C, PredictorAdd12_C)
GENERATE_PREDICTOR_ADD(Predictor13_C, PredictorAdd13_C)
//------------------------------------------------------------------------------
// Inverse prediction.
static void PredictorInverseTransform_C(const VP8LTransform* const transform,
int y_start, int y_end,
const uint32_t* in, uint32_t* out) {
const int width = transform->xsize_;
if (y_start == 0) { // First Row follows the L (mode=1) mode.
PredictorAdd0_C(in, NULL, 1, out);
PredictorAdd1_C(in + 1, NULL, width - 1, out + 1);
in += width;
out += width;
++y_start;
}
{
int y = y_start;
const int tile_width = 1 << transform->bits_;
const int mask = tile_width - 1;
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
const uint32_t* pred_mode_base =
transform->data_ + (y >> transform->bits_) * tiles_per_row;
while (y < y_end) {
const uint32_t* pred_mode_src = pred_mode_base;
int x = 1;
// First pixel follows the T (mode=2) mode.
PredictorAdd2_C(in, out - width, 1, out);
// .. the rest:
while (x < width) {
const VP8LPredictorAddSubFunc pred_func =
VP8LPredictorsAdd[((*pred_mode_src++) >> 8) & 0xf];
int x_end = (x & ~mask) + tile_width;
if (x_end > width) x_end = width;
pred_func(in + x, out + x - width, x_end - x, out + x);
x = x_end;
}
in += width;
out += width;
++y;
if ((y & mask) == 0) { // Use the same mask, since tiles are squares.
pred_mode_base += tiles_per_row;
}
}
}
}
// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
uint32_t* dst) {
int i;
for (i = 0; i < num_pixels; ++i) {
const uint32_t argb = src[i];
const uint32_t green = ((argb >> 8) & 0xff);
uint32_t red_blue = (argb & 0x00ff00ffu);
red_blue += (green << 16) | green;
red_blue &= 0x00ff00ffu;
dst[i] = (argb & 0xff00ff00u) | red_blue;
}
}
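// Example of the inverse 'subtract green' step above: for argb == 0xff102030
// the green channel is 0x20, so red becomes (0x10 + 0x20) & 0xff == 0x30 and
// blue becomes (0x30 + 0x20) & 0xff == 0x50, giving dst == 0xff302050.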
static WEBP_INLINE int ColorTransformDelta(int8_t color_pred,
int8_t color) {
return ((int)color_pred * color) >> 5;
}
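// ColorTransformDelta() treats 'color_pred' as a signed fixed-point multiplier
// with 5 fractional bits, i.e. the delta is (color_pred * color) / 32 using an
// arithmetic right shift. E.g. ColorTransformDelta(2, 64) == (2 * 64) >> 5 == 4.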
static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
VP8LMultipliers* const m) {
m->green_to_red_ = (color_code >> 0) & 0xff;
m->green_to_blue_ = (color_code >> 8) & 0xff;
m->red_to_blue_ = (color_code >> 16) & 0xff;
}
void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
const uint32_t* src, int num_pixels,
uint32_t* dst) {
int i;
for (i = 0; i < num_pixels; ++i) {
const uint32_t argb = src[i];
const uint32_t green = argb >> 8;
const uint32_t red = argb >> 16;
int new_red = red & 0xff;
int new_blue = argb & 0xff;
new_red += ColorTransformDelta(m->green_to_red_, green);
new_red &= 0xff;
new_blue += ColorTransformDelta(m->green_to_blue_, green);
new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
new_blue &= 0xff;
dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
}
// Color space inverse transform.
static void ColorSpaceInverseTransform_C(const VP8LTransform* const transform,
int y_start, int y_end,
const uint32_t* src, uint32_t* dst) {
const int width = transform->xsize_;
const int tile_width = 1 << transform->bits_;
const int mask = tile_width - 1;
const int safe_width = width & ~mask;
const int remaining_width = width - safe_width;
const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
int y = y_start;
const uint32_t* pred_row =
transform->data_ + (y >> transform->bits_) * tiles_per_row;
while (y < y_end) {
const uint32_t* pred = pred_row;
VP8LMultipliers m = { 0, 0, 0 };
const uint32_t* const src_safe_end = src + safe_width;
const uint32_t* const src_end = src + width;
while (src < src_safe_end) {
ColorCodeToMultipliers(*pred++, &m);
VP8LTransformColorInverse(&m, src, tile_width, dst);
src += tile_width;
dst += tile_width;
}
if (src < src_end) { // Left-overs using C-version.
ColorCodeToMultipliers(*pred++, &m);
VP8LTransformColorInverse(&m, src, remaining_width, dst);
src += remaining_width;
dst += remaining_width;
}
++y;
if ((y & mask) == 0) pred_row += tiles_per_row;
}
}
// Separate out pixels packed together using pixel-bundling.
// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
#define COLOR_INDEX_INVERSE(FUNC_NAME, F_NAME, STATIC_DECL, TYPE, BIT_SUFFIX, \
GET_INDEX, GET_VALUE) \
static void F_NAME(const TYPE* src, const uint32_t* const color_map, \
TYPE* dst, int y_start, int y_end, int width) { \
int y; \
for (y = y_start; y < y_end; ++y) { \
int x; \
for (x = 0; x < width; ++x) { \
*dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
} \
} \
} \
STATIC_DECL void FUNC_NAME(const VP8LTransform* const transform, \
int y_start, int y_end, const TYPE* src, \
TYPE* dst) { \
int y; \
const int bits_per_pixel = 8 >> transform->bits_; \
const int width = transform->xsize_; \
const uint32_t* const color_map = transform->data_; \
if (bits_per_pixel < 8) { \
const int pixels_per_byte = 1 << transform->bits_; \
const int count_mask = pixels_per_byte - 1; \
const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
for (y = y_start; y < y_end; ++y) { \
uint32_t packed_pixels = 0; \
int x; \
for (x = 0; x < width; ++x) { \
/* We need to load fresh 'packed_pixels' once every */ \
/* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \
/* is a power of 2, so can just use a mask for that, instead of */ \
/* decrementing a counter. */ \
if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \
*dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \
packed_pixels >>= bits_per_pixel; \
} \
} \
} else { \
VP8LMapColor##BIT_SUFFIX(src, color_map, dst, y_start, y_end, width); \
} \
}
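// Bundling example for the macro above: with transform->bits_ == 2 there are
// 8 >> 2 == 2 bits per pixel and 1 << 2 == 4 palette indices packed per value;
// the indices are unpacked LSB-first (mask with bit_mask, then shift right by
// bits_per_pixel), and a fresh packed value is fetched whenever
// (x & count_mask) == 0.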
COLOR_INDEX_INVERSE(ColorIndexInverseTransform_C, MapARGB_C, static,
uint32_t, 32b, VP8GetARGBIndex, VP8GetARGBValue)
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, MapAlpha_C, ,
uint8_t, 8b, VP8GetAlphaIndex, VP8GetAlphaValue)
#undef COLOR_INDEX_INVERSE
void VP8LInverseTransform(const VP8LTransform* const transform,
int row_start, int row_end,
const uint32_t* const in, uint32_t* const out) {
const int width = transform->xsize_;
assert(row_start < row_end);
assert(row_end <= transform->ysize_);
switch (transform->type_) {
case SUBTRACT_GREEN:
VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out);
break;
case PREDICTOR_TRANSFORM:
PredictorInverseTransform_C(transform, row_start, row_end, in, out);
if (row_end != transform->ysize_) {
// The last predicted row in this iteration will be the top-pred row
// for the first row in next iteration.
memcpy(out - width, out + (row_end - row_start - 1) * width,
width * sizeof(*out));
}
break;
case CROSS_COLOR_TRANSFORM:
ColorSpaceInverseTransform_C(transform, row_start, row_end, in, out);
break;
case COLOR_INDEXING_TRANSFORM:
if (in == out && transform->bits_ > 0) {
// Move packed pixels to the end of unpacked region, so that unpacking
// can occur seamlessly.
// Also, note that this is the only transform that applies on
// the effective width of VP8LSubSampleSize(xsize_, bits_). All other
// transforms work on effective width of xsize_.
const int out_stride = (row_end - row_start) * width;
const int in_stride = (row_end - row_start) *
VP8LSubSampleSize(transform->xsize_, transform->bits_);
uint32_t* const src = out + out_stride - in_stride;
memmove(src, out, in_stride * sizeof(*src));
ColorIndexInverseTransform_C(transform, row_start, row_end, src, out);
} else {
ColorIndexInverseTransform_C(transform, row_start, row_end, in, out);
}
break;
}
}
//------------------------------------------------------------------------------
// Color space conversion.
static int is_big_endian(void) {
static const union {
uint16_t w;
uint8_t b[2];
} tmp = { 1 };
return (tmp.b[0] != 1);
}
void VP8LConvertBGRAToRGB_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
*dst++ = (argb >> 16) & 0xff;
*dst++ = (argb >> 8) & 0xff;
*dst++ = (argb >> 0) & 0xff;
}
}
void VP8LConvertBGRAToRGBA_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
*dst++ = (argb >> 16) & 0xff;
*dst++ = (argb >> 8) & 0xff;
*dst++ = (argb >> 0) & 0xff;
*dst++ = (argb >> 24) & 0xff;
}
}
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
#if (WEBP_SWAP_16BIT_CSP == 1)
*dst++ = ba;
*dst++ = rg;
#else
*dst++ = rg;
*dst++ = ba;
#endif
}
}
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
#if (WEBP_SWAP_16BIT_CSP == 1)
*dst++ = gb;
*dst++ = rg;
#else
*dst++ = rg;
*dst++ = gb;
#endif
}
}
void VP8LConvertBGRAToBGR_C(const uint32_t* src,
int num_pixels, uint8_t* dst) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
*dst++ = (argb >> 0) & 0xff;
*dst++ = (argb >> 8) & 0xff;
*dst++ = (argb >> 16) & 0xff;
}
}
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
int swap_on_big_endian) {
if (is_big_endian() == swap_on_big_endian) {
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
WebPUint32ToMem(dst, BSwap32(argb));
dst += sizeof(argb);
}
} else {
memcpy(dst, src, num_pixels * sizeof(*src));
}
}
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
switch (out_colorspace) {
case MODE_RGB:
VP8LConvertBGRAToRGB(in_data, num_pixels, rgba);
break;
case MODE_RGBA:
VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
break;
case MODE_rgbA:
VP8LConvertBGRAToRGBA(in_data, num_pixels, rgba);
WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
break;
case MODE_BGR:
VP8LConvertBGRAToBGR(in_data, num_pixels, rgba);
break;
case MODE_BGRA:
CopyOrSwap(in_data, num_pixels, rgba, 1);
break;
case MODE_bgrA:
CopyOrSwap(in_data, num_pixels, rgba, 1);
WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
break;
case MODE_ARGB:
CopyOrSwap(in_data, num_pixels, rgba, 0);
break;
case MODE_Argb:
CopyOrSwap(in_data, num_pixels, rgba, 0);
WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
break;
case MODE_RGBA_4444:
VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
break;
case MODE_rgbA_4444:
VP8LConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
break;
case MODE_RGB_565:
VP8LConvertBGRAToRGB565(in_data, num_pixels, rgba);
break;
default:
assert(0); // Code flow should not reach here.
}
}
//------------------------------------------------------------------------------
VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
VP8LPredictorFunc VP8LPredictors[16];
// exposed plain-C implementations
VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
VP8LPredictorFunc VP8LPredictors_C[16];
VP8LTransformColorInverseFunc VP8LTransformColorInverse;
VP8LConvertFunc VP8LConvertBGRAToRGB;
VP8LConvertFunc VP8LConvertBGRAToRGBA;
VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
VP8LConvertFunc VP8LConvertBGRAToRGB565;
VP8LConvertFunc VP8LConvertBGRAToBGR;
VP8LMapARGBFunc VP8LMapColor32b;
VP8LMapAlphaFunc VP8LMapColor8b;
extern void VP8LDspInitSSE2(void);
extern void VP8LDspInitNEON(void);
extern void VP8LDspInitMIPSdspR2(void);
extern void VP8LDspInitMSA(void);
#define COPY_PREDICTOR_ARRAY(IN, OUT) do { \
(OUT)[0] = IN##0_C; \
(OUT)[1] = IN##1_C; \
(OUT)[2] = IN##2_C; \
(OUT)[3] = IN##3_C; \
(OUT)[4] = IN##4_C; \
(OUT)[5] = IN##5_C; \
(OUT)[6] = IN##6_C; \
(OUT)[7] = IN##7_C; \
(OUT)[8] = IN##8_C; \
(OUT)[9] = IN##9_C; \
(OUT)[10] = IN##10_C; \
(OUT)[11] = IN##11_C; \
(OUT)[12] = IN##12_C; \
(OUT)[13] = IN##13_C; \
(OUT)[14] = IN##0_C; /* <- padding security sentinels */ \
(OUT)[15] = IN##0_C; \
} while (0);
WEBP_DSP_INIT_FUNC(VP8LDspInit) {
COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors)
COPY_PREDICTOR_ARRAY(Predictor, VP8LPredictors_C)
COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd)
COPY_PREDICTOR_ARRAY(PredictorAdd, VP8LPredictorsAdd_C)
#if !WEBP_NEON_OMIT_C_CODE
VP8LAddGreenToBlueAndRed = VP8LAddGreenToBlueAndRed_C;
VP8LTransformColorInverse = VP8LTransformColorInverse_C;
VP8LConvertBGRAToRGBA = VP8LConvertBGRAToRGBA_C;
VP8LConvertBGRAToRGB = VP8LConvertBGRAToRGB_C;
VP8LConvertBGRAToBGR = VP8LConvertBGRAToBGR_C;
#endif
VP8LConvertBGRAToRGBA4444 = VP8LConvertBGRAToRGBA4444_C;
VP8LConvertBGRAToRGB565 = VP8LConvertBGRAToRGB565_C;
VP8LMapColor32b = MapARGB_C;
VP8LMapColor8b = MapAlpha_C;
// If defined, use CPUInfo() to overwrite some pointers with faster versions.
if (VP8GetCPUInfo != NULL) {
#if defined(WEBP_USE_SSE2)
if (VP8GetCPUInfo(kSSE2)) {
VP8LDspInitSSE2();
}
#endif
#if defined(WEBP_USE_MIPS_DSP_R2)
if (VP8GetCPUInfo(kMIPSdspR2)) {
VP8LDspInitMIPSdspR2();
}
#endif
#if defined(WEBP_USE_MSA)
if (VP8GetCPUInfo(kMSA)) {
VP8LDspInitMSA();
}
#endif
}
#if defined(WEBP_USE_NEON)
if (WEBP_NEON_OMIT_C_CODE ||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
VP8LDspInitNEON();
}
#endif
assert(VP8LAddGreenToBlueAndRed != NULL);
assert(VP8LTransformColorInverse != NULL);
assert(VP8LConvertBGRAToRGBA != NULL);
assert(VP8LConvertBGRAToRGB != NULL);
assert(VP8LConvertBGRAToBGR != NULL);
assert(VP8LConvertBGRAToRGBA4444 != NULL);
assert(VP8LConvertBGRAToRGB565 != NULL);
assert(VP8LMapColor32b != NULL);
assert(VP8LMapColor8b != NULL);
}
#undef COPY_PREDICTOR_ARRAY
//------------------------------------------------------------------------------

View file

@ -0,0 +1,225 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Authors: Vikas Arora (vikaas.arora@gmail.com)
// Jyrki Alakuijala (jyrki@google.com)
#ifndef WEBP_DSP_LOSSLESS_H_
#define WEBP_DSP_LOSSLESS_H_
#include "src/webp/types.h"
#include "src/webp/decode.h"
#include "src/enc/histogram_enc.h"
#include "src/utils/utils.h"
#ifdef __cplusplus
extern "C" {
#endif
//------------------------------------------------------------------------------
// Decoding
typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
extern VP8LPredictorFunc VP8LPredictors[16];
extern VP8LPredictorFunc VP8LPredictors_C[16];
// These Add/Sub functions expect upper[-1] and out[-1] to be readable.
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,
const uint32_t* upper, int num_pixels,
uint32_t* out);
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd[16];
extern VP8LPredictorAddSubFunc VP8LPredictorsAdd_C[16];
typedef void (*VP8LProcessDecBlueAndRedFunc)(const uint32_t* src,
int num_pixels, uint32_t* dst);
extern VP8LProcessDecBlueAndRedFunc VP8LAddGreenToBlueAndRed;
typedef struct {
// Note: the members are uint8_t, so that any negative values are
// automatically converted to "mod 256" values.
uint8_t green_to_red_;
uint8_t green_to_blue_;
uint8_t red_to_blue_;
} VP8LMultipliers;
typedef void (*VP8LTransformColorInverseFunc)(const VP8LMultipliers* const m,
const uint32_t* src,
int num_pixels, uint32_t* dst);
extern VP8LTransformColorInverseFunc VP8LTransformColorInverse;
struct VP8LTransform; // Defined in dec/vp8li.h.
// Performs inverse transform of data given transform information, start and end
// rows. Transform will be applied to rows [row_start, row_end[.
// The *in and *out pointers refer to source and destination data respectively
// corresponding to the intermediate row (row_start).
void VP8LInverseTransform(const struct VP8LTransform* const transform,
int row_start, int row_end,
const uint32_t* const in, uint32_t* const out);
// Color space conversion.
typedef void (*VP8LConvertFunc)(const uint32_t* src, int num_pixels,
uint8_t* dst);
extern VP8LConvertFunc VP8LConvertBGRAToRGB;
extern VP8LConvertFunc VP8LConvertBGRAToRGBA;
extern VP8LConvertFunc VP8LConvertBGRAToRGBA4444;
extern VP8LConvertFunc VP8LConvertBGRAToRGB565;
extern VP8LConvertFunc VP8LConvertBGRAToBGR;
// Converts from BGRA to other color spaces.
void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
WEBP_CSP_MODE out_colorspace, uint8_t* const rgba);
typedef void (*VP8LMapARGBFunc)(const uint32_t* src,
const uint32_t* const color_map,
uint32_t* dst, int y_start,
int y_end, int width);
typedef void (*VP8LMapAlphaFunc)(const uint8_t* src,
const uint32_t* const color_map,
uint8_t* dst, int y_start,
int y_end, int width);
extern VP8LMapARGBFunc VP8LMapColor32b;
extern VP8LMapAlphaFunc VP8LMapColor8b;
// Similar to the static method ColorIndexInverseTransform() that is part of
// lossless.c, but used only for alpha decoding. It takes uint8_t (rather than
// uint32_t) arguments for 'src' and 'dst'.
void VP8LColorIndexInverseTransformAlpha(
const struct VP8LTransform* const transform, int y_start, int y_end,
const uint8_t* src, uint8_t* dst);
// Expose some C-only fallback functions
void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
const uint32_t* src, int num_pixels,
uint32_t* dst);
void VP8LConvertBGRAToRGB_C(const uint32_t* src, int num_pixels, uint8_t* dst);
void VP8LConvertBGRAToRGBA_C(const uint32_t* src, int num_pixels, uint8_t* dst);
void VP8LConvertBGRAToRGBA4444_C(const uint32_t* src,
int num_pixels, uint8_t* dst);
void VP8LConvertBGRAToRGB565_C(const uint32_t* src,
int num_pixels, uint8_t* dst);
void VP8LConvertBGRAToBGR_C(const uint32_t* src, int num_pixels, uint8_t* dst);
void VP8LAddGreenToBlueAndRed_C(const uint32_t* src, int num_pixels,
uint32_t* dst);
// Must be called before calling any of the above methods.
void VP8LDspInit(void);
//------------------------------------------------------------------------------
// Encoding
typedef void (*VP8LProcessEncBlueAndRedFunc)(uint32_t* dst, int num_pixels);
extern VP8LProcessEncBlueAndRedFunc VP8LSubtractGreenFromBlueAndRed;
typedef void (*VP8LTransformColorFunc)(const VP8LMultipliers* const m,
uint32_t* dst, int num_pixels);
extern VP8LTransformColorFunc VP8LTransformColor;
typedef void (*VP8LCollectColorBlueTransformsFunc)(
const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue, int histo[]);
extern VP8LCollectColorBlueTransformsFunc VP8LCollectColorBlueTransforms;
typedef void (*VP8LCollectColorRedTransformsFunc)(
const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_red, int histo[]);
extern VP8LCollectColorRedTransformsFunc VP8LCollectColorRedTransforms;
// Expose some C-only fallback functions
void VP8LTransformColor_C(const VP8LMultipliers* const m,
uint32_t* data, int num_pixels);
void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels);
void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_red, int histo[]);
void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
int tile_width, int tile_height,
int green_to_blue, int red_to_blue,
int histo[]);
extern VP8LPredictorAddSubFunc VP8LPredictorsSub[16];
extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
// -----------------------------------------------------------------------------
// Huffman-cost related functions.
typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
int length);
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
const int Y[256]);
extern VP8LCostFunc VP8LExtraCost;
extern VP8LCostCombinedFunc VP8LExtraCostCombined;
extern VP8LCombinedShannonEntropyFunc VP8LCombinedShannonEntropy;
typedef struct { // small struct to hold counters
  int counts[2];            // index: 0=zero streak, 1=non-zero streak
int streaks[2][2]; // [zero/non-zero][streak<3 / streak>=3]
} VP8LStreaks;
typedef struct { // small struct to hold bit entropy results
double entropy; // entropy
uint32_t sum; // sum of the population
int nonzeros; // number of non-zero elements in the population
uint32_t max_val; // maximum value in the population
uint32_t nonzero_code; // index of the last non-zero in the population
} VP8LBitEntropy;
void VP8LBitEntropyInit(VP8LBitEntropy* const entropy);
// Get the combined symbol bit entropy and Huffman cost stats for the
// distributions 'X' and 'Y'. Those results can then be refined according to
// codec specific heuristics.
typedef void (*VP8LGetCombinedEntropyUnrefinedFunc)(
const uint32_t X[], const uint32_t Y[], int length,
VP8LBitEntropy* const bit_entropy, VP8LStreaks* const stats);
extern VP8LGetCombinedEntropyUnrefinedFunc VP8LGetCombinedEntropyUnrefined;
// Get the entropy for the distribution 'X'.
typedef void (*VP8LGetEntropyUnrefinedFunc)(const uint32_t X[], int length,
VP8LBitEntropy* const bit_entropy,
VP8LStreaks* const stats);
extern VP8LGetEntropyUnrefinedFunc VP8LGetEntropyUnrefined;
void VP8LBitsEntropyUnrefined(const uint32_t* const array, int n,
VP8LBitEntropy* const entropy);
typedef void (*VP8LHistogramAddFunc)(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out);
extern VP8LHistogramAddFunc VP8LHistogramAdd;
// -----------------------------------------------------------------------------
// PrefixEncode()
typedef int (*VP8LVectorMismatchFunc)(const uint32_t* const array1,
const uint32_t* const array2, int length);
// Returns the first index where array1 and array2 are different.
extern VP8LVectorMismatchFunc VP8LVectorMismatch;
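// Illustrative example (assumption): for array1 == {1, 2, 3} and
// array2 == {1, 2, 7}, VP8LVectorMismatch(array1, array2, 3) returns 2;
// by this definition it returns 'length' when the arrays are identical.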
typedef void (*VP8LBundleColorMapFunc)(const uint8_t* const row, int width,
int xbits, uint32_t* dst);
extern VP8LBundleColorMapFunc VP8LBundleColorMap;
void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
uint32_t* dst);
// Must be called before calling any of the above methods.
void VP8LEncDspInit(void);
//------------------------------------------------------------------------------
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_DSP_LOSSLESS_H_

View file

@@ -0,0 +1,202 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Authors: Vikas Arora (vikaas.arora@gmail.com)
// Jyrki Alakuijala (jyrki@google.com)
// Vincent Rabaud (vrabaud@google.com)
#ifndef WEBP_DSP_LOSSLESS_COMMON_H_
#define WEBP_DSP_LOSSLESS_COMMON_H_
#include "src/webp/types.h"
#include "src/utils/utils.h"
#ifdef __cplusplus
extern "C" {
#endif
//------------------------------------------------------------------------------
// Decoding
// Color-mapping related functions.
static WEBP_INLINE uint32_t VP8GetARGBIndex(uint32_t idx) {
return (idx >> 8) & 0xff;
}
static WEBP_INLINE uint8_t VP8GetAlphaIndex(uint8_t idx) {
return idx;
}
static WEBP_INLINE uint32_t VP8GetARGBValue(uint32_t val) {
return val;
}
static WEBP_INLINE uint8_t VP8GetAlphaValue(uint32_t val) {
return (val >> 8) & 0xff;
}
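// Illustrative note (assumption): for ARGB data the color-map index lives in
// the green byte, so VP8GetARGBIndex(0x00112233u) == 0x22, and a color-map
// entry stores the alpha sample in that same byte (VP8GetAlphaValue()); for
// 8-bit alpha planes the sample itself is the index.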
//------------------------------------------------------------------------------
// Misc methods.
// Computes the sampled size of 'size' when sampling using 'sampling_bits'.
static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
uint32_t sampling_bits) {
return (size + (1 << sampling_bits) - 1) >> sampling_bits;
}
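// Worked example (illustrative): a 100-pixel dimension with sampling_bits == 3
// gives (100 + 7) >> 3 == 13, i.e. ceil(100 / 8) tiles.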
// Converts near lossless quality into max number of bits shaved off.
static WEBP_INLINE int VP8LNearLosslessBits(int near_lossless_quality) {
// 100 -> 0
// 80..99 -> 1
// 60..79 -> 2
// 40..59 -> 3
// 20..39 -> 4
// 0..19 -> 5
return 5 - near_lossless_quality / 20;
}
// -----------------------------------------------------------------------------
// Faster logarithm for integers. Small values use a look-up table.
// The threshold up to which the approximate version of log_2 can be used.
// Practically, we can get rid of the call to log() as the two values match
// to a very high degree (the ratio of the two is 0.99999x).
// Keeping a high threshold for now.
#define APPROX_LOG_WITH_CORRECTION_MAX 65536
#define APPROX_LOG_MAX 4096
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
#define LOG_LOOKUP_IDX_MAX 256
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
typedef float (*VP8LFastLog2SlowFunc)(uint32_t v);
extern VP8LFastLog2SlowFunc VP8LFastLog2Slow;
extern VP8LFastLog2SlowFunc VP8LFastSLog2Slow;
static WEBP_INLINE float VP8LFastLog2(uint32_t v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
}
// Fast calculation of v * log2(v) for integer input.
static WEBP_INLINE float VP8LFastSLog2(uint32_t v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
}
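// Illustrative note (assumption, mirroring the usual use of these helpers):
// the Shannon cost in bits of a histogram with counts n[0..k-1] summing to
// 'total' can be estimated as
//
//   VP8LFastSLog2(total) - sum_i VP8LFastSLog2(n[i])
//
// since VP8LFastSLog2(v) computes v * log2(v).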
// -----------------------------------------------------------------------------
// PrefixEncode()
// Splitting of distance and length codes into prefixes and
// extra bits. The prefixes are encoded with an entropy code
// while the extra bits are stored just as normal bits.
static WEBP_INLINE void VP8LPrefixEncodeBitsNoLUT(int distance, int* const code,
int* const extra_bits) {
const int highest_bit = BitsLog2Floor(--distance);
const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
*extra_bits = highest_bit - 1;
*code = 2 * highest_bit + second_highest_bit;
}
static WEBP_INLINE void VP8LPrefixEncodeNoLUT(int distance, int* const code,
int* const extra_bits,
int* const extra_bits_value) {
const int highest_bit = BitsLog2Floor(--distance);
const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
*extra_bits = highest_bit - 1;
*extra_bits_value = distance & ((1 << *extra_bits) - 1);
*code = 2 * highest_bit + second_highest_bit;
}
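// Worked example (illustrative): for distance == 100 the decremented value is
// 99 (binary 1100011), so highest_bit == 6 and second_highest_bit == 1, giving
// code == 13 with 5 extra bits whose value is 99 & 31 == 3.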
#define PREFIX_LOOKUP_IDX_MAX 512
typedef struct {
int8_t code_;
int8_t extra_bits_;
} VP8LPrefixCode;
// These tables are derived using VP8LPrefixEncodeNoLUT.
extern const VP8LPrefixCode kPrefixEncodeCode[PREFIX_LOOKUP_IDX_MAX];
extern const uint8_t kPrefixEncodeExtraBitsValue[PREFIX_LOOKUP_IDX_MAX];
static WEBP_INLINE void VP8LPrefixEncodeBits(int distance, int* const code,
int* const extra_bits) {
if (distance < PREFIX_LOOKUP_IDX_MAX) {
const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
*code = prefix_code.code_;
*extra_bits = prefix_code.extra_bits_;
} else {
VP8LPrefixEncodeBitsNoLUT(distance, code, extra_bits);
}
}
static WEBP_INLINE void VP8LPrefixEncode(int distance, int* const code,
int* const extra_bits,
int* const extra_bits_value) {
if (distance < PREFIX_LOOKUP_IDX_MAX) {
const VP8LPrefixCode prefix_code = kPrefixEncodeCode[distance];
*code = prefix_code.code_;
*extra_bits = prefix_code.extra_bits_;
*extra_bits_value = kPrefixEncodeExtraBitsValue[distance];
} else {
VP8LPrefixEncodeNoLUT(distance, code, extra_bits, extra_bits_value);
}
}
// Sum of each component, mod 256.
static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
uint32_t VP8LAddPixels(uint32_t a, uint32_t b) {
const uint32_t alpha_and_green = (a & 0xff00ff00u) + (b & 0xff00ff00u);
const uint32_t red_and_blue = (a & 0x00ff00ffu) + (b & 0x00ff00ffu);
return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
}
// Difference of each component, mod 256.
static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
const uint32_t alpha_and_green =
0x00ff00ffu + (a & 0xff00ff00u) - (b & 0xff00ff00u);
const uint32_t red_and_blue =
0xff00ff00u + (a & 0x00ff00ffu) - (b & 0x00ff00ffu);
return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
}
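// Illustrative example (assumption): VP8LAddPixels(0x01ff0080u, 0x01020304u)
// == 0x02010384u -- each byte is added independently, with the red channel
// wrapping modulo 256 (0xff + 0x02 == 0x01).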
//------------------------------------------------------------------------------
// Transform-related functions used in both encoding and decoding.
// Macros used to create a batch predictor that iteratively uses a
// one-pixel predictor.
// The predicted value is added to the input pixel (which is therefore treated
// as a residual) to produce the reconstructed output pixel.
#define GENERATE_PREDICTOR_ADD(PREDICTOR, PREDICTOR_ADD) \
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int x; \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \
out[x] = VP8LAddPixels(in[x], pred); \
} \
}
// It subtracts the prediction from the input pixel and stores the residual
// in the output pixel.
#define GENERATE_PREDICTOR_SUB(PREDICTOR, PREDICTOR_SUB) \
static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int x; \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \
out[x] = VP8LSubPixels(in[x], pred); \
} \
}
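// Illustrative sketch (assumption, not upstream code): given a one-pixel
// predictor matching VP8LPredictorFunc, e.g.
//
//   static uint32_t PredictLeft(uint32_t left, const uint32_t* const top) {
//     (void)top;
//     return left;
//   }
//
// the batch variants would be generated with
//
//   GENERATE_PREDICTOR_ADD(PredictLeft, PredictLeftAdd)
//   GENERATE_PREDICTOR_SUB(PredictLeft, PredictLeftSub)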
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_DSP_LOSSLESS_COMMON_H_

Some files were not shown because too many files have changed in this diff.