Bug 698519 - Update libjpeg-turbo to version 1.2.0. r=jlebar

2012-02-11 03:34:00 -05:00 · 2012-02-11 03:34:00 -05:00 · 4d8f009049
--- a/config/autoconf.mk.in
+++ b/config/autoconf.mk.in
@ -181,6 +181,7 @@ LIBJPEG_TURBO_AS = @LIBJPEG_TURBO_AS@
 LIBJPEG_TURBO_ASFLAGS = @LIBJPEG_TURBO_ASFLAGS@
 LIBJPEG_TURBO_X86_ASM = @LIBJPEG_TURBO_X86_ASM@
 LIBJPEG_TURBO_X64_ASM = @LIBJPEG_TURBO_X64_ASM@
+LIBJPEG_TURBO_ARM_ASM = @LIBJPEG_TURBO_ARM_ASM@
 NS_PRINTING = @NS_PRINTING@
 MOZ_PDF_PRINTING = @MOZ_PDF_PRINTING@
 MOZ_CRASHREPORTER = @MOZ_CRASHREPORTER@
--- a/configure.in
+++ b/configure.in
@ -4605,6 +4605,7 @@ LIBJPEG_TURBO_AS=
 LIBJPEG_TURBO_ASFLAGS=
 LIBJPEG_TURBO_X86_ASM=
 LIBJPEG_TURBO_X64_ASM=
+LIBJPEG_TURBO_ARM_ASM=
 MOZ_PANGO=1
 MOZ_PERMISSIONS=1
 MOZ_PLACES=1
@ -6163,38 +6164,51 @@ if test -n "$MOZ_LIBJPEG_TURBO"; then
    LIBJPEG_TURBO_ASFLAGS="-f win64 -rnasm -pnasm -D__x86_64__ -DPIC -DWIN64 -DMSVC"
    LIBJPEG_TURBO_X64_ASM=1
  ;;
+  *:arm*)
+    LIBJPEG_TURBO_ASFLAGS="-march=armv7-a -mfpu=neon"
+    LIBJPEG_TURBO_ARM_ASM=1
+  ;;
  esac

 fi

-dnl If we're on a system which supports libjpeg-turbo's asm routines and
-dnl --disable-libjpeg-turbo wasn't passed, check for yasm, and error out if it
-dnl doesn't exist or we have too old of a version.
+dnl If we're on an x86 or x64 system which supports libjpeg-turbo's asm routines
+dnl and --disable-libjpeg-turbo wasn't passed, check for Yasm, and error out if
+dnl it doesn't exist or we have too old of a version.
 if test -n "$LIBJPEG_TURBO_X86_ASM" -o -n "$LIBJPEG_TURBO_X64_ASM" ; then
-    AC_MSG_CHECKING([for YASM assembler])
+    AC_MSG_CHECKING([for Yasm assembler])
    AC_CHECK_PROGS(LIBJPEG_TURBO_AS, yasm, "")

    if test -z "$LIBJPEG_TURBO_AS" ; then
-        AC_MSG_ERROR([yasm is required to build with libjpeg-turbo's optimized JPEG decoding routines, but you do not appear to have yasm installed.  Either install it or configure with --disable-libjpeg-turbo to use the pure C JPEG decoder.  See https://developer.mozilla.org/en/YASM for more details.])
+        AC_MSG_ERROR([Yasm is required to build with libjpeg-turbo's optimized JPEG decoding routines, but you do not appear to have Yasm installed.  Either install it or configure with --disable-libjpeg-turbo to use the pure C JPEG decoder.  See https://developer.mozilla.org/en/YASM for more details.])
    fi

    dnl Check that we have the right yasm version.  We require 1.0.1 or newer
    dnl on Linux and 1.1 or newer everywhere else.
    if test "$OS_ARCH" = "Linux" ; then
        if test "$_YASM_MAJOR_VERSION" -lt "1" -o \( "$_YASM_MAJOR_VERSION" -eq "1" -a "$_YASM_MINOR_VERSION" -eq "0" -a "$_YASM_RELEASE" -lt "1" \) ; then
-            AC_MSG_ERROR([yasm 1.0.1 or greater is required to build with libjpeg-turbo's optimized JPEG decoding routines, but you appear to have version $_YASM_MAJOR_VERSION.$_YASM_MINOR_VERSION.$_YASM_RELEASE.  Upgrade to the newest version or configure with --disable-libjpeg-turbo to use the pure C JPEG decoder.  See https://developer.mozilla.org/en/YASM for more details.])
+            AC_MSG_ERROR([Yasm 1.0.1 or greater is required to build with libjpeg-turbo's optimized JPEG decoding routines, but you appear to have version $_YASM_MAJOR_VERSION.$_YASM_MINOR_VERSION.$_YASM_RELEASE.  Upgrade to the newest version or configure with --disable-libjpeg-turbo to use the pure C JPEG decoder.  See https://developer.mozilla.org/en/YASM for more details.])
        fi
    else
        if test "$_YASM_MAJOR_VERSION" -lt "1" -o \( "$_YASM_MAJOR_VERSION" -eq "1" -a "$_YASM_MINOR_VERSION" -lt "1" \) ; then
-            AC_MSG_ERROR([yasm 1.1 or greater is required to build with libjpeg-turbo's optimized JPEG decoding routines, but you appear to have version $_YASM_MAJOR_VERSION.$_YASM_MINOR_VERSION.  Upgrade to the newest version or configure with --disable-libjpeg-turbo to use the pure C JPEG decoder.  See https://developer.mozilla.org/en/YASM for more details.])
+            AC_MSG_ERROR([Yasm 1.1 or greater is required to build with libjpeg-turbo's optimized JPEG decoding routines, but you appear to have version $_YASM_MAJOR_VERSION.$_YASM_MINOR_VERSION.  Upgrade to the newest version or configure with --disable-libjpeg-turbo to use the pure C JPEG decoder.  See https://developer.mozilla.org/en/YASM for more details.])
        fi
    fi
 fi

+dnl If we're on an ARM system which supports libjpeg-turbo's asm routines and
+dnl --disable-libjpeg-turbo wasn't passed, use the C compiler as the assembler.
+if test -n "$LIBJPEG_TURBO_ARM_ASM" ; then
+    echo "Using $AS as the assembler for ARM code."
+    LIBJPEG_TURBO_AS=$AS
+fi
+
 if test -n "$LIBJPEG_TURBO_X86_ASM"; then
    AC_DEFINE(LIBJPEG_TURBO_X86_ASM)
 elif test -n "$LIBJPEG_TURBO_X64_ASM"; then
    AC_DEFINE(LIBJPEG_TURBO_X64_ASM)
+elif test -n "$LIBJPEG_TURBO_ARM_ASM"; then
+    AC_DEFINE(LIBJPEG_TURBO_ARM_ASM)
 elif test -n "$MOZ_LIBJPEG_TURBO"; then
    dnl Warn if we're not building the optimized routines, even though the user
    dnl didn't specify --disable-libjpeg-turbo.
@ -8736,6 +8750,7 @@ AC_SUBST(LIBJPEG_TURBO_AS)
 AC_SUBST(LIBJPEG_TURBO_ASFLAGS)
 AC_SUBST(LIBJPEG_TURBO_X86_ASM)
 AC_SUBST(LIBJPEG_TURBO_X64_ASM)
+AC_SUBST(LIBJPEG_TURBO_ARM_ASM)

 AC_MSG_CHECKING([for posix_fallocate])
 AC_TRY_LINK([#define _XOPEN_SOURCE 600
--- a/media/libjpeg/MOZCHANGES
+++ b/media/libjpeg/MOZCHANGES
@ -6,9 +6,15 @@ To upgrade to a new revision of libjpeg-turbo, do the following:

 * In a clean clone of mozilla-central, run the following commands

-    $ rm -rf jpeg
-    $ svn export --ignore-externals /path/to/libjpeg-turbo jpeg
-    $ cd jpeg
+    $ rm -rf media/libjpeg
+    $ svn export --ignore-externals /path/to/libjpeg-turbo media/libjpeg
+    $ cd media/libjpeg
+
+* Copy win/jsimdcfg.inc to simd/.
+
+* Since libjpeg-turbo normally creates config.h and jconfig.h at build time and
+  we use pre-generated versions, changes to jconfig.h.in and win/config.h.in
+  should be looked for and noted for later inclusion.

 * Now look through the new files and rm any which are npotb.  When I upgraded
  to libjpeg-turbo 1.1.0, the only files I kept which didn't match
@ -32,13 +38,19 @@ To upgrade to a new revision of libjpeg-turbo, do the following:

    diff <(ls *.c | sort) <(grep -o '\w*\.c' Makefile.in | sort)

-  of course, libjpeg-turbo might have added some new source files, so you'll
+  Of course, libjpeg-turbo might have added some new source files, so you'll
  have to look though and figure out which of these files to keep.

 * Restore files modified in the Mozilla repository.

-    $ hg revert --no-backup Makefile.in jconfig.h jmorecfg.h simd/Makefile.in \
-      simd/jsimdcfg.inc jchuff.c jdhuff.c jdhuff.h MOZCHANGES
+    $ hg revert --no-backup config.h jconfig.h Makefile.in MOZCHANGES \
+      mozilla.diff simd/Makefile.in
+
+* Update config.h and jconfig.h as noted previously.
+
+* Apply Mozilla-specific changes to upstream files.
+
+    $ patch -p0 -i mozilla.diff

 * Update Makefile.in to build any new files.

@ -47,6 +59,18 @@ To upgrade to a new revision of libjpeg-turbo, do the following:
    $ hg addremove


+== February 10, 2012 (libjpeg-turbo v1.2.0 r807 2012-02-10) ==
+
+* Imported jchuff.c, jdhuff.c, jdhuff.h under new licensing.
+
+* Created mozilla.diff for the required jmorecfg.h changes and to allow for any
+  future changes made by Mozilla to upstream files.
+
+* Removed the following files which are unused by the Mozilla build:
+
+    cderror.h, cdjpeg.h, jconfig.h.in, transupp.h, simd/jsimdcfg.inc.h
+
+
 == March 28, 2011 (initial commit, libjpeg-turbo v1.1.0 r469 2011-02-27) ==

 * Modified jmorecfg.h to define UINT8, UINT16, INT16, and INT32 in terms of
--- a/media/libjpeg/Makefile.in
+++ b/media/libjpeg/Makefile.in
@ -103,6 +103,7 @@ CSRCS		+= \
 		jcphuff.c \
 		jcprepct.c \
 		jcsample.c \
+		jctrans.c \
 		$(NULL)

 AS=$(LIBJPEG_TURBO_AS)
@ -115,14 +116,20 @@ ifeq ($(AS),yasm)
 endif

 # No SIMD support?
-ifeq (,$(LIBJPEG_TURBO_X86_ASM)$(LIBJPEG_TURBO_X64_ASM))
+ifeq (,$(LIBJPEG_TURBO_X86_ASM)$(LIBJPEG_TURBO_X64_ASM)$(LIBJPEG_TURBO_ARM_ASM))
  CSRCS += jsimd_none.c
 endif

+ifeq (1,$(LIBJPEG_TURBO_ARM_ASM))
+  CSRCS += simd/jsimd_arm.c
+  SSRCS += simd/jsimd_arm_neon.S
+endif
+
 ifeq (1,$(LIBJPEG_TURBO_X64_ASM))
  CSRCS   += simd/jsimd_x86_64.c
  ASFILES += \
 	simd/jccolss2-64.asm \
+	simd/jcgrass2-64.asm \
 	simd/jcqnts2f-64.asm \
 	simd/jcqnts2i-64.asm \
 	simd/jcsamss2-64.asm \
@ -144,6 +151,8 @@ ifeq (1,$(LIBJPEG_TURBO_X86_ASM))
  ASFILES += \
 	simd/jccolmmx.asm \
 	simd/jccolss2.asm \
+	simd/jcgrammx.asm \
+	simd/jcgrass2.asm \
 	simd/jcqnt3dn.asm \
 	simd/jcqntmmx.asm \
 	simd/jcqnts2f.asm \
@ -176,14 +185,14 @@ ifeq (1,$(LIBJPEG_TURBO_X86_ASM))
 	$(NULL)
 endif

-EXPORTS		= \
-		jconfig.h \
-		jerror.h \
-		jinclude.h \
-		jmorecfg.h \
-		jpegint.h \
-		jpeglib.h \
-		$(NULL)
+EXPORTS	= \
+	jconfig.h \
+	jerror.h \
+	jinclude.h \
+	jmorecfg.h \
+	jpegint.h \
+	jpeglib.h \
+	$(NULL)

 # need static lib for some of the libimg componentry to link properly
 FORCE_STATIC_LIB = 1
--- a/media/libjpeg/README
+++ b/media/libjpeg/README
@ -1,7 +1,8 @@
-libjpeg-turbo note:  This file is mostly taken from the libjpeg v8b README
-file, and it is included only for reference.  Some parts of it may not apply to
-libjpeg-turbo.  Please see README-turbo.txt for information specific to the
-turbo version.
+libjpeg-turbo note:  This file contains portions of the libjpeg v6b and v8
+README files, with additional wordsmithing by The libjpeg-turbo Project.
+It is included only for reference, as some parts of it may not apply to
+libjpeg-turbo.  Please see README-turbo.txt for information specific to
+libjpeg-turbo.


 The Independent JPEG Group's JPEG software
@ -62,7 +63,7 @@ OVERVIEW
 This package contains C software to implement JPEG image encoding, decoding,
 and transcoding.  JPEG (pronounced "jay-peg") is a standardized compression
 method for full-color and gray-scale images.  JPEG's strong suit is compressing
-photographic images or other types of images which have smooth color and
+photographic images or other types of images that have smooth color and
 brightness transitions between neighboring pixels.  Images with sharp lines or
 other abrupt features may not compress well with JPEG, and a higher JPEG
 quality may have to be used to avoid visible compression artifacts with such
@ -256,8 +257,8 @@ ARCHIVE LOCATIONS
 The "official" archive site for this software is www.ijg.org.
 The most recent released version can always be found there in
 directory "files".  This particular version will be archived as
-http://www.ijg.org/files/jpegsrc.v8b.tar.gz, and in Windows-compatible
-"zip" archive format as http://www.ijg.org/files/jpegsr8b.zip.
+http://www.ijg.org/files/jpegsrc.v8d.tar.gz, and in Windows-compatible
+"zip" archive format as http://www.ijg.org/files/jpegsr8d.zip.

 The JPEG FAQ (Frequently Asked Questions) article is a source of some
 general information about JPEG.
@ -274,7 +275,7 @@ FILE FORMAT WARS
 ================

 The ISO JPEG standards committee actually promotes different formats like
-"JPEG 2000" or "JPEG XR" which are incompatible with original DCT-based
+"JPEG 2000" or "JPEG XR", which are incompatible with original DCT-based
 JPEG.  IJG therefore does not support these formats (see REFERENCES).  Indeed,
 one of the original reasons for developing this free software was to help
 force convergence on common, interoperable format standards for JPEG files.
@ -286,4 +287,4 @@ image files indefinitely.)
 TO DO
 =====

-Please send bug reports, offers of help, etc. to jpeg-info@uc.ag.
+Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org.
--- a/media/libjpeg/README-turbo.txt
+++ b/media/libjpeg/README-turbo.txt
@ -2,51 +2,80 @@
 **     Background
 *******************************************************************************

-libjpeg-turbo is a derivative of libjpeg which uses SIMD instructions (MMX,
-SSE2, etc.) to accelerate baseline JPEG compression and decompression on x86
-and x86-64 systems.  On such systems, libjpeg-turbo is generally 2-4x as fast
-as the unmodified version of libjpeg, all else being equal.
+libjpeg-turbo is a derivative of libjpeg that uses SIMD instructions (MMX,
+SSE2, NEON) to accelerate baseline JPEG compression and decompression on x86,
+x86-64, and ARM systems.  On such systems, libjpeg-turbo is generally 2-4x as
+fast as the unmodified version of libjpeg, all else being equal.

 libjpeg-turbo was originally based on libjpeg/SIMD by Miyasaka Masaru, but
 the TigerVNC and VirtualGL projects made numerous enhancements to the codec in
 2009, including improved support for Mac OS X, 64-bit support, support for
-32-bit and big endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman
-encoding/decoding, and various bug fixes.  The goal was to produce a fully open
-source codec that could replace the partially closed source TurboJPEG/IPP codec
-used by VirtualGL and TurboVNC.  libjpeg-turbo generally performs in the range
-of 80-120% of TurboJPEG/IPP.  It is faster in some areas but slower in others.
+32-bit and big-endian pixel formats (RGBX, XBGR, etc.), accelerated Huffman
+encoding/decoding, and various bug fixes.  The goal was to produce a fully
+open-source codec that could replace the partially closed-source TurboJPEG/IPP
+codec used by VirtualGL and TurboVNC.  libjpeg-turbo generally achieves 80-120%
+of the performance of TurboJPEG/IPP.  It is faster in some areas but slower in
+others.

 In early 2010, libjpeg-turbo spun off into its own independent project, with
 the goal of making high-speed JPEG compression/decompression technology
-available to a broader range of users and developers.  The libjpeg-turbo shared
-libraries can be used as drop-in replacements for libjpeg on most systems.
+available to a broader range of users and developers.


 *******************************************************************************
 **     License
 *******************************************************************************

-The TurboJPEG/OSS wrapper, as well as some of the optimizations to the Huffman
-encoder (jchuff.c) and decoder (jdhuff.c), were borrowed from VirtualGL, and
-thus any distribution of libjpeg-turbo which includes those files must, as a
-whole, be subject to the terms of the wxWindows Library Licence, Version 3.1.
-A copy of this license can be found in this directory under LICENSE.txt.  The
-wxWindows Library License is based on the LGPL but includes provisions which
-allow the Library to be statically linked into proprietary libraries and
-applications without requiring the resulting binaries to be distributed under
-the terms of the LGPL.
+Most of libjpeg-turbo inherits the non-restrictive, BSD-style license used by
+libjpeg (see README.)  The TurboJPEG/OSS wrapper (both C and Java versions) and
+associated test programs bear a similar license, which is reproduced below:

-The rest of the source code, apart from TurboJPEG/OSS and the Huffman codec
-optimizations, falls under a less restrictive, BSD-style license (see README.)
-You can choose to distribute libjpeg-turbo, as a whole, under this BSD-style
-license by simply removing TurboJPEG/OSS and replacing the optimized jchuff.c
-and jdhuff.c with their unoptimized counterparts from the libjpeg v6b source.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+- Neither the name of the libjpeg-turbo Project nor the names of its
+  contributors may be used to endorse or promote products derived from this
+  software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.


 *******************************************************************************
 **     Using libjpeg-turbo
 *******************************************************************************

+libjpeg-turbo includes two APIs that can be used to compress and decompress
+JPEG images:
+
+  TurboJPEG API:  This API provides an easy-to-use interface for compressing
+  and decompressing JPEG images in memory.  It also provides some functionality
+  that would not be straightforward to achieve using the underlying libjpeg
+  API, such as generating planar YUV images and performing multiple
+  simultaneous lossless transforms on an image.  The Java interface for
+  libjpeg-turbo is written on top of the TurboJPEG API.
+
+  libjpeg API:  This is the de facto industry-standard API for compressing and
+  decompressing JPEG images.  It is more difficult to use than the TurboJPEG
+  API but also more powerful.  libjpeg-turbo is both API/ABI-compatible and
+  mathematically compatible with libjpeg v6b.  It can also optionally be
+  configured to be API/ABI-compatible with libjpeg v7 and v8 (see below.)
+
+
 =============================
 Replacing libjpeg at Run Time
 =============================
@ -72,13 +101,13 @@ NOTE: {lib} can be lib, lib32, lib64, or lib/64, depending on the O/S and
 architecture.

 System administrators can also replace the libjpeg sym links in /usr/{lib} with
-links to the libjpeg dynamic library located in /opt/libjpeg-turbo/{lib}.  This
-will effectively accelerate every dynamically linked libjpeg application on the
-system.
+links to the libjpeg-turbo dynamic library located in /opt/libjpeg-turbo/{lib}.
+This will effectively accelerate every application that uses the libjpeg
+dynamic library on the system.

 The libjpeg-turbo SDK for Visual C++ installs the libjpeg-turbo DLL
-(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether libjpeg v6b, v7, or
-v8 emulation is enabled) into c:\libjpeg-turbo[64]\bin, and the PATH
+(jpeg62.dll, jpeg7.dll, or jpeg8.dll, depending on whether it was built with
+libjpeg v6b, v7, or v8 emulation) into c:\libjpeg-turbo[64]\bin, and the PATH
 environment variable can be modified such that this directory is searched
 before any others that might contain a libjpeg DLL.  However, if a libjpeg
 DLL exists in an application's install directory, then Windows will load this
@ -88,16 +117,16 @@ version of this DLL and copy c:\libjpeg-turbo[64]\bin\jpeg*.dll into the
 application's install directory to accelerate it.

 The version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for
-Visual C++ requires the Visual C++ 2008 C run time DLL (msvcr90.dll).
+Visual C++ requires the Visual C++ 2008 C run-time DLL (msvcr90.dll).
 msvcr90.dll ships with more recent versions of Windows, but users of older
 Windows releases can obtain it from the Visual C++ 2008 Redistributable
 Package, which is available as a free download from Microsoft's web site.

-NOTE:  Features of libjpeg which require passing a C run time structure, such
+NOTE:  Features of libjpeg that require passing a C run-time structure, such
 as a file handle, from an application to libjpeg will probably not work with
 the version of the libjpeg-turbo DLL distributed in the libjpeg-turbo SDK for
 Visual C++, unless the application is also built to use the Visual C++ 2008 C
-run time DLL.  In particular, this affects jpeg_stdio_dest() and
+run-time DLL.  In particular, this affects jpeg_stdio_dest() and
 jpeg_stdio_src().

 Mac applications typically embed their own copies of the libjpeg dylib inside
@ -117,7 +146,7 @@ Replacing TurboJPEG/IPP
 libjpeg-turbo is a drop-in replacement for the TurboJPEG/IPP SDK used by
 VirtualGL 2.1.x and TurboVNC 0.6 (and prior.)  libjpeg-turbo contains a wrapper
 library (TurboJPEG/OSS) that emulates the TurboJPEG API using libjpeg-turbo
-instead of the closed source Intel Performance Primitives.  You can replace the
+instead of the closed-source Intel Performance Primitives.  You can replace the
 TurboJPEG/IPP package on Linux systems with the libjpeg-turbo package in order
 to make existing releases of VirtualGL 2.1.x and TurboVNC 0.x use the new codec
 at run time.  Note that the 64-bit libjpeg-turbo packages contain only 64-bit
@ -128,7 +157,7 @@ both the 64-bit and 32-bit versions of libjpeg-turbo.
 You can also build the VirtualGL 2.1.x and TurboVNC 0.6 source code with
 the libjpeg-turbo SDK instead of TurboJPEG/IPP.  It should work identically.
 libjpeg-turbo also includes static library versions of TurboJPEG/OSS, which
-are used to build TurboVNC 1.0 and later.
+are used to build VirtualGL 2.2 and TurboVNC 1.0 and later.

 ========================================
 Using libjpeg-turbo in Your Own Programs
@ -179,9 +208,9 @@ libjpeg-turbo) or jpeg-static.lib (to use the static version of libjpeg-turbo.)
 Colorspace Extensions
 =====================

-libjpeg-turbo includes extensions which allow JPEG images to be compressed
-directly from (and decompressed directly to) buffers which use BGR, BGRX,
-RGBX, XBGR, and XRGB pixel ordering.  This is implemented with six new
+libjpeg-turbo includes extensions that allow JPEG images to be compressed
+directly from (and decompressed directly to) buffers that use BGR, BGRX,
+RGBX, XBGR, and XRGB pixel ordering.  This is implemented with ten new
 colorspace constants:

  JCS_EXT_RGB   /* red/green/blue */
@ -190,11 +219,15 @@ colorspace constants:
  JCS_EXT_BGRX  /* blue/green/red/x */
  JCS_EXT_XBGR  /* x/blue/green/red */
  JCS_EXT_XRGB  /* x/red/green/blue */
+  JCS_EXT_RGBA  /* red/green/blue/alpha */
+  JCS_EXT_BGRA  /* blue/green/red/alpha */
+  JCS_EXT_ABGR  /* alpha/blue/green/red */
+  JCS_EXT_ARGB  /* alpha/red/green/blue */

 Setting cinfo.in_color_space (compression) or cinfo.out_color_space
 (decompression) to one of these values will cause libjpeg-turbo to read the
 red, green, and blue values from (or write them to) the appropriate position in
-the pixel when YUV conversion is performed.
+the pixel when compressing from/decompressing to an RGB buffer.

 Your application can check for the existence of these extensions at compile
 time with:
@ -204,33 +237,41 @@ time with:
 At run time, attempting to use these extensions with a version of libjpeg
 that doesn't support them will result in a "Bogus input colorspace" error.

+When using the RGBX, BGRX, XBGR, and XRGB colorspaces during decompression, the
+X byte is undefined, and in order to ensure the best performance, libjpeg-turbo
+can set that byte to whatever value it wishes.  If an application expects the X
+byte to be used as an alpha channel, then it should specify JCS_EXT_RGBA,
+JCS_EXT_BGRA, JCS_EXT_ABGR, or JCS_EXT_ARGB.  When these colorspace constants
+are used, the X byte is guaranteed to be 0xFF, which is interpreted as opaque.
+
+Your application can check for the existence of the alpha channel colorspace
+extensions at compile time with:
+
+  #ifdef JCS_ALPHA_EXTENSIONS
+
+jcstest.c, located in the libjpeg-turbo source tree, demonstrates how to check
+for the existence of the colorspace extensions at compile time and run time.
+
 =================================
 libjpeg v7 and v8 API/ABI support
 =================================

-libjpeg v7 and v8 added new features to the API/ABI, and, unfortunately, the
-compression and decompression structures were extended in a backward-
-incompatible manner to accommodate these features.  Thus, programs which are
+With libjpeg v7 and v8, new features were added that necessitated extending the
+compression and decompression structures.  Unfortunately, due to the exposed
+nature of those structures, extending them also necessitated breaking backward
+ABI compatibility with previous libjpeg releases.  Thus, programs that are
 built to use libjpeg v7 or v8 did not work with libjpeg-turbo, since it is
 based on the libjpeg v6b code base.  Although libjpeg v7 and v8 are still not
 as widely used as v6b, enough programs (including a few Linux distros) have
 made the switch that it was desirable to provide support for the libjpeg v7/v8
-API/ABI in libjpeg-turbo.
-
-Some of the libjpeg v7 and v8 features -- DCT scaling, to name one -- involve
-deep modifications to the code which cannot be accommodated by libjpeg-turbo
-without either breaking compatibility with libjpeg v6b or producing an
-unsupportable mess.  In order to fully support libjpeg v8 with all of its
-features, we would have to essentially port the SIMD extensions to the libjpeg
-v8 code base and maintain two separate code trees.  We are hesitant to do this
-until/unless the newer libjpeg code bases garner more community support and
-involvement and until/unless we have some notion of whether future libjpeg
-releases will also be backward-incompatible.
+API/ABI in libjpeg-turbo.  Although libjpeg-turbo can now be configured as a
+drop-in replacement for libjpeg v7 or v8, it should be noted that not all of
+the features in libjpeg v7 and v8 are supported (see below.)

 By passing an argument of --with-jpeg7 or --with-jpeg8 to configure, or an
 argument of -DWITH_JPEG7=1 or -DWITH_JPEG8=1 to cmake, you can build a version
-of libjpeg-turbo which emulates the libjpeg v7 or v8 API/ABI, so that programs
-which are built against libjpeg v7 or v8 can be run with libjpeg-turbo.  The
+of libjpeg-turbo that emulates the libjpeg v7 or v8 API/ABI, so that programs
+that are built against libjpeg v7 or v8 can be run with libjpeg-turbo.  The
 following section describes which libjpeg v7+ features are supported and which
 aren't.

@ -264,6 +305,16 @@ Not supported:

 -- libjpeg: DCT scaling in compressor
   cinfo.scale_num and cinfo.scale_denom are silently ignored.
+   There is no technical reason why DCT scaling cannot be supported, but
+   without the SmartScale extension (see below), it would only be able to
+   down-scale using ratios of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and 8/9,
+   which is of limited usefulness.
+
+-- libjpeg: SmartScale
+   cinfo.block_size is silently ignored.
+   SmartScale is an extension to the JPEG format that allows for DCT block
+   sizes other than 8x8.  It would be difficult to support this feature while
+   retaining backward compatibility with libjpeg v6b.

 -- libjpeg: IDCT scaling extensions in decompressor
   libjpeg-turbo still supports IDCT scaling with scaling factors of 1/2, 1/4,
@ -271,9 +322,14 @@ Not supported:

 -- libjpeg: Fancy downsampling in compressor
   cinfo.do_fancy_downsampling is silently ignored.
+   This requires the DCT scaling feature, which is not supported.

 -- jpegtran: Scaling
-   Seems to depend on the DCT scaling feature, which isn't supported.
+   This requires both the DCT scaling and SmartScale features, which are not
+   supported.
+
+-- Lossless RGB JPEG files
+   This requires the SmartScale feature, which is not supported.


 *******************************************************************************
@ -285,12 +341,13 @@ Restart Markers
 ===============

 The optimized Huffman decoder in libjpeg-turbo does not handle restart markers
-in a way that makes libjpeg happy, so it is necessary to use the slow Huffman
-decoder when decompressing a JPEG image that has restart markers.  This can
-cause the decompression performance to drop by as much as 20%, but the
-performance will still be much much greater than that of libjpeg v6b.  Many
-consumer packages, such as PhotoShop, use restart markers when generating JPEG
-images, so images generated by those programs will experience this issue.
+in a way that makes the rest of the libjpeg infrastructure happy, so it is
+necessary to use the slow Huffman decoder when decompressing a JPEG image that
+has restart markers.  This can cause the decompression performance to drop by
+as much as 20%, but the performance will still be much greater than that of
+libjpeg.  Many consumer packages, such as PhotoShop, use restart markers when
+generating JPEG images, so images generated by those programs will experience
+this issue.

 ===============================================
 Fast Integer Forward DCT at High Quality Levels
--- a/media/libjpeg/cderror.h
+++ b/media/libjpeg/cderror.h
@ -1,134 +0,0 @@
-/*
- * cderror.h
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * Modified 2009 by Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file defines the error and message codes for the cjpeg/djpeg
- * applications.  These strings are not needed as part of the JPEG library
- * proper.
- * Edit this file to add new codes, or to translate the message strings to
- * some other language.
- */
-
-/*
- * To define the enum list of message codes, include this file without
- * defining macro JMESSAGE.  To create a message string table, include it
- * again with a suitable JMESSAGE definition (see jerror.c for an example).
- */
-#ifndef JMESSAGE
-#ifndef CDERROR_H
-#define CDERROR_H
-/* First time through, define the enum list */
-#define JMAKE_ENUM_LIST
-#else
-/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
-#define JMESSAGE(code,string)
-#endif /* CDERROR_H */
-#endif /* JMESSAGE */
-
-#ifdef JMAKE_ENUM_LIST
-
-typedef enum {
-
-#define JMESSAGE(code,string)	code ,
-
-#endif /* JMAKE_ENUM_LIST */
-
-JMESSAGE(JMSG_FIRSTADDONCODE=1000, NULL) /* Must be first entry! */
-
-#ifdef BMP_SUPPORTED
-JMESSAGE(JERR_BMP_BADCMAP, "Unsupported BMP colormap format")
-JMESSAGE(JERR_BMP_BADDEPTH, "Only 8- and 24-bit BMP files are supported")
-JMESSAGE(JERR_BMP_BADHEADER, "Invalid BMP file: bad header length")
-JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1")
-JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB")
-JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported")
-JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image")
-JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM")
-JMESSAGE(JTRC_BMP, "%ux%u 24-bit BMP image")
-JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image")
-JMESSAGE(JTRC_BMP_OS2, "%ux%u 24-bit OS2 BMP image")
-JMESSAGE(JTRC_BMP_OS2_MAPPED, "%ux%u 8-bit colormapped OS2 BMP image")
-#endif /* BMP_SUPPORTED */
-
-#ifdef GIF_SUPPORTED
-JMESSAGE(JERR_GIF_BUG, "GIF output got confused")
-JMESSAGE(JERR_GIF_CODESIZE, "Bogus GIF codesize %d")
-JMESSAGE(JERR_GIF_COLORSPACE, "GIF output must be grayscale or RGB")
-JMESSAGE(JERR_GIF_IMAGENOTFOUND, "Too few images in GIF file")
-JMESSAGE(JERR_GIF_NOT, "Not a GIF file")
-JMESSAGE(JTRC_GIF, "%ux%ux%d GIF image")
-JMESSAGE(JTRC_GIF_BADVERSION,
-	 "Warning: unexpected GIF version number '%c%c%c'")
-JMESSAGE(JTRC_GIF_EXTENSION, "Ignoring GIF extension block of type 0x%02x")
-JMESSAGE(JTRC_GIF_NONSQUARE, "Caution: nonsquare pixels in input")
-JMESSAGE(JWRN_GIF_BADDATA, "Corrupt data in GIF file")
-JMESSAGE(JWRN_GIF_CHAR, "Bogus char 0x%02x in GIF file, ignoring")
-JMESSAGE(JWRN_GIF_ENDCODE, "Premature end of GIF image")
-JMESSAGE(JWRN_GIF_NOMOREDATA, "Ran out of GIF bits")
-#endif /* GIF_SUPPORTED */
-
-#ifdef PPM_SUPPORTED
-JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB")
-JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file")
-JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file")
-JMESSAGE(JTRC_PGM, "%ux%u PGM image")
-JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image")
-JMESSAGE(JTRC_PPM, "%ux%u PPM image")
-JMESSAGE(JTRC_PPM_TEXT, "%ux%u text PPM image")
-#endif /* PPM_SUPPORTED */
-
-#ifdef RLE_SUPPORTED
-JMESSAGE(JERR_RLE_BADERROR, "Bogus error code from RLE library")
-JMESSAGE(JERR_RLE_COLORSPACE, "RLE output must be grayscale or RGB")
-JMESSAGE(JERR_RLE_DIMENSIONS, "Image dimensions (%ux%u) too large for RLE")
-JMESSAGE(JERR_RLE_EMPTY, "Empty RLE file")
-JMESSAGE(JERR_RLE_EOF, "Premature EOF in RLE header")
-JMESSAGE(JERR_RLE_MEM, "Insufficient memory for RLE header")
-JMESSAGE(JERR_RLE_NOT, "Not an RLE file")
-JMESSAGE(JERR_RLE_TOOMANYCHANNELS, "Cannot handle %d output channels for RLE")
-JMESSAGE(JERR_RLE_UNSUPPORTED, "Cannot handle this RLE setup")
-JMESSAGE(JTRC_RLE, "%ux%u full-color RLE file")
-JMESSAGE(JTRC_RLE_FULLMAP, "%ux%u full-color RLE file with map of length %d")
-JMESSAGE(JTRC_RLE_GRAY, "%ux%u grayscale RLE file")
-JMESSAGE(JTRC_RLE_MAPGRAY, "%ux%u grayscale RLE file with map of length %d")
-JMESSAGE(JTRC_RLE_MAPPED, "%ux%u colormapped RLE file with map of length %d")
-#endif /* RLE_SUPPORTED */
-
-#ifdef TARGA_SUPPORTED
-JMESSAGE(JERR_TGA_BADCMAP, "Unsupported Targa colormap format")
-JMESSAGE(JERR_TGA_BADPARMS, "Invalid or unsupported Targa file")
-JMESSAGE(JERR_TGA_COLORSPACE, "Targa output must be grayscale or RGB")
-JMESSAGE(JTRC_TGA, "%ux%u RGB Targa image")
-JMESSAGE(JTRC_TGA_GRAY, "%ux%u grayscale Targa image")
-JMESSAGE(JTRC_TGA_MAPPED, "%ux%u colormapped Targa image")
-#else
-JMESSAGE(JERR_TGA_NOTCOMP, "Targa support was not compiled")
-#endif /* TARGA_SUPPORTED */
-
-JMESSAGE(JERR_BAD_CMAP_FILE,
-	 "Color map file is invalid or of unsupported format")
-JMESSAGE(JERR_TOO_MANY_COLORS,
-	 "Output file format cannot handle %d colormap entries")
-JMESSAGE(JERR_UNGETC_FAILED, "ungetc failed")
-#ifdef TARGA_SUPPORTED
-JMESSAGE(JERR_UNKNOWN_FORMAT,
-	 "Unrecognized input file format --- perhaps you need -targa")
-#else
-JMESSAGE(JERR_UNKNOWN_FORMAT, "Unrecognized input file format")
-#endif
-JMESSAGE(JERR_UNSUPPORTED_FORMAT, "Unsupported output file format")
-
-#ifdef JMAKE_ENUM_LIST
-
-  JMSG_LASTADDONCODE
-} ADDON_MESSAGE_CODE;
-
-#undef JMAKE_ENUM_LIST
-#endif /* JMAKE_ENUM_LIST */
-
-/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
-#undef JMESSAGE
--- a/media/libjpeg/cdjpeg.h
+++ b/media/libjpeg/cdjpeg.h
@ -1,187 +0,0 @@
-/*
- * cdjpeg.h
- *
- * Copyright (C) 1994-1997, Thomas G. Lane.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains common declarations for the sample applications
- * cjpeg and djpeg.  It is NOT used by the core JPEG library.
- */
-
-#define JPEG_CJPEG_DJPEG	/* define proper options in jconfig.h */
-#define JPEG_INTERNAL_OPTIONS	/* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */
-#include "jinclude.h"
-#include "jpeglib.h"
-#include "jerror.h"		/* get library error codes too */
-#include "cderror.h"		/* get application-specific error codes */
-
-
-/*
- * Object interface for cjpeg's source file decoding modules
- */
-
-typedef struct cjpeg_source_struct * cjpeg_source_ptr;
-
-struct cjpeg_source_struct {
-  JMETHOD(void, start_input, (j_compress_ptr cinfo,
-			      cjpeg_source_ptr sinfo));
-  JMETHOD(JDIMENSION, get_pixel_rows, (j_compress_ptr cinfo,
-				       cjpeg_source_ptr sinfo));
-  JMETHOD(void, finish_input, (j_compress_ptr cinfo,
-			       cjpeg_source_ptr sinfo));
-
-  FILE *input_file;
-
-  JSAMPARRAY buffer;
-  JDIMENSION buffer_height;
-};
-
-
-/*
- * Object interface for djpeg's output file encoding modules
- */
-
-typedef struct djpeg_dest_struct * djpeg_dest_ptr;
-
-struct djpeg_dest_struct {
-  /* start_output is called after jpeg_start_decompress finishes.
-   * The color map will be ready at this time, if one is needed.
-   */
-  JMETHOD(void, start_output, (j_decompress_ptr cinfo,
-			       djpeg_dest_ptr dinfo));
-  /* Emit the specified number of pixel rows from the buffer. */
-  JMETHOD(void, put_pixel_rows, (j_decompress_ptr cinfo,
-				 djpeg_dest_ptr dinfo,
-				 JDIMENSION rows_supplied));
-  /* Finish up at the end of the image. */
-  JMETHOD(void, finish_output, (j_decompress_ptr cinfo,
-				djpeg_dest_ptr dinfo));
-
-  /* Target file spec; filled in by djpeg.c after object is created. */
-  FILE * output_file;
-
-  /* Output pixel-row buffer.  Created by module init or start_output.
-   * Width is cinfo->output_width * cinfo->output_components;
-   * height is buffer_height.
-   */
-  JSAMPARRAY buffer;
-  JDIMENSION buffer_height;
-};
-
-
-/*
- * cjpeg/djpeg may need to perform extra passes to convert to or from
- * the source/destination file format.  The JPEG library does not know
- * about these passes, but we'd like them to be counted by the progress
- * monitor.  We use an expanded progress monitor object to hold the
- * additional pass count.
- */
-
-struct cdjpeg_progress_mgr {
-  struct jpeg_progress_mgr pub;	/* fields known to JPEG library */
-  int completed_extra_passes;	/* extra passes completed */
-  int total_extra_passes;	/* total extra */
-  /* last printed percentage stored here to avoid multiple printouts */
-  int percent_done;
-};
-
-typedef struct cdjpeg_progress_mgr * cd_progress_ptr;
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jinit_read_bmp		jIRdBMP
-#define jinit_write_bmp		jIWrBMP
-#define jinit_read_gif		jIRdGIF
-#define jinit_write_gif		jIWrGIF
-#define jinit_read_ppm		jIRdPPM
-#define jinit_write_ppm		jIWrPPM
-#define jinit_read_rle		jIRdRLE
-#define jinit_write_rle		jIWrRLE
-#define jinit_read_targa	jIRdTarga
-#define jinit_write_targa	jIWrTarga
-#define read_quant_tables	RdQTables
-#define read_scan_script	RdScnScript
-#define set_quality_ratings     SetQRates
-#define set_quant_slots		SetQSlots
-#define set_sample_factors	SetSFacts
-#define read_color_map		RdCMap
-#define enable_signal_catcher	EnSigCatcher
-#define start_progress_monitor	StProgMon
-#define end_progress_monitor	EnProgMon
-#define read_stdin		RdStdin
-#define write_stdout		WrStdout
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-/* Module selection routines for I/O modules. */
-
-EXTERN(cjpeg_source_ptr) jinit_read_bmp JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_bmp JPP((j_decompress_ptr cinfo,
-					    boolean is_os2));
-EXTERN(cjpeg_source_ptr) jinit_read_gif JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_gif JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_ppm JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_ppm JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_rle JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_rle JPP((j_decompress_ptr cinfo));
-EXTERN(cjpeg_source_ptr) jinit_read_targa JPP((j_compress_ptr cinfo));
-EXTERN(djpeg_dest_ptr) jinit_write_targa JPP((j_decompress_ptr cinfo));
-
-/* cjpeg support routines (in rdswitch.c) */
-
-EXTERN(boolean) read_quant_tables JPP((j_compress_ptr cinfo, char * filename,
-				       boolean force_baseline));
-EXTERN(boolean) read_scan_script JPP((j_compress_ptr cinfo, char * filename));
-EXTERN(boolean) set_quality_ratings JPP((j_compress_ptr cinfo, char *arg,
-					 boolean force_baseline));
-EXTERN(boolean) set_quant_slots JPP((j_compress_ptr cinfo, char *arg));
-EXTERN(boolean) set_sample_factors JPP((j_compress_ptr cinfo, char *arg));
-
-/* djpeg support routines (in rdcolmap.c) */
-
-EXTERN(void) read_color_map JPP((j_decompress_ptr cinfo, FILE * infile));
-
-/* common support routines (in cdjpeg.c) */
-
-EXTERN(void) enable_signal_catcher JPP((j_common_ptr cinfo));
-EXTERN(void) start_progress_monitor JPP((j_common_ptr cinfo,
-					 cd_progress_ptr progress));
-EXTERN(void) end_progress_monitor JPP((j_common_ptr cinfo));
-EXTERN(boolean) keymatch JPP((char * arg, const char * keyword, int minchars));
-EXTERN(FILE *) read_stdin JPP((void));
-EXTERN(FILE *) write_stdout JPP((void));
-
-/* miscellaneous useful macros */
-
-#ifdef DONT_USE_B_MODE		/* define mode parameters for fopen() */
-#define READ_BINARY	"r"
-#define WRITE_BINARY	"w"
-#else
-#ifdef VMS			/* VMS is very nonstandard */
-#define READ_BINARY	"rb", "ctx=stm"
-#define WRITE_BINARY	"wb", "ctx=stm"
-#else				/* standard ANSI-compliant case */
-#define READ_BINARY	"rb"
-#define WRITE_BINARY	"wb"
-#endif
-#endif
-
-#ifndef EXIT_FAILURE		/* define exit() codes if not provided */
-#define EXIT_FAILURE  1
-#endif
-#ifndef EXIT_SUCCESS
-#ifdef VMS
-#define EXIT_SUCCESS  1		/* VMS is very nonstandard */
-#else
-#define EXIT_SUCCESS  0
-#endif
-#endif
-#ifndef EXIT_WARNING
-#ifdef VMS
-#define EXIT_WARNING  1		/* VMS is very nonstandard */
-#else
-#define EXIT_WARNING  2
-#endif
-#endif
--- a/media/libjpeg/config.h
+++ b/media/libjpeg/config.h
@ -0,0 +1,6 @@
+#define VERSION "1.2.0"
+#define BUILD "2012-02-10"
+#define PACKAGE_NAME "libjpeg-turbo"
+
+/* Need to use Mozilla-specific function inlining. */
+#define INLINE NS_ALWAYS_INLINE
--- a/media/libjpeg/jaricom.c
+++ b/media/libjpeg/jaricom.c
@ -150,3 +150,4 @@ const INT32 jpeg_aritab[113+1] = {
 * as recommended in Section 10.3 Table 5 of ITU-T Rec. T.851.
 */
  V( 113, 0x5a1d, 113, 113, 0 )
+};
--- a/media/libjpeg/jccolext.c
+++ b/media/libjpeg/jccolext.c
@ -0,0 +1,114 @@
+/*
+ * jccolext.c
+ *
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Copyright (C) 2009-2011, D. R. Commander.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+
+/* This file is included by jccolor.c */
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ *
+ * Note that we change from the application's interleaved-pixel format
+ * to our internal noninterleaved, one-plane-per-component format.
+ * The input buffer is therefore three times as wide as the output buffer.
+ *
+ * A starting row offset is provided only for the output buffer.  The caller
+ * can easily adjust the passed input_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+INLINE
+LOCAL(void)
+rgb_ycc_convert_internal (j_compress_ptr cinfo,
+                          JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                          JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;
+      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (JSAMPLE)
+		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
+		 >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (JSAMPLE)
+		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles RGB->grayscale conversion, which is the same
+ * as the RGB->Y portion of RGB->YCbCr.
+ * We assume rgb_ycc_start has been called (we only use the Y tables).
+ */
+
+INLINE
+LOCAL(void)
+rgb_gray_convert_internal (j_compress_ptr cinfo,
+                           JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                           JDIMENSION output_row, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int r, g, b;
+  register INT32 * ctab = cconvert->rgb_ycc_tab;
+  register JSAMPROW inptr;
+  register JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = GETJSAMPLE(inptr[RGB_RED]);
+      g = GETJSAMPLE(inptr[RGB_GREEN]);
+      b = GETJSAMPLE(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;
+      /* Y */
+      outptr[col] = (JSAMPLE)
+		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
+		 >> SCALEBITS);
+    }
+  }
+}
--- a/media/libjpeg/jccolor.c
+++ b/media/libjpeg/jccolor.c
@ -3,7 +3,7 @@
 *
 * Copyright (C) 1991-1996, Thomas G. Lane.
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009 D. R. Commander
+ * Copyright (C) 2009-2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -14,6 +14,7 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
+#include "config.h"


 /* Private subobject */
@ -81,72 +82,97 @@ typedef my_color_converter * my_cconvert_ptr;
 #define TABLE_SIZE	(8*(MAXJSAMPLE+1))


-#if BITS_IN_JSAMPLE == 8
+/* Include inline routines for colorspace extensions */

-static const unsigned char red_lut[256] = {
-  0 , 0 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 3 , 3 , 3 , 4 , 4 , 4 , 4 ,
-  5 , 5 , 5 , 6 , 6 , 6 , 7 , 7 , 7 , 7 , 8 , 8 , 8 , 9 , 9 , 9 ,
-  10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14,
-  14, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19,
-  19, 19, 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 24,
-  24, 24, 25, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28,
-  29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33,
-  33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 36, 37, 37, 37, 38, 38,
-  38, 39, 39, 39, 39, 40, 40, 40, 41, 41, 41, 42, 42, 42, 42, 43,
-  43, 43, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 47, 47, 47, 48,
-  48, 48, 48, 49, 49, 49, 50, 50, 50, 51, 51, 51, 51, 52, 52, 52,
-  53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57,
-  57, 58, 58, 58, 59, 59, 59, 60, 60, 60, 60, 61, 61, 61, 62, 62,
-  62, 62, 63, 63, 63, 64, 64, 64, 65, 65, 65, 65, 66, 66, 66, 67,
-  67, 67, 68, 68, 68, 68, 69, 69, 69, 70, 70, 70, 71, 71, 71, 71,
-  72, 72, 72, 73, 73, 73, 74, 74, 74, 74, 75, 75, 75, 76, 76, 76
-};
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE

-static const unsigned char green_lut[256] = {
-  0  , 1  , 1  , 2  , 2  , 3  , 4  , 4  , 5  , 5  , 6  , 6  ,
-  7  , 8  , 8  , 9  , 9  , 10 , 11 , 11 , 12 , 12 , 13 , 14 ,
-  14 , 15 , 15 , 16 , 16 , 17 , 18 , 18 , 19 , 19 , 20 , 21 ,
-  21 , 22 , 22 , 23 , 23 , 24 , 25 , 25 , 26 , 26 , 27 , 28 ,
-  28 , 29 , 29 , 30 , 31 , 31 , 32 , 32 , 33 , 33 , 34 , 35 ,
-  35 , 36 , 36 , 37 , 38 , 38 , 39 , 39 , 40 , 41 , 41 , 42 ,
-  42 , 43 , 43 , 44 , 45 , 45 , 46 , 46 , 47 , 48 , 48 , 49 ,
-  49 , 50 , 50 , 51 , 52 , 52 , 53 , 53 , 54 , 55 , 55 , 56 ,
-  56 , 57 , 58 , 58 , 59 , 59 , 60 , 60 , 61 , 62 , 62 , 63 ,
-  63 , 64 , 65 , 65 , 66 , 66 , 67 , 68 , 68 , 69 , 69 , 70 ,
-  70 , 71 , 72 , 72 , 73 , 73 , 74 , 75 , 75 , 76 , 76 , 77 ,
-  77 , 78 , 79 , 79 , 80 , 80 , 81 , 82 , 82 , 83 , 83 , 84 ,
-  85 , 85 , 86 , 86 , 87 , 87 , 88 , 89 , 89 , 90 , 90 , 91 ,
-  92 , 92 , 93 , 93 , 94 , 95 , 95 , 96 , 96 , 97 , 97 , 98 ,
-  99 , 99 , 100, 100, 101, 102, 102, 103, 103, 104, 104, 105,
-  106, 106, 107, 107, 108, 109, 109, 110, 110, 111, 112, 112,
-  113, 113, 114, 114, 115, 116, 116, 117, 117, 118, 119, 119,
-  120, 120, 121, 122, 122, 123, 123, 124, 124, 125, 126, 126,
-  127, 127, 128, 129, 129, 130, 130, 131, 131, 132, 133, 133,
-  134, 134, 135, 136, 136, 137, 137, 138, 139, 139, 140, 140,
-  141, 141, 142, 143, 143, 144, 144, 145, 146, 146, 147, 147,
-  148, 149, 149, 150
-};
+#define RGB_RED EXT_RGB_RED
+#define RGB_GREEN EXT_RGB_GREEN
+#define RGB_BLUE EXT_RGB_BLUE
+#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+#define rgb_ycc_convert_internal extrgb_ycc_convert_internal
+#define rgb_gray_convert_internal extrgb_gray_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal

-static const unsigned char blue_lut[256] = {
-  0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 ,
-  2 , 2 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 3 , 4 ,
-  4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 5 , 5 , 5 , 5 ,
-  5 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 7 , 7 , 7 , 7 , 7 , 7 ,
-  7 , 7 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 8 , 9 , 9 , 9 , 9 , 9 ,
-  9 , 9 , 9 , 9 , 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
-  11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13,
-  13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14,
-  15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
-  16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18,
-  18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
-  20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22,
-  22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24,
-  24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25,
-  26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27,
-  27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29
-};
+#define RGB_RED EXT_RGBX_RED
+#define RGB_GREEN EXT_RGBX_GREEN
+#define RGB_BLUE EXT_RGBX_BLUE
+#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+#define rgb_ycc_convert_internal extrgbx_ycc_convert_internal
+#define rgb_gray_convert_internal extrgbx_gray_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal

-#endif
+#define RGB_RED EXT_BGR_RED
+#define RGB_GREEN EXT_BGR_GREEN
+#define RGB_BLUE EXT_BGR_BLUE
+#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+#define rgb_ycc_convert_internal extbgr_ycc_convert_internal
+#define rgb_gray_convert_internal extbgr_gray_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+
+#define RGB_RED EXT_BGRX_RED
+#define RGB_GREEN EXT_BGRX_GREEN
+#define RGB_BLUE EXT_BGRX_BLUE
+#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+#define rgb_ycc_convert_internal extbgrx_ycc_convert_internal
+#define rgb_gray_convert_internal extbgrx_gray_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+
+#define RGB_RED EXT_XBGR_RED
+#define RGB_GREEN EXT_XBGR_GREEN
+#define RGB_BLUE EXT_XBGR_BLUE
+#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+#define rgb_ycc_convert_internal extxbgr_ycc_convert_internal
+#define rgb_gray_convert_internal extxbgr_gray_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+
+#define RGB_RED EXT_XRGB_RED
+#define RGB_GREEN EXT_XRGB_GREEN
+#define RGB_BLUE EXT_XRGB_BLUE
+#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+#define rgb_ycc_convert_internal extxrgb_ycc_convert_internal
+#define rgb_gray_convert_internal extxrgb_gray_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal


 /*
@ -187,14 +213,6 @@ rgb_ycc_start (j_compress_ptr cinfo)

 /*
 * Convert some rows of samples to the JPEG colorspace.
- *
- * Note that we change from the application's interleaved-pixel format
- * to our internal noninterleaved, one-plane-per-component format.
- * The input buffer is therefore three times as wide as the output buffer.
- *
- * A starting row offset is provided only for the output buffer.  The caller
- * can easily adjust the passed input_buf value to accommodate any row
- * offset required on that side.
 */

 METHODDEF(void)
@ -202,43 +220,39 @@ rgb_ycc_convert (j_compress_ptr cinfo,
 		 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
 		 JDIMENSION output_row, int num_rows)
 {
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int r, g, b;
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  register JSAMPROW inptr;
-  register JSAMPROW outptr0, outptr1, outptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->image_width;
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr0 = output_buf[0][output_row];
-    outptr1 = output_buf[1][output_row];
-    outptr2 = output_buf[2][output_row];
-    output_row++;
-    for (col = 0; col < num_cols; col++) {
-      r = GETJSAMPLE(inptr[rgb_red[cinfo->in_color_space]]);
-      g = GETJSAMPLE(inptr[rgb_green[cinfo->in_color_space]]);
-      b = GETJSAMPLE(inptr[rgb_blue[cinfo->in_color_space]]);
-      inptr += rgb_pixelsize[cinfo->in_color_space];
-      /* If the inputs are 0..MAXJSAMPLE, the outputs of these equations
-       * must be too; we do not need an explicit range-limiting operation.
-       * Hence the value being shifted is never negative, and we don't
-       * need the general RIGHT_SHIFT macro.
-       */
-      /* Y */
-      outptr0[col] = (JSAMPLE)
-		((ctab[r+R_Y_OFF] + ctab[g+G_Y_OFF] + ctab[b+B_Y_OFF])
-		 >> SCALEBITS);
-      /* Cb */
-      outptr1[col] = (JSAMPLE)
-		((ctab[r+R_CB_OFF] + ctab[g+G_CB_OFF] + ctab[b+B_CB_OFF])
-		 >> SCALEBITS);
-      /* Cr */
-      outptr2[col] = (JSAMPLE)
-		((ctab[r+R_CR_OFF] + ctab[g+G_CR_OFF] + ctab[b+B_CR_OFF])
-		 >> SCALEBITS);
-    }
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGB:
+      extrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      extrgbx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      extbgrx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      extxbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      extxrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    default:
+      rgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                               num_rows);
+      break;
  }
 }

@ -248,9 +262,6 @@ rgb_ycc_convert (j_compress_ptr cinfo,

 /*
 * Convert some rows of samples to the JPEG colorspace.
- * This version handles RGB->grayscale conversion, which is the same
- * as the RGB->Y portion of RGB->YCbCr.
- * We assume rgb_ycc_start has been called (we only use the Y tables).
 */

 METHODDEF(void)
@ -258,36 +269,39 @@ rgb_gray_convert (j_compress_ptr cinfo,
 		  JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
 		  JDIMENSION output_row, int num_rows)
 {
-  #if BITS_IN_JSAMPLE != 8
-  register INT32 * ctab = cconvert->rgb_ycc_tab;
-  #endif
-  register JSAMPROW inptr;
-  register JSAMPROW outptr;
-  JSAMPLE *maxoutptr;
-  JDIMENSION num_cols = cinfo->image_width;
-  int rindex = rgb_red[cinfo->in_color_space];
-  int gindex = rgb_green[cinfo->in_color_space];
-  int bindex = rgb_blue[cinfo->in_color_space];
-  int rgbstride = rgb_pixelsize[cinfo->in_color_space];
-
-  while (--num_rows >= 0) {
-    inptr = *input_buf++;
-    outptr = output_buf[0][output_row];
-    maxoutptr = &outptr[num_cols];
-    output_row++;
-    for (; outptr < maxoutptr; outptr++, inptr += rgbstride) {
-      /* Y */
-      #if BITS_IN_JSAMPLE == 8
-      *outptr = red_lut[inptr[rindex]] + green_lut[inptr[gindex]]
-	    + blue_lut[inptr[bindex]];
-      #else
-      *outptr = (JSAMPLE)
-	    ((ctab[GETJSAMPLE(inptr[rindex])+R_Y_OFF]
-	     + ctab[GETJSAMPLE(inptr[gindex])+G_Y_OFF]
-	     + ctab[GETJSAMPLE(inptr[bindex])+B_Y_OFF])
-	     >> SCALEBITS);
-      #endif
-    }
+  switch (cinfo->in_color_space) {
+    case JCS_EXT_RGB:
+      extrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      extrgbx_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                    num_rows);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      extbgrx_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                    num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      extxbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                    num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      extxrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                    num_rows);
+      break;
+    default:
+      rgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                num_rows);
+      break;
  }
 }

@ -453,6 +467,10 @@ jinit_color_converter (j_compress_ptr cinfo)
  case JCS_EXT_BGRX:
  case JCS_EXT_XBGR:
  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
    if (cinfo->input_components != rgb_pixelsize[cinfo->in_color_space])
      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
    break;
@ -487,9 +505,17 @@ jinit_color_converter (j_compress_ptr cinfo)
             cinfo->in_color_space == JCS_EXT_BGR ||
             cinfo->in_color_space == JCS_EXT_BGRX ||
             cinfo->in_color_space == JCS_EXT_XBGR ||
-             cinfo->in_color_space == JCS_EXT_XRGB) {
-      cconvert->pub.start_pass = rgb_ycc_start;
-      cconvert->pub.color_convert = rgb_gray_convert;
+             cinfo->in_color_space == JCS_EXT_XRGB ||
+             cinfo->in_color_space == JCS_EXT_RGBA ||
+             cinfo->in_color_space == JCS_EXT_BGRA ||
+             cinfo->in_color_space == JCS_EXT_ABGR ||
+             cinfo->in_color_space == JCS_EXT_ARGB) {
+      if (jsimd_can_rgb_gray())
+        cconvert->pub.color_convert = jsimd_rgb_gray_convert;
+      else {
+        cconvert->pub.start_pass = rgb_ycc_start;
+        cconvert->pub.color_convert = rgb_gray_convert;
+      }
    } else if (cinfo->in_color_space == JCS_YCbCr)
      cconvert->pub.color_convert = grayscale_convert;
    else
@ -503,6 +529,10 @@ jinit_color_converter (j_compress_ptr cinfo)
  case JCS_EXT_BGRX:
  case JCS_EXT_XBGR:
  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
    if (cinfo->num_components != 3)
      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
    if (cinfo->in_color_space == cinfo->jpeg_color_space &&
@ -521,7 +551,11 @@ jinit_color_converter (j_compress_ptr cinfo)
        cinfo->in_color_space == JCS_EXT_BGR ||
        cinfo->in_color_space == JCS_EXT_BGRX ||
        cinfo->in_color_space == JCS_EXT_XBGR ||
-        cinfo->in_color_space == JCS_EXT_XRGB) {
+        cinfo->in_color_space == JCS_EXT_XRGB ||
+        cinfo->in_color_space == JCS_EXT_RGBA ||
+        cinfo->in_color_space == JCS_EXT_BGRA ||
+        cinfo->in_color_space == JCS_EXT_ABGR ||
+        cinfo->in_color_space == JCS_EXT_ARGB) {
      if (jsimd_can_rgb_ycc())
        cconvert->pub.color_convert = jsimd_rgb_ycc_convert;
      else {
--- a/media/libjpeg/jcdctmgr.c
+++ b/media/libjpeg/jcdctmgr.c
@ -182,7 +182,7 @@ compute_reciprocal (UINT16 divisor, DCTELEM * dtbl)
    /* fq will be one bit too large to fit in DCTELEM, so adjust */
    fq >>= 1;
    r--;
-  } else if (fr <= (divisor / 2)) { /* fractional part is < 0.5 */
+  } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */
    c++;
  } else { /* fractional part is > 0.5 */
    fq++;
--- a/media/libjpeg/jchuff.c
+++ b/media/libjpeg/jchuff.c
@ -2,6 +2,7 @@
 * jchuff.c
 *
 * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Copyright (C) 2009-2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -18,6 +19,15 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jchuff.h"		/* Declarations shared with jcphuff.c */
+#include <limits.h>
+
+static unsigned char jpeg_nbits_table[65536];
+static int jpeg_nbits_table_init = 0;
+
+#ifndef min
+ #define min(a,b) ((a)<(b)?(a):(b))
+#endif
+

 /* Expanded entropy encoder object for Huffman encoding.
 *
@ -26,7 +36,7 @@
 */

 typedef struct {
-  INT32 put_buffer;		/* current bit-accumulation buffer */
+  size_t put_buffer;		/* current bit-accumulation buffer */
  int put_bits;			/* # of bits now in it */
  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
 } savable_state;
@ -260,6 +270,15 @@ jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno,
    dtbl->ehufco[i] = huffcode[p];
    dtbl->ehufsi[i] = huffsize[p];
  }
+
+  if(!jpeg_nbits_table_init) {
+    for(i = 0; i < 65536; i++) {
+      int nbits = 0, temp = i;
+      while (temp) {temp >>= 1;  nbits++;}
+      jpeg_nbits_table[i] = nbits;
+    }
+    jpeg_nbits_table_init = 1;
+  }
 }


@ -279,6 +298,8 @@ dump_buffer (working_state * state)
 {
  struct jpeg_destination_mgr * dest = state->cinfo->dest;

+  dest->free_in_buffer = state->free_in_buffer;
+
  if (! (*dest->empty_output_buffer) (state->cinfo))
    return FALSE;
  /* After a successful buffer dump, must reset buffer pointers */
@ -290,58 +311,138 @@ dump_buffer (working_state * state)

 /* Outputting bits to the file */

-/* Only the right 24 bits of put_buffer are used; the valid bits are
- * left-justified in this part.  At most 16 bits can be passed to emit_bits
- * in one call, and we never retain more than 7 bits in put_buffer
- * between calls, so 24 bits are sufficient.
+/* These macros perform the same task as the emit_bits() function in the
+ * original libjpeg code.  In addition to reducing overhead by explicitly
+ * inlining the code, additional performance is achieved by taking into
+ * account the size of the bit buffer and waiting until it is almost full
+ * before emptying it.  This mostly benefits 64-bit platforms, since 6
+ * bytes can be stored in a 64-bit bit buffer before it has to be emptied.
 */

-INLINE
-LOCAL(boolean)
-emit_bits (working_state * state, unsigned int code, int size)
-/* Emit some bits; return TRUE if successful, FALSE if must suspend */
-{
-  /* This routine is heavily used, so it's worth coding tightly. */
-  register INT32 put_buffer = (INT32) code;
-  register int put_bits = state->cur.put_bits;
+#define EMIT_BYTE() { \
+  JOCTET c; \
+  put_bits -= 8; \
+  c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \
+  *buffer++ = c; \
+  if (c == 0xFF)  /* need to stuff a zero byte? */ \
+    *buffer++ = 0; \
+ }

-  /* if size is 0, caller used an invalid Huffman table entry */
-  if (size == 0)
-    ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE);
-
-  put_buffer &= (((INT32) 1)<<size) - 1; /* mask off any extra bits in code */
-  
-  put_bits += size;		/* new number of bits in buffer */
-  
-  put_buffer <<= 24 - put_bits; /* align incoming bits */
-
-  put_buffer |= state->cur.put_buffer; /* and merge with old buffer contents */
-  
-  while (put_bits >= 8) {
-    int c = (int) ((put_buffer >> 16) & 0xFF);
-    
-    emit_byte(state, c, return FALSE);
-    if (c == 0xFF) {		/* need to stuff a zero byte? */
-      emit_byte(state, 0, return FALSE);
-    }
-    put_buffer <<= 8;
-    put_bits -= 8;
-  }
-
-  state->cur.put_buffer = put_buffer; /* update state variables */
-  state->cur.put_bits = put_bits;
-
-  return TRUE;
+#define PUT_BITS(code, size) { \
+  put_bits += size; \
+  put_buffer = (put_buffer << size) | code; \
 }

+#define CHECKBUF15() { \
+  if (put_bits > 15) { \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+  } \
+}
+
+#define CHECKBUF31() { \
+  if (put_bits > 31) { \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+  } \
+}
+
+#define CHECKBUF47() { \
+  if (put_bits > 47) { \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+    EMIT_BYTE() \
+  } \
+}
+
+#if __WORDSIZE==64 || defined(_WIN64)
+
+#define EMIT_BITS(code, size) { \
+  CHECKBUF47() \
+  PUT_BITS(code, size) \
+}
+
+#define EMIT_CODE(code, size) { \
+  temp2 &= (((INT32) 1)<<nbits) - 1; \
+  CHECKBUF31() \
+  PUT_BITS(code, size) \
+  PUT_BITS(temp2, nbits) \
+ }
+
+#else
+
+#define EMIT_BITS(code, size) { \
+  PUT_BITS(code, size) \
+  CHECKBUF15() \
+}
+
+#define EMIT_CODE(code, size) { \
+  temp2 &= (((INT32) 1)<<nbits) - 1; \
+  PUT_BITS(code, size) \
+  CHECKBUF15() \
+  PUT_BITS(temp2, nbits) \
+  CHECKBUF15() \
+ }
+
+#endif
+
+
+#define BUFSIZE (DCTSIZE2 * 2)
+
+#define LOAD_BUFFER() { \
+  if (state->free_in_buffer < BUFSIZE) { \
+    localbuf = 1; \
+    buffer = _buffer; \
+  } \
+  else buffer = state->next_output_byte; \
+ }
+
+#define STORE_BUFFER() { \
+  if (localbuf) { \
+    bytes = buffer - _buffer; \
+    buffer = _buffer; \
+    while (bytes > 0) { \
+      bytestocopy = min(bytes, state->free_in_buffer); \
+      MEMCOPY(state->next_output_byte, buffer, bytestocopy); \
+      state->next_output_byte += bytestocopy; \
+      buffer += bytestocopy; \
+      state->free_in_buffer -= bytestocopy; \
+      if (state->free_in_buffer == 0) \
+        if (! dump_buffer(state)) return FALSE; \
+      bytes -= bytestocopy; \
+    } \
+  } \
+  else { \
+    state->free_in_buffer -= (buffer - state->next_output_byte); \
+    state->next_output_byte = buffer; \
+  } \
+ }
+

 LOCAL(boolean)
 flush_bits (working_state * state)
 {
-  if (! emit_bits(state, 0x7F, 7)) /* fill any partial byte with ones */
-    return FALSE;
+  JOCTET _buffer[BUFSIZE], *buffer;
+  size_t put_buffer;  int put_bits;
+  size_t bytes, bytestocopy;  int localbuf = 0;
+
+  put_buffer = state->cur.put_buffer;
+  put_bits = state->cur.put_bits;
+  LOAD_BUFFER()
+
+  /* fill any partial byte with ones */
+  PUT_BITS(0x7F, 7)
+  while (put_bits >= 8) EMIT_BYTE()
+
  state->cur.put_buffer = 0;	/* and reset bit-buffer to empty */
  state->cur.put_bits = 0;
+  STORE_BUFFER()
+
  return TRUE;
 }

@ -352,91 +453,108 @@ LOCAL(boolean)
 encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val,
 		  c_derived_tbl *dctbl, c_derived_tbl *actbl)
 {
-  register int temp, temp2;
-  register int nbits;
-  register int k, r, i;
-  
+  int temp, temp2, temp3;
+  int nbits;
+  int r, code, size;
+  JOCTET _buffer[BUFSIZE], *buffer;
+  size_t put_buffer;  int put_bits;
+  int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0];
+  size_t bytes, bytestocopy;  int localbuf = 0;
+
+  put_buffer = state->cur.put_buffer;
+  put_bits = state->cur.put_bits;
+  LOAD_BUFFER()
+
  /* Encode the DC coefficient difference per section F.1.2.1 */
  
  temp = temp2 = block[0] - last_dc_val;

-  if (temp < 0) {
-    temp = -temp;		/* temp is abs value of input */
-    /* For a negative input, want temp2 = bitwise complement of abs(input) */
-    /* This code assumes we are on a two's complement machine */
-    temp2--;
-  }
-  
+ /* This is a well-known technique for obtaining the absolute value without a
+  * branch.  It is derived from an assembly language technique presented in
+  * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
+  * Agner Fog.
+  */
+  temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
+  temp ^= temp3;
+  temp -= temp3;
+
+  /* For a negative input, want temp2 = bitwise complement of abs(input) */
+  /* This code assumes we are on a two's complement machine */
+  temp2 += temp3;
+
  /* Find the number of bits needed for the magnitude of the coefficient */
-  nbits = 0;
-  while (temp) {
-    nbits++;
-    temp >>= 1;
-  }
-  /* Check for out-of-range coefficient values.
-   * Since we're encoding a difference, the range limit is twice as much.
-   */
-  if (nbits > MAX_COEF_BITS+1)
-    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-  
+  nbits = jpeg_nbits_table[temp];
+
  /* Emit the Huffman-coded symbol for the number of bits */
-  if (! emit_bits(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits]))
-    return FALSE;
+  code = dctbl->ehufco[nbits];
+  size = dctbl->ehufsi[nbits];
+  PUT_BITS(code, size)
+  CHECKBUF15()
+
+  /* Mask off any extra bits in code */
+  temp2 &= (((INT32) 1)<<nbits) - 1;

  /* Emit that number of bits of the value, if positive, */
  /* or the complement of its magnitude, if negative. */
-  if (nbits)			/* emit_bits rejects calls with size 0 */
-    if (! emit_bits(state, (unsigned int) temp2, nbits))
-      return FALSE;
+  PUT_BITS(temp2, nbits)
+  CHECKBUF15()

  /* Encode the AC coefficients per section F.1.2.2 */
  
  r = 0;			/* r = run length of zeros */
-  
-  for (k = 1; k < DCTSIZE2; k++) {
-    if ((temp = block[jpeg_natural_order[k]]) == 0) {
-      r++;
-    } else {
-      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
-      while (r > 15) {
-	if (! emit_bits(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0]))
-	  return FALSE;
-	r -= 16;
-      }

-      temp2 = temp;
-      if (temp < 0) {
-	temp = -temp;		/* temp is abs value of input */
-	/* This code assumes we are on a two's complement machine */
-	temp2--;
-      }
-      
-      /* Find the number of bits needed for the magnitude of the coefficient */
-      nbits = 1;		/* there must be at least one 1 bit */
-      while ((temp >>= 1))
-	nbits++;
-      /* Check for out-of-range coefficient values */
-      if (nbits > MAX_COEF_BITS)
-	ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
-      
-      /* Emit Huffman symbol for run length / number of bits */
-      i = (r << 4) + nbits;
-      if (! emit_bits(state, actbl->ehufco[i], actbl->ehufsi[i]))
-	return FALSE;
+/* Manually unroll the k loop to eliminate the counter variable.  This
+ * improves performance greatly on systems with a limited number of
+ * registers (such as x86.)
+ */
+#define kloop(jpeg_natural_order_of_k) {  \
+  if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
+    r++; \
+  } else { \
+    temp2 = temp; \
+    /* Branch-less absolute value, bitwise complement, etc., same as above */ \
+    temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \
+    temp ^= temp3; \
+    temp -= temp3; \
+    temp2 += temp3; \
+    nbits = jpeg_nbits_table[temp]; \
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
+    while (r > 15) { \
+      EMIT_BITS(code_0xf0, size_0xf0) \
+      r -= 16; \
+    } \
+    /* Emit Huffman symbol for run length / number of bits */ \
+    temp3 = (r << 4) + nbits;  \
+    code = actbl->ehufco[temp3]; \
+    size = actbl->ehufsi[temp3]; \
+    EMIT_CODE(code, size) \
+    r = 0;  \
+  } \
+}

-      /* Emit that number of bits of the value, if positive, */
-      /* or the complement of its magnitude, if negative. */
-      if (! emit_bits(state, (unsigned int) temp2, nbits))
-	return FALSE;
-      
-      r = 0;
-    }
-  }
+  /* One iteration for each value in jpeg_natural_order[] */
+  kloop(1);   kloop(8);   kloop(16);  kloop(9);   kloop(2);   kloop(3);
+  kloop(10);  kloop(17);  kloop(24);  kloop(32);  kloop(25);  kloop(18);
+  kloop(11);  kloop(4);   kloop(5);   kloop(12);  kloop(19);  kloop(26);
+  kloop(33);  kloop(40);  kloop(48);  kloop(41);  kloop(34);  kloop(27);
+  kloop(20);  kloop(13);  kloop(6);   kloop(7);   kloop(14);  kloop(21);
+  kloop(28);  kloop(35);  kloop(42);  kloop(49);  kloop(56);  kloop(57);
+  kloop(50);  kloop(43);  kloop(36);  kloop(29);  kloop(22);  kloop(15);
+  kloop(23);  kloop(30);  kloop(37);  kloop(44);  kloop(51);  kloop(58);
+  kloop(59);  kloop(52);  kloop(45);  kloop(38);  kloop(31);  kloop(39);
+  kloop(46);  kloop(53);  kloop(60);  kloop(61);  kloop(54);  kloop(47);
+  kloop(55);  kloop(62);  kloop(63);

  /* If the last coef(s) were zero, emit an end-of-block code */
-  if (r > 0)
-    if (! emit_bits(state, actbl->ehufco[0], actbl->ehufsi[0]))
-      return FALSE;
+  if (r > 0) {
+    code = actbl->ehufco[0];
+    size = actbl->ehufsi[0];
+    EMIT_BITS(code, size)
+  }
+
+  state->cur.put_buffer = put_buffer;
+  state->cur.put_bits = put_bits;
+  STORE_BUFFER()

  return TRUE;
 }
--- a/media/libjpeg/jcmainct.c
+++ b/media/libjpeg/jcmainct.c
@ -68,32 +68,32 @@ METHODDEF(void) process_data_buffer_main
 METHODDEF(void)
 start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;

  /* Do nothing in raw-data mode. */
  if (cinfo->raw_data_in)
    return;

-  main->cur_iMCU_row = 0;	/* initialize counters */
-  main->rowgroup_ctr = 0;
-  main->suspended = FALSE;
-  main->pass_mode = pass_mode;	/* save mode for use by process_data */
+  main_ptr->cur_iMCU_row = 0;	/* initialize counters */
+  main_ptr->rowgroup_ctr = 0;
+  main_ptr->suspended = FALSE;
+  main_ptr->pass_mode = pass_mode;	/* save mode for use by process_data */

  switch (pass_mode) {
  case JBUF_PASS_THRU:
 #ifdef FULL_MAIN_BUFFER_SUPPORTED
-    if (main->whole_image[0] != NULL)
+    if (main_ptr->whole_image[0] != NULL)
      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
 #endif
-    main->pub.process_data = process_data_simple_main;
+    main_ptr->pub.process_data = process_data_simple_main;
    break;
 #ifdef FULL_MAIN_BUFFER_SUPPORTED
  case JBUF_SAVE_SOURCE:
  case JBUF_CRANK_DEST:
  case JBUF_SAVE_AND_PASS:
-    if (main->whole_image[0] == NULL)
+    if (main_ptr->whole_image[0] == NULL)
      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
-    main->pub.process_data = process_data_buffer_main;
+    main_ptr->pub.process_data = process_data_buffer_main;
    break;
 #endif
  default:
@ -114,46 +114,46 @@ process_data_simple_main (j_compress_ptr cinfo,
 			  JSAMPARRAY input_buf, JDIMENSION *in_row_ctr,
 			  JDIMENSION in_rows_avail)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;

-  while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
+  while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
    /* Read input data if we haven't filled the main buffer yet */
-    if (main->rowgroup_ctr < DCTSIZE)
+    if (main_ptr->rowgroup_ctr < DCTSIZE)
      (*cinfo->prep->pre_process_data) (cinfo,
 					input_buf, in_row_ctr, in_rows_avail,
-					main->buffer, &main->rowgroup_ctr,
+					main_ptr->buffer, &main_ptr->rowgroup_ctr,
 					(JDIMENSION) DCTSIZE);

    /* If we don't have a full iMCU row buffered, return to application for
     * more data.  Note that preprocessor will always pad to fill the iMCU row
     * at the bottom of the image.
     */
-    if (main->rowgroup_ctr != DCTSIZE)
+    if (main_ptr->rowgroup_ctr != DCTSIZE)
      return;

    /* Send the completed row to the compressor */
-    if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
+    if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
      /* If compressor did not consume the whole row, then we must need to
       * suspend processing and return to the application.  In this situation
       * we pretend we didn't yet consume the last input row; otherwise, if
       * it happened to be the last row of the image, the application would
       * think we were done.
       */
-      if (! main->suspended) {
+      if (! main_ptr->suspended) {
 	(*in_row_ctr)--;
-	main->suspended = TRUE;
+	main_ptr->suspended = TRUE;
      }
      return;
    }
    /* We did finish the row.  Undo our little suspension hack if a previous
     * call suspended; then mark the main buffer empty.
     */
-    if (main->suspended) {
+    if (main_ptr->suspended) {
      (*in_row_ctr)++;
-      main->suspended = FALSE;
+      main_ptr->suspended = FALSE;
    }
-    main->rowgroup_ctr = 0;
-    main->cur_iMCU_row++;
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->cur_iMCU_row++;
  }
 }

@ -173,22 +173,22 @@ process_data_buffer_main (j_compress_ptr cinfo,
  my_main_ptr main = (my_main_ptr) cinfo->main;
  int ci;
  jpeg_component_info *compptr;
-  boolean writing = (main->pass_mode != JBUF_CRANK_DEST);
+  boolean writing = (main_ptr->pass_mode != JBUF_CRANK_DEST);

-  while (main->cur_iMCU_row < cinfo->total_iMCU_rows) {
+  while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
    /* Realign the virtual buffers if at the start of an iMCU row. */
-    if (main->rowgroup_ctr == 0) {
+    if (main_ptr->rowgroup_ctr == 0) {
      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
 	   ci++, compptr++) {
-	main->buffer[ci] = (*cinfo->mem->access_virt_sarray)
-	  ((j_common_ptr) cinfo, main->whole_image[ci],
-	   main->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
+	main_ptr->buffer[ci] = (*cinfo->mem->access_virt_sarray)
+	  ((j_common_ptr) cinfo, main_ptr->whole_image[ci],
+	   main_ptr->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE),
 	   (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing);
      }
      /* In a read pass, pretend we just read some source data. */
      if (! writing) {
 	*in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE;
-	main->rowgroup_ctr = DCTSIZE;
+	main_ptr->rowgroup_ctr = DCTSIZE;
      }
    }

@ -197,40 +197,40 @@ process_data_buffer_main (j_compress_ptr cinfo,
    if (writing) {
      (*cinfo->prep->pre_process_data) (cinfo,
 					input_buf, in_row_ctr, in_rows_avail,
-					main->buffer, &main->rowgroup_ctr,
+					main_ptr->buffer, &main_ptr->rowgroup_ctr,
 					(JDIMENSION) DCTSIZE);
      /* Return to application if we need more data to fill the iMCU row. */
-      if (main->rowgroup_ctr < DCTSIZE)
+      if (main_ptr->rowgroup_ctr < DCTSIZE)
 	return;
    }

    /* Emit data, unless this is a sink-only pass. */
-    if (main->pass_mode != JBUF_SAVE_SOURCE) {
-      if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) {
+    if (main_ptr->pass_mode != JBUF_SAVE_SOURCE) {
+      if (! (*cinfo->coef->compress_data) (cinfo, main_ptr->buffer)) {
 	/* If compressor did not consume the whole row, then we must need to
 	 * suspend processing and return to the application.  In this situation
 	 * we pretend we didn't yet consume the last input row; otherwise, if
 	 * it happened to be the last row of the image, the application would
 	 * think we were done.
 	 */
-	if (! main->suspended) {
+	if (! main_ptr->suspended) {
 	  (*in_row_ctr)--;
-	  main->suspended = TRUE;
+	  main_ptr->suspended = TRUE;
 	}
 	return;
      }
      /* We did finish the row.  Undo our little suspension hack if a previous
       * call suspended; then mark the main buffer empty.
       */
-      if (main->suspended) {
+      if (main_ptr->suspended) {
 	(*in_row_ctr)++;
-	main->suspended = FALSE;
+	main_ptr->suspended = FALSE;
      }
    }

    /* If get here, we are done with this iMCU row.  Mark buffer empty. */
-    main->rowgroup_ctr = 0;
-    main->cur_iMCU_row++;
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->cur_iMCU_row++;
  }
 }

@ -244,15 +244,15 @@ process_data_buffer_main (j_compress_ptr cinfo,
 GLOBAL(void)
 jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
 {
-  my_main_ptr main;
+  my_main_ptr main_ptr;
  int ci;
  jpeg_component_info *compptr;

-  main = (my_main_ptr)
+  main_ptr = (my_main_ptr)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				SIZEOF(my_main_controller));
-  cinfo->main = (struct jpeg_c_main_controller *) main;
-  main->pub.start_pass = start_pass_main;
+  cinfo->main = (struct jpeg_c_main_controller *) main_ptr;
+  main_ptr->pub.start_pass = start_pass_main;

  /* We don't need to create a buffer in raw-data mode. */
  if (cinfo->raw_data_in)
@ -267,7 +267,7 @@ jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
    /* Note we pad the bottom to a multiple of the iMCU height */
    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
 	 ci++, compptr++) {
-      main->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
+      main_ptr->whole_image[ci] = (*cinfo->mem->request_virt_sarray)
 	((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE,
 	 compptr->width_in_blocks * DCTSIZE,
 	 (JDIMENSION) jround_up((long) compptr->height_in_blocks,
@ -279,12 +279,12 @@ jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer)
 #endif
  } else {
 #ifdef FULL_MAIN_BUFFER_SUPPORTED
-    main->whole_image[0] = NULL; /* flag for no virtual arrays */
+    main_ptr->whole_image[0] = NULL; /* flag for no virtual arrays */
 #endif
    /* Allocate a strip buffer for each component */
    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
 	 ci++, compptr++) {
-      main->buffer[ci] = (*cinfo->mem->alloc_sarray)
+      main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
 	((j_common_ptr) cinfo, JPOOL_IMAGE,
 	 compptr->width_in_blocks * DCTSIZE,
 	 (JDIMENSION) (compptr->v_samp_factor * DCTSIZE));
--- a/media/libjpeg/jcmaster.c
+++ b/media/libjpeg/jcmaster.c
@ -75,7 +75,9 @@ initial_setup (j_compress_ptr cinfo, boolean transcode_only)
  JDIMENSION jd_samplesperrow;

 #if JPEG_LIB_VERSION >= 70
+#if JPEG_LIB_VERSION >= 80
  if (!transcode_only)
+#endif
    jpeg_calc_jpeg_dimensions(cinfo);
 #endif

--- a/media/libjpeg/jconfig.h
+++ b/media/libjpeg/jconfig.h
@ -4,7 +4,10 @@
 /* Export libjpeg v6.2's ABI. */
 #define JPEG_LIB_VERSION 62

-/* Define if your compiler supports prototypes */
+/* libjpeg-turbo version */
+#define LIBJPEG_TURBO_VERSION 1.2.0
+
+/* Compiler supports function prototypes. */
 #define HAVE_PROTOTYPES 1

 /* Define to 1 if you have the <stddef.h> header file. */
@ -13,25 +16,25 @@
 /* Define to 1 if you have the <stdlib.h> header file. */
 #define HAVE_STDLIB_H 1

-/* Define to 1 if the system has the type `unsigned char'. */
+/* Compiler supports 'unsigned char'. */
 #define HAVE_UNSIGNED_CHAR 1

-/* Define to 1 if the system has the type `unsigned short'. */
+/* Compiler supports 'unsigned short'. */
 #define HAVE_UNSIGNED_SHORT 1

-/* Define if you want use complete types */
+/* Compiler does not support pointers to unspecified structures. */
 /* #define INCOMPLETE_TYPES_BROKEN 1 */

-/* Define if you have BSD-like bzero and bcopy */
+/* Compiler has <strings.h> rather than standard <string.h>. */
 /* #undef NEED_BSD_STRINGS */

-/* Define if you need short function names */
+/* Linker requires that global names be unique in first 15 characters. */
 /* #undef NEED_SHORT_EXTERNAL_NAMES */

-/* Define if you have sys/types.h */
+/* Need to include <sys/types.h> in order to obtain size_t. */
 #define NEED_SYS_TYPES_H 1

-/* Define if shift is unsigned */
+/* Broken compiler shifts signed values as an unsigned shift. */
 /* #undef RIGHT_SHIFT_IS_UNSIGNED */

 /* Use accelerated SIMD routines. */
@ -45,15 +48,5 @@
 /* Define to empty if `const' does not conform to ANSI C. */
 /* #undef const */

-/* Define to `__inline__' or `__inline' if that's what the C compiler
-   calls it, or to nothing if 'inline' is not supported under any name.  */
-#ifndef __cplusplus
-/* #undef inline */
-#endif
-
 /* Define to `unsigned int' if <sys/types.h> does not define. */
 /* #undef size_t */
-
-/* MOZILLA CHANGE: libjpeg-turbo doesn't define INLINE in its config file, so
- * we define it here. */
-#define INLINE NS_ALWAYS_INLINE
--- a/media/libjpeg/jconfig.h.in
+++ b/media/libjpeg/jconfig.h.in
@ -1,60 +0,0 @@
-/* Version ID for the JPEG library.
- * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
- */
-#define JPEG_LIB_VERSION  62	/* Version 6b */
-
-/* Support arithmetic encoding */
-#undef C_ARITH_CODING_SUPPORTED
-
-/* Support arithmetic decoding */
-#undef D_ARITH_CODING_SUPPORTED
-
-/* Define if your compiler supports prototypes */
-#undef HAVE_PROTOTYPES
-
-/* Define to 1 if you have the <stddef.h> header file. */
-#undef HAVE_STDDEF_H
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#undef HAVE_STDLIB_H
-
-/* Define to 1 if the system has the type `unsigned char'. */
-#undef HAVE_UNSIGNED_CHAR
-
-/* Define to 1 if the system has the type `unsigned short'. */
-#undef HAVE_UNSIGNED_SHORT
-
-/* Define if you want use complete types */
-#undef INCOMPLETE_TYPES_BROKEN
-
-/* Define if you have BSD-like bzero and bcopy */
-#undef NEED_BSD_STRINGS
-
-/* Define if you need short function names */
-#undef NEED_SHORT_EXTERNAL_NAMES
-
-/* Define if you have sys/types.h */
-#undef NEED_SYS_TYPES_H
-
-/* Define if shift is unsigned */
-#undef RIGHT_SHIFT_IS_UNSIGNED
-
-/* Use accelerated SIMD routines. */
-#undef WITH_SIMD
-
-/* Define to 1 if type `char' is unsigned and you are not using gcc.  */
-#ifndef __CHAR_UNSIGNED__
-# undef __CHAR_UNSIGNED__
-#endif
-
-/* Define to empty if `const' does not conform to ANSI C. */
-#undef const
-
-/* Define to `__inline__' or `__inline' if that's what the C compiler
-   calls it, or to nothing if 'inline' is not supported under any name.  */
-#ifndef __cplusplus
-#undef inline
-#endif
-
-/* Define to `unsigned int' if <sys/types.h> does not define. */
-#undef size_t
--- a/media/libjpeg/jcparam.c
+++ b/media/libjpeg/jcparam.c
@ -3,7 +3,7 @@
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modified 2003-2008 by Guido Vollbeding.
- * Copyright (C) 2009-2010, D. R. Commander.
+ * Copyright (C) 2009-2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -398,6 +398,10 @@ jpeg_default_colorspace (j_compress_ptr cinfo)
  case JCS_EXT_BGRX:
  case JCS_EXT_XBGR:
  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
    jpeg_set_colorspace(cinfo, JCS_YCbCr);
    break;
  case JCS_YCbCr:
--- a/media/libjpeg/jctrans.c
+++ b/media/libjpeg/jctrans.c
@ -0,0 +1,399 @@
+/*
+ * jctrans.c
+ *
+ * Copyright (C) 1995-1998, Thomas G. Lane.
+ * Modified 2000-2009 by Guido Vollbeding.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains library routines for transcoding compression,
+ * that is, writing raw DCT coefficient arrays to an output JPEG file.
+ * The routines in jcapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Forward declarations */
+LOCAL(void) transencode_master_selection
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+LOCAL(void) transencode_coef_controller
+	JPP((j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays));
+
+
+/*
+ * Compression initialization for writing raw-coefficient data.
+ * Before calling this, all parameters and a data destination must be set up.
+ * Call jpeg_finish_compress() to actually write the data.
+ *
+ * The number of passed virtual arrays must match cinfo->num_components.
+ * Note that the virtual arrays need not be filled or even realized at
+ * the time write_coefficients is called; indeed, if the virtual arrays
+ * were requested from this compression object's memory manager, they
+ * typically will be realized during this routine and filled afterwards.
+ */
+
+GLOBAL(void)
+jpeg_write_coefficients (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Mark all tables to be written */
+  jpeg_suppress_tables(cinfo, FALSE);
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  transencode_master_selection(cinfo, coef_arrays);
+  /* Wait for jpeg_finish_compress() call */
+  cinfo->next_scanline = 0;	/* so jpeg_write_marker works */
+  cinfo->global_state = CSTATE_WRCOEFS;
+}
+
+
+/*
+ * Initialize the compression object with default parameters,
+ * then copy from the source object all parameters needed for lossless
+ * transcoding.  Parameters that can be varied without loss (such as
+ * scan script and Huffman optimization) are left in their default states.
+ */
+
+GLOBAL(void)
+jpeg_copy_critical_parameters (j_decompress_ptr srcinfo,
+			       j_compress_ptr dstinfo)
+{
+  JQUANT_TBL ** qtblptr;
+  jpeg_component_info *incomp, *outcomp;
+  JQUANT_TBL *c_quant, *slot_quant;
+  int tblno, ci, coefi;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (dstinfo->global_state != CSTATE_START)
+    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
+  /* Copy fundamental image dimensions */
+  dstinfo->image_width = srcinfo->image_width;
+  dstinfo->image_height = srcinfo->image_height;
+  dstinfo->input_components = srcinfo->num_components;
+  dstinfo->in_color_space = srcinfo->jpeg_color_space;
+#if JPEG_LIB_VERSION >= 70
+  dstinfo->jpeg_width = srcinfo->output_width;
+  dstinfo->jpeg_height = srcinfo->output_height;
+  dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
+#endif
+  /* Initialize all parameters to default values */
+  jpeg_set_defaults(dstinfo);
+  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
+   * Fix it to get the right header markers for the image colorspace.
+   */
+  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+  dstinfo->data_precision = srcinfo->data_precision;
+  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+  /* Copy the source's quantization tables. */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
+      qtblptr = & dstinfo->quant_tbl_ptrs[tblno];
+      if (*qtblptr == NULL)
+	*qtblptr = jpeg_alloc_quant_table((j_common_ptr) dstinfo);
+      MEMCOPY((*qtblptr)->quantval,
+	      srcinfo->quant_tbl_ptrs[tblno]->quantval,
+	      SIZEOF((*qtblptr)->quantval));
+      (*qtblptr)->sent_table = FALSE;
+    }
+  }
+  /* Copy the source's per-component info.
+   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
+   */
+  dstinfo->num_components = srcinfo->num_components;
+  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
+	     MAX_COMPONENTS);
+  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
+       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
+    outcomp->component_id = incomp->component_id;
+    outcomp->h_samp_factor = incomp->h_samp_factor;
+    outcomp->v_samp_factor = incomp->v_samp_factor;
+    outcomp->quant_tbl_no = incomp->quant_tbl_no;
+    /* Make sure saved quantization table for component matches the qtable
+     * slot.  If not, the input file re-used this qtable slot.
+     * IJG encoder currently cannot duplicate this.
+     */
+    tblno = outcomp->quant_tbl_no;
+    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
+	srcinfo->quant_tbl_ptrs[tblno] == NULL)
+      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
+    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
+    c_quant = incomp->quant_table;
+    if (c_quant != NULL) {
+      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
+	if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+	  ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+      }
+    }
+    /* Note: we do not copy the source's Huffman table assignments;
+     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
+     */
+  }
+  /* Also copy JFIF version and resolution information, if available.
+   * Strictly speaking this isn't "critical" info, but it's nearly
+   * always appropriate to copy it if available.  In particular,
+   * if the application chooses to copy JFIF 1.02 extension markers from
+   * the source file, we need to copy the version to make sure we don't
+   * emit a file that has 1.02 extensions but a claimed version of 1.01.
+   * We will *not*, however, copy version info from mislabeled "2.01" files.
+   */
+  if (srcinfo->saw_JFIF_marker) {
+    if (srcinfo->JFIF_major_version == 1) {
+      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+    }
+    dstinfo->density_unit = srcinfo->density_unit;
+    dstinfo->X_density = srcinfo->X_density;
+    dstinfo->Y_density = srcinfo->Y_density;
+  }
+}
+
+
+/*
+ * Master selection of compression modules for transcoding.
+ * This substitutes for jcinit.c's initialization of the full compressor.
+ */
+
+LOCAL(void)
+transencode_master_selection (j_compress_ptr cinfo,
+			      jvirt_barray_ptr * coef_arrays)
+{
+  /* Although we don't actually use input_components for transcoding,
+   * jcmaster.c's initial_setup will complain if input_components is 0.
+   */
+  cinfo->input_components = 1;
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, TRUE /* transcode only */);
+
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+#ifdef C_ARITH_CODING_SUPPORTED
+    jinit_arith_encoder(cinfo);
+#else
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      jinit_phuff_encoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_encoder(cinfo);
+  }
+
+  /* We need a special coefficient buffer controller. */
+  transencode_coef_controller(cinfo, coef_arrays);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr) cinfo);
+
+  /* Write the datastream header (SOI, JFIF) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
+
+
+/*
+ * The rest of this file is a special implementation of the coefficient
+ * buffer controller.  This is similar to jccoefct.c, but it handles only
+ * output from presupplied virtual arrays.  Furthermore, we generate any
+ * dummy padding blocks on-the-fly rather than expecting them to be present
+ * in the arrays.
+ */
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;	/* iMCU row # within image */
+  JDIMENSION mcu_ctr;		/* counts MCUs processed in current row */
+  int MCU_vert_offset;		/* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;	/* number of such rows needed */
+
+  /* Virtual block array for each component. */
+  jvirt_barray_ptr * whole_image;
+
+  /* Workspace for constructing dummy blocks at right/bottom edges. */
+  JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller * my_coef_ptr;
+
+
+LOCAL(void)
+start_iMCU_row (j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows-1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->mcu_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef (j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+
+  if (pass_mode != JBUF_CRANK_DEST)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Process some data.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output (j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr) cinfo->coef;
+  JDIMENSION MCU_col_num;	/* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, blockcnt;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr) cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION) compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+	 MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;			/* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+	compptr = cinfo->cur_comp_info[ci];
+	start_col = MCU_col_num * compptr->MCU_width;
+	blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width
+						: compptr->last_col_width;
+	for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+	  if (coef->iMCU_row_num < last_iMCU_row ||
+	      yindex+yoffset < compptr->last_row_height) {
+	    /* Fill in pointers to real blocks in this row */
+	    buffer_ptr = buffer[ci][yindex+yoffset] + start_col;
+	    for (xindex = 0; xindex < blockcnt; xindex++)
+	      MCU_buffer[blkn++] = buffer_ptr++;
+	  } else {
+	    /* At bottom of image, need a whole row of dummy blocks */
+	    xindex = 0;
+	  }
+	  /* Fill in any dummy blocks needed in this row.
+	   * Dummy blocks are filled in the same way as in jccoefct.c:
+	   * all zeroes in the AC entries, DC entries equal to previous
+	   * block's DC value.  The init routine has already zeroed the
+	   * AC entries, so we need only set the DC entries correctly.
+	   */
+	  for (; xindex < compptr->MCU_width; xindex++) {
+	    MCU_buffer[blkn] = coef->dummy_buffer[blkn];
+	    MCU_buffer[blkn][0][0] = MCU_buffer[blkn-1][0][0];
+	    blkn++;
+	  }
+	}
+      }
+      /* Try to write the MCU. */
+      if (! (*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
+	/* Suspension forced; update state counters and exit */
+	coef->MCU_vert_offset = yoffset;
+	coef->mcu_ctr = MCU_col_num;
+	return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Initialize coefficient buffer controller.
+ *
+ * Each passed coefficient array must be the right size for that
+ * coefficient: width_in_blocks wide and height_in_blocks high,
+ * with unitheight at least v_samp_factor.
+ */
+
+LOCAL(void)
+transencode_coef_controller (j_compress_ptr cinfo,
+			     jvirt_barray_ptr * coef_arrays)
+{
+  my_coef_ptr coef;
+  JBLOCKROW buffer;
+  int i;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				SIZEOF(my_coef_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *) coef;
+  coef->pub.start_pass = start_pass_coef;
+  coef->pub.compress_data = compress_output;
+
+  /* Save pointer to virtual arrays */
+  coef->whole_image = coef_arrays;
+
+  /* Allocate and pre-zero space for dummy DCT blocks. */
+  buffer = (JBLOCKROW)
+    (*cinfo->mem->alloc_large) ((j_common_ptr) cinfo, JPOOL_IMAGE,
+				C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  jzero_far((void FAR *) buffer, C_MAX_BLOCKS_IN_MCU * SIZEOF(JBLOCK));
+  for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+    coef->dummy_buffer[i] = buffer + i;
+  }
+}
--- a/media/libjpeg/jdcolext.c
+++ b/media/libjpeg/jdcolext.c
@ -0,0 +1,104 @@
+/*
+ * jdcolext.c
+ *
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Copyright (C) 2009, 2011, D. R. Commander.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+
+/* This file is included by jdcolor.c */
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+INLINE
+LOCAL(void)
+ycc_rgb_convert_internal (j_decompress_ptr cinfo,
+                          JSAMPIMAGE input_buf, JDIMENSION input_row,
+                          JSAMPARRAY output_buf, int num_rows)
+{
+  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
+  register int y, cb, cr;
+  register JSAMPROW outptr;
+  register JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  register int * Crrtab = cconvert->Cr_r_tab;
+  register int * Cbbtab = cconvert->Cb_b_tab;
+  register INT32 * Crgtab = cconvert->Cr_g_tab;
+  register INT32 * Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = GETJSAMPLE(inptr0[col]);
+      cb = GETJSAMPLE(inptr1[col]);
+      cr = GETJSAMPLE(inptr2[col]);
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      outptr[RGB_GREEN] = range_limit[y +
+			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+						 SCALEBITS))];
+      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
+      /* alpha channel value */
+#ifdef RGB_ALPHA
+      outptr[RGB_ALPHA] = 0xFF;
+#endif
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+INLINE
+LOCAL(void)
+gray_rgb_convert_internal (j_decompress_ptr cinfo,
+                           JSAMPIMAGE input_buf, JDIMENSION input_row,
+                           JSAMPARRAY output_buf, int num_rows)
+{
+  register JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      /* We can dispense with GETJSAMPLE() here */
+      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+      /* Set unused byte to 0xFF so it can be interpreted as an opaque */
+      /* alpha channel value */
+#ifdef RGB_ALPHA
+      outptr[RGB_ALPHA] = 0xFF;
+#endif
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
--- a/media/libjpeg/jdcolor.c
+++ b/media/libjpeg/jdcolor.c
@ -3,7 +3,7 @@
 *
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009, D. R. Commander.
+ * Copyright (C) 2009, 2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -14,6 +14,7 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
+#include "config.h"


 /* Private subobject */
@ -65,6 +66,107 @@ typedef my_color_deconverter * my_cconvert_ptr;
 #define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))


+/* Include inline routines for colorspace extensions */
+
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED EXT_RGB_RED
+#define RGB_GREEN EXT_RGB_GREEN
+#define RGB_BLUE EXT_RGB_BLUE
+#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extrgb_convert_internal
+#define gray_rgb_convert_internal gray_extrgb_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+
+#define RGB_RED EXT_RGBX_RED
+#define RGB_GREEN EXT_RGBX_GREEN
+#define RGB_BLUE EXT_RGBX_BLUE
+#define RGB_ALPHA 3
+#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extrgbx_convert_internal
+#define gray_rgb_convert_internal gray_extrgbx_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+
+#define RGB_RED EXT_BGR_RED
+#define RGB_GREEN EXT_BGR_GREEN
+#define RGB_BLUE EXT_BGR_BLUE
+#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extbgr_convert_internal
+#define gray_rgb_convert_internal gray_extbgr_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+
+#define RGB_RED EXT_BGRX_RED
+#define RGB_GREEN EXT_BGRX_GREEN
+#define RGB_BLUE EXT_BGRX_BLUE
+#define RGB_ALPHA 3
+#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extbgrx_convert_internal
+#define gray_rgb_convert_internal gray_extbgrx_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+
+#define RGB_RED EXT_XBGR_RED
+#define RGB_GREEN EXT_XBGR_GREEN
+#define RGB_BLUE EXT_XBGR_BLUE
+#define RGB_ALPHA 0
+#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extxbgr_convert_internal
+#define gray_rgb_convert_internal gray_extxbgr_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+
+#define RGB_RED EXT_XRGB_RED
+#define RGB_GREEN EXT_XRGB_GREEN
+#define RGB_BLUE EXT_XRGB_BLUE
+#define RGB_ALPHA 0
+#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+#define ycc_rgb_convert_internal ycc_extxrgb_convert_internal
+#define gray_rgb_convert_internal gray_extxrgb_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+
+
 /*
 * Initialize tables for YCC->RGB colorspace conversion.
 */
@ -110,13 +212,6 @@ build_ycc_rgb_table (j_decompress_ptr cinfo)

 /*
 * Convert some rows of samples to the output colorspace.
- *
- * Note that we change from noninterleaved, one-plane-per-component format
- * to interleaved-pixel format.  The output buffer is therefore three times
- * as wide as the input buffer.
- * A starting row offset is provided only for the input buffer.  The caller
- * can easily adjust the passed output_buf value to accommodate any row
- * offset required on that side.
 */

 METHODDEF(void)
@ -124,38 +219,39 @@ ycc_rgb_convert (j_decompress_ptr cinfo,
 		 JSAMPIMAGE input_buf, JDIMENSION input_row,
 		 JSAMPARRAY output_buf, int num_rows)
 {
-  my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
-  register int y, cb, cr;
-  register JSAMPROW outptr;
-  register JSAMPROW inptr0, inptr1, inptr2;
-  register JDIMENSION col;
-  JDIMENSION num_cols = cinfo->output_width;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  register int * Crrtab = cconvert->Cr_r_tab;
-  register int * Cbbtab = cconvert->Cb_b_tab;
-  register INT32 * Crgtab = cconvert->Cr_g_tab;
-  register INT32 * Cbgtab = cconvert->Cb_g_tab;
-  SHIFT_TEMPS
-
-  while (--num_rows >= 0) {
-    inptr0 = input_buf[0][input_row];
-    inptr1 = input_buf[1][input_row];
-    inptr2 = input_buf[2][input_row];
-    input_row++;
-    outptr = *output_buf++;
-    for (col = 0; col < num_cols; col++) {
-      y  = GETJSAMPLE(inptr0[col]);
-      cb = GETJSAMPLE(inptr1[col]);
-      cr = GETJSAMPLE(inptr2[col]);
-      /* Range-limiting is essential due to noise introduced by DCT losses. */
-      outptr[rgb_red[cinfo->out_color_space]] =   range_limit[y + Crrtab[cr]];
-      outptr[rgb_green[cinfo->out_color_space]] = range_limit[y +
-			      ((int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
-						 SCALEBITS))];
-      outptr[rgb_blue[cinfo->out_color_space]] =  range_limit[y + Cbbtab[cb]];
-      outptr += rgb_pixelsize[cinfo->out_color_space];
-    }
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      ycc_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      ycc_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGR:
+      ycc_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      ycc_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      ycc_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      ycc_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    default:
+      ycc_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                               num_rows);
+      break;
  }
 }

@ -211,9 +307,7 @@ grayscale_convert (j_decompress_ptr cinfo,


 /*
- * Convert grayscale to RGB: just duplicate the graylevel three times.
- * This is provided to support applications that don't want to cope
- * with grayscale as a separate case.
+ * Convert grayscale to RGB
 */

 METHODDEF(void)
@ -221,22 +315,39 @@ gray_rgb_convert (j_decompress_ptr cinfo,
 		  JSAMPIMAGE input_buf, JDIMENSION input_row,
 		  JSAMPARRAY output_buf, int num_rows)
 {
-  register JSAMPROW inptr, outptr;
-  JSAMPLE *maxinptr;
-  JDIMENSION num_cols = cinfo->output_width;
-  int rindex = rgb_red[cinfo->out_color_space];
-  int gindex = rgb_green[cinfo->out_color_space];
-  int bindex = rgb_blue[cinfo->out_color_space];
-  int rgbstride = rgb_pixelsize[cinfo->out_color_space];
-
-  while (--num_rows >= 0) {
-    inptr = input_buf[0][input_row++];
-    maxinptr = &inptr[num_cols];
-    outptr = *output_buf++;
-    for (; inptr < maxinptr; inptr++, outptr += rgbstride) {
-      /* We can dispense with GETJSAMPLE() here */
-      outptr[rindex] = outptr[gindex] = outptr[bindex] = *inptr;
-    }
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      gray_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      gray_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                    num_rows);
+      break;
+    case JCS_EXT_BGR:
+      gray_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                   num_rows);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      gray_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                    num_rows);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      gray_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                    num_rows);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      gray_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                    num_rows);
+      break;
+    default:
+      gray_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                num_rows);
+      break;
  }
 }

@ -369,6 +480,10 @@ jinit_color_deconverter (j_decompress_ptr cinfo)
  case JCS_EXT_BGRX:
  case JCS_EXT_XBGR:
  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
    cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
    if (cinfo->jpeg_color_space == JCS_YCbCr) {
      if (jsimd_can_ycc_rgb())
--- a/media/libjpeg/jdhuff.c
+++ b/media/libjpeg/jdhuff.c
@ -2,6 +2,7 @@
 * jdhuff.c
 *
 * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Copyright (C) 2009-2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -18,6 +19,7 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jdhuff.h"		/* Declarations shared with jdphuff.c */
+#include "jpegcomp.h"


 /*
@ -122,7 +124,7 @@ start_pass_huff_decoder (j_decompress_ptr cinfo)
    if (compptr->component_needed) {
      entropy->dc_needed[blkn] = TRUE;
      /* we don't need the ACs if producing a 1/8th-size image */
-      entropy->ac_needed[blkn] = (compptr->DCT_scaled_size > 1);
+      entropy->ac_needed[blkn] = (compptr->_DCT_scaled_size > 1);
    } else {
      entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE;
    }
@ -225,6 +227,7 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
      dtbl->maxcode[l] = -1;	/* -1 if no codes of this length */
    }
  }
+  dtbl->valoffset[17] = 0;
  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */

  /* Compute lookahead tables to speed up decoding.
@ -234,7 +237,8 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
   * with that code.
   */

-  MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits));
+   for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
+     dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;

  p = 0;
  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
@ -243,8 +247,7 @@ jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno,
      /* Generate left-justified code followed by all possible bit sequences */
      lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l);
      for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) {
-	dtbl->look_nbits[lookbits] = l;
-	dtbl->look_sym[lookbits] = htbl->huffval[p];
+	dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
 	lookbits++;
      }
    }
@ -389,6 +392,50 @@ jpeg_fill_bit_buffer (bitread_working_state * state,
 }


+/* Macro version of the above, which performs much better but does not
+   handle markers.  We have to hand off any blocks with markers to the
+   slower routines. */
+
+#define GET_BYTE \
+{ \
+  register int c0, c1; \
+  c0 = GETJOCTET(*buffer++); \
+  c1 = GETJOCTET(*buffer); \
+  /* Pre-execute most common case */ \
+  get_buffer = (get_buffer << 8) | c0; \
+  bits_left += 8; \
+  if (c0 == 0xFF) { \
+    /* Pre-execute case of FF/00, which represents an FF data byte */ \
+    buffer++; \
+    if (c1 != 0) { \
+      /* Oops, it's actually a marker indicating end of compressed data. */ \
+      cinfo->unread_marker = c1; \
+      /* Back out pre-execution and fill the buffer with zero bits */ \
+      buffer -= 2; \
+      get_buffer &= ~0xFF; \
+    } \
+  } \
+}
+
+#if __WORDSIZE == 64 || defined(_WIN64)
+
+/* Pre-fetch 48 bytes, because the holding register is 64-bit */
+#define FILL_BIT_BUFFER_FAST \
+  if (bits_left < 16) { \
+    GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \
+  }
+
+#else
+
+/* Pre-fetch 16 bytes, because the holding register is 32-bit */
+#define FILL_BIT_BUFFER_FAST \
+  if (bits_left < 16) { \
+    GET_BYTE GET_BYTE \
+  }
+
+#endif
+
+
 /*
 * Out-of-line code for Huffman code decoding.
 * See jdhuff.h for info about usage.
@ -438,9 +485,10 @@ jpeg_huff_decode (bitread_working_state * state,
 * On some machines, a shift and add will be faster than a table lookup.
 */

+#define AVOID_TABLES
 #ifdef AVOID_TABLES

-#define HUFF_EXTEND(x,s)  ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x))
+#define HUFF_EXTEND(x,s)  ((x) + ((((x) - (1<<((s)-1))) >> 31) & (((-1)<<(s)) + 1)))

 #else

@ -498,6 +546,187 @@ process_restart (j_decompress_ptr cinfo)
 }


+LOCAL(boolean)
+decode_mcu_slow (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  BITREAD_STATE_VARS;
+  int blkn;
+  savable_state state;
+  /* Outer loop handles each block in the MCU */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+  ASSIGN_STATE(state, entropy->saved);
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    JBLOCKROW block = MCU_data[blkn];
+    d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+    d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+    register int s, k, r;
+
+    /* Decode a single block's worth of coefficients */
+
+    /* Section F.2.2.1: decode the DC coefficient difference */
+    HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
+    if (s) {
+      CHECK_BIT_BUFFER(br_state, s, return FALSE);
+      r = GET_BITS(s);
+      s = HUFF_EXTEND(r, s);
+    }
+
+    if (entropy->dc_needed[blkn]) {
+      /* Convert DC difference to actual value, update last_dc_val */
+      int ci = cinfo->MCU_membership[blkn];
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
+      (*block)[0] = (JCOEF) s;
+    }
+
+    if (entropy->ac_needed[blkn]) {
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* Since zeroes are skipped, output area must be cleared beforehand */
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
+
+        r = s >> 4;
+        s &= 15;
+      
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          /* Output coefficient in natural (dezigzagged) order.
+           * Note: the extra entries in jpeg_natural_order[] will save us
+           * if k >= DCTSIZE2, which could happen if the data is corrupted.
+           */
+          (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+        } else {
+          if (r != 15)
+            break;
+          k += 15;
+        }
+      }
+
+    } else {
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
+
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          DROP_BITS(s);
+        } else {
+          if (r != 15)
+            break;
+          k += 15;
+        }
+      }
+    }
+  }
+
+  /* Completed MCU, so update state */
+  BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+  ASSIGN_STATE(entropy->saved, state);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+decode_mcu_fast (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
+  BITREAD_STATE_VARS;
+  JOCTET *buffer;
+  int blkn;
+  savable_state state;
+  /* Outer loop handles each block in the MCU */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
+  buffer = (JOCTET *) br_state.next_input_byte;
+  ASSIGN_STATE(state, entropy->saved);
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    JBLOCKROW block = MCU_data[blkn];
+    d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
+    d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
+    register int s, k, r, l;
+
+    HUFF_DECODE_FAST(s, l, dctbl);
+    if (s) {
+      FILL_BIT_BUFFER_FAST
+      r = GET_BITS(s);
+      s = HUFF_EXTEND(r, s);
+    }
+
+    if (entropy->dc_needed[blkn]) {
+      int ci = cinfo->MCU_membership[blkn];
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      (*block)[0] = (JCOEF) s;
+    }
+
+    if (entropy->ac_needed[blkn]) {
+
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE_FAST(s, l, actbl);
+        r = s >> 4;
+        s &= 15;
+      
+        if (s) {
+          k += r;
+          FILL_BIT_BUFFER_FAST
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          (*block)[jpeg_natural_order[k]] = (JCOEF) s;
+        } else {
+          if (r != 15) break;
+          k += 15;
+        }
+      }
+
+    } else {
+
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE_FAST(s, l, actbl);
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          FILL_BIT_BUFFER_FAST
+          DROP_BITS(s);
+        } else {
+          if (r != 15) break;
+          k += 15;
+        }
+      }
+    }
+  }
+
+  if (cinfo->unread_marker != 0) {
+    cinfo->unread_marker = 0;
+    return FALSE;
+  }
+
+  br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
+  br_state.next_input_byte = buffer;
+  BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
+  ASSIGN_STATE(entropy->saved, state);
+  return TRUE;
+}
+
+
 /*
 * Decode and return one MCU's worth of Huffman-compressed coefficients.
 * The coefficients are reordered from zigzag order into natural array order,
@ -513,111 +742,39 @@ process_restart (j_decompress_ptr cinfo)
 * this module, since we'll just re-assign them on the next call.)
 */

+#define BUFSIZE (DCTSIZE2 * 2)
+
 METHODDEF(boolean)
 decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
 {
  huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy;
-  int blkn;
-  BITREAD_STATE_VARS;
-  savable_state state;
+  int usefast = 1;

  /* Process restart marker if needed; may have to suspend */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0)
      if (! process_restart(cinfo))
 	return FALSE;
+    usefast = 0;
  }

+  if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU
+    || cinfo->unread_marker != 0)
+    usefast = 0;
+
  /* If we've run out of data, just leave the MCU set to zeroes.
   * This way, we return uniform gray for the remainder of the segment.
   */
  if (! entropy->pub.insufficient_data) {

-    /* Load up working state */
-    BITREAD_LOAD_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(state, entropy->saved);
-
-    /* Outer loop handles each block in the MCU */
-
-    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
-      JBLOCKROW block = MCU_data[blkn];
-      d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn];
-      d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn];
-      register int s, k, r;
-
-      /* Decode a single block's worth of coefficients */
-
-      /* Section F.2.2.1: decode the DC coefficient difference */
-      HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
-      if (s) {
-	CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	r = GET_BITS(s);
-	s = HUFF_EXTEND(r, s);
-      }
-
-      if (entropy->dc_needed[blkn]) {
-	/* Convert DC difference to actual value, update last_dc_val */
-	int ci = cinfo->MCU_membership[blkn];
-	s += state.last_dc_val[ci];
-	state.last_dc_val[ci] = s;
-	/* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
-	(*block)[0] = (JCOEF) s;
-      }
-
-      if (entropy->ac_needed[blkn]) {
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* Since zeroes are skipped, output area must be cleared beforehand */
-	for (k = 1; k < DCTSIZE2; k++) {
-	  HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
-      
-	  r = s >> 4;
-	  s &= 15;
-      
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    r = GET_BITS(s);
-	    s = HUFF_EXTEND(r, s);
-	    /* Output coefficient in natural (dezigzagged) order.
-	     * Note: the extra entries in jpeg_natural_order[] will save us
-	     * if k >= DCTSIZE2, which could happen if the data is corrupted.
-	     */
-	    (*block)[jpeg_natural_order[k]] = (JCOEF) s;
-	  } else {
-	    if (r != 15)
-	      break;
-	    k += 15;
-	  }
-	}
-
-      } else {
-
-	/* Section F.2.2.2: decode the AC coefficients */
-	/* In this path we just discard the values */
-	for (k = 1; k < DCTSIZE2; k++) {
-	  HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
-      
-	  r = s >> 4;
-	  s &= 15;
-      
-	  if (s) {
-	    k += r;
-	    CHECK_BIT_BUFFER(br_state, s, return FALSE);
-	    DROP_BITS(s);
-	  } else {
-	    if (r != 15)
-	      break;
-	    k += 15;
-	  }
-	}
-
-      }
+    if (usefast) {
+      if (!decode_mcu_fast(cinfo, MCU_data)) goto use_slow;
+    }
+    else {
+      use_slow:
+      if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE;
    }

-    /* Completed MCU, so update state */
-    BITREAD_SAVE_STATE(cinfo,entropy->bitstate);
-    ASSIGN_STATE(entropy->saved, state);
  }

  /* Account for restart interval (no-op if not using restarts) */
--- a/media/libjpeg/jdhuff.h
+++ b/media/libjpeg/jdhuff.h
@ -2,6 +2,7 @@
 * jdhuff.h
 *
 * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Copyright (C) 2010-2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -27,7 +28,7 @@ typedef struct {
  /* Basic tables: (element [0] of each array is unused) */
  INT32 maxcode[18];		/* largest code of length k (-1 if none) */
  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
-  INT32 valoffset[17];		/* huffval[] offset for codes of length k */
+  INT32 valoffset[18];		/* huffval[] offset for codes of length k */
  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
   * the smallest code of length k; so given a code of length k, the
   * corresponding symbol is huffval[code + valoffset[k]]
@ -36,13 +37,17 @@ typedef struct {
  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
  JHUFF_TBL *pub;

-  /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of
+  /* Lookahead table: indexed by the next HUFF_LOOKAHEAD bits of
   * the input data stream.  If the next Huffman code is no more
   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
-   * the corresponding symbol directly from these tables.
+   * the corresponding symbol directly from this tables.
+   *
+   * The lower 8 bits of each table entry contain the number of
+   * bits in the corresponding Huffman code, or HUFF_LOOKAHEAD + 1
+   * if too long.  The next 8 bits of each entry contain the
+   * symbol.
   */
-  int look_nbits[1<<HUFF_LOOKAHEAD]; /* # bits, or 0 if too long */
-  UINT8 look_sym[1<<HUFF_LOOKAHEAD]; /* symbol, or unused */
+  int lookup[1<<HUFF_LOOKAHEAD];
 } d_derived_tbl;

 /* Expand a Huffman table definition into the derived format */
@ -69,8 +74,17 @@ EXTERN(void) jpeg_make_d_derived_tbl
 * necessary.
 */

+#if __WORDSIZE == 64 || defined(_WIN64)
+
+typedef size_t bit_buf_type;	/* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  64		/* size of buffer in bits */
+
+#else
+
 typedef INT32 bit_buf_type;	/* type of bit-extraction buffer */
-#define BIT_BUF_SIZE  32	/* size of buffer in bits */
+#define BIT_BUF_SIZE  32		/* size of buffer in bits */
+
+#endif

 /* If long is > 32 bits on your machine, and shifting/masking longs is
 * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
@ -183,11 +197,10 @@ EXTERN(boolean) jpeg_fill_bit_buffer
    } \
  } \
  look = PEEK_BITS(HUFF_LOOKAHEAD); \
-  if ((nb = htbl->look_nbits[look]) != 0) { \
+  if ((nb = (htbl->lookup[look] >> HUFF_LOOKAHEAD)) <= HUFF_LOOKAHEAD) { \
    DROP_BITS(nb); \
-    result = htbl->look_sym[look]; \
+    result = htbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); \
  } else { \
-    nb = HUFF_LOOKAHEAD+1; \
 slowlabel: \
    if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \
 	{ failaction; } \
@ -195,6 +208,26 @@ slowlabel: \
  } \
 }

+#define HUFF_DECODE_FAST(s,nb,htbl) \
+  FILL_BIT_BUFFER_FAST; \
+  s = PEEK_BITS(HUFF_LOOKAHEAD); \
+  s = htbl->lookup[s]; \
+  nb = s >> HUFF_LOOKAHEAD; \
+  /* Pre-execute the common case of nb <= HUFF_LOOKAHEAD */ \
+  DROP_BITS(nb); \
+  s = s & ((1 << HUFF_LOOKAHEAD) - 1); \
+  if (nb > HUFF_LOOKAHEAD) { \
+    /* Equivalent of jpeg_huff_decode() */ \
+    /* Don't use GET_BITS() here because we don't want to modify bits_left */ \
+    s = (get_buffer >> bits_left) & ((1 << (nb)) - 1); \
+    while (s > htbl->maxcode[nb]) { \
+      s <<= 1; \
+      s |= GET_BITS(1); \
+      nb++; \
+    } \
+    s = htbl->pub->huffval[ (int) (s + htbl->valoffset[nb]) & 0xFF ]; \
+  }
+
 /* Out-of-line case for Huffman code fetching */
 EXTERN(int) jpeg_huff_decode
 	JPP((bitread_working_state * state, register bit_buf_type get_buffer,
--- a/media/libjpeg/jdmainct.c
+++ b/media/libjpeg/jdmainct.c
@ -161,7 +161,7 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
 * This is done only once, not once per pass.
 */
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
  int ci, rgroup;
  int M = cinfo->_min_DCT_scaled_size;
  jpeg_component_info *compptr;
@ -170,10 +170,10 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
  /* Get top-level space for component array pointers.
   * We alloc both arrays with one call to save a few cycles.
   */
-  main->xbuffer[0] = (JSAMPIMAGE)
+  main_ptr->xbuffer[0] = (JSAMPIMAGE)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				cinfo->num_components * 2 * SIZEOF(JSAMPARRAY));
-  main->xbuffer[1] = main->xbuffer[0] + cinfo->num_components;
+  main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components;

  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
       ci++, compptr++) {
@ -186,9 +186,9 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
      (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				  2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW));
    xbuf += rgroup;		/* want one row group at negative offsets */
-    main->xbuffer[0][ci] = xbuf;
+    main_ptr->xbuffer[0][ci] = xbuf;
    xbuf += rgroup * (M + 4);
-    main->xbuffer[1][ci] = xbuf;
+    main_ptr->xbuffer[1][ci] = xbuf;
  }
 }

@ -196,13 +196,13 @@ alloc_funny_pointers (j_decompress_ptr cinfo)
 LOCAL(void)
 make_funny_pointers (j_decompress_ptr cinfo)
 /* Create the funny pointer lists discussed in the comments above.
- * The actual workspace is already allocated (in main->buffer),
+ * The actual workspace is already allocated (in main_ptr->buffer),
 * and the space for the pointer lists is allocated too.
 * This routine just fills in the curiously ordered lists.
 * This will be repeated at the beginning of each pass.
 */
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
  int ci, i, rgroup;
  int M = cinfo->_min_DCT_scaled_size;
  jpeg_component_info *compptr;
@ -212,10 +212,10 @@ make_funny_pointers (j_decompress_ptr cinfo)
       ci++, compptr++) {
    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
-    xbuf0 = main->xbuffer[0][ci];
-    xbuf1 = main->xbuffer[1][ci];
+    xbuf0 = main_ptr->xbuffer[0][ci];
+    xbuf1 = main_ptr->xbuffer[1][ci];
    /* First copy the workspace pointers as-is */
-    buf = main->buffer[ci];
+    buf = main_ptr->buffer[ci];
    for (i = 0; i < rgroup * (M + 2); i++) {
      xbuf0[i] = xbuf1[i] = buf[i];
    }
@ -242,7 +242,7 @@ set_wraparound_pointers (j_decompress_ptr cinfo)
 * This changes the pointer list state from top-of-image to the normal state.
 */
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
  int ci, i, rgroup;
  int M = cinfo->_min_DCT_scaled_size;
  jpeg_component_info *compptr;
@ -252,8 +252,8 @@ set_wraparound_pointers (j_decompress_ptr cinfo)
       ci++, compptr++) {
    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
-    xbuf0 = main->xbuffer[0][ci];
-    xbuf1 = main->xbuffer[1][ci];
+    xbuf0 = main_ptr->xbuffer[0][ci];
+    xbuf1 = main_ptr->xbuffer[1][ci];
    for (i = 0; i < rgroup; i++) {
      xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i];
      xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i];
@ -271,7 +271,7 @@ set_bottom_pointers (j_decompress_ptr cinfo)
 * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
 */
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
  int ci, i, rgroup, iMCUheight, rows_left;
  jpeg_component_info *compptr;
  JSAMPARRAY xbuf;
@ -288,12 +288,12 @@ set_bottom_pointers (j_decompress_ptr cinfo)
     * so we need only do it once.
     */
    if (ci == 0) {
-      main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
+      main_ptr->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1);
    }
    /* Duplicate the last real sample row rgroup*2 times; this pads out the
     * last partial rowgroup and ensures at least one full rowgroup of context.
     */
-    xbuf = main->xbuffer[main->whichptr][ci];
+    xbuf = main_ptr->xbuffer[main_ptr->whichptr][ci];
    for (i = 0; i < rgroup * 2; i++) {
      xbuf[rows_left + i] = xbuf[rows_left-1];
    }
@ -308,27 +308,27 @@ set_bottom_pointers (j_decompress_ptr cinfo)
 METHODDEF(void)
 start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;

  switch (pass_mode) {
  case JBUF_PASS_THRU:
    if (cinfo->upsample->need_context_rows) {
-      main->pub.process_data = process_data_context_main;
+      main_ptr->pub.process_data = process_data_context_main;
      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
-      main->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
-      main->context_state = CTX_PREPARE_FOR_IMCU;
-      main->iMCU_row_ctr = 0;
+      main_ptr->whichptr = 0;	/* Read first iMCU row into xbuffer[0] */
+      main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
+      main_ptr->iMCU_row_ctr = 0;
    } else {
      /* Simple case with no context needed */
-      main->pub.process_data = process_data_simple_main;
+      main_ptr->pub.process_data = process_data_simple_main;
    }
-    main->buffer_full = FALSE;	/* Mark buffer empty */
-    main->rowgroup_ctr = 0;
+    main_ptr->buffer_full = FALSE;	/* Mark buffer empty */
+    main_ptr->rowgroup_ctr = 0;
    break;
 #ifdef QUANT_2PASS_SUPPORTED
  case JBUF_CRANK_DEST:
    /* For last pass of 2-pass quantization, just crank the postprocessor */
-    main->pub.process_data = process_data_crank_post;
+    main_ptr->pub.process_data = process_data_crank_post;
    break;
 #endif
  default:
@ -348,14 +348,14 @@ process_data_simple_main (j_decompress_ptr cinfo,
 			  JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
 			  JDIMENSION out_rows_avail)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;
  JDIMENSION rowgroups_avail;

  /* Read input data if we haven't filled the main buffer yet */
-  if (! main->buffer_full) {
-    if (! (*cinfo->coef->decompress_data) (cinfo, main->buffer))
+  if (! main_ptr->buffer_full) {
+    if (! (*cinfo->coef->decompress_data) (cinfo, main_ptr->buffer))
      return;			/* suspension forced, can do nothing more */
-    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+    main_ptr->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
  }

  /* There are always min_DCT_scaled_size row groups in an iMCU row. */
@ -366,14 +366,14 @@ process_data_simple_main (j_decompress_ptr cinfo,
   */

  /* Feed the postprocessor */
-  (*cinfo->post->post_process_data) (cinfo, main->buffer,
-				     &main->rowgroup_ctr, rowgroups_avail,
+  (*cinfo->post->post_process_data) (cinfo, main_ptr->buffer,
+				     &main_ptr->rowgroup_ctr, rowgroups_avail,
 				     output_buf, out_row_ctr, out_rows_avail);

  /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
-  if (main->rowgroup_ctr >= rowgroups_avail) {
-    main->buffer_full = FALSE;
-    main->rowgroup_ctr = 0;
+  if (main_ptr->rowgroup_ctr >= rowgroups_avail) {
+    main_ptr->buffer_full = FALSE;
+    main_ptr->rowgroup_ctr = 0;
  }
 }

@ -388,15 +388,15 @@ process_data_context_main (j_decompress_ptr cinfo,
 			   JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
 			   JDIMENSION out_rows_avail)
 {
-  my_main_ptr main = (my_main_ptr) cinfo->main;
+  my_main_ptr main_ptr = (my_main_ptr) cinfo->main;

  /* Read input data if we haven't filled the main buffer yet */
-  if (! main->buffer_full) {
+  if (! main_ptr->buffer_full) {
    if (! (*cinfo->coef->decompress_data) (cinfo,
-					   main->xbuffer[main->whichptr]))
+					   main_ptr->xbuffer[main_ptr->whichptr]))
      return;			/* suspension forced, can do nothing more */
-    main->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
-    main->iMCU_row_ctr++;	/* count rows received */
+    main_ptr->buffer_full = TRUE;	/* OK, we have an iMCU row to work with */
+    main_ptr->iMCU_row_ctr++;	/* count rows received */
  }

  /* Postprocessor typically will not swallow all the input data it is handed
@ -404,47 +404,47 @@ process_data_context_main (j_decompress_ptr cinfo,
   * to exit and restart.  This switch lets us keep track of how far we got.
   * Note that each case falls through to the next on successful completion.
   */
-  switch (main->context_state) {
+  switch (main_ptr->context_state) {
  case CTX_POSTPONED_ROW:
    /* Call postprocessor using previously set pointers for postponed row */
-    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
-			&main->rowgroup_ctr, main->rowgroups_avail,
+    (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr],
+			&main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
 			output_buf, out_row_ctr, out_rows_avail);
-    if (main->rowgroup_ctr < main->rowgroups_avail)
+    if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
      return;			/* Need to suspend */
-    main->context_state = CTX_PREPARE_FOR_IMCU;
+    main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
    if (*out_row_ctr >= out_rows_avail)
      return;			/* Postprocessor exactly filled output buf */
    /*FALLTHROUGH*/
  case CTX_PREPARE_FOR_IMCU:
    /* Prepare to process first M-1 row groups of this iMCU row */
-    main->rowgroup_ctr = 0;
-    main->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size - 1);
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size - 1);
    /* Check for bottom of image: if so, tweak pointers to "duplicate"
     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
     */
-    if (main->iMCU_row_ctr == cinfo->total_iMCU_rows)
+    if (main_ptr->iMCU_row_ctr == cinfo->total_iMCU_rows)
      set_bottom_pointers(cinfo);
-    main->context_state = CTX_PROCESS_IMCU;
+    main_ptr->context_state = CTX_PROCESS_IMCU;
    /*FALLTHROUGH*/
  case CTX_PROCESS_IMCU:
    /* Call postprocessor using previously set pointers */
-    (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr],
-			&main->rowgroup_ctr, main->rowgroups_avail,
+    (*cinfo->post->post_process_data) (cinfo, main_ptr->xbuffer[main_ptr->whichptr],
+			&main_ptr->rowgroup_ctr, main_ptr->rowgroups_avail,
 			output_buf, out_row_ctr, out_rows_avail);
-    if (main->rowgroup_ctr < main->rowgroups_avail)
+    if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
      return;			/* Need to suspend */
    /* After the first iMCU, change wraparound pointers to normal state */
-    if (main->iMCU_row_ctr == 1)
+    if (main_ptr->iMCU_row_ctr == 1)
      set_wraparound_pointers(cinfo);
    /* Prepare to load new iMCU row using other xbuffer list */
-    main->whichptr ^= 1;	/* 0=>1 or 1=>0 */
-    main->buffer_full = FALSE;
+    main_ptr->whichptr ^= 1;	/* 0=>1 or 1=>0 */
+    main_ptr->buffer_full = FALSE;
    /* Still need to process last row group of this iMCU row, */
    /* which is saved at index M+1 of the other xbuffer */
-    main->rowgroup_ctr = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 1);
-    main->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 2);
-    main->context_state = CTX_POSTPONED_ROW;
+    main_ptr->rowgroup_ctr = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 1);
+    main_ptr->rowgroups_avail = (JDIMENSION) (cinfo->_min_DCT_scaled_size + 2);
+    main_ptr->context_state = CTX_POSTPONED_ROW;
  }
 }

@ -477,15 +477,15 @@ process_data_crank_post (j_decompress_ptr cinfo,
 GLOBAL(void)
 jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
 {
-  my_main_ptr main;
+  my_main_ptr main_ptr;
  int ci, rgroup, ngroups;
  jpeg_component_info *compptr;

-  main = (my_main_ptr)
+  main_ptr = (my_main_ptr)
    (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE,
 				SIZEOF(my_main_controller));
-  cinfo->main = (struct jpeg_d_main_controller *) main;
-  main->pub.start_pass = start_pass_main;
+  cinfo->main = (struct jpeg_d_main_controller *) main_ptr;
+  main_ptr->pub.start_pass = start_pass_main;

  if (need_full_buffer)		/* shouldn't happen */
    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
@ -506,7 +506,7 @@ jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer)
       ci++, compptr++) {
    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
-    main->buffer[ci] = (*cinfo->mem->alloc_sarray)
+    main_ptr->buffer[ci] = (*cinfo->mem->alloc_sarray)
 			((j_common_ptr) cinfo, JPOOL_IMAGE,
 			 compptr->width_in_blocks * compptr->_DCT_scaled_size,
 			 (JDIMENSION) (rgroup * ngroups));
--- a/media/libjpeg/jdmarker.c
+++ b/media/libjpeg/jdmarker.c
@ -2,6 +2,7 @@
 * jdmarker.c
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Copyright (C) 2012, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -322,13 +323,16 @@ get_sos (j_decompress_ptr cinfo)

  /* Collect the component-spec parameters */

+  for (i = 0; i < cinfo->num_components; i++)
+    cinfo->cur_comp_info[i] = NULL;
+
  for (i = 0; i < n; i++) {
    INPUT_BYTE(cinfo, cc, return FALSE);
    INPUT_BYTE(cinfo, c, return FALSE);
    
    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
 	 ci++, compptr++) {
-      if (cc == compptr->component_id)
+      if (cc == compptr->component_id && !cinfo->cur_comp_info[ci])
 	goto id_found;
    }

--- a/media/libjpeg/jdmaster.c
+++ b/media/libjpeg/jdmaster.c
@ -2,7 +2,7 @@
 * jdmaster.c
 *
 * Copyright (C) 1991-1997, Thomas G. Lane.
- * Copyright (C) 2009-2010, D. R. Commander.
+ * Copyright (C) 2009-2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -57,7 +57,11 @@ use_merged_upsample (j_decompress_ptr cinfo)
      cinfo->out_color_space != JCS_EXT_BGR &&
      cinfo->out_color_space != JCS_EXT_BGRX &&
      cinfo->out_color_space != JCS_EXT_XBGR &&
-      cinfo->out_color_space != JCS_EXT_XRGB) ||
+      cinfo->out_color_space != JCS_EXT_XRGB &&
+      cinfo->out_color_space != JCS_EXT_RGBA &&
+      cinfo->out_color_space != JCS_EXT_BGRA &&
+      cinfo->out_color_space != JCS_EXT_ABGR &&
+      cinfo->out_color_space != JCS_EXT_ARGB) ||
      cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space])
    return FALSE;
  /* and it only handles 2h1v or 2h2v sampling ratios */
@ -209,6 +213,10 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo)
  case JCS_EXT_BGRX:
  case JCS_EXT_XBGR:
  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
    cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
    break;
  case JCS_YCbCr:
--- a/media/libjpeg/jdmerge.c
+++ b/media/libjpeg/jdmerge.c
@ -3,7 +3,7 @@
 *
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2009, D. R. Commander.
+ * Copyright (C) 2009, 2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -38,6 +38,7 @@
 #include "jinclude.h"
 #include "jpeglib.h"
 #include "jsimd.h"
+#include "config.h"

 #ifdef UPSAMPLE_MERGING_SUPPORTED

@ -77,6 +78,99 @@ typedef my_upsampler * my_upsample_ptr;
 #define FIX(x)		((INT32) ((x) * (1L<<SCALEBITS) + 0.5))


+/* Include inline routines for colorspace extensions */
+
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED EXT_RGB_RED
+#define RGB_GREEN EXT_RGB_GREEN
+#define RGB_BLUE EXT_RGB_BLUE
+#define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+#define h2v1_merged_upsample_internal extrgb_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extrgb_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_RGBX_RED
+#define RGB_GREEN EXT_RGBX_GREEN
+#define RGB_BLUE EXT_RGBX_BLUE
+#define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+#define h2v1_merged_upsample_internal extrgbx_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extrgbx_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_BGR_RED
+#define RGB_GREEN EXT_BGR_GREEN
+#define RGB_BLUE EXT_BGR_BLUE
+#define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+#define h2v1_merged_upsample_internal extbgr_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extbgr_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_BGRX_RED
+#define RGB_GREEN EXT_BGRX_GREEN
+#define RGB_BLUE EXT_BGRX_BLUE
+#define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+#define h2v1_merged_upsample_internal extbgrx_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extbgrx_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_XBGR_RED
+#define RGB_GREEN EXT_XBGR_GREEN
+#define RGB_BLUE EXT_XBGR_BLUE
+#define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+#define h2v1_merged_upsample_internal extxbgr_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extxbgr_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED EXT_XRGB_RED
+#define RGB_GREEN EXT_XRGB_GREEN
+#define RGB_BLUE EXT_XRGB_BLUE
+#define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+#define h2v1_merged_upsample_internal extxrgb_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal extxrgb_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+
 /*
 * Initialize tables for YCC->RGB colorspace conversion.
 * This is taken directly from jdcolor.c; see that file for more info.
@ -230,55 +324,39 @@ h2v1_merged_upsample (j_decompress_ptr cinfo,
 		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
 		      JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  SHIFT_TEMPS
-
-  inptr0 = input_buf[0][in_row_group_ctr];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr = output_buf[0];
-  /* Loop for each pair of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
-    outptr[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
-    outptr += rgb_pixelsize[cinfo->out_color_space];
-    y  = GETJSAMPLE(*inptr0++);
-    outptr[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
-    outptr += rgb_pixelsize[cinfo->out_color_space];
-  }
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
-    outptr[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      extrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                           output_buf);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      extrgbx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                           output_buf);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      extbgrx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      extxbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      extxrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    default:
+      h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                    output_buf);
+      break;
  }
 }

@ -292,71 +370,39 @@ h2v2_merged_upsample (j_decompress_ptr cinfo,
 		      JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
 		      JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  SHIFT_TEMPS
-
-  inptr00 = input_buf[0][in_row_group_ctr*2];
-  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr0 = output_buf[0];
-  outptr1 = output_buf[1];
-  /* Loop for each group of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
-    outptr0[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr0[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr00++);
-    outptr0[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr0[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
-    outptr0 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
-    outptr1[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr1[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
-    y  = GETJSAMPLE(*inptr01++);
-    outptr1[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr1[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
-    outptr1 += RGB_PIXELSIZE;
-  }
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr00);
-    outptr0[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr0[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr0[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
-    y  = GETJSAMPLE(*inptr01);
-    outptr1[rgb_red[cinfo->out_color_space]] =   range_limit[y + cred];
-    outptr1[rgb_green[cinfo->out_color_space]] = range_limit[y + cgreen];
-    outptr1[rgb_blue[cinfo->out_color_space]] =  range_limit[y + cblue];
+  switch (cinfo->out_color_space) {
+    case JCS_EXT_RGB:
+      extrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                           output_buf);
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      extrgbx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_BGR:
+      extbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                           output_buf);
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      extbgrx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      extxbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      extxrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                            output_buf);
+      break;
+    default:
+      h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                    output_buf);
+      break;
  }
 }

--- a/media/libjpeg/jdmrgext.c
+++ b/media/libjpeg/jdmrgext.c
@ -0,0 +1,156 @@
+/*
+ * jdmrgext.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ */
+
+
+/* This file is included by jdmerge.c */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+INLINE
+LOCAL(void)
+h2v1_merged_upsample_internal (j_decompress_ptr cinfo,
+                               JSAMPIMAGE input_buf,
+                               JDIMENSION in_row_group_ctr,
+                               JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr;
+  JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr0++);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+    outptr += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr0);
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+  }
+}
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+INLINE
+LOCAL(void)
+h2v2_merged_upsample_internal (j_decompress_ptr cinfo,
+                               JSAMPIMAGE input_buf,
+                               JDIMENSION in_row_group_ctr,
+                               JSAMPARRAY output_buf)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register JSAMPROW outptr0, outptr1;
+  JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register JSAMPLE * range_limit = cinfo->sample_range_limit;
+  int * Crrtab = upsample->Cr_r_tab;
+  int * Cbbtab = upsample->Cb_b_tab;
+  INT32 * Crgtab = upsample->Cr_g_tab;
+  INT32 * Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr*2];
+  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = GETJSAMPLE(*inptr1++);
+    cr = GETJSAMPLE(*inptr2++);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr00++);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    outptr0 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+    y  = GETJSAMPLE(*inptr01++);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = GETJSAMPLE(*inptr1);
+    cr = GETJSAMPLE(*inptr2);
+    cred = Crrtab[cr];
+    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = GETJSAMPLE(*inptr00);
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+    y  = GETJSAMPLE(*inptr01);
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+  }
+}
--- a/media/libjpeg/jdtrans.c
+++ b/media/libjpeg/jdtrans.c
@ -99,6 +99,11 @@ transdecode_master_selection (j_decompress_ptr cinfo)
  /* This is effectively a buffered-image operation. */
  cinfo->buffered_image = TRUE;

+#if JPEG_LIB_VERSION >= 80
+  /* Compute output image dimensions and related values. */
+  jpeg_core_output_dimensions(cinfo);
+#endif
+
  /* Entropy decoding: either Huffman or arithmetic coding. */
  if (cinfo->arith_code) {
 #ifdef D_ARITH_CODING_SUPPORTED
--- a/media/libjpeg/jerror.h
+++ b/media/libjpeg/jerror.h
@ -40,15 +40,23 @@ typedef enum {
 JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */

 /* For maintenance convenience, list is alphabetical by message code name */
+#if JPEG_LIB_VERSION < 70
 JMESSAGE(JERR_ARITH_NOTIMPL,
 	 "Sorry, arithmetic coding is not implemented")
+#endif
 JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
 JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
 JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
 JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+#if JPEG_LIB_VERSION >= 70
 JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+#endif
 JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range")
 JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+	 "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+#endif
 JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
 JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
 JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
@ -95,7 +103,9 @@ JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
 JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
 JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
 JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+#if JPEG_LIB_VERSION >= 70
 JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+#endif
 JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported")
 JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
 JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
@ -173,7 +183,9 @@ JMESSAGE(JTRC_UNKNOWN_IDS,
 JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
 JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
 JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+#if JPEG_LIB_VERSION >= 70
 JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+#endif
 JMESSAGE(JWRN_BOGUS_PROGRESSION,
 	 "Inconsistent progression sequence for component %d coefficient %d")
 JMESSAGE(JWRN_EXTRANEOUS_DATA,
@ -186,6 +198,13 @@ JMESSAGE(JWRN_MUST_RESYNC,
 	 "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
 JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
 JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+#if JPEG_LIB_VERSION < 70
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+#if defined(C_ARITH_CODING_SUPPORTED) || defined(D_ARITH_CODING_SUPPORTED)
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+#endif
+#endif

 #ifdef JMAKE_ENUM_LIST

--- a/media/libjpeg/jmemmgr.c
+++ b/media/libjpeg/jmemmgr.c
@ -37,6 +37,15 @@ extern char * getenv JPP((const char * name));
 #endif


+LOCAL(size_t)
+round_up_pow2 (size_t a, size_t b)
+/* a rounded up to the next multiple of b, i.e. ceil(a/b)*b */
+/* Assumes a >= 0, b > 0, and b is a power of 2 */
+{
+  return ((a + b - 1) & (~(b - 1)));
+}
+
+
 /*
 * Some important notes:
 *   The allocation routines provided here must never return NULL.
@ -265,7 +274,7 @@ alloc_small (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
   * and so that algorithms can straddle outside the proper area up
   * to the next alignment.
   */
-  sizeofobject = jround_up(sizeofobject, ALIGN_SIZE);
+  sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE);

  /* Check for unsatisfiable request (do now to ensure no overflow below) */
  if ((SIZEOF(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
@ -354,7 +363,7 @@ alloc_large (j_common_ptr cinfo, int pool_id, size_t sizeofobject)
   * algorithms can straddle outside the proper area up to the next
   * alignment.
   */
-  sizeofobject = jround_up(sizeofobject, ALIGN_SIZE);
+  sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE);

  /* Check for unsatisfiable request (do now to ensure no overflow below) */
  if ((SIZEOF(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) > MAX_ALLOC_CHUNK)
@ -420,7 +429,7 @@ alloc_sarray (j_common_ptr cinfo, int pool_id,
  /* Make sure each row is properly aligned */
  if ((ALIGN_SIZE % SIZEOF(JSAMPLE)) != 0)
    out_of_memory(cinfo, 5);	/* safety check */
-  samplesperrow = (JDIMENSION)jround_up(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE));
+  samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) / SIZEOF(JSAMPLE));

  /* Calculate max # of rows allowed in one allocation chunk */
  ltemp = (MAX_ALLOC_CHUNK-SIZEOF(large_pool_hdr)) /
--- a/media/libjpeg/jmorecfg.h
+++ b/media/libjpeg/jmorecfg.h
@ -2,7 +2,7 @@
 * jmorecfg.h
 *
 * Copyright (C) 1991-1997, Thomas G. Lane.
- * Copyright (C) 2009, D. R. Commander.
+ * Copyright (C) 2009, 2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -242,8 +242,6 @@ typedef int boolean;
 * (You may HAVE to do that if your compiler doesn't like null source files.)
 */

-/* Arithmetic coding is unsupported for legal reasons.  Complaints to IBM. */
-
 /* Capability options common to encoder and decoder: */

 #define DCT_ISLOW_SUPPORTED	/* slow but accurate integer algorithm */
@ -252,7 +250,6 @@ typedef int boolean;

 /* Encoder capability options: */

-#undef  C_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
 #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
 #define C_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
 #define ENTROPY_OPT_SUPPORTED	    /* Optimization of entropy coding parms? */
@ -268,7 +265,6 @@ typedef int boolean;

 /* Decoder capability options: */

-#undef  D_ARITH_CODING_SUPPORTED    /* Arithmetic coding back end? */
 #define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
 #define D_PROGRESSIVE_SUPPORTED	    /* Progressive JPEG? (Requires MULTISCAN)*/
 #define SAVE_MARKERS_SUPPORTED	    /* jpeg_save_markers() needed? */
@ -302,22 +298,60 @@ typedef int boolean;
 #define RGB_BLUE	2	/* Offset of Blue */
 #define RGB_PIXELSIZE	3	/* JSAMPLEs per RGB scanline element */

-#define JPEG_NUMCS 12
+#define JPEG_NUMCS 16
+
+#define EXT_RGB_RED        0
+#define EXT_RGB_GREEN      1
+#define EXT_RGB_BLUE       2
+#define EXT_RGB_PIXELSIZE  3
+
+#define EXT_RGBX_RED       0
+#define EXT_RGBX_GREEN     1
+#define EXT_RGBX_BLUE      2
+#define EXT_RGBX_PIXELSIZE 4
+
+#define EXT_BGR_RED        2
+#define EXT_BGR_GREEN      1
+#define EXT_BGR_BLUE       0
+#define EXT_BGR_PIXELSIZE  3
+
+#define EXT_BGRX_RED       2
+#define EXT_BGRX_GREEN     1
+#define EXT_BGRX_BLUE      0
+#define EXT_BGRX_PIXELSIZE 4
+
+#define EXT_XBGR_RED       3
+#define EXT_XBGR_GREEN     2
+#define EXT_XBGR_BLUE      1
+#define EXT_XBGR_PIXELSIZE 4
+
+#define EXT_XRGB_RED       1
+#define EXT_XRGB_GREEN     2
+#define EXT_XRGB_BLUE      3
+#define EXT_XRGB_PIXELSIZE 4

 static const int rgb_red[JPEG_NUMCS] = {
-	-1, -1, RGB_RED, -1, -1, -1, 0, 0, 2, 2, 3, 1
+  -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED,
+  EXT_BGR_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED,
+  EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED
 };

 static const int rgb_green[JPEG_NUMCS] = {
-	-1, -1, RGB_GREEN, -1, -1, -1, 1, 1, 1, 1, 2, 2
+  -1, -1, RGB_GREEN, -1, -1, -1, EXT_RGB_GREEN, EXT_RGBX_GREEN,
+  EXT_BGR_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN,
+  EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN
 };

 static const int rgb_blue[JPEG_NUMCS] = {
-	-1, -1, RGB_BLUE, -1, -1, -1, 2, 2, 0, 0, 1, 3
+  -1, -1, RGB_BLUE, -1, -1, -1, EXT_RGB_BLUE, EXT_RGBX_BLUE,
+  EXT_BGR_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE,
+  EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE
 };

 static const int rgb_pixelsize[JPEG_NUMCS] = {
-	-1, -1, RGB_PIXELSIZE, -1, -1, -1, 3, 4, 3, 4, 4, 4
+  -1, -1, RGB_PIXELSIZE, -1, -1, -1, EXT_RGB_PIXELSIZE, EXT_RGBX_PIXELSIZE,
+  EXT_BGR_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE,
+  EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE
 };

 /* Definitions for speed-related optimizations. */
--- a/media/libjpeg/jpegint.h
+++ b/media/libjpeg/jpegint.h
@ -375,7 +375,7 @@ EXTERN(void) jinit_memory_mgr JPP((j_common_ptr cinfo));

 /* Utility routines in jutils.c */
 EXTERN(long) jdiv_round_up JPP((long a, long b));
-EXTERN(size_t) jround_up JPP((size_t a, size_t b));
+EXTERN(long) jround_up JPP((long a, long b));
 EXTERN(void) jcopy_sample_rows JPP((JSAMPARRAY input_array, int source_row,
 				    JSAMPARRAY output_array, int dest_row,
 				    int num_rows, JDIMENSION num_cols));
--- a/media/libjpeg/jpeglib.h
+++ b/media/libjpeg/jpeglib.h
@ -3,7 +3,7 @@
 *
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * Modified 2002-2009 by Guido Vollbeding.
- * Copyright (C) 2009-2010, D. R. Commander.
+ * Copyright (C) 2009-2011, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -211,12 +211,13 @@ struct jpeg_marker_struct {
 /* Known color spaces. */

 #define JCS_EXTENSIONS 1
+#define JCS_ALPHA_EXTENSIONS 1

 typedef enum {
 	JCS_UNKNOWN,		/* error/unspecified */
 	JCS_GRAYSCALE,		/* monochrome */
 	JCS_RGB,		/* red/green/blue as specified by the RGB_RED, RGB_GREEN,
-                 RGB_BLUE, and RGB_PIXELSIZE macros */
+				   RGB_BLUE, and RGB_PIXELSIZE macros */
 	JCS_YCbCr,		/* Y/Cb/Cr (also known as YUV) */
 	JCS_CMYK,		/* C/M/Y/K */
 	JCS_YCCK,		/* Y/Cb/Cr/K */
@ -225,7 +226,18 @@ typedef enum {
 	JCS_EXT_BGR,		/* blue/green/red */
 	JCS_EXT_BGRX,		/* blue/green/red/x */
 	JCS_EXT_XBGR,		/* x/blue/green/red */
-	JCS_EXT_XRGB		/* x/red/green/blue */
+	JCS_EXT_XRGB,		/* x/red/green/blue */
+	/* When out_color_space it set to JCS_EXT_RGBX, JCS_EXT_BGRX,
+	   JCS_EXT_XBGR, or JCS_EXT_XRGB during decompression, the X byte is
+	   undefined, and in order to ensure the best performance,
+	   libjpeg-turbo can set that byte to whatever value it wishes.  Use
+	   the following colorspace constants to ensure that the X byte is set
+	   to 0xFF, so that it can be interpreted as an opaque alpha
+	   channel. */
+	JCS_EXT_RGBA,		/* red/green/blue/alpha */
+	JCS_EXT_BGRA,		/* blue/green/red/alpha */
+	JCS_EXT_ABGR,		/* alpha/blue/green/red */
+	JCS_EXT_ARGB		/* alpha/red/green/blue */
 } J_COLOR_SPACE;

 /* DCT/IDCT algorithm options. */
--- a/media/libjpeg/jsimd.h
+++ b/media/libjpeg/jsimd.h
@ -2,6 +2,7 @@
 * jsimd.h
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2011 D. R. Commander
 * 
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -13,8 +14,10 @@

 #ifdef NEED_SHORT_EXTERNAL_NAMES
 #define jsimd_can_rgb_ycc                 jSCanRgbYcc
+#define jsimd_can_rgb_gray                jSCanRgbGry
 #define jsimd_can_ycc_rgb                 jSCanYccRgb
 #define jsimd_rgb_ycc_convert             jSRgbYccConv
+#define jsimd_rgb_gray_convert            jSRgbGryConv
 #define jsimd_ycc_rgb_convert             jSYccRgbConv
 #define jsimd_can_h2v2_downsample         jSCanH2V2Down
 #define jsimd_can_h2v1_downsample         jSCanH2V1Down
@ -35,12 +38,17 @@
 #endif /* NEED_SHORT_EXTERNAL_NAMES */

 EXTERN(int) jsimd_can_rgb_ycc JPP((void));
+EXTERN(int) jsimd_can_rgb_gray JPP((void));
 EXTERN(int) jsimd_can_ycc_rgb JPP((void));

 EXTERN(void) jsimd_rgb_ycc_convert
        JPP((j_compress_ptr cinfo,
             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_rgb_gray_convert
+        JPP((j_compress_ptr cinfo,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
 EXTERN(void) jsimd_ycc_rgb_convert
        JPP((j_decompress_ptr cinfo,
             JSAMPIMAGE input_buf, JDIMENSION input_row,
--- a/media/libjpeg/jsimd_none.c
+++ b/media/libjpeg/jsimd_none.c
@ -2,7 +2,7 @@
 * jsimd_none.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009 D. R. Commander
+ * Copyright 2009-2011 D. R. Commander
 * 
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -24,6 +24,12 @@ jsimd_can_rgb_ycc (void)
  return 0;
 }

+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  return 0;
+}
+
 GLOBAL(int)
 jsimd_can_ycc_rgb (void)
 {
@ -37,6 +43,13 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
 {
 }

+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+}
+
 GLOBAL(void)
 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
                       JSAMPIMAGE input_buf, JDIMENSION input_row,
--- a/media/libjpeg/jutils.c
+++ b/media/libjpeg/jutils.c
@ -77,8 +77,8 @@ jdiv_round_up (long a, long b)
 }


-GLOBAL(size_t)
-jround_up (size_t a, size_t b)
+GLOBAL(long)
+jround_up (long a, long b)
 /* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
 /* Assumes a >= 0, b > 0 */
 {
--- a/media/libjpeg/jversion.h
+++ b/media/libjpeg/jversion.h
@ -2,7 +2,7 @@
 * jversion.h
 *
 * Copyright (C) 1991-2010, Thomas G. Lane, Guido Vollbeding.
- * Copyright (C) 2010, D. R. Commander.
+ * Copyright (C) 2010, 2012, D. R. Commander.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
@ -14,24 +14,18 @@

 #define JVERSION	"8b  16-May-2010"

-#define JCOPYRIGHT	"Copyright (C) 2010, Thomas G. Lane, Guido Vollbeding"
-
 #elif JPEG_LIB_VERSION >= 70

 #define JVERSION        "7  27-Jun-2009"

-#define JCOPYRIGHT      "Copyright (C) 2009, Thomas G. Lane, Guido Vollbeding"
-
 #else

 #define JVERSION	"6b  27-Mar-1998"

-#define JCOPYRIGHT	"Copyright (C) 1998, Thomas G. Lane"
-
 #endif

-#define LJTCOPYRIGHT	"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
-			"Copyright (C) 2004 Landmark Graphics Corporation\n" \
-			"Copyright (C) 2005-2007 Sun Microsystems, Inc.\n" \
+#define JCOPYRIGHT	"Copyright (C) 1991-2010 Thomas G. Lane, Guido Vollbeding\n" \
+			"Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
 			"Copyright (C) 2009 Pierre Ossman for Cendio AB\n" \
-			"Copyright (C) 2009-2011 D. R. Commander"
+			"Copyright (C) 2009-2012 D. R. Commander\n" \
+			"Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)"
--- a/media/libjpeg/mozilla.diff
+++ b/media/libjpeg/mozilla.diff
@ -0,0 +1,70 @@
+--- jmorecfg.h	2012-01-27 00:46:32 -0500
+++ jmorecfg.h	2012-02-10 23:08:03 -0500
+@@ -6,16 +6,17 @@
+  * This file is part of the Independent JPEG Group's software.
+  * For conditions of distribution and use, see the accompanying README file.
+  *
+  * This file contains additional configuration options that customize the
+  * JPEG software for special applications or support machine-dependent
+  * optimizations.  Most users will not need to touch this file.
+  */
+ 
+#include "prtypes.h"
+ 
+ /*
+  * Define BITS_IN_JSAMPLE as either
+  *   8   for 8-bit sample values (the usual setting)
+  *   12  for 12-bit sample values
+  * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+  * JPEG standard, and the IJG code does not support anything else!
+  * We do not support run-time selection of data precision, sorry.
+@@ -127,45 +128,29 @@ typedef char JOCTET;
+  * They must be at least as wide as specified; but making them too big
+  * won't cost a huge amount of memory, so we don't provide special
+  * extraction code like we did for JSAMPLE.  (In other words, these
+  * typedefs live at a different point on the speed/space tradeoff curve.)
+  */
+ 
+ /* UINT8 must hold at least the values 0..255. */
+ 
+-#ifdef HAVE_UNSIGNED_CHAR
+-typedef unsigned char UINT8;
+-#else /* not HAVE_UNSIGNED_CHAR */
+-#ifdef __CHAR_UNSIGNED__
+-typedef char UINT8;
+-#else /* not __CHAR_UNSIGNED__ */
+-typedef short UINT8;
+-#endif /* __CHAR_UNSIGNED__ */
+-#endif /* HAVE_UNSIGNED_CHAR */
+typedef PRUint8 UINT8;
+ 
+ /* UINT16 must hold at least the values 0..65535. */
+ 
+-#ifdef HAVE_UNSIGNED_SHORT
+-typedef unsigned short UINT16;
+-#else /* not HAVE_UNSIGNED_SHORT */
+-typedef unsigned int UINT16;
+-#endif /* HAVE_UNSIGNED_SHORT */
+typedef PRUint16 UINT16;
+ 
+ /* INT16 must hold at least the values -32768..32767. */
+ 
+-#ifndef XMD_H			/* X11/xmd.h correctly defines INT16 */
+-typedef short INT16;
+-#endif
+typedef PRInt16 INT16;
+ 
+ /* INT32 must hold at least signed 32-bit values. */
+ 
+-#ifndef XMD_H			/* X11/xmd.h correctly defines INT32 */
+-typedef long INT32;
+-#endif
+typedef PRInt32 INT32;
+ 
+ /* Datatype used for image dimensions.  The JPEG standard only supports
+  * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
+  * "unsigned int" is sufficient on all machines.  However, if you need to
+  * handle larger images and you don't mind deviating from the spec, you
+  * can change this datatype.
+  */
+ 
--- a/media/libjpeg/simd/jcclrmmx.asm
+++ b/media/libjpeg/simd/jcclrmmx.asm
@ -19,8 +19,6 @@
 %include "jcolsamp.inc"

 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
 ;
 ; Convert some rows of samples to the output colorspace.
 ;
--- a/media/libjpeg/simd/jcclrss2-64.asm
+++ b/media/libjpeg/simd/jcclrss2-64.asm
@ -17,8 +17,6 @@
 %include "jcolsamp.inc"

 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
 ;
 ; Convert some rows of samples to the output colorspace.
 ;
--- a/media/libjpeg/simd/jcclrss2.asm
+++ b/media/libjpeg/simd/jcclrss2.asm
@ -16,8 +16,6 @@
 %include "jcolsamp.inc"

 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
 ;
 ; Convert some rows of samples to the output colorspace.
 ;
--- a/media/libjpeg/simd/jccolmmx.asm
+++ b/media/libjpeg/simd/jccolmmx.asm
@ -51,16 +51,19 @@ PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+
 %include "jcclrmmx.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_rgb_ycc_convert_mmx jsimd_extrgb_ycc_convert_mmx
 %include "jcclrmmx.asm"

@ -68,10 +71,10 @@ PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_rgb_ycc_convert_mmx jsimd_extrgbx_ycc_convert_mmx
 %include "jcclrmmx.asm"

@ -79,10 +82,10 @@ PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_rgb_ycc_convert_mmx jsimd_extbgr_ycc_convert_mmx
 %include "jcclrmmx.asm"

@ -90,10 +93,10 @@ PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_rgb_ycc_convert_mmx jsimd_extbgrx_ycc_convert_mmx
 %include "jcclrmmx.asm"

@ -101,10 +104,10 @@ PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_rgb_ycc_convert_mmx jsimd_extxbgr_ycc_convert_mmx
 %include "jcclrmmx.asm"

@ -112,9 +115,9 @@ PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_rgb_ycc_convert_mmx jsimd_extxrgb_ycc_convert_mmx
 %include "jcclrmmx.asm"
--- a/media/libjpeg/simd/jccolss2-64.asm
+++ b/media/libjpeg/simd/jccolss2-64.asm
@ -48,16 +48,19 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	64
+
 %include "jcclrss2-64.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
 %include "jcclrss2-64.asm"

@ -65,10 +68,10 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
 %include "jcclrss2-64.asm"

@ -76,10 +79,10 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
 %include "jcclrss2-64.asm"

@ -87,10 +90,10 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
 %include "jcclrss2-64.asm"

@ -98,10 +101,10 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
 %include "jcclrss2-64.asm"

@ -109,9 +112,9 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
 %include "jcclrss2-64.asm"
--- a/media/libjpeg/simd/jccolss2.asm
+++ b/media/libjpeg/simd/jccolss2.asm
@ -48,16 +48,19 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+
 %include "jcclrss2.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgb_ycc_convert_sse2
 %include "jcclrss2.asm"

@ -65,10 +68,10 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extrgbx_ycc_convert_sse2
 %include "jcclrss2.asm"

@ -76,10 +79,10 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgr_ycc_convert_sse2
 %include "jcclrss2.asm"

@ -87,10 +90,10 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extbgrx_ycc_convert_sse2
 %include "jcclrss2.asm"

@ -98,10 +101,10 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extxbgr_ycc_convert_sse2
 %include "jcclrss2.asm"

@ -109,9 +112,9 @@ PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_rgb_ycc_convert_sse2 jsimd_extxrgb_ycc_convert_sse2
 %include "jcclrss2.asm"
--- a/media/libjpeg/simd/jcgrammx.asm
+++ b/media/libjpeg/simd/jcgrammx.asm
@ -0,0 +1,116 @@
+;
+; jcgrammx.asm - grayscale colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2011 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS	16
+
+F_0_114	equ	 7471			; FIX(0.11400)
+F_0_250	equ	16384			; FIX(0.25000)
+F_0_299	equ	19595			; FIX(0.29900)
+F_0_587	equ	38470			; FIX(0.58700)
+F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_CONST
+
+	alignz	16
+	global	EXTN(jconst_rgb_gray_convert_mmx)
+
+EXTN(jconst_rgb_gray_convert_mmx):
+
+PW_F0299_F0337	times 2 dw  F_0_299, F_0_337
+PW_F0114_F0250	times 2 dw  F_0_114, F_0_250
+PD_ONEHALF	times 2 dd  (1 << (SCALEBITS-1))
+
+	alignz	16
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+
+%include "jcgrymmx.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extrgb_gray_convert_mmx
+%include "jcgrymmx.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extrgbx_gray_convert_mmx
+%include "jcgrymmx.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extbgr_gray_convert_mmx
+%include "jcgrymmx.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extbgrx_gray_convert_mmx
+%include "jcgrymmx.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extxbgr_gray_convert_mmx
+%include "jcgrymmx.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_mmx jsimd_extxrgb_gray_convert_mmx
+%include "jcgrymmx.asm"
--- a/media/libjpeg/simd/jcgrass2-64.asm
+++ b/media/libjpeg/simd/jcgrass2-64.asm
@ -0,0 +1,113 @@
+;
+; jcgrass2-64.asm - grayscale colorspace conversion (64-bit SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS	16
+
+F_0_114	equ	 7471			; FIX(0.11400)
+F_0_250	equ	16384			; FIX(0.25000)
+F_0_299	equ	19595			; FIX(0.29900)
+F_0_587	equ	38470			; FIX(0.58700)
+F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_CONST
+
+	alignz	16
+	global	EXTN(jconst_rgb_gray_convert_sse2)
+
+EXTN(jconst_rgb_gray_convert_sse2):
+
+PW_F0299_F0337	times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250	times 4 dw  F_0_114, F_0_250
+PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
+
+	alignz	16
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	64
+
+%include "jcgryss2-64.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2
+%include "jcgryss2-64.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2
+%include "jcgryss2-64.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2
+%include "jcgryss2-64.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2
+%include "jcgryss2-64.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2
+%include "jcgryss2-64.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2
+%include "jcgryss2-64.asm"
--- a/media/libjpeg/simd/jcgrass2.asm
+++ b/media/libjpeg/simd/jcgrass2.asm
@ -0,0 +1,113 @@
+;
+; jcgrass2.asm - grayscale colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jsimdext.inc"
+
+; --------------------------------------------------------------------------
+
+%define SCALEBITS	16
+
+F_0_114	equ	 7471			; FIX(0.11400)
+F_0_250	equ	16384			; FIX(0.25000)
+F_0_299	equ	19595			; FIX(0.29900)
+F_0_587	equ	38470			; FIX(0.58700)
+F_0_337	equ	(F_0_587 - F_0_250)	; FIX(0.58700) - FIX(0.25000)
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_CONST
+
+	alignz	16
+	global	EXTN(jconst_rgb_gray_convert_sse2)
+
+EXTN(jconst_rgb_gray_convert_sse2):
+
+PW_F0299_F0337	times 4 dw  F_0_299, F_0_337
+PW_F0114_F0250	times 4 dw  F_0_114, F_0_250
+PD_ONEHALF	times 4 dd  (1 << (SCALEBITS-1))
+
+	alignz	16
+
+; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+
+%include "jcgryss2.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgb_gray_convert_sse2
+%include "jcgryss2.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extrgbx_gray_convert_sse2
+%include "jcgryss2.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgr_gray_convert_sse2
+%include "jcgryss2.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extbgrx_gray_convert_sse2
+%include "jcgryss2.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxbgr_gray_convert_sse2
+%include "jcgryss2.asm"
+
+%undef RGB_RED
+%undef RGB_GREEN
+%undef RGB_BLUE
+%undef RGB_PIXELSIZE
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
+%define jsimd_rgb_gray_convert_sse2 jsimd_extxrgb_gray_convert_sse2
+%include "jcgryss2.asm"
--- a/media/libjpeg/simd/jcgrymmx.asm
+++ b/media/libjpeg/simd/jcgrymmx.asm
@ -0,0 +1,357 @@
+;
+; jcgrymmx.asm - grayscale colorspace conversion (MMX)
+;
+; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+; Copyright 2011 D. R. Commander
+;
+; Based on
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_mmx (JDIMENSION img_width,
+;                             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                             JDIMENSION output_row, int num_rows);
+;
+
+%define img_width(b)	(b)+8			; JDIMENSION img_width
+%define input_buf(b)	(b)+12		; JSAMPARRAY input_buf
+%define output_buf(b)	(b)+16		; JSAMPIMAGE output_buf
+%define output_row(b)	(b)+20		; JDIMENSION output_row
+%define num_rows(b)	(b)+24		; int num_rows
+
+%define original_ebp	ebp+0
+%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_MMWORD	; mmword wk[WK_NUM]
+%define WK_NUM		2
+%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+
+	align	16
+	global	EXTN(jsimd_rgb_gray_convert_mmx)
+
+EXTN(jsimd_rgb_gray_convert_mmx):
+	push	ebp
+	mov	eax,esp				; eax = original ebp
+	sub	esp, byte 4
+	and	esp, byte (-SIZEOF_MMWORD)	; align to 64 bits
+	mov	[esp],eax
+	mov	ebp,esp				; ebp = aligned ebp
+	lea	esp, [wk(0)]
+	pushpic	eax		; make a room for GOT address
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	get_GOT	ebx			; get GOT address
+	movpic	POINTER [gotptr], ebx	; save GOT address
+
+	mov	ecx, JDIMENSION [img_width(eax)]	; num_cols
+	test	ecx,ecx
+	jz	near .return
+
+	push	ecx
+
+	mov	esi, JSAMPIMAGE [output_buf(eax)]
+	mov	ecx, JDIMENSION [output_row(eax)]
+	mov	edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+	lea	edi, [edi+ecx*SIZEOF_JSAMPROW]
+
+	pop	ecx
+
+	mov	esi, JSAMPARRAY [input_buf(eax)]
+	mov	eax, INT [num_rows(eax)]
+	test	eax,eax
+	jle	near .return
+	alignx	16,7
+.rowloop:
+	pushpic	eax
+	push	edi
+	push	esi
+	push	ecx			; col
+
+	mov	esi, JSAMPROW [esi]	; inptr
+	mov	edi, JSAMPROW [edi]	; outptr0
+	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
+
+	cmp	ecx, byte SIZEOF_MMWORD
+	jae	short .columnloop
+	alignx	16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+	push	eax
+	push	edx
+	lea	ecx,[ecx+ecx*2]		; imul ecx,RGB_PIXELSIZE
+	test	cl, SIZEOF_BYTE
+	jz	short .column_ld2
+	sub	ecx, byte SIZEOF_BYTE
+	xor	eax,eax
+	mov	al, BYTE [esi+ecx]
+.column_ld2:
+	test	cl, SIZEOF_WORD
+	jz	short .column_ld4
+	sub	ecx, byte SIZEOF_WORD
+	xor	edx,edx
+	mov	dx, WORD [esi+ecx]
+	shl	eax, WORD_BIT
+	or	eax,edx
+.column_ld4:
+	movd	mmA,eax
+	pop	edx
+	pop	eax
+	test	cl, SIZEOF_DWORD
+	jz	short .column_ld8
+	sub	ecx, byte SIZEOF_DWORD
+	movd	mmG, DWORD [esi+ecx]
+	psllq	mmA, DWORD_BIT
+	por	mmA,mmG
+.column_ld8:
+	test	cl, SIZEOF_MMWORD
+	jz	short .column_ld16
+	movq	mmG,mmA
+	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+	mov	ecx, SIZEOF_MMWORD
+	jmp	short .rgb_gray_cnv
+.column_ld16:
+	test	cl, 2*SIZEOF_MMWORD
+	mov	ecx, SIZEOF_MMWORD
+	jz	short .rgb_gray_cnv
+	movq	mmF,mmA
+	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+	movq	mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+	jmp	short .rgb_gray_cnv
+	alignx	16,7
+
+.columnloop:
+	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+	movq	mmG, MMWORD [esi+1*SIZEOF_MMWORD]
+	movq	mmF, MMWORD [esi+2*SIZEOF_MMWORD]
+
+.rgb_gray_cnv:
+	; mmA=(00 10 20 01 11 21 02 12)
+	; mmG=(22 03 13 23 04 14 24 05)
+	; mmF=(15 25 06 16 26 07 17 27)
+
+	movq      mmD,mmA
+	psllq     mmA,4*BYTE_BIT	; mmA=(-- -- -- -- 00 10 20 01)
+	psrlq     mmD,4*BYTE_BIT	; mmD=(11 21 02 12 -- -- -- --)
+
+	punpckhbw mmA,mmG		; mmA=(00 04 10 14 20 24 01 05)
+	psllq     mmG,4*BYTE_BIT	; mmG=(-- -- -- -- 22 03 13 23)
+
+	punpcklbw mmD,mmF		; mmD=(11 15 21 25 02 06 12 16)
+	punpckhbw mmG,mmF		; mmG=(22 26 03 07 13 17 23 27)
+
+	movq      mmE,mmA
+	psllq     mmA,4*BYTE_BIT	; mmA=(-- -- -- -- 00 04 10 14)
+	psrlq     mmE,4*BYTE_BIT	; mmE=(20 24 01 05 -- -- -- --)
+
+	punpckhbw mmA,mmD		; mmA=(00 02 04 06 10 12 14 16)
+	psllq     mmD,4*BYTE_BIT	; mmD=(-- -- -- -- 11 15 21 25)
+
+	punpcklbw mmE,mmG		; mmE=(20 22 24 26 01 03 05 07)
+	punpckhbw mmD,mmG		; mmD=(11 13 15 17 21 23 25 27)
+
+	pxor      mmH,mmH
+
+	movq      mmC,mmA
+	punpcklbw mmA,mmH		; mmA=(00 02 04 06)
+	punpckhbw mmC,mmH		; mmC=(10 12 14 16)
+
+	movq      mmB,mmE
+	punpcklbw mmE,mmH		; mmE=(20 22 24 26)
+	punpckhbw mmB,mmH		; mmB=(01 03 05 07)
+
+	movq      mmF,mmD
+	punpcklbw mmD,mmH		; mmD=(11 13 15 17)
+	punpckhbw mmF,mmH		; mmF=(21 23 25 27)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+	test	cl, SIZEOF_MMWORD/8
+	jz	short .column_ld2
+	sub	ecx, byte SIZEOF_MMWORD/8
+	movd	mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+	test	cl, SIZEOF_MMWORD/4
+	jz	short .column_ld4
+	sub	ecx, byte SIZEOF_MMWORD/4
+	movq	mmF,mmA
+	movq	mmA, MMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld4:
+	test	cl, SIZEOF_MMWORD/2
+	mov	ecx, SIZEOF_MMWORD
+	jz	short .rgb_gray_cnv
+	movq	mmD,mmA
+	movq	mmC,mmF
+	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+	movq	mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+	jmp	short .rgb_gray_cnv
+	alignx	16,7
+
+.columnloop:
+	movq	mmA, MMWORD [esi+0*SIZEOF_MMWORD]
+	movq	mmF, MMWORD [esi+1*SIZEOF_MMWORD]
+	movq	mmD, MMWORD [esi+2*SIZEOF_MMWORD]
+	movq	mmC, MMWORD [esi+3*SIZEOF_MMWORD]
+
+.rgb_gray_cnv:
+	; mmA=(00 10 20 30 01 11 21 31)
+	; mmF=(02 12 22 32 03 13 23 33)
+	; mmD=(04 14 24 34 05 15 25 35)
+	; mmC=(06 16 26 36 07 17 27 37)
+
+	movq      mmB,mmA
+	punpcklbw mmA,mmF		; mmA=(00 02 10 12 20 22 30 32)
+	punpckhbw mmB,mmF		; mmB=(01 03 11 13 21 23 31 33)
+
+	movq      mmG,mmD
+	punpcklbw mmD,mmC		; mmD=(04 06 14 16 24 26 34 36)
+	punpckhbw mmG,mmC		; mmG=(05 07 15 17 25 27 35 37)
+
+	movq      mmE,mmA
+	punpcklwd mmA,mmD		; mmA=(00 02 04 06 10 12 14 16)
+	punpckhwd mmE,mmD		; mmE=(20 22 24 26 30 32 34 36)
+
+	movq      mmH,mmB
+	punpcklwd mmB,mmG		; mmB=(01 03 05 07 11 13 15 17)
+	punpckhwd mmH,mmG		; mmH=(21 23 25 27 31 33 35 37)
+
+	pxor      mmF,mmF
+
+	movq      mmC,mmA
+	punpcklbw mmA,mmF		; mmA=(00 02 04 06)
+	punpckhbw mmC,mmF		; mmC=(10 12 14 16)
+
+	movq      mmD,mmB
+	punpcklbw mmB,mmF		; mmB=(01 03 05 07)
+	punpckhbw mmD,mmF		; mmD=(11 13 15 17)
+
+	movq      mmG,mmE
+	punpcklbw mmE,mmF		; mmE=(20 22 24 26)
+	punpckhbw mmG,mmF		; mmG=(30 32 34 36)
+
+	punpcklbw mmF,mmH
+	punpckhbw mmH,mmH
+	psrlw     mmF,BYTE_BIT		; mmF=(21 23 25 27)
+	psrlw     mmH,BYTE_BIT		; mmH=(31 33 35 37)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+	; mm0=(R0 R2 R4 R6)=RE, mm2=(G0 G2 G4 G6)=GE, mm4=(B0 B2 B4 B6)=BE
+	; mm1=(R1 R3 R5 R7)=RO, mm3=(G1 G3 G5 G7)=GO, mm5=(B1 B3 B5 B7)=BO
+
+	; (Original)
+	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+	;
+	; (This implementation)
+	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+	movq      mm6,mm1
+	punpcklwd mm1,mm3
+	punpckhwd mm6,mm3
+	pmaddwd   mm1,[GOTOFF(eax,PW_F0299_F0337)] ; mm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+	pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+	movq      mm7, mm6	; mm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+	movq      mm6,mm0
+	punpcklwd mm0,mm2
+	punpckhwd mm6,mm2
+	pmaddwd   mm0,[GOTOFF(eax,PW_F0299_F0337)] ; mm0=REL*FIX(0.299)+GEL*FIX(0.337)
+	pmaddwd   mm6,[GOTOFF(eax,PW_F0299_F0337)] ; mm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+	movq      MMWORD [wk(0)], mm0	; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+	movq      MMWORD [wk(1)], mm6	; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+	movq      mm0, mm5	; mm0=BO
+	movq      mm6, mm4	; mm6=BE
+
+	movq      mm4,mm0
+	punpcklwd mm0,mm3
+	punpckhwd mm4,mm3
+	pmaddwd   mm0,[GOTOFF(eax,PW_F0114_F0250)] ; mm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+	pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+	movq      mm3,[GOTOFF(eax,PD_ONEHALF)]	; mm3=[PD_ONEHALF]
+
+	paddd     mm0, mm1
+	paddd     mm4, mm7
+	paddd     mm0,mm3
+	paddd     mm4,mm3
+	psrld     mm0,SCALEBITS		; mm0=YOL
+	psrld     mm4,SCALEBITS		; mm4=YOH
+	packssdw  mm0,mm4		; mm0=YO
+
+	movq      mm4,mm6
+	punpcklwd mm6,mm2
+	punpckhwd mm4,mm2
+	pmaddwd   mm6,[GOTOFF(eax,PW_F0114_F0250)] ; mm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+	pmaddwd   mm4,[GOTOFF(eax,PW_F0114_F0250)] ; mm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+	movq      mm2,[GOTOFF(eax,PD_ONEHALF)]	; mm2=[PD_ONEHALF]
+
+	paddd     mm6, MMWORD [wk(0)]
+	paddd     mm4, MMWORD [wk(1)]
+	paddd     mm6,mm2
+	paddd     mm4,mm2
+	psrld     mm6,SCALEBITS		; mm6=YEL
+	psrld     mm4,SCALEBITS		; mm4=YEH
+	packssdw  mm6,mm4		; mm6=YE
+
+	psllw     mm0,BYTE_BIT
+	por       mm6,mm0		; mm6=Y
+	movq      MMWORD [edi], mm6	; Save Y
+
+	sub	ecx, byte SIZEOF_MMWORD
+	add	esi, byte RGB_PIXELSIZE*SIZEOF_MMWORD	; inptr
+	add	edi, byte SIZEOF_MMWORD			; outptr0
+	cmp	ecx, byte SIZEOF_MMWORD
+	jae	near .columnloop
+	test	ecx,ecx
+	jnz	near .column_ld1
+
+	pop	ecx			; col
+	pop	esi
+	pop	edi
+	poppic	eax
+
+	add	esi, byte SIZEOF_JSAMPROW	; input_buf
+	add	edi, byte SIZEOF_JSAMPROW
+	dec	eax				; num_rows
+	jg	near .rowloop
+
+	emms		; empty MMX state
+
+.return:
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	mov	esp,ebp		; esp <- aligned ebp
+	pop	esp		; esp <- original ebp
+	pop	ebp
+	ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+	align	16
--- a/media/libjpeg/simd/jcgryss2-64.asm
+++ b/media/libjpeg/simd/jcgryss2-64.asm
@ -0,0 +1,364 @@
+;
+; jcgryss2-64.asm - grayscale colorspace conversion (64-bit SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width,
+;                              JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                              JDIMENSION output_row, int num_rows);
+;
+
+; r10 = JDIMENSION img_width
+; r11 = JSAMPARRAY input_buf
+; r12 = JSAMPIMAGE output_buf
+; r13 = JDIMENSION output_row
+; r14 = int num_rows
+
+%define wk(i)		rbp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
+%define WK_NUM		2
+
+	align	16
+
+	global	EXTN(jsimd_rgb_gray_convert_sse2)
+
+EXTN(jsimd_rgb_gray_convert_sse2):
+	push	rbp
+	mov	rax,rsp				; rax = original rbp
+	sub	rsp, byte 4
+	and	rsp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
+	mov	[rsp],rax
+	mov	rbp,rsp				; rbp = aligned rbp
+	lea	rsp, [wk(0)]
+	collect_args
+	push	rbx
+
+	mov	rcx, r10
+	test	rcx,rcx
+	jz	near .return
+
+	push	rcx
+
+	mov rsi, r12
+	mov rcx, r13
+	mov	rdi, JSAMPARRAY [rsi+0*SIZEOF_JSAMPARRAY]
+	lea	rdi, [rdi+rcx*SIZEOF_JSAMPROW]
+
+	pop	rcx
+
+	mov rsi, r11
+	mov	eax, r14d
+	test	rax,rax
+	jle	near .return
+.rowloop:
+	push	rdi
+	push	rsi
+	push	rcx			; col
+
+	mov	rsi, JSAMPROW [rsi]	; inptr
+	mov	rdi, JSAMPROW [rdi]	; outptr0
+
+	cmp	rcx, byte SIZEOF_XMMWORD
+	jae	near .columnloop
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+	push	rax
+	push	rdx
+	lea	rcx,[rcx+rcx*2]		; imul ecx,RGB_PIXELSIZE
+	test	cl, SIZEOF_BYTE
+	jz	short .column_ld2
+	sub	rcx, byte SIZEOF_BYTE
+	movzx	rax, BYTE [rsi+rcx]
+.column_ld2:
+	test	cl, SIZEOF_WORD
+	jz	short .column_ld4
+	sub	rcx, byte SIZEOF_WORD
+	movzx	rdx, WORD [rsi+rcx]
+	shl	rax, WORD_BIT
+	or	rax,rdx
+.column_ld4:
+	movd	xmmA,eax
+	pop	rdx
+	pop	rax
+	test	cl, SIZEOF_DWORD
+	jz	short .column_ld8
+	sub	rcx, byte SIZEOF_DWORD
+	movd	xmmF, XMM_DWORD [rsi+rcx]
+	pslldq	xmmA, SIZEOF_DWORD
+	por	xmmA,xmmF
+.column_ld8:
+	test	cl, SIZEOF_MMWORD
+	jz	short .column_ld16
+	sub	rcx, byte SIZEOF_MMWORD
+	movq	xmmB, XMM_MMWORD [rsi+rcx]
+	pslldq	xmmA, SIZEOF_MMWORD
+	por	xmmA,xmmB
+.column_ld16:
+	test	cl, SIZEOF_XMMWORD
+	jz	short .column_ld32
+	movdqa	xmmF,xmmA
+	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+	mov	rcx, SIZEOF_XMMWORD
+	jmp	short .rgb_gray_cnv
+.column_ld32:
+	test	cl, 2*SIZEOF_XMMWORD
+	mov	rcx, SIZEOF_XMMWORD
+	jz	short .rgb_gray_cnv
+	movdqa	xmmB,xmmA
+	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+	movdqu	xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+	jmp	short .rgb_gray_cnv
+
+.columnloop:
+	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+	movdqu	xmmF, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+	movdqu	xmmB, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+	; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+	; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+	movdqa    xmmG,xmmA
+	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+	psrldq    xmmG,8	; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+	punpckhbw xmmA,xmmF	; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+	pslldq    xmmF,8	; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+	punpcklbw xmmG,xmmB	; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+	punpckhbw xmmF,xmmB	; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+	movdqa    xmmD,xmmA
+	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+	psrldq    xmmD,8	; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+	punpckhbw xmmA,xmmG	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+	pslldq    xmmG,8	; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+	punpcklbw xmmD,xmmF	; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+	punpckhbw xmmG,xmmF	; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+	movdqa    xmmE,xmmA
+	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+	psrldq    xmmE,8	; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+	punpckhbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+	pslldq    xmmD,8	; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+	punpcklbw xmmE,xmmG	; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+	punpckhbw xmmD,xmmG	; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+	pxor      xmmH,xmmH
+
+	movdqa    xmmC,xmmA
+	punpcklbw xmmA,xmmH	; xmmA=(00 02 04 06 08 0A 0C 0E)
+	punpckhbw xmmC,xmmH	; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+	movdqa    xmmB,xmmE
+	punpcklbw xmmE,xmmH	; xmmE=(20 22 24 26 28 2A 2C 2E)
+	punpckhbw xmmB,xmmH	; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+	movdqa    xmmF,xmmD
+	punpcklbw xmmD,xmmH	; xmmD=(11 13 15 17 19 1B 1D 1F)
+	punpckhbw xmmF,xmmH	; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+	test	cl, SIZEOF_XMMWORD/16
+	jz	short .column_ld2
+	sub	rcx, byte SIZEOF_XMMWORD/16
+	movd	xmmA, XMM_DWORD [rsi+rcx*RGB_PIXELSIZE]
+.column_ld2:
+	test	cl, SIZEOF_XMMWORD/8
+	jz	short .column_ld4
+	sub	rcx, byte SIZEOF_XMMWORD/8
+	movq	xmmE, XMM_MMWORD [rsi+rcx*RGB_PIXELSIZE]
+	pslldq	xmmA, SIZEOF_MMWORD
+	por	xmmA,xmmE
+.column_ld4:
+	test	cl, SIZEOF_XMMWORD/4
+	jz	short .column_ld8
+	sub	rcx, byte SIZEOF_XMMWORD/4
+	movdqa	xmmE,xmmA
+	movdqu	xmmA, XMMWORD [rsi+rcx*RGB_PIXELSIZE]
+.column_ld8:
+	test	cl, SIZEOF_XMMWORD/2
+	mov	rcx, SIZEOF_XMMWORD
+	jz	short .rgb_gray_cnv
+	movdqa	xmmF,xmmA
+	movdqa	xmmH,xmmE
+	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+	movdqu	xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+	jmp	short .rgb_gray_cnv
+
+.columnloop:
+	movdqu	xmmA, XMMWORD [rsi+0*SIZEOF_XMMWORD]
+	movdqu	xmmE, XMMWORD [rsi+1*SIZEOF_XMMWORD]
+	movdqu	xmmF, XMMWORD [rsi+2*SIZEOF_XMMWORD]
+	movdqu	xmmH, XMMWORD [rsi+3*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+	; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+	; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+	movdqa    xmmD,xmmA
+	punpcklbw xmmA,xmmE	; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+	punpckhbw xmmD,xmmE	; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+	movdqa    xmmC,xmmF
+	punpcklbw xmmF,xmmH	; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+	punpckhbw xmmC,xmmH	; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+	movdqa    xmmB,xmmA
+	punpcklwd xmmA,xmmF	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+	punpckhwd xmmB,xmmF	; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+	movdqa    xmmG,xmmD
+	punpcklwd xmmD,xmmC	; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+	punpckhwd xmmG,xmmC	; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+	movdqa    xmmE,xmmA
+	punpcklbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+	punpckhbw xmmE,xmmD	; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+	movdqa    xmmH,xmmB
+	punpcklbw xmmB,xmmG	; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+	punpckhbw xmmH,xmmG	; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+	pxor      xmmF,xmmF
+
+	movdqa    xmmC,xmmA
+	punpcklbw xmmA,xmmF	; xmmA=(00 02 04 06 08 0A 0C 0E)
+	punpckhbw xmmC,xmmF	; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+	movdqa    xmmD,xmmB
+	punpcklbw xmmB,xmmF	; xmmB=(01 03 05 07 09 0B 0D 0F)
+	punpckhbw xmmD,xmmF	; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+	movdqa    xmmG,xmmE
+	punpcklbw xmmE,xmmF	; xmmE=(20 22 24 26 28 2A 2C 2E)
+	punpckhbw xmmG,xmmF	; xmmG=(30 32 34 36 38 3A 3C 3E)
+
+	punpcklbw xmmF,xmmH
+	punpckhbw xmmH,xmmH
+	psrlw     xmmF,BYTE_BIT	; xmmF=(21 23 25 27 29 2B 2D 2F)
+	psrlw     xmmH,BYTE_BIT	; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+	; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+	; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+	; (Original)
+	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+	;
+	; (This implementation)
+	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+	movdqa    xmm6,xmm1
+	punpcklwd xmm1,xmm3
+	punpckhwd xmm6,xmm3
+	pmaddwd   xmm1,[rel PW_F0299_F0337] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+	pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+	movdqa    xmm7, xmm6	; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+	movdqa    xmm6,xmm0
+	punpcklwd xmm0,xmm2
+	punpckhwd xmm6,xmm2
+	pmaddwd   xmm0,[rel PW_F0299_F0337] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+	pmaddwd   xmm6,[rel PW_F0299_F0337] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+	movdqa    XMMWORD [wk(0)], xmm0	; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+	movdqa    XMMWORD [wk(1)], xmm6	; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+	movdqa    xmm0, xmm5	; xmm0=BO
+	movdqa    xmm6, xmm4	; xmm6=BE
+
+	movdqa    xmm4,xmm0
+	punpcklwd xmm0,xmm3
+	punpckhwd xmm4,xmm3
+	pmaddwd   xmm0,[rel PW_F0114_F0250] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+	pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+	movdqa    xmm3,[rel PD_ONEHALF]	; xmm3=[PD_ONEHALF]
+
+	paddd     xmm0, xmm1
+	paddd     xmm4, xmm7
+	paddd     xmm0,xmm3
+	paddd     xmm4,xmm3
+	psrld     xmm0,SCALEBITS	; xmm0=YOL
+	psrld     xmm4,SCALEBITS	; xmm4=YOH
+	packssdw  xmm0,xmm4		; xmm0=YO
+
+	movdqa    xmm4,xmm6
+	punpcklwd xmm6,xmm2
+	punpckhwd xmm4,xmm2
+	pmaddwd   xmm6,[rel PW_F0114_F0250] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+	pmaddwd   xmm4,[rel PW_F0114_F0250] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+	movdqa    xmm2,[rel PD_ONEHALF]	; xmm2=[PD_ONEHALF]
+
+	paddd     xmm6, XMMWORD [wk(0)]
+	paddd     xmm4, XMMWORD [wk(1)]
+	paddd     xmm6,xmm2
+	paddd     xmm4,xmm2
+	psrld     xmm6,SCALEBITS	; xmm6=YEL
+	psrld     xmm4,SCALEBITS	; xmm4=YEH
+	packssdw  xmm6,xmm4		; xmm6=YE
+
+	psllw     xmm0,BYTE_BIT
+	por       xmm6,xmm0		; xmm6=Y
+	movdqa    XMMWORD [rdi], xmm6	; Save Y
+
+	sub	rcx, byte SIZEOF_XMMWORD
+	add	rsi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; inptr
+	add	rdi, byte SIZEOF_XMMWORD		; outptr0
+	cmp	rcx, byte SIZEOF_XMMWORD
+	jae	near .columnloop
+	test	rcx,rcx
+	jnz	near .column_ld1
+
+	pop	rcx			; col
+	pop	rsi
+	pop	rdi
+
+	add	rsi, byte SIZEOF_JSAMPROW	; input_buf
+	add	rdi, byte SIZEOF_JSAMPROW
+	dec	rax				; num_rows
+	jg	near .rowloop
+
+.return:
+	pop	rbx
+	uncollect_args
+	mov	rsp,rbp		; rsp <- aligned rbp
+	pop	rsp		; rsp <- original rbp
+	pop	rbp
+	ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+	align	16
--- a/media/libjpeg/simd/jcgryss2.asm
+++ b/media/libjpeg/simd/jcgryss2.asm
@ -0,0 +1,383 @@
+;
+; jcgryss2.asm - grayscale colorspace conversion (SSE2)
+;
+; x86 SIMD extension for IJG JPEG library
+; Copyright (C) 1999-2006, MIYASAKA Masaru.
+; Copyright (C) 2011, D. R. Commander.
+; For conditions of distribution and use, see copyright notice in jsimdext.inc
+;
+; This file should be assembled with NASM (Netwide Assembler),
+; can *not* be assembled with Microsoft's MASM or any compatible
+; assembler (including Borland's Turbo Assembler).
+; NASM is available from http://nasm.sourceforge.net/ or
+; http://sourceforge.net/project/showfiles.php?group_id=6208
+;
+; [TAB8]
+
+%include "jcolsamp.inc"
+
+; --------------------------------------------------------------------------
+;
+; Convert some rows of samples to the output colorspace.
+;
+; GLOBAL(void)
+; jsimd_rgb_gray_convert_sse2 (JDIMENSION img_width,
+;                              JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+;                              JDIMENSION output_row, int num_rows);
+;
+
+%define img_width(b)	(b)+8			; JDIMENSION img_width
+%define input_buf(b)	(b)+12		; JSAMPARRAY input_buf
+%define output_buf(b)	(b)+16		; JSAMPIMAGE output_buf
+%define output_row(b)	(b)+20		; JDIMENSION output_row
+%define num_rows(b)	(b)+24		; int num_rows
+
+%define original_ebp	ebp+0
+%define wk(i)		ebp-(WK_NUM-(i))*SIZEOF_XMMWORD	; xmmword wk[WK_NUM]
+%define WK_NUM		2
+%define gotptr		wk(0)-SIZEOF_POINTER	; void * gotptr
+
+	align	16
+
+	global	EXTN(jsimd_rgb_gray_convert_sse2)
+
+EXTN(jsimd_rgb_gray_convert_sse2):
+	push	ebp
+	mov	eax,esp				; eax = original ebp
+	sub	esp, byte 4
+	and	esp, byte (-SIZEOF_XMMWORD)	; align to 128 bits
+	mov	[esp],eax
+	mov	ebp,esp				; ebp = aligned ebp
+	lea	esp, [wk(0)]
+	pushpic	eax		; make a room for GOT address
+	push	ebx
+;	push	ecx		; need not be preserved
+;	push	edx		; need not be preserved
+	push	esi
+	push	edi
+
+	get_GOT	ebx			; get GOT address
+	movpic	POINTER [gotptr], ebx	; save GOT address
+
+	mov	ecx, JDIMENSION [img_width(eax)]
+	test	ecx,ecx
+	jz	near .return
+
+	push	ecx
+
+	mov	esi, JSAMPIMAGE [output_buf(eax)]
+	mov	ecx, JDIMENSION [output_row(eax)]
+	mov	edi, JSAMPARRAY [esi+0*SIZEOF_JSAMPARRAY]
+	lea	edi, [edi+ecx*SIZEOF_JSAMPROW]
+
+	pop	ecx
+
+	mov	esi, JSAMPARRAY [input_buf(eax)]
+	mov	eax, INT [num_rows(eax)]
+	test	eax,eax
+	jle	near .return
+	alignx	16,7
+.rowloop:
+	pushpic	eax
+	push	edi
+	push	esi
+	push	ecx			; col
+
+	mov	esi, JSAMPROW [esi]	; inptr
+	mov	edi, JSAMPROW [edi]	; outptr0
+	movpic	eax, POINTER [gotptr]	; load GOT address (eax)
+
+	cmp	ecx, byte SIZEOF_XMMWORD
+	jae	near .columnloop
+	alignx	16,7
+
+%if RGB_PIXELSIZE == 3 ; ---------------
+
+.column_ld1:
+	push	eax
+	push	edx
+	lea	ecx,[ecx+ecx*2]		; imul ecx,RGB_PIXELSIZE
+	test	cl, SIZEOF_BYTE
+	jz	short .column_ld2
+	sub	ecx, byte SIZEOF_BYTE
+	movzx	eax, BYTE [esi+ecx]
+.column_ld2:
+	test	cl, SIZEOF_WORD
+	jz	short .column_ld4
+	sub	ecx, byte SIZEOF_WORD
+	movzx	edx, WORD [esi+ecx]
+	shl	eax, WORD_BIT
+	or	eax,edx
+.column_ld4:
+	movd	xmmA,eax
+	pop	edx
+	pop	eax
+	test	cl, SIZEOF_DWORD
+	jz	short .column_ld8
+	sub	ecx, byte SIZEOF_DWORD
+	movd	xmmF, XMM_DWORD [esi+ecx]
+	pslldq	xmmA, SIZEOF_DWORD
+	por	xmmA,xmmF
+.column_ld8:
+	test	cl, SIZEOF_MMWORD
+	jz	short .column_ld16
+	sub	ecx, byte SIZEOF_MMWORD
+	movq	xmmB, XMM_MMWORD [esi+ecx]
+	pslldq	xmmA, SIZEOF_MMWORD
+	por	xmmA,xmmB
+.column_ld16:
+	test	cl, SIZEOF_XMMWORD
+	jz	short .column_ld32
+	movdqa	xmmF,xmmA
+	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+	mov	ecx, SIZEOF_XMMWORD
+	jmp	short .rgb_gray_cnv
+.column_ld32:
+	test	cl, 2*SIZEOF_XMMWORD
+	mov	ecx, SIZEOF_XMMWORD
+	jz	short .rgb_gray_cnv
+	movdqa	xmmB,xmmA
+	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+	movdqu	xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+	jmp	short .rgb_gray_cnv
+	alignx	16,7
+
+.columnloop:
+	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+	movdqu	xmmF, XMMWORD [esi+1*SIZEOF_XMMWORD]
+	movdqu	xmmB, XMMWORD [esi+2*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+	; xmmA=(00 10 20 01 11 21 02 12 22 03 13 23 04 14 24 05)
+	; xmmF=(15 25 06 16 26 07 17 27 08 18 28 09 19 29 0A 1A)
+	; xmmB=(2A 0B 1B 2B 0C 1C 2C 0D 1D 2D 0E 1E 2E 0F 1F 2F)
+
+	movdqa    xmmG,xmmA
+	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 10 20 01 11 21 02 12)
+	psrldq    xmmG,8	; xmmG=(22 03 13 23 04 14 24 05 -- -- -- -- -- -- -- --)
+
+	punpckhbw xmmA,xmmF	; xmmA=(00 08 10 18 20 28 01 09 11 19 21 29 02 0A 12 1A)
+	pslldq    xmmF,8	; xmmF=(-- -- -- -- -- -- -- -- 15 25 06 16 26 07 17 27)
+
+	punpcklbw xmmG,xmmB	; xmmG=(22 2A 03 0B 13 1B 23 2B 04 0C 14 1C 24 2C 05 0D)
+	punpckhbw xmmF,xmmB	; xmmF=(15 1D 25 2D 06 0E 16 1E 26 2E 07 0F 17 1F 27 2F)
+
+	movdqa    xmmD,xmmA
+	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 08 10 18 20 28 01 09)
+	psrldq    xmmD,8	; xmmD=(11 19 21 29 02 0A 12 1A -- -- -- -- -- -- -- --)
+
+	punpckhbw xmmA,xmmG	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 01 05 09 0D)
+	pslldq    xmmG,8	; xmmG=(-- -- -- -- -- -- -- -- 22 2A 03 0B 13 1B 23 2B)
+
+	punpcklbw xmmD,xmmF	; xmmD=(11 15 19 1D 21 25 29 2D 02 06 0A 0E 12 16 1A 1E)
+	punpckhbw xmmG,xmmF	; xmmG=(22 26 2A 2E 03 07 0B 0F 13 17 1B 1F 23 27 2B 2F)
+
+	movdqa    xmmE,xmmA
+	pslldq    xmmA,8	; xmmA=(-- -- -- -- -- -- -- -- 00 04 08 0C 10 14 18 1C)
+	psrldq    xmmE,8	; xmmE=(20 24 28 2C 01 05 09 0D -- -- -- -- -- -- -- --)
+
+	punpckhbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+	pslldq    xmmD,8	; xmmD=(-- -- -- -- -- -- -- -- 11 15 19 1D 21 25 29 2D)
+
+	punpcklbw xmmE,xmmG	; xmmE=(20 22 24 26 28 2A 2C 2E 01 03 05 07 09 0B 0D 0F)
+	punpckhbw xmmD,xmmG	; xmmD=(11 13 15 17 19 1B 1D 1F 21 23 25 27 29 2B 2D 2F)
+
+	pxor      xmmH,xmmH
+
+	movdqa    xmmC,xmmA
+	punpcklbw xmmA,xmmH	; xmmA=(00 02 04 06 08 0A 0C 0E)
+	punpckhbw xmmC,xmmH	; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+	movdqa    xmmB,xmmE
+	punpcklbw xmmE,xmmH	; xmmE=(20 22 24 26 28 2A 2C 2E)
+	punpckhbw xmmB,xmmH	; xmmB=(01 03 05 07 09 0B 0D 0F)
+
+	movdqa    xmmF,xmmD
+	punpcklbw xmmD,xmmH	; xmmD=(11 13 15 17 19 1B 1D 1F)
+	punpckhbw xmmF,xmmH	; xmmF=(21 23 25 27 29 2B 2D 2F)
+
+%else ; RGB_PIXELSIZE == 4 ; -----------
+
+.column_ld1:
+	test	cl, SIZEOF_XMMWORD/16
+	jz	short .column_ld2
+	sub	ecx, byte SIZEOF_XMMWORD/16
+	movd	xmmA, XMM_DWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld2:
+	test	cl, SIZEOF_XMMWORD/8
+	jz	short .column_ld4
+	sub	ecx, byte SIZEOF_XMMWORD/8
+	movq	xmmE, XMM_MMWORD [esi+ecx*RGB_PIXELSIZE]
+	pslldq	xmmA, SIZEOF_MMWORD
+	por	xmmA,xmmE
+.column_ld4:
+	test	cl, SIZEOF_XMMWORD/4
+	jz	short .column_ld8
+	sub	ecx, byte SIZEOF_XMMWORD/4
+	movdqa	xmmE,xmmA
+	movdqu	xmmA, XMMWORD [esi+ecx*RGB_PIXELSIZE]
+.column_ld8:
+	test	cl, SIZEOF_XMMWORD/2
+	mov	ecx, SIZEOF_XMMWORD
+	jz	short .rgb_gray_cnv
+	movdqa	xmmF,xmmA
+	movdqa	xmmH,xmmE
+	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+	movdqu	xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+	jmp	short .rgb_gray_cnv
+	alignx	16,7
+
+.columnloop:
+	movdqu	xmmA, XMMWORD [esi+0*SIZEOF_XMMWORD]
+	movdqu	xmmE, XMMWORD [esi+1*SIZEOF_XMMWORD]
+	movdqu	xmmF, XMMWORD [esi+2*SIZEOF_XMMWORD]
+	movdqu	xmmH, XMMWORD [esi+3*SIZEOF_XMMWORD]
+
+.rgb_gray_cnv:
+	; xmmA=(00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33)
+	; xmmE=(04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37)
+	; xmmF=(08 18 28 38 09 19 29 39 0A 1A 2A 3A 0B 1B 2B 3B)
+	; xmmH=(0C 1C 2C 3C 0D 1D 2D 3D 0E 1E 2E 3E 0F 1F 2F 3F)
+
+	movdqa    xmmD,xmmA
+	punpcklbw xmmA,xmmE	; xmmA=(00 04 10 14 20 24 30 34 01 05 11 15 21 25 31 35)
+	punpckhbw xmmD,xmmE	; xmmD=(02 06 12 16 22 26 32 36 03 07 13 17 23 27 33 37)
+
+	movdqa    xmmC,xmmF
+	punpcklbw xmmF,xmmH	; xmmF=(08 0C 18 1C 28 2C 38 3C 09 0D 19 1D 29 2D 39 3D)
+	punpckhbw xmmC,xmmH	; xmmC=(0A 0E 1A 1E 2A 2E 3A 3E 0B 0F 1B 1F 2B 2F 3B 3F)
+
+	movdqa    xmmB,xmmA
+	punpcklwd xmmA,xmmF	; xmmA=(00 04 08 0C 10 14 18 1C 20 24 28 2C 30 34 38 3C)
+	punpckhwd xmmB,xmmF	; xmmB=(01 05 09 0D 11 15 19 1D 21 25 29 2D 31 35 39 3D)
+
+	movdqa    xmmG,xmmD
+	punpcklwd xmmD,xmmC	; xmmD=(02 06 0A 0E 12 16 1A 1E 22 26 2A 2E 32 36 3A 3E)
+	punpckhwd xmmG,xmmC	; xmmG=(03 07 0B 0F 13 17 1B 1F 23 27 2B 2F 33 37 3B 3F)
+
+	movdqa    xmmE,xmmA
+	punpcklbw xmmA,xmmD	; xmmA=(00 02 04 06 08 0A 0C 0E 10 12 14 16 18 1A 1C 1E)
+	punpckhbw xmmE,xmmD	; xmmE=(20 22 24 26 28 2A 2C 2E 30 32 34 36 38 3A 3C 3E)
+
+	movdqa    xmmH,xmmB
+	punpcklbw xmmB,xmmG	; xmmB=(01 03 05 07 09 0B 0D 0F 11 13 15 17 19 1B 1D 1F)
+	punpckhbw xmmH,xmmG	; xmmH=(21 23 25 27 29 2B 2D 2F 31 33 35 37 39 3B 3D 3F)
+
+	pxor      xmmF,xmmF
+
+	movdqa    xmmC,xmmA
+	punpcklbw xmmA,xmmF	; xmmA=(00 02 04 06 08 0A 0C 0E)
+	punpckhbw xmmC,xmmF	; xmmC=(10 12 14 16 18 1A 1C 1E)
+
+	movdqa    xmmD,xmmB
+	punpcklbw xmmB,xmmF	; xmmB=(01 03 05 07 09 0B 0D 0F)
+	punpckhbw xmmD,xmmF	; xmmD=(11 13 15 17 19 1B 1D 1F)
+
+	movdqa    xmmG,xmmE
+	punpcklbw xmmE,xmmF	; xmmE=(20 22 24 26 28 2A 2C 2E)
+	punpckhbw xmmG,xmmF	; xmmG=(30 32 34 36 38 3A 3C 3E)
+
+	punpcklbw xmmF,xmmH
+	punpckhbw xmmH,xmmH
+	psrlw     xmmF,BYTE_BIT	; xmmF=(21 23 25 27 29 2B 2D 2F)
+	psrlw     xmmH,BYTE_BIT	; xmmH=(31 33 35 37 39 3B 3D 3F)
+
+%endif ; RGB_PIXELSIZE ; ---------------
+
+	; xmm0=R(02468ACE)=RE, xmm2=G(02468ACE)=GE, xmm4=B(02468ACE)=BE
+	; xmm1=R(13579BDF)=RO, xmm3=G(13579BDF)=GO, xmm5=B(13579BDF)=BO
+
+	; (Original)
+	; Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+	;
+	; (This implementation)
+	; Y  =  0.29900 * R + 0.33700 * G + 0.11400 * B + 0.25000 * G
+
+	movdqa    xmm6,xmm1
+	punpcklwd xmm1,xmm3
+	punpckhwd xmm6,xmm3
+	pmaddwd   xmm1,[GOTOFF(eax,PW_F0299_F0337)] ; xmm1=ROL*FIX(0.299)+GOL*FIX(0.337)
+	pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+	movdqa    xmm7, xmm6	; xmm7=ROH*FIX(0.299)+GOH*FIX(0.337)
+
+	movdqa    xmm6,xmm0
+	punpcklwd xmm0,xmm2
+	punpckhwd xmm6,xmm2
+	pmaddwd   xmm0,[GOTOFF(eax,PW_F0299_F0337)] ; xmm0=REL*FIX(0.299)+GEL*FIX(0.337)
+	pmaddwd   xmm6,[GOTOFF(eax,PW_F0299_F0337)] ; xmm6=REH*FIX(0.299)+GEH*FIX(0.337)
+
+	movdqa    XMMWORD [wk(0)], xmm0	; wk(0)=REL*FIX(0.299)+GEL*FIX(0.337)
+	movdqa    XMMWORD [wk(1)], xmm6	; wk(1)=REH*FIX(0.299)+GEH*FIX(0.337)
+
+	movdqa    xmm0, xmm5	; xmm0=BO
+	movdqa    xmm6, xmm4	; xmm6=BE
+
+	movdqa    xmm4,xmm0
+	punpcklwd xmm0,xmm3
+	punpckhwd xmm4,xmm3
+	pmaddwd   xmm0,[GOTOFF(eax,PW_F0114_F0250)] ; xmm0=BOL*FIX(0.114)+GOL*FIX(0.250)
+	pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BOH*FIX(0.114)+GOH*FIX(0.250)
+
+	movdqa    xmm3,[GOTOFF(eax,PD_ONEHALF)]	; xmm3=[PD_ONEHALF]
+
+	paddd     xmm0, xmm1
+	paddd     xmm4, xmm7
+	paddd     xmm0,xmm3
+	paddd     xmm4,xmm3
+	psrld     xmm0,SCALEBITS	; xmm0=YOL
+	psrld     xmm4,SCALEBITS	; xmm4=YOH
+	packssdw  xmm0,xmm4		; xmm0=YO
+
+	movdqa    xmm4,xmm6
+	punpcklwd xmm6,xmm2
+	punpckhwd xmm4,xmm2
+	pmaddwd   xmm6,[GOTOFF(eax,PW_F0114_F0250)] ; xmm6=BEL*FIX(0.114)+GEL*FIX(0.250)
+	pmaddwd   xmm4,[GOTOFF(eax,PW_F0114_F0250)] ; xmm4=BEH*FIX(0.114)+GEH*FIX(0.250)
+
+	movdqa    xmm2,[GOTOFF(eax,PD_ONEHALF)]	; xmm2=[PD_ONEHALF]
+
+	paddd     xmm6, XMMWORD [wk(0)]
+	paddd     xmm4, XMMWORD [wk(1)]
+	paddd     xmm6,xmm2
+	paddd     xmm4,xmm2
+	psrld     xmm6,SCALEBITS	; xmm6=YEL
+	psrld     xmm4,SCALEBITS	; xmm4=YEH
+	packssdw  xmm6,xmm4		; xmm6=YE
+
+	psllw     xmm0,BYTE_BIT
+	por       xmm6,xmm0		; xmm6=Y
+	movdqa    XMMWORD [edi], xmm6	; Save Y
+
+	sub	ecx, byte SIZEOF_XMMWORD
+	add	esi, byte RGB_PIXELSIZE*SIZEOF_XMMWORD	; inptr
+	add	edi, byte SIZEOF_XMMWORD		; outptr0
+	cmp	ecx, byte SIZEOF_XMMWORD
+	jae	near .columnloop
+	test	ecx,ecx
+	jnz	near .column_ld1
+
+	pop	ecx			; col
+	pop	esi
+	pop	edi
+	poppic	eax
+
+	add	esi, byte SIZEOF_JSAMPROW	; input_buf
+	add	edi, byte SIZEOF_JSAMPROW
+	dec	eax				; num_rows
+	jg	near .rowloop
+
+.return:
+	pop	edi
+	pop	esi
+;	pop	edx		; need not be preserved
+;	pop	ecx		; need not be preserved
+	pop	ebx
+	mov	esp,ebp		; esp <- aligned ebp
+	pop	esp		; esp <- original ebp
+	pop	ebp
+	ret
+
+; For some reason, the OS X linker does not honor the request to align the
+; segment unless we do this.
+	align	16
--- a/media/libjpeg/simd/jdclrmmx.asm
+++ b/media/libjpeg/simd/jdclrmmx.asm
@ -19,8 +19,6 @@
 %include "jcolsamp.inc"

 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
 ;
 ; Convert some rows of samples to the output colorspace.
 ;
--- a/media/libjpeg/simd/jdclrss2-64.asm
+++ b/media/libjpeg/simd/jdclrss2-64.asm
@ -20,8 +20,6 @@
 %include "jcolsamp.inc"
 				
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
 ;
 ; Convert some rows of samples to the output colorspace.
 ;
@ -292,6 +290,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 	movdqa	xmmA,xmmD
 	sub	rcx, byte SIZEOF_XMMWORD
 .column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+	; Store the lower 8 bytes of xmmA to the output when it has enough
+	; space.
+	cmp	rcx, byte SIZEOF_MMWORD
+	jb	short .column_st7
+	movq	MMWORD [rdi], xmmA
+	add	rdi, byte SIZEOF_MMWORD
+	sub	rcx, byte SIZEOF_MMWORD
+	psrldq	xmmA, SIZEOF_MMWORD
+.column_st7:
+	; Store the lower 4 bytes of xmmA to the output when it has enough
+	; space.
+	cmp	rcx, byte SIZEOF_DWORD
+	jb	short .column_st3
+	movd	DWORD [rdi], xmmA
+	add	rdi, byte SIZEOF_DWORD
+	sub	rcx, byte SIZEOF_DWORD
+	psrldq	xmmA, SIZEOF_DWORD
+.column_st3:
+	; Store the lower 2 bytes of rax to the output when it has enough
+	; space.
+	movd	eax, xmmA
+	cmp	rcx, byte SIZEOF_WORD
+	jb	short .column_st1
+	mov	WORD [rdi], ax
+	add	rdi, byte SIZEOF_WORD
+	sub	rcx, byte SIZEOF_WORD
+	shr	rax, 16
+.column_st1:
+	; Store the lower 1 byte of rax to the output when it has enough
+	; space.
+	test	rcx, rcx
+	jz	short .nextrow
+	mov	BYTE [rdi], al
+%else
 	mov	rax,rcx
 	xor	rcx, byte 0x0F
 	shl	rcx, 2
@ -331,6 +364,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 	por	xmmE,xmmC
 .adj0:	; ----------------
 	maskmovdqu xmmA,xmmE			; movntdqu XMMWORD [rdi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------

 %else ; RGB_PIXELSIZE == 4 ; -----------

@ -415,6 +449,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 	movdqa	xmmA,xmmD
 	sub	rcx, byte SIZEOF_XMMWORD/4
 .column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+	; Store two pixels (8 bytes) of xmmA to the output when it has enough
+	; space.
+	cmp	rcx, byte SIZEOF_XMMWORD/8
+	jb	short .column_st7
+	movq	MMWORD [rdi], xmmA
+	add	rdi, byte SIZEOF_XMMWORD/8*4
+	sub	rcx, byte SIZEOF_XMMWORD/8
+	psrldq	xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+	; Store one pixel (4 bytes) of xmmA to the output when it has enough
+	; space.
+	test	rcx, rcx
+	jz	short .nextrow
+	movd	DWORD [rdi], xmmA
+%else
 	cmp	rcx, byte SIZEOF_XMMWORD/16
 	jb	near .nextrow
 	mov	rax,rcx
@ -454,6 +504,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 	por	xmmE,xmmG
 .adj0:	; ----------------
 	maskmovdqu xmmA,xmmE			; movntdqu XMMWORD [rdi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------

 %endif ; RGB_PIXELSIZE ; ---------------

--- a/media/libjpeg/simd/jdclrss2.asm
+++ b/media/libjpeg/simd/jdclrss2.asm
@ -19,8 +19,6 @@
 %include "jcolsamp.inc"
 				
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
 ;
 ; Convert some rows of samples to the output colorspace.
 ;
@ -304,6 +302,41 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 	movdqa	xmmA,xmmD
 	sub	ecx, byte SIZEOF_XMMWORD
 .column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+	; Store the lower 8 bytes of xmmA to the output when it has enough
+	; space.
+	cmp	ecx, byte SIZEOF_MMWORD
+	jb	short .column_st7
+	movq	MMWORD [edi], xmmA
+	add	edi, byte SIZEOF_MMWORD
+	sub	ecx, byte SIZEOF_MMWORD
+	psrldq	xmmA, SIZEOF_MMWORD
+.column_st7:
+	; Store the lower 4 bytes of xmmA to the output when it has enough
+	; space.
+	cmp	ecx, byte SIZEOF_DWORD
+	jb	short .column_st3
+	movd	DWORD [edi], xmmA
+	add	edi, byte SIZEOF_DWORD
+	sub	ecx, byte SIZEOF_DWORD
+	psrldq	xmmA, SIZEOF_DWORD
+.column_st3:
+	; Store the lower 2 bytes of eax to the output when it has enough
+	; space.
+	movd	eax, xmmA
+	cmp	ecx, byte SIZEOF_WORD
+	jb	short .column_st1
+	mov	WORD [edi], ax
+	add	edi, byte SIZEOF_WORD
+	sub	ecx, byte SIZEOF_WORD
+	shr	eax, 16
+.column_st1:
+	; Store the lower 1 byte of eax to the output when it has enough
+	; space.
+	test	ecx, ecx
+	jz	short .nextrow
+	mov	BYTE [edi], al
+%else
 	mov	eax,ecx
 	xor	ecx, byte 0x0F
 	shl	ecx, 2
@ -343,6 +376,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 	por	xmmE,xmmC
 .adj0:	; ----------------
 	maskmovdqu xmmA,xmmE			; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------

 %else ; RGB_PIXELSIZE == 4 ; -----------

@ -428,6 +462,22 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 	movdqa	xmmA,xmmD
 	sub	ecx, byte SIZEOF_XMMWORD/4
 .column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+	; Store two pixels (8 bytes) of xmmA to the output when it has enough
+	; space.
+	cmp	ecx, byte SIZEOF_XMMWORD/8
+	jb	short .column_st7
+	movq	MMWORD [edi], xmmA
+	add	edi, byte SIZEOF_XMMWORD/8*4
+	sub	ecx, byte SIZEOF_XMMWORD/8
+	psrldq	xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+	; Store one pixel (4 bytes) of xmmA to the output when it has enough
+	; space.
+	test	ecx, ecx
+	jz	short .nextrow
+	movd	DWORD [edi], xmmA
+%else
 	cmp	ecx, byte SIZEOF_XMMWORD/16
 	jb	short .nextrow
 	mov	eax,ecx
@ -467,6 +517,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
 	por	xmmE,xmmG
 .adj0:	; ----------------
 	maskmovdqu xmmA,xmmE			; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------

 %endif ; RGB_PIXELSIZE ; ---------------

--- a/media/libjpeg/simd/jdcolmmx.asm
+++ b/media/libjpeg/simd/jdcolmmx.asm
@ -48,16 +48,19 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+
 %include "jdclrmmx.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgb_convert_mmx
 %include "jdclrmmx.asm"

@ -65,10 +68,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extrgbx_convert_mmx
 %include "jdclrmmx.asm"

@ -76,10 +79,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgr_convert_mmx
 %include "jdclrmmx.asm"

@ -87,10 +90,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extbgrx_convert_mmx
 %include "jdclrmmx.asm"

@ -98,10 +101,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxbgr_convert_mmx
 %include "jdclrmmx.asm"

@ -109,9 +112,9 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_ycc_rgb_convert_mmx jsimd_ycc_extxrgb_convert_mmx
 %include "jdclrmmx.asm"
--- a/media/libjpeg/simd/jdcolss2-64.asm
+++ b/media/libjpeg/simd/jdcolss2-64.asm
@ -48,16 +48,19 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	64
+
 %include "jdclrss2-64.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2
 %include "jdclrss2-64.asm"

@ -65,10 +68,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2
 %include "jdclrss2-64.asm"

@ -76,10 +79,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2
 %include "jdclrss2-64.asm"

@ -87,10 +90,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2
 %include "jdclrss2-64.asm"

@ -98,10 +101,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2
 %include "jdclrss2-64.asm"

@ -109,9 +112,9 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2
 %include "jdclrss2-64.asm"
--- a/media/libjpeg/simd/jdcolss2.asm
+++ b/media/libjpeg/simd/jdcolss2.asm
@ -48,16 +48,19 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+
 %include "jdclrss2.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgb_convert_sse2
 %include "jdclrss2.asm"

@ -65,10 +68,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extrgbx_convert_sse2
 %include "jdclrss2.asm"

@ -76,10 +79,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgr_convert_sse2
 %include "jdclrss2.asm"

@ -87,10 +90,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extbgrx_convert_sse2
 %include "jdclrss2.asm"

@ -98,10 +101,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxbgr_convert_sse2
 %include "jdclrss2.asm"

@ -109,9 +112,9 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_ycc_rgb_convert_sse2 jsimd_ycc_extxrgb_convert_sse2
 %include "jdclrss2.asm"
--- a/media/libjpeg/simd/jdmermmx.asm
+++ b/media/libjpeg/simd/jdmermmx.asm
@ -48,16 +48,19 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+
 %include "jdmrgmmx.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgb_merged_upsample_mmx
 %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgb_merged_upsample_mmx
 %include "jdmrgmmx.asm"
@ -66,10 +69,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extrgbx_merged_upsample_mmx
 %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extrgbx_merged_upsample_mmx
 %include "jdmrgmmx.asm"
@ -78,10 +81,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgr_merged_upsample_mmx
 %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgr_merged_upsample_mmx
 %include "jdmrgmmx.asm"
@ -90,10 +93,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extbgrx_merged_upsample_mmx
 %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extbgrx_merged_upsample_mmx
 %include "jdmrgmmx.asm"
@ -102,10 +105,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxbgr_merged_upsample_mmx
 %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxbgr_merged_upsample_mmx
 %include "jdmrgmmx.asm"
@ -114,10 +117,10 @@ PD_ONEHALF	times 2 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_mmx jsimd_h2v1_extxrgb_merged_upsample_mmx
 %define jsimd_h2v2_merged_upsample_mmx jsimd_h2v2_extxrgb_merged_upsample_mmx
 %include "jdmrgmmx.asm"
--- a/media/libjpeg/simd/jdmerss2-64.asm
+++ b/media/libjpeg/simd/jdmerss2-64.asm
@ -48,16 +48,19 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	64
+
 %include "jdmrgss2-64.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2
 %include "jdmrgss2-64.asm"
@ -66,10 +69,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2
 %include "jdmrgss2-64.asm"
@ -78,10 +81,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2
 %include "jdmrgss2-64.asm"
@ -90,10 +93,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2
 %include "jdmrgss2-64.asm"
@ -102,10 +105,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2
 %include "jdmrgss2-64.asm"
@ -114,10 +117,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2
 %include "jdmrgss2-64.asm"
--- a/media/libjpeg/simd/jdmerss2.asm
+++ b/media/libjpeg/simd/jdmerss2.asm
@ -48,16 +48,19 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 	alignz	16

 ; --------------------------------------------------------------------------
+	SECTION	SEG_TEXT
+	BITS	32
+
 %include "jdmrgss2.asm"

 %undef RGB_RED
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_RGB_RED
+%define RGB_GREEN EXT_RGB_GREEN
+%define RGB_BLUE EXT_RGB_BLUE
+%define RGB_PIXELSIZE EXT_RGB_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgb_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgb_merged_upsample_sse2
 %include "jdmrgss2.asm"
@ -66,10 +69,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 0
-%define RGB_GREEN 1
-%define RGB_BLUE 2
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_RGBX_RED
+%define RGB_GREEN EXT_RGBX_GREEN
+%define RGB_BLUE EXT_RGBX_BLUE
+%define RGB_PIXELSIZE EXT_RGBX_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extrgbx_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extrgbx_merged_upsample_sse2
 %include "jdmrgss2.asm"
@ -78,10 +81,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 3
+%define RGB_RED EXT_BGR_RED
+%define RGB_GREEN EXT_BGR_GREEN
+%define RGB_BLUE EXT_BGR_BLUE
+%define RGB_PIXELSIZE EXT_BGR_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgr_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgr_merged_upsample_sse2
 %include "jdmrgss2.asm"
@ -90,10 +93,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 2
-%define RGB_GREEN 1
-%define RGB_BLUE 0
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_BGRX_RED
+%define RGB_GREEN EXT_BGRX_GREEN
+%define RGB_BLUE EXT_BGRX_BLUE
+%define RGB_PIXELSIZE EXT_BGRX_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extbgrx_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extbgrx_merged_upsample_sse2
 %include "jdmrgss2.asm"
@ -102,10 +105,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 3
-%define RGB_GREEN 2
-%define RGB_BLUE 1
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XBGR_RED
+%define RGB_GREEN EXT_XBGR_GREEN
+%define RGB_BLUE EXT_XBGR_BLUE
+%define RGB_PIXELSIZE EXT_XBGR_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxbgr_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxbgr_merged_upsample_sse2
 %include "jdmrgss2.asm"
@ -114,10 +117,10 @@ PD_ONEHALF	times 4 dd  1 << (SCALEBITS-1)
 %undef RGB_GREEN
 %undef RGB_BLUE
 %undef RGB_PIXELSIZE
-%define RGB_RED 1
-%define RGB_GREEN 2
-%define RGB_BLUE 3
-%define RGB_PIXELSIZE 4
+%define RGB_RED EXT_XRGB_RED
+%define RGB_GREEN EXT_XRGB_GREEN
+%define RGB_BLUE EXT_XRGB_BLUE
+%define RGB_PIXELSIZE EXT_XRGB_PIXELSIZE
 %define jsimd_h2v1_merged_upsample_sse2 jsimd_h2v1_extxrgb_merged_upsample_sse2
 %define jsimd_h2v2_merged_upsample_sse2 jsimd_h2v2_extxrgb_merged_upsample_sse2
 %include "jdmrgss2.asm"
--- a/media/libjpeg/simd/jdmrgmmx.asm
+++ b/media/libjpeg/simd/jdmrgmmx.asm
@ -19,8 +19,6 @@
 %include "jcolsamp.inc"

 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
 ;
 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
 ;
--- a/media/libjpeg/simd/jdmrgss2-64.asm
+++ b/media/libjpeg/simd/jdmrgss2-64.asm
@ -20,8 +20,6 @@
 %include "jcolsamp.inc"
 				
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	64
 ;
 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
 ;
@ -296,6 +294,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 	movdqa	xmmA,xmmD
 	sub	rcx, byte SIZEOF_XMMWORD
 .column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+	; Store the lower 8 bytes of xmmA to the output when it has enough
+	; space.
+	cmp	rcx, byte SIZEOF_MMWORD
+	jb	short .column_st7
+	movq	MMWORD [rdi], xmmA
+	add	rdi, byte SIZEOF_MMWORD
+	sub	rcx, byte SIZEOF_MMWORD
+	psrldq	xmmA, SIZEOF_MMWORD
+.column_st7:
+	; Store the lower 4 bytes of xmmA to the output when it has enough
+	; space.
+	cmp	rcx, byte SIZEOF_DWORD
+	jb	short .column_st3
+	movd	DWORD [rdi], xmmA
+	add	rdi, byte SIZEOF_DWORD
+	sub	rcx, byte SIZEOF_DWORD
+	psrldq	xmmA, SIZEOF_DWORD
+.column_st3:
+	; Store the lower 2 bytes of rax to the output when it has enough
+	; space.
+	movd	eax, xmmA
+	cmp	rcx, byte SIZEOF_WORD
+	jb	short .column_st1
+	mov	WORD [rdi], ax
+	add	rdi, byte SIZEOF_WORD
+	sub	rcx, byte SIZEOF_WORD
+	shr	rax, 16
+.column_st1:
+	; Store the lower 1 byte of rax to the output when it has enough
+	; space.
+	test	rcx, rcx
+	jz	short .endcolumn
+	mov	BYTE [rdi], al
+%else
 	mov	rax,rcx
 	xor	rcx, byte 0x0F
 	shl	rcx, 2
@ -335,6 +368,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 	por	xmmE,xmmC
 .adj0:	; ----------------
 	maskmovdqu xmmA,xmmE			; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------

 %else ; RGB_PIXELSIZE == 4 ; -----------

@ -422,6 +456,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 	movdqa	xmmA,xmmD
 	sub	rcx, byte SIZEOF_XMMWORD/4
 .column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+	; Store two pixels (8 bytes) of xmmA to the output when it has enough
+	; space.
+	cmp	rcx, byte SIZEOF_XMMWORD/8
+	jb	short .column_st7
+	movq	MMWORD [rdi], xmmA
+	add	rdi, byte SIZEOF_XMMWORD/8*4
+	sub	rcx, byte SIZEOF_XMMWORD/8
+	psrldq	xmmA, SIZEOF_XMMWORD/8*4
+.column_st7:
+	; Store one pixel (4 bytes) of xmmA to the output when it has enough
+	; space.
+	test	rcx, rcx
+	jz	short .endcolumn
+	movd	DWORD [rdi], xmmA
+%else
 	cmp	rcx, byte SIZEOF_XMMWORD/16
 	jb	near .endcolumn
 	mov	rax,rcx
@ -461,6 +511,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 	por	xmmE,xmmG
 .adj0:	; ----------------
 	maskmovdqu xmmA,xmmE			; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------

 %endif ; RGB_PIXELSIZE ; ---------------

--- a/media/libjpeg/simd/jdmrgss2.asm
+++ b/media/libjpeg/simd/jdmrgss2.asm
@ -19,8 +19,6 @@
 %include "jcolsamp.inc"
 				
 ; --------------------------------------------------------------------------
-	SECTION	SEG_TEXT
-	BITS	32
 ;
 ; Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
 ;
@ -309,6 +307,41 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 	movdqa	xmmA,xmmD
 	sub	ecx, byte SIZEOF_XMMWORD
 .column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+	; Store the lower 8 bytes of xmmA to the output when it has enough
+	; space.
+	cmp	ecx, byte SIZEOF_MMWORD
+	jb	short .column_st7
+	movq	MMWORD [edi], xmmA
+	add	edi, byte SIZEOF_MMWORD
+	sub	ecx, byte SIZEOF_MMWORD
+	psrldq	xmmA, SIZEOF_MMWORD
+.column_st7:
+	; Store the lower 4 bytes of xmmA to the output when it has enough
+	; space.
+	cmp	ecx, byte SIZEOF_DWORD
+	jb	short .column_st3
+	movd	DWORD [edi], xmmA
+	add	edi, byte SIZEOF_DWORD
+	sub	ecx, byte SIZEOF_DWORD
+	psrldq	xmmA, SIZEOF_DWORD
+.column_st3:
+	; Store the lower 2 bytes of eax to the output when it has enough
+	; space.
+	movd	eax, xmmA
+	cmp	ecx, byte SIZEOF_WORD
+	jb	short .column_st1
+	mov	WORD [edi], ax
+	add	edi, byte SIZEOF_WORD
+	sub	ecx, byte SIZEOF_WORD
+	shr	eax, 16
+.column_st1:
+	; Store the lower 1 byte of eax to the output when it has enough
+	; space.
+	test	ecx, ecx
+	jz	short .endcolumn
+	mov	BYTE [edi], al
+%else
 	mov	eax,ecx
 	xor	ecx, byte 0x0F
 	shl	ecx, 2
@ -348,6 +381,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 	por	xmmE,xmmC
 .adj0:	; ----------------
 	maskmovdqu xmmA,xmmE			; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------

 %else ; RGB_PIXELSIZE == 4 ; -----------

@ -436,6 +470,22 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 	movdqa	xmmA,xmmD
 	sub	ecx, byte SIZEOF_XMMWORD/4
 .column_st15:
+%ifdef STRICT_MEMORY_ACCESS
+	; Store two pixels (8 bytes) of xmmA to the output when it has enough
+	; space.
+	cmp	ecx, byte SIZEOF_XMMWORD/8
+	jb	short .column_st7
+	movq	MMWORD [edi], xmmA
+	add	edi, byte SIZEOF_XMMWORD/2
+	sub	ecx, byte SIZEOF_XMMWORD/8
+	psrldq	xmmA, 64
+.column_st7:
+	; Store one pixel (4 bytes) of xmmA to the output when it has enough
+	; space.
+	test	ecx, ecx
+	jz	short .endcolumn
+	movd	DWORD [edi], xmmA
+%else
 	cmp	ecx, byte SIZEOF_XMMWORD/16
 	jb	short .endcolumn
 	mov	eax,ecx
@ -475,6 +525,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
 	por	xmmE,xmmG
 .adj0:	; ----------------
 	maskmovdqu xmmA,xmmE			; movntdqu XMMWORD [edi], xmmA
+%endif ; STRICT_MEMORY_ACCESS ; ---------------

 %endif ; RGB_PIXELSIZE ; ---------------

--- a/media/libjpeg/simd/jsimd.h
+++ b/media/libjpeg/simd/jsimd.h
@ -2,6 +2,7 @@
 * simd/jsimd.h
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2011 D. R. Commander
 * 
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -11,11 +12,12 @@

 /* Bitmask for supported acceleration methods */

-#define JSIMD_NONE    0x00
-#define JSIMD_MMX     0x01
-#define JSIMD_3DNOW   0x02
-#define JSIMD_SSE     0x04
-#define JSIMD_SSE2    0x08
+#define JSIMD_NONE       0x00
+#define JSIMD_MMX        0x01
+#define JSIMD_3DNOW      0x02
+#define JSIMD_SSE        0x04
+#define JSIMD_SSE2       0x08
+#define JSIMD_ARM_NEON   0x10

 /* Short forms of external names for systems with brain-damaged linkers. */

@ -28,6 +30,13 @@
 #define jsimd_extbgrx_ycc_convert_mmx         jSEXTBGRXYCCM
 #define jsimd_extxbgr_ycc_convert_mmx         jSEXTXBGRYCCM
 #define jsimd_extxrgb_ycc_convert_mmx         jSEXTXRGBYCCM
+#define jsimd_rgb_gray_convert_mmx            jSRGBGRYM
+#define jsimd_extrgb_gray_convert_mmx         jSEXTRGBGRYM
+#define jsimd_extrgbx_gray_convert_mmx        jSEXTRGBXGRYM
+#define jsimd_extbgr_gray_convert_mmx         jSEXTBGRGRYM
+#define jsimd_extbgrx_gray_convert_mmx        jSEXTBGRXGRYM
+#define jsimd_extxbgr_gray_convert_mmx        jSEXTXBGRGRYM
+#define jsimd_extxrgb_gray_convert_mmx        jSEXTXRGBGRYM
 #define jsimd_ycc_rgb_convert_mmx             jSYCCRGBM
 #define jsimd_ycc_extrgb_convert_mmx          jSYCCEXTRGBM
 #define jsimd_ycc_extrgbx_convert_mmx         jSYCCEXTRGBXM
@ -43,6 +52,14 @@
 #define jsimd_extbgrx_ycc_convert_sse2        jSEXTBGRXYCCS2
 #define jsimd_extxbgr_ycc_convert_sse2        jSEXTXBGRYCCS2
 #define jsimd_extxrgb_ycc_convert_sse2        jSEXTXRGBYCCS2
+#define jconst_rgb_gray_convert_sse2          jSCRGBGRYS2
+#define jsimd_rgb_gray_convert_sse2           jSRGBGRYS2
+#define jsimd_extrgb_gray_convert_sse2        jSEXTRGBGRYS2
+#define jsimd_extrgbx_gray_convert_sse2       jSEXTRGBXGRYS2
+#define jsimd_extbgr_gray_convert_sse2        jSEXTBGRGRYS2
+#define jsimd_extbgrx_gray_convert_sse2       jSEXTBGRXGRYS2
+#define jsimd_extxbgr_gray_convert_sse2       jSEXTXBGRGRYS2
+#define jsimd_extxrgb_gray_convert_sse2       jSEXTXRGBGRYS2
 #define jconst_ycc_rgb_convert_sse2           jSCYCCRGBS2
 #define jsimd_ycc_rgb_convert_sse2            jSYCCRGBS2
 #define jsimd_ycc_extrgb_convert_sse2         jSYCCEXTRGBS2
@ -163,6 +180,35 @@ EXTERN(void) jsimd_extxrgb_ycc_convert_mmx
             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
             JDIMENSION output_row, int num_rows));

+EXTERN(void) jsimd_rgb_gray_convert_mmx
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgb_gray_convert_mmx
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgbx_gray_convert_mmx
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgr_gray_convert_mmx
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgrx_gray_convert_mmx
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxbgr_gray_convert_mmx
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxrgb_gray_convert_mmx
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+
 EXTERN(void) jsimd_ycc_rgb_convert_mmx
        JPP((JDIMENSION out_width,
             JSAMPIMAGE input_buf, JDIMENSION input_row,
@ -222,6 +268,36 @@ EXTERN(void) jsimd_extxrgb_ycc_convert_sse2
             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
             JDIMENSION output_row, int num_rows));

+extern const int jconst_rgb_gray_convert_sse2[];
+EXTERN(void) jsimd_rgb_gray_convert_sse2
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgb_gray_convert_sse2
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgbx_gray_convert_sse2
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgr_gray_convert_sse2
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgrx_gray_convert_sse2
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxbgr_gray_convert_sse2
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxrgb_gray_convert_sse2
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+
 extern const int jconst_ycc_rgb_convert_sse2[];
 EXTERN(void) jsimd_ycc_rgb_convert_sse2
        JPP((JDIMENSION out_width,
@ -252,6 +328,64 @@ EXTERN(void) jsimd_ycc_extxrgb_convert_sse2
             JSAMPIMAGE input_buf, JDIMENSION input_row,
             JSAMPARRAY output_buf, int num_rows));

+EXTERN(void) jsimd_rgb_ycc_convert_neon
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgb_ycc_convert_neon
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extrgbx_ycc_convert_neon
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgr_ycc_convert_neon
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extbgrx_ycc_convert_neon
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxbgr_ycc_convert_neon
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+EXTERN(void) jsimd_extxrgb_ycc_convert_neon
+        JPP((JDIMENSION img_width,
+             JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+             JDIMENSION output_row, int num_rows));
+
+EXTERN(void) jsimd_ycc_rgb_convert_neon
+        JPP((JDIMENSION out_width,
+             JSAMPIMAGE input_buf, JDIMENSION input_row,
+             JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extrgb_convert_neon
+        JPP((JDIMENSION out_width,
+             JSAMPIMAGE input_buf, JDIMENSION input_row,
+             JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extrgbx_convert_neon
+        JPP((JDIMENSION out_width,
+             JSAMPIMAGE input_buf, JDIMENSION input_row,
+             JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extbgr_convert_neon
+        JPP((JDIMENSION out_width,
+             JSAMPIMAGE input_buf, JDIMENSION input_row,
+             JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extbgrx_convert_neon
+        JPP((JDIMENSION out_width,
+             JSAMPIMAGE input_buf, JDIMENSION input_row,
+             JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extxbgr_convert_neon
+        JPP((JDIMENSION out_width,
+             JSAMPIMAGE input_buf, JDIMENSION input_row,
+             JSAMPARRAY output_buf, int num_rows));
+EXTERN(void) jsimd_ycc_extxrgb_convert_neon
+        JPP((JDIMENSION out_width,
+             JSAMPIMAGE input_buf, JDIMENSION input_row,
+             JSAMPARRAY output_buf, int num_rows));
+
 /* SIMD Downsample */
 EXTERN(void) jsimd_h2v2_downsample_mmx
        JPP((JDIMENSION image_width, int max_v_samp_factor,
@ -397,6 +531,10 @@ EXTERN(void) jsimd_convsamp_sse2 JPP((JSAMPARRAY sample_data,
                                      JDIMENSION start_col,
                                      DCTELEM * workspace));

+EXTERN(void) jsimd_convsamp_neon JPP((JSAMPARRAY sample_data,
+                                      JDIMENSION start_col,
+                                      DCTELEM * workspace));
+
 EXTERN(void) jsimd_convsamp_float_3dnow JPP((JSAMPARRAY sample_data,
                                             JDIMENSION start_col,
                                             FAST_FLOAT * workspace));
@ -418,6 +556,8 @@ EXTERN(void) jsimd_fdct_islow_sse2 JPP((DCTELEM * data));
 extern const int jconst_fdct_islow_sse2[];
 EXTERN(void) jsimd_fdct_ifast_sse2 JPP((DCTELEM * data));

+EXTERN(void) jsimd_fdct_ifast_neon JPP((DCTELEM * data));
+
 EXTERN(void) jsimd_fdct_float_3dnow JPP((FAST_FLOAT * data));

 extern const int jconst_fdct_float_sse[];
@ -432,6 +572,10 @@ EXTERN(void) jsimd_quantize_sse2 JPP((JCOEFPTR coef_block,
                                      DCTELEM * divisors,
                                      DCTELEM * workspace));

+EXTERN(void) jsimd_quantize_neon JPP((JCOEFPTR coef_block,
+                                      DCTELEM * divisors,
+                                      DCTELEM * workspace));
+
 EXTERN(void) jsimd_quantize_float_3dnow JPP((JCOEFPTR coef_block,
                                             FAST_FLOAT * divisors,
                                             FAST_FLOAT * workspace));
@ -464,6 +608,15 @@ EXTERN(void) jsimd_idct_4x4_sse2 JPP((void * dct_table,
                                      JSAMPARRAY output_buf,
                                      JDIMENSION output_col));

+EXTERN(void) jsimd_idct_2x2_neon JPP((void * dct_table,
+                                      JCOEFPTR coef_block,
+                                      JSAMPARRAY output_buf,
+                                      JDIMENSION output_col));
+EXTERN(void) jsimd_idct_4x4_neon JPP((void * dct_table,
+                                      JCOEFPTR coef_block,
+                                      JSAMPARRAY output_buf,
+                                      JDIMENSION output_col));
+
 /* SIMD Inverse DCT */
 EXTERN(void) jsimd_idct_islow_mmx JPP((void * dct_table,
                                       JCOEFPTR coef_block,
@ -485,6 +638,15 @@ EXTERN(void) jsimd_idct_ifast_sse2 JPP((void * dct_table,
                                        JSAMPARRAY output_buf,
                                        JDIMENSION output_col));

+EXTERN(void) jsimd_idct_islow_neon JPP((void * dct_table,
+                                        JCOEFPTR coef_block,
+                                        JSAMPARRAY output_buf,
+                                        JDIMENSION output_col));
+EXTERN(void) jsimd_idct_ifast_neon JPP((void * dct_table,
+                                        JCOEFPTR coef_block,
+                                        JSAMPARRAY output_buf,
+                                        JDIMENSION output_col));
+
 EXTERN(void) jsimd_idct_float_3dnow JPP((void * dct_table,
                                         JCOEFPTR coef_block,
                                         JSAMPARRAY output_buf,
--- a/media/libjpeg/simd/jsimd_arm.c
+++ b/media/libjpeg/simd/jsimd_arm.c
@ -0,0 +1,670 @@
+/*
+ * jsimd_arm.c
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright 2009-2011 D. R. Commander
+ * 
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ * This file contains the interface between the "normal" portions
+ * of the library and the SIMD implementations when running on
+ * ARM architecture.
+ *
+ * Based on the stubs from 'jsimd_none.c'
+ */
+
+#define JPEG_INTERNALS
+#include "../jinclude.h"
+#include "../jpeglib.h"
+#include "../jsimd.h"
+#include "../jdct.h"
+#include "../jsimddct.h"
+#include "jsimd.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+static unsigned int simd_support = ~0;
+
+#if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
+
+#define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
+
+LOCAL(int)
+check_feature (char *buffer, char *feature)
+{
+  char *p;
+  if (*feature == 0)
+    return 0;
+  if (strncmp(buffer, "Features", 8) != 0)
+    return 0;
+  buffer += 8;
+  while (isspace(*buffer))
+    buffer++;
+
+  /* Check if 'feature' is present in the buffer as a separate word */
+  while ((p = strstr(buffer, feature))) {
+    if (p > buffer && !isspace(*(p - 1))) {
+      buffer++;
+      continue;
+    }
+    p += strlen(feature);
+    if (*p != 0 && !isspace(*p)) {
+      buffer++;
+      continue;
+    }
+    return 1;
+  }
+  return 0;
+}
+
+LOCAL(int)
+parse_proc_cpuinfo (int bufsize)
+{
+  char *buffer = (char *)malloc(bufsize);
+  FILE *fd;
+  simd_support = 0;
+
+  if (!buffer)
+    return 0;
+
+  fd = fopen("/proc/cpuinfo", "r");
+  if (fd) {
+    while (fgets(buffer, bufsize, fd)) {
+      if (!strchr(buffer, '\n') && !feof(fd)) {
+        /* "impossible" happened - insufficient size of the buffer! */
+        fclose(fd);
+        free(buffer);
+        return 0;
+      }
+      if (check_feature(buffer, "neon"))
+        simd_support |= JSIMD_ARM_NEON;
+    }
+    fclose(fd);
+  }
+  free(buffer);
+  return 1;
+}
+
+#endif
+
+/*
+ * Check what SIMD accelerations are supported.
+ *
+ * FIXME: This code is racy under a multi-threaded environment.
+ */
+LOCAL(void)
+init_simd (void)
+{
+  char *env = NULL;
+#if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
+  int bufsize = 1024; /* an initial guess for the line buffer size limit */
+#endif
+
+  if (simd_support != ~0U)
+    return;
+
+  simd_support = 0;
+
+#if defined(__ARM_NEON__)
+  simd_support |= JSIMD_ARM_NEON;
+#elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
+  /* We still have a chance to use NEON regardless of globally used
+   * -mcpu/-mfpu options passed to gcc by performing runtime detection via
+   * /proc/cpuinfo parsing on linux/android */
+  while (!parse_proc_cpuinfo(bufsize)) {
+    bufsize *= 2;
+    if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
+      break;
+  }
+#endif
+
+  /* Force different settings through environment variables */
+  env = getenv("JSIMD_FORCE_ARM_NEON");
+  if ((env != NULL) && (strcmp(env, "1") == 0))
+    simd_support &= JSIMD_ARM_NEON;
+  env = getenv("JSIMD_FORCE_NO_SIMD");
+  if ((env != NULL) && (strcmp(env, "1") == 0))
+    simd_support = 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_ycc (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_ycc_rgb (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
+                       JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                       JDIMENSION output_row, int num_rows)
+{
+  void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+
+  switch(cinfo->in_color_space)
+  {
+    case JCS_EXT_RGB:
+      neonfct=jsimd_extrgb_ycc_convert_neon;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      neonfct=jsimd_extrgbx_ycc_convert_neon;
+      break;
+    case JCS_EXT_BGR:
+      neonfct=jsimd_extbgr_ycc_convert_neon;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      neonfct=jsimd_extbgrx_ycc_convert_neon;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      neonfct=jsimd_extxbgr_ycc_convert_neon;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      neonfct=jsimd_extxrgb_ycc_convert_neon;
+      break;
+    default:
+      neonfct=jsimd_extrgb_ycc_convert_neon;
+      break;
+  }
+
+  if (simd_support & JSIMD_ARM_NEON)
+    neonfct(cinfo->image_width, input_buf,
+        output_buf, output_row, num_rows);
+}
+
+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+}
+
+GLOBAL(void)
+jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
+                       JSAMPIMAGE input_buf, JDIMENSION input_row,
+                       JSAMPARRAY output_buf, int num_rows)
+{
+  void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
+
+  switch(cinfo->out_color_space)
+  {
+    case JCS_EXT_RGB:
+      neonfct=jsimd_ycc_extrgb_convert_neon;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      neonfct=jsimd_ycc_extrgbx_convert_neon;
+      break;
+    case JCS_EXT_BGR:
+      neonfct=jsimd_ycc_extbgr_convert_neon;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      neonfct=jsimd_ycc_extbgrx_convert_neon;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      neonfct=jsimd_ycc_extxbgr_convert_neon;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      neonfct=jsimd_ycc_extxrgb_convert_neon;
+      break;
+  default:
+      neonfct=jsimd_ycc_extrgb_convert_neon;
+      break;
+  }
+
+  if (simd_support & JSIMD_ARM_NEON)
+    neonfct(cinfo->output_width, input_buf,
+        input_row, output_buf, num_rows);
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_downsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_downsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
+                       JSAMPARRAY input_data, JSAMPARRAY output_data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr, 
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_upsample (j_decompress_ptr cinfo,
+                     jpeg_component_info * compptr, 
+                     JSAMPARRAY input_data,
+                     JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_fancy_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_fancy_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr, 
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
+                           jpeg_component_info * compptr, 
+                           JSAMPARRAY input_data,
+                           JSAMPARRAY * output_data_ptr)
+{
+}
+
+GLOBAL(int)
+jsimd_can_h2v2_merged_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_h2v1_merged_upsample (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(void)
+jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
+                            JSAMPIMAGE input_buf,
+                            JDIMENSION in_row_group_ctr,
+                            JSAMPARRAY output_buf)
+{
+}
+
+GLOBAL(int)
+jsimd_can_convsamp (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_convsamp_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
+                DCTELEM * workspace)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_convsamp_neon(sample_data, start_col, workspace);
+}
+
+GLOBAL(void)
+jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
+                      FAST_FLOAT * workspace)
+{
+}
+
+GLOBAL(int)
+jsimd_can_fdct_islow (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_ifast (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_fdct_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_fdct_islow (DCTELEM * data)
+{
+}
+
+GLOBAL(void)
+jsimd_fdct_ifast (DCTELEM * data)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_fdct_ifast_neon(data);
+}
+
+GLOBAL(void)
+jsimd_fdct_float (FAST_FLOAT * data)
+{
+}
+
+GLOBAL(int)
+jsimd_can_quantize (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (sizeof(DCTELEM) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_quantize_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
+                DCTELEM * workspace)
+{
+  if (simd_support & JSIMD_ARM_NEON)
+    jsimd_quantize_neon(coef_block, divisors, workspace);
+}
+
+GLOBAL(void)
+jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
+                      FAST_FLOAT * workspace)
+{
+}
+
+GLOBAL(int)
+jsimd_can_idct_2x2 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if ((simd_support & JSIMD_ARM_NEON))
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_4x4 (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if ((simd_support & JSIMD_ARM_NEON))
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if ((simd_support & JSIMD_ARM_NEON))
+    jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if ((simd_support & JSIMD_ARM_NEON))
+    jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(int)
+jsimd_can_idct_islow (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(ISLOW_MULT_TYPE) != 2)
+    return 0;
+
+  if (simd_support & JSIMD_ARM_NEON)
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_ifast (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (DCTSIZE != 8)
+    return 0;
+  if (sizeof(JCOEF) != 2)
+    return 0;
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if (sizeof(IFAST_MULT_TYPE) != 2)
+    return 0;
+  if (IFAST_SCALE_BITS != 2)
+    return 0;
+
+  if ((simd_support & JSIMD_ARM_NEON))
+    return 1;
+
+  return 0;
+}
+
+GLOBAL(int)
+jsimd_can_idct_float (void)
+{
+  init_simd();
+
+  return 0;
+}
+
+GLOBAL(void)
+jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if ((simd_support & JSIMD_ARM_NEON))
+    jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+  if ((simd_support & JSIMD_ARM_NEON))
+    jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
+}
+
+GLOBAL(void)
+jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
+                JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                JDIMENSION output_col)
+{
+}
+
--- a/media/libjpeg/simd/jsimd_arm_neon.S
+++ b/media/libjpeg/simd/jsimd_arm_neon.S
--- a/media/libjpeg/simd/jsimd_i386.c
+++ b/media/libjpeg/simd/jsimd_i386.c
@ -2,7 +2,7 @@
 * jsimd_i386.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009 D. R. Commander
+ * Copyright 2009-2011 D. R. Commander
 * 
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -41,7 +41,7 @@ init_simd (void)
 {
  char *env = NULL;

-  if (simd_support != ~0)
+  if (simd_support != ~0U)
    return;

  simd_support = jpeg_simd_cpu_support();
@ -83,6 +83,28 @@ jsimd_can_rgb_ycc (void)
  return 0;
 }

+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  init_simd();
+
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
+    return 1;
+  if (simd_support & JSIMD_MMX)
+    return 1;
+
+  return 0;
+}
+
 GLOBAL(int)
 jsimd_can_ycc_rgb (void)
 {
@ -120,6 +142,7 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
      mmxfct=jsimd_extrgb_ycc_convert_mmx;
      break;
    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
      sse2fct=jsimd_extrgbx_ycc_convert_sse2;
      mmxfct=jsimd_extrgbx_ycc_convert_mmx;
      break;
@ -128,14 +151,17 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
      mmxfct=jsimd_extbgr_ycc_convert_mmx;
      break;
    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
      sse2fct=jsimd_extbgrx_ycc_convert_sse2;
      mmxfct=jsimd_extbgrx_ycc_convert_mmx;
      break;
    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
      sse2fct=jsimd_extxbgr_ycc_convert_sse2;
      mmxfct=jsimd_extxbgr_ycc_convert_mmx;
      break;
    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
      sse2fct=jsimd_extxrgb_ycc_convert_sse2;
      mmxfct=jsimd_extxrgb_ycc_convert_mmx;
      break;
@ -154,6 +180,59 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
        output_buf, output_row, num_rows);
 }

+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+  void (*mmxfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+
+  switch(cinfo->in_color_space)
+  {
+    case JCS_EXT_RGB:
+      sse2fct=jsimd_extrgb_gray_convert_sse2;
+      mmxfct=jsimd_extrgb_gray_convert_mmx;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      sse2fct=jsimd_extrgbx_gray_convert_sse2;
+      mmxfct=jsimd_extrgbx_gray_convert_mmx;
+      break;
+    case JCS_EXT_BGR:
+      sse2fct=jsimd_extbgr_gray_convert_sse2;
+      mmxfct=jsimd_extbgr_gray_convert_mmx;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      sse2fct=jsimd_extbgrx_gray_convert_sse2;
+      mmxfct=jsimd_extbgrx_gray_convert_mmx;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      sse2fct=jsimd_extxbgr_gray_convert_sse2;
+      mmxfct=jsimd_extxbgr_gray_convert_mmx;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      sse2fct=jsimd_extxrgb_gray_convert_sse2;
+      mmxfct=jsimd_extxrgb_gray_convert_mmx;
+      break;
+    default:
+      sse2fct=jsimd_rgb_gray_convert_sse2;
+      mmxfct=jsimd_rgb_gray_convert_mmx;
+      break;
+  }
+
+  if ((simd_support & JSIMD_SSE2) &&
+      IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
+    sse2fct(cinfo->image_width, input_buf,
+        output_buf, output_row, num_rows);
+  else if (simd_support & JSIMD_MMX)
+    mmxfct(cinfo->image_width, input_buf,
+        output_buf, output_row, num_rows);
+}
+
 GLOBAL(void)
 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
                       JSAMPIMAGE input_buf, JDIMENSION input_row,
@ -169,6 +248,7 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
      mmxfct=jsimd_ycc_extrgb_convert_mmx;
      break;
    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
      sse2fct=jsimd_ycc_extrgbx_convert_sse2;
      mmxfct=jsimd_ycc_extrgbx_convert_mmx;
      break;
@ -177,14 +257,17 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
      mmxfct=jsimd_ycc_extbgr_convert_mmx;
      break;
    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
      sse2fct=jsimd_ycc_extbgrx_convert_sse2;
      mmxfct=jsimd_ycc_extbgrx_convert_mmx;
      break;
    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
      sse2fct=jsimd_ycc_extxbgr_convert_sse2;
      mmxfct=jsimd_ycc_extxbgr_convert_mmx;
      break;
    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
      sse2fct=jsimd_ycc_extxrgb_convert_sse2;
      mmxfct=jsimd_ycc_extxrgb_convert_mmx;
      break;
@ -461,6 +544,7 @@ jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
      mmxfct=jsimd_h2v2_extrgb_merged_upsample_mmx;
      break;
    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
      sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
      mmxfct=jsimd_h2v2_extrgbx_merged_upsample_mmx;
      break;
@ -469,14 +553,17 @@ jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
      mmxfct=jsimd_h2v2_extbgr_merged_upsample_mmx;
      break;
    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
      sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
      mmxfct=jsimd_h2v2_extbgrx_merged_upsample_mmx;
      break;
    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
      sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
      mmxfct=jsimd_h2v2_extxbgr_merged_upsample_mmx;
      break;
    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
      sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
      mmxfct=jsimd_h2v2_extxrgb_merged_upsample_mmx;
      break;
@ -511,6 +598,7 @@ jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
      mmxfct=jsimd_h2v1_extrgb_merged_upsample_mmx;
      break;
    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
      sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
      mmxfct=jsimd_h2v1_extrgbx_merged_upsample_mmx;
      break;
@ -519,14 +607,17 @@ jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
      mmxfct=jsimd_h2v1_extbgr_merged_upsample_mmx;
      break;
    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
      sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
      mmxfct=jsimd_h2v1_extbgrx_merged_upsample_mmx;
      break;
    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
      sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
      mmxfct=jsimd_h2v1_extxbgr_merged_upsample_mmx;
      break;
    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
      sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
      mmxfct=jsimd_h2v1_extxrgb_merged_upsample_mmx;
      break;
--- a/media/libjpeg/simd/jsimd_x86_64.c
+++ b/media/libjpeg/simd/jsimd_x86_64.c
@ -2,7 +2,7 @@
 * jsimd_x86_64.c
 *
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright 2009 D. R. Commander
+ * Copyright 2009-2011 D. R. Commander
 * 
 * Based on the x86 SIMD extension for IJG JPEG library,
 * Copyright (C) 1999-2006, MIYASAKA Masaru.
@ -46,6 +46,23 @@ jsimd_can_rgb_ycc (void)
  return 1;
 }

+GLOBAL(int)
+jsimd_can_rgb_gray (void)
+{
+  /* The code is optimised for these values only */
+  if (BITS_IN_JSAMPLE != 8)
+    return 0;
+  if (sizeof(JDIMENSION) != 4)
+    return 0;
+  if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
+    return 0;
+
+  if (!IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
+    return 0;
+
+  return 1;
+}
+
 GLOBAL(int)
 jsimd_can_ycc_rgb (void)
 {
@ -76,18 +93,22 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
      sse2fct=jsimd_extrgb_ycc_convert_sse2;
      break;
    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
      sse2fct=jsimd_extrgbx_ycc_convert_sse2;
      break;
    case JCS_EXT_BGR:
      sse2fct=jsimd_extbgr_ycc_convert_sse2;
      break;
    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
      sse2fct=jsimd_extbgrx_ycc_convert_sse2;
      break;
    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
      sse2fct=jsimd_extxbgr_ycc_convert_sse2;
      break;
    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
      sse2fct=jsimd_extxrgb_ycc_convert_sse2;
      break;
    default:
@ -98,6 +119,45 @@ jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
  sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
 }

+GLOBAL(void)
+jsimd_rgb_gray_convert (j_compress_ptr cinfo,
+                        JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
+                        JDIMENSION output_row, int num_rows)
+{
+  void (*sse2fct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
+
+  switch(cinfo->in_color_space)
+  {
+    case JCS_EXT_RGB:
+      sse2fct=jsimd_extrgb_gray_convert_sse2;
+      break;
+    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
+      sse2fct=jsimd_extrgbx_gray_convert_sse2;
+      break;
+    case JCS_EXT_BGR:
+      sse2fct=jsimd_extbgr_gray_convert_sse2;
+      break;
+    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
+      sse2fct=jsimd_extbgrx_gray_convert_sse2;
+      break;
+    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
+      sse2fct=jsimd_extxbgr_gray_convert_sse2;
+      break;
+    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
+      sse2fct=jsimd_extxrgb_gray_convert_sse2;
+      break;
+    default:
+      sse2fct=jsimd_rgb_gray_convert_sse2;
+      break;
+  }
+
+  sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
+}
+
 GLOBAL(void)
 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
                       JSAMPIMAGE input_buf, JDIMENSION input_row,
@ -111,18 +171,22 @@ jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
      sse2fct=jsimd_ycc_extrgb_convert_sse2;
      break;
    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
      sse2fct=jsimd_ycc_extrgbx_convert_sse2;
      break;
    case JCS_EXT_BGR:
      sse2fct=jsimd_ycc_extbgr_convert_sse2;
      break;
    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
      sse2fct=jsimd_ycc_extbgrx_convert_sse2;
      break;
    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
      sse2fct=jsimd_ycc_extxbgr_convert_sse2;
      break;
    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
      sse2fct=jsimd_ycc_extxrgb_convert_sse2;
      break;
    default:
@ -321,18 +385,22 @@ jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
      sse2fct=jsimd_h2v2_extrgb_merged_upsample_sse2;
      break;
    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
      sse2fct=jsimd_h2v2_extrgbx_merged_upsample_sse2;
      break;
    case JCS_EXT_BGR:
      sse2fct=jsimd_h2v2_extbgr_merged_upsample_sse2;
      break;
    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
      sse2fct=jsimd_h2v2_extbgrx_merged_upsample_sse2;
      break;
    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
      sse2fct=jsimd_h2v2_extxbgr_merged_upsample_sse2;
      break;
    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
      sse2fct=jsimd_h2v2_extxrgb_merged_upsample_sse2;
      break;
    default:
@ -357,18 +425,22 @@ jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
      sse2fct=jsimd_h2v1_extrgb_merged_upsample_sse2;
      break;
    case JCS_EXT_RGBX:
+    case JCS_EXT_RGBA:
      sse2fct=jsimd_h2v1_extrgbx_merged_upsample_sse2;
      break;
    case JCS_EXT_BGR:
      sse2fct=jsimd_h2v1_extbgr_merged_upsample_sse2;
      break;
    case JCS_EXT_BGRX:
+    case JCS_EXT_BGRA:
      sse2fct=jsimd_h2v1_extbgrx_merged_upsample_sse2;
      break;
    case JCS_EXT_XBGR:
+    case JCS_EXT_ABGR:
      sse2fct=jsimd_h2v1_extxbgr_merged_upsample_sse2;
      break;
    case JCS_EXT_XRGB:
+    case JCS_EXT_ARGB:
      sse2fct=jsimd_h2v1_extxrgb_merged_upsample_sse2;
      break;
    default:
--- a/media/libjpeg/simd/jsimdcfg.inc
+++ b/media/libjpeg/simd/jsimdcfg.inc
@ -13,6 +13,31 @@
 %define RGB_GREEN 1
 %define RGB_BLUE 2
 %define RGB_PIXELSIZE 3
+%define EXT_RGB_RED 0
+%define EXT_RGB_GREEN 1
+%define EXT_RGB_BLUE 2
+%define EXT_RGB_PIXELSIZE 3
+%define EXT_RGBX_RED 0
+%define EXT_RGBX_GREEN 1
+%define EXT_RGBX_BLUE 2
+%define EXT_RGBX_PIXELSIZE 4
+%define EXT_BGR_RED 2
+%define EXT_BGR_GREEN 1
+%define EXT_BGR_BLUE 0
+%define EXT_BGR_PIXELSIZE 3
+%define EXT_BGRX_RED 2
+%define EXT_BGRX_GREEN 1
+%define EXT_BGRX_BLUE 0
+%define EXT_BGRX_PIXELSIZE 4
+%define EXT_XBGR_RED 3
+%define EXT_XBGR_GREEN 2
+%define EXT_XBGR_BLUE 1
+%define EXT_XBGR_PIXELSIZE 4
+%define EXT_XRGB_RED 1
+%define EXT_XRGB_GREEN 2
+%define EXT_XRGB_BLUE 3
+%define EXT_XRGB_PIXELSIZE 4
+%define RGBX_FILLER_0XFF 1
 ; Representation of a single sample (pixel element value).
 ; On this SIMD implementation, this must be 'unsigned char'.
 ;
--- a/media/libjpeg/simd/jsimdcfg.inc.h
+++ b/media/libjpeg/simd/jsimdcfg.inc.h
@ -1,168 +0,0 @@
-// This file generates the include file for the assembly
-// implementations by abusing the C preprocessor.
-//
-// Note: Some things are manually defined as they need to
-// be mapped to NASM types.
-
-;
-; Automatically generated include file from jsimdcfg.inc.h
-;
-
-#define JPEG_INTERNALS
-
-#include "../jpeglib.h"
-#include "../jconfig.h"
-#include "../jmorecfg.h"
-#include "jsimd.h"
-
-#define define(var) %define _cpp_protection_##var
-#define definev(var) %define _cpp_protection_##var var
-
-;
-; -- jpeglib.h
-;
-
-definev(DCTSIZE)
-definev(DCTSIZE2)
-
-;
-; -- jmorecfg.h
-;
-
-definev(RGB_RED)
-definev(RGB_GREEN)
-definev(RGB_BLUE)
-
-definev(RGB_PIXELSIZE)
-
-; Representation of a single sample (pixel element value).
-; On this SIMD implementation, this must be 'unsigned char'.
-;
-
-%define JSAMPLE                 byte          ; unsigned char
-%define SIZEOF_JSAMPLE          SIZEOF_BYTE   ; sizeof(JSAMPLE)
-
-definev(CENTERJSAMPLE)
-
-; Representation of a DCT frequency coefficient.
-; On this SIMD implementation, this must be 'short'.
-;
-%define JCOEF                   word          ; short
-%define SIZEOF_JCOEF            SIZEOF_WORD   ; sizeof(JCOEF)
-
-; Datatype used for image dimensions.
-; On this SIMD implementation, this must be 'unsigned int'.
-;
-%define JDIMENSION              dword         ; unsigned int
-%define SIZEOF_JDIMENSION       SIZEOF_DWORD  ; sizeof(JDIMENSION)
-
-%define JSAMPROW                POINTER       ; JSAMPLE FAR * (jpeglib.h)
-%define JSAMPARRAY              POINTER       ; JSAMPROW *    (jpeglib.h)
-%define JSAMPIMAGE              POINTER       ; JSAMPARRAY *  (jpeglib.h)
-%define JCOEFPTR                POINTER       ; JCOEF FAR *   (jpeglib.h)
-%define SIZEOF_JSAMPROW         SIZEOF_POINTER  ; sizeof(JSAMPROW)
-%define SIZEOF_JSAMPARRAY       SIZEOF_POINTER  ; sizeof(JSAMPARRAY)
-%define SIZEOF_JSAMPIMAGE       SIZEOF_POINTER  ; sizeof(JSAMPIMAGE)
-%define SIZEOF_JCOEFPTR         SIZEOF_POINTER  ; sizeof(JCOEFPTR)
-
-;
-; -- jdct.h
-;
-
-; A forward DCT routine is given a pointer to a work area of type DCTELEM[];
-; the DCT is to be performed in-place in that buffer.
-; To maximize parallelism, Type DCTELEM is changed to short (originally, int).
-;
-%define DCTELEM                 word          ; short
-%define SIZEOF_DCTELEM          SIZEOF_WORD   ; sizeof(DCTELEM)
-
-%define FAST_FLOAT              FP32            ; float
-%define SIZEOF_FAST_FLOAT       SIZEOF_FP32     ; sizeof(FAST_FLOAT)
-
-; To maximize parallelism, Type MULTIPLIER is changed to short.
-;
-%define ISLOW_MULT_TYPE         word          ; must be short
-%define SIZEOF_ISLOW_MULT_TYPE  SIZEOF_WORD   ; sizeof(ISLOW_MULT_TYPE)
-
-%define IFAST_MULT_TYPE         word          ; must be short
-%define SIZEOF_IFAST_MULT_TYPE  SIZEOF_WORD   ; sizeof(IFAST_MULT_TYPE)
-%define IFAST_SCALE_BITS        2             ; fractional bits in scale factors
-
-%define FLOAT_MULT_TYPE         FP32          ; must be float
-%define SIZEOF_FLOAT_MULT_TYPE  SIZEOF_FP32   ; sizeof(FLOAT_MULT_TYPE)
-
-;
-; -- jsimd.h
-;
-
-definev(JSIMD_NONE)
-definev(JSIMD_MMX)
-definev(JSIMD_3DNOW)
-definev(JSIMD_SSE)
-definev(JSIMD_SSE2)
-
-; Short forms of external names for systems with brain-damaged linkers.
-;
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-definev(jpeg_simd_cpu_support)
-definev(jsimd_rgb_ycc_convert_mmx)
-definev(jsimd_ycc_rgb_convert_mmx)
-definev(jconst_rgb_ycc_convert_sse2)
-definev(jsimd_rgb_ycc_convert_sse2)
-definev(jconst_ycc_rgb_convert_sse2)
-definev(jsimd_ycc_rgb_convert_sse2)
-definev(jsimd_h2v2_downsample_mmx)
-definev(jsimd_h2v1_downsample_mmx)
-definev(jsimd_h2v2_downsample_sse2)
-definev(jsimd_h2v1_downsample_sse2)
-definev(jsimd_h2v2_upsample_mmx)
-definev(jsimd_h2v1_upsample_mmx)
-definev(jsimd_h2v1_fancy_upsample_mmx)
-definev(jsimd_h2v2_fancy_upsample_mmx)
-definev(jsimd_h2v1_merged_upsample_mmx)
-definev(jsimd_h2v2_merged_upsample_mmx)
-definev(jsimd_h2v2_upsample_sse2)
-definev(jsimd_h2v1_upsample_sse2)
-definev(jconst_fancy_upsample_sse2)
-definev(jsimd_h2v1_fancy_upsample_sse2)
-definev(jsimd_h2v2_fancy_upsample_sse2)
-definev(jconst_merged_upsample_sse2)
-definev(jsimd_h2v1_merged_upsample_sse2)
-definev(jsimd_h2v2_merged_upsample_sse2)
-definev(jsimd_convsamp_mmx)
-definev(jsimd_convsamp_sse2)
-definev(jsimd_convsamp_float_3dnow)
-definev(jsimd_convsamp_float_sse)
-definev(jsimd_convsamp_float_sse2)
-definev(jsimd_fdct_islow_mmx)
-definev(jsimd_fdct_ifast_mmx)
-definev(jconst_fdct_islow_sse2)
-definev(jsimd_fdct_islow_sse2)
-definev(jconst_fdct_ifast_sse2)
-definev(jsimd_fdct_ifast_sse2)
-definev(jsimd_fdct_float_3dnow)
-definev(jconst_fdct_float_sse)
-definev(jsimd_fdct_float_sse)
-definev(jsimd_quantize_mmx)
-definev(jsimd_quantize_sse2)
-definev(jsimd_quantize_float_3dnow)
-definev(jsimd_quantize_float_sse)
-definev(jsimd_quantize_float_sse2)
-definev(jsimd_idct_2x2_mmx)
-definev(jsimd_idct_4x4_mmx)
-definev(jconst_idct_red_sse2)
-definev(jsimd_idct_2x2_sse2)
-definev(jsimd_idct_4x4_sse2)
-definev(jsimd_idct_islow_mmx)
-definev(jsimd_idct_ifast_mmx)
-definev(jconst_idct_islow_sse2)
-definev(jsimd_idct_islow_sse2)
-definev(jconst_idct_ifast_sse2)
-definev(jsimd_idct_ifast_sse2)
-definev(jsimd_idct_float_3dnow)
-definev(jconst_idct_float_sse)
-definev(jsimd_idct_float_sse)
-definev(jconst_idct_float_sse2)
-definev(jsimd_idct_float_sse2)
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
--- a/media/libjpeg/simd/jsimdext.inc
+++ b/media/libjpeg/simd/jsimdext.inc
@ -38,19 +38,27 @@

 ; -- segment definition --
 ;
+%ifdef __YASM_VER__
+%define SEG_TEXT    .text  align=16
+%define SEG_CONST   .rdata align=16
+%else
 %define SEG_TEXT    .text  align=16 public use32 class=CODE
 %define SEG_CONST   .rdata align=16 public use32 class=CONST
+%endif

 %elifdef WIN64	; ----(nasm -fwin64 -DWIN64 ...)--------
 ; * Microsoft Visual C++

 ; -- segment definition --
 ;
+%ifdef __YASM_VER__
+%define SEG_TEXT    .text  align=16
+%define SEG_CONST   .rdata align=16
+%else
 %define SEG_TEXT    .text  align=16 public use64 class=CODE
 %define SEG_CONST   .rdata align=16 public use64 class=CONST
-%ifdef MSVC
-%define EXTN(name)  name			; foo() -> foo
 %endif
+%define EXTN(name)  name			; foo() -> foo

 %elifdef OBJ32	; ----(nasm -fobj -DOBJ32 ...)----------
 ; * Borland C++ (Win32)
@ -78,6 +86,8 @@ section .note.GNU-stack noalloc noexec nowrite progbits
 %define SEG_CONST   .rodata progbits alloc noexec nowrite align=16
 %endif

+%define STRICT_MEMORY_ACCESS 1
+
 ; To make the code position-independent, append -DPIC to the commandline
 ;
 %define GOT_SYMBOL  _GLOBAL_OFFSET_TABLE_	; ELF supports PIC
@ -299,8 +309,6 @@ const_base:
 %ifdef WIN64

 %imacro collect_args 0
-	push r10
-	push r11
 	push r12
 	push r13
 	push r14
@ -330,8 +338,6 @@ const_base:
 	pop r14
 	pop r13
 	pop r12
-	pop r11
-	pop r10
 %endmacro

 %else
--- a/media/libjpeg/transupp.h
+++ b/media/libjpeg/transupp.h
@ -1,210 +0,0 @@
-/*
- * transupp.h
- *
- * Copyright (C) 1997-2009, Thomas G. Lane, Guido Vollbeding.
- * This file is part of the Independent JPEG Group's software.
- * For conditions of distribution and use, see the accompanying README file.
- *
- * This file contains declarations for image transformation routines and
- * other utility code used by the jpegtran sample application.  These are
- * NOT part of the core JPEG library.  But we keep these routines separate
- * from jpegtran.c to ease the task of maintaining jpegtran-like programs
- * that have other user interfaces.
- *
- * NOTE: all the routines declared here have very specific requirements
- * about when they are to be executed during the reading and writing of the
- * source and destination files.  See the comments in transupp.c, or see
- * jpegtran.c for an example of correct usage.
- */
-
-/* If you happen not to want the image transform support, disable it here */
-#ifndef TRANSFORMS_SUPPORTED
-#define TRANSFORMS_SUPPORTED 1		/* 0 disables transform code */
-#endif
-
-/*
- * Although rotating and flipping data expressed as DCT coefficients is not
- * hard, there is an asymmetry in the JPEG format specification for images
- * whose dimensions aren't multiples of the iMCU size.  The right and bottom
- * image edges are padded out to the next iMCU boundary with junk data; but
- * no padding is possible at the top and left edges.  If we were to flip
- * the whole image including the pad data, then pad garbage would become
- * visible at the top and/or left, and real pixels would disappear into the
- * pad margins --- perhaps permanently, since encoders & decoders may not
- * bother to preserve DCT blocks that appear to be completely outside the
- * nominal image area.  So, we have to exclude any partial iMCUs from the
- * basic transformation.
- *
- * Transpose is the only transformation that can handle partial iMCUs at the
- * right and bottom edges completely cleanly.  flip_h can flip partial iMCUs
- * at the bottom, but leaves any partial iMCUs at the right edge untouched.
- * Similarly flip_v leaves any partial iMCUs at the bottom edge untouched.
- * The other transforms are defined as combinations of these basic transforms
- * and process edge blocks in a way that preserves the equivalence.
- *
- * The "trim" option causes untransformable partial iMCUs to be dropped;
- * this is not strictly lossless, but it usually gives the best-looking
- * result for odd-size images.  Note that when this option is active,
- * the expected mathematical equivalences between the transforms may not hold.
- * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim
- * followed by -rot 180 -trim trims both edges.)
- *
- * We also offer a lossless-crop option, which discards data outside a given
- * image region but losslessly preserves what is inside.  Like the rotate and
- * flip transforms, lossless crop is restricted by the JPEG format: the upper
- * left corner of the selected region must fall on an iMCU boundary.  If this
- * does not hold for the given crop parameters, we silently move the upper left
- * corner up and/or left to make it so, simultaneously increasing the region
- * dimensions to keep the lower right crop corner unchanged.  (Thus, the
- * output image covers at least the requested region, but may cover more.)
- *
- * We also provide a lossless-resize option, which is kind of a lossless-crop
- * operation in the DCT coefficient block domain - it discards higher-order
- * coefficients and losslessly preserves lower-order coefficients of a
- * sub-block.
- *
- * Rotate/flip transform, resize, and crop can be requested together in a
- * single invocation.  The crop is applied last --- that is, the crop region
- * is specified in terms of the destination image after transform/resize.
- *
- * We also offer a "force to grayscale" option, which simply discards the
- * chrominance channels of a YCbCr image.  This is lossless in the sense that
- * the luminance channel is preserved exactly.  It's not the same kind of
- * thing as the rotate/flip transformations, but it's convenient to handle it
- * as part of this package, mainly because the transformation routines have to
- * be aware of the option to know how many components to work on.
- */
-
-
-/* Short forms of external names for systems with brain-damaged linkers. */
-
-#ifdef NEED_SHORT_EXTERNAL_NAMES
-#define jtransform_parse_crop_spec	jTrParCrop
-#define jtransform_request_workspace	jTrRequest
-#define jtransform_adjust_parameters	jTrAdjust
-#define jtransform_execute_transform	jTrExec
-#define jtransform_perfect_transform	jTrPerfect
-#define jcopy_markers_setup		jCMrkSetup
-#define jcopy_markers_execute		jCMrkExec
-#endif /* NEED_SHORT_EXTERNAL_NAMES */
-
-
-/*
- * Codes for supported types of image transformations.
- */
-
-typedef enum {
-	JXFORM_NONE,		/* no transformation */
-	JXFORM_FLIP_H,		/* horizontal flip */
-	JXFORM_FLIP_V,		/* vertical flip */
-	JXFORM_TRANSPOSE,	/* transpose across UL-to-LR axis */
-	JXFORM_TRANSVERSE,	/* transpose across UR-to-LL axis */
-	JXFORM_ROT_90,		/* 90-degree clockwise rotation */
-	JXFORM_ROT_180,		/* 180-degree rotation */
-	JXFORM_ROT_270		/* 270-degree clockwise (or 90 ccw) */
-} JXFORM_CODE;
-
-/*
- * Codes for crop parameters, which can individually be unspecified,
- * positive, or negative.  (Negative width or height makes no sense, though.)
- */
-
-typedef enum {
-	JCROP_UNSET,
-	JCROP_POS,
-	JCROP_NEG
-} JCROP_CODE;
-
-/*
- * Transform parameters struct.
- * NB: application must not change any elements of this struct after
- * calling jtransform_request_workspace.
- */
-
-typedef struct {
-  /* Options: set by caller */
-  JXFORM_CODE transform;	/* image transform operator */
-  boolean perfect;		/* if TRUE, fail if partial MCUs are requested */
-  boolean trim;			/* if TRUE, trim partial MCUs as needed */
-  boolean force_grayscale;	/* if TRUE, convert color image to grayscale */
-  boolean crop;			/* if TRUE, crop source image */
-
-  /* Crop parameters: application need not set these unless crop is TRUE.
-   * These can be filled in by jtransform_parse_crop_spec().
-   */
-  JDIMENSION crop_width;	/* Width of selected region */
-  JCROP_CODE crop_width_set;
-  JDIMENSION crop_height;	/* Height of selected region */
-  JCROP_CODE crop_height_set;
-  JDIMENSION crop_xoffset;	/* X offset of selected region */
-  JCROP_CODE crop_xoffset_set;	/* (negative measures from right edge) */
-  JDIMENSION crop_yoffset;	/* Y offset of selected region */
-  JCROP_CODE crop_yoffset_set;	/* (negative measures from bottom edge) */
-
-  /* Internal workspace: caller should not touch these */
-  int num_components;		/* # of components in workspace */
-  jvirt_barray_ptr * workspace_coef_arrays; /* workspace for transformations */
-  JDIMENSION output_width;	/* cropped destination dimensions */
-  JDIMENSION output_height;
-  JDIMENSION x_crop_offset;	/* destination crop offsets measured in iMCUs */
-  JDIMENSION y_crop_offset;
-  int iMCU_sample_width;	/* destination iMCU size */
-  int iMCU_sample_height;
-} jpeg_transform_info;
-
-
-#if TRANSFORMS_SUPPORTED
-
-/* Parse a crop specification (written in X11 geometry style) */
-EXTERN(boolean) jtransform_parse_crop_spec
-	JPP((jpeg_transform_info *info, const char *spec));
-/* Request any required workspace */
-EXTERN(boolean) jtransform_request_workspace
-	JPP((j_decompress_ptr srcinfo, jpeg_transform_info *info));
-/* Adjust output image parameters */
-EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
-/* Execute the actual transformation, if any */
-EXTERN(void) jtransform_execute_transform
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     jvirt_barray_ptr *src_coef_arrays,
-	     jpeg_transform_info *info));
-/* Determine whether lossless transformation is perfectly
- * possible for a specified image and transformation.
- */
-EXTERN(boolean) jtransform_perfect_transform
-	JPP((JDIMENSION image_width, JDIMENSION image_height,
-	     int MCU_width, int MCU_height,
-	     JXFORM_CODE transform));
-
-/* jtransform_execute_transform used to be called
- * jtransform_execute_transformation, but some compilers complain about
- * routine names that long.  This macro is here to avoid breaking any
- * old source code that uses the original name...
- */
-#define jtransform_execute_transformation	jtransform_execute_transform
-
-#endif /* TRANSFORMS_SUPPORTED */
-
-
-/*
- * Support for copying optional markers from source to destination file.
- */
-
-typedef enum {
-	JCOPYOPT_NONE,		/* copy no optional markers */
-	JCOPYOPT_COMMENTS,	/* copy only comment (COM) markers */
-	JCOPYOPT_ALL		/* copy all optional markers */
-} JCOPY_OPTION;
-
-#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS	/* recommended default */
-
-/* Setup decompression object to save desired markers in memory */
-EXTERN(void) jcopy_markers_setup
-	JPP((j_decompress_ptr srcinfo, JCOPY_OPTION option));
-/* Copy markers saved in the given source object to the destination object */
-EXTERN(void) jcopy_markers_execute
-	JPP((j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
-	     JCOPY_OPTION option));