Bug 462082 - Update libtheora to latest stable version - rs=roc

2009-01-22 13:00:49 +13:00 · 2009-01-22 13:00:49 +13:00 · 32da7c5a78
--- a/media/libtheora/455357_wince_local_variable_macro_clash_patch
+++ b/media/libtheora/455357_wince_local_variable_macro_clash_patch
@ -0,0 +1,40 @@
+diff --git a/media/libtheora/lib/dec/decode.c b/media/libtheora/lib/dec/decode.c
+--- a/media/libtheora/lib/dec/decode.c
+++ b/media/libtheora/lib/dec/decode.c
+@@ -1803,8 +1803,8 @@
+ 
+ static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
+  int _dc_scale,int _sharp_mod,int _strong){
+-  static const int     MOD_MAX[2]={24,32};
+-  static const int     MOD_SHIFT[2]={1,0};
++  static const int     OCDB_MOD_MAX[2]={24,32};
++  static const int     OCDB_MOD_SHIFT[2]={1,0};
+   const unsigned char *psrc;
+   const unsigned char *src;
+   const unsigned char *nsrc;
+@@ -1814,14 +1814,14 @@
+   int                  mod_hi;
+   int                  by;
+   int                  bx;
+-  mod_hi=OC_MINI(3*_dc_scale,MOD_MAX[_strong]);
++  mod_hi=OC_MINI(3*_dc_scale,OCDB_MOD_MAX[_strong]);
+   dst=_idata;
+   src=dst;
+   psrc=src-(_ystride&-!(_b&4));
+   for(by=0;by<9;by++){
+     for(bx=0;bx<8;bx++){
+       int mod;
+-      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<MOD_SHIFT[_strong]);
++      mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OCDB_MOD_SHIFT[_strong]);
+       vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+     }
+     psrc=src;
+@@ -1833,7 +1833,7 @@
+     src=nsrc;
+     for(by=0;by<8;by++){
+       int mod;
+-      mod=32+_dc_scale-(abs(*src-*psrc)<<MOD_SHIFT[_strong]);
++      mod=32+_dc_scale-(abs(*src-*psrc)<<OCDB_MOD_SHIFT[_strong]);
+       hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
+       psrc+=_ystride;
+       src+=_ystride;
--- a/media/libtheora/AUTHORS
+++ b/media/libtheora/AUTHORS
@ -20,11 +20,19 @@ Conrad Parker

 Mauricio Piacentini
 	- Original win32 projects and example ports
+	- dump_video example
 	- VP3->Theora transcoder

 Silvia Pfeiffer
 	- Figures for the spec

+Vegard Nossum
+	- Original png2theora implementation
+
+Rudolf Marek
+Nils Pipenbrinck
+	- MMX optimizations
+
 Michael Smith
 Andre Pang
 calc
--- a/media/libtheora/CHANGES
+++ b/media/libtheora/CHANGES
@ -1,3 +1,32 @@
+libtheora 1.0 (2008 November 3)
+
+ - Merge x86 assembly for forward DCT from Thusnelda branch.
+ - Update 32 bit MMX with loop filter fix.
+ - Check for an uninitialized state before dereferencing in propagating
+   decode calls.
+ - Remove all TH_DEBUG statements.
+ - Rename the bitpacker source files copied from libogg to avoid
+   confusing simple build systems using both libraries.
+ - Declare bitfield entries to be explicitly signed for Solaris cc.
+ - Set quantization parameters to default values when an empty buffer is
+   passed with TH_ENCCTL_SET_QUANT_PARAMS.
+ - Split encoder and decoder tests depending on configure settings.
+ - Return lstylex.sty to the distribution.
+ - Disable inline assembly on gcc versions prior to 3.1.
+ - Remove extern references for OC_*_QUANT_MIN.
+ - Make various data tables static const so they can be read-only.
+ - Remove ENCCTL codes from the old encoder API.
+ - Implement TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE ctl.
+ - Fix segfault when exactly one of the width or height is not a multiple
+   of 16, but the other is.
+ - Compute the correct vertical offset for chroma.
+ - cpuid assembly fix for MSVC.
+ - Add VS2008 project files.
+ - Build updates for 64-bit platforms, Mingw32, VS and XCode.
+ - Do not clobber the cropping rectangle.
+ - Declare ourselves 1.0final to pkg-config to sort after beta releases.
+ - Fix the scons build to include asm in libtheoradec/enc.
+
 libtheora 1.0beta3 (2008 April 16)

 - Build new libtheoradec and libtheoraenc libraries
@ -10,7 +39,7 @@ libtheora 1.0beta3 (2008 April 16)
 - Use a custom copy of the libogg bitpacker in the decoder
   to avoid function call overhead.
 - MMX code improved and ported to MSVC.
- - Fix a problem with the MMX code on SELinux 
+ - Fix a problem with the MMX code on SELinux.
 - Fix a problem with decoder quantizer initialization.
 - Fix a page queue problem with png2theora.
 - Improved robustness.
--- a/media/libtheora/COPYING
+++ b/media/libtheora/COPYING
@ -1,4 +1,4 @@
-Copyright (C) 2002-2007 Xiph.org Foundation
+Copyright (C) 2002-2008 Xiph.Org Foundation and contributors.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
@ -11,7 +11,7 @@ notice, this list of conditions and the following disclaimer.
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.

- Neither the name of the Xiph.org Foundation nor the names of its
+- Neither the name of the Xiph.Org Foundation nor the names of its
 contributors may be used to endorse or promote products derived from
 this software without specific prior written permission.

--- a/media/libtheora/README
+++ b/media/libtheora/README
@ -1,5 +1,5 @@
 -------------------------------------------------------------------------
-          The Xiph.org Foundation's libtheora 1.0beta1 release
+             The Xiph.org Foundation's libtheora 1.0 release
 -------------------------------------------------------------------------

 *** What is Theora?
@ -8,8 +8,8 @@ Theora is Xiph.Org's first publicly released video codec, intended
 for use within the Foundation's Ogg multimedia streaming system.
 Theora is derived directly from On2's VP3 codec; Currently the 
 encoders are nearly identical, but Theora will make use of new
-features supported by the decoder to improve over what is 
-is possible with VP3.
+features supported by the decoder to improve on what is possible
+with VP3.

 *** Where is Theora?

@ -18,12 +18,6 @@ can be gotten from www.theora.org or the main Xiph.Org site at
 www.xiph.org.  Development source is kept in an open subversion 
 repository, see http://theora.org/svn/ for instructions.

-*** What is the goal of this release?
-
-This is the first beta release of the 1.0 reference implementation.
-It is intended to completely support the decoder specification, and
-gather feedback on the implementation before declaring it stable.
-
 -------------------------------------------------------------------------
 Getting started with the code
 -------------------------------------------------------------------------
@ -38,24 +32,33 @@ Requirements summary:

  For example encoder:

-      as above
+      as above,

      libvorbis and libvorbisenc 1.0.1 or newer.

+  For creating a source distribution package:
+
+      as above,
+
+      Doxygen to build the API documentation,
+      pdflatex and fig2dev to build the format specification
+        (transfig package in Ubuntu).
+
  For the player only:

      as above,

-      SDL (Simple Direct media Layer) libraries and headers
- 
-      OSS audio driver and development headers
+      SDL (Simple DirectMedia Layer) libraries and headers,
+      OSS audio driver and development headers.

 The provided build system is the GNU automake/autoconf system, and
 the main library, libtheora, should already build smoothly on any
 system.  Failure of libtheora to build on a GNU-enabled system is
 considered a bug; please report problems to theora-dev@xiph.org.

-Some windows build support is included in the win32 directory.
+Windows build support is included in the win32 directory.
+
+Project files for Apple XCode are included in the macosx directory.

 There is also an experimental scons build.

--- a/media/libtheora/README_MOZILLA
+++ b/media/libtheora/README_MOZILLA
@ -1,7 +1,7 @@
-The source from this directory was copied from the libtheora-1.0beta3
+The source from this directory was copied from the libtheora-1.0
 source distribution using the update.sh script. The changes made were
 those applied by update.sh, the addition/update of Makefile.in files
-for the Mozilla build system and the patch in bug 450265.
+for the Mozilla build system and the patch from the bug listed below.

 Bug 455357 - WinCE LibTheora Pre-defined Macro usage in local variable
  455357_wince_local_variable_macro_clash_patch
--- a/media/libtheora/changeset_r15144.diff
+++ b/media/libtheora/changeset_r15144.diff
@ -1,27 +0,0 @@
-Index: /trunk/theora/lib/dec/decapiwrapper.c
-===================================================================
--- /trunk/theora/lib/dec/decapiwrapper.c (revision 14385)
-+++ /trunk/theora/lib/dec/decapiwrapper.c (revision 15144)
-@@ -168,10 +168,13 @@
-   ogg_int64_t     gp;
-   int             ret;
-+  
-+  if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT;
-   api=(th_api_wrapper *)_td->i->codec_setup;
-+  if(!api || !api->decode)return OC_FAULT;
-   ret=th_decode_packetin(api->decode,_op,&gp);
- 
- #ifdef _TH_DEBUG_
-   dframe++;
-#endif 
-+#endif
- 
-   if(ret<0)return OC_BADPACKET;
-@@ -185,5 +188,7 @@
-   int              ret;
- 
-+  if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT;
-   api=(th_api_wrapper *)_td->i->codec_setup;
-+  if(!api || !api->decode)return OC_FAULT;
-   ret=th_decode_ycbcr_out(api->decode,buf);
-   if(ret>=0){
--- a/media/libtheora/include/theora/codec.h
+++ b/media/libtheora/include/theora/codec.h
@ -112,13 +112,18 @@ typedef enum{
 *  specification</a>, Section 4.4, for details on the precise sample
 *  locations.*/
 typedef enum{
-  /**Chroma decimation by 2 in both the X and Y directions (4:2:0).*/
+  /**Chroma decimation by 2 in both the X and Y directions (4:2:0).
+     The Cb and Cr chroma planes are half the width and half the height of the
+      luma plane.*/
  TH_PF_420,
  /**Currently reserved.*/
  TH_PF_RSVD,
-  /**Chroma decimation by 2 in the X direction (4:2:2).*/
+  /**Chroma decimation by 2 in the X direction (4:2:2).
+     The Cb and Cr chroma planes are half the width of the luma plane, but full
+      height.*/
  TH_PF_422,
-  /**No chroma decimation (4:4:4).*/
+  /**No chroma decimation (4:4:4).
+     The Cb and Cr chroma planes are full width and full height.*/
  TH_PF_444,
  /**The total number of currently defined pixel formats.*/
  TH_PF_NFORMATS
--- a/media/libtheora/include/theora/config.h
+++ b/media/libtheora/include/theora/config.h
@ -56,19 +56,19 @@
 #define PACKAGE_NAME "libtheora"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "libtheora 1.0beta3"
+#define PACKAGE_STRING "libtheora 1.0"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "libtheora"

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.0beta3"
+#define PACKAGE_VERSION "1.0"

 /* Define to 1 if you have the ANSI C header files. */
 #define STDC_HEADERS 1

 /* Define to exclude encode support from the build */
- 
+/* #undef THEORA_DISABLE_ENCODE */

 /* Define to exclude floating point code from the build */
 /* #undef THEORA_DISABLE_FLOAT */
@ -77,4 +77,4 @@
 

 /* Version number of package */
-#define VERSION "1.0beta3"
+#define VERSION "1.0"
--- a/media/libtheora/include/theora/theora.h
+++ b/media/libtheora/include/theora/theora.h
@ -315,6 +315,20 @@ typedef struct theora_comment{
 */
 #define TH_DECCTL_SET_PPLEVEL (3)

+/**Sets the maximum distance between key frames.
+ * This can be changed during an encode, but will be bounded by
+ *  <tt>1<<th_info#keyframe_granule_shift</tt>.
+ * If it is set before encoding begins, th_info#keyframe_granule_shift will
+ *  be enlarged appropriately.
+ *
+ * \param[in]  buf <tt>ogg_uint32_t</tt>: The maximum distance between key
+ *                   frames.
+ * \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
+ * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
+ * \retval TH_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
+
 /**Set the granule position.
 * Call this after a seek, to update the internal granulepos
 * in the decoder, to insure that subsequent frames are marked
@ -332,20 +346,6 @@ typedef struct theora_comment{
 *  \ref decctlcodes "decoder control codes".
 * Keep any experimental or vendor-specific values above \c 0x8000.*/
 /*@{*/
-/**Sets the Huffman tables to use.
- * The tables are copied, not stored by reference, so they can be freed after
- *  this call.
- * <tt>NULL</tt> may be specified to revert to the default tables.
- *
- * \param[in] buf <tt>#th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS]</tt>
- * \retval TH_FAULT  \a theora_state is <tt>NULL</tt>.
- * \retval TH_EINVAL Encoding has already begun or one or more of the given
- *                     tables is not full or prefix-free, \a buf is
- *                     <tt>NULL</tt> and \a buf_sz is not zero, or \a buf is
- *                     non-<tt>NULL</tt> and \a buf_sz is not
- *                     <tt>sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
-#define TH_ENCCTL_SET_HUFFMAN_CODES (0)
 /**Sets the quantization parameters to use.
 * The parameters are copied, not stored by reference, so they can be freed
 *  after this call.
@ -365,19 +365,6 @@ typedef struct theora_comment{
 *                    <tt>sizeof(#th_quant_info)</tt>.
 * \retval TH_IMPL   Not supported by this implementation.*/
 #define TH_ENCCTL_SET_QUANT_PARAMS (2)
-/**Sets the maximum distance between key frames.
- * This can be changed during an encode, but will be bounded by
- *  <tt>1<<th_info#keyframe_granule_shift</tt>.
- * If it is set before encoding begins, th_info#keyframe_granule_shift will
- *  be enlarged appropriately.
- *
- * \param[in]  buf <tt>ogg_uint32_t</tt>: The maximum distance between key
- *                   frames.
- * \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
- * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
- * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
-#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
 /**Disables any encoder features that would prevent lossless transcoding back
 *  to VP3.
 * This primarily means disabling block-level QI values and not using 4MV mode
@ -434,41 +421,6 @@ typedef struct theora_comment{
 * \retval TH_IMPL   Not supported by this implementation in the current
 *                    encoding mode.*/
 #define TH_ENCCTL_SET_SPLEVEL (14)
-/**Puts the encoder in VBR mode.
- * This can be done at any time during the encoding process, with different
- *  configuration parameters, to encode different regions of the video segment
- *  with different qualities.
- * See the #th_info struct documentation for details on how the default
- *  encoding mode is chosen.
- *
- * \param[in] buf <tt>#th_vbr_cfg</tt>: the configuration parameters.
- *                 This may be <tt>NULL</tt>, in which case the current VBR
- *                  configuration is unchanged.
- *                 The default is to use the QI setting passed in via the
- *                  #th_info struct when the encoder was initialized, with a
- *                  full range of admissible quantizers.
- * \retval OC_EFAULT \a theora_state is <tt>NULL</tt>.
- * \retval TH_EINVAL The configuration parameters do not meet one of their
- *                    stated requirements, \a buf is <tt>NULL</tt> and
- *                    \a buf_sz is not zero, or \a buf is non-<tt>NULL</tt>
- *                    and \a buf_sz is not <tt>sizeof(#th_vbr_cfg)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
-#define TH_ENCCTL_SETUP_VBR (16)
-/**Puts the encoder in CQI mode.
- * This can be done at any time during the encoding process, with different QI
- *  values.
- * See the #th_info struct documentation for details on how the default
- *  encoding mode is chosen.
- *
- * \param[in] buf <tt>#th_cqi_cfg</tt>: the configuration parameters.
- *                 This may be <tt>NULL</tt>, in which case the current CQI
- *                  configuration is unchanged.
- *                 The default is to use the QI setting passed in via the
- *                  #th_info struct when the encoder was initialized.
- * \retval OC_EFAULT \a theora_state is <tt>NULL</tt>.
- * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(#th_cqi_cfg)</tt>.
- * \retval TH_IMPL   Not supported by this implementation.*/
-#define TH_ENCCTL_SETUP_CQI (18)
 /*@}*/

 #define OC_FAULT       -1       /**< General failure */
--- a/media/libtheora/include/theora/theoradec.h
+++ b/media/libtheora/include/theora/theoradec.h
@ -20,6 +20,7 @@

 #if !defined(_O_THEORA_THEORADEC_H_)
 # define _O_THEORA_THEORADEC_H_ (1)
+# include <stddef.h>
 # include <ogg/ogg.h>
 # include "codec.h"

@ -213,6 +214,22 @@ typedef struct th_setup_info th_setup_info;
 extern int th_decode_headerin(th_info *_info,th_comment *_tc,
 th_setup_info **_setup,ogg_packet *_op);
 /**Allocates a decoder instance.
+ *
+ * <b>Security Warning:</b> The Theora format supports very large frame sizes,
+ *  potentially even larger than the address space of a 32-bit machine, and
+ *  creating a decoder context allocates the space for several frames of data.
+ * If the allocation fails here, your program will crash, possibly at some
+ *  future point because the OS kernel returned a valid memory range and will
+ *  only fail when it tries to map the pages in it the first time they are
+ *  used.
+ * Even if it succeeds, you may experience a denial of service if the frame
+ *  size is large enough to cause excessive paging.
+ * If you are integrating libtheora in a larger application where such things
+ *  are undesirable, it is highly recommended that you check the frame size in
+ *  \a _info before calling this function and refuse to decode streams where it
+ *  is larger than some reasonable maximum.
+ * libtheora will not check this for you, because there may be machines that
+ *  can handle such streams and applications that wish to.
 * \param _info  A #th_info struct filled via th_decode_headerin().
 * \param _setup A #th_setup_info handle returned via
 *                th_decode_headerin().
@ -253,7 +270,7 @@ extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
 *                       The player can skip the call to th_decode_ycbcr_out(),
 *                        as the contents of the decoded frame buffer have not
 *                        changed.
- * \retval TH_EFAULT     \a _dec or _op was <tt>NULL</tt>.
+ * \retval TH_EFAULT     \a _dec or \a _op was <tt>NULL</tt>.
 * \retval TH_EBADPACKET \a _op does not contain encoded video data.
 * \retval TH_EIMPL      The video data uses bitstream features which this
 *                        library does not support.*/
--- a/media/libtheora/lib/Makefile.in
+++ b/media/libtheora/lib/Makefile.in
@ -48,12 +48,10 @@ FORCE_STATIC_LIB= 1
 DEFINES += -DTHEORA_DISABLE_ENCODE

 ifeq ($(findstring 86,$(OS_TEST)), 86)
-ifneq ($(OS_ARCH),WINNT)
 ifneq ($(OS_ARCH),SunOS)
 DEFINES += -DOC_X86ASM -DUSE_ASM
 endif
 endif
-endif

 VPATH		:= $(srcdir) $(srcdir)/dec

@ -62,7 +60,7 @@ CSRCS		= \
 		huffdec.c \
 		quant.c \
 		dequant.c \
-		bitwise.c \
+		bitpack.c \
 		internal.c \
 		decinfo.c \
 		decapiwrapper.c \
@ -75,6 +73,17 @@ CSRCS		= \
 		$(NULL)

 ifeq ($(findstring 86,$(OS_TEST)), 86)
+ifeq ($(OS_ARCH),WINNT)
+VPATH		+= $(srcdir)/dec/x86_vc
+
+CSRCS		+= \
+		mmxfrag.c \
+		mmxloopfilter.c \
+		x86state.c \
+		mmxstate.c \
+		mmxidct.c \
+		$(NULL)
+else
 VPATH		+= $(srcdir)/dec/x86

 CSRCS		+= \
@ -84,6 +93,7 @@ CSRCS		+= \
 		mmxidct.c \
 		$(NULL)
 endif
+endif

 include $(topsrcdir)/config/rules.mk

--- a/media/libtheora/lib/config.h
+++ b/media/libtheora/lib/config.h
@ -56,19 +56,19 @@
 #define PACKAGE_NAME "libtheora"

 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "libtheora 1.0beta3"
+#define PACKAGE_STRING "libtheora 1.0"

 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "libtheora"

 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.0beta3"
+#define PACKAGE_VERSION "1.0"

 /* Define to 1 if you have the ANSI C header files. */
 #define STDC_HEADERS 1

 /* Define to exclude encode support from the build */
- 
+/* #undef THEORA_DISABLE_ENCODE */

 /* Define to exclude floating point code from the build */
 /* #undef THEORA_DISABLE_FLOAT */
@ -77,4 +77,4 @@
 

 /* Version number of package */
-#define VERSION "1.0beta3"
+#define VERSION "1.0"
--- a/media/libtheora/lib/cpu.c
+++ b/media/libtheora/lib/cpu.c
@ -6,7 +6,7 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

@ -14,29 +14,51 @@
  Originally written by Rudolf Marek.

 function:
-  last mod: $Id: cpu.c 14718 2008-04-12 08:36:58Z conrad $
+  last mod: $Id: cpu.c 15427 2008-10-21 02:36:19Z xiphmont $

 ********************************************************************/

 #include "cpu.h"

 #if !defined(USE_ASM)
-
-ogg_uint32_t oc_cpu_flags_get(void){
+static ogg_uint32_t oc_cpu_flags_get(void){
  return 0;
 }
+#else
+# if !defined(_MSC_VER)
+#  if defined(__amd64__)||defined(__x86_64__)
+/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when
+   compiling with -fPIC.*/
+#   define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+  __asm__ __volatile__( \
+   "cpuid\n\t" \
+   :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
+   :"a"(_op) \
+   :"cc" \
+  )
+#  else
+/*On x86-32, not so much.*/
+#   define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+  __asm__ __volatile__( \
+   "xchgl %%ebx,%[ebx]\n\t" \
+   "cpuid\n\t" \
+   "xchgl %%ebx,%[ebx]\n\t" \
+   :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
+   :"a"(_op) \
+   :"cc" \
+  )
+#  endif
+# else
+/*Why does MSVC need this complicated rigamarole?
+  At this point I honestly do not care.*/

-#else /* USE_ASM */
-
-# if defined(_MSC_VER)
-/*  Visual C cpuid helper function. For VS2005 we could
-    as well use the _cpuid builtin, but that wouldn't work
-    for VS2003 users, so we do it in inline assembler */
-
-static void oc_cpuid_helper (ogg_uint32_t * CpuInfo, ogg_uint32_t op){
+/*Visual C cpuid helper function.
+  For VS2005 we could as well use the _cpuid builtin, but that wouldn't work
+   for VS2003 users, so we do it in inline assembler.*/
+static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){
  _asm{
-    mov eax, [op]
-    mov esi, CpuInfo
+    mov eax,[_op]
+    mov esi,_cpu_info
    cpuid
    mov [esi+0],eax
    mov [esi+4],ebx
@ -46,117 +68,160 @@ static void oc_cpuid_helper (ogg_uint32_t * CpuInfo, ogg_uint32_t op){
 }

 #  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
-  {                                    \
-    ogg_uint32_t nfo[4];               \
-    oc_cpuid_helper (nfo, (_op));      \
-    (_eax) = nfo[0],(_ebx) = nfo[1];   \
-    (_ecx) = nfo[2],(_edx) = nfo[3];   \
+  do{ \
+    ogg_uint32_t cpu_info[4]; \
+    oc_cpuid_helper(cpu_info,_op); \
+    (_eax)=cpu_info[0]; \
+    (_ebx)=cpu_info[1]; \
+    (_ecx)=cpu_info[2]; \
+    (_edx)=cpu_info[3]; \
+  }while(0)
+/*Sets *_ebx to EFLAGS and *_eax to EFLAGS after trying to toggle bit 21.*/
+static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){
+  _asm{
+    pushfd
+    pushfd
+    pop eax
+    mov ebx,eax
+    xor eax,200000h
+    push eax
+    popfd
+    pushfd
+    pop eax
+    popfd
+    mov ecx,_eax
+    mov [ecx],eax
+    mov ecx,_ebx
+    mov [ecx],ebx
+  }
+}
+# endif
+/*Maps cpuid(1) feature bits (_edx,_ecx) to a mask of OC_CPU_X86_* flags.*/
+static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
+  ogg_uint32_t flags;
+  /*If there isn't even MMX (EDX bit 23), give up and report no features.*/
+  if(!(_edx&0x00800000))return 0;
+  flags=OC_CPU_X86_MMX;
+  if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE; /*EDX bit 25.*/
+  if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2; /*EDX bit 26.*/
+  if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI; /*ECX bit 0.*/
+  if(_ecx&0x00000200)flags|=OC_CPU_X86_SSSE3; /*ECX bit 9; bit 8 is TM2.*/
+  if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1; /*ECX bit 19.*/
+  if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2; /*ECX bit 20.*/
+  return flags;
 }

-# elif (defined(__amd64__) || defined(__x86_64__))
+static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
+  ogg_uint32_t flags; /*OC_CPU_X86_* mask built from cpuid(0x80000001) bits.*/
+  /*If there isn't even MMX (EDX bit 23), give up and report no features.*/
+  if(!(_edx&0x00800000))return 0;
+  flags=OC_CPU_X86_MMX;
+  if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT; /*EDX bit 22.*/
+  if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW; /*EDX bit 31.*/
+  if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT; /*EDX bit 30.*/
+  if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A; /*ECX bit 6.*/
+  if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5; /*ECX bit 11.*/
+  return flags;
+}

-#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
-  __asm__ __volatile__( \
-   "push %%rbx\n\t" \
-   "cpuid\n\t" \
-   "movl %%ebx,%1\n\t" \
-   "pop  %%rbx\n\t" \
-   :"=a" (_eax), \
-    "=r" (_ebx), \
-    "=c" (_ecx), \
-    "=d" (_edx) \
-   :"a" (_op) \
-   :"cc" \
-  )
-# else /* x86_32, GCC */
-
-#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
-  __asm__ __volatile__( \
-   "pushl %%ebx\n\t" \
-   "cpuid\n\t" \
-   "movl  %%ebx,%1\n\t" \
-   "popl  %%ebx\n\t" \
-   :"=a" (_eax), \
-    "=r" (_ebx), \
-    "=c" (_ecx), \
-    "=d" (_edx) \
-   :"a" (_op) \
-   :"cc" \
-  )
-
-# endif /* arch switch */
-
-ogg_uint32_t oc_cpu_flags_get(void){
-  ogg_uint32_t flags = 0;
+static ogg_uint32_t oc_cpu_flags_get(void){
+  ogg_uint32_t flags;
  ogg_uint32_t eax;
  ogg_uint32_t ebx;
  ogg_uint32_t ecx;
  ogg_uint32_t edx;
-
-# if !defined(_MSC_VER) && !defined(__amd64__) && !defined(__x86_64__)
-  /* check for cpuid */
+# if !defined(__amd64__)&&!defined(__x86_64__)
+  /*Not all x86-32 chips support cpuid, so we have to check.*/
+#  if !defined(_MSC_VER)
  __asm__ __volatile__(
   "pushfl\n\t"
   "pushfl\n\t"
-   "popl          %0\n\t"
-   "movl          %0,%1\n\t"
-   "xorl   $0x200000,%0\n\t"
-   "pushl         %0\n\t"
+   "popl %[a]\n\t"
+   "movl %[a],%[b]\n\t"
+   "xorl $0x200000,%[a]\n\t"
+   "pushl %[a]\n\t"
   "popfl\n\t"
   "pushfl\n\t"
-   "popl          %0\n\t"
+   "popl %[a]\n\t"
   "popfl\n\t"
-   :"=r" (eax),
-    "=r" (ebx)
+   :[a]"=r"(eax),[b]"=r"(ebx)
   :
   :"cc"
  );
+#  else
+  oc_detect_cpuid_helper(&eax,&ebx);
+#  endif
  /*No cpuid.*/
  if(eax==ebx)return 0;
-# endif /* GCC, x86_32 */
-
+# endif
  cpuid(0,eax,ebx,ecx,edx);
-  if(ebx==0x756e6547&&edx==0x49656e69&&ecx==0x6c65746e){
-    /*Intel:*/
-inteltest:
+  /*         l e t n          I e n i          u n e G*/
+  if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
+   /*      6 8 x M          T e n i          u n e G*/
+   ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
+    /*Intel, Transmeta (tested with Crusoe TM5800):*/
    cpuid(1,eax,ebx,ecx,edx);
-    if((edx&0x00800000)==0)return 0;
-    flags=OC_CPU_X86_MMX;
-    if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
-    if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
+    flags=oc_parse_intel_flags(edx,ecx);
  }
-  else if(ebx==0x68747541&&edx==0x69746e65&&ecx==0x444d4163 ||
-          ebx==0x646f6547&&edx==0x79622065&&ecx==0x43534e20){
-    /*AMD:*/
-    /*Geode:*/
+  /*              D M A c          i t n e          h t u A*/
+  else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
+   /*      C S N            y b   e          d o e G*/
+   ecx==0x43534E20&&edx==0x79622065&&ebx==0x646F6547){
+    /*AMD, Geode:*/
    cpuid(0x80000000,eax,ebx,ecx,edx);
-    if(eax<0x80000001)goto inteltest;
+    if(eax<0x80000001)flags=0;
+    else{
      cpuid(0x80000001,eax,ebx,ecx,edx);
-    if((edx&0x00800000)==0)return 0;
-    flags=OC_CPU_X86_MMX;
-    if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
-    if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
-    if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
+      flags=oc_parse_amd_flags(edx,ecx);
+    }
+    /*Also check for SSE.*/
+    cpuid(1,eax,ebx,ecx,edx);
+    flags|=oc_parse_intel_flags(edx,ecx);
+  }
+  /*Technically some VIA chips can be configured in the BIOS to return any
+     string here the user wants.
+    There is a special detection method that can be used to identify such
+     processors, but in my opinion, if the user really wants to change it, they
+     deserve what they get.*/
+  /*              s l u a          H r u a          t n e C*/
+  else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
+    /*VIA:*/
+    /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming)
+       chips (thanks to the engineers from Centaur Technology who provided it).
+      These chips support Intel-like cpuid info.
+      The C3-2 (Nehemiah) cores appear to, as well.*/
+    cpuid(1,eax,ebx,ecx,edx);
+    flags=oc_parse_intel_flags(edx,ecx);
+    cpuid(0x80000000,eax,ebx,ecx,edx);
+    if(eax>=0x80000001){
+      /*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
+        We need to check this even if the Intel test succeeds to pick up 3DNow!
+         support on these processors.
+        Unlike actual AMD processors, we cannot _rely_ on this info, since
+         some cores (e.g., the 693 stepping of the Nehemiah) claim to support
+         this function, yet return edx=0, despite the Intel test indicating
+         MMX support.
+        Therefore the features detected here are strictly added to those
+         detected by the Intel test.*/
+      /*TODO: How about earlier chips?*/
+      cpuid(0x80000001,eax,ebx,ecx,edx);
+      /*Note: As of the C7, this function returns Intel-style extended feature
+         flags, not AMD-style.
+        Currently, this only defines bits 11, 20, and 29 (0x20100800), which
+         do not conflict with any of the AMD flags we inspect.
+        For the remaining bits, Intel tells us, "Do not count on their value",
+         but VIA assures us that they will all be zero (at least on the C7 and
+         Isaiah chips).
+        In the (unlikely) event a future processor uses bits 18, 19, 30, or 31
+         (0xC0C00000) for something else, we will have to add code to detect
+         the model to decide when it is appropriate to inspect them.*/
+      flags|=oc_parse_amd_flags(edx,ecx);
+    }
  }
  else{
    /*Implement me.*/
    flags=0;
  }
-
-# ifdef DEBUG
-  if (flags) {
-    TH_DEBUG("vectorized instruction sets supported:");
-    if (flags & OC_CPU_X86_MMX)      TH_DEBUG(" mmx");
-    if (flags & OC_CPU_X86_MMXEXT)   TH_DEBUG(" mmxext");
-    if (flags & OC_CPU_X86_SSE)      TH_DEBUG(" sse");
-    if (flags & OC_CPU_X86_SSE2)     TH_DEBUG(" sse2");
-    if (flags & OC_CPU_X86_3DNOW)    TH_DEBUG(" 3dnow");
-    if (flags & OC_CPU_X86_3DNOWEXT) TH_DEBUG(" 3dnowext");
-    TH_DEBUG("\n");
-  }
-# endif
-
  return flags;
 }
-
-#endif /* USE_ASM */
+#endif
--- a/media/libtheora/lib/cpu.h
+++ b/media/libtheora/lib/cpu.h
@ -6,11 +6,11 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************
 function:
-    last mod: $Id: cpu.h 13884 2007-09-22 08:38:10Z giles $
+    last mod: $Id: cpu.h 15430 2008-10-21 05:03:55Z giles $

 ********************************************************************/

@ -24,7 +24,11 @@
 #define OC_CPU_X86_MMXEXT   (1<<3)
 #define OC_CPU_X86_SSE      (1<<4)
 #define OC_CPU_X86_SSE2     (1<<5)
-
-ogg_uint32_t oc_cpu_flags_get(void);
+#define OC_CPU_X86_PNI      (1<<6)
+#define OC_CPU_X86_SSSE3    (1<<7)
+#define OC_CPU_X86_SSE4_1   (1<<8)
+#define OC_CPU_X86_SSE4_2   (1<<9)
+#define OC_CPU_X86_SSE4A    (1<<10)
+#define OC_CPU_X86_SSE5     (1<<11)

 #endif
--- a/media/libtheora/lib/dec/apiwrapper.c
+++ b/media/libtheora/lib/dec/apiwrapper.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: apiwrapper.c 14321 2007-12-22 18:09:29Z tterribe $
+    last mod: $Id: apiwrapper.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/apiwrapper.h
+++ b/media/libtheora/lib/dec/apiwrapper.h
@ -6,7 +6,7 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

--- a/media/libtheora/lib/dec/bitpack.c
+++ b/media/libtheora/lib/dec/bitpack.c
@ -0,0 +1,121 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitpack.c 15400 2008-10-15 12:10:58Z tterribe $
+
+ ********************************************************************/
+
+/*We're 'MSb' endian; if we write a word but read individual bits,
+   then we'll read the MSb first.*/
+
+#include <string.h>
+#include <stdlib.h>
+#include "bitpack.h"
+
+void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes){/*
+  Prepares _b for reading the _bytes bytes that start at _buf.
+  Only the pointer is stored; the data itself is not copied.*/
+  memset(_b,0,sizeof(*_b)); /*Zero every field of the reader state.*/
+  _b->buffer=_b->ptr=_buf; /*ptr is the read cursor; buffer keeps the start.*/
+  _b->storage=_bytes; /*Size in bytes; the end-of-data checks compare to it.*/
+}
+
+int theorapackB_look1(oggpack_buffer *_b,long *_ret){/*
+  Peeks at the next unread bit without consuming it.
+  Stores the bit in *_ret and returns 0, or stores 0 and returns -1 when the
+   read position is at or past the end of the buffer.*/
+  if(_b->endbyte>=_b->storage){
+    *_ret=0L;
+    return -1;
+  }
+  *_ret=(_b->ptr[0]>>7-_b->endbit)&1; /*Shift count is (7-endbit): bits are
+                                         taken MSb first.*/
+  return 0;
+}
+
+void theorapackB_adv1(oggpack_buffer *_b){/*
+  Advances the read position by one bit.
+  No end-of-buffer check is made here.*/
+  if(++(_b->endbit)>7){
+    _b->endbit=0; /*Wrapped past bit 7: move on to the next byte.*/
+    _b->ptr++;
+    _b->endbyte++;
+  }
+}
+
+/*Reads the next _bits bits, MSb first, into the low-order bits of *_ret and
+   advances the read position past them.
+  Here we assume that 0<=_bits&&_bits<=32.
+  Returns 0 on success.
+  If fewer than _bits bits remain, *_ret is set to 0 and -1 is returned; the
+   read position is advanced by _bits bits in that case as well.*/
+int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret){
+  long ret;
+  long m;
+  long d;
+  int fail;
+  m=32-_bits; /*Right shift needed at the end to leave just _bits bits.*/
+  _bits+=_b->endbit; /*Now counted from the top bit of _b->ptr[0].*/
+  d=_b->storage-_b->endbyte; /*Bytes from the current byte to the end.*/
+  if(d<=4){
+    /*Not the main path.*/
+    if(d*8<_bits){
+      *_ret=0L;
+      fail=-1;
+      goto overflow; /*The position is still advanced below.*/
+    }
+    /*Special case to avoid reading _b->ptr[0], which might be past the end of
+       the buffer; also skips some useless accounting.*/
+    else if(!_bits){
+      *_ret=0L;
+      return 0;
+    }
+  }
+  ret=_b->ptr[0]<<24+_b->endbit; /*Shift count is (24+endbit), so the first
+                                    unread bit lands on bit 31.
+                                   NOTE(review): bits pushed above bit 31 are
+                                    masked off below; presumably this is a
+                                    promoted int being shifted past its top
+                                    bit -- confirm on the target compilers.*/
+  if(_bits>8){
+    ret|=_b->ptr[1]<<16+_b->endbit;
+    if(_bits>16){
+      ret|=_b->ptr[2]<<8+_b->endbit;
+      if(_bits>24){
+        ret|=_b->ptr[3]<<_b->endbit;
+        if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit; /*Only reachable with
+                                                      endbit>0, given the
+                                                      _bits<=32 assumption.*/
+      }
+    }
+  }
+  *_ret=((ret&0xFFFFFFFFUL)>>(m>>1))>>(m+1>>1); /*Total shift is m, split in
+                                                   two so that neither count
+                                                   reaches 32 when m==32.*/
+  fail=0;
+overflow:
+  _b->ptr+=_bits>>3; /*Whole bytes consumed.*/
+  _b->endbyte+=_bits>>3;
+  _b->endbit=_bits&7; /*Bits consumed within the new current byte.*/
+  return fail;
+}
+
+int theorapackB_read1(oggpack_buffer *_b,long *_ret){/*
+  Reads a single bit into *_ret and advances the read position by one bit.
+  Returns 0 on success.
+  At or past the end of the buffer, stores 0 in *_ret and returns -1; the
+   position is advanced all the same.*/
+  int fail;
+  if(_b->endbyte>=_b->storage){
+    /*Not the main path.*/
+    *_ret=0L;
+    fail=-1;
+  }
+  else{
+    *_ret=(_b->ptr[0]>>7-_b->endbit)&1; /*Shift count is (7-endbit).*/
+    fail=0;
+  }
+  _b->endbit++; /*Runs on both the success and the failure path.*/
+  if(_b->endbit>7){
+    _b->endbit=0;
+    _b->ptr++;
+    _b->endbyte++;
+  }
+  return fail;
+}
+
+long theorapackB_bytes(oggpack_buffer *_b){/*
+  Returns the number of bytes read so far, counting a partially read byte as
+   a whole one: (endbit+7)>>3 is 1 for endbit in 1..7 and 0 for endbit==0.*/
+  return _b->endbyte+(_b->endbit+7>>3);
+}
+
+long theorapackB_bits(oggpack_buffer *_b){/*
+  Returns the read position in bits: eight per whole byte consumed, plus the
+   bits consumed in the current byte.*/
+  return _b->endbyte*8+_b->endbit;
+}
+
+unsigned char *theorapackB_get_buffer(oggpack_buffer *_b){/*
+  Returns the buffer pointer stored in _b, i.e. the start of the data handed
+   to theorapackB_readinit(), regardless of how much has been read.*/
+  return _b->buffer;
+}
--- a/media/libtheora/lib/dec/bitpack.h
+++ b/media/libtheora/lib/dec/bitpack.h
@ -0,0 +1,38 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008             *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
+
+ ********************************************************************/
+#if !defined(_bitpack_H)
+# define _bitpack_H (1)
+# include <ogg/ogg.h>
+
+void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes);
+int theorapackB_look1(oggpack_buffer *_b,long *_ret);
+void theorapackB_adv1(oggpack_buffer *_b);
+/*Here we assume 0<=_bits&&_bits<=32.*/
+int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret);
+int theorapackB_read1(oggpack_buffer *_b,long *_ret);
+long theorapackB_bytes(oggpack_buffer *_b);
+long theorapackB_bits(oggpack_buffer *_b);
+unsigned char *theorapackB_get_buffer(oggpack_buffer *_b);
+
+/*These two functions are implemented locally in huffdec.c*/
+/*Read in bits without advancing the bitptr.
+  Here we assume 0<=_bits&&_bits<=32.*/
+/*static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret);*/
+/*static void theorapackB_adv(oggpack_buffer *_b,int _bits);*/
+
+
+#endif
--- a/media/libtheora/lib/dec/bitwise.c
+++ b/media/libtheora/lib/dec/bitwise.c
@ -1,126 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002             *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function: packing variable sized words into an octet stream
-  last mod: $Id: bitwise.c 14546 2008-02-29 01:14:05Z tterribe $
-
- ********************************************************************/
-
-/* We're 'MSb' endian; if we write a word but read individual bits,
-   then we'll read the msb first */
-
-#include <string.h>
-#include <stdlib.h>
-#include "bitwise.h"
-
-void theorapackB_reset(oggpack_buffer *b){
-  b->ptr=b->buffer;
-  b->buffer[0]=0;
-  b->endbit=b->endbyte=0;
-}
-
-void theorapackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes){
-  memset(b,0,sizeof(*b));
-  b->buffer=b->ptr=buf;
-  b->storage=bytes;
-}
-
-int theorapackB_look1(oggpack_buffer *b,long *_ret){
-  if(b->endbyte>=b->storage){
-    *_ret=0L;
-    return -1;
-  }
-  *_ret=((b->ptr[0]>>(7-b->endbit))&1);
-  return 0;
-}
-
-void theorapackB_adv1(oggpack_buffer *b){
-  if(++(b->endbit)>7){
-    b->endbit=0;
-    b->ptr++;
-    b->endbyte++;
-  }
-}
-
-/* bits <= 32 */
-int theorapackB_read(oggpack_buffer *b,int bits,long *_ret){
-  long ret;
-  long m;
-  int fail;
-  m=32-bits;
-  bits+=b->endbit;
-  if(b->endbyte+4>=b->storage){
-    /* not the main path */
-    if(b->endbyte*8+bits>b->storage*8){
-      *_ret=0L;
-      fail=-1;
-      goto overflow;
-    }
-    /* special case to avoid reading b->ptr[0], which might be past the end of
-        the buffer; also skips some useless accounting */
-    else if(!bits){
-      *_ret=0L;
-      return 0;
-    }
-  }
-  ret=b->ptr[0]<<(24+b->endbit);
-  if(bits>8){
-    ret|=b->ptr[1]<<(16+b->endbit);
-    if(bits>16){
-      ret|=b->ptr[2]<<(8+b->endbit);
-      if(bits>24){
-        ret|=b->ptr[3]<<(b->endbit);
-        if(bits>32 && b->endbit)
-          ret|=b->ptr[4]>>(8-b->endbit);
-      }
-    }
-  }
-  *_ret=((ret&0xffffffffUL)>>(m>>1))>>((m+1)>>1);
-  fail=0;
-overflow:
-  b->ptr+=bits/8;
-  b->endbyte+=bits/8;
-  b->endbit=bits&7;
-  return fail;
-}
-
-int theorapackB_read1(oggpack_buffer *b,long *_ret){
-  int fail;
-  if(b->endbyte>=b->storage){
-    /* not the main path */
-    *_ret=0L;
-    fail=-1;
-    goto overflow;
-  }
-  *_ret=(b->ptr[0]>>(7-b->endbit))&1;
-  fail=0;
-overflow:
-  b->endbit++;
-  if(b->endbit>7){
-    b->endbit=0;
-    b->ptr++;
-    b->endbyte++;
-  }
-  return fail;
-}
-
-long theorapackB_bytes(oggpack_buffer *b){
-  return(b->endbyte+(b->endbit+7)/8);
-}
-
-long theorapackB_bits(oggpack_buffer *b){
-  return(b->endbyte*8+b->endbit);
-}
-
-unsigned char *theorapackB_get_buffer(oggpack_buffer *b){
-  return(b->buffer);
-}
--- a/media/libtheora/lib/dec/bitwise.h
+++ b/media/libtheora/lib/dec/bitwise.h
@ -1,76 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002             *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function: packing variable sized words into an octet stream
-  last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
-
- ********************************************************************/
-#if !defined(_bitwise_H)
-# define _bitwise_H (1)
-# include <ogg/ogg.h>
-
-void theorapackB_reset(oggpack_buffer *b);
-void theorapackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes);
-/* Read in bits without advancing the bitptr; bits <= 32 */
-static int theorapackB_look(oggpack_buffer *b,int bits,long *_ret);
-int theorapackB_look1(oggpack_buffer *b,long *_ret);
-static void theorapackB_adv(oggpack_buffer *b,int bits);
-void theorapackB_adv1(oggpack_buffer *b);
-/* bits <= 32 */
-int theorapackB_read(oggpack_buffer *b,int bits,long *_ret);
-int theorapackB_read1(oggpack_buffer *b,long *_ret);
-long theorapackB_bytes(oggpack_buffer *b);
-long theorapackB_bits(oggpack_buffer *b);
-unsigned char *theorapackB_get_buffer(oggpack_buffer *b);
-
-/*These two functions are only used in one place, and declaring them static so
-   they can be inlined saves considerable function call overhead.*/
-
-/* Read in bits without advancing the bitptr; bits <= 32 */
-static int theorapackB_look(oggpack_buffer *b,int bits,long *_ret){
-  long ret;
-  long m;
-  m=32-bits;
-  bits+=b->endbit;
-  if(b->endbyte+4>=b->storage){
-    /* not the main path */
-    if(b->endbyte>=b->storage){
-      *_ret=0L;
-      return -1;
-    }
-    /*If we have some bits left, but not enough, return the ones we have.*/
-    if((b->storage-b->endbyte)*8<bits)bits=(b->storage-b->endbyte)*8;
-  }
-  ret=b->ptr[0]<<(24+b->endbit);
-  if(bits>8){
-    ret|=b->ptr[1]<<(16+b->endbit);
-    if(bits>16){
-      ret|=b->ptr[2]<<(8+b->endbit);
-      if(bits>24){
-        ret|=b->ptr[3]<<(b->endbit);
-        if(bits>32&&b->endbit)
-          ret|=b->ptr[4]>>(8-b->endbit);
-      }
-    }
-  }
-  *_ret=((ret&0xffffffff)>>(m>>1))>>((m+1)>>1);
-  return 0;
-}
-
-static void theorapackB_adv(oggpack_buffer *b,int bits){
-  bits+=b->endbit;
-  b->ptr+=bits/8;
-  b->endbyte+=bits/8;
-  b->endbit=bits&7;
-}
-
-#endif
--- a/media/libtheora/lib/dec/dct.h
+++ b/media/libtheora/lib/dec/dct.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-  last mod: $Id: dct.h 13884 2007-09-22 08:38:10Z giles $
+  last mod: $Id: dct.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/decapiwrapper.c
+++ b/media/libtheora/lib/dec/decapiwrapper.c
@ -6,7 +6,7 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

@ -29,10 +29,6 @@ static void th_dec_api_clear(th_api_wrapper *_api){

 static void theora_decode_clear(theora_state *_td){
  if(_td->i!=NULL)theora_info_clear(_td->i);
-#ifdef _TH_DEBUG_
-  fclose(debugout);
-  debugout=NULL;
-#endif
  memset(_td,0,sizeof(*_td));
 }

@ -92,7 +88,6 @@ int theora_decode_init(theora_state *_td,theora_info *_ci){
  th_api_info    *apiinfo;
  th_api_wrapper *api;
  th_info         info;
-
  api=(th_api_wrapper *)_ci->codec_setup;
  /*Allocate our own combined API wrapper/theora_info struct.
    We put them both in one malloc'd block so that when the API wrapper is
@ -130,11 +125,6 @@ int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
  th_api_wrapper *api;
  th_info         info;
  int             ret;
-
-#ifdef _TH_DEBUG_
-  debugout = fopen("theoradec-debugout.txt","w");
-#endif
-
  api=(th_api_wrapper *)_ci->codec_setup;
  /*Allocate an API wrapper struct on demand, since it will not also include a
     theora_info struct like the ones that are used in a theora_state struct.*/
@ -167,16 +157,9 @@ int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
  th_api_wrapper *api;
  ogg_int64_t     gp;
  int             ret;
-  
  if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
  api=(th_api_wrapper *)_td->i->codec_setup;
-  if(!api || !api->decode)return OC_FAULT;
  ret=th_decode_packetin(api->decode,_op,&gp);
-
-#ifdef _TH_DEBUG_
-  dframe++;
-#endif
-
  if(ret<0)return OC_BADPACKET;
  _td->granulepos=gp;
  return 0;
@ -186,10 +169,9 @@ int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
  th_api_wrapper  *api;
  th_ycbcr_buffer  buf;
  int              ret;
-
  if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
  api=(th_api_wrapper *)_td->i->codec_setup;
-  if(!api || !api->decode)return OC_FAULT;
+  if(!api->decode)return OC_FAULT;
  ret=th_decode_ycbcr_out(api->decode,buf);
  if(ret>=0){
    _yuv->y_width=buf[0].width;
@ -202,6 +184,5 @@ int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
    _yuv->u=buf[1].data;
    _yuv->v=buf[2].data;
  }
-
  return ret;
 }
--- a/media/libtheora/lib/dec/decinfo.c
+++ b/media/libtheora/lib/dec/decinfo.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: decinfo.c 14719 2008-04-12 11:36:40Z tterribe $
+    last mod: $Id: decinfo.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/decint.h
+++ b/media/libtheora/lib/dec/decint.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: decint.h 14369 2008-01-05 23:15:32Z tterribe $
+    last mod: $Id: decint.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

@ -20,7 +20,7 @@
 # define _decint_H (1)
 # include "theora/theoradec.h"
 # include "../internal.h"
-# include "bitwise.h"
+# include "bitpack.h"

 typedef struct th_setup_info oc_setup_info;
 typedef struct th_dec_ctx    oc_dec_ctx;
--- a/media/libtheora/lib/dec/decode.c
+++ b/media/libtheora/lib/dec/decode.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: decode.c 14385 2008-01-09 19:53:18Z giles $
+    last mod: $Id: decode.c 15403 2008-10-16 12:44:05Z tterribe $

 ********************************************************************/

@ -210,38 +210,28 @@ static void oc_dec_clear(oc_dec_ctx *_dec){

 static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  long val;
-
-  TH_DEBUG("\n>>>> beginning frame %ld\n\n",dframe);
-
  /*Check to make sure this is a data packet.*/
  theorapackB_read1(&_dec->opb,&val);
-  TH_DEBUG("frame type = %s, ",val==0?"video":"unknown");
  if(val!=0)return TH_EBADPACKET;
  /*Read in the frame type (I or P).*/
  theorapackB_read1(&_dec->opb,&val);
  _dec->state.frame_type=(int)val;
-  TH_DEBUG("%s\n",val?"predicted":"key");
  /*Read in the current qi.*/
  theorapackB_read(&_dec->opb,6,&val);
  _dec->state.qis[0]=(int)val;
-  TH_DEBUG("frame quality = { %ld ",val);
  theorapackB_read1(&_dec->opb,&val);
  if(!val)_dec->state.nqis=1;
  else{
    theorapackB_read(&_dec->opb,6,&val);
    _dec->state.qis[1]=(int)val;
-    TH_DEBUG("%ld ",val);
    theorapackB_read1(&_dec->opb,&val);
    if(!val)_dec->state.nqis=2;
    else{
      theorapackB_read(&_dec->opb,6,&val);
-      TH_DEBUG("%ld ",val);
      _dec->state.qis[2]=(int)val;
      _dec->state.nqis=3;
    }
  }
-  TH_DEBUG("}\n");
-
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
@ -305,7 +295,6 @@ static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
  int    run_count;
  theorapackB_read1(&_dec->opb,&val);
  flag=(int)val;
-
  sb=_dec->state.sbs;
  sb_end=sb+_dec->state.nsbs;
  run_count=npartial=0;
@ -319,7 +308,6 @@ static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
      npartial+=flag;
      sb++;
    }
-
    while(--run_count>0&&sb<sb_end);
    if(full_run&&sb<sb_end){
      theorapackB_read1(&_dec->opb,&val);
@ -349,7 +337,6 @@ static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
  for(;sb->coded_partially;sb++);
  theorapackB_read1(&_dec->opb,&val);
  flag=(int)val;
-
  while(sb<sb_end){
    int full_run;
    run_count=oc_sb_run_unpack(&_dec->opb);
@ -428,71 +415,6 @@ static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  }
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
-
-
-#ifdef _TH_DEBUG_
-  // assuming 4:2:0 right now; THIS IS WRONG but only an issue if dumping debug info
-  TH_DEBUG("predicted (partially coded frame)\n");
-  TH_DEBUG("superblock coded flags = {");
-  int x,y,i;
-  int w = _dec->state.info.frame_width;
-  int h = _dec->state.info.frame_height;
-
-  i=0;
-  for(y=0;y< (h+31)/32;y++){
-    TH_DEBUG("\n   ");
-    for(x=0;x< (w+31)/32;x++,i++)
-      TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)|
-	       (_dec->state.sbs[i].coded_fully));
-  }
-
-  TH_DEBUG("\n   ");
-  for(y=0;y< (h+63)/64;y++){
-    TH_DEBUG("\n   ");
-    for(x=0;x< (w+63)/64;x++,i++)
-      TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)|
-	       (_dec->state.sbs[i].coded_fully));
-  }
-  TH_DEBUG("\n   ");
-  for(y=0;y< (h+63)/64;y++){
-    TH_DEBUG("\n   ");
-    for(x=0;x< (w+63)/64;x++,i++)
-      TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)|
-	       (_dec->state.sbs[i].coded_fully));
-  }
-  TH_DEBUG("\n}\n");
-
-  if(i!=_dec->state.nsbs)
-    TH_DEBUG("WARNING!  superblock count, raster %d != flat %d\n",
-	     i,_dec->state.nsbs);
-  
-  TH_DEBUG("block coded flags = {");
-
-  i=0;
-  for(y=0;y< (h+7)/8;y++){
-    TH_DEBUG("\n   ");
-    for(x=0;x< (w+7)/8;x++,i++)
-      TH_DEBUG("%x", (_dec->state.frags[i].coded!=0));
-  }
-  TH_DEBUG("\n   ");
-  for(y=0;y< (h+15)/16;y++){
-    TH_DEBUG("\n   ");
-    for(x=0;x< (w+15)/16;x++,i++)
-      TH_DEBUG("%x", (_dec->state.frags[i].coded!=0));
-  }
-  TH_DEBUG("\n   ");
-  for(y=0;y< (h+15)/16;y++){
-    TH_DEBUG("\n   ");
-    for(x=0;x< (w+15)/16;x++,i++)
-      TH_DEBUG("%x", (_dec->state.frags[i].coded!=0));
-  }
-  TH_DEBUG("\n}\n");
-  
-  if(i!=_dec->state.nfrags)
-    TH_DEBUG("WARNING!  block count, raster %d != flat %d\n",
-	     i,_dec->state.nfrags);
-#endif	      
-
 }


@ -521,40 +443,31 @@ static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
  oc_mb               *mb;
  oc_mb               *mb_end;
  const int           *alphabet;
-  long                 val,j;
+  long                 val;
  int                  scheme0_alphabet[8];
  int                  mode_scheme;
  theorapackB_read(&_dec->opb,3,&val);
  mode_scheme=(int)val;
-  TH_DEBUG("mode encode scheme = %d\n",(int)val);
-
  if(mode_scheme==0){
    int mi;
    /*Just in case, initialize the modes to something.
      If the bitstream doesn't contain each index exactly once, it's likely
       corrupt and the rest of the packet is garbage anyway, but this way we
       won't crash, and we'll decode SOMETHING.*/
-    TH_DEBUG("mode scheme list = { ");
    /*LOOP VECTORIZES.*/
    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
    for(mi=0;mi<OC_NMODES;mi++){
      theorapackB_read(&_dec->opb,3,&val);
      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
-      TH_DEBUG("%d ",(int)val);
    }
-    TH_DEBUG("}\n");
    alphabet=scheme0_alphabet;
-  }else 
-    alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
-  if(mode_scheme==7)
-    mode_unpack=oc_clc_mode_unpack;
-  else 
-    mode_unpack=oc_vlc_mode_unpack;
+  }
+  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
+  if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
+  else mode_unpack=oc_vlc_mode_unpack;
  mb=_dec->state.mbs;
  mb_end=mb+_dec->state.nmbs;
-
-  TH_DEBUG("mode list = { ");
-  for(j=0;mb<mb_end;mb++){
+  for(;mb<mb_end;mb++){
    if(mb->mode!=OC_MODE_INVALID){
      int bi;
      for(bi=0;bi<4;bi++){
@ -562,21 +475,10 @@ static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
        fragi=mb->map[0][bi];
        if(fragi>=0&&_dec->state.frags[fragi].coded)break;
      }
-      if(bi<4){
-	mb->mode=alphabet[(*mode_unpack)(&_dec->opb)];
-	
-#ifdef _TH_DEBUG_
-	if((j&0x1f)==0)
-	  TH_DEBUG("\n   ");
-	TH_DEBUG("%d ",mb->mode);
-	j++;
-#endif
-
-      }else 
-	mb->mode=OC_MODE_INTER_NOMV;
+      if(bi<4)mb->mode=alphabet[(*mode_unpack)(&_dec->opb)];
+      else mb->mode=OC_MODE_INTER_NOMV;
    }
  }
-  TH_DEBUG("\n}\n");
 }


@ -629,23 +531,16 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
  const int              *map_idxs;
  long                    val;
  int                     map_nidxs;
-#ifdef _TH_DEBUG_
-  int                     j=0;
-#endif
  oc_mv                   last_mv[2];
  oc_mv                   cbmvs[4];
  set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
  theorapackB_read1(&_dec->opb,&val);
-  TH_DEBUG("motion vector table = %d\n",(int)val);
  mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
  map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
  map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
  memset(last_mv,0,sizeof(last_mv));
  mb=_dec->state.mbs;
  mb_end=mb+_dec->state.nmbs;
-
-  TH_DEBUG("motion vectors = {");
-
  for(;mb<mb_end;mb++)if(mb->mode!=OC_MODE_INVALID){
    oc_fragment *frag;
    oc_mv        mbmv;
@ -667,8 +562,7 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
    if(ncoded<=0)continue;
    mb_mode=mb->mode;
    switch(mb_mode){
-    case OC_MODE_INTER_MV_FOUR:
-      {
+      case OC_MODE_INTER_MV_FOUR:{
        oc_mv       lbmvs[4];
        int         bi;
        /*Mark the tail of the list, so we don't accidentally go past it.*/
@ -680,14 +574,6 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
            frag->mbmode=mb_mode;
            frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
            frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-
-#ifdef _TH_DEBUG_
-	    if((j&0x7)==0)
-	      TH_DEBUG("\n   ");
-	    TH_DEBUG("%+03d,%+03d ",frag->mv[0],frag->mv[1]);
-	    j++;
-#endif
-
          }
          else lbmvs[bi][0]=lbmvs[bi][1]=0;
        }
@ -708,57 +594,30 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
            frag->mv[1]=cbmvs[bi][1];
          }
        }
-      }
-      break;
-    case OC_MODE_INTER_MV:
-      {
+      }break;
+      case OC_MODE_INTER_MV:{
        last_mv[1][0]=last_mv[0][0];
        last_mv[1][1]=last_mv[0][1];
        mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
        mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-
-#ifdef _TH_DEBUG_
-	if((j&0x7)==0)
-	  TH_DEBUG("\n   ");
-	TH_DEBUG("%+03d,%+03d ",mbmv[0],mbmv[1]);
-	j++;
-#endif
-
-      }
-      break;
-    case OC_MODE_INTER_MV_LAST:
-      {
+      }break;
+      case OC_MODE_INTER_MV_LAST:{
        mbmv[0]=last_mv[0][0];
        mbmv[1]=last_mv[0][1];
-      }
-      break;
-    case OC_MODE_INTER_MV_LAST2:
-      {
+      }break;
+      case OC_MODE_INTER_MV_LAST2:{
        mbmv[0]=last_mv[1][0];
        mbmv[1]=last_mv[1][1];
        last_mv[1][0]=last_mv[0][0];
        last_mv[1][1]=last_mv[0][1];
        last_mv[0][0]=mbmv[0];
        last_mv[0][1]=mbmv[1];
-      }
-      break;
-    case OC_MODE_GOLDEN_MV:
-      {
+      }break;
+      case OC_MODE_GOLDEN_MV:{
        mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
        mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
-
-#ifdef _TH_DEBUG_
-	if((j&0x7)==0)
-	  TH_DEBUG("\n   ");
-	TH_DEBUG("%+03d,%+03d ",mbmv[0],mbmv[1]);
-	j++;
-#endif
-
-      }
-      break;
-    default:
-      mbmv[0]=mbmv[1]=0;
-      break;
+      }break;
+      default:mbmv[0]=mbmv[1]=0;break;
    }
    /*4MV mode fills in the fragments itself.
      For all other modes we can use this common code.*/
@ -773,9 +632,6 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
      }
    }
  }
-
-  TH_DEBUG("\n}\n");
-
 }

 static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
@ -798,7 +654,7 @@ static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  else{
    long val;
    int  flag;
-    int  nqi0;
+    int  nqi1;
    int  run_count;
    /*Otherwise, we decode a qi index for each fragment, using two passes of
      the same binary RLE scheme used for super-block coded bits.
@ -810,14 +666,14 @@ static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
      with the corresponding qi's for this frame.*/
    theorapackB_read1(&_dec->opb,&val);
    flag=(int)val;
-    run_count=nqi0=0;
+    run_count=nqi1=0;
    while(coded_fragi<coded_fragi_end){
      int full_run;
      run_count=oc_sb_run_unpack(&_dec->opb);
      full_run=run_count>=4129;
      do{
        _dec->state.frags[*coded_fragi++].qi=flag;
-        nqi0+=!flag;
+        nqi1+=flag;
      }
      while(--run_count>0&&coded_fragi<coded_fragi_end);
      if(full_run&&coded_fragi<coded_fragi_end){
@ -830,7 +686,7 @@ static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
      If it's not, we should issue a warning of some kind.*/
    /*If we have 3 different qi's for this frame, and there was at least one
       fragment with a non-zero qi, make the second pass.*/
-    if(_dec->state.nqis==3&&nqi0<ncoded_fragis){
+    if(_dec->state.nqis==3&&nqi1>0){
      /*Skip qii==0 fragments.*/
      for(coded_fragi=_dec->state.coded_fragis;
       _dec->state.frags[*coded_fragi].qi==0;coded_fragi++);
@ -1503,9 +1359,6 @@ static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++){
      if(!frag->coded)continue;
-#ifdef _TH_DEBUG_
-      frag->quant[0] = frag->dc; /* stash un-predicted dc for debug output */
-#endif
      pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+=
       oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
      ncoded_fragis++;
@ -1597,40 +1450,6 @@ static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
  _pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
  oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis[_pli],
   _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
-
-#ifdef _TH_DEBUG_
-  {
-    int i,j,k;
-    int framei=_dec->state.ref_frame_idx[OC_FRAME_SELF];
-    int ystride=_dec->state.ref_frame_bufs[framei][_pli].stride;
-    int *fragi_end = _pipe->coded_fragis[_pli];
-    int *fragi = fragi_end-_pipe->ncoded_fragis[_pli];
-
-    for(;fragi<fragi_end;fragi++){
-      oc_fragment   *frag=_dec->state.frags+*fragi;
-      unsigned char *src=frag->buffer[framei];
-      for(i=0,j=0;j<8;j++){
-	for(k=0;k<8;k++,i++)
-	  frag->recon[i] = src[k];
-	src+=ystride;
-      }
-    }
-
-    fragi = _pipe->uncoded_fragis[_pli];
-    fragi_end = fragi+_pipe->nuncoded_fragis[_pli];
-
-    for(;fragi<fragi_end;fragi++){
-      oc_fragment   *frag=_dec->state.frags+*fragi;
-      unsigned char *src=frag->buffer[framei];
-      for(i=0,j=0;j<8;j++){
-	for(k=0;k<8;k++,i++)
-	  frag->recon[i] = src[k];
-	src+=ystride;
-      }
-    }
-  }
-#endif
-    
 }

 /*Filter a horizontal block edge.*/
@ -1941,10 +1760,10 @@ static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
      if(strong&&var>sthresh){
        oc_dering_block(idata+x,iplane->stride,b,
         _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
-        if(_pli||(b&1)&&*(variance-1)>OC_DERING_THRESH4||
-         (b&2)&&variance[1]>OC_DERING_THRESH4||
-         (b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4||
-         (b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){
+        if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
+         !(b&2)&&variance[1]>OC_DERING_THRESH4||
+         !(b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4||
+         !(b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){
          oc_dering_block(idata+x,iplane->stride,b,
           _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
          oc_dering_block(idata+x,iplane->stride,b,
@ -2039,7 +1858,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.
    Only proceed if we have a non-empty packet.*/
-
  if(_op->bytes!=0){
    oc_dec_pipeline_state pipe;
    th_ycbcr_buffer       stripe_buf;
@ -2093,7 +1911,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
    }
    oc_dec_block_qis_unpack(_dec);
    oc_dec_residual_tokens_unpack(_dec);
-
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.*/
@ -2203,91 +2020,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
      }
      notstart=1;
    }
-
-#ifdef _TH_DEBUG_
-    {
-      int x,y,i,j,k,xn,yn;
-      int plane;
-      int buf;
-
-      /* dump fragment DCT components */
-      for(plane=0;plane<3;plane++){
-	char *plstr;
-	int offset;
-	switch(plane){
-	case 0:
-	  plstr="Y";
-	  xn = _dec->state.info.frame_width>>3;
-	  yn = _dec->state.info.frame_height>>3;
-	  offset = 0; 
-	  break;
-	case 1:
-	  plstr="U";
-	  xn = _dec->state.info.frame_width>>4;
-	  yn = _dec->state.info.frame_height>>4;
-	  offset = xn*yn*4;
-	  break;
-	case 2:
-	  plstr="V";
-	  xn = _dec->state.info.frame_width>>4;
-	  yn = _dec->state.info.frame_height>>4;
-	  offset = xn*yn*5;
-	  break;
-	}
-	for(y=0;y<yn;y++){
-	  for(x=0;x<xn;x++,i++){
-	    
-	    for(buf=0;buf<4;buf++){
-	      int *ptr;
-	      char *bufn;
-	      int codecheck=0;
-
-	      i = offset + y*xn + x;
-
-	      switch(buf){
-	      case 0:
-		codecheck=1;
-		bufn = "coded";
-		ptr = _dec->state.frags[i].quant;
-		break;
-	      case 1:
-		codecheck=1;
-		bufn = "coeff";
-		ptr = _dec->state.frags[i].freq;
-		break;
-	      case 2:
-		codecheck=1;
-		bufn = "idct";
-		ptr = _dec->state.frags[i].time;
-		break;
-	      case 3:
-		bufn = "recon";
-		ptr = _dec->state.frags[i].loop;
-		break;
-	      }
-	      
-	      
-	      TH_DEBUG("%s %s [%d][%d] = {",bufn,plstr,x,y);
-	      if(codecheck && !_dec->state.frags[i].coded)
-		TH_DEBUG(" not coded }\n");
-	      else{
-		int l=0;
-		for(j=0;j<8;j++){
-		  TH_DEBUG("\n   ");
-		  for(k=0;k<8;k++,l++){
-		    TH_DEBUG("%d ",ptr[l]);
-		  }
-		}
-		TH_DEBUG(" }\n");
-	      }
-	    }
-	    TH_DEBUG("\n");
-	  }
-	}
-      }
-    }
-#endif
-
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
--- a/media/libtheora/lib/dec/dequant.c
+++ b/media/libtheora/lib/dec/dequant.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: dequant.c 14369 2008-01-05 23:15:32Z tterribe $
+    last mod: $Id: dequant.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

@ -127,65 +127,6 @@ int oc_quant_params_unpack(oggpack_buffer *_opb,
    }
    while(qri-->0);
  }
-
-#ifdef _TH_DEBUG_
-  /* dump the tables */
-  {
-    int i, j, k, l, m;
-    TH_DEBUG("loop filter limits = {");
-    for(i=0;i<64;){
-      TH_DEBUG("\n        ");
-      for(j=0;j<16;i++,j++)
-	TH_DEBUG("%3d ",_qinfo->loop_filter_limits[i]);
-    }
-    TH_DEBUG("\n}\n\n");
-
-    TH_DEBUG("ac scale = {");
-    for(i=0;i<64;){
-      TH_DEBUG("\n        ");
-      for(j=0;j<16;i++,j++)
-	TH_DEBUG("%3d ",_qinfo->ac_scale[i]);
-    }
-    TH_DEBUG("\n}\n\n");
-
-    TH_DEBUG("dc scale = {");
-    for(i=0;i<64;){
-      TH_DEBUG("\n        ");
-      for(j=0;j<16;i++,j++)
-	TH_DEBUG("%3d ",_qinfo->dc_scale[i]);
-    }
-    TH_DEBUG("\n}\n\n");
-
-    for(k=0;k<2;k++)
-      for(l=0;l<3;l++){
-	char *name[2][3]={
-	  {"intra Y bases","intra U bases", "intra V bases"},
-	  {"inter Y bases","inter U bases", "inter V bases"}
-	};
-
-	th_quant_ranges *r = &_qinfo->qi_ranges[k][l];
-	TH_DEBUG("%s = {\n",name[k][l]);
-	TH_DEBUG("        ranges = %d\n",r->nranges);
-	TH_DEBUG("        intervals = { ");
-	for(i=0;i<r->nranges;i++)
-	  TH_DEBUG("%3d ",r->sizes[i]);
-	TH_DEBUG("}\n");
-	TH_DEBUG("\n        matricies = { ");
-	for(m=0;m<r->nranges+1;m++){
-	  TH_DEBUG("\n          { ");
-	  for(i=0;i<64;){
-	    TH_DEBUG("\n            ");
-	    for(j=0;j<8;i++,j++)
-	      TH_DEBUG("%3d ",r->base_matrices[m][i]);
-	  }
-	  TH_DEBUG("\n          }");
-	}
-	TH_DEBUG("\n        }\n");
-      }
-  }
-    
-#endif
-
  _ogg_free(base_mats);
  return 0;
 }
@ -227,4 +168,3 @@ void oc_quant_params_clear(th_quant_info *_qinfo){
    _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices);
  }
 }
-
--- a/media/libtheora/lib/dec/dequant.h
+++ b/media/libtheora/lib/dec/dequant.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: dequant.h 13884 2007-09-22 08:38:10Z giles $
+    last mod: $Id: dequant.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/enquant.h
+++ b/media/libtheora/lib/dec/enquant.h
@ -1,43 +0,0 @@
-/********************************************************************
- *                                                                  *
- * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
- * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
- * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
- * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
- *                                                                  *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
- *                                                                  *
- ********************************************************************
-
-  function:
-    last mod: $Id: enquant.h 13884 2007-09-22 08:38:10Z giles $
-
- ********************************************************************/
-
-#if !defined(_enquant_H)
-# define _enquant_H (1)
-# include "quant.h"
-
-/*The amount to scale the forward quantizer value by.*/
-#define OC_FQUANT_SCALE ((ogg_uint32_t)1<<OC_FQUANT_SHIFT)
-/*The amount to add to the scaled forward quantizer for rounding.*/
-#define OC_FQUANT_ROUND (1<<OC_FQUANT_SHIFT-1)
-/*The amount to shift the resulting product by.*/
-#define OC_FQUANT_SHIFT (16)
-
-
-
-/*The default quantization parameters used by VP3.1.*/
-extern const th_quant_info TH_VP31_QUANT_INFO;
-/*Our default quantization parameters.*/
-extern const th_quant_info OC_DEF_QUANT_INFO[4];
-
-
-
-void oc_quant_params_pack(oggpack_buffer *_opb,
- const th_quant_info *_qinfo);
-void oc_enquant_tables_init(oc_quant_table *_dequant[2][3],
- oc_quant_table *_enquant[2][3],const th_quant_info *_qinfo);
-
-#endif
--- a/media/libtheora/lib/dec/fragment.c
+++ b/media/libtheora/lib/dec/fragment.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: fragment.c 14348 2008-01-04 18:17:00Z tterribe $
+    last mod: $Id: fragment.c 15469 2008-10-30 12:49:42Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/huffdec.c
+++ b/media/libtheora/lib/dec/huffdec.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: huffdec.c 14493 2008-02-13 09:25:37Z tterribe $
+    last mod: $Id: huffdec.c 15431 2008-10-21 05:04:02Z giles $

 ********************************************************************/

@ -25,6 +25,52 @@
 #define _ogg_offsetof(_type,_field)\
 ((size_t)((char *)&((_type *)0)->_field-(char *)0))

+/*These two functions are really part of the bitpack.c module, but
+  they are only used here. Declaring local static versions so they
+  can be inlined saves considerable function call overhead.*/
+
+/*Read in bits without advancing the bitptr.
+  Here we assume 0<=_bits&&_bits<=32.
+  _b:    The bit-packer state; its fields are only read here, never written.
+  _bits: The number of bits to look at.
+  _ret:  Receives the bits, right-aligned: up to five bytes starting at ptr
+          are assembled into a 32-bit window, which is then shifted down by
+          32-_bits.
+  Return: 0 on the normal path.
+          When storage-endbyte<=0, *_ret is set to 0 and the result is -1 if
+           _bits plus the current bit offset (endbit) is greater than that
+           non-positive remainder expressed in bits, and 0 otherwise.
+  Note that in expressions such as ptr[0]<<24+endbit the addition binds
+   tighter than the shift, so the shift count is 24+endbit.
+  The final right shift by 32-_bits is applied in two halves (m>>1 and
+   m+1>>1), so neither shift count exceeds 16 even when _bits is 0.
+  NOTE(review): presumably this avoids shifting a 32-bit long by a full 32
+   bits; confirm against upstream.*/
+static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret){
+  long ret;
+  long m;
+  long d;
+  m=32-_bits;
+  _bits+=_b->endbit;
+  d=_b->storage-_b->endbyte;
+  if(d<=4){
+    /*Not the main path.
+      Here storage-endbyte is at most 4, so the number of bits examined may
+       have to be limited below.*/
+    if(d<=0){
+      *_ret=0L;
+      return -(_bits>d*8);
+    }
+    /*If we have some bits left, but not enough, return the ones we have.*/
+    if(d*8<_bits)_bits=d*8;
+  }
+  ret=_b->ptr[0]<<24+_b->endbit;
+  if(_bits>8){
+    ret|=_b->ptr[1]<<16+_b->endbit;
+    if(_bits>16){
+      ret|=_b->ptr[2]<<8+_b->endbit;
+      if(_bits>24){
+        ret|=_b->ptr[3]<<_b->endbit;
+        if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit;
+      }
+    }
+  }
+  *_ret=((ret&0xFFFFFFFF)>>(m>>1))>>(m+1>>1);
+  return 0;
+}
+
+/*Advance the bitptr by _bits bits.
+  The current bit offset (endbit) is first added to _bits; ptr and endbyte
+   then move forward by the number of whole bytes in that sum (_bits>>3), and
+   endbit is set to the remaining low three bits (_bits&7).
+  No check against the buffer's storage is made here.*/
+static void theorapackB_adv(oggpack_buffer *_b,int _bits){
+  _bits+=_b->endbit;
+  _b->ptr+=_bits>>3;
+  _b->endbyte+=_bits>>3;
+  _b->endbit=_bits&7;
+}
+
+
 /*The log_2 of the size of a lookup table is allowed to grow to relative to
   the number of unique nodes it contains.
  E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
--- a/media/libtheora/lib/dec/huffdec.h
+++ b/media/libtheora/lib/dec/huffdec.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: huffdec.h 14359 2008-01-04 20:11:13Z tterribe $
+    last mod: $Id: huffdec.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/huffman.h
+++ b/media/libtheora/lib/dec/huffman.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: huffman.h 13884 2007-09-22 08:38:10Z giles $
+    last mod: $Id: huffman.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/idct.c
+++ b/media/libtheora/lib/dec/idct.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: idct.c 13884 2007-09-22 08:38:10Z giles $
+    last mod: $Id: idct.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

@ -169,7 +169,6 @@ static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
 }

-
 /*Performs an inverse 8 point Type-II DCT transform.
  The output is scaled by a factor of 2 relative to the orthonormal version of
   the transform.
@ -204,7 +203,6 @@ static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
 }

-
 /*Performs an inverse 8 point Type-II DCT transform.
  The output is scaled by a factor of 2 relative to the orthonormal version of
   the transform.
--- a/media/libtheora/lib/dec/idct.h
+++ b/media/libtheora/lib/dec/idct.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: idct.h 13884 2007-09-22 08:38:10Z giles $
+    last mod: $Id: idct.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/info.c
+++ b/media/libtheora/lib/dec/info.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: info.c 13884 2007-09-22 08:38:10Z giles $
+    last mod: $Id: info.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/internal.c
+++ b/media/libtheora/lib/dec/internal.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: internal.c 14385 2008-01-09 19:53:18Z giles $
+    last mod: $Id: internal.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/ocintrin.h
+++ b/media/libtheora/lib/dec/ocintrin.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: ocintrin.h 13884 2007-09-22 08:38:10Z giles $
+    last mod: $Id: ocintrin.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/quant.c
+++ b/media/libtheora/lib/dec/quant.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: quant.c 14375 2008-01-06 05:37:33Z tterribe $
+    last mod: $Id: quant.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

@ -21,8 +21,8 @@
 #include "quant.h"
 #include "decint.h"

-unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
-unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
+static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
+static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};

 /*Initializes the dequantization tables from a set of quantizer info.
  Currently the dequantizer (and elsewhere enquantizer) tables are expected to
@ -39,34 +39,31 @@ unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
   qi values change between frames (this is what VP3 did).*/
 void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
 int _pp_dc_scale[64],const th_quant_info *_qinfo){
-  int          qti; /* coding mode: intra or inter */
-  int          pli; /* Y U V */
+  /*coding mode: intra or inter.*/
+  int          qti;
+  /*Y', C_b, C_r*/
+  int          pli;
  for(qti=0;qti<2;qti++){
    for(pli=0;pli<3;pli++){
      oc_quant_tables stage;
-
-      int qi;  /* quality index */
-      int qri; /* range iterator */
-      
+      /*Quality index.*/
+      int qi;
+      /*Range iterator.*/
+      int qri;
      for(qi=0,qri=0; qri<=_qinfo->qi_ranges[qti][pli].nranges; qri++){
        th_quant_base base;
-	
        ogg_uint32_t  q;
        int           qi_start;
        int           qi_end;
        int           ci;
        memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
         sizeof(base));
-
        qi_start=qi;
-	if(qri==_qinfo->qi_ranges[qti][pli].nranges)
-	  qi_end=qi+1;
-	else 
-	  qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
-	
-	/* Iterate over quality indicies in this range */
+        if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
+        else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
+        /*Iterate over quality indices in this range.*/
        for(;;){
-	  
+          ogg_uint32_t qfac;
          /*In the original VP3.2 code, the rounding offset and the size of the
             dead zone around 0 were controlled by a "sharpness" parameter.
            The size of our dead zone is now controlled by the per-coefficient
@ -76,77 +73,50 @@ void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
            Hence, all of that VP3.2 code is gone from here, and the remaining
             floating point code has been implemented as equivalent integer code
             with exact precision.*/
-
-	  /* for postprocess, not dequant */
-	  if(_pp_dc_scale!=NULL)
-	    _pp_dc_scale[qi]=(int)((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/160);
-
+          qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
+          /*For postprocessing, not dequantization.*/
+          if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
          /*Scale DC the coefficient from the proper table.*/
-	  q=((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/100)<<2;
+          q=(qfac/100)<<2;
          q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
          stage[qi][0]=(ogg_uint16_t)q;
-	  
          /*Now scale AC coefficients from the proper table.*/
          for(ci=1;ci<64;ci++){
            q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
            q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
            stage[qi][ci]=(ogg_uint16_t)q;
          }
-	  
          if(++qi>=qi_end)break;
-	  
          /*Interpolate the next base matrix.*/
          for(ci=0;ci<64;ci++){
-	    base[ci]=(unsigned char)
-	      ((2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
+            base[ci]=(unsigned char)(
+             (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
             (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
             +_qinfo->qi_ranges[qti][pli].sizes[qri])/
             (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
          }
        }
      }
-
-      /* Staging matricies complete; commit to memory only if this
-	 isn't a duplicate of a preceeding plane. This simple check
-	 helps us improve cache coherency later.*/
+      /*Staging matrices complete; commit to memory only if this isn't a
+         duplicate of a preceding plane.
+        This simple check helps us improve cache coherency later.*/
      {
-	int dupe = 0;
-	int i,j;
-	for(i=0;i<=qti;i++){
-	  for(j=0;j<(i<qti?3:pli);j++){
-	    if(!memcmp(stage,_dequant[i][j],sizeof(stage))){
+        int dupe;
+        int qtj;
+        int plj;
+        dupe=0;
+        for(qtj=0;qtj<=qti;qtj++){
+          for(plj=0;plj<(qtj<qti?3:pli);plj++){
+            if(!memcmp(stage,_dequant[qtj][plj],sizeof(stage))){
              dupe=1;
              break;
            }
          }
          if(dupe)break;
        }
-	if(dupe){
-	  _dequant[qti][pli]=_dequant[i][j];
-	}else{
-	  memcpy(_dequant[qti][pli],stage,sizeof(stage));
+        if(dupe)_dequant[qti][pli]=_dequant[qtj][plj];
+        else memcpy(_dequant[qti][pli],stage,sizeof(stage));
      }
    }
  }
 }
-
-#ifdef _TH_DEBUG_
-  int i, j, k, l;
-  /* dump the calculated quantizer tables */
-  for(i=0;i<2;i++){
-    for(j=0;j<3;j++){
-      for(k=0;k<64;k++){
-	TH_DEBUG("quantizer table [%s][%s][Q%d] = {",
-		 (i==0?"intra":"inter"),(j==0?"Y":(j==1?"U":"V")),k);
-	for(l=0;l<64;l++){
-	  if((l&7)==0)
-	    TH_DEBUG("\n   ");
-	  TH_DEBUG("%4d ",_dequant[i][j][k][l]);
-	}
-	TH_DEBUG("}\n");
-      }
-    }
-  }
-#endif
-
-}
--- a/media/libtheora/lib/dec/quant.h
+++ b/media/libtheora/lib/dec/quant.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: quant.h 14059 2007-10-28 23:43:27Z xiphmont $
+    last mod: $Id: quant.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

@ -24,23 +24,11 @@ typedef ogg_uint16_t   oc_quant_table[64];
 typedef oc_quant_table oc_quant_tables[64];


-
 /*Maximum scaled quantizer value.*/
 #define OC_QUANT_MAX          (1024<<2)


-
-/*Minimum scaled DC coefficient frame quantizer value for intra and inter
-   modes.*/
-extern unsigned OC_DC_QUANT_MIN[2];
-/*Minimum scaled AC coefficient frame quantizer value for intra and inter
-   modes.*/
-extern unsigned OC_AC_QUANT_MIN[2];
-
-
-
 void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
-			    int _pp_dc_scale[64],
-			    const th_quant_info *_qinfo);
+ int _pp_dc_scale[64],const th_quant_info *_qinfo);

 #endif
--- a/media/libtheora/lib/dec/state.c
+++ b/media/libtheora/lib/dec/state.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: state.c 14714 2008-04-12 01:04:43Z giles $
+    last mod: $Id: state.c 15469 2008-10-30 12:49:42Z tterribe $

 ********************************************************************/

@ -831,37 +831,11 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag,
    ogg_int16_t p;
    /*Why is the iquant product rounded in this case and no others?
      Who knows.*/
-
    p=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
    /*LOOP VECTORIZES.*/
    for(ci=0;ci<64;ci++)res_buf[ci]=p;
-
-#ifdef _TH_DEBUG_
-    {
-      int i;
-      _frag->freq[0] = _frag->dc*_dc_iquant;
-      _frag->time[0] = p;
-      for(i=1;i<64;i++){
-	_frag->quant[i] = 0;
-	_frag->freq[i] = 0;
-	_frag->time[i] = p;
-      }
-    }
-#endif
-
  }
  else{
-
-#ifdef _TH_DEBUG_
-    {
-      int i;
-      for(i=1;i<_ncoefs;i++)
-	_frag->quant[i] = _dct_coeffs[i];
-      for(;i<64;i++)
-	_frag->quant[i] = 0;
-    }
-#endif
-
    /*First, dequantize the coefficients.*/
    dct_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
    for(zzi=1;zzi<_ncoefs;zzi++){
@ -869,21 +843,6 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag,
      ci=OC_FZIG_ZAG[zzi];
      dct_buf[ci]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*_ac_iquant[ci]);
    }
-
-#ifdef _TH_DEBUG_
-    for(;zzi<64;zzi++){
-      int ci;
-      ci=OC_FZIG_ZAG[zzi];
-      dct_buf[ci]=0;
-    }
-
-    {
-      int i;
-      for(i=0;i<64;i++)
-	_frag->freq[i] = dct_buf[i];
-    }
-#endif
-
    /*Then, fill in the remainder of the coefficients with 0's, and perform
       the iDCT.*/
    if(_last_zzi<10){
@ -894,15 +853,6 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag,
      for(;zzi<64;zzi++)dct_buf[OC_FZIG_ZAG[zzi]]=0;
      oc_idct8x8_c(res_buf,dct_buf);
    }
-
-#ifdef _TH_DEBUG_
-    {
-      int i;
-      for(i=0;i<64;i++)
-	_frag->time[i] = res_buf[i];
-    }
-#endif
-
  }
  /*Fill in the target buffer.*/
  dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
@ -1050,7 +1000,6 @@ void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv,
  _bv+=127;
  iplane=_state->ref_frame_bufs[_refi]+_pli;
  fplane=_state->fplanes+_pli;
-
  /*The following loops are constructed somewhat non-intuitively on purpose.
    The main idea is: if a block boundary has at least one coded fragment on
     it, the filter is applied to it.
@ -1079,46 +1028,6 @@ void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv,
           iplane->stride,_bv);
        }
      }
-
-
-#ifdef _TH_DEBUG_
-      {
-	int i,j,k,l;
-	unsigned char *src;
-	
-	for(l=0;l<5;l++){
-	  oc_fragment *f;
-	  switch(l){
-	  case 0: 
-	    f = frag;
-	    break;
-	  case 1: /* left */
-	    if(frag == frag0)continue;
-	    f = frag-1;
-	    break;
-	  case 2: /* bottom (top once flipped) */
-	    if(frag0 == frag_top)continue;
-	    f = frag - fplane->nhfrags;
-	    break;
-	  case 3: /* right */
-	    if(frag+1 >= frag_end) continue;
-	    f = frag + 1;
-	    break;
-	  case 4: /* top (bottom once flipped) */
-	    if(frag+fplane->nhfrags >= frag_bot)continue;
-	    f = frag + fplane->nhfrags;
-	    break;
-	  }
-	  
-	  src = f->buffer[_refi];
-	  for(i=0,j=0;j<8;j++){
-	    for(k=0;k<8;k++,i++)
-	      f->loop[i] = src[k];
-	    src+=iplane->stride;
-	  }
-	}
-      }
-#endif
      frag++;
    }
    frag0+=fplane->nhfrags;
--- a/media/libtheora/lib/dec/x86/mmxfrag.c
+++ b/media/libtheora/lib/dec/x86/mmxfrag.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: mmxfrag.c 14345 2008-01-04 18:02:21Z tterribe $
+    last mod: $Id: mmxfrag.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

@ -21,6 +21,7 @@
  Note: Loops are unrolled for best performance.
  The iteration each instruction belongs to is marked in the comments as #i.*/
 #include "x86int.h"
+#include <stddef.h>

 #if defined(USE_ASM)

@ -133,8 +134,8 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
    :[residue]"r"(_residue),
     [dst]"r"(_dst),
     [dst4]"r"(_dst+(_dst_ystride<<2)),
-     [dst_ystride]"r"((long)_dst_ystride),
-     [dst_ystride3]"r"((long)_dst_ystride*3)
+     [dst_ystride]"r"((ptrdiff_t)_dst_ystride),
+     [dst_ystride3]"r"((ptrdiff_t)_dst_ystride*3)
    :"memory"
  );
 }
@ -185,8 +186,8 @@ void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
      /*Advance dst.*/
      "lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
      :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src)
-      :[dst_ystride]"r"((long)_dst_ystride),
-       [src_ystride]"r"((long)_src_ystride)
+      :[dst_ystride]"r"((ptrdiff_t)_dst_ystride),
+       [src_ystride]"r"((ptrdiff_t)_src_ystride)
      :"memory"
    );
  }
@ -278,7 +279,7 @@ void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
      "lea (%[dst],%[ystride],2),%[dst]\n\t"
     :[dst]"+r"(_dst),[residue]"+r"(_residue),
      [src1]"+r"(_src1),[src2]"+r"(_src2)
-     :[ystride]"r"((long)_dst_ystride)
+     :[ystride]"r"((ptrdiff_t)_dst_ystride)
     :"memory"
    );
  }
--- a/media/libtheora/lib/dec/x86/mmxidct.c
+++ b/media/libtheora/lib/dec/x86/mmxidct.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: mmxidct.c 14357 2008-01-04 20:05:28Z tterribe $
+    last mod: $Id: mmxidct.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/x86/mmxstate.c
+++ b/media/libtheora/lib/dec/x86/mmxstate.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: mmxstate.c 14385 2008-01-09 19:53:18Z giles $
+    last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

@ -19,6 +19,7 @@
  Originally written by Rudolf Marek.*/
 #include "x86int.h"
 #include "../../internal.h"
+#include <stddef.h>

 #if defined(USE_ASM)

@ -182,9 +183,9 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
  const int *fragi;
  const int *fragi_end;
  int        dst_framei;
-  long       dst_ystride;
+  ptrdiff_t  dst_ystride;
  int        src_framei;
-  long       src_ystride;
+  ptrdiff_t  src_ystride;
  dst_framei=_state->ref_frame_idx[_dst_frame];
  src_framei=_state->ref_frame_idx[_src_frame];
  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
@ -194,14 +195,14 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
    oc_fragment   *frag;
    unsigned char *dst;
    unsigned char *src;
-    long           esi;
+    ptrdiff_t      s;
    frag=_state->frags+*fragi;
    dst=frag->buffer[dst_framei];
    src=frag->buffer[src_framei];
    __asm__ __volatile__(
      /*src+0*src_ystride*/
      "movq (%[src]),%%mm0\n\t"
-      /*esi=src_ystride*3*/
+      /*s=src_ystride*3*/
      "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
      /*src+1*src_ystride*/
      "movq (%[src],%[src_ystride]),%%mm1\n\t"
@ -211,7 +212,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
      "movq (%[src],%[s]),%%mm3\n\t"
      /*dst+0*dst_ystride*/
      "movq %%mm0,(%[dst])\n\t"
-      /*esi=dst_ystride*3*/
+      /*s=dst_ystride*3*/
      "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
      /*dst+1*dst_ystride*/
      "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
@ -225,7 +226,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
      "lea (%[dst],%[dst_ystride],4),%[dst]\n\t"
      /*src+0*src_ystride*/
      "movq (%[src]),%%mm0\n\t"
-      /*esi=src_ystride*3*/
+      /*s=src_ystride*3*/
      "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
      /*src+1*src_ystride*/
      "movq (%[src],%[src_ystride]),%%mm1\n\t"
@ -235,7 +236,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
      "movq (%[src],%[s]),%%mm3\n\t"
      /*dst+0*dst_ystride*/
      "movq %%mm0,(%[dst])\n\t"
-      /*esi=dst_ystride*3*/
+      /*s=dst_ystride*3*/
      "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
      /*dst+1*dst_ystride*/
      "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
@ -243,7 +244,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
      "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
      /*dst+3*dst_ystride*/
      "movq %%mm3,(%[dst],%[s])\n\t"
-      :[s]"=&S"(esi)
+      :[s]"=&r"(s)
      :[dst]"r"(dst),[src]"r"(src),[dst_ystride]"r"(dst_ystride),
       [src_ystride]"r"(src_ystride)
      :"memory"
@ -255,12 +256,12 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,

 static void loop_filter_v(unsigned char *_pix,int _ystride,
 const ogg_int16_t *_ll){
-  long esi;
+  ptrdiff_t s;
  _pix-=_ystride*2;
  __asm__ __volatile__(
    /*mm0=0*/
    "pxor %%mm0,%%mm0\n\t"
-    /*esi=_ystride*3*/
+    /*s=_ystride*3*/
    "lea (%[ystride],%[ystride],2),%[s]\n\t"
    /*mm7=_pix[0...8]*/
    "movq (%[pix]),%%mm7\n\t"
@ -427,8 +428,8 @@ static void loop_filter_v(unsigned char *_pix,int _ystride,
    /*Write it back out.*/
    "movq %%mm4,(%[pix],%[ystride])\n\t"
    "movq %%mm1,(%[pix],%[ystride],2)\n\t"
-    :[s]"=&S"(esi)
-    :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll)
+    :[s]"=&r"(s)
+    :[pix]"r"(_pix),[ystride]"r"((ptrdiff_t)_ystride),[ll]"r"(_ll)
    :"memory"
  );
 }
@ -437,14 +438,16 @@ static void loop_filter_v(unsigned char *_pix,int _ystride,
  Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
   four p0's to one register we must transpose the values in four mmx regs.
  When half is done we repeat this for the rest.*/
-static void loop_filter_h4(unsigned char *_pix,long _ystride,
+static void loop_filter_h4(unsigned char *_pix,ptrdiff_t _ystride,
 const ogg_int16_t *_ll){
-  long esi;
-  long edi;
+  ptrdiff_t s;
+  /*d doesn't technically need to be 64-bit on x86-64, but making it so will
+     help avoid partial register stalls.*/
+  ptrdiff_t d;
  __asm__ __volatile__(
    /*x x x x 3 2 1 0*/
    "movd (%[pix]),%%mm0\n\t"
-    /*esi=_ystride*3*/
+    /*s=_ystride*3*/
    "lea (%[ystride],%[ystride],2),%[s]\n\t"
    /*x x x x 7 6 5 4*/
    "movd (%[pix],%[ystride]),%%mm1\n\t"
@ -557,19 +560,19 @@ static void loop_filter_h4(unsigned char *_pix,long _ystride,
    "packuswb %%mm7,%%mm4\n\t"
    /*mm5=E D A 9 6 5 2 1*/
    "punpcklbw %%mm4,%%mm5\n\t"
-    /*edi=6 5 2 1*/
-    "movd %%mm5,%%edi\n\t"
-    "movw %%di,1(%[pix])\n\t"
+    /*d=6 5 2 1*/
+    "movd %%mm5,%[d]\n\t"
+    "movw %w[d],1(%[pix])\n\t"
    /*Why is there such a big stall here?*/
    "psrlq $32,%%mm5\n\t"
-    "shrl $16,%%edi\n\t"
-    "movw %%di,1(%[pix],%[ystride])\n\t"
-    /*edi=E D A 9*/
-    "movd %%mm5,%%edi\n\t"
-    "movw %%di,1(%[pix],%[ystride],2)\n\t"
-    "shrl $16,%%edi\n\t"
-    "movw %%di,1(%[pix],%[s])\n\t"
-    :[s]"=&S"(esi),[d]"=&D"(edi),
+    "shr $16,%[d]\n\t"
+    "movw %w[d],1(%[pix],%[ystride])\n\t"
+    /*d=E D A 9*/
+    "movd %%mm5,%[d]\n\t"
+    "movw %w[d],1(%[pix],%[ystride],2)\n\t"
+    "shr $16,%[d]\n\t"
+    "movw %w[d],1(%[pix],%[s])\n\t"
+    :[s]"=&r"(s),[d]"=&r"(d),
     [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
    :
    :"memory"
--- a/media/libtheora/lib/dec/x86/x86int.h
+++ b/media/libtheora/lib/dec/x86/x86int.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: x86int.h 14375 2008-01-06 05:37:33Z tterribe $
+    last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $

 ********************************************************************/

--- a/media/libtheora/lib/dec/x86/x86state.c
+++ b/media/libtheora/lib/dec/x86/x86state.c
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: x86state.c 13884 2007-09-22 08:38:10Z giles $
+    last mod: $Id: x86state.c 15427 2008-10-21 02:36:19Z xiphmont $

 ********************************************************************/

@ -19,7 +19,7 @@

 #if defined(USE_ASM)

-#include "../../cpu.h"
+#include "../../cpu.c"

 void oc_state_vtable_init_x86(oc_theora_state *_state){
  _state->cpu_flags=oc_cpu_flags_get();
--- a/media/libtheora/lib/dec/x86_vc/mmxfrag.c
+++ b/media/libtheora/lib/dec/x86_vc/mmxfrag.c
@ -0,0 +1,214 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id:
+
+ ********************************************************************/
+#include "../../internal.h"
+
+/* ------------------------------------------------------------------------
+  MMX reconstruction fragment routines for Visual Studio.
+  Tested with VS2005. Should compile for VS2003 and VC6 as well.
+
+  Initial implementation 2007 by Nils Pipenbrinck.
+  ---------------------------------------------------------------------*/
+
+#if defined(USE_ASM)
+
+void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t *_residue){
+  /* ---------------------------------------------------------------------
+  This function does the intra reconstruction step with 8 iterations
+  unrolled. The iteration for each instruction is noted by the #id in the
+  comments (in case you want to reconstruct it)
+  --------------------------------------------------------------------- */
+  _asm{
+    mov       edi, [_residue]     /* load residue ptr     */
+    mov       eax, 0x00800080     /* 128 bias in 2 words  */
+    mov       ebx, [_dst_ystride] /* load dst-stride      */
+    mov       edx, [_dst]         /* load dest pointer    */
+
+    /* unrolled loop begins here */
+
+    movd      mm0, eax            /* load constant        */
+    movq      mm1, [edi+ 8*0]     /* #1 load low residue  */
+    movq      mm2, [edi+ 8*1]     /* #1 load high residue */
+    punpckldq mm0, mm0            /* bias in all 4 words  */
+    movq      mm3, [edi+ 8*2]     /* #2 load low residue  */
+    movq      mm4, [edi+ 8*3]     /* #2 load high residue */
+    movq      mm5, [edi+ 8*4]     /* #3 load low residue  */
+    movq      mm6, [edi+ 8*5]     /* #3 load high residue */
+    paddsw    mm1, mm0            /* #1 bias low  residue */
+    paddsw    mm2, mm0            /* #1 bias high residue */
+    packuswb  mm1, mm2            /* #1 pack to byte      */
+    paddsw    mm3, mm0            /* #2 bias low  residue */
+    paddsw    mm4, mm0            /* #2 bias high residue */
+    packuswb  mm3, mm4            /* #2 pack to byte      */
+    paddsw    mm5, mm0            /* #3 bias low  residue */
+    paddsw    mm6, mm0            /* #3 bias high residue */
+    packuswb  mm5, mm6            /* #3 pack to byte      */
+    movq      [edx], mm1          /* #1 write row         */
+    movq      [edx + ebx], mm3    /* #2 write row         */
+    movq      [edx + ebx*2], mm5  /* #3 write row         */
+    movq      mm1, [edi+ 8*6]     /* #4 load low residue  */
+    lea       ecx, [ebx + ebx*2]  /* make dst_ystride * 3 */
+    movq      mm2, [edi+ 8*7]     /* #4 load high residue */
+    movq      mm3, [edi+ 8*8]     /* #5 load low residue  */
+    lea       esi, [ebx*4 + ebx]  /* make dst_ystride * 5 */
+    movq      mm4, [edi+ 8*9]     /* #5 load high residue */
+    movq      mm5, [edi+ 8*10]    /* #6 load low residue  */
+    lea       eax, [ecx*2 + ebx]  /* make dst_ystride * 7 */
+    movq      mm6, [edi+ 8*11]    /* #6 load high residue */
+    paddsw    mm1, mm0            /* #4 bias low  residue */
+    paddsw    mm2, mm0            /* #4 bias high residue */
+    packuswb  mm1, mm2            /* #4 pack to byte      */
+    paddsw    mm3, mm0            /* #5 bias low  residue */
+    paddsw    mm4, mm0            /* #5 bias high residue */
+    packuswb  mm3, mm4            /* #5 pack to byte      */
+    paddsw    mm5, mm0            /* #6 bias low  residue */
+    paddsw    mm6, mm0            /* #6 bias high residue */
+    packuswb  mm5, mm6            /* #6 pack to byte      */
+    movq      [edx + ecx], mm1    /* #4 write row         */
+    movq      [edx + ebx*4], mm3  /* #5 write row         */
+    movq      [edx + esi], mm5    /* #6 write row         */
+    movq      mm1, [edi+ 8*12]    /* #7 load low residue  */
+    movq      mm2, [edi+ 8*13]    /* #7 load high residue */
+    movq      mm3, [edi+ 8*14]    /* #8 load low residue  */
+    movq      mm4, [edi+ 8*15]    /* #8 load high residue */
+    paddsw    mm1, mm0            /* #7 bias low  residue */
+    paddsw    mm2, mm0            /* #7 bias high residue */
+    packuswb  mm1, mm2            /* #7 pack to byte      */
+    paddsw    mm3, mm0            /* #8 bias low  residue */
+    paddsw    mm4, mm0            /* #8 bias high residue */
+    packuswb  mm3, mm4            /* #8 pack to byte      */
+    movq      [edx + ecx*2], mm1  /* #7 write row         */
+    movq      [edx + eax], mm3    /* #8 write row         */
+  }
+}
+
+
+
+void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
+ const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){
+  /* ---------------------------------------------------------------------
+  This function does the inter reconstruction step with two iterations
+  running in parallel to hide some load-latencies and break the dependency
+  chains. The iteration for each instruction is noted by the #id in the
+  comments (in case you want to reconstruct it)
+  --------------------------------------------------------------------- */
+  _asm{
+    pxor      mm0, mm0          /* generate constant 0 */
+    mov       esi, [_src]
+    mov       edi, [_residue]
+    mov       eax, [_src_ystride]
+    mov       edx, [_dst]
+    mov       ebx, [_dst_ystride]
+    mov       ecx, 4            /* 4 passes, 2 rows each */
+
+    align 16
+
+nextchunk:
+    movq      mm3, [esi]        /* #1 load source        */
+    movq      mm1, [edi+0]      /* #1 load residue low   */
+    movq      mm2, [edi+8]      /* #1 load residue high  */
+    movq      mm7, [esi+eax]    /* #2 load source        */
+    movq      mm4, mm3          /* #1 get copy of src    */
+    movq      mm5, [edi+16]     /* #2 load residue low   */
+    punpckhbw mm4, mm0          /* #1 expand high source */
+    movq      mm6, [edi+24]     /* #2 load residue high  */
+    punpcklbw mm3, mm0          /* #1 expand low  source */
+    paddsw    mm4, mm2          /* #1 add residue high   */
+    movq      mm2, mm7          /* #2 get copy of src    */
+    paddsw    mm3, mm1          /* #1 add residue low    */
+    punpckhbw mm2, mm0          /* #2 expand high source */
+    packuswb  mm3, mm4          /* #1 final row pixels   */
+    punpcklbw mm7, mm0          /* #2 expand low  source */
+    movq      [edx], mm3        /* #1 write row          */
+    paddsw    mm2, mm6          /* #2 add residue high   */
+    add       edi, 32           /* residue += 2 rows (32 bytes) */
+    paddsw    mm7, mm5          /* #2 add residue low    */
+    sub       ecx, 1            /* update loop counter   */
+    packuswb  mm7, mm2          /* #2 final row          */
+    lea       esi, [esi+eax*2]  /* src += stride * 2     */
+    movq      [edx + ebx], mm7  /* #2 write row          */
+    lea       edx, [edx+ebx*2]  /* dst += stride * 2     */
+    jne       nextchunk
+  }
+}
+
+
+void oc_frag_recon_inter2_mmx(unsigned char *_dst,  int _dst_ystride,
+ const unsigned char *_src1,  int _src1_ystride, const unsigned char *_src2,
+ int _src2_ystride,const ogg_int16_t *_residue){
+  /* ---------------------------------------------------------------------
+  This function does the inter2 reconstruction step. The building of the
+  average is done with a bit-twiddling trick to avoid excessive register
+  copy work during byte to word conversion.
+
+              average = (a & b) + (((a ^ b) & 0xfe) >> 1);
+
+  (shown for a single byte; it's done with 8 of them at a time)
+
+  Slightly faster than the obvious method using add and shift, but not
+  earthshaking improvement either.
+
+  If anyone comes up with a way that produces bit-identical outputs
+  using the pavgb instruction let me know and I'll do the 3dnow codepath.
+  --------------------------------------------------------------------- */
+ _asm{
+   mov        eax, 0xfefefefe
+   mov        esi, [_src1]
+   mov        edi, [_src2]
+   movd       mm1, eax
+   mov        ebx, [_residue]
+   mov        edx, [_dst]
+   mov        eax, [_dst_ystride]
+   punpckldq  mm1, mm1            /* replicate lsb32     */
+   mov        ecx, 8              /* init loop counter   */
+   pxor       mm0, mm0            /* constant zero       */
+   sub        edx, eax            /* dst -= dst_stride   */
+
+   align      16
+
+nextrow:
+   movq       mm2,  [esi]         /* load source1        */
+   movq       mm3,  [edi]         /* load source2        */
+   movq       mm5,  [ebx + 0]     /* load lower residue  */
+   movq       mm6,  [ebx + 8]     /* load higher residue */
+   add        esi,  _src1_ystride /* src1 += src1_stride */
+   add        edi,  _src2_ystride /* src2 += src2_stride */
+   movq       mm4,  mm2           /* get copy of source1 */
+   pand       mm2,  mm3           /* s1 & s2 (avg part)  */
+   pxor       mm3,  mm4           /* s1 ^ s2 (avg part)  */
+   add        ebx,  16            /* residue += 1 row (16 bytes) */
+   pand       mm3,  mm1           /* mask out low bits   */
+   psrlq      mm3,  1             /* shift xor avg-part  */
+   paddd      mm3,  mm2           /* build final average */
+   add        edx,  eax           /* dst += dst_stride   */
+   movq       mm2,  mm3           /* get copy of average */
+   punpckhbw  mm3,  mm0           /* average high        */
+   punpcklbw  mm2,  mm0           /* average low         */
+   paddsw     mm3,  mm6           /* high + residue      */
+   paddsw     mm2,  mm5           /* low  + residue      */
+   sub        ecx,  1             /* update loop counter */
+   packuswb   mm2,  mm3           /* pack and saturate   */
+   movq       [edx], mm2          /* write row           */
+   jne        nextrow
+ }
+}
+
+void oc_restore_fpu_mmx(void){
+  _asm { emms }  /*Leave MMX mode so x87 floating point code can run again.*/
+}
+
+#endif
--- a/media/libtheora/lib/dec/x86_vc/mmxidct.c
+++ b/media/libtheora/lib/dec/x86_vc/mmxidct.c
--- a/media/libtheora/lib/dec/x86_vc/mmxloopfilter.c
+++ b/media/libtheora/lib/dec/x86_vc/mmxloopfilter.c
@ -0,0 +1,377 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id:
+
+ ********************************************************************/
+
+/* -------------------------------------------------------------------
+  MMX based loop filter for the theora codec.
+
+  Originally written by Rudolf Marek, based on code from On2's VP3.
+  Converted to Visual Studio inline assembly by Nils Pipenbrinck.
+
+  Note: I can't test these since my example files never get into the
+  loop filters, but the code has been converted semi-automatically from
+  the GCC sources, so it ought to work.
+  ---------------------------------------------------------------------*/
+#include "../../internal.h"
+#include "x86int.h"
+#include <mmintrin.h>
+
+#if defined(USE_ASM)
+
+
+
+static void loop_filter_v(unsigned char *_pix,int _ystride,
+                          const ogg_int16_t *_ll){
+  _asm {
+    mov       eax,  [_pix]
+    mov       edx,  [_ystride]
+    mov       ebx,  [_ll]
+
+    /* _pix -= ystride */
+    sub       eax,   edx
+    /*  mm0=0          */
+    pxor      mm0,   mm0
+    /* _pix -= ystride */
+    sub       eax,   edx
+    /*  esi=_ystride*3 */
+    lea       esi, [edx + edx*2]
+
+    /*  mm7=_pix[0...8]*/
+    movq      mm7, [eax]
+    /*  mm4=_pix[0...8+_ystride*3]*/
+    movq      mm4, [eax + esi]
+    /*  mm6=_pix[0...8]*/
+    movq      mm6, mm7
+    /*  Expand unsigned _pix[0...3] to 16 bits.*/
+    punpcklbw mm6, mm0
+    movq      mm5, mm4
+    /*  Expand unsigned _pix[4...7] to 16 bits.*/
+    punpckhbw mm7, mm0
+    punpcklbw mm4, mm0
+    /*  Expand other arrays too.*/
+    punpckhbw mm5, mm0
+    /*mm7:mm6=_p[0...7]-_p[0...7+_ystride*3]:*/
+    psubw     mm6, mm4
+    psubw     mm7, mm5
+    /*mm5=mm4=_pix[0...7+_ystride]*/
+    movq      mm4, [eax + edx]
+    /*mm1=mm3=mm2=_pix[0...7+_ystride*2]*/
+    movq      mm2, [eax + edx*2]
+    movq      mm5, mm4
+    movq      mm3, mm2
+    movq      mm1, mm2
+    /*Expand these arrays.*/
+    punpckhbw mm5, mm0
+    punpcklbw mm4, mm0
+    punpckhbw mm3, mm0
+    punpcklbw mm2, mm0
+    pcmpeqw   mm0, mm0
+    /*mm0=3 3 3 3
+    mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
+    psubw     mm3, mm5
+    psrlw     mm0, 14
+    psubw     mm2, mm4
+    /*Scale by 3.*/
+    pmullw    mm3, mm0
+    pmullw    mm2, mm0
+    /*mm0=4 4 4 4
+    f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
+     3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
+    psrlw     mm0, 1
+    paddw     mm3, mm7
+    psllw     mm0, 2
+    paddw     mm2, mm6
+    /*Add 4.*/
+    paddw     mm3, mm0
+    paddw     mm2, mm0
+    /*"Divide" by 8.*/
+    psraw     mm3, 3
+    psraw     mm2, 3
+    /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the spec.*/
+    /*Free up mm5.*/
+    packuswb  mm4, mm5
+    /*mm0=L L L L*/
+    movq      mm0, [ebx]
+    /*if(R_i<-2L||R_i>2L)R_i=0:*/
+    movq      mm5, mm2
+    pxor      mm6, mm6
+    movq      mm7, mm0
+    psubw     mm6, mm0
+    psllw     mm7, 1
+    psllw     mm6, 1
+    /*mm2==R_3 R_2 R_1 R_0*/
+    /*mm5==R_3 R_2 R_1 R_0*/
+    /*mm6==-2L -2L -2L -2L*/
+    /*mm7==2L 2L 2L 2L*/
+    pcmpgtw   mm7, mm2
+    pcmpgtw   mm5, mm6
+    pand      mm2, mm7
+    movq      mm7, mm0
+    pand      mm2, mm5
+    psllw     mm7, 1
+    movq      mm5, mm3
+    /*mm3==R_7 R_6 R_5 R_4*/
+    /*mm5==R_7 R_6 R_5 R_4*/
+    /*mm6==-2L -2L -2L -2L*/
+    /*mm7==2L 2L 2L 2L*/
+    pcmpgtw   mm7, mm3
+    pcmpgtw   mm5, mm6
+    pand      mm3, mm7
+    movq      mm7, mm0
+    pand      mm3, mm5
+   /*if(R_i<-L)R_i'=R_i+2L;
+     if(R_i>L)R_i'=R_i-2L;
+     if(R_i<-L||R_i>L)R_i=-R_i':*/
+    psraw     mm6, 1
+    movq      mm5, mm2
+    psllw     mm7, 1
+    /*mm2==R_3 R_2 R_1 R_0*/
+    /*mm5==R_3 R_2 R_1 R_0*/
+    /*mm6==-L -L -L -L*/
+    /*mm0==L L L L*/
+    /*mm5=R_i>L?FF:00*/
+    pcmpgtw   mm5, mm0
+    /*mm6=-L>R_i?FF:00*/
+    pcmpgtw   mm6, mm2
+    /*mm7=R_i>L?2L:0*/
+    pand      mm7, mm5
+    /*mm2=R_i>L?R_i-2L:R_i*/
+    psubw     mm2, mm7
+    movq      mm7, mm0
+    /*mm5=-L>R_i||R_i>L*/
+    por       mm5, mm6
+    psllw     mm7, 1
+    /*mm7=-L>R_i?2L:0*/
+    pand      mm7, mm6
+    pxor      mm6, mm6
+    /*mm2=-L>R_i?R_i+2L:R_i*/
+    paddw     mm2, mm7
+    psubw     mm6, mm0
+    /*mm5=-L>R_i||R_i>L?-R_i':0*/
+    pand      mm5, mm2
+    movq      mm7, mm0
+    /*mm2=-L>R_i||R_i>L?0:R_i*/
+    psubw     mm2, mm5
+    psllw     mm7, 1
+    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
+    psubw     mm2, mm5
+    movq      mm5, mm3
+    /*mm3==R_7 R_6 R_5 R_4*/
+    /*mm5==R_7 R_6 R_5 R_4*/
+    /*mm6==-L -L -L -L*/
+    /*mm0==L L L L*/
+    /*mm6=-L>R_i?FF:00*/
+    pcmpgtw   mm6, mm3
+    /*mm5=R_i>L?FF:00*/
+    pcmpgtw   mm5, mm0
+    /*mm7=R_i>L?2L:0*/
+    pand      mm7, mm5
+    /*mm3=R_i>L?R_i-2L:R_i*/
+    psubw     mm3, mm7
+    psllw     mm0, 1
+    /*mm5=-L>R_i||R_i>L*/
+    por       mm5, mm6
+    /*mm0=-L>R_i?2L:0*/
+    pand      mm0, mm6
+    /*mm3=-L>R_i?R_i+2L:R_i*/
+    paddw     mm3, mm0
+    /*mm5=-L>R_i||R_i>L?-R_i':0*/
+    pand      mm5, mm3
+    /*mm3=-L>R_i||R_i>L?0:R_i*/
+    psubw     mm3, mm5
+    /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
+    psubw     mm3, mm5
+    /*Unfortunately, there's no unsigned byte+signed byte with unsigned
+       saturation op code, so we have to promote things back 16 bits.*/
+    pxor      mm0, mm0
+    movq      mm5, mm4
+    punpcklbw mm4, mm0
+    punpckhbw mm5, mm0
+    movq      mm6, mm1
+    punpcklbw mm1, mm0
+    punpckhbw mm6, mm0
+    /*_pix[0...8+_ystride]+=R_i*/
+    paddw     mm4, mm2
+    paddw     mm5, mm3
+    /*_pix[0...8+_ystride*2]-=R_i*/
+    psubw     mm1, mm2
+    psubw     mm6, mm3
+    packuswb  mm4, mm5
+    packuswb  mm1, mm6
+    /*Write it back out.*/
+    movq    [eax + edx], mm4
+    movq    [eax + edx*2], mm1
+  }
+}
+
+/*This code implements the bulk of loop_filter_h().
+  Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
+   four p0's to one register we must transpose the values in four mmx regs.
+  When half is done we repeat this for the rest.*/
+static void loop_filter_h4(unsigned char *_pix,long _ystride,
+                           const ogg_int16_t *_ll){
+  /* todo: merge the comments from the GCC sources */
+  _asm {
+    mov   ecx, [_pix]
+    mov   edx, [_ystride]
+    mov   eax, [_ll]
+    /*esi=_ystride*3*/
+    lea     esi, [edx + edx*2]
+
+    movd    mm0, dword ptr [ecx]              /*x x x x 3 2 1 0*/
+    movd    mm1, dword ptr [ecx + edx]        /*x x x x 7 6 5 4*/
+    movd    mm2, dword ptr [ecx + edx*2]      /*4 bytes from row 2*/
+    movd    mm3, dword ptr [ecx + esi]        /*4 bytes from row 3*/
+    punpcklbw mm0, mm1
+    punpcklbw mm2, mm3
+    movq    mm1, mm0
+    punpckhwd mm0, mm2
+    punpcklwd mm1, mm2
+    pxor    mm7, mm7
+    movq    mm5, mm1
+    punpcklbw mm1, mm7
+    punpckhbw mm5, mm7
+    movq    mm3, mm0
+    punpcklbw mm0, mm7
+    punpckhbw mm3, mm7
+    psubw   mm1, mm3
+    movq    mm4, mm0
+    pcmpeqw mm2, mm2
+    psubw   mm0, mm5
+    psrlw   mm2, 14
+    pmullw  mm0, mm2
+    psrlw   mm2, 1
+    paddw   mm0, mm1
+    psllw   mm2, 2
+    paddw   mm0, mm2
+    psraw   mm0, 3
+    movq    mm6, qword ptr [eax]              /*mm6=the four 16-bit values at _ll*/
+    movq    mm1, mm0
+    pxor    mm2, mm2
+    movq    mm3, mm6
+    psubw   mm2, mm6
+    psllw   mm3, 1
+    psllw   mm2, 1
+    pcmpgtw mm3, mm0
+    pcmpgtw mm1, mm2
+    pand    mm0, mm3
+    pand    mm0, mm1
+    psraw   mm2, 1
+    movq    mm1, mm0
+    movq    mm3, mm6
+    pcmpgtw mm2, mm0
+    pcmpgtw mm1, mm6
+    psllw   mm3, 1
+    psllw   mm6, 1
+    pand    mm3, mm1
+    pand    mm6, mm2
+    psubw   mm0, mm3
+    por     mm1, mm2
+    paddw   mm0, mm6
+    pand    mm1, mm0
+    psubw   mm0, mm1
+    psubw   mm0, mm1
+    paddw   mm5, mm0
+    psubw   mm4, mm0
+    packuswb mm5, mm7
+    packuswb mm4, mm7
+    punpcklbw mm5, mm4                        /*mm5=E D A 9 6 5 2 1*/
+    movd    edi, mm5                          /*edi=6 5 2 1*/
+    mov     word ptr [ecx + 01H], di
+    psrlq   mm5, 32
+    shr     edi, 16
+    mov     word ptr [ecx + edx + 01H], di
+    movd    edi, mm5                          /*edi=E D A 9*/
+    mov     word ptr [ecx + edx*2 + 01H], di
+    shr     edi, 16
+    mov     word ptr [ecx + esi + 01H], di
+  }
+}
+
+static void loop_filter_h(unsigned char *_pix,int _ystride,
+                          const ogg_int16_t *_ll){
+  _pix-=2;  /*loop_filter_h4() reads 4 bytes per row starting here and rewrites bytes 1 and 2.*/
+  loop_filter_h4(_pix,_ystride,_ll);                /*Rows 0...3.*/
+  loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);  /*Rows 4...7.*/
+}
+
+
+/*We copy the whole function because the MMX routines will be inlined 4 times,
+   and we can do just a single emms call at the end this way.
+  We also do not use the _bv lookup table, instead computing the values that
+   would lie in it on the fly.*/
+
+/*Apply the loop filter to a given set of fragment rows in the given plane.
+  The filter may be run on the bottom edge, affecting pixels in the next row of
+   fragments, so this row also needs to be available.
+  _bv:        The bounding values array (not referenced by this version).
+  _refi:      The index of the frame buffer to filter.
+  _pli:       The color plane to filter.
+  _fragy0:    The Y coordinate of the first fragment row to filter.
+  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
+void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end){
+  ogg_int16_t __declspec(align(8))        ll[4];
+  th_img_plane                            *iplane;
+  oc_fragment_plane                       *fplane;
+  oc_fragment                             *frag_top;
+  oc_fragment                             *frag0;
+  oc_fragment                             *frag;
+  oc_fragment                             *frag_end;
+  oc_fragment                             *frag0_end;
+  oc_fragment                             *frag_bot;
+  ll[0]=ll[1]=ll[2]=ll[3]=
+   (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];  /*Same limit in all four words.*/
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  fplane=_state->fplanes+_pli;
+  /*The following loops are constructed somewhat non-intuitively on purpose.
+    The main idea is: if a block boundary has at least one coded fragment on
+     it, the filter is applied to it.
+    However, the order that the filters are applied in matters, and VP3 chose
+     the somewhat strange ordering used below.*/
+  frag_top=_state->frags+fplane->froffset;
+  frag0=frag_top+_fragy0*fplane->nhfrags;
+  frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
+  frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
+  while(frag0<frag0_end){
+    frag=frag0;
+    frag_end=frag+fplane->nhfrags;
+    while(frag<frag_end){
+      if(frag->coded){
+        if(frag>frag0){  /*Left edge, unless this is the first fragment of the row.*/
+          loop_filter_h(frag->buffer[_refi],iplane->stride,ll);
+        }
+        if(frag0>frag_top){  /*Top edge, unless this row starts at frag_top.*/
+          loop_filter_v(frag->buffer[_refi],iplane->stride,ll);
+        }
+        if(frag+1<frag_end&&!(frag+1)->coded){  /*Right edge, only if a right neighbor exists and is not coded.*/
+          loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll);
+        }
+        if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){  /*Bottom edge, same rule for the fragment below.*/
+          loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
+           iplane->stride,ll);
+        }
+      }
+      frag++;
+    }
+    frag0+=fplane->nhfrags;
+  }
+
+  /*This needs to be removed when decode specific functions are implemented:*/
+  _mm_empty();
+}
+
+#endif
--- a/media/libtheora/lib/dec/x86_vc/mmxstate.c
+++ b/media/libtheora/lib/dec/x86_vc/mmxstate.c
@ -0,0 +1,189 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $
+
+ ********************************************************************/
+
+/* ------------------------------------------------------------------------
+  MMX acceleration of complete fragment reconstruction algorithm.
+    Originally written by Rudolf Marek.
+
+  Conversion to MSC intrinsics by Nils Pipenbrinck.
+  ---------------------------------------------------------------------*/
+#if defined(USE_ASM)
+
+#include "../../internal.h"
+#include "../idct.h"
+#include "x86int.h"
+#include <mmintrin.h>
+
+static const unsigned char OC_FZIG_ZAGMMX[64]=
+{ /*Indexed by zig-zag position zzi; gives the res_buf slot written in oc_state_frag_recon_mmx().*/
+   0, 8, 1, 2, 9,16,24,17,
+  10, 3,32,11,18,25, 4,12,
+   5,26,19,40,33,34,41,48,
+  27, 6,13,20,28,21,14, 7,
+  56,49,42,35,43,50,57,36,
+  15,22,29,30,23,44,37,58,
+  51,59,38,45,52,31,60,53,
+  46,39,47,54,61,62,55,63
+};
+
+/* Store _value into all 16 __m64 slots _dst[0..15]; callers pass a 64-entry ogg_int16_t buffer. */
+static __inline void loc_fill_mmx_value (__m64 * _dst, __m64 _value){
+  __m64 t   = _value; /* local copy of the fill pattern */
+  _dst[0]  = t;  _dst[1]  = t;  _dst[2]  = t;  _dst[3]  = t;
+  _dst[4]  = t;  _dst[5]  = t;  _dst[6]  = t;  _dst[7]  = t;
+  _dst[8]  = t;  _dst[9]  = t;  _dst[10] = t;  _dst[11] = t;
+  _dst[12] = t;  _dst[13] = t;  _dst[14] = t;  _dst[15] = t;
+}
+
+/* Copy 8 rows of one __m64 (8 bytes) each from _src to _dst; each side uses its own row stride. */
+static __inline void loc_blockcopy_mmx (unsigned char * _dst, int _dst_ystride,
+                                        unsigned char * _src, int _src_ystride){
+  __m64 a,b,c,d,e,f,g,h; /* one temporary per row: all 8 rows are read before any is written */
+  a = *(__m64*)(_src + 0 * _src_ystride);
+  b = *(__m64*)(_src + 1 * _src_ystride);
+  c = *(__m64*)(_src + 2 * _src_ystride);
+  d = *(__m64*)(_src + 3 * _src_ystride);
+  e = *(__m64*)(_src + 4 * _src_ystride);
+  f = *(__m64*)(_src + 5 * _src_ystride);
+  g = *(__m64*)(_src + 6 * _src_ystride);
+  h = *(__m64*)(_src + 7 * _src_ystride);
+  *(__m64*)(_dst + 0 * _dst_ystride) = a;
+  *(__m64*)(_dst + 1 * _dst_ystride) = b;
+  *(__m64*)(_dst + 2 * _dst_ystride) = c;
+  *(__m64*)(_dst + 3 * _dst_ystride) = d;
+  *(__m64*)(_dst + 4 * _dst_ystride) = e;
+  *(__m64*)(_dst + 5 * _dst_ystride) = f;
+  *(__m64*)(_dst + 6 * _dst_ystride) = g;
+  *(__m64*)(_dst + 7 * _dst_ystride) = h;
+}
+/*Dequantize one fragment, run the MMX iDCT (skipped for DC-only blocks) and reconstruct it into the OC_FRAME_SELF buffer.*/
+void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+ ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
+  ogg_int16_t __declspec(align(16)) res_buf[64]; /*dequantized coefficients; handed on as the residue*/
+  int dst_framei;
+  int dst_ystride;
+  int zzi;
+  /*_last_zzi is subtly different from an actual count of the number of
+     coefficients we decoded for this block.
+    It contains the value of zzi BEFORE the final token in the block was
+     decoded.
+    In most cases this is an EOB token (the continuation of an EOB run from a
+     previous block counts), and so this is the same as the coefficient count.
+    However, in the case that the last token was NOT an EOB token, but filled
+     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
+    Provided the last token was not a pure zero run, the minimum value it can
+     be is 46, and so that doesn't affect any of the cases in this routine.
+    However, if the last token WAS a pure zero run of length 63, then _last_zzi
+     will be 1 while the number of coefficients decoded is 64.
+    Thus, we will trigger the following special case, where the real
+     coefficient count would not.
+    Note also that a zero run of length 64 will give _last_zzi a value of 0,
+     but we still process the DC coefficient, which might have a non-zero value
+     due to DC prediction.
+    Although convoluted, this is arguably the correct behavior: it allows us to
+     dequantize fewer coefficients and use a smaller transform when the block
+     ends with a long zero run instead of a normal EOB token.
+    It could be smarter... multiple separate zero runs at the end of a block
+     will fool it, but an encoder that generates these really deserves what it
+     gets.
+    Needless to say we inherited this approach from VP3.*/
+  /*Special case only having a DC component.*/
+  if(_last_zzi<2){
+    __m64 p;
+    /*Why is the iquant product rounded in this case and no others? Who knows.*/
+    p = _m_from_int((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
+    /* broadcast the low 16 bits of p into all four 16-bit lanes */
+    p = _m_punpcklwd (p,p);
+    p = _m_punpckldq (p,p);
+    loc_fill_mmx_value ((__m64 *)res_buf, p);
+  }
+  else{
+    /*Then, fill in the remainder of the coefficients with 0's, and perform
+       the iDCT.*/
+    /*First zero the buffer.*/
+    /*On K7, etc., this could be replaced with movntq and sfence.*/
+    loc_fill_mmx_value ((__m64 *)res_buf, _mm_setzero_si64());
+    /*The DC term is dequantized on its own; the loop below starts at zzi=1.*/
+    res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
+    /*This is planned to be rewritten in MMX.*/
+    for(zzi=1;zzi<_ncoefs;zzi++)
+    {
+      int ci;
+      ci=OC_FZIG_ZAG[zzi];
+      res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
+       _ac_iquant[ci]);
+    }
+    /*NOTE(review): presumably the _10 variant assumes only the first 10 zig-zag coefficients are set; confirm in mmxidct.c.*/
+    if(_last_zzi<10){
+      oc_idct8x8_10_mmx(res_buf);
+    }
+    else {
+      oc_idct8x8_mmx(res_buf);
+    }
+  }
+  /*Fill in the target buffer.*/
+  dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
+  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
+  /*For now ystride values in all ref frames assumed to be equal.*/
+  if(_frag->mbmode==OC_MODE_INTRA){
+    oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
+  }
+  else{
+    int ref_framei;
+    int ref_ystride;
+    int mvoffsets[2]; /*set by oc_state_get_mv_offsets(); [1] is read only when it returns >1*/
+    ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
+    ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],
+     _frag->mv[1],ref_ystride,_pli)>1){
+      oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf);
+    }
+    else{
+      oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf);
+    }
+  }
+  /*_mm_empty() issues EMMS, clearing the MMX state before any later x87 floating-point use.*/
+  _mm_empty();
+}
+
+/*Copy each fragment listed in _fragis[0.._nfragis-1] from the _src_frame buffer to the _dst_frame buffer (strides of plane _pli).*/
+void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli){
+  const int *fragi;
+  const int *fragi_end;
+  int        dst_framei;
+  int        dst_ystride;
+  int        src_framei;
+  int        src_ystride;
+  dst_framei=_state->ref_frame_idx[_dst_frame];
+  src_framei=_state->ref_frame_idx[_src_frame];
+  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
+  src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
+  fragi_end=_fragis+_nfragis;
+  for(fragi=_fragis;fragi<fragi_end;fragi++){ /*each *fragi is an index into _state->frags*/
+    oc_fragment *frag = _state->frags+*fragi;
+    loc_blockcopy_mmx (frag->buffer[dst_framei], dst_ystride,
+                       frag->buffer[src_framei], src_ystride);
+  }
+  _m_empty(); /*NOTE(review): oc_state_frag_recon_mmx() spells this _mm_empty(); presumably the same EMMS intrinsic - confirm.*/
+}
+
+#endif
--- a/media/libtheora/lib/dec/x86_vc/x86int.h
+++ b/media/libtheora/lib/dec/x86_vc/x86int.h
@ -0,0 +1,49 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_x86_x86int_vc_H)
+# define _x86_x86int_vc_H (1)
+# include "../../internal.h"
+/*Prototypes shared by the x86_vc (MSC intrinsics) MMX decoder sources.*/
+void oc_state_vtable_init_x86(oc_theora_state *_state);
+
+void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t *_residue);
+
+void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
+
+void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
+ int _src2_ystride,const ogg_int16_t *_residue);
+
+void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli);
+
+void oc_restore_fpu_mmx(void);
+
+void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+ ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
+/*The two iDCT entry points below are called directly from oc_state_frag_recon_mmx().*/
+void oc_idct8x8_mmx(ogg_int16_t _y[64]);
+void oc_idct8x8_10_mmx(ogg_int16_t _y[64]);
+
+void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
+  int _refi,int _pli,int _fragy0,int _fragy_end);
+
+#endif
--- a/media/libtheora/lib/dec/x86_vc/x86state.c
+++ b/media/libtheora/lib/dec/x86_vc/x86state.c
@ -0,0 +1,41 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 15427 2008-10-21 02:36:19Z xiphmont $
+
+ ********************************************************************/
+
+#if defined(USE_ASM)
+
+#include "x86int.h"
+#include "../../cpu.c"
+/*Record the CPU flags, install the default vtable, then override entries with the MMX routines when MMX is reported.*/
+void oc_state_vtable_init_x86(oc_theora_state *_state){
+  _state->cpu_flags=oc_cpu_flags_get();
+
+  /* fill with defaults */
+  oc_state_vtable_init_c(_state);
+
+  /* patch in the MMX functions only when the OC_CPU_X86_MMX flag is set */
+  if(_state->cpu_flags&OC_CPU_X86_MMX){
+    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
+    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
+    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
+    _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
+    _state->opt_vtable.state_frag_copy=oc_state_frag_copy_mmx;
+    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
+    _state->opt_vtable.state_loop_filter_frag_rows=oc_state_loop_filter_frag_rows_mmx;
+  }
+}
+
+#endif
--- a/media/libtheora/lib/internal.h
+++ b/media/libtheora/lib/internal.h
@ -6,12 +6,12 @@
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
- * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
 *                                                                  *
 ********************************************************************

  function:
-    last mod: $Id: internal.h 14714 2008-04-12 01:04:43Z giles $
+    last mod: $Id: internal.h 15469 2008-10-30 12:49:42Z tterribe $

 ********************************************************************/

@ -27,19 +27,6 @@
 # include "dec/huffman.h"
 # include "dec/quant.h"

-/* debug macros */
-#if defined(_MSC_VER) && _MSC_VER < 1400
-static const dframe = 0;
-static void TH_DEBUG(const char *fmt, ...) {}
-#elif defined(_TH_DEBUG_)
-#include <stdio.h>
-extern long dframe;
-extern FILE *debugout;
-#define TH_DEBUG(...) fprintf(debugout, __VA_ARGS__)
-#else
-#define TH_DEBUG(...)
-#endif
-
 /*Thank you Microsoft, I know the order of operations.*/
 # if defined(_MSC_VER)
 #  pragma warning(disable:4554) /* order of operations */
@ -47,7 +34,7 @@ extern FILE *debugout;
 # endif

 /*This library's version.*/
-# define OC_VENDOR_STRING "Xiph.Org libTheora I 20071025 3 2 1"
+# define OC_VENDOR_STRING "Xiph.Org libTheora I 20081020 3 2 1"

 /*Theora bitstream version.*/
 # define TH_VERSION_MAJOR (3)
@ -224,10 +211,14 @@ typedef struct{
  unsigned        invalid:1;
  /*The quality index used for this fragment's AC coefficients.*/
  unsigned        qi:6;
-  /*The mode of the macroblock this fragment belongs to.*/
-  int             mbmode:8;
-  /*The prediction-corrected DC component.*/
-  int             dc:16;
+  /*The mode of the macroblock this fragment belongs to.
+    Note that C leaves it implementation-defined whether a plain int bitfield
+     is signed, so the explicit signed keyword is needed for portability.*/
+  signed int      mbmode:8;
+  /*The prediction-corrected DC component.
+    Note that C leaves it implementation-defined whether a plain int bitfield
+     is signed, so the explicit signed keyword is needed for portability.*/
+  signed int      dc:16;
  /*A pointer to the portion of an image covered by this fragment in several
     images.
    The first three are reconstructed frame buffers, while the last is the
@ -241,14 +232,6 @@ typedef struct{
  oc_border_info *border;
  /*The motion vector used for this fragment.*/
  oc_mv           mv;
-
-#ifdef _TH_DEBUG_
-  int quant[64];
-  int freq[64];
-  int time[64];
-  int recon[64];
-  int loop[64];
-#endif
 }oc_fragment;


--- a/media/libtheora/update.sh
+++ b/media/libtheora/update.sh
@ -22,10 +22,15 @@ cp $1/lib/dec/x86/x86state.c ./lib/dec/x86/x86state.c
 cp $1/lib/dec/x86/x86int.h ./lib/dec/x86/x86int.h
 cp $1/lib/dec/x86/mmxstate.c ./lib/dec/x86/mmxstate.c
 cp $1/lib/dec/x86/mmxidct.c ./lib/dec/x86/mmxidct.c
-cp $1/lib/dec/bitwise.h ./lib/dec/bitwise.h
+cp $1/lib/dec/x86_vc/mmxfrag.c ./lib/dec/x86_vc/mmxfrag.c
+cp $1/lib/dec/x86_vc/mmxidct.c ./lib/dec/x86_vc/mmxidct.c
+cp $1/lib/dec/x86_vc/mmxloopfilter.c ./lib/dec/x86_vc/mmxloopfilter.c
+cp $1/lib/dec/x86_vc/mmxstate.c ./lib/dec/x86_vc/mmxstate.c
+cp $1/lib/dec/x86_vc/x86int.h ./lib/dec/x86_vc/x86int.h
+cp $1/lib/dec/x86_vc/x86state.c ./lib/dec/x86_vc/x86state.c
+cp $1/lib/dec/bitpack.h ./lib/dec/bitpack.h
 cp $1/lib/dec/quant.c ./lib/dec/quant.c
-cp $1/lib/dec/bitwise.c ./lib/dec/bitwise.c
-cp $1/lib/dec/enquant.h ./lib/dec/enquant.h
+cp $1/lib/dec/bitpack.c ./lib/dec/bitpack.c
 cp $1/lib/dec/internal.c ./lib/dec/internal.c
 cp $1/lib/dec/huffdec.h ./lib/dec/huffdec.h
 cp $1/lib/dec/dct.h ./lib/dec/dct.h
@ -47,4 +52,4 @@ cp $1/lib/internal.h ./lib/internal.h
 cp $1/include/theora/theora.h ./include/theora/theora.h
 cp $1/include/theora/theoradec.h ./include/theora/theoradec.h
 cp $1/include/theora/codec.h ./include/theora/codec.h
-patch -p3 <changeset_r15144.diff
+patch -p3 <455357_wince_local_variable_macro_clash_patch