Bug 422538. Import libtheora

2008-07-29 23:38:23 -07:00 · 2008-07-29 23:38:23 -07:00 · 5946ae5311
--- a/modules/libtheora/AUTHORS
+++ b/modules/libtheora/AUTHORS
@ -0,0 +1,42 @@
+Monty <monty@xiph.org>
+	- Original VP3 port
+
+Ralph Giles
+Timothy B. Terriberry 
+	- Ongoing development
+	
+Dan B. Miller
+	- Pre alpha3 development
+	
+Wim Tayman
+Dan Lenski
+	- MMX optimized functions
+	
+Aaron Colwell
+Thomas Vander Stichele
+Jan Gerber
+Conrad Parker
+	- Bug fixes, enhancements, build systems.
+	
+Mauricio Piacentini
+	- Original win32 projects and example ports
+	- VP3->Theora transcoder
+
+Silvia Pfeiffer
+	- Figures for the spec
+
+Michael Smith
+Andre Pang
+calc
+ccheney
+brendan
+Edward Hervey
+Adam Moss
+Colin Ward
+Jeremy C. Reed
+Arc Riley
+Rodolphe Ortalo
+	- Bug fixes
+
+
+and other Xiph.org contributors
--- a/modules/libtheora/CHANGES
+++ b/modules/libtheora/CHANGES
@ -0,0 +1,140 @@
+libtheora 1.0beta3 (2008 April 16)
+
+ - Build new libtheoradec and libtheoraenc libraries
+   supporting the new API from theora-exp. This API should
+   not be considered stable yet.
+ - Change granule_frame() to return an index as documented.
+   This is a change of behaviour from 1.0beta1.
+ - Document that granule_time() returns the end of the 
+   presentation interval. 
+ - Use a custom copy of the libogg bitpacker in the decoder
+   to avoid function call overhead.
+ - MMX code improved and ported to MSVC.
+ - Fix a problem with the MMX code on SELinux 
+ - Fix a problem with decoder quantizer initialization.
+ - Fix a page queue problem with png2theora.
+ - Improved robustness.
+ - Updated VS2005 project files.
+ - Dropped build support for Microsoft VS2003.
+ - Dropped build support for the unreleased libogg2.
+ - Added the specification to the autotools build.
+ - Specification corrections.
+
+libtheora 1.0beta2 (2007 October 12)
+
+ - Fix a crash bug on char-is-unsigned architectures (PowerPC)
+ - Fix a buffer sizing issue that caused rare encoder crashes
+ - Fix a buffer alignment issue
+ - Build fixes for MingW32, MSVC
+ - Improved format documentation.
+
+libtheora 1.0beta1 (2007 September 22)
+
+ - Granulepos scheme modified to match other codecs. This bumps
+   the bitstream revision to 3.2.1. Bitstreams marked 3.2.0 are
+   handled correctly by this decoder. Older decoders will show
+   a one frame sync error in the less noticable direction.
+
+libtheora 1.0alpha8 (2007 September 18)
+
+ - Switch to new spec compliant decoder from theora-exp branch.
+   Written by Dr. Timothy Terriberry.
+ - Add support to the encoder for using quantization settings
+   provided by the application.
+ - more assembly optimizations
+
+libtheora 1.0alpha7 (2006 June 20)
+
+ - Enable mmx assembly by default
+ - Avoid some relocations that caused problems on SELinux
+ - Other build fixes
+ - time testing mode (-f) for the dump_video example
+
+libtheora 1.0alpha6 (2006 May 30)
+
+ * Merge theora-mmx simd acceleration (x86_32 and x86_64)
+ * Major RTP payload specification update
+ * Minor format specification updates
+ * Fix some spurious calls to free() instead of _ogg_free()
+ * Fix invalid array indexing in PixelLineSearch()
+ * Improve robustness against invalid input
+ * General warning cleanup
+ * The offset_y member now means what every application thought it meant
+   (offset from the top). This will mean some old files (those with a 
+   non-centered image created with a buggy encoder) will display differently.
+
+libtheora 1.0alpha5 (2005 August 20)
+
+ * Fixed bitrate management bugs that caused popping and encode
+   errors
+ * Fixed a crash problem with the theora_state internals not
+   being intialized properly.
+ * new utility function:
+   - theora_granule_shift()
+ * dump_video example now makes YUV4MPEG files by default, so
+   the results can be fed back to encoder_example and similar
+   tools. The old behavior is restored through the '-r' switch.
+ * ./configure now prints a summary
+ * simple unit test of the comment api under 'make check'
+ * misc code cleanup, warning and leak fixes
+
+libtheora 1.0alpha4 (2004 December 15)
+
+ * first draft of the Theora I Format Specification
+ * API documentation generated from theora.h with Doxygen
+ * fix a double-update bug in the motion analysis
+ * apply the loop filter before filling motion vector border 
+   in the reference frame
+ * new utility functions:
+   - theora_packet_isheader(),
+   - theora_packet_iskeyframe()
+   - theora_granule_frame()
+ * optional support for building without floating point
+ * optional support for building without encode support 
+ * various build and packaging fixes
+ * pkg-config support
+ * SymbianOS build support
+
+libtheora 1.0alpha3 (2004 March 20)
+
+ UPDATE: on 2004 July 1 the Theora I bitstream format was frozen. Files
+ produced by the libtheora 1.0alpha3 reference encoder will always be
+ decodable by the Theora I spec.
+
+ * Bitstream info header FORMAT CHANGES:
+   - move the granulepos shift field to maintain byte alignment longer.
+   - reserve 5 additional bits for subsampling and interlace flags.
+ * Bitstream setup header FORMAT CHANGES:
+   - support for a range of interpolated quant matricies.
+   - include the in-loop block filter coeff.
+ * Bitsteam data packet FORMAT CHANGES:
+   - Reserve a bit for per-block Q index selection.
+   - Flip the coded image orientation for compatibility with VP3.
+     This allows lossless transcoding of VP3 content, but files
+     encoded with earlier theora releases would play upside down.
+ * example VP3 lossless transcoder
+ * optional support for libogg2
+ * timing improvements in the example player
+ * packaging and build system updates and fixes
+
+libtheora 1.0alpha2 (2003 June 9)
+
+ * bitstream FORMAT CHANGES:
+   - store the quant tables in a third setup header for
+     future encoder flexibility
+   - store the huffman tables in the third setup header
+   - add a field for marking the colorspace to the info header
+   - add crop parameters for non-multiple-of-16 frame sizes
+   - add a second vorbiscomment-style metadata header
+ * API changes to handle multiple headers with a single 
+   theora_decode_header() call, like libvorbis
+ * code cleanup and minor fixes
+ * new dump_video code example/utility
+ * experimental win32 code examples
+
+libtheora 1.0alpha1 (2002 September 25)
+
+ * First release of the theora reference implementation
+ * Port of the newly opened VP3 code to the Ogg container
+ * Rewrite of the code for portability and to use the libogg bitpacker
+
--- a/modules/libtheora/COPYING
+++ b/modules/libtheora/COPYING
@ -0,0 +1,28 @@
+Copyright (C) 2002-2007 Xiph.org Foundation
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+- Neither the name of the Xiph.org Foundation nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
+OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/modules/libtheora/LICENSE
+++ b/modules/libtheora/LICENSE
@ -0,0 +1,18 @@
+Please see the file COPYING for the copyright license for this software.
+
+In addition to and irrespective of the copyright license associated
+with this software, On2 Technologies, Inc. makes the following statement
+regarding technology used in this software:
+
+  On2 represents and warrants that it shall not assert any rights 
+  relating to infringement of On2's registered patents, nor initiate
+  any litigation asserting such rights, against any person who, or
+  entity which utilizes the On2 VP3 Codec Software, including any 
+  use, distribution, and sale of said Software; which make changes, 
+  modifications, and improvements in said Software; and to use,
+  distribute, and sell said changes as well as applications for other 
+  fields of use.
+
+This reference implementation is originally derived from the On2 VP3
+Codec Software, and the Theora video format is essentially compatible
+with the VP3 video format, consisting of a backward-compatible superset.
--- a/modules/libtheora/Makefile.in
+++ b/modules/libtheora/Makefile.in
@ -0,0 +1,51 @@
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is Mozilla code.
+#
+# The Initial Developer of the Original Code is the Mozilla Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2007
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#  Chris Double <chris.double@double.co.nz>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH		= ../..
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= theora
+
+DIRS		= \
+		include \
+		lib \
+		$(NULL)
+
+include $(topsrcdir)/config/rules.mk
--- a/modules/libtheora/README
+++ b/modules/libtheora/README
@ -0,0 +1,143 @@
+-------------------------------------------------------------------------
+          The Xiph.org Foundation's libtheora 1.0beta1 release
+-------------------------------------------------------------------------
+
+*** What is Theora?
+
+Theora is Xiph.Org's first publicly released video codec, intended
+for use within the Foundation's Ogg multimedia streaming system.
+Theora is derived directly from On2's VP3 codec; Currently the 
+encoders are nearly identical, but Theora will make use of new
+features supported by the decoder to improve over what is 
+is possible with VP3.
+
+*** Where is Theora?
+
+Theora's main site is www.theora.org.  Theora and related libraries
+can be gotten from www.theora.org or the main Xiph.Org site at
+www.xiph.org.  Development source is kept in an open subversion 
+repository, see http://theora.org/svn/ for instructions.
+
+*** What is the goal of this release?
+
+This is the first beta release of the 1.0 reference implementation.
+It is intended to completely support the decoder specification, and
+gather feedback on the implementation before declaring it stable.
+
+-------------------------------------------------------------------------
+Getting started with the code
+-------------------------------------------------------------------------
+
+*** What do I need to build the source?
+
+Requirements summary:
+
+  For libtheora:  
+         
+      libogg 1.1 or newer.
+
+  For example encoder:
+
+      as above
+
+      libvorbis and libvorbisenc 1.0.1 or newer.
+
+  For the player only:
+
+      as above, 
+
+      SDL (Simple Direct media Layer) libraries and headers
+ 
+      OSS audio driver and development headers
+
+The provided build system is the GNU automake/autoconf system, and
+the main library, libtheora, should already build smoothly on any
+system.  Failure of libtheora to build on a GNU-enabled system is
+considered a bug; please report problems to theora-dev@xiph.org.
+
+Some windows build support is included in the win32 directory.
+
+There is also an experimental scons build.
+
+*** How do I use the sample encoder?
+
+The sample encoder takes raw video in YUV4MPEG2 format, as used by
+lavtools, mjpeg-tools and other packages. The encoder expects audio,
+if any, in a separate wave WAV file. Try 'encoder_example -h' for a 
+complete list of options.
+
+An easy way to get raw video and audio files is to use MPlayer as an
+export utility.  The options " -ao pcm -vo yuv4mpeg " will export a
+wav file named audiodump.wav and a YUV video file in the correct
+format for encoder_example as stream.yuv.  Be careful when exporting
+video alone; MPlayer may drop frames to 'keep up' with the audio
+timer.  The example encoder can't properly synchronize input audio and
+video file that aren't in sync to begin with.  
+
+The encoder will also take video or audio on stdin if '-' is specified
+as the input file name.
+
+There is also a 'png2theora' example which accepts a set of image
+files in that format.
+
+*** How do I use the sample player?
+
+The sample player takes an Ogg file on standard in; the file may be
+audio alone, video alone or video with audio.  
+
+*** What other tools are available?
+
+The programs in the examples directory are intended as tutorial source 
+for developers using the library. As such they sacrifice features and 
+robustness in the interests of comprehension and should not be 
+considered serious applications.
+
+If you're wanting to just use theora, consider the programs linked
+from http://www.theora.org/. There is playback support in a number
+of common free players, and plugins for major media frameworks.
+Jan Gerber's ffmpeg2theora is an excellent encoding front end.
+
+-------------------------------------------------------------------------
+Troubleshooting the build process
+-------------------------------------------------------------------------
+
+*** Compile error, such as:
+
+encoder_internal.h:664: parse error before `ogg_uint16_t'
+
+This means you have version of libogg prior to 1.1. A *complete* new Ogg 
+install, libs and headers is needed.
+
+Also be sure that there aren't multiple copies of Ogg installed in
+/usr and /usr/local; an older one might be first on the search path
+for libs and headers.
+
+*** Link error, such as:
+
+undefined reference to `oggpackB_stream'
+
+See above; you need libogg 1.1 or later.
+
+*** Link error, such as:
+
+undefined reference to `vorbis_granule_time'
+
+You need libvorbis and libvorbisenc from the 1.0.1 release or later.
+
+*** Link error, such as:
+
+/usr/lib/libSDL.a(SDL_esdaudio.lo): In function `ESD_OpenAudio':
+SDL_esdaudio.lo(.text+0x25d): undefined reference to `esd_play_stream'
+
+Be sure to use an SDL that's built to work with OSS.  If you use an
+SDL that is also built with ESD and/or ALSA support, it will try to
+suck in all those extra libraries at link time too.  That will only
+work if the extra libraries are also installed.
+
+*** Link warning, such as:
+
+libtool: link: warning: library `/usr/lib/libogg.la' was moved.
+libtool: link: warning: library `/usr/lib/libogg.la' was moved.
+
+Re-run theora/autogen.sh after an Ogg or Vorbis rebuild/reinstall
+
--- a/modules/libtheora/README_MOZILLA
+++ b/modules/libtheora/README_MOZILLA
@ -0,0 +1,4 @@
+The source from this directory was copied from the libtheora-1.0beta3
+source distribution using the update.sh script. The only changes made
+were those applied by update.sh and the addition/upate of Makefile.in
+files for the Mozilla build system.
--- a/modules/libtheora/include/Makefile.in
+++ b/modules/libtheora/include/Makefile.in
@ -0,0 +1,47 @@
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is Mozilla code.
+#
+# The Initial Developer of the Original Code is the Mozilla Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2007
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#  Chris Double <chris.double@double.co.nz>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH		= ../../..
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= theora
+DIRS		= theora
+
+include $(topsrcdir)/config/rules.mk
--- a/modules/libtheora/include/theora/Makefile.in
+++ b/modules/libtheora/include/theora/Makefile.in
@ -0,0 +1,52 @@
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is Mozilla code.
+#
+# The Initial Developer of the Original Code is the Mozilla Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2007
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#  Chris Double <chris.double@double.co.nz>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH		= ../../../..
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= theora
+
+EXPORTS		= \
+		theora.h \
+		theoradec.h \
+		codec.h \
+		$(NULL)
+
+include $(topsrcdir)/config/rules.mk
--- a/modules/libtheora/include/theora/codec.h
+++ b/modules/libtheora/include/theora/codec.h
@ -0,0 +1,589 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
+
+ ********************************************************************/
+
+/**\mainpage
+ * 
+ * \section intro Introduction
+ *
+ * This is the documentation for <tt>libtheora</tt> C API.
+ * The current reference
+ * implementation for <a href="http://www.theora.org/">Theora</a>, a free,
+ * patent-unencumbered video codec.
+ * Theora is derived from On2's VP3 codec with additional features and
+ *  integration for Ogg multimedia formats by
+ *  <a href="http://www.xiph.org/">the Xiph.Org Foundation</a>.
+ * Complete documentation of the format itself is available in
+ * <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ *  specification</a>.
+ *
+ * \subsection Organization
+ *
+ * The functions documented here are actually subdivided into three 
+ * separate libraries:
+ * - <tt>libtheoraenc</tt> contains the encoder interface,
+ *   described in \ref encfuncs.
+ * - <tt>libtheoradec</tt> contains the decoder interface and
+ *   routines shared with the encoder.
+ *   You must also link to this if you link to <tt>libtheoraenc</tt>.
+ *   The routines in this library are described in \ref decfuncs and 
+ *   \ref basefuncs.
+ * - <tt>libtheora</tt> contains the \ref oldfuncs.
+ *
+ * New code should link to <tt>libtheoradec</tt> and, if using encoder
+ * features, <tt>libtheoraenc</tt>. Together these two export both
+ * the standard and the legacy API, so this is all that is needed by
+ * any code. The older <tt>libtheora</tt> library is provided just for
+ * compatibility with older build configurations.
+ *
+ * In general the recommended 1.x API symbols can be distinguished
+ * by their <tt>th_</tt> or <tt>TH_</tt> namespace prefix.
+ * The older, legacy API uses <tt>theora_</tt> or <tt>OC_</tt>
+ * prefixes instead.
+ */
+
+/**\file
+ * The shared <tt>libtheoradec</tt> and <tt>libtheoraenc</tt> C API.
+ * You don't need to include this directly.*/
+
+#if !defined(_O_THEORA_CODEC_H_)
+# define _O_THEORA_CODEC_H_ (1)
+# include <ogg/ogg.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+/**\name Return codes*/
+/*@{*/
+/**An invalid pointer was provided.*/
+#define TH_EFAULT     (-1)
+/**An invalid argument was provided.*/
+#define TH_EINVAL     (-10)
+/**The contents of the header were incomplete, invalid, or unexpected.*/
+#define TH_EBADHEADER (-20)
+/**The header does not belong to a Theora stream.*/
+#define TH_ENOTFORMAT (-21)
+/**The bitstream version is too high.*/
+#define TH_EVERSION   (-22)
+/**The specified function is not implemented.*/
+#define TH_EIMPL      (-23)
+/**There were errors in the video data packet.*/
+#define TH_EBADPACKET (-24)
+/**The decoded packet represented a dropped frame.
+   The player can continue to display the current frame, as the contents of the
+    decoded frame buffer have not changed.*/
+#define TH_DUPFRAME   (1)
+/*@}*/
+
+/**The currently defined color space tags.
+ * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ *  specification</a>, Chapter 4, for exact details on the meaning of each of
+ *  these color spaces.*/
+typedef enum{
+  /**The color space was not specified at the encoder.
+      It may be conveyed by an external means.*/
+  TH_CS_UNSPECIFIED,
+  /**A color space designed for NTSC content.*/
+  TH_CS_ITU_REC_470M,
+  /**A color space designed for PAL/SECAM content.*/
+  TH_CS_ITU_REC_470BG,
+  /**The total number of currently defined color spaces.*/
+  TH_CS_NSPACES
+}th_colorspace;
+
+/**The currently defined pixel format tags.
+ * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ *  specification</a>, Section 4.4, for details on the precise sample
+ *  locations.*/
+typedef enum{
+  /**Chroma decimation by 2 in both the X and Y directions (4:2:0).*/
+  TH_PF_420,
+  /**Currently reserved.*/
+  TH_PF_RSVD,
+  /**Chroma decimation by 2 in the X direction (4:2:2).*/
+  TH_PF_422,
+  /**No chroma decimation (4:4:4).*/
+  TH_PF_444,
+  /**The total number of currently defined pixel formats.*/
+  TH_PF_NFORMATS
+}th_pixel_fmt;
+
+
+
+/**A buffer for a single color plane in an uncompressed image.
+ * This contains the image data in a left-to-right, top-down format.
+ * Each row of pixels is stored contiguously in memory, but successive rows
+ *  need not be.
+ * Use \a stride to compute the offset of the next row.
+ * The encoder accepts both positive \a stride values (top-down in memory) and
+ *  negative (bottom-up in memory).
+ * The decoder currently always generates images with positive strides.*/
+typedef struct{
+  /**The width of this plane.*/
+  int            width;
+  /**The height of this plane.*/
+  int            height;
+  /**The offset in bytes between successive rows.*/
+  int            stride;
+  /**A pointer to the beginning of the first row.*/
+  unsigned char *data;
+}th_img_plane;
+
+/**A complete image buffer for an uncompressed frame.
+ * The chroma planes may be decimated by a factor of two in either direction,
+ *  as indicated by th_info#pixel_fmt.
+ * The width and height of the Y' plane must be multiples of 16.
+ * They may need to be cropped for display, using the rectangle specified by
+ *  th_info#pic_x, th_info#pic_y, th_info#pic_width, and
+ *  th_info#pic_height.
+ * All samples are 8 bits.
+ * \note The term YUV often used to describe a colorspace is ambiguous.
+ * The exact parameters of the RGB to YUV conversion process aside, in many
+ *  contexts the U and V channels actually have opposite meanings.
+ * To avoid this confusion, we are explicit: the name of the color channels are
+ *  Y'CbCr, and they appear in that order, always.
+ * The prime symbol denotes that the Y channel is non-linear.
+ * Cb and Cr stand for "Chroma blue" and "Chroma red", respectively.*/
+typedef th_img_plane th_ycbcr_buffer[3];
+
+/**Theora bitstream information.
+ * This contains the basic playback parameters for a stream, and corresponds to 
+ *  the initial 'info' header packet.
+ * To initialize an encoder, the application fills in this structure and
+ *  passes it to th_encode_alloc().
+ * A default encoding mode is chosen based on the values of the #quality and
+ *  #target_bitrate fields.
+ * On decode, it is filled in by th_decode_headerin(), and then passed to
+ *  th_decode_alloc().
+ *
+ * Encoded Theora frames must be a multiple of 16 in size;
+ *  this is what the #frame_width and #frame_height members represent.
+ * To handle arbitrary picture sizes, a crop rectangle is specified in the
+ *  #pic_x, #pic_y, #pic_width and #pic_height members.
+ *
+ * All frame buffers contain pointers to the full, padded frame.
+ * However, the current encoder <em>will not</em> reference pixels outside of
+ *  the cropped picture region, and the application does not need to fill them
+ *  in.
+ * The decoder <em>will</em> allocate storage for a full frame, but the
+ *  application <em>should not</em> rely on the padding containing sensible
+ *  data.
+ *
+ * It is also generally recommended that the offsets and sizes should still be
+ *  multiples of 2 to avoid chroma sampling shifts when chroma is sub-sampled.
+ * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ *  specification</a>, Section 4.4, for more details.
+ *
+ * Frame rate, in frames per second, is stored as a rational fraction, as is
+ *  the pixel aspect ratio.
+ * Note that this refers to the aspect ratio of the individual pixels, not of
+ *  the overall frame itself.
+ * The frame aspect ratio can be computed from pixel aspect ratio using the
+ *  image dimensions.*/
+typedef struct{
+  /**\name Theora version
+   * Bitstream version information.*/
+  /*@{*/
+  unsigned char version_major;
+  unsigned char version_minor;
+  unsigned char version_subminor;
+  /*@}*/
+  /**The encoded frame width.
+   * This must be a multiple of 16, and less than 1048576.*/
+  ogg_uint32_t  frame_width;
+  /**The encoded frame height.
+   * This must be a multiple of 16, and less than 1048576.*/
+  ogg_uint32_t  frame_height;
+  /**The displayed picture width.
+   * This must be no larger than width.*/
+  ogg_uint32_t  pic_width;
+  /**The displayed picture height.
+   * This must be no larger than height.*/
+  ogg_uint32_t  pic_height;
+  /**The X offset of the displayed picture.
+   * This must be no larger than #frame_width-#pic_width or 255, whichever is
+   *  smaller.*/
+  ogg_uint32_t  pic_x;
+  /**The Y offset of the displayed picture.
+   * This must be no larger than #frame_height-#pic_height, and
+   *  #frame_height-#pic_height-#pic_y must be no larger than 255.
+   * This slightly funny restriction is due to the fact that the offset is
+   *  specified from the top of the image for consistency with the standard
+   *  graphics left-handed coordinate system used throughout this API, while it
+   *  is stored in the encoded stream as an offset from the bottom.*/
+  ogg_uint32_t  pic_y;
+  /**\name Frame rate
+   * The frame rate, as a fraction.
+   * If either is 0, the frame rate is undefined.*/
+  /*@{*/
+  ogg_uint32_t  fps_numerator;
+  ogg_uint32_t  fps_denominator;
+  /*@}*/
+  /**\name Aspect ratio
+   * The aspect ratio of the pixels.
+   * If either value is zero, the aspect ratio is undefined.
+   * If not specified by any external means, 1:1 should be assumed.
+   * The aspect ratio of the full picture can be computed as
+   * \code
+   *  aspect_numerator*pic_width/(aspect_denominator*pic_height).
+   * \endcode */
+  /*@{*/
+  ogg_uint32_t  aspect_numerator;
+  ogg_uint32_t  aspect_denominator;
+  /*@}*/
+  /**The color space.*/
+  th_colorspace colorspace;
+  /**The pixel format.*/
+  th_pixel_fmt  pixel_fmt;
+  /**The target bit-rate in bits per second.
+     If initializing an encoder with this struct, set this field to a non-zero
+      value to activate CBR encoding by default.*/
+  /*TODO: Current encoder does not support CBR mode, or anything like it.
+    We also don't really know what nominal rate each quality level
+     corresponds to yet.*/
+  int           target_bitrate;
+  /**The target quality level.
+     Valid values range from 0 to 63, inclusive, with higher values giving
+      higher quality.
+     If initializing an encoder with this struct, and #target_bitrate is set
+      to zero, VBR encoding at this quality will be activated by default.*/
+  /*Currently this is set so that a qi of 0 corresponds to distortions of 24
+     times the JND, and each increase by 16 halves that value.
+    This gives us fine discrimination at low qualities, yet effective rate
+     control at high qualities.
+    The qi value 63 is special, however.
+    For this, the highest quality, we use one half of a JND for our threshold.
+    Due to the lower bounds placed on allowable quantizers in Theora, we will
+     not actually be able to achieve quality this good, but this should
+     provide as close to visually lossless quality as Theora is capable of.
+    We could lift the quantizer restrictions without breaking VP3.1
+     compatibility, but this would result in quantized coefficients that are
+     too large for the current bitstream to be able to store.
+    We'd have to redesign the token syntax to store these large coefficients,
+     which would make transcoding complex.*/
+  int           quality;
+  /**The amount to shift to extract the last keyframe number from the granule
+   *  position.
+   * This can be at most 31.
+   * th_info_init() will set this to a default value (currently <tt>6</tt>,
+   *  which is good for streaming applications), but you can set it to 0 to
+   *  make every frame a keyframe.
+   * The maximum distance between key frames is
+   *  <tt>1<<#keyframe_granule_shift</tt>.
+   * The keyframe frequency can be more finely controlled with
+   *  #TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, which can also be adjusted
+   *  during encoding (for example, to force the next frame to be a keyframe),
+   *  but it cannot be set larger than the amount permitted by this field after
+   *  the headers have been output.*/
+  int           keyframe_granule_shift;
+}th_info;
+
+/**The comment information.
+ *
+ * This structure holds the in-stream metadata corresponding to
+ *  the 'comment' header packet.
+ * The comment header is meant to be used much like someone jotting a quick
+ *  note on the label of a video.
+ * It should be a short, to the point text note that can be more than a couple
+ *  words, but not more than a short paragraph.
+ *
+ * The metadata is stored as a series of (tag, value) pairs, in
+ *  length-encoded string vectors.
+ * The first occurrence of the '=' character delimits the tag and value.
+ * A particular tag may occur more than once, and order is significant.
+ * The character set encoding for the strings is always UTF-8, but the tag
+ *  names are limited to ASCII, and treated as case-insensitive.
+ * See <a href="http://www.theora.org/doc/Theora_I_spec.pdf">the Theora
+ *  specification</a>, Section 6.3.3 for details.
+ *
+ * In filling in this structure, th_decode_headerin() will null-terminate
+ *  the user_comment strings for safety.
+ * However, the bitstream format itself treats them as 8-bit clean vectors,
+ *  possibly containing null characters, and so the length array should be
+ *  treated as their authoritative length.
+ */
+typedef struct th_comment{
+  /**The array of comment string vectors.*/
+  char **user_comments;
+  /**An array of the corresponding length of each vector, in bytes.*/
+  int   *comment_lengths;
+  /**The total number of comment strings.*/
+  int    comments;
+  /**The null-terminated vendor string.
+     This identifies the software used to encode the stream.*/
+  char  *vendor;
+}th_comment;
+
+
+
+/**A single base matrix.*/
+typedef unsigned char th_quant_base[64];
+
+/**A set of \a qi ranges.*/
+typedef struct{
+  /**The number of ranges in the set.*/
+  int                  nranges;
+  /**The size of each of the #nranges ranges.
+     These must sum to 63.*/
+  const int           *sizes;
+  /**#nranges <tt>+1</tt> base matrices.
+     Matrices \a i and <tt>i+1</tt> form the endpoints of range \a i.*/
+  const th_quant_base *base_matrices;
+}th_quant_ranges;
+
+/**A complete set of quantization parameters.
+   The quantizer for each coefficient is calculated as:
+   \code
+    Q=MAX(MIN(qmin[qti][ci!=0],scale[ci!=0][qi]*base[qti][pli][qi][ci]/100),
+     1024).
+   \endcode
+
+   \a qti is the quantization type index: 0 for intra, 1 for inter.
+   <tt>ci!=0</tt> is 0 for the DC coefficient and 1 for AC coefficients.
+   \a qi is the quality index, ranging between 0 (low quality) and 63 (high
+    quality).
+   \a pli is the color plane index: 0 for Y', 1 for Cb, 2 for Cr.
+   \a ci is the DCT coefficient index.
+   Coefficient indices correspond to the normal 2D DCT block
+    ordering--row-major with low frequencies first--\em not zig-zag order.
+
+   Minimum quantizers are constant, and are given by:
+   \code
+   qmin[2][2]={{4,2},{8,4}}.
+   \endcode
+
+   Parameters that can be stored in the bitstream are as follows:
+    - The two scale matrices ac_scale and dc_scale.
+      \code
+      scale[2][64]={dc_scale,ac_scale}.
+      \endcode
+    - The base matrices for each \a qi, \a qti and \a pli (up to 384 in all).
+      In order to avoid storing a full 384 base matrices, only a sparse set of
+       matrices are stored, and the rest are linearly interpolated.
+      This is done as follows.
+      For each \a qti and \a pli, a series of \a n \a qi ranges is defined.
+      The size of each \a qi range can vary arbitrarily, but they must sum to
+       63.
+      Then, <tt>n+1</tt> matrices are specified, one for each endpoint of the
+       ranges.
+      For interpolation purposes, each range's endpoints are the first \a qi
+       value it contains and one past the last \a qi value it contains.
+      Fractional values are rounded to the nearest integer, with ties rounded
+       away from zero.
+
+      Base matrices are stored by reference, so if the same matrices are used
+       multiple times, they will only appear once in the bitstream.
+      The bitstream is also capable of omitting an entire set of ranges and
+       its associated matrices if they are the same as either the previous
+       set (indexed in row-major order) or if the inter set is the same as the
+       intra set.
+
+    - Loop filter limit values.
+      The same limits are used for the loop filter in all color planes, despite
+       potentially differing levels of quantization in each.
+
+   For the current encoder, <tt>scale[ci!=0][qi]</tt> must be no greater
+    than <tt>scale[ci!=0][qi-1]</tt> and <tt>base[qti][pli][qi][ci]</tt> must
+    be no greater than <tt>base[qti][pli][qi-1][ci]</tt>.
+   These two conditions ensure that the actual quantizer for a given \a qti,
+    \a pli, and \a ci does not increase as \a qi increases.
+   This is not required by the decoder.*/
+typedef struct{
+  /**The DC scaling factors.*/
+  ogg_uint16_t    dc_scale[64];
+  /**The AC scaling factors.*/
+  ogg_uint16_t    ac_scale[64];
+  /**The loop filter limit values.*/
+  unsigned char   loop_filter_limits[64];
+  /**The \a qi ranges for each \a ci and \a pli.*/
+  th_quant_ranges qi_ranges[2][3];
+}th_quant_info;
+
+
+
+/**The number of Huffman tables used by Theora.*/
+#define TH_NHUFFMAN_TABLES (80)
+/**The number of DCT token values in each table.*/
+#define TH_NDCT_TOKENS     (32)
+
+/**A Huffman code for a Theora DCT token.
+ * Each set of Huffman codes in a given table must form a complete, prefix-free
+ *  code.
+ * There is no requirement that all the tokens in a table have a valid code,
+ *  but the current encoder is not optimized to take advantage of this.
+ * If each of the five grouops of 16 tables does not contain at least one table
+ *  with a code for every token, then the encoder may fail to encode certain
+ *  frames.
+ * The complete table in the first group of 16 does not have to be in the same
+ *  place as the complete table in the other groups, but the complete tables in
+ *  the remaining four groups must all be in the same place.*/
+typedef struct{
+  /**The bit pattern for the code, with the LSbit of the pattern aligned in
+   *   the LSbit of the word.*/
+  ogg_uint32_t pattern;
+  /**The number of bits in the code.
+   * This must be between 0 and 32, inclusive.*/
+  int          nbits;
+}th_huff_code;
+
+
+
+/**\defgroup basefuncs Functions Shared by Encode and Decode*/
+/*@{*/
+/**\name Basic shared functions*/
+/*@{*/
+/**Retrieves a human-readable string to identify the library vendor and
+ *  version.
+ * \return the version string.*/
+extern const char *th_version_string(void);
+/**Retrieves the library version number.
+ * This is the highest bitstream version that the encoder library will produce,
+ *  or that the decoder library can decode.
+ * This number is composed of a 16-bit major version, 8-bit minor version
+ * and 8 bit sub-version, composed as follows:
+ * \code
+ * (VERSION_MAJOR<<16)+(VERSION_MINOR<<8)+(VERSION_SUBMINOR)
+ * \endcode
+ * \return the version number.*/
+extern ogg_uint32_t th_version_number(void);
+/**Converts a granule position to an absolute frame index, starting at
+ *  <tt>0</tt>.
+ * The granule position is interpreted in the context of a given
+ *  #th_enc_ctx or #th_dec_ctx handle (either will suffice).
+ * \param _encdec  A previously allocated #th_enc_ctx or #th_dec_ctx
+ *                  handle.
+ * \param _granpos The granule position to convert.
+ * \returns The absolute frame index corresponding to \a _granpos.
+ * \retval -1 The given granule position was invalid (i.e. negative).*/
+extern ogg_int64_t th_granule_frame(void *_encdec,ogg_int64_t _granpos);
+/**Converts a granule position to an absolute time in seconds.
+ * The granule position is interpreted in the context of a given
+ *  #th_enc_ctx or #th_dec_ctx handle (either will suffice).
+ * \param _encdec  A previously allocated #th_enc_ctx or #th_dec_ctx
+ *                  handle.
+ * \param _granpos The granule position to convert.
+ * \return The absolute time in seconds corresponding to \a _granpos.
+ *         This is the "end time" for the frame, or the latest time it should
+ *          be displayed.
+ *         It is not the presentation time.
+ * \retval -1 The given granule position was invalid (i.e. negative).*/
+extern double th_granule_time(void *_encdec,ogg_int64_t _granpos);
+/**Determines whether a Theora packet is a header or not.
+ * This function does no verification beyond checking the packet type bit, so
+ *  it should not be used for bitstream identification; use
+ *  th_decode_headerin() for that.
+ * As per the Theora specification, an empty (0-byte) packet is treated as a
+ *  data packet (a delta frame with no coded blocks).
+ * \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
+ * \retval 1 The packet is a header packet
+ * \retval 0 The packet is a video data packet.*/
+extern int th_packet_isheader(ogg_packet *_op);
+/**Determines whether a theora packet is a key frame or not.
+ * This function does no verification beyond checking the packet type and
+ *  key frame bits, so it should not be used for bitstream identification; use
+ *  th_decode_headerin() for that.
+ * As per the Theora specification, an empty (0-byte) packet is treated as a
+ *  delta frame (with no coded blocks).
+ * \param _op An <tt>ogg_packet</tt> containing encoded Theora data.
+ * \retval 1  The packet contains a key frame.
+ * \retval 0  The packet contains a delta frame.
+ * \retval -1 The packet is not a video data packet.*/
+extern int th_packet_iskeyframe(ogg_packet *_op);
+/*@}*/
+
+
+/**\name Functions for manipulating header data*/
+/*@{*/
+/**Initializes a th_info structure.
+ * This should be called on a freshly allocated #th_info structure before
+ *  attempting to use it.
+ * \param _info The #th_info struct to initialize.*/
+extern void th_info_init(th_info *_info);
+/**Clears a #th_info structure.
+ * This should be called on a #th_info structure after it is no longer
+ *  needed.
+ * \param _info The #th_info struct to clear.*/
+extern void th_info_clear(th_info *_info);
+
+/**Initialize a #th_comment structure.
+ * This should be called on a freshly allocated #th_comment structure
+ *  before attempting to use it.
+ * \param _tc The #th_comment struct to initialize.*/
+extern void th_comment_init(th_comment *_tc);
+/**Add a comment to an initialized #th_comment structure.
+ * \note Neither th_comment_add() nor th_comment_add_tag() support
+ *  comments containing null values, although the bitstream format does
+ *  support them.
+ * To add such comments you will need to manipulate the #th_comment
+ *  structure directly.
+ * \param _tc      The #th_comment struct to add the comment to.
+ * \param _comment Must be a null-terminated UTF-8 string containing the
+ *                  comment in "TAG=the value" form.*/
+extern void th_comment_add(th_comment *_tc, char *_comment);
+/**Add a comment to an initialized #th_comment structure.
+ * \note Neither th_comment_add() nor th_comment_add_tag() support
+ *  comments containing null values, although the bitstream format does
+ *  support them.
+ * To add such comments you will need to manipulate the #th_comment
+ *  structure directly.
+ * \param _tc  The #th_comment struct to add the comment to.
+ * \param _tag A null-terminated string containing the tag  associated with
+ *              the comment.
+ * \param _val The corresponding value as a null-terminated string.*/
+extern void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val);
+/**Look up a comment value by its tag.
+ * \param _tc    An initialized #th_comment structure.
+ * \param _tag   The tag to look up.
+ * \param _count The instance of the tag.
+ *               The same tag can appear multiple times, each with a distinct
+ *                value, so an index is required to retrieve them all.
+ *               The order in which these values appear is significant and
+ *                should be preserved.
+ *               Use th_comment_query_count() to get the legal range for
+ *                the \a _count parameter.
+ * \return A pointer to the queried tag's value.
+ *         This points directly to data in the #th_comment structure.
+ *         It should not be modified or freed by the application, and
+ *          modifications to the structure may invalidate the pointer.
+ * \retval NULL If no matching tag is found.*/
+extern char *th_comment_query(th_comment *_tc,char *_tag,int _count);
+/**Look up the number of instances of a tag.
+ * Call this first when querying for a specific tag and then iterate over the
+ *  number of instances with separate calls to th_comment_query() to
+ *  retrieve all the values for that tag in order.
+ * \param _tc    An initialized #th_comment structure.
+ * \param _tag   The tag to look up.
+ * \return The number on instances of this particular tag.*/
+extern int th_comment_query_count(th_comment *_tc,char *_tag);
+/**Clears a #th_comment structure.
+ * This should be called on a #th_comment structure after it is no longer
+ *  needed.
+ * It will free all memory used by the structure members.
+ * \param _tc The #th_comment struct to clear.*/
+extern void th_comment_clear(th_comment *_tc);
+/*@}*/
+/*@}*/
+
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/modules/libtheora/include/theora/config.h
+++ b/modules/libtheora/include/theora/config.h
@ -0,0 +1,80 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <machine/soundcard.h> header file. */
+/* #undef HAVE_MACHINE_SOUNDCARD_H */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <soundcard.h> header file. */
+/* #undef HAVE_SOUNDCARD_H */
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/soundcard.h> header file. */
+#define HAVE_SYS_SOUNDCARD_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+/* #undef NO_MINUS_C_MINUS_O */
+
+/* enable x86 assambler optimization */
+ 
+
+/* Name of package */
+#define PACKAGE "libtheora"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libtheora"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libtheora 1.0beta3"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libtheora"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.0beta3"
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define to exclude encode support from the build */
+ 
+
+/* Define to exclude floating point code from the build */
+/* #undef THEORA_DISABLE_FLOAT */
+
+/* make use of asm optimization */
+ 
+
+/* Version number of package */
+#define VERSION "1.0beta3"
--- a/modules/libtheora/include/theora/theora.h
+++ b/modules/libtheora/include/theora/theora.h
@ -0,0 +1,842 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.17 2003/12/06 18:06:19 arc Exp $
+
+ ********************************************************************/
+
+#ifndef _O_THEORA_H_
+#define _O_THEORA_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+#include <stddef.h>	/* for size_t */
+
+#include <ogg/ogg.h>
+
+/** \defgroup oldfuncs Legacy pre-1.0 C API */
+/*  @{ */
+
+/** \mainpage
+ * 
+ * \section intro Introduction
+ *
+ * This is the documentation for the libtheora legacy C API, declared in 
+ * the theora.h header, which describes the old interface used before
+ * the 1.0 release. This API was widely deployed for several years and
+ * remains supported, but for new code we recommend the cleaner API 
+ * declared in theoradec.h and theoraenc.h.
+ *
+ * libtheora is the reference implementation for
+ * <a href="http://www.theora.org/">Theora</a>, a free video codec.
+ * Theora is derived from On2's VP3 codec with improved integration for
+ * Ogg multimedia formats by <a href="http://www.xiph.org/">Xiph.Org</a>.
+ * 
+ * \section overview Overview
+ *
+ * This library will both decode and encode theora packets to/from raw YUV 
+ * frames.  In either case, the packets will most likely either come from or
+ * need to be embedded in an Ogg stream.  Use 
+ * <a href="http://xiph.org/ogg/">libogg</a> or 
+ * <a href="http://www.annodex.net/software/liboggz/index.html">liboggz</a>
+ * to extract/package these packets.
+ *
+ * \section decoding Decoding Process
+ *
+ * Decoding can be separated into the following steps:
+ * -# initialise theora_info and theora_comment structures using 
+ *    theora_info_init() and theora_comment_init():
+ \verbatim
+ theora_info     info;
+ theora_comment  comment;
+   
+ theora_info_init(&info);
+ theora_comment_init(&comment);
+ \endverbatim
+ * -# retrieve header packets from Ogg stream (there should be 3) and decode 
+ *    into theora_info and theora_comment structures using 
+ *    theora_decode_header().  See \ref identification for more information on 
+ *    identifying which packets are theora packets.
+ \verbatim
+ int i;
+ for (i = 0; i < 3; i++)
+ {
+   (get a theora packet "op" from the Ogg stream)
+   theora_decode_header(&info, &comment, op);
+ }
+ \endverbatim
+ * -# initialise the decoder based on the information retrieved into the
+ *    theora_info struct by theora_decode_header().  You will need a 
+ *    theora_state struct.
+ \verbatim
+ theora_state state;
+ 
+ theora_decode_init(&state, &info);
+ \endverbatim
+ * -# pass in packets and retrieve decoded frames!  See the yuv_buffer 
+ *    documentation for information on how to retrieve raw YUV data.
+ \verbatim
+ yuf_buffer buffer;
+ while (last packet was not e_o_s) {
+   (get a theora packet "op" from the Ogg stream)
+   theora_decode_packetin(&state, op);
+   theora_decode_YUVout(&state, &buffer);
+ }
+ \endverbatim
+ *  
+ *
+ * \subsection identification Identifying Theora Packets
+ *
+ * All streams inside an Ogg file have a unique serial_no attached to the 
+ * stream.  Typically, you will want to 
+ *  - retrieve the serial_no for each b_o_s (beginning of stream) page 
+ *    encountered within the Ogg file; 
+ *  - test the first (only) packet on that page to determine if it is a theora 
+ *    packet;
+ *  - once you have found a theora b_o_s page then use the retrieved serial_no 
+ *    to identify future packets belonging to the same theora stream.
+ * 
+ * Note that you \e cannot use theora_packet_isheader() to determine if a 
+ * packet is a theora packet or not, as this function does not perform any
+ * checking beyond whether a header bit is present.  Instead, use the
+ * theora_decode_header() function and check the return value; or examine the
+ * header bytes at the beginning of the Ogg page.
+ *
+ * \subsection example Example Decoder 
+ *
+ * See <a href="http://svn.xiph.org/trunk/theora/examples/dump_video.c">
+ * examples/dump_video.c</a> for a simple decoder implementation.
+ *
+ * \section encoding Encoding Process
+ *
+ * See <a href="http://svn.xiph.org/trunk/theora/examples/encoder_example.c">
+ * examples/encoder_example.c</a> for a simple encoder implementation.
+ */
+
+/** \file
+ * The libtheora pre-1.0 legacy C API.
+ */
+
+/**
+ * A YUV buffer for passing uncompressed frames to and from the codec.
+ * This holds a Y'CbCr frame in planar format. The CbCr planes can be
+ * subsampled and have their own separate dimensions and row stride
+ * offsets. Note that the strides may be negative in some 
+ * configurations. For theora the width and height of the largest plane
+ * must be a multiple of 16. The actual meaningful picture size and 
+ * offset are stored in the theora_info structure; frames returned by
+ * the decoder may need to be cropped for display.
+ *
+ * All samples are 8 bits. Within each plane samples are ordered by
+ * row from the top of the frame to the bottom. Within each row samples
+ * are ordered from left to right.
+ *
+ * During decode, the yuv_buffer struct is allocated by the user, but all
+ * fields (including luma and chroma pointers) are filled by the library.  
+ * These pointers address library-internal memory and their contents should 
+ * not be modified.
+ *
+ * Conversely, during encode the user allocates the struct and fills out all
+ * fields.  The user also manages the data addressed by the luma and chroma
+ * pointers.  See the encoder_example.c and dump_video.c example files in
+ * theora/examples/ for more information.
+ */
+typedef struct {
+    int   y_width;      /**< Width of the Y' luminance plane */
+    int   y_height;     /**< Height of the luminance plane */
+    int   y_stride;     /**< Offset in bytes between successive rows */
+
+    int   uv_width;     /**< Width of the Cb and Cr chroma planes */
+    int   uv_height;    /**< Height of the chroma planes */
+    int   uv_stride;    /**< Offset between successive chroma rows */
+    unsigned char *y;   /**< Pointer to start of luminance data */
+    unsigned char *u;   /**< Pointer to start of Cb data */
+    unsigned char *v;   /**< Pointer to start of Cr data */
+
+} yuv_buffer;
+
+/**
+ * A Colorspace.
+ */
+typedef enum {
+  OC_CS_UNSPECIFIED,    /**< The colorspace is unknown or unspecified */
+  OC_CS_ITU_REC_470M,   /**< This is the best option for 'NTSC' content */
+  OC_CS_ITU_REC_470BG,  /**< This is the best option for 'PAL' content */
+  OC_CS_NSPACES         /**< This marks the end of the defined colorspaces */
+} theora_colorspace;
+
+/**
+ * A Chroma subsampling
+ *
+ * These enumerate the available chroma subsampling options supported
+ * by the theora format. See Section 4.4 of the specification for
+ * exact definitions.
+ */
+typedef enum {
+  OC_PF_420,    /**< Chroma subsampling by 2 in each direction (4:2:0) */
+  OC_PF_RSVD,   /**< Reserved value */
+  OC_PF_422,    /**< Horizonatal chroma subsampling by 2 (4:2:2) */
+  OC_PF_444,    /**< No chroma subsampling at all (4:4:4) */
+} theora_pixelformat;
+
+/**
+ * Theora bitstream info.
+ * Contains the basic playback parameters for a stream,
+ * corresponding to the initial 'info' header packet.
+ * 
+ * Encoded theora frames must be a multiple of 16 in width and height.
+ * To handle other frame sizes, a crop rectangle is specified in
+ * frame_height and frame_width, offset_x and * offset_y. The offset
+ * and size should still be a multiple of 2 to avoid chroma sampling
+ * shifts. Offset values in this structure are measured from the
+ * upper left of the image.
+ *
+ * Frame rate, in frames per second, is stored as a rational
+ * fraction. Aspect ratio is also stored as a rational fraction, and
+ * refers to the aspect ratio of the frame pixels, not of the
+ * overall frame itself.
+ * 
+ * See <a href="http://svn.xiph.org/trunk/theora/examples/encoder_example.c">
+ * examples/encoder_example.c</a> for usage examples of the
+ * other paramters and good default settings for the encoder parameters.
+ */
+typedef struct {
+  ogg_uint32_t  width;		/**< encoded frame width  */
+  ogg_uint32_t  height;		/**< encoded frame height */
+  ogg_uint32_t  frame_width;	/**< display frame width  */
+  ogg_uint32_t  frame_height;	/**< display frame height */
+  ogg_uint32_t  offset_x;	/**< horizontal offset of the displayed frame */
+  ogg_uint32_t  offset_y;	/**< vertical offset of the displayed frame */
+  ogg_uint32_t  fps_numerator;	    /**< frame rate numerator **/
+  ogg_uint32_t  fps_denominator;    /**< frame rate denominator **/
+  ogg_uint32_t  aspect_numerator;   /**< pixel aspect ratio numerator */
+  ogg_uint32_t  aspect_denominator; /**< pixel aspect ratio denominator */
+  theora_colorspace colorspace;	    /**< colorspace */
+  int           target_bitrate;	    /**< nominal bitrate in bits per second */
+  int           quality;  /**< Nominal quality setting, 0-63 */
+  int           quick_p;  /**< Quick encode/decode */
+
+  /* decode only */
+  unsigned char version_major;
+  unsigned char version_minor;
+  unsigned char version_subminor;
+
+  void *codec_setup;
+
+  /* encode only */
+  int           dropframes_p;
+  int           keyframe_auto_p;
+  ogg_uint32_t  keyframe_frequency;
+  ogg_uint32_t  keyframe_frequency_force;  /* also used for decode init to
+                                              get granpos shift correct */
+  ogg_uint32_t  keyframe_data_target_bitrate;
+  ogg_int32_t   keyframe_auto_threshold;
+  ogg_uint32_t  keyframe_mindistance;
+  ogg_int32_t   noise_sensitivity;
+  ogg_int32_t   sharpness;
+
+  theora_pixelformat pixelformat;	/**< chroma subsampling mode to expect */
+
+} theora_info;
+
+/** Codec internal state and context.
+ */
+typedef struct{
+  theora_info *i;
+  ogg_int64_t granulepos;
+
+  void *internal_encode;
+  void *internal_decode;
+
+} theora_state;
+
+/** 
+ * Comment header metadata.
+ *
+ * This structure holds the in-stream metadata corresponding to
+ * the 'comment' header packet.
+ *
+ * Meta data is stored as a series of (tag, value) pairs, in
+ * length-encoded string vectors. The first occurence of the 
+ * '=' character delimits the tag and value. A particular tag
+ * may occur more than once. The character set encoding for
+ * the strings is always UTF-8, but the tag names are limited
+ * to case-insensitive ASCII. See the spec for details.
+ *
+ * In filling in this structure, theora_decode_header() will
+ * null-terminate the user_comment strings for safety. However,
+ * the bitstream format itself treats them as 8-bit clean,
+ * and so the length array should be treated as authoritative
+ * for their length.
+ */
+typedef struct theora_comment{
+  char **user_comments;         /**< An array of comment string vectors */
+  int   *comment_lengths;       /**< An array of corresponding string vector lengths in bytes */
+  int    comments;              /**< The total number of comment string vectors */
+  char  *vendor;                /**< The vendor string identifying the encoder, null terminated */
+
+} theora_comment;
+
+
+/**\name theora_control() codes */
+
+/**\anchor decctlcodes
+ * These are the available request codes for theora_control()
+ * when called with a decoder instance.
+ * By convention, these are odd, to distinguish them from the
+ *  \ref encctlcodes "encoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+
+/**Get the maximum post-processing level.
+ * The decoder supports a post-processing filter that can improve
+ * the appearance of the decoded images. This returns the highest
+ * level setting for this post-processor, corresponding to maximum
+ * improvement and computational expense.
+ */
+#define TH_DECCTL_GET_PPLEVEL_MAX (1)
+
+/**Set the post-processing level.
+ * Sets the level of post-processing to use when decoding the 
+ * compressed stream. This must be a value between zero (off)
+ * and the maximum returned by TH_DECCTL_GET_PPLEVEL_MAX.
+ */
+#define TH_DECCTL_SET_PPLEVEL (3)
+
+/**Set the granule position.
+ * Call this after a seek, to update the internal granulepos
+ * in the decoder, to insure that subsequent frames are marked
+ * properly. If you track timestamps yourself and do not use
+ * the granule postion returned by the decoder, then you do
+ * not need to use this control.
+ */
+#define TH_DECCTL_SET_GRANPOS (5)
+
+
+/**\anchor encctlcodes
+ * These are the available request codes for theora_control()
+ * when called with an encoder instance.
+ * By convention, these are even, to distinguish them from the
+ *  \ref decctlcodes "decoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+/*@{*/
+/**Sets the Huffman tables to use.
+ * The tables are copied, not stored by reference, so they can be freed after
+ *  this call.
+ * <tt>NULL</tt> may be specified to revert to the default tables.
+ *
+ * \param[in] buf <tt>#th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS]</tt>
+ * \retval TH_FAULT  \a theora_state is <tt>NULL</tt>.
+ * \retval TH_EINVAL Encoding has already begun or one or more of the given
+ *                     tables is not full or prefix-free, \a buf is
+ *                     <tt>NULL</tt> and \a buf_sz is not zero, or \a buf is
+ *                     non-<tt>NULL</tt> and \a buf_sz is not
+ *                     <tt>sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS</tt>.
+ * \retval TH_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_HUFFMAN_CODES (0)
+/**Sets the quantization parameters to use.
+ * The parameters are copied, not stored by reference, so they can be freed
+ *  after this call.
+ * <tt>NULL</tt> may be specified to revert to the default parameters.
+ * For the current encoder, <tt>scale[ci!=0][qi]</tt> must be no greater than
+ *  <tt>scale[ci!=0][qi-1]</tt> and <tt>base[qti][pli][qi][ci]</tt> must be no
+ *  greater than <tt>base[qti][pli][qi-1][ci]</tt>.
+ * These two conditions ensure that the actual quantizer for a given \a qti,
+ *  \a pli, and \a ci does not increase as \a qi increases.
+ *
+ * \param[in] buf #th_quant_info
+ * \retval TH_FAULT  \a theora_state is <tt>NULL</tt>.
+ * \retval TH_EINVAL Encoding has already begun, the quantization parameters
+ *                    do not meet one of the above stated conditions, \a buf
+ *                    is <tt>NULL</tt> and \a buf_sz is not zero, or \a buf
+ *                    is non-<tt>NULL</tt> and \a buf_sz is not
+ *                    <tt>sizeof(#th_quant_info)</tt>.
+ * \retval TH_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_QUANT_PARAMS (2)
+/**Sets the maximum distance between key frames.
+ * This can be changed during an encode, but will be bounded by
+ *  <tt>1<<th_info#keyframe_granule_shift</tt>.
+ * If it is set before encoding begins, th_info#keyframe_granule_shift will
+ *  be enlarged appropriately.
+ *
+ * \param[in]  buf <tt>ogg_uint32_t</tt>: The maximum distance between key
+ *                   frames.
+ * \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
+ * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
+ * \retval TH_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
+/**Disables any encoder features that would prevent lossless transcoding back
+ *  to VP3.
+ * This primarily means disabling block-level QI values and not using 4MV mode
+ *  when any of the luma blocks in a macro block are not coded.
+ * It also includes using the VP3 quantization tables and Huffman codes; if you
+ *  set them explicitly after calling this function, the resulting stream will
+ *  not be VP3-compatible.
+ * If you enable VP3-compatibility when encoding 4:2:2 or 4:4:4 source
+ *  material, or when using a picture region smaller than the full frame (e.g.
+ *  a non-multiple-of-16 width or height), then non-VP3 bitstream features will
+ *  still be disabled, but the stream will still not be VP3-compatible, as VP3
+ *  was not capable of encoding such formats.
+ * If you call this after encoding has already begun, then the quantization
+ *  tables and codebooks cannot be changed, but the frame-level features will
+ *  be enabled or disabled as requested.
+ *
+ * \param[in]  buf <tt>int</tt>: a non-zero value to enable VP3 compatibility,
+ *                   or 0 to disable it (the default).
+ * \param[out] buf <tt>int</tt>: 1 if all bitstream features required for
+ *                   VP3-compatibility could be set, and 0 otherwise.
+ *                  The latter will be returned if the pixel format is not
+ *                   4:2:0, the picture region is smaller than the full frame,
+ *                   or if encoding has begun, preventing the quantization
+ *                   tables and codebooks from being set.
+ * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SET_VP3_COMPATIBLE (10)
+/**Gets the maximum speed level.
+ * Higher speed levels favor quicker encoding over better quality per bit.
+ * Depending on the encoding mode, and the internal algorithms used, quality
+ *  may actually improve, but in this case bitrate will also likely increase.
+ * In any case, overall rate/distortion performance will probably decrease.
+ * The maximum value, and the meaning of each value, may change depending on
+ *  the current encoding mode (VBR vs. CQI, etc.).
+ *
+ * \param[out] buf int: The maximum encoding speed level.
+ * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_IMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_GET_SPLEVEL_MAX (12)
+/**Sets the speed level.
+ * By default a speed value of 1 is used.
+ *
+ * \param[in] buf int: The new encoding speed level.
+ *                      0 is slowest, larger values use less CPU.
+ * \retval TH_FAULT  \a theora_state or \a buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                    encoding speed level is out of bounds.
+ *                   The maximum encoding speed level may be
+ *                    implementation- and encoding mode-specific, and can be
+ *                    obtained via #TH_ENCCTL_GET_SPLEVEL_MAX.
+ * \retval TH_IMPL   Not supported by this implementation in the current
+ *                    encoding mode.*/
+#define TH_ENCCTL_SET_SPLEVEL (14)
+/**Puts the encoder in VBR mode.
+ * This can be done at any time during the encoding process, with different
+ *  configuration parameters, to encode different regions of the video segment
+ *  with different qualities.
+ * See the #th_info struct documentation for details on how the default
+ *  encoding mode is chosen.
+ *
+ * \param[in] buf <tt>#th_vbr_cfg</tt>: the configuration parameters.
+ *                 This may be <tt>NULL</tt>, in which case the current VBR
+ *                  configuration is unchanged.
+ *                 The default is to use the QI setting passed in via the
+ *                  #th_info struct when the encoder was initialized, with a
+ *                  full range of admissible quantizers.
+ * \retval OC_EFAULT \a theora_state is <tt>NULL</tt>.
+ * \retval TH_EINVAL The configuration parameters do not meet one of their
+ *                    stated requirements, \a buf is <tt>NULL</tt> and
+ *                    \a buf_sz is not zero, or \a buf is non-<tt>NULL</tt>
+ *                    and \a buf_sz is not <tt>sizeof(#th_vbr_cfg)</tt>.
+ * \retval TH_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SETUP_VBR (16)
+/**Puts the encoder in CQI mode.
+ * This can be done at any time during the encoding process, with different QI
+ *  values.
+ * See the #th_info struct documentation for details on how the default
+ *  encoding mode is chosen.
+ *
+ * \param[in] buf <tt>#th_cqi_cfg</tt>: the configuration parameters.
+ *                 This may be <tt>NULL</tt>, in which case the current CQI
+ *                  configuration is unchanged.
+ *                 The default is to use the QI setting passed in via the
+ *                  #th_info struct when the encoder was initialized.
+ * \retval OC_EFAULT \a theora_state is <tt>NULL</tt>.
+ * \retval TH_EINVAL \a buf_sz is not <tt>sizeof(#th_cqi_cfg)</tt>.
+ * \retval TH_IMPL   Not supported by this implementation.*/
+#define TH_ENCCTL_SETUP_CQI (18)
+/*@}*/
+
+#define OC_FAULT       -1       /**< General failure */
+#define OC_EINVAL      -10      /**< Library encountered invalid internal data */
+#define OC_DISABLED    -11      /**< Requested action is disabled */
+#define OC_BADHEADER   -20      /**< Header packet was corrupt/invalid */
+#define OC_NOTFORMAT   -21      /**< Packet is not a theora packet */
+#define OC_VERSION     -22      /**< Bitstream version is not handled */
+#define OC_IMPL        -23      /**< Feature or action not implemented */
+#define OC_BADPACKET   -24      /**< Packet is corrupt */
+#define OC_NEWPACKET   -25      /**< Packet is an (ignorable) unhandled extension */
+#define OC_DUPFRAME    1        /**< Packet is a dropped frame */
+
+/** 
+ * Retrieve a human-readable string to identify the encoder vendor and version.
+ * \returns A version string.
+ */
+extern const char *theora_version_string(void);
+
+/**
+ * Retrieve a 32-bit version number.
+ * This number is composed of a 16-bit major version, 8-bit minor version
+ * and 8 bit sub-version, composed as follows:
+<pre>
+   (VERSION_MAJOR<<16) + (VERSION_MINOR<<8) + (VERSION_SUB)
+</pre>
+* \returns The version number.
+*/
+extern ogg_uint32_t theora_version_number(void);
+
+/**
+ * Initialize the theora encoder.
+ * \param th The theora_state handle to initialize for encoding.
+ * \param ti A theora_info struct filled with the desired encoding parameters.
+ * \retval 0 Success
+ */
+extern int theora_encode_init(theora_state *th, theora_info *ti);
+
+/**
+ * Submit a YUV buffer to the theora encoder.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param yuv A buffer of YUV data to encode.  Note that both the yuv_buffer
+ *            struct and the luma/chroma buffers within should be allocated by
+ *            the user.
+ * \retval OC_EINVAL Encoder is not ready, or is finished.
+ * \retval -1 The size of the given frame differs from those previously input
+ * \retval 0 Success
+ */
+extern int theora_encode_YUVin(theora_state *t, yuv_buffer *yuv);
+
+/**
+ * Request the next packet of encoded video. 
+ * The encoded data is placed in a user-provided ogg_packet structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param last_p whether this is the last packet the encoder should produce.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to encoded
+ *           data. The memory for the encoded data is owned by libtheora.
+ * \retval 0 No internal storage exists OR no packet is ready
+ * \retval -1 The encoding process has completed
+ * \retval 1 Success
+ */
+extern int theora_encode_packetout( theora_state *t, int last_p,
+                                    ogg_packet *op);
+
+/**
+ * Request a packet containing the initial header.
+ * A pointer to the header data is placed in a user-provided ogg_packet
+ * structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the header
+ *           data. The memory for the header data is owned by libtheora.
+ * \retval 0 Success
+ */
+extern int theora_encode_header(theora_state *t, ogg_packet *op);
+
+/**
+ * Request a comment header packet from provided metadata.
+ * A pointer to the comment data is placed in a user-provided ogg_packet
+ * structure.
+ * \param tc A theora_comment structure filled with the desired metadata
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the encoded
+ *           comment data. The memory for the comment data is owned by
+ *           libtheora.
+ * \retval 0 Success
+ */
+extern int theora_encode_comment(theora_comment *tc, ogg_packet *op);
+
+/**
+ * Request a packet containing the codebook tables for the stream.
+ * A pointer to the codebook data is placed in a user-provided ogg_packet
+ * structure.
+ * \param t A theora_state handle previously initialized for encoding.
+ * \param op An ogg_packet structure to fill. libtheora will set all
+ *           elements of this structure, including a pointer to the codebook
+ *           data. The memory for the header data is owned by libtheora.
+ * \retval 0 Success
+ */
+extern int theora_encode_tables(theora_state *t, ogg_packet *op);
+
+/**
+ * Decode an Ogg packet, with the expectation that the packet contains
+ * an initial header, comment data or codebook tables.
+ *
+ * \param ci A theora_info structure to fill. This must have been previously
+ *           initialized with theora_info_init(). If \a op contains an initial
+ *           header, theora_decode_header() will fill \a ci with the
+ *           parsed header values. If \a op contains codebook tables,
+ *           theora_decode_header() will parse these and attach an internal
+ *           representation to \a ci->codec_setup.
+ * \param cc A theora_comment structure to fill. If \a op contains comment
+ *           data, theora_decode_header() will fill \a cc with the parsed
+ *           comments.
+ * \param op An ogg_packet structure which you expect contains an initial
+ *           header, comment data or codebook tables.
+ *
+ * \retval OC_BADHEADER \a op is NULL; OR the first byte of \a op->packet
+ *                      has the signature of an initial packet, but op is
+ *                      not a b_o_s packet; OR this packet has the signature
+ *                      of an initial header packet, but an initial header
+ *                      packet has already been seen; OR this packet has the
+ *                      signature of a comment packet, but the initial header
+ *                      has not yet been seen; OR this packet has the signature
+ *                      of a comment packet, but contains invalid data; OR
+ *                      this packet has the signature of codebook tables,
+ *                      but the initial header or comments have not yet
+ *                      been seen; OR this packet has the signature of codebook
+ *                      tables, but contains invalid data;
+ *                      OR the stream being decoded has a compatible version
+ *                      but this packet does not have the signature of a
+ *                      theora initial header, comments, or codebook packet
+ * \retval OC_VERSION   The packet data of \a op is an initial header with
+ *                      a version which is incompatible with this version of
+ *                      libtheora.
+ * \retval OC_NEWPACKET the stream being decoded has an incompatible (future)
+ *                      version and contains an unknown signature.
+ * \retval 0            Success
+ *
+ * \note The normal usage is that theora_decode_header() be called on the
+ *       first three packets of a theora logical bitstream in succession.
+ */
+extern int theora_decode_header(theora_info *ci, theora_comment *cc,
+                                ogg_packet *op);
+
+/**
+ * Initialize a theora_state handle for decoding.
+ * \param th The theora_state handle to initialize.
+ * \param c  A theora_info struct filled with the desired decoding parameters.
+ *           This is of course usually obtained from a previous call to
+ *           theora_decode_header().
+ * \retval 0 Success
+ */
+extern int theora_decode_init(theora_state *th, theora_info *c);
+
+/**
+ * Input a packet containing encoded data into the theora decoder.
+ * \param th A theora_state handle previously initialized for decoding.
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 0 Success
+ * \retval OC_BADPACKET \a op does not contain encoded video data
+ */
+extern int theora_decode_packetin(theora_state *th,ogg_packet *op);
+
+/**
+ * Output the next available frame of decoded YUV data.
+ * \param th A theora_state handle previously initialized for decoding.
+ * \param yuv A yuv_buffer in which libtheora should place the decoded data.
+ *            Note that the buffer struct itself is allocated by the user, but
+ *            that the luma and chroma pointers will be filled in by the 
+ *            library.  Also note that these luma and chroma regions should be 
+ *            considered read-only by the user.
+ * \retval 0 Success
+ */
+extern int theora_decode_YUVout(theora_state *th,yuv_buffer *yuv);
+
+/**
+ * Report whether a theora packet is a header or not
+ * This function does no verification beyond checking the header
+ * flag bit so it should not be used for bitstream identification;
+ * use theora_decode_header() for that.
+ *
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 1 The packet is a header packet
+ * \retval 0 The packet is not a header packet (and so contains frame data)
+ *
+ * Thus function was added in the 1.0alpha4 release.
+ */
+extern int theora_packet_isheader(ogg_packet *op);
+
+/**
+ * Report whether a theora packet is a keyframe or not
+ *
+ * \param op An ogg_packet containing encoded theora data.
+ * \retval 1 The packet contains a keyframe image
+ * \retval 0 The packet is contains an interframe delta
+ * \retval -1 The packet is not an image data packet at all
+ *
+ * Thus function was added in the 1.0alpha4 release.
+ */
+extern int theora_packet_iskeyframe(ogg_packet *op);
+
+/**
+ * Report the granulepos shift radix
+ *
+ * When embedded in Ogg, Theora uses a two-part granulepos, 
+ * splitting the 64-bit field into two pieces. The more-significant
+ * section represents the frame count at the last keyframe,
+ * and the less-significant section represents the count of
+ * frames since the last keyframe. In this way the overall
+ * field is still non-decreasing with time, but usefully encodes
+ * a pointer to the last keyframe, which is necessary for
+ * correctly restarting decode after a seek. 
+ *
+ * This function reports the number of bits used to represent
+ * the distance to the last keyframe, and thus how the granulepos
+ * field must be shifted or masked to obtain the two parts.
+ * 
+ * Since libtheora returns compressed data in an ogg_packet
+ * structure, this may be generally useful even if the Theora
+ * packets are not being used in an Ogg container. 
+ *
+ * \param ti A previously initialized theora_info struct
+ * \returns The bit shift dividing the two granulepos fields
+ *
+ * This function was added in the 1.0alpha5 release.
+ */
+int theora_granule_shift(theora_info *ti);
+
+/**
+ * Convert a granulepos to an absolute frame index, starting at 0.
+ * The granulepos is interpreted in the context of a given theora_state handle.
+ * 
+ * Note that while the granulepos encodes the frame count (i.e. starting
+ * from 1) this call returns the frame index, starting from zero. Thus
+ * One can calculate the presentation time by multiplying the index by
+ * the rate.
+ *
+ * \param th A previously initialized theora_state handle (encode or decode)
+ * \param granulepos The granulepos to convert.
+ * \returns The frame index corresponding to \a granulepos.
+ * \retval -1 The given granulepos is undefined (i.e. negative)
+ *
+ * Thus function was added in the 1.0alpha4 release.
+ */
+extern ogg_int64_t theora_granule_frame(theora_state *th,ogg_int64_t granulepos);
+
+/**
+ * Convert a granulepos to absolute time in seconds. The granulepos is
+ * interpreted in the context of a given theora_state handle, and gives
+ * the end time of a frame's presentation as used in Ogg mux ordering.
+ *
+ * \param th A previously initialized theora_state handle (encode or decode)
+ * \param granulepos The granulepos to convert.
+ * \returns The absolute time in seconds corresponding to \a granulepos.
+ *          This is the "end time" for the frame, or the latest time it should
+ *           be displayed.
+ *          It is not the presentation time.
+ * \retval -1. The given granulepos is undefined (i.e. negative), or
+ * \retval -1. The function has been disabled because floating 
+ *              point support is not available.
+ */
+extern double theora_granule_time(theora_state *th,ogg_int64_t granulepos);
+
+/**
+ * Initialize a theora_info structure. All values within the given theora_info
+ * structure are initialized, and space is allocated within libtheora for
+ * internal codec setup data.
+ * \param c A theora_info struct to initialize.
+ */
+extern void theora_info_init(theora_info *c);
+
+/**
+ * Clear a theora_info structure. All values within the given theora_info
+ * structure are cleared, and associated internal codec setup data is freed.
+ * \param c A theora_info struct to initialize.
+ */
+extern void theora_info_clear(theora_info *c);
+
+/**
+ * Free all internal data associated with a theora_state handle.
+ * \param t A theora_state handle.
+ */
+extern void theora_clear(theora_state *t);
+
+/**
+ * Initialize an allocated theora_comment structure
+ * \param tc An allocated theora_comment structure 
+ **/
+extern void theora_comment_init(theora_comment *tc);
+
+/**
+ * Add a comment to an initialized theora_comment structure
+ * \param tc A previously initialized theora comment structure
+ * \param comment A null-terminated string encoding the comment in the form
+ *                "TAG=the value"
+ *
+ * Neither theora_comment_add() nor theora_comment_add_tag() support
+ * comments containing null values, although the bitstream format
+ * supports this. To add such comments you will need to manipulate
+ * the theora_comment structure directly.
+ **/
+
+extern void theora_comment_add(theora_comment *tc, char *comment);
+
+/**
+ * Add a comment to an initialized theora_comment structure.
+ * \param tc A previously initialized theora comment structure
+ * \param tag A null-terminated string containing the tag 
+ *            associated with the comment.
+ * \param value The corresponding value as a null-terminated string
+ *
+ * Neither theora_comment_add() nor theora_comment_add_tag() support
+ * comments containing null values, although the bitstream format
+ * supports this. To add such comments you will need to manipulate
+ * the theora_comment structure directly.
+ **/
+extern void theora_comment_add_tag(theora_comment *tc,
+                                       char *tag, char *value);
+
+/**
+ * Look up a comment value by tag.
+ * \param tc Tn initialized theora_comment structure
+ * \param tag The tag to look up
+ * \param count The instance of the tag. The same tag can appear multiple
+ *              times, each with a distinct and ordered value, so an index
+ *              is required to retrieve them all.
+ * \returns A pointer to the queried tag's value
+ * \retval NULL No matching tag is found
+ *
+ * \note Use theora_comment_query_count() to get the legal range for the
+ * count parameter.
+ **/
+
+extern char *theora_comment_query(theora_comment *tc, char *tag, int count);
+
+/** Look up the number of instances of a tag.
+ *  \param tc An initialized theora_comment structure
+ *  \param tag The tag to look up
+ *  \returns The number on instances of a particular tag.
+ * 
+ *  Call this first when querying for a specific tag and then interate
+ *  over the number of instances with separate calls to 
+ *  theora_comment_query() to retrieve all instances in order.
+ **/
+extern int   theora_comment_query_count(theora_comment *tc, char *tag);
+
+/**
+ * Clear an allocated theora_comment struct so that it can be freed.
+ * \param tc An allocated theora_comment structure.
+ **/
+extern void  theora_comment_clear(theora_comment *tc);
+
+/**Encoder control function.
+ * This is used to provide advanced control the encoding process.
+ * \param th     A #theora_state handle.
+ * \param req    The control code to process.
+ *                See \ref encctlcodes "the list of available control codes"
+ *                 for details.
+ * \param buf    The parameters for this control code.
+ * \param buf_sz The size of the parameter buffer.*/
+extern int theora_control(theora_state *th,int req,void *buf,size_t buf_sz);
+
+/* @} */ /* end oldfuncs doxygen group */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _O_THEORA_H_ */
--- a/modules/libtheora/include/theora/theoradec.h
+++ b/modules/libtheora/include/theora/theoradec.h
@ -0,0 +1,290 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: theora.h,v 1.8 2004/03/15 22:17:32 derf Exp $
+
+ ********************************************************************/
+
+/**\file
+ * The <tt>libtheoradec</tt> C decoding API.*/
+
+#if !defined(_O_THEORA_THEORADEC_H_)
+# define _O_THEORA_THEORADEC_H_ (1)
+# include <ogg/ogg.h>
+# include "codec.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+
+/**\name th_decode_ctl() codes
+ * \anchor decctlcodes
+ * These are the available request codes for th_decode_ctl().
+ * By convention, these are odd, to distinguish them from the
+ *  \ref encctlcodes "encoder control codes".
+ * Keep any experimental or vendor-specific values above \c 0x8000.*/
+/*@{*/
+/**Gets the maximum post-processing level.
+ *
+ * \param[out] _buf int: The maximum post-processing level.
+ * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(int)</tt>.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_DECCTL_GET_PPLEVEL_MAX (1)
+/**Sets the post-processing level.
+ * By default, post-processing is disabled.
+ *
+ * \param[in] _buf int: The new post-processing level.
+ *                      0 to disable; larger values use more CPU.
+ * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(int)</tt>, or the
+ *                     post-processing level is out of bounds.
+ *                    The maximum post-processing level may be
+ *                     implementation-specific, and can be obtained via
+ *                     #TH_DECCTL_GET_PPLEVEL_MAX.
+ * \retval TH_EIMPL   Not supported by this implementation.*/
+#define TH_DECCTL_SET_PPLEVEL (3)
+/**Sets the granule position.
+ * Call this after a seek, before decoding the first frame, to ensure that the
+ *  proper granule position is returned for all subsequent frames.
+ * If you track timestamps yourself and do not use the granule position
+ *  returned by the decoder, then you need not call this function.
+ *
+ * \param[in] _buf <tt>ogg_int64_t</tt>: The granule position of the next
+ *                  frame.
+ * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not <tt>sizeof(ogg_int64_t)</tt>, or the
+ *                     granule position is negative.*/
+#define TH_DECCTL_SET_GRANPOS (5)
+/**Sets the striped decode callback function.
+ * If set, this function will be called as each piece of a frame is fully
+ *  decoded in th_decode_packetin().
+ * You can pass in a #th_stripe_callback with
+ *  th_stripe_callback#stripe_decoded set to <tt>NULL</tt> to disable the
+ *  callbacks at any point.
+ * Enabling striped decode does not prevent you from calling
+ *  th_decode_ycbcr_out() after the frame is fully decoded.
+ *
+ * \param[in]  _buf #th_stripe_callback: The callback parameters.
+ * \retval TH_EFAULT  \a _dec_ctx or \a _buf is <tt>NULL</tt>.
+ * \retval TH_EINVAL  \a _buf_sz is not
+ *                     <tt>sizeof(th_stripe_callback)</tt>.*/
+#define TH_DECCTL_SET_STRIPE_CB (7)
+/*@}*/
+
+
+
+/**A callback function for striped decode.
+ * This is a function pointer to an application-provided function that will be
+ *  called each time a section of the image is fully decoded in
+ *  th_decode_packetin().
+ * This allows the application to process the section immediately, while it is
+ *  still in cache.
+ * Note that the frame is decoded bottom to top, so \a _yfrag0 will steadily
+ *  decrease with each call until it reaches 0, at which point the full frame
+ *  is decoded.
+ * The number of fragment rows made available in each call depends on the pixel
+ *  format and the number of post-processing filters enabled, and may not even
+ *  be constant for the entire frame.
+ * If a non-<tt>NULL</tt> \a _granpos pointer is passed to
+ *  th_decode_packetin(), the granule position for the frame will be stored
+ *  in it before the first callback is made.
+ * If an entire frame is dropped (a 0-byte packet), then no callbacks will be
+ *  made at all for that frame.
+ * \param _ctx       An application-provided context pointer.
+ * \param _buf       The image buffer for the decoded frame.
+ * \param _yfrag0    The Y coordinate of the first row of 8x8 fragments
+ *                    decoded.
+ *                   Multiply this by 8 to obtain the pixel row number in the
+ *                    luma plane.
+ *                   If the chroma planes are subsampled in the Y direction,
+ *                    this will always be divisible by two.
+ * \param _yfrag_end The Y coordinate of the first row of 8x8 fragments past
+ *                    the newly decoded section.
+ *                   If the chroma planes are subsampled in the Y direction,
+ *                    this will always be divisible by two.
+ *                   I.e., this section contains fragment rows
+ *                    <tt>\a _yfrag0 ...\a _yfrag_end -1</tt>.*/
+typedef void (*th_stripe_decoded_func)(void *_ctx,th_ycbcr_buffer _buf,
+ int _yfrag0,int _yfrag_end);
+
+/**The striped decode callback data to pass to #TH_DECCTL_SET_STRIPE_CB.*/
+typedef struct{
+  /**An application-provided context pointer.
+   * This will be passed back verbatim to the application.*/
+  void                   *ctx;
+  /**The callback function pointer.*/
+  th_stripe_decoded_func  stripe_decoded;
+}th_stripe_callback;
+
+
+
+/**\name Decoder state
+   The following data structures are opaque, and their contents are not
+    publicly defined by this API.
+   Referring to their internals directly is unsupported, and may break without
+    warning.*/
+/*@{*/
+/**The decoder context.*/
+typedef struct th_dec_ctx    th_dec_ctx;
+/**Setup information.
+   This contains auxiliary information (Huffman tables and quantization
+    parameters) decoded from the setup header by th_decode_headerin() to be
+    passed to th_decode_alloc().
+   It can be re-used to initialize any number of decoders, and can be freed
+    via th_setup_free() at any time.*/
+typedef struct th_setup_info th_setup_info;
+/*@}*/
+
+
+
+/**\defgroup decfuncs Functions for Decoding*/
+/*@{*/
+/**\name Functions for decoding
+ * You must link to <tt>libtheoradec</tt> if you use any of the 
+ * functions in this section.
+ *
+ * The functions are listed in the order they are used in a typical decode.
+ * The basic steps are:
+ * - Parse the header packets by repeatedly calling th_decode_headerin().
+ * - Allocate a #th_dec_ctx handle with th_decode_alloc().
+ * - Call th_setup_free() to free any memory used for codec setup
+ *    information.
+ * - Perform any additional decoder configuration with th_decode_ctl().
+ * - For each video data packet:
+ *   - Submit the packet to the decoder via th_decode_packetin().
+ *   - Retrieve the uncompressed video data via th_decode_ycbcr_out().
+ * - Call th_decode_free() to release all decoder memory.*/
+/*@{*/
+/**Decodes the header packets of a Theora stream.
+ * This should be called on the initial packets of the stream, in succession,
+ *  until it returns <tt>0</tt>, indicating that all headers have been
+ *  processed, or an error is encountered.
+ * At least three header packets are required, and additional optional header
+ *  packets may follow.
+ * This can be used on the first packet of any logical stream to determine if
+ *  that stream is a Theora stream.
+ * \param _info  A #th_info structure to fill in.
+ *               This must have been previously initialized with
+ *                th_info_init().
+ *               The application may immediately begin using the contents of
+ *                this structure after the first header is decoded, though it
+ *                must continue to be passed in on all subsequent calls.
+ * \param _tc    A #th_comment structure to fill in.
+ *               The application may immediately begin using the contents of
+ *                this structure after the second header is decoded, though it
+ *                must continue to be passed in on all subsequent calls.
+ * \param _setup Returns a pointer to additional, private setup information
+ *                needed by the decoder.
+ *               The contents of this pointer must be initialized to
+ *                <tt>NULL</tt> on the first call, and the returned value must
+ *                continue to be passed in on all subsequent calls.
+ * \param _op    An <tt>ogg_packet</tt> structure which contains one of the
+ *                initial packets of an Ogg logical stream.
+ * \return A positive value indicates that a Theora header was successfully
+ *          processed.
+ * \retval 0             The first video data packet was encountered after all
+ *                        required header packets were parsed.
+ *                       The packet just passed in on this call should be saved
+ *                        and fed to th_decode_packetin() to begin decoding
+ *                        video data.
+ * \retval TH_EFAULT     One of \a _info, \a _tc, or \a _setup was
+ *                        <tt>NULL</tt>.
+ * \retval TH_EBADHEADER \a _op was <tt>NULL</tt>, the packet was not the next
+ *                        header packet in the expected sequence, or the format
+ *                        of the header data was invalid.
+ * \retval TH_EVERSION   The packet data was a Theora info header, but for a
+ *                        bitstream version not decodable with this version of
+ *                        <tt>libtheoradec</tt>.
+ * \retval TH_ENOTFORMAT The packet was not a Theora header.
+ */
+extern int th_decode_headerin(th_info *_info,th_comment *_tc,
+ th_setup_info **_setup,ogg_packet *_op);
+/**Allocates a decoder instance.
+ * \param _info  A #th_info struct filled via th_decode_headerin().
+ * \param _setup A #th_setup_info handle returned via
+ *                th_decode_headerin().
+ * \return The initialized #th_dec_ctx handle.
+ * \retval NULL If the decoding parameters were invalid.*/
+extern th_dec_ctx *th_decode_alloc(const th_info *_info,
+ const th_setup_info *_setup);
+/**Releases all storage used for the decoder setup information.
+ * This should be called after you no longer want to create any decoders for
+ *  a stream whose headers you have parsed with th_decode_headerin().
+ * \param _setup The setup information to free.
+ *               This can safely be <tt>NULL</tt>.*/
+extern void th_setup_free(th_setup_info *_setup);
+/**Decoder control function.
+ * This is used to provide advanced control of the decoding process.
+ * \param _dec    A #th_dec_ctx handle.
+ * \param _req    The control code to process.
+ *                See \ref decctlcodes "the list of available control codes"
+ *                 for details.
+ * \param _buf    The parameters for this control code.
+ * \param _buf_sz The size of the parameter buffer.*/
+extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
+ size_t _buf_sz);
+/**Submits a packet containing encoded video data to the decoder.
+ * \param _dec     A #th_dec_ctx handle.
+ * \param _op      An <tt>ogg_packet</tt> containing encoded video data.
+ * \param _granpos Returns the granule position of the decoded packet.
+ *                 If non-<tt>NULL</tt>, the granule position for this specific
+ *                  packet is stored in this location.
+ *                 This is computed incrementally from previously decoded
+ *                  packets.
+ *                 After a seek, the correct granule position must be set via
+ *                  #TH_DECCTL_SET_GRANPOS for this to work properly.
+ * \retval 0             Success.
+ *                       A new decoded frame can be retrieved by calling
+ *                        th_decode_ycbcr_out().
+ * \retval TH_DUPFRAME   The packet represented a dropped (0-byte) frame.
+ *                       The player can skip the call to th_decode_ycbcr_out(),
+ *                        as the contents of the decoded frame buffer have not
+ *                        changed.
+ * \retval TH_EFAULT     \a _dec or _op was <tt>NULL</tt>.
+ * \retval TH_EBADPACKET \a _op does not contain encoded video data.
+ * \retval TH_EIMPL      The video data uses bitstream features which this
+ *                        library does not support.*/
+extern int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
+ ogg_int64_t *_granpos);
+/**Outputs the next available frame of decoded Y'CbCr data.
+ * If a striped decode callback has been set with #TH_DECCTL_SET_STRIPE_CB,
+ *  then the application does not need to call this function.
+ * \param _dec   A #th_dec_ctx handle.
+ * \param _ycbcr A video buffer structure to fill in.
+ *               <tt>libtheoradec</tt> will fill in all the members of this
+ *                structure, including the pointers to the uncompressed video
+ *                data.
+ *               The memory for this video data is owned by
+ *                <tt>libtheoradec</tt>.
+ *               It may be freed or overwritten without notification when
+ *                subsequent frames are decoded.
+ * \retval 0 Success
+ */
+extern int th_decode_ycbcr_out(th_dec_ctx *_dec,
+ th_ycbcr_buffer _ycbcr);
+/**Frees an allocated decoder instance.
+ * \param _dec A #th_dec_ctx handle.*/
+extern void th_decode_free(th_dec_ctx *_dec);
+/*@}*/
+/*@}*/
+
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
--- a/modules/libtheora/lib/Makefile.in
+++ b/modules/libtheora/lib/Makefile.in
@ -0,0 +1,92 @@
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is Mozilla code.
+#
+# The Initial Developer of the Original Code is the Mozilla Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2007
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#  Chris Double <chris.double@double.co.nz>
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+DEPTH		= ../../..
+topsrcdir	= @top_srcdir@
+srcdir		= @srcdir@
+VPATH		= @srcdir@
+
+include $(DEPTH)/config/autoconf.mk
+
+MODULE		= theora
+LIBRARY_NAME	= theora
+FORCE_STATIC_LIB= 1
+
+# The encoder is currently not included.
+DEFINES += -DTHEORA_DISABLE_ENCODE
+
+ifeq ($(findstring 86,$(OS_TEST)), 86)
+DEFINES += -DOC_X86ASM
+endif
+
+ifeq ($(OS_ARCH),Linux)
+DEFINES += -DUSE_ASM
+endif
+
+CSRCS		= \
+		cpu.c \
+		dec/huffdec.c \
+		dec/quant.c \
+		dec/dequant.c \
+		dec/bitwise.c \
+		dec/internal.c \
+		dec/decinfo.c \
+		dec/decapiwrapper.c \
+		dec/idct.c \
+		dec/state.c \
+		dec/info.c \
+		dec/fragment.c \
+		dec/apiwrapper.c \
+		dec/decode.c \
+		$(NULL)
+
+ifeq ($(findstring 86,$(OS_TEST)), 86)
+CSRCS		+= \
+		dec/x86/mmxfrag.c \
+		dec/x86/x86state.c \
+		dec/x86/mmxstate.c \
+		dec/x86/mmxidct.c \
+		$(NULL)
+endif
+
+include $(topsrcdir)/config/rules.mk
+
+LOCAL_INCLUDES = -I$(srcdir)
+EXTRA_OBJDIRS = dec dec/x86
+
+export::
+	mkdir -p $(EXTRA_OBJDIRS)
+
--- a/modules/libtheora/lib/config.h
+++ b/modules/libtheora/lib/config.h
@ -0,0 +1,80 @@
+/* config.h.  Generated from config.h.in by configure.  */
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#define HAVE_DLFCN_H 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the <machine/soundcard.h> header file. */
+/* #undef HAVE_MACHINE_SOUNDCARD_H */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <soundcard.h> header file. */
+/* #undef HAVE_SOUNDCARD_H */
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/soundcard.h> header file. */
+#define HAVE_SYS_SOUNDCARD_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+/* #undef NO_MINUS_C_MINUS_O */
+
+/* enable x86 assambler optimization */
+ 
+
+/* Name of package */
+#define PACKAGE "libtheora"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libtheora"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libtheora 1.0beta3"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libtheora"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.0beta3"
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Define to exclude encode support from the build */
+ 
+
+/* Define to exclude floating point code from the build */
+/* #undef THEORA_DISABLE_FLOAT */
+
+/* make use of asm optimization */
+ 
+
+/* Version number of package */
+#define VERSION "1.0beta3"
--- a/modules/libtheora/lib/cpu.c
+++ b/modules/libtheora/lib/cpu.c
@ -0,0 +1,162 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+ 
+ CPU capability detection for x86 processors.
+  Originally written by Rudolf Marek.
+  
+ function:
+  last mod: $Id: cpu.c 14718 2008-04-12 08:36:58Z conrad $
+
+ ********************************************************************/
+
+#include "cpu.h"
+
+#if !defined(USE_ASM)
+
+ogg_uint32_t oc_cpu_flags_get(void){
+  return 0;
+}
+
+#else /* USE_ASM */
+
+# if defined(_MSC_VER)
+/*  Visual C cpuid helper function. For VS2005 we could
+    as well use the _cpuid builtin, but that wouldn't work
+    for VS2003 users, so we do it in inline assembler */
+
+static void oc_cpuid_helper (ogg_uint32_t * CpuInfo, ogg_uint32_t op){
+  _asm {
+    mov eax, [op]
+    mov esi, CpuInfo
+    cpuid
+    mov [esi + 0], eax
+    mov [esi + 4], ebx
+    mov [esi + 8], ecx
+    mov [esi +12], edx
+  }
+}
+
+#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+  {                                    \
+    ogg_uint32_t nfo[4];               \
+    oc_cpuid_helper (nfo, (_op));      \
+    (_eax) = nfo[0],(_ebx) = nfo[1];   \
+    (_ecx) = nfo[2],(_edx) = nfo[3];   \
+  }
+
+# elif (defined(__amd64__) || defined(__x86_64__))
+
+#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+  __asm__ __volatile__( \
+   "push %%rbx\n\t" \
+   "cpuid\n\t" \
+   "movl %%ebx,%1\n\t" \
+   "pop  %%rbx\n\t" \
+   :"=a" (_eax), \
+    "=r" (_ebx), \
+    "=c" (_ecx), \
+    "=d" (_edx) \
+   :"a" (_op) \
+   :"cc" \
+  )
+# else /* x86_32, GCC */
+
+#  define cpuid(_op,_eax,_ebx,_ecx,_edx) \
+  __asm__ __volatile__( \
+   "pushl %%ebx\n\t" \
+   "cpuid\n\t" \
+   "movl  %%ebx,%1\n\t" \
+   "popl  %%ebx\n\t" \
+   :"=a" (_eax), \
+    "=r" (_ebx), \
+    "=c" (_ecx), \
+    "=d" (_edx) \
+   :"a" (_op) \
+   :"cc" \
+  )
+
+# endif /* arch switch */
+
+ogg_uint32_t oc_cpu_flags_get(void){
+  ogg_uint32_t flags = 0;
+  ogg_uint32_t eax;
+  ogg_uint32_t ebx;
+  ogg_uint32_t ecx;
+  ogg_uint32_t edx;
+
+# if !defined(_MSC_VER) && !defined(__amd64__) && !defined(__x86_64__)
+  /* check for cpuid */
+  __asm__ __volatile__(
+   "pushfl\n\t"
+   "pushfl\n\t"
+   "popl          %0\n\t"
+   "movl          %0,%1\n\t"
+   "xorl   $0x200000,%0\n\t"
+   "pushl         %0\n\t"
+   "popfl\n\t"
+   "pushfl\n\t"
+   "popl          %0\n\t"
+   "popfl\n\t"
+   :"=r" (eax),
+    "=r" (ebx)
+   :
+   :"cc"
+  );
+  /*No cpuid.*/
+  if(eax==ebx)return 0;
+# endif /* GCC, x86_32 */
+
+  cpuid(0,eax,ebx,ecx,edx);
+  if(ebx==0x756e6547&&edx==0x49656e69&&ecx==0x6c65746e){
+    /*Intel:*/
+inteltest:
+    cpuid(1,eax,ebx,ecx,edx);
+    if((edx&0x00800000)==0)return 0;
+    flags=OC_CPU_X86_MMX;
+    if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
+    if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
+  }
+  else if(ebx==0x68747541&&edx==0x69746e65&&ecx==0x444d4163 ||
+          ebx==0x646f6547&&edx==0x79622065&&ecx==0x43534e20){
+    /*AMD:*/
+    /*Geode:*/
+    cpuid(0x80000000,eax,ebx,ecx,edx);
+    if(eax<0x80000001)goto inteltest;
+    cpuid(0x80000001,eax,ebx,ecx,edx);
+    if((edx&0x00800000)==0)return 0;
+    flags=OC_CPU_X86_MMX;
+    if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
+    if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
+    if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
+  }
+  else{
+    /*Implement me.*/
+    flags=0;
+  }
+
+# ifdef DEBUG
+  if (flags) {
+    TH_DEBUG("vectorized instruction sets supported:");
+    if (flags & OC_CPU_X86_MMX)      TH_DEBUG(" mmx");
+    if (flags & OC_CPU_X86_MMXEXT)   TH_DEBUG(" mmxext");
+    if (flags & OC_CPU_X86_SSE)      TH_DEBUG(" sse");
+    if (flags & OC_CPU_X86_SSE2)     TH_DEBUG(" sse2");
+    if (flags & OC_CPU_X86_3DNOW)    TH_DEBUG(" 3dnow");
+    if (flags & OC_CPU_X86_3DNOWEXT) TH_DEBUG(" 3dnowext");
+    TH_DEBUG("\n");
+  }
+# endif
+
+  return flags;
+}
+
+#endif /* USE_ASM */
--- a/modules/libtheora/lib/cpu.h
+++ b/modules/libtheora/lib/cpu.h
@ -0,0 +1,30 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+ function:
+    last mod: $Id: cpu.h 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+#if !defined(_x86_cpu_H)
+# define _x86_cpu_H (1)
+#include "internal.h"
+
+#define OC_CPU_X86_MMX    (1<<0)
+#define OC_CPU_X86_3DNOW  (1<<1)
+#define OC_CPU_X86_3DNOWEXT (1<<2)
+#define OC_CPU_X86_MMXEXT (1<<3)
+#define OC_CPU_X86_SSE    (1<<4)
+#define OC_CPU_X86_SSE2   (1<<5)
+
+ogg_uint32_t oc_cpu_flags_get(void);
+
+#endif
--- a/modules/libtheora/lib/dec/apiwrapper.c
+++ b/modules/libtheora/lib/dec/apiwrapper.c
@ -0,0 +1,166 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: apiwrapper.c 14321 2007-12-22 18:09:29Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+
+
+
+const char *theora_version_string(void){
+  return th_version_string();
+}
+
+ogg_uint32_t theora_version_number(void){
+  return th_version_number();
+}
+
+void theora_info_init(theora_info *_ci){
+  memset(_ci,0,sizeof(*_ci));
+}
+
+void theora_info_clear(theora_info *_ci){
+  th_api_wrapper *api;
+  api=(th_api_wrapper *)_ci->codec_setup;
+  memset(_ci,0,sizeof(*_ci));
+  if(api!=NULL){
+    if(api->clear!=NULL)(*api->clear)(api);
+    _ogg_free(api);
+  }
+}
+
+void theora_clear(theora_state *_th){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    (*((oc_state_dispatch_vtbl *)_th->internal_decode)->clear)(_th);
+  }
+  if(_th->internal_encode!=NULL){
+    (*((oc_state_dispatch_vtbl *)_th->internal_encode)->clear)(_th);
+  }
+  if(_th->i!=NULL)theora_info_clear(_th->i);
+  memset(_th,0,sizeof(*_th));
+}
+
+int theora_control(theora_state *_th,int _req,void *_buf,size_t _buf_sz){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->control)(_th,
+     _req,_buf,_buf_sz);
+  }
+  else if(_th->internal_encode!=NULL){
+    return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->control)(_th,
+     _req,_buf,_buf_sz);
+  }
+  else return TH_EINVAL;
+}
+
+ogg_int64_t theora_granule_frame(theora_state *_th,ogg_int64_t _gp){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->granule_frame)(
+     _th,_gp);
+  }
+  else if(_th->internal_encode!=NULL){
+    return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->granule_frame)(
+     _th,_gp);
+  }
+  else return -1;
+}
+
+double theora_granule_time(theora_state *_th, ogg_int64_t _gp){
+  /*Provide compatibility with mixed encoder and decoder shared lib versions.*/
+  if(_th->internal_decode!=NULL){
+    return (*((oc_state_dispatch_vtbl *)_th->internal_decode)->granule_time)(
+     _th,_gp);
+  }
+  else if(_th->internal_encode!=NULL){
+    return (*((oc_state_dispatch_vtbl *)_th->internal_encode)->granule_time)(
+     _th,_gp);
+  }
+  else return -1;
+}
+
+void oc_theora_info2th_info(th_info *_info,const theora_info *_ci){
+  _info->version_major=_ci->version_major;
+  _info->version_minor=_ci->version_minor;
+  _info->version_subminor=_ci->version_subminor;
+  _info->frame_width=_ci->width;
+  _info->frame_height=_ci->height;
+  _info->pic_width=_ci->frame_width;
+  _info->pic_height=_ci->frame_height;
+  _info->pic_x=_ci->offset_x;
+  _info->pic_y=_ci->offset_y;
+  _info->fps_numerator=_ci->fps_numerator;
+  _info->fps_denominator=_ci->fps_denominator;
+  _info->aspect_numerator=_ci->aspect_numerator;
+  _info->aspect_denominator=_ci->aspect_denominator;
+  switch(_ci->colorspace){
+    case OC_CS_ITU_REC_470M:_info->colorspace=TH_CS_ITU_REC_470M;break;
+    case OC_CS_ITU_REC_470BG:_info->colorspace=TH_CS_ITU_REC_470BG;break;
+    default:_info->colorspace=TH_CS_UNSPECIFIED;break;
+  }
+  switch(_ci->pixelformat){
+    case OC_PF_420:_info->pixel_fmt=TH_PF_420;break;
+    case OC_PF_422:_info->pixel_fmt=TH_PF_422;break;
+    case OC_PF_444:_info->pixel_fmt=TH_PF_444;break;
+    default:_info->pixel_fmt=TH_PF_RSVD;
+  }
+  _info->target_bitrate=_ci->target_bitrate;
+  _info->quality=_ci->quality;
+  _info->keyframe_granule_shift=_ci->keyframe_frequency_force>0?
+   OC_MINI(31,oc_ilog(_ci->keyframe_frequency_force-1)):0;
+}
+
+int theora_packet_isheader(ogg_packet *_op){
+  return th_packet_isheader(_op);
+}
+
+int theora_packet_iskeyframe(ogg_packet *_op){
+  return th_packet_iskeyframe(_op);
+}
+
+int theora_granule_shift(theora_info *_ci){
+  /*This breaks when keyframe_frequency_force is not positive or is larger than
+     2**31 (if your int is more than 32 bits), but that's what the original
+     function does.*/
+  return oc_ilog(_ci->keyframe_frequency_force-1);
+}
+
+void theora_comment_init(theora_comment *_tc){
+  th_comment_init((th_comment *)_tc);
+}
+
+char *theora_comment_query(theora_comment *_tc,char *_tag,int _count){
+  return th_comment_query((th_comment *)_tc,_tag,_count);
+}
+
+int theora_comment_query_count(theora_comment *_tc,char *_tag){
+  return th_comment_query_count((th_comment *)_tc,_tag);
+}
+
+void theora_comment_clear(theora_comment *_tc){
+  th_comment_clear((th_comment *)_tc);
+}
+
+void theora_comment_add(theora_comment *_tc,char *_comment){
+  th_comment_add((th_comment *)_tc,_comment);
+}
+
+void theora_comment_add_tag(theora_comment *_tc, char *_tag, char *_value){
+  th_comment_add_tag((th_comment *)_tc,_tag,_value);
+}
--- a/modules/libtheora/lib/dec/apiwrapper.h
+++ b/modules/libtheora/lib/dec/apiwrapper.h
@ -0,0 +1,55 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: apiwrapper.h 13596 2007-08-23 20:05:38Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_apiwrapper_H)
+# define _apiwrapper_H (1)
+# include <ogg/ogg.h>
+# include <theora/theora.h>
+# include "theora/theoradec.h"
+/*# include "theora/theoraenc.h"*/
+typedef struct th_enc_ctx th_enc_ctx;
+# include "../internal.h"
+
+typedef struct th_api_wrapper th_api_wrapper;
+typedef struct th_api_info    th_api_info;
+
+/*Provide an entry point for the codec setup to clear itself in case we ever
+   want to break pieces off into a common base library shared by encoder and
+   decoder.
+  In addition, this makes several other pieces of the API wrapper cleaner.*/
+typedef void (*oc_setup_clear_func)(void *_ts);
+
+/*Generally only one of these pointers will be non-NULL in any given instance.
+  Technically we do not even really need this struct, since we should be able
+   to figure out which one from "context", but doing it this way makes sure we
+   don't flub it up.*/
+struct th_api_wrapper{
+  oc_setup_clear_func  clear;
+  th_setup_info       *setup;
+  th_dec_ctx          *decode;
+  th_enc_ctx          *encode;
+};
+
+struct th_api_info{
+  th_api_wrapper api;
+  theora_info    info;
+};
+
+
+void oc_theora_info2th_info(th_info *_info,const theora_info *_ci);
+
+#endif
--- a/modules/libtheora/lib/dec/bitwise.c
+++ b/modules/libtheora/lib/dec/bitwise.c
@ -0,0 +1,126 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitwise.c 14546 2008-02-29 01:14:05Z tterribe $
+
+ ********************************************************************/
+
+/* We're 'MSb' endian; if we write a word but read individual bits,
+   then we'll read the msb first */
+
+#include <string.h>
+#include <stdlib.h>
+#include "bitwise.h"
+
+void theorapackB_reset(oggpack_buffer *b){
+  b->ptr=b->buffer;
+  b->buffer[0]=0;
+  b->endbit=b->endbyte=0;
+}
+
+void theorapackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes){
+  memset(b,0,sizeof(*b));
+  b->buffer=b->ptr=buf;
+  b->storage=bytes;
+}
+
+int theorapackB_look1(oggpack_buffer *b,long *_ret){
+  if(b->endbyte>=b->storage){
+    *_ret=0L;
+    return -1;
+  }
+  *_ret=((b->ptr[0]>>(7-b->endbit))&1);
+  return 0;
+}
+
+void theorapackB_adv1(oggpack_buffer *b){
+  if(++(b->endbit)>7){
+    b->endbit=0;
+    b->ptr++;
+    b->endbyte++;
+  }
+}
+
+/* bits <= 32 */
+int theorapackB_read(oggpack_buffer *b,int bits,long *_ret){
+  long ret;
+  long m;
+  int fail;
+  m=32-bits;
+  bits+=b->endbit;
+  if(b->endbyte+4>=b->storage){
+    /* not the main path */
+    if(b->endbyte*8+bits>b->storage*8){
+      *_ret=0L;
+      fail=-1;
+      goto overflow;
+    }
+    /* special case to avoid reading b->ptr[0], which might be past the end of
+        the buffer; also skips some useless accounting */
+    else if(!bits){
+      *_ret=0L;
+      return 0;
+    }
+  }
+  ret=b->ptr[0]<<(24+b->endbit);
+  if(bits>8){
+    ret|=b->ptr[1]<<(16+b->endbit);
+    if(bits>16){
+      ret|=b->ptr[2]<<(8+b->endbit);
+      if(bits>24){
+        ret|=b->ptr[3]<<(b->endbit);
+        if(bits>32 && b->endbit)
+          ret|=b->ptr[4]>>(8-b->endbit);
+      }
+    }
+  }
+  *_ret=((ret&0xffffffffUL)>>(m>>1))>>((m+1)>>1);
+  fail=0;
+overflow:
+  b->ptr+=bits/8;
+  b->endbyte+=bits/8;
+  b->endbit=bits&7;
+  return fail;
+}
+
+int theorapackB_read1(oggpack_buffer *b,long *_ret){
+  int fail;
+  if(b->endbyte>=b->storage){
+    /* not the main path */
+    *_ret=0L;
+    fail=-1;
+    goto overflow;
+  }
+  *_ret=(b->ptr[0]>>(7-b->endbit))&1;
+  fail=0;
+overflow:
+  b->endbit++;
+  if(b->endbit>7){
+    b->endbit=0;
+    b->ptr++;
+    b->endbyte++;
+  }
+  return fail;
+}
+
+long theorapackB_bytes(oggpack_buffer *b){
+  return(b->endbyte+(b->endbit+7)/8);
+}
+
+long theorapackB_bits(oggpack_buffer *b){
+  return(b->endbyte*8+b->endbit);
+}
+
+unsigned char *theorapackB_get_buffer(oggpack_buffer *b){
+  return(b->buffer);
+}
--- a/modules/libtheora/lib/dec/bitwise.h
+++ b/modules/libtheora/lib/dec/bitwise.h
@ -0,0 +1,76 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002             *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function: packing variable sized words into an octet stream
+  last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
+
+ ********************************************************************/
+#if !defined(_bitwise_H)
+# define _bitwise_H (1)
+# include <ogg/ogg.h>
+
+void theorapackB_reset(oggpack_buffer *b);
+void theorapackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes);
+/* Read in bits without advancing the bitptr; bits <= 32 */
+static int theorapackB_look(oggpack_buffer *b,int bits,long *_ret);
+int theorapackB_look1(oggpack_buffer *b,long *_ret);
+static void theorapackB_adv(oggpack_buffer *b,int bits);
+void theorapackB_adv1(oggpack_buffer *b);
+/* bits <= 32 */
+int theorapackB_read(oggpack_buffer *b,int bits,long *_ret);
+int theorapackB_read1(oggpack_buffer *b,long *_ret);
+long theorapackB_bytes(oggpack_buffer *b);
+long theorapackB_bits(oggpack_buffer *b);
+unsigned char *theorapackB_get_buffer(oggpack_buffer *b);
+
+/*These two functions are only used in one place, and declaring them static so
+   they can be inlined saves considerable function call overhead.*/
+
+/* Read in bits without advancing the bitptr; bits <= 32 */
+static int theorapackB_look(oggpack_buffer *b,int bits,long *_ret){
+  long ret;
+  long m;
+  m=32-bits;
+  bits+=b->endbit;
+  if(b->endbyte+4>=b->storage){
+    /* not the main path */
+    if(b->endbyte>=b->storage){
+      *_ret=0L;
+      return -1;
+    }
+    /*If we have some bits left, but not enough, return the ones we have.*/
+    if((b->storage-b->endbyte)*8<bits)bits=(b->storage-b->endbyte)*8;
+  }
+  ret=b->ptr[0]<<(24+b->endbit);
+  if(bits>8){
+    ret|=b->ptr[1]<<(16+b->endbit);
+    if(bits>16){
+      ret|=b->ptr[2]<<(8+b->endbit);
+      if(bits>24){
+        ret|=b->ptr[3]<<(b->endbit);
+        if(bits>32&&b->endbit)
+          ret|=b->ptr[4]>>(8-b->endbit);
+      }
+    }
+  }
+  *_ret=((ret&0xffffffff)>>(m>>1))>>((m+1)>>1);
+  return 0;
+}
+
+static void theorapackB_adv(oggpack_buffer *b,int bits){
+  bits+=b->endbit;
+  b->ptr+=bits/8;
+  b->endbyte+=bits/8;
+  b->endbit=bits&7;
+}
+
+#endif
--- a/modules/libtheora/lib/dec/dct.h
+++ b/modules/libtheora/lib/dec/dct.h
@ -0,0 +1,31 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+  last mod: $Id: dct.h 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+/*Definitions shared by the forward and inverse DCT transforms.*/
+#if !defined(_dct_H)
+# define _dct_H (1)
+
+/*cos(n*pi/16) (resp. sin(m*pi/16)) scaled by 65536.*/
+#define OC_C1S7 ((ogg_int32_t)64277)
+#define OC_C2S6 ((ogg_int32_t)60547)
+#define OC_C3S5 ((ogg_int32_t)54491)
+#define OC_C4S4 ((ogg_int32_t)46341)
+#define OC_C5S3 ((ogg_int32_t)36410)
+#define OC_C6S2 ((ogg_int32_t)25080)
+#define OC_C7S1 ((ogg_int32_t)12785)
+
+#endif
--- a/modules/libtheora/lib/dec/decapiwrapper.c
+++ b/modules/libtheora/lib/dec/decapiwrapper.c
@ -0,0 +1,202 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decapiwrapper.c 13596 2007-08-23 20:05:38Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "apiwrapper.h"
+#include "theora/theoradec.h"
+
+static void th_dec_api_clear(th_api_wrapper *_api){
+  if(_api->setup)th_setup_free(_api->setup);
+  if(_api->decode)th_decode_free(_api->decode);
+  memset(_api,0,sizeof(*_api));
+}
+
+static void theora_decode_clear(theora_state *_td){
+  if(_td->i!=NULL)theora_info_clear(_td->i);
+#ifdef _TH_DEBUG_
+  fclose(debugout);
+  debugout=NULL;
+#endif
+  memset(_td,0,sizeof(*_td));
+}
+
+static int theora_decode_control(theora_state *_td,int _req,
+ void *_buf,size_t _buf_sz){
+  return th_decode_ctl(((th_api_wrapper *)_td->i->codec_setup)->decode,
+   _req,_buf,_buf_sz);
+}
+
+static ogg_int64_t theora_decode_granule_frame(theora_state *_td,
+ ogg_int64_t _gp){
+  return th_granule_frame(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
+}
+
+static double theora_decode_granule_time(theora_state *_td,ogg_int64_t _gp){
+  return th_granule_time(((th_api_wrapper *)_td->i->codec_setup)->decode,_gp);
+}
+
+static const oc_state_dispatch_vtbl OC_DEC_DISPATCH_VTBL={
+  (oc_state_clear_func)theora_decode_clear,
+  (oc_state_control_func)theora_decode_control,
+  (oc_state_granule_frame_func)theora_decode_granule_frame,
+  (oc_state_granule_time_func)theora_decode_granule_time,
+};
+
+static void th_info2theora_info(theora_info *_ci,const th_info *_info){
+  _ci->version_major=_info->version_major;
+  _ci->version_minor=_info->version_minor;
+  _ci->version_subminor=_info->version_subminor;
+  _ci->width=_info->frame_width;
+  _ci->height=_info->frame_height;
+  _ci->frame_width=_info->pic_width;
+  _ci->frame_height=_info->pic_height;
+  _ci->offset_x=_info->pic_x;
+  _ci->offset_y=_info->pic_y;
+  _ci->fps_numerator=_info->fps_numerator;
+  _ci->fps_denominator=_info->fps_denominator;
+  _ci->aspect_numerator=_info->aspect_numerator;
+  _ci->aspect_denominator=_info->aspect_denominator;
+  switch(_info->colorspace){
+    case TH_CS_ITU_REC_470M:_ci->colorspace=OC_CS_ITU_REC_470M;break;
+    case TH_CS_ITU_REC_470BG:_ci->colorspace=OC_CS_ITU_REC_470BG;break;
+    default:_ci->colorspace=OC_CS_UNSPECIFIED;break;
+  }
+  switch(_info->pixel_fmt){
+    case TH_PF_420:_ci->pixelformat=OC_PF_420;break;
+    case TH_PF_422:_ci->pixelformat=OC_PF_422;break;
+    case TH_PF_444:_ci->pixelformat=OC_PF_444;break;
+    default:_ci->pixelformat=OC_PF_RSVD;
+  }
+  _ci->target_bitrate=_info->target_bitrate;
+  _ci->quality=_info->quality;
+  _ci->keyframe_frequency_force=1<<_info->keyframe_granule_shift;
+}
+
+int theora_decode_init(theora_state *_td,theora_info *_ci){
+  th_api_info    *apiinfo;
+  th_api_wrapper *api;
+  th_info         info;
+
+  api=(th_api_wrapper *)_ci->codec_setup;
+  /*Allocate our own combined API wrapper/theora_info struct.
+    We put them both in one malloc'd block so that when the API wrapper is
+     freed, the info struct goes with it.
+    This avoids having to figure out whether or not we need to free the info
+     struct in either theora_info_clear() or theora_clear().*/
+  apiinfo=(th_api_info *)_ogg_calloc(1,sizeof(*apiinfo));
+  /*Make our own copy of the info struct, since its lifetime should be
+     independent of the one we were passed in.*/
+  *&apiinfo->info=*_ci;
+  /*Convert the info struct now instead of saving the the one we decoded with
+     theora_decode_header(), since the user might have modified values (i.e.,
+     color space, aspect ratio, etc. can be specified from a higher level).
+    The user also might be doing something "clever" with the header packets if
+     they are not using an Ogg encapsulation.*/
+  oc_theora_info2th_info(&info,_ci);
+  /*Don't bother to copy the setup info; th_decode_alloc() makes its own copy
+     of the stuff it needs.*/
+  apiinfo->api.decode=th_decode_alloc(&info,api->setup);
+  if(apiinfo->api.decode==NULL){
+    _ogg_free(apiinfo);
+    return OC_EINVAL;
+  }
+  apiinfo->api.clear=(oc_setup_clear_func)th_dec_api_clear;
+  _td->internal_encode=NULL;
+  /*Provide entry points for ABI compatibility with old decoder shared libs.*/
+  _td->internal_decode=(void *)&OC_DEC_DISPATCH_VTBL;
+  _td->granulepos=0;
+  _td->i=&apiinfo->info;
+  _td->i->codec_setup=&apiinfo->api;
+  return 0;
+}
+
+int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
+  th_api_wrapper *api;
+  th_info         info;
+  int             ret;
+
+#ifdef _TH_DEBUG_
+  debugout = fopen("theoradec-debugout.txt","w");
+#endif
+
+  api=(th_api_wrapper *)_ci->codec_setup;
+  /*Allocate an API wrapper struct on demand, since it will not also include a
+     theora_info struct like the ones that are used in a theora_state struct.*/
+  if(api==NULL){
+    _ci->codec_setup=_ogg_calloc(1,sizeof(*api));
+    api=(th_api_wrapper *)_ci->codec_setup;
+    api->clear=(oc_setup_clear_func)th_dec_api_clear;
+  }
+  /*Convert from the theora_info struct instead of saving our own th_info
+     struct between calls.
+    The user might be doing something "clever" with the header packets if they
+     are not using an Ogg encapsulation, and we don't want to break this.*/
+  oc_theora_info2th_info(&info,_ci);
+  /*We rely on the fact that theora_comment and th_comment structures are
+     actually identical.
+    Take care not to change this fact unless you change the code here as
+     well!*/
+  ret=th_decode_headerin(&info,(th_comment *)_cc,&api->setup,_op);
+  /*We also rely on the fact that the error return code values are the same,
+    and that the implementations of these two functions return the same set of
+    them.
+   Note that theora_decode_header() really can return OC_NOTFORMAT, even
+    though it is not currently documented to do so.*/
+  if(ret<0)return ret;
+  th_info2theora_info(_ci,&info);
+  return 0;
+}
+
+int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
+  th_api_wrapper *api;
+  ogg_int64_t     gp;
+  int             ret;
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  ret=th_decode_packetin(api->decode,_op,&gp);
+
+#ifdef _TH_DEBUG_
+  dframe++;
+#endif 
+
+  if(ret<0)return OC_BADPACKET;
+  _td->granulepos=gp;
+  return 0;
+}
+
+int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
+  th_api_wrapper  *api;
+  th_ycbcr_buffer  buf;
+  int              ret;
+
+  api=(th_api_wrapper *)_td->i->codec_setup;
+  ret=th_decode_ycbcr_out(api->decode,buf);
+  if(ret>=0){
+    _yuv->y_width=buf[0].width;
+    _yuv->y_height=buf[0].height;
+    _yuv->y_stride=buf[0].stride;
+    _yuv->uv_width=buf[1].width;
+    _yuv->uv_height=buf[1].height;
+    _yuv->uv_stride=buf[1].stride;
+    _yuv->y=buf[0].data;
+    _yuv->u=buf[1].data;
+    _yuv->v=buf[2].data;
+  }
+
+  return ret;
+}
--- a/modules/libtheora/lib/dec/decinfo.c
+++ b/modules/libtheora/lib/dec/decinfo.c
@ -0,0 +1,239 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decinfo.c 14719 2008-04-12 11:36:40Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include "decint.h"
+
+
+
+/*Unpacks a series of octets from a given byte array into the pack buffer.
+  No checking is done to ensure the buffer contains enough data.
+  _opb: The pack buffer to read the octets from.
+  _buf: The byte array to store the unpacked bytes in.
+  _len: The number of octets to unpack.*/
+static void oc_unpack_octets(oggpack_buffer *_opb,char *_buf,size_t _len){
+  while(_len-->0){
+    long val;
+    theorapackB_read(_opb,8,&val);
+    *_buf++=(char)val;
+  }
+}
+
+/*Unpacks a 32-bit integer encoded by octets in little-endian form.*/
+static long oc_unpack_length(oggpack_buffer *_opb){
+  long ret[4];
+  int  i;
+  for(i=0;i<4;i++)theorapackB_read(_opb,8,ret+i);
+  return ret[0]|ret[1]<<8|ret[2]<<16|ret[3]<<24;
+}
+
+static int oc_info_unpack(oggpack_buffer *_opb,th_info *_info){
+  long val;
+  /*Check the codec bitstream version.*/
+  theorapackB_read(_opb,8,&val);
+  _info->version_major=(unsigned char)val;
+  theorapackB_read(_opb,8,&val);
+  _info->version_minor=(unsigned char)val;
+  theorapackB_read(_opb,8,&val);
+  _info->version_subminor=(unsigned char)val;
+  /*verify we can parse this bitstream version.
+     We accept earlier minors and all subminors, by spec*/
+  if(_info->version_major>TH_VERSION_MAJOR||
+   _info->version_major==TH_VERSION_MAJOR&&
+   _info->version_minor>TH_VERSION_MINOR){
+    return TH_EVERSION;
+  }
+  /*Read the encoded frame description.*/
+  theorapackB_read(_opb,16,&val);
+  _info->frame_width=(ogg_uint32_t)val<<4;
+  theorapackB_read(_opb,16,&val);
+  _info->frame_height=(ogg_uint32_t)val<<4;
+  theorapackB_read(_opb,24,&val);
+  _info->pic_width=(ogg_uint32_t)val;
+  theorapackB_read(_opb,24,&val);
+  _info->pic_height=(ogg_uint32_t)val;
+  theorapackB_read(_opb,8,&val);
+  _info->pic_x=(ogg_uint32_t)val;
+  /*Note: The sense of pic_y is inverted in what we pass back to the
+     application compared to how it is stored in the bitstream.
+    This is because the bitstream uses a right-handed coordinate system, while
+     applications expect a left-handed one.*/
+  theorapackB_read(_opb,8,&val);
+  _info->pic_y=_info->frame_height-_info->pic_height-(ogg_uint32_t)val;
+  theorapackB_read(_opb,32,&val);
+  _info->fps_numerator=(ogg_uint32_t)val;
+  theorapackB_read(_opb,32,&val);
+  _info->fps_denominator=(ogg_uint32_t)val;
+  if(_info->frame_width==0||_info->frame_height==0||
+   _info->pic_width+_info->pic_x>_info->frame_width||
+   _info->pic_height+_info->pic_y>_info->frame_height||
+   _info->fps_numerator==0||_info->fps_denominator==0){
+    return TH_EBADHEADER;
+  }
+  theorapackB_read(_opb,24,&val);
+  _info->aspect_numerator=(ogg_uint32_t)val;
+  theorapackB_read(_opb,24,&val);
+  _info->aspect_denominator=(ogg_uint32_t)val;
+  theorapackB_read(_opb,8,&val);
+  _info->colorspace=(th_colorspace)val;
+  theorapackB_read(_opb,24,&val);
+  _info->target_bitrate=(int)val;
+  theorapackB_read(_opb,6,&val);
+  _info->quality=(int)val;
+  theorapackB_read(_opb,5,&val);
+  _info->keyframe_granule_shift=(int)val;
+  theorapackB_read(_opb,2,&val);
+  _info->pixel_fmt=(th_pixel_fmt)val;
+  if(_info->pixel_fmt==TH_PF_RSVD)return TH_EBADHEADER;
+  if(theorapackB_read(_opb,3,&val)<0||val!=0)return TH_EBADHEADER;
+  return 0;
+}
+
+static int oc_comment_unpack(oggpack_buffer *_opb,th_comment *_tc){
+  long len;
+  int  i;
+  /*Read the vendor string.*/
+  len=oc_unpack_length(_opb);
+  if(len<0||theorapackB_bytes(_opb)+len>_opb->storage)return TH_EBADHEADER;
+  _tc->vendor=_ogg_malloc((size_t)len+1);
+  oc_unpack_octets(_opb,_tc->vendor,len);
+  _tc->vendor[len]='\0';
+  /*Read the user comments.*/
+  _tc->comments=(int)oc_unpack_length(_opb);
+  if(_tc->comments<0||_tc->comments>(LONG_MAX>>2)||
+   theorapackB_bytes(_opb)+((long)_tc->comments<<2)>_opb->storage){
+    return TH_EBADHEADER;
+  }
+  _tc->comment_lengths=(int *)_ogg_malloc(
+   _tc->comments*sizeof(_tc->comment_lengths[0]));
+  _tc->user_comments=(char **)_ogg_malloc(
+   _tc->comments*sizeof(_tc->user_comments[0]));
+  for(i=0;i<_tc->comments;i++){
+    len=oc_unpack_length(_opb);
+    if(len<0||theorapackB_bytes(_opb)+len>_opb->storage){
+      _tc->comments=i;
+      return TH_EBADHEADER;
+    }
+    _tc->comment_lengths[i]=len;
+    _tc->user_comments[i]=_ogg_malloc((size_t)len+1);
+    oc_unpack_octets(_opb,_tc->user_comments[i],len);
+    _tc->user_comments[i][len]='\0';
+  }
+  return theorapackB_read(_opb,0,&len)<0?TH_EBADHEADER:0;
+}
+
+static int oc_setup_unpack(oggpack_buffer *_opb,th_setup_info *_setup){
+  int ret;
+  /*Read the quantizer tables.*/
+  ret=oc_quant_params_unpack(_opb,&_setup->qinfo);
+  if(ret<0)return ret;
+  /*Read the Huffman trees.*/
+  return oc_huff_trees_unpack(_opb,_setup->huff_tables);
+}
+
+static void oc_setup_clear(th_setup_info *_setup){
+  oc_quant_params_clear(&_setup->qinfo);
+  oc_huff_trees_clear(_setup->huff_tables);
+}
+
+static int oc_dec_headerin(oggpack_buffer *_opb,th_info *_info,
+ th_comment *_tc,th_setup_info **_setup,ogg_packet *_op){
+  char buffer[6];
+  long val;
+  int  packtype;
+  int  ret;
+  theorapackB_read(_opb,8,&val);
+  packtype=(int)val;
+  /*If we're at a data packet and we have received all three headers, we're
+     done.*/
+  if(!(packtype&0x80)&&_info->frame_width>0&&_tc->vendor!=NULL&&*_setup!=NULL){
+    return 0;
+  }
+  /*Check the codec string.*/
+  oc_unpack_octets(_opb,buffer,6);
+  if(memcmp(buffer,"theora",6)!=0)return TH_ENOTFORMAT;
+  switch(packtype){
+    /*Codec info header.*/
+    case 0x80:{
+      /*This should be the first packet, and we should not already be
+         initialized.*/
+      if(!_op->b_o_s||_info->frame_width>0)return TH_EBADHEADER;
+      ret=oc_info_unpack(_opb,_info);
+      if(ret<0)th_info_clear(_info);
+      else ret=3;
+    }break;
+    /*Comment header.*/
+    case 0x81:{
+      if(_tc==NULL)return TH_EFAULT;
+      /*We shoud have already decoded the info header, and should not yet have
+         decoded the comment header.*/
+      if(_info->frame_width==0||_tc->vendor!=NULL)return TH_EBADHEADER;
+      ret=oc_comment_unpack(_opb,_tc);
+      if(ret<0)th_comment_clear(_tc);
+      else ret=2;
+    }break;
+    /*Codec setup header.*/
+    case 0x82:{
+      oc_setup_info *setup;
+      if(_tc==NULL||_setup==NULL)return TH_EFAULT;
+      /*We should have already decoded the info header and the comment header,
+         and should not yet have decoded the setup header.*/
+      if(_info->frame_width==0||_tc->vendor==NULL||*_setup!=NULL){
+        return TH_EBADHEADER;
+      }
+      setup=(oc_setup_info *)_ogg_calloc(1,sizeof(*setup));
+      ret=oc_setup_unpack(_opb,setup);
+      if(ret<0){
+        oc_setup_clear(setup);
+        _ogg_free(setup);
+      }
+      else{
+        *_setup=setup;
+        ret=1;
+      }
+    }break;
+    default:{
+      /*We don't know what this header is.*/
+      return TH_EBADHEADER;
+    }break;
+  }
+  return ret;
+}
+
+
+/*Decodes one header packet.
+  This should be called repeatedly with the packets at the beginning of the
+   stream until it returns 0.*/
+int th_decode_headerin(th_info *_info,th_comment *_tc,
+ th_setup_info **_setup,ogg_packet *_op){
+  oggpack_buffer opb;
+  int            ret;
+  if(_op==NULL)return TH_EBADHEADER;
+  if(_info==NULL)return TH_EFAULT;
+  theorapackB_readinit(&opb,_op->packet,_op->bytes);
+  ret=oc_dec_headerin(&opb,_info,_tc,_setup,_op);
+  return ret;
+}
+
+void th_setup_free(th_setup_info *_setup){
+  if(_setup!=NULL){
+    oc_setup_clear(_setup);
+    _ogg_free(_setup);
+  }
+}
--- a/modules/libtheora/lib/dec/decint.h
+++ b/modules/libtheora/lib/dec/decint.h
@ -0,0 +1,95 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: decint.h 14369 2008-01-05 23:15:32Z tterribe $
+
+ ********************************************************************/
+
+#include <limits.h>
+#if !defined(_decint_H)
+# define _decint_H (1)
+# include "theora/theoradec.h"
+# include "../internal.h"
+# include "bitwise.h"
+
+typedef struct th_setup_info oc_setup_info;
+typedef struct th_dec_ctx    oc_dec_ctx;
+
+# include "idct.h"
+# include "huffdec.h"
+# include "dequant.h"
+
+/*Constants for the packet-in state machine specific to the decoder.*/
+
+/*Next packet to read: Data packet.*/
+#define OC_PACKET_DATA (0)
+
+
+
+struct th_setup_info{
+  /*The Huffman codes.*/
+  oc_huff_node      *huff_tables[TH_NHUFFMAN_TABLES];
+  /*The quantization parameters.*/
+  th_quant_info  qinfo;
+};
+
+
+
+struct th_dec_ctx{
+  /*Shared encoder/decoder state.*/
+  oc_theora_state          state;
+  /*Whether or not packets are ready to be emitted.
+    This takes on negative values while there are remaining header packets to
+     be emitted, reaches 0 when the codec is ready for input, and goes to 1
+     when a frame has been processed and a data packet is ready.*/
+  int                      packet_state;
+  /*Buffer in which to assemble packets.*/
+  oggpack_buffer           opb;
+  /*Huffman decode trees.*/
+  oc_huff_node            *huff_tables[TH_NHUFFMAN_TABLES];
+  /*The index of one past the last token in each plane for each coefficient.
+    The final entries are the total number of tokens for each coefficient.*/
+  int                      ti0[3][64];
+  /*The index of one past the last extra bits entry in each plane for each
+     coefficient.
+    The final entries are the total number of extra bits entries for each
+     coefficient.*/
+  int                      ebi0[3][64];
+  /*The number of outstanding EOB runs at the start of each coefficient in each
+     plane.*/
+  int                      eob_runs[3][64];
+  /*The DCT token lists.*/
+  unsigned char          **dct_tokens;
+  /*The extra bits associated with DCT tokens.*/
+  ogg_uint16_t           **extra_bits;
+  /*The out-of-loop post-processing level.*/
+  int                      pp_level;
+  /*The DC scale used for out-of-loop deblocking.*/
+  int                      pp_dc_scale[64];
+  /*The sharpen modifier used for out-of-loop deringing.*/
+  int                      pp_sharp_mod[64];
+  /*The DC quantization index of each block.*/
+  unsigned char           *dc_qis;
+  /*The variance of each block.*/
+  int                     *variances;
+  /*The storage for the post-processed frame buffer.*/
+  unsigned char           *pp_frame_data;
+  /*Whether or not the post-processsed frame buffer has space for chroma.*/
+  int                      pp_frame_has_chroma;
+  /*The buffer used for the post-processed frame.*/
+  th_ycbcr_buffer      pp_frame_buf;
+  /*The striped decode callback function.*/
+  th_stripe_callback   stripe_cb;
+};
+
+#endif
--- a/modules/libtheora/lib/dec/decode.c
+++ b/modules/libtheora/lib/dec/decode.c
--- a/modules/libtheora/lib/dec/dequant.c
+++ b/modules/libtheora/lib/dec/dequant.c
@ -0,0 +1,230 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: dequant.c 14369 2008-01-05 23:15:32Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "dequant.h"
+#include "decint.h"
+
+int oc_quant_params_unpack(oggpack_buffer *_opb,
+ th_quant_info *_qinfo){
+  th_quant_base *base_mats;
+  long           val;
+  int            nbase_mats;
+  int            sizes[64];
+  int            indices[64];
+  int            nbits;
+  int            bmi;
+  int            ci;
+  int            qti;
+  int            pli;
+  int            qri;
+  int            qi;
+  int            i;
+  theorapackB_read(_opb,3,&val);
+  nbits=(int)val;
+  for(qi=0;qi<64;qi++){
+    theorapackB_read(_opb,nbits,&val);
+    _qinfo->loop_filter_limits[qi]=(unsigned char)val;
+  }
+  theorapackB_read(_opb,4,&val);
+  nbits=(int)val+1;
+  for(qi=0;qi<64;qi++){
+    theorapackB_read(_opb,nbits,&val);
+    _qinfo->ac_scale[qi]=(ogg_uint16_t)val;
+  }
+  theorapackB_read(_opb,4,&val);
+  nbits=(int)val+1;
+  for(qi=0;qi<64;qi++){
+    theorapackB_read(_opb,nbits,&val);
+    _qinfo->dc_scale[qi]=(ogg_uint16_t)val;
+  }
+  theorapackB_read(_opb,9,&val);
+  nbase_mats=(int)val+1;
+  base_mats=_ogg_malloc(nbase_mats*sizeof(base_mats[0]));
+  for(bmi=0;bmi<nbase_mats;bmi++){
+    for(ci=0;ci<64;ci++){
+      theorapackB_read(_opb,8,&val);
+      base_mats[bmi][ci]=(unsigned char)val;
+    }
+  }
+  nbits=oc_ilog(nbase_mats-1);
+  for(i=0;i<6;i++){
+    th_quant_ranges *qranges;
+    th_quant_base   *qrbms;
+    int             *qrsizes;
+    qti=i/3;
+    pli=i%3;
+    qranges=_qinfo->qi_ranges[qti]+pli;
+    if(i>0){
+      theorapackB_read1(_opb,&val);
+      if(!val){
+        int qtj;
+        int plj;
+        if(qti>0){
+          theorapackB_read1(_opb,&val);
+          if(val){
+            qtj=qti-1;
+            plj=pli;
+          }
+          else{
+            qtj=(i-1)/3;
+            plj=(i-1)%3;
+          }
+        }
+        else{
+          qtj=(i-1)/3;
+          plj=(i-1)%3;
+        }
+        *qranges=*(_qinfo->qi_ranges[qtj]+plj);
+        continue;
+      }
+    }
+    theorapackB_read(_opb,nbits,&val);
+    indices[0]=(int)val;
+    for(qi=qri=0;qi<63;){
+      theorapackB_read(_opb,oc_ilog(62-qi),&val);
+      sizes[qri]=(int)val+1;
+      qi+=(int)val+1;
+      theorapackB_read(_opb,nbits,&val);
+      indices[++qri]=(int)val;
+    }
+    /*Note: The caller is responsible for cleaning up any partially
+       constructed qinfo.*/
+    if(qi>63){
+      _ogg_free(base_mats);
+      return TH_EBADHEADER;
+    }
+    qranges->nranges=qri;
+    qranges->sizes=qrsizes=(int *)_ogg_malloc(qri*sizeof(qrsizes[0]));
+    memcpy(qrsizes,sizes,qri*sizeof(qrsizes[0]));
+    qrbms=(th_quant_base *)_ogg_malloc((qri+1)*sizeof(qrbms[0]));
+    qranges->base_matrices=(const th_quant_base *)qrbms;
+    do{
+      bmi=indices[qri];
+      /*Note: The caller is responsible for cleaning up any partially
+         constructed qinfo.*/
+      if(bmi>=nbase_mats){
+        _ogg_free(base_mats);
+        return TH_EBADHEADER;
+      }
+      memcpy(qrbms[qri],base_mats[bmi],sizeof(qrbms[qri]));
+    }
+    while(qri-->0);
+  }
+
+#ifdef _TH_DEBUG_
+  /* dump the tables */
+  {
+    int i, j, k, l, m;
+    TH_DEBUG("loop filter limits = {");
+    for(i=0;i<64;){
+      TH_DEBUG("\n        ");
+      for(j=0;j<16;i++,j++)
+	TH_DEBUG("%3d ",_qinfo->loop_filter_limits[i]);
+    }
+    TH_DEBUG("\n}\n\n");
+
+    TH_DEBUG("ac scale = {");
+    for(i=0;i<64;){
+      TH_DEBUG("\n        ");
+      for(j=0;j<16;i++,j++)
+	TH_DEBUG("%3d ",_qinfo->ac_scale[i]);
+    }
+    TH_DEBUG("\n}\n\n");
+
+    TH_DEBUG("dc scale = {");
+    for(i=0;i<64;){
+      TH_DEBUG("\n        ");
+      for(j=0;j<16;i++,j++)
+	TH_DEBUG("%3d ",_qinfo->dc_scale[i]);
+    }
+    TH_DEBUG("\n}\n\n");
+
+    for(k=0;k<2;k++)
+      for(l=0;l<3;l++){
+	char *name[2][3]={
+	  {"intra Y bases","intra U bases", "intra V bases"},
+	  {"inter Y bases","inter U bases", "inter V bases"}
+	};
+
+	th_quant_ranges *r = &_qinfo->qi_ranges[k][l];
+	TH_DEBUG("%s = {\n",name[k][l]);
+	TH_DEBUG("        ranges = %d\n",r->nranges);
+	TH_DEBUG("        intervals = { ");
+	for(i=0;i<r->nranges;i++)
+	  TH_DEBUG("%3d ",r->sizes[i]);
+	TH_DEBUG("}\n");
+	TH_DEBUG("\n        matricies = { ");
+	for(m=0;m<r->nranges+1;m++){
+	  TH_DEBUG("\n          { ");
+	  for(i=0;i<64;){
+	    TH_DEBUG("\n            ");
+	    for(j=0;j<8;i++,j++)
+	      TH_DEBUG("%3d ",r->base_matrices[m][i]);
+	  }
+	  TH_DEBUG("\n          }");
+	}
+	TH_DEBUG("\n        }\n");
+      }
+  }
+    
+#endif
+
+  _ogg_free(base_mats);
+  return 0;
+}
+
+void oc_quant_params_clear(th_quant_info *_qinfo){
+  int i;
+  for(i=6;i-->0;){
+    int qti;
+    int pli;
+    qti=i/3;
+    pli=i%3;
+    /*Clear any duplicate pointer references.*/
+    if(i>0){
+      int qtj;
+      int plj;
+      qtj=(i-1)/3;
+      plj=(i-1)%3;
+      if(_qinfo->qi_ranges[qti][pli].sizes==
+       _qinfo->qi_ranges[qtj][plj].sizes){
+        _qinfo->qi_ranges[qti][pli].sizes=NULL;
+      }
+      if(_qinfo->qi_ranges[qti][pli].base_matrices==
+       _qinfo->qi_ranges[qtj][plj].base_matrices){
+        _qinfo->qi_ranges[qti][pli].base_matrices=NULL;
+      }
+    }
+    if(qti>0){
+      if(_qinfo->qi_ranges[1][pli].sizes==
+       _qinfo->qi_ranges[0][pli].sizes){
+        _qinfo->qi_ranges[1][pli].sizes=NULL;
+      }
+      if(_qinfo->qi_ranges[1][pli].base_matrices==
+       _qinfo->qi_ranges[0][pli].base_matrices){
+        _qinfo->qi_ranges[1][pli].base_matrices=NULL;
+      }
+    }
+    /*Now free all the non-duplicate storage.*/
+    _ogg_free((void *)_qinfo->qi_ranges[qti][pli].sizes);
+    _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices);
+  }
+}
+
--- a/modules/libtheora/lib/dec/dequant.h
+++ b/modules/libtheora/lib/dec/dequant.h
@ -0,0 +1,26 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: dequant.h 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+#if !defined(_dequant_H)
+# define _dequant_H (1)
+# include "quant.h"
+
+int oc_quant_params_unpack(oggpack_buffer *_opb,
+ th_quant_info *_qinfo);
+void oc_quant_params_clear(th_quant_info *_qinfo);
+
+#endif
--- a/modules/libtheora/lib/dec/enquant.h
+++ b/modules/libtheora/lib/dec/enquant.h
@ -0,0 +1,43 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: enquant.h 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+#if !defined(_enquant_H)
+# define _enquant_H (1)
+# include "quant.h"
+
+/*The amount to scale the forward quantizer value by.*/
+#define OC_FQUANT_SCALE ((ogg_uint32_t)1<<OC_FQUANT_SHIFT)
+/*The amount to add to the scaled forward quantizer for rounding.*/
+#define OC_FQUANT_ROUND (1<<OC_FQUANT_SHIFT-1)
+/*The amount to shift the resulting product by.*/
+#define OC_FQUANT_SHIFT (16)
+
+
+
+/*The default quantization parameters used by VP3.1.*/
+extern const th_quant_info TH_VP31_QUANT_INFO;
+/*Our default quantization parameters.*/
+extern const th_quant_info OC_DEF_QUANT_INFO[4];
+
+
+
+void oc_quant_params_pack(oggpack_buffer *_opb,
+ const th_quant_info *_qinfo);
+void oc_enquant_tables_init(oc_quant_table *_dequant[2][3],
+ oc_quant_table *_enquant[2][3],const th_quant_info *_qinfo);
+
+#endif
--- a/modules/libtheora/lib/dec/fragment.c
+++ b/modules/libtheora/lib/dec/fragment.c
@ -0,0 +1,199 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: fragment.c 14348 2008-01-04 18:17:00Z tterribe $
+
+ ********************************************************************/
+
+#include "../internal.h"
+
+void oc_frag_recon_intra(const oc_theora_state *_state,unsigned char *_dst,
+ int _dst_ystride,const ogg_int16_t *_residue){
+  _state->opt_vtable.frag_recon_intra(_dst,_dst_ystride,_residue);
+}
+
+void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t *_residue){
+  int i;
+  for(i=0;i<8;i++){
+    int j;
+    for(j=0;j<8;j++){
+      int res;
+      res=*_residue++;
+      _dst[j]=OC_CLAMP255(res+128);
+    }
+    _dst+=_dst_ystride;
+  }
+}
+
+void oc_frag_recon_inter(const oc_theora_state *_state,unsigned char *_dst,
+ int _dst_ystride,const unsigned char *_src,int _src_ystride,
+ const ogg_int16_t *_residue){
+  _state->opt_vtable.frag_recon_inter(_dst,_dst_ystride,_src,_src_ystride,
+   _residue);
+}
+
+void oc_frag_recon_inter_c(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue){
+  int i;
+  for(i=0;i<8;i++){
+    int j;
+    for(j=0;j<8;j++){
+      int res;
+      res=*_residue++;
+      _dst[j]=OC_CLAMP255(res+_src[j]);
+    }
+    _dst+=_dst_ystride;
+    _src+=_src_ystride;
+  }
+}
+
+void oc_frag_recon_inter2(const oc_theora_state *_state,unsigned char *_dst,
+ int _dst_ystride,const unsigned char *_src1,int _src1_ystride,
+ const unsigned char *_src2,int _src2_ystride,const ogg_int16_t *_residue){
+  _state->opt_vtable.frag_recon_inter2(_dst,_dst_ystride,_src1,_src1_ystride,
+   _src2,_src2_ystride,_residue);
+}
+
+void oc_frag_recon_inter2_c(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
+ int _src2_ystride,const ogg_int16_t *_residue){
+  int i;
+  for(i=0;i<8;i++){
+    int j;
+    for(j=0;j<8;j++){
+      int res;
+      res=*_residue++;
+      _dst[j]=OC_CLAMP255(res+((int)_src1[j]+_src2[j]>>1));
+    }
+    _dst+=_dst_ystride;
+    _src1+=_src1_ystride;
+    _src2+=_src2_ystride;
+  }
+}
+
+/*Computes the predicted DC value for the given fragment.
+  This requires that the fully decoded DC values be available for the left,
+   upper-left, upper, and upper-right fragments (if they exist).
+  _frag:      The fragment to predict the DC value for.
+  _fplane:    The fragment plane the fragment belongs to.
+  _x:         The x-coordinate of the fragment.
+  _y:         The y-coordinate of the fragment.
+  _pred_last: The last fully-decoded DC value for each predictor frame
+               (OC_FRAME_GOLD, OC_FRAME_PREV and OC_FRAME_SELF).
+              This should be initialized to 0's for the first fragment in each
+               color plane.
+  Return: The predicted DC value for this fragment.*/
+int oc_frag_pred_dc(const oc_fragment *_frag,
+ const oc_fragment_plane *_fplane,int _x,int _y,int _pred_last[3]){
+  static const int PRED_SCALE[16][4]={
+    /*0*/
+    {0,0,0,0},
+    /*OC_PL*/
+    {1,0,0,0},
+    /*OC_PUL*/
+    {1,0,0,0},
+    /*OC_PL|OC_PUL*/
+    {1,0,0,0},
+    /*OC_PU*/
+    {1,0,0,0},
+    /*OC_PL|OC_PU*/
+    {1,1,0,0},
+    /*OC_PUL|OC_PU*/
+    {0,1,0,0},
+    /*OC_PL|OC_PUL|PC_PU*/
+    {29,-26,29,0},
+    /*OC_PUR*/
+    {1,0,0,0},
+    /*OC_PL|OC_PUR*/
+    {75,53,0,0},
+    /*OC_PUL|OC_PUR*/
+    {1,1,0,0},
+    /*OC_PL|OC_PUL|OC_PUR*/
+    {75,0,53,0},
+    /*OC_PU|OC_PUR*/
+    {1,0,0,0},
+    /*OC_PL|OC_PU|OC_PUR*/
+    {75,0,53,0},
+    /*OC_PUL|OC_PU|OC_PUR*/
+    {3,10,3,0},
+    /*OC_PL|OC_PUL|OC_PU|OC_PUR*/
+    {29,-26,29,0}
+  };
+  static const int PRED_SHIFT[16]={0,0,0,0,0,1,0,5,0,7,1,7,0,7,4,5};
+  static const int PRED_RMASK[16]={0,0,0,0,0,1,0,31,0,127,1,127,0,127,15,31};
+  static const int BC_MASK[8]={
+    /*No boundary condition.*/
+    OC_PL|OC_PUL|OC_PU|OC_PUR,
+    /*Left column.*/
+    OC_PU|OC_PUR,
+    /*Top row.*/
+    OC_PL,
+    /*Top row, left column.*/
+    0,
+    /*Right column.*/
+    OC_PL|OC_PUL|OC_PU,
+    /*Right and left column.*/
+    OC_PU,
+    /*Top row, right column.*/
+    OC_PL,
+    /*Top row, right and left column.*/
+    0
+  };
+  /*Predictor fragments, left, up-left, up, up-right.*/
+  const oc_fragment *predfr[4];
+  /*The frame used for prediction for this fragment.*/
+  int                pred_frame;
+  /*The boundary condition flags.*/
+  int                bc;
+  /*DC predictor values: left, up-left, up, up-right, missing values
+     skipped.*/
+  int                p[4];
+  /*Predictor count.*/
+  int                np;
+  /*Which predictor constants to use.*/
+  int                pflags;
+  /*The predicted DC value.*/
+  int                ret;
+  int                i;
+  pred_frame=OC_FRAME_FOR_MODE[_frag->mbmode];
+  bc=(_x==0)+((_y==0)<<1)+((_x+1==_fplane->nhfrags)<<2);
+  predfr[0]=_frag-1;
+  predfr[1]=_frag-_fplane->nhfrags-1;
+  predfr[2]=predfr[1]+1;
+  predfr[3]=predfr[2]+1;
+  np=0;
+  pflags=0;
+  for(i=0;i<4;i++){
+    int pflag;
+    pflag=1<<i;
+    if((BC_MASK[bc]&pflag)&&predfr[i]->coded&&
+     OC_FRAME_FOR_MODE[predfr[i]->mbmode]==pred_frame){
+      p[np++]=predfr[i]->dc;
+      pflags|=pflag;
+    }
+  }
+  if(pflags==0)return _pred_last[pred_frame];
+  else{
+    ret=PRED_SCALE[pflags][0]*p[0];
+    /*LOOP VECTORIZES.*/
+    for(i=1;i<np;i++)ret+=PRED_SCALE[pflags][i]*p[i];
+    ret=OC_DIV_POW2(ret,PRED_SHIFT[pflags],PRED_RMASK[pflags]);
+  }
+  if((pflags&(OC_PL|OC_PUL|OC_PU))==(OC_PL|OC_PUL|OC_PU)){
+    if(abs(ret-p[2])>128)ret=p[2];
+    else if(abs(ret-p[0])>128)ret=p[0];
+    else if(abs(ret-p[1])>128)ret=p[1];
+  }
+  return ret;
+}
--- a/modules/libtheora/lib/dec/huffdec.c
+++ b/modules/libtheora/lib/dec/huffdec.c
@ -0,0 +1,279 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: huffdec.c 14493 2008-02-13 09:25:37Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <ogg/ogg.h>
+#include "huffdec.h"
+#include "decint.h"
+
+
+/*The ANSI offsetof macro is broken on some platforms (e.g., older DECs).*/
+#define _ogg_offsetof(_type,_field)\
+ ((size_t)((char *)&((_type *)0)->_field-(char *)0))
+
+/*The log_2 of the size of a lookup table is allowed to grow to relative to
+   the number of unique nodes it contains.
+  E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is
+   wasted (each node will have an amortized cost of at most 20 bytes when using
+   4-byte pointers).
+  Larger numbers can decode tokens with fewer read operations, while smaller
+   numbers may save more space (requiring as little as 8 bytes amortized per
+   node, though there will be more nodes).
+  With a sample file:
+  32233473 read calls are required when no tree collapsing is done (100.0%).
+  19269269 read calls are required when OC_HUFF_SLUSH is 0 (59.8%).
+  11144969 read calls are required when OC_HUFF_SLUSH is 1 (34.6%).
+  10538563 read calls are required when OC_HUFF_SLUSH is 2 (32.7%).
+  10192578 read calls are required when OC_HUFF_SLUSH is 3 (31.6%).
+  Since a value of 1 gets us the vast majority of the speed-up with only a
+   small amount of wasted memory, this is what we use.*/
+#define OC_HUFF_SLUSH (1)
+
+
+/*Allocates a Huffman tree node that represents a subtree of depth _nbits.
+  _nbits: The depth of the subtree.
+          If this is 0, the node is a leaf node.
+          Otherwise 1<<_nbits pointers are allocated for children.
+  Return: The newly allocated and fully initialized Huffman tree node.*/
+static oc_huff_node *oc_huff_node_alloc(int _nbits){
+  oc_huff_node *ret;
+  size_t        size;
+  size=_ogg_offsetof(oc_huff_node,nodes);
+  if(_nbits>0)size+=sizeof(oc_huff_node *)*(1<<_nbits);
+  ret=_ogg_calloc(1,size);
+  ret->nbits=(unsigned char)_nbits;
+  return ret;
+}
+
+/*Frees a Huffman tree node allocated with oc_huf_node_alloc.
+  _node: The node to free.
+         This may be NULL.*/
+static void oc_huff_node_free(oc_huff_node *_node){
+  _ogg_free(_node);
+}
+
+/*Frees the memory used by a Huffman tree.
+  _node: The Huffman tree to free.
+         This may be NULL.*/
+static void oc_huff_tree_free(oc_huff_node *_node){
+  if(_node==NULL)return;
+  if(_node->nbits){
+    int nchildren;
+    int i;
+    int inext;
+    nchildren=1<<_node->nbits;
+    for(i=0;i<nchildren;i=inext){
+      inext=i+(_node->nodes[i]!=NULL?1<<_node->nbits-_node->nodes[i]->depth:1);
+      oc_huff_tree_free(_node->nodes[i]);
+    }
+  }
+  oc_huff_node_free(_node);
+}
+
+/*Unpacks a sub-tree from the given buffer.
+  _opb:    The buffer to unpack from.
+  _binode: The location to store a pointer to the sub-tree in.
+  _depth:  The current depth of the tree.
+           This is used to prevent infinite recursion.
+  Return: 0 on success, or a negative value on error.*/
+static int oc_huff_tree_unpack(oggpack_buffer *_opb,
+ oc_huff_node **_binode,int _depth){
+  oc_huff_node *binode;
+  long          bits;
+  /*Prevent infinite recursion.*/
+  if(++_depth>32)return TH_EBADHEADER;
+  if(theorapackB_read1(_opb,&bits)<0)return TH_EBADHEADER;
+  /*Read an internal node:*/
+  if(!bits){
+    int ret;
+    binode=oc_huff_node_alloc(1);
+    binode->depth=(unsigned char)(_depth>1);
+    ret=oc_huff_tree_unpack(_opb,binode->nodes,_depth);
+    if(ret>=0)ret=oc_huff_tree_unpack(_opb,binode->nodes+1,_depth);
+    if(ret<0){
+      oc_huff_tree_free(binode);
+      *_binode=NULL;
+      return ret;
+    }
+  }
+  /*Read a leaf node:*/
+  else{
+    if(theorapackB_read(_opb,OC_NDCT_TOKEN_BITS,&bits)<0)return TH_EBADHEADER;
+    binode=oc_huff_node_alloc(0);
+    binode->depth=(unsigned char)(_depth>1);
+    binode->token=(unsigned char)bits;
+  }
+  *_binode=binode;
+  return 0;
+}
+
+/*Finds the depth of shortest branch of the given sub-tree.
+  The tree must be binary.
+  _binode: The root of the given sub-tree.
+           _binode->nbits must be 0 or 1.
+  Return: The smallest depth of a leaf node in this sub-tree.
+          0 indicates this sub-tree is a leaf node.*/
+static int oc_huff_tree_mindepth(oc_huff_node *_binode){
+  int depth0;
+  int depth1;
+  if(_binode->nbits==0)return 0;
+  depth0=oc_huff_tree_mindepth(_binode->nodes[0]);
+  depth1=oc_huff_tree_mindepth(_binode->nodes[1]);
+  return OC_MINI(depth0,depth1)+1;
+}
+
+/*Finds the number of internal nodes at a given depth, plus the number of
+   leaves at that depth or shallower.
+  The tree must be binary.
+  _binode: The root of the given sub-tree.
+           _binode->nbits must be 0 or 1.
+  Return: The number of entries that would be contained in a jump table of the
+           given depth.*/
+static int oc_huff_tree_occupancy(oc_huff_node *_binode,int _depth){
+  if(_binode->nbits==0||_depth<=0)return 1;
+  else{
+    return oc_huff_tree_occupancy(_binode->nodes[0],_depth-1)+
+     oc_huff_tree_occupancy(_binode->nodes[1],_depth-1);
+  }
+}
+
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode);
+
+/*Fills the given nodes table with all the children in the sub-tree at the
+   given depth.
+  The nodes in the sub-tree with a depth less than that stored in the table
+   are freed.
+  The sub-tree must be binary and complete up until the given depth.
+  _nodes:  The nodes table to fill.
+  _binode: The root of the sub-tree to fill it with.
+           _binode->nbits must be 0 or 1.
+  _level:  The current level in the table.
+           0 indicates that the current node should be stored, regardless of
+            whether it is a leaf node or an internal node.
+  _depth:  The depth of the nodes to fill the table with, relative to their
+            parent.*/
+static void oc_huff_node_fill(oc_huff_node **_nodes,
+ oc_huff_node *_binode,int _level,int _depth){
+  if(_level<=0||_binode->nbits==0){
+    int i;
+    _binode->depth=(unsigned char)(_depth-_level);
+    _nodes[0]=oc_huff_tree_collapse(_binode);
+    for(i=1;i<1<<_level;i++)_nodes[i]=_nodes[0];
+  }
+  else{
+    _level--;
+    oc_huff_node_fill(_nodes,_binode->nodes[0],_level,_depth);
+    oc_huff_node_fill(_nodes+(1<<_level),_binode->nodes[1],_level,_depth);
+    oc_huff_node_free(_binode);
+  }
+}
+
+/*Finds the largest complete sub-tree rooted at the current node and collapses
+   it into a single node.
+  This procedure is then applied recursively to all the children of that node.
+  _binode: The root of the sub-tree to collapse.
+           _binode->nbits must be 0 or 1.
+  Return: The new root of the collapsed sub-tree.*/
+static oc_huff_node *oc_huff_tree_collapse(oc_huff_node *_binode){
+  oc_huff_node *root;
+  int           mindepth;
+  int           depth;
+  int           loccupancy;
+  int           occupancy;
+  depth=mindepth=oc_huff_tree_mindepth(_binode);
+  occupancy=1<<mindepth;
+  do{
+    loccupancy=occupancy;
+    occupancy=oc_huff_tree_occupancy(_binode,++depth);
+  }
+  while(occupancy>loccupancy&&occupancy>=1<<OC_MAXI(depth-OC_HUFF_SLUSH,0));
+  depth--;
+  if(depth<=1)return _binode;
+  root=oc_huff_node_alloc(depth);
+  root->depth=_binode->depth;
+  oc_huff_node_fill(root->nodes,_binode,depth,depth);
+  return root;
+}
+
+/*Makes a copy of the given Huffman tree.
+  _node: The Huffman tree to copy.
+  Return: The copy of the Huffman tree.*/
+static oc_huff_node *oc_huff_tree_copy(const oc_huff_node *_node){
+  oc_huff_node *ret;
+  ret=oc_huff_node_alloc(_node->nbits);
+  ret->depth=_node->depth;
+  if(_node->nbits){
+    int nchildren;
+    int i;
+    int inext;
+    nchildren=1<<_node->nbits;
+    for(i=0;i<nchildren;){
+      ret->nodes[i]=oc_huff_tree_copy(_node->nodes[i]);
+      inext=i+(1<<_node->nbits-ret->nodes[i]->depth);
+      while(++i<inext)ret->nodes[i]=ret->nodes[i-1];
+    }
+  }
+  else ret->token=_node->token;
+  return ret;
+}
+
+/*Unpacks a set of Huffman trees, and reduces them to a collapsed
+   representation.
+  _opb:   The buffer to unpack the trees from.
+  _nodes: The table to fill with the Huffman trees.
+  Return: 0 on success, or a negative value on error.*/
+int oc_huff_trees_unpack(oggpack_buffer *_opb,
+ oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++){
+    int ret;
+    ret=oc_huff_tree_unpack(_opb,_nodes+i,0);
+    if(ret<0)return ret;
+    _nodes[i]=oc_huff_tree_collapse(_nodes[i]);
+  }
+  return 0;
+}
+
+/*Makes a copy of the given set of Huffman trees.
+  _dst: The array to store the copy in.
+  _src: The array of trees to copy.*/
+void oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
+ const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++)_dst[i]=oc_huff_tree_copy(_src[i]);
+}
+
+/*Frees the memory used by a set of Huffman trees.
+  _nodes: The array of trees to free.*/
+void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]){
+  int i;
+  for(i=0;i<TH_NHUFFMAN_TABLES;i++)oc_huff_tree_free(_nodes[i]);
+}
+
+/*Unpacks a single token using the given Huffman tree.
+  _opb:  The buffer to unpack the token from.
+  _node: The tree to unpack the token with.
+  Return: The token value.*/
+int oc_huff_token_decode(oggpack_buffer *_opb,const oc_huff_node *_node){
+  long bits;
+  while(_node->nbits!=0){
+    theorapackB_look(_opb,_node->nbits,&bits);
+    _node=_node->nodes[bits];
+    theorapackB_adv(_opb,_node->depth);
+  }
+  return _node->token;
+}
--- a/modules/libtheora/lib/dec/huffdec.h
+++ b/modules/libtheora/lib/dec/huffdec.h
@ -0,0 +1,91 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: huffdec.h 14359 2008-01-04 20:11:13Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_huffdec_H)
+# define _huffdec_H (1)
+# include "huffman.h"
+
+
+
+typedef struct oc_huff_node oc_huff_node;
+
+/*A node in the Huffman tree.
+  Instead of storing every branching in the tree, subtrees can be collapsed
+   into one node, with a table of size 1<<nbits pointing directly to its
+   descedents nbits levels down.
+  This allows more than one bit to be read at a time, and avoids following all
+   the intermediate branches with next to no increased code complexity once
+   the collapsed tree has been built.
+  We do _not_ require that a subtree be complete to be collapsed, but instead
+   store duplicate pointers in the table, and record the actual depth of the
+   node below its parent.
+  This tells us the number of bits to advance the stream after reaching it.
+
+  This turns out to be equivalent to the method described in \cite{Hash95},
+   without the requirement that codewords be sorted by length.
+  If the codewords were sorted by length (so-called ``canonical-codes''), they
+   could be decoded much faster via either Lindell and Moffat's approach or
+   Hashemian's Condensed Huffman Code approach, the latter of which has an
+   extremely small memory footprint.
+  We can't use Choueka et al.'s finite state machine approach, which is
+   extremely fast, because we can't allow multiple symbols to be output at a
+   time; the codebook can and does change between symbols.
+  It also has very large memory requirements, which impairs cache coherency.
+
+  @ARTICLE{Hash95,
+    author="Reza Hashemian",
+    title="Memory Efficient and High-Speed Search {Huffman} Coding",
+    journal="{IEEE} Transactions on Communications",
+    volume=43,
+    number=10,
+    pages="2576--2581",
+    month=Oct,
+    year=1995
+  }*/
+struct oc_huff_node{
+  /*The number of bits of the code needed to descend through this node.
+    0 indicates a leaf node.
+    Otherwise there are 1<<nbits nodes in the nodes table, which can be
+     indexed by reading nbits bits from the stream.*/
+  unsigned char  nbits;
+  /*The value of a token stored in a leaf node.
+    The value in non-leaf nodes is undefined.*/
+  unsigned char  token;
+  /*The depth of the current node, relative to its parent in the collapsed
+     tree.
+    This can be less than its parent's nbits value, in which case there are
+     1<<nbits-depth copies of this node in the table, and the bitstream should
+     only be advanced depth bits after reaching this node.*/
+  unsigned char  depth;
+  /*The table of child nodes.
+    The ACTUAL size of this array is 1<<nbits, despite what the declaration
+     below claims.
+    The exception is that for leaf nodes the size is 0.*/
+  oc_huff_node  *nodes[1];
+};
+
+
+
+int oc_huff_trees_unpack(oggpack_buffer *_opb,
+ oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
+void oc_huff_trees_copy(oc_huff_node *_dst[TH_NHUFFMAN_TABLES],
+ const oc_huff_node *const _src[TH_NHUFFMAN_TABLES]);
+void oc_huff_trees_clear(oc_huff_node *_nodes[TH_NHUFFMAN_TABLES]);
+int oc_huff_token_decode(oggpack_buffer *_opb,const oc_huff_node *_node);
+
+
+#endif
--- a/modules/libtheora/lib/dec/huffman.h
+++ b/modules/libtheora/lib/dec/huffman.h
@ -0,0 +1,70 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: huffman.h 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+#if !defined(_huffman_H)
+# define _hufffman_H (1)
+# include "theora/codec.h"
+# include "ocintrin.h"
+
+/*The range of valid quantized DCT coefficient values.
+  VP3 used 511 in the encoder, but the bitstream is capable of 580.*/
+#define OC_DCT_VAL_RANGE         (580)
+
+#define OC_NDCT_TOKEN_BITS       (5)
+
+#define OC_DCT_EOB1_TOKEN        (0)
+#define OC_DCT_EOB2_TOKEN        (1)
+#define OC_DCT_EOB3_TOKEN        (2)
+#define OC_DCT_REPEAT_RUN0_TOKEN (3)
+#define OC_DCT_REPEAT_RUN1_TOKEN (4)
+#define OC_DCT_REPEAT_RUN2_TOKEN (5)
+#define OC_DCT_REPEAT_RUN3_TOKEN (6)
+
+#define OC_DCT_SHORT_ZRL_TOKEN   (7)
+#define OC_DCT_ZRL_TOKEN         (8)
+
+#define OC_ONE_TOKEN             (9)
+#define OC_MINUS_ONE_TOKEN       (10)
+#define OC_TWO_TOKEN             (11)
+#define OC_MINUS_TWO_TOKEN       (12)
+
+#define OC_DCT_VAL_CAT2          (13)
+#define OC_DCT_VAL_CAT3          (17)
+#define OC_DCT_VAL_CAT4          (18)
+#define OC_DCT_VAL_CAT5          (19)
+#define OC_DCT_VAL_CAT6          (20)
+#define OC_DCT_VAL_CAT7          (21)
+#define OC_DCT_VAL_CAT8          (22)
+
+#define OC_DCT_RUN_CAT1A         (23)
+#define OC_DCT_RUN_CAT1B         (28)
+#define OC_DCT_RUN_CAT1C         (29)
+#define OC_DCT_RUN_CAT2A         (30)
+#define OC_DCT_RUN_CAT2B         (31)
+
+#define OC_NDCT_EOB_TOKEN_MAX    (7)
+#define OC_NDCT_ZRL_TOKEN_MAX    (9)
+#define OC_NDCT_VAL_MAX          (23)
+#define OC_NDCT_VAL_CAT1_MAX     (13)
+#define OC_NDCT_VAL_CAT2_MAX     (17)
+#define OC_NDCT_VAL_CAT2_SIZE    (OC_NDCT_VAL_CAT2_MAX-OC_DCT_VAL_CAT2)
+#define OC_NDCT_RUN_MAX          (32)
+#define OC_NDCT_RUN_CAT1A_MAX    (28)
+
+extern const int OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS];
+
+#endif
--- a/modules/libtheora/lib/dec/idct.c
+++ b/modules/libtheora/lib/dec/idct.c
@ -0,0 +1,270 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: idct.c 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+#include <string.h>
+#include <ogg/ogg.h>
+#include "dct.h"
+#include "idct.h"
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      The first 8 entries are used (e.g., from a row of an 8x8 block).*/
+static void idct8(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  /*0-1 butterfly.*/
+  t[0]=OC_C4S4*(ogg_int16_t)(_x[0]+_x[4])>>16;
+  t[1]=OC_C4S4*(ogg_int16_t)(_x[0]-_x[4])>>16;
+  /*2-3 rotation by 6pi/16.*/
+  t[2]=(OC_C6S2*_x[2]>>16)-(OC_C2S6*_x[6]>>16);
+  t[3]=(OC_C2S6*_x[2]>>16)+(OC_C6S2*_x[6]>>16);
+  /*4-7 rotation by 7pi/16.*/
+  t[4]=(OC_C7S1*_x[1]>>16)-(OC_C1S7*_x[7]>>16);
+  /*5-6 rotation by 3pi/16.*/
+  t[5]=(OC_C3S5*_x[5]>>16)-(OC_C5S3*_x[3]>>16);
+  t[6]=(OC_C5S3*_x[5]>>16)+(OC_C3S5*_x[3]>>16);
+  t[7]=(OC_C1S7*_x[1]>>16)+(OC_C7S1*_x[7]>>16);
+  /*Stage 2:*/
+  /*4-5 butterfly.*/
+  r=t[4]+t[5];
+  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
+  t[4]=r;
+  /*7-6 butterfly.*/
+  r=t[7]+t[6];
+  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
+  t[7]=r;
+  /*Stage 3:*/
+  /*0-3 butterfly.*/
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  /*1-2 butterfly.*/
+  r=t[1]+t[2];
+  t[2]=t[1]-t[2];
+  t[1]=r;
+  /*6-5 butterfly.*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  /*0-7 butterfly.*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  /*1-6 butterfly.*/
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  /*2-5 butterfly.*/
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  /*3-4 butterfly.*/
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 4 entries are used.
+      The other 4 are assumed to be 0.*/
+static void idct8_4(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[2]=OC_C6S2*_x[2]>>16;
+  t[3]=OC_C2S6*_x[2]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[5]=-(OC_C5S3*_x[3]>>16);
+  t[6]=OC_C3S5*_x[3]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  r=t[4]+t[5];
+  t[5]=OC_C4S4*(ogg_int16_t)(t[4]-t[5])>>16;
+  t[4]=r;
+  r=t[7]+t[6];
+  t[6]=OC_C4S4*(ogg_int16_t)(t[7]-t[6])>>16;
+  t[7]=r;
+  /*Stage 3:*/
+  t[1]=t[0]+t[2];
+  t[2]=t[0]-t[2];
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 3 entries are used.
+      The other 5 are assumed to be 0.*/
+static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[2]=OC_C6S2*_x[2]>>16;
+  t[3]=OC_C2S6*_x[2]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  t[5]=OC_C4S4*t[4]>>16;
+  t[6]=OC_C4S4*t[7]>>16;
+  /*Stage 3:*/
+  t[1]=t[0]+t[2];
+  t[2]=t[0]-t[2];
+  r=t[0]+t[3];
+  t[3]=t[0]-t[3];
+  t[0]=r;
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[1]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[2]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[3]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[3]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[2]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[1]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first 2 entries are used.
+      The other 6 are assumed to be 0.*/
+static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
+  ogg_int32_t t[8];
+  ogg_int32_t r;
+  /*Stage 1:*/
+  t[0]=OC_C4S4*_x[0]>>16;
+  t[4]=OC_C7S1*_x[1]>>16;
+  t[7]=OC_C1S7*_x[1]>>16;
+  /*Stage 2:*/
+  t[5]=OC_C4S4*t[4]>>16;
+  t[6]=OC_C4S4*t[7]>>16;
+  /*Stage 3:*/
+  r=t[6]+t[5];
+  t[5]=t[6]-t[5];
+  t[6]=r;
+  /*Stage 4:*/
+  _y[0<<3]=(ogg_int16_t)(t[0]+t[7]);
+  _y[1<<3]=(ogg_int16_t)(t[0]+t[6]);
+  _y[2<<3]=(ogg_int16_t)(t[0]+t[5]);
+  _y[3<<3]=(ogg_int16_t)(t[0]+t[4]);
+  _y[4<<3]=(ogg_int16_t)(t[0]-t[4]);
+  _y[5<<3]=(ogg_int16_t)(t[0]-t[5]);
+  _y[6<<3]=(ogg_int16_t)(t[0]-t[6]);
+  _y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
+}
+
+
+/*Performs an inverse 8 point Type-II DCT transform.
+  The output is scaled by a factor of 2 relative to the orthonormal version of
+   the transform.
+  _y: The buffer to store the result in.
+      Data will be placed in every 8th entry (e.g., in a column of an 8x8
+       block).
+  _x: The input coefficients.
+      Only the first entry is used.
+      The other 7 are assumed to be 0.*/
+static void idct8_1(ogg_int16_t *_y,const ogg_int16_t _x[1]){
+  _y[0<<3]=_y[1<<3]=_y[2<<3]=_y[3<<3]=
+   _y[4<<3]=_y[5<<3]=_y[6<<3]=_y[7<<3]=(ogg_int16_t)(OC_C4S4*_x[0]>>16);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients. */
+void oc_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  const ogg_int16_t *in;
+  ogg_int16_t       *end;
+  ogg_int16_t       *out;
+  ogg_int16_t        w[64];
+  /*Transform rows of x into columns of w.*/
+  for(in=_x,out=w,end=out+8;out<end;in+=8,out++)idct8(out,in);
+  /*Transform rows of w into columns of y.*/
+  for(in=w,out=_y,end=out+8;out<end;in+=8,out++)idct8(out,in);
+  /*Adjust for scale factor.*/
+  for(out=_y,end=out+64;out<end;out++)*out=(ogg_int16_t)(*out+8>>4);
+}
+
+/*Performs an inverse 8x8 Type-II DCT transform.
+  The input is assumed to be scaled by a factor of 4 relative to orthonormal
+   version of the transform.
+  All coefficients but the first 10 in zig-zag scan order are assumed to be 0:
+   x  x  x  x  0  0  0  0
+   x  x  x  0  0  0  0  0
+   x  x  0  0  0  0  0  0
+   x  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+   0  0  0  0  0  0  0  0
+  _y: The buffer to store the result in.
+      This may be the same as _x.
+  _x: The input coefficients. */
+void oc_idct8x8_10_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]){
+  const ogg_int16_t *in;
+  ogg_int16_t       *end;
+  ogg_int16_t       *out;
+  ogg_int16_t        w[64];
+  /*Transform rows of x into columns of w.*/
+  idct8_4(w,_x);
+  idct8_3(w+1,_x+8);
+  idct8_2(w+2,_x+16);
+  idct8_1(w+3,_x+24);
+  /*Transform rows of w into columns of y.*/
+  for(in=w,out=_y,end=out+8;out<end;in+=8,out++)idct8_4(out,in);
+  /*Adjust for scale factor.*/
+  for(out=_y,end=out+64;out<end;out++)*out=(ogg_int16_t)(*out+8>>4);
+}
--- a/modules/libtheora/lib/dec/idct.h
+++ b/modules/libtheora/lib/dec/idct.h
@ -0,0 +1,26 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: idct.h 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+/*Inverse DCT transforms.*/
+#include <ogg/ogg.h>
+#if !defined(_idct_H)
+# define _idct_H (1)
+
+void oc_idct8x8_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+void oc_idct8x8_10_c(ogg_int16_t _y[64],const ogg_int16_t _x[64]);
+
+#endif
--- a/modules/libtheora/lib/dec/info.c
+++ b/modules/libtheora/lib/dec/info.c
@ -0,0 +1,123 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: info.c 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include "../internal.h"
+
+
+
+/*This is more or less the same as strncasecmp, but that doesn't exist
+   everywhere, and this is a fairly trivial function, so we include it.
+  Note: We take advantage of the fact that we know _n is less than or equal to
+   the length of at least one of the strings.*/
+static int oc_tagcompare(const char *_s1,const char *_s2,int _n){
+  int c;
+  for(c=0;c<_n;c++){
+    if(toupper(_s1[c])!=toupper(_s2[c]))return !0;
+  }
+  return _s1[c]!='=';
+}
+
+
+
+void th_info_init(th_info *_info){
+  memset(_info,0,sizeof(*_info));
+  _info->version_major=TH_VERSION_MAJOR;
+  _info->version_minor=TH_VERSION_MINOR;
+  _info->version_subminor=TH_VERSION_SUB;
+  _info->keyframe_granule_shift=6;
+}
+
+void th_info_clear(th_info *_info){
+  memset(_info,0,sizeof(*_info));
+}
+
+
+
+void th_comment_init(th_comment *_tc){
+  memset(_tc,0,sizeof(*_tc));
+}
+
+void th_comment_add(th_comment *_tc,char *_comment){
+  int comment_len;
+  _tc->user_comments=_ogg_realloc(_tc->user_comments,
+   (_tc->comments+2)*sizeof(*_tc->user_comments));
+  _tc->comment_lengths=_ogg_realloc(_tc->comment_lengths,
+   (_tc->comments+2)*sizeof(*_tc->comment_lengths));
+  comment_len=strlen(_comment);
+  _tc->comment_lengths[_tc->comments]=comment_len;
+  _tc->user_comments[_tc->comments]=_ogg_malloc(comment_len+1);
+  memcpy(_tc->user_comments[_tc->comments],_comment,comment_len+1);
+  _tc->comments++;
+  _tc->user_comments[_tc->comments]=NULL;
+}
+
+void th_comment_add_tag(th_comment *_tc,char *_tag,char *_val){
+  char *comment;
+  int   tag_len;
+  int   val_len;
+  tag_len=strlen(_tag);
+  val_len=strlen(_val);
+  /*+2 for '=' and '\0'.*/
+  comment=_ogg_malloc(tag_len+val_len+2);
+  memcpy(comment,_tag,tag_len);
+  comment[tag_len]='=';
+  memcpy(comment+tag_len+1,_val,val_len+1);
+  th_comment_add(_tc,comment);
+  _ogg_free(comment);
+}
+
+char *th_comment_query(th_comment *_tc,char *_tag,int _count){
+  long i;
+  int  found;
+  int  tag_len;
+  tag_len=strlen(_tag);
+  found=0;
+  for(i=0;i<_tc->comments;i++){
+    if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len)){
+      /*We return a pointer to the data, not a copy.*/
+      if(_count==found++)return _tc->user_comments[i]+tag_len+1;
+    }
+  }
+  /*Didn't find anything.*/
+  return NULL;
+}
+
+int th_comment_query_count(th_comment *_tc,char *_tag){
+  long i;
+  int  tag_len;
+  int  count;
+  tag_len=strlen(_tag);
+  count=0;
+  for(i=0;i<_tc->comments;i++){
+    if(!oc_tagcompare(_tc->user_comments[i],_tag,tag_len))count++;
+  }
+  return count;
+}
+
+void th_comment_clear(th_comment *_tc){
+  if(_tc!=NULL){
+    long i;
+    for(i=0;i<_tc->comments;i++)_ogg_free(_tc->user_comments[i]);
+    _ogg_free(_tc->user_comments);
+    _ogg_free(_tc->comment_lengths);
+    _ogg_free(_tc->vendor);
+    memset(_tc,0,sizeof(*_tc));
+  }
+}
--- a/modules/libtheora/lib/dec/internal.c
+++ b/modules/libtheora/lib/dec/internal.c
@ -0,0 +1,383 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: internal.c 14385 2008-01-09 19:53:18Z giles $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "../internal.h"
+#include "idct.h"
+
+
+
+/*A map from the index in the zig zag scan to the coefficient number in a
+   block.
+  All zig zag indices beyond 63 are sent to coefficient 64, so that zero runs
+   past the end of a block in bogus streams get mapped to a known location.*/
+const int OC_FZIG_ZAG[128]={
+   0, 1, 8,16, 9, 2, 3,10,
+  17,24,32,25,18,11, 4, 5,
+  12,19,26,33,40,48,41,34,
+  27,20,13, 6, 7,14,21,28,
+  35,42,49,56,57,50,43,36,
+  29,22,15,23,30,37,44,51,
+  58,59,52,45,38,31,39,46,
+  53,60,61,54,47,55,62,63,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64,
+  64,64,64,64,64,64,64,64
+};
+
+/*A map from the coefficient number in a block to its index in the zig zag
+   scan.*/
+const int OC_IZIG_ZAG[64]={
+   0, 1, 5, 6,14,15,27,28,
+   2, 4, 7,13,16,26,29,42,
+   3, 8,12,17,25,30,41,43,
+   9,11,18,24,31,40,44,53,
+  10,19,23,32,39,45,52,54,
+  20,22,33,38,46,51,55,60,
+  21,34,37,47,50,56,59,61,
+  35,36,48,49,57,58,62,63
+};
+
+/*The predictor frame to use for each macro block mode.*/
+const int OC_FRAME_FOR_MODE[8]={
+  /*OC_MODE_INTER_NOMV*/
+  OC_FRAME_PREV,
+  /*OC_MODE_INTRA*/
+  OC_FRAME_SELF,
+  /*OC_MODE_INTER_MV*/
+  OC_FRAME_PREV,
+  /*OC_MODE_INTER_MV_LAST*/
+  OC_FRAME_PREV,
+  /*OC_MODE_INTER_MV_LAST2*/
+  OC_FRAME_PREV,
+  /*OC_MODE_GOLDEN*/
+  OC_FRAME_GOLD,
+  /*OC_MODE_GOLDEN_MV*/
+  OC_FRAME_GOLD,
+  /*OC_MODE_INTER_MV_FOUR*/
+  OC_FRAME_PREV,
+};
+
+/*A map from physical macro block ordering to bitstream macro block
+   ordering within a super block.*/
+const int OC_MB_MAP[2][2]={{0,3},{1,2}};
+
+/*A list of the indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.*/
+const int OC_MB_MAP_IDXS[TH_PF_NFORMATS][12]={
+  {0,1,2,3,4,8},
+  {0,1,2,3,4,5,8,9},
+  {0,1,2,3,4,6,8,10},
+  {0,1,2,3,4,5,6,7,8,9,10,11}
+};
+
+/*The number of indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.*/
+const int OC_MB_MAP_NIDXS[TH_PF_NFORMATS]={6,8,8,12};
+
+/*The number of extra bits that are coded with each of the DCT tokens.
+  Each DCT token has some fixed number of additional bits (possibly 0) stored
+   after the token itself, containing, for example, coefficient magnitude,
+   sign bits, etc.*/
+const int OC_DCT_TOKEN_EXTRA_BITS[TH_NDCT_TOKENS]={
+  0,0,0,2,3,4,12,3,6,
+  0,0,0,0,
+  1,1,1,1,2,3,4,5,6,10,
+  1,1,1,1,1,3,4,
+  2,3
+};
+
+
+
+int oc_ilog(unsigned _v){
+  int ret;
+  for(ret=0;_v;ret++)_v>>=1;
+  return ret;
+}
+
+
+
+/*Determines the number of blocks or coefficients to be skipped for a given
+   token value.
+  _token:      The token value to skip.
+  _extra_bits: The extra bits attached to this token.
+  Return: A positive value indicates that number of coefficients are to be
+           skipped in the current block.
+          Otherwise, the negative of the return value indicates that number of
+           blocks are to be ended.*/
+typedef int (*oc_token_skip_func)(int _token,int _extra_bits);
+
+/*Handles the simple end of block tokens.*/
+static int oc_token_skip_eob(int _token,int _extra_bits){
+  static const int NBLOCKS_ADJUST[OC_NDCT_EOB_TOKEN_MAX]={1,2,3,4,8,16,0};
+  return -_extra_bits-NBLOCKS_ADJUST[_token];
+}
+
+/*The last EOB token has a special case, where an EOB run of size zero ends all
+   the remaining blocks in the frame.*/
+static int oc_token_skip_eob6(int _token,int _extra_bits){
+  if(!_extra_bits)return -INT_MAX;
+  return -_extra_bits;
+}
+
+/*Handles the pure zero run tokens.*/
+static int oc_token_skip_zrl(int _token,int _extra_bits){
+  return _extra_bits+1;
+}
+
+/*Handles a normal coefficient value token.*/
+static int oc_token_skip_val(void){
+  return 1;
+}
+
+/*Handles a category 1A zero run/coefficient value combo token.*/
+static int oc_token_skip_run_cat1a(int _token){
+  return _token-OC_DCT_RUN_CAT1A+2;
+}
+
+/*Handles category 1b and 2 zero run/coefficient value combo tokens.*/
+static int oc_token_skip_run(int _token,int _extra_bits){
+  static const int NCOEFFS_ADJUST[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+    7,11,2,3
+  };
+  static const int NCOEFFS_MASK[OC_NDCT_RUN_MAX-OC_DCT_RUN_CAT1B]={
+    3,7,0,1
+  };
+  _token-=OC_DCT_RUN_CAT1B;
+  return (_extra_bits&NCOEFFS_MASK[_token])+NCOEFFS_ADJUST[_token];
+}
+
+/*A jump table for computing the number of coefficients or blocks to skip for
+   a given token value.
+  This reduces all the conditional branches, etc., needed to parse these token
+   values down to one indirect jump.*/
+static const oc_token_skip_func OC_TOKEN_SKIP_TABLE[TH_NDCT_TOKENS]={
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob,
+  oc_token_skip_eob6,
+  oc_token_skip_zrl,
+  oc_token_skip_zrl,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_val,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  (oc_token_skip_func)oc_token_skip_run_cat1a,
+  oc_token_skip_run,
+  oc_token_skip_run,
+  oc_token_skip_run,
+  oc_token_skip_run
+};
+
+/*Determines the number of blocks or coefficients to be skipped for a given
+   token value.
+  _token:      The token value to skip.
+  _extra_bits: The extra bits attached to this token.
+  Return: A positive value indicates that number of coefficients are to be
+           skipped in the current block.
+          Otherwise, the negative of the return value indicates that number of
+           blocks are to be ended.
+          0 will never be returned, so that at least one coefficient in one
+           block will always be decoded for every token.*/
+int oc_dct_token_skip(int _token,int _extra_bits){
+  return (*OC_TOKEN_SKIP_TABLE[_token])(_token,_extra_bits);
+}
+
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X and Y directions.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs00(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[1][0]+_lbmvs[2][0]+_lbmvs[3][0];
+  dy=_lbmvs[0][1]+_lbmvs[1][1]+_lbmvs[2][1]+_lbmvs[3][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,2,2);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,2,2);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the Y direction.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs01(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[2][0];
+  dy=_lbmvs[0][1]+_lbmvs[2][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+  dx=_lbmvs[1][0]+_lbmvs[3][0];
+  dy=_lbmvs[1][1]+_lbmvs[3][1];
+  _cbmvs[1][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[1][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with chroma decimated in the X direction.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs10(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  int dx;
+  int dy;
+  dx=_lbmvs[0][0]+_lbmvs[1][0];
+  dy=_lbmvs[0][1]+_lbmvs[1][1];
+  _cbmvs[0][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[0][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+  dx=_lbmvs[2][0]+_lbmvs[3][0];
+  dy=_lbmvs[2][1]+_lbmvs[3][1];
+  _cbmvs[2][0]=(signed char)OC_DIV_ROUND_POW2(dx,1,1);
+  _cbmvs[2][1]=(signed char)OC_DIV_ROUND_POW2(dy,1,1);
+}
+
+/*The function used to fill in the chroma plane motion vectors for a macro
+   block when 4 different motion vectors are specified in the luma plane.
+  This version is for use with no chroma decimation.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lmbmv: The luma macro-block level motion vector to fill in for use in
+           prediction.
+  _lbmvs: The luma block-level motion vectors.*/
+static void oc_set_chroma_mvs11(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]){
+  memcpy(_cbmvs,_lbmvs,4*sizeof(_lbmvs[0]));
+}
+
+/*A table of functions used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.*/
+const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS]={
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs00,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs01,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs10,
+  (oc_set_chroma_mvs_func)oc_set_chroma_mvs11
+};
+
+
+
+void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz){
+  size_t  rowsz;
+  size_t  colsz;
+  size_t  datsz;
+  char   *ret;
+  colsz=_height*sizeof(void *);
+  rowsz=_sz*_width;
+  datsz=rowsz*_height;
+  /*Alloc array and row pointers.*/
+  ret=(char *)_ogg_malloc(datsz+colsz);
+  /*Initialize the array.*/
+  if(ret!=NULL){
+    size_t   i;
+    void   **p;
+    char    *datptr;
+    p=(void **)ret;
+    i=_height;
+    for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr;
+  }
+  return (void **)ret;
+}
+
+void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz){
+  size_t  colsz;
+  size_t  rowsz;
+  size_t  datsz;
+  char   *ret;
+  colsz=_height*sizeof(void *);
+  rowsz=_sz*_width;
+  datsz=rowsz*_height;
+  /*Alloc array and row pointers.*/
+  ret=(char *)_ogg_calloc(datsz+colsz,1);
+  /*Initialize the array.*/
+  if(ret!=NULL){
+    size_t   i;
+    void   **p;
+    char    *datptr;
+    p=(void **)ret;
+    i=_height;
+    for(datptr=ret+colsz;i-->0;p++,datptr+=rowsz)*p=(void *)datptr;
+  }
+  return (void **)ret;
+}
+
+void oc_free_2d(void *_ptr){
+  _ogg_free(_ptr);
+}
+
+/*Fills in a Y'CbCr buffer with a pointer to the image data in the first
+   buffer, but with the opposite vertical orientation.
+  _dst: The destination buffer.
+        This can be the same as _src.
+  _src: The source buffer.*/
+void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
+ const th_ycbcr_buffer _src){
+  int pli;
+  for(pli=0;pli<3;pli++){
+    _dst[pli].width=_src[pli].width;
+    _dst[pli].height=_src[pli].height;
+    _dst[pli].stride=-_src[pli].stride;
+    _dst[pli].data=_src[pli].data+(1-_dst[pli].height)*_dst[pli].stride;
+  }
+}
+
+const char *th_version_string(void){
+  return OC_VENDOR_STRING;
+}
+
+ogg_uint32_t th_version_number(void){
+  return (TH_VERSION_MAJOR<<16)+(TH_VERSION_MINOR<<8)+(TH_VERSION_SUB);
+}
+
+/*Determines the packet type.
+  Note that this correctly interprets a 0-byte packet as a video data packet.
+  Return: 1 for a header packet, 0 for a data packet.*/
+int th_packet_isheader(ogg_packet *_op){
+  return _op->bytes>0?_op->packet[0]>>7:0;
+}
+
+/*Determines the frame type of a video data packet.
+  Note that this correctly interprets a 0-byte packet as a delta frame.
+  Return: 1 for a key frame, 0 for a delta frame, and -1 for a header
+           packet.*/
+int th_packet_iskeyframe(ogg_packet *_op){
+  return _op->bytes<=0?0:_op->packet[0]&0x80?-1:!(_op->packet[0]&0x40);
+}
--- a/modules/libtheora/lib/dec/ocintrin.h
+++ b/modules/libtheora/lib/dec/ocintrin.h
@ -0,0 +1,88 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: ocintrin.h 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+/*Some common macros for potential platform-specific optimization.*/
+#include <math.h>
+#if !defined(_ocintrin_H)
+# define _ocintrin_H (1)
+
+/*Some specific platforms may have optimized intrinsic or inline assembly
+   versions of these functions which can substantially improve performance.
+  We define macros for them to allow easy incorporation of these non-ANSI
+   features.*/
+
+/*Branchless, but not correct for differences larger than INT_MAX.
+static int oc_mini(int _a,int _b){
+  int ambsign;
+  ambsign=_a-_b>>sizeof(int)*8-1;
+  return (_a&~ambsign)+(_b&ambsign);
+}*/
+
+
+#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
+#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
+/*Clamps an integer into the given range.
+  If _a>_c, then the lower bound _a is respected over the upper bound _c (this
+   behavior is required to meet our documented API behavior).
+  _a: The lower bound.
+  _b: The value to clamp.
+  _c: The upper boud.*/
+#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
+#define OC_CLAMP255(_x)     ((unsigned char)((((_x)<0)-1)&((_x)|-((_x)>255))))
+/*Divides an integer by a power of two, truncating towards 0.
+  _dividend: The integer to divide.
+  _shift:    The non-negative power of two to divide by.
+  _rmask:    (1<<_shift)-1*/
+#define OC_DIV_POW2(_dividend,_shift,_rmask)\
+  ((_dividend)+(((_dividend)>>sizeof(_dividend)*8-1)&(_rmask))>>(_shift))
+/*Divides _x by 65536, truncating towards 0.*/
+#define OC_DIV2_16(_x) OC_DIV_POW2(_x,16,0xFFFF)
+/*Divides _x by 2, truncating towards 0.*/
+#define OC_DIV2(_x) OC_DIV_POW2(_x,1,0x1)
+/*Divides _x by 8, truncating towards 0.*/
+#define OC_DIV8(_x) OC_DIV_POW2(_x,3,0x7)
+/*Divides _x by 16, truncating towards 0.*/
+#define OC_DIV16(_x) OC_DIV_POW2(_x,4,0xF)
+/*Right shifts _dividend by _shift, adding _rval, and subtracting one for
+   negative dividends first..
+  When _rval is (1<<_shift-1), this is equivalent to division with rounding
+   ties towards positive infinity.*/
+#define OC_DIV_ROUND_POW2(_dividend,_shift,_rval)\
+  ((_dividend)+((_dividend)>>sizeof(_dividend)*8-1)+(_rval)>>(_shift))
+/*Swaps two integers _a and _b if _a>_b.*/
+#define OC_SORT2I(_a,_b)\
+  if((_a)>(_b)){\
+    int t__;\
+    t__=(_a);\
+    (_a)=(_b);\
+    (_b)=t__;\
+  }
+
+
+
+/*All of these macros should expect floats as arguments.*/
+#define OC_MAXF(_a,_b)      ((_a)<(_b)?(_b):(_a))
+#define OC_MINF(_a,_b)      ((_a)>(_b)?(_b):(_a))
+#define OC_CLAMPF(_a,_b,_c) (OC_MINF(_a,OC_MAXF(_b,_c)))
+#define OC_FABSF(_f)        ((float)fabs(_f))
+#define OC_SQRTF(_f)        ((float)sqrt(_f))
+#define OC_POWF(_b,_e)      ((float)pow(_b,_e))
+#define OC_LOGF(_f)         ((float)log(_f))
+#define OC_IFLOORF(_f)      ((int)floor(_f))
+#define OC_ICEILF(_f)       ((int)ceil(_f))
+
+#endif
--- a/modules/libtheora/lib/dec/quant.c
+++ b/modules/libtheora/lib/dec/quant.c
@ -0,0 +1,152 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: quant.c 14375 2008-01-06 05:37:33Z tterribe $
+
+ ********************************************************************/
+
+#include <stdlib.h>
+#include <string.h>
+#include <ogg/ogg.h>
+#include "quant.h"
+#include "decint.h"
+
+unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
+unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
+
+/*Initializes the dequantization tables from a set of quantizer info.
+  Currently the dequantizer (and elsewhere enquantizer) tables are expected to
+   be initialized as pointing to the storage reserved for them in the
+   oc_theora_state (resp. oc_enc_ctx) structure.
+  If some tables are duplicates of others, the pointers will be adjusted to
+   point to a single copy of the tables, but the storage for them will not be
+   freed.
+  If you're concerned about the memory footprint, the obvious thing to do is
+   to move the storage out of its fixed place in the structures and allocate
+   it on demand.
+  However, a much, much better option is to only store the quantization
+   matrices being used for the current frame, and to recalculate these as the
+   qi values change between frames (this is what VP3 did).*/
+void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
+ int _pp_dc_scale[64],const th_quant_info *_qinfo){
+  int          qti; /* coding mode: intra or inter */
+  int          pli; /* Y U V */
+  for(qti=0;qti<2;qti++){
+    for(pli=0;pli<3;pli++){
+      oc_quant_tables stage;
+
+      int qi;  /* quality index */
+      int qri; /* range iterator */
+      
+      for(qi=0,qri=0; qri<=_qinfo->qi_ranges[qti][pli].nranges; qri++){
+	th_quant_base base;
+	
+	ogg_uint32_t      q;
+	int               qi_start;
+	int               qi_end;
+	int               ci;
+	memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
+	       sizeof(base));
+
+	qi_start=qi;
+	if(qri==_qinfo->qi_ranges[qti][pli].nranges)
+	  qi_end=qi+1;
+	else 
+	  qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
+	
+	/* Iterate over quality indicies in this range */
+	for(;;){
+	  
+	  /*In the original VP3.2 code, the rounding offset and the size of the
+	    dead zone around 0 were controlled by a "sharpness" parameter.
+	    The size of our dead zone is now controlled by the per-coefficient
+	    quality thresholds returned by our HVS module.
+	    We round down from a more accurate value when the quality of the
+	    reconstruction does not fall below our threshold and it saves bits.
+	    Hence, all of that VP3.2 code is gone from here, and the remaining
+	    floating point code has been implemented as equivalent integer code
+	    with exact precision.*/
+
+	  /* for postprocess, not dequant */
+	  if(_pp_dc_scale!=NULL)
+	    _pp_dc_scale[qi]=(int)((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/160);
+
+	  /*Scale DC the coefficient from the proper table.*/
+	  q=((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/100)<<2;
+	  q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
+	  stage[qi][0]=(ogg_uint16_t)q;
+	  
+	  /*Now scale AC coefficients from the proper table.*/
+	  for(ci=1;ci<64;ci++){
+	    q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
+	    q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
+	    stage[qi][ci]=(ogg_uint16_t)q;
+	  }
+	  
+	  if(++qi>=qi_end)break;
+	  
+	  /*Interpolate the next base matrix.*/
+	  for(ci=0;ci<64;ci++){
+	    base[ci]=(unsigned char)
+	      ((2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
+		   (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
+		+_qinfo->qi_ranges[qti][pli].sizes[qri])/
+	       (2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
+	  }
+	}
+      }
+
+      /* Staging matricies complete; commit to memory only if this
+	 isn't a duplicate of a preceeding plane. This simple check
+	 helps us improve cache coherency later.*/
+      {
+	int dupe = 0;
+	int i,j;
+	for(i=0;i<=qti;i++){
+	  for(j=0;j<(i<qti?3:pli);j++){
+	    if(!memcmp(stage,_dequant[i][j],sizeof(stage))){
+	      dupe = 1;
+	      break;
+	    }
+	  }
+	  if(dupe)break;
+	}
+	if(dupe){
+	  _dequant[qti][pli]=_dequant[i][j];
+	}else{
+	  memcpy(_dequant[qti][pli],stage,sizeof(stage));
+	}
+      }
+    }
+  }
+
+#ifdef _TH_DEBUG_
+  int i, j, k, l;
+  /* dump the calculated quantizer tables */
+  for(i=0;i<2;i++){
+    for(j=0;j<3;j++){
+      for(k=0;k<64;k++){
+	TH_DEBUG("quantizer table [%s][%s][Q%d] = {",
+		 (i==0?"intra":"inter"),(j==0?"Y":(j==1?"U":"V")),k);
+	for(l=0;l<64;l++){
+	  if((l&7)==0)
+	    TH_DEBUG("\n   ");
+	  TH_DEBUG("%4d ",_dequant[i][j][k][l]);
+	}
+	TH_DEBUG("}\n");
+      }
+    }
+  }
+#endif
+
+}
--- a/modules/libtheora/lib/dec/quant.h
+++ b/modules/libtheora/lib/dec/quant.h
@ -0,0 +1,46 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: quant.h 14059 2007-10-28 23:43:27Z xiphmont $
+
+ ********************************************************************/
+
+#if !defined(_quant_H)
+# define _quant_H (1)
+# include "theora/codec.h"
+# include "ocintrin.h"
+
+typedef ogg_uint16_t   oc_quant_table[64];
+typedef oc_quant_table oc_quant_tables[64];
+
+
+
+/*Maximum scaled quantizer value.*/
+#define OC_QUANT_MAX          (1024<<2)
+
+
+
+/*Minimum scaled DC coefficient frame quantizer value for intra and inter
+   modes.*/
+extern unsigned OC_DC_QUANT_MIN[2];
+/*Minimum scaled AC coefficient frame quantizer value for intra and inter
+   modes.*/
+extern unsigned OC_AC_QUANT_MIN[2];
+
+
+
+void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
+			    int _pp_dc_scale[64],
+			    const th_quant_info *_qinfo);
+
+#endif
--- a/modules/libtheora/lib/dec/state.c
+++ b/modules/libtheora/lib/dec/state.c
--- a/modules/libtheora/lib/dec/x86/mmxfrag.c
+++ b/modules/libtheora/lib/dec/x86/mmxfrag.c
@ -0,0 +1,290 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxfrag.c 14345 2008-01-04 18:02:21Z tterribe $
+
+ ********************************************************************/
+
+/*MMX acceleration of fragment reconstruction for motion compensation.
+  Originally written by Rudolf Marek.
+  Additional optimization by Nils Pipenbrinck.
+  Note: Loops are unrolled for best performance.
+  The iteration each instruction belongs to is marked in the comments as #i.*/
+#include "x86int.h"
+
+#if defined(USE_ASM)
+
+void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t *_residue){
+  __asm__ __volatile__(
+    /*Set mm0 to 0xFFFFFFFFFFFFFFFF.*/
+    "pcmpeqw %%mm0,%%mm0\n\t"
+    /*#0 Load low residue.*/
+    "movq 0*8(%[residue]),%%mm1\n\t"
+    /*#0 Load high residue.*/
+    "movq 1*8(%[residue]),%%mm2\n\t"
+    /*Set mm0 to 0x8000800080008000.*/
+    "psllw $15,%%mm0\n\t"
+    /*#1 Load low residue.*/
+    "movq 2*8(%[residue]),%%mm3\n\t"
+    /*#1 Load high residue.*/
+    "movq 3*8(%[residue]),%%mm4\n\t"
+    /*Set mm0 to 0x0080008000800080.*/
+    "psrlw $8,%%mm0\n\t"
+    /*#2 Load low residue.*/
+    "movq 4*8(%[residue]),%%mm5\n\t"
+    /*#2 Load high residue.*/
+    "movq 5*8(%[residue]),%%mm6\n\t"
+    /*#0 Bias low  residue.*/
+    "paddsw %%mm0,%%mm1\n\t"
+    /*#0 Bias high residue.*/
+    "paddsw %%mm0,%%mm2\n\t"
+    /*#0 Pack to byte.*/
+    "packuswb %%mm2,%%mm1\n\t"
+    /*#1 Bias low  residue.*/
+    "paddsw %%mm0,%%mm3\n\t"
+    /*#1 Bias high residue.*/
+    "paddsw %%mm0,%%mm4\n\t"
+    /*#1 Pack to byte.*/
+    "packuswb %%mm4,%%mm3\n\t"
+    /*#2 Bias low  residue.*/
+    "paddsw %%mm0,%%mm5\n\t"
+    /*#2 Bias high residue.*/
+    "paddsw %%mm0,%%mm6\n\t"
+    /*#2 Pack to byte.*/
+    "packuswb %%mm6,%%mm5\n\t"
+    /*#0 Write row.*/
+    "movq %%mm1,(%[dst])\n\t"
+    /*#1 Write row.*/
+    "movq %%mm3,(%[dst],%[dst_ystride])\n\t"
+    /*#2 Write row.*/
+    "movq %%mm5,(%[dst],%[dst_ystride],2)\n\t"
+    /*#3 Load low residue.*/
+    "movq 6*8(%[residue]),%%mm1\n\t"
+    /*#3 Load high residue.*/
+    "movq 7*8(%[residue]),%%mm2\n\t"
+    /*#4 Load high residue.*/
+    "movq 8*8(%[residue]),%%mm3\n\t"
+    /*#4 Load high residue.*/
+    "movq 9*8(%[residue]),%%mm4\n\t"
+    /*#5 Load high residue.*/
+    "movq 10*8(%[residue]),%%mm5\n\t"
+    /*#5 Load high residue.*/
+    "movq 11*8(%[residue]),%%mm6\n\t"
+    /*#3 Bias low  residue.*/
+    "paddsw %%mm0,%%mm1\n\t"
+    /*#3 Bias high residue.*/
+    "paddsw %%mm0,%%mm2\n\t"
+    /*#3 Pack to byte.*/
+    "packuswb %%mm2,%%mm1\n\t"
+    /*#4 Bias low  residue.*/
+    "paddsw %%mm0,%%mm3\n\t"
+    /*#4 Bias high residue.*/
+    "paddsw %%mm0,%%mm4\n\t"
+    /*#4 Pack to byte.*/
+    "packuswb %%mm4,%%mm3\n\t"
+    /*#5 Bias low  residue.*/
+    "paddsw %%mm0,%%mm5\n\t"
+    /*#5 Bias high residue.*/
+    "paddsw %%mm0,%%mm6\n\t"
+    /*#5 Pack to byte.*/
+    "packuswb %%mm6,%%mm5\n\t"
+    /*#3 Write row.*/
+    "movq %%mm1,(%[dst],%[dst_ystride3])\n\t"
+    /*#4 Write row.*/
+    "movq %%mm3,(%[dst4])\n\t"
+    /*#5 Write row.*/
+    "movq %%mm5,(%[dst4],%[dst_ystride])\n\t"
+    /*#6 Load low residue.*/
+    "movq 12*8(%[residue]),%%mm1\n\t"
+    /*#6 Load high residue.*/
+    "movq 13*8(%[residue]),%%mm2\n\t"
+    /*#7 Load low residue.*/
+    "movq 14*8(%[residue]),%%mm3\n\t"
+    /*#7 Load high residue.*/
+    "movq 15*8(%[residue]),%%mm4\n\t"
+    /*#6 Bias low  residue.*/
+    "paddsw %%mm0,%%mm1\n\t"
+    /*#6 Bias high residue.*/
+    "paddsw %%mm0,%%mm2\n\t"
+    /*#6 Pack to byte.*/
+    "packuswb %%mm2,%%mm1\n\t"
+    /*#7 Bias low  residue.*/
+    "paddsw %%mm0,%%mm3\n\t"
+    /*#7 Bias high residue.*/
+    "paddsw %%mm0,%%mm4\n\t"
+    /*#7 Pack to byte.*/
+    "packuswb %%mm4,%%mm3\n\t"
+    /*#6 Write row.*/
+    "movq %%mm1,(%[dst4],%[dst_ystride],2)\n\t"
+    /*#7 Write row.*/
+    "movq %%mm3,(%[dst4],%[dst_ystride3])\n\t"
+    :
+    :[residue]"r"(_residue),
+     [dst]"r"(_dst),
+     [dst4]"r"(_dst+(_dst_ystride<<2)),
+     [dst_ystride]"r"((long)_dst_ystride),
+     [dst_ystride3]"r"((long)_dst_ystride*3)
+    :"memory"
+  );
+}
+
+void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue){
+  int i;
+  /*Zero mm0.*/
+  __asm__ __volatile__("pxor %%mm0,%%mm0\n\t"::);
+  for(i=4;i-->0;){
+    __asm__ __volatile__(
+      /*#0 Load source.*/
+      "movq (%[src]),%%mm3\n\t"
+      /*#1 Load source.*/
+      "movq (%[src],%[src_ystride]),%%mm7\n\t"
+      /*#0 Get copy of src.*/
+      "movq %%mm3,%%mm4\n\t"
+      /*#0 Expand high source.*/
+      "punpckhbw %%mm0,%%mm4\n\t"
+      /*#0 Expand low  source.*/
+      "punpcklbw %%mm0,%%mm3\n\t"
+      /*#0 Add residue high.*/
+      "paddsw 8(%[residue]),%%mm4\n\t"
+      /*#1 Get copy of src.*/
+      "movq %%mm7,%%mm2\n\t"
+      /*#0 Add residue low.*/
+      "paddsw (%[residue]), %%mm3\n\t"
+      /*#1 Expand high source.*/
+      "punpckhbw %%mm0,%%mm2\n\t"
+      /*#0 Pack final row pixels.*/
+      "packuswb %%mm4,%%mm3\n\t"
+      /*#1 Expand low  source.*/
+      "punpcklbw %%mm0,%%mm7\n\t"
+      /*#1 Add residue low.*/
+      "paddsw 16(%[residue]),%%mm7\n\t"
+      /*#1 Add residue high.*/
+      "paddsw 24(%[residue]),%%mm2\n\t"
+      /*Advance residue.*/
+      "lea 32(%[residue]),%[residue]\n\t"
+      /*#1 Pack final row pixels.*/
+      "packuswb %%mm2,%%mm7\n\t"
+      /*Advance src.*/
+      "lea (%[src],%[src_ystride],2),%[src]\n\t"
+      /*#0 Write row.*/
+      "movq %%mm3,(%[dst])\n\t"
+      /*#1 Write row.*/
+      "movq %%mm7,(%[dst],%[dst_ystride])\n\t"
+      /*Advance dst.*/
+      "lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
+      :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src)
+      :[dst_ystride]"r"((long)_dst_ystride),
+       [src_ystride]"r"((long)_src_ystride)
+      :"memory"
+    );
+  }
+}
+
+void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
+ int _src2_ystride,const ogg_int16_t *_residue){
+  int i;
+  /*NOTE: This assumes that
+     _dst_ystride==_src1_ystride&&_dst_ystride==_src2_ystride.
+    This is currently always the case, but a slower fallback version will need
+     to be written if it ever is not.*/
+  /*Zero mm7.*/
+  __asm__ __volatile__("pxor %%mm7,%%mm7\n\t"::);
+  for(i=4;i-->0;){
+    __asm__ __volatile__(
+      /*#0 Load src1.*/
+      "movq (%[src1]),%%mm0\n\t"
+      /*#0 Load src2.*/
+      "movq (%[src2]),%%mm2\n\t"
+      /*#0 Copy src1.*/
+      "movq %%mm0,%%mm1\n\t"
+      /*#0 Copy src2.*/
+      "movq %%mm2,%%mm3\n\t"
+      /*#1 Load src1.*/
+      "movq (%[src1],%[ystride]),%%mm4\n\t"
+      /*#0 Unpack lower src1.*/
+      "punpcklbw %%mm7,%%mm0\n\t"
+      /*#1 Load src2.*/
+      "movq (%[src2],%[ystride]),%%mm5\n\t"
+      /*#0 Unpack higher src1.*/
+      "punpckhbw %%mm7,%%mm1\n\t"
+      /*#0 Unpack lower src2.*/
+      "punpcklbw %%mm7,%%mm2\n\t"
+      /*#0 Unpack higher src2.*/
+      "punpckhbw %%mm7,%%mm3\n\t"
+      /*Advance src1 ptr.*/
+      "lea (%[src1],%[ystride],2),%[src1]\n\t"
+      /*Advance src2 ptr.*/
+      "lea (%[src2],%[ystride],2),%[src2]\n\t"
+      /*#0 Lower src1+src2.*/
+      "paddsw %%mm2,%%mm0\n\t"
+      /*#0 Higher src1+src2.*/
+      "paddsw %%mm3,%%mm1\n\t"
+      /*#1 Copy src1.*/
+      "movq %%mm4,%%mm2\n\t"
+      /*#0 Build lo average.*/
+      "psraw $1,%%mm0\n\t"
+      /*#1 Copy src2.*/
+      "movq %%mm5,%%mm3\n\t"
+      /*#1 Unpack lower src1.*/
+      "punpcklbw %%mm7,%%mm4\n\t"
+      /*#0 Build hi average.*/
+      "psraw $1,%%mm1\n\t"
+      /*#1 Unpack higher src1.*/
+      "punpckhbw %%mm7,%%mm2\n\t"
+      /*#0 low+=residue.*/
+      "paddsw (%[residue]),%%mm0\n\t"
+      /*#1 Unpack lower src2.*/
+      "punpcklbw %%mm7,%%mm5\n\t"
+      /*#0 high+=residue.*/
+      "paddsw 8(%[residue]),%%mm1\n\t"
+      /*#1 Unpack higher src2.*/
+      "punpckhbw %%mm7,%%mm3\n\t"
+      /*#1 Lower src1+src2.*/
+      "paddsw %%mm4,%%mm5\n\t"
+      /*#0 Pack and saturate.*/
+      "packuswb %%mm1,%%mm0\n\t"
+      /*#1 Higher src1+src2.*/
+      "paddsw %%mm2,%%mm3\n\t"
+      /*#0 Write row.*/
+      "movq %%mm0,(%[dst])\n\t"
+      /*#1 Build lo average.*/
+      "psraw $1,%%mm5\n\t"
+      /*#1 Build hi average.*/
+      "psraw $1,%%mm3\n\t"
+      /*#1 low+=residue.*/
+      "paddsw 16(%[residue]),%%mm5\n\t"
+      /*#1 high+=residue.*/
+      "paddsw 24(%[residue]),%%mm3\n\t"
+      /*#1 Pack and saturate.*/
+      "packuswb  %%mm3,%%mm5\n\t"
+      /*#1 Write row ptr.*/
+      "movq %%mm5,(%[dst],%[ystride])\n\t"
+      /*Advance residue ptr.*/
+      "add $32,%[residue]\n\t"
+      /*Advance dest ptr.*/
+      "lea (%[dst],%[ystride],2),%[dst]\n\t"
+     :[dst]"+r"(_dst),[residue]"+r"(_residue),
+      [src1]"+r"(_src1),[src2]"+r"(_src2)
+     :[ystride]"r"((long)_dst_ystride)
+     :"memory"
+    );
+  }
+}
+
+void oc_restore_fpu_mmx(void){
+  __asm__ __volatile__("emms\n\t");
+}
+#endif
--- a/modules/libtheora/lib/dec/x86/mmxidct.c
+++ b/modules/libtheora/lib/dec/x86/mmxidct.c
@ -0,0 +1,535 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxidct.c 14357 2008-01-04 20:05:28Z tterribe $
+
+ ********************************************************************/
+
+/*MMX acceleration of Theora's iDCT.
+  Originally written by Rudolf Marek, based on code from On2's VP3.*/
+#include <ogg/ogg.h>
+#include "../dct.h"
+#include "../idct.h"
+
+#include "x86int.h"
+
+#if defined(USE_ASM)
+
+/*These are offsets into the table of constants below.*/
+/*7 rows of cosines, in order: pi/16 * (1 ... 7).*/
+#define OC_COSINE_OFFSET (0)
+/*A row of 8's.*/
+#define OC_EIGHT_OFFSET  (56)
+
+
+
+/*A table of constants used by the MMX routines.*/
+static const ogg_uint16_t __attribute__((aligned(8),used))
+ OC_IDCT_CONSTS[(7+1)*4]={
+  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
+  (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7,
+  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
+  (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6,
+  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
+  (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5,
+  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
+  (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4,
+  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
+  (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3,
+  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
+  (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2,
+  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
+  (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1,
+      8,    8,    8,    8
+};
+
+/*Converts the expression in the argument to a string.*/
+#define OC_M2STR(_s) #_s
+
+/*38 cycles*/
+#define OC_IDCT_BEGIN \
+  "#OC_IDCT_BEGIN\n\t" \
+  "movq "OC_I(3)",%%mm2\n\t" \
+  "movq "OC_C(3)",%%mm6\n\t" \
+  "movq %%mm2,%%mm4\n\t" \
+  "movq "OC_J(5)",%%mm7\n\t" \
+  "pmulhw %%mm6,%%mm4\n\t" \
+  "movq "OC_C(5)",%%mm1\n\t" \
+  "pmulhw %%mm7,%%mm6\n\t" \
+  "movq %%mm1,%%mm5\n\t" \
+  "pmulhw %%mm2,%%mm1\n\t" \
+  "movq "OC_I(1)",%%mm3\n\t" \
+  "pmulhw %%mm7,%%mm5\n\t" \
+  "movq "OC_C(1)",%%mm0\n\t" \
+  "paddw %%mm2,%%mm4\n\t" \
+  "paddw %%mm7,%%mm6\n\t" \
+  "paddw %%mm1,%%mm2\n\t" \
+  "movq "OC_J(7)",%%mm1\n\t" \
+  "paddw %%mm5,%%mm7\n\t" \
+  "movq %%mm0,%%mm5\n\t" \
+  "pmulhw %%mm3,%%mm0\n\t" \
+  "paddw %%mm7,%%mm4\n\t" \
+  "pmulhw %%mm1,%%mm5\n\t" \
+  "movq "OC_C(7)",%%mm7\n\t" \
+  "psubw %%mm2,%%mm6\n\t" \
+  "paddw %%mm3,%%mm0\n\t" \
+  "pmulhw %%mm7,%%mm3\n\t" \
+  "movq "OC_I(2)",%%mm2\n\t" \
+  "pmulhw %%mm1,%%mm7\n\t" \
+  "paddw %%mm1,%%mm5\n\t" \
+  "movq %%mm2,%%mm1\n\t" \
+  "pmulhw "OC_C(2)",%%mm2\n\t" \
+  "psubw %%mm5,%%mm3\n\t" \
+  "movq "OC_J(6)",%%mm5\n\t" \
+  "paddw %%mm7,%%mm0\n\t" \
+  "movq %%mm5,%%mm7\n\t" \
+  "psubw %%mm4,%%mm0\n\t" \
+  "pmulhw "OC_C(2)",%%mm5\n\t" \
+  "paddw %%mm1,%%mm2\n\t" \
+  "pmulhw "OC_C(6)",%%mm1\n\t" \
+  "paddw %%mm4,%%mm4\n\t" \
+  "paddw %%mm0,%%mm4\n\t" \
+  "psubw %%mm6,%%mm3\n\t" \
+  "paddw %%mm7,%%mm5\n\t" \
+  "paddw %%mm6,%%mm6\n\t" \
+  "pmulhw "OC_C(6)",%%mm7\n\t" \
+  "paddw %%mm3,%%mm6\n\t" \
+  "movq %%mm4,"OC_I(1)"\n\t" \
+  "psubw %%mm5,%%mm1\n\t" \
+  "movq "OC_C(4)",%%mm4\n\t" \
+  "movq %%mm3,%%mm5\n\t" \
+  "pmulhw %%mm4,%%mm3\n\t" \
+  "paddw %%mm2,%%mm7\n\t" \
+  "movq %%mm6,"OC_I(2)"\n\t" \
+  "movq %%mm0,%%mm2\n\t" \
+  "movq "OC_I(0)",%%mm6\n\t" \
+  "pmulhw %%mm4,%%mm0\n\t" \
+  "paddw %%mm3,%%mm5\n\t" \
+  "movq "OC_J(4)",%%mm3\n\t" \
+  "psubw %%mm1,%%mm5\n\t" \
+  "paddw %%mm0,%%mm2\n\t" \
+  "psubw %%mm3,%%mm6\n\t" \
+  "movq %%mm6,%%mm0\n\t" \
+  "pmulhw %%mm4,%%mm6\n\t" \
+  "paddw %%mm3,%%mm3\n\t" \
+  "paddw %%mm1,%%mm1\n\t" \
+  "paddw %%mm0,%%mm3\n\t" \
+  "paddw %%mm5,%%mm1\n\t" \
+  "pmulhw %%mm3,%%mm4\n\t" \
+  "paddw %%mm0,%%mm6\n\t" \
+  "psubw %%mm2,%%mm6\n\t" \
+  "paddw %%mm2,%%mm2\n\t" \
+  "movq "OC_I(1)",%%mm0\n\t" \
+  "paddw %%mm6,%%mm2\n\t" \
+  "paddw %%mm3,%%mm4\n\t" \
+  "psubw %%mm1,%%mm2\n\t" \
+  "#end OC_IDCT_BEGIN\n\t" \
+
+/*38+8=46 cycles.*/
+#define OC_ROW_IDCT \
+  "#OC_ROW_IDCT\n" \
+  OC_IDCT_BEGIN \
+  /*r3=D'*/ \
+  "movq "OC_I(2)",%%mm3\n\t" \
+  /*r4=E'=E-G*/ \
+  "psubw %%mm7,%%mm4\n\t" \
+  /*r1=H'+H'*/ \
+  "paddw %%mm1,%%mm1\n\t" \
+  /*r7=G+G*/ \
+  "paddw %%mm7,%%mm7\n\t" \
+  /*r1=R1=A''+H'*/ \
+  "paddw %%mm2,%%mm1\n\t" \
+  /*r7=G'=E+G*/ \
+  "paddw %%mm4,%%mm7\n\t" \
+  /*r4=R4=E'-D'*/ \
+  "psubw %%mm3,%%mm4\n\t" \
+  "paddw %%mm3,%%mm3\n\t" \
+  /*r6=R6=F'-B''*/ \
+  "psubw %%mm5,%%mm6\n\t" \
+  "paddw %%mm5,%%mm5\n\t" \
+  /*r3=R3=E'+D'*/ \
+  "paddw %%mm4,%%mm3\n\t" \
+  /*r5=R5=F'+B''*/ \
+  "paddw %%mm6,%%mm5\n\t" \
+  /*r7=R7=G'-C'*/ \
+  "psubw %%mm0,%%mm7\n\t" \
+  "paddw %%mm0,%%mm0\n\t" \
+  /*Save R1.*/ \
+  "movq %%mm1,"OC_I(1)"\n\t" \
+  /*r0=R0=G.+C.*/ \
+  "paddw %%mm7,%%mm0\n\t" \
+  "#end OC_ROW_IDCT\n\t" \
+
+/*The following macro does two 4x4 transposes in place.
+  At entry, we assume:
+    r0 = a3 a2 a1 a0
+  I(1) = b3 b2 b1 b0
+    r2 = c3 c2 c1 c0
+    r3 = d3 d2 d1 d0
+
+    r4 = e3 e2 e1 e0
+    r5 = f3 f2 f1 f0
+    r6 = g3 g2 g1 g0
+    r7 = h3 h2 h1 h0
+
+  At exit, we have:
+  I(0) = d0 c0 b0 a0
+  I(1) = d1 c1 b1 a1
+  I(2) = d2 c2 b2 a2
+  I(3) = d3 c3 b3 a3
+
+  J(4) = h0 g0 f0 e0
+  J(5) = h1 g1 f1 e1
+  J(6) = h2 g2 f2 e2
+  J(7) = h3 g3 f3 e3
+
+  I(0) I(1) I(2) I(3) is the transpose of r0 I(1) r2 r3.
+  J(4) J(5) J(6) J(7) is the transpose of r4   r5 r6 r7.
+
+  Since r1 is free at entry, we calculate the Js first.*/
+/*19 cycles.*/
+#define OC_TRANSPOSE \
+  "#OC_TRANSPOSE\n\t" \
+  "movq %%mm4,%%mm1\n\t" \
+  "punpcklwd %%mm5,%%mm4\n\t" \
+  "movq %%mm0,"OC_I(0)"\n\t" \
+  "punpckhwd %%mm5,%%mm1\n\t" \
+  "movq %%mm6,%%mm0\n\t" \
+  "punpcklwd %%mm7,%%mm6\n\t" \
+  "movq %%mm4,%%mm5\n\t" \
+  "punpckldq %%mm6,%%mm4\n\t" \
+  "punpckhdq %%mm6,%%mm5\n\t" \
+  "movq %%mm1,%%mm6\n\t" \
+  "movq %%mm4,"OC_J(4)"\n\t" \
+  "punpckhwd %%mm7,%%mm0\n\t" \
+  "movq %%mm5,"OC_J(5)"\n\t" \
+  "punpckhdq %%mm0,%%mm6\n\t" \
+  "movq "OC_I(0)",%%mm4\n\t" \
+  "punpckldq %%mm0,%%mm1\n\t" \
+  "movq "OC_I(1)",%%mm5\n\t" \
+  "movq %%mm4,%%mm0\n\t" \
+  "movq %%mm6,"OC_J(7)"\n\t" \
+  "punpcklwd %%mm5,%%mm0\n\t" \
+  "movq %%mm1,"OC_J(6)"\n\t" \
+  "punpckhwd %%mm5,%%mm4\n\t" \
+  "movq %%mm2,%%mm5\n\t" \
+  "punpcklwd %%mm3,%%mm2\n\t" \
+  "movq %%mm0,%%mm1\n\t" \
+  "punpckldq %%mm2,%%mm0\n\t" \
+  "punpckhdq %%mm2,%%mm1\n\t" \
+  "movq %%mm4,%%mm2\n\t" \
+  "movq %%mm0,"OC_I(0)"\n\t" \
+  "punpckhwd %%mm3,%%mm5\n\t" \
+  "movq %%mm1,"OC_I(1)"\n\t" \
+  "punpckhdq %%mm5,%%mm4\n\t" \
+  "punpckldq %%mm5,%%mm2\n\t" \
+  "movq %%mm4,"OC_I(3)"\n\t" \
+  "movq %%mm2,"OC_I(2)"\n\t" \
+  "#end OC_TRANSPOSE\n\t" \
+
+/*38+19=57 cycles.*/
+#define OC_COLUMN_IDCT \
+  "#OC_COLUMN_IDCT\n" \
+  OC_IDCT_BEGIN \
+  "paddw "OC_8",%%mm2\n\t" \
+  /*r1=H'+H'*/ \
+  "paddw %%mm1,%%mm1\n\t" \
+  /*r1=R1=A''+H'*/ \
+  "paddw %%mm2,%%mm1\n\t" \
+  /*r2=NR2*/ \
+  "psraw $4,%%mm2\n\t" \
+  /*r4=E'=E-G*/ \
+  "psubw %%mm7,%%mm4\n\t" \
+  /*r1=NR1*/ \
+  "psraw $4,%%mm1\n\t" \
+  /*r3=D'*/ \
+  "movq "OC_I(2)",%%mm3\n\t" \
+  /*r7=G+G*/ \
+  "paddw %%mm7,%%mm7\n\t" \
+  /*Store NR2 at I(2).*/ \
+  "movq %%mm2,"OC_I(2)"\n\t" \
+  /*r7=G'=E+G*/ \
+  "paddw %%mm4,%%mm7\n\t" \
+  /*Store NR1 at I(1).*/ \
+  "movq %%mm1,"OC_I(1)"\n\t" \
+  /*r4=R4=E'-D'*/ \
+  "psubw %%mm3,%%mm4\n\t" \
+  "paddw "OC_8",%%mm4\n\t" \
+  /*r3=D'+D'*/ \
+  "paddw %%mm3,%%mm3\n\t" \
+  /*r3=R3=E'+D'*/ \
+  "paddw %%mm4,%%mm3\n\t" \
+  /*r4=NR4*/ \
+  "psraw $4,%%mm4\n\t" \
+  /*r6=R6=F'-B''*/ \
+  "psubw %%mm5,%%mm6\n\t" \
+  /*r3=NR3*/ \
+  "psraw $4,%%mm3\n\t" \
+  "paddw "OC_8",%%mm6\n\t" \
+  /*r5=B''+B''*/ \
+  "paddw %%mm5,%%mm5\n\t" \
+  /*r5=R5=F'+B''*/ \
+  "paddw %%mm6,%%mm5\n\t" \
+  /*r6=NR6*/ \
+  "psraw $4,%%mm6\n\t" \
+  /*Store NR4 at J(4).*/ \
+  "movq %%mm4,"OC_J(4)"\n\t" \
+  /*r5=NR5*/ \
+  "psraw $4,%%mm5\n\t" \
+  /*Store NR3 at I(3).*/ \
+  "movq %%mm3,"OC_I(3)"\n\t" \
+  /*r7=R7=G'-C'*/ \
+  "psubw %%mm0,%%mm7\n\t" \
+  "paddw "OC_8",%%mm7\n\t" \
+  /*r0=C'+C'*/ \
+  "paddw %%mm0,%%mm0\n\t" \
+  /*r0=R0=G'+C'*/ \
+  "paddw %%mm7,%%mm0\n\t" \
+  /*r7=NR7*/ \
+  "psraw $4,%%mm7\n\t" \
+  /*Store NR6 at J(6).*/ \
+  "movq %%mm6,"OC_J(6)"\n\t" \
+  /*r0=NR0*/ \
+  "psraw $4,%%mm0\n\t" \
+  /*Store NR5 at J(5).*/ \
+  "movq %%mm5,"OC_J(5)"\n\t" \
+  /*Store NR7 at J(7).*/ \
+  "movq %%mm7,"OC_J(7)"\n\t" \
+  /*Store NR0 at I(0).*/ \
+  "movq %%mm0,"OC_I(0)"\n\t" \
+  "#end OC_COLUMN_IDCT\n\t" \
+
+#define OC_MID(_m,_i) OC_M2STR(_m+(_i)*8)"(%[c])"
+#define OC_C(_i)      OC_MID(OC_COSINE_OFFSET,_i-1)
+#define OC_8          OC_MID(OC_EIGHT_OFFSET,0)
+
+void oc_idct8x8_mmx(ogg_int16_t _y[64]){
+  /*This routine accepts an 8x8 matrix, but in transposed form.
+    Every 4x4 submatrix is transposed.*/
+  __asm__ __volatile__(
+#define OC_I(_k)      OC_M2STR((_k*16))"(%[y])"
+#define OC_J(_k)      OC_M2STR(((_k-4)*16)+8)"(%[y])"
+    OC_ROW_IDCT
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      OC_M2STR((_k*16)+64)"(%[y])"
+#define OC_J(_k)      OC_M2STR(((_k-4)*16)+72)"(%[y])"
+    OC_ROW_IDCT
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      OC_M2STR((_k*16))"(%[y])"
+#define OC_J(_k)      OC_I(_k)
+    OC_COLUMN_IDCT
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k)      OC_M2STR((_k*16)+8)"(%[y])"
+#define OC_J(_k)      OC_I(_k)
+    OC_COLUMN_IDCT
+#undef  OC_I
+#undef  OC_J
+    "emms\n\t"
+    :
+    :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS)
+  );
+}
+
+/*25 cycles.*/
+#define OC_IDCT_BEGIN_10 \
+ "#OC_IDCT_BEGIN_10\n\t" \
+ "movq "OC_I(3)",%%mm2\n\t" \
+ "nop\n\t" \
+ "movq "OC_C(3)",%%mm6\n\t" \
+ "movq %%mm2,%%mm4\n\t" \
+ "movq "OC_C(5)",%%mm1\n\t" \
+ "pmulhw %%mm6,%%mm4\n\t" \
+ "movq "OC_I(1)",%%mm3\n\t" \
+ "pmulhw %%mm2,%%mm1\n\t" \
+ "movq "OC_C(1)",%%mm0\n\t" \
+ "paddw %%mm2,%%mm4\n\t" \
+ "pxor %%mm6,%%mm6\n\t" \
+ "paddw %%mm1,%%mm2\n\t" \
+ "movq "OC_I(2)",%%mm5\n\t" \
+ "pmulhw %%mm3,%%mm0\n\t" \
+ "movq %%mm5,%%mm1\n\t" \
+ "paddw %%mm3,%%mm0\n\t" \
+ "pmulhw "OC_C(7)",%%mm3\n\t" \
+ "psubw %%mm2,%%mm6\n\t" \
+ "pmulhw "OC_C(2)",%%mm5\n\t" \
+ "psubw %%mm4,%%mm0\n\t" \
+ "movq "OC_I(2)",%%mm7\n\t" \
+ "paddw %%mm4,%%mm4\n\t" \
+ "paddw %%mm5,%%mm7\n\t" \
+ "paddw %%mm0,%%mm4\n\t" \
+ "pmulhw "OC_C(6)",%%mm1\n\t" \
+ "psubw %%mm6,%%mm3\n\t" \
+ "movq %%mm4,"OC_I(1)"\n\t" \
+ "paddw %%mm6,%%mm6\n\t" \
+ "movq "OC_C(4)",%%mm4\n\t" \
+ "paddw %%mm3,%%mm6\n\t" \
+ "movq %%mm3,%%mm5\n\t" \
+ "pmulhw %%mm4,%%mm3\n\t" \
+ "movq %%mm6,"OC_I(2)"\n\t" \
+ "movq %%mm0,%%mm2\n\t" \
+ "movq "OC_I(0)",%%mm6\n\t" \
+ "pmulhw %%mm4,%%mm0\n\t" \
+ "paddw %%mm3,%%mm5\n\t" \
+ "paddw %%mm0,%%mm2\n\t" \
+ "psubw %%mm1,%%mm5\n\t" \
+ "pmulhw %%mm4,%%mm6\n\t" \
+ "paddw "OC_I(0)",%%mm6\n\t" \
+ "paddw %%mm1,%%mm1\n\t" \
+ "movq %%mm6,%%mm4\n\t" \
+ "paddw %%mm5,%%mm1\n\t" \
+ "psubw %%mm2,%%mm6\n\t" \
+ "paddw %%mm2,%%mm2\n\t" \
+ "movq "OC_I(1)",%%mm0\n\t" \
+ "paddw %%mm6,%%mm2\n\t" \
+ "psubw %%mm1,%%mm2\n\t" \
+ "nop\n\t" \
+ "#end OC_IDCT_BEGIN_10\n\t" \
+
+/*25+8=33 cycles.*/
+#define OC_ROW_IDCT_10 \
+ "#OC_ROW_IDCT_10\n\t" \
+ OC_IDCT_BEGIN_10 \
+ /*r3=D'*/ \
+ "movq "OC_I(2)",%%mm3\n\t" \
+ /*r4=E'=E-G*/ \
+ "psubw %%mm7,%%mm4\n\t" \
+ /*r1=H'+H'*/ \
+ "paddw %%mm1,%%mm1\n\t" \
+ /*r7=G+G*/ \
+ "paddw %%mm7,%%mm7\n\t" \
+ /*r1=R1=A''+H'*/ \
+ "paddw %%mm2,%%mm1\n\t" \
+ /*r7=G'=E+G*/ \
+ "paddw %%mm4,%%mm7\n\t" \
+ /*r4=R4=E'-D'*/ \
+ "psubw %%mm3,%%mm4\n\t" \
+ "paddw %%mm3,%%mm3\n\t" \
+ /*r6=R6=F'-B''*/ \
+ "psubw %%mm5,%%mm6\n\t" \
+ "paddw %%mm5,%%mm5\n\t" \
+ /*r3=R3=E'+D'*/ \
+ "paddw %%mm4,%%mm3\n\t" \
+ /*r5=R5=F'+B''*/ \
+ "paddw %%mm6,%%mm5\n\t" \
+ /*r7=R7=G'-C'*/ \
+ "psubw %%mm0,%%mm7\n\t" \
+ "paddw %%mm0,%%mm0\n\t" \
+ /*Save R1.*/ \
+ "movq %%mm1,"OC_I(1)"\n\t" \
+ /*r0=R0=G'+C'*/ \
+ "paddw %%mm7,%%mm0\n\t" \
+ "#end OC_ROW_IDCT_10\n\t" \
+
+/*25+19=44 cycles'*/
+#define OC_COLUMN_IDCT_10 \
+ "#OC_COLUMN_IDCT_10\n\t" \
+ OC_IDCT_BEGIN_10 \
+ "paddw "OC_8",%%mm2\n\t" \
+ /*r1=H'+H'*/ \
+ "paddw %%mm1,%%mm1\n\t" \
+ /*r1=R1=A''+H'*/ \
+ "paddw %%mm2,%%mm1\n\t" \
+ /*r2=NR2*/ \
+ "psraw $4,%%mm2\n\t" \
+ /*r4=E'=E-G*/ \
+ "psubw %%mm7,%%mm4\n\t" \
+ /*r1=NR1*/ \
+ "psraw $4,%%mm1\n\t" \
+ /*r3=D'*/ \
+ "movq "OC_I(2)",%%mm3\n\t" \
+ /*r7=G+G*/ \
+ "paddw %%mm7,%%mm7\n\t" \
+ /*Store NR2 at I(2).*/ \
+ "movq %%mm2,"OC_I(2)"\n\t" \
+ /*r7=G'=E+G*/ \
+ "paddw %%mm4,%%mm7\n\t" \
+ /*Store NR1 at I(1).*/ \
+ "movq %%mm1,"OC_I(1)"\n\t" \
+ /*r4=R4=E'-D'*/ \
+ "psubw %%mm3,%%mm4\n\t" \
+ "paddw "OC_8",%%mm4\n\t" \
+ /*r3=D'+D'*/ \
+ "paddw %%mm3,%%mm3\n\t" \
+ /*r3=R3=E'+D'*/ \
+ "paddw %%mm4,%%mm3\n\t" \
+ /*r4=NR4*/ \
+ "psraw $4,%%mm4\n\t" \
+ /*r6=R6=F'-B''*/ \
+ "psubw %%mm5,%%mm6\n\t" \
+ /*r3=NR3*/ \
+ "psraw $4,%%mm3\n\t" \
+ "paddw "OC_8",%%mm6\n\t" \
+ /*r5=B''+B''*/ \
+ "paddw %%mm5,%%mm5\n\t" \
+ /*r5=R5=F'+B''*/ \
+ "paddw %%mm6,%%mm5\n\t" \
+ /*r6=NR6*/ \
+ "psraw $4,%%mm6\n\t" \
+ /*Store NR4 at J(4).*/ \
+ "movq %%mm4,"OC_J(4)"\n\t" \
+ /*r5=NR5*/ \
+ "psraw $4,%%mm5\n\t" \
+ /*Store NR3 at I(3).*/ \
+ "movq %%mm3,"OC_I(3)"\n\t" \
+ /*r7=R7=G'-C'*/ \
+ "psubw %%mm0,%%mm7\n\t" \
+ "paddw "OC_8",%%mm7\n\t" \
+ /*r0=C'+C'*/ \
+ "paddw %%mm0,%%mm0\n\t" \
+ /*r0=R0=G'+C'*/ \
+ "paddw %%mm7,%%mm0\n\t" \
+ /*r7=NR7*/ \
+ "psraw $4,%%mm7\n\t" \
+ /*Store NR6 at J(6).*/ \
+ "movq %%mm6,"OC_J(6)"\n\t" \
+ /*r0=NR0*/ \
+ "psraw $4,%%mm0\n\t" \
+ /*Store NR5 at J(5).*/ \
+ "movq %%mm5,"OC_J(5)"\n\t" \
+ /*Store NR7 at J(7).*/ \
+ "movq %%mm7,"OC_J(7)"\n\t" \
+ /*Store NR0 at I(0).*/ \
+ "movq %%mm0,"OC_I(0)"\n\t" \
+ "#end OC_COLUMN_IDCT_10\n\t" \
+
+void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){
+  __asm__ __volatile__(
+#define OC_I(_k) OC_M2STR((_k*16))"(%[y])"
+#define OC_J(_k) OC_M2STR(((_k-4)*16)+8)"(%[y])"
+    /*Done with dequant, descramble, and partial transpose.
+      Now do the iDCT itself.*/
+    OC_ROW_IDCT_10
+    OC_TRANSPOSE
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k) OC_M2STR((_k*16))"(%[y])"
+#define OC_J(_k) OC_I(_k)
+    OC_COLUMN_IDCT_10
+#undef  OC_I
+#undef  OC_J
+#define OC_I(_k) OC_M2STR((_k*16)+8)"(%[y])"
+#define OC_J(_k) OC_I(_k)
+    OC_COLUMN_IDCT_10
+#undef  OC_I
+#undef  OC_J
+    "emms\n\t"
+    :
+    :[y]"r"(_y),[c]"r"(OC_IDCT_CONSTS)
+  );
+}
+#endif
--- a/modules/libtheora/lib/dec/x86/mmxstate.c
+++ b/modules/libtheora/lib/dec/x86/mmxstate.c
@ -0,0 +1,650 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxstate.c 14385 2008-01-09 19:53:18Z giles $
+
+ ********************************************************************/
+
+/*MMX acceleration of complete fragment reconstruction algorithm.
+  Originally written by Rudolf Marek.*/
+#include "x86int.h"
+#include "../../internal.h"
+
+#if defined(USE_ASM)
+
+static const __attribute__((aligned(8),used)) int OC_FZIG_ZAGMMX[64]={
+   0, 8, 1, 2, 9,16,24,17,
+  10, 3,32,11,18,25, 4,12,
+   5,26,19,40,33,34,41,48,
+  27, 6,13,20,28,21,14, 7,
+  56,49,42,35,43,50,57,36,
+  15,22,29,30,23,44,37,58,
+  51,59,38,45,52,31,60,53,
+  46,39,47,54,61,62,55,63
+};
+
+
+
+void oc_state_frag_recon_mmx(oc_theora_state *_state,oc_fragment *_frag,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+ ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
+  ogg_int16_t  __attribute__((aligned(8))) res_buf[64];
+  int dst_framei;
+  int dst_ystride;
+  int zzi;
+  /*_last_zzi is subtly different from an actual count of the number of
+     coefficients we decoded for this block.
+    It contains the value of zzi BEFORE the final token in the block was
+     decoded.
+    In most cases this is an EOB token (the continuation of an EOB run from a
+     previous block counts), and so this is the same as the coefficient count.
+    However, in the case that the last token was NOT an EOB token, but filled
+     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
+    Provided the last token was not a pure zero run, the minimum value it can
+     be is 46, and so that doesn't affect any of the cases in this routine.
+    However, if the last token WAS a pure zero run of length 63, then _last_zzi
+     will be 1 while the number of coefficients decoded is 64.
+    Thus, we will trigger the following special case, where the real
+     coefficient count would not.
+    Note also that a zero run of length 64 will give _last_zzi a value of 0,
+     but we still process the DC coefficient, which might have a non-zero value
+     due to DC prediction.
+    Although convoluted, this is arguably the correct behavior: it allows us to
+     dequantize fewer coefficients and use a smaller transform when the block
+     ends with a long zero run instead of a normal EOB token.
+    It could be smarter... multiple separate zero runs at the end of a block
+     will fool it, but an encoder that generates these really deserves what it
+     gets.
+    Needless to say we inherited this approach from VP3.*/
+  /*Special case only having a DC component.*/
+  if(_last_zzi<2){
+    ogg_uint16_t p;
+    /*Why is the iquant product rounded in this case and no others?
+      Who knows.*/
+    p=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
+    /*Fill res_buf with p.*/
+    __asm__ __volatile__(
+      /*mm0=0000 0000 0000 AAAA*/
+      "movd %[p],%%mm0\n\t"
+      /*mm1=0000 0000 0000 AAAA*/
+      "movd %[p],%%mm1\n\t"
+      /*mm0=0000 0000 AAAA 0000*/
+      "pslld $16,%%mm0\n\t"
+      /*mm0=0000 0000 AAAA AAAA*/
+      "por %%mm1,%%mm0\n\t"
+      /*mm0=AAAA AAAA AAAA AAAA*/
+      "punpcklwd %%mm0,%%mm0\n\t"
+      "movq %%mm0,(%[res_buf])\n\t"
+      "movq %%mm0,8(%[res_buf])\n\t"
+      "movq %%mm0,16(%[res_buf])\n\t"
+      "movq %%mm0,24(%[res_buf])\n\t"
+      "movq %%mm0,32(%[res_buf])\n\t"
+      "movq %%mm0,40(%[res_buf])\n\t"
+      "movq %%mm0,48(%[res_buf])\n\t"
+      "movq %%mm0,56(%[res_buf])\n\t"
+      "movq %%mm0,64(%[res_buf])\n\t"
+      "movq %%mm0,72(%[res_buf])\n\t"
+      "movq %%mm0,80(%[res_buf])\n\t"
+      "movq %%mm0,88(%[res_buf])\n\t"
+      "movq %%mm0,96(%[res_buf])\n\t"
+      "movq %%mm0,104(%[res_buf])\n\t"
+      "movq %%mm0,112(%[res_buf])\n\t"
+      "movq %%mm0,120(%[res_buf])\n\t"
+      :
+      :[res_buf]"r"(res_buf),[p]"r"((unsigned)p)
+      :"memory"
+    );
+  }
+  else{
+    /*Then, fill in the remainder of the coefficients with 0's, and perform
+       the iDCT.*/
+    /*First zero the buffer.*/
+    /*On K7, etc., this could be replaced with movntq and sfence.*/
+    __asm__ __volatile__(
+      "pxor %%mm0,%%mm0\n\t"
+      "movq %%mm0,(%[res_buf])\n\t"
+      "movq %%mm0,8(%[res_buf])\n\t"
+      "movq %%mm0,16(%[res_buf])\n\t"
+      "movq %%mm0,24(%[res_buf])\n\t"
+      "movq %%mm0,32(%[res_buf])\n\t"
+      "movq %%mm0,40(%[res_buf])\n\t"
+      "movq %%mm0,48(%[res_buf])\n\t"
+      "movq %%mm0,56(%[res_buf])\n\t"
+      "movq %%mm0,64(%[res_buf])\n\t"
+      "movq %%mm0,72(%[res_buf])\n\t"
+      "movq %%mm0,80(%[res_buf])\n\t"
+      "movq %%mm0,88(%[res_buf])\n\t"
+      "movq %%mm0,96(%[res_buf])\n\t"
+      "movq %%mm0,104(%[res_buf])\n\t"
+      "movq %%mm0,112(%[res_buf])\n\t"
+      "movq %%mm0,120(%[res_buf])\n\t"
+      :
+      :[res_buf]"r"(res_buf)
+      :"memory"
+    );
+    res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
+    /*This is planned to be rewritten in MMX.*/
+    for(zzi=1;zzi<_ncoefs;zzi++){
+      int ci;
+      ci=OC_FZIG_ZAG[zzi];
+      res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
+       _ac_iquant[ci]);
+    }
+    if(_last_zzi<10)oc_idct8x8_10_mmx(res_buf);
+    else oc_idct8x8_mmx(res_buf);
+  }
+  /*Fill in the target buffer.*/
+  dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
+  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
+  /*For now ystride values in all ref frames assumed to be equal.*/
+  if(_frag->mbmode==OC_MODE_INTRA){
+    oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
+  }
+  else{
+    int ref_framei;
+    int ref_ystride;
+    int mvoffsets[2];
+    ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
+    ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],_frag->mv[1],
+     ref_ystride,_pli)>1){
+      oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf);
+    }
+    else{
+      oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf);
+    }
+  }
+  oc_restore_fpu(_state);
+}
+
+/*Copies the fragments specified by the lists of fragment indices from one
+   frame to another.
+  _fragis:    A pointer to a list of fragment indices.
+  _nfragis:   The number of fragment indices to copy.
+  _dst_frame: The reference frame to copy to.
+  _src_frame: The reference frame to copy from.
+  _pli:       The color plane the fragments lie in.*/
+void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli){
+  const int *fragi;
+  const int *fragi_end;
+  int        dst_framei;
+  long       dst_ystride;
+  int        src_framei;
+  long       src_ystride;
+  dst_framei=_state->ref_frame_idx[_dst_frame];
+  src_framei=_state->ref_frame_idx[_src_frame];
+  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
+  src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
+  fragi_end=_fragis+_nfragis;
+  for(fragi=_fragis;fragi<fragi_end;fragi++){
+    oc_fragment   *frag;
+    unsigned char *dst;
+    unsigned char *src;
+    long           esi;
+    frag=_state->frags+*fragi;
+    dst=frag->buffer[dst_framei];
+    src=frag->buffer[src_framei];
+    __asm__ __volatile__(
+      /*src+0*src_ystride*/
+      "movq (%[src]),%%mm0\n\t"
+      /*esi=src_ystride*3*/
+      "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
+      /*src+1*src_ystride*/
+      "movq (%[src],%[src_ystride]),%%mm1\n\t"
+      /*src+2*src_ystride*/
+      "movq (%[src],%[src_ystride],2),%%mm2\n\t"
+      /*src+3*src_ystride*/
+      "movq (%[src],%[s]),%%mm3\n\t"
+      /*dst+0*dst_ystride*/
+      "movq %%mm0,(%[dst])\n\t"
+      /*esi=dst_ystride*3*/
+      "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
+      /*dst+1*dst_ystride*/
+      "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
+      /*Pointer to next 4.*/
+      "lea (%[src],%[src_ystride],4),%[src]\n\t"
+      /*dst+2*dst_ystride*/
+      "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
+      /*dst+3*dst_ystride*/
+      "movq %%mm3,(%[dst],%[s])\n\t"
+      /*Pointer to next 4.*/
+      "lea (%[dst],%[dst_ystride],4),%[dst]\n\t"
+      /*src+0*src_ystride*/
+      "movq (%[src]),%%mm0\n\t"
+      /*esi=src_ystride*3*/
+      "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
+      /*src+1*src_ystride*/
+      "movq (%[src],%[src_ystride]),%%mm1\n\t"
+      /*src+2*src_ystride*/
+      "movq (%[src],%[src_ystride],2),%%mm2\n\t"
+      /*src+3*src_ystride*/
+      "movq (%[src],%[s]),%%mm3\n\t"
+      /*dst+0*dst_ystride*/
+      "movq %%mm0,(%[dst])\n\t"
+      /*esi=dst_ystride*3*/
+      "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
+      /*dst+1*dst_ystride*/
+      "movq %%mm1,(%[dst],%[dst_ystride])\n\t"
+      /*dst+2*dst_ystride*/
+      "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
+      /*dst+3*dst_ystride*/
+      "movq %%mm3,(%[dst],%[s])\n\t"
+      :[s]"=&S"(esi)
+      :[dst]"r"(dst),[src]"r"(src),[dst_ystride]"r"(dst_ystride),
+       [src_ystride]"r"(src_ystride)
+      :"memory"
+    );
+  }
+  /*This needs to be removed when decode specific functions are implemented:*/
+  __asm__ __volatile__("emms\n\t");
+}
+
+static void loop_filter_v(unsigned char *_pix,int _ystride,
+ const ogg_int16_t *_ll){
+  long esi;
+  _pix-=_ystride*2;
+  __asm__ __volatile__(
+    /*mm0=0*/
+    "pxor %%mm0,%%mm0\n\t"
+    /*esi=_ystride*3*/
+    "lea (%[ystride],%[ystride],2),%[s]\n\t"
+    /*mm7=_pix[0...8]*/
+    "movq (%[pix]),%%mm7\n\t"
+    /*mm4=_pix[0...8+_ystride*3]*/
+    "movq (%[pix],%[s]),%%mm4\n\t"
+    /*mm6=_pix[0...8]*/
+    "movq %%mm7,%%mm6\n\t"
+    /*Expand unsigned _pix[0...3] to 16 bits.*/
+    "punpcklbw %%mm0,%%mm6\n\t"
+    "movq %%mm4,%%mm5\n\t"
+    /*Expand unsigned _pix[4...8] to 16 bits.*/
+    "punpckhbw %%mm0,%%mm7\n\t"
+    /*Expand other arrays too.*/
+    "punpcklbw %%mm0,%%mm4\n\t"
+    "punpckhbw %%mm0,%%mm5\n\t"
+    /*mm7:mm6=_p[0...8]-_p[0...8+_ystride*3]:*/
+    "psubw %%mm4,%%mm6\n\t"
+    "psubw %%mm5,%%mm7\n\t"
+    /*mm5=mm4=_pix[0...8+_ystride]*/
+    "movq (%[pix],%[ystride]),%%mm4\n\t"
+    /*mm1=mm3=mm2=_pix[0..8]+_ystride*2]*/
+    "movq (%[pix],%[ystride],2),%%mm2\n\t"
+    "movq %%mm4,%%mm5\n\t"
+    "movq %%mm2,%%mm3\n\t"
+    "movq %%mm2,%%mm1\n\t"
+    /*Expand these arrays.*/
+    "punpckhbw %%mm0,%%mm5\n\t"
+    "punpcklbw %%mm0,%%mm4\n\t"
+    "punpckhbw %%mm0,%%mm3\n\t"
+    "punpcklbw %%mm0,%%mm2\n\t"
+    /*mm0=3 3 3 3
+      mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
+    "pcmpeqw %%mm0,%%mm0\n\t"
+    "psubw %%mm5,%%mm3\n\t"
+    "psrlw $14,%%mm0\n\t"
+    "psubw %%mm4,%%mm2\n\t"
+    /*Scale by 3.*/
+    "pmullw %%mm0,%%mm3\n\t"
+    "pmullw %%mm0,%%mm2\n\t"
+    /*mm0=4 4 4 4
+      f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
+       3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
+    "psrlw $1,%%mm0\n\t"
+    "paddw %%mm7,%%mm3\n\t"
+    "psllw $2,%%mm0\n\t"
+    "paddw %%mm6,%%mm2\n\t"
+    /*Add 4.*/
+    "paddw %%mm0,%%mm3\n\t"
+    "paddw %%mm0,%%mm2\n\t"
+    /*"Divide" by 8.*/
+    "psraw $3,%%mm3\n\t"
+    "psraw $3,%%mm2\n\t"
+    /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the sepc.*/
+    /*Free up mm5.*/
+    "packuswb %%mm5,%%mm4\n\t"
+    /*mm0=L L L L*/
+    "movq (%[ll]),%%mm0\n\t"
+    /*if(R_i<-2L||R_i>2L)R_i=0:*/
+    "movq %%mm2,%%mm5\n\t"
+    "pxor %%mm6,%%mm6\n\t"
+    "movq %%mm0,%%mm7\n\t"
+    "psubw %%mm0,%%mm6\n\t"
+    "psllw $1,%%mm7\n\t"
+    "psllw $1,%%mm6\n\t"
+    /*mm2==R_3 R_2 R_1 R_0*/
+    /*mm5==R_3 R_2 R_1 R_0*/
+    /*mm6==-2L -2L -2L -2L*/
+    /*mm7==2L 2L 2L 2L*/
+    "pcmpgtw %%mm2,%%mm7\n\t"
+    "pcmpgtw %%mm6,%%mm5\n\t"
+    "pand %%mm7,%%mm2\n\t"
+    "movq %%mm0,%%mm7\n\t"
+    "pand %%mm5,%%mm2\n\t"
+    "psllw $1,%%mm7\n\t"
+    "movq %%mm3,%%mm5\n\t"
+    /*mm3==R_7 R_6 R_5 R_4*/
+    /*mm5==R_7 R_6 R_5 R_4*/
+    /*mm6==-2L -2L -2L -2L*/
+    /*mm7==2L 2L 2L 2L*/
+    "pcmpgtw %%mm3,%%mm7\n\t"
+    "pcmpgtw %%mm6,%%mm5\n\t"
+    "pand %%mm7,%%mm3\n\t"
+    "movq %%mm0,%%mm7\n\t"
+    "pand %%mm5,%%mm3\n\t"
+    /*if(R_i<-L)R_i'=R_i+2L;
+      if(R_i>L)R_i'=R_i-2L;
+      if(R_i<-L||R_i>L)R_i=-R_i':*/
+    "psraw $1,%%mm6\n\t"
+    "movq %%mm2,%%mm5\n\t"
+    "psllw $1,%%mm7\n\t"
+    /*mm2==R_3 R_2 R_1 R_0*/
+    /*mm5==R_3 R_2 R_1 R_0*/
+    /*mm6==-L -L -L -L*/
+    /*mm0==L L L L*/
+    /*mm5=R_i>L?FF:00*/
+    "pcmpgtw %%mm0,%%mm5\n\t"
+    /*mm6=-L>R_i?FF:00*/
+    "pcmpgtw %%mm2,%%mm6\n\t"
+    /*mm7=R_i>L?2L:0*/
+    "pand %%mm5,%%mm7\n\t"
+    /*mm2=R_i>L?R_i-2L:R_i*/
+    "psubw %%mm7,%%mm2\n\t"
+    "movq %%mm0,%%mm7\n\t"
+    /*mm5=-L>R_i||R_i>L*/
+    "por %%mm6,%%mm5\n\t"
+    "psllw $1,%%mm7\n\t"
+    /*mm7=-L>R_i?2L:0*/
+    "pand %%mm6,%%mm7\n\t"
+    "pxor %%mm6,%%mm6\n\t"
+    /*mm2=-L>R_i?R_i+2L:R_i*/
+    "paddw %%mm7,%%mm2\n\t"
+    "psubw %%mm0,%%mm6\n\t"
+    /*mm5=-L>R_i||R_i>L?-R_i':0*/
+    "pand %%mm2,%%mm5\n\t"
+    "movq %%mm0,%%mm7\n\t"
+    /*mm2=-L>R_i||R_i>L?0:R_i*/
+    "psubw %%mm5,%%mm2\n\t"
+    "psllw $1,%%mm7\n\t"
+    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
+    "psubw %%mm5,%%mm2\n\t"
+    "movq %%mm3,%%mm5\n\t"
+    /*mm3==R_7 R_6 R_5 R_4*/
+    /*mm5==R_7 R_6 R_5 R_4*/
+    /*mm6==-L -L -L -L*/
+    /*mm0==L L L L*/
+    /*mm6=-L>R_i?FF:00*/
+    "pcmpgtw %%mm3,%%mm6\n\t"
+    /*mm5=R_i>L?FF:00*/
+    "pcmpgtw %%mm0,%%mm5\n\t"
+    /*mm7=R_i>L?2L:0*/
+    "pand %%mm5,%%mm7\n\t"
+    /*mm2=R_i>L?R_i-2L:R_i*/
+    "psubw %%mm7,%%mm3\n\t"
+    "psllw $1,%%mm0\n\t"
+    /*mm5=-L>R_i||R_i>L*/
+    "por %%mm6,%%mm5\n\t"
+    /*mm0=-L>R_i?2L:0*/
+    "pand %%mm6,%%mm0\n\t"
+    /*mm3=-L>R_i?R_i+2L:R_i*/
+    "paddw %%mm0,%%mm3\n\t"
+    /*mm5=-L>R_i||R_i>L?-R_i':0*/
+    "pand %%mm3,%%mm5\n\t"
+    /*mm2=-L>R_i||R_i>L?0:R_i*/
+    "psubw %%mm5,%%mm3\n\t"
+    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
+    "psubw %%mm5,%%mm3\n\t"
+    /*Unfortunately, there's no unsigned byte+signed byte with unsigned
+       saturation op code, so we have to promote things back 16 bits.*/
+    "pxor %%mm0,%%mm0\n\t"
+    "movq %%mm4,%%mm5\n\t"
+    "punpcklbw %%mm0,%%mm4\n\t"
+    "punpckhbw %%mm0,%%mm5\n\t"
+    "movq %%mm1,%%mm6\n\t"
+    "punpcklbw %%mm0,%%mm1\n\t"
+    "punpckhbw %%mm0,%%mm6\n\t"
+    /*_pix[0...8+_ystride]+=R_i*/
+    "paddw %%mm2,%%mm4\n\t"
+    "paddw %%mm3,%%mm5\n\t"
+    /*_pix[0...8+_ystride*2]-=R_i*/
+    "psubw %%mm2,%%mm1\n\t"
+    "psubw %%mm3,%%mm6\n\t"
+    "packuswb %%mm5,%%mm4\n\t"
+    "packuswb %%mm6,%%mm1\n\t"
+    /*Write it back out.*/
+    "movq %%mm4,(%[pix],%[ystride])\n\t"
+    "movq %%mm1,(%[pix],%[ystride],2)\n\t"
+    :[s]"=&S"(esi)
+    :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll)
+    :"memory"
+  );
+}
+
+/*This code implements the bulk of loop_filter_h().
+  Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
+   four p0's to one register we must transpose the values in four mmx regs.
+  When half is done we repeat this for the rest.*/
+static void loop_filter_h4(unsigned char *_pix,long _ystride,
+ const ogg_int16_t *_ll){
+  long esi;
+  long edi;
+  __asm__ __volatile__(
+    /*x x x x 3 2 1 0*/
+    "movd (%[pix]),%%mm0\n\t"
+    /*esi=_ystride*3*/
+    "lea (%[ystride],%[ystride],2),%[s]\n\t"
+    /*x x x x 7 6 5 4*/
+    "movd (%[pix],%[ystride]),%%mm1\n\t"
+    /*x x x x B A 9 8*/
+    "movd (%[pix],%[ystride],2),%%mm2\n\t"
+    /*x x x x F E D C*/
+    "movd (%[pix],%[s]),%%mm3\n\t"
+    /*mm0=7 3 6 2 5 1 4 0*/
+    "punpcklbw %%mm1,%%mm0\n\t"
+    /*mm2=F B E A D 9 C 8*/
+    "punpcklbw %%mm3,%%mm2\n\t"
+    /*mm1=7 3 6 2 5 1 4 0*/
+    "movq %%mm0,%%mm1\n\t"
+    /*mm0=F B 7 3 E A 6 2*/
+    "punpckhwd %%mm2,%%mm0\n\t"
+    /*mm1=D 9 5 1 C 8 4 0*/
+    "punpcklwd %%mm2,%%mm1\n\t"
+    "pxor %%mm7,%%mm7\n\t"
+    /*mm5=D 9 5 1 C 8 4 0*/
+    "movq %%mm1,%%mm5\n\t"
+    /*mm1=x C x 8 x 4 x 0==pix[0]*/
+    "punpcklbw %%mm7,%%mm1\n\t"
+    /*mm5=x D x 9 x 5 x 1==pix[1]*/
+    "punpckhbw %%mm7,%%mm5\n\t"
+    /*mm3=F B 7 3 E A 6 2*/
+    "movq %%mm0,%%mm3\n\t"
+    /*mm0=x E x A x 6 x 2==pix[2]*/
+    "punpcklbw %%mm7,%%mm0\n\t"
+    /*mm3=x F x B x 7 x 3==pix[3]*/
+    "punpckhbw %%mm7,%%mm3\n\t"
+    /*mm1=mm1-mm3==pix[0]-pix[3]*/
+    "psubw %%mm3,%%mm1\n\t"
+    /*Save a copy of pix[2] for later.*/
+    "movq %%mm0,%%mm4\n\t"
+    /*mm2=3 3 3 3
+      mm0=mm0-mm5==pix[2]-pix[1]*/
+    "pcmpeqw %%mm2,%%mm2\n\t"
+    "psubw %%mm5,%%mm0\n\t"
+    "psrlw $14,%%mm2\n\t"
+    /*Scale by 3.*/
+    "pmullw %%mm2,%%mm0\n\t"
+    /*mm2=4 4 4 4
+      f=mm1==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/
+    "psrlw $1,%%mm2\n\t"
+    "paddw %%mm1,%%mm0\n\t"
+    "psllw $2,%%mm2\n\t"
+    /*Add 4.*/
+    "paddw %%mm2,%%mm0\n\t"
+    /*"Divide" by 8, producing the residuals R_i.*/
+    "psraw $3,%%mm0\n\t"
+    /*Now compute lflim of mm0 cf. Section 7.10 of the sepc.*/
+    /*mm6=L L L L*/
+    "movq (%[ll]),%%mm6\n\t"
+    /*if(R_i<-2L||R_i>2L)R_i=0:*/
+    "movq %%mm0,%%mm1\n\t"
+    "pxor %%mm2,%%mm2\n\t"
+    "movq %%mm6,%%mm3\n\t"
+    "psubw %%mm6,%%mm2\n\t"
+    "psllw $1,%%mm3\n\t"
+    "psllw $1,%%mm2\n\t"
+    /*mm0==R_3 R_2 R_1 R_0*/
+    /*mm1==R_3 R_2 R_1 R_0*/
+    /*mm2==-2L -2L -2L -2L*/
+    /*mm3==2L 2L 2L 2L*/
+    "pcmpgtw %%mm0,%%mm3\n\t"
+    "pcmpgtw %%mm2,%%mm1\n\t"
+    "pand %%mm3,%%mm0\n\t"
+    "pand %%mm1,%%mm0\n\t"
+    /*if(R_i<-L)R_i'=R_i+2L;
+      if(R_i>L)R_i'=R_i-2L;
+      if(R_i<-L||R_i>L)R_i=-R_i':*/
+    "psraw $1,%%mm2\n\t"
+    "movq %%mm0,%%mm1\n\t"
+    "movq %%mm6,%%mm3\n\t"
+    /*mm0==R_3 R_2 R_1 R_0*/
+    /*mm1==R_3 R_2 R_1 R_0*/
+    /*mm2==-L -L -L -L*/
+    /*mm6==L L L L*/
+    /*mm2=-L>R_i?FF:00*/
+    "pcmpgtw %%mm0,%%mm2\n\t"
+    /*mm1=R_i>L?FF:00*/
+    "pcmpgtw %%mm6,%%mm1\n\t"
+    /*mm3=2L 2L 2L 2L*/
+    "psllw $1,%%mm3\n\t"
+    /*mm6=2L 2L 2L 2L*/
+    "psllw $1,%%mm6\n\t"
+    /*mm3=R_i>L?2L:0*/
+    "pand %%mm1,%%mm3\n\t"
+    /*mm6=-L>R_i?2L:0*/
+    "pand %%mm2,%%mm6\n\t"
+    /*mm0=R_i>L?R_i-2L:R_i*/
+    "psubw %%mm3,%%mm0\n\t"
+    /*mm1=-L>R_i||R_i>L*/
+    "por %%mm2,%%mm1\n\t"
+    /*mm0=-L>R_i?R_i+2L:R_i*/
+    "paddw %%mm6,%%mm0\n\t"
+    /*mm1=-L>R_i||R_i>L?R_i':0*/
+    "pand %%mm0,%%mm1\n\t"
+    /*mm0=-L>R_i||R_i>L?0:R_i*/
+    "psubw %%mm1,%%mm0\n\t"
+    /*mm0=-L>R_i||R_i>L?-R_i':R_i*/
+    "psubw %%mm1,%%mm0\n\t"
+    /*_pix[1]+=R_i;*/
+    "paddw %%mm0,%%mm5\n\t"
+    /*_pix[2]-=R_i;*/
+    "psubw %%mm0,%%mm4\n\t"
+    /*mm5=x x x x D 9 5 1*/
+    "packuswb %%mm7,%%mm5\n\t"
+    /*mm4=x x x x E A 6 2*/
+    "packuswb %%mm7,%%mm4\n\t"
+    /*mm5=E D A 9 6 5 2 1*/
+    "punpcklbw %%mm4,%%mm5\n\t"
+    /*edi=6 5 2 1*/
+    "movd %%mm5,%%edi\n\t"
+    "movw %%di,1(%[pix])\n\t"
+    /*Why is there such a big stall here?*/
+    "psrlq $32,%%mm5\n\t"
+    "shrl $16,%%edi\n\t"
+    "movw %%di,1(%[pix],%[ystride])\n\t"
+    /*edi=E D A 9*/
+    "movd %%mm5,%%edi\n\t"
+    "movw %%di,1(%[pix],%[ystride],2)\n\t"
+    "shrl $16,%%edi\n\t"
+    "movw %%di,1(%[pix],%[s])\n\t"
+    :[s]"=&S"(esi),[d]"=&D"(edi),
+     [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
+    :
+    :"memory"
+  );
+}
+
+static void loop_filter_h(unsigned char *_pix,int _ystride,
+ const ogg_int16_t *_ll){
+  _pix-=2;
+  loop_filter_h4(_pix,_ystride,_ll);
+  loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
+}
+
+/*We copy the whole function because the MMX routines will be inlined 4 times,
+   and we can do just a single emms call at the end this way.
+  We also do not use the _bv lookup table, instead computing the values that
+   would lie in it on the fly.*/
+
+/*Apply the loop filter to a given set of fragment rows in the given plane.
+  The filter may be run on the bottom edge, affecting pixels in the next row of
+   fragments, so this row also needs to be available.
+  _bv:        The bounding values array.
+  _refi:      The index of the frame buffer to filter.
+  _pli:       The color plane to filter.
+  _fragy0:    The Y coordinate of the first fragment row to filter.
+  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
+void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end){
+  ogg_int16_t __attribute__((aligned(8)))  ll[4];
+  th_img_plane                            *iplane;
+  oc_fragment_plane                       *fplane;
+  oc_fragment                             *frag_top;
+  oc_fragment                             *frag0;
+  oc_fragment                             *frag;
+  oc_fragment                             *frag_end;
+  oc_fragment                             *frag0_end;
+  oc_fragment                             *frag_bot;
+  ll[0]=ll[1]=ll[2]=ll[3]=
+   (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  fplane=_state->fplanes+_pli;
+  /*The following loops are constructed somewhat non-intuitively on purpose.
+    The main idea is: if a block boundary has at least one coded fragment on
+     it, the filter is applied to it.
+    However, the order that the filters are applied in matters, and VP3 chose
+     the somewhat strange ordering used below.*/
+  frag_top=_state->frags+fplane->froffset;
+  frag0=frag_top+_fragy0*fplane->nhfrags;
+  frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
+  frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
+  while(frag0<frag0_end){
+    frag=frag0;
+    frag_end=frag+fplane->nhfrags;
+    while(frag<frag_end){
+      if(frag->coded){
+        if(frag>frag0){
+          loop_filter_h(frag->buffer[_refi],iplane->stride,ll);
+        }
+        if(frag0>frag_top){
+          loop_filter_v(frag->buffer[_refi],iplane->stride,ll);
+        }
+        if(frag+1<frag_end&&!(frag+1)->coded){
+          loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll);
+        }
+        if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){
+          loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
+           iplane->stride,ll);
+        }
+      }
+      frag++;
+    }
+    frag0+=fplane->nhfrags;
+  }
+  /*This needs to be removed when decode specific functions are implemented:*/
+  __asm__ __volatile__("emms\n\t");
+}
+
+#endif
--- a/modules/libtheora/lib/dec/x86/x86int.h
+++ b/modules/libtheora/lib/dec/x86/x86int.h
@ -0,0 +1,42 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86int.h 14375 2008-01-06 05:37:33Z tterribe $
+
+ ********************************************************************/
+
+#if !defined(_x86_x86int_H)
+# define _x86_x86int_H (1)
+# include "../../internal.h"
+
+void oc_state_vtable_init_x86(oc_theora_state *_state);
+
+void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t *_residue);
+void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
+ int _src2_ystride,const ogg_int16_t *_residue);
+void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli);
+void oc_state_frag_recon_mmx(oc_theora_state *_state,oc_fragment *_frag,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+ ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
+void oc_restore_fpu_mmx(void);
+void oc_idct8x8_mmx(ogg_int16_t _y[64]);
+void oc_idct8x8_10_mmx(ogg_int16_t _y[64]);
+void oc_fill_idct_constants_mmx(void);
+void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end);
+#endif
--- a/modules/libtheora/lib/dec/x86/x86state.c
+++ b/modules/libtheora/lib/dec/x86/x86state.c
@ -0,0 +1,38 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 13884 2007-09-22 08:38:10Z giles $
+
+ ********************************************************************/
+
+#include "x86int.h"
+
+#if defined(USE_ASM)
+
+#include "../../cpu.h"
+
+void oc_state_vtable_init_x86(oc_theora_state *_state){
+  _state->cpu_flags=oc_cpu_flags_get();
+  if(_state->cpu_flags&OC_CPU_X86_MMX){
+    _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_mmx;
+    _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_mmx;
+    _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_mmx;
+    _state->opt_vtable.state_frag_copy=oc_state_frag_copy_mmx;
+    _state->opt_vtable.state_frag_recon=oc_state_frag_recon_mmx;
+    _state->opt_vtable.state_loop_filter_frag_rows=
+     oc_state_loop_filter_frag_rows_mmx;
+    _state->opt_vtable.restore_fpu=oc_restore_fpu_mmx;
+  }
+  else oc_state_vtable_init_c(_state);
+}
+#endif
--- a/modules/libtheora/lib/internal.h
+++ b/modules/libtheora/lib/internal.h
@ -0,0 +1,505 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
+ * by the Xiph.Org Foundation http://www.xiph.org/                  *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: internal.h 14714 2008-04-12 01:04:43Z giles $
+
+ ********************************************************************/
+
+#if !defined(_internal_H)
+# define _internal_H (1)
+# include <stdlib.h>
+# if defined(HAVE_CONFIG_H)
+#  include <config.h>
+# endif
+# include "theora/codec.h"
+# include "theora/theora.h"
+# include "dec/ocintrin.h"
+# include "dec/huffman.h"
+# include "dec/quant.h"
+
+/* debug macros */
+#ifdef _TH_DEBUG_
+#include <stdio.h>
+extern long dframe;
+extern FILE *debugout;
+#define TH_DEBUG(...) fprintf(debugout, __VA_ARGS__)
+#else
+#define TH_DEBUG(...)
+#endif
+
+/*Thank you Microsoft, I know the order of operations.*/
+# if defined(_MSC_VER)
+#  pragma warning(disable:4554) /* order of operations */
+#  pragma warning(disable:4799) /* disable missing EMMS warnings */
+# endif
+
+/*This library's version.*/
+# define OC_VENDOR_STRING "Xiph.Org libTheora I 20071025 3 2 1"
+
+/*Theora bitstream version.*/
+# define TH_VERSION_MAJOR (3)
+# define TH_VERSION_MINOR (2)
+# define TH_VERSION_SUB   (1)
+# define TH_VERSION_CHECK(_info,_maj,_min,_sub) \
+ ((_info)->version_major>(_maj)||(_info)->version_major==(_maj)&& \
+ ((_info)->version_minor>(_min)||(_info)->version_minor==(_min)&& \
+ (_info)->version_subminor>=(_sub)))
+
+/*A keyframe.*/
+#define OC_INTRA_FRAME (0)
+/*A predicted frame.*/
+#define OC_INTER_FRAME (1)
+/*A frame of unknown type (frame type decision has not yet been made).*/
+#define OC_UNKWN_FRAME (-1)
+
+/*The amount of padding to add to the reconstructed frame buffers on all
+   sides.
+  This is used to allow unrestricted motion vectors without special casing.
+  This must be a multiple of 2.*/
+#define OC_UMV_PADDING (16)
+
+/*Frame classification indices.*/
+/*The previous golden frame.*/
+#define OC_FRAME_GOLD (0)
+/*The previous frame.*/
+#define OC_FRAME_PREV (1)
+/*The current frame.*/
+#define OC_FRAME_SELF (2)
+
+/*The input or output buffer.*/
+#define OC_FRAME_IO   (3)
+
+/*Macroblock modes.*/
+/*Macro block is invalid: It is never coded.*/
+#define OC_MODE_INVALID        (-1)
+/*Encoded difference from the same macro block in the previous frame.*/
+#define OC_MODE_INTER_NOMV     (0)
+/*Encoded with no motion compensated prediction.*/
+#define OC_MODE_INTRA          (1)
+/*Encoded difference from the previous frame offset by the given motion 
+  vector.*/
+#define OC_MODE_INTER_MV       (2)
+/*Encoded difference from the previous frame offset by the last coded motion 
+  vector.*/
+#define OC_MODE_INTER_MV_LAST  (3)
+/*Encoded difference from the previous frame offset by the second to last 
+  coded motion vector.*/
+#define OC_MODE_INTER_MV_LAST2 (4)
+/*Encoded difference from the same macro block in the previous golden 
+  frame.*/
+#define OC_MODE_GOLDEN_NOMV    (5)
+/*Encoded difference from the previous golden frame offset by the given motion 
+  vector.*/
+#define OC_MODE_GOLDEN_MV      (6)
+/*Encoded difference from the previous frame offset by the individual motion 
+  vectors given for each block.*/
+#define OC_MODE_INTER_MV_FOUR  (7)
+/*The number of (coded) modes.*/
+#define OC_NMODES              (8)
+
+/*Macro block is not coded.*/
+#define OC_MODE_NOT_CODED      (8)
+
+/*Predictor bit flags.*/
+/*Left.*/
+#define OC_PL  (1)
+/*Upper-left.*/
+#define OC_PUL (2)
+/*Up.*/
+#define OC_PU  (4)
+/*Upper-right.*/
+#define OC_PUR (8)
+
+/*Constants for the packet state machine common between encoder and decoder.*/
+
+/*Next packet to emit/read: Codec info header.*/
+#define OC_PACKET_INFO_HDR    (-3)
+/*Next packet to emit/read: Comment header.*/
+#define OC_PACKET_COMMENT_HDR (-2)
+/*Next packet to emit/read: Codec setup header.*/
+#define OC_PACKET_SETUP_HDR   (-1)
+/*No more packets to emit/read.*/
+#define OC_PACKET_DONE        (INT_MAX)
+
+
+
+typedef struct oc_theora_state oc_theora_state;
+
+
+
+/*A map from a super block to fragment numbers.*/
+typedef int         oc_sb_map[4][4];
+/*A map from a macro block to fragment numbers.*/
+typedef int         oc_mb_map[3][4];
+/*A motion vector.*/
+typedef signed char oc_mv[2];
+
+
+
+/*Super block information.
+  Super blocks are 32x32 segments of pixels in a single color plane indexed
+   in image order.
+  Internally, super blocks are broken up into four quadrants, each of which
+   contains a 2x2 pattern of blocks, each of which is an 8x8 block of pixels.
+  Quadrants, and the blocks within them, are indexed in a special order called
+   a "Hilbert curve" within the super block.
+
+  In order to differentiate between the Hilbert-curve indexing strategy and
+   the regular image order indexing strategy, blocks indexed in image order
+   are called "fragments".
+  Fragments are indexed in image order, left to right, then bottom to top,
+   from Y plane to Cb plane to Cr plane.*/
+typedef struct{
+  unsigned  coded_fully:1;
+  unsigned  coded_partially:1;
+  unsigned  quad_valid:4;
+  oc_sb_map map;
+}oc_sb;
+
+
+
+/*Macro block information.
+  The co-located fragments in all image planes corresponding to the location of
+   a single luma plane super block quadrant forms a macro block.
+  Thus there is only a single set of macro blocks for all planes, which
+   contains between 6 and 12 fragments, depending on the pixel format.
+  Therefore macro block information is kept in a separate array from super
+   blocks, to avoid unused space in the other planes.*/
+typedef struct{
+  /*The current macro block mode.
+    A negative number indicates the macro block lies entirely outside the
+     coded frame.*/
+  int           mode;
+  /*The X location of the macro block's upper-left hand pixel.*/
+  int           x;
+  /*The Y location of the macro block's upper-right hand pixel.*/
+  int           y;
+  /*The fragments that belong to this macro block in each color plane.
+    Fragments are stored in image order (left to right then top to bottom).
+    When chroma components are decimated, the extra fragments have an index of
+     -1.*/
+  oc_mb_map     map;
+}oc_mb;
+
+
+
+/*Information about a fragment which intersects the border of the displayable
+   region.
+  This marks which pixels belong to the displayable region, and is used to
+   ensure that pixels outside of this region are never referenced.
+  This allows applications to pass in buffers that are really the size of the
+   displayable region without causing a seg fault.*/
+typedef struct{
+  /*A bit mask marking which pixels are in the displayable region.
+    Pixel (x,y) corresponds to bit (y<<3|x).*/
+  ogg_int64_t mask;
+  /*The number of pixels in the displayable region.
+    This is always positive, and always less than 64.*/
+  int         npixels;
+}oc_border_info;
+
+
+
+/*Fragment information.*/
+typedef struct{
+  /*A flag indicating whether or not this fragment is coded.*/
+  unsigned        coded:1;
+  /*A flag indicating that all of this fragment lies outside the displayable
+     region of the frame.
+    Note the contrast with an invalid macro block, which is outside the coded
+     frame, not just the displayable one.*/
+  unsigned        invalid:1;
+  /*The quality index used for this fragment's AC coefficients.*/
+  unsigned        qi:6;
+  /*The mode of the macroblock this fragment belongs to.*/
+  int             mbmode:8;
+  /*The prediction-corrected DC component.*/
+  int             dc:16;
+  /*A pointer to the portion of an image covered by this fragment in several
+     images.
+    The first three are reconstructed frame buffers, while the last is the
+     input image buffer.
+    The appropriate stride value is determined by the color plane the fragment
+     belongs in.*/
+  unsigned char  *buffer[4];
+  /*Information for fragments which lie partially outside the displayable
+     region.
+    For fragments completely inside or outside this region, this is NULL.*/
+  oc_border_info *border;
+  /*The motion vector used for this fragment.*/
+  oc_mv           mv;
+
+#ifdef _TH_DEBUG_
+  int quant[64];
+  int freq[64];
+  int time[64];
+  int recon[64];
+  int loop[64];
+#endif
+}oc_fragment;
+
+
+
+/*A description of each fragment plane.*/
+typedef struct{
+  /*The number of fragments in the horizontal direction.*/
+  int nhfrags;
+  /*The number of fragments in the vertical direction.*/
+  int nvfrags;
+  /*The offset of the first fragment in the plane.*/
+  int froffset;
+  /*The total number of fragments in the plane.*/
+  int nfrags;
+  /*The number of super blocks in the horizontal direction.*/
+  int nhsbs;
+  /*The number of super blocks in the vertical direction.*/
+  int nvsbs;
+  /*The offset of the first super block in the plane.*/
+  int sboffset;
+  /*The total number of super blocks in the plane.*/
+  int nsbs;
+}oc_fragment_plane;
+
+
+
+/*The shared (encoder and decoder) functions that have accelerated variants.*/
+typedef struct{
+  void (*frag_recon_intra)(unsigned char *_dst,int _dst_ystride,
+   const ogg_int16_t *_residue);
+  void (*frag_recon_inter)(unsigned char *_dst,int _dst_ystride,
+   const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
+  void (*frag_recon_inter2)(unsigned char *_dst,int _dst_ystride,
+   const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
+   int _src2_ystride,const ogg_int16_t *_residue);
+  void (*state_frag_copy)(const oc_theora_state *_state,
+   const int *_fragis,int _nfragis,int _dst_frame,int _src_frame,int _pli);
+  void (*state_frag_recon)(oc_theora_state *_state,oc_fragment *_frag,
+   int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+   ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
+  void (*restore_fpu)(void);
+  void (*state_loop_filter_frag_rows)(oc_theora_state *_state,int *_bv,
+   int _refi,int _pli,int _fragy0,int _fragy_end);  
+}oc_base_opt_vtable;
+
+
+
+/*Common state information between the encoder and decoder.*/
+struct oc_theora_state{
+  /*The stream information.*/
+  th_info           info;
+  /*Table for shared accelerated functions.*/
+  oc_base_opt_vtable    opt_vtable;
+  /*CPU flags to detect the presence of extended instruction sets.*/
+  ogg_uint32_t          cpu_flags;
+  /*The fragment plane descriptions.*/
+  oc_fragment_plane     fplanes[3];
+  /*The total number of fragments in a single frame.*/
+  int                   nfrags;
+  /*The list of fragments, indexed in image order.*/
+  oc_fragment          *frags;
+  /*The total number of super blocks in a single frame.*/
+  int                   nsbs;
+  /*The list of super blocks, indexed in image order.*/
+  oc_sb                *sbs;
+  /*The number of macro blocks in the X direction.*/
+  int                   nhmbs;
+  /*The number of macro blocks in the Y direction.*/
+  int                   nvmbs;
+  /*The total number of macro blocks.*/
+  int                   nmbs;
+  /*The list of macro blocks, indexed in super block order.
+    That is, the macro block corresponding to the macro block mbi in (luma
+     plane) super block sbi is (sbi<<2|mbi).*/
+  oc_mb                *mbs;
+  /*The list of coded fragments, in coded order.*/
+  int                  *coded_fragis;
+  /*The number of coded fragments in each plane.*/
+  int                   ncoded_fragis[3];
+  /*The list of uncoded fragments.
+    This just past the end of the list, which is in reverse order, and
+     uses the same block of allocated storage as the coded_fragis list.*/
+  int                  *uncoded_fragis;
+  /*The number of uncoded fragments in each plane.*/
+  int                   nuncoded_fragis[3];
+  /*The list of coded macro blocks in the Y plane, in coded order.*/
+  int                  *coded_mbis;
+  /*The number of coded macro blocks in the Y plane.*/
+  int                   ncoded_mbis;
+  /*A copy of the image data used to fill the input pointers in each fragment.
+    If the data pointers or strides change, these input pointers must be
+     re-populated.*/
+  th_ycbcr_buffer   input;
+  /*The number of unique border patterns.*/
+  int                   nborders;
+  /*The storage for the border info for all border fragments.
+    This data is pointed to from the appropriate fragments.*/
+  oc_border_info        borders[16];
+  /*The index of the buffers being used for each OC_FRAME_* reference frame.*/
+  int                   ref_frame_idx[3];
+  /*The actual buffers used for the previously decoded frames.*/
+  th_ycbcr_buffer   ref_frame_bufs[3];
+  /*The storage for the reference frame buffers.*/
+  unsigned char        *ref_frame_data;
+  /*The frame number of the last keyframe.*/
+  ogg_int64_t           keyframe_num;
+  /*The frame number of the current frame.*/
+  ogg_int64_t           curframe_num;
+  /*The granpos of the current frame.*/
+  ogg_int64_t           granpos;
+  /*The type of the current frame.*/
+  int                   frame_type;
+  /*The quality indices of the current frame.*/
+  int                   qis[3];
+  /*The number of quality indices used in the current frame.*/
+  int                   nqis;
+  /*The dequantization tables.*/
+  oc_quant_table       *dequant_tables[2][3];
+  oc_quant_tables       dequant_table_data[2][3];
+  /*Loop filter strength parameters.*/
+  unsigned char         loop_filter_limits[64];
+};
+
+
+
+/*The function type used to fill in the chroma plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.
+  _cbmvs: The chroma block-level motion vectors to fill in.
+  _lmbmv: The luma macro-block level motion vector to fill in for use in
+           prediction.
+  _lbmvs: The luma block-level motion vectors.*/
+typedef void (*oc_set_chroma_mvs_func)(oc_mv _cbmvs[4],const oc_mv _lbmvs[4]);
+
+
+
+/*A map from the index in the zig zag scan to the coefficient number in a
+   block.
+  The extra 64 entries send out of bounds indexes to index 64.
+  This is used to safely ignore invalid zero runs when decoding
+   coefficients.*/
+extern const int OC_FZIG_ZAG[128];
+/*A map from the coefficient number in a block to its index in the zig zag
+   scan.*/
+extern const int OC_IZIG_ZAG[64];
+/*The predictor frame to use for each macro block mode.*/
+extern const int OC_FRAME_FOR_MODE[OC_NMODES];
+/*A map from physical macro block ordering to bitstream macro block
+   ordering within a super block.*/
+extern const int OC_MB_MAP[2][2];
+/*A list of the indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.*/
+extern const int OC_MB_MAP_IDXS[TH_PF_NFORMATS][12];
+/*The number of indices in the oc_mb.map array that can be valid for each of
+   the various chroma decimation types.*/
+extern const int OC_MB_MAP_NIDXS[TH_PF_NFORMATS];
+/*A table of functions used to fill in the Cb,Cr plane motion vectors for a
+   macro block when 4 different motion vectors are specified in the luma
+   plane.*/
+extern const oc_set_chroma_mvs_func OC_SET_CHROMA_MVS_TABLE[TH_PF_NFORMATS];
+
+
+
+int oc_ilog(unsigned _v);
+void **oc_malloc_2d(size_t _height,size_t _width,size_t _sz);
+void **oc_calloc_2d(size_t _height,size_t _width,size_t _sz);
+void oc_free_2d(void *_ptr);
+
+void oc_ycbcr_buffer_flip(th_ycbcr_buffer _dst,
+ const th_ycbcr_buffer _src);
+
+int oc_dct_token_skip(int _token,int _extra_bits);
+
+int oc_frag_pred_dc(const oc_fragment *_frag,
+ const oc_fragment_plane *_fplane,int _x,int _y,int _pred_last[3]);
+
+int oc_state_init(oc_theora_state *_state,const th_info *_info);
+void oc_state_clear(oc_theora_state *_state);
+void oc_state_vtable_init_c(oc_theora_state *_state);
+void oc_state_borders_fill_rows(oc_theora_state *_state,int _refi,int _pli,
+ int _y0,int _yend);
+void oc_state_borders_fill_caps(oc_theora_state *_state,int _refi,int _pli);
+void oc_state_borders_fill(oc_theora_state *_state,int _refi);
+void oc_state_fill_buffer_ptrs(oc_theora_state *_state,int _buf_idx,
+ th_ycbcr_buffer _img);
+int oc_state_mbi_for_pos(oc_theora_state *_state,int _mbx,int _mby);
+int oc_state_get_mv_offsets(oc_theora_state *_state,int *_offsets,
+ int _dx,int _dy,int _ystride,int _pli);
+
+int oc_state_loop_filter_init(oc_theora_state *_state,int *_bv);
+void oc_state_loop_filter(oc_theora_state *_state,int _frame);
+#if defined(OC_DUMP_IMAGES)
+int oc_state_dump_frame(const oc_theora_state *_state,int _frame,
+ const char *_suf);
+#endif
+
+/*Shared accelerated functions.*/
+void oc_frag_recon_intra(const oc_theora_state *_state,
+ unsigned char *_dst,int _dst_ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter(const oc_theora_state *_state,
+ unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter2(const oc_theora_state *_state,
+ unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
+ int _src2_ystride,const ogg_int16_t *_residue);
+void oc_state_frag_copy(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli);
+void oc_state_frag_recon(oc_theora_state *_state,oc_fragment *_frag,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+ ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
+void oc_state_loop_filter_frag_rows(oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu(const oc_theora_state *_state);
+
+/*Default pure-C implementations.*/
+void oc_frag_recon_intra_c(unsigned char *_dst,int _dst_ystride,
+ const ogg_int16_t *_residue);
+void oc_frag_recon_inter_c(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
+void oc_frag_recon_inter2_c(unsigned char *_dst,int _dst_ystride,
+ const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
+ int _src2_ystride,const ogg_int16_t *_residue);
+void oc_state_frag_copy_c(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli);
+void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+ ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
+void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end);
+void oc_restore_fpu_c(void);
+
+/*We need a way to call a few encoder functions without introducing a link-time
+   dependency into the decoder, while still allowing the old alpha API which
+   does not distinguish between encoder and decoder objects to be used.
+  We do this by placing a function table at the start of the encoder object
+   which can dispatch into the encoder library.
+  We do a similar thing for the decoder in case we ever decide to split off a
+   common base library.*/
+typedef void (*oc_state_clear_func)(theora_state *_th);
+typedef int (*oc_state_control_func)(theora_state *th,int req,
+ void *buf,size_t buf_sz);
+typedef ogg_int64_t (*oc_state_granule_frame_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+typedef double (*oc_state_granule_time_func)(theora_state *_th,
+ ogg_int64_t _granulepos);
+
+typedef struct oc_state_dispatch_vtbl oc_state_dispatch_vtbl;
+
+struct oc_state_dispatch_vtbl{
+  oc_state_clear_func         clear;
+  oc_state_control_func       control;
+  oc_state_granule_frame_func granule_frame;
+  oc_state_granule_time_func  granule_time;
+};
+
+#endif
--- a/modules/libtheora/update.sh
+++ b/modules/libtheora/update.sh
@ -0,0 +1,49 @@
+# Usage: ./update.sh <theora_src_directory>
+#
+# Copies the needed files from a directory containing the original
+# libtheora source that we need for the Mozilla HTML5 media support.
+sed s/\#define\ OC_X86ASM//g $1/config.h >./lib/config.h
+sed s/\#define\ USE_ASM//g ./lib/config.h >./lib/config.h2
+sed s/\#define\ THEORA_DISABLE_ENCODE//g ./lib/config.h2 >./lib/config.h
+rm ./lib/config.h2
+cp ./lib/config.h ./include/theora/config.h
+cp $1/LICENSE ./LICENSE
+cp $1/CHANGES ./CHANGES
+cp $1/COPYING ./COPYING
+cp $1/README ./README
+cp $1/AUTHORS ./AUTHORS
+cp $1/lib/cpu.c ./lib/cpu.c
+cp $1/lib/cpu.h ./lib/cpu.h
+cp $1/lib/dec/ocintrin.h ./lib/dec/ocintrin.h
+cp $1/lib/dec/huffdec.c ./lib/dec/huffdec.c
+cp $1/lib/dec/apiwrapper.h ./lib/dec/apiwrapper.h
+cp $1/lib/dec/x86/mmxfrag.c ./lib/dec/x86/mmxfrag.c
+cp $1/lib/dec/x86/x86state.c ./lib/dec/x86/x86state.c
+cp $1/lib/dec/x86/x86int.h ./lib/dec/x86/x86int.h
+cp $1/lib/dec/x86/mmxstate.c ./lib/dec/x86/mmxstate.c
+cp $1/lib/dec/x86/mmxidct.c ./lib/dec/x86/mmxidct.c
+cp $1/lib/dec/bitwise.h ./lib/dec/bitwise.h
+cp $1/lib/dec/quant.c ./lib/dec/quant.c
+cp $1/lib/dec/bitwise.c ./lib/dec/bitwise.c
+cp $1/lib/dec/enquant.h ./lib/dec/enquant.h
+cp $1/lib/dec/internal.c ./lib/dec/internal.c
+cp $1/lib/dec/huffdec.h ./lib/dec/huffdec.h
+cp $1/lib/dec/dct.h ./lib/dec/dct.h
+cp $1/lib/dec/idct.h ./lib/dec/idct.h
+cp $1/lib/dec/decinfo.c ./lib/dec/decinfo.c
+cp $1/lib/dec/decapiwrapper.c ./lib/dec/decapiwrapper.c
+cp $1/lib/dec/idct.c ./lib/dec/idct.c
+cp $1/lib/dec/state.c ./lib/dec/state.c
+cp $1/lib/dec/info.c ./lib/dec/info.c
+cp $1/lib/dec/huffman.h ./lib/dec/huffman.h
+cp $1/lib/dec/decint.h ./lib/dec/decint.h
+cp $1/lib/dec/fragment.c ./lib/dec/fragment.c
+cp $1/lib/dec/apiwrapper.c ./lib/dec/apiwrapper.c
+cp $1/lib/dec/decode.c ./lib/dec/decode.c
+cp $1/lib/dec/dequant.c ./lib/dec/dequant.c
+cp $1/lib/dec/quant.h ./lib/dec/quant.h
+cp $1/lib/dec/dequant.h ./lib/dec/dequant.h
+cp $1/lib/internal.h ./lib/internal.h
+cp $1/include/theora/theora.h ./include/theora/theora.h
+cp $1/include/theora/theoradec.h ./include/theora/theoradec.h
+cp $1/include/theora/codec.h ./include/theora/codec.h