diff --git a/media/libtheora/455357_wince_local_variable_macro_clash_patch b/media/libtheora/455357_wince_local_variable_macro_clash_patch new file mode 100644 index 000000000000..0593d1b84442 --- /dev/null +++ b/media/libtheora/455357_wince_local_variable_macro_clash_patch @@ -0,0 +1,40 @@ +diff --git a/media/libtheora/lib/dec/decode.c b/media/libtheora/lib/dec/decode.c +--- a/media/libtheora/lib/dec/decode.c ++++ b/media/libtheora/lib/dec/decode.c +@@ -1803,8 +1803,8 @@ + + static void oc_dering_block(unsigned char *_idata,int _ystride,int _b, + int _dc_scale,int _sharp_mod,int _strong){ +- static const int MOD_MAX[2]={24,32}; +- static const int MOD_SHIFT[2]={1,0}; ++ static const int OCDB_MOD_MAX[2]={24,32}; ++ static const int OCDB_MOD_SHIFT[2]={1,0}; + const unsigned char *psrc; + const unsigned char *src; + const unsigned char *nsrc; +@@ -1814,14 +1814,14 @@ + int mod_hi; + int by; + int bx; +- mod_hi=OC_MINI(3*_dc_scale,MOD_MAX[_strong]); ++ mod_hi=OC_MINI(3*_dc_scale,OCDB_MOD_MAX[_strong]); + dst=_idata; + src=dst; + psrc=src-(_ystride&-!(_b&4)); + for(by=0;by<9;by++){ + for(bx=0;bx<8;bx++){ + int mod; +- mod=32+_dc_scale-(abs(src[bx]-psrc[bx])< Ralph Giles Timothy B. Terriberry - Ongoing development - + Dan B. Miller - Pre alpha3 development - + Wim Tayman Dan Lenski - MMX optimized functions - + Aaron Colwell Thomas Vander Stichele Jan Gerber Conrad Parker - Bug fixes, enhancements, build systems. 
- + Mauricio Piacentini - Original win32 projects and example ports + - dump_video example - VP3->Theora transcoder Silvia Pfeiffer - Figures for the spec +Vegard Nossum + - Original png2theora implementation + +Rudolf Marek +Nils Pipenbrinck + - MMX optimizations + Michael Smith Andre Pang calc diff --git a/media/libtheora/CHANGES b/media/libtheora/CHANGES index 3c33f674cf9d..74183d91b436 100644 --- a/media/libtheora/CHANGES +++ b/media/libtheora/CHANGES @@ -1,3 +1,32 @@ +libtheora 1.0 (2008 November 3) + + - Merge x86 assembly for forward DCT from Thusnelda branch. + - Update 32 bit MMX with loop filter fix. + - Check for an uninitialized state before dereferencing in propagating + decode calls. + - Remove all TH_DEBUG statements. + - Rename the bitpacker source files copied from libogg to avoid + confusing simple build systems using both libraries. + - Declare bitfield entries to be explicitly signed for Solaris cc. + - Set quantization parameters to default values when an empty buffer is + passed with TH_ENCCTL_SET_QUANT_PARAMS. + - Split encoder and decoder tests depending on configure settings. + - Return lstylex.sty to the distribution. + - Disable inline assembly on gcc versions prior to 3.1. + - Remove extern references for OC_*_QUANT_MIN. + - Make various data tables static const so they can be read-only. + - Remove ENCCTL codes from the old encoder API. + - Implement TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE ctl. + - Fix segfault when exactly one of the width or height is not a multiple + of 16, but the other is. + - Compute the correct vertical offset for chroma. + - cpuid assembly fix for MSVC. + - Add VS2008 project files. + - Build updates for 64-bit platforms, Mingw32, VS and XCode. + - Do not clobber the cropping rectangle. + - Declare ourselves 1.0final to pkg-config to sort after beta releases. + - Fix the scons build to include asm in libtheoradec/enc. 
+ libtheora 1.0beta3 (2008 April 16) - Build new libtheoradec and libtheoraenc libraries @@ -5,12 +34,12 @@ libtheora 1.0beta3 (2008 April 16) not be considered stable yet. - Change granule_frame() to return an index as documented. This is a change of behaviour from 1.0beta1. - - Document that granule_time() returns the end of the - presentation interval. + - Document that granule_time() returns the end of the + presentation interval. - Use a custom copy of the libogg bitpacker in the decoder to avoid function call overhead. - MMX code improved and ported to MSVC. - - Fix a problem with the MMX code on SELinux + - Fix a problem with the MMX code on SELinux. - Fix a problem with decoder quantizer initialization. - Fix a page queue problem with png2theora. - Improved robustness. diff --git a/media/libtheora/COPYING b/media/libtheora/COPYING index c35e1882c24d..5a711972df16 100644 --- a/media/libtheora/COPYING +++ b/media/libtheora/COPYING @@ -1,4 +1,4 @@ -Copyright (C) 2002-2007 Xiph.org Foundation +Copyright (C) 2002-2008 Xiph.Org Foundation and contributors. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -11,7 +11,7 @@ notice, this list of conditions and the following disclaimer. notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. -- Neither the name of the Xiph.org Foundation nor the names of its +- Neither the name of the Xiph.Org Foundation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
diff --git a/media/libtheora/README b/media/libtheora/README index 7d13bc3e2fe5..fc8dc0424c10 100644 --- a/media/libtheora/README +++ b/media/libtheora/README @@ -1,5 +1,5 @@ ------------------------------------------------------------------------- - The Xiph.org Foundation's libtheora 1.0beta1 release + The Xiph.org Foundation's libtheora 1.0 release ------------------------------------------------------------------------- *** What is Theora? @@ -8,8 +8,8 @@ Theora is Xiph.Org's first publicly released video codec, intended for use within the Foundation's Ogg multimedia streaming system. Theora is derived directly from On2's VP3 codec; Currently the encoders are nearly identical, but Theora will make use of new -features supported by the decoder to improve over what is -is possible with VP3. +features supported by the decoder to improve on what is possible +with VP3. *** Where is Theora? @@ -18,12 +18,6 @@ can be gotten from www.theora.org or the main Xiph.Org site at www.xiph.org. Development source is kept in an open subversion repository, see http://theora.org/svn/ for instructions. -*** What is the goal of this release? - -This is the first beta release of the 1.0 reference implementation. -It is intended to completely support the decoder specification, and -gather feedback on the implementation before declaring it stable. - ------------------------------------------------------------------------- Getting started with the code ------------------------------------------------------------------------- @@ -32,30 +26,39 @@ Getting started with the code Requirements summary: - For libtheora: - + For libtheora: + libogg 1.1 or newer. For example encoder: - as above + as above, libvorbis and libvorbisenc 1.0.1 or newer. + For creating a source distribution package: + + as above, + + Doxygen to build the API documentation, + pdflatex and fig2dev to build the format specification + (transfig package in Ubuntu). 
+ For the player only: - as above, + as above, - SDL (Simple Direct media Layer) libraries and headers - - OSS audio driver and development headers + SDL (Simple Direct media Layer) libraries and headers, + OSS audio driver and development headers. The provided build system is the GNU automake/autoconf system, and the main library, libtheora, should already build smoothly on any system. Failure of libtheora to build on a GNU-enabled system is considered a bug; please report problems to theora-dev@xiph.org. -Some windows build support is included in the win32 directory. +Windows build support is included in the win32 directory. + +Project files for Apple XCode are included in the macosx directory. There is also an experimental scons build. @@ -83,7 +86,7 @@ files in that format. *** How do I use the sample player? The sample player takes an Ogg file on standard in; the file may be -audio alone, video alone or video with audio. +audio alone, video alone or video with audio. *** What other tools are available? diff --git a/media/libtheora/README_MOZILLA b/media/libtheora/README_MOZILLA index 64f9edf9bd4c..bf1b5803895f 100644 --- a/media/libtheora/README_MOZILLA +++ b/media/libtheora/README_MOZILLA @@ -1,7 +1,7 @@ -The source from this directory was copied from the libtheora-1.0beta3 +The source from this directory was copied from the libtheora-1.0 source distribution using the update.sh script. The changes made were those applied by update.sh, the addition/update of Makefile.in files -for the Mozilla build system and the patch in bug 450265. +for the Mozilla build system and the patch in the bug below. 
Bug 455357 - WinCE LibTheora Pre-defined Macro usage in local variable 455357_wince_local_variable_macro_clash_patch diff --git a/media/libtheora/changeset_r15144.diff b/media/libtheora/changeset_r15144.diff deleted file mode 100644 index 80bf59b4dc01..000000000000 --- a/media/libtheora/changeset_r15144.diff +++ /dev/null @@ -1,27 +0,0 @@ -Index: /trunk/theora/lib/dec/decapiwrapper.c -=================================================================== ---- /trunk/theora/lib/dec/decapiwrapper.c (revision 14385) -+++ /trunk/theora/lib/dec/decapiwrapper.c (revision 15144) -@@ -168,10 +168,13 @@ - ogg_int64_t gp; - int ret; -+ -+ if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT; - api=(th_api_wrapper *)_td->i->codec_setup; -+ if(!api || !api->decode)return OC_FAULT; - ret=th_decode_packetin(api->decode,_op,&gp); - - #ifdef _TH_DEBUG_ - dframe++; --#endif -+#endif - - if(ret<0)return OC_BADPACKET; -@@ -185,5 +188,7 @@ - int ret; - -+ if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT; - api=(th_api_wrapper *)_td->i->codec_setup; -+ if(!api || !api->decode)return OC_FAULT; - ret=th_decode_ycbcr_out(api->decode,buf); - if(ret>=0){ diff --git a/media/libtheora/include/theora/codec.h b/media/libtheora/include/theora/codec.h index b40c383491e7..afdc1b0fa114 100644 --- a/media/libtheora/include/theora/codec.h +++ b/media/libtheora/include/theora/codec.h @@ -112,13 +112,18 @@ typedef enum{ * specification, Section 4.4, for details on the precise sample * locations.*/ typedef enum{ - /**Chroma decimation by 2 in both the X and Y directions (4:2:0).*/ + /**Chroma decimation by 2 in both the X and Y directions (4:2:0). + The Cb and Cr chroma planes are half the width and half the height of the + luma plane.*/ TH_PF_420, /**Currently reserved.*/ TH_PF_RSVD, - /**Chroma decimation by 2 in the X direction (4:2:2).*/ + /**Chroma decimation by 2 in the X direction (4:2:2). 
+ The Cb and Cr chroma planes are half the width of the luma plane, but full + height.*/ TH_PF_422, - /**No chroma decimation (4:4:4).*/ + /**No chroma decimation (4:4:4). + The Cb and Cr chroma planes are full width and full height.*/ TH_PF_444, /**The total number of currently defined pixel formats.*/ TH_PF_NFORMATS diff --git a/media/libtheora/include/theora/config.h b/media/libtheora/include/theora/config.h index 3ac50fe90b86..0cbbbcac3fa3 100644 --- a/media/libtheora/include/theora/config.h +++ b/media/libtheora/include/theora/config.h @@ -56,19 +56,19 @@ #define PACKAGE_NAME "libtheora" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "libtheora 1.0beta3" +#define PACKAGE_STRING "libtheora 1.0" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "libtheora" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.0beta3" +#define PACKAGE_VERSION "1.0" /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 /* Define to exclude encode support from the build */ - +/* #undef THEORA_DISABLE_ENCODE */ /* Define to exclude floating point code from the build */ /* #undef THEORA_DISABLE_FLOAT */ @@ -77,4 +77,4 @@ /* Version number of package */ -#define VERSION "1.0beta3" +#define VERSION "1.0" diff --git a/media/libtheora/include/theora/theora.h b/media/libtheora/include/theora/theora.h index 656460a6a390..dbef716753fd 100644 --- a/media/libtheora/include/theora/theora.h +++ b/media/libtheora/include/theora/theora.h @@ -315,6 +315,20 @@ typedef struct theora_comment{ */ #define TH_DECCTL_SET_PPLEVEL (3) +/**Sets the maximum distance between key frames. + * This can be changed during an encode, but will be bounded by + * <tt>1<<th_info#keyframe_granule_shift</tt>. + * If it is set before encoding begins, th_info#keyframe_granule_shift will + * be enlarged appropriately. + * + * \param[in] buf ogg_uint32_t: The maximum distance between key + * frames. 
+ * \param[out] buf ogg_uint32_t: The actual maximum distance set. + * \retval TH_FAULT \a theora_state or \a buf is NULL. + * \retval TH_EINVAL \a buf_sz is not sizeof(ogg_uint32_t). + * \retval TH_IMPL Not supported by this implementation.*/ +#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) + /**Set the granule position. * Call this after a seek, to update the internal granulepos * in the decoder, to insure that subsequent frames are marked @@ -332,20 +346,6 @@ typedef struct theora_comment{ * \ref decctlcodes "decoder control codes". * Keep any experimental or vendor-specific values above \c 0x8000.*/ /*@{*/ -/**Sets the Huffman tables to use. - * The tables are copied, not stored by reference, so they can be freed after - * this call. - * NULL may be specified to revert to the default tables. - * - * \param[in] buf #th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS] - * \retval TH_FAULT \a theora_state is NULL. - * \retval TH_EINVAL Encoding has already begun or one or more of the given - * tables is not full or prefix-free, \a buf is - * NULL and \a buf_sz is not zero, or \a buf is - * non-NULL and \a buf_sz is not - * sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS. - * \retval TH_IMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_HUFFMAN_CODES (0) /**Sets the quantization parameters to use. * The parameters are copied, not stored by reference, so they can be freed * after this call. @@ -365,19 +365,6 @@ typedef struct theora_comment{ * sizeof(#th_quant_info). * \retval TH_IMPL Not supported by this implementation.*/ #define TH_ENCCTL_SET_QUANT_PARAMS (2) -/**Sets the maximum distance between key frames. - * This can be changed during an encode, but will be bounded by - * <tt>1<<th_info#keyframe_granule_shift</tt>. - * If it is set before encoding begins, th_info#keyframe_granule_shift will - * be enlarged appropriately. - * - * \param[in] buf ogg_uint32_t: The maximum distance between key - * frames. - * \param[out] buf ogg_uint32_t: The actual maximum distance set. 
- * \retval TH_FAULT \a theora_state or \a buf is NULL. - * \retval TH_EINVAL \a buf_sz is not sizeof(ogg_uint32_t). - * \retval TH_IMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4) /**Disables any encoder features that would prevent lossless transcoding back * to VP3. * This primarily means disabling block-level QI values and not using 4MV mode @@ -434,41 +421,6 @@ typedef struct theora_comment{ * \retval TH_IMPL Not supported by this implementation in the current * encoding mode.*/ #define TH_ENCCTL_SET_SPLEVEL (14) -/**Puts the encoder in VBR mode. - * This can be done at any time during the encoding process, with different - * configuration parameters, to encode different regions of the video segment - * with different qualities. - * See the #th_info struct documentation for details on how the default - * encoding mode is chosen. - * - * \param[in] buf #th_vbr_cfg: the configuration parameters. - * This may be NULL, in which case the current VBR - * configuration is unchanged. - * The default is to use the QI setting passed in via the - * #th_info struct when the encoder was initialized, with a - * full range of admissible quantizers. - * \retval OC_EFAULT \a theora_state is NULL. - * \retval TH_EINVAL The configuration parameters do not meet one of their - * stated requirements, \a buf is NULL and - * \a buf_sz is not zero, or \a buf is non-NULL - * and \a buf_sz is not sizeof(#th_vbr_cfg). - * \retval TH_IMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SETUP_VBR (16) -/**Puts the encoder in CQI mode. - * This can be done at any time during the encoding process, with different QI - * values. - * See the #th_info struct documentation for details on how the default - * encoding mode is chosen. - * - * \param[in] buf #th_cqi_cfg: the configuration parameters. - * This may be NULL, in which case the current CQI - * configuration is unchanged. 
- * The default is to use the QI setting passed in via the - * #th_info struct when the encoder was initialized. - * \retval OC_EFAULT \a theora_state is NULL. - * \retval TH_EINVAL \a buf_sz is not sizeof(#th_cqi_cfg). - * \retval TH_IMPL Not supported by this implementation.*/ -#define TH_ENCCTL_SETUP_CQI (18) /*@}*/ #define OC_FAULT -1 /**< General failure */ diff --git a/media/libtheora/include/theora/theoradec.h b/media/libtheora/include/theora/theoradec.h index 24f79bef30f3..7c08caadf982 100644 --- a/media/libtheora/include/theora/theoradec.h +++ b/media/libtheora/include/theora/theoradec.h @@ -20,6 +20,7 @@ #if !defined(_O_THEORA_THEORADEC_H_) # define _O_THEORA_THEORADEC_H_ (1) +# include <stddef.h> # include <ogg/ogg.h> # include "codec.h" @@ -213,6 +214,22 @@ typedef struct th_setup_info th_setup_info; extern int th_decode_headerin(th_info *_info,th_comment *_tc, th_setup_info **_setup,ogg_packet *_op); /**Allocates a decoder instance. + * + * Security Warning: The Theora format supports very large frame sizes, + * potentially even larger than the address space of a 32-bit machine, and + * creating a decoder context allocates the space for several frames of data. + * If the allocation fails here, your program will crash, possibly at some + * future point because the OS kernel returned a valid memory range and will + * only fail when it tries to map the pages in it the first time they are + * used. + * Even if it succeeds, you may experience a denial of service if the frame + * size is large enough to cause excessive paging. + * If you are integrating libtheora in a larger application where such things + * are undesirable, it is highly recommended that you check the frame size in + * \a _info before calling this function and refuse to decode streams where it + * is larger than some reasonable maximum. + * libtheora will not check this for you, because there may be machines that + * can handle such streams and applications that wish to. 
* \param _info A #th_info struct filled via th_decode_headerin(). * \param _setup A #th_setup_info handle returned via * th_decode_headerin(). @@ -253,7 +270,7 @@ extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf, * The player can skip the call to th_decode_ycbcr_out(), * as the contents of the decoded frame buffer have not * changed. - * \retval TH_EFAULT \a _dec or _op was NULL. + * \retval TH_EFAULT \a _dec or \a _op was NULL. * \retval TH_EBADPACKET \a _op does not contain encoded video data. * \retval TH_EIMPL The video data uses bitstream features which this * library does not support.*/ diff --git a/media/libtheora/lib/Makefile.in b/media/libtheora/lib/Makefile.in index eaa056da7372..0e7cf3a9792c 100644 --- a/media/libtheora/lib/Makefile.in +++ b/media/libtheora/lib/Makefile.in @@ -48,12 +48,10 @@ FORCE_STATIC_LIB= 1 DEFINES += -DTHEORA_DISABLE_ENCODE ifeq ($(findstring 86,$(OS_TEST)), 86) -ifneq ($(OS_ARCH),WINNT) ifneq ($(OS_ARCH),SunOS) DEFINES += -DOC_X86ASM -DUSE_ASM endif endif -endif VPATH := $(srcdir) $(srcdir)/dec @@ -62,7 +60,7 @@ CSRCS = \ huffdec.c \ quant.c \ dequant.c \ - bitwise.c \ + bitpack.c \ internal.c \ decinfo.c \ decapiwrapper.c \ @@ -75,6 +73,17 @@ CSRCS = \ $(NULL) ifeq ($(findstring 86,$(OS_TEST)), 86) +ifeq ($(OS_ARCH),WINNT) +VPATH += $(srcdir)/dec/x86_vc + +CSRCS += \ + mmxfrag.c \ + mmxloopfilter.c \ + x86state.c \ + mmxstate.c \ + mmxidct.c \ + $(NULL) +else VPATH += $(srcdir)/dec/x86 CSRCS += \ @@ -84,6 +93,7 @@ CSRCS += \ mmxidct.c \ $(NULL) endif +endif include $(topsrcdir)/config/rules.mk diff --git a/media/libtheora/lib/config.h b/media/libtheora/lib/config.h index 3ac50fe90b86..0cbbbcac3fa3 100644 --- a/media/libtheora/lib/config.h +++ b/media/libtheora/lib/config.h @@ -56,19 +56,19 @@ #define PACKAGE_NAME "libtheora" /* Define to the full name and version of this package. 
*/ -#define PACKAGE_STRING "libtheora 1.0beta3" +#define PACKAGE_STRING "libtheora 1.0" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "libtheora" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.0beta3" +#define PACKAGE_VERSION "1.0" /* Define to 1 if you have the ANSI C header files. */ #define STDC_HEADERS 1 /* Define to exclude encode support from the build */ - +/* #undef THEORA_DISABLE_ENCODE */ /* Define to exclude floating point code from the build */ /* #undef THEORA_DISABLE_FLOAT */ @@ -77,4 +77,4 @@ /* Version number of package */ -#define VERSION "1.0beta3" +#define VERSION "1.0" diff --git a/media/libtheora/lib/cpu.c b/media/libtheora/lib/cpu.c index c1b19d8071f0..8da50d0703c8 100644 --- a/media/libtheora/lib/cpu.c +++ b/media/libtheora/lib/cpu.c @@ -6,157 +6,222 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** - + CPU capability detection for x86 processors. Originally written by Rudolf Marek. 
- + function: - last mod: $Id: cpu.c 14718 2008-04-12 08:36:58Z conrad $ + last mod: $Id: cpu.c 15427 2008-10-21 02:36:19Z xiphmont $ ********************************************************************/ #include "cpu.h" #if !defined(USE_ASM) - -ogg_uint32_t oc_cpu_flags_get(void){ +static ogg_uint32_t oc_cpu_flags_get(void){ return 0; } +#else +# if !defined(_MSC_VER) +# if defined(__amd64__)||defined(__x86_64__) +/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when + compiling with -fPIC.*/ +# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ + __asm__ __volatile__( \ + "cpuid\n\t" \ + :[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \ + :"a"(_op) \ + :"cc" \ + ) +# else +/*On x86-32, not so much.*/ +# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ + __asm__ __volatile__( \ + "xchgl %%ebx,%[ebx]\n\t" \ + "cpuid\n\t" \ + "xchgl %%ebx,%[ebx]\n\t" \ + :[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \ + :"a"(_op) \ + :"cc" \ + ) +# endif +# else +/*Why does MSVC need this complicated rigamarole? + At this point I honestly do not care.*/ -#else /* USE_ASM */ - -# if defined(_MSC_VER) -/* Visual C cpuid helper function. For VS2005 we could - as well use the _cpuid builtin, but that wouldn't work - for VS2003 users, so we do it in inline assembler */ - -static void oc_cpuid_helper (ogg_uint32_t * CpuInfo, ogg_uint32_t op){ - _asm { - mov eax, [op] - mov esi, CpuInfo +/*Visual C cpuid helper function. 
+ For VS2005 we could as well use the _cpuid builtin, but that wouldn't work + for VS2003 users, so we do it in inline assembler.*/ +static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){ + _asm{ + mov eax,[_op] + mov esi,_cpu_info cpuid - mov [esi + 0], eax - mov [esi + 4], ebx - mov [esi + 8], ecx - mov [esi +12], edx + mov [esi+0],eax + mov [esi+4],ebx + mov [esi+8],ecx + mov [esi+12],edx } } # define cpuid(_op,_eax,_ebx,_ecx,_edx) \ - { \ - ogg_uint32_t nfo[4]; \ - oc_cpuid_helper (nfo, (_op)); \ - (_eax) = nfo[0],(_ebx) = nfo[1]; \ - (_ecx) = nfo[2],(_edx) = nfo[3]; \ + do{ \ + ogg_uint32_t cpu_info[4]; \ + oc_cpuid_helper(cpu_info,_op); \ + (_eax)=cpu_info[0]; \ + (_ebx)=cpu_info[1]; \ + (_ecx)=cpu_info[2]; \ + (_edx)=cpu_info[3]; \ + }while(0) + +static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){ + _asm{ + pushfd + pushfd + pop eax + mov ebx,eax + xor eax,200000h + push eax + popfd + pushfd + pop eax + popfd + mov ecx,_eax + mov [ecx],eax + mov ecx,_ebx + mov [ecx],ebx } +} +# endif -# elif (defined(__amd64__) || defined(__x86_64__)) +static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ + ogg_uint32_t flags; + /*If there isn't even MMX, give up.*/ + if(!(_edx&0x00800000))return 0; + flags=OC_CPU_X86_MMX; + if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE; + if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2; + if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI; + if(_ecx&0x00000100)flags|=OC_CPU_X86_SSSE3; + if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1; + if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2; + return flags; +} -# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ - __asm__ __volatile__( \ - "push %%rbx\n\t" \ - "cpuid\n\t" \ - "movl %%ebx,%1\n\t" \ - "pop %%rbx\n\t" \ - :"=a" (_eax), \ - "=r" (_ebx), \ - "=c" (_ecx), \ - "=d" (_edx) \ - :"a" (_op) \ - :"cc" \ - ) -# else /* x86_32, GCC */ +static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){ + ogg_uint32_t flags; + /*If there 
isn't even MMX, give up.*/ + if(!(_edx&0x00800000))return 0; + flags=OC_CPU_X86_MMX; + if(_edx&0x00400000)flags|=OC_CPU_X86_MMXEXT; + if(_edx&0x80000000)flags|=OC_CPU_X86_3DNOW; + if(_edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT; + if(_ecx&0x00000040)flags|=OC_CPU_X86_SSE4A; + if(_ecx&0x00000800)flags|=OC_CPU_X86_SSE5; + return flags; +} -# define cpuid(_op,_eax,_ebx,_ecx,_edx) \ - __asm__ __volatile__( \ - "pushl %%ebx\n\t" \ - "cpuid\n\t" \ - "movl %%ebx,%1\n\t" \ - "popl %%ebx\n\t" \ - :"=a" (_eax), \ - "=r" (_ebx), \ - "=c" (_ecx), \ - "=d" (_edx) \ - :"a" (_op) \ - :"cc" \ - ) - -# endif /* arch switch */ - -ogg_uint32_t oc_cpu_flags_get(void){ - ogg_uint32_t flags = 0; +static ogg_uint32_t oc_cpu_flags_get(void){ + ogg_uint32_t flags; ogg_uint32_t eax; ogg_uint32_t ebx; ogg_uint32_t ecx; ogg_uint32_t edx; - -# if !defined(_MSC_VER) && !defined(__amd64__) && !defined(__x86_64__) - /* check for cpuid */ +# if !defined(__amd64__)&&!defined(__x86_64__) + /*Not all x86-32 chips support cpuid, so we have to check.*/ +# if !defined(_MSC_VER) __asm__ __volatile__( "pushfl\n\t" "pushfl\n\t" - "popl %0\n\t" - "movl %0,%1\n\t" - "xorl $0x200000,%0\n\t" - "pushl %0\n\t" + "popl %[a]\n\t" + "movl %[a],%[b]\n\t" + "xorl $0x200000,%[a]\n\t" + "pushl %[a]\n\t" "popfl\n\t" "pushfl\n\t" - "popl %0\n\t" + "popl %[a]\n\t" "popfl\n\t" - :"=r" (eax), - "=r" (ebx) + :[a]"=r"(eax),[b]"=r"(ebx) : :"cc" ); +# else + oc_detect_cpuid_helper(&eax,&ebx); +# endif /*No cpuid.*/ if(eax==ebx)return 0; -# endif /* GCC, x86_32 */ - +# endif cpuid(0,eax,ebx,ecx,edx); - if(ebx==0x756e6547&&edx==0x49656e69&&ecx==0x6c65746e){ - /*Intel:*/ -inteltest: + /* l e t n I e n i u n e G*/ + if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547|| + /* 6 8 x M T e n i u n e G*/ + ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){ + /*Intel, Transmeta (tested with Crusoe TM5800):*/ cpuid(1,eax,ebx,ecx,edx); - if((edx&0x00800000)==0)return 0; - flags=OC_CPU_X86_MMX; - 
if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE; - if(edx&0x04000000)flags|=OC_CPU_X86_SSE2; + flags=oc_parse_intel_flags(edx,ecx); } - else if(ebx==0x68747541&&edx==0x69746e65&&ecx==0x444d4163 || - ebx==0x646f6547&&edx==0x79622065&&ecx==0x43534e20){ - /*AMD:*/ - /*Geode:*/ + /* D M A c i t n e h t u A*/ + else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541|| + /* C S N y b e d o e G*/ + ecx==0x43534E20&&edx==0x79622065&&ebx==0x646F6547){ + /*AMD, Geode:*/ cpuid(0x80000000,eax,ebx,ecx,edx); - if(eax<0x80000001)goto inteltest; - cpuid(0x80000001,eax,ebx,ecx,edx); - if((edx&0x00800000)==0)return 0; - flags=OC_CPU_X86_MMX; - if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW; - if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT; - if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT; + if(eax<0x80000001)flags=0; + else{ + cpuid(0x80000001,eax,ebx,ecx,edx); + flags=oc_parse_amd_flags(edx,ecx); + } + /*Also check for SSE.*/ + cpuid(1,eax,ebx,ecx,edx); + flags|=oc_parse_intel_flags(edx,ecx); + } + /*Technically some VIA chips can be configured in the BIOS to return any + string here the user wants. + There is a special detection method that can be used to identify such + processors, but in my opinion, if the user really wants to change it, they + deserve what they get.*/ + /* s l u a H r u a t n e C*/ + else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){ + /*VIA:*/ + /*I only have documentation for the C7 (Esther) and Isaiah (forthcoming) + chips (thanks to the engineers from Centaur Technology who provided it). + These chips support Intel-like cpuid info. + The C3-2 (Nehemiah) cores appear to, as well.*/ + cpuid(1,eax,ebx,ecx,edx); + flags=oc_parse_intel_flags(edx,ecx); + cpuid(0x80000000,eax,ebx,ecx,edx); + if(eax>=0x80000001){ + /*The (non-Nehemiah) C3 processors support AMD-like cpuid info. + We need to check this even if the Intel test succeeds to pick up 3DNow! + support on these processors. 
+ Unlike actual AMD processors, we cannot _rely_ on this info, since + some cores (e.g., the 693 stepping of the Nehemiah) claim to support + this function, yet return edx=0, despite the Intel test indicating + MMX support. + Therefore the features detected here are strictly added to those + detected by the Intel test.*/ + /*TODO: How about earlier chips?*/ + cpuid(0x80000001,eax,ebx,ecx,edx); + /*Note: As of the C7, this function returns Intel-style extended feature + flags, not AMD-style. + Currently, this only defines bits 11, 20, and 29 (0x20100800), which + do not conflict with any of the AMD flags we inspect. + For the remaining bits, Intel tells us, "Do not count on their value", + but VIA assures us that they will all be zero (at least on the C7 and + Isaiah chips). + In the (unlikely) event a future processor uses bits 18, 19, 30, or 31 + (0xC0C00000) for something else, we will have to add code to detect + the model to decide when it is appropriate to inspect them.*/ + flags|=oc_parse_amd_flags(edx,ecx); + } } else{ /*Implement me.*/ flags=0; } - -# ifdef DEBUG - if (flags) { - TH_DEBUG("vectorized instruction sets supported:"); - if (flags & OC_CPU_X86_MMX) TH_DEBUG(" mmx"); - if (flags & OC_CPU_X86_MMXEXT) TH_DEBUG(" mmxext"); - if (flags & OC_CPU_X86_SSE) TH_DEBUG(" sse"); - if (flags & OC_CPU_X86_SSE2) TH_DEBUG(" sse2"); - if (flags & OC_CPU_X86_3DNOW) TH_DEBUG(" 3dnow"); - if (flags & OC_CPU_X86_3DNOWEXT) TH_DEBUG(" 3dnowext"); - TH_DEBUG("\n"); - } -# endif - return flags; } - -#endif /* USE_ASM */ +#endif diff --git a/media/libtheora/lib/cpu.h b/media/libtheora/lib/cpu.h index 12ab32dbce3d..efe5e9f52717 100644 --- a/media/libtheora/lib/cpu.h +++ b/media/libtheora/lib/cpu.h @@ -6,11 +6,11 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: cpu.h 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: cpu.h 15430 2008-10-21 05:03:55Z giles $ ********************************************************************/ @@ -18,13 +18,17 @@ # define _x86_cpu_H (1) #include "internal.h" -#define OC_CPU_X86_MMX (1<<0) -#define OC_CPU_X86_3DNOW (1<<1) +#define OC_CPU_X86_MMX (1<<0) +#define OC_CPU_X86_3DNOW (1<<1) #define OC_CPU_X86_3DNOWEXT (1<<2) -#define OC_CPU_X86_MMXEXT (1<<3) -#define OC_CPU_X86_SSE (1<<4) -#define OC_CPU_X86_SSE2 (1<<5) - -ogg_uint32_t oc_cpu_flags_get(void); +#define OC_CPU_X86_MMXEXT (1<<3) +#define OC_CPU_X86_SSE (1<<4) +#define OC_CPU_X86_SSE2 (1<<5) +#define OC_CPU_X86_PNI (1<<6) +#define OC_CPU_X86_SSSE3 (1<<7) +#define OC_CPU_X86_SSE4_1 (1<<8) +#define OC_CPU_X86_SSE4_2 (1<<9) +#define OC_CPU_X86_SSE4A (1<<10) +#define OC_CPU_X86_SSE5 (1<<11) #endif diff --git a/media/libtheora/lib/dec/apiwrapper.c b/media/libtheora/lib/dec/apiwrapper.c index d44313df57d3..aa4416d7f17c 100644 --- a/media/libtheora/lib/dec/apiwrapper.c +++ b/media/libtheora/lib/dec/apiwrapper.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: apiwrapper.c 14321 2007-12-22 18:09:29Z tterribe $ + last mod: $Id: apiwrapper.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/apiwrapper.h b/media/libtheora/lib/dec/apiwrapper.h index 65dc72f726ce..211021fc08ef 100644 --- a/media/libtheora/lib/dec/apiwrapper.h +++ b/media/libtheora/lib/dec/apiwrapper.h @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** diff --git a/media/libtheora/lib/dec/bitpack.c b/media/libtheora/lib/dec/bitpack.c new file mode 100644 index 000000000000..3836150c24c6 --- /dev/null +++ b/media/libtheora/lib/dec/bitpack.c @@ -0,0 +1,121 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: packing variable sized words into an octet stream + last mod: $Id: bitpack.c 15400 2008-10-15 12:10:58Z tterribe $ + + ********************************************************************/ + +/*We're 'MSb' endian; if we write a word but read individual bits, + then we'll read the MSb first.*/ + +#include +#include +#include "bitpack.h" + +void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes){ + memset(_b,0,sizeof(*_b)); + _b->buffer=_b->ptr=_buf; + _b->storage=_bytes; +} + +int theorapackB_look1(oggpack_buffer *_b,long *_ret){ + if(_b->endbyte>=_b->storage){ + *_ret=0L; + return -1; + } + *_ret=(_b->ptr[0]>>7-_b->endbit)&1; + return 0; +} + +void theorapackB_adv1(oggpack_buffer *_b){ + if(++(_b->endbit)>7){ + _b->endbit=0; + _b->ptr++; + _b->endbyte++; + } +} + +/*Here we assume that 0<=_bits&&_bits<=32.*/ +int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret){ + long ret; + long m; + long d; + int fail; + m=32-_bits; + _bits+=_b->endbit; + d=_b->storage-_b->endbyte; + if(d<=4){ + /*Not the main path.*/ + if(d*8<_bits){ + *_ret=0L; + fail=-1; + goto overflow; + } + /*Special case to avoid reading _b->ptr[0], which might be past the end of + the buffer; also skips some useless accounting.*/ + else if(!_bits){ + *_ret=0L; + return 0; + } + } + ret=_b->ptr[0]<<24+_b->endbit; + if(_bits>8){ + ret|=_b->ptr[1]<<16+_b->endbit; + if(_bits>16){ + ret|=_b->ptr[2]<<8+_b->endbit; + if(_bits>24){ + ret|=_b->ptr[3]<<_b->endbit; + if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit; + } + } + } + *_ret=((ret&0xFFFFFFFFUL)>>(m>>1))>>(m+1>>1); + fail=0; +overflow: + _b->ptr+=_bits>>3; + _b->endbyte+=_bits>>3; + _b->endbit=_bits&7; + return fail; +} + +int theorapackB_read1(oggpack_buffer *_b,long *_ret){ + int fail; + 
if(_b->endbyte>=_b->storage){ + /*Not the main path.*/ + *_ret=0L; + fail=-1; + } + else{ + *_ret=(_b->ptr[0]>>7-_b->endbit)&1; + fail=0; + } + _b->endbit++; + if(_b->endbit>7){ + _b->endbit=0; + _b->ptr++; + _b->endbyte++; + } + return fail; +} + +long theorapackB_bytes(oggpack_buffer *_b){ + return _b->endbyte+(_b->endbit+7>>3); +} + +long theorapackB_bits(oggpack_buffer *_b){ + return _b->endbyte*8+_b->endbit; +} + +unsigned char *theorapackB_get_buffer(oggpack_buffer *_b){ + return _b->buffer; +} diff --git a/media/libtheora/lib/dec/bitpack.h b/media/libtheora/lib/dec/bitpack.h new file mode 100644 index 000000000000..1bff3fa5025d --- /dev/null +++ b/media/libtheora/lib/dec/bitpack.h @@ -0,0 +1,38 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* + * * + * THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: packing variable sized words into an octet stream + last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $ + + ********************************************************************/ +#if !defined(_bitpack_H) +# define _bitpack_H (1) +# include + +void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes); +int theorapackB_look1(oggpack_buffer *_b,long *_ret); +void theorapackB_adv1(oggpack_buffer *_b); +/*Here we assume 0<=_bits&&_bits<=32.*/ +int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret); +int theorapackB_read1(oggpack_buffer *_b,long *_ret); +long theorapackB_bytes(oggpack_buffer *_b); +long theorapackB_bits(oggpack_buffer *_b); +unsigned char *theorapackB_get_buffer(oggpack_buffer *_b); + +/*These two functions are implemented locally in huffdec.c*/ +/*Read in bits without advancing the bitptr. + Here we assume 0<=_bits&&_bits<=32.*/ +/*static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret);*/ +/*static void theorapackB_adv(oggpack_buffer *_b,int _bits);*/ + + +#endif diff --git a/media/libtheora/lib/dec/bitwise.c b/media/libtheora/lib/dec/bitwise.c deleted file mode 100644 index be118d1b08bc..000000000000 --- a/media/libtheora/lib/dec/bitwise.c +++ /dev/null @@ -1,126 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: packing variable sized words into an octet stream - last mod: $Id: bitwise.c 14546 2008-02-29 01:14:05Z tterribe $ - - ********************************************************************/ - -/* We're 'MSb' endian; if we write a word but read individual bits, - then we'll read the msb first */ - -#include -#include -#include "bitwise.h" - -void theorapackB_reset(oggpack_buffer *b){ - b->ptr=b->buffer; - b->buffer[0]=0; - b->endbit=b->endbyte=0; -} - -void theorapackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes){ - memset(b,0,sizeof(*b)); - b->buffer=b->ptr=buf; - b->storage=bytes; -} - -int theorapackB_look1(oggpack_buffer *b,long *_ret){ - if(b->endbyte>=b->storage){ - *_ret=0L; - return -1; - } - *_ret=((b->ptr[0]>>(7-b->endbit))&1); - return 0; -} - -void theorapackB_adv1(oggpack_buffer *b){ - if(++(b->endbit)>7){ - b->endbit=0; - b->ptr++; - b->endbyte++; - } -} - -/* bits <= 32 */ -int theorapackB_read(oggpack_buffer *b,int bits,long *_ret){ - long ret; - long m; - int fail; - m=32-bits; - bits+=b->endbit; - if(b->endbyte+4>=b->storage){ - /* not the main path */ - if(b->endbyte*8+bits>b->storage*8){ - *_ret=0L; - fail=-1; - goto overflow; - } - /* special case to avoid reading b->ptr[0], which might be past the end of - the buffer; also skips some useless accounting */ - else if(!bits){ - *_ret=0L; - return 0; - } - } - ret=b->ptr[0]<<(24+b->endbit); - if(bits>8){ - ret|=b->ptr[1]<<(16+b->endbit); - if(bits>16){ - ret|=b->ptr[2]<<(8+b->endbit); - if(bits>24){ - ret|=b->ptr[3]<<(b->endbit); - if(bits>32 && b->endbit) - ret|=b->ptr[4]>>(8-b->endbit); - } - } - } - *_ret=((ret&0xffffffffUL)>>(m>>1))>>((m+1)>>1); - fail=0; -overflow: - b->ptr+=bits/8; - b->endbyte+=bits/8; - b->endbit=bits&7; - return fail; -} - -int 
theorapackB_read1(oggpack_buffer *b,long *_ret){ - int fail; - if(b->endbyte>=b->storage){ - /* not the main path */ - *_ret=0L; - fail=-1; - goto overflow; - } - *_ret=(b->ptr[0]>>(7-b->endbit))&1; - fail=0; -overflow: - b->endbit++; - if(b->endbit>7){ - b->endbit=0; - b->ptr++; - b->endbyte++; - } - return fail; -} - -long theorapackB_bytes(oggpack_buffer *b){ - return(b->endbyte+(b->endbit+7)/8); -} - -long theorapackB_bits(oggpack_buffer *b){ - return(b->endbyte*8+b->endbit); -} - -unsigned char *theorapackB_get_buffer(oggpack_buffer *b){ - return(b->buffer); -} diff --git a/media/libtheora/lib/dec/bitwise.h b/media/libtheora/lib/dec/bitwise.h deleted file mode 100644 index bce0c4de4b8e..000000000000 --- a/media/libtheora/lib/dec/bitwise.h +++ /dev/null @@ -1,76 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: packing variable sized words into an octet stream - last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $ - - ********************************************************************/ -#if !defined(_bitwise_H) -# define _bitwise_H (1) -# include - -void theorapackB_reset(oggpack_buffer *b); -void theorapackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes); -/* Read in bits without advancing the bitptr; bits <= 32 */ -static int theorapackB_look(oggpack_buffer *b,int bits,long *_ret); -int theorapackB_look1(oggpack_buffer *b,long *_ret); -static void theorapackB_adv(oggpack_buffer *b,int bits); -void theorapackB_adv1(oggpack_buffer *b); -/* bits <= 32 */ -int theorapackB_read(oggpack_buffer *b,int bits,long *_ret); -int theorapackB_read1(oggpack_buffer *b,long *_ret); -long theorapackB_bytes(oggpack_buffer *b); -long theorapackB_bits(oggpack_buffer *b); -unsigned char *theorapackB_get_buffer(oggpack_buffer *b); - -/*These two functions are only used in one place, and declaring them static so - they can be inlined saves considerable function call overhead.*/ - -/* Read in bits without advancing the bitptr; bits <= 32 */ -static int theorapackB_look(oggpack_buffer *b,int bits,long *_ret){ - long ret; - long m; - m=32-bits; - bits+=b->endbit; - if(b->endbyte+4>=b->storage){ - /* not the main path */ - if(b->endbyte>=b->storage){ - *_ret=0L; - return -1; - } - /*If we have some bits left, but not enough, return the ones we have.*/ - if((b->storage-b->endbyte)*8storage-b->endbyte)*8; - } - ret=b->ptr[0]<<(24+b->endbit); - if(bits>8){ - ret|=b->ptr[1]<<(16+b->endbit); - if(bits>16){ - ret|=b->ptr[2]<<(8+b->endbit); - if(bits>24){ - ret|=b->ptr[3]<<(b->endbit); - if(bits>32&&b->endbit) - ret|=b->ptr[4]>>(8-b->endbit); - } - } - } - 
*_ret=((ret&0xffffffff)>>(m>>1))>>((m+1)>>1); - return 0; -} - -static void theorapackB_adv(oggpack_buffer *b,int bits){ - bits+=b->endbit; - b->ptr+=bits/8; - b->endbyte+=bits/8; - b->endbit=bits&7; -} - -#endif diff --git a/media/libtheora/lib/dec/dct.h b/media/libtheora/lib/dec/dct.h index 8562ad161a5e..09043dc511b9 100644 --- a/media/libtheora/lib/dec/dct.h +++ b/media/libtheora/lib/dec/dct.h @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: dct.h 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: dct.h 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/decapiwrapper.c b/media/libtheora/lib/dec/decapiwrapper.c index 855929cddadc..bceec6c26c9b 100644 --- a/media/libtheora/lib/dec/decapiwrapper.c +++ b/media/libtheora/lib/dec/decapiwrapper.c @@ -6,7 +6,7 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** @@ -29,10 +29,6 @@ static void th_dec_api_clear(th_api_wrapper *_api){ static void theora_decode_clear(theora_state *_td){ if(_td->i!=NULL)theora_info_clear(_td->i); -#ifdef _TH_DEBUG_ - fclose(debugout); - debugout=NULL; -#endif memset(_td,0,sizeof(*_td)); } @@ -92,7 +88,6 @@ int theora_decode_init(theora_state *_td,theora_info *_ci){ th_api_info *apiinfo; th_api_wrapper *api; th_info info; - api=(th_api_wrapper *)_ci->codec_setup; /*Allocate our own combined API wrapper/theora_info struct. 
We put them both in one malloc'd block so that when the API wrapper is @@ -130,11 +125,6 @@ int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){ th_api_wrapper *api; th_info info; int ret; - -#ifdef _TH_DEBUG_ - debugout = fopen("theoradec-debugout.txt","w"); -#endif - api=(th_api_wrapper *)_ci->codec_setup; /*Allocate an API wrapper struct on demand, since it will not also include a theora_info struct like the ones that are used in a theora_state struct.*/ @@ -167,16 +157,9 @@ int theora_decode_packetin(theora_state *_td,ogg_packet *_op){ th_api_wrapper *api; ogg_int64_t gp; int ret; - - if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT; + if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; api=(th_api_wrapper *)_td->i->codec_setup; - if(!api || !api->decode)return OC_FAULT; ret=th_decode_packetin(api->decode,_op,&gp); - -#ifdef _TH_DEBUG_ - dframe++; -#endif - if(ret<0)return OC_BADPACKET; _td->granulepos=gp; return 0; @@ -186,10 +169,9 @@ int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){ th_api_wrapper *api; th_ycbcr_buffer buf; int ret; - - if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT; + if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT; api=(th_api_wrapper *)_td->i->codec_setup; - if(!api || !api->decode)return OC_FAULT; + if(!api->decode)return OC_FAULT; ret=th_decode_ycbcr_out(api->decode,buf); if(ret>=0){ _yuv->y_width=buf[0].width; @@ -202,6 +184,5 @@ int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){ _yuv->u=buf[1].data; _yuv->v=buf[2].data; } - return ret; } diff --git a/media/libtheora/lib/dec/decinfo.c b/media/libtheora/lib/dec/decinfo.c index f46320c74fa1..3c4ba868a6d9 100644 --- a/media/libtheora/lib/dec/decinfo.c +++ b/media/libtheora/lib/dec/decinfo.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: decinfo.c 14719 2008-04-12 11:36:40Z tterribe $ + last mod: $Id: decinfo.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/decint.h b/media/libtheora/lib/dec/decint.h index 656006897ed8..7924c0e0c590 100644 --- a/media/libtheora/lib/dec/decint.h +++ b/media/libtheora/lib/dec/decint.h @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: decint.h 14369 2008-01-05 23:15:32Z tterribe $ + last mod: $Id: decint.h 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ @@ -20,7 +20,7 @@ # define _decint_H (1) # include "theora/theoradec.h" # include "../internal.h" -# include "bitwise.h" +# include "bitpack.h" typedef struct th_setup_info oc_setup_info; typedef struct th_dec_ctx oc_dec_ctx; @@ -47,45 +47,45 @@ struct th_setup_info{ struct th_dec_ctx{ /*Shared encoder/decoder state.*/ - oc_theora_state state; + oc_theora_state state; /*Whether or not packets are ready to be emitted. 
This takes on negative values while there are remaining header packets to be emitted, reaches 0 when the codec is ready for input, and goes to 1 when a frame has been processed and a data packet is ready.*/ - int packet_state; + int packet_state; /*Buffer in which to assemble packets.*/ - oggpack_buffer opb; + oggpack_buffer opb; /*Huffman decode trees.*/ - oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; + oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES]; /*The index of one past the last token in each plane for each coefficient. The final entries are the total number of tokens for each coefficient.*/ - int ti0[3][64]; + int ti0[3][64]; /*The index of one past the last extra bits entry in each plane for each coefficient. The final entries are the total number of extra bits entries for each coefficient.*/ - int ebi0[3][64]; + int ebi0[3][64]; /*The number of outstanding EOB runs at the start of each coefficient in each plane.*/ - int eob_runs[3][64]; + int eob_runs[3][64]; /*The DCT token lists.*/ - unsigned char **dct_tokens; + unsigned char **dct_tokens; /*The extra bits associated with DCT tokens.*/ - ogg_uint16_t **extra_bits; + ogg_uint16_t **extra_bits; /*The out-of-loop post-processing level.*/ - int pp_level; + int pp_level; /*The DC scale used for out-of-loop deblocking.*/ - int pp_dc_scale[64]; + int pp_dc_scale[64]; /*The sharpen modifier used for out-of-loop deringing.*/ - int pp_sharp_mod[64]; + int pp_sharp_mod[64]; /*The DC quantization index of each block.*/ - unsigned char *dc_qis; + unsigned char *dc_qis; /*The variance of each block.*/ - int *variances; + int *variances; /*The storage for the post-processed frame buffer.*/ - unsigned char *pp_frame_data; + unsigned char *pp_frame_data; /*Whether or not the post-processsed frame buffer has space for chroma.*/ - int pp_frame_has_chroma; + int pp_frame_has_chroma; /*The buffer used for the post-processed frame.*/ th_ycbcr_buffer pp_frame_buf; /*The striped decode callback function.*/ diff --git 
a/media/libtheora/lib/dec/decode.c b/media/libtheora/lib/dec/decode.c index c9ac408c19ed..bedacd15a03d 100644 --- a/media/libtheora/lib/dec/decode.c +++ b/media/libtheora/lib/dec/decode.c @@ -6,13 +6,13 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: decode.c 14385 2008-01-09 19:53:18Z giles $ - + last mod: $Id: decode.c 15403 2008-10-16 12:44:05Z tterribe $ + ********************************************************************/ #include @@ -170,7 +170,7 @@ static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info, _dec->state.dequant_table_data[qti][pli]; } oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale, - &_setup->qinfo); + &_setup->qinfo); for(qi=0;qi<64;qi++){ int qsum; qsum=0; @@ -210,38 +210,28 @@ static void oc_dec_clear(oc_dec_ctx *_dec){ static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){ long val; - - TH_DEBUG("\n>>>> beginning frame %ld\n\n",dframe); - /*Check to make sure this is a data packet.*/ theorapackB_read1(&_dec->opb,&val); - TH_DEBUG("frame type = %s, ",val==0?"video":"unknown"); if(val!=0)return TH_EBADPACKET; /*Read in the frame type (I or P).*/ theorapackB_read1(&_dec->opb,&val); _dec->state.frame_type=(int)val; - TH_DEBUG("%s\n",val?"predicted":"key"); /*Read in the current qi.*/ theorapackB_read(&_dec->opb,6,&val); _dec->state.qis[0]=(int)val; - TH_DEBUG("frame quality = { %ld ",val); theorapackB_read1(&_dec->opb,&val); if(!val)_dec->state.nqis=1; else{ theorapackB_read(&_dec->opb,6,&val); _dec->state.qis[1]=(int)val; - TH_DEBUG("%ld ",val); theorapackB_read1(&_dec->opb,&val); if(!val)_dec->state.nqis=2; else{ theorapackB_read(&_dec->opb,6,&val); - TH_DEBUG("%ld ",val); _dec->state.qis[2]=(int)val; 
_dec->state.nqis=3; } } - TH_DEBUG("}\n"); - if(_dec->state.frame_type==OC_INTRA_FRAME){ /*Keyframes have 3 unused configuration bits, holdovers from VP3 days. Most of the other unused bits in the VP3 headers were eliminated. @@ -305,7 +295,6 @@ static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){ int run_count; theorapackB_read1(&_dec->opb,&val); flag=(int)val; - sb=_dec->state.sbs; sb_end=sb+_dec->state.nsbs; run_count=npartial=0; @@ -319,7 +308,6 @@ static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){ npartial+=flag; sb++; } - while(--run_count>0&&sbopb,&val); @@ -349,7 +337,6 @@ static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){ for(;sb->coded_partially;sb++); theorapackB_read1(&_dec->opb,&val); flag=(int)val; - while(sbopb); @@ -428,71 +415,6 @@ static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){ } /*TODO: run_count should be 0 here. If it's not, we should issue a warning of some kind.*/ - - -#ifdef _TH_DEBUG_ - // assuming 4:2:0 right now; THIS IS WRONG but only an issue if dumping debug info - TH_DEBUG("predicted (partially coded frame)\n"); - TH_DEBUG("superblock coded flags = {"); - int x,y,i; - int w = _dec->state.info.frame_width; - int h = _dec->state.info.frame_height; - - i=0; - for(y=0;y< (h+31)/32;y++){ - TH_DEBUG("\n "); - for(x=0;x< (w+31)/32;x++,i++) - TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)| - (_dec->state.sbs[i].coded_fully)); - } - - TH_DEBUG("\n "); - for(y=0;y< (h+63)/64;y++){ - TH_DEBUG("\n "); - for(x=0;x< (w+63)/64;x++,i++) - TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)| - (_dec->state.sbs[i].coded_fully)); - } - TH_DEBUG("\n "); - for(y=0;y< (h+63)/64;y++){ - TH_DEBUG("\n "); - for(x=0;x< (w+63)/64;x++,i++) - TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)| - (_dec->state.sbs[i].coded_fully)); - } - TH_DEBUG("\n}\n"); - - if(i!=_dec->state.nsbs) - TH_DEBUG("WARNING! 
superblock count, raster %d != flat %d\n", - i,_dec->state.nsbs); - - TH_DEBUG("block coded flags = {"); - - i=0; - for(y=0;y< (h+7)/8;y++){ - TH_DEBUG("\n "); - for(x=0;x< (w+7)/8;x++,i++) - TH_DEBUG("%x", (_dec->state.frags[i].coded!=0)); - } - TH_DEBUG("\n "); - for(y=0;y< (h+15)/16;y++){ - TH_DEBUG("\n "); - for(x=0;x< (w+15)/16;x++,i++) - TH_DEBUG("%x", (_dec->state.frags[i].coded!=0)); - } - TH_DEBUG("\n "); - for(y=0;y< (h+15)/16;y++){ - TH_DEBUG("\n "); - for(x=0;x< (w+15)/16;x++,i++) - TH_DEBUG("%x", (_dec->state.frags[i].coded!=0)); - } - TH_DEBUG("\n}\n"); - - if(i!=_dec->state.nfrags) - TH_DEBUG("WARNING! block count, raster %d != flat %d\n", - i,_dec->state.nfrags); -#endif - } @@ -521,62 +443,42 @@ static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){ oc_mb *mb; oc_mb *mb_end; const int *alphabet; - long val,j; + long val; int scheme0_alphabet[8]; int mode_scheme; theorapackB_read(&_dec->opb,3,&val); mode_scheme=(int)val; - TH_DEBUG("mode encode scheme = %d\n",(int)val); - if(mode_scheme==0){ int mi; /*Just in case, initialize the modes to something. 
If the bitstream doesn't contain each index exactly once, it's likely corrupt and the rest of the packet is garbage anyway, but this way we won't crash, and we'll decode SOMETHING.*/ - TH_DEBUG("mode scheme list = { "); /*LOOP VECTORIZES.*/ for(mi=0;miopb,3,&val); scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi]; - TH_DEBUG("%d ",(int)val); } - TH_DEBUG("}\n"); alphabet=scheme0_alphabet; - }else - alphabet=OC_MODE_ALPHABETS[mode_scheme-1]; - if(mode_scheme==7) - mode_unpack=oc_clc_mode_unpack; - else - mode_unpack=oc_vlc_mode_unpack; + } + else alphabet=OC_MODE_ALPHABETS[mode_scheme-1]; + if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack; + else mode_unpack=oc_vlc_mode_unpack; mb=_dec->state.mbs; mb_end=mb+_dec->state.nmbs; - - TH_DEBUG("mode list = { "); - for(j=0;mbmode!=OC_MODE_INVALID){ int bi; for(bi=0;bi<4;bi++){ - int fragi; - fragi=mb->map[0][bi]; - if(fragi>=0&&_dec->state.frags[fragi].coded)break; + int fragi; + fragi=mb->map[0][bi]; + if(fragi>=0&&_dec->state.frags[fragi].coded)break; } - if(bi<4){ - mb->mode=alphabet[(*mode_unpack)(&_dec->opb)]; - -#ifdef _TH_DEBUG_ - if((j&0x1f)==0) - TH_DEBUG("\n "); - TH_DEBUG("%d ",mb->mode); - j++; -#endif - - }else - mb->mode=OC_MODE_INTER_NOMV; + if(bi<4)mb->mode=alphabet[(*mode_unpack)(&_dec->opb)]; + else mb->mode=OC_MODE_INTER_NOMV; } } - TH_DEBUG("\n}\n"); } @@ -629,23 +531,16 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){ const int *map_idxs; long val; int map_nidxs; -#ifdef _TH_DEBUG_ - int j=0; -#endif oc_mv last_mv[2]; oc_mv cbmvs[4]; set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt]; theorapackB_read1(&_dec->opb,&val); - TH_DEBUG("motion vector table = %d\n",(int)val); mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack; map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt]; map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt]; memset(last_mv,0,sizeof(last_mv)); mb=_dec->state.mbs; mb_end=mb+_dec->state.nmbs; - - TH_DEBUG("motion vectors = {"); - 
for(;mbmode!=OC_MODE_INVALID){ oc_fragment *frag; oc_mv mbmv; @@ -667,98 +562,62 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){ if(ncoded<=0)continue; mb_mode=mb->mode; switch(mb_mode){ - case OC_MODE_INTER_MV_FOUR: - { - oc_mv lbmvs[4]; - int bi; - /*Mark the tail of the list, so we don't accidentally go past it.*/ - coded[ncoded]=-1; - for(bi=codedi=0;bi<4;bi++){ - if(coded[codedi]==bi){ - codedi++; - frag=_dec->state.frags+mb->map[0][bi]; - frag->mbmode=mb_mode; - frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); - frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); - -#ifdef _TH_DEBUG_ - if((j&0x7)==0) - TH_DEBUG("\n "); - TH_DEBUG("%+03d,%+03d ",frag->mv[0],frag->mv[1]); - j++; -#endif - - } - else lbmvs[bi][0]=lbmvs[bi][1]=0; - } - if(codedi>0){ - last_mv[1][0]=last_mv[0][0]; - last_mv[1][1]=last_mv[0][1]; - last_mv[0][0]=lbmvs[coded[codedi-1]][0]; - last_mv[0][1]=lbmvs[coded[codedi-1]][1]; - } - if(codedistate.frags+mb->map[mapi>>2][bi]; - frag->mbmode=mb_mode; - frag->mv[0]=cbmvs[bi][0]; - frag->mv[1]=cbmvs[bi][1]; - } - } - } - break; - case OC_MODE_INTER_MV: - { - last_mv[1][0]=last_mv[0][0]; - last_mv[1][1]=last_mv[0][1]; - mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); - mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); - -#ifdef _TH_DEBUG_ - if((j&0x7)==0) - TH_DEBUG("\n "); - TH_DEBUG("%+03d,%+03d ",mbmv[0],mbmv[1]); - j++; -#endif - - } - break; - case OC_MODE_INTER_MV_LAST: - { + case OC_MODE_INTER_MV_FOUR:{ + oc_mv lbmvs[4]; + int bi; + /*Mark the tail of the list, so we don't accidentally go past it.*/ + coded[ncoded]=-1; + for(bi=codedi=0;bi<4;bi++){ + if(coded[codedi]==bi){ + codedi++; + frag=_dec->state.frags+mb->map[0][bi]; + frag->mbmode=mb_mode; + frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); + frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); + } + else lbmvs[bi][0]=lbmvs[bi][1]=0; + } + if(codedi>0){ + 
last_mv[1][0]=last_mv[0][0]; + last_mv[1][1]=last_mv[0][1]; + last_mv[0][0]=lbmvs[coded[codedi-1]][0]; + last_mv[0][1]=lbmvs[coded[codedi-1]][1]; + } + if(codedistate.frags+mb->map[mapi>>2][bi]; + frag->mbmode=mb_mode; + frag->mv[0]=cbmvs[bi][0]; + frag->mv[1]=cbmvs[bi][1]; + } + } + }break; + case OC_MODE_INTER_MV:{ + last_mv[1][0]=last_mv[0][0]; + last_mv[1][1]=last_mv[0][1]; + mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb); + mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb); + }break; + case OC_MODE_INTER_MV_LAST:{ mbmv[0]=last_mv[0][0]; mbmv[1]=last_mv[0][1]; - } - break; - case OC_MODE_INTER_MV_LAST2: - { + }break; + case OC_MODE_INTER_MV_LAST2:{ mbmv[0]=last_mv[1][0]; mbmv[1]=last_mv[1][1]; last_mv[1][0]=last_mv[0][0]; last_mv[1][1]=last_mv[0][1]; last_mv[0][0]=mbmv[0]; last_mv[0][1]=mbmv[1]; - } - break; - case OC_MODE_GOLDEN_MV: - { + }break; + case OC_MODE_GOLDEN_MV:{ mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb); mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb); - -#ifdef _TH_DEBUG_ - if((j&0x7)==0) - TH_DEBUG("\n "); - TH_DEBUG("%+03d,%+03d ",mbmv[0],mbmv[1]); - j++; -#endif - - } - break; - default: - mbmv[0]=mbmv[1]=0; - break; + }break; + default:mbmv[0]=mbmv[1]=0;break; } /*4MV mode fills in the fragments itself. For all other modes we can use this common code.*/ @@ -773,9 +632,6 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){ } } } - - TH_DEBUG("\n}\n"); - } static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){ @@ -798,7 +654,7 @@ static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){ else{ long val; int flag; - int nqi0; + int nqi1; int run_count; /*Otherwise, we decode a qi index for each fragment, using two passes of the same binary RLE scheme used for super-block coded bits. 
@@ -810,14 +666,14 @@ static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){ with the corresponding qi's for this frame.*/ theorapackB_read1(&_dec->opb,&val); flag=(int)val; - run_count=nqi0=0; + run_count=nqi1=0; while(coded_fragiopb); full_run=run_count>=4129; do{ _dec->state.frags[*coded_fragi++].qi=flag; - nqi0+=!flag; + nqi1+=flag; } while(--run_count>0&&coded_fragistate.nqis==3&&nqi0state.nqis==3&&nqi1>0){ /*Skip qii==0 fragments.*/ for(coded_fragi=_dec->state.coded_fragis; _dec->state.frags[*coded_fragi].qi==0;coded_fragi++); @@ -1362,7 +1218,7 @@ static int oc_dec_postprocess_init(oc_dec_ctx *_dec){ if(_dec->pp_levelvariances=(int *)_ogg_realloc(_dec->variances, _dec->state.fplanes[0].nfrags*sizeof(_dec->variances[0])); - _dec->pp_frame_data=(unsigned char *)_ogg_realloc( + _dec->pp_frame_data=(unsigned char *)_ogg_realloc( _dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0])); _dec->pp_frame_buf[0].width=_dec->state.info.frame_width; _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; @@ -1382,7 +1238,7 @@ static int oc_dec_postprocess_init(oc_dec_ctx *_dec){ c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2); c_sz=c_w*c_h; frame_sz+=c_sz<<1; - _dec->pp_frame_data=(unsigned char *)_ogg_realloc( + _dec->pp_frame_data=(unsigned char *)_ogg_realloc( _dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0])); _dec->pp_frame_buf[0].width=_dec->state.info.frame_width; _dec->pp_frame_buf[0].height=_dec->state.info.frame_height; @@ -1503,9 +1359,6 @@ static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec, for(fragy=fragy0;fragynhfrags;fragx++,frag++){ if(!frag->coded)continue; -#ifdef _TH_DEBUG_ - frag->quant[0] = frag->dc; /* stash un-predicted dc for debug output */ -#endif pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+= oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last); ncoded_fragis++; @@ -1597,40 +1450,6 @@ static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec, 
_pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli]; oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis[_pli], _pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli); - -#ifdef _TH_DEBUG_ - { - int i,j,k; - int framei=_dec->state.ref_frame_idx[OC_FRAME_SELF]; - int ystride=_dec->state.ref_frame_bufs[framei][_pli].stride; - int *fragi_end = _pipe->coded_fragis[_pli]; - int *fragi = fragi_end-_pipe->ncoded_fragis[_pli]; - - for(;fragistate.frags+*fragi; - unsigned char *src=frag->buffer[framei]; - for(i=0,j=0;j<8;j++){ - for(k=0;k<8;k++,i++) - frag->recon[i] = src[k]; - src+=ystride; - } - } - - fragi = _pipe->uncoded_fragis[_pli]; - fragi_end = fragi+_pipe->nuncoded_fragis[_pli]; - - for(;fragistate.frags+*fragi; - unsigned char *src=frag->buffer[framei]; - for(i=0,j=0;j<8;j++){ - for(k=0;k<8;k++,i++) - frag->recon[i] = src[k]; - src+=ystride; - } - } - } -#endif - } /*Filter a horizontal block edge.*/ @@ -1909,7 +1728,7 @@ static void oc_dering_block(unsigned char *_idata,int _ystride,int _b, static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img, int _pli,int _fragy0,int _fragy_end){ - th_img_plane *iplane; + th_img_plane *iplane; oc_fragment_plane *fplane; oc_fragment *frag; int *variance; @@ -1941,10 +1760,10 @@ static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img, if(strong&&var>sthresh){ oc_dering_block(idata+x,iplane->stride,b, _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); - if(_pli||(b&1)&&*(variance-1)>OC_DERING_THRESH4|| - (b&2)&&variance[1]>OC_DERING_THRESH4|| - (b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4|| - (b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){ + if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4|| + !(b&2)&&variance[1]>OC_DERING_THRESH4|| + !(b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4|| + !(b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){ oc_dering_block(idata+x,iplane->stride,b, _dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1); 
oc_dering_block(idata+x,iplane->stride,b, @@ -2039,7 +1858,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, /*A completely empty packet indicates a dropped frame and is treated exactly like an inter frame with no coded blocks. Only proceed if we have a non-empty packet.*/ - if(_op->bytes!=0){ oc_dec_pipeline_state pipe; th_ycbcr_buffer stripe_buf; @@ -2093,7 +1911,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, } oc_dec_block_qis_unpack(_dec); oc_dec_residual_tokens_unpack(_dec); - /*Update granule position. This must be done before the striped decode callbacks so that the application knows what to do with the frame data.*/ @@ -2203,91 +2020,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op, } notstart=1; } - -#ifdef _TH_DEBUG_ - { - int x,y,i,j,k,xn,yn; - int plane; - int buf; - - /* dump fragment DCT components */ - for(plane=0;plane<3;plane++){ - char *plstr; - int offset; - switch(plane){ - case 0: - plstr="Y"; - xn = _dec->state.info.frame_width>>3; - yn = _dec->state.info.frame_height>>3; - offset = 0; - break; - case 1: - plstr="U"; - xn = _dec->state.info.frame_width>>4; - yn = _dec->state.info.frame_height>>4; - offset = xn*yn*4; - break; - case 2: - plstr="V"; - xn = _dec->state.info.frame_width>>4; - yn = _dec->state.info.frame_height>>4; - offset = xn*yn*5; - break; - } - for(y=0;ystate.frags[i].quant; - break; - case 1: - codecheck=1; - bufn = "coeff"; - ptr = _dec->state.frags[i].freq; - break; - case 2: - codecheck=1; - bufn = "idct"; - ptr = _dec->state.frags[i].time; - break; - case 3: - bufn = "recon"; - ptr = _dec->state.frags[i].loop; - break; - } - - - TH_DEBUG("%s %s [%d][%d] = {",bufn,plstr,x,y); - if(codecheck && !_dec->state.frags[i].coded) - TH_DEBUG(" not coded }\n"); - else{ - int l=0; - for(j=0;j<8;j++){ - TH_DEBUG("\n "); - for(k=0;k<8;k++,l++){ - TH_DEBUG("%d ",ptr[l]); - } - } - TH_DEBUG(" }\n"); - } - } - TH_DEBUG("\n"); - } - } - } - } -#endif - /*Finish filling in the 
reference frame borders.*/ for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli); /*Update the reference frame indices.*/ diff --git a/media/libtheora/lib/dec/dequant.c b/media/libtheora/lib/dec/dequant.c index e538931f1666..50bba85b84fa 100644 --- a/media/libtheora/lib/dec/dequant.c +++ b/media/libtheora/lib/dec/dequant.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: dequant.c 14369 2008-01-05 23:15:32Z tterribe $ + last mod: $Id: dequant.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ @@ -127,65 +127,6 @@ int oc_quant_params_unpack(oggpack_buffer *_opb, } while(qri-->0); } - -#ifdef _TH_DEBUG_ - /* dump the tables */ - { - int i, j, k, l, m; - TH_DEBUG("loop filter limits = {"); - for(i=0;i<64;){ - TH_DEBUG("\n "); - for(j=0;j<16;i++,j++) - TH_DEBUG("%3d ",_qinfo->loop_filter_limits[i]); - } - TH_DEBUG("\n}\n\n"); - - TH_DEBUG("ac scale = {"); - for(i=0;i<64;){ - TH_DEBUG("\n "); - for(j=0;j<16;i++,j++) - TH_DEBUG("%3d ",_qinfo->ac_scale[i]); - } - TH_DEBUG("\n}\n\n"); - - TH_DEBUG("dc scale = {"); - for(i=0;i<64;){ - TH_DEBUG("\n "); - for(j=0;j<16;i++,j++) - TH_DEBUG("%3d ",_qinfo->dc_scale[i]); - } - TH_DEBUG("\n}\n\n"); - - for(k=0;k<2;k++) - for(l=0;l<3;l++){ - char *name[2][3]={ - {"intra Y bases","intra U bases", "intra V bases"}, - {"inter Y bases","inter U bases", "inter V bases"} - }; - - th_quant_ranges *r = &_qinfo->qi_ranges[k][l]; - TH_DEBUG("%s = {\n",name[k][l]); - TH_DEBUG(" ranges = %d\n",r->nranges); - TH_DEBUG(" intervals = { "); - for(i=0;inranges;i++) - TH_DEBUG("%3d ",r->sizes[i]); - TH_DEBUG("}\n"); - TH_DEBUG("\n matricies = { "); - 
for(m=0;mnranges+1;m++){ - TH_DEBUG("\n { "); - for(i=0;i<64;){ - TH_DEBUG("\n "); - for(j=0;j<8;i++,j++) - TH_DEBUG("%3d ",r->base_matrices[m][i]); - } - TH_DEBUG("\n }"); - } - TH_DEBUG("\n }\n"); - } - } - -#endif - _ogg_free(base_mats); return 0; } @@ -227,4 +168,3 @@ void oc_quant_params_clear(th_quant_info *_qinfo){ _ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices); } } - diff --git a/media/libtheora/lib/dec/dequant.h b/media/libtheora/lib/dec/dequant.h index f99f819763f1..928b509e50d2 100644 --- a/media/libtheora/lib/dec/dequant.h +++ b/media/libtheora/lib/dec/dequant.h @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: dequant.h 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: dequant.h 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/enquant.h b/media/libtheora/lib/dec/enquant.h deleted file mode 100644 index 6c441c18ab98..000000000000 --- a/media/libtheora/lib/dec/enquant.h +++ /dev/null @@ -1,43 +0,0 @@ -/******************************************************************** - * * - * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * - * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * - * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * - * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* - * * - * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * - * * - ******************************************************************** - - function: - last mod: $Id: enquant.h 13884 2007-09-22 08:38:10Z giles $ - - ********************************************************************/ - -#if !defined(_enquant_H) -# define _enquant_H (1) -# include "quant.h" - -/*The amount to scale the forward quantizer value by.*/ -#define OC_FQUANT_SCALE ((ogg_uint32_t)1<_field-(char *)0)) +/*These two functions are really part of the bitpack.c module, but + they are only used here. Declaring local static versions so they + can be inlined saves considerable function call overhead.*/ + +/*Read in bits without advancing the bitptr. + Here we assume 0<=_bits&&_bits<=32.*/ +static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret){ + long ret; + long m; + long d; + m=32-_bits; + _bits+=_b->endbit; + d=_b->storage-_b->endbyte; + if(d<=4){ + /*Not the main path.*/ + if(d<=0){ + *_ret=0L; + return -(_bits>d*8); + } + /*If we have some bits left, but not enough, return the ones we have.*/ + if(d*8<_bits)_bits=d*8; + } + ret=_b->ptr[0]<<24+_b->endbit; + if(_bits>8){ + ret|=_b->ptr[1]<<16+_b->endbit; + if(_bits>16){ + ret|=_b->ptr[2]<<8+_b->endbit; + if(_bits>24){ + ret|=_b->ptr[3]<<_b->endbit; + if(_bits>32)ret|=_b->ptr[4]>>8-_b->endbit; + } + } + } + *_ret=((ret&0xFFFFFFFF)>>(m>>1))>>(m+1>>1); + return 0; +} + +/*advance the bitptr*/ +static void theorapackB_adv(oggpack_buffer *_b,int _bits){ + _bits+=_b->endbit; + _b->ptr+=_bits>>3; + _b->endbyte+=_bits>>3; + _b->endbit=_bits&7; +} + + /*The log_2 of the size of a lookup table is allowed to grow to relative to the number of unique nodes it contains. 
E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is diff --git a/media/libtheora/lib/dec/huffdec.h b/media/libtheora/lib/dec/huffdec.h index 4d3ada3de437..cc87b409224b 100644 --- a/media/libtheora/lib/dec/huffdec.h +++ b/media/libtheora/lib/dec/huffdec.h @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: huffdec.h 14359 2008-01-04 20:11:13Z tterribe $ + last mod: $Id: huffdec.h 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/huffman.h b/media/libtheora/lib/dec/huffman.h index 6137eb6c0483..59096e1e8b70 100644 --- a/media/libtheora/lib/dec/huffman.h +++ b/media/libtheora/lib/dec/huffman.h @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: huffman.h 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: huffman.h 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/idct.c b/media/libtheora/lib/dec/idct.c index 614e61539a26..21ac83f14989 100644 --- a/media/libtheora/lib/dec/idct.c +++ b/media/libtheora/lib/dec/idct.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: idct.c 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: idct.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ @@ -169,7 +169,6 @@ static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){ _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); } - /*Performs an inverse 8 point Type-II DCT transform. The output is scaled by a factor of 2 relative to the orthonormal version of the transform. @@ -204,7 +203,6 @@ static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){ _y[7<<3]=(ogg_int16_t)(t[0]-t[7]); } - /*Performs an inverse 8 point Type-II DCT transform. The output is scaled by a factor of 2 relative to the orthonormal version of the transform. diff --git a/media/libtheora/lib/dec/idct.h b/media/libtheora/lib/dec/idct.h index b52e4b449e50..3ee53712e5c6 100644 --- a/media/libtheora/lib/dec/idct.h +++ b/media/libtheora/lib/dec/idct.h @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: idct.h 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: idct.h 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/info.c b/media/libtheora/lib/dec/info.c index d4989efd547a..26e7f42a9cdb 100644 --- a/media/libtheora/lib/dec/info.c +++ b/media/libtheora/lib/dec/info.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: info.c 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: info.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/internal.c b/media/libtheora/lib/dec/internal.c index 2d341bf8ba36..3fe62e55b4c9 100644 --- a/media/libtheora/lib/dec/internal.c +++ b/media/libtheora/lib/dec/internal.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: internal.c 14385 2008-01-09 19:53:18Z giles $ + last mod: $Id: internal.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/ocintrin.h b/media/libtheora/lib/dec/ocintrin.h index 44a54630d05f..317f5aeaed0d 100644 --- a/media/libtheora/lib/dec/ocintrin.h +++ b/media/libtheora/lib/dec/ocintrin.h @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: ocintrin.h 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: ocintrin.h 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/quant.c b/media/libtheora/lib/dec/quant.c index 800860007657..5cb7784dbc7a 100644 --- a/media/libtheora/lib/dec/quant.c +++ b/media/libtheora/lib/dec/quant.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: quant.c 14375 2008-01-06 05:37:33Z tterribe $ + last mod: $Id: quant.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ @@ -21,8 +21,8 @@ #include "quant.h" #include "decint.h" -unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2}; -unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2}; +static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2}; +static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2}; /*Initializes the dequantization tables from a set of quantizer info. 
Currently the dequantizer (and elsewhere enquantizer) tables are expected to @@ -39,114 +39,84 @@ unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2}; qi values change between frames (this is what VP3 did).*/ void oc_dequant_tables_init(oc_quant_table *_dequant[2][3], int _pp_dc_scale[64],const th_quant_info *_qinfo){ - int qti; /* coding mode: intra or inter */ - int pli; /* Y U V */ + /*coding mode: intra or inter.*/ + int qti; + /*Y', C_b, C_r*/ + int pli; for(qti=0;qti<2;qti++){ for(pli=0;pli<3;pli++){ oc_quant_tables stage; - - int qi; /* quality index */ - int qri; /* range iterator */ - + /*Quality index.*/ + int qi; + /*Range iterator.*/ + int qri; for(qi=0,qri=0; qri<=_qinfo->qi_ranges[qti][pli].nranges; qri++){ - th_quant_base base; - - ogg_uint32_t q; - int qi_start; - int qi_end; - int ci; - memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri], - sizeof(base)); - - qi_start=qi; - if(qri==_qinfo->qi_ranges[qti][pli].nranges) - qi_end=qi+1; - else - qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri]; - - /* Iterate over quality indicies in this range */ - for(;;){ - - /*In the original VP3.2 code, the rounding offset and the size of the - dead zone around 0 were controlled by a "sharpness" parameter. - The size of our dead zone is now controlled by the per-coefficient - quality thresholds returned by our HVS module. - We round down from a more accurate value when the quality of the - reconstruction does not fall below our threshold and it saves bits. 
- Hence, all of that VP3.2 code is gone from here, and the remaining - floating point code has been implemented as equivalent integer code - with exact precision.*/ - - /* for postprocess, not dequant */ - if(_pp_dc_scale!=NULL) - _pp_dc_scale[qi]=(int)((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/160); - - /*Scale DC the coefficient from the proper table.*/ - q=((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/100)<<2; - q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX); - stage[qi][0]=(ogg_uint16_t)q; - - /*Now scale AC coefficients from the proper table.*/ - for(ci=1;ci<64;ci++){ - q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2; - q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX); - stage[qi][ci]=(ogg_uint16_t)q; - } - - if(++qi>=qi_end)break; - - /*Interpolate the next base matrix.*/ - for(ci=0;ci<64;ci++){ - base[ci]=(unsigned char) - ((2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+ - (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci]) - +_qinfo->qi_ranges[qti][pli].sizes[qri])/ - (2*_qinfo->qi_ranges[qti][pli].sizes[qri])); - } - } + th_quant_base base; + ogg_uint32_t q; + int qi_start; + int qi_end; + int ci; + memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri], + sizeof(base)); + qi_start=qi; + if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1; + else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri]; + /*Iterate over quality indicies in this range.*/ + for(;;){ + ogg_uint32_t qfac; + /*In the original VP3.2 code, the rounding offset and the size of the + dead zone around 0 were controlled by a "sharpness" parameter. + The size of our dead zone is now controlled by the per-coefficient + quality thresholds returned by our HVS module. + We round down from a more accurate value when the quality of the + reconstruction does not fall below our threshold and it saves bits. 
+ Hence, all of that VP3.2 code is gone from here, and the remaining + floating point code has been implemented as equivalent integer code + with exact precision.*/ + qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]; + /*For postprocessing, not dequantization.*/ + if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160); + /*Scale DC the coefficient from the proper table.*/ + q=(qfac/100)<<2; + q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX); + stage[qi][0]=(ogg_uint16_t)q; + /*Now scale AC coefficients from the proper table.*/ + for(ci=1;ci<64;ci++){ + q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2; + q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX); + stage[qi][ci]=(ogg_uint16_t)q; + } + if(++qi>=qi_end)break; + /*Interpolate the next base matrix.*/ + for(ci=0;ci<64;ci++){ + base[ci]=(unsigned char)( + (2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+ + (qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci]) + +_qinfo->qi_ranges[qti][pli].sizes[qri])/ + (2*_qinfo->qi_ranges[qti][pli].sizes[qri])); + } + } } - - /* Staging matricies complete; commit to memory only if this - isn't a duplicate of a preceeding plane. This simple check - helps us improve cache coherency later.*/ + /*Staging matrices complete; commit to memory only if this isn't a + duplicate of a preceeding plane. 
+ This simple check helps us improve cache coherency later.*/ { - int dupe = 0; - int i,j; - for(i=0;i<=qti;i++){ - for(j=0;j<(idc*_dc_iquant+15>>5); /*LOOP VECTORIZES.*/ for(ci=0;ci<64;ci++)res_buf[ci]=p; - -#ifdef _TH_DEBUG_ - { - int i; - _frag->freq[0] = _frag->dc*_dc_iquant; - _frag->time[0] = p; - for(i=1;i<64;i++){ - _frag->quant[i] = 0; - _frag->freq[i] = 0; - _frag->time[i] = p; - } - } -#endif - } else{ - -#ifdef _TH_DEBUG_ - { - int i; - for(i=1;i<_ncoefs;i++) - _frag->quant[i] = _dct_coeffs[i]; - for(;i<64;i++) - _frag->quant[i] = 0; - } -#endif - /*First, dequantize the coefficients.*/ dct_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant); for(zzi=1;zzi<_ncoefs;zzi++){ @@ -869,21 +843,6 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag, ci=OC_FZIG_ZAG[zzi]; dct_buf[ci]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*_ac_iquant[ci]); } - -#ifdef _TH_DEBUG_ - for(;zzi<64;zzi++){ - int ci; - ci=OC_FZIG_ZAG[zzi]; - dct_buf[ci]=0; - } - - { - int i; - for(i=0;i<64;i++) - _frag->freq[i] = dct_buf[i]; - } -#endif - /*Then, fill in the remainder of the coefficients with 0's, and perform the iDCT.*/ if(_last_zzi<10){ @@ -894,15 +853,6 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag, for(;zzi<64;zzi++)dct_buf[OC_FZIG_ZAG[zzi]]=0; oc_idct8x8_c(res_buf,dct_buf); } - -#ifdef _TH_DEBUG_ - { - int i; - for(i=0;i<64;i++) - _frag->time[i] = res_buf[i]; - } -#endif - } /*Fill in the target buffer.*/ dst_framei=_state->ref_frame_idx[OC_FRAME_SELF]; @@ -1038,7 +988,7 @@ void oc_state_loop_filter_frag_rows(oc_theora_state *_state,int *_bv, } void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv, - int _refi,int _pli,int _fragy0,int _fragy_end){ + int _refi,int _pli,int _fragy0,int _fragy_end){ th_img_plane *iplane; oc_fragment_plane *fplane; oc_fragment *frag_top; @@ -1050,7 +1000,6 @@ void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv, _bv+=127; 
iplane=_state->ref_frame_bufs[_refi]+_pli; fplane=_state->fplanes+_pli; - /*The following loops are constructed somewhat non-intuitively on purpose. The main idea is: if a block boundary has at least one coded fragment on it, the filter is applied to it. @@ -1079,46 +1028,6 @@ void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv, iplane->stride,_bv); } } - - -#ifdef _TH_DEBUG_ - { - int i,j,k,l; - unsigned char *src; - - for(l=0;l<5;l++){ - oc_fragment *f; - switch(l){ - case 0: - f = frag; - break; - case 1: /* left */ - if(frag == frag0)continue; - f = frag-1; - break; - case 2: /* bottom (top once flipped) */ - if(frag0 == frag_top)continue; - f = frag - fplane->nhfrags; - break; - case 3: /* right */ - if(frag+1 >= frag_end) continue; - f = frag + 1; - break; - case 4: /* top (bottom once flipped) */ - if(frag+fplane->nhfrags >= frag_bot)continue; - f = frag + fplane->nhfrags; - break; - } - - src = f->buffer[_refi]; - for(i=0,j=0;j<8;j++){ - for(k=0;k<8;k++,i++) - f->loop[i] = src[k]; - src+=iplane->stride; - } - } - } -#endif frag++; } frag0+=fplane->nhfrags; diff --git a/media/libtheora/lib/dec/x86/mmxfrag.c b/media/libtheora/lib/dec/x86/mmxfrag.c index 8b4059778916..b4f8167a628f 100644 --- a/media/libtheora/lib/dec/x86/mmxfrag.c +++ b/media/libtheora/lib/dec/x86/mmxfrag.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: mmxfrag.c 14345 2008-01-04 18:02:21Z tterribe $ + last mod: $Id: mmxfrag.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ @@ -21,6 +21,7 @@ Note: Loops are unrolled for best performance. 
The iteration each instruction belongs to is marked in the comments as #i.*/ #include "x86int.h" +#include #if defined(USE_ASM) @@ -133,8 +134,8 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, :[residue]"r"(_residue), [dst]"r"(_dst), [dst4]"r"(_dst+(_dst_ystride<<2)), - [dst_ystride]"r"((long)_dst_ystride), - [dst_ystride3]"r"((long)_dst_ystride*3) + [dst_ystride]"r"((ptrdiff_t)_dst_ystride), + [dst_ystride3]"r"((ptrdiff_t)_dst_ystride*3) :"memory" ); } @@ -185,8 +186,8 @@ void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride, /*Advance dst.*/ "lea (%[dst],%[dst_ystride],2),%[dst]\n\t" :[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src) - :[dst_ystride]"r"((long)_dst_ystride), - [src_ystride]"r"((long)_src_ystride) + :[dst_ystride]"r"((ptrdiff_t)_dst_ystride), + [src_ystride]"r"((ptrdiff_t)_src_ystride) :"memory" ); } @@ -278,7 +279,7 @@ void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride, "lea (%[dst],%[ystride],2),%[dst]\n\t" :[dst]"+r"(_dst),[residue]"+r"(_residue), [src1]"+r"(_src1),[src2]"+r"(_src2) - :[ystride]"r"((long)_dst_ystride) + :[ystride]"r"((ptrdiff_t)_dst_ystride) :"memory" ); } diff --git a/media/libtheora/lib/dec/x86/mmxidct.c b/media/libtheora/lib/dec/x86/mmxidct.c index 52d4a973ac3f..5dbbe201ad51 100644 --- a/media/libtheora/lib/dec/x86/mmxidct.c +++ b/media/libtheora/lib/dec/x86/mmxidct.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: mmxidct.c 14357 2008-01-04 20:05:28Z tterribe $ + last mod: $Id: mmxidct.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/x86/mmxstate.c b/media/libtheora/lib/dec/x86/mmxstate.c index 694a53159629..1753646724f7 100644 --- a/media/libtheora/lib/dec/x86/mmxstate.c +++ b/media/libtheora/lib/dec/x86/mmxstate.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: mmxstate.c 14385 2008-01-09 19:53:18Z giles $ + last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ @@ -19,6 +19,7 @@ Originally written by Rudolf Marek.*/ #include "x86int.h" #include "../../internal.h" +#include #if defined(USE_ASM) @@ -182,9 +183,9 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, const int *fragi; const int *fragi_end; int dst_framei; - long dst_ystride; + ptrdiff_t dst_ystride; int src_framei; - long src_ystride; + ptrdiff_t src_ystride; dst_framei=_state->ref_frame_idx[_dst_frame]; src_framei=_state->ref_frame_idx[_src_frame]; dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride; @@ -194,14 +195,14 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, oc_fragment *frag; unsigned char *dst; unsigned char *src; - long esi; + ptrdiff_t s; frag=_state->frags+*fragi; 
dst=frag->buffer[dst_framei]; src=frag->buffer[src_framei]; __asm__ __volatile__( /*src+0*src_ystride*/ "movq (%[src]),%%mm0\n\t" - /*esi=src_ystride*3*/ + /*s=src_ystride*3*/ "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t" /*src+1*src_ystride*/ "movq (%[src],%[src_ystride]),%%mm1\n\t" @@ -211,7 +212,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, "movq (%[src],%[s]),%%mm3\n\t" /*dst+0*dst_ystride*/ "movq %%mm0,(%[dst])\n\t" - /*esi=dst_ystride*3*/ + /*s=dst_ystride*3*/ "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t" /*dst+1*dst_ystride*/ "movq %%mm1,(%[dst],%[dst_ystride])\n\t" @@ -225,7 +226,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, "lea (%[dst],%[dst_ystride],4),%[dst]\n\t" /*src+0*src_ystride*/ "movq (%[src]),%%mm0\n\t" - /*esi=src_ystride*3*/ + /*s=src_ystride*3*/ "lea (%[src_ystride],%[src_ystride],2),%[s]\n\t" /*src+1*src_ystride*/ "movq (%[src],%[src_ystride]),%%mm1\n\t" @@ -235,7 +236,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, "movq (%[src],%[s]),%%mm3\n\t" /*dst+0*dst_ystride*/ "movq %%mm0,(%[dst])\n\t" - /*esi=dst_ystride*3*/ + /*s=dst_ystride*3*/ "lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t" /*dst+1*dst_ystride*/ "movq %%mm1,(%[dst],%[dst_ystride])\n\t" @@ -243,7 +244,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, "movq %%mm2,(%[dst],%[dst_ystride],2)\n\t" /*dst+3*dst_ystride*/ "movq %%mm3,(%[dst],%[s])\n\t" - :[s]"=&S"(esi) + :[s]"=&r"(s) :[dst]"r"(dst),[src]"r"(src),[dst_ystride]"r"(dst_ystride), [src_ystride]"r"(src_ystride) :"memory" @@ -255,12 +256,12 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, static void loop_filter_v(unsigned char *_pix,int _ystride, const ogg_int16_t *_ll){ - long esi; + ptrdiff_t s; _pix-=_ystride*2; __asm__ __volatile__( /*mm0=0*/ "pxor %%mm0,%%mm0\n\t" - /*esi=_ystride*3*/ + /*s=_ystride*3*/ "lea (%[ystride],%[ystride],2),%[s]\n\t" 
/*mm7=_pix[0...8]*/ "movq (%[pix]),%%mm7\n\t" @@ -427,8 +428,8 @@ static void loop_filter_v(unsigned char *_pix,int _ystride, /*Write it back out.*/ "movq %%mm4,(%[pix],%[ystride])\n\t" "movq %%mm1,(%[pix],%[ystride],2)\n\t" - :[s]"=&S"(esi) - :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll) + :[s]"=&r"(s) + :[pix]"r"(_pix),[ystride]"r"((ptrdiff_t)_ystride),[ll]"r"(_ll) :"memory" ); } @@ -437,14 +438,16 @@ static void loop_filter_v(unsigned char *_pix,int _ystride, Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all four p0's to one register we must transpose the values in four mmx regs. When half is done we repeat this for the rest.*/ -static void loop_filter_h4(unsigned char *_pix,long _ystride, +static void loop_filter_h4(unsigned char *_pix,ptrdiff_t _ystride, const ogg_int16_t *_ll){ - long esi; - long edi; + ptrdiff_t s; + /*d doesn't technically need to be 64-bit on x86-64, but making it so will + help avoid partial register stalls.*/ + ptrdiff_t d; __asm__ __volatile__( /*x x x x 3 2 1 0*/ "movd (%[pix]),%%mm0\n\t" - /*esi=_ystride*3*/ + /*s=_ystride*3*/ "lea (%[ystride],%[ystride],2),%[s]\n\t" /*x x x x 7 6 5 4*/ "movd (%[pix],%[ystride]),%%mm1\n\t" @@ -557,19 +560,19 @@ static void loop_filter_h4(unsigned char *_pix,long _ystride, "packuswb %%mm7,%%mm4\n\t" /*mm5=E D A 9 6 5 2 1*/ "punpcklbw %%mm4,%%mm5\n\t" - /*edi=6 5 2 1*/ - "movd %%mm5,%%edi\n\t" - "movw %%di,1(%[pix])\n\t" + /*d=6 5 2 1*/ + "movd %%mm5,%[d]\n\t" + "movw %w[d],1(%[pix])\n\t" /*Why is there such a big stall here?*/ "psrlq $32,%%mm5\n\t" - "shrl $16,%%edi\n\t" - "movw %%di,1(%[pix],%[ystride])\n\t" - /*edi=E D A 9*/ - "movd %%mm5,%%edi\n\t" - "movw %%di,1(%[pix],%[ystride],2)\n\t" - "shrl $16,%%edi\n\t" - "movw %%di,1(%[pix],%[s])\n\t" - :[s]"=&S"(esi),[d]"=&D"(edi), + "shr $16,%[d]\n\t" + "movw %w[d],1(%[pix],%[ystride])\n\t" + /*d=E D A 9*/ + "movd %%mm5,%[d]\n\t" + "movw %w[d],1(%[pix],%[ystride],2)\n\t" + "shr $16,%[d]\n\t" + "movw 
%w[d],1(%[pix],%[s])\n\t" + :[s]"=&r"(s),[d]"=&r"(d), [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll) : :"memory" diff --git a/media/libtheora/lib/dec/x86/x86int.h b/media/libtheora/lib/dec/x86/x86int.h index 0576376b1a8c..05f9c57c1578 100644 --- a/media/libtheora/lib/dec/x86/x86int.h +++ b/media/libtheora/lib/dec/x86/x86int.h @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: x86int.h 14375 2008-01-06 05:37:33Z tterribe $ + last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $ ********************************************************************/ diff --git a/media/libtheora/lib/dec/x86/x86state.c b/media/libtheora/lib/dec/x86/x86state.c index fac6db319c91..28a559ba4433 100644 --- a/media/libtheora/lib/dec/x86/x86state.c +++ b/media/libtheora/lib/dec/x86/x86state.c @@ -6,12 +6,12 @@ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. 
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: x86state.c 13884 2007-09-22 08:38:10Z giles $ + last mod: $Id: x86state.c 15427 2008-10-21 02:36:19Z xiphmont $ ********************************************************************/ @@ -19,7 +19,7 @@ #if defined(USE_ASM) -#include "../../cpu.h" +#include "../../cpu.c" void oc_state_vtable_init_x86(oc_theora_state *_state){ _state->cpu_flags=oc_cpu_flags_get(); diff --git a/media/libtheora/lib/dec/x86_vc/mmxfrag.c b/media/libtheora/lib/dec/x86_vc/mmxfrag.c new file mode 100644 index 000000000000..e87e0640d0e9 --- /dev/null +++ b/media/libtheora/lib/dec/x86_vc/mmxfrag.c @@ -0,0 +1,214 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: + + ********************************************************************/ +#include "../../internal.h" + +/* ------------------------------------------------------------------------ + MMX reconstruction fragment routines for Visual Studio. + Tested with VS2005. Should compile for VS2003 and VC6 as well. + + Initial implementation 2007 by Nils Pipenbrinck. 
+ ---------------------------------------------------------------------*/ + +#if defined(USE_ASM) + +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, + const ogg_int16_t *_residue){ + /* --------------------------------------------------------------------- + This function does the intra reconstruction step with 8 iterations + unrolled. The iteration for each instruction is noted by the #id in the + comments (in case you want to reconstruct it) + --------------------------------------------------------------------- */ + _asm{ + mov edi, [_residue] /* load residue ptr */ + mov eax, 0x00800080 /* generate constant */ + mov ebx, [_dst_ystride] /* load dst-stride */ + mov edx, [_dst] /* load dest pointer */ + + /* unrolled loop begins here */ + + movd mm0, eax /* load constant */ + movq mm1, [edi+ 8*0] /* #1 load low residue */ + movq mm2, [edi+ 8*1] /* #1 load high residue */ + punpckldq mm0, mm0 /* build constant */ + movq mm3, [edi+ 8*2] /* #2 load low residue */ + movq mm4, [edi+ 8*3] /* #2 load high residue */ + movq mm5, [edi+ 8*4] /* #3 load low residue */ + movq mm6, [edi+ 8*5] /* #3 load high residue */ + paddsw mm1, mm0 /* #1 bias low residue */ + paddsw mm2, mm0 /* #1 bias high residue */ + packuswb mm1, mm2 /* #1 pack to byte */ + paddsw mm3, mm0 /* #2 bias low residue */ + paddsw mm4, mm0 /* #2 bias high residue */ + packuswb mm3, mm4 /* #2 pack to byte */ + paddsw mm5, mm0 /* #3 bias low residue */ + paddsw mm6, mm0 /* #3 bias high residue */ + packuswb mm5, mm6 /* #3 pack to byte */ + movq [edx], mm1 /* #1 write row */ + movq [edx + ebx], mm3 /* #2 write row */ + movq [edx + ebx*2], mm5 /* #3 write row */ + movq mm1, [edi+ 8*6] /* #4 load low residue */ + lea ecx, [ebx + ebx*2] /* make dst_ystride * 3 */ + movq mm2, [edi+ 8*7] /* #4 load high residue */ + movq mm3, [edi+ 8*8] /* #5 load low residue */ + lea esi, [ebx*4 + ebx] /* make dst_ystride * 5 */ + movq mm4, [edi+ 8*9] /* #5 load high residue */ + movq mm5, [edi+ 8*10] /* #6 load 
low residue */ + lea eax, [ecx*2 + ebx] /* make dst_ystride * 7 */ + movq mm6, [edi+ 8*11] /* #6 load high residue */ + paddsw mm1, mm0 /* #4 bias low residue */ + paddsw mm2, mm0 /* #4 bias high residue */ + packuswb mm1, mm2 /* #4 pack to byte */ + paddsw mm3, mm0 /* #5 bias low residue */ + paddsw mm4, mm0 /* #5 bias high residue */ + packuswb mm3, mm4 /* #5 pack to byte */ + paddsw mm5, mm0 /* #6 bias low residue */ + paddsw mm6, mm0 /* #6 bias high residue */ + packuswb mm5, mm6 /* #6 pack to byte */ + movq [edx + ecx], mm1 /* #4 write row */ + movq [edx + ebx*4], mm3 /* #5 write row */ + movq [edx + esi], mm5 /* #6 write row */ + movq mm1, [edi+ 8*12] /* #7 load low residue */ + movq mm2, [edi+ 8*13] /* #7 load high residue */ + movq mm3, [edi+ 8*14] /* #8 load low residue */ + movq mm4, [edi+ 8*15] /* #8 load high residue */ + paddsw mm1, mm0 /* #7 bias low residue */ + paddsw mm2, mm0 /* #7 bias high residue */ + packuswb mm1, mm2 /* #7 pack to byte */ + paddsw mm3, mm0 /* #8 bias low residue */ + paddsw mm4, mm0 /* #8 bias high residue */ + packuswb mm3, mm4 /* #8 pack to byte */ + movq [edx + ecx*2], mm1 /* #7 write row */ + movq [edx + eax], mm3 /* #8 write row */ + } +} + + + +void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride, + const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){ + /* --------------------------------------------------------------------- + This function does the inter reconstruction step with two iterations + running in parallel to hide some load-latencies and break the dependency + chains. 
The iteration for each instruction is noted by the #id in the + comments (in case you want to reconstruct it) + --------------------------------------------------------------------- */ + _asm{ + pxor mm0, mm0 /* generate constant 0 */ + mov esi, [_src] + mov edi, [_residue] + mov eax, [_src_ystride] + mov edx, [_dst] + mov ebx, [_dst_ystride] + mov ecx, 4 + + align 16 + +nextchunk: + movq mm3, [esi] /* #1 load source */ + movq mm1, [edi+0] /* #1 load residue low */ + movq mm2, [edi+8] /* #1 load residue high */ + movq mm7, [esi+eax] /* #2 load source */ + movq mm4, mm3 /* #1 get copy of src */ + movq mm5, [edi+16] /* #2 load residue low */ + punpckhbw mm4, mm0 /* #1 expand high source */ + movq mm6, [edi+24] /* #2 load residue high */ + punpcklbw mm3, mm0 /* #1 expand low source */ + paddsw mm4, mm2 /* #1 add residue high */ + movq mm2, mm7 /* #2 get copy of src */ + paddsw mm3, mm1 /* #1 add residue low */ + punpckhbw mm2, mm0 /* #2 expand high source */ + packuswb mm3, mm4 /* #1 final row pixels */ + punpcklbw mm7, mm0 /* #2 expand low source */ + movq [edx], mm3 /* #1 write row */ + paddsw mm2, mm6 /* #2 add residue high */ + add edi, 32 /* residue += 4 */ + paddsw mm7, mm5 /* #2 add residue low */ + sub ecx, 1 /* update loop counter */ + packuswb mm7, mm2 /* #2 final row */ + lea esi, [esi+eax*2] /* src += stride * 2 */ + movq [edx + ebx], mm7 /* #2 write row */ + lea edx, [edx+ebx*2] /* dst += stride * 2 */ + jne nextchunk + } +} + + +void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride, + const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2, + int _src2_ystride,const ogg_int16_t *_residue){ + /* --------------------------------------------------------------------- + This function does the inter2 reconstruction step. The building of the + average is done with a bit-twiddling trick to avoid excessive register + copy work during byte to word conversion. 
+ + average = (a & b) + (((a ^ b) & 0xfe) >> 1); + + (shown for a single byte; it's done with 8 of them at a time) + + Slightly faster than the obvious method using add and shift, but not an + earthshaking improvement either. + + If anyone comes up with a way that produces bit-identical outputs + using the pavgb instruction let me know and I'll do the 3dnow codepath. + --------------------------------------------------------------------- */ + _asm{ + mov eax, 0xfefefefe + mov esi, [_src1] + mov edi, [_src2] + movd mm1, eax + mov ebx, [_residue] + mov edx, [_dst] + mov eax, [_dst_ystride] + punpckldq mm1, mm1 /* replicate lsb32 */ + mov ecx, 8 /* init loop counter */ + pxor mm0, mm0 /* constant zero */ + sub edx, eax /* dst -= dst_stride */ + + align 16 + +nextrow: + movq mm2, [esi] /* load source1 */ + movq mm3, [edi] /* load source2 */ + movq mm5, [ebx + 0] /* load lower residue */ + movq mm6, [ebx + 8] /* load higher residue */ + add esi, _src1_ystride /* src1 += src1_stride */ + add edi, _src2_ystride /* src2 += src2_stride */ + movq mm4, mm2 /* get copy of source1 */ + pand mm2, mm3 /* s1 & s2 (avg part) */ + pxor mm3, mm4 /* s1 ^ s2 (avg part) */ + add ebx, 16 /* residue++ */ + pand mm3, mm1 /* mask out low bits */ + psrlq mm3, 1 /* shift xor avg-part */ + paddd mm3, mm2 /* build final average */ + add edx, eax /* dst += dst_stride */ + movq mm2, mm3 /* get copy of average */ + punpckhbw mm3, mm0 /* average high */ + punpcklbw mm2, mm0 /* average low */ + paddsw mm3, mm6 /* high + residue */ + paddsw mm2, mm5 /* low + residue */ + sub ecx, 1 /* update loop counter */ + packuswb mm2, mm3 /* pack and saturate */ + movq [edx], mm2 /* write row */ + jne nextrow + } +} + +void oc_restore_fpu_mmx(void){ + _asm { emms } +} + +#endif diff --git a/media/libtheora/lib/dec/x86_vc/mmxidct.c b/media/libtheora/lib/dec/x86_vc/mmxidct.c new file mode 100644 index 000000000000..2c171594f6ad --- /dev/null +++ b/media/libtheora/lib/dec/x86_vc/mmxidct.c @@ -0,0 +1,1006 @@ 
+/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: + + ********************************************************************/ + +/* ------------------------------------------------------------------- + MMX based IDCT for the theora codec. + + Originally written by Rudolf Marek, based on code from On2's VP3. + Converted to Visual Studio inline assembly by Nils Pipenbrinck. + + ---------------------------------------------------------------------*/ +#if defined(USE_ASM) + +#include +#include "../dct.h" +#include "../idct.h" +#include "x86int.h" + +/*A table of constants used by the MMX routines.*/ +static const __declspec(align(16)) ogg_uint16_t + OC_IDCT_CONSTS[(7+1)*4]={ + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C1S7,(ogg_uint16_t)OC_C1S7, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C2S6,(ogg_uint16_t)OC_C2S6, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C3S5,(ogg_uint16_t)OC_C3S5, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C4S4,(ogg_uint16_t)OC_C4S4, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C5S3,(ogg_uint16_t)OC_C5S3, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C6S2,(ogg_uint16_t)OC_C6S2, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + (ogg_uint16_t)OC_C7S1,(ogg_uint16_t)OC_C7S1, + 8, 8, 8, 8 +}; + + +void oc_idct8x8_10_mmx(ogg_int16_t _y[64]){ + _asm { + mov edx, [_y] + mov eax, offset OC_IDCT_CONSTS 
+ movq mm2, [edx + 30H] + movq mm6, [eax + 10H] + movq mm4, mm2 + movq mm7, [edx + 18H] + pmulhw mm4, mm6 + movq mm1, [eax + 20H] + pmulhw mm6, mm7 + movq mm5, mm1 + pmulhw mm1, mm2 + movq mm3, [edx + 10H] + pmulhw mm5, mm7 + movq mm0, [eax] + paddw mm4, mm2 + paddw mm6, mm7 + paddw mm2, mm1 + movq mm1, [edx + 38H] + paddw mm7, mm5 + movq mm5, mm0 + pmulhw mm0, mm3 + paddw mm4, mm7 + pmulhw mm5, mm1 + movq mm7, [eax + 30H] + psubw mm6, mm2 + paddw mm0, mm3 + pmulhw mm3, mm7 + movq mm2, [edx + 20H] + pmulhw mm7, mm1 + paddw mm5, mm1 + movq mm1, mm2 + pmulhw mm2, [eax + 08H] + psubw mm3, mm5 + movq mm5, [edx + 28H] + paddw mm0, mm7 + movq mm7, mm5 + psubw mm0, mm4 + pmulhw mm5, [eax + 08H] + paddw mm2, mm1 + pmulhw mm1, [eax + 28H] + paddw mm4, mm4 + paddw mm4, mm0 + psubw mm3, mm6 + paddw mm5, mm7 + paddw mm6, mm6 + pmulhw mm7, [eax + 28H] + paddw mm6, mm3 + movq [edx + 10H], mm4 + psubw mm1, mm5 + movq mm4, [eax + 18H] + movq mm5, mm3 + pmulhw mm3, mm4 + paddw mm7, mm2 + movq [edx + 20H], mm6 + movq mm2, mm0 + movq mm6, [edx] + pmulhw mm0, mm4 + paddw mm5, mm3 + movq mm3, [edx + 08H] + psubw mm5, mm1 + paddw mm2, mm0 + psubw mm6, mm3 + movq mm0, mm6 + pmulhw mm6, mm4 + paddw mm3, mm3 + paddw mm1, mm1 + paddw mm3, mm0 + paddw mm1, mm5 + pmulhw mm4, mm3 + paddw mm6, mm0 + psubw mm6, mm2 + paddw mm2, mm2 + movq mm0, [edx + 10H] + paddw mm2, mm6 + paddw mm4, mm3 + psubw mm2, mm1 + movq mm3, [edx + 20H] + psubw mm4, mm7 + paddw mm1, mm1 + paddw mm7, mm7 + paddw mm1, mm2 + paddw mm7, mm4 + psubw mm4, mm3 + paddw mm3, mm3 + psubw mm6, mm5 + paddw mm5, mm5 + paddw mm3, mm4 + paddw mm5, mm6 + psubw mm7, mm0 + paddw mm0, mm0 + movq [edx + 10H], mm1 + paddw mm0, mm7 + movq mm1, mm4 + punpcklwd mm4, mm5 + movq [edx], mm0 + punpckhwd mm1, mm5 + movq mm0, mm6 + punpcklwd mm6, mm7 + movq mm5, mm4 + punpckldq mm4, mm6 + punpckhdq mm5, mm6 + movq mm6, mm1 + movq [edx + 08H], mm4 + punpckhwd mm0, mm7 + movq [edx + 18H], mm5 + punpckhdq mm6, mm0 + movq mm4, [edx] + punpckldq mm1, mm0 
+ movq mm5, [edx + 10H] + movq mm0, mm4 + movq [edx + 38H], mm6 + punpcklwd mm0, mm5 + movq [edx + 28H], mm1 + punpckhwd mm4, mm5 + movq mm5, mm2 + punpcklwd mm2, mm3 + movq mm1, mm0 + punpckldq mm0, mm2 + punpckhdq mm1, mm2 + movq mm2, mm4 + movq [edx], mm0 + punpckhwd mm5, mm3 + movq [edx + 10H], mm1 + punpckhdq mm4, mm5 + punpckldq mm2, mm5 + movq [edx + 30H], mm4 + movq [edx + 20H], mm2 + movq mm2, [edx + 70H] + movq mm6, [eax + 10H] + movq mm4, mm2 + movq mm7, [edx + 58H] + pmulhw mm4, mm6 + movq mm1, [eax + 20H] + pmulhw mm6, mm7 + movq mm5, mm1 + pmulhw mm1, mm2 + movq mm3, [edx + 50H] + pmulhw mm5, mm7 + movq mm0, [eax] + paddw mm4, mm2 + paddw mm6, mm7 + paddw mm2, mm1 + movq mm1, [edx + 78H] + paddw mm7, mm5 + movq mm5, mm0 + pmulhw mm0, mm3 + paddw mm4, mm7 + pmulhw mm5, mm1 + movq mm7, [eax + 30H] + psubw mm6, mm2 + paddw mm0, mm3 + pmulhw mm3, mm7 + movq mm2, [edx + 60H] + pmulhw mm7, mm1 + paddw mm5, mm1 + movq mm1, mm2 + pmulhw mm2, [eax + 08H] + psubw mm3, mm5 + movq mm5, [edx + 68H] + paddw mm0, mm7 + movq mm7, mm5 + psubw mm0, mm4 + pmulhw mm5, [eax + 08H] + paddw mm2, mm1 + pmulhw mm1, [eax + 28H] + paddw mm4, mm4 + paddw mm4, mm0 + psubw mm3, mm6 + paddw mm5, mm7 + paddw mm6, mm6 + pmulhw mm7, [eax + 28H] + paddw mm6, mm3 + movq [edx + 50H], mm4 + psubw mm1, mm5 + movq mm4, [eax + 18H] + movq mm5, mm3 + pmulhw mm3, mm4 + paddw mm7, mm2 + movq [edx + 60H], mm6 + movq mm2, mm0 + movq mm6, [edx + 40H] + pmulhw mm0, mm4 + paddw mm5, mm3 + movq mm3, [edx + 48H] + psubw mm5, mm1 + paddw mm2, mm0 + psubw mm6, mm3 + movq mm0, mm6 + pmulhw mm6, mm4 + paddw mm3, mm3 + paddw mm1, mm1 + paddw mm3, mm0 + paddw mm1, mm5 + pmulhw mm4, mm3 + paddw mm6, mm0 + psubw mm6, mm2 + paddw mm2, mm2 + movq mm0, [edx + 50H] + paddw mm2, mm6 + paddw mm4, mm3 + psubw mm2, mm1 + movq mm3, [edx + 60H] + psubw mm4, mm7 + paddw mm1, mm1 + paddw mm7, mm7 + paddw mm1, mm2 + paddw mm7, mm4 + psubw mm4, mm3 + paddw mm3, mm3 + psubw mm6, mm5 + paddw mm5, mm5 + paddw mm3, mm4 + paddw 
mm5, mm6 + psubw mm7, mm0 + paddw mm0, mm0 + movq [edx + 50H], mm1 + paddw mm0, mm7 + movq mm1, mm4 + punpcklwd mm4, mm5 + movq [edx + 40H], mm0 + punpckhwd mm1, mm5 + movq mm0, mm6 + punpcklwd mm6, mm7 + movq mm5, mm4 + punpckldq mm4, mm6 + punpckhdq mm5, mm6 + movq mm6, mm1 + movq [edx + 48H], mm4 + punpckhwd mm0, mm7 + movq [edx + 58H], mm5 + punpckhdq mm6, mm0 + movq mm4, [edx + 40H] + punpckldq mm1, mm0 + movq mm5, [edx + 50H] + movq mm0, mm4 + movq [edx + 78H], mm6 + punpcklwd mm0, mm5 + movq [edx + 68H], mm1 + punpckhwd mm4, mm5 + movq mm5, mm2 + punpcklwd mm2, mm3 + movq mm1, mm0 + punpckldq mm0, mm2 + punpckhdq mm1, mm2 + movq mm2, mm4 + movq [edx + 40H], mm0 + punpckhwd mm5, mm3 + movq [edx + 50H], mm1 + punpckhdq mm4, mm5 + punpckldq mm2, mm5 + movq [edx + 70H], mm4 + movq [edx + 60H], mm2 + movq mm2, [edx + 30H] + movq mm6, [eax + 10H] + movq mm4, mm2 + movq mm7, [edx + 50H] + pmulhw mm4, mm6 + movq mm1, [eax + 20H] + pmulhw mm6, mm7 + movq mm5, mm1 + pmulhw mm1, mm2 + movq mm3, [edx + 10H] + pmulhw mm5, mm7 + movq mm0, [eax] + paddw mm4, mm2 + paddw mm6, mm7 + paddw mm2, mm1 + movq mm1, [edx + 70H] + paddw mm7, mm5 + movq mm5, mm0 + pmulhw mm0, mm3 + paddw mm4, mm7 + pmulhw mm5, mm1 + movq mm7, [eax + 30H] + psubw mm6, mm2 + paddw mm0, mm3 + pmulhw mm3, mm7 + movq mm2, [edx + 20H] + pmulhw mm7, mm1 + paddw mm5, mm1 + movq mm1, mm2 + pmulhw mm2, [eax + 08H] + psubw mm3, mm5 + movq mm5, [edx + 60H] + paddw mm0, mm7 + movq mm7, mm5 + psubw mm0, mm4 + pmulhw mm5, [eax + 08H] + paddw mm2, mm1 + pmulhw mm1, [eax + 28H] + paddw mm4, mm4 + paddw mm4, mm0 + psubw mm3, mm6 + paddw mm5, mm7 + paddw mm6, mm6 + pmulhw mm7, [eax + 28H] + paddw mm6, mm3 + movq [edx + 10H], mm4 + psubw mm1, mm5 + movq mm4, [eax + 18H] + movq mm5, mm3 + pmulhw mm3, mm4 + paddw mm7, mm2 + movq [edx + 20H], mm6 + movq mm2, mm0 + movq mm6, [edx] + pmulhw mm0, mm4 + paddw mm5, mm3 + movq mm3, [edx + 40H] + psubw mm5, mm1 + paddw mm2, mm0 + psubw mm6, mm3 + movq mm0, mm6 + pmulhw mm6, mm4 + 
paddw mm3, mm3 + paddw mm1, mm1 + paddw mm3, mm0 + paddw mm1, mm5 + pmulhw mm4, mm3 + paddw mm6, mm0 + psubw mm6, mm2 + paddw mm2, mm2 + movq mm0, [edx + 10H] + paddw mm2, mm6 + paddw mm4, mm3 + psubw mm2, mm1 + paddw mm2, [eax + 38H] + paddw mm1, mm1 + paddw mm1, mm2 + psraw mm2, 4 + psubw mm4, mm7 + psraw mm1, 4 + movq mm3, [edx + 20H] + paddw mm7, mm7 + movq [edx + 20H], mm2 + paddw mm7, mm4 + movq [edx + 10H], mm1 + psubw mm4, mm3 + paddw mm4, [eax + 38H] + paddw mm3, mm3 + paddw mm3, mm4 + psraw mm4, 4 + psubw mm6, mm5 + psraw mm3, 4 + paddw mm6, [eax + 38H] + paddw mm5, mm5 + paddw mm5, mm6 + psraw mm6, 4 + movq [edx + 40H], mm4 + psraw mm5, 4 + movq [edx + 30H], mm3 + psubw mm7, mm0 + paddw mm7, [eax + 38H] + paddw mm0, mm0 + paddw mm0, mm7 + psraw mm7, 4 + movq [edx + 60H], mm6 + psraw mm0, 4 + movq [edx + 50H], mm5 + movq [edx + 70H], mm7 + movq [edx], mm0 + movq mm2, [edx + 38H] + movq mm6, [eax + 10H] + movq mm4, mm2 + movq mm7, [edx + 58H] + pmulhw mm4, mm6 + movq mm1, [eax + 20H] + pmulhw mm6, mm7 + movq mm5, mm1 + pmulhw mm1, mm2 + movq mm3, [edx + 18H] + pmulhw mm5, mm7 + movq mm0, [eax] + paddw mm4, mm2 + paddw mm6, mm7 + paddw mm2, mm1 + movq mm1, [edx + 78H] + paddw mm7, mm5 + movq mm5, mm0 + pmulhw mm0, mm3 + paddw mm4, mm7 + pmulhw mm5, mm1 + movq mm7, [eax + 30H] + psubw mm6, mm2 + paddw mm0, mm3 + pmulhw mm3, mm7 + movq mm2, [edx + 28H] + pmulhw mm7, mm1 + paddw mm5, mm1 + movq mm1, mm2 + pmulhw mm2, [eax + 08H] + psubw mm3, mm5 + movq mm5, [edx + 68H] + paddw mm0, mm7 + movq mm7, mm5 + psubw mm0, mm4 + pmulhw mm5, [eax + 08H] + paddw mm2, mm1 + pmulhw mm1, [eax + 28H] + paddw mm4, mm4 + paddw mm4, mm0 + psubw mm3, mm6 + paddw mm5, mm7 + paddw mm6, mm6 + pmulhw mm7, [eax + 28H] + paddw mm6, mm3 + movq [edx + 18H], mm4 + psubw mm1, mm5 + movq mm4, [eax + 18H] + movq mm5, mm3 + pmulhw mm3, mm4 + paddw mm7, mm2 + movq [edx + 28H], mm6 + movq mm2, mm0 + movq mm6, [edx + 08H] + pmulhw mm0, mm4 + paddw mm5, mm3 + movq mm3, [edx + 48H] + psubw mm5, 
mm1 + paddw mm2, mm0 + psubw mm6, mm3 + movq mm0, mm6 + pmulhw mm6, mm4 + paddw mm3, mm3 + paddw mm1, mm1 + paddw mm3, mm0 + paddw mm1, mm5 + pmulhw mm4, mm3 + paddw mm6, mm0 + psubw mm6, mm2 + paddw mm2, mm2 + movq mm0, [edx + 18H] + paddw mm2, mm6 + paddw mm4, mm3 + psubw mm2, mm1 + paddw mm2, [eax + 38H] + paddw mm1, mm1 + paddw mm1, mm2 + psraw mm2, 4 + psubw mm4, mm7 + psraw mm1, 4 + movq mm3, [edx + 28H] + paddw mm7, mm7 + movq [edx + 28H], mm2 + paddw mm7, mm4 + movq [edx + 18H], mm1 + psubw mm4, mm3 + paddw mm4, [eax + 38H] + paddw mm3, mm3 + paddw mm3, mm4 + psraw mm4, 4 + psubw mm6, mm5 + psraw mm3, 4 + paddw mm6, [eax + 38H] + paddw mm5, mm5 + paddw mm5, mm6 + psraw mm6, 4 + movq [edx + 48H], mm4 + psraw mm5, 4 + movq [edx + 38H], mm3 + psubw mm7, mm0 + paddw mm7, [eax + 38H] + paddw mm0, mm0 + paddw mm0, mm7 + psraw mm7, 4 + movq [edx + 68H], mm6 + psraw mm0, 4 + movq [edx + 58H], mm5 + movq [edx + 78H], mm7 + movq [edx + 08H], mm0 + /* emms */ + } +} + + +void oc_idct8x8_mmx(ogg_int16_t _y[64]){ + _asm { + mov edx, [_y] + mov eax, offset OC_IDCT_CONSTS + movq mm2, [edx + 30H] + movq mm6, [eax + 10H] + movq mm4, mm2 + movq mm7, [edx + 18H] + pmulhw mm4, mm6 + movq mm1, [eax + 20H] + pmulhw mm6, mm7 + movq mm5, mm1 + pmulhw mm1, mm2 + movq mm3, [edx + 10H] + pmulhw mm5, mm7 + movq mm0, [eax] + paddw mm4, mm2 + paddw mm6, mm7 + paddw mm2, mm1 + movq mm1, [edx + 38H] + paddw mm7, mm5 + movq mm5, mm0 + pmulhw mm0, mm3 + paddw mm4, mm7 + pmulhw mm5, mm1 + movq mm7, [eax + 30H] + psubw mm6, mm2 + paddw mm0, mm3 + pmulhw mm3, mm7 + movq mm2, [edx + 20H] + pmulhw mm7, mm1 + paddw mm5, mm1 + movq mm1, mm2 + pmulhw mm2, [eax + 08H] + psubw mm3, mm5 + movq mm5, [edx + 28H] + paddw mm0, mm7 + movq mm7, mm5 + psubw mm0, mm4 + pmulhw mm5, [eax + 08H] + paddw mm2, mm1 + pmulhw mm1, [eax + 28H] + paddw mm4, mm4 + paddw mm4, mm0 + psubw mm3, mm6 + paddw mm5, mm7 + paddw mm6, mm6 + pmulhw mm7, [eax + 28H] + paddw mm6, mm3 + movq [edx + 10H], mm4 + psubw mm1, mm5 + movq 
mm4, [eax + 18H] + movq mm5, mm3 + pmulhw mm3, mm4 + paddw mm7, mm2 + movq [edx + 20H], mm6 + movq mm2, mm0 + movq mm6, [edx] + pmulhw mm0, mm4 + paddw mm5, mm3 + movq mm3, [edx + 08H] + psubw mm5, mm1 + paddw mm2, mm0 + psubw mm6, mm3 + movq mm0, mm6 + pmulhw mm6, mm4 + paddw mm3, mm3 + paddw mm1, mm1 + paddw mm3, mm0 + paddw mm1, mm5 + pmulhw mm4, mm3 + paddw mm6, mm0 + psubw mm6, mm2 + paddw mm2, mm2 + movq mm0, [edx + 10H] + paddw mm2, mm6 + paddw mm4, mm3 + psubw mm2, mm1 + movq mm3, [edx + 20H] + psubw mm4, mm7 + paddw mm1, mm1 + paddw mm7, mm7 + paddw mm1, mm2 + paddw mm7, mm4 + psubw mm4, mm3 + paddw mm3, mm3 + psubw mm6, mm5 + paddw mm5, mm5 + paddw mm3, mm4 + paddw mm5, mm6 + psubw mm7, mm0 + paddw mm0, mm0 + movq [edx + 10H], mm1 + paddw mm0, mm7 + movq mm1, mm4 + punpcklwd mm4, mm5 + movq [edx], mm0 + punpckhwd mm1, mm5 + movq mm0, mm6 + punpcklwd mm6, mm7 + movq mm5, mm4 + punpckldq mm4, mm6 + punpckhdq mm5, mm6 + movq mm6, mm1 + movq [edx + 08H], mm4 + punpckhwd mm0, mm7 + movq [edx + 18H], mm5 + punpckhdq mm6, mm0 + movq mm4, [edx] + punpckldq mm1, mm0 + movq mm5, [edx + 10H] + movq mm0, mm4 + movq [edx + 38H], mm6 + punpcklwd mm0, mm5 + movq [edx + 28H], mm1 + punpckhwd mm4, mm5 + movq mm5, mm2 + punpcklwd mm2, mm3 + movq mm1, mm0 + punpckldq mm0, mm2 + punpckhdq mm1, mm2 + movq mm2, mm4 + movq [edx], mm0 + punpckhwd mm5, mm3 + movq [edx + 10H], mm1 + punpckhdq mm4, mm5 + punpckldq mm2, mm5 + movq [edx + 30H], mm4 + movq [edx + 20H], mm2 + movq mm2, [edx + 70H] + movq mm6, [eax + 10H] + movq mm4, mm2 + movq mm7, [edx + 58H] + pmulhw mm4, mm6 + movq mm1, [eax + 20H] + pmulhw mm6, mm7 + movq mm5, mm1 + pmulhw mm1, mm2 + movq mm3, [edx + 50H] + pmulhw mm5, mm7 + movq mm0, [eax] + paddw mm4, mm2 + paddw mm6, mm7 + paddw mm2, mm1 + movq mm1, [edx + 78H] + paddw mm7, mm5 + movq mm5, mm0 + pmulhw mm0, mm3 + paddw mm4, mm7 + pmulhw mm5, mm1 + movq mm7, [eax + 30H] + psubw mm6, mm2 + paddw mm0, mm3 + pmulhw mm3, mm7 + movq mm2, [edx + 60H] + pmulhw mm7, mm1 
+ paddw mm5, mm1 + movq mm1, mm2 + pmulhw mm2, [eax + 08H] + psubw mm3, mm5 + movq mm5, [edx + 68H] + paddw mm0, mm7 + movq mm7, mm5 + psubw mm0, mm4 + pmulhw mm5, [eax + 08H] + paddw mm2, mm1 + pmulhw mm1, [eax + 28H] + paddw mm4, mm4 + paddw mm4, mm0 + psubw mm3, mm6 + paddw mm5, mm7 + paddw mm6, mm6 + pmulhw mm7, [eax + 28H] + paddw mm6, mm3 + movq [edx + 50H], mm4 + psubw mm1, mm5 + movq mm4, [eax + 18H] + movq mm5, mm3 + pmulhw mm3, mm4 + paddw mm7, mm2 + movq [edx + 60H], mm6 + movq mm2, mm0 + movq mm6, [edx + 40H] + pmulhw mm0, mm4 + paddw mm5, mm3 + movq mm3, [edx + 48H] + psubw mm5, mm1 + paddw mm2, mm0 + psubw mm6, mm3 + movq mm0, mm6 + pmulhw mm6, mm4 + paddw mm3, mm3 + paddw mm1, mm1 + paddw mm3, mm0 + paddw mm1, mm5 + pmulhw mm4, mm3 + paddw mm6, mm0 + psubw mm6, mm2 + paddw mm2, mm2 + movq mm0, [edx + 50H] + paddw mm2, mm6 + paddw mm4, mm3 + psubw mm2, mm1 + movq mm3, [edx + 60H] + psubw mm4, mm7 + paddw mm1, mm1 + paddw mm7, mm7 + paddw mm1, mm2 + paddw mm7, mm4 + psubw mm4, mm3 + paddw mm3, mm3 + psubw mm6, mm5 + paddw mm5, mm5 + paddw mm3, mm4 + paddw mm5, mm6 + psubw mm7, mm0 + paddw mm0, mm0 + movq [edx + 50H], mm1 + paddw mm0, mm7 + movq mm1, mm4 + punpcklwd mm4, mm5 + movq [edx + 40H], mm0 + punpckhwd mm1, mm5 + movq mm0, mm6 + punpcklwd mm6, mm7 + movq mm5, mm4 + punpckldq mm4, mm6 + punpckhdq mm5, mm6 + movq mm6, mm1 + movq [edx + 48H], mm4 + punpckhwd mm0, mm7 + movq [edx + 58H], mm5 + punpckhdq mm6, mm0 + movq mm4, [edx + 40H] + punpckldq mm1, mm0 + movq mm5, [edx + 50H] + movq mm0, mm4 + movq [edx + 78H], mm6 + punpcklwd mm0, mm5 + movq [edx + 68H], mm1 + punpckhwd mm4, mm5 + movq mm5, mm2 + punpcklwd mm2, mm3 + movq mm1, mm0 + punpckldq mm0, mm2 + punpckhdq mm1, mm2 + movq mm2, mm4 + movq [edx + 40H], mm0 + punpckhwd mm5, mm3 + movq [edx + 50H], mm1 + punpckhdq mm4, mm5 + punpckldq mm2, mm5 + movq [edx + 70H], mm4 + movq [edx + 60H], mm2 + movq mm2, [edx + 30H] + movq mm6, [eax + 10H] + movq mm4, mm2 + movq mm7, [edx + 50H] + pmulhw mm4, 
mm6 + movq mm1, [eax + 20H] + pmulhw mm6, mm7 + movq mm5, mm1 + pmulhw mm1, mm2 + movq mm3, [edx + 10H] + pmulhw mm5, mm7 + movq mm0, [eax] + paddw mm4, mm2 + paddw mm6, mm7 + paddw mm2, mm1 + movq mm1, [edx + 70H] + paddw mm7, mm5 + movq mm5, mm0 + pmulhw mm0, mm3 + paddw mm4, mm7 + pmulhw mm5, mm1 + movq mm7, [eax + 30H] + psubw mm6, mm2 + paddw mm0, mm3 + pmulhw mm3, mm7 + movq mm2, [edx + 20H] + pmulhw mm7, mm1 + paddw mm5, mm1 + movq mm1, mm2 + pmulhw mm2, [eax + 08H] + psubw mm3, mm5 + movq mm5, [edx + 60H] + paddw mm0, mm7 + movq mm7, mm5 + psubw mm0, mm4 + pmulhw mm5, [eax + 08H] + paddw mm2, mm1 + pmulhw mm1, [eax + 28H] + paddw mm4, mm4 + paddw mm4, mm0 + psubw mm3, mm6 + paddw mm5, mm7 + paddw mm6, mm6 + pmulhw mm7, [eax + 28H] + paddw mm6, mm3 + movq [edx + 10H], mm4 + psubw mm1, mm5 + movq mm4, [eax + 18H] + movq mm5, mm3 + pmulhw mm3, mm4 + paddw mm7, mm2 + movq [edx + 20H], mm6 + movq mm2, mm0 + movq mm6, [edx] + pmulhw mm0, mm4 + paddw mm5, mm3 + movq mm3, [edx + 40H] + psubw mm5, mm1 + paddw mm2, mm0 + psubw mm6, mm3 + movq mm0, mm6 + pmulhw mm6, mm4 + paddw mm3, mm3 + paddw mm1, mm1 + paddw mm3, mm0 + paddw mm1, mm5 + pmulhw mm4, mm3 + paddw mm6, mm0 + psubw mm6, mm2 + paddw mm2, mm2 + movq mm0, [edx + 10H] + paddw mm2, mm6 + paddw mm4, mm3 + psubw mm2, mm1 + paddw mm2, [eax + 38H] + paddw mm1, mm1 + paddw mm1, mm2 + psraw mm2, 4 + psubw mm4, mm7 + psraw mm1, 4 + movq mm3, [edx + 20H] + paddw mm7, mm7 + movq [edx + 20H], mm2 + paddw mm7, mm4 + movq [edx + 10H], mm1 + psubw mm4, mm3 + paddw mm4, [eax + 38H] + paddw mm3, mm3 + paddw mm3, mm4 + psraw mm4, 4 + psubw mm6, mm5 + psraw mm3, 4 + paddw mm6, [eax + 38H] + paddw mm5, mm5 + paddw mm5, mm6 + psraw mm6, 4 + movq [edx + 40H], mm4 + psraw mm5, 4 + movq [edx + 30H], mm3 + psubw mm7, mm0 + paddw mm7, [eax + 38H] + paddw mm0, mm0 + paddw mm0, mm7 + psraw mm7, 4 + movq [edx + 60H], mm6 + psraw mm0, 4 + movq [edx + 50H], mm5 + movq [edx + 70H], mm7 + movq [edx], mm0 + movq mm2, [edx + 38H] + movq mm6, 
[eax + 10H] + movq mm4, mm2 + movq mm7, [edx + 58H] + pmulhw mm4, mm6 + movq mm1, [eax + 20H] + pmulhw mm6, mm7 + movq mm5, mm1 + pmulhw mm1, mm2 + movq mm3, [edx + 18H] + pmulhw mm5, mm7 + movq mm0, [eax] + paddw mm4, mm2 + paddw mm6, mm7 + paddw mm2, mm1 + movq mm1, [edx + 78H] + paddw mm7, mm5 + movq mm5, mm0 + pmulhw mm0, mm3 + paddw mm4, mm7 + pmulhw mm5, mm1 + movq mm7, [eax + 30H] + psubw mm6, mm2 + paddw mm0, mm3 + pmulhw mm3, mm7 + movq mm2, [edx + 28H] + pmulhw mm7, mm1 + paddw mm5, mm1 + movq mm1, mm2 + pmulhw mm2, [eax + 08H] + psubw mm3, mm5 + movq mm5, [edx + 68H] + paddw mm0, mm7 + movq mm7, mm5 + psubw mm0, mm4 + pmulhw mm5, [eax + 08H] + paddw mm2, mm1 + pmulhw mm1, [eax + 28H] + paddw mm4, mm4 + paddw mm4, mm0 + psubw mm3, mm6 + paddw mm5, mm7 + paddw mm6, mm6 + pmulhw mm7, [eax + 28H] + paddw mm6, mm3 + movq [edx + 18H], mm4 + psubw mm1, mm5 + movq mm4, [eax + 18H] + movq mm5, mm3 + pmulhw mm3, mm4 + paddw mm7, mm2 + movq [edx + 28H], mm6 + movq mm2, mm0 + movq mm6, [edx + 08H] + pmulhw mm0, mm4 + paddw mm5, mm3 + movq mm3, [edx + 48H] + psubw mm5, mm1 + paddw mm2, mm0 + psubw mm6, mm3 + movq mm0, mm6 + pmulhw mm6, mm4 + paddw mm3, mm3 + paddw mm1, mm1 + paddw mm3, mm0 + paddw mm1, mm5 + pmulhw mm4, mm3 + paddw mm6, mm0 + psubw mm6, mm2 + paddw mm2, mm2 + movq mm0, [edx + 18H] + paddw mm2, mm6 + paddw mm4, mm3 + psubw mm2, mm1 + paddw mm2, [eax + 38H] + paddw mm1, mm1 + paddw mm1, mm2 + psraw mm2, 4 + psubw mm4, mm7 + psraw mm1, 4 + movq mm3, [edx + 28H] + paddw mm7, mm7 + movq [edx + 28H], mm2 + paddw mm7, mm4 + movq [edx + 18H], mm1 + psubw mm4, mm3 + paddw mm4, [eax + 38H] + paddw mm3, mm3 + paddw mm3, mm4 + psraw mm4, 4 + psubw mm6, mm5 + psraw mm3, 4 + paddw mm6, [eax + 38H] + paddw mm5, mm5 + paddw mm5, mm6 + psraw mm6, 4 + movq [edx + 48H], mm4 + psraw mm5, 4 + movq [edx + 38H], mm3 + psubw mm7, mm0 + paddw mm7, [eax + 38H] + paddw mm0, mm0 + paddw mm0, mm7 + psraw mm7, 4 + movq [edx + 68H], mm6 + psraw mm0, 4 + movq [edx + 58H], mm5 + 
movq [edx + 78H], mm7 + movq [edx + 08H], mm0 + /* emms */ + } +} + +#endif diff --git a/media/libtheora/lib/dec/x86_vc/mmxloopfilter.c b/media/libtheora/lib/dec/x86_vc/mmxloopfilter.c new file mode 100644 index 000000000000..62d06dc89ebb --- /dev/null +++ b/media/libtheora/lib/dec/x86_vc/mmxloopfilter.c @@ -0,0 +1,377 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. * + * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * + * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * + * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: + + ********************************************************************/ + +/* ------------------------------------------------------------------- + MMX based loop filter for the theora codec. + + Originally written by Rudolf Marek, based on code from On2's VP3. + Converted to Visual Studio inline assembly by Nils Pipenbrinck. + + Note: I can't test these since my example files never get into the + loop filters, but the code has been converted semi-automatic from + the GCC sources, so it ought to work. 
+ ---------------------------------------------------------------------*/
+#include "../../internal.h"
+#include "x86int.h"
+#include <mmintrin.h>
+
+#if defined(USE_ASM)
+
+
+
+/*Applies the loop filter across the horizontal edge that lies directly above
+   the row _pix points to.
+  Eight pixels are read from each of the four rows _pix-2*_ystride,
+   _pix-_ystride, _pix and _pix+_ystride; only the two middle rows
+   (_pix-_ystride and _pix) are written back.
+  _ll points to four 16-bit copies of the filter limit L (loaded with a single
+   movq).*/
+static void loop_filter_v(unsigned char *_pix,int _ystride,
+ const ogg_int16_t *_ll){
+  _asm {
+    mov eax, [_pix]
+    mov edx, [_ystride]
+    mov ebx, [_ll]
+
+    /* _pix -= ystride */
+    sub eax, edx
+    /* mm0=0 */
+    pxor mm0, mm0
+    /* _pix -= ystride */
+    sub eax, edx
+    /* esi=_ystride*3 */
+    lea esi, [edx + edx*2]
+
+    /* mm7=_pix[0...8]*/
+    movq mm7, [eax]
+    /* mm4=_pix[0...8+_ystride*3]*/
+    movq mm4, [eax + esi]
+    /* mm6=_pix[0...8]*/
+    movq mm6, mm7
+    /* Expand unsigned _pix[0...3] to 16 bits.*/
+    punpcklbw mm6, mm0
+    movq mm5, mm4
+    /* Expand unsigned _pix[4...7] to 16 bits.*/
+    punpckhbw mm7, mm0
+    punpcklbw mm4, mm0
+    /* Expand other arrays too.*/
+    punpckhbw mm5, mm0
+    /*mm7:mm6=_p[0...7]-_p[0...7+_ystride*3]:*/
+    psubw mm6, mm4
+    psubw mm7, mm5
+    /*mm5=mm4=_pix[0...7+_ystride]*/
+    movq mm4, [eax + edx]
+    /*mm1=mm3=mm2=_pix[0...7+_ystride*2]*/
+    movq mm2, [eax + edx*2]
+    movq mm5, mm4
+    movq mm3, mm2
+    movq mm1, mm2
+    /*Expand these arrays.*/
+    punpckhbw mm5, mm0
+    punpcklbw mm4, mm0
+    punpckhbw mm3, mm0
+    punpcklbw mm2, mm0
+    pcmpeqw mm0, mm0
+    /*mm0=3 3 3 3
+      mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
+    psubw mm3, mm5
+    psrlw mm0, 14
+    psubw mm2, mm4
+    /*Scale by 3.*/
+    pmullw mm3, mm0
+    pmullw mm2, mm0
+    /*mm0=4 4 4 4
+      f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
+       3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
+    psrlw mm0, 1
+    paddw mm3, mm7
+    psllw mm0, 2
+    paddw mm2, mm6
+    /*Add 4.*/
+    paddw mm3, mm0
+    paddw mm2, mm0
+    /*"Divide" by 8.*/
+    psraw mm3, 3
+    psraw mm2, 3
+    /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the spec.*/
+    /*Free up mm5.*/
+    packuswb mm4, mm5
+    /*mm0=L L L L*/
+    movq mm0, [ebx]
+    /*if(R_i<-2L||R_i>2L)R_i=0:*/
+    movq mm5, mm2
+    pxor mm6, mm6
+    movq mm7, mm0
+    psubw mm6, mm0
+    psllw mm7, 1
+    psllw mm6, 1
+    /*mm2==R_3 R_2 R_1 R_0*/
+    /*mm5==R_3 R_2 R_1 R_0*/
+    /*mm6==-2L -2L -2L -2L*/
+    /*mm7==2L 2L 2L 2L*/
+    pcmpgtw mm7, mm2
+    pcmpgtw mm5, mm6
+    pand mm2, mm7
+    movq mm7, mm0
+    pand mm2, mm5
+    psllw mm7, 1
+    movq mm5, mm3
+    /*mm3==R_7 R_6 R_5 R_4*/
+    /*mm5==R_7 R_6 R_5 R_4*/
+    /*mm6==-2L -2L -2L -2L*/
+    /*mm7==2L 2L 2L 2L*/
+    pcmpgtw mm7, mm3
+    pcmpgtw mm5, mm6
+    pand mm3, mm7
+    movq mm7, mm0
+    pand mm3, mm5
+    /*if(R_i<-L)R_i'=R_i+2L;
+      if(R_i>L)R_i'=R_i-2L;
+      if(R_i<-L||R_i>L)R_i=-R_i':*/
+    psraw mm6, 1
+    movq mm5, mm2
+    psllw mm7, 1
+    /*mm2==R_3 R_2 R_1 R_0*/
+    /*mm5==R_3 R_2 R_1 R_0*/
+    /*mm6==-L -L -L -L*/
+    /*mm0==L L L L*/
+    /*mm5=R_i>L?FF:00*/
+    pcmpgtw mm5, mm0
+    /*mm6=-L>R_i?FF:00*/
+    pcmpgtw mm6, mm2
+    /*mm7=R_i>L?2L:0*/
+    pand mm7, mm5
+    /*mm2=R_i>L?R_i-2L:R_i*/
+    psubw mm2, mm7
+    movq mm7, mm0
+    /*mm5=-L>R_i||R_i>L*/
+    por mm5, mm6
+    psllw mm7, 1
+    /*mm7=-L>R_i?2L:0*/
+    pand mm7, mm6
+    pxor mm6, mm6
+    /*mm2=-L>R_i?R_i+2L:R_i*/
+    paddw mm2, mm7
+    psubw mm6, mm0
+    /*mm5=-L>R_i||R_i>L?-R_i':0*/
+    pand mm5, mm2
+    movq mm7, mm0
+    /*mm2=-L>R_i||R_i>L?0:R_i*/
+    psubw mm2, mm5
+    psllw mm7, 1
+    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
+    psubw mm2, mm5
+    movq mm5, mm3
+    /*mm3==R_7 R_6 R_5 R_4*/
+    /*mm5==R_7 R_6 R_5 R_4*/
+    /*mm6==-L -L -L -L*/
+    /*mm0==L L L L*/
+    /*mm6=-L>R_i?FF:00*/
+    pcmpgtw mm6, mm3
+    /*mm5=R_i>L?FF:00*/
+    pcmpgtw mm5, mm0
+    /*mm7=R_i>L?2L:0*/
+    pand mm7, mm5
+    /*mm3=R_i>L?R_i-2L:R_i*/
+    psubw mm3, mm7
+    psllw mm0, 1
+    /*mm5=-L>R_i||R_i>L*/
+    por mm5, mm6
+    /*mm0=-L>R_i?2L:0*/
+    pand mm0, mm6
+    /*mm3=-L>R_i?R_i+2L:R_i*/
+    paddw mm3, mm0
+    /*mm5=-L>R_i||R_i>L?-R_i':0*/
+    pand mm5, mm3
+    /*mm3=-L>R_i||R_i>L?0:R_i*/
+    psubw mm3, mm5
+    /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
+    psubw mm3, mm5
+    /*Unfortunately, there's no unsigned byte+signed byte with unsigned
+       saturation op code, so we have to promote things back to 16 bits.*/
+    pxor mm0, mm0
+    movq mm5, mm4
+    punpcklbw mm4, mm0
+    punpckhbw mm5, mm0
+    movq mm6, mm1
+    punpcklbw mm1, mm0
+    punpckhbw mm6, mm0
+    /*_pix[0...8+_ystride]+=R_i*/
+    paddw mm4, mm2
+    paddw mm5, mm3
+    /*_pix[0...8+_ystride*2]-=R_i*/
+    psubw mm1, mm2
+    psubw mm6, mm3
+    packuswb mm4, mm5
+    packuswb mm1, mm6
+    /*Write it back out.*/
+    movq [eax + edx], mm4
+    movq [eax + edx*2], mm1
+  }
+}
+
+/*This code implements the bulk of loop_filter_h().
+  Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
+   four p0's to one register we must transpose the values in four mmx regs.
+  When half is done we repeat this for the rest.
+  Each call handles four rows: four bytes are loaded from _pix on each row,
+   and the two filtered middle bytes are stored back as one 16-bit word at
+   _pix+1 on each row.*/
+static void loop_filter_h4(unsigned char *_pix,long _ystride,
+ const ogg_int16_t *_ll){
+  /* todo: merge the comments from the GCC sources */
+  _asm {
+    mov ecx, [_pix]
+    mov edx, [_ystride]
+    mov eax, [_ll]
+    /*esi=_ystride*3*/
+    lea esi, [edx + edx*2]
+
+    movd mm0, dword ptr [ecx]
+    movd mm1, dword ptr [ecx + edx]
+    movd mm2, dword ptr [ecx + edx*2]
+    movd mm3, dword ptr [ecx + esi]
+    punpcklbw mm0, mm1
+    punpcklbw mm2, mm3
+    movq mm1, mm0
+    punpckhwd mm0, mm2
+    punpcklwd mm1, mm2
+    pxor mm7, mm7
+    movq mm5, mm1
+    punpcklbw mm1, mm7
+    punpckhbw mm5, mm7
+    movq mm3, mm0
+    punpcklbw mm0, mm7
+    punpckhbw mm3, mm7
+    psubw mm1, mm3
+    movq mm4, mm0
+    pcmpeqw mm2, mm2
+    psubw mm0, mm5
+    psrlw mm2, 14
+    pmullw mm0, mm2
+    psrlw mm2, 1
+    paddw mm0, mm1
+    psllw mm2, 2
+    paddw mm0, mm2
+    psraw mm0, 3
+    movq mm6, qword ptr [eax]
+    movq mm1, mm0
+    pxor mm2, mm2
+    movq mm3, mm6
+    psubw mm2, mm6
+    psllw mm3, 1
+    psllw mm2, 1
+    pcmpgtw mm3, mm0
+    pcmpgtw mm1, mm2
+    pand mm0, mm3
+    pand mm0, mm1
+    psraw mm2, 1
+    movq mm1, mm0
+    movq mm3, mm6
+    pcmpgtw mm2, mm0
+    pcmpgtw mm1, mm6
+    psllw mm3, 1
+    psllw mm6, 1
+    pand mm3, mm1
+    pand mm6, mm2
+    psubw mm0, mm3
+    por mm1, mm2
+    paddw mm0, mm6
+    pand mm1, mm0
+    psubw mm0, mm1
+    psubw mm0, mm1
+    paddw mm5, mm0
+    psubw mm4, mm0
+    packuswb mm5, mm7
+    packuswb mm4, mm7
+    punpcklbw mm5, mm4
+    movd edi, mm5
+    mov word ptr [ecx + 01H], di
+    psrlq mm5, 32
+    shr edi, 16
+    mov word ptr [ecx + edx + 01H], di
+    movd edi, mm5
+    mov word ptr [ecx + edx*2 + 01H], di
+    shr edi, 16
+    mov word ptr [ecx + esi + 01H], di
+  }
+}
+
+/*Filters a vertical edge: backs _pix up by two pixels, then runs
+   loop_filter_h4() on rows 0...3 and again on rows 4...7.*/
+static void loop_filter_h(unsigned char *_pix,int _ystride,
+ const ogg_int16_t *_ll){
+  _pix-=2;
+  loop_filter_h4(_pix,_ystride,_ll);
+  loop_filter_h4(_pix+(_ystride<<2),_ystride,_ll);
+}
+
+
+/*We copy the whole function because the MMX routines will be inlined 4 times,
+   and we can do just a single emms call at the end this way.
+  We also do not use the _bv lookup table, instead computing the values that
+   would lie in it on the fly.*/
+
+/*Apply the loop filter to a given set of fragment rows in the given plane.
+  The filter may be run on the bottom edge, affecting pixels in the next row of
+   fragments, so this row also needs to be available.
+  _bv:        The bounding values array.
+  _refi:      The index of the frame buffer to filter.
+  _pli:       The color plane to filter.
+  _fragy0:    The Y coordinate of the first fragment row to filter.
+  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
+void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
+ int _refi,int _pli,int _fragy0,int _fragy_end){
+  ogg_int16_t __declspec(align(8))  ll[4];
+  th_img_plane                     *iplane;
+  oc_fragment_plane                *fplane;
+  oc_fragment                      *frag_top;
+  oc_fragment                      *frag0;
+  oc_fragment                      *frag;
+  oc_fragment                      *frag_end;
+  oc_fragment                      *frag0_end;
+  oc_fragment                      *frag_bot;
+  ll[0]=ll[1]=ll[2]=ll[3]=
+   (ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];
+  iplane=_state->ref_frame_bufs[_refi]+_pli;
+  fplane=_state->fplanes+_pli;
+  /*The following loops are constructed somewhat non-intuitively on purpose.
+    The main idea is: if a block boundary has at least one coded fragment on
+     it, the filter is applied to it.
+    However, the order that the filters are applied in matters, and VP3 chose
+     the somewhat strange ordering used below.*/
+  frag_top=_state->frags+fplane->froffset;
+  frag0=frag_top+_fragy0*fplane->nhfrags;
+  frag0_end=frag0+(_fragy_end-_fragy0)*fplane->nhfrags;
+  frag_bot=_state->frags+fplane->froffset+fplane->nfrags;
+  while(frag0<frag0_end){
+    frag=frag0;
+    frag_end=frag+fplane->nhfrags;
+    while(frag<frag_end){
+      if(frag->coded){
+        if(frag>frag0){
+          loop_filter_h(frag->buffer[_refi],iplane->stride,ll);
+        }
+        if(frag0>frag_top){
+          loop_filter_v(frag->buffer[_refi],iplane->stride,ll);
+        }
+        if(frag+1<frag_end&&!(frag+1)->coded){
+          loop_filter_h(frag->buffer[_refi]+8,iplane->stride,ll);
+        }
+        if(frag+fplane->nhfrags<frag_bot&&!(frag+fplane->nhfrags)->coded){
+          loop_filter_v((frag+fplane->nhfrags)->buffer[_refi],
+           iplane->stride,ll);
+        }
+      }
+      frag++;
+    }
+    frag0+=fplane->nhfrags;
+  }
+
+  /*This needs to be removed when decode specific functions are implemented:*/
+  _mm_empty();
+}
+
+#endif
diff --git a/media/libtheora/lib/dec/x86_vc/mmxstate.c b/media/libtheora/lib/dec/x86_vc/mmxstate.c
new file mode 100644
index 000000000000..526ef53f38ac
--- /dev/null
+++ b/media/libtheora/lib/dec/x86_vc/mmxstate.c
@@ -0,0 +1,189 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $
+
+ ********************************************************************/
+
+/* ------------------------------------------------------------------------
+  MMX acceleration of complete fragment reconstruction algorithm.
+    Originally written by Rudolf Marek.
+
+  Conversion to MSC intrinsics by Nils Pipenbrinck.
+  ---------------------------------------------------------------------*/
+#if defined(USE_ASM)
+
+#include "../../internal.h"
+#include "../idct.h"
+#include "x86int.h"
+#include <mmintrin.h>
+
+/*Destination index in the residue buffer for each zig-zag position.
+  NOTE(review): the ordering is presumably the coefficient layout the MMX iDCT
+   routines expect; that cannot be confirmed from this file alone.*/
+static const unsigned char OC_FZIG_ZAGMMX[64]=
+{
+  0, 8, 1, 2, 9,16,24,17,
+  10, 3,32,11,18,25, 4,12,
+  5,26,19,40,33,34,41,48,
+  27, 6,13,20,28,21,14, 7,
+  56,49,42,35,43,50,57,36,
+  15,22,29,30,23,44,37,58,
+  51,59,38,45,52,31,60,53,
+  46,39,47,54,61,62,55,63
+};
+
+/* Fill a block (16 __m64 words, i.e. 64 16-bit values) with value */
+static __inline void loc_fill_mmx_value (__m64 * _dst, __m64 _value){
+  __m64 t   = _value;
+  _dst[0]  = t;  _dst[1]  = t;  _dst[2]  = t;  _dst[3]  = t;
+  _dst[4]  = t;  _dst[5]  = t;  _dst[6]  = t;  _dst[7]  = t;
+  _dst[8]  = t;  _dst[9]  = t;  _dst[10] = t;  _dst[11] = t;
+  _dst[12] = t;  _dst[13] = t;  _dst[14] = t;  _dst[15] = t;
+}
+
+/* copy a block of 8 byte elements using different strides
+   (all eight rows are loaded before any row is stored) */
+static __inline void loc_blockcopy_mmx (unsigned char * _dst, int _dst_ystride,
+  unsigned char * _src, int _src_ystride){
+  __m64 a,b,c,d,e,f,g,h;
+  a = *(__m64*)(_src + 0 * _src_ystride);
+  b = *(__m64*)(_src + 1 * _src_ystride);
+  c = *(__m64*)(_src + 2 * _src_ystride);
+  d = *(__m64*)(_src + 3 * _src_ystride);
+  e = *(__m64*)(_src + 4 * _src_ystride);
+  f = *(__m64*)(_src + 5 * _src_ystride);
+  g = *(__m64*)(_src + 6 * _src_ystride);
+  h = *(__m64*)(_src + 7 * _src_ystride);
+  *(__m64*)(_dst + 0 * _dst_ystride) = a;
+  *(__m64*)(_dst + 1 * _dst_ystride) = b;
+  *(__m64*)(_dst + 2 * _dst_ystride) = c;
+  *(__m64*)(_dst + 3 * _dst_ystride) = d;
+  *(__m64*)(_dst + 4 * _dst_ystride) = e;
+  *(__m64*)(_dst + 5 * _dst_ystride) = f;
+  *(__m64*)(_dst + 6 * _dst_ystride) = g;
+  *(__m64*)(_dst + 7 * _dst_ystride) = h;
+}
+
+/*Dequantizes one fragment's coefficients into a local residue buffer, runs
+   the inverse DCT on it (skipped when only the DC term is present), and
+   reconstructs the fragment into the OC_FRAME_SELF reference buffer.
+  _state:      The decoder state holding the reference frame buffers.
+  _frag:       The fragment to reconstruct; its dc, mbmode, mv and buffer
+                fields are read.
+  _pli:        The color plane the fragment belongs to.
+  _dct_coeffs: The coefficients, indexed in zig-zag order.
+  _last_zzi:   The zig-zag index before the final token; see the long comment
+                in the body.
+  _ncoefs:     The number of coefficients to dequantize.
+  _dc_iquant:  The dequantizer applied to the DC coefficient.
+  _ac_iquant:  The dequantizers applied to the AC coefficients.*/
+void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
+ int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
+ ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
+  ogg_int16_t __declspec(align(16)) res_buf[64];
+  int dst_framei;
+  int dst_ystride;
+  int zzi;
+  /*_last_zzi is subtly different from an actual count of the number of
+     coefficients we decoded for this block.
+    It contains the value of zzi BEFORE the final token in the block was
+     decoded.
+    In most cases this is an EOB token (the continuation of an EOB run from a
+     previous block counts), and so this is the same as the coefficient count.
+    However, in the case that the last token was NOT an EOB token, but filled
+     the block up with exactly 64 coefficients, _last_zzi will be less than 64.
+    Provided the last token was not a pure zero run, the minimum value it can
+     be is 46, and so that doesn't affect any of the cases in this routine.
+    However, if the last token WAS a pure zero run of length 63, then _last_zzi
+     will be 1 while the number of coefficients decoded is 64.
+    Thus, we will trigger the following special case, where the real
+     coefficient count would not.
+    Note also that a zero run of length 64 will give _last_zzi a value of 0,
+     but we still process the DC coefficient, which might have a non-zero value
+     due to DC prediction.
+    Although convoluted, this is arguably the correct behavior: it allows us to
+     dequantize fewer coefficients and use a smaller transform when the block
+     ends with a long zero run instead of a normal EOB token.
+    It could be smarter... multiple separate zero runs at the end of a block
+     will fool it, but an encoder that generates these really deserves what it
+     gets.
+    Needless to say we inherited this approach from VP3.*/
+  /*Special case only having a DC component.*/
+  if(_last_zzi<2){
+    __m64 p;
+    /*Why is the iquant product rounded in this case and no others? Who knows.*/
+    p = _m_from_int((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
+    /* broadcast 16 bits into all 4 mmx subregisters */
+    p = _m_punpcklwd (p,p);
+    p = _m_punpckldq (p,p);
+    loc_fill_mmx_value ((__m64 *)res_buf, p);
+  }
+  else{
+    /*Then, fill in the remainder of the coefficients with 0's, and perform
+       the iDCT.*/
+    /*First zero the buffer.*/
+    /*On K7, etc., this could be replaced with movntq and sfence.*/
+    loc_fill_mmx_value ((__m64 *)res_buf, _mm_setzero_si64());
+
+    res_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
+    /*This is planned to be rewritten in MMX.*/
+    for(zzi=1;zzi<_ncoefs;zzi++)
+    {
+      int ci;
+      ci=OC_FZIG_ZAG[zzi];
+      res_buf[OC_FZIG_ZAGMMX[zzi]]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*
+       _ac_iquant[ci]);
+    }
+
+    if(_last_zzi<10){
+      oc_idct8x8_10_mmx(res_buf);
+    }
+    else {
+      oc_idct8x8_mmx(res_buf);
+    }
+  }
+  /*Fill in the target buffer.*/
+  dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
+  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
+  /*For now ystride values in all ref frames assumed to be equal.*/
+  if(_frag->mbmode==OC_MODE_INTRA){
+    oc_frag_recon_intra_mmx(_frag->buffer[dst_framei],dst_ystride,res_buf);
+  }
+  else{
+    int ref_framei;
+    int ref_ystride;
+    int mvoffsets[2];
+    ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
+    ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
+    if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],
+     _frag->mv[1],ref_ystride,_pli)>1){
+      oc_frag_recon_inter2_mmx(_frag->buffer[dst_framei],dst_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,res_buf);
+    }
+    else{
+      oc_frag_recon_inter_mmx(_frag->buffer[dst_framei],dst_ystride,
+       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,res_buf);
+    }
+  }
+
+  _mm_empty();
+}
+
+
+/*Copies the 8x8 block of every listed fragment from one reference frame
+   buffer to another.
+  _state:     The decoder state holding the fragments and frame buffers.
+  _fragis:    The indices (into _state->frags) of the fragments to copy.
+  _nfragis:   The number of entries in _fragis.
+  _dst_frame: The OC_FRAME_* reference frame to copy into.
+  _src_frame: The OC_FRAME_* reference frame to copy from.
+  _pli:       The color plane the fragments belong to (selects the strides).*/
+void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
+ int _nfragis,int _dst_frame,int _src_frame,int _pli){
+  const int *fragi;
+  const int *fragi_end;
+  int        dst_framei;
+  int        dst_ystride;
+  int        src_framei;
+  int        src_ystride;
+  dst_framei=_state->ref_frame_idx[_dst_frame];
+  src_framei=_state->ref_frame_idx[_src_frame];
+  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
+  src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
+  fragi_end=_fragis+_nfragis;
+  for(fragi=_fragis;fragi<fragi_end;fragi++){
+    oc_fragment *frag;
+    frag=_state->frags+*fragi;
+    loc_blockcopy_mmx (frag->buffer[dst_framei], dst_ystride,
+      frag->buffer[src_framei], src_ystride);
+  }
+  _m_empty();
+}
+
+#endif
diff --git a/media/libtheora/lib/dec/x86_vc/x86int.h b/media/libtheora/lib/dec/x86_vc/x86int.h
new file mode 100644
index 000000000000..be5016100149
--- /dev/null
+++ b/media/libtheora/lib/dec/x86_vc/x86int.h
@@ -0,0 +1,49 @@
+/********************************************************************
+ *                                                                  *
+ * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.
* + * * + * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * + * * + ******************************************************************** + + function: + last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $ + + ********************************************************************/ + +#if !defined(_x86_x86int_vc_H) +# define _x86_x86int_vc_H (1) +# include "../../internal.h" + +void oc_state_vtable_init_x86(oc_theora_state *_state); + +void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride, + const ogg_int16_t *_residue); + +void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue); + +void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride, + const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2, + int _src2_ystride,const ogg_int16_t *_residue); + +void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis, + int _nfragis,int _dst_frame,int _src_frame,int _pli); + +void oc_restore_fpu_mmx(void); + +void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag, + int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs, + ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]); + +void oc_idct8x8_mmx(ogg_int16_t _y[64]); +void oc_idct8x8_10_mmx(ogg_int16_t _y[64]); + +void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv, + int _refi,int _pli,int _fragy0,int _fragy_end); + +#endif diff --git a/media/libtheora/lib/dec/x86_vc/x86state.c b/media/libtheora/lib/dec/x86_vc/x86state.c new file mode 100644 index 000000000000..735390823f47 --- /dev/null +++ b/media/libtheora/lib/dec/x86_vc/x86state.c @@ -0,0 +1,41 @@ +/******************************************************************** + * * + * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. 
*
+ * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
+ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
+ * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
+ *                                                                  *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
+ * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
+ *                                                                  *
+ ********************************************************************
+
+  function:
+    last mod: $Id: x86state.c 15427 2008-10-21 02:36:19Z xiphmont $
+
+ ********************************************************************/
+
+#if defined(USE_ASM)
+
+#include "x86int.h"
+#include "../../cpu.c"
+
+/*Records the CPU feature flags in _state and fills its opt_vtable: every
+   entry first receives the default from oc_state_vtable_init_c(), then the
+   entries listed below are replaced by their MMX versions when the flags
+   report OC_CPU_X86_MMX.*/
+void oc_state_vtable_init_x86(oc_theora_state *_state){
+  oc_base_opt_vtable *vtable;
+  _state->cpu_flags=oc_cpu_flags_get();
+
+  /*Install the defaults; whatever is not overridden below keeps them.*/
+  oc_state_vtable_init_c(_state);
+
+  /*Nothing to override unless MMX is available.*/
+  if(!(_state->cpu_flags&OC_CPU_X86_MMX))return;
+  vtable=&_state->opt_vtable;
+  vtable->frag_recon_intra=oc_frag_recon_intra_mmx;
+  vtable->frag_recon_inter=oc_frag_recon_inter_mmx;
+  vtable->frag_recon_inter2=oc_frag_recon_inter2_mmx;
+  vtable->restore_fpu=oc_restore_fpu_mmx;
+  vtable->state_frag_copy=oc_state_frag_copy_mmx;
+  vtable->state_frag_recon=oc_state_frag_recon_mmx;
+  vtable->state_loop_filter_frag_rows=oc_state_loop_filter_frag_rows_mmx;
+}
+
+#endif
diff --git a/media/libtheora/lib/internal.h b/media/libtheora/lib/internal.h
index a8721fec1976..0413a355aaa5 100644
--- a/media/libtheora/lib/internal.h
+++ b/media/libtheora/lib/internal.h
@@ -6,12 +6,12 @@
  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.
* * * * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 * - * by the Xiph.Org Foundation http://www.xiph.org/ * + * by the Xiph.Org Foundation and contributors http://www.xiph.org/ * * * ******************************************************************** function: - last mod: $Id: internal.h 14714 2008-04-12 01:04:43Z giles $ + last mod: $Id: internal.h 15469 2008-10-30 12:49:42Z tterribe $ ********************************************************************/ @@ -27,19 +27,6 @@ # include "dec/huffman.h" # include "dec/quant.h" -/* debug macros */ -#if defined(_MSC_VER) && _MSC_VER < 1400 -static const dframe = 0; -static void TH_DEBUG(const char *fmt, ...) {} -#elif defined(_TH_DEBUG_) -#include -extern long dframe; -extern FILE *debugout; -#define TH_DEBUG(...) fprintf(debugout, __VA_ARGS__) -#else -#define TH_DEBUG(...) -#endif - /*Thank you Microsoft, I know the order of operations.*/ # if defined(_MSC_VER) # pragma warning(disable:4554) /* order of operations */ @@ -47,7 +34,7 @@ extern FILE *debugout; # endif /*This library's version.*/ -# define OC_VENDOR_STRING "Xiph.Org libTheora I 20071025 3 2 1" +# define OC_VENDOR_STRING "Xiph.Org libTheora I 20081020 3 2 1" /*Theora bitstream version.*/ # define TH_VERSION_MAJOR (3) @@ -224,10 +211,14 @@ typedef struct{ unsigned invalid:1; /*The quality index used for this fragment's AC coefficients.*/ unsigned qi:6; - /*The mode of the macroblock this fragment belongs to.*/ - int mbmode:8; - /*The prediction-corrected DC component.*/ - int dc:16; + /*The mode of the macroblock this fragment belongs to. + Note that the C standard requires an explicit signed keyword for bitfield + types, since some compilers may treat them as unsigned without it.*/ + signed int mbmode:8; + /*The prediction-corrected DC component. 
+ Note that the C standard requires an explicit signed keyword for bitfield + types, since some compilers may treat them as unsigned without it.*/ + signed int dc:16; /*A pointer to the portion of an image covered by this fragment in several images. The first three are reconstructed frame buffers, while the last is the @@ -241,14 +232,6 @@ typedef struct{ oc_border_info *border; /*The motion vector used for this fragment.*/ oc_mv mv; - -#ifdef _TH_DEBUG_ - int quant[64]; - int freq[64]; - int time[64]; - int recon[64]; - int loop[64]; -#endif }oc_fragment; @@ -299,77 +282,77 @@ typedef struct{ /*Common state information between the encoder and decoder.*/ struct oc_theora_state{ /*The stream information.*/ - th_info info; + th_info info; /*Table for shared accelerated functions.*/ - oc_base_opt_vtable opt_vtable; + oc_base_opt_vtable opt_vtable; /*CPU flags to detect the presence of extended instruction sets.*/ - ogg_uint32_t cpu_flags; + ogg_uint32_t cpu_flags; /*The fragment plane descriptions.*/ - oc_fragment_plane fplanes[3]; + oc_fragment_plane fplanes[3]; /*The total number of fragments in a single frame.*/ - int nfrags; + int nfrags; /*The list of fragments, indexed in image order.*/ - oc_fragment *frags; + oc_fragment *frags; /*The total number of super blocks in a single frame.*/ - int nsbs; + int nsbs; /*The list of super blocks, indexed in image order.*/ - oc_sb *sbs; + oc_sb *sbs; /*The number of macro blocks in the X direction.*/ - int nhmbs; + int nhmbs; /*The number of macro blocks in the Y direction.*/ - int nvmbs; + int nvmbs; /*The total number of macro blocks.*/ - int nmbs; + int nmbs; /*The list of macro blocks, indexed in super block order. 
That is, the macro block corresponding to the macro block mbi in (luma plane) super block sbi is (sbi<<2|mbi).*/ - oc_mb *mbs; + oc_mb *mbs; /*The list of coded fragments, in coded order.*/ - int *coded_fragis; + int *coded_fragis; /*The number of coded fragments in each plane.*/ - int ncoded_fragis[3]; + int ncoded_fragis[3]; /*The list of uncoded fragments. This just past the end of the list, which is in reverse order, and uses the same block of allocated storage as the coded_fragis list.*/ - int *uncoded_fragis; + int *uncoded_fragis; /*The number of uncoded fragments in each plane.*/ - int nuncoded_fragis[3]; + int nuncoded_fragis[3]; /*The list of coded macro blocks in the Y plane, in coded order.*/ - int *coded_mbis; + int *coded_mbis; /*The number of coded macro blocks in the Y plane.*/ - int ncoded_mbis; + int ncoded_mbis; /*A copy of the image data used to fill the input pointers in each fragment. If the data pointers or strides change, these input pointers must be re-populated.*/ - th_ycbcr_buffer input; + th_ycbcr_buffer input; /*The number of unique border patterns.*/ - int nborders; + int nborders; /*The storage for the border info for all border fragments. 
This data is pointed to from the appropriate fragments.*/ - oc_border_info borders[16]; + oc_border_info borders[16]; /*The index of the buffers being used for each OC_FRAME_* reference frame.*/ - int ref_frame_idx[3]; + int ref_frame_idx[3]; /*The actual buffers used for the previously decoded frames.*/ - th_ycbcr_buffer ref_frame_bufs[3]; + th_ycbcr_buffer ref_frame_bufs[3]; /*The storage for the reference frame buffers.*/ - unsigned char *ref_frame_data; + unsigned char *ref_frame_data; /*The frame number of the last keyframe.*/ - ogg_int64_t keyframe_num; + ogg_int64_t keyframe_num; /*The frame number of the current frame.*/ - ogg_int64_t curframe_num; + ogg_int64_t curframe_num; /*The granpos of the current frame.*/ - ogg_int64_t granpos; + ogg_int64_t granpos; /*The type of the current frame.*/ - int frame_type; + int frame_type; /*The quality indices of the current frame.*/ - int qis[3]; + int qis[3]; /*The number of quality indices used in the current frame.*/ - int nqis; + int nqis; /*The dequantization tables.*/ - oc_quant_table *dequant_tables[2][3]; - oc_quant_tables dequant_table_data[2][3]; + oc_quant_table *dequant_tables[2][3]; + oc_quant_tables dequant_table_data[2][3]; /*Loop filter strength parameters.*/ - unsigned char loop_filter_limits[64]; + unsigned char loop_filter_limits[64]; }; diff --git a/media/libtheora/update.sh b/media/libtheora/update.sh index c60c4e1b106a..5512c5b64f76 100644 --- a/media/libtheora/update.sh +++ b/media/libtheora/update.sh @@ -22,10 +22,15 @@ cp $1/lib/dec/x86/x86state.c ./lib/dec/x86/x86state.c cp $1/lib/dec/x86/x86int.h ./lib/dec/x86/x86int.h cp $1/lib/dec/x86/mmxstate.c ./lib/dec/x86/mmxstate.c cp $1/lib/dec/x86/mmxidct.c ./lib/dec/x86/mmxidct.c -cp $1/lib/dec/bitwise.h ./lib/dec/bitwise.h +cp $1/lib/dec/x86_vc/mmxfrag.c ./lib/dec/x86_vc/mmxfrag.c +cp $1/lib/dec/x86_vc/mmxidct.c ./lib/dec/x86_vc/mmxidct.c +cp $1/lib/dec/x86_vc/mmxloopfilter.c ./lib/dec/x86_vc/mmxloopfilter.c +cp $1/lib/dec/x86_vc/mmxstate.c 
./lib/dec/x86_vc/mmxstate.c +cp $1/lib/dec/x86_vc/x86int.h ./lib/dec/x86_vc/x86int.h +cp $1/lib/dec/x86_vc/x86state.c ./lib/dec/x86_vc/x86state.c +cp $1/lib/dec/bitpack.h ./lib/dec/bitpack.h cp $1/lib/dec/quant.c ./lib/dec/quant.c -cp $1/lib/dec/bitwise.c ./lib/dec/bitwise.c -cp $1/lib/dec/enquant.h ./lib/dec/enquant.h +cp $1/lib/dec/bitpack.c ./lib/dec/bitpack.c cp $1/lib/dec/internal.c ./lib/dec/internal.c cp $1/lib/dec/huffdec.h ./lib/dec/huffdec.h cp $1/lib/dec/dct.h ./lib/dec/dct.h @@ -47,4 +52,4 @@ cp $1/lib/internal.h ./lib/internal.h cp $1/include/theora/theora.h ./include/theora/theora.h cp $1/include/theora/theoradec.h ./include/theora/theoradec.h cp $1/include/theora/codec.h ./include/theora/codec.h -patch -p3