Bug 462082 - Update libtheora to latest stable version - rs=roc

This commit is contained in:
Chris Double 2009-01-22 13:00:49 +13:00
Родитель b4764201b3
Коммит 82f24567c3
54 изменённых файлов: 2739 добавлений и 1287 удалений

Просмотреть файл

@ -0,0 +1,40 @@
diff --git a/media/libtheora/lib/dec/decode.c b/media/libtheora/lib/dec/decode.c
--- a/media/libtheora/lib/dec/decode.c
+++ b/media/libtheora/lib/dec/decode.c
@@ -1803,8 +1803,8 @@
static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
int _dc_scale,int _sharp_mod,int _strong){
- static const int MOD_MAX[2]={24,32};
- static const int MOD_SHIFT[2]={1,0};
+ static const int OCDB_MOD_MAX[2]={24,32};
+ static const int OCDB_MOD_SHIFT[2]={1,0};
const unsigned char *psrc;
const unsigned char *src;
const unsigned char *nsrc;
@@ -1814,14 +1814,14 @@
int mod_hi;
int by;
int bx;
- mod_hi=OC_MINI(3*_dc_scale,MOD_MAX[_strong]);
+ mod_hi=OC_MINI(3*_dc_scale,OCDB_MOD_MAX[_strong]);
dst=_idata;
src=dst;
psrc=src-(_ystride&-!(_b&4));
for(by=0;by<9;by++){
for(bx=0;bx<8;bx++){
int mod;
- mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<MOD_SHIFT[_strong]);
+ mod=32+_dc_scale-(abs(src[bx]-psrc[bx])<<OCDB_MOD_SHIFT[_strong]);
vmod[(by<<3)+bx]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
}
psrc=src;
@@ -1833,7 +1833,7 @@
src=nsrc;
for(by=0;by<8;by++){
int mod;
- mod=32+_dc_scale-(abs(*src-*psrc)<<MOD_SHIFT[_strong]);
+ mod=32+_dc_scale-(abs(*src-*psrc)<<OCDB_MOD_SHIFT[_strong]);
hmod[(bx<<3)+by]=mod<-64?_sharp_mod:OC_CLAMPI(0,mod,mod_hi);
psrc+=_ystride;
src+=_ystride;

Просмотреть файл

@ -4,27 +4,35 @@ Monty <monty@xiph.org>
Ralph Giles
Timothy B. Terriberry
- Ongoing development
Dan B. Miller
- Pre alpha3 development
Wim Tayman
Dan Lenski
- MMX optimized functions
Aaron Colwell
Thomas Vander Stichele
Jan Gerber
Conrad Parker
- Bug fixes, enhancements, build systems.
Mauricio Piacentini
- Original win32 projects and example ports
- dump_video example
- VP3->Theora transcoder
Silvia Pfeiffer
- Figures for the spec
Vegard Nossum
- Original png2theora implementation
Rudolf Marek
Nils Pipenbrinck
- MMX optimizations
Michael Smith
Andre Pang
calc

Просмотреть файл

@ -1,3 +1,32 @@
libtheora 1.0 (2008 November 3)
- Merge x86 assembly for forward DCT from Thusnelda branch.
- Update 32 bit MMX with loop filter fix.
- Check for an uninitialized state before dereferencing in propagating
decode calls.
- Remove all TH_DEBUG statements.
- Rename the bitpacker source files copied from libogg to avoid
confusing simple build systems using both libraries.
- Declare bitfield entries to be explicitly signed for Solaris cc.
- Set quantization parameters to default values when an empty buffer is
passed with TH_ENCCTL_SET_QUANT_PARAMS.
- Split encoder and decoder tests depending on configure settings.
- Return lstylex.sty to the distribution.
- Disable inline assembly on gcc versions prior to 3.1.
- Remove extern references for OC_*_QUANT_MIN.
- Make various data tables static const so they can be read-only.
- Remove ENCCTL codes from the old encoder API.
- Implement TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE ctl.
- Fix segfault when exactly one of the width or height is not a multiple
of 16, but the other is.
- Compute the correct vertical offset for chroma.
- cpuid assembly fix for MSVC.
- Add VS2008 project files.
- Build updates for 64-bit platforms, Mingw32, VS and XCode.
- Do not clobber the cropping rectangle.
- Declare ourselves 1.0final to pkg-config to sort after beta releases.
- Fix the scons build to include asm in libtheoradec/enc.
libtheora 1.0beta3 (2008 April 16)
- Build new libtheoradec and libtheoraenc libraries
@ -5,12 +34,12 @@ libtheora 1.0beta3 (2008 April 16)
not be considered stable yet.
- Change granule_frame() to return an index as documented.
This is a change of behaviour from 1.0beta1.
- Document that granule_time() returns the end of the
presentation interval.
- Document that granule_time() returns the end of the
presentation interval.
- Use a custom copy of the libogg bitpacker in the decoder
to avoid function call overhead.
- MMX code improved and ported to MSVC.
- Fix a problem with the MMX code on SELinux
- Fix a problem with the MMX code on SELinux.
- Fix a problem with decoder quantizer initialization.
- Fix a page queue problem with png2theora.
- Improved robustness.

Просмотреть файл

@ -1,4 +1,4 @@
Copyright (C) 2002-2007 Xiph.org Foundation
Copyright (C) 2002-2008 Xiph.Org Foundation and contributors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@ -11,7 +11,7 @@ notice, this list of conditions and the following disclaimer.
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
- Neither the name of the Xiph.Org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

Просмотреть файл

@ -1,5 +1,5 @@
-------------------------------------------------------------------------
The Xiph.org Foundation's libtheora 1.0beta1 release
The Xiph.org Foundation's libtheora 1.0 release
-------------------------------------------------------------------------
*** What is Theora?
@ -8,8 +8,8 @@ Theora is Xiph.Org's first publicly released video codec, intended
for use within the Foundation's Ogg multimedia streaming system.
Theora is derived directly from On2's VP3 codec; currently the
encoders are nearly identical, but Theora will make use of new
features supported by the decoder to improve over what is
is possible with VP3.
features supported by the decoder to improve on what is possible
with VP3.
*** Where is Theora?
@ -18,12 +18,6 @@ can be gotten from www.theora.org or the main Xiph.Org site at
www.xiph.org. Development source is kept in an open subversion
repository, see http://theora.org/svn/ for instructions.
*** What is the goal of this release?
This is the first beta release of the 1.0 reference implementation.
It is intended to completely support the decoder specification, and
gather feedback on the implementation before declaring it stable.
-------------------------------------------------------------------------
Getting started with the code
-------------------------------------------------------------------------
@ -32,30 +26,39 @@ Getting started with the code
Requirements summary:
For libtheora:
For libtheora:
libogg 1.1 or newer.
For example encoder:
as above
as above,
libvorbis and libvorbisenc 1.0.1 or newer.
For creating a source distribution package:
as above,
Doxygen to build the API documentation,
pdflatex and fig2dev to build the format specification
(transfig package in Ubuntu).
For the player only:
as above,
as above,
SDL (Simple Direct media Layer) libraries and headers
OSS audio driver and development headers
SDL (Simple Direct media Layer) libraries and headers,
OSS audio driver and development headers.
The provided build system is the GNU automake/autoconf system, and
the main library, libtheora, should already build smoothly on any
system. Failure of libtheora to build on a GNU-enabled system is
considered a bug; please report problems to theora-dev@xiph.org.
Some windows build support is included in the win32 directory.
Windows build support is included in the win32 directory.
Project files for Apple XCode are included in the macosx directory.
There is also an experimental scons build.
@ -83,7 +86,7 @@ files in that format.
*** How do I use the sample player?
The sample player takes an Ogg file on standard in; the file may be
audio alone, video alone or video with audio.
audio alone, video alone or video with audio.
*** What other tools are available?

Просмотреть файл

@ -1,7 +1,7 @@
The source from this directory was copied from the libtheora-1.0beta3
The source from this directory was copied from the libtheora-1.0
source distribution using the update.sh script. The changes made were
those applied by update.sh, the addition/update of Makefile.in files
for the Mozilla build system and the patch in bug 450265.
for the Mozilla build system and the patch in the bug below.
Bug 455357 - WinCE LibTheora Pre-defined Macro usage in local variable
455357_wince_local_variable_macro_clash_patch

Просмотреть файл

@ -1,27 +0,0 @@
Index: /trunk/theora/lib/dec/decapiwrapper.c
===================================================================
--- /trunk/theora/lib/dec/decapiwrapper.c (revision 14385)
+++ /trunk/theora/lib/dec/decapiwrapper.c (revision 15144)
@@ -168,10 +168,13 @@
ogg_int64_t gp;
int ret;
+
+ if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT;
api=(th_api_wrapper *)_td->i->codec_setup;
+ if(!api || !api->decode)return OC_FAULT;
ret=th_decode_packetin(api->decode,_op,&gp);
#ifdef _TH_DEBUG_
dframe++;
-#endif
+#endif
if(ret<0)return OC_BADPACKET;
@@ -185,5 +188,7 @@
int ret;
+ if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT;
api=(th_api_wrapper *)_td->i->codec_setup;
+ if(!api || !api->decode)return OC_FAULT;
ret=th_decode_ycbcr_out(api->decode,buf);
if(ret>=0){

Просмотреть файл

@ -112,13 +112,18 @@ typedef enum{
* specification</a>, Section 4.4, for details on the precise sample
* locations.*/
typedef enum{
/**Chroma decimation by 2 in both the X and Y directions (4:2:0).*/
/**Chroma decimation by 2 in both the X and Y directions (4:2:0).
The Cb and Cr chroma planes are half the width and half the height of the
luma plane.*/
TH_PF_420,
/**Currently reserved.*/
TH_PF_RSVD,
/**Chroma decimation by 2 in the X direction (4:2:2).*/
/**Chroma decimation by 2 in the X direction (4:2:2).
The Cb and Cr chroma planes are half the width of the luma plane, but full
height.*/
TH_PF_422,
/**No chroma decimation (4:4:4).*/
/**No chroma decimation (4:4:4).
The Cb and Cr chroma planes are full width and full height.*/
TH_PF_444,
/**The total number of currently defined pixel formats.*/
TH_PF_NFORMATS

Просмотреть файл

@ -56,19 +56,19 @@
#define PACKAGE_NAME "libtheora"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "libtheora 1.0beta3"
#define PACKAGE_STRING "libtheora 1.0"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "libtheora"
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.0beta3"
#define PACKAGE_VERSION "1.0"
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define to exclude encode support from the build */
/* #undef THEORA_DISABLE_ENCODE */
/* Define to exclude floating point code from the build */
/* #undef THEORA_DISABLE_FLOAT */
@ -77,4 +77,4 @@
/* Version number of package */
#define VERSION "1.0beta3"
#define VERSION "1.0"

Просмотреть файл

@ -315,6 +315,20 @@ typedef struct theora_comment{
*/
#define TH_DECCTL_SET_PPLEVEL (3)
/**Sets the maximum distance between key frames.
* This can be changed during an encode, but will be bounded by
* <tt>1<<th_info#keyframe_granule_shift</tt>.
* If it is set before encoding begins, th_info#keyframe_granule_shift will
* be enlarged appropriately.
*
* \param[in] buf <tt>ogg_uint32_t</tt>: The maximum distance between key
* frames.
* \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
* \retval TH_FAULT \a theora_state or \a buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
* \retval TH_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
/**Set the granule position.
* Call this after a seek, to update the internal granulepos
* in the decoder, to ensure that subsequent frames are marked
@ -332,20 +346,6 @@ typedef struct theora_comment{
* \ref decctlcodes "decoder control codes".
* Keep any experimental or vendor-specific values above \c 0x8000.*/
/*@{*/
/**Sets the Huffman tables to use.
* The tables are copied, not stored by reference, so they can be freed after
* this call.
* <tt>NULL</tt> may be specified to revert to the default tables.
*
* \param[in] buf <tt>#th_huff_code[#TH_NHUFFMAN_TABLES][#TH_NDCT_TOKENS]</tt>
* \retval TH_FAULT \a theora_state is <tt>NULL</tt>.
* \retval TH_EINVAL Encoding has already begun or one or more of the given
* tables is not full or prefix-free, \a buf is
* <tt>NULL</tt> and \a buf_sz is not zero, or \a buf is
* non-<tt>NULL</tt> and \a buf_sz is not
* <tt>sizeof(#th_huff_code)*#TH_NHUFFMAN_TABLES*#TH_NDCT_TOKENS</tt>.
* \retval TH_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_HUFFMAN_CODES (0)
/**Sets the quantization parameters to use.
* The parameters are copied, not stored by reference, so they can be freed
* after this call.
@ -365,19 +365,6 @@ typedef struct theora_comment{
* <tt>sizeof(#th_quant_info)</tt>.
* \retval TH_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_QUANT_PARAMS (2)
/**Sets the maximum distance between key frames.
* This can be changed during an encode, but will be bounded by
* <tt>1<<th_info#keyframe_granule_shift</tt>.
* If it is set before encoding begins, th_info#keyframe_granule_shift will
* be enlarged appropriately.
*
* \param[in] buf <tt>ogg_uint32_t</tt>: The maximum distance between key
* frames.
* \param[out] buf <tt>ogg_uint32_t</tt>: The actual maximum distance set.
* \retval TH_FAULT \a theora_state or \a buf is <tt>NULL</tt>.
* \retval TH_EINVAL \a buf_sz is not <tt>sizeof(ogg_uint32_t)</tt>.
* \retval TH_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE (4)
/**Disables any encoder features that would prevent lossless transcoding back
* to VP3.
* This primarily means disabling block-level QI values and not using 4MV mode
@ -434,41 +421,6 @@ typedef struct theora_comment{
* \retval TH_IMPL Not supported by this implementation in the current
* encoding mode.*/
#define TH_ENCCTL_SET_SPLEVEL (14)
/**Puts the encoder in VBR mode.
* This can be done at any time during the encoding process, with different
* configuration parameters, to encode different regions of the video segment
* with different qualities.
* See the #th_info struct documentation for details on how the default
* encoding mode is chosen.
*
* \param[in] buf <tt>#th_vbr_cfg</tt>: the configuration parameters.
* This may be <tt>NULL</tt>, in which case the current VBR
* configuration is unchanged.
* The default is to use the QI setting passed in via the
* #th_info struct when the encoder was initialized, with a
* full range of admissible quantizers.
* \retval OC_EFAULT \a theora_state is <tt>NULL</tt>.
* \retval TH_EINVAL The configuration parameters do not meet one of their
* stated requirements, \a buf is <tt>NULL</tt> and
* \a buf_sz is not zero, or \a buf is non-<tt>NULL</tt>
* and \a buf_sz is not <tt>sizeof(#th_vbr_cfg)</tt>.
* \retval TH_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SETUP_VBR (16)
/**Puts the encoder in CQI mode.
* This can be done at any time during the encoding process, with different QI
* values.
* See the #th_info struct documentation for details on how the default
* encoding mode is chosen.
*
* \param[in] buf <tt>#th_cqi_cfg</tt>: the configuration parameters.
* This may be <tt>NULL</tt>, in which case the current CQI
* configuration is unchanged.
* The default is to use the QI setting passed in via the
* #th_info struct when the encoder was initialized.
* \retval OC_EFAULT \a theora_state is <tt>NULL</tt>.
* \retval TH_EINVAL \a buf_sz is not <tt>sizeof(#th_cqi_cfg)</tt>.
* \retval TH_IMPL Not supported by this implementation.*/
#define TH_ENCCTL_SETUP_CQI (18)
/*@}*/
#define OC_FAULT -1 /**< General failure */

Просмотреть файл

@ -20,6 +20,7 @@
#if !defined(_O_THEORA_THEORADEC_H_)
# define _O_THEORA_THEORADEC_H_ (1)
# include <stddef.h>
# include <ogg/ogg.h>
# include "codec.h"
@ -213,6 +214,22 @@ typedef struct th_setup_info th_setup_info;
extern int th_decode_headerin(th_info *_info,th_comment *_tc,
th_setup_info **_setup,ogg_packet *_op);
/**Allocates a decoder instance.
*
* <b>Security Warning:</b> The Theora format supports very large frame sizes,
* potentially even larger than the address space of a 32-bit machine, and
* creating a decoder context allocates the space for several frames of data.
* If the allocation fails here, your program will crash, possibly at some
* future point because the OS kernel returned a valid memory range and will
* only fail when it tries to map the pages in it the first time they are
* used.
* Even if it succeeds, you may experience a denial of service if the frame
* size is large enough to cause excessive paging.
* If you are integrating libtheora in a larger application where such things
* are undesirable, it is highly recommended that you check the frame size in
* \a _info before calling this function and refuse to decode streams where it
* is larger than some reasonable maximum.
* libtheora will not check this for you, because there may be machines that
* can handle such streams and applications that wish to.
* \param _info A #th_info struct filled via th_decode_headerin().
* \param _setup A #th_setup_info handle returned via
* th_decode_headerin().
@ -253,7 +270,7 @@ extern int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
* The player can skip the call to th_decode_ycbcr_out(),
* as the contents of the decoded frame buffer have not
* changed.
* \retval TH_EFAULT \a _dec or _op was <tt>NULL</tt>.
* \retval TH_EFAULT \a _dec or \a _op was <tt>NULL</tt>.
* \retval TH_EBADPACKET \a _op does not contain encoded video data.
* \retval TH_EIMPL The video data uses bitstream features which this
* library does not support.*/

Просмотреть файл

@ -48,12 +48,10 @@ FORCE_STATIC_LIB= 1
DEFINES += -DTHEORA_DISABLE_ENCODE
ifeq ($(findstring 86,$(OS_TEST)), 86)
ifneq ($(OS_ARCH),WINNT)
ifneq ($(OS_ARCH),SunOS)
DEFINES += -DOC_X86ASM -DUSE_ASM
endif
endif
endif
VPATH := $(srcdir) $(srcdir)/dec
@ -62,7 +60,7 @@ CSRCS = \
huffdec.c \
quant.c \
dequant.c \
bitwise.c \
bitpack.c \
internal.c \
decinfo.c \
decapiwrapper.c \
@ -75,6 +73,17 @@ CSRCS = \
$(NULL)
ifeq ($(findstring 86,$(OS_TEST)), 86)
ifeq ($(OS_ARCH),WINNT)
VPATH += $(srcdir)/dec/x86_vc
CSRCS += \
mmxfrag.c \
mmxloopfilter.c \
x86state.c \
mmxstate.c \
mmxidct.c \
$(NULL)
else
VPATH += $(srcdir)/dec/x86
CSRCS += \
@ -84,6 +93,7 @@ CSRCS += \
mmxidct.c \
$(NULL)
endif
endif
include $(topsrcdir)/config/rules.mk

Просмотреть файл

@ -56,19 +56,19 @@
#define PACKAGE_NAME "libtheora"
/* Define to the full name and version of this package. */
#define PACKAGE_STRING "libtheora 1.0beta3"
#define PACKAGE_STRING "libtheora 1.0"
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "libtheora"
/* Define to the version of this package. */
#define PACKAGE_VERSION "1.0beta3"
#define PACKAGE_VERSION "1.0"
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Define to exclude encode support from the build */
/* #undef THEORA_DISABLE_ENCODE */
/* Define to exclude floating point code from the build */
/* #undef THEORA_DISABLE_FLOAT */
@ -77,4 +77,4 @@
/* Version number of package */
#define VERSION "1.0beta3"
#define VERSION "1.0"

Просмотреть файл

@ -6,157 +6,222 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
CPU capability detection for x86 processors.
Originally written by Rudolf Marek.
function:
last mod: $Id: cpu.c 14718 2008-04-12 08:36:58Z conrad $
last mod: $Id: cpu.c 15427 2008-10-21 02:36:19Z xiphmont $
********************************************************************/
#include "cpu.h"
#if !defined(USE_ASM)
ogg_uint32_t oc_cpu_flags_get(void){
static ogg_uint32_t oc_cpu_flags_get(void){
return 0;
}
#else
# if !defined(_MSC_VER)
# if defined(__amd64__)||defined(__x86_64__)
/*On x86-64, gcc seems to be able to figure out how to save %rbx for us when
compiling with -fPIC.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
__asm__ __volatile__( \
"cpuid\n\t" \
:[eax]"=a"(_eax),[ebx]"=b"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
:"a"(_op) \
:"cc" \
)
# else
/*On x86-32, not so much.*/
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
__asm__ __volatile__( \
"xchgl %%ebx,%[ebx]\n\t" \
"cpuid\n\t" \
"xchgl %%ebx,%[ebx]\n\t" \
:[eax]"=a"(_eax),[ebx]"=r"(_ebx),[ecx]"=c"(_ecx),[edx]"=d"(_edx) \
:"a"(_op) \
:"cc" \
)
# endif
# else
/*Why does MSVC need this complicated rigamarole?
At this point I honestly do not care.*/
#else /* USE_ASM */
# if defined(_MSC_VER)
/* Visual C cpuid helper function. For VS2005 we could
as well use the _cpuid builtin, but that wouldn't work
for VS2003 users, so we do it in inline assembler */
static void oc_cpuid_helper (ogg_uint32_t * CpuInfo, ogg_uint32_t op){
_asm {
mov eax, [op]
mov esi, CpuInfo
/*Visual C cpuid helper function.
For VS2005 we could as well use the _cpuid builtin, but that wouldn't work
for VS2003 users, so we do it in inline assembler.*/
static void oc_cpuid_helper(ogg_uint32_t _cpu_info[4],ogg_uint32_t _op){
_asm{
mov eax,[_op]
mov esi,_cpu_info
cpuid
mov [esi + 0], eax
mov [esi + 4], ebx
mov [esi + 8], ecx
mov [esi +12], edx
mov [esi+0],eax
mov [esi+4],ebx
mov [esi+8],ecx
mov [esi+12],edx
}
}
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
{ \
ogg_uint32_t nfo[4]; \
oc_cpuid_helper (nfo, (_op)); \
(_eax) = nfo[0],(_ebx) = nfo[1]; \
(_ecx) = nfo[2],(_edx) = nfo[3]; \
do{ \
ogg_uint32_t cpu_info[4]; \
oc_cpuid_helper(cpu_info,_op); \
(_eax)=cpu_info[0]; \
(_ebx)=cpu_info[1]; \
(_ecx)=cpu_info[2]; \
(_edx)=cpu_info[3]; \
}while(0)
/*Visual C helper used to probe for cpuid support on x86-32.
  It saves EFLAGS, reads a copy of it, flips bit 21 (0x200000) in that copy,
   writes the modified value back to EFLAGS, reads EFLAGS again, and finally
   restores the flags that were saved on entry.
  _eax: Receives the EFLAGS value read back after the attempted bit flip.
  _ebx: Receives the EFLAGS value as it was before the attempted bit flip.
  The caller compares the two outputs and treats equal values (i.e., the bit
   could not be changed) as "no cpuid".*/
static void oc_detect_cpuid_helper(ogg_uint32_t *_eax,ogg_uint32_t *_ebx){
_asm{
pushfd
pushfd
pop eax
mov ebx,eax
xor eax,200000h
push eax
popfd
pushfd
pop eax
popfd
mov ecx,_eax
mov [ecx],eax
mov ecx,_ebx
mov [ecx],ebx
}
}
# endif
# elif (defined(__amd64__) || defined(__x86_64__))
/*Translates the Intel-style feature bits reported by cpuid function 1 into
   OC_CPU_X86_* capability flags.
  _edx: The value cpuid function 1 returned in edx.
  _ecx: The value cpuid function 1 returned in ecx.
  Return: The bitwise OR of the OC_CPU_X86_* flags that were detected, or 0 if
   MMX is not reported at all.*/
static ogg_uint32_t oc_parse_intel_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
  ogg_uint32_t flags;
  /*If there isn't even MMX (edx bit 23), give up.*/
  if(!(_edx&0x00800000))return 0;
  flags=OC_CPU_X86_MMX;
  /*edx bit 25: SSE, which also provides the extended MMX instructions.*/
  if(_edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
  /*edx bit 26: SSE2.*/
  if(_edx&0x04000000)flags|=OC_CPU_X86_SSE2;
  /*ecx bit 0: SSE3 (PNI).*/
  if(_ecx&0x00000001)flags|=OC_CPU_X86_PNI;
  /*ecx bit 9: SSSE3.
    Bit 8 (0x00000100) is Thermal Monitor 2, not SSSE3, so testing it would
     misreport SSSE3 support.*/
  if(_ecx&0x00000200)flags|=OC_CPU_X86_SSSE3;
  /*ecx bit 19: SSE4.1.*/
  if(_ecx&0x00080000)flags|=OC_CPU_X86_SSE4_1;
  /*ecx bit 20: SSE4.2.*/
  if(_ecx&0x00100000)flags|=OC_CPU_X86_SSE4_2;
  return flags;
}
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
__asm__ __volatile__( \
"push %%rbx\n\t" \
"cpuid\n\t" \
"movl %%ebx,%1\n\t" \
"pop %%rbx\n\t" \
:"=a" (_eax), \
"=r" (_ebx), \
"=c" (_ecx), \
"=d" (_edx) \
:"a" (_op) \
:"cc" \
)
# else /* x86_32, GCC */
/*Translates the AMD-style extended feature bits (as returned by cpuid
   function 0x80000001) into OC_CPU_X86_* capability flags.
  _edx: The edx value returned by the extended feature query.
  _ecx: The ecx value returned by the extended feature query.
  Return: The bitwise OR of the detected OC_CPU_X86_* flags, or 0 when the
   MMX bit is clear.*/
static ogg_uint32_t oc_parse_amd_flags(ogg_uint32_t _edx,ogg_uint32_t _ecx){
  ogg_uint32_t caps;
  /*Nothing else is examined unless MMX itself is reported.*/
  if((_edx&0x00800000)==0)return 0;
  caps=OC_CPU_X86_MMX;
  caps|=(_edx&0x00400000)?OC_CPU_X86_MMXEXT:0;
  caps|=(_edx&0x80000000)?OC_CPU_X86_3DNOW:0;
  caps|=(_edx&0x40000000)?OC_CPU_X86_3DNOWEXT:0;
  caps|=(_ecx&0x00000040)?OC_CPU_X86_SSE4A:0;
  caps|=(_ecx&0x00000800)?OC_CPU_X86_SSE5:0;
  return caps;
}
# define cpuid(_op,_eax,_ebx,_ecx,_edx) \
__asm__ __volatile__( \
"pushl %%ebx\n\t" \
"cpuid\n\t" \
"movl %%ebx,%1\n\t" \
"popl %%ebx\n\t" \
:"=a" (_eax), \
"=r" (_ebx), \
"=c" (_ecx), \
"=d" (_edx) \
:"a" (_op) \
:"cc" \
)
# endif /* arch switch */
ogg_uint32_t oc_cpu_flags_get(void){
ogg_uint32_t flags = 0;
static ogg_uint32_t oc_cpu_flags_get(void){
ogg_uint32_t flags;
ogg_uint32_t eax;
ogg_uint32_t ebx;
ogg_uint32_t ecx;
ogg_uint32_t edx;
# if !defined(_MSC_VER) && !defined(__amd64__) && !defined(__x86_64__)
/* check for cpuid */
# if !defined(__amd64__)&&!defined(__x86_64__)
/*Not all x86-32 chips support cpuid, so we have to check.*/
# if !defined(_MSC_VER)
__asm__ __volatile__(
"pushfl\n\t"
"pushfl\n\t"
"popl %0\n\t"
"movl %0,%1\n\t"
"xorl $0x200000,%0\n\t"
"pushl %0\n\t"
"popl %[a]\n\t"
"movl %[a],%[b]\n\t"
"xorl $0x200000,%[a]\n\t"
"pushl %[a]\n\t"
"popfl\n\t"
"pushfl\n\t"
"popl %0\n\t"
"popl %[a]\n\t"
"popfl\n\t"
:"=r" (eax),
"=r" (ebx)
:[a]"=r"(eax),[b]"=r"(ebx)
:
:"cc"
);
# else
oc_detect_cpuid_helper(&eax,&ebx);
# endif
/*No cpuid.*/
if(eax==ebx)return 0;
# endif /* GCC, x86_32 */
# endif
cpuid(0,eax,ebx,ecx,edx);
if(ebx==0x756e6547&&edx==0x49656e69&&ecx==0x6c65746e){
/*Intel:*/
inteltest:
/* l e t n I e n i u n e G*/
if(ecx==0x6C65746E&&edx==0x49656E69&&ebx==0x756E6547||
/* 6 8 x M T e n i u n e G*/
ecx==0x3638784D&&edx==0x54656E69&&ebx==0x756E6547){
/*Intel, Transmeta (tested with Crusoe TM5800):*/
cpuid(1,eax,ebx,ecx,edx);
if((edx&0x00800000)==0)return 0;
flags=OC_CPU_X86_MMX;
if(edx&0x02000000)flags|=OC_CPU_X86_MMXEXT|OC_CPU_X86_SSE;
if(edx&0x04000000)flags|=OC_CPU_X86_SSE2;
flags=oc_parse_intel_flags(edx,ecx);
}
else if(ebx==0x68747541&&edx==0x69746e65&&ecx==0x444d4163 ||
ebx==0x646f6547&&edx==0x79622065&&ecx==0x43534e20){
/*AMD:*/
/*Geode:*/
/* D M A c i t n e h t u A*/
else if(ecx==0x444D4163&&edx==0x69746E65&&ebx==0x68747541||
/* C S N y b e d o e G*/
ecx==0x43534E20&&edx==0x79622065&&ebx==0x646F6547){
/*AMD, Geode:*/
cpuid(0x80000000,eax,ebx,ecx,edx);
if(eax<0x80000001)goto inteltest;
cpuid(0x80000001,eax,ebx,ecx,edx);
if((edx&0x00800000)==0)return 0;
flags=OC_CPU_X86_MMX;
if(edx&0x80000000)flags|=OC_CPU_X86_3DNOW;
if(edx&0x40000000)flags|=OC_CPU_X86_3DNOWEXT;
if(edx&0x00400000)flags|=OC_CPU_X86_MMXEXT;
if(eax<0x80000001)flags=0;
else{
cpuid(0x80000001,eax,ebx,ecx,edx);
flags=oc_parse_amd_flags(edx,ecx);
}
/*Also check for SSE.*/
cpuid(1,eax,ebx,ecx,edx);
flags|=oc_parse_intel_flags(edx,ecx);
}
/*Technically some VIA chips can be configured in the BIOS to return any
string here the user wants.
There is a special detection method that can be used to identify such
processors, but in my opinion, if the user really wants to change it, they
deserve what they get.*/
/* s l u a H r u a t n e C*/
else if(ecx==0x736C7561&&edx==0x48727561&&ebx==0x746E6543){
/*VIA:*/
/*I only have documentation for the C7 (Esther) and Isaiah (forthcoming)
chips (thanks to the engineers from Centaur Technology who provided it).
These chips support Intel-like cpuid info.
The C3-2 (Nehemiah) cores appear to, as well.*/
cpuid(1,eax,ebx,ecx,edx);
flags=oc_parse_intel_flags(edx,ecx);
cpuid(0x80000000,eax,ebx,ecx,edx);
if(eax>=0x80000001){
/*The (non-Nehemiah) C3 processors support AMD-like cpuid info.
We need to check this even if the Intel test succeeds to pick up 3DNow!
support on these processors.
Unlike actual AMD processors, we cannot _rely_ on this info, since
some cores (e.g., the 693 stepping of the Nehemiah) claim to support
this function, yet return edx=0, despite the Intel test indicating
MMX support.
Therefore the features detected here are strictly added to those
detected by the Intel test.*/
/*TODO: How about earlier chips?*/
cpuid(0x80000001,eax,ebx,ecx,edx);
/*Note: As of the C7, this function returns Intel-style extended feature
flags, not AMD-style.
Currently, this only defines bits 11, 20, and 29 (0x20100800), which
do not conflict with any of the AMD flags we inspect.
For the remaining bits, Intel tells us, "Do not count on their value",
but VIA assures us that they will all be zero (at least on the C7 and
Isaiah chips).
In the (unlikely) event a future processor uses bits 18, 19, 30, or 31
(0xC0C00000) for something else, we will have to add code to detect
the model to decide when it is appropriate to inspect them.*/
flags|=oc_parse_amd_flags(edx,ecx);
}
}
else{
/*Implement me.*/
flags=0;
}
# ifdef DEBUG
if (flags) {
TH_DEBUG("vectorized instruction sets supported:");
if (flags & OC_CPU_X86_MMX) TH_DEBUG(" mmx");
if (flags & OC_CPU_X86_MMXEXT) TH_DEBUG(" mmxext");
if (flags & OC_CPU_X86_SSE) TH_DEBUG(" sse");
if (flags & OC_CPU_X86_SSE2) TH_DEBUG(" sse2");
if (flags & OC_CPU_X86_3DNOW) TH_DEBUG(" 3dnow");
if (flags & OC_CPU_X86_3DNOWEXT) TH_DEBUG(" 3dnowext");
TH_DEBUG("\n");
}
# endif
return flags;
}
#endif /* USE_ASM */
#endif

Просмотреть файл

@ -6,11 +6,11 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: cpu.h 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: cpu.h 15430 2008-10-21 05:03:55Z giles $
********************************************************************/
@ -18,13 +18,17 @@
# define _x86_cpu_H (1)
#include "internal.h"
#define OC_CPU_X86_MMX (1<<0)
#define OC_CPU_X86_3DNOW (1<<1)
#define OC_CPU_X86_MMX (1<<0)
#define OC_CPU_X86_3DNOW (1<<1)
#define OC_CPU_X86_3DNOWEXT (1<<2)
#define OC_CPU_X86_MMXEXT (1<<3)
#define OC_CPU_X86_SSE (1<<4)
#define OC_CPU_X86_SSE2 (1<<5)
ogg_uint32_t oc_cpu_flags_get(void);
#define OC_CPU_X86_MMXEXT (1<<3)
#define OC_CPU_X86_SSE (1<<4)
#define OC_CPU_X86_SSE2 (1<<5)
#define OC_CPU_X86_PNI (1<<6)
#define OC_CPU_X86_SSSE3 (1<<7)
#define OC_CPU_X86_SSE4_1 (1<<8)
#define OC_CPU_X86_SSE4_2 (1<<9)
#define OC_CPU_X86_SSE4A (1<<10)
#define OC_CPU_X86_SSE5 (1<<11)
#endif

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: apiwrapper.c 14321 2007-12-22 18:09:29Z tterribe $
last mod: $Id: apiwrapper.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,7 +6,7 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************

Просмотреть файл

@ -0,0 +1,121 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function: packing variable sized words into an octet stream
last mod: $Id: bitpack.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
/*We're 'MSb' endian; if we write a word but read individual bits,
then we'll read the MSb first.*/
#include <string.h>
#include <stdlib.h>
#include "bitpack.h"
/*Prepares a bit-packer for reading from an existing byte buffer.
  _b:     The packer state to initialize; every field is zeroed first.
  _buf:   The first byte of the data to read.
  _bytes: The number of bytes available in _buf.*/
void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes){
  /*Clear everything so the byte/bit position counters start at zero.*/
  memset(_b,0,sizeof(*_b));
  _b->storage=_bytes;
  _b->ptr=_buf;
  _b->buffer=_buf;
}
/*Peeks at the next bit without advancing the read position.
  _b:   The packer state.
  _ret: Receives the bit value (0 or 1), or 0 when no data is left.
  Return: 0 on success, or -1 if the read position is at or past the end of
   the buffer.*/
int theorapackB_look1(oggpack_buffer *_b,long *_ret){
  if(_b->endbyte<_b->storage){
    /*Bits are consumed MSb first, so bit number endbit sits 7-endbit places
       above the LSb of the current byte.*/
    *_ret=(_b->ptr[0]>>(7-_b->endbit))&1;
    return 0;
  }
  *_ret=0L;
  return -1;
}
/*Advances the read position by exactly one bit.
  No bounds check is made here.
  _b: The packer state.*/
void theorapackB_adv1(oggpack_buffer *_b){
  _b->endbit++;
  /*Still inside the current byte: nothing more to do.*/
  if(_b->endbit<8)return;
  /*Crossed a byte boundary: move on to the first bit of the next byte.*/
  _b->endbit=0;
  _b->endbyte++;
  _b->ptr++;
}
/*Here we assume that 0<=_bits&&_bits<=32.*/
/*Reads _bits bits, MSb first, and advances the read position past them.
  Here we assume that 0<=_bits&&_bits<=32.
  _b:    The packer state.
  _bits: The number of bits to read.
  _ret:  Receives the value read, or 0 if not enough data remains.
  Return: 0 on success, or -1 if fewer than _bits bits remain in the buffer.
          The read position is advanced by _bits even on failure.*/
int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret){
  /*Accumulate in an unsigned type: the bytes are shifted left by up to 31
     bits, and doing that on a promoted (signed) int would shift into or past
     the sign bit, which is undefined behavior.
    Any bits above the low 32 are masked off before the result is used.*/
  unsigned long ret;
  long          m;
  long          d;
  int           fail;
  /*The number of low-order bits of the 32-bit window to discard at the end.*/
  m=32-_bits;
  /*From here on _bits counts from the start of the current byte.*/
  _bits+=_b->endbit;
  /*The number of bytes remaining, including the current one.*/
  d=_b->storage-_b->endbyte;
  if(d<=4){
    /*Not the main path.*/
    if(d*8<_bits){
      *_ret=0L;
      fail=-1;
      goto overflow;
    }
    /*Special case to avoid reading _b->ptr[0], which might be past the end of
       the buffer; also skips some useless accounting.*/
    else if(!_bits){
      *_ret=0L;
      return 0;
    }
  }
  /*Left-align the unread bits in a 32-bit window, pulling in only as many
     bytes as the request actually spans.*/
  ret=(unsigned long)_b->ptr[0]<<(24+_b->endbit);
  if(_bits>8){
    ret|=(unsigned long)_b->ptr[1]<<(16+_b->endbit);
    if(_bits>16){
      ret|=(unsigned long)_b->ptr[2]<<(8+_b->endbit);
      if(_bits>24){
        ret|=(unsigned long)_b->ptr[3]<<_b->endbit;
        if(_bits>32)ret|=(unsigned long)_b->ptr[4]>>(8-_b->endbit);
      }
    }
  }
  /*Shift right by m in two steps, since m may be 32 (for a zero-bit read) and
     a single shift by the full width of a 32-bit type is not allowed.*/
  *_ret=(long)(((ret&0xFFFFFFFFUL)>>(m>>1))>>((m+1)>>1));
  fail=0;
overflow:
  _b->ptr+=_bits>>3;
  _b->endbyte+=_bits>>3;
  _b->endbit=_bits&7;
  return fail;
}
/*Reads a single bit and advances the read position past it.
  _b:   The bit reader.
  _ret: Returns the bit, or 0 if no data remained.
  Return: 0 on success, or -1 if the read position was at or past the end of
           the buffer.
          The read position is advanced in the failure case as well.*/
int theorapackB_read1(oggpack_buffer *_b,long *_ret){
  int fail;
  fail=_b->endbyte>=_b->storage?-1:0;
  /*Running out of data is not the main path.*/
  if(fail)*_ret=0L;
  else *_ret=(_b->ptr[0]>>(7-_b->endbit))&1;
  if(++(_b->endbit)>7){
    _b->endbit=0;
    _b->endbyte++;
    _b->ptr++;
  }
  return fail;
}
/*Returns the number of octets consumed so far, counting a partially read
   octet as a whole one.*/
long theorapackB_bytes(oggpack_buffer *_b){
  int partial;
  partial=(_b->endbit+7)>>3;
  return _b->endbyte+partial;
}
/*Returns the current read position, measured in bits from the start of the
   buffer.*/
long theorapackB_bits(oggpack_buffer *_b){
  long whole_bits;
  whole_bits=_b->endbyte*8;
  return whole_bits+_b->endbit;
}
/*Returns the start of the buffer being read (not the current read
   position).*/
unsigned char *theorapackB_get_buffer(oggpack_buffer *_b){
  unsigned char *base;
  base=_b->buffer;
  return base;
}

Просмотреть файл

@ -0,0 +1,38 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggTheora SOURCE CODE IS (C) COPYRIGHT 1994-2008 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function: packing variable sized words into an octet stream
last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
********************************************************************/
#if !defined(_bitpack_H)
# define _bitpack_H (1)
# include <ogg/ogg.h>
/*Bit reader built on top of libogg's oggpack_buffer.
  Bits within each octet are consumed starting from the most significant
   one.*/
/*Clears _b and points it at the _bytes octets stored at _buf.*/
void theorapackB_readinit(oggpack_buffer *_b,unsigned char *_buf,int _bytes);
/*Stores the next bit in *_ret without advancing.
  Returns 0 on success, or -1 (with *_ret set to 0) if no data remains.*/
int theorapackB_look1(oggpack_buffer *_b,long *_ret);
/*Advances the read position by one bit, without any bounds check.*/
void theorapackB_adv1(oggpack_buffer *_b);
/*Reads _bits bits into *_ret and advances past them.
  Returns 0 on success, or -1 (with *_ret set to 0) if not enough data
   remains.
  Here we assume 0<=_bits&&_bits<=32.*/
int theorapackB_read(oggpack_buffer *_b,int _bits,long *_ret);
/*Reads a single bit into *_ret and advances past it.
  Returns 0 on success, or -1 (with *_ret set to 0) if no data remains.*/
int theorapackB_read1(oggpack_buffer *_b,long *_ret);
/*Returns the number of octets consumed, counting a partial octet as one.*/
long theorapackB_bytes(oggpack_buffer *_b);
/*Returns the read position in bits from the start of the buffer.*/
long theorapackB_bits(oggpack_buffer *_b);
/*Returns the start of the buffer being read.*/
unsigned char *theorapackB_get_buffer(oggpack_buffer *_b);
/*These two functions are implemented locally in huffdec.c*/
/*Read in bits without advancing the bitptr.
Here we assume 0<=_bits&&_bits<=32.*/
/*static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret);*/
/*static void theorapackB_adv(oggpack_buffer *_b,int _bits);*/
#endif

Просмотреть файл

@ -1,126 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function: packing variable sized words into an octet stream
last mod: $Id: bitwise.c 14546 2008-02-29 01:14:05Z tterribe $
********************************************************************/
/* We're 'MSb' endian; if we write a word but read individual bits,
then we'll read the msb first */
#include <string.h>
#include <stdlib.h>
#include "bitwise.h"
/* Rewinds b to the start of its buffer and zeroes the first octet of that
   buffer */
void theorapackB_reset(oggpack_buffer *b){
  b->endbyte=0;
  b->endbit=0;
  b->buffer[0]=0;
  b->ptr=b->buffer;
}
/* Clears b, then sets it up to read the given number of bytes starting at
   buf */
void theorapackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes){
  memset(b,0,sizeof(*b));
  b->ptr=buf;
  b->buffer=buf;
  b->storage=bytes;
}
/* Peek at the next bit without advancing the bitptr;
   returns -1 (and a zero bit) once the end of the buffer is reached */
int theorapackB_look1(oggpack_buffer *b,long *_ret){
  int shift;
  if(b->endbyte<b->storage){
    /* msb first */
    shift=7-b->endbit;
    *_ret=(b->ptr[0]>>shift)&1;
    return 0;
  }
  *_ret=0L;
  return -1;
}
/* Advance the bitptr by one bit; no bounds check is made */
void theorapackB_adv1(oggpack_buffer *b){
  b->endbit++;
  if(b->endbit>7){
    /* move on to the next byte */
    b->endbyte++;
    b->ptr++;
    b->endbit=0;
  }
}
/* Read in bits (msb first) and advance the bitptr past them; bits <= 32.
   Returns 0 with the value in *_ret, or -1 with *_ret set to 0 if not enough
   data remains.  The bitptr is advanced in the failure case as well. */
int theorapackB_read(oggpack_buffer *b,int bits,long *_ret){
  unsigned long ret;
  long m;
  int fail;
  /* number of unwanted low-order bits in the 32-bit window we assemble */
  m=32-bits;
  /* from here on, bits is measured from the start of the current byte */
  bits+=b->endbit;
  if(b->endbyte+4>=b->storage){
    /* not the main path */
    if(b->endbyte*8+bits>b->storage*8){
      *_ret=0L;
      fail=-1;
      goto overflow;
    }
    /* special case to avoid reading b->ptr[0], which might be past the end of
       the buffer; also skips some useless accounting */
    else if(!bits){
      *_ret=0L;
      return 0;
    }
  }
  /* widen each byte to unsigned long before shifting: left-shifting the
     promoted (signed) int by up to 31 bits could overflow, which is undefined
     behavior; anything that lands above bit 31 is dropped by the mask below */
  ret=(unsigned long)b->ptr[0]<<(24+b->endbit);
  if(bits>8){
    ret|=(unsigned long)b->ptr[1]<<(16+b->endbit);
    if(bits>16){
      ret|=(unsigned long)b->ptr[2]<<(8+b->endbit);
      if(bits>24){
        ret|=(unsigned long)b->ptr[3]<<(b->endbit);
        if(bits>32 && b->endbit)
          ret|=(unsigned long)b->ptr[4]>>(8-b->endbit);
      }
    }
  }
  /* shift in two steps: m can be 32, and a single shift by the full width of
     a 32-bit type is undefined */
  *_ret=((ret&0xffffffffUL)>>(m>>1))>>((m+1)>>1);
  fail=0;
overflow:
  b->ptr+=bits/8;
  b->endbyte+=bits/8;
  b->endbit=bits&7;
  return fail;
}
/* Read one bit and advance the bitptr past it.
   Returns 0 with the bit in *_ret, or -1 with *_ret set to 0 when the end of
   the buffer has been reached; the bitptr is advanced either way */
int theorapackB_read1(oggpack_buffer *b,long *_ret){
  int fail;
  if(b->endbyte<b->storage){
    *_ret=(b->ptr[0]>>(7-b->endbit))&1;
    fail=0;
  }
  else{
    /* not the main path */
    *_ret=0L;
    fail=-1;
  }
  if(++(b->endbit)>7){
    b->endbit=0;
    b->endbyte++;
    b->ptr++;
  }
  return fail;
}
/* Number of bytes consumed so far; a partially read byte counts as one */
long theorapackB_bytes(oggpack_buffer *b){
  int partial;
  partial=(b->endbit+7)/8;
  return b->endbyte+partial;
}
/* Position of the bitptr, in bits from the start of the buffer */
long theorapackB_bits(oggpack_buffer *b){
  long whole;
  whole=b->endbyte*8;
  return whole+b->endbit;
}
/* Start of the buffer being read (not the current position) */
unsigned char *theorapackB_get_buffer(oggpack_buffer *b){
  unsigned char *base;
  base=b->buffer;
  return base;
}

Просмотреть файл

@ -1,76 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function: packing variable sized words into an octet stream
last mod: $Id: bitwise.c 7675 2004-09-01 00:34:39Z xiphmont $
********************************************************************/
#if !defined(_bitwise_H)
# define _bitwise_H (1)
# include <ogg/ogg.h>
/* Rewind b to the start of its buffer and zero the buffer's first byte */
void theorapackB_reset(oggpack_buffer *b);
/* Clear b and set it up to read the given number of bytes from buf */
void theorapackB_readinit(oggpack_buffer *b,unsigned char *buf,int bytes);
/* Read in bits without advancing the bitptr; bits <= 32 */
static int theorapackB_look(oggpack_buffer *b,int bits,long *_ret);
/* Read in one bit without advancing the bitptr;
   returns -1 if no data remains, 0 otherwise */
int theorapackB_look1(oggpack_buffer *b,long *_ret);
/* Advance the bitptr by the given number of bits, with no bounds check */
static void theorapackB_adv(oggpack_buffer *b,int bits);
/* Advance the bitptr by one bit, with no bounds check */
void theorapackB_adv1(oggpack_buffer *b);
/* Read in bits and advance the bitptr; returns -1 if not enough data
   remains, 0 otherwise */
/* bits <= 32 */
int theorapackB_read(oggpack_buffer *b,int bits,long *_ret);
/* Read in one bit and advance the bitptr; returns -1 if no data remains,
   0 otherwise */
int theorapackB_read1(oggpack_buffer *b,long *_ret);
/* Number of bytes consumed so far; a partially read byte counts as one */
long theorapackB_bytes(oggpack_buffer *b);
/* Position of the bitptr, in bits from the start of the buffer */
long theorapackB_bits(oggpack_buffer *b);
/* Start of the buffer being read */
unsigned char *theorapackB_get_buffer(oggpack_buffer *b);
/*These two functions are only used in one place, and declaring them static so
they can be inlined saves considerable function call overhead.*/
/* Read in bits without advancing the bitptr; bits <= 32.
   Returns -1 (with *_ret set to 0) only when no data at all remains.
   If some bits remain, but fewer than requested, the ones we have are
   returned in the high-order positions of the result, the missing low-order
   bits read as zero, and 0 is returned. */
static int theorapackB_look(oggpack_buffer *b,int bits,long *_ret){
  unsigned long ret;
  long m;
  /* number of unwanted low-order bits in the 32-bit window we assemble */
  m=32-bits;
  /* from here on, bits is measured from the start of the current byte */
  bits+=b->endbit;
  if(b->endbyte+4>=b->storage){
    /* not the main path */
    if(b->endbyte>=b->storage){
      *_ret=0L;
      return -1;
    }
    /*If we have some bits left, but not enough, return the ones we have.*/
    if((b->storage-b->endbyte)*8<bits)bits=(b->storage-b->endbyte)*8;
  }
  /* widen each byte to unsigned long before shifting: left-shifting the
     promoted (signed) int by up to 31 bits could overflow, which is undefined
     behavior; anything that lands above bit 31 is dropped by the mask below */
  ret=(unsigned long)b->ptr[0]<<(24+b->endbit);
  if(bits>8){
    ret|=(unsigned long)b->ptr[1]<<(16+b->endbit);
    if(bits>16){
      ret|=(unsigned long)b->ptr[2]<<(8+b->endbit);
      if(bits>24){
        ret|=(unsigned long)b->ptr[3]<<(b->endbit);
        if(bits>32&&b->endbit)
          ret|=(unsigned long)b->ptr[4]>>(8-b->endbit);
      }
    }
  }
  /* shift in two steps: m can be 32, and a single shift by the full width of
     a 32-bit type is undefined */
  *_ret=((ret&0xffffffffUL)>>(m>>1))>>((m+1)>>1);
  return 0;
}
/* Advance the bitptr by the given number of bits; no bounds check is made */
static void theorapackB_adv(oggpack_buffer *b,int bits){
  int total;
  int nbytes;
  /* measure from the start of the current byte */
  total=bits+b->endbit;
  nbytes=total/8;
  b->endbyte+=nbytes;
  b->ptr+=nbytes;
  b->endbit=total&7;
}
#endif

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dct.h 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: dct.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,7 +6,7 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
@ -29,10 +29,6 @@ static void th_dec_api_clear(th_api_wrapper *_api){
static void theora_decode_clear(theora_state *_td){
if(_td->i!=NULL)theora_info_clear(_td->i);
#ifdef _TH_DEBUG_
fclose(debugout);
debugout=NULL;
#endif
memset(_td,0,sizeof(*_td));
}
@ -92,7 +88,6 @@ int theora_decode_init(theora_state *_td,theora_info *_ci){
th_api_info *apiinfo;
th_api_wrapper *api;
th_info info;
api=(th_api_wrapper *)_ci->codec_setup;
/*Allocate our own combined API wrapper/theora_info struct.
We put them both in one malloc'd block so that when the API wrapper is
@ -130,11 +125,6 @@ int theora_decode_header(theora_info *_ci,theora_comment *_cc,ogg_packet *_op){
th_api_wrapper *api;
th_info info;
int ret;
#ifdef _TH_DEBUG_
debugout = fopen("theoradec-debugout.txt","w");
#endif
api=(th_api_wrapper *)_ci->codec_setup;
/*Allocate an API wrapper struct on demand, since it will not also include a
theora_info struct like the ones that are used in a theora_state struct.*/
@ -167,16 +157,9 @@ int theora_decode_packetin(theora_state *_td,ogg_packet *_op){
th_api_wrapper *api;
ogg_int64_t gp;
int ret;
if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT;
if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
api=(th_api_wrapper *)_td->i->codec_setup;
if(!api || !api->decode)return OC_FAULT;
ret=th_decode_packetin(api->decode,_op,&gp);
#ifdef _TH_DEBUG_
dframe++;
#endif
if(ret<0)return OC_BADPACKET;
_td->granulepos=gp;
return 0;
@ -186,10 +169,9 @@ int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
th_api_wrapper *api;
th_ycbcr_buffer buf;
int ret;
if(!_td || !_td->i || !_td->i->codec_setup)return OC_FAULT;
if(!_td||!_td->i||!_td->i->codec_setup)return OC_FAULT;
api=(th_api_wrapper *)_td->i->codec_setup;
if(!api || !api->decode)return OC_FAULT;
if(!api->decode)return OC_FAULT;
ret=th_decode_ycbcr_out(api->decode,buf);
if(ret>=0){
_yuv->y_width=buf[0].width;
@ -202,6 +184,5 @@ int theora_decode_YUVout(theora_state *_td,yuv_buffer *_yuv){
_yuv->u=buf[1].data;
_yuv->v=buf[2].data;
}
return ret;
}

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: decinfo.c 14719 2008-04-12 11:36:40Z tterribe $
last mod: $Id: decinfo.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: decint.h 14369 2008-01-05 23:15:32Z tterribe $
last mod: $Id: decint.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
@ -20,7 +20,7 @@
# define _decint_H (1)
# include "theora/theoradec.h"
# include "../internal.h"
# include "bitwise.h"
# include "bitpack.h"
typedef struct th_setup_info oc_setup_info;
typedef struct th_dec_ctx oc_dec_ctx;
@ -47,45 +47,45 @@ struct th_setup_info{
struct th_dec_ctx{
/*Shared encoder/decoder state.*/
oc_theora_state state;
oc_theora_state state;
/*Whether or not packets are ready to be emitted.
This takes on negative values while there are remaining header packets to
be emitted, reaches 0 when the codec is ready for input, and goes to 1
when a frame has been processed and a data packet is ready.*/
int packet_state;
int packet_state;
/*Buffer in which to assemble packets.*/
oggpack_buffer opb;
oggpack_buffer opb;
/*Huffman decode trees.*/
oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES];
oc_huff_node *huff_tables[TH_NHUFFMAN_TABLES];
/*The index of one past the last token in each plane for each coefficient.
The final entries are the total number of tokens for each coefficient.*/
int ti0[3][64];
int ti0[3][64];
/*The index of one past the last extra bits entry in each plane for each
coefficient.
The final entries are the total number of extra bits entries for each
coefficient.*/
int ebi0[3][64];
int ebi0[3][64];
/*The number of outstanding EOB runs at the start of each coefficient in each
plane.*/
int eob_runs[3][64];
int eob_runs[3][64];
/*The DCT token lists.*/
unsigned char **dct_tokens;
unsigned char **dct_tokens;
/*The extra bits associated with DCT tokens.*/
ogg_uint16_t **extra_bits;
ogg_uint16_t **extra_bits;
/*The out-of-loop post-processing level.*/
int pp_level;
int pp_level;
/*The DC scale used for out-of-loop deblocking.*/
int pp_dc_scale[64];
int pp_dc_scale[64];
/*The sharpen modifier used for out-of-loop deringing.*/
int pp_sharp_mod[64];
int pp_sharp_mod[64];
/*The DC quantization index of each block.*/
unsigned char *dc_qis;
unsigned char *dc_qis;
/*The variance of each block.*/
int *variances;
int *variances;
/*The storage for the post-processed frame buffer.*/
unsigned char *pp_frame_data;
unsigned char *pp_frame_data;
/*Whether or not the post-processed frame buffer has space for chroma.*/
int pp_frame_has_chroma;
int pp_frame_has_chroma;
/*The buffer used for the post-processed frame.*/
th_ycbcr_buffer pp_frame_buf;
/*The striped decode callback function.*/

Просмотреть файл

@ -6,13 +6,13 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: decode.c 14385 2008-01-09 19:53:18Z giles $
last mod: $Id: decode.c 15403 2008-10-16 12:44:05Z tterribe $
********************************************************************/
#include <stdlib.h>
@ -170,7 +170,7 @@ static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
_dec->state.dequant_table_data[qti][pli];
}
oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
&_setup->qinfo);
&_setup->qinfo);
for(qi=0;qi<64;qi++){
int qsum;
qsum=0;
@ -210,38 +210,28 @@ static void oc_dec_clear(oc_dec_ctx *_dec){
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
long val;
TH_DEBUG("\n>>>> beginning frame %ld\n\n",dframe);
/*Check to make sure this is a data packet.*/
theorapackB_read1(&_dec->opb,&val);
TH_DEBUG("frame type = %s, ",val==0?"video":"unknown");
if(val!=0)return TH_EBADPACKET;
/*Read in the frame type (I or P).*/
theorapackB_read1(&_dec->opb,&val);
_dec->state.frame_type=(int)val;
TH_DEBUG("%s\n",val?"predicted":"key");
/*Read in the current qi.*/
theorapackB_read(&_dec->opb,6,&val);
_dec->state.qis[0]=(int)val;
TH_DEBUG("frame quality = { %ld ",val);
theorapackB_read1(&_dec->opb,&val);
if(!val)_dec->state.nqis=1;
else{
theorapackB_read(&_dec->opb,6,&val);
_dec->state.qis[1]=(int)val;
TH_DEBUG("%ld ",val);
theorapackB_read1(&_dec->opb,&val);
if(!val)_dec->state.nqis=2;
else{
theorapackB_read(&_dec->opb,6,&val);
TH_DEBUG("%ld ",val);
_dec->state.qis[2]=(int)val;
_dec->state.nqis=3;
}
}
TH_DEBUG("}\n");
if(_dec->state.frame_type==OC_INTRA_FRAME){
/*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
Most of the other unused bits in the VP3 headers were eliminated.
@ -305,7 +295,6 @@ static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
int run_count;
theorapackB_read1(&_dec->opb,&val);
flag=(int)val;
sb=_dec->state.sbs;
sb_end=sb+_dec->state.nsbs;
run_count=npartial=0;
@ -319,7 +308,6 @@ static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
npartial+=flag;
sb++;
}
while(--run_count>0&&sb<sb_end);
if(full_run&&sb<sb_end){
theorapackB_read1(&_dec->opb,&val);
@ -349,7 +337,6 @@ static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
for(;sb->coded_partially;sb++);
theorapackB_read1(&_dec->opb,&val);
flag=(int)val;
while(sb<sb_end){
int full_run;
run_count=oc_sb_run_unpack(&_dec->opb);
@ -428,71 +415,6 @@ static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
}
/*TODO: run_count should be 0 here.
If it's not, we should issue a warning of some kind.*/
#ifdef _TH_DEBUG_
// assuming 4:2:0 right now; THIS IS WRONG but only an issue if dumping debug info
TH_DEBUG("predicted (partially coded frame)\n");
TH_DEBUG("superblock coded flags = {");
int x,y,i;
int w = _dec->state.info.frame_width;
int h = _dec->state.info.frame_height;
i=0;
for(y=0;y< (h+31)/32;y++){
TH_DEBUG("\n ");
for(x=0;x< (w+31)/32;x++,i++)
TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)|
(_dec->state.sbs[i].coded_fully));
}
TH_DEBUG("\n ");
for(y=0;y< (h+63)/64;y++){
TH_DEBUG("\n ");
for(x=0;x< (w+63)/64;x++,i++)
TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)|
(_dec->state.sbs[i].coded_fully));
}
TH_DEBUG("\n ");
for(y=0;y< (h+63)/64;y++){
TH_DEBUG("\n ");
for(x=0;x< (w+63)/64;x++,i++)
TH_DEBUG("%x", (_dec->state.sbs[i].coded_partially!=0)|
(_dec->state.sbs[i].coded_fully));
}
TH_DEBUG("\n}\n");
if(i!=_dec->state.nsbs)
TH_DEBUG("WARNING! superblock count, raster %d != flat %d\n",
i,_dec->state.nsbs);
TH_DEBUG("block coded flags = {");
i=0;
for(y=0;y< (h+7)/8;y++){
TH_DEBUG("\n ");
for(x=0;x< (w+7)/8;x++,i++)
TH_DEBUG("%x", (_dec->state.frags[i].coded!=0));
}
TH_DEBUG("\n ");
for(y=0;y< (h+15)/16;y++){
TH_DEBUG("\n ");
for(x=0;x< (w+15)/16;x++,i++)
TH_DEBUG("%x", (_dec->state.frags[i].coded!=0));
}
TH_DEBUG("\n ");
for(y=0;y< (h+15)/16;y++){
TH_DEBUG("\n ");
for(x=0;x< (w+15)/16;x++,i++)
TH_DEBUG("%x", (_dec->state.frags[i].coded!=0));
}
TH_DEBUG("\n}\n");
if(i!=_dec->state.nfrags)
TH_DEBUG("WARNING! block count, raster %d != flat %d\n",
i,_dec->state.nfrags);
#endif
}
@ -521,62 +443,42 @@ static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
oc_mb *mb;
oc_mb *mb_end;
const int *alphabet;
long val,j;
long val;
int scheme0_alphabet[8];
int mode_scheme;
theorapackB_read(&_dec->opb,3,&val);
mode_scheme=(int)val;
TH_DEBUG("mode encode scheme = %d\n",(int)val);
if(mode_scheme==0){
int mi;
/*Just in case, initialize the modes to something.
If the bitstream doesn't contain each index exactly once, it's likely
corrupt and the rest of the packet is garbage anyway, but this way we
won't crash, and we'll decode SOMETHING.*/
TH_DEBUG("mode scheme list = { ");
/*LOOP VECTORIZES.*/
for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
for(mi=0;mi<OC_NMODES;mi++){
theorapackB_read(&_dec->opb,3,&val);
scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
TH_DEBUG("%d ",(int)val);
}
TH_DEBUG("}\n");
alphabet=scheme0_alphabet;
}else
alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
if(mode_scheme==7)
mode_unpack=oc_clc_mode_unpack;
else
mode_unpack=oc_vlc_mode_unpack;
}
else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
else mode_unpack=oc_vlc_mode_unpack;
mb=_dec->state.mbs;
mb_end=mb+_dec->state.nmbs;
TH_DEBUG("mode list = { ");
for(j=0;mb<mb_end;mb++){
for(;mb<mb_end;mb++){
if(mb->mode!=OC_MODE_INVALID){
int bi;
for(bi=0;bi<4;bi++){
int fragi;
fragi=mb->map[0][bi];
if(fragi>=0&&_dec->state.frags[fragi].coded)break;
int fragi;
fragi=mb->map[0][bi];
if(fragi>=0&&_dec->state.frags[fragi].coded)break;
}
if(bi<4){
mb->mode=alphabet[(*mode_unpack)(&_dec->opb)];
#ifdef _TH_DEBUG_
if((j&0x1f)==0)
TH_DEBUG("\n ");
TH_DEBUG("%d ",mb->mode);
j++;
#endif
}else
mb->mode=OC_MODE_INTER_NOMV;
if(bi<4)mb->mode=alphabet[(*mode_unpack)(&_dec->opb)];
else mb->mode=OC_MODE_INTER_NOMV;
}
}
TH_DEBUG("\n}\n");
}
@ -629,23 +531,16 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
const int *map_idxs;
long val;
int map_nidxs;
#ifdef _TH_DEBUG_
int j=0;
#endif
oc_mv last_mv[2];
oc_mv cbmvs[4];
set_chroma_mvs=OC_SET_CHROMA_MVS_TABLE[_dec->state.info.pixel_fmt];
theorapackB_read1(&_dec->opb,&val);
TH_DEBUG("motion vector table = %d\n",(int)val);
mv_comp_unpack=val?oc_clc_mv_comp_unpack:oc_vlc_mv_comp_unpack;
map_idxs=OC_MB_MAP_IDXS[_dec->state.info.pixel_fmt];
map_nidxs=OC_MB_MAP_NIDXS[_dec->state.info.pixel_fmt];
memset(last_mv,0,sizeof(last_mv));
mb=_dec->state.mbs;
mb_end=mb+_dec->state.nmbs;
TH_DEBUG("motion vectors = {");
for(;mb<mb_end;mb++)if(mb->mode!=OC_MODE_INVALID){
oc_fragment *frag;
oc_mv mbmv;
@ -667,98 +562,62 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
if(ncoded<=0)continue;
mb_mode=mb->mode;
switch(mb_mode){
case OC_MODE_INTER_MV_FOUR:
{
oc_mv lbmvs[4];
int bi;
/*Mark the tail of the list, so we don't accidentally go past it.*/
coded[ncoded]=-1;
for(bi=codedi=0;bi<4;bi++){
if(coded[codedi]==bi){
codedi++;
frag=_dec->state.frags+mb->map[0][bi];
frag->mbmode=mb_mode;
frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
#ifdef _TH_DEBUG_
if((j&0x7)==0)
TH_DEBUG("\n ");
TH_DEBUG("%+03d,%+03d ",frag->mv[0],frag->mv[1]);
j++;
#endif
}
else lbmvs[bi][0]=lbmvs[bi][1]=0;
}
if(codedi>0){
last_mv[1][0]=last_mv[0][0];
last_mv[1][1]=last_mv[0][1];
last_mv[0][0]=lbmvs[coded[codedi-1]][0];
last_mv[0][1]=lbmvs[coded[codedi-1]][1];
}
if(codedi<ncoded){
(*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
for(;codedi<ncoded;codedi++){
mapi=coded[codedi];
bi=mapi&3;
frag=_dec->state.frags+mb->map[mapi>>2][bi];
frag->mbmode=mb_mode;
frag->mv[0]=cbmvs[bi][0];
frag->mv[1]=cbmvs[bi][1];
}
}
}
break;
case OC_MODE_INTER_MV:
{
last_mv[1][0]=last_mv[0][0];
last_mv[1][1]=last_mv[0][1];
mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
#ifdef _TH_DEBUG_
if((j&0x7)==0)
TH_DEBUG("\n ");
TH_DEBUG("%+03d,%+03d ",mbmv[0],mbmv[1]);
j++;
#endif
}
break;
case OC_MODE_INTER_MV_LAST:
{
case OC_MODE_INTER_MV_FOUR:{
oc_mv lbmvs[4];
int bi;
/*Mark the tail of the list, so we don't accidentally go past it.*/
coded[ncoded]=-1;
for(bi=codedi=0;bi<4;bi++){
if(coded[codedi]==bi){
codedi++;
frag=_dec->state.frags+mb->map[0][bi];
frag->mbmode=mb_mode;
frag->mv[0]=lbmvs[bi][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
frag->mv[1]=lbmvs[bi][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
}
else lbmvs[bi][0]=lbmvs[bi][1]=0;
}
if(codedi>0){
last_mv[1][0]=last_mv[0][0];
last_mv[1][1]=last_mv[0][1];
last_mv[0][0]=lbmvs[coded[codedi-1]][0];
last_mv[0][1]=lbmvs[coded[codedi-1]][1];
}
if(codedi<ncoded){
(*set_chroma_mvs)(cbmvs,(const oc_mv *)lbmvs);
for(;codedi<ncoded;codedi++){
mapi=coded[codedi];
bi=mapi&3;
frag=_dec->state.frags+mb->map[mapi>>2][bi];
frag->mbmode=mb_mode;
frag->mv[0]=cbmvs[bi][0];
frag->mv[1]=cbmvs[bi][1];
}
}
}break;
case OC_MODE_INTER_MV:{
last_mv[1][0]=last_mv[0][0];
last_mv[1][1]=last_mv[0][1];
mbmv[0]=last_mv[0][0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
mbmv[1]=last_mv[0][1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
}break;
case OC_MODE_INTER_MV_LAST:{
mbmv[0]=last_mv[0][0];
mbmv[1]=last_mv[0][1];
}
break;
case OC_MODE_INTER_MV_LAST2:
{
}break;
case OC_MODE_INTER_MV_LAST2:{
mbmv[0]=last_mv[1][0];
mbmv[1]=last_mv[1][1];
last_mv[1][0]=last_mv[0][0];
last_mv[1][1]=last_mv[0][1];
last_mv[0][0]=mbmv[0];
last_mv[0][1]=mbmv[1];
}
break;
case OC_MODE_GOLDEN_MV:
{
}break;
case OC_MODE_GOLDEN_MV:{
mbmv[0]=(signed char)(*mv_comp_unpack)(&_dec->opb);
mbmv[1]=(signed char)(*mv_comp_unpack)(&_dec->opb);
#ifdef _TH_DEBUG_
if((j&0x7)==0)
TH_DEBUG("\n ");
TH_DEBUG("%+03d,%+03d ",mbmv[0],mbmv[1]);
j++;
#endif
}
break;
default:
mbmv[0]=mbmv[1]=0;
break;
}break;
default:mbmv[0]=mbmv[1]=0;break;
}
/*4MV mode fills in the fragments itself.
For all other modes we can use this common code.*/
@ -773,9 +632,6 @@ static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
}
}
}
TH_DEBUG("\n}\n");
}
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
@ -798,7 +654,7 @@ static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
else{
long val;
int flag;
int nqi0;
int nqi1;
int run_count;
/*Otherwise, we decode a qi index for each fragment, using two passes of
the same binary RLE scheme used for super-block coded bits.
@ -810,14 +666,14 @@ static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
with the corresponding qi's for this frame.*/
theorapackB_read1(&_dec->opb,&val);
flag=(int)val;
run_count=nqi0=0;
run_count=nqi1=0;
while(coded_fragi<coded_fragi_end){
int full_run;
run_count=oc_sb_run_unpack(&_dec->opb);
full_run=run_count>=4129;
do{
_dec->state.frags[*coded_fragi++].qi=flag;
nqi0+=!flag;
nqi1+=flag;
}
while(--run_count>0&&coded_fragi<coded_fragi_end);
if(full_run&&coded_fragi<coded_fragi_end){
@ -830,7 +686,7 @@ static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
If it's not, we should issue a warning of some kind.*/
/*If we have 3 different qi's for this frame, and there was at least one
fragment with a non-zero qi, make the second pass.*/
if(_dec->state.nqis==3&&nqi0<ncoded_fragis){
if(_dec->state.nqis==3&&nqi1>0){
/*Skip qii==0 fragments.*/
for(coded_fragi=_dec->state.coded_fragis;
_dec->state.frags[*coded_fragi].qi==0;coded_fragi++);
@ -1362,7 +1218,7 @@ static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
if(_dec->pp_level<OC_PP_LEVEL_DEBLOCKC){
_dec->variances=(int *)_ogg_realloc(_dec->variances,
_dec->state.fplanes[0].nfrags*sizeof(_dec->variances[0]));
_dec->pp_frame_data=(unsigned char *)_ogg_realloc(
_dec->pp_frame_data=(unsigned char *)_ogg_realloc(
_dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0]));
_dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
_dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
@ -1382,7 +1238,7 @@ static int oc_dec_postprocess_init(oc_dec_ctx *_dec){
c_h=_dec->state.info.frame_height>>!(_dec->state.info.pixel_fmt&2);
c_sz=c_w*c_h;
frame_sz+=c_sz<<1;
_dec->pp_frame_data=(unsigned char *)_ogg_realloc(
_dec->pp_frame_data=(unsigned char *)_ogg_realloc(
_dec->pp_frame_data,frame_sz*sizeof(_dec->pp_frame_data[0]));
_dec->pp_frame_buf[0].width=_dec->state.info.frame_width;
_dec->pp_frame_buf[0].height=_dec->state.info.frame_height;
@ -1503,9 +1359,6 @@ static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
for(fragy=fragy0;fragy<fragy_end;fragy++){
for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++){
if(!frag->coded)continue;
#ifdef _TH_DEBUG_
frag->quant[0] = frag->dc; /* stash un-predicted dc for debug output */
#endif
pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+=
oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
ncoded_fragis++;
@ -1597,40 +1450,6 @@ static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx *_dec,
_pipe->uncoded_fragis[_pli]-=_pipe->nuncoded_fragis[_pli];
oc_state_frag_copy(&_dec->state,_pipe->uncoded_fragis[_pli],
_pipe->nuncoded_fragis[_pli],OC_FRAME_SELF,OC_FRAME_PREV,_pli);
#ifdef _TH_DEBUG_
{
int i,j,k;
int framei=_dec->state.ref_frame_idx[OC_FRAME_SELF];
int ystride=_dec->state.ref_frame_bufs[framei][_pli].stride;
int *fragi_end = _pipe->coded_fragis[_pli];
int *fragi = fragi_end-_pipe->ncoded_fragis[_pli];
for(;fragi<fragi_end;fragi++){
oc_fragment *frag=_dec->state.frags+*fragi;
unsigned char *src=frag->buffer[framei];
for(i=0,j=0;j<8;j++){
for(k=0;k<8;k++,i++)
frag->recon[i] = src[k];
src+=ystride;
}
}
fragi = _pipe->uncoded_fragis[_pli];
fragi_end = fragi+_pipe->nuncoded_fragis[_pli];
for(;fragi<fragi_end;fragi++){
oc_fragment *frag=_dec->state.frags+*fragi;
unsigned char *src=frag->buffer[framei];
for(i=0,j=0;j<8;j++){
for(k=0;k<8;k++,i++)
frag->recon[i] = src[k];
src+=ystride;
}
}
}
#endif
}
/*Filter a horizontal block edge.*/
@ -1909,7 +1728,7 @@ static void oc_dering_block(unsigned char *_idata,int _ystride,int _b,
static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
int _pli,int _fragy0,int _fragy_end){
th_img_plane *iplane;
th_img_plane *iplane;
oc_fragment_plane *fplane;
oc_fragment *frag;
int *variance;
@ -1941,10 +1760,10 @@ static void oc_dec_dering_frag_rows(oc_dec_ctx *_dec,th_img_plane *_img,
if(strong&&var>sthresh){
oc_dering_block(idata+x,iplane->stride,b,
_dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
if(_pli||(b&1)&&*(variance-1)>OC_DERING_THRESH4||
(b&2)&&variance[1]>OC_DERING_THRESH4||
(b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4||
(b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){
if(_pli||!(b&1)&&*(variance-1)>OC_DERING_THRESH4||
!(b&2)&&variance[1]>OC_DERING_THRESH4||
!(b&4)&&*(variance-fplane->nvfrags)>OC_DERING_THRESH4||
!(b&8)&&variance[fplane->nvfrags]>OC_DERING_THRESH4){
oc_dering_block(idata+x,iplane->stride,b,
_dec->pp_dc_scale[qi],_dec->pp_sharp_mod[qi],1);
oc_dering_block(idata+x,iplane->stride,b,
@ -2039,7 +1858,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
/*A completely empty packet indicates a dropped frame and is treated exactly
like an inter frame with no coded blocks.
Only proceed if we have a non-empty packet.*/
if(_op->bytes!=0){
oc_dec_pipeline_state pipe;
th_ycbcr_buffer stripe_buf;
@ -2093,7 +1911,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
}
oc_dec_block_qis_unpack(_dec);
oc_dec_residual_tokens_unpack(_dec);
/*Update granule position.
This must be done before the striped decode callbacks so that the
application knows what to do with the frame data.*/
@ -2203,91 +2020,6 @@ int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
}
notstart=1;
}
#ifdef _TH_DEBUG_
{
int x,y,i,j,k,xn,yn;
int plane;
int buf;
/* dump fragment DCT components */
for(plane=0;plane<3;plane++){
char *plstr;
int offset;
switch(plane){
case 0:
plstr="Y";
xn = _dec->state.info.frame_width>>3;
yn = _dec->state.info.frame_height>>3;
offset = 0;
break;
case 1:
plstr="U";
xn = _dec->state.info.frame_width>>4;
yn = _dec->state.info.frame_height>>4;
offset = xn*yn*4;
break;
case 2:
plstr="V";
xn = _dec->state.info.frame_width>>4;
yn = _dec->state.info.frame_height>>4;
offset = xn*yn*5;
break;
}
for(y=0;y<yn;y++){
for(x=0;x<xn;x++,i++){
for(buf=0;buf<4;buf++){
int *ptr;
char *bufn;
int codecheck=0;
i = offset + y*xn + x;
switch(buf){
case 0:
codecheck=1;
bufn = "coded";
ptr = _dec->state.frags[i].quant;
break;
case 1:
codecheck=1;
bufn = "coeff";
ptr = _dec->state.frags[i].freq;
break;
case 2:
codecheck=1;
bufn = "idct";
ptr = _dec->state.frags[i].time;
break;
case 3:
bufn = "recon";
ptr = _dec->state.frags[i].loop;
break;
}
TH_DEBUG("%s %s [%d][%d] = {",bufn,plstr,x,y);
if(codecheck && !_dec->state.frags[i].coded)
TH_DEBUG(" not coded }\n");
else{
int l=0;
for(j=0;j<8;j++){
TH_DEBUG("\n ");
for(k=0;k<8;k++,l++){
TH_DEBUG("%d ",ptr[l]);
}
}
TH_DEBUG(" }\n");
}
}
TH_DEBUG("\n");
}
}
}
}
#endif
/*Finish filling in the reference frame borders.*/
for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
/*Update the reference frame indices.*/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dequant.c 14369 2008-01-05 23:15:32Z tterribe $
last mod: $Id: dequant.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
@ -127,65 +127,6 @@ int oc_quant_params_unpack(oggpack_buffer *_opb,
}
while(qri-->0);
}
#ifdef _TH_DEBUG_
/* dump the tables */
{
int i, j, k, l, m;
TH_DEBUG("loop filter limits = {");
for(i=0;i<64;){
TH_DEBUG("\n ");
for(j=0;j<16;i++,j++)
TH_DEBUG("%3d ",_qinfo->loop_filter_limits[i]);
}
TH_DEBUG("\n}\n\n");
TH_DEBUG("ac scale = {");
for(i=0;i<64;){
TH_DEBUG("\n ");
for(j=0;j<16;i++,j++)
TH_DEBUG("%3d ",_qinfo->ac_scale[i]);
}
TH_DEBUG("\n}\n\n");
TH_DEBUG("dc scale = {");
for(i=0;i<64;){
TH_DEBUG("\n ");
for(j=0;j<16;i++,j++)
TH_DEBUG("%3d ",_qinfo->dc_scale[i]);
}
TH_DEBUG("\n}\n\n");
for(k=0;k<2;k++)
for(l=0;l<3;l++){
char *name[2][3]={
{"intra Y bases","intra U bases", "intra V bases"},
{"inter Y bases","inter U bases", "inter V bases"}
};
th_quant_ranges *r = &_qinfo->qi_ranges[k][l];
TH_DEBUG("%s = {\n",name[k][l]);
TH_DEBUG(" ranges = %d\n",r->nranges);
TH_DEBUG(" intervals = { ");
for(i=0;i<r->nranges;i++)
TH_DEBUG("%3d ",r->sizes[i]);
TH_DEBUG("}\n");
TH_DEBUG("\n matricies = { ");
for(m=0;m<r->nranges+1;m++){
TH_DEBUG("\n { ");
for(i=0;i<64;){
TH_DEBUG("\n ");
for(j=0;j<8;i++,j++)
TH_DEBUG("%3d ",r->base_matrices[m][i]);
}
TH_DEBUG("\n }");
}
TH_DEBUG("\n }\n");
}
}
#endif
_ogg_free(base_mats);
return 0;
}
@ -227,4 +168,3 @@ void oc_quant_params_clear(th_quant_info *_qinfo){
_ogg_free((void *)_qinfo->qi_ranges[qti][pli].base_matrices);
}
}

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: dequant.h 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: dequant.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -1,43 +0,0 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: enquant.h 13884 2007-09-22 08:38:10Z giles $
********************************************************************/
#if !defined(_enquant_H)
# define _enquant_H (1)
# include "quant.h"

/*Forward-quantizer fixed-point constants.
  OC_FQUANT_SHIFT is defined first because the other two are expressed in
   terms of it.*/

/*The amount to shift the resulting product by.*/
#define OC_FQUANT_SHIFT (16)
/*The amount to scale the forward quantizer value by (1<<OC_FQUANT_SHIFT).*/
#define OC_FQUANT_SCALE ((ogg_uint32_t)1<<OC_FQUANT_SHIFT)
/*The amount to add to the scaled forward quantizer for rounding: half of
   OC_FQUANT_SCALE, i.e. 1<<(OC_FQUANT_SHIFT-1).*/
#define OC_FQUANT_ROUND (1<<(OC_FQUANT_SHIFT-1))

/*The default quantization parameters used by VP3.1.*/
extern const th_quant_info TH_VP31_QUANT_INFO;
/*Our default quantization parameters (a table of four parameter sets).*/
extern const th_quant_info OC_DEF_QUANT_INFO[4];

/*Writes the quantization parameters in _qinfo to the bit-packer _opb.*/
void oc_quant_params_pack(
 oggpack_buffer *_opb,
 const th_quant_info *_qinfo);

/*Initializes the dequantization and enquantization tables from _qinfo.
  Both table sets are indexed as [2][3].*/
void oc_enquant_tables_init(
 oc_quant_table *_dequant[2][3],
 oc_quant_table *_enquant[2][3],
 const th_quant_info *_qinfo);

#endif

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: fragment.c 14348 2008-01-04 18:17:00Z tterribe $
last mod: $Id: fragment.c 15469 2008-10-30 12:49:42Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: huffdec.c 14493 2008-02-13 09:25:37Z tterribe $
last mod: $Id: huffdec.c 15431 2008-10-21 05:04:02Z giles $
********************************************************************/
@ -25,6 +25,52 @@
#define _ogg_offsetof(_type,_field)\
((size_t)((char *)&((_type *)0)->_field-(char *)0))
/*These two functions are really part of the bitpack.c module, but
they are only used here. Declaring local static versions so they
can be inlined saves considerable function call overhead.*/
/*Read in bits without advancing the bitptr.
  Here we assume 0<=_bits&&_bits<=32.
  _b:    The bit-packer to peek into; none of its fields are modified.
  _bits: The number of bits requested.
  _ret:  Receives the requested bits, right-justified, with the first byte of
          the buffer supplying the most significant bits.
         If fewer than the requested number of bits remain in the buffer, the
          bits that are available are returned and the missing low-order bits
          are zero.
  Return: 0 on success, or -1 if no whole bytes remain in the buffer
           (storage-endbyte<=0) and the request cannot be satisfied, in which
           case *_ret is set to 0.*/
static int theorapackB_look(oggpack_buffer *_b,int _bits,long *_ret){
  /*The window is assembled in an unsigned type: a byte shifted left by up to
     31 bits would overflow a (promoted) signed int, which is undefined
     behavior.*/
  unsigned long ret;
  long          m;
  long          d;
  /*m is the number of unused low-order bits in the 32-bit window.*/
  m=32-_bits;
  /*From here on, _bits counts from the start of the current byte.*/
  _bits+=_b->endbit;
  /*d is the number of bytes left in the buffer, including the current one.*/
  d=_b->storage-_b->endbyte;
  if(d<=4){
    /*Not the main path.*/
    if(d<=0){
      *_ret=0L;
      return -(_bits>d*8);
    }
    /*If we have some bits left, but not enough, return the ones we have.*/
    if(d*8<_bits)_bits=d*8;
  }
  /*Only touch as many bytes as _bits requires, so we never read past the end
     of the buffer.*/
  ret=(unsigned long)_b->ptr[0]<<(24+_b->endbit);
  if(_bits>8){
    ret|=(unsigned long)_b->ptr[1]<<(16+_b->endbit);
    if(_bits>16){
      ret|=(unsigned long)_b->ptr[2]<<(8+_b->endbit);
      if(_bits>24){
        ret|=(unsigned long)_b->ptr[3]<<_b->endbit;
        /*_bits>32 implies endbit>=1, so the shift count below is in 1..7.*/
        if(_bits>32)ret|=(unsigned long)_b->ptr[4]>>(8-_b->endbit);
      }
    }
  }
  /*Discard anything above bit 31 (the already-consumed bits of the first
     byte), then right-justify.
    The shift by m is split in two so that m==32 (_bits==0) never shifts a
     32-bit quantity by its full width.*/
  *_ret=(long)(((ret&0xFFFFFFFFUL)>>(m>>1))>>((m+1)>>1));
  return 0;
}
/*Advance the bitptr by _bits bits.
  The read pointer and byte count move forward by the number of whole bytes
   crossed, and the remaining 0..7 bits become the new bit offset.
  No bounds checking is performed here.*/
static void theorapackB_adv(oggpack_buffer *_b,int _bits){
  int total_bits;
  int whole_bytes;
  /*Measure the advance from the start of the current byte.*/
  total_bits=_b->endbit+_bits;
  whole_bytes=total_bits>>3;
  _b->ptr+=whole_bytes;
  _b->endbyte+=whole_bytes;
  _b->endbit=total_bits&7;
}
/*The log_2 of the size of a lookup table is allowed to grow relative to
the number of unique nodes it contains.
E.g., if OC_HUFF_SLUSH is 2, then at most 75% of the space in the tree is

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: huffdec.h 14359 2008-01-04 20:11:13Z tterribe $
last mod: $Id: huffdec.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: huffman.h 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: huffman.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: idct.c 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: idct.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
@ -169,7 +169,6 @@ static void idct8_3(ogg_int16_t *_y,const ogg_int16_t _x[8]){
_y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
}
/*Performs an inverse 8 point Type-II DCT transform.
The output is scaled by a factor of 2 relative to the orthonormal version of
the transform.
@ -204,7 +203,6 @@ static void idct8_2(ogg_int16_t *_y,const ogg_int16_t _x[8]){
_y[7<<3]=(ogg_int16_t)(t[0]-t[7]);
}
/*Performs an inverse 8 point Type-II DCT transform.
The output is scaled by a factor of 2 relative to the orthonormal version of
the transform.

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: idct.h 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: idct.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: info.c 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: info.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: internal.c 14385 2008-01-09 19:53:18Z giles $
last mod: $Id: internal.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: ocintrin.h 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: ocintrin.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: quant.c 14375 2008-01-06 05:37:33Z tterribe $
last mod: $Id: quant.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
@ -21,8 +21,8 @@
#include "quant.h"
#include "decint.h"
unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
/*Initializes the dequantization tables from a set of quantizer info.
Currently the dequantizer (and elsewhere enquantizer) tables are expected to
@ -39,114 +39,84 @@ unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
qi values change between frames (this is what VP3 did).*/
void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
int _pp_dc_scale[64],const th_quant_info *_qinfo){
int qti; /* coding mode: intra or inter */
int pli; /* Y U V */
/*coding mode: intra or inter.*/
int qti;
/*Y', C_b, C_r*/
int pli;
for(qti=0;qti<2;qti++){
for(pli=0;pli<3;pli++){
oc_quant_tables stage;
int qi; /* quality index */
int qri; /* range iterator */
/*Quality index.*/
int qi;
/*Range iterator.*/
int qri;
for(qi=0,qri=0; qri<=_qinfo->qi_ranges[qti][pli].nranges; qri++){
th_quant_base base;
ogg_uint32_t q;
int qi_start;
int qi_end;
int ci;
memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
sizeof(base));
qi_start=qi;
if(qri==_qinfo->qi_ranges[qti][pli].nranges)
qi_end=qi+1;
else
qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
/* Iterate over quality indicies in this range */
for(;;){
/*In the original VP3.2 code, the rounding offset and the size of the
dead zone around 0 were controlled by a "sharpness" parameter.
The size of our dead zone is now controlled by the per-coefficient
quality thresholds returned by our HVS module.
We round down from a more accurate value when the quality of the
reconstruction does not fall below our threshold and it saves bits.
Hence, all of that VP3.2 code is gone from here, and the remaining
floating point code has been implemented as equivalent integer code
with exact precision.*/
/* for postprocess, not dequant */
if(_pp_dc_scale!=NULL)
_pp_dc_scale[qi]=(int)((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/160);
/*Scale DC the coefficient from the proper table.*/
q=((ogg_uint32_t)_qinfo->dc_scale[qi]*base[0]/100)<<2;
q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
stage[qi][0]=(ogg_uint16_t)q;
/*Now scale AC coefficients from the proper table.*/
for(ci=1;ci<64;ci++){
q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
stage[qi][ci]=(ogg_uint16_t)q;
}
if(++qi>=qi_end)break;
/*Interpolate the next base matrix.*/
for(ci=0;ci<64;ci++){
base[ci]=(unsigned char)
((2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
(qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
+_qinfo->qi_ranges[qti][pli].sizes[qri])/
(2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
}
}
th_quant_base base;
ogg_uint32_t q;
int qi_start;
int qi_end;
int ci;
memcpy(base,_qinfo->qi_ranges[qti][pli].base_matrices[qri],
sizeof(base));
qi_start=qi;
if(qri==_qinfo->qi_ranges[qti][pli].nranges)qi_end=qi+1;
else qi_end=qi+_qinfo->qi_ranges[qti][pli].sizes[qri];
/*Iterate over quality indicies in this range.*/
for(;;){
ogg_uint32_t qfac;
/*In the original VP3.2 code, the rounding offset and the size of the
dead zone around 0 were controlled by a "sharpness" parameter.
The size of our dead zone is now controlled by the per-coefficient
quality thresholds returned by our HVS module.
We round down from a more accurate value when the quality of the
reconstruction does not fall below our threshold and it saves bits.
Hence, all of that VP3.2 code is gone from here, and the remaining
floating point code has been implemented as equivalent integer code
with exact precision.*/
qfac=(ogg_uint32_t)_qinfo->dc_scale[qi]*base[0];
/*For postprocessing, not dequantization.*/
if(_pp_dc_scale!=NULL)_pp_dc_scale[qi]=(int)(qfac/160);
/*Scale DC the coefficient from the proper table.*/
q=(qfac/100)<<2;
q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
stage[qi][0]=(ogg_uint16_t)q;
/*Now scale AC coefficients from the proper table.*/
for(ci=1;ci<64;ci++){
q=((ogg_uint32_t)_qinfo->ac_scale[qi]*base[ci]/100)<<2;
q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
stage[qi][ci]=(ogg_uint16_t)q;
}
if(++qi>=qi_end)break;
/*Interpolate the next base matrix.*/
for(ci=0;ci<64;ci++){
base[ci]=(unsigned char)(
(2*((qi_end-qi)*_qinfo->qi_ranges[qti][pli].base_matrices[qri][ci]+
(qi-qi_start)*_qinfo->qi_ranges[qti][pli].base_matrices[qri+1][ci])
+_qinfo->qi_ranges[qti][pli].sizes[qri])/
(2*_qinfo->qi_ranges[qti][pli].sizes[qri]));
}
}
}
/* Staging matricies complete; commit to memory only if this
isn't a duplicate of a preceeding plane. This simple check
helps us improve cache coherency later.*/
/*Staging matrices complete; commit to memory only if this isn't a
duplicate of a preceeding plane.
This simple check helps us improve cache coherency later.*/
{
int dupe = 0;
int i,j;
for(i=0;i<=qti;i++){
for(j=0;j<(i<qti?3:pli);j++){
if(!memcmp(stage,_dequant[i][j],sizeof(stage))){
dupe = 1;
break;
}
}
if(dupe)break;
}
if(dupe){
_dequant[qti][pli]=_dequant[i][j];
}else{
memcpy(_dequant[qti][pli],stage,sizeof(stage));
}
int dupe;
int qtj;
int plj;
dupe=0;
for(qtj=0;qtj<=qti;qtj++){
for(plj=0;plj<(qtj<qti?3:pli);plj++){
if(!memcmp(stage,_dequant[qtj][plj],sizeof(stage))){
dupe=1;
break;
}
}
if(dupe)break;
}
if(dupe)_dequant[qti][pli]=_dequant[qtj][plj];
else memcpy(_dequant[qti][pli],stage,sizeof(stage));
}
}
}
#ifdef _TH_DEBUG_
int i, j, k, l;
/* dump the calculated quantizer tables */
for(i=0;i<2;i++){
for(j=0;j<3;j++){
for(k=0;k<64;k++){
TH_DEBUG("quantizer table [%s][%s][Q%d] = {",
(i==0?"intra":"inter"),(j==0?"Y":(j==1?"U":"V")),k);
for(l=0;l<64;l++){
if((l&7)==0)
TH_DEBUG("\n ");
TH_DEBUG("%4d ",_dequant[i][j][k][l]);
}
TH_DEBUG("}\n");
}
}
}
#endif
}

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: quant.h 14059 2007-10-28 23:43:27Z xiphmont $
last mod: $Id: quant.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
@ -24,23 +24,11 @@ typedef ogg_uint16_t oc_quant_table[64];
typedef oc_quant_table oc_quant_tables[64];
/*Maximum scaled quantizer value.*/
#define OC_QUANT_MAX (1024<<2)
/*Minimum scaled DC coefficient frame quantizer value for intra and inter
modes.*/
extern unsigned OC_DC_QUANT_MIN[2];
/*Minimum scaled AC coefficient frame quantizer value for intra and inter
modes.*/
extern unsigned OC_AC_QUANT_MIN[2];
void oc_dequant_tables_init(oc_quant_table *_dequant[2][3],
int _pp_dc_scale[64],
const th_quant_info *_qinfo);
int _pp_dc_scale[64],const th_quant_info *_qinfo);
#endif

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: state.c 14714 2008-04-12 01:04:43Z giles $
last mod: $Id: state.c 15469 2008-10-30 12:49:42Z tterribe $
********************************************************************/
@ -831,37 +831,11 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag,
ogg_int16_t p;
/*Why is the iquant product rounded in this case and no others?
Who knows.*/
p=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant+15>>5);
/*LOOP VECTORIZES.*/
for(ci=0;ci<64;ci++)res_buf[ci]=p;
#ifdef _TH_DEBUG_
{
int i;
_frag->freq[0] = _frag->dc*_dc_iquant;
_frag->time[0] = p;
for(i=1;i<64;i++){
_frag->quant[i] = 0;
_frag->freq[i] = 0;
_frag->time[i] = p;
}
}
#endif
}
else{
#ifdef _TH_DEBUG_
{
int i;
for(i=1;i<_ncoefs;i++)
_frag->quant[i] = _dct_coeffs[i];
for(;i<64;i++)
_frag->quant[i] = 0;
}
#endif
/*First, dequantize the coefficients.*/
dct_buf[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
for(zzi=1;zzi<_ncoefs;zzi++){
@ -869,21 +843,6 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag,
ci=OC_FZIG_ZAG[zzi];
dct_buf[ci]=(ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*_ac_iquant[ci]);
}
#ifdef _TH_DEBUG_
for(;zzi<64;zzi++){
int ci;
ci=OC_FZIG_ZAG[zzi];
dct_buf[ci]=0;
}
{
int i;
for(i=0;i<64;i++)
_frag->freq[i] = dct_buf[i];
}
#endif
/*Then, fill in the remainder of the coefficients with 0's, and perform
the iDCT.*/
if(_last_zzi<10){
@ -894,15 +853,6 @@ void oc_state_frag_recon_c(oc_theora_state *_state,oc_fragment *_frag,
for(;zzi<64;zzi++)dct_buf[OC_FZIG_ZAG[zzi]]=0;
oc_idct8x8_c(res_buf,dct_buf);
}
#ifdef _TH_DEBUG_
{
int i;
for(i=0;i<64;i++)
_frag->time[i] = res_buf[i];
}
#endif
}
/*Fill in the target buffer.*/
dst_framei=_state->ref_frame_idx[OC_FRAME_SELF];
@ -1038,7 +988,7 @@ void oc_state_loop_filter_frag_rows(oc_theora_state *_state,int *_bv,
}
void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv,
int _refi,int _pli,int _fragy0,int _fragy_end){
int _refi,int _pli,int _fragy0,int _fragy_end){
th_img_plane *iplane;
oc_fragment_plane *fplane;
oc_fragment *frag_top;
@ -1050,7 +1000,6 @@ void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv,
_bv+=127;
iplane=_state->ref_frame_bufs[_refi]+_pli;
fplane=_state->fplanes+_pli;
/*The following loops are constructed somewhat non-intuitively on purpose.
The main idea is: if a block boundary has at least one coded fragment on
it, the filter is applied to it.
@ -1079,46 +1028,6 @@ void oc_state_loop_filter_frag_rows_c(oc_theora_state *_state,int *_bv,
iplane->stride,_bv);
}
}
#ifdef _TH_DEBUG_
{
int i,j,k,l;
unsigned char *src;
for(l=0;l<5;l++){
oc_fragment *f;
switch(l){
case 0:
f = frag;
break;
case 1: /* left */
if(frag == frag0)continue;
f = frag-1;
break;
case 2: /* bottom (top once flipped) */
if(frag0 == frag_top)continue;
f = frag - fplane->nhfrags;
break;
case 3: /* right */
if(frag+1 >= frag_end) continue;
f = frag + 1;
break;
case 4: /* top (bottom once flipped) */
if(frag+fplane->nhfrags >= frag_bot)continue;
f = frag + fplane->nhfrags;
break;
}
src = f->buffer[_refi];
for(i=0,j=0;j<8;j++){
for(k=0;k<8;k++,i++)
f->loop[i] = src[k];
src+=iplane->stride;
}
}
}
#endif
frag++;
}
frag0+=fplane->nhfrags;

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2003 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mmxfrag.c 14345 2008-01-04 18:02:21Z tterribe $
last mod: $Id: mmxfrag.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
@ -21,6 +21,7 @@
Note: Loops are unrolled for best performance.
The iteration each instruction belongs to is marked in the comments as #i.*/
#include "x86int.h"
#include <stddef.h>
#if defined(USE_ASM)
@ -133,8 +134,8 @@ void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
:[residue]"r"(_residue),
[dst]"r"(_dst),
[dst4]"r"(_dst+(_dst_ystride<<2)),
[dst_ystride]"r"((long)_dst_ystride),
[dst_ystride3]"r"((long)_dst_ystride*3)
[dst_ystride]"r"((ptrdiff_t)_dst_ystride),
[dst_ystride3]"r"((ptrdiff_t)_dst_ystride*3)
:"memory"
);
}
@ -185,8 +186,8 @@ void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
/*Advance dst.*/
"lea (%[dst],%[dst_ystride],2),%[dst]\n\t"
:[residue]"+r"(_residue),[dst]"+r"(_dst),[src]"+r"(_src)
:[dst_ystride]"r"((long)_dst_ystride),
[src_ystride]"r"((long)_src_ystride)
:[dst_ystride]"r"((ptrdiff_t)_dst_ystride),
[src_ystride]"r"((ptrdiff_t)_src_ystride)
:"memory"
);
}
@ -278,7 +279,7 @@ void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
"lea (%[dst],%[ystride],2),%[dst]\n\t"
:[dst]"+r"(_dst),[residue]"+r"(_residue),
[src1]"+r"(_src1),[src2]"+r"(_src2)
:[ystride]"r"((long)_dst_ystride)
:[ystride]"r"((ptrdiff_t)_dst_ystride)
:"memory"
);
}

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mmxidct.c 14357 2008-01-04 20:05:28Z tterribe $
last mod: $Id: mmxidct.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mmxstate.c 14385 2008-01-09 19:53:18Z giles $
last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
@ -19,6 +19,7 @@
Originally written by Rudolf Marek.*/
#include "x86int.h"
#include "../../internal.h"
#include <stddef.h>
#if defined(USE_ASM)
@ -182,9 +183,9 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
const int *fragi;
const int *fragi_end;
int dst_framei;
long dst_ystride;
ptrdiff_t dst_ystride;
int src_framei;
long src_ystride;
ptrdiff_t src_ystride;
dst_framei=_state->ref_frame_idx[_dst_frame];
src_framei=_state->ref_frame_idx[_src_frame];
dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
@ -194,14 +195,14 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
oc_fragment *frag;
unsigned char *dst;
unsigned char *src;
long esi;
ptrdiff_t s;
frag=_state->frags+*fragi;
dst=frag->buffer[dst_framei];
src=frag->buffer[src_framei];
__asm__ __volatile__(
/*src+0*src_ystride*/
"movq (%[src]),%%mm0\n\t"
/*esi=src_ystride*3*/
/*s=src_ystride*3*/
"lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
/*src+1*src_ystride*/
"movq (%[src],%[src_ystride]),%%mm1\n\t"
@ -211,7 +212,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
"movq (%[src],%[s]),%%mm3\n\t"
/*dst+0*dst_ystride*/
"movq %%mm0,(%[dst])\n\t"
/*esi=dst_ystride*3*/
/*s=dst_ystride*3*/
"lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
/*dst+1*dst_ystride*/
"movq %%mm1,(%[dst],%[dst_ystride])\n\t"
@ -225,7 +226,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
"lea (%[dst],%[dst_ystride],4),%[dst]\n\t"
/*src+0*src_ystride*/
"movq (%[src]),%%mm0\n\t"
/*esi=src_ystride*3*/
/*s=src_ystride*3*/
"lea (%[src_ystride],%[src_ystride],2),%[s]\n\t"
/*src+1*src_ystride*/
"movq (%[src],%[src_ystride]),%%mm1\n\t"
@ -235,7 +236,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
"movq (%[src],%[s]),%%mm3\n\t"
/*dst+0*dst_ystride*/
"movq %%mm0,(%[dst])\n\t"
/*esi=dst_ystride*3*/
/*s=dst_ystride*3*/
"lea (%[dst_ystride],%[dst_ystride],2),%[s]\n\t"
/*dst+1*dst_ystride*/
"movq %%mm1,(%[dst],%[dst_ystride])\n\t"
@ -243,7 +244,7 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
"movq %%mm2,(%[dst],%[dst_ystride],2)\n\t"
/*dst+3*dst_ystride*/
"movq %%mm3,(%[dst],%[s])\n\t"
:[s]"=&S"(esi)
:[s]"=&r"(s)
:[dst]"r"(dst),[src]"r"(src),[dst_ystride]"r"(dst_ystride),
[src_ystride]"r"(src_ystride)
:"memory"
@ -255,12 +256,12 @@ void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
static void loop_filter_v(unsigned char *_pix,int _ystride,
const ogg_int16_t *_ll){
long esi;
ptrdiff_t s;
_pix-=_ystride*2;
__asm__ __volatile__(
/*mm0=0*/
"pxor %%mm0,%%mm0\n\t"
/*esi=_ystride*3*/
/*s=_ystride*3*/
"lea (%[ystride],%[ystride],2),%[s]\n\t"
/*mm7=_pix[0...8]*/
"movq (%[pix]),%%mm7\n\t"
@ -427,8 +428,8 @@ static void loop_filter_v(unsigned char *_pix,int _ystride,
/*Write it back out.*/
"movq %%mm4,(%[pix],%[ystride])\n\t"
"movq %%mm1,(%[pix],%[ystride],2)\n\t"
:[s]"=&S"(esi)
:[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll)
:[s]"=&r"(s)
:[pix]"r"(_pix),[ystride]"r"((ptrdiff_t)_ystride),[ll]"r"(_ll)
:"memory"
);
}
@ -437,14 +438,16 @@ static void loop_filter_v(unsigned char *_pix,int _ystride,
Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
four p0's to one register we must transpose the values in four mmx regs.
When half is done we repeat this for the rest.*/
static void loop_filter_h4(unsigned char *_pix,long _ystride,
static void loop_filter_h4(unsigned char *_pix,ptrdiff_t _ystride,
const ogg_int16_t *_ll){
long esi;
long edi;
ptrdiff_t s;
/*d doesn't technically need to be 64-bit on x86-64, but making it so will
help avoid partial register stalls.*/
ptrdiff_t d;
__asm__ __volatile__(
/*x x x x 3 2 1 0*/
"movd (%[pix]),%%mm0\n\t"
/*esi=_ystride*3*/
/*s=_ystride*3*/
"lea (%[ystride],%[ystride],2),%[s]\n\t"
/*x x x x 7 6 5 4*/
"movd (%[pix],%[ystride]),%%mm1\n\t"
@ -557,19 +560,19 @@ static void loop_filter_h4(unsigned char *_pix,long _ystride,
"packuswb %%mm7,%%mm4\n\t"
/*mm5=E D A 9 6 5 2 1*/
"punpcklbw %%mm4,%%mm5\n\t"
/*edi=6 5 2 1*/
"movd %%mm5,%%edi\n\t"
"movw %%di,1(%[pix])\n\t"
/*d=6 5 2 1*/
"movd %%mm5,%[d]\n\t"
"movw %w[d],1(%[pix])\n\t"
/*Why is there such a big stall here?*/
"psrlq $32,%%mm5\n\t"
"shrl $16,%%edi\n\t"
"movw %%di,1(%[pix],%[ystride])\n\t"
/*edi=E D A 9*/
"movd %%mm5,%%edi\n\t"
"movw %%di,1(%[pix],%[ystride],2)\n\t"
"shrl $16,%%edi\n\t"
"movw %%di,1(%[pix],%[s])\n\t"
:[s]"=&S"(esi),[d]"=&D"(edi),
"shr $16,%[d]\n\t"
"movw %w[d],1(%[pix],%[ystride])\n\t"
/*d=E D A 9*/
"movd %%mm5,%[d]\n\t"
"movw %w[d],1(%[pix],%[ystride],2)\n\t"
"shr $16,%[d]\n\t"
"movw %w[d],1(%[pix],%[s])\n\t"
:[s]"=&r"(s),[d]"=&r"(d),
[pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
:
:"memory"

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: x86int.h 14375 2008-01-06 05:37:33Z tterribe $
last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: x86state.c 13884 2007-09-22 08:38:10Z giles $
last mod: $Id: x86state.c 15427 2008-10-21 02:36:19Z xiphmont $
********************************************************************/
@ -19,7 +19,7 @@
#if defined(USE_ASM)
#include "../../cpu.h"
#include "../../cpu.c"
void oc_state_vtable_init_x86(oc_theora_state *_state){
_state->cpu_flags=oc_cpu_flags_get();

Просмотреть файл

@ -0,0 +1,214 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id:
********************************************************************/
#include "../../internal.h"
/* ------------------------------------------------------------------------
MMX reconstruction fragment routines for Visual Studio.
Tested with VS2005. Should compile for VS2003 and VC6 as well.
Initial implementation 2007 by Nils Pipenbrinck.
---------------------------------------------------------------------*/
#if defined(USE_ASM)
/*Reconstructs an intra-coded 8x8 block: every 16-bit residue value is biased
by 128 and stored to the destination as a byte, saturated to [0,255].
_dst: Pointer to the first row of the destination block.
_dst_ystride: The offset in bytes between two destination rows.
_residue: The 64 residue values, read as eight consecutive rows of eight.*/
void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
const ogg_int16_t *_residue){
/* ---------------------------------------------------------------------
This function does the intra reconstruction step with 8 iterations
unrolled. The iteration for each instruction is noted by the #id in the
comments (in case you want to reconstruct it)
--------------------------------------------------------------------- */
_asm{
mov edi, [_residue] /* load residue ptr */
mov eax, 0x00800080 /* generate constant */
mov ebx, [_dst_ystride] /* load dst-stride */
mov edx, [_dst] /* load dest pointer */
/* unrolled loop begins here */
movd mm0, eax /* load constant */
movq mm1, [edi+ 8*0] /* #1 load low residue */
movq mm2, [edi+ 8*1] /* #1 load high residue */
punpckldq mm0, mm0 /* build constant (0x0080 in all four words) */
movq mm3, [edi+ 8*2] /* #2 load low residue */
movq mm4, [edi+ 8*3] /* #2 load high residue */
movq mm5, [edi+ 8*4] /* #3 load low residue */
movq mm6, [edi+ 8*5] /* #3 load high residue */
paddsw mm1, mm0 /* #1 bias low residue */
paddsw mm2, mm0 /* #1 bias high residue */
packuswb mm1, mm2 /* #1 pack to byte */
paddsw mm3, mm0 /* #2 bias low residue */
paddsw mm4, mm0 /* #2 bias high residue */
packuswb mm3, mm4 /* #2 pack to byte */
paddsw mm5, mm0 /* #3 bias low residue */
paddsw mm6, mm0 /* #3 bias high residue */
packuswb mm5, mm6 /* #3 pack to byte */
movq [edx], mm1 /* #1 write row */
movq [edx + ebx], mm3 /* #2 write row */
movq [edx + ebx*2], mm5 /* #3 write row */
movq mm1, [edi+ 8*6] /* #4 load low residue */
lea ecx, [ebx + ebx*2] /* make dst_ystride * 3 */
movq mm2, [edi+ 8*7] /* #4 load high residue */
movq mm3, [edi+ 8*8] /* #5 load low residue */
lea esi, [ebx*4 + ebx] /* make dst_ystride * 5 */
movq mm4, [edi+ 8*9] /* #5 load high residue */
movq mm5, [edi+ 8*10] /* #6 load low residue */
lea eax, [ecx*2 + ebx] /* make dst_ystride * 7 (eax no longer holds the constant) */
movq mm6, [edi+ 8*11] /* #6 load high residue */
paddsw mm1, mm0 /* #4 bias low residue */
paddsw mm2, mm0 /* #4 bias high residue */
packuswb mm1, mm2 /* #4 pack to byte */
paddsw mm3, mm0 /* #5 bias low residue */
paddsw mm4, mm0 /* #5 bias high residue */
packuswb mm3, mm4 /* #5 pack to byte */
paddsw mm5, mm0 /* #6 bias low residue */
paddsw mm6, mm0 /* #6 bias high residue */
packuswb mm5, mm6 /* #6 pack to byte */
movq [edx + ecx], mm1 /* #4 write row */
movq [edx + ebx*4], mm3 /* #5 write row */
movq [edx + esi], mm5 /* #6 write row */
movq mm1, [edi+ 8*12] /* #7 load low residue */
movq mm2, [edi+ 8*13] /* #7 load high residue */
movq mm3, [edi+ 8*14] /* #8 load low residue */
movq mm4, [edi+ 8*15] /* #8 load high residue */
paddsw mm1, mm0 /* #7 bias low residue */
paddsw mm2, mm0 /* #7 bias high residue */
packuswb mm1, mm2 /* #7 pack to byte */
paddsw mm3, mm0 /* #8 bias low residue */
paddsw mm4, mm0 /* #8 bias high residue */
packuswb mm3, mm4 /* #8 pack to byte */
movq [edx + ecx*2], mm1 /* #7 write row (ecx*2 = dst_ystride * 6) */
movq [edx + eax], mm3 /* #8 write row */
}
}
/*Reconstructs an inter-coded 8x8 block from a single predictor: each source
byte is widened to 16 bits, the matching residue value is added with signed
saturation, and the sum is stored to the destination saturated to [0,255].
_dst: Pointer to the first row of the destination block.
_dst_ystride: The offset in bytes between two destination rows.
_src: Pointer to the first row of the predictor block.
_src_ystride: The offset in bytes between two predictor rows.
_residue: The 64 residue values, read as eight consecutive rows of eight.*/
void oc_frag_recon_inter_mmx (unsigned char *_dst, int _dst_ystride,
const unsigned char *_src, int _src_ystride, const ogg_int16_t *_residue){
/* ---------------------------------------------------------------------
This function does the inter reconstruction step with two iterations
running in parallel to hide some load-latencies and break the dependency
chains. The iteration for each instruction is noted by the #id in the
comments (in case you want to reconstruct it)
--------------------------------------------------------------------- */
_asm{
pxor mm0, mm0 /* generate constant 0 */
mov esi, [_src]
mov edi, [_residue]
mov eax, [_src_ystride]
mov edx, [_dst]
mov ebx, [_dst_ystride]
mov ecx, 4 /* 4 passes of 2 rows each */
align 16
nextchunk:
movq mm3, [esi] /* #1 load source */
movq mm1, [edi+0] /* #1 load residue low */
movq mm2, [edi+8] /* #1 load residue high */
movq mm7, [esi+eax] /* #2 load source */
movq mm4, mm3 /* #1 get copy of src */
movq mm5, [edi+16] /* #2 load residue low */
punpckhbw mm4, mm0 /* #1 expand high source */
movq mm6, [edi+24] /* #2 load residue high */
punpcklbw mm3, mm0 /* #1 expand low source */
paddsw mm4, mm2 /* #1 add residue high */
movq mm2, mm7 /* #2 get copy of src */
paddsw mm3, mm1 /* #1 add residue low */
punpckhbw mm2, mm0 /* #2 expand high source */
packuswb mm3, mm4 /* #1 final row pixels */
punpcklbw mm7, mm0 /* #2 expand low source */
movq [edx], mm3 /* #1 write row */
paddsw mm2, mm6 /* #2 add residue high */
add edi, 32 /* residue += 2 rows (32 bytes) */
paddsw mm7, mm5 /* #2 add residue low */
sub ecx, 1 /* update loop counter */
/* The MMX and lea instructions below leave EFLAGS untouched, so the
result of the sub above is still what jne tests. */
packuswb mm7, mm2 /* #2 final row */
lea esi, [esi+eax*2] /* src += stride * 2 */
movq [edx + ebx], mm7 /* #2 write row */
lea edx, [edx+ebx*2] /* dst += stride * 2 */
jne nextchunk
}
}
/*Reconstructs an inter-coded 8x8 block from the average of two predictors:
the two source blocks are averaged per byte (rounding down), the residue is
added with signed saturation, and the result is stored to the destination
saturated to [0,255].
_dst: Pointer to the first row of the destination block.
_dst_ystride: The offset in bytes between two destination rows.
_src1: Pointer to the first row of the first predictor block.
_src1_ystride: The offset in bytes between two rows of the first predictor.
_src2: Pointer to the first row of the second predictor block.
_src2_ystride: The offset in bytes between two rows of the second predictor.
_residue: The 64 residue values, read as eight consecutive rows of eight.*/
void oc_frag_recon_inter2_mmx(unsigned char *_dst, int _dst_ystride,
const unsigned char *_src1, int _src1_ystride, const unsigned char *_src2,
int _src2_ystride,const ogg_int16_t *_residue){
/* ---------------------------------------------------------------------
This function does the inter2 reconstruction step. The building of the
average is done with a bit-twiddling trick to avoid excessive register
copy work during byte to word conversion.
average = (a & b) + (((a ^ b) & 0xfe) >> 1);
(shown for a single byte; it's done with 8 of them at a time)
Masking with 0xfe first is what allows a single 64-bit shift to be used:
no bit can cross from one byte into its neighbor.
Slightly faster than the obvious method using add and shift, but not an
earthshaking improvement either.
If anyone comes up with a way that produces bit-identical outputs
using the pavgb instruction let me know and I'll do the 3dnow codepath.
--------------------------------------------------------------------- */
_asm{
mov eax, 0xfefefefe
mov esi, [_src1]
mov edi, [_src2]
movd mm1, eax
mov ebx, [_residue]
mov edx, [_dst]
mov eax, [_dst_ystride]
punpckldq mm1, mm1 /* replicate lsb32 */
mov ecx, 8 /* init loop counter */
pxor mm0, mm0 /* constant zero */
sub edx, eax /* dst -= dst_stride (re-added inside the loop before the store) */
align 16
nextrow:
movq mm2, [esi] /* load source1 */
movq mm3, [edi] /* load source2 */
movq mm5, [ebx + 0] /* load lower residue */
movq mm6, [ebx + 8] /* load higher residue */
add esi, _src1_ystride /* src1 += src1_stride */
add edi, _src2_ystride /* src2 += src2_stride */
movq mm4, mm2 /* get copy of source1 */
pand mm2, mm3 /* s1 & s2 (avg part) */
pxor mm3, mm4 /* s1 ^ s2 (avg part) */
add ebx, 16 /* residue += 1 row (16 bytes) */
pand mm3, mm1 /* mask out low bits */
psrlq mm3, 1 /* shift xor avg-part */
paddd mm3, mm2 /* build final average */
add edx, eax /* dst += dst_stride */
movq mm2, mm3 /* get copy of average */
punpckhbw mm3, mm0 /* average high */
punpcklbw mm2, mm0 /* average low */
paddsw mm3, mm6 /* high + residue */
paddsw mm2, mm5 /* low + residue */
sub ecx, 1 /* update loop counter */
/* The MMX instructions below leave EFLAGS untouched, so the result of
the sub above is still what jne tests. */
packuswb mm2, mm3 /* pack and saturate */
movq [edx], mm2 /* write row */
jne nextrow
}
}
/*Executes emms, which marks the x87 register stack as empty again so that
floating-point code can run after MMX instructions have been used.*/
void oc_restore_fpu_mmx(void){
_asm { emms }
}
#endif

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,377 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id:
********************************************************************/
/* -------------------------------------------------------------------
MMX based loop filter for the theora codec.
Originally written by Rudolf Marek, based on code from On2's VP3.
Converted to Visual Studio inline assembly by Nils Pipenbrinck.
Note: I can't test these since my example files never get into the
loop filters, but the code has been converted semi-automatically from
the GCC sources, so it ought to work.
---------------------------------------------------------------------*/
#include "../../internal.h"
#include "x86int.h"
#include <mmintrin.h>
#if defined(USE_ASM)
/*Filters eight pixels across a horizontal block edge.
The routine reads the four rows starting two rows above _pix, computes
R=(p0-p3+3*(p2-p1)+4)>>3 per column (p0...p3 being those four rows from top
to bottom), passes R through the loop filter limit function, then adds the
result to row p1 and subtracts it from row p2, both saturated to [0,255].
Note that eax is moved back by two rows first, so the "_pix[...]" notation in
the comments below is relative to that adjusted pointer.
_pix: Pointer to the first pixel of the row just below the edge.
_ystride: The offset in bytes between two rows.
_ll: Four 16-bit copies of the filter limit L (read as one quadword).*/
static void loop_filter_v(unsigned char *_pix,int _ystride,
const ogg_int16_t *_ll){
_asm {
mov eax, [_pix]
mov edx, [_ystride]
mov ebx, [_ll]
/* _pix -= ystride */
sub eax, edx
/* mm0=0 */
pxor mm0, mm0
/* _pix -= ystride (eax now points two rows above the original _pix) */
sub eax, edx
/* esi=_ystride*3 */
lea esi, [edx + edx*2]
/* mm7=_pix[0...8]*/
movq mm7, [eax]
/* mm4=_pix[0...8+_ystride*3]*/
movq mm4, [eax + esi]
/* mm6=_pix[0...8]*/
movq mm6, mm7
/* Expand unsigned _pix[0...3] to 16 bits.*/
punpcklbw mm6, mm0
movq mm5, mm4
/* Expand unsigned _pix[4...7] to 16 bits.*/
punpckhbw mm7, mm0
punpcklbw mm4, mm0
/* Expand other arrays too.*/
punpckhbw mm5, mm0
/*mm7:mm6=_p[0...7]-_p[0...7+_ystride*3]:*/
psubw mm6, mm4
psubw mm7, mm5
/*mm5=mm4=_pix[0...7+_ystride]*/
movq mm4, [eax + edx]
/*mm1=mm3=mm2=_pix[0..7]+_ystride*2]*/
movq mm2, [eax + edx*2]
movq mm5, mm4
movq mm3, mm2
movq mm1, mm2
/*Expand these arrays.*/
punpckhbw mm5, mm0
punpcklbw mm4, mm0
punpckhbw mm3, mm0
punpcklbw mm2, mm0
pcmpeqw mm0, mm0
/*mm0=3 3 3 3
mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
psubw mm3, mm5
psrlw mm0, 14
psubw mm2, mm4
/*Scale by 3.*/
pmullw mm3, mm0
pmullw mm2, mm0
/*mm0=4 4 4 4
f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
psrlw mm0, 1
paddw mm3, mm7
psllw mm0, 2
paddw mm2, mm6
/*Add 4.*/
paddw mm3, mm0
paddw mm2, mm0
/*"Divide" by 8.*/
psraw mm3, 3
psraw mm2, 3
/*Now compute lflim of mm3:mm2 cf. Section 7.10 of the spec.*/
/*Free up mm5 (mm4 now holds the packed bytes of row _ystride again).*/
packuswb mm4, mm5
/*mm0=L L L L*/
movq mm0, [ebx]
/*if(R_i<-2L||R_i>2L)R_i=0:*/
movq mm5, mm2
pxor mm6, mm6
movq mm7, mm0
psubw mm6, mm0
psllw mm7, 1
psllw mm6, 1
/*mm2==R_3 R_2 R_1 R_0*/
/*mm5==R_3 R_2 R_1 R_0*/
/*mm6==-2L -2L -2L -2L*/
/*mm7==2L 2L 2L 2L*/
pcmpgtw mm7, mm2
pcmpgtw mm5, mm6
pand mm2, mm7
movq mm7, mm0
pand mm2, mm5
psllw mm7, 1
movq mm5, mm3
/*mm3==R_7 R_6 R_5 R_4*/
/*mm5==R_7 R_6 R_5 R_4*/
/*mm6==-2L -2L -2L -2L*/
/*mm7==2L 2L 2L 2L*/
pcmpgtw mm7, mm3
pcmpgtw mm5, mm6
pand mm3, mm7
movq mm7, mm0
pand mm3, mm5
/*if(R_i<-L)R_i'=R_i+2L;
if(R_i>L)R_i'=R_i-2L;
if(R_i<-L||R_i>L)R_i=-R_i':*/
psraw mm6, 1
movq mm5, mm2
psllw mm7, 1
/*mm2==R_3 R_2 R_1 R_0*/
/*mm5==R_3 R_2 R_1 R_0*/
/*mm6==-L -L -L -L*/
/*mm0==L L L L*/
/*mm5=R_i>L?FF:00*/
pcmpgtw mm5, mm0
/*mm6=-L>R_i?FF:00*/
pcmpgtw mm6, mm2
/*mm7=R_i>L?2L:0*/
pand mm7, mm5
/*mm2=R_i>L?R_i-2L:R_i*/
psubw mm2, mm7
movq mm7, mm0
/*mm5=-L>R_i||R_i>L*/
por mm5, mm6
psllw mm7, 1
/*mm7=-L>R_i?2L:0*/
pand mm7, mm6
pxor mm6, mm6
/*mm2=-L>R_i?R_i+2L:R_i*/
paddw mm2, mm7
psubw mm6, mm0
/*mm5=-L>R_i||R_i>L?-R_i':0*/
pand mm5, mm2
movq mm7, mm0
/*mm2=-L>R_i||R_i>L?0:R_i*/
psubw mm2, mm5
psllw mm7, 1
/*mm2=-L>R_i||R_i>L?-R_i':R_i*/
psubw mm2, mm5
movq mm5, mm3
/*mm3==R_7 R_6 R_5 R_4*/
/*mm5==R_7 R_6 R_5 R_4*/
/*mm6==-L -L -L -L*/
/*mm0==L L L L*/
/*mm6=-L>R_i?FF:00*/
pcmpgtw mm6, mm3
/*mm5=R_i>L?FF:00*/
pcmpgtw mm5, mm0
/*mm7=R_i>L?2L:0*/
pand mm7, mm5
/*mm3=R_i>L?R_i-2L:R_i*/
psubw mm3, mm7
psllw mm0, 1
/*mm5=-L>R_i||R_i>L*/
por mm5, mm6
/*mm0=-L>R_i?2L:0*/
pand mm0, mm6
/*mm3=-L>R_i?R_i+2L:R_i*/
paddw mm3, mm0
/*mm5=-L>R_i||R_i>L?-R_i':0*/
pand mm5, mm3
/*mm3=-L>R_i||R_i>L?0:R_i*/
psubw mm3, mm5
/*mm3=-L>R_i||R_i>L?-R_i':R_i*/
psubw mm3, mm5
/*Unfortunately, there's no unsigned byte+signed byte with unsigned
saturation op code, so we have to promote things back 16 bits.*/
pxor mm0, mm0
movq mm5, mm4
punpcklbw mm4, mm0
punpckhbw mm5, mm0
movq mm6, mm1
punpcklbw mm1, mm0
punpckhbw mm6, mm0
/*_pix[0...8+_ystride]+=R_i*/
paddw mm4, mm2
paddw mm5, mm3
/*_pix[0...8+_ystride*2]-=R_i*/
psubw mm1, mm2
psubw mm6, mm3
packuswb mm4, mm5
packuswb mm1, mm6
/*Write it back out.*/
movq [eax + edx], mm4
movq [eax + edx*2], mm1
}
}
/*This code implements the bulk of loop_filter_h().
Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
four p0's to one register we must transpose the values in four mmx regs.
When half is done we repeat this for the rest.
The routine handles four rows: it reads four consecutive bytes p0...p3 from
each row, computes R=(p0-p3+3*(p2-p1)+4)>>3 per row, passes R through the
loop filter limit function, and writes back p1+R and p2-R (saturated to
[0,255]) as the two middle bytes of each row.
_pix: Pointer to p0 of the first row.
_ystride: The offset in bytes between two rows.
_ll: Four 16-bit copies of the filter limit L (read as one quadword).*/
static void loop_filter_h4(unsigned char *_pix,long _ystride,
const ogg_int16_t *_ll){
/* todo: merge the comments from the GCC sources */
_asm {
mov ecx, [_pix]
mov edx, [_ystride]
mov eax, [_ll]
/*esi=_ystride*3*/
lea esi, [edx + edx*2]
/*Load four bytes from each of the four rows.*/
movd mm0, dword ptr [ecx]
movd mm1, dword ptr [ecx + edx]
movd mm2, dword ptr [ecx + edx*2]
movd mm3, dword ptr [ecx + esi]
/*Transpose: afterwards mm1 holds the four p0's followed by the four p1's,
and mm0 holds the four p2's followed by the four p3's.*/
punpcklbw mm0, mm1
punpcklbw mm2, mm3
movq mm1, mm0
punpckhwd mm0, mm2
punpcklwd mm1, mm2
pxor mm7, mm7
/*Expand to 16 bits: mm1=p0, mm5=p1, mm0=p2, mm3=p3.*/
movq mm5, mm1
punpcklbw mm1, mm7
punpckhbw mm5, mm7
movq mm3, mm0
punpcklbw mm0, mm7
punpckhbw mm3, mm7
/*mm1=p0-p3*/
psubw mm1, mm3
/*mm4=p2 (kept for the final update)*/
movq mm4, mm0
pcmpeqw mm2, mm2
/*mm0=p2-p1*/
psubw mm0, mm5
/*mm2=3 3 3 3*/
psrlw mm2, 14
pmullw mm0, mm2
psrlw mm2, 1
paddw mm0, mm1
/*mm2=4 4 4 4*/
psllw mm2, 2
paddw mm0, mm2
/*mm0=R=(p0-p3+3*(p2-p1)+4)>>3*/
psraw mm0, 3
/*mm6=L L L L*/
movq mm6, qword ptr [eax]
/*if(R_i<-2L||R_i>2L)R_i=0:*/
movq mm1, mm0
pxor mm2, mm2
movq mm3, mm6
psubw mm2, mm6
psllw mm3, 1
psllw mm2, 1
pcmpgtw mm3, mm0
pcmpgtw mm1, mm2
pand mm0, mm3
pand mm0, mm1
/*if(R_i<-L)R_i'=R_i+2L;
if(R_i>L)R_i'=R_i-2L;
if(R_i<-L||R_i>L)R_i=-R_i':*/
psraw mm2, 1
movq mm1, mm0
movq mm3, mm6
pcmpgtw mm2, mm0
pcmpgtw mm1, mm6
psllw mm3, 1
psllw mm6, 1
pand mm3, mm1
pand mm6, mm2
psubw mm0, mm3
por mm1, mm2
paddw mm0, mm6
pand mm1, mm0
psubw mm0, mm1
psubw mm0, mm1
/*p1+=R_i, p2-=R_i*/
paddw mm5, mm0
psubw mm4, mm0
packuswb mm5, mm7
packuswb mm4, mm7
/*Interleave so that every 16-bit word holds the new p1 and p2 of one row.*/
punpcklbw mm5, mm4
/*Write the two middle bytes of each row back, one row per 16-bit store.*/
movd edi, mm5
mov word ptr [ecx + 01H], di
psrlq mm5, 32
shr edi, 16
mov word ptr [ecx + edx + 01H], di
movd edi, mm5
mov word ptr [ecx + edx*2 + 01H], di
shr edi, 16
mov word ptr [ecx + esi + 01H], di
}
}
/*Filters eight rows across a vertical block edge by running the four-row
   helper loop_filter_h4() twice.
  _pix:     Pointer to the first pixel to the right of the edge in the top row.
  _ystride: The offset in bytes between two rows.
  _ll:      Four 16-bit copies of the filter limit L.*/
static void loop_filter_h(unsigned char *_pix,int _ystride,
 const ogg_int16_t *_ll){
  /*The helper expects a pointer to the first of the four pixels it reads,
     which lies two pixels to the left of the edge.*/
  _pix-=2;
  loop_filter_h4(_pix,_ystride,_ll);
  /*Advance four rows.
    This is a multiplication rather than _ystride<<2 because left-shifting a
     negative signed value (e.g. the stride of a bottom-up buffer) is undefined
     behavior in C.*/
  loop_filter_h4(_pix+_ystride*4,_ystride,_ll);
}
/*The generic row-walking logic is duplicated here (instead of only swapping
   out the per-edge filters) so that the MMX edge filters can be inlined and a
   single emms can be issued once at the very end.
  The limit is broadcast into a small vector on the fly; the _bv lookup table
   is not consulted at all.*/
/*Runs the loop filter over a range of fragment rows of one color plane.
  Because the bottom edge of a fragment may be filtered, the fragment row that
   follows the range has to be available as well.
  _bv:        The bounding values array (unused by this implementation).
  _refi:      The index of the frame buffer to filter.
  _pli:       The color plane to filter.
  _fragy0:    The Y coordinate of the first fragment row to filter.
  _fragy_end: The Y coordinate of the fragment row to stop filtering at.*/
void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
 int _refi,int _pli,int _fragy0,int _fragy_end){
  ogg_int16_t __declspec(align(8)) ll[4];
  th_img_plane      *iplane;
  oc_fragment_plane *fplane;
  oc_fragment       *plane_first;
  oc_fragment       *plane_end;
  oc_fragment       *row;
  oc_fragment       *rows_end;
  int                i;
  /*Broadcast the filter limit into all four 16-bit lanes.*/
  for(i=0;i<4;i++){
    ll[i]=(ogg_int16_t)_state->loop_filter_limits[_state->qis[0]];
  }
  iplane=_state->ref_frame_bufs[_refi]+_pli;
  fplane=_state->fplanes+_pli;
  plane_first=_state->frags+fplane->froffset;
  plane_end=_state->frags+fplane->froffset+fplane->nfrags;
  row=plane_first+_fragy0*fplane->nhfrags;
  rows_end=row+(_fragy_end-_fragy0)*fplane->nhfrags;
  /*An edge is filtered whenever at least one of the two fragments sharing it
     is coded.
    The order in which the edges are visited affects the result, and the
     slightly odd order below is the one VP3 uses, so it must be kept.*/
  for(;row<rows_end;row+=fplane->nhfrags){
    oc_fragment *row_end;
    oc_fragment *cur;
    row_end=row+fplane->nhfrags;
    for(cur=row;cur<row_end;cur++){
      oc_fragment *below;
      if(!cur->coded)continue;
      /*Left edge, unless this is the first fragment of the row.*/
      if(cur>row){
        loop_filter_h(cur->buffer[_refi],iplane->stride,ll);
      }
      /*Top edge, unless this is the first row of the plane.*/
      if(row>plane_first){
        loop_filter_v(cur->buffer[_refi],iplane->stride,ll);
      }
      /*Right edge, only if the right neighbor will not filter it itself.*/
      if(cur+1<row_end&&!(cur+1)->coded){
        loop_filter_h(cur->buffer[_refi]+8,iplane->stride,ll);
      }
      /*Bottom edge, only if the fragment below will not filter it itself.*/
      below=cur+fplane->nhfrags;
      if(below<plane_end&&!below->coded){
        loop_filter_v(below->buffer[_refi],iplane->stride,ll);
      }
    }
  }
  /*This needs to be removed when decode specific functions are implemented:*/
  _mm_empty();
}
#endif

Просмотреть файл

@ -0,0 +1,189 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: mmxstate.c 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
/* ------------------------------------------------------------------------
MMX acceleration of complete fragment reconstruction algorithm.
Originally written by Rudolf Marek.
Conversion to MSC intrinsics by Nils Pipenbrinck.
---------------------------------------------------------------------*/
#if defined(USE_ASM)
#include "../../internal.h"
#include "../idct.h"
#include "x86int.h"
#include <mmintrin.h>
/*Maps a zig-zag index to the position in the 64-entry residue buffer where
the dequantized coefficient is stored before the MMX iDCT routines are run
(see its use in oc_state_frag_recon_mmx()).
NOTE(review): this is presumably the coefficient layout the MMX iDCT
expects; confirm against mmxidct.c before changing any entry.*/
static const unsigned char OC_FZIG_ZAGMMX[64]=
{
0, 8, 1, 2, 9,16,24,17,
10, 3,32,11,18,25, 4,12,
5,26,19,40,33,34,41,48,
27, 6,13,20,28,21,14, 7,
56,49,42,35,43,50,57,36,
15,22,29,30,23,44,37,58,
51,59,38,45,52,31,60,53,
46,39,47,54,61,62,55,63
};
/*Stores _value into each of the 16 consecutive 64-bit slots starting at _dst
   (128 bytes in total, i.e. one 8x8 block of 16-bit values).*/
static __inline void loc_fill_mmx_value (__m64 * _dst, __m64 _value){
  int i;
  for(i=0;i<16;i++){
    _dst[i]=_value;
  }
}
/*Copies an 8x8 block of bytes, one 64-bit row at a time.
  The source and destination may use different strides.
  All eight source rows are read before the first destination row is written,
   exactly as in a fully unrolled load-then-store sequence.*/
static __inline void loc_blockcopy_mmx (unsigned char * _dst, int _dst_ystride,
 unsigned char * _src, int _src_ystride){
  __m64 rows[8];
  int   y;
  for(y=0;y<8;y++){
    rows[y]=*(__m64*)(_src+y*_src_ystride);
  }
  for(y=0;y<8;y++){
    *(__m64*)(_dst+y*_dst_ystride)=rows[y];
  }
}
/*Dequantizes, inverse-transforms and reconstructs a single fragment.
  _state:      The decoder state (supplies the frame buffers and strides).
  _frag:       The fragment to reconstruct.
  _pli:        The color plane the fragment belongs to.
  _dct_coeffs: The quantized coefficients in zig-zag order.
  _last_zzi:   The zig-zag index reached BEFORE the final token of the block
                was decoded (see the note inside the function).
  _ncoefs:     The number of coefficients to dequantize.
  _dc_iquant:  The dequantizer for the DC coefficient.
  _ac_iquant:  The dequantizers for the AC coefficients.*/
void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
 int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
 ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]){
  ogg_int16_t __declspec(align(16)) residue[64];
  int self_framei;
  int self_ystride;
  /*A note on _last_zzi: it is not quite the number of coefficients decoded
     for this block, but the value of zzi before the block's final token was
     decoded.
    Usually that token is an EOB (a continued EOB run from an earlier block
     counts too), and then the two are the same.
    If the final token was not an EOB but filled the block to exactly 64
     coefficients, _last_zzi is smaller than 64.
    As long as that token was not a pure zero run, _last_zzi is at least 46,
     which makes no difference to any case below.
    If it WAS a pure zero run of length 63, however, _last_zzi is 1 although
     64 coefficients were decoded, so the DC-only path below is taken where a
     true coefficient count would not take it.
    Likewise a zero run of length 64 yields _last_zzi==0, yet the DC
     coefficient is still processed, since DC prediction may have made it
     non-zero.
    Convoluted as it is, this is arguably right: it lets us dequantize fewer
     coefficients and use a smaller transform when a block ends in a long zero
     run rather than a normal EOB token.
    It could be smarter (several separate zero runs at the end of a block
     defeat it), but an encoder that emits those deserves what it gets.
    The approach is inherited from VP3.*/
  if(_last_zzi>=2){
    int zzi;
    /*Clear the buffer, then dequantize the coded coefficients into the
       positions the MMX iDCT expects and run the iDCT.*/
    /*On K7, etc., the clear could be replaced with movntq and sfence.*/
    loc_fill_mmx_value((__m64 *)residue,_mm_setzero_si64());
    residue[0]=(ogg_int16_t)((ogg_int32_t)_frag->dc*_dc_iquant);
    /*This is planned to be rewritten in MMX.*/
    for(zzi=1;zzi<_ncoefs;zzi++){
      int ci;
      ci=OC_FZIG_ZAG[zzi];
      residue[OC_FZIG_ZAGMMX[zzi]]=
       (ogg_int16_t)((ogg_int32_t)_dct_coeffs[zzi]*_ac_iquant[ci]);
    }
    /*Fewer than 10 coefficients: the reduced transform is sufficient.*/
    if(_last_zzi>=10){
      oc_idct8x8_mmx(residue);
    }
    else{
      oc_idct8x8_10_mmx(residue);
    }
  }
  else{
    /*DC only: no transform is needed, the whole block has a single value.*/
    __m64 dc;
    /*Why is the iquant product rounded in this case and no others? Who
       knows.*/
    dc=_m_from_int(((ogg_int32_t)_frag->dc*_dc_iquant+15)>>5);
    /*Replicate the low 16 bits into all four words of the register.*/
    dc=_m_punpcklwd(dc,dc);
    dc=_m_punpckldq(dc,dc);
    loc_fill_mmx_value((__m64 *)residue,dc);
  }
  /*Write the result into the frame currently being decoded.*/
  self_framei=_state->ref_frame_idx[OC_FRAME_SELF];
  self_ystride=_state->ref_frame_bufs[self_framei][_pli].stride;
  /*For now ystride values in all ref frames assumed to be equal.*/
  if(_frag->mbmode!=OC_MODE_INTRA){
    int mvoffsets[2];
    int ref_framei;
    int ref_ystride;
    ref_framei=_state->ref_frame_idx[OC_FRAME_FOR_MODE[_frag->mbmode]];
    ref_ystride=_state->ref_frame_bufs[ref_framei][_pli].stride;
    if(oc_state_get_mv_offsets(_state,mvoffsets,_frag->mv[0],
     _frag->mv[1],ref_ystride,_pli)>1){
      /*Two offsets: predict from the average of two reference blocks.*/
      oc_frag_recon_inter2_mmx(_frag->buffer[self_framei],self_ystride,
       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,
       _frag->buffer[ref_framei]+mvoffsets[1],ref_ystride,residue);
    }
    else{
      oc_frag_recon_inter_mmx(_frag->buffer[self_framei],self_ystride,
       _frag->buffer[ref_framei]+mvoffsets[0],ref_ystride,residue);
    }
  }
  else{
    oc_frag_recon_intra_mmx(_frag->buffer[self_framei],self_ystride,residue);
  }
  _mm_empty();
}
/*Copies a list of fragments from one reference frame to another.
  _state:     The decoder state.
  _fragis:    The indices of the fragments to copy.
  _nfragis:   The number of entries in _fragis.
  _dst_frame: The OC_FRAME_* reference frame to copy to.
  _src_frame: The OC_FRAME_* reference frame to copy from.
  _pli:       The color plane the fragments belong to (selects the strides).*/
void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
 int _nfragis,int _dst_frame,int _src_frame,int _pli){
  int dst_framei;
  int src_framei;
  int dst_ystride;
  int src_ystride;
  int i;
  dst_framei=_state->ref_frame_idx[_dst_frame];
  src_framei=_state->ref_frame_idx[_src_frame];
  dst_ystride=_state->ref_frame_bufs[dst_framei][_pli].stride;
  src_ystride=_state->ref_frame_bufs[src_framei][_pli].stride;
  for(i=0;i<_nfragis;i++){
    oc_fragment *frag;
    frag=_state->frags+_fragis[i];
    loc_blockcopy_mmx(frag->buffer[dst_framei],dst_ystride,
     frag->buffer[src_framei],src_ystride);
  }
  /*Leave MMX state once, after all blocks have been copied.*/
  _m_empty();
}
#endif

Просмотреть файл

@ -0,0 +1,49 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: x86int.h 15400 2008-10-15 12:10:58Z tterribe $
********************************************************************/
/*Declarations of the MMX-accelerated decoder routines for the Visual C++
build.*/
#if !defined(_x86_x86int_vc_H)
# define _x86_x86int_vc_H (1)
# include "../../internal.h"
/*Fills in the state's function table, replacing the C defaults with the MMX
versions below when the CPU reports MMX support.*/
void oc_state_vtable_init_x86(oc_theora_state *_state);
/*Intra reconstruction: the residue plus a bias of 128.*/
void oc_frag_recon_intra_mmx(unsigned char *_dst,int _dst_ystride,
const ogg_int16_t *_residue);
/*Inter reconstruction from one predictor: _src plus the residue.*/
void oc_frag_recon_inter_mmx(unsigned char *_dst,int _dst_ystride,
const unsigned char *_src,int _src_ystride,const ogg_int16_t *_residue);
/*Inter reconstruction from two predictors: the average of _src1 and _src2
plus the residue.*/
void oc_frag_recon_inter2_mmx(unsigned char *_dst,int _dst_ystride,
const unsigned char *_src1,int _src1_ystride,const unsigned char *_src2,
int _src2_ystride,const ogg_int16_t *_residue);
/*Copies the listed fragments from one reference frame to another.*/
void oc_state_frag_copy_mmx(const oc_theora_state *_state,const int *_fragis,
int _nfragis,int _dst_frame,int _src_frame,int _pli);
/*Issues emms so that x87 floating-point code can run again.*/
void oc_restore_fpu_mmx(void);
/*Dequantizes, inverse-transforms and reconstructs one fragment.*/
void oc_state_frag_recon_mmx(oc_theora_state *_state,const oc_fragment *_frag,
int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,int _ncoefs,
ogg_uint16_t _dc_iquant,const ogg_uint16_t _ac_iquant[64]);
/*In-place 8x8 inverse DCT.
oc_state_frag_recon_mmx() selects the _10 variant when _last_zzi<10.*/
void oc_idct8x8_mmx(ogg_int16_t _y[64]);
void oc_idct8x8_10_mmx(ogg_int16_t _y[64]);
/*Applies the loop filter to a range of fragment rows in one plane.*/
void oc_state_loop_filter_frag_rows_mmx(oc_theora_state *_state,int *_bv,
int _refi,int _pli,int _fragy0,int _fragy_end);
#endif

Просмотреть файл

@ -0,0 +1,41 @@
/********************************************************************
* *
* THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: x86state.c 15427 2008-10-21 02:36:19Z xiphmont $
********************************************************************/
#if defined(USE_ASM)
#include "x86int.h"
#include "../../cpu.c"
/*Initializes the table of accelerated functions for this state.
  The portable C implementations are installed first; if the CPU reports MMX
   support, the entries that have an MMX version are then overridden.*/
void oc_state_vtable_init_x86(oc_theora_state *_state){
  oc_base_opt_vtable *vtable;
  _state->cpu_flags=oc_cpu_flags_get();
  /*Start from the C defaults.*/
  oc_state_vtable_init_c(_state);
  /*Without MMX there is nothing to override.*/
  if(!(_state->cpu_flags&OC_CPU_X86_MMX))return;
  vtable=&_state->opt_vtable;
  vtable->frag_recon_intra=oc_frag_recon_intra_mmx;
  vtable->frag_recon_inter=oc_frag_recon_inter_mmx;
  vtable->frag_recon_inter2=oc_frag_recon_inter2_mmx;
  vtable->restore_fpu=oc_restore_fpu_mmx;
  vtable->state_frag_copy=oc_state_frag_copy_mmx;
  vtable->state_frag_recon=oc_state_frag_recon_mmx;
  vtable->state_loop_filter_frag_rows=oc_state_loop_filter_frag_rows_mmx;
}
#endif

Просмотреть файл

@ -6,12 +6,12 @@
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
* *
********************************************************************
function:
last mod: $Id: internal.h 14714 2008-04-12 01:04:43Z giles $
last mod: $Id: internal.h 15469 2008-10-30 12:49:42Z tterribe $
********************************************************************/
@ -27,19 +27,6 @@
# include "dec/huffman.h"
# include "dec/quant.h"
/* debug macros */
#if defined(_MSC_VER) && _MSC_VER < 1400
static const dframe = 0;
static void TH_DEBUG(const char *fmt, ...) {}
#elif defined(_TH_DEBUG_)
#include <stdio.h>
extern long dframe;
extern FILE *debugout;
#define TH_DEBUG(...) fprintf(debugout, __VA_ARGS__)
#else
#define TH_DEBUG(...)
#endif
/*Thank you Microsoft, I know the order of operations.*/
# if defined(_MSC_VER)
# pragma warning(disable:4554) /* order of operations */
@ -47,7 +34,7 @@ extern FILE *debugout;
# endif
/*This library's version.*/
# define OC_VENDOR_STRING "Xiph.Org libTheora I 20071025 3 2 1"
# define OC_VENDOR_STRING "Xiph.Org libTheora I 20081020 3 2 1"
/*Theora bitstream version.*/
# define TH_VERSION_MAJOR (3)
@ -224,10 +211,14 @@ typedef struct{
unsigned invalid:1;
/*The quality index used for this fragment's AC coefficients.*/
unsigned qi:6;
/*The mode of the macroblock this fragment belongs to.*/
int mbmode:8;
/*The prediction-corrected DC component.*/
int dc:16;
/*The mode of the macroblock this fragment belongs to.
Note that the C standard requires an explicit signed keyword for bitfield
types, since some compilers may treat them as unsigned without it.*/
signed int mbmode:8;
/*The prediction-corrected DC component.
Note that the C standard requires an explicit signed keyword for bitfield
types, since some compilers may treat them as unsigned without it.*/
signed int dc:16;
/*A pointer to the portion of an image covered by this fragment in several
images.
The first three are reconstructed frame buffers, while the last is the
@ -241,14 +232,6 @@ typedef struct{
oc_border_info *border;
/*The motion vector used for this fragment.*/
oc_mv mv;
#ifdef _TH_DEBUG_
int quant[64];
int freq[64];
int time[64];
int recon[64];
int loop[64];
#endif
}oc_fragment;
@ -299,77 +282,77 @@ typedef struct{
/*Common state information between the encoder and decoder.*/
struct oc_theora_state{
/*The stream information.*/
th_info info;
th_info info;
/*Table for shared accelerated functions.*/
oc_base_opt_vtable opt_vtable;
oc_base_opt_vtable opt_vtable;
/*CPU flags to detect the presence of extended instruction sets.*/
ogg_uint32_t cpu_flags;
ogg_uint32_t cpu_flags;
/*The fragment plane descriptions.*/
oc_fragment_plane fplanes[3];
oc_fragment_plane fplanes[3];
/*The total number of fragments in a single frame.*/
int nfrags;
int nfrags;
/*The list of fragments, indexed in image order.*/
oc_fragment *frags;
oc_fragment *frags;
/*The total number of super blocks in a single frame.*/
int nsbs;
int nsbs;
/*The list of super blocks, indexed in image order.*/
oc_sb *sbs;
oc_sb *sbs;
/*The number of macro blocks in the X direction.*/
int nhmbs;
int nhmbs;
/*The number of macro blocks in the Y direction.*/
int nvmbs;
int nvmbs;
/*The total number of macro blocks.*/
int nmbs;
int nmbs;
/*The list of macro blocks, indexed in super block order.
That is, the macro block corresponding to the macro block mbi in (luma
plane) super block sbi is (sbi<<2|mbi).*/
oc_mb *mbs;
oc_mb *mbs;
/*The list of coded fragments, in coded order.*/
int *coded_fragis;
int *coded_fragis;
/*The number of coded fragments in each plane.*/
int ncoded_fragis[3];
int ncoded_fragis[3];
/*The list of uncoded fragments.
This just past the end of the list, which is in reverse order, and
uses the same block of allocated storage as the coded_fragis list.*/
int *uncoded_fragis;
int *uncoded_fragis;
/*The number of uncoded fragments in each plane.*/
int nuncoded_fragis[3];
int nuncoded_fragis[3];
/*The list of coded macro blocks in the Y plane, in coded order.*/
int *coded_mbis;
int *coded_mbis;
/*The number of coded macro blocks in the Y plane.*/
int ncoded_mbis;
int ncoded_mbis;
/*A copy of the image data used to fill the input pointers in each fragment.
If the data pointers or strides change, these input pointers must be
re-populated.*/
th_ycbcr_buffer input;
th_ycbcr_buffer input;
/*The number of unique border patterns.*/
int nborders;
int nborders;
/*The storage for the border info for all border fragments.
This data is pointed to from the appropriate fragments.*/
oc_border_info borders[16];
oc_border_info borders[16];
/*The index of the buffers being used for each OC_FRAME_* reference frame.*/
int ref_frame_idx[3];
int ref_frame_idx[3];
/*The actual buffers used for the previously decoded frames.*/
th_ycbcr_buffer ref_frame_bufs[3];
th_ycbcr_buffer ref_frame_bufs[3];
/*The storage for the reference frame buffers.*/
unsigned char *ref_frame_data;
unsigned char *ref_frame_data;
/*The frame number of the last keyframe.*/
ogg_int64_t keyframe_num;
ogg_int64_t keyframe_num;
/*The frame number of the current frame.*/
ogg_int64_t curframe_num;
ogg_int64_t curframe_num;
/*The granpos of the current frame.*/
ogg_int64_t granpos;
ogg_int64_t granpos;
/*The type of the current frame.*/
int frame_type;
int frame_type;
/*The quality indices of the current frame.*/
int qis[3];
int qis[3];
/*The number of quality indices used in the current frame.*/
int nqis;
int nqis;
/*The dequantization tables.*/
oc_quant_table *dequant_tables[2][3];
oc_quant_tables dequant_table_data[2][3];
oc_quant_table *dequant_tables[2][3];
oc_quant_tables dequant_table_data[2][3];
/*Loop filter strength parameters.*/
unsigned char loop_filter_limits[64];
unsigned char loop_filter_limits[64];
};

Просмотреть файл

@ -22,10 +22,15 @@ cp $1/lib/dec/x86/x86state.c ./lib/dec/x86/x86state.c
cp $1/lib/dec/x86/x86int.h ./lib/dec/x86/x86int.h
cp $1/lib/dec/x86/mmxstate.c ./lib/dec/x86/mmxstate.c
cp $1/lib/dec/x86/mmxidct.c ./lib/dec/x86/mmxidct.c
cp $1/lib/dec/bitwise.h ./lib/dec/bitwise.h
cp $1/lib/dec/x86_vc/mmxfrag.c ./lib/dec/x86_vc/mmxfrag.c
cp $1/lib/dec/x86_vc/mmxidct.c ./lib/dec/x86_vc/mmxidct.c
cp $1/lib/dec/x86_vc/mmxloopfilter.c ./lib/dec/x86_vc/mmxloopfilter.c
cp $1/lib/dec/x86_vc/mmxstate.c ./lib/dec/x86_vc/mmxstate.c
cp $1/lib/dec/x86_vc/x86int.h ./lib/dec/x86_vc/x86int.h
cp $1/lib/dec/x86_vc/x86state.c ./lib/dec/x86_vc/x86state.c
cp $1/lib/dec/bitpack.h ./lib/dec/bitpack.h
cp $1/lib/dec/quant.c ./lib/dec/quant.c
cp $1/lib/dec/bitwise.c ./lib/dec/bitwise.c
cp $1/lib/dec/enquant.h ./lib/dec/enquant.h
cp $1/lib/dec/bitpack.c ./lib/dec/bitpack.c
cp $1/lib/dec/internal.c ./lib/dec/internal.c
cp $1/lib/dec/huffdec.h ./lib/dec/huffdec.h
cp $1/lib/dec/dct.h ./lib/dec/dct.h
@ -47,4 +52,4 @@ cp $1/lib/internal.h ./lib/internal.h
cp $1/include/theora/theora.h ./include/theora/theora.h
cp $1/include/theora/theoradec.h ./include/theora/theoradec.h
cp $1/include/theora/codec.h ./include/theora/codec.h
patch -p3 <changeset_r15144.diff
patch -p3 <455357_wince_local_variable_macro_clash_patch