Bug 593753 - Update libvpx to 0.9.2. r=cpearce a=blocking

This commit is contained in:
Timothy B. Terriberry 2010-09-09 14:06:21 +02:00
Родитель c886a42334
Коммит f5c25b4efb
167 изменённых файлов: 6125 добавлений и 3135 удалений

Просмотреть файл

@ -138,7 +138,6 @@ CSRCS += \
reconinter.c \
reconintra.c \
reconintra4x4.c \
segmentation_common.c \
setupintrarecon.c \
swapyv12buffer.c \
textblit.c \
@ -149,6 +148,7 @@ CSRCS += \
demode.c \
dequantize.c \
detokenize.c \
idct_blk.c \
onyxd_if.c \
threading.c \
vp8_dx_iface.c \
@ -169,6 +169,8 @@ ifdef VPX_X86_ASM
# Building on an x86 platform with a supported assembler, include
# the optimized assembly in the build.
CSRCS += \
idct_blk_mmx.c \
idct_blk_sse2.c \
loopfilter_x86.c \
vp8_asm_stubs.c \
x86_systemdependent.c \
@ -177,6 +179,7 @@ CSRCS += \
ASFILES += \
idctllm_mmx.asm \
idctllm_sse2.asm \
iwalsh_mmx.asm \
iwalsh_sse2.asm \
loopfilter_mmx.asm \
@ -187,6 +190,7 @@ ASFILES += \
recon_sse2.asm \
subpixel_mmx.asm \
subpixel_sse2.asm \
subpixel_ssse3.asm \
dequantize_mmx.asm \
emms.asm \
$(NULL)

Просмотреть файл

@ -1,32 +0,0 @@
diff --git a/media/libvpx/vp8/decoder/decodemv.c b/media/libvpx/vp8/decoder/decodemv.c
--- a/media/libvpx/vp8/decoder/decodemv.c
+++ b/media/libvpx/vp8/decoder/decodemv.c
@@ -222,23 +222,24 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
{
B_MODE_INFO *const bmi = mbmi->partition_bmi + j;
MV *const mv = & bmi->mv.as_mv;
int k = -1; /* first block in subset j */
int mv_contz;
while (j != L[++k])
+ {
+#if CONFIG_DEBUG
if (k >= 16)
-#if CONFIG_DEBUG
+ {
assert(0);
-
-#else
- ;
+ }
#endif
+ }
mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
{
case NEW4X4:
read_mv(bc, mv, (const MV_CONTEXT *) mvc);
mv->row += best_mv.row;

Просмотреть файл

@ -18,7 +18,7 @@ diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/com
typedef unsigned char uc;
__inline signed char vp8_signed_char_clamp(int t)
static __inline signed char vp8_signed_char_clamp(int t)
{
t = (t < -128 ? -128 : t);
diff --git a/media/libvpx/vpx/internal/vpx_codec_internal.h b/media/libvpx/vpx/internal/vpx_codec_internal.h

Просмотреть файл

@ -1,32 +0,0 @@
diff --git a/media/libvpx/vp8/decoder/decodemv.c b/media/libvpx/vp8/decoder/decodemv.c
--- a/media/libvpx/vp8/decoder/decodemv.c
+++ b/media/libvpx/vp8/decoder/decodemv.c
@@ -264,16 +264,28 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
#ifdef VPX_MODE_COUNT
vp8_mv_cont_count[mv_contz][2]++;
#endif
break;
default:
break;
}
+ /* Clip the MV for this partition so that it does
+ not extend too far out of the image. */
+ if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
+ mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
+ else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN + 7)
+ mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN + 7;
+
+ if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
+ mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
+ else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN + 7)
+ mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN + 7;
+
/* Fill (uniform) modes, mvs of jth subset.
Must do it here because ensuing subsets can
refer back to us via "left" or "above". */
do
if (j == L[k])
mi->bmi[k] = *bmi;
while (++k < 16);

Просмотреть файл

@ -1,41 +0,0 @@
diff -r 5c557d4dd0c7 media/libvpx/vpx_ports/x86_abi_support.asm
--- a/media/libvpx/vpx_ports/x86_abi_support.asm Wed Jun 16 11:12:38 2010 +1200
+++ b/media/libvpx/vpx_ports/x86_abi_support.asm Thu Jun 17 15:09:49 2010 -0700
@@ -138,12 +138,16 @@
%endmacro
%endif
%endif
+ %define HIDDEN_DATA
%else
%macro GET_GOT 1
%endmacro
%define GLOBAL wrt rip
%ifidn __OUTPUT_FORMAT__,elf64
%define WRT_PLT wrt ..plt
+ %define HIDDEN_DATA :data hidden
+ %else
+ %define HIDDEN_DATA
%endif
%endif
%ifnmacro GET_GOT
diff -r 5c557d4dd0c7 media/libvpx/vp8/common/x86/subpixel_mmx.asm
--- a/media/libvpx/vp8/common/x86/subpixel_mmx.asm Wed Jun 16 11:12:38 2010 +1200
+++ b/media/libvpx/vp8/common/x86/subpixel_mmx.asm Thu Jun 17 15:09:49 2010 -0700
@@ -731,7 +731,7 @@
times 4 dw 0x40
align 16
-global sym(vp8_six_tap_mmx)
+global sym(vp8_six_tap_mmx) HIDDEN_DATA
sym(vp8_six_tap_mmx):
times 8 dw 0
times 8 dw 0
@@ -791,7 +791,7 @@
align 16
-global sym(vp8_bilinear_filters_mmx)
+global sym(vp8_bilinear_filters_mmx) HIDDEN_DATA
sym(vp8_bilinear_filters_mmx):
times 8 dw 128
times 8 dw 0

Просмотреть файл

@ -75,7 +75,6 @@ commonFiles=(
vp8/common/reconinter.c
vp8/common/reconintra4x4.c
vp8/common/reconintra.c
vp8/common/segmentation_common.c
vp8/common/setupintrarecon.c
vp8/common/swapyv12buffer.c
vp8/common/textblit.c
@ -90,8 +89,11 @@ commonFiles=(
vp8/decoder/dequantize.c
vp8/decoder/detokenize.c
vp8/decoder/generic/dsystemdependent.c
vp8/decoder/idct_blk.c
vp8/decoder/onyxd_if.c
vp8/decoder/threading.c
vp8/decoder/x86/idct_blk_mmx.c
vp8/decoder/x86/idct_blk_sse2.c
vp8/decoder/x86/x86_dsystemdependent.c
vp8/vp8_dx_iface.c
vpx/src/vpx_codec.c
@ -183,6 +185,7 @@ commonFiles=(
vpx_scale/yv12config.h
vpx_scale/yv12extend.h
vp8/common/x86/idctllm_mmx.asm
vp8/common/x86/idctllm_sse2.asm
vp8/common/x86/iwalsh_mmx.asm
vp8/common/x86/iwalsh_sse2.asm
vp8/common/x86/loopfilter_mmx.asm
@ -193,6 +196,7 @@ commonFiles=(
vp8/common/x86/recon_sse2.asm
vp8/common/x86/subpixel_mmx.asm
vp8/common/x86/subpixel_sse2.asm
vp8/common/x86/subpixel_ssse3.asm
vp8/decoder/x86/dequantize_mmx.asm
vpx_ports/emms.asm
vpx_ports/x86_abi_support.asm
@ -241,10 +245,6 @@ done
# Patch to reduce compiler warnings, so we can compile with -Werror in mozilla.
patch -p3 < reduce-warnings-1.patch
patch -p3 < splitmv-bounds.patch
patch -p3 < subpixel-qword.patch
# Patch to hide the asm global symbols so linking succeeds on x86-64.
patch -p3 < subpixel-hidden.patch
patch -p3 < emptyif_warning.patch
# Patch to compile with Sun Studio on Solaris
patch -p3 < solaris.patch

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -24,43 +24,36 @@ extern void vp8_init_scan_order_mask();
/*
 * Zero the MODE_INFO entries that border the mode-info array.
 *
 *   mi   - pointer into the mode-info storage; only entries at negative
 *          offsets (and mi[i*cols - 1] for later rows) are written, so the
 *          caller must provide at least cols + 2 valid entries before mi.
 *   rows - number of loop iterations (one border entry cleared per row).
 *   cols - column count; also the per-row index step used by the loop.
 *
 * A former additional call,
 *     vpx_memset(mi - cols - 1, 0, sizeof(MODE_INFO) * cols + 1);
 * has been removed: "* cols + 1" binds as (size * cols) + 1, so it cleared
 * cols whole entries plus one stray byte of mi[-1].  For rows >= 1 every byte
 * it touched is also cleared by the two steps below.
 *
 * NOTE(review): the loop steps by `cols`, while vp8_alloc_frame_buffers sets
 * mi = mip + mode_info_stride + 1; confirm that callers pass the intended
 * row step here.
 */
void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
{
    int i;

    /* Entries mi[-cols-2] .. mi[-2]: (cols + 1) whole elements. */
    vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));

    /* One entry at mi[i*cols - 1] per row; i == 0 clears mi[-1]. */
    for (i = 0; i < rows; i++)
    {
        vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
    }
}
/*
 * Release the buffers held by the VP8_COMMON instance `oci` and reset the
 * freed pointers to 0 so that a later call does not free them again.
 *
 * NOTE(review): the statements below use oci->above_context both as an array
 * of four pointers (indexed by Y1CONTEXT/UCONTEXT/VCONTEXT/Y2CONTEXT) and as
 * a single pointer, and they release both yv12_fb[] and the individually
 * named new/last/golden/alt_ref frames.  Only one form of each can match the
 * structure declaration -- confirm which lines are current before relying on
 * this listing.
 */
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
{
int i;
/* Frame buffers. */
for (i = 0; i < NUM_YV12_BUFFERS; i++)
vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->new_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->last_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->golden_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->alt_ref_frame);
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
/* Above-context storage and the mode-info array. */
vpx_free(oci->above_context[Y1CONTEXT]);
vpx_free(oci->above_context[UCONTEXT]);
vpx_free(oci->above_context[VCONTEXT]);
vpx_free(oci->above_context[Y2CONTEXT]);
vpx_free(oci->above_context);
vpx_free(oci->mip);
/* Clear the pointers that were just freed. */
oci->above_context[Y1CONTEXT] = 0;
oci->above_context[UCONTEXT] = 0;
oci->above_context[VCONTEXT] = 0;
oci->above_context[Y2CONTEXT] = 0;
oci->above_context = 0;
oci->mip = 0;
// Structure used to monitor GF usage
if (oci->gf_active_flags != 0)
vpx_free(oci->gf_active_flags);
oci->gf_active_flags = 0;
}
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
{
int i;
vp8_de_alloc_frame_buffers(oci);
// our internal buffers are always multiples of 16
@ -71,37 +64,33 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
height += 16 - (height & 0xf);
for (i = 0; i < NUM_YV12_BUFFERS; i++)
{
oci->fb_idx_ref_cnt[0] = 0;
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
}
oci->new_fb_idx = 0;
oci->lst_fb_idx = 1;
oci->gld_fb_idx = 2;
oci->alt_fb_idx = 3;
oci->fb_idx_ref_cnt[0] = 1;
oci->fb_idx_ref_cnt[1] = 1;
oci->fb_idx_ref_cnt[2] = 1;
oci->fb_idx_ref_cnt[3] = 1;
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
if (vp8_yv12_alloc_frame_buffer(&oci->new_frame, width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
if (vp8_yv12_alloc_frame_buffer(&oci->last_frame, width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
if (vp8_yv12_alloc_frame_buffer(&oci->golden_frame, width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
if (vp8_yv12_alloc_frame_buffer(&oci->alt_ref_frame, width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
@ -123,33 +112,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
oci->mi = oci->mip + oci->mode_info_stride + 1;
oci->above_context[Y1CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 4 , 1);
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
if (!oci->above_context[Y1CONTEXT])
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[UCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
if (!oci->above_context[UCONTEXT])
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[VCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
if (!oci->above_context[VCONTEXT])
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->above_context[Y2CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols , 1);
if (!oci->above_context[Y2CONTEXT])
if (!oci->above_context)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
@ -157,20 +122,6 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
// Structures used to monitor GF usage
if (oci->gf_active_flags != 0)
vpx_free(oci->gf_active_flags);
oci->gf_active_flags = (unsigned char *)vpx_calloc(oci->mb_rows * oci->mb_cols, 1);
if (!oci->gf_active_flags)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
}
oci->gf_active_count = oci->mb_rows * oci->mb_cols;
return 0;
}
void vp8_setup_version(VP8_COMMON *cm)

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -12,13 +12,13 @@
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
/*
 * Copy `count` entropy-context entries from the left (`l`) and above (`a`)
 * arrays into the corresponding members of the scratch context `t`.
 *
 * No bounds check is made on `count`; the caller must ensure that t->l and
 * t->a can each hold `count` entries.
 */
void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count)
{
    /* Left contexts first, then above contexts (same order as before). */
    vpx_memcpy(t->l, l, count * sizeof(*l));
    vpx_memcpy(t->a, a, count * sizeof(*a));
}
const int vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0};
const int vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0};
const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
const int vp8_block2context[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3};
/*
 * Per-block lookup tables with one entry for each of 25 block indices.
 * In both tables entries 0-15 hold values 0-3, entries 16-19 hold 4-5,
 * entries 20-23 hold 6-7 and entry 24 holds 8.
 *
 * Presumably each value is an offset into an ENTROPY_CONTEXT_PLANES viewed
 * as a flat ENTROPY_CONTEXT array (y1[4], u[2], v[2], y2) -- TODO confirm
 * against the code that indexes with these tables.
 *
 * NOTE(review): `const int` definitions of the same two names appear just
 * above this point; only one set can be kept in a single translation unit.
 */
const unsigned char vp8_block2left[25] =
{
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
};
/* Same layout as vp8_block2left; within each group the value cycles with the
   block index instead of staying constant across runs of indices. */
const unsigned char vp8_block2above[25] =
{
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
};

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -49,19 +49,19 @@ typedef struct
} POS;
typedef int ENTROPY_CONTEXT;
typedef char ENTROPY_CONTEXT;
typedef struct
{
ENTROPY_CONTEXT l[4];
ENTROPY_CONTEXT a[4];
} TEMP_CONTEXT;
ENTROPY_CONTEXT y1[4];
ENTROPY_CONTEXT u[2];
ENTROPY_CONTEXT v[2];
ENTROPY_CONTEXT y2;
} ENTROPY_CONTEXT_PLANES;
extern void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count);
extern const int vp8_block2left[25];
extern const int vp8_block2above[25];
extern const int vp8_block2type[25];
extern const int vp8_block2context[25];
extern const unsigned char vp8_block2left[25];
extern const unsigned char vp8_block2above[25];
/*
 * Assign to Dest the number of non-zero values among A and B (0, 1 or 2).
 *
 * The expansion is a complete assignment statement that already ends in ';',
 * and Dest is used unparenthesized, so Dest must be a plain lvalue and the
 * macro cannot be used inside an expression.
 */
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
Dest = ((A)!=0) + ((B)!=0);
@ -174,9 +174,8 @@ typedef struct
int dc_diff;
unsigned char segment_id; // Which set of segmentation parameters should be used for this MB
int force_no_skip;
int need_to_clamp_mvs;
B_MODE_INFO partition_bmi[16];
} MB_MODE_INFO;
@ -216,9 +215,10 @@ typedef struct
{
DECLARE_ALIGNED(16, short, diff[400]); // from idct diff
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
DECLARE_ALIGNED(16, short, reference[384]);
//not used DECLARE_ALIGNED(16, short, reference[384]);
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
// 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
BLOCKD block[25];
@ -233,14 +233,12 @@ typedef struct
FRAME_TYPE frame_type;
MB_MODE_INFO mbmi;
int up_available;
int left_available;
// Y,U,V,Y2
ENTROPY_CONTEXT *above_context[4]; // row of context for each plane
ENTROPY_CONTEXT(*left_context)[4]; // (up to) 4 contexts ""
ENTROPY_CONTEXT_PLANES *above_context;
ENTROPY_CONTEXT_PLANES *left_context;
// 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
unsigned char segmentation_enabled;
@ -276,9 +274,6 @@ typedef struct
int mb_to_top_edge;
int mb_to_bottom_edge;
//char * gf_active_ptr;
signed char *gf_active_ptr;
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
vp8_subpix_fn_t subpixel_predict;

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -264,8 +264,10 @@ void vp8_entropy_mode_init()
vp8_tokens_from_tree(vp8_uv_mode_encodings, vp8_uv_mode_tree);
vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree);
vp8_tokens_from_tree(VP8_MVREFENCODINGS, vp8_mv_ref_tree);
vp8_tokens_from_tree(VP8_SUBMVREFENCODINGS, vp8_sub_mv_ref_tree);
vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array,
vp8_mv_ref_tree, NEARESTMV);
vp8_tokens_from_tree_offset(vp8_sub_mv_ref_encoding_array,
vp8_sub_mv_ref_tree, LEFT4X4);
vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
}

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -54,10 +54,6 @@ extern struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS];
extern struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS];
extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
#define VP8_MVREFENCODINGS (vp8_mv_ref_encoding_array - NEARESTMV)
#define VP8_SUBMVREFENCODINGS (vp8_sub_mv_ref_encoding_array - LEFT4X4)
extern const vp8_tree_index vp8_small_mvtree[];
extern struct vp8_token_struct vp8_small_mvencodings [8];

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -39,7 +39,10 @@ static void extend_plane_borders
for (i = 0; i < h - 0 + 1; i++)
{
vpx_memset(dest_ptr1, src_ptr1[0], el);
// Some linkers will complain if we call vpx_memset with el set to a
// constant 0.
if (el)
vpx_memset(dest_ptr1, src_ptr1[0], el);
vpx_memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -32,7 +32,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c;
rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
@ -61,7 +61,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
#if CONFIG_POSTPROC || CONFIG_VP8_ENCODER
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
rtcd->postproc.down = vp8_mbpost_proc_down_c;
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -18,8 +18,10 @@
/*
 * Signature-generating macros for the inverse-transform entry points.
 * Each expands to a function declarator named `sym`, so the same macro can
 * be used after `extern` for a declaration or, with `(*name)` as `sym`,
 * after `typedef` to build the matching function-pointer type.
 */

/* void sym(short *input, short *output, int pitch) */
#define prototype_idct(sym) \
void sym(short *input, short *output, int pitch)
/* Variant whose input is passed by value as a single short. */
#define prototype_idct_scalar(sym) \
void sym(short input, short *output, int pitch)
/* Variant taking a single short plus separate `pred` and `output` byte
   buffers, with separate `pitch` and `stride` arguments. */
#define prototype_idct_scalar_add(sym) \
void sym(short input, \
unsigned char *pred, unsigned char *output, \
int pitch, int stride)
#if ARCH_X86 || ARCH_X86_64
#include "x86/idct_x86.h"
@ -39,10 +41,10 @@ extern prototype_idct(vp8_idct_idct1);
#endif
extern prototype_idct(vp8_idct_idct16);
#ifndef vp8_idct_idct1_scalar
#define vp8_idct_idct1_scalar vp8_dc_only_idct_c
#ifndef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_c
#endif
extern prototype_idct_scalar(vp8_idct_idct1_scalar);
extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add);
#ifndef vp8_idct_iwalsh1
@ -56,14 +58,14 @@ extern prototype_second_order(vp8_idct_iwalsh1);
extern prototype_second_order(vp8_idct_iwalsh16);
typedef prototype_idct((*vp8_idct_fn_t));
typedef prototype_idct_scalar((*vp8_idct_scalar_fn_t));
typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
typedef prototype_second_order((*vp8_second_order_fn_t));
typedef struct
{
vp8_idct_fn_t idct1;
vp8_idct_fn_t idct16;
vp8_idct_scalar_fn_t idct1_scalar;
vp8_idct_fn_t idct1;
vp8_idct_fn_t idct16;
vp8_idct_scalar_add_fn_t idct1_scalar_add;
vp8_second_order_fn_t iwalsh1;
vp8_second_order_fn_t iwalsh16;

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -104,23 +104,30 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
}
}
/*
 * DC-only inverse transform into a block of shorts.
 *
 * Writes the rounded DC value, (input_dc + 4) >> 3, to each of the 4 columns
 * of 4 consecutive rows of `output`.
 *
 *   input_dc - DC coefficient.
 *   output   - destination; 4 shorts are written per row.
 *   pitch    - row step; it is halved to step through `output` in shorts
 *              (i.e. it is presumably expressed in bytes).
 */
void vp8_dc_only_idct_c(short input_dc, short *output, int pitch)
{
    int i;
    int a1;
    short *op = output;
    int shortpitch = pitch >> 1;

    a1 = ((input_dc + 4) >> 3);

    for (i = 0; i < 4; i++)
    {
        op[0] = a1;
        op[1] = a1;
        op[2] = a1;
        op[3] = a1;
        op += shortpitch;
    }
}

/*
 * DC-only inverse transform fused with reconstruction.
 *
 * Adds the rounded DC value, (input_dc + 4) >> 3, to each sample of a 4x4
 * block read from `pred_ptr`, clamps the sum to [0, 255] and stores it in
 * `dst_ptr`.
 *
 *   input_dc - DC coefficient.
 *   pred_ptr - source samples; consecutive rows are `pitch` bytes apart.
 *   dst_ptr  - destination samples; consecutive rows are `stride` bytes apart.
 *   pitch    - row step for pred_ptr.
 *   stride   - row step for dst_ptr.
 */
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
{
    int a1 = ((input_dc + 4) >> 3);
    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            int a = a1 + pred_ptr[c] ;

            /* Saturate to the unsigned 8-bit sample range. */
            if (a < 0)
                a = 0;

            if (a > 255)
                a = 255;

            dst_ptr[c] = (unsigned char) a ;
        }

        dst_ptr += stride;
        pred_ptr += pitch;
    }
}
void vp8_short_inv_walsh4x4_c(short *input, short *output)

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -65,7 +65,8 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
{
int i;
if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
if (x->mode_info_context->mbmi.mode != B_PRED &&
x->mode_info_context->mbmi.mode != SPLITMV)
{
// do 2nd order transform on the dc block

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -117,5 +117,14 @@ typedef struct
#define LF_INVOKE(ctx,fn) vp8_lf_##fn
#endif
// Function type for a loop-filter routine that is handed two plane
// pointers (u and v) in a single call, sharing one pitch and one set of
// filter parameter arrays.
// NOTE(review): presumably u and v are the two chroma planes so that both
// can be filtered by one call -- confirm against the implementations.
typedef void loop_filter_uvfunction
(
unsigned char *u, // source pointer
int p, // pitch
// NOTE(review): flimit/limit/thresh share their names with the arguments of
// the filter-mask and high-edge-variance helpers in loopfilter_filters.c;
// presumably they carry the same values in array form -- confirm.
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
unsigned char *v // second plane pointer, passed alongside u
);
#endif

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -21,7 +21,7 @@
typedef unsigned char uc;
__inline signed char vp8_signed_char_clamp(int t)
static __inline signed char vp8_signed_char_clamp(int t)
{
t = (t < -128 ? -128 : t);
t = (t > 127 ? 127 : t);
@ -30,7 +30,7 @@ __inline signed char vp8_signed_char_clamp(int t)
// should we apply any filter at all ( 11111111 yes, 00000000 no)
__inline signed char vp8_filter_mask(signed char limit, signed char flimit,
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
{
signed char mask = 0;
@ -50,7 +50,7 @@ __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
}
// is there high variance internal edge ( 11111111 yes, 00000000 no)
__inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
{
signed char hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
@ -58,7 +58,7 @@ __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
return hev;
}
__inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
{
signed char ps0, qs0;
@ -164,7 +164,7 @@ void vp8_loop_filter_vertical_edge_c
while (++i < count * 8);
}
__inline void vp8_mbfilter(signed char mask, signed char hev,
static __inline void vp8_mbfilter(signed char mask, signed char hev,
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
{
signed char s, u;
@ -284,7 +284,7 @@ void vp8_mbloop_filter_vertical_edge_c
}
// should we apply any filter at all ( 11111111 yes, 00000000 no)
__inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
{
// Why does this cause problems for win32?
// error C2143: syntax error : missing ';' before 'type'
@ -297,7 +297,7 @@ __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimi
return mask;
}
__inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
{
signed char vp8_filter, Filter1, Filter2;
signed char p1 = (signed char) * op1 ^ 0x80;

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -33,6 +33,7 @@ void vp8_initialize_common(void);
#define MAXQ 127
#define QINDEX_RANGE (MAXQ + 1)
#define NUM_YV12_BUFFERS 4
typedef struct frame_contexts
{
@ -94,15 +95,16 @@ typedef struct VP8Common
YUV_TYPE clr_type;
CLAMP_TYPE clamp_type;
YV12_BUFFER_CONFIG last_frame;
YV12_BUFFER_CONFIG golden_frame;
YV12_BUFFER_CONFIG alt_ref_frame;
YV12_BUFFER_CONFIG new_frame;
YV12_BUFFER_CONFIG *frame_to_show;
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
FRAME_TYPE last_frame_type; //Add to check if vp8_frame_init_loop_filter() can be skiped.
FRAME_TYPE last_frame_type; //Add to check if vp8_frame_init_loop_filter() can be skipped.
FRAME_TYPE frame_type;
int show_frame;
@ -131,8 +133,6 @@ typedef struct VP8Common
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0
int gf_active_count;
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
@ -165,8 +165,8 @@ typedef struct VP8Common
int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1
// Y,U,V,Y2
ENTROPY_CONTEXT *above_context[4]; // row of context for each plane
ENTROPY_CONTEXT left_context[4][4]; // (up to) 4 contexts ""
ENTROPY_CONTEXT_PLANES *above_context; // row of context for each plane
ENTROPY_CONTEXT_PLANES left_context; // (up to) 4 contexts ""
// keyframe block modes are predicted by their above, left neighbors
@ -201,6 +201,7 @@ typedef struct VP8Common
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
void vp8_init_loop_filter(VP8_COMMON *cm);
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
#endif

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -330,13 +330,6 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source,
}
//Notes: It is better to change CHAR to unsigned or signed to
//avoid error on ARM platform.
char vp8_an[8][64][3072];
int vp8_cd[8][64];
double vp8_gaussian(double sigma, double mu, double x)
{
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -210,7 +210,8 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
{
int i;
if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
x->mode_info_context->mbmi.mode != SPLITMV)
{
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = &x->predictor[256];
@ -254,16 +255,18 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
}
}
//encoder only
void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
{
if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
x->mode_info_context->mbmi.mode != SPLITMV)
{
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = x->predictor;
int mv_row = x->mbmi.mv.as_mv.row;
int mv_col = x->mbmi.mv.as_mv.col;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->block[0].pre_stride;
ptr_base = x->pre.y_buffer;
@ -282,7 +285,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
{
int i;
if (x->mbmi.partitioning < 3)
if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{
@ -313,7 +316,9 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
{
if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
x->mode_info_context->mbmi.mode != SPLITMV)
{
int offset;
unsigned char *ptr_base;
@ -323,8 +328,8 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320];
int mv_row = x->mbmi.mv.as_mv.row;
int mv_col = x->mbmi.mv.as_mv.col;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->block[0].pre_stride;
ptr_base = x->pre.y_buffer;
@ -361,7 +366,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
{
int i;
if (x->mbmi.partitioning < 3)
if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{
@ -410,7 +415,7 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
{
int i, j;
if (x->mbmi.mode == SPLITMV)
if (x->mode_info_context->mbmi.mode == SPLITMV)
{
for (i = 0; i < 2; i++)
{
@ -455,8 +460,8 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
}
else
{
int mvrow = x->mbmi.mv.as_mv.row;
int mvcol = x->mbmi.mv.as_mv.col;
int mvrow = x->mode_info_context->mbmi.mv.as_mv.row;
int mvcol = x->mode_info_context->mbmi.mv.as_mv.col;
if (mvrow < 0)
mvrow -= 1;
@ -535,7 +540,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
unsigned char *pred_ptr = x->predictor;
unsigned char *dst_ptr = x->dst.y_buffer;
if (x->mbmi.mode != SPLITMV)
if (x->mode_info_context->mbmi.mode != SPLITMV)
{
int offset;
unsigned char *ptr_base;
@ -547,8 +552,8 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
unsigned char *udst_ptr = x->dst.u_buffer;
unsigned char *vdst_ptr = x->dst.v_buffer;
int mv_row = x->mbmi.mv.as_mv.row;
int mv_col = x->mbmi.mv.as_mv.col;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;
ptr_base = x->pre.y_buffer;
@ -587,7 +592,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
//if sth is wrong, go back to what it is in build_inter_predictors_mb.
int i;
if (x->mbmi.partitioning < 3)
if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -43,7 +43,7 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
}
// for Y
switch (x->mbmi.mode)
switch (x->mode_info_context->mbmi.mode)
{
case DC_PRED:
{
@ -164,7 +164,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
}
// for Y
switch (x->mbmi.mode)
switch (x->mode_info_context->mbmi.mode)
{
case DC_PRED:
{
@ -290,7 +290,7 @@ void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x)
vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
}
switch (x->mbmi.uv_mode)
switch (x->mode_info_context->mbmi.uv_mode)
{
case DC_PRED:
{
@ -430,7 +430,7 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
}
switch (x->mbmi.uv_mode)
switch (x->mode_info_context->mbmi.uv_mode)
{
case DC_PRED:
{

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,64 +0,0 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "segmentation_common.h"
#include "vpx_mem/vpx_mem.h"
// Refresh the per-macroblock "golden frame active" flags held in
// cm->gf_active_flags and the running total in cm->gf_active_count.
//
// cm - common state; supplies the frame type, the refresh_golden_frame
//      flag, the macroblock dimensions, the MODE_INFO array and the flags.
// xd - macroblock descriptor; its gf_active_ptr is pointed at the start of
//      the flag array and is used as the walking cursor.
//
// On a key frame or a golden-frame refresh every flag is set to 1 and the
// count becomes mb_rows * mb_cols. Otherwise each macroblock is inspected:
// a golden/alt-ref reference sets a cleared flag, any other macroblock whose
// mode is not ZEROMV clears a set flag, and everything else is left alone.
void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd)
{
    MODE_INFO *mi = cm->mi;
    int row, col;

    xd->gf_active_ptr = (signed char *)cm->gf_active_flags;

    if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
    {
        // Full reset: every macroblock counts as golden-frame active.
        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
        return;
    }

    for (row = 0; row < cm->mb_rows; row++)
    {
        // One flag and one MODE_INFO entry are consumed per macroblock.
        for (col = 0; col < cm->mb_cols; col++, xd->gf_active_ptr++, mi++)
        {
            const int refs_golden_or_altref =
                (mi->mbmi.ref_frame == GOLDEN_FRAME) ||
                (mi->mbmi.ref_frame == ALTREF_FRAME);

            if (refs_golden_or_altref)
            {
                // Referencing golden/alt-ref: raise the flag if it is down.
                if (*(xd->gf_active_ptr) == 0)
                {
                    *(xd->gf_active_ptr) = 1;
                    cm->gf_active_count ++;
                }
            }
            else if ((mi->mbmi.mode != ZEROMV) && *(xd->gf_active_ptr))
            {
                // Any mode other than ZEROMV drops a raised flag.
                *(xd->gf_active_ptr) = 0;
                cm->gf_active_count--;
            }
        }

        // The MODE_INFO array has one extra border entry per row; skip it.
        mi++;
    }
}

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -75,7 +75,8 @@
#define thread_sleep(nms) // { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
#else
#include <unistd.h>
#define thread_sleep(nms) usleep(nms*1000);// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
#include <sched.h>
#define thread_sleep(nms) sched_yield();// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
#endif
/* Not Windows. Assume pthreads */

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -47,6 +47,12 @@ void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t)
tree2tok(p, t, 0, 0, 0);
}
/* Same as vp8_tokens_from_tree(), except that the token table handed to
 * tree2tok() is `p` moved back by `offset` entries.
 * NOTE(review): presumably this lets a tree whose smallest leaf value is
 * `offset` fill a table that starts at p[0] -- confirm against tree2tok().
 */
void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t,
                                 int offset)
{
    struct vp8_token_struct *const shifted_base = p - offset;

    tree2tok(shifted_base, t, 0, 0, 0);
}
static void branch_counts(
int n, /* n = size of alphabet */
vp8_token tok [ /* n */ ],

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -54,6 +54,8 @@ typedef const struct vp8_token_struct
/* Construct encoding array from tree. */
void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree);
void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree,
int offset);
/* Convert array of token occurrence counts into a table of probabilities

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -22,7 +22,7 @@
#if HAVE_MMX
extern prototype_idct(vp8_short_idct4x4llm_1_mmx);
extern prototype_idct(vp8_short_idct4x4llm_mmx);
extern prototype_idct_scalar(vp8_dc_only_idct_mmx);
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx);
extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
@ -34,8 +34,8 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
#undef vp8_idct_idct16
#define vp8_idct_idct16 vp8_short_idct4x4llm_mmx
#undef vp8_idct_idct1_scalar
#define vp8_idct_idct1_scalar vp8_dc_only_idct_mmx
#undef vp8_idct_idct1_scalar_add
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_mmx
#undef vp8_idct_iwalsh16
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
@ -220,35 +220,61 @@ sym(vp8_short_idct4x4llm_1_mmx):
pop rbp
ret
;void dc_only_idct_mmx(short input_dc, short *output, int pitch)
global sym(vp8_dc_only_idct_mmx)
sym(vp8_dc_only_idct_mmx):
;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
global sym(vp8_dc_only_idct_add_mmx)
sym(vp8_dc_only_idct_add_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 3
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
movd mm0, arg(0) ;input_dc
mov rsi, arg(1) ;s -- prediction
mov rdi, arg(2) ;d -- destination
movsxd rax, dword ptr arg(4) ;stride
movsxd rdx, dword ptr arg(3) ;pitch
pxor mm0, mm0
paddw mm0, [fours GLOBAL]
mov rdx, arg(1) ;output
movd mm5, arg(0) ;input_dc
psraw mm0, 3
movsxd rax, dword ptr arg(2) ;pitch
paddw mm5, [fours GLOBAL]
punpcklwd mm0, mm0
punpckldq mm0, mm0
psraw mm5, 3
movq [rdx], mm0
movq [rdx+rax], mm0
punpcklwd mm5, mm5
punpckldq mm5, mm5
movq [rdx+rax*2], mm0
add rdx, rax
movd mm1, [rsi]
punpcklbw mm1, mm0
paddsw mm1, mm5
packuswb mm1, mm0 ; pack and unpack to saturate
movd [rdi], mm1
movq [rdx+rax*2], mm0
movd mm2, [rsi+rdx]
punpcklbw mm2, mm0
paddsw mm2, mm5
packuswb mm2, mm0 ; pack and unpack to saturate
movd [rdi+rax], mm2
movd mm3, [rsi+2*rdx]
punpcklbw mm3, mm0
paddsw mm3, mm5
packuswb mm3, mm0 ; pack and unpack to saturate
movd [rdi+2*rax], mm3
add rdi, rax
add rsi, rdx
movd mm4, [rsi+2*rdx]
punpcklbw mm4, mm0
paddsw mm4, mm5
packuswb mm4, mm0 ; pack and unpack to saturate
movd [rdi+2*rax], mm4
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp

Просмотреть файл

@ -0,0 +1,708 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
;void idct_dequant_0_2x_sse2
; (
;   short *qcoeff       - 0
;   short *dequant      - 1
;   unsigned char *pre  - 2
;   unsigned char *dst  - 3
;   int dst_stride      - 4
;   int blk_stride      - 5
; )
;
; DC-only reconstruction of two 4x4 blocks in one call.  The first
; coefficient of each block (qcoeff[0] and qcoeff[16]) is multiplied by
; dequant[0], rounded with (x + 4) >> 3, added to four 8-pixel rows read
; from pre (rows blk_stride bytes apart), saturated to bytes and written
; to dst (rows dst_stride bytes apart).  The coefficient words that were
; read are zeroed afterwards.
;
; Only xmm0-xmm5 are used, so no callee-saved XMM register (xmm6 and up
; in the Windows x64 ABI) has to be preserved here.
global sym(idct_dequant_0_2x_sse2)
sym(idct_dequant_0_2x_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    ; end prolog

    mov         rdx, arg(1)                 ; dequant
    mov         rax, arg(0)                 ; qcoeff

    ; word 0 <- block 0 DC, word 4 <- block 1 DC (32 bytes further on)
    movd        xmm4, [rax]
    movd        xmm5, [rdx]
    pinsrw      xmm4, [rax+32], 4
    pinsrw      xmm5, [rdx], 4

    pmullw      xmm4, xmm5

    ; xmm5 has served its purpose as the dequant factor; reuse it as the
    ; all-zero register for clearing and for byte<->word (un)packing
    pxor        xmm5, xmm5

    ; clear coeffs
    movd        [rax], xmm5
    movd        [rax+32], xmm5

    ; broadcast block 0 DC over the low four words and block 1 DC over
    ; the high four words
    pshuflw     xmm4, xmm4, 00000000b
    pshufhw     xmm4, xmm4, 00000000b

    mov         rax, arg(2)                 ; pre
    paddw       xmm4, [fours GLOBAL]

    movsxd      rcx, dword ptr arg(5)       ; blk_stride
    psraw       xmm4, 3

    ; load four prediction rows, 8 pixels each (4 per block)
    movq        xmm0, [rax]
    movq        xmm1, [rax+rcx]
    movq        xmm2, [rax+2*rcx]
    lea         rcx, [3*rcx]
    movq        xmm3, [rax+rcx]

    punpcklbw   xmm0, xmm5
    punpcklbw   xmm1, xmm5
    punpcklbw   xmm2, xmm5
    punpcklbw   xmm3, xmm5

    mov         rax, arg(3)                 ; dst
    movsxd      rdx, dword ptr arg(4)       ; dst_stride

    ; Add to predict buffer
    paddw       xmm0, xmm4
    paddw       xmm1, xmm4
    paddw       xmm2, xmm4
    paddw       xmm3, xmm4

    ; pack up before storing (unsigned saturation to 0..255)
    packuswb    xmm0, xmm5
    packuswb    xmm1, xmm5
    packuswb    xmm2, xmm5
    packuswb    xmm3, xmm5

    ; store blocks back out
    movq        [rax], xmm0
    movq        [rax + rdx], xmm1

    lea         rax, [rax + 2*rdx]

    movq        [rax], xmm2
    movq        [rax + rdx], xmm3

    ; begin epilog
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
; idct_dequant_full_2x_sse2
;   Arguments, by the index they are read with below:
;     qcoeff      - 0   (two consecutive 16-coefficient blocks, read
;                        and cleared with aligned 16-byte accesses)
;     dequant     - 1   (16 factors, applied to both blocks)
;     pre         - 2   (prediction, rows blk_stride bytes apart)
;     dst         - 3   (output, rows dst_stride bytes apart)
;     dst_stride  - 4
;     blk_stride  - 5
;   NOTE(review): SHADOW_ARGS_TO_STACK is given 7 although only
;   arguments 0-5 are read -- confirm against the C prototype.
;
; Dequantizes two 4x4 blocks, runs the full two-pass inverse transform on
; both at once (block 0 in the low half of each register, block 1 in the
; high half), adds the result to the prediction and stores the saturated
; pixels.
;
; The routine overwrites xmm6 and xmm7, which are callee-saved in the
; Windows x64 ABI, so they are preserved with SAVE_XMM / RESTORE_XMM in
; the same prolog/epilog positions used by the other SSE2 routines in
; this tree (e.g. vp8_short_inv_walsh4x4_sse2).
global sym(idct_dequant_full_2x_sse2)
sym(idct_dequant_full_2x_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rax, arg(0)                 ; qcoeff
    mov         rsi, arg(2)                 ; pre
    mov         rdi, arg(3)                 ; dst
    movsxd      rcx, dword ptr arg(5)       ; blk_stride

    ; Zero out xmm7, for use clearing the coefficients
    pxor        xmm7, xmm7

    mov         rdx, arg(1)                 ; dequant

    ; note the transpose of xmm1 and xmm2, necessary for shuffle
    ; to spit out sensible data
    movdqa      xmm0, [rax]
    movdqa      xmm2, [rax+16]
    movdqa      xmm1, [rax+32]
    movdqa      xmm3, [rax+48]

    ; Clear out coeffs
    movdqa      [rax], xmm7
    movdqa      [rax+16], xmm7
    movdqa      [rax+32], xmm7
    movdqa      [rax+48], xmm7

    ; dequantize qcoeff buffer
    pmullw      xmm0, [rdx]
    pmullw      xmm2, [rdx+16]
    pmullw      xmm1, [rdx]
    pmullw      xmm3, [rdx+16]

    ; repack so block 0 row x and block 1 row x are together
    movdqa      xmm4, xmm0
    punpckldq   xmm0, xmm1
    punpckhdq   xmm4, xmm1

    pshufd      xmm0, xmm0, 11011000b
    pshufd      xmm1, xmm4, 11011000b

    movdqa      xmm4, xmm2
    punpckldq   xmm2, xmm3
    punpckhdq   xmm4, xmm3

    pshufd      xmm2, xmm2, 11011000b
    pshufd      xmm3, xmm4, 11011000b

    ; first pass
    psubw       xmm0, xmm2                  ; b1 = 0-2
    paddw       xmm2, xmm2                  ;

    movdqa      xmm5, xmm1
    paddw       xmm2, xmm0                  ; a1 = 0+2

    pmulhw      xmm5, [x_s1sqr2 GLOBAL]
    paddw       xmm5, xmm1                  ; ip1 * sin(pi/8) * sqrt(2)

    movdqa      xmm7, xmm3
    pmulhw      xmm7, [x_c1sqr2less1 GLOBAL]

    paddw       xmm7, xmm3                  ; ip3 * cos(pi/8) * sqrt(2)
    psubw       xmm7, xmm5                  ; c1

    movdqa      xmm5, xmm1
    movdqa      xmm4, xmm3

    pmulhw      xmm5, [x_c1sqr2less1 GLOBAL]
    paddw       xmm5, xmm1

    pmulhw      xmm3, [x_s1sqr2 GLOBAL]
    paddw       xmm3, xmm4

    paddw       xmm3, xmm5                  ; d1
    movdqa      xmm6, xmm2                  ; a1

    movdqa      xmm4, xmm0                  ; b1
    paddw       xmm2, xmm3                  ;0

    paddw       xmm4, xmm7                  ;1
    psubw       xmm0, xmm7                  ;2

    psubw       xmm6, xmm3                  ;3

    ; transpose for the second pass
    movdqa      xmm7, xmm2                  ; 103 102 101 100 003 002 001 000
    punpcklwd   xmm2, xmm0                  ; 007 003 006 002 005 001 004 000
    punpckhwd   xmm7, xmm0                  ; 107 103 106 102 105 101 104 100

    movdqa      xmm5, xmm4                  ; 111 110 109 108 011 010 009 008
    punpcklwd   xmm4, xmm6                  ; 015 011 014 010 013 009 012 008
    punpckhwd   xmm5, xmm6                  ; 115 111 114 110 113 109 112 108

    movdqa      xmm1, xmm2                  ; 007 003 006 002 005 001 004 000
    punpckldq   xmm2, xmm4                  ; 013 009 005 001 012 008 004 000
    punpckhdq   xmm1, xmm4                  ; 015 011 007 003 014 010 006 002

    movdqa      xmm6, xmm7                  ; 107 103 106 102 105 101 104 100
    punpckldq   xmm7, xmm5                  ; 113 109 105 101 112 108 104 100
    punpckhdq   xmm6, xmm5                  ; 115 111 107 103 114 110 106 102

    movdqa      xmm5, xmm2                  ; 013 009 005 001 012 008 004 000
    punpckldq   xmm2, xmm7                  ; 112 108 012 008 104 100 004 000
    punpckhdq   xmm5, xmm7                  ; 113 109 013 009 105 101 005 001

    movdqa      xmm7, xmm1                  ; 015 011 007 003 014 010 006 002
    punpckldq   xmm1, xmm6                  ; 114 110 014 010 106 102 006 002
    punpckhdq   xmm7, xmm6                  ; 115 111 015 011 107 103 007 003

    pshufd      xmm0, xmm2, 11011000b
    pshufd      xmm2, xmm1, 11011000b

    pshufd      xmm1, xmm5, 11011000b
    pshufd      xmm3, xmm7, 11011000b

    ; second pass
    psubw       xmm0, xmm2                  ; b1 = 0-2
    paddw       xmm2, xmm2

    movdqa      xmm5, xmm1
    paddw       xmm2, xmm0                  ; a1 = 0+2

    pmulhw      xmm5, [x_s1sqr2 GLOBAL]
    paddw       xmm5, xmm1                  ; ip1 * sin(pi/8) * sqrt(2)

    movdqa      xmm7, xmm3
    pmulhw      xmm7, [x_c1sqr2less1 GLOBAL]

    paddw       xmm7, xmm3                  ; ip3 * cos(pi/8) * sqrt(2)
    psubw       xmm7, xmm5                  ; c1

    movdqa      xmm5, xmm1
    movdqa      xmm4, xmm3

    pmulhw      xmm5, [x_c1sqr2less1 GLOBAL]
    paddw       xmm5, xmm1

    pmulhw      xmm3, [x_s1sqr2 GLOBAL]
    paddw       xmm3, xmm4

    paddw       xmm3, xmm5                  ; d1

    ; rounding term for the final >> 3, folded into a1 and b1
    paddw       xmm0, [fours GLOBAL]

    paddw       xmm2, [fours GLOBAL]
    movdqa      xmm6, xmm2                  ; a1

    movdqa      xmm4, xmm0                  ; b1
    paddw       xmm2, xmm3                  ;0

    paddw       xmm4, xmm7                  ;1
    psubw       xmm0, xmm7                  ;2

    psubw       xmm6, xmm3                  ;3
    psraw       xmm2, 3

    psraw       xmm0, 3
    psraw       xmm4, 3

    psraw       xmm6, 3

    ; transpose to save
    movdqa      xmm7, xmm2                  ; 103 102 101 100 003 002 001 000
    punpcklwd   xmm2, xmm0                  ; 007 003 006 002 005 001 004 000
    punpckhwd   xmm7, xmm0                  ; 107 103 106 102 105 101 104 100

    movdqa      xmm5, xmm4                  ; 111 110 109 108 011 010 009 008
    punpcklwd   xmm4, xmm6                  ; 015 011 014 010 013 009 012 008
    punpckhwd   xmm5, xmm6                  ; 115 111 114 110 113 109 112 108

    movdqa      xmm1, xmm2                  ; 007 003 006 002 005 001 004 000
    punpckldq   xmm2, xmm4                  ; 013 009 005 001 012 008 004 000
    punpckhdq   xmm1, xmm4                  ; 015 011 007 003 014 010 006 002

    movdqa      xmm6, xmm7                  ; 107 103 106 102 105 101 104 100
    punpckldq   xmm7, xmm5                  ; 113 109 105 101 112 108 104 100
    punpckhdq   xmm6, xmm5                  ; 115 111 107 103 114 110 106 102

    movdqa      xmm5, xmm2                  ; 013 009 005 001 012 008 004 000
    punpckldq   xmm2, xmm7                  ; 112 108 012 008 104 100 004 000
    punpckhdq   xmm5, xmm7                  ; 113 109 013 009 105 101 005 001

    movdqa      xmm7, xmm1                  ; 015 011 007 003 014 010 006 002
    punpckldq   xmm1, xmm6                  ; 114 110 014 010 106 102 006 002
    punpckhdq   xmm7, xmm6                  ; 115 111 015 011 107 103 007 003

    pshufd      xmm0, xmm2, 11011000b
    pshufd      xmm2, xmm1, 11011000b

    pshufd      xmm1, xmm5, 11011000b
    pshufd      xmm3, xmm7, 11011000b

    ; xmm7 becomes the zero register again for byte<->word (un)packing
    pxor        xmm7, xmm7

    ; Load up predict blocks
    movq        xmm4, [rsi]
    movq        xmm5, [rsi+rcx]

    punpcklbw   xmm4, xmm7
    punpcklbw   xmm5, xmm7

    paddw       xmm0, xmm4
    paddw       xmm1, xmm5

    movq        xmm4, [rsi+2*rcx]
    lea         rcx, [3*rcx]
    movq        xmm5, [rsi+rcx]

    punpcklbw   xmm4, xmm7
    punpcklbw   xmm5, xmm7

    paddw       xmm2, xmm4
    paddw       xmm3, xmm5

    ; pack up before storing (unsigned saturation to 0..255)
    packuswb    xmm0, xmm7
    packuswb    xmm1, xmm7
    packuswb    xmm2, xmm7
    packuswb    xmm3, xmm7

    ; Load destination stride before writing out,
    ; doesn't need to persist
    movsxd      rdx, dword ptr arg(4)       ; dst_stride

    ; store blocks back out
    movq        [rdi], xmm0
    movq        [rdi + rdx], xmm1

    lea         rdi, [rdi + 2*rdx]

    movq        [rdi], xmm2
    movq        [rdi + rdx], xmm3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
;void idct_dequant_dc_0_2x_sse2
; (
;   short *qcoeff       - 0
;   short *dequant      - 1
;   unsigned char *pre  - 2
;   unsigned char *dst  - 3
;   int dst_stride      - 4
;   short *dc           - 5
; )
;
; DC-only reconstruction of two 4x4 blocks whose DC values are supplied
; separately through dc[0] and dc[1].  Each DC is rounded with
; (x + 4) >> 3, added to four 8-pixel prediction rows read from pre at a
; fixed 16-byte row spacing, saturated to bytes and written to dst (rows
; dst_stride bytes apart).  qcoeff and dequant are not read.
;
; Only xmm0-xmm5 are used, so no callee-saved XMM register (xmm6 and up
; in the Windows x64 ABI) has to be preserved here.
global sym(idct_dequant_dc_0_2x_sse2)
sym(idct_dequant_dc_0_2x_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ; special case when 2 blocks have 0 or 1 coeffs
    ; dc is set as first coeff, so no need to load qcoeff
    mov         rsi, arg(2)                 ; pre
    mov         rdi, arg(3)                 ; dst
    mov         rdx, arg(5)                 ; dc

    ; Zero out xmm5, for use unpacking
    pxor        xmm5, xmm5

    ; load up 2 dc words here == 2*16 = doubleword
    movd        xmm4, [rdx]

    ; Load up predict blocks
    movq        xmm0, [rsi]
    movq        xmm1, [rsi+16]
    movq        xmm2, [rsi+32]
    movq        xmm3, [rsi+48]

    ; Duplicate and expand dc across: low four words <- dc[0],
    ; high four words <- dc[1]
    punpcklwd   xmm4, xmm4
    punpckldq   xmm4, xmm4

    ; Rounding to dequant and downshift
    paddw       xmm4, [fours GLOBAL]
    psraw       xmm4, 3

    ; Predict buffer needs to be expanded from bytes to words
    punpcklbw   xmm0, xmm5
    punpcklbw   xmm1, xmm5
    punpcklbw   xmm2, xmm5
    punpcklbw   xmm3, xmm5

    ; Add to predict buffer
    paddw       xmm0, xmm4
    paddw       xmm1, xmm4
    paddw       xmm2, xmm4
    paddw       xmm3, xmm4

    ; pack up before storing (unsigned saturation to 0..255)
    packuswb    xmm0, xmm5
    packuswb    xmm1, xmm5
    packuswb    xmm2, xmm5
    packuswb    xmm3, xmm5

    ; Load destination stride before writing out,
    ; doesn't need to persist
    movsxd      rdx, dword ptr arg(4)       ; dst_stride

    ; store blocks back out
    movq        [rdi], xmm0
    movq        [rdi + rdx], xmm1

    lea         rdi, [rdi + 2*rdx]

    movq        [rdi], xmm2
    movq        [rdi + rdx], xmm3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
; idct_dequant_dc_full_2x_sse2
;   Arguments, by the index they are read with below:
;     qcoeff      - 0   (two consecutive 16-coefficient blocks, read
;                        and cleared with aligned 16-byte accesses)
;     dequant     - 1   (16 factors, applied to both blocks)
;     pre         - 2   (prediction, rows at a fixed 16-byte spacing)
;     dst         - 3   (output, rows dst_stride bytes apart)
;     dst_stride  - 4
;     dc          - 5   (pointer to two DC words, one per block)
;
; Dequantizes two 4x4 blocks, replaces each block's first coefficient
; with the separately supplied DC value, runs the full two-pass inverse
; transform on both at once (block 0 in the low half of each register,
; block 1 in the high half), adds the result to the prediction and stores
; the saturated pixels.
;
; The routine overwrites xmm6 and xmm7, which are callee-saved in the
; Windows x64 ABI, so they are preserved with SAVE_XMM / RESTORE_XMM in
; the same prolog/epilog positions used by the other SSE2 routines in
; this tree (e.g. vp8_short_inv_walsh4x4_sse2).
global sym(idct_dequant_dc_full_2x_sse2)
sym(idct_dequant_dc_full_2x_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rax, arg(0)                 ; qcoeff
    mov         rsi, arg(2)                 ; pre
    mov         rdi, arg(3)                 ; dst

    ; Zero out xmm7, for use clearing the coefficients
    pxor        xmm7, xmm7

    mov         rdx, arg(1)                 ; dequant

    ; note the transpose of xmm1 and xmm2, necessary for shuffle
    ; to spit out sensible data
    movdqa      xmm0, [rax]
    movdqa      xmm2, [rax+16]
    movdqa      xmm1, [rax+32]
    movdqa      xmm3, [rax+48]

    ; Clear out coeffs
    movdqa      [rax], xmm7
    movdqa      [rax+16], xmm7
    movdqa      [rax+32], xmm7
    movdqa      [rax+48], xmm7

    ; dequantize qcoeff buffer
    pmullw      xmm0, [rdx]
    pmullw      xmm2, [rdx+16]
    pmullw      xmm1, [rdx]
    pmullw      xmm3, [rdx+16]

    ; DC component
    mov         rdx, arg(5)

    ; repack so block 0 row x and block 1 row x are together
    movdqa      xmm4, xmm0
    punpckldq   xmm0, xmm1
    punpckhdq   xmm4, xmm1

    pshufd      xmm0, xmm0, 11011000b
    pshufd      xmm1, xmm4, 11011000b

    movdqa      xmm4, xmm2
    punpckldq   xmm2, xmm3
    punpckhdq   xmm4, xmm3

    pshufd      xmm2, xmm2, 11011000b
    pshufd      xmm3, xmm4, 11011000b

    ; insert DC component: word 0 is block 0's first coefficient,
    ; word 4 is block 1's
    pinsrw      xmm0, [rdx], 0
    pinsrw      xmm0, [rdx+2], 4

    ; first pass
    psubw       xmm0, xmm2                  ; b1 = 0-2
    paddw       xmm2, xmm2                  ;

    movdqa      xmm5, xmm1
    paddw       xmm2, xmm0                  ; a1 = 0+2

    pmulhw      xmm5, [x_s1sqr2 GLOBAL]
    paddw       xmm5, xmm1                  ; ip1 * sin(pi/8) * sqrt(2)

    movdqa      xmm7, xmm3
    pmulhw      xmm7, [x_c1sqr2less1 GLOBAL]

    paddw       xmm7, xmm3                  ; ip3 * cos(pi/8) * sqrt(2)
    psubw       xmm7, xmm5                  ; c1

    movdqa      xmm5, xmm1
    movdqa      xmm4, xmm3

    pmulhw      xmm5, [x_c1sqr2less1 GLOBAL]
    paddw       xmm5, xmm1

    pmulhw      xmm3, [x_s1sqr2 GLOBAL]
    paddw       xmm3, xmm4

    paddw       xmm3, xmm5                  ; d1
    movdqa      xmm6, xmm2                  ; a1

    movdqa      xmm4, xmm0                  ; b1
    paddw       xmm2, xmm3                  ;0

    paddw       xmm4, xmm7                  ;1
    psubw       xmm0, xmm7                  ;2

    psubw       xmm6, xmm3                  ;3

    ; transpose for the second pass
    movdqa      xmm7, xmm2                  ; 103 102 101 100 003 002 001 000
    punpcklwd   xmm2, xmm0                  ; 007 003 006 002 005 001 004 000
    punpckhwd   xmm7, xmm0                  ; 107 103 106 102 105 101 104 100

    movdqa      xmm5, xmm4                  ; 111 110 109 108 011 010 009 008
    punpcklwd   xmm4, xmm6                  ; 015 011 014 010 013 009 012 008
    punpckhwd   xmm5, xmm6                  ; 115 111 114 110 113 109 112 108

    movdqa      xmm1, xmm2                  ; 007 003 006 002 005 001 004 000
    punpckldq   xmm2, xmm4                  ; 013 009 005 001 012 008 004 000
    punpckhdq   xmm1, xmm4                  ; 015 011 007 003 014 010 006 002

    movdqa      xmm6, xmm7                  ; 107 103 106 102 105 101 104 100
    punpckldq   xmm7, xmm5                  ; 113 109 105 101 112 108 104 100
    punpckhdq   xmm6, xmm5                  ; 115 111 107 103 114 110 106 102

    movdqa      xmm5, xmm2                  ; 013 009 005 001 012 008 004 000
    punpckldq   xmm2, xmm7                  ; 112 108 012 008 104 100 004 000
    punpckhdq   xmm5, xmm7                  ; 113 109 013 009 105 101 005 001

    movdqa      xmm7, xmm1                  ; 015 011 007 003 014 010 006 002
    punpckldq   xmm1, xmm6                  ; 114 110 014 010 106 102 006 002
    punpckhdq   xmm7, xmm6                  ; 115 111 015 011 107 103 007 003

    pshufd      xmm0, xmm2, 11011000b
    pshufd      xmm2, xmm1, 11011000b

    pshufd      xmm1, xmm5, 11011000b
    pshufd      xmm3, xmm7, 11011000b

    ; second pass
    psubw       xmm0, xmm2                  ; b1 = 0-2
    paddw       xmm2, xmm2

    movdqa      xmm5, xmm1
    paddw       xmm2, xmm0                  ; a1 = 0+2

    pmulhw      xmm5, [x_s1sqr2 GLOBAL]
    paddw       xmm5, xmm1                  ; ip1 * sin(pi/8) * sqrt(2)

    movdqa      xmm7, xmm3
    pmulhw      xmm7, [x_c1sqr2less1 GLOBAL]

    paddw       xmm7, xmm3                  ; ip3 * cos(pi/8) * sqrt(2)
    psubw       xmm7, xmm5                  ; c1

    movdqa      xmm5, xmm1
    movdqa      xmm4, xmm3

    pmulhw      xmm5, [x_c1sqr2less1 GLOBAL]
    paddw       xmm5, xmm1

    pmulhw      xmm3, [x_s1sqr2 GLOBAL]
    paddw       xmm3, xmm4

    paddw       xmm3, xmm5                  ; d1

    ; rounding term for the final >> 3, folded into a1 and b1
    paddw       xmm0, [fours GLOBAL]

    paddw       xmm2, [fours GLOBAL]
    movdqa      xmm6, xmm2                  ; a1

    movdqa      xmm4, xmm0                  ; b1
    paddw       xmm2, xmm3                  ;0

    paddw       xmm4, xmm7                  ;1
    psubw       xmm0, xmm7                  ;2

    psubw       xmm6, xmm3                  ;3
    psraw       xmm2, 3

    psraw       xmm0, 3
    psraw       xmm4, 3

    psraw       xmm6, 3

    ; transpose to save
    movdqa      xmm7, xmm2                  ; 103 102 101 100 003 002 001 000
    punpcklwd   xmm2, xmm0                  ; 007 003 006 002 005 001 004 000
    punpckhwd   xmm7, xmm0                  ; 107 103 106 102 105 101 104 100

    movdqa      xmm5, xmm4                  ; 111 110 109 108 011 010 009 008
    punpcklwd   xmm4, xmm6                  ; 015 011 014 010 013 009 012 008
    punpckhwd   xmm5, xmm6                  ; 115 111 114 110 113 109 112 108

    movdqa      xmm1, xmm2                  ; 007 003 006 002 005 001 004 000
    punpckldq   xmm2, xmm4                  ; 013 009 005 001 012 008 004 000
    punpckhdq   xmm1, xmm4                  ; 015 011 007 003 014 010 006 002

    movdqa      xmm6, xmm7                  ; 107 103 106 102 105 101 104 100
    punpckldq   xmm7, xmm5                  ; 113 109 105 101 112 108 104 100
    punpckhdq   xmm6, xmm5                  ; 115 111 107 103 114 110 106 102

    movdqa      xmm5, xmm2                  ; 013 009 005 001 012 008 004 000
    punpckldq   xmm2, xmm7                  ; 112 108 012 008 104 100 004 000
    punpckhdq   xmm5, xmm7                  ; 113 109 013 009 105 101 005 001

    movdqa      xmm7, xmm1                  ; 015 011 007 003 014 010 006 002
    punpckldq   xmm1, xmm6                  ; 114 110 014 010 106 102 006 002
    punpckhdq   xmm7, xmm6                  ; 115 111 015 011 107 103 007 003

    pshufd      xmm0, xmm2, 11011000b
    pshufd      xmm2, xmm1, 11011000b

    pshufd      xmm1, xmm5, 11011000b
    pshufd      xmm3, xmm7, 11011000b

    ; xmm7 becomes the zero register again for byte<->word (un)packing
    pxor        xmm7, xmm7

    ; Load up predict blocks
    movq        xmm4, [rsi]
    movq        xmm5, [rsi+16]

    punpcklbw   xmm4, xmm7
    punpcklbw   xmm5, xmm7

    paddw       xmm0, xmm4
    paddw       xmm1, xmm5

    movq        xmm4, [rsi+32]
    movq        xmm5, [rsi+48]

    punpcklbw   xmm4, xmm7
    punpcklbw   xmm5, xmm7

    paddw       xmm2, xmm4
    paddw       xmm3, xmm5

    ; pack up before storing (unsigned saturation to 0..255)
    packuswb    xmm0, xmm7
    packuswb    xmm1, xmm7
    packuswb    xmm2, xmm7
    packuswb    xmm3, xmm7

    ; Load destination stride before writing out,
    ; doesn't need to persist
    movsxd      rdx, dword ptr arg(4)       ; dst_stride

    ; store blocks back out
    movq        [rdi], xmm0
    movq        [rdi + rdx], xmm1

    lea         rdi, [rdi + 2*rdx]

    movq        [rdi], xmm2
    movq        [rdi + rdx], xmm3

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
; Read-only constants shared by the idct_dequant_*_2x_sse2 routines.
; Each table is eight identical 16-bit words on a 16-byte boundary so it
; can be used directly as a 128-bit memory operand.
SECTION_RODATA
; Rounding term added before the final arithmetic shift right by 3.
align 16
fours:
times 8 dw 0x0004
; 0x8A8C = 35468, i.e. sin(pi/8) * sqrt(2) scaled by 65536.  The value is
; negative when read as a signed word, which is why every pmulhw by this
; table is followed by a paddw of the unscaled input.
align 16
x_s1sqr2:
times 8 dw 0x8A8C
; 0x4E7B = 20091, i.e. (cos(pi/8) * sqrt(2) - 1) scaled by 65536.  The
; "- 1" is restored by the paddw of the unscaled input that follows each
; pmulhw by this table.
align 16
x_c1sqr2less1:
times 8 dw 0x4E7B

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
@ -17,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 2
SAVE_XMM
push rsi
push rdi
; end prolog
@ -101,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -34,6 +34,11 @@ prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
#if HAVE_MMX
// Horizontal MB filtering
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
@ -157,10 +162,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
@ -183,10 +185,7 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
@ -211,10 +210,7 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
}
@ -241,10 +237,7 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
}

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
@ -26,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -240,6 +241,7 @@ acrossnextcol:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -254,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -439,6 +442,7 @@ loop_row:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -452,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -612,6 +617,7 @@ nextcol4:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
@ -67,6 +67,7 @@ sym(vp8_recon4b_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SAVE_XMM
push rsi
push rdi
; end prolog
@ -119,6 +120,7 @@ sym(vp8_recon4b_sse2):
; begin epilog
pop rdi
pop rsi
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
@ -731,7 +731,7 @@ rd:
times 4 dw 0x40
align 16
global sym(vp8_six_tap_mmx) HIDDEN_DATA
global HIDDEN_DATA(sym(vp8_six_tap_mmx))
sym(vp8_six_tap_mmx):
times 8 dw 0
times 8 dw 0
@ -791,7 +791,7 @@ sym(vp8_six_tap_mmx):
align 16
global sym(vp8_bilinear_filters_mmx) HIDDEN_DATA
global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
sym(vp8_bilinear_filters_mmx):
times 8 dw 128
times 8 dw 0

Просмотреть файл

@ -1,10 +1,10 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
@ -37,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -129,6 +130,7 @@ filter_block1d8_h6_rowloop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -155,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -304,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -329,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -397,6 +402,494 @@ vp8_filter_block1d8_v6_sse2_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d16_v6_sse2
;(
; unsigned short *src_ptr,
; unsigned char *output_ptr,
; int dst_pitch,
; unsigned int pixels_per_line,
; unsigned int pixel_step,
; unsigned int output_height,
; unsigned int output_width,
; const short *vp8_filter
;)
;/************************************************************************************
; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
; input pixel array has output_height rows.
;
; Each loop iteration produces one 16-byte output row from six input rows of
; 16 words each (two 16-byte loads per row). pixels_per_line is used directly
; as the byte distance between input rows, and reading starts two rows above
; src_ptr. vp8_filter is indexed as six consecutive 16-byte vectors, one per
; tap. pixel_step and output_width are not read by this routine.
; Input rows, filter vectors and output rows are accessed with aligned 128-bit
; operations (movdqa / pmullw with a memory operand).
;*************************************************************************************/
global sym(vp8_filter_block1d16_v6_sse2)
sym(vp8_filter_block1d16_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rax, arg(7) ;vp8_filter
movsxd rdx, dword ptr arg(3) ;pixels_per_line
mov rdi, arg(1) ;output_ptr
mov rsi, arg(0) ;src_ptr
; back up two rows so that rsi addresses the row used by tap 1
sub rsi, rdx
sub rsi, rdx
movsxd rcx, DWORD PTR arg(5) ;[output_height]
%if ABI_IS_32BIT=0
movsxd r8, dword ptr arg(2) ; dst_pitch
%endif
vp8_filter_block1d16_v6_sse2_loop:
; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order.
; xmm1 accumulates the low 8 columns, xmm2 the high 8 columns.
movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2
movdqa xmm2, XMMWORD PTR [rsi + rdx + 16]
pmullw xmm1, [rax + 16]
pmullw xmm2, [rax + 16]
movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5
movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16]
pmullw xmm3, [rax + 64]
pmullw xmm4, [rax + 64]
movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3
movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16]
pmullw xmm5, [rax + 32]
pmullw xmm6, [rax + 32]
movdqa xmm7, XMMWORD PTR [rsi] ; line 1
movdqa xmm0, XMMWORD PTR [rsi + 16]
pmullw xmm7, [rax]
pmullw xmm0, [rax]
paddsw xmm1, xmm3
paddsw xmm2, xmm4
paddsw xmm1, xmm5
paddsw xmm2, xmm6
paddsw xmm1, xmm7
paddsw xmm2, xmm0
; advance one row; the remaining two taps are addressed relative to the new rsi
add rsi, rdx
movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4
movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16]
pmullw xmm3, [rax + 48]
pmullw xmm4, [rax + 48]
movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6
movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16]
pmullw xmm5, [rax + 80]
pmullw xmm6, [rax + 80]
movdqa xmm7, XMMWORD PTR [rd GLOBAL] ; rounding constant
pxor xmm0, xmm0 ; clear xmm0
paddsw xmm1, xmm3
paddsw xmm2, xmm4
paddsw xmm1, xmm5
paddsw xmm2, xmm6
paddsw xmm1, xmm7
paddsw xmm2, xmm7
psraw xmm1, 7
psraw xmm2, 7
packuswb xmm1, xmm2 ; pack and saturate
movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination
%if ABI_IS_32BIT
add rdi, DWORD PTR arg(2) ;[dst_pitch]
%else
add rdi, r8
%endif
dec rcx ; decrement count
jnz vp8_filter_block1d16_v6_sse2_loop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d8_h6_only_sse2
;(
; unsigned char *src_ptr,
; unsigned int src_pixels_per_line,
; unsigned char *output_ptr,
; int dst_pitch,
; unsigned int output_height,
; const short *vp8_filter
;)
; First-pass filter only when yoffset==0
;
; Applies the 6-tap filter horizontally and writes 8 output bytes per row
; directly to output_ptr (no 16-bit intermediate buffer). Each row reads the
; 16 source bytes src_ptr[-2 .. 13]. vp8_filter is indexed as six consecutive
; 16-byte vectors, one per tap.
global sym(vp8_filter_block1d8_h6_only_sse2)
sym(vp8_filter_block1d8_h6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rdx, arg(5) ;vp8_filter
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
%if ABI_IS_32BIT=0
movsxd r8, dword ptr arg(3) ;dst_pitch
%endif
pxor xmm0, xmm0 ; clear xmm0 for unpack
filter_block1d8_h6_only_rowloop:
; build xmm1 = src[-2 .. 13] from two 8-byte loads; xmm3 keeps src[-2 .. 5]
movq xmm3, MMWORD PTR [rsi - 2]
movq xmm1, MMWORD PTR [rsi + 6]
prefetcht2 [rsi+rax-2]
pslldq xmm1, 8
por xmm1, xmm3
movdqa xmm4, xmm1
movdqa xmm5, xmm1
movdqa xmm6, xmm1
movdqa xmm7, xmm1
; each copy is byte-shifted so that its low 8 bytes line up with one tap,
; then widened to words and multiplied by that tap's coefficient vector
punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
; saturating sum of the six products, plus the rounding constant, then >> 7
paddsw xmm4, xmm7
paddsw xmm4, xmm5
paddsw xmm4, xmm3
paddsw xmm4, xmm6
paddsw xmm4, xmm1
paddsw xmm4, [rd GLOBAL]
psraw xmm4, 7
packuswb xmm4, xmm0
movq QWORD PTR [rdi], xmm4 ; store the results in the destination
lea rsi, [rsi + rax]
%if ABI_IS_32BIT
add rdi, DWORD Ptr arg(3) ;dst_pitch
%else
add rdi, r8
%endif
dec rcx
jnz filter_block1d8_h6_only_rowloop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d16_h6_only_sse2
;(
; unsigned char *src_ptr,
; unsigned int src_pixels_per_line,
; unsigned char *output_ptr,
; int dst_pitch,
; unsigned int output_height,
; const short *vp8_filter
;)
; First-pass filter only when yoffset==0
;
; Applies the 6-tap filter horizontally and writes 16 output bytes per row
; directly to output_ptr, as two independent 8-pixel halves. Each row reads the
; 24 source bytes src_ptr[-2 .. 21] with three 8-byte loads. vp8_filter is
; indexed as six consecutive 16-byte vectors, one per tap.
global sym(vp8_filter_block1d16_h6_only_sse2)
sym(vp8_filter_block1d16_h6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rdx, arg(5) ;vp8_filter
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
%if ABI_IS_32BIT=0
movsxd r8, dword ptr arg(3) ;dst_pitch
%endif
pxor xmm0, xmm0 ; clear xmm0 for unpack
filter_block1d16_h6_only_sse2_rowloop:
; xmm1 = src[-2 .. 13] feeds output pixels 0-7
; xmm2 = src[ 6 .. 21] feeds output pixels 8-15
movq xmm3, MMWORD PTR [rsi - 2]
movq xmm1, MMWORD PTR [rsi + 6]
movq xmm2, MMWORD PTR [rsi +14]
pslldq xmm2, 8
por xmm2, xmm1
prefetcht2 [rsi+rax-2]
pslldq xmm1, 8
por xmm1, xmm3
movdqa xmm4, xmm1
movdqa xmm5, xmm1
movdqa xmm6, xmm1
movdqa xmm7, xmm1
punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
paddsw xmm4, xmm7
paddsw xmm4, xmm5
paddsw xmm4, xmm3
paddsw xmm4, xmm6
paddsw xmm4, xmm1
paddsw xmm4, [rd GLOBAL]
psraw xmm4, 7
packuswb xmm4, xmm0 ; lower 8 bytes
movq QWORD Ptr [rdi], xmm4 ; store the results in the destination
; second half: same tap sequence applied to xmm2. The lane comments below
; reuse the first half's labels; the actual source indices are 8 higher.
movdqa xmm3, xmm2
movdqa xmm4, xmm2
movdqa xmm5, xmm2
movdqa xmm6, xmm2
movdqa xmm7, xmm2
punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
paddsw xmm4, xmm7
paddsw xmm4, xmm5
paddsw xmm4, xmm3
paddsw xmm4, xmm6
paddsw xmm4, xmm2
paddsw xmm4, [rd GLOBAL]
psraw xmm4, 7
packuswb xmm4, xmm0 ; higher 8 bytes
movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination
lea rsi, [rsi + rax]
%if ABI_IS_32BIT
add rdi, DWORD Ptr arg(3) ;dst_pitch
%else
add rdi, r8
%endif
dec rcx
jnz filter_block1d16_h6_only_sse2_rowloop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d8_v6_only_sse2
;(
; unsigned char *src_ptr,
; unsigned int src_pixels_per_line,
; unsigned char *output_ptr,
; int dst_pitch,
; unsigned int output_height,
; const short *vp8_filter
;)
; Second-pass filter only when xoffset==0
;
; Applies the 6-tap filter vertically to 8-bit source pixels and writes 8
; output bytes per row. The six rows read for the first output row are
; src_ptr + k * src_pixels_per_line for k = 0..5; src_ptr is used as passed
; (no backing-up by two rows happens here, so the caller is presumably
; expected to pass the address of the first tap row - confirm against caller).
; vp8_filter is indexed as six consecutive 16-byte vectors, one per tap.
global sym(vp8_filter_block1d8_v6_only_sse2)
sym(vp8_filter_block1d8_v6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
mov rax, arg(5) ;vp8_filter
pxor xmm0, xmm0 ; clear xmm0
movdqa xmm7, XMMWORD PTR [rd GLOBAL] ; rounding constant, kept in xmm7 for the whole loop
%if ABI_IS_32BIT=0
movsxd r8, dword ptr arg(3) ; dst_pitch
%endif
vp8_filter_block1d8_v6_only_sse2_loop:
; rows 1, 2, 3 and 5 are addressed from rsi; rsi then advances one row and
; rows 4 and 6 are addressed from the new rsi
movq xmm1, MMWORD PTR [rsi]
movq xmm2, MMWORD PTR [rsi + rdx]
movq xmm3, MMWORD PTR [rsi + rdx * 2]
movq xmm5, MMWORD PTR [rsi + rdx * 4]
add rsi, rdx
movq xmm4, MMWORD PTR [rsi + rdx * 2]
movq xmm6, MMWORD PTR [rsi + rdx * 4]
; widen each row to words and multiply by its tap
punpcklbw xmm1, xmm0
pmullw xmm1, [rax]
punpcklbw xmm2, xmm0
pmullw xmm2, [rax + 16]
punpcklbw xmm3, xmm0
pmullw xmm3, [rax + 32]
punpcklbw xmm5, xmm0
pmullw xmm5, [rax + 64]
punpcklbw xmm4, xmm0
pmullw xmm4, [rax + 48]
punpcklbw xmm6, xmm0
pmullw xmm6, [rax + 80]
; saturating accumulation in the order 2 5 3 1 4 6, then round and >> 7
paddsw xmm2, xmm5
paddsw xmm2, xmm3
paddsw xmm2, xmm1
paddsw xmm2, xmm4
paddsw xmm2, xmm6
paddsw xmm2, xmm7
psraw xmm2, 7
packuswb xmm2, xmm0 ; pack and saturate
movq QWORD PTR [rdi], xmm2 ; store the results in the destination
%if ABI_IS_32BIT
add rdi, DWORD PTR arg(3) ;[dst_pitch]
%else
add rdi, r8
%endif
dec rcx ; decrement count
jnz vp8_filter_block1d8_v6_only_sse2_loop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -415,6 +908,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
;SAVE_XMM ;xmm6, xmm7 are not used here.
GET_GOT rbx
push rsi
push rdi
@ -454,164 +948,7 @@ unpack_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_unpack_block1d8_h6_sse2
;(
; unsigned char *src_ptr,
; unsigned short *output_ptr,
; unsigned int src_pixels_per_line,
; unsigned int output_height,
; unsigned int output_width
;)
; Copies output_height rows of 8 source bytes to the output, zero-extending
; each byte to a 16-bit word (no filtering). output_width is used as the byte
; distance between output rows. Each output row is written with an aligned
; 16-byte store.
global sym(vp8_unpack_block1d8_h6_sse2)
sym(vp8_unpack_block1d8_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
mov rsi, arg(0) ;src_ptr
mov rdi, arg(1) ;output_ptr
movsxd rcx, dword ptr arg(3) ;output_height
movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
pxor xmm0, xmm0 ; clear xmm0 for unpack
%if ABI_IS_32BIT=0
movsxd r8, dword ptr arg(4) ;output_width ; row advance for the destination
%endif
unpack_block1d8_h6_sse2_rowloop:
movq xmm1, MMWORD PTR [rsi] ; 8 source bytes src[0 .. 7]
lea rsi, [rsi + rax]
punpcklbw xmm1, xmm0 ; bytes -> words
movdqa XMMWORD Ptr [rdi], xmm1
%if ABI_IS_32BIT
add rdi, DWORD Ptr arg(4) ;[output_width]
%else
add rdi, r8
%endif
dec rcx
jnz unpack_block1d8_h6_sse2_rowloop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_pack_block1d8_v6_sse2
;(
; short *src_ptr,
; unsigned char *output_ptr,
; int dst_pitch,
; unsigned int pixels_per_line,
; unsigned int output_height,
; unsigned int output_width
;)
; Converts output_height rows of 8 signed words to 8 unsigned bytes with
; saturation (no filtering). pixels_per_line is the byte distance between
; source rows. The destination pointer advances by output_width per row;
; dst_pitch is not read by this routine.
; NOTE(review): the 16-pixel variant advances the destination by dst_pitch
; instead - confirm that using output_width here is intended.
global sym(vp8_pack_block1d8_v6_sse2)
sym(vp8_pack_block1d8_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
GET_GOT rbx
push rsi
push rdi
; end prolog
movsxd rdx, dword ptr arg(3) ;pixels_per_line
mov rdi, arg(1) ;output_ptr
mov rsi, arg(0) ;src_ptr
movsxd rcx, DWORD PTR arg(4) ;[output_height]
%if ABI_IS_32BIT=0
movsxd r8, dword ptr arg(5) ;output_width ; row advance for the destination
%endif
pack_block1d8_v6_sse2_loop:
movdqa xmm0, XMMWORD PTR [rsi]
packuswb xmm0, xmm0 ; words -> saturated bytes (low 8 bytes are the result)
movq QWORD PTR [rdi], xmm0 ; store the results in the destination
lea rsi, [rsi+rdx]
%if ABI_IS_32BIT
add rdi, DWORD Ptr arg(5) ;[output_width]
%else
add rdi, r8
%endif
dec rcx ; decrement count
jnz pack_block1d8_v6_sse2_loop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_pack_block1d16_v6_sse2
;(
; short *src_ptr,
; unsigned char *output_ptr,
; int dst_pitch,
; unsigned int pixels_per_line,
; unsigned int output_height,
; unsigned int output_width
;)
; Converts output_height rows of 16 signed words to 16 unsigned bytes with
; saturation (no filtering). pixels_per_line is the byte distance between
; source rows and dst_pitch the byte distance between destination rows;
; output_width is not read. Source and destination rows are accessed with
; aligned 16-byte loads/stores. Only xmm0 and xmm1 are modified.
global sym(vp8_pack_block1d16_v6_sse2)
sym(vp8_pack_block1d16_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
GET_GOT rbx
push rsi
push rdi
; end prolog
movsxd rdx, dword ptr arg(3) ;pixels_per_line
mov rdi, arg(1) ;output_ptr
mov rsi, arg(0) ;src_ptr
movsxd rcx, DWORD PTR arg(4) ;[output_height]
%if ABI_IS_32BIT=0
movsxd r8, dword ptr arg(2) ;dst_pitch
%endif
pack_block1d16_v6_sse2_loop:
movdqa xmm0, XMMWORD PTR [rsi]
movdqa xmm1, XMMWORD PTR [rsi+16]
packuswb xmm0, xmm1 ; 16 words -> 16 saturated bytes
movdqa XMMWORD PTR [rdi], xmm0 ; store the results in the destination
add rsi, rdx
%if ABI_IS_32BIT
add rdi, DWORD Ptr arg(2) ;dst_pitch
%else
add rdi, r8
%endif
dec rcx ; decrement count
jnz pack_block1d16_v6_sse2_loop ; next row
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
;RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -632,6 +969,7 @@ sym(vp8_bilinear_predict16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -879,6 +1217,7 @@ done:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -899,6 +1238,7 @@ sym(vp8_bilinear_predict8x8_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
@ -1022,6 +1362,7 @@ next_row8x8:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -0,0 +1,931 @@
;
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%include "vpx_ports/x86_abi_support.asm"
%define BLOCK_HEIGHT_WIDTH 4
%define VP8_FILTER_WEIGHT 128
%define VP8_FILTER_SHIFT 7
;/************************************************************************************
; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
; input pixel array has output_height rows. This routine assumes that output_height is an
; even number. This function handles 8 pixels in horizontal direction, calculating ONE
; row each iteration to take advantage of the 128 bits operations.
;
; This is an implementation of some of the SSE optimizations first seen in ffvp8
;
;*************************************************************************************/
;void vp8_filter_block1d8_h6_ssse3
;(
; unsigned char *src_ptr,
; unsigned int src_pixels_per_line,
; unsigned char *output_ptr,
; unsigned int output_pitch,
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
; vp8_filter_index selects a 16-byte entry in each of the k0_k5 / k1_k3 / k2_k4
; coefficient tables (the tables are 128 bytes apart). When the selected k0_k5
; entry starts with four zero bytes the outer taps are skipped and the 4-tap
; path at vp8_filter_block1d8_h4_ssse3 is used instead.
; Each row reads 16 unaligned bytes starting at src_ptr - 2 and writes 8 bytes.
;
; xmm6 and xmm7 are modified, so they are saved/restored with
; SAVE_XMM / RESTORE_XMM exactly as the SSE2 sub-pixel routines do; both
; epilogues (6-tap and 4-tap) must restore them.
global sym(vp8_filter_block1d8_h6_ssse3)
sym(vp8_filter_block1d8_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
movsxd rdx, DWORD PTR arg(5) ;table index
xor rsi, rsi
shl rdx, 4 ; 16 bytes per table entry
movdqa xmm7, [rd GLOBAL] ; rounding constant
lea rax, [k0_k5 GLOBAL]
add rax, rdx
mov rdi, arg(2) ;output_ptr
cmp esi, DWORD PTR [rax] ; outer taps zero -> 4-tap path
je vp8_filter_block1d8_h4_ssse3
movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rdx, dword ptr arg(3) ;output_pitch
; rdi is advanced at the top of the loop body, so start one row early
sub rdi, rdx
;xmm3 free
filter_block1d8_h6_rowloop_ssse3:
movdqu xmm0, XMMWORD PTR [rsi - 2]
movdqa xmm1, xmm0
pshufb xmm0, [shuf1b GLOBAL] ; pair pixels for taps 0 and 5
movdqa xmm2, xmm1
pshufb xmm1, [shuf2b GLOBAL] ; pair pixels for taps 2 and 4
pmaddubsw xmm0, xmm4
pmaddubsw xmm1, xmm5
pshufb xmm2, [shuf3b GLOBAL] ; pair pixels for taps 1 and 3
add rdi, rdx
pmaddubsw xmm2, xmm6
lea rsi, [rsi + rax]
dec rcx ; flags survive the SSE instructions below and feed the jnz
paddsw xmm0, xmm1
paddsw xmm0, xmm7
paddsw xmm0, xmm2
psraw xmm0, 7
packuswb xmm0, xmm0
movq MMWORD Ptr [rdi], xmm0
jnz filter_block1d8_h6_rowloop_ssse3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
vp8_filter_block1d8_h4_ssse3:
; 4-tap path: only the k1_k3 and k2_k4 pairs contribute
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
movdqa xmm3, XMMWORD PTR [shuf2b GLOBAL]
movdqa xmm4, XMMWORD PTR [shuf3b GLOBAL]
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rdx, dword ptr arg(3) ;output_pitch
sub rdi, rdx
;xmm3 holds shuf2b, xmm4 holds shuf3b
filter_block1d8_h4_rowloop_ssse3:
movdqu xmm0, XMMWORD PTR [rsi - 2]
movdqa xmm2, xmm0
pshufb xmm0, xmm3 ;[shuf2b GLOBAL]
pshufb xmm2, xmm4 ;[shuf3b GLOBAL]
pmaddubsw xmm0, xmm5
add rdi, rdx
pmaddubsw xmm2, xmm6
lea rsi, [rsi + rax]
dec rcx
paddsw xmm0, xmm7
paddsw xmm0, xmm2
psraw xmm0, 7
packuswb xmm0, xmm0
movq MMWORD Ptr [rdi], xmm0
jnz filter_block1d8_h4_rowloop_ssse3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d16_h6_ssse3
;(
; unsigned char *src_ptr,
; unsigned int src_pixels_per_line,
; unsigned char *output_ptr,
; unsigned int output_pitch,
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
; Horizontal 6-tap filter producing 16 output bytes per row, computed as two
; 8-pixel halves from unaligned 16-byte loads at src_ptr - 2 and src_ptr + 6.
; The result is written with an aligned 16-byte store (movdqa).
; vp8_filter_index selects a 16-byte entry in each of the k0_k5 / k1_k3 / k2_k4
; tables. The 4-tap shortcut is disabled here (the compare/branch is commented
; out), so the code at vp8_filter_block1d16_h4_ssse3 is currently not reached
; from this routine.
;
; The prolog executes SAVE_XMM, so every exit path must execute the matching
; RESTORE_XMM before UNSHADOW_ARGS; otherwise the caller's xmm6/xmm7 are lost.
global sym(vp8_filter_block1d16_h6_ssse3)
sym(vp8_filter_block1d16_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
movsxd rdx, DWORD PTR arg(5) ;table index
xor rsi, rsi
shl rdx, 4 ; 16 bytes per table entry
lea rax, [k0_k5 GLOBAL]
add rax, rdx
mov rdi, arg(2) ;output_ptr
movdqa xmm7, [rd GLOBAL] ; rounding constant
;;
;; cmp esi, DWORD PTR [rax]
;; je vp8_filter_block1d16_h4_ssse3
mov rsi, arg(0) ;src_ptr
movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rdx, dword ptr arg(3) ;output_pitch
filter_block1d16_h6_rowloop_ssse3:
; low half accumulates in xmm0, high half in xmm3; the two are interleaved
movdqu xmm0, XMMWORD PTR [rsi - 2]
movdqa xmm1, xmm0
pshufb xmm0, [shuf1b GLOBAL]
movdqa xmm2, xmm1
pmaddubsw xmm0, xmm4
pshufb xmm1, [shuf2b GLOBAL]
pshufb xmm2, [shuf3b GLOBAL]
pmaddubsw xmm1, xmm5
movdqu xmm3, XMMWORD PTR [rsi + 6]
pmaddubsw xmm2, xmm6
paddsw xmm0, xmm1
movdqa xmm1, xmm3
pshufb xmm3, [shuf1b GLOBAL]
paddsw xmm0, xmm7
pmaddubsw xmm3, xmm4
paddsw xmm0, xmm2
movdqa xmm2, xmm1
pshufb xmm1, [shuf2b GLOBAL]
pshufb xmm2, [shuf3b GLOBAL]
pmaddubsw xmm1, xmm5
pmaddubsw xmm2, xmm6
psraw xmm0, 7
packuswb xmm0, xmm0
lea rsi, [rsi + rax]
paddsw xmm3, xmm1
paddsw xmm3, xmm7
paddsw xmm3, xmm2
psraw xmm3, 7
packuswb xmm3, xmm3
punpcklqdq xmm0, xmm3 ; low 8 bytes | high 8 bytes
movdqa XMMWORD Ptr [rdi], xmm0
add rdi, rdx
dec rcx
jnz filter_block1d16_h6_rowloop_ssse3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
vp8_filter_block1d16_h4_ssse3:
; 4-tap path (only the k1_k3 and k2_k4 pairs); shares the prolog above
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rdx, dword ptr arg(3) ;output_pitch
filter_block1d16_h4_rowloop_ssse3:
movdqu xmm1, XMMWORD PTR [rsi - 2]
movdqa xmm2, xmm1
pshufb xmm1, [shuf2b GLOBAL]
pshufb xmm2, [shuf3b GLOBAL]
pmaddubsw xmm1, xmm5
movdqu xmm3, XMMWORD PTR [rsi + 6]
pmaddubsw xmm2, xmm6
movdqa xmm0, xmm3
pshufb xmm3, [shuf3b GLOBAL]
pshufb xmm0, [shuf2b GLOBAL]
paddsw xmm1, xmm7
paddsw xmm1, xmm2
pmaddubsw xmm0, xmm5
pmaddubsw xmm3, xmm6
psraw xmm1, 7
packuswb xmm1, xmm1
lea rsi, [rsi + rax]
paddsw xmm3, xmm0
paddsw xmm3, xmm7
psraw xmm3, 7
packuswb xmm3, xmm3
punpcklqdq xmm1, xmm3
movdqa XMMWORD Ptr [rdi], xmm1
add rdi, rdx
dec rcx
jnz filter_block1d16_h4_rowloop_ssse3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d4_h6_ssse3
;(
; unsigned char *src_ptr,
; unsigned int src_pixels_per_line,
; unsigned char *output_ptr,
; unsigned int output_pitch,
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
; Horizontal 6-tap filter writing 4 output bytes per row. Each row still
; performs an unaligned 16-byte load starting at src_ptr - 2.
; vp8_filter_index selects a 16-byte entry in each of the k0_k5 / k1_k3 / k2_k4
; tables; when the k0_k5 entry starts with four zero bytes the 4-tap path at
; vp8_filter_block1d4_h4_ssse3 is used.
;
; xmm6 and xmm7 are modified, so they are saved/restored with
; SAVE_XMM / RESTORE_XMM exactly as the SSE2 sub-pixel routines do; both
; epilogues must restore them.
global sym(vp8_filter_block1d4_h6_ssse3)
sym(vp8_filter_block1d4_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
movsxd rdx, DWORD PTR arg(5) ;table index
xor rsi, rsi
shl rdx, 4 ; 16 bytes per table entry
lea rax, [k0_k5 GLOBAL]
add rax, rdx
movdqa xmm7, [rd GLOBAL] ; rounding constant
cmp esi, DWORD PTR [rax] ; outer taps zero -> 4-tap path
je vp8_filter_block1d4_h4_ssse3
movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rdx, dword ptr arg(3) ;output_pitch
;xmm3 free
filter_block1d4_h6_rowloop_ssse3:
movdqu xmm0, XMMWORD PTR [rsi - 2]
movdqa xmm1, xmm0
pshufb xmm0, [shuf1b GLOBAL]
movdqa xmm2, xmm1
pshufb xmm1, [shuf2b GLOBAL]
pmaddubsw xmm0, xmm4
pshufb xmm2, [shuf3b GLOBAL]
pmaddubsw xmm1, xmm5
;--
pmaddubsw xmm2, xmm6
lea rsi, [rsi + rax]
;--
paddsw xmm0, xmm1
paddsw xmm0, xmm7
; xmm1 is overwritten at the top of the next iteration; this clear does not
; affect the stored result
pxor xmm1, xmm1
paddsw xmm0, xmm2
psraw xmm0, 7
packuswb xmm0, xmm0
movd DWORD PTR [rdi], xmm0
add rdi, rdx
dec rcx
jnz filter_block1d4_h6_rowloop_ssse3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
vp8_filter_block1d4_h4_ssse3:
; 4-tap path: only the k1_k3 and k2_k4 pairs contribute
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
movdqa xmm0, XMMWORD PTR [shuf2b GLOBAL]
movdqa xmm3, XMMWORD PTR [shuf3b GLOBAL]
mov rsi, arg(0) ;src_ptr
mov rdi, arg(2) ;output_ptr
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rdx, dword ptr arg(3) ;output_pitch
filter_block1d4_h4_rowloop_ssse3:
movdqu xmm1, XMMWORD PTR [rsi - 2]
movdqa xmm2, xmm1
pshufb xmm1, xmm0 ;;[shuf2b GLOBAL]
pshufb xmm2, xmm3 ;;[shuf3b GLOBAL]
pmaddubsw xmm1, xmm5
;--
pmaddubsw xmm2, xmm6
lea rsi, [rsi + rax]
;--
paddsw xmm1, xmm7
paddsw xmm1, xmm2
psraw xmm1, 7
packuswb xmm1, xmm1
movd DWORD PTR [rdi], xmm1
add rdi, rdx
dec rcx
jnz filter_block1d4_h4_rowloop_ssse3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d16_v6_ssse3
;(
; unsigned char *src_ptr,
; unsigned int src_pitch,
; unsigned char *output_ptr,
; unsigned int out_pitch,
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
; Vertical 6-tap filter producing 16 output bytes per row. The six input rows
; A..F for the first output row are src_ptr + k * src_pitch for k = 0..5; rows
; are paired (A,F), (B,D), (C,E) byte-wise and multiplied with the k0_k5,
; k1_k3 and k2_k4 coefficient pairs. When the selected k0_k5 entry starts with
; four zero bytes, rows A and F are skipped (vp8_filter_block1d16_v4_ssse3).
; The 6-tap path stores each row as two 8-byte halves; the 4-tap path stores
; it with one aligned 16-byte movdqa.
;
; xmm6 and xmm7 are modified, so they are saved/restored with
; SAVE_XMM / RESTORE_XMM exactly as the SSE2 sub-pixel routines do; both
; epilogues must restore them.
global sym(vp8_filter_block1d16_v6_ssse3)
sym(vp8_filter_block1d16_v6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
movsxd rdx, DWORD PTR arg(5) ;table index
xor rsi, rsi
shl rdx, 4 ; 16 bytes per table entry
lea rax, [k0_k5 GLOBAL]
add rax, rdx
cmp esi, DWORD PTR [rax] ; outer taps zero -> 4-tap path
je vp8_filter_block1d16_v4_ssse3
movdqa xmm5, XMMWORD PTR [rax] ;k0_k5
movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
mov rsi, arg(0) ;src_ptr
movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
mov rdi, arg(2) ;output_ptr
%if ABI_IS_32BIT=0
movsxd r8, DWORD PTR arg(3) ;out_pitch
%endif
; rax = rsi + pitch, so [rax + rdx*2] / [rax + rdx*4] address rows D and F
mov rax, rsi
movsxd rcx, DWORD PTR arg(4) ;output_height
add rax, rdx
vp8_filter_block1d16_v6_ssse3_loop:
; columns 0-7
movq xmm1, MMWORD PTR [rsi] ;A
movq xmm2, MMWORD PTR [rsi + rdx] ;B
movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
punpcklbw xmm2, xmm4 ;B D
punpcklbw xmm3, xmm0 ;C E
movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
pmaddubsw xmm3, xmm6
punpcklbw xmm1, xmm0 ;A F
pmaddubsw xmm2, xmm7
pmaddubsw xmm1, xmm5
paddsw xmm2, xmm3
paddsw xmm2, xmm1
paddsw xmm2, [rd GLOBAL]
psraw xmm2, 7
packuswb xmm2, xmm2
movq MMWORD PTR [rdi], xmm2 ;store the results
; columns 8-15
movq xmm1, MMWORD PTR [rsi + 8] ;A
movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B
movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C
movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
punpcklbw xmm2, xmm4 ;B D
punpcklbw xmm3, xmm0 ;C E
movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F
pmaddubsw xmm3, xmm6
punpcklbw xmm1, xmm0 ;A F
pmaddubsw xmm2, xmm7
pmaddubsw xmm1, xmm5
add rsi, rdx
add rax, rdx
;--
;--
paddsw xmm2, xmm3
paddsw xmm2, xmm1
paddsw xmm2, [rd GLOBAL]
psraw xmm2, 7
packuswb xmm2, xmm2
movq MMWORD PTR [rdi+8], xmm2
%if ABI_IS_32BIT
add rdi, DWORD PTR arg(3) ;out_pitch
%else
add rdi, r8
%endif
dec rcx
jnz vp8_filter_block1d16_v6_ssse3_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
vp8_filter_block1d16_v4_ssse3:
; 4-tap path: rows B..E only
movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
mov rsi, arg(0) ;src_ptr
movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
mov rdi, arg(2) ;output_ptr
%if ABI_IS_32BIT=0
movsxd r8, DWORD PTR arg(3) ;out_pitch
%endif
mov rax, rsi
movsxd rcx, DWORD PTR arg(4) ;output_height
add rax, rdx
vp8_filter_block1d16_v4_ssse3_loop:
; columns 0-7 accumulate in xmm2, columns 8-15 in xmm5
movq xmm2, MMWORD PTR [rsi + rdx] ;B
movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
punpcklbw xmm2, xmm4 ;B D
punpcklbw xmm3, xmm0 ;C E
pmaddubsw xmm3, xmm6
pmaddubsw xmm2, xmm7
movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B
movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C
movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D
movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E
paddsw xmm2, [rd GLOBAL]
paddsw xmm2, xmm3
psraw xmm2, 7
packuswb xmm2, xmm2
punpcklbw xmm5, xmm4 ;B D
punpcklbw xmm1, xmm0 ;C E
pmaddubsw xmm1, xmm6
pmaddubsw xmm5, xmm7
movdqa xmm4, [rd GLOBAL]
add rsi, rdx
add rax, rdx
;--
;--
paddsw xmm5, xmm1
paddsw xmm5, xmm4
psraw xmm5, 7
packuswb xmm5, xmm5
punpcklqdq xmm2, xmm5 ; low 8 bytes | high 8 bytes
movdqa XMMWORD PTR [rdi], xmm2
%if ABI_IS_32BIT
add rdi, DWORD PTR arg(3) ;out_pitch
%else
add rdi, r8
%endif
dec rcx
jnz vp8_filter_block1d16_v4_ssse3_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d8_v6_ssse3
;(
; unsigned char *src_ptr,
; unsigned int src_pitch,
; unsigned char *output_ptr,
; unsigned int out_pitch,
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
; Vertical 6-tap filter producing 8 output bytes per row. The six input rows
; A..F for the first output row are src_ptr + k * src_pitch for k = 0..5; rows
; are paired (A,F), (B,D), (C,E) byte-wise and multiplied with the k0_k5,
; k1_k3 and k2_k4 coefficient pairs. When the selected k0_k5 entry starts with
; four zero bytes, rows A and F are skipped (vp8_filter_block1d8_v4_ssse3).
;
; xmm6 and xmm7 are modified, so they are saved/restored with
; SAVE_XMM / RESTORE_XMM exactly as the SSE2 sub-pixel routines do; both
; epilogues must restore them.
global sym(vp8_filter_block1d8_v6_ssse3)
sym(vp8_filter_block1d8_v6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
GET_GOT rbx
push rsi
push rdi
; end prolog
movsxd rdx, DWORD PTR arg(5) ;table index
xor rsi, rsi
shl rdx, 4 ; 16 bytes per table entry
lea rax, [k0_k5 GLOBAL]
add rax, rdx
; the arguments shared by both paths are loaded before the branch
movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
mov rdi, arg(2) ;output_ptr
%if ABI_IS_32BIT=0
movsxd r8, DWORD PTR arg(3) ; out_pitch
%endif
movsxd rcx, DWORD PTR arg(4) ;[output_height]
cmp esi, DWORD PTR [rax] ; outer taps zero -> 4-tap path
je vp8_filter_block1d8_v4_ssse3
movdqa xmm5, XMMWORD PTR [rax] ;k0_k5
movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
mov rsi, arg(0) ;src_ptr
; rax = rsi + pitch, so [rax + rdx*2] / [rax + rdx*4] address rows D and F
mov rax, rsi
add rax, rdx
vp8_filter_block1d8_v6_ssse3_loop:
movq xmm1, MMWORD PTR [rsi] ;A
movq xmm2, MMWORD PTR [rsi + rdx] ;B
movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
punpcklbw xmm2, xmm4 ;B D
punpcklbw xmm3, xmm0 ;C E
movq xmm0, MMWORD PTR [rax + rdx * 4] ;F
movdqa xmm4, [rd GLOBAL] ; rounding constant
pmaddubsw xmm3, xmm6
punpcklbw xmm1, xmm0 ;A F
pmaddubsw xmm2, xmm7
pmaddubsw xmm1, xmm5
add rsi, rdx
add rax, rdx
;--
;--
paddsw xmm2, xmm3
paddsw xmm2, xmm1
paddsw xmm2, xmm4
psraw xmm2, 7
packuswb xmm2, xmm2
movq MMWORD PTR [rdi], xmm2
%if ABI_IS_32BIT
add rdi, DWORD PTR arg(3) ;[out_pitch]
%else
add rdi, r8
%endif
dec rcx
jnz vp8_filter_block1d8_v6_ssse3_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
vp8_filter_block1d8_v4_ssse3:
; 4-tap path: rows B..E only; rounding constant is kept in xmm5
movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3
movdqa xmm5, [rd GLOBAL]
mov rsi, arg(0) ;src_ptr
mov rax, rsi
add rax, rdx
vp8_filter_block1d8_v4_ssse3_loop:
movq xmm2, MMWORD PTR [rsi + rdx] ;B
movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C
movq xmm4, MMWORD PTR [rax + rdx * 2] ;D
movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E
punpcklbw xmm2, xmm4 ;B D
punpcklbw xmm3, xmm0 ;C E
pmaddubsw xmm3, xmm6
pmaddubsw xmm2, xmm7
add rsi, rdx
add rax, rdx
;--
;--
paddsw xmm2, xmm3
paddsw xmm2, xmm5
psraw xmm2, 7
packuswb xmm2, xmm2
movq MMWORD PTR [rdi], xmm2
%if ABI_IS_32BIT
add rdi, DWORD PTR arg(3) ;[out_pitch]
%else
add rdi, r8
%endif
dec rcx
jnz vp8_filter_block1d8_v4_ssse3_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1d4_v6_ssse3
;(
; unsigned char *src_ptr,
; unsigned int src_pitch,
; unsigned char *output_ptr,
; unsigned int out_pitch,
; unsigned int output_height,
; unsigned int vp8_filter_index
;)
; Vertical 6-tap filter producing 4 output bytes per row, implemented with the
; 64-bit MMX registers (mm0-mm7) rather than XMM registers, so only the first
; 8 bytes of each 16-byte coefficient entry and of rd are loaded.
; The six input rows A..F for the first output row are
; src_ptr + k * src_pitch for k = 0..5. When the selected k0_k5 entry starts
; with four zero bytes, rows A and F are skipped (vp8_filter_block1d4_v4_ssse3).
; NOTE(review): no emms is executed in this routine; presumably the caller is
; responsible for clearing MMX state before using x87 code - confirm.
global sym(vp8_filter_block1d4_v6_ssse3)
sym(vp8_filter_block1d4_v6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
GET_GOT rbx
push rsi
push rdi
; end prolog
movsxd rdx, DWORD PTR arg(5) ;table index
xor rsi, rsi
shl rdx, 4 ; 16 bytes per table entry
lea rax, [k0_k5 GLOBAL]
add rax, rdx
; the arguments shared by both paths are loaded before the branch
movsxd rdx, DWORD PTR arg(1) ;pixels_per_line
mov rdi, arg(2) ;output_ptr
%if ABI_IS_32BIT=0
movsxd r8, DWORD PTR arg(3) ; out_pitch
%endif
movsxd rcx, DWORD PTR arg(4) ;[output_height]
cmp esi, DWORD PTR [rax] ; outer taps zero -> 4-tap path
je vp8_filter_block1d4_v4_ssse3
movq mm5, MMWORD PTR [rax] ;k0_k5
movq mm6, MMWORD PTR [rax+256] ;k2_k4
movq mm7, MMWORD PTR [rax+128] ;k1_k3
mov rsi, arg(0) ;src_ptr
; rax = rsi + pitch, so [rax + rdx*2] / [rax + rdx*4] address rows D and F
mov rax, rsi
add rax, rdx
vp8_filter_block1d4_v6_ssse3_loop:
movd mm1, DWORD PTR [rsi] ;A
movd mm2, DWORD PTR [rsi + rdx] ;B
movd mm3, DWORD PTR [rsi + rdx * 2] ;C
movd mm4, DWORD PTR [rax + rdx * 2] ;D
movd mm0, DWORD PTR [rsi + rdx * 4] ;E
punpcklbw mm2, mm4 ;B D
punpcklbw mm3, mm0 ;C E
movd mm0, DWORD PTR [rax + rdx * 4] ;F
movq mm4, [rd GLOBAL] ; rounding constant
pmaddubsw mm3, mm6
punpcklbw mm1, mm0 ;A F
pmaddubsw mm2, mm7
pmaddubsw mm1, mm5
add rsi, rdx
add rax, rdx
;--
;--
paddsw mm2, mm3
paddsw mm2, mm1
paddsw mm2, mm4
psraw mm2, 7
packuswb mm2, mm2
movd DWORD PTR [rdi], mm2
%if ABI_IS_32BIT
add rdi, DWORD PTR arg(3) ;[out_pitch]
%else
add rdi, r8
%endif
dec rcx
jnz vp8_filter_block1d4_v6_ssse3_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
vp8_filter_block1d4_v4_ssse3:
; 4-tap path: rows B..E only; rounding constant is kept in mm5
movq mm6, MMWORD PTR [rax+256] ;k2_k4
movq mm7, MMWORD PTR [rax+128] ;k1_k3
movq mm5, MMWORD PTR [rd GLOBAL]
mov rsi, arg(0) ;src_ptr
mov rax, rsi
add rax, rdx
vp8_filter_block1d4_v4_ssse3_loop:
movd mm2, DWORD PTR [rsi + rdx] ;B
movd mm3, DWORD PTR [rsi + rdx * 2] ;C
movd mm4, DWORD PTR [rax + rdx * 2] ;D
movd mm0, DWORD PTR [rsi + rdx * 4] ;E
punpcklbw mm2, mm4 ;B D
punpcklbw mm3, mm0 ;C E
pmaddubsw mm3, mm6
pmaddubsw mm2, mm7
add rsi, rdx
add rax, rdx
;--
;--
paddsw mm2, mm3
paddsw mm2, mm5
psraw mm2, 7
packuswb mm2, mm2
movd DWORD PTR [rdi], mm2
%if ABI_IS_32BIT
add rdi, DWORD PTR arg(3) ;[out_pitch]
%else
add rdi, r8
%endif
dec rcx
jnz vp8_filter_block1d4_v4_ssse3_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
; pshufb control masks for the horizontal filters. Applied to the 16 bytes
; loaded from src - 2, each mask places, for output pixel i (i = 0..7), the
; two source bytes that share one coefficient pair into adjacent lanes so that
; a single pmaddubsw multiplies and sums them:
;   shuf1b: bytes (i, i+5)   -> taps 0 and 5 (k0_k5)
;   shuf2b: bytes (i+2, i+4) -> taps 2 and 4 (k2_k4)
;   shuf3b: bytes (i+1, i+3) -> taps 1 and 3 (k1_k3)
align 16
shuf1b:
db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
shuf2b:
db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
shuf3b:
db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
; rounding constant added before the arithmetic shift right by 7
align 16
rd:
times 8 dw 0x40
; Coefficient tables, indexed by vp8_filter_index. Each entry is 16 bytes (one
; byte pair repeated 8 times) and each table has 8 entries, so the three
; tables are 128 bytes apart; the code relies on this when it addresses
; k1_k3 as [k0_k5 entry + 128] and k2_k4 as [k0_k5 entry + 256].
; An all-zero k0_k5 entry is what the filter routines test to select their
; 4-tap path.
align 16
k0_k5:
times 8 db 0, 0 ;placeholder
times 8 db 0, 0
times 8 db 2, 1
times 8 db 0, 0
times 8 db 3, 3
times 8 db 0, 0
times 8 db 1, 2
times 8 db 0, 0
k1_k3:
times 8 db 0, 0 ;placeholder
times 8 db -6, 12
times 8 db -11, 36
times 8 db -9, 50
times 8 db -16, 77
times 8 db -6, 93
times 8 db -8, 108
times 8 db -1, 123
; NOTE(review): these bytes are the signed operand of pmaddubsw, where the
; placeholder value 128 would be read as -128; presumably index 0 is never
; selected by callers - confirm.
k2_k4:
times 8 db 128, 0 ;placeholder
times 8 db 123, -1
times 8 db 108, -8
times 8 db 93, -6
times 8 db 77, -16
times 8 db 50, -9
times 8 db 36, -11
times 8 db 12, -6

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -86,4 +86,37 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
#endif
#endif
/* SSSE3 six-tap sub-pixel predictors. The bilinear entries are left
 * commented out, so no SSSE3-specific bilinear override is declared here.
 */
#if HAVE_SSSE3
extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3);
extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3);
extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3);
extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
//extern prototype_subpixel_predict(vp8_bilinear_predict16x16_sse2);
//extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
/* Without runtime CPU detection the vp8_subpix_sixtap* macros are rebound
 * to the SSSE3 functions, replacing any definition made earlier in this
 * header.
 */
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_subpix_sixtap16x16
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_ssse3
#undef vp8_subpix_sixtap8x8
#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_ssse3
#undef vp8_subpix_sixtap8x4
#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3
#undef vp8_subpix_sixtap4x4
#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
//#undef vp8_subpix_bilinear16x16
//#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_sse2
//#undef vp8_subpix_bilinear8x8
//#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_sse2
#endif
#endif
#endif

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -68,6 +68,17 @@ extern void vp8_filter_block1d8_v6_sse2
unsigned int output_width,
const short *vp8_filter
);
extern void vp8_filter_block1d16_v6_sse2
(
unsigned short *src_ptr,
unsigned char *output_ptr,
int dst_ptich,
unsigned int pixels_per_line,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
);
extern void vp8_unpack_block1d16_h6_sse2
(
unsigned char *src_ptr,
@ -76,31 +87,32 @@ extern void vp8_unpack_block1d16_h6_sse2
unsigned int output_height,
unsigned int output_width
);
extern void vp8_unpack_block1d8_h6_sse2
extern void vp8_filter_block1d8_h6_only_sse2
(
unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
unsigned char *output_ptr,
int dst_ptich,
unsigned int output_height,
unsigned int output_width
const short *vp8_filter
);
extern void vp8_pack_block1d8_v6_sse2
extern void vp8_filter_block1d16_h6_only_sse2
(
unsigned short *src_ptr,
unsigned char *src_ptr,
unsigned int src_pixels_per_line,
unsigned char *output_ptr,
int dst_ptich,
unsigned int output_height,
const short *vp8_filter
);
extern void vp8_filter_block1d8_v6_only_sse2
(
unsigned char *src_ptr,
unsigned int src_pixels_per_line,
unsigned char *output_ptr,
int dst_ptich,
unsigned int pixels_per_line,
unsigned int output_height,
unsigned int output_width
);
extern void vp8_pack_block1d16_v6_sse2
(
unsigned short *src_ptr,
unsigned char *output_ptr,
int dst_ptich,
unsigned int pixels_per_line,
unsigned int output_height,
unsigned int output_width
unsigned int output_height,
const short *vp8_filter
);
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx);
@ -247,23 +259,26 @@ void vp8_sixtap_predict16x16_sse2
if (xoffset)
{
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
if (yoffset)
{
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
VFilter = vp8_six_tap_mmx[yoffset];
vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
}
else
{
// First-pass only
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
}
}
else
{
vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
}
if (yoffset)
{
// Second-pass only
VFilter = vp8_six_tap_mmx[yoffset];
vp8_filter_block1d8_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter);
vp8_filter_block1d8_v6_sse2(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
}
else
{
vp8_pack_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16, 16);
vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
}
}
@ -283,25 +298,26 @@ void vp8_sixtap_predict8x8_sse2
if (xoffset)
{
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
if (yoffset)
{
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
VFilter = vp8_six_tap_mmx[yoffset];
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
}
else
{
// First-pass only
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
}
}
else
{
vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 13, 16);
}
if (yoffset)
{
// Second-pass only
VFilter = vp8_six_tap_mmx[yoffset];
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
}
else
{
vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8, dst_pitch);
}
}
@ -320,24 +336,218 @@ void vp8_sixtap_predict8x4_sse2
if (xoffset)
{
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
if (yoffset)
{
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
VFilter = vp8_six_tap_mmx[yoffset];
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
}
else
{
// First-pass only
HFilter = vp8_six_tap_mmx[xoffset];
vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
}
}
else
{
vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 9, 16);
}
if (yoffset)
{
// Second-pass only
VFilter = vp8_six_tap_mmx[yoffset];
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
}
}
#endif
#if HAVE_SSSE3
/* Prototypes for the SSSE3 one-dimensional filter kernels, defined outside
 * this file. The _h6_ variants filter horizontally and the _v6_ variants
 * vertically; the number after "1d" is the block width in pixels.
 *
 * All six share the same argument order: source pointer, source pitch,
 * output pointer, output pitch, number of output rows, filter index.
 * The last argument is an index, not a pointer to coefficients; the
 * predictors in this file pass xoffset or yoffset for it unchanged.
 */
extern void vp8_filter_block1d8_h6_ssse3
(
unsigned char *src_ptr,
unsigned int src_pixels_per_line,
unsigned char *output_ptr,
unsigned int output_pitch,
unsigned int output_height,
unsigned int vp8_filter_index
);
extern void vp8_filter_block1d16_h6_ssse3
(
unsigned char *src_ptr,
unsigned int src_pixels_per_line,
unsigned char *output_ptr,
unsigned int output_pitch,
unsigned int output_height,
unsigned int vp8_filter_index
);
extern void vp8_filter_block1d16_v6_ssse3
(
unsigned char *src_ptr,
unsigned int src_pitch,
unsigned char *output_ptr,
unsigned int out_pitch,
unsigned int output_height,
unsigned int vp8_filter_index
);
extern void vp8_filter_block1d8_v6_ssse3
(
unsigned char *src_ptr,
unsigned int src_pitch,
unsigned char *output_ptr,
unsigned int out_pitch,
unsigned int output_height,
unsigned int vp8_filter_index
);
extern void vp8_filter_block1d4_h6_ssse3
(
unsigned char *src_ptr,
unsigned int src_pixels_per_line,
unsigned char *output_ptr,
unsigned int output_pitch,
unsigned int output_height,
unsigned int vp8_filter_index
);
extern void vp8_filter_block1d4_v6_ssse3
(
unsigned char *src_ptr,
unsigned int src_pitch,
unsigned char *output_ptr,
unsigned int out_pitch,
unsigned int output_height,
unsigned int vp8_filter_index
);
/* Six-tap sub-pixel prediction of a 16x16 block using the SSSE3 kernels.
 *
 * src_ptr / src_pixels_per_line : source block and its pitch.
 * xoffset / yoffset             : horizontal / vertical filter index; they
 *                                 are passed straight through to the kernels.
 * dst_ptr / dst_pitch           : destination block and its pitch.
 *
 * Both offsets non-zero: the horizontal pass runs first into the FData2
 * scratch buffer (pitch 16, 21 rows starting two rows above src_ptr), then
 * the vertical pass produces the 16 output rows from it.
 * Only xoffset non-zero: a single horizontal pass writes dst_ptr directly.
 * xoffset zero: a single vertical pass reads from two rows above src_ptr;
 * this branch is also taken when both offsets are zero.
 */
void vp8_sixtap_predict16x16_ssse3
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
if (xoffset)
{
if (yoffset)
{
vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
}
else
{
// First-pass only
vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
}
}
else
{
/* NOTE(review): the vp8_pack_block1d8_v6_sse2 call below reads FData2,
 * which nothing has written on this path, and the 8x8, 8x4 and 4x4 SSSE3
 * predictors have no equivalent call. It looks like a line removed from
 * the old vp8_sixtap_predict8x4_sse2 that ended up in this listing --
 * confirm against the real file and drop it if so.
 */
vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 4, dst_pitch);
// Second-pass only
vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
}
}
void vp8_sixtap_predict8x8_ssse3
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
if (xoffset)
{
if (yoffset)
{
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
}
else
{
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
}
}
else
{
// Second-pass only
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
}
}
void vp8_sixtap_predict8x4_ssse3
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
if (xoffset)
{
if (yoffset)
{
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
}
else
{
// First-pass only
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
}
}
else
{
// Second-pass only
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
}
}
void vp8_sixtap_predict4x4_ssse3
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
if (xoffset)
{
if (yoffset)
{
vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
}
else
{
vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
}
}
else
{
vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
}
}
#endif

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -27,6 +27,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
int mmx_enabled = flags & HAS_MMX;
int xmm_enabled = flags & HAS_SSE;
int wmt_enabled = flags & HAS_SSE2;
int SSSE3Enabled = flags & HAS_SSSE3;
/* Note:
*
@ -42,7 +43,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
{
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx;
rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
rtcd->idct.idct1_scalar = vp8_dc_only_idct_mmx;
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx;
@ -114,5 +115,17 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
}
#endif
#if HAVE_SSSE3
if (SSSE3Enabled)
{
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3;
rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_ssse3;
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3;
}
#endif
#endif
}

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -13,7 +13,7 @@
#include "vpx_ports/mem.h"
#include "vpx_mem/vpx_mem.h"
DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -26,86 +26,41 @@ DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
};
static void copy_in(BOOL_DECODER *br, unsigned int to_write)
{
if (to_write > br->user_buffer_sz)
to_write = br->user_buffer_sz;
memcpy(br->write_ptr, br->user_buffer, to_write);
br->user_buffer += to_write;
br->user_buffer_sz -= to_write;
br->write_ptr = br_ptr_advance(br->write_ptr, to_write);
}
int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
unsigned int source_sz)
{
br->lowvalue = 0;
br->user_buffer_end = source+source_sz;
br->user_buffer = source;
br->value = 0;
br->count = -8;
br->range = 255;
br->count = 0;
br->user_buffer = source;
br->user_buffer_sz = source_sz;
if (source_sz && !source)
return 1;
/* Allocate the ring buffer backing store with alignment equal to the
* buffer size*2 so that a single pointer can be used for wrapping rather
* than a pointer+offset.
*/
br->decode_buffer = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
VP8_BOOL_DECODER_SZ);
if (!br->decode_buffer)
return 1;
/* Populate the buffer */
br->read_ptr = br->decode_buffer;
br->write_ptr = br->decode_buffer;
copy_in(br, VP8_BOOL_DECODER_SZ);
vp8dx_bool_decoder_fill_c(br);
/* Read the first byte */
br->value = (*br->read_ptr++) << 8;
return 0;
}
void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
{
int left, right;
const unsigned char *bufptr;
const unsigned char *bufend;
VP8_BD_VALUE value;
int count;
bufend = br->user_buffer_end;
bufptr = br->user_buffer;
value = br->value;
count = br->count;
/* Find available room in the buffer */
left = 0;
right = br->read_ptr - br->write_ptr;
VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
if (right < 0)
{
/* Read pointer is behind the write pointer. We can write from the
* write pointer to the end of the buffer.
*/
right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
left = br->read_ptr - br->decode_buffer;
}
if (right + left < 128)
return;
if (right)
copy_in(br, right);
if (left)
{
br->write_ptr = br->decode_buffer;
copy_in(br, left);
}
}
void vp8dx_stop_decode_c(BOOL_DECODER *bc)
{
vpx_free(bc->decode_buffer);
bc->decode_buffer = 0;
br->user_buffer = bufptr;
br->value = value;
br->count = count;
}
#if 0
@ -120,13 +75,18 @@ void vp8dx_stop_decode_c(BOOL_DECODER *bc)
int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
{
unsigned int bit=0;
VP8_BD_VALUE value;
unsigned int split;
unsigned int bigsplit;
register unsigned int range = br->range;
register unsigned int value = br->value;
VP8_BD_VALUE bigsplit;
int count;
unsigned int range;
value = br->value;
count = br->count;
range = br->range;
split = 1 + (((range-1) * probability) >> 8);
bigsplit = (split<<8);
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split;
if(value >= bigsplit)
@ -144,21 +104,16 @@ int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
}*/
{
int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
if(count <= 0)
{
value |= (*br->read_ptr) << (-count);
br->read_ptr = br_ptr_advance(br->read_ptr, 1);
count += 8 ;
}
br->count = count;
}
br->value = value;
br->count = count;
br->range = range;
if (count < 0)
vp8dx_bool_decoder_fill_c(br);
return bit;
}

Просмотреть файл

@ -1,61 +1,41 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef DBOOLHUFF_H
#define DBOOLHUFF_H
#include <stddef.h>
#include <limits.h>
#include "vpx_ports/config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
/* Size of the bool decoder backing storage
*
* This size was chosen to be greater than the worst case encoding of a
* single macroblock. This was calculated as follows (python):
*
* def max_cost(prob):
* return max(prob_costs[prob], prob_costs[255-prob]) / 256;
*
* tree_nodes_cost = 7 * max_cost(255)
* extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
* sign_bit_cost = max_cost(128)
* total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
*
* where the prob_costs table was taken from the C vp8_prob_cost table in
* boolhuff.c and the extra_bits table was taken from the 11 extrabits for
* a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
*
* This equation produced a maximum of 79 bits per coefficient. Scaling up
* to the macroblock level:
*
* 79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
*
* 4096 bytes = 32768 bits > 31600
*/
#define VP8_BOOL_DECODER_SZ 4096
#define VP8_BOOL_DECODER_MASK (VP8_BOOL_DECODER_SZ-1)
#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
typedef size_t VP8_BD_VALUE;
# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
/*This is meant to be a large, positive constant that can still be efficiently
loaded as an immediate (on platforms like ARM, for example).
Even relatively modest values like 100 would work fine.*/
# define VP8_LOTS_OF_BITS (0x40000000)
struct vp8_dboolhuff_rtcd_vtable;
typedef struct
{
unsigned int lowvalue;
unsigned int range;
unsigned int value;
int count;
const unsigned char *user_buffer_end;
const unsigned char *user_buffer;
unsigned int user_buffer_sz;
unsigned char *decode_buffer;
const unsigned char *read_ptr;
unsigned char *write_ptr;
VP8_BD_VALUE value;
int count;
unsigned int range;
#if CONFIG_RUNTIME_CPU_DETECT
struct vp8_dboolhuff_rtcd_vtable *rtcd;
#endif
@ -63,7 +43,6 @@ typedef struct
#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
const unsigned char *source, unsigned int source_sz)
#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
@ -76,10 +55,6 @@ typedef struct
#define vp8_dbool_start vp8dx_start_decode_c
#endif
#ifndef vp8_dbool_stop
#define vp8_dbool_stop vp8dx_stop_decode_c
#endif
#ifndef vp8_dbool_fill
#define vp8_dbool_fill vp8dx_bool_decoder_fill_c
#endif
@ -93,20 +68,17 @@ typedef struct
#endif
extern prototype_dbool_start(vp8_dbool_start);
extern prototype_dbool_stop(vp8_dbool_stop);
extern prototype_dbool_fill(vp8_dbool_fill);
extern prototype_dbool_debool(vp8_dbool_debool);
extern prototype_dbool_devalue(vp8_dbool_devalue);
typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
typedef struct vp8_dboolhuff_rtcd_vtable {
vp8_dbool_start_fn_t start;
vp8_dbool_stop_fn_t stop;
vp8_dbool_fill_fn_t fill;
vp8_dbool_debool_fn_t debool;
vp8_dbool_devalue_fn_t devalue;
@ -123,18 +95,7 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
#define IF_RTCD(x) NULL
//#endif
static unsigned char *br_ptr_advance(const unsigned char *_ptr,
unsigned int n)
{
uintptr_t ptr = (uintptr_t)_ptr;
ptr += n;
ptr &= VP8_BOOL_DECODER_PTR_MASK;
return (void *)ptr;
}
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
/* wrapper functions to hide RTCD. static means inline means hopefully no
* penalty
@ -147,12 +108,34 @@ static int vp8dx_start_decode(BOOL_DECODER *br,
#endif
return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
}
static void vp8dx_stop_decode(BOOL_DECODER *br) {
DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
}
static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
}
/*The refill loop is used in several places, so define it in a macro to make
sure they're all consistent.
An inline function would be cleaner, but has a significant penalty, because
multiple BOOL_DECODER fields must be modified, and the compiler is not smart
enough to eliminate the stores to those fields and the subsequent reloads
from them when inlining the function.

Arguments (each is evaluated more than once, so pass plain variables):
_count   int lvalue; increased by 8 for every byte consumed.
_value   VP8_BD_VALUE lvalue; each consumed byte is OR-ed into it.
_bufptr  byte pointer lvalue; advanced past every byte consumed.
_bufend  end of the input; bytes are read only while _bufptr < _bufend.

The first byte is placed at bit position
VP8_BD_VALUE_SIZE - 8 - (_count + 8), and each following byte 8 bits lower,
for as long as that position is non-negative.
If the input runs out first, _count is set to VP8_LOTS_OF_BITS and the loop
stops without reading anything further.*/
#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
do \
{ \
int shift; \
for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
{ \
if((_bufptr) >= (_bufend)) { \
(_count) = VP8_LOTS_OF_BITS; \
break; \
} \
(_count) += 8; \
(_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \
shift -= 8; \
} \
} \
while(0)
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
/*
* Until optimized versions of this function are available, we
@ -161,13 +144,18 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
*return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
*/
unsigned int bit = 0;
VP8_BD_VALUE value;
unsigned int split;
unsigned int bigsplit;
register unsigned int range = br->range;
register unsigned int value = br->value;
VP8_BD_VALUE bigsplit;
int count;
unsigned int range;
value = br->value;
count = br->count;
range = br->range;
split = 1 + (((range - 1) * probability) >> 8);
bigsplit = (split << 8);
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
range = split;
@ -186,23 +174,16 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
}*/
{
int count = br->count;
register unsigned int shift = vp8dx_bitreader_norm[range];
range <<= shift;
value <<= shift;
count -= shift;
if (count <= 0)
{
value |= (*br->read_ptr) << (-count);
br->read_ptr = br_ptr_advance(br->read_ptr, 1);
count += 8 ;
}
br->count = count;
}
br->value = value;
br->count = count;
br->range = range;
if(count < 0)
vp8dx_bool_decoder_fill(br);
return bit;
}

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -171,8 +171,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
VP8_COMMON *const pc = &pbi->common;
MACROBLOCKD *xd = &pbi->mb;
vp8dx_bool_decoder_fill(bc);
mbmi->need_to_clamp_mvs = 0;
// Distance of Mb to the various image edges.
// These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
xd->mb_to_left_edge = -((mb_col * 16) << 3);
@ -270,17 +269,16 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
break;
}
/* Clip the MV for this partition so that it does
not extend too far out of the image. */
if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN + 7)
mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN + 7;
if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN + 7)
mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN + 7;
if (mv->col < xd->mb_to_left_edge
- LEFT_TOP_MARGIN
|| mv->col > xd->mb_to_right_edge
+ RIGHT_BOTTOM_MARGIN
|| mv->row < xd->mb_to_top_edge
- LEFT_TOP_MARGIN
|| mv->row > xd->mb_to_bottom_edge
+ RIGHT_BOTTOM_MARGIN
)
mbmi->need_to_clamp_mvs = 1;
/* Fill (uniform) modes, mvs of jth subset.
Must do it here because ensuing subsets can
@ -338,27 +336,18 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
read_mv(bc, mv, (const MV_CONTEXT *) mvc);
mv->row += best_mv.row;
mv->col += best_mv.col;
/* Encoder should not produce invalid motion vectors, but since
* arbitrary length MVs can be parsed from the bitstream, we
* need to clamp them here in case we're reading bad data to
* avoid a crash.
/* Don't need to check this on NEARMV and NEARESTMV modes
* since those modes clamp the MV. The NEWMV mode does not,
* so signal to the prediction stage whether special
* handling may be required.
*/
#if CONFIG_DEBUG
assert(mv->col >= (xd->mb_to_left_edge - LEFT_TOP_MARGIN));
assert(mv->col <= (xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN));
assert(mv->row >= (xd->mb_to_top_edge - LEFT_TOP_MARGIN));
assert(mv->row <= (xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN));
#endif
if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
if (mv->col < xd->mb_to_left_edge - LEFT_TOP_MARGIN
|| mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN
|| mv->row < xd->mb_to_top_edge - LEFT_TOP_MARGIN
|| mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN
)
mbmi->need_to_clamp_mvs = 1;
propagate_mv: /* same MV throughout */
{
@ -394,7 +383,6 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
assert(0);
#endif
}
}
else
{

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/

Просмотреть файл

@ -1,10 +1,10 @@
/*
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
@ -18,6 +18,7 @@
extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
MACROBLOCKD *xd);
extern void vp8_mt_loop_filter_frame(VP8D_COMP *pbi);
extern void vp8_stop_lfthread(VP8D_COMP *pbi);
extern void vp8_start_lfthread(VP8D_COMP *pbi);
extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше