зеркало из https://github.com/mozilla/gecko-dev.git
Bug 593753 - Update libvpx to 0.9.2. r=cpearce a=blocking
This commit is contained in:
Родитель
c886a42334
Коммит
f5c25b4efb
|
@ -138,7 +138,6 @@ CSRCS += \
|
|||
reconinter.c \
|
||||
reconintra.c \
|
||||
reconintra4x4.c \
|
||||
segmentation_common.c \
|
||||
setupintrarecon.c \
|
||||
swapyv12buffer.c \
|
||||
textblit.c \
|
||||
|
@ -149,6 +148,7 @@ CSRCS += \
|
|||
demode.c \
|
||||
dequantize.c \
|
||||
detokenize.c \
|
||||
idct_blk.c \
|
||||
onyxd_if.c \
|
||||
threading.c \
|
||||
vp8_dx_iface.c \
|
||||
|
@ -169,6 +169,8 @@ ifdef VPX_X86_ASM
|
|||
# Building on an x86 platform with a supported assembler, include
|
||||
# the optimized assembly in the build.
|
||||
CSRCS += \
|
||||
idct_blk_mmx.c \
|
||||
idct_blk_sse2.c \
|
||||
loopfilter_x86.c \
|
||||
vp8_asm_stubs.c \
|
||||
x86_systemdependent.c \
|
||||
|
@ -177,6 +179,7 @@ CSRCS += \
|
|||
|
||||
ASFILES += \
|
||||
idctllm_mmx.asm \
|
||||
idctllm_sse2.asm \
|
||||
iwalsh_mmx.asm \
|
||||
iwalsh_sse2.asm \
|
||||
loopfilter_mmx.asm \
|
||||
|
@ -187,6 +190,7 @@ ASFILES += \
|
|||
recon_sse2.asm \
|
||||
subpixel_mmx.asm \
|
||||
subpixel_sse2.asm \
|
||||
subpixel_ssse3.asm \
|
||||
dequantize_mmx.asm \
|
||||
emms.asm \
|
||||
$(NULL)
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
diff --git a/media/libvpx/vp8/decoder/decodemv.c b/media/libvpx/vp8/decoder/decodemv.c
|
||||
--- a/media/libvpx/vp8/decoder/decodemv.c
|
||||
+++ b/media/libvpx/vp8/decoder/decodemv.c
|
||||
@@ -222,23 +222,24 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
|
||||
{
|
||||
B_MODE_INFO *const bmi = mbmi->partition_bmi + j;
|
||||
MV *const mv = & bmi->mv.as_mv;
|
||||
|
||||
int k = -1; /* first block in subset j */
|
||||
int mv_contz;
|
||||
|
||||
while (j != L[++k])
|
||||
+ {
|
||||
+#if CONFIG_DEBUG
|
||||
if (k >= 16)
|
||||
-#if CONFIG_DEBUG
|
||||
+ {
|
||||
assert(0);
|
||||
-
|
||||
-#else
|
||||
- ;
|
||||
+ }
|
||||
#endif
|
||||
+ }
|
||||
|
||||
mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
|
||||
|
||||
switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
|
||||
{
|
||||
case NEW4X4:
|
||||
read_mv(bc, mv, (const MV_CONTEXT *) mvc);
|
||||
mv->row += best_mv.row;
|
|
@ -18,7 +18,7 @@ diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/com
|
|||
|
||||
typedef unsigned char uc;
|
||||
|
||||
__inline signed char vp8_signed_char_clamp(int t)
|
||||
static __inline signed char vp8_signed_char_clamp(int t)
|
||||
{
|
||||
t = (t < -128 ? -128 : t);
|
||||
diff --git a/media/libvpx/vpx/internal/vpx_codec_internal.h b/media/libvpx/vpx/internal/vpx_codec_internal.h
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
diff --git a/media/libvpx/vp8/decoder/decodemv.c b/media/libvpx/vp8/decoder/decodemv.c
|
||||
--- a/media/libvpx/vp8/decoder/decodemv.c
|
||||
+++ b/media/libvpx/vp8/decoder/decodemv.c
|
||||
@@ -264,16 +264,28 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
|
||||
#ifdef VPX_MODE_COUNT
|
||||
vp8_mv_cont_count[mv_contz][2]++;
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
+ /* Clip the MV for this partition so that it does
|
||||
+ not extend to far out of image. */
|
||||
+ if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
+ mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
+ else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN + 7)
|
||||
+ mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN + 7;
|
||||
+
|
||||
+ if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
+ mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
+ else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN + 7)
|
||||
+ mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN + 7;
|
||||
+
|
||||
/* Fill (uniform) modes, mvs of jth subset.
|
||||
Must do it here because ensuing subsets can
|
||||
refer back to us via "left" or "above". */
|
||||
do
|
||||
if (j == L[k])
|
||||
mi->bmi[k] = *bmi;
|
||||
|
||||
while (++k < 16);
|
|
@ -1,41 +0,0 @@
|
|||
diff -r 5c557d4dd0c7 media/libvpx/vpx_ports/x86_abi_support.asm
|
||||
--- a/media/libvpx/vpx_ports/x86_abi_support.asm Wed Jun 16 11:12:38 2010 +1200
|
||||
+++ b/media/libvpx/vpx_ports/x86_abi_support.asm Thu Jun 17 15:09:49 2010 -0700
|
||||
@@ -138,12 +138,16 @@
|
||||
%endmacro
|
||||
%endif
|
||||
%endif
|
||||
+ %define HIDDEN_DATA
|
||||
%else
|
||||
%macro GET_GOT 1
|
||||
%endmacro
|
||||
%define GLOBAL wrt rip
|
||||
%ifidn __OUTPUT_FORMAT__,elf64
|
||||
%define WRT_PLT wrt ..plt
|
||||
+ %define HIDDEN_DATA :data hidden
|
||||
+ %else
|
||||
+ %define HIDDEN_DATA
|
||||
%endif
|
||||
%endif
|
||||
%ifnmacro GET_GOT
|
||||
diff -r 5c557d4dd0c7 media/libvpx/vp8/common/x86/subpixel_mmx.asm
|
||||
--- a/media/libvpx/vp8/common/x86/subpixel_mmx.asm Wed Jun 16 11:12:38 2010 +1200
|
||||
+++ b/media/libvpx/vp8/common/x86/subpixel_mmx.asm Thu Jun 17 15:09:49 2010 -0700
|
||||
@@ -731,7 +731,7 @@
|
||||
times 4 dw 0x40
|
||||
|
||||
align 16
|
||||
-global sym(vp8_six_tap_mmx)
|
||||
+global sym(vp8_six_tap_mmx) HIDDEN_DATA
|
||||
sym(vp8_six_tap_mmx):
|
||||
times 8 dw 0
|
||||
times 8 dw 0
|
||||
@@ -791,7 +791,7 @@
|
||||
|
||||
|
||||
align 16
|
||||
-global sym(vp8_bilinear_filters_mmx)
|
||||
+global sym(vp8_bilinear_filters_mmx) HIDDEN_DATA
|
||||
sym(vp8_bilinear_filters_mmx):
|
||||
times 8 dw 128
|
||||
times 8 dw 0
|
|
@ -75,7 +75,6 @@ commonFiles=(
|
|||
vp8/common/reconinter.c
|
||||
vp8/common/reconintra4x4.c
|
||||
vp8/common/reconintra.c
|
||||
vp8/common/segmentation_common.c
|
||||
vp8/common/setupintrarecon.c
|
||||
vp8/common/swapyv12buffer.c
|
||||
vp8/common/textblit.c
|
||||
|
@ -90,8 +89,11 @@ commonFiles=(
|
|||
vp8/decoder/dequantize.c
|
||||
vp8/decoder/detokenize.c
|
||||
vp8/decoder/generic/dsystemdependent.c
|
||||
vp8/decoder/idct_blk.c
|
||||
vp8/decoder/onyxd_if.c
|
||||
vp8/decoder/threading.c
|
||||
vp8/decoder/x86/idct_blk_mmx.c
|
||||
vp8/decoder/x86/idct_blk_sse2.c
|
||||
vp8/decoder/x86/x86_dsystemdependent.c
|
||||
vp8/vp8_dx_iface.c
|
||||
vpx/src/vpx_codec.c
|
||||
|
@ -183,6 +185,7 @@ commonFiles=(
|
|||
vpx_scale/yv12config.h
|
||||
vpx_scale/yv12extend.h
|
||||
vp8/common/x86/idctllm_mmx.asm
|
||||
vp8/common/x86/idctllm_sse2.asm
|
||||
vp8/common/x86/iwalsh_mmx.asm
|
||||
vp8/common/x86/iwalsh_sse2.asm
|
||||
vp8/common/x86/loopfilter_mmx.asm
|
||||
|
@ -193,6 +196,7 @@ commonFiles=(
|
|||
vp8/common/x86/recon_sse2.asm
|
||||
vp8/common/x86/subpixel_mmx.asm
|
||||
vp8/common/x86/subpixel_sse2.asm
|
||||
vp8/common/x86/subpixel_ssse3.asm
|
||||
vp8/decoder/x86/dequantize_mmx.asm
|
||||
vpx_ports/emms.asm
|
||||
vpx_ports/x86_abi_support.asm
|
||||
|
@ -241,10 +245,6 @@ done
|
|||
|
||||
# Patch to reduce compiler warnings, so we can compile with -Werror in mozilla.
|
||||
patch -p3 < reduce-warnings-1.patch
|
||||
patch -p3 < splitmv-bounds.patch
|
||||
patch -p3 < subpixel-qword.patch
|
||||
# Patch to make asm globals symbol hidden so linking succeeds on x86-64.
|
||||
patch -p3 < subpixel-hidden.patch
|
||||
patch -p3 < emptyif_warning.patch
|
||||
# Patch to compile with Sun Studio on Solaris
|
||||
patch -p3 < solaris.patch
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -24,43 +24,36 @@ extern void vp8_init_scan_order_mask();
|
|||
void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
|
||||
{
|
||||
int i;
|
||||
vpx_memset(mi - cols - 1, 0, sizeof(MODE_INFO) * cols + 1);
|
||||
vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
|
||||
|
||||
for (i = 0; i < rows; i++)
|
||||
{
|
||||
vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
|
||||
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->new_frame);
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->last_frame);
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->golden_frame);
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->alt_ref_frame);
|
||||
vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
|
||||
|
||||
vpx_free(oci->above_context[Y1CONTEXT]);
|
||||
vpx_free(oci->above_context[UCONTEXT]);
|
||||
vpx_free(oci->above_context[VCONTEXT]);
|
||||
vpx_free(oci->above_context[Y2CONTEXT]);
|
||||
vpx_free(oci->above_context);
|
||||
vpx_free(oci->mip);
|
||||
|
||||
oci->above_context[Y1CONTEXT] = 0;
|
||||
oci->above_context[UCONTEXT] = 0;
|
||||
oci->above_context[VCONTEXT] = 0;
|
||||
oci->above_context[Y2CONTEXT] = 0;
|
||||
oci->above_context = 0;
|
||||
oci->mip = 0;
|
||||
|
||||
// Structure used to minitor GF useage
|
||||
if (oci->gf_active_flags != 0)
|
||||
vpx_free(oci->gf_active_flags);
|
||||
|
||||
oci->gf_active_flags = 0;
|
||||
}
|
||||
|
||||
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
||||
{
|
||||
int i;
|
||||
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
|
||||
// our internal buffers are always multiples of 16
|
||||
|
@ -71,37 +64,33 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
|||
height += 16 - (height & 0xf);
|
||||
|
||||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
{
|
||||
oci->fb_idx_ref_cnt[0] = 0;
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
}
|
||||
|
||||
oci->new_fb_idx = 0;
|
||||
oci->lst_fb_idx = 1;
|
||||
oci->gld_fb_idx = 2;
|
||||
oci->alt_fb_idx = 3;
|
||||
|
||||
oci->fb_idx_ref_cnt[0] = 1;
|
||||
oci->fb_idx_ref_cnt[1] = 1;
|
||||
oci->fb_idx_ref_cnt[2] = 1;
|
||||
oci->fb_idx_ref_cnt[3] = 1;
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->new_frame, width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->last_frame, width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->golden_frame, width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->alt_ref_frame, width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
|
@ -123,33 +112,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
|||
oci->mi = oci->mip + oci->mode_info_stride + 1;
|
||||
|
||||
|
||||
oci->above_context[Y1CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 4 , 1);
|
||||
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
|
||||
|
||||
if (!oci->above_context[Y1CONTEXT])
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
oci->above_context[UCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
|
||||
|
||||
if (!oci->above_context[UCONTEXT])
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
oci->above_context[VCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
|
||||
|
||||
if (!oci->above_context[VCONTEXT])
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
oci->above_context[Y2CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols , 1);
|
||||
|
||||
if (!oci->above_context[Y2CONTEXT])
|
||||
if (!oci->above_context)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
|
@ -157,20 +122,6 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
|||
|
||||
vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
|
||||
|
||||
// Structures used to minitor GF usage
|
||||
if (oci->gf_active_flags != 0)
|
||||
vpx_free(oci->gf_active_flags);
|
||||
|
||||
oci->gf_active_flags = (unsigned char *)vpx_calloc(oci->mb_rows * oci->mb_cols, 1);
|
||||
|
||||
if (!oci->gf_active_flags)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
}
|
||||
|
||||
oci->gf_active_count = oci->mb_rows * oci->mb_cols;
|
||||
|
||||
return 0;
|
||||
}
|
||||
void vp8_setup_version(VP8_COMMON *cm)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -12,13 +12,13 @@
|
|||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count)
|
||||
{
|
||||
vpx_memcpy(t->l, l, sizeof(ENTROPY_CONTEXT) * count);
|
||||
vpx_memcpy(t->a, a, sizeof(ENTROPY_CONTEXT) * count);
|
||||
}
|
||||
|
||||
const int vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0};
|
||||
const int vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0};
|
||||
const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
|
||||
const int vp8_block2context[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3};
|
||||
|
||||
const unsigned char vp8_block2left[25] =
|
||||
{
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
|
||||
};
|
||||
const unsigned char vp8_block2above[25] =
|
||||
{
|
||||
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
|
||||
};
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -49,19 +49,19 @@ typedef struct
|
|||
} POS;
|
||||
|
||||
|
||||
typedef int ENTROPY_CONTEXT;
|
||||
|
||||
typedef char ENTROPY_CONTEXT;
|
||||
typedef struct
|
||||
{
|
||||
ENTROPY_CONTEXT l[4];
|
||||
ENTROPY_CONTEXT a[4];
|
||||
} TEMP_CONTEXT;
|
||||
ENTROPY_CONTEXT y1[4];
|
||||
ENTROPY_CONTEXT u[2];
|
||||
ENTROPY_CONTEXT v[2];
|
||||
ENTROPY_CONTEXT y2;
|
||||
} ENTROPY_CONTEXT_PLANES;
|
||||
|
||||
extern void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count);
|
||||
extern const int vp8_block2left[25];
|
||||
extern const int vp8_block2above[25];
|
||||
extern const int vp8_block2type[25];
|
||||
extern const int vp8_block2context[25];
|
||||
|
||||
extern const unsigned char vp8_block2left[25];
|
||||
extern const unsigned char vp8_block2above[25];
|
||||
|
||||
#define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
|
||||
Dest = ((A)!=0) + ((B)!=0);
|
||||
|
@ -174,9 +174,8 @@ typedef struct
|
|||
int dc_diff;
|
||||
unsigned char segment_id; // Which set of segmentation parameters should be used for this MB
|
||||
int force_no_skip;
|
||||
|
||||
int need_to_clamp_mvs;
|
||||
B_MODE_INFO partition_bmi[16];
|
||||
|
||||
} MB_MODE_INFO;
|
||||
|
||||
|
||||
|
@ -216,9 +215,10 @@ typedef struct
|
|||
{
|
||||
DECLARE_ALIGNED(16, short, diff[400]); // from idct diff
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
DECLARE_ALIGNED(16, short, reference[384]);
|
||||
//not used DECLARE_ALIGNED(16, short, reference[384]);
|
||||
DECLARE_ALIGNED(16, short, qcoeff[400]);
|
||||
DECLARE_ALIGNED(16, short, dqcoeff[400]);
|
||||
DECLARE_ALIGNED(16, char, eobs[25]);
|
||||
|
||||
// 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
|
||||
BLOCKD block[25];
|
||||
|
@ -233,14 +233,12 @@ typedef struct
|
|||
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
MB_MODE_INFO mbmi;
|
||||
|
||||
int up_available;
|
||||
int left_available;
|
||||
|
||||
// Y,U,V,Y2
|
||||
ENTROPY_CONTEXT *above_context[4]; // row of context for each plane
|
||||
ENTROPY_CONTEXT(*left_context)[4]; // (up to) 4 contexts ""
|
||||
ENTROPY_CONTEXT_PLANES *above_context;
|
||||
ENTROPY_CONTEXT_PLANES *left_context;
|
||||
|
||||
// 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
|
||||
unsigned char segmentation_enabled;
|
||||
|
@ -276,9 +274,6 @@ typedef struct
|
|||
int mb_to_top_edge;
|
||||
int mb_to_bottom_edge;
|
||||
|
||||
//char * gf_active_ptr;
|
||||
signed char *gf_active_ptr;
|
||||
|
||||
unsigned int frames_since_golden;
|
||||
unsigned int frames_till_alt_ref_frame;
|
||||
vp8_subpix_fn_t subpixel_predict;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -264,8 +264,10 @@ void vp8_entropy_mode_init()
|
|||
vp8_tokens_from_tree(vp8_uv_mode_encodings, vp8_uv_mode_tree);
|
||||
vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree);
|
||||
|
||||
vp8_tokens_from_tree(VP8_MVREFENCODINGS, vp8_mv_ref_tree);
|
||||
vp8_tokens_from_tree(VP8_SUBMVREFENCODINGS, vp8_sub_mv_ref_tree);
|
||||
vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array,
|
||||
vp8_mv_ref_tree, NEARESTMV);
|
||||
vp8_tokens_from_tree_offset(vp8_sub_mv_ref_encoding_array,
|
||||
vp8_sub_mv_ref_tree, LEFT4X4);
|
||||
|
||||
vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -54,10 +54,6 @@ extern struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS];
|
|||
extern struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS];
|
||||
extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
|
||||
|
||||
#define VP8_MVREFENCODINGS (vp8_mv_ref_encoding_array - NEARESTMV)
|
||||
#define VP8_SUBMVREFENCODINGS (vp8_sub_mv_ref_encoding_array - LEFT4X4)
|
||||
|
||||
|
||||
extern const vp8_tree_index vp8_small_mvtree[];
|
||||
|
||||
extern struct vp8_token_struct vp8_small_mvencodings [8];
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -39,7 +39,10 @@ static void extend_plane_borders
|
|||
|
||||
for (i = 0; i < h - 0 + 1; i++)
|
||||
{
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], el);
|
||||
// Some linkers will complain if we call vpx_memset with el set to a
|
||||
// constant 0.
|
||||
if (el)
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], el);
|
||||
vpx_memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -32,7 +32,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
|
||||
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c;
|
||||
rtcd->idct.idct16 = vp8_short_idct4x4llm_c;
|
||||
rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
|
||||
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
|
||||
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c;
|
||||
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c;
|
||||
|
||||
|
@ -61,7 +61,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
|
||||
|
||||
#if CONFIG_POSTPROC || CONFIG_VP8_ENCODER
|
||||
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -18,8 +18,10 @@
|
|||
#define prototype_idct(sym) \
|
||||
void sym(short *input, short *output, int pitch)
|
||||
|
||||
#define prototype_idct_scalar(sym) \
|
||||
void sym(short input, short *output, int pitch)
|
||||
#define prototype_idct_scalar_add(sym) \
|
||||
void sym(short input, \
|
||||
unsigned char *pred, unsigned char *output, \
|
||||
int pitch, int stride)
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "x86/idct_x86.h"
|
||||
|
@ -39,10 +41,10 @@ extern prototype_idct(vp8_idct_idct1);
|
|||
#endif
|
||||
extern prototype_idct(vp8_idct_idct16);
|
||||
|
||||
#ifndef vp8_idct_idct1_scalar
|
||||
#define vp8_idct_idct1_scalar vp8_dc_only_idct_c
|
||||
#ifndef vp8_idct_idct1_scalar_add
|
||||
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_c
|
||||
#endif
|
||||
extern prototype_idct_scalar(vp8_idct_idct1_scalar);
|
||||
extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add);
|
||||
|
||||
|
||||
#ifndef vp8_idct_iwalsh1
|
||||
|
@ -56,14 +58,14 @@ extern prototype_second_order(vp8_idct_iwalsh1);
|
|||
extern prototype_second_order(vp8_idct_iwalsh16);
|
||||
|
||||
typedef prototype_idct((*vp8_idct_fn_t));
|
||||
typedef prototype_idct_scalar((*vp8_idct_scalar_fn_t));
|
||||
typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
|
||||
typedef prototype_second_order((*vp8_second_order_fn_t));
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_idct_fn_t idct1;
|
||||
vp8_idct_fn_t idct16;
|
||||
vp8_idct_scalar_fn_t idct1_scalar;
|
||||
vp8_idct_fn_t idct1;
|
||||
vp8_idct_fn_t idct16;
|
||||
vp8_idct_scalar_add_fn_t idct1_scalar_add;
|
||||
|
||||
vp8_second_order_fn_t iwalsh1;
|
||||
vp8_second_order_fn_t iwalsh16;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -104,23 +104,30 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_dc_only_idct_c(short input_dc, short *output, int pitch)
|
||||
void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
|
||||
{
|
||||
int i;
|
||||
int a1;
|
||||
short *op = output;
|
||||
int shortpitch = pitch >> 1;
|
||||
a1 = ((input_dc + 4) >> 3);
|
||||
int a1 = ((input_dc + 4) >> 3);
|
||||
int r, c;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
op[0] = a1;
|
||||
op[1] = a1;
|
||||
op[2] = a1;
|
||||
op[3] = a1;
|
||||
op += shortpitch;
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
int a = a1 + pred_ptr[c] ;
|
||||
|
||||
if (a < 0)
|
||||
a = 0;
|
||||
|
||||
if (a > 255)
|
||||
a = 255;
|
||||
|
||||
dst_ptr[c] = (unsigned char) a ;
|
||||
}
|
||||
|
||||
dst_ptr += stride;
|
||||
pred_ptr += pitch;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_short_inv_walsh4x4_c(short *input, short *output)
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -65,7 +65,8 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
|
|||
{
|
||||
int i;
|
||||
|
||||
if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
|
||||
if (x->mode_info_context->mbmi.mode != B_PRED &&
|
||||
x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
// do 2nd order transform on the dc block
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -117,5 +117,14 @@ typedef struct
|
|||
#define LF_INVOKE(ctx,fn) vp8_lf_##fn
|
||||
#endif
|
||||
|
||||
typedef void loop_filter_uvfunction
|
||||
(
|
||||
unsigned char *u, // source pointer
|
||||
int p, // pitch
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
unsigned char *v
|
||||
);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -21,7 +21,7 @@
|
|||
|
||||
typedef unsigned char uc;
|
||||
|
||||
__inline signed char vp8_signed_char_clamp(int t)
|
||||
static __inline signed char vp8_signed_char_clamp(int t)
|
||||
{
|
||||
t = (t < -128 ? -128 : t);
|
||||
t = (t > 127 ? 127 : t);
|
||||
|
@ -30,7 +30,7 @@ __inline signed char vp8_signed_char_clamp(int t)
|
|||
|
||||
|
||||
// should we apply any filter at all ( 11111111 yes, 00000000 no)
|
||||
__inline signed char vp8_filter_mask(signed char limit, signed char flimit,
|
||||
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
|
||||
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
|
||||
{
|
||||
signed char mask = 0;
|
||||
|
@ -50,7 +50,7 @@ __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
|
|||
}
|
||||
|
||||
// is there high variance internal edge ( 11111111 yes, 00000000 no)
|
||||
__inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
signed char hev = 0;
|
||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||
|
@ -58,7 +58,7 @@ __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
|
|||
return hev;
|
||||
}
|
||||
|
||||
__inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
|
||||
{
|
||||
signed char ps0, qs0;
|
||||
|
@ -164,7 +164,7 @@ void vp8_loop_filter_vertical_edge_c
|
|||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
__inline void vp8_mbfilter(signed char mask, signed char hev,
|
||||
static __inline void vp8_mbfilter(signed char mask, signed char hev,
|
||||
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
||||
{
|
||||
signed char s, u;
|
||||
|
@ -284,7 +284,7 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||
}
|
||||
|
||||
// should we apply any filter at all ( 11111111 yes, 00000000 no)
|
||||
__inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
|
||||
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
// Why does this cause problems for win32?
|
||||
// error C2143: syntax error : missing ';' before 'type'
|
||||
|
@ -297,7 +297,7 @@ __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimi
|
|||
return mask;
|
||||
}
|
||||
|
||||
__inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
{
|
||||
signed char vp8_filter, Filter1, Filter2;
|
||||
signed char p1 = (signed char) * op1 ^ 0x80;
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -33,6 +33,7 @@ void vp8_initialize_common(void);
|
|||
#define MAXQ 127
|
||||
#define QINDEX_RANGE (MAXQ + 1)
|
||||
|
||||
#define NUM_YV12_BUFFERS 4
|
||||
|
||||
typedef struct frame_contexts
|
||||
{
|
||||
|
@ -94,15 +95,16 @@ typedef struct VP8Common
|
|||
YUV_TYPE clr_type;
|
||||
CLAMP_TYPE clamp_type;
|
||||
|
||||
YV12_BUFFER_CONFIG last_frame;
|
||||
YV12_BUFFER_CONFIG golden_frame;
|
||||
YV12_BUFFER_CONFIG alt_ref_frame;
|
||||
YV12_BUFFER_CONFIG new_frame;
|
||||
YV12_BUFFER_CONFIG *frame_to_show;
|
||||
|
||||
YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
|
||||
int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
|
||||
int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
|
||||
|
||||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
FRAME_TYPE last_frame_type; //Add to check if vp8_frame_init_loop_filter() can be skiped.
|
||||
FRAME_TYPE last_frame_type; //Add to check if vp8_frame_init_loop_filter() can be skipped.
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
|
@ -131,8 +133,6 @@ typedef struct VP8Common
|
|||
|
||||
unsigned int frames_since_golden;
|
||||
unsigned int frames_till_alt_ref_frame;
|
||||
unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0
|
||||
int gf_active_count;
|
||||
|
||||
/* We allocate a MODE_INFO struct for each macroblock, together with
|
||||
an extra row on top and column on the left to simplify prediction. */
|
||||
|
@ -165,8 +165,8 @@ typedef struct VP8Common
|
|||
int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1
|
||||
|
||||
// Y,U,V,Y2
|
||||
ENTROPY_CONTEXT *above_context[4]; // row of context for each plane
|
||||
ENTROPY_CONTEXT left_context[4][4]; // (up to) 4 contexts ""
|
||||
ENTROPY_CONTEXT_PLANES *above_context; // row of context for each plane
|
||||
ENTROPY_CONTEXT_PLANES left_context; // (up to) 4 contexts ""
|
||||
|
||||
|
||||
// keyframe block modes are predicted by their above, left neighbors
|
||||
|
@ -201,6 +201,7 @@ typedef struct VP8Common
|
|||
|
||||
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
|
||||
void vp8_init_loop_filter(VP8_COMMON *cm);
|
||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
|
||||
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -330,13 +330,6 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source,
|
|||
|
||||
}
|
||||
|
||||
|
||||
//Notes: It is better to change CHAR to unsigned or signed to
|
||||
//avoid error on ARM platform.
|
||||
char vp8_an[8][64][3072];
|
||||
int vp8_cd[8][64];
|
||||
|
||||
|
||||
double vp8_gaussian(double sigma, double mu, double x)
|
||||
{
|
||||
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -210,7 +210,8 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
|
|||
{
|
||||
int i;
|
||||
|
||||
if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
|
||||
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
|
||||
x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
unsigned char *uptr, *vptr;
|
||||
unsigned char *upred_ptr = &x->predictor[256];
|
||||
|
@ -254,16 +255,18 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
//encoder only
|
||||
void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
|
||||
{
|
||||
if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
|
||||
|
||||
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
|
||||
x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
unsigned char *pred_ptr = x->predictor;
|
||||
int mv_row = x->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mbmi.mv.as_mv.col;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->block[0].pre_stride;
|
||||
|
||||
ptr_base = x->pre.y_buffer;
|
||||
|
@ -282,7 +285,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
|
|||
{
|
||||
int i;
|
||||
|
||||
if (x->mbmi.partitioning < 3)
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
|
@ -313,7 +316,9 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
|
|||
|
||||
void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
|
||||
{
|
||||
if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
|
||||
|
||||
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
|
||||
x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
int offset;
|
||||
unsigned char *ptr_base;
|
||||
|
@ -323,8 +328,8 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
|
|||
unsigned char *upred_ptr = &x->predictor[256];
|
||||
unsigned char *vpred_ptr = &x->predictor[320];
|
||||
|
||||
int mv_row = x->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mbmi.mv.as_mv.col;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->block[0].pre_stride;
|
||||
|
||||
ptr_base = x->pre.y_buffer;
|
||||
|
@ -361,7 +366,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
|
|||
{
|
||||
int i;
|
||||
|
||||
if (x->mbmi.partitioning < 3)
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
|
@ -410,7 +415,7 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
|
|||
{
|
||||
int i, j;
|
||||
|
||||
if (x->mbmi.mode == SPLITMV)
|
||||
if (x->mode_info_context->mbmi.mode == SPLITMV)
|
||||
{
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
|
@ -455,8 +460,8 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
|
|||
}
|
||||
else
|
||||
{
|
||||
int mvrow = x->mbmi.mv.as_mv.row;
|
||||
int mvcol = x->mbmi.mv.as_mv.col;
|
||||
int mvrow = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mvcol = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
|
||||
if (mvrow < 0)
|
||||
mvrow -= 1;
|
||||
|
@ -535,7 +540,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
|
|||
unsigned char *pred_ptr = x->predictor;
|
||||
unsigned char *dst_ptr = x->dst.y_buffer;
|
||||
|
||||
if (x->mbmi.mode != SPLITMV)
|
||||
if (x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
int offset;
|
||||
unsigned char *ptr_base;
|
||||
|
@ -547,8 +552,8 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
|
|||
unsigned char *udst_ptr = x->dst.u_buffer;
|
||||
unsigned char *vdst_ptr = x->dst.v_buffer;
|
||||
|
||||
int mv_row = x->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mbmi.mv.as_mv.col;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;
|
||||
|
||||
ptr_base = x->pre.y_buffer;
|
||||
|
@ -587,7 +592,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
|
|||
//if sth is wrong, go back to what it is in build_inter_predictors_mb.
|
||||
int i;
|
||||
|
||||
if (x->mbmi.partitioning < 3)
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -43,7 +43,7 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
|
|||
}
|
||||
|
||||
// for Y
|
||||
switch (x->mbmi.mode)
|
||||
switch (x->mode_info_context->mbmi.mode)
|
||||
{
|
||||
case DC_PRED:
|
||||
{
|
||||
|
@ -164,7 +164,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
|
|||
}
|
||||
|
||||
// for Y
|
||||
switch (x->mbmi.mode)
|
||||
switch (x->mode_info_context->mbmi.mode)
|
||||
{
|
||||
case DC_PRED:
|
||||
{
|
||||
|
@ -290,7 +290,7 @@ void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x)
|
|||
vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
|
||||
}
|
||||
|
||||
switch (x->mbmi.uv_mode)
|
||||
switch (x->mode_info_context->mbmi.uv_mode)
|
||||
{
|
||||
case DC_PRED:
|
||||
{
|
||||
|
@ -430,7 +430,7 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
|
|||
vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
|
||||
}
|
||||
|
||||
switch (x->mbmi.uv_mode)
|
||||
switch (x->mode_info_context->mbmi.uv_mode)
|
||||
{
|
||||
case DC_PRED:
|
||||
{
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,64 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "segmentation_common.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
// Refresh the per-macroblock golden-frame activity flags held in
// cm->gf_active_flags and keep cm->gf_active_count consistent with them.
//
// cm: supplies frame_type, refresh_golden_frame, mb_rows/mb_cols, the mode
//     info array (cm->mi) and the flag map / counter that get updated.
// xd: its gf_active_ptr is pointed at the start of the flag map on entry
//     and, on the per-macroblock path, advanced once per macroblock.
//
// Key frame or golden refresh: every flag is set to 1 and the count becomes
// mb_rows * mb_cols. Otherwise each macroblock is visited row by row and its
// flag is set, cleared or left alone according to its ref_frame / mode.
void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd)
|
||||
{
|
||||
int mb_row, mb_col;
|
||||
|
||||
MODE_INFO *this_mb_mode_info = cm->mi;
|
||||
|
||||
xd->gf_active_ptr = (signed char *)cm->gf_active_flags;
|
||||
|
||||
if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
|
||||
{
|
||||
// Reset GF usage monitors: mark every macroblock active
|
||||
vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
|
||||
cm->gf_active_count = cm->mb_rows * cm->mb_cols;
|
||||
}
|
||||
else
|
||||
{
|
||||
// for each macroblock row in image
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
// for each macroblock col in image
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
|
||||
// If using golden then set GF active flag if not already set.
|
||||
// If using last frame 0,0 mode then leave flag as it is
|
||||
// else if using non 0,0 motion or intra modes then clear flag if it is currently set
|
||||
if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME))
|
||||
{
|
||||
if (*(xd->gf_active_ptr) == 0)
|
||||
{
|
||||
*(xd->gf_active_ptr) = 1;
|
||||
cm->gf_active_count ++;
|
||||
}
|
||||
}
|
||||
else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(xd->gf_active_ptr))
|
||||
{
|
||||
*(xd->gf_active_ptr) = 0;
|
||||
cm->gf_active_count--;
|
||||
}
|
||||
|
||||
xd->gf_active_ptr++; // Step onto next entry
|
||||
this_mb_mode_info++; // skip to next mb
|
||||
|
||||
}
|
||||
|
||||
// this is to account for the border
|
||||
// (one extra MODE_INFO entry is skipped per row; the flag map pointer is not)
|
||||
this_mb_mode_info++;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -75,7 +75,8 @@
|
|||
#define thread_sleep(nms) // { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#define thread_sleep(nms) usleep(nms*1000);// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
|
||||
#include <sched.h>
|
||||
#define thread_sleep(nms) sched_yield();// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
|
||||
#endif
|
||||
/* Not Windows. Assume pthreads */
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -47,6 +47,12 @@ void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t)
|
|||
tree2tok(p, t, 0, 0, 0);
|
||||
}
|
||||
|
||||
/* Variant of vp8_tokens_from_tree: hands tree2tok the table pointer moved
 * back by `offset` entries (p - offset) instead of p itself.
 * NOTE(review): presumably the tree's token values start at `offset`, so
 * that value v ends up in p[v - offset] -- confirm against tree2tok.
 * NOTE(review): for offset > 0 the pointer passed on lies before p; check
 * that forming this intermediate pointer is acceptable for all callers. */
void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t,
|
||||
int offset)
|
||||
{
|
||||
tree2tok(p - offset, t, 0, 0, 0);
|
||||
}
|
||||
|
||||
static void branch_counts(
|
||||
int n, /* n = size of alphabet */
|
||||
vp8_token tok [ /* n */ ],
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -54,6 +54,8 @@ typedef const struct vp8_token_struct
|
|||
/* Construct encoding array from tree. */
|
||||
|
||||
void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree);
|
||||
void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree,
|
||||
int offset);
|
||||
|
||||
|
||||
/* Convert array of token occurrence counts into a table of probabilities
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -22,7 +22,7 @@
|
|||
#if HAVE_MMX
|
||||
extern prototype_idct(vp8_short_idct4x4llm_1_mmx);
|
||||
extern prototype_idct(vp8_short_idct4x4llm_mmx);
|
||||
extern prototype_idct_scalar(vp8_dc_only_idct_mmx);
|
||||
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx);
|
||||
|
||||
extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
|
||||
extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
|
||||
|
@ -34,8 +34,8 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
|
|||
#undef vp8_idct_idct16
|
||||
#define vp8_idct_idct16 vp8_short_idct4x4llm_mmx
|
||||
|
||||
#undef vp8_idct_idct1_scalar
|
||||
#define vp8_idct_idct1_scalar vp8_dc_only_idct_mmx
|
||||
#undef vp8_idct_idct1_scalar_add
|
||||
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_mmx
|
||||
|
||||
#undef vp8_idct_iwalsh16
|
||||
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
@ -220,35 +220,61 @@ sym(vp8_short_idct4x4llm_1_mmx):
|
|||
pop rbp
|
||||
ret
|
||||
|
||||
;void dc_only_idct_mmx(short input_dc, short *output, int pitch)
|
||||
global sym(vp8_dc_only_idct_mmx)
|
||||
sym(vp8_dc_only_idct_mmx):
|
||||
;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
|
||||
global sym(vp8_dc_only_idct_add_mmx)
|
||||
sym(vp8_dc_only_idct_add_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
movd mm0, arg(0) ;input_dc
|
||||
mov rsi, arg(1) ;s -- prediction
|
||||
mov rdi, arg(2) ;d -- destination
|
||||
movsxd rax, dword ptr arg(4) ;stride
|
||||
movsxd rdx, dword ptr arg(3) ;pitch
|
||||
pxor mm0, mm0
|
||||
|
||||
paddw mm0, [fours GLOBAL]
|
||||
mov rdx, arg(1) ;output
|
||||
movd mm5, arg(0) ;input_dc
|
||||
|
||||
psraw mm0, 3
|
||||
movsxd rax, dword ptr arg(2) ;pitch
|
||||
paddw mm5, [fours GLOBAL]
|
||||
|
||||
punpcklwd mm0, mm0
|
||||
punpckldq mm0, mm0
|
||||
psraw mm5, 3
|
||||
|
||||
movq [rdx], mm0
|
||||
movq [rdx+rax], mm0
|
||||
punpcklwd mm5, mm5
|
||||
punpckldq mm5, mm5
|
||||
|
||||
movq [rdx+rax*2], mm0
|
||||
add rdx, rax
|
||||
movd mm1, [rsi]
|
||||
punpcklbw mm1, mm0
|
||||
paddsw mm1, mm5
|
||||
packuswb mm1, mm0 ; pack and unpack to saturate
|
||||
movd [rdi], mm1
|
||||
|
||||
movq [rdx+rax*2], mm0
|
||||
movd mm2, [rsi+rdx]
|
||||
punpcklbw mm2, mm0
|
||||
paddsw mm2, mm5
|
||||
packuswb mm2, mm0 ; pack and unpack to saturate
|
||||
movd [rdi+rax], mm2
|
||||
|
||||
movd mm3, [rsi+2*rdx]
|
||||
punpcklbw mm3, mm0
|
||||
paddsw mm3, mm5
|
||||
packuswb mm3, mm0 ; pack and unpack to saturate
|
||||
movd [rdi+2*rax], mm3
|
||||
|
||||
add rdi, rax
|
||||
add rsi, rdx
|
||||
movd mm4, [rsi+2*rdx]
|
||||
punpcklbw mm4, mm0
|
||||
paddsw mm4, mm5
|
||||
packuswb mm4, mm0 ; pack and unpack to saturate
|
||||
movd [rdi+2*rax], mm4
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
|
|
|
@ -0,0 +1,708 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
; DC-only path for two 4x4 blocks handled side by side (8 pixels per row):
; each block's first coefficient is multiplied by dequant[0], rounded and
; shifted right by 3, added to four rows of prediction and stored to dst
; with unsigned byte saturation. The two coefficients read are zeroed.
;void idct_dequant_0_2x_sse2
|
||||
; (
|
||||
; short *qcoeff - 0
|
||||
; short *dequant - 1
|
||||
; unsigned char *pre - 2
|
||||
; unsigned char *dst - 3
|
||||
; int dst_stride - 4
|
||||
; int blk_stride - 5
|
||||
; )
|
||||
|
||||
global sym(idct_dequant_0_2x_sse2)
|
||||
sym(idct_dequant_0_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
GET_GOT rbx
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(1) ; dequant
|
||||
mov rax, arg(0) ; qcoeff
|
||||
|
||||
; Zero out xmm7, for use unpacking
|
||||
pxor xmm7, xmm7
|
||||
|
||||
; word lane 0 <- qcoeff[0] / dequant[0]
|
||||
movd xmm4, [rax]
|
||||
movd xmm5, [rdx]
|
||||
|
||||
; word lane 4 <- the coefficient 32 bytes (16 shorts) further on, paired
|
||||
; with the same dequant[0] factor
|
||||
pinsrw xmm4, [rax+32], 4
|
||||
pinsrw xmm5, [rdx], 4
|
||||
|
||||
pmullw xmm4, xmm5
|
||||
|
||||
; clear coeffs
|
||||
; (writes 4 zero bytes at qcoeff and at qcoeff+32)
|
||||
movd [rax], xmm7
|
||||
movd [rax+32], xmm7
|
||||
;pshufb
|
||||
; broadcast lane 0 over the low four words and lane 4 over the high four
|
||||
pshuflw xmm4, xmm4, 00000000b
|
||||
pshufhw xmm4, xmm4, 00000000b
|
||||
|
||||
mov rax, arg(2) ; pre
|
||||
; NOTE(review): `fours` is defined outside this view; together with the
|
||||
; shift by 3 below it looks like round-to-nearest -- confirm its value
|
||||
paddw xmm4, [fours GLOBAL]
|
||||
|
||||
movsxd rcx, dword ptr arg(5) ; blk_stride
|
||||
psraw xmm4, 3
|
||||
|
||||
; load 8 prediction bytes from each of four rows, blk_stride apart
|
||||
movq xmm0, [rax]
|
||||
movq xmm1, [rax+rcx]
|
||||
movq xmm2, [rax+2*rcx]
|
||||
; rcx = 3 * blk_stride, offset of the fourth row
|
||||
lea rcx, [3*rcx]
|
||||
movq xmm3, [rax+rcx]
|
||||
|
||||
; widen prediction bytes to words (xmm7 is zero)
|
||||
punpcklbw xmm0, xmm7
|
||||
punpcklbw xmm1, xmm7
|
||||
punpcklbw xmm2, xmm7
|
||||
punpcklbw xmm3, xmm7
|
||||
|
||||
mov rax, arg(3) ; dst
|
||||
movsxd rdx, dword ptr arg(4) ; dst_stride
|
||||
|
||||
; Add to predict buffer
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm4
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm4
|
||||
|
||||
; pack up before storing
|
||||
; (packuswb saturates each word to an unsigned byte)
|
||||
packuswb xmm0, xmm7
|
||||
packuswb xmm1, xmm7
|
||||
packuswb xmm2, xmm7
|
||||
packuswb xmm3, xmm7
|
||||
|
||||
; store blocks back out
|
||||
; (8 bytes per row, rows dst_stride apart)
|
||||
movq [rax], xmm0
|
||||
movq [rax + rdx], xmm1
|
||||
|
||||
lea rax, [rax + 2*rdx]
|
||||
|
||||
movq [rax], xmm2
|
||||
movq [rax + rdx], xmm3
|
||||
|
||||
; begin epilog
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; Full dequantize + two-pass inverse transform + add-to-prediction for two
; 4x4 blocks processed together (block 0 in the low half of each register,
; block 1 in the high half). The 64 bytes read from qcoeff are zeroed.
; Arguments as read through arg(n) below:
; short *qcoeff - 0 (accessed with movdqa: must be 16-byte aligned)
; short *dequant - 1 (used as an aligned m128 operand of pmullw)
; unsigned char *pre - 2
; unsigned char *dst - 3
; int dst_stride - 4
; int blk_stride - 5
global sym(idct_dequant_full_2x_sse2)
|
||||
sym(idct_dequant_full_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
; NOTE(review): only arg(0)..arg(5) are referenced in this routine;
|
||||
; confirm whether shadowing 7 arguments is intended
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; general case: all coefficients of both blocks are loaded from qcoeff,
|
||||
; dequantized and run through the two transform passes below
|
||||
mov rax, arg(0) ; qcoeff
|
||||
mov rsi, arg(2) ; pre
|
||||
mov rdi, arg(3) ; dst
|
||||
movsxd rcx, dword ptr arg(5) ; blk_stride
|
||||
|
||||
; Zero out xmm7, for use unpacking
|
||||
pxor xmm7, xmm7
|
||||
|
||||
mov rdx, arg(1) ; dequant
|
||||
|
||||
; note the transpose of xmm1 and xmm2, necessary for shuffle
|
||||
; to spit out sensible data
|
||||
movdqa xmm0, [rax]
|
||||
movdqa xmm2, [rax+16]
|
||||
movdqa xmm1, [rax+32]
|
||||
movdqa xmm3, [rax+48]
|
||||
|
||||
; Clear out coeffs
|
||||
movdqa [rax], xmm7
|
||||
movdqa [rax+16], xmm7
|
||||
movdqa [rax+32], xmm7
|
||||
movdqa [rax+48], xmm7
|
||||
|
||||
; dequantize qcoeff buffer
|
||||
; (both blocks use the same 32 bytes of dequant factors)
|
||||
pmullw xmm0, [rdx]
|
||||
pmullw xmm2, [rdx+16]
|
||||
pmullw xmm1, [rdx]
|
||||
pmullw xmm3, [rdx+16]
|
||||
|
||||
; repack so block 0 row x and block 1 row x are together
|
||||
movdqa xmm4, xmm0
|
||||
punpckldq xmm0, xmm1
|
||||
punpckhdq xmm4, xmm1
|
||||
|
||||
pshufd xmm0, xmm0, 11011000b
|
||||
pshufd xmm1, xmm4, 11011000b
|
||||
|
||||
movdqa xmm4, xmm2
|
||||
punpckldq xmm2, xmm3
|
||||
punpckhdq xmm4, xmm3
|
||||
|
||||
pshufd xmm2, xmm2, 11011000b
|
||||
pshufd xmm3, xmm4, 11011000b
|
||||
|
||||
; first pass
|
||||
psubw xmm0, xmm2 ; b1 = 0-2
|
||||
paddw xmm2, xmm2 ;
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [x_s1sqr2 GLOBAL]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [x_s1sqr2 GLOBAL]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
movdqa xmm6, xmm2 ; a1
|
||||
|
||||
movdqa xmm4, xmm0 ; b1
|
||||
paddw xmm2, xmm3 ;0
|
||||
|
||||
paddw xmm4, xmm7 ;1
|
||||
psubw xmm0, xmm7 ;2
|
||||
|
||||
psubw xmm6, xmm3 ;3
|
||||
|
||||
; transpose for the second pass
|
||||
movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
|
||||
punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
|
||||
punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
|
||||
|
||||
movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
|
||||
punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
|
||||
punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
|
||||
|
||||
|
||||
movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
|
||||
punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
|
||||
punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
|
||||
|
||||
movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
|
||||
punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
|
||||
punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
|
||||
|
||||
|
||||
movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
|
||||
punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
|
||||
punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
|
||||
|
||||
movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
|
||||
punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
|
||||
punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
|
||||
|
||||
pshufd xmm0, xmm2, 11011000b
|
||||
pshufd xmm2, xmm1, 11011000b
|
||||
|
||||
pshufd xmm1, xmm5, 11011000b
|
||||
pshufd xmm3, xmm7, 11011000b
|
||||
|
||||
; second pass
|
||||
psubw xmm0, xmm2 ; b1 = 0-2
|
||||
paddw xmm2, xmm2
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
paddw xmm2, xmm0 ; a1 = 0+2
|
||||
|
||||
pmulhw xmm5, [x_s1sqr2 GLOBAL]
|
||||
paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2)
|
||||
|
||||
movdqa xmm7, xmm3
|
||||
pmulhw xmm7, [x_c1sqr2less1 GLOBAL]
|
||||
|
||||
paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2)
|
||||
psubw xmm7, xmm5 ; c1
|
||||
|
||||
movdqa xmm5, xmm1
|
||||
movdqa xmm4, xmm3
|
||||
|
||||
pmulhw xmm5, [x_c1sqr2less1 GLOBAL]
|
||||
paddw xmm5, xmm1
|
||||
|
||||
pmulhw xmm3, [x_s1sqr2 GLOBAL]
|
||||
paddw xmm3, xmm4
|
||||
|
||||
paddw xmm3, xmm5 ; d1
|
||||
; the rounding term is added to b1 and a1 once, so each of the four
|
||||
; outputs formed below carries it before the shift right by 3
|
||||
paddw xmm0, [fours GLOBAL]
|
||||
|
||||
paddw xmm2, [fours GLOBAL]
|
||||
movdqa xmm6, xmm2 ; a1
|
||||
|
||||
movdqa xmm4, xmm0 ; b1
|
||||
paddw xmm2, xmm3 ;0
|
||||
|
||||
paddw xmm4, xmm7 ;1
|
||||
psubw xmm0, xmm7 ;2
|
||||
|
||||
psubw xmm6, xmm3 ;3
|
||||
psraw xmm2, 3
|
||||
|
||||
psraw xmm0, 3
|
||||
psraw xmm4, 3
|
||||
|
||||
psraw xmm6, 3
|
||||
|
||||
; transpose to save
|
||||
movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000
|
||||
punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000
|
||||
punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100
|
||||
|
||||
movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008
|
||||
punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008
|
||||
punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108
|
||||
|
||||
|
||||
movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000
|
||||
punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000
|
||||
punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002
|
||||
|
||||
movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100
|
||||
punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100
|
||||
punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102
|
||||
|
||||
|
||||
movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000
|
||||
punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000
|
||||
punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001
|
||||
|
||||
movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002
|
||||
punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002
|
||||
punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003
|
||||
|
||||
pshufd xmm0, xmm2, 11011000b
|
||||
pshufd xmm2, xmm1, 11011000b
|
||||
|
||||
pshufd xmm1, xmm5, 11011000b
|
||||
pshufd xmm3, xmm7, 11011000b
|
||||
|
||||
; xmm7 was used as scratch above; zero it again for unpack/pack
|
||||
pxor xmm7, xmm7
|
||||
|
||||
; Load up predict blocks
|
||||
; (8 bytes from each of four rows, blk_stride apart)
|
||||
movq xmm4, [rsi]
|
||||
movq xmm5, [rsi+rcx]
|
||||
|
||||
punpcklbw xmm4, xmm7
|
||||
punpcklbw xmm5, xmm7
|
||||
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm5
|
||||
|
||||
movq xmm4, [rsi+2*rcx]
|
||||
; rcx = 3 * blk_stride, offset of the fourth row
|
||||
lea rcx, [3*rcx]
|
||||
movq xmm5, [rsi+rcx]
|
||||
|
||||
punpcklbw xmm4, xmm7
|
||||
punpcklbw xmm5, xmm7
|
||||
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm5
|
||||
|
||||
; NOTE(review): no jump to this label is visible in this routine
|
||||
.finish:
|
||||
|
||||
; pack up before storing
|
||||
; (packuswb saturates each word to an unsigned byte)
|
||||
packuswb xmm0, xmm7
|
||||
packuswb xmm1, xmm7
|
||||
packuswb xmm2, xmm7
|
||||
packuswb xmm3, xmm7
|
||||
|
||||
; Load destination stride before writing out,
|
||||
; doesn't need to persist
|
||||
movsxd rdx, dword ptr arg(4) ; dst_stride
|
||||
|
||||
; store blocks back out
|
||||
movq [rdi], xmm0
|
||||
movq [rdi + rdx], xmm1
|
||||
|
||||
lea rdi, [rdi + 2*rdx]
|
||||
|
||||
movq [rdi], xmm2
|
||||
movq [rdi + rdx], xmm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; DC-only path for two 4x4 blocks side by side where the DC values come
; from the separate dc array: dc[0] and dc[1] are rounded, shifted right
; by 3, added to four rows of prediction and stored to dst with unsigned
; byte saturation. qcoeff and dequant are not read or written here.
;void idct_dequant_dc_0_2x_sse2
|
||||
; (
|
||||
; short *qcoeff - 0
|
||||
; short *dequant - 1
|
||||
; unsigned char *pre - 2
|
||||
; unsigned char *dst - 3
|
||||
; int dst_stride - 4
|
||||
; short *dc - 5
|
||||
; )
|
||||
global sym(idct_dequant_dc_0_2x_sse2)
|
||||
sym(idct_dequant_dc_0_2x_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
; NOTE(review): the header above lists six arguments (0..5); confirm
|
||||
; whether shadowing 7 is intended
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; special case when 2 blocks have 0 or 1 coeffs
|
||||
; dc is set as first coeff, so no need to load qcoeff
|
||||
; NOTE(review): rax is loaded with qcoeff here but not referenced again
|
||||
mov rax, arg(0) ; qcoeff
|
||||
mov rsi, arg(2) ; pre
|
||||
mov rdi, arg(3) ; dst
|
||||
mov rdx, arg(5) ; dc
|
||||
|
||||
; Zero out xmm7, for use unpacking
|
||||
pxor xmm7, xmm7
|
||||
|
||||
; load up 2 dc words here == 2*16 = doubleword
|
||||
movd xmm4, [rdx]
|
||||
|
||||
; Load up predict blocks
|
||||
; (rows are read at fixed byte offsets 0/16/32/48 from pre, i.e. a
|
||||
; prediction pitch of 16 is hard-coded here)
|
||||
movq xmm0, [rsi]
|
||||
movq xmm1, [rsi+16]
|
||||
movq xmm2, [rsi+32]
|
||||
movq xmm3, [rsi+48]
|
||||
|
||||
; Duplicate and expand dc across
|
||||
; (result: low four words = dc[0], high four words = dc[1])
|
||||
punpcklwd xmm4, xmm4
|
||||
punpckldq xmm4, xmm4
|
||||
|
||||
; Rounding to dequant and downshift
|
||||
; NOTE(review): `fours` is defined outside this view -- confirm its value
|
||||
paddw xmm4, [fours GLOBAL]
|
||||
psraw xmm4, 3
|
||||
|
||||
; Predict buffer needs to be expanded from bytes to words
|
||||
punpcklbw xmm0, xmm7
|
||||
punpcklbw xmm1, xmm7
|
||||
punpcklbw xmm2, xmm7
|
||||
punpcklbw xmm3, xmm7
|
||||
|
||||
; Add to predict buffer
|
||||
paddw xmm0, xmm4
|
||||
paddw xmm1, xmm4
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm4
|
||||
|
||||
; pack up before storing
|
||||
; (packuswb saturates each word to an unsigned byte)
|
||||
packuswb xmm0, xmm7
|
||||
packuswb xmm1, xmm7
|
||||
packuswb xmm2, xmm7
|
||||
packuswb xmm3, xmm7
|
||||
|
||||
; Load destination stride before writing out,
|
||||
; doesn't need to persist
|
||||
; (rdx held the dc pointer until now and is reused for the stride)
|
||||
movsxd rdx, dword ptr arg(4) ; dst_stride
|
||||
|
||||
; store blocks back out
|
||||
movq [rdi], xmm0
|
||||
movq [rdi + rdx], xmm1
|
||||
|
||||
lea rdi, [rdi + 2*rdx]
|
||||
|
||||
movq [rdi], xmm2
|
||||
movq [rdi + rdx], xmm3
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
; idct_dequant_dc_full_2x_sse2
;
; Dequantizes two adjacent 4x4 coefficient blocks, replaces each block's
; first (DC) coefficient with an externally supplied value, runs the full
; two-pass 4x4 inverse DCT on both blocks at once, adds the 8-pixel-wide
; predictor and stores the clamped result.
;
; Arguments, as read by the body (exact C types are not visible here):
;   arg(0) qcoeff     - 32 words (two 4x4 blocks), 16-byte aligned;
;                       zeroed on return
;   arg(1) dequant    - 16 words, 16-byte aligned; applied to both blocks
;   arg(2) pre        - predictor; 4 rows of 8 bytes, rows 16 bytes apart
;   arg(3) dst        - destination; 4 rows of 8 bytes
;   arg(4) dst_stride - destination row pitch in bytes
;   arg(5) dc         - two words: DC for block 0, then DC for block 1
;
; Register layout: each xmm register holds one row (or column, after a
; transpose) of block 0 in words 0-3 and of block 1 in words 4-7.
;
; xmm6 and xmm7 are overwritten below, so they are preserved with
; SAVE_XMM / RESTORE_XMM exactly like the other xmm6/xmm7 users in this
; code base (they are callee-saved under the Win64 ABI).
global sym(idct_dequant_dc_full_2x_sse2)
sym(idct_dequant_dc_full_2x_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ; special case when 2 blocks have 0 or 1 coeffs
    ; dc is set as first coeff, so no need to load qcoeff
    mov         rax, arg(0) ; qcoeff
    mov         rsi, arg(2) ; pre
    mov         rdi, arg(3) ; dst

    ; Zero out xmm7, for use unpacking
    pxor        xmm7, xmm7

    mov         rdx, arg(1) ; dequant

    ; note the transpose of xmm1 and xmm2, necessary for shuffle
    ; to spit out sensible data
    movdqa      xmm0, [rax]         ; block 0, coeffs 0-7
    movdqa      xmm2, [rax+16]      ; block 0, coeffs 8-15
    movdqa      xmm1, [rax+32]      ; block 1, coeffs 0-7
    movdqa      xmm3, [rax+48]      ; block 1, coeffs 8-15

    ; Clear out coeffs
    movdqa      [rax], xmm7
    movdqa      [rax+16], xmm7
    movdqa      [rax+32], xmm7
    movdqa      [rax+48], xmm7

    ; dequantize qcoeff buffer
    pmullw      xmm0, [rdx]
    pmullw      xmm2, [rdx+16]
    pmullw      xmm1, [rdx]
    pmullw      xmm3, [rdx+16]

    ; DC component
    mov         rdx, arg(5)

    ; repack so block 0 row x and block 1 row x are together
    movdqa      xmm4, xmm0
    punpckldq   xmm0, xmm1
    punpckhdq   xmm4, xmm1

    pshufd      xmm0, xmm0, 11011000b
    pshufd      xmm1, xmm4, 11011000b

    movdqa      xmm4, xmm2
    punpckldq   xmm2, xmm3
    punpckhdq   xmm4, xmm3

    pshufd      xmm2, xmm2, 11011000b
    pshufd      xmm3, xmm4, 11011000b

    ; insert DC component
    pinsrw      xmm0, [rdx], 0      ; block 0 DC
    pinsrw      xmm0, [rdx+2], 4    ; block 1 DC

    ; first pass
    ; x_s1sqr2 is negative as a signed word, so pmulhw yields
    ; (x * K >> 16) - x; the paddw of the input that follows each pmulhw
    ; restores the intended product. For x_c1sqr2less1 the same paddw
    ; supplies the "+1" that the constant leaves out.
    psubw       xmm0, xmm2          ; b1 = 0-2
    paddw       xmm2, xmm2          ;

    movdqa      xmm5, xmm1
    paddw       xmm2, xmm0          ; a1 = 0+2

    pmulhw      xmm5, [x_s1sqr2 GLOBAL]
    paddw       xmm5, xmm1          ; ip1 * sin(pi/8) * sqrt(2)

    movdqa      xmm7, xmm3
    pmulhw      xmm7, [x_c1sqr2less1 GLOBAL]

    paddw       xmm7, xmm3          ; ip3 * cos(pi/8) * sqrt(2)
    psubw       xmm7, xmm5          ; c1

    movdqa      xmm5, xmm1
    movdqa      xmm4, xmm3

    pmulhw      xmm5, [x_c1sqr2less1 GLOBAL]
    paddw       xmm5, xmm1

    pmulhw      xmm3, [x_s1sqr2 GLOBAL]
    paddw       xmm3, xmm4

    paddw       xmm3, xmm5          ; d1
    movdqa      xmm6, xmm2          ; a1

    movdqa      xmm4, xmm0          ; b1
    paddw       xmm2, xmm3          ;0

    paddw       xmm4, xmm7          ;1
    psubw       xmm0, xmm7          ;2

    psubw       xmm6, xmm3          ;3

    ; transpose for the second pass
    movdqa      xmm7, xmm2          ; 103 102 101 100 003 002 001 000
    punpcklwd   xmm2, xmm0          ; 007 003 006 002 005 001 004 000
    punpckhwd   xmm7, xmm0          ; 107 103 106 102 105 101 104 100

    movdqa      xmm5, xmm4          ; 111 110 109 108 011 010 009 008
    punpcklwd   xmm4, xmm6          ; 015 011 014 010 013 009 012 008
    punpckhwd   xmm5, xmm6          ; 115 111 114 110 113 109 112 108


    movdqa      xmm1, xmm2          ; 007 003 006 002 005 001 004 000
    punpckldq   xmm2, xmm4          ; 013 009 005 001 012 008 004 000
    punpckhdq   xmm1, xmm4          ; 015 011 007 003 014 010 006 002

    movdqa      xmm6, xmm7          ; 107 103 106 102 105 101 104 100
    punpckldq   xmm7, xmm5          ; 113 109 105 101 112 108 104 100
    punpckhdq   xmm6, xmm5          ; 115 111 107 103 114 110 106 102


    movdqa      xmm5, xmm2          ; 013 009 005 001 012 008 004 000
    punpckldq   xmm2, xmm7          ; 112 108 012 008 104 100 004 000
    punpckhdq   xmm5, xmm7          ; 113 109 013 009 105 101 005 001

    movdqa      xmm7, xmm1          ; 015 011 007 003 014 010 006 002
    punpckldq   xmm1, xmm6          ; 114 110 014 010 106 102 006 002
    punpckhdq   xmm7, xmm6          ; 115 111 015 011 107 103 007 003

    pshufd      xmm0, xmm2, 11011000b
    pshufd      xmm2, xmm1, 11011000b

    pshufd      xmm1, xmm5, 11011000b
    pshufd      xmm3, xmm7, 11011000b

    ; second pass
    psubw       xmm0, xmm2          ; b1 = 0-2
    paddw       xmm2, xmm2

    movdqa      xmm5, xmm1
    paddw       xmm2, xmm0          ; a1 = 0+2

    pmulhw      xmm5, [x_s1sqr2 GLOBAL]
    paddw       xmm5, xmm1          ; ip1 * sin(pi/8) * sqrt(2)

    movdqa      xmm7, xmm3
    pmulhw      xmm7, [x_c1sqr2less1 GLOBAL]

    paddw       xmm7, xmm3          ; ip3 * cos(pi/8) * sqrt(2)
    psubw       xmm7, xmm5          ; c1

    movdqa      xmm5, xmm1
    movdqa      xmm4, xmm3

    pmulhw      xmm5, [x_c1sqr2less1 GLOBAL]
    paddw       xmm5, xmm1

    pmulhw      xmm3, [x_s1sqr2 GLOBAL]
    paddw       xmm3, xmm4

    paddw       xmm3, xmm5          ; d1
    ; add the rounding term (4) to a1 and b1 so that all four outputs
    ; are rounded by the >> 3 below
    paddw       xmm0, [fours GLOBAL]

    paddw       xmm2, [fours GLOBAL]
    movdqa      xmm6, xmm2          ; a1

    movdqa      xmm4, xmm0          ; b1
    paddw       xmm2, xmm3          ;0

    paddw       xmm4, xmm7          ;1
    psubw       xmm0, xmm7          ;2

    psubw       xmm6, xmm3          ;3
    psraw       xmm2, 3

    psraw       xmm0, 3
    psraw       xmm4, 3

    psraw       xmm6, 3

    ; transpose to save
    movdqa      xmm7, xmm2          ; 103 102 101 100 003 002 001 000
    punpcklwd   xmm2, xmm0          ; 007 003 006 002 005 001 004 000
    punpckhwd   xmm7, xmm0          ; 107 103 106 102 105 101 104 100

    movdqa      xmm5, xmm4          ; 111 110 109 108 011 010 009 008
    punpcklwd   xmm4, xmm6          ; 015 011 014 010 013 009 012 008
    punpckhwd   xmm5, xmm6          ; 115 111 114 110 113 109 112 108


    movdqa      xmm1, xmm2          ; 007 003 006 002 005 001 004 000
    punpckldq   xmm2, xmm4          ; 013 009 005 001 012 008 004 000
    punpckhdq   xmm1, xmm4          ; 015 011 007 003 014 010 006 002

    movdqa      xmm6, xmm7          ; 107 103 106 102 105 101 104 100
    punpckldq   xmm7, xmm5          ; 113 109 105 101 112 108 104 100
    punpckhdq   xmm6, xmm5          ; 115 111 107 103 114 110 106 102


    movdqa      xmm5, xmm2          ; 013 009 005 001 012 008 004 000
    punpckldq   xmm2, xmm7          ; 112 108 012 008 104 100 004 000
    punpckhdq   xmm5, xmm7          ; 113 109 013 009 105 101 005 001

    movdqa      xmm7, xmm1          ; 015 011 007 003 014 010 006 002
    punpckldq   xmm1, xmm6          ; 114 110 014 010 106 102 006 002
    punpckhdq   xmm7, xmm6          ; 115 111 015 011 107 103 007 003

    pshufd      xmm0, xmm2, 11011000b
    pshufd      xmm2, xmm1, 11011000b

    pshufd      xmm1, xmm5, 11011000b
    pshufd      xmm3, xmm7, 11011000b

    ; xmm7 was used as scratch above; re-zero it for the unpack/pack below
    pxor        xmm7, xmm7

    ; Load up predict blocks
    movq        xmm4, [rsi]
    movq        xmm5, [rsi+16]

    punpcklbw   xmm4, xmm7
    punpcklbw   xmm5, xmm7

    paddw       xmm0, xmm4
    paddw       xmm1, xmm5

    movq        xmm4, [rsi+32]
    movq        xmm5, [rsi+48]

    punpcklbw   xmm4, xmm7
    punpcklbw   xmm5, xmm7

    paddw       xmm2, xmm4
    paddw       xmm3, xmm5

.finish:

    ; pack up before storing
    packuswb    xmm0, xmm7
    packuswb    xmm1, xmm7
    packuswb    xmm2, xmm7
    packuswb    xmm3, xmm7

    ; Load destination stride before writing out,
    ; doesn't need to persist
    movsxd      rdx, dword ptr arg(4) ; dst_stride

    ; store blocks back out
    movq        [rdi], xmm0
    movq        [rdi + rdx], xmm1

    lea         rdi, [rdi + 2*rdx]

    movq        [rdi], xmm2
    movq        [rdi + rdx], xmm3


    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
; Read-only constants used by the idct/dequant routines above.
; Each is one 16-byte vector of eight identical words.
SECTION_RODATA
|
||||
align 16
|
||||
; Rounding term added before the arithmetic shift right by 3.
fours:
|
||||
times 8 dw 0x0004
|
||||
align 16
|
||||
; 0x8A8C / 65536 ~= sin(pi/8) * sqrt(2). As a signed word this is
; negative, so users follow pmulhw with a paddw of the input.
x_s1sqr2:
|
||||
times 8 dw 0x8A8C
|
||||
align 16
|
||||
; 0x4E7B / 65536 ~= cos(pi/8) * sqrt(2) - 1; users add the input back
; after pmulhw to supply the missing "+1".
x_c1sqr2less1:
|
||||
times 8 dw 0x4E7B
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
@ -17,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 2
|
||||
SAVE_XMM
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
@ -101,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
|
|||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -34,6 +34,11 @@ prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
|||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
||||
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
|
||||
|
||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
|
||||
|
||||
#if HAVE_MMX
|
||||
// Horizontal MB filtering
|
||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
|
@ -157,10 +162,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
|
@ -183,10 +185,7 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
|
@ -211,10 +210,7 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
|
||||
|
@ -241,10 +237,7 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
@ -26,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -240,6 +241,7 @@ acrossnextcol:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -254,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -439,6 +442,7 @@ loop_row:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -452,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -612,6 +617,7 @@ nextcol4:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
@ -67,6 +67,7 @@ sym(vp8_recon4b_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
SAVE_XMM
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
@ -119,6 +120,7 @@ sym(vp8_recon4b_sse2):
|
|||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
@ -731,7 +731,7 @@ rd:
|
|||
times 4 dw 0x40
|
||||
|
||||
align 16
|
||||
global sym(vp8_six_tap_mmx) HIDDEN_DATA
|
||||
global HIDDEN_DATA(sym(vp8_six_tap_mmx))
|
||||
sym(vp8_six_tap_mmx):
|
||||
times 8 dw 0
|
||||
times 8 dw 0
|
||||
|
@ -791,7 +791,7 @@ sym(vp8_six_tap_mmx):
|
|||
|
||||
|
||||
align 16
|
||||
global sym(vp8_bilinear_filters_mmx) HIDDEN_DATA
|
||||
global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
|
||||
sym(vp8_bilinear_filters_mmx):
|
||||
times 8 dw 128
|
||||
times 8 dw 0
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
@ -37,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -129,6 +130,7 @@ filter_block1d8_h6_rowloop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -155,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -304,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -329,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -397,6 +402,494 @@ vp8_filter_block1d8_v6_sse2_loop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; Vertical 6-tap filter over 16-column rows of 16-bit intermediate samples.
; For each of output_height output rows it reads six consecutive input rows
; (32 bytes each, via aligned movdqa loads), multiplies each row by its tap
; vector at vp8_filter + 16*k, sums with signed saturation, adds the rounding
; vector `rd`, shifts right by 7 and stores 16 saturated bytes with an aligned
; movdqa. pixel_step (arg 4) and output_width (arg 6) are not read by the body.
; NOTE(review): `rd` and the SAVE_XMM / GET_GOT macros are defined outside this
; block; rd is presumably 64 per lane to match the >> 7 - confirm.
;void vp8_filter_block1d16_v6_sse2
|
||||
;(
|
||||
; unsigned short *src_ptr,
|
||||
; unsigned char *output_ptr,
|
||||
; int dst_pitch,
|
||||
; unsigned int pixels_per_line,
|
||||
; unsigned int pixel_step,
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width,
|
||||
; const short *vp8_filter
|
||||
;)
|
||||
;/************************************************************************************
|
||||
; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
|
||||
; output has output_height rows; each one is computed from 6 consecutive input rows.
|
||||
;*************************************************************************************/
|
||||
global sym(vp8_filter_block1d16_v6_sse2)
|
||||
sym(vp8_filter_block1d16_v6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rax, arg(7) ;vp8_filter
|
||||
movsxd rdx, dword ptr arg(3) ;pixels_per_line (used as a byte offset between rows)
|
||||
|
||||
mov rdi, arg(1) ;output_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
|
||||
; step back two rows so that the six taps start two rows above src_ptr
sub rsi, rdx
|
||||
sub rsi, rdx
|
||||
|
||||
movsxd rcx, DWORD PTR arg(5) ;[output_height]
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(2) ; dst_pitch
|
||||
%endif
|
||||
|
||||
vp8_filter_block1d16_v6_sse2_loop:
|
||||
; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order.
|
||||
movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2
|
||||
movdqa xmm2, XMMWORD PTR [rsi + rdx + 16]
|
||||
pmullw xmm1, [rax + 16]
|
||||
pmullw xmm2, [rax + 16]
|
||||
|
||||
movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5
|
||||
movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16]
|
||||
pmullw xmm3, [rax + 64]
|
||||
pmullw xmm4, [rax + 64]
|
||||
|
||||
movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3
|
||||
movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16]
|
||||
pmullw xmm5, [rax + 32]
|
||||
pmullw xmm6, [rax + 32]
|
||||
|
||||
movdqa xmm7, XMMWORD PTR [rsi] ; line 1
|
||||
movdqa xmm0, XMMWORD PTR [rsi + 16]
|
||||
pmullw xmm7, [rax]
|
||||
pmullw xmm0, [rax]
|
||||
|
||||
paddsw xmm1, xmm3
|
||||
paddsw xmm2, xmm4
|
||||
paddsw xmm1, xmm5
|
||||
paddsw xmm2, xmm6
|
||||
paddsw xmm1, xmm7
|
||||
paddsw xmm2, xmm0
|
||||
|
||||
; advance one row: lines 4 and 6 (3 and 5 rows below line 1) become
; reachable as rsi + 2*rdx and rsi + 4*rdx, and rsi is ready for the
; next output row
add rsi, rdx
|
||||
|
||||
movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4
|
||||
movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16]
|
||||
pmullw xmm3, [rax + 48]
|
||||
pmullw xmm4, [rax + 48]
|
||||
|
||||
movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6
|
||||
movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16]
|
||||
pmullw xmm5, [rax + 80]
|
||||
pmullw xmm6, [rax + 80]
|
||||
|
||||
movdqa xmm7, XMMWORD PTR [rd GLOBAL]
|
||||
pxor xmm0, xmm0 ; clear xmm0 (not read again before it is reloaded)
|
||||
|
||||
paddsw xmm1, xmm3
|
||||
paddsw xmm2, xmm4
|
||||
paddsw xmm1, xmm5
|
||||
paddsw xmm2, xmm6
|
||||
|
||||
paddsw xmm1, xmm7
|
||||
paddsw xmm2, xmm7
|
||||
|
||||
psraw xmm1, 7
|
||||
psraw xmm2, 7
|
||||
|
||||
packuswb xmm1, xmm2 ; pack and saturate
|
||||
movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination
|
||||
%if ABI_IS_32BIT
|
||||
add rdi, DWORD PTR arg(2) ;[dst_pitch]
|
||||
%else
|
||||
add rdi, r8
|
||||
%endif
|
||||
dec rcx ; decrement count
|
||||
jnz vp8_filter_block1d16_v6_sse2_loop ; next row
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; Horizontal 6-tap filter producing 8 output bytes per row, written straight
; to the 8-bit destination. Per row it loads 16 source bytes starting at
; src_ptr - 2, forms six byte-shifted copies widened to words, multiplies each
; by its tap vector at vp8_filter + 16*k, sums with signed saturation, adds the
; rounding vector `rd`, shifts right by 7 and stores 8 saturated bytes.
; NOTE(review): `rd` and the SAVE_XMM / GET_GOT macros are defined outside this
; block.
;void vp8_filter_block1d8_h6_only_sse2
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; unsigned int src_pixels_per_line,
|
||||
; unsigned char *output_ptr,
|
||||
; int dst_pitch,
|
||||
; unsigned int output_height,
|
||||
; const short *vp8_filter
|
||||
;)
|
||||
; First-pass filter only when yoffset==0
|
||||
global sym(vp8_filter_block1d8_h6_only_sse2)
|
||||
sym(vp8_filter_block1d8_h6_only_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(5) ;vp8_filter
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
|
||||
mov rdi, arg(2) ;output_ptr
|
||||
|
||||
movsxd rcx, dword ptr arg(4) ;output_height
|
||||
movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(3) ;dst_pitch
|
||||
%endif
|
||||
pxor xmm0, xmm0 ; clear xmm0 for unpack
|
||||
|
||||
filter_block1d8_h6_only_rowloop:
|
||||
movq xmm3, MMWORD PTR [rsi - 2]
|
||||
movq xmm1, MMWORD PTR [rsi + 6]
|
||||
|
||||
prefetcht2 [rsi+rax-2]
|
||||
|
||||
; xmm1 = 16 contiguous source bytes starting at rsi - 2
pslldq xmm1, 8
|
||||
por xmm1, xmm3
|
||||
|
||||
movdqa xmm4, xmm1
|
||||
movdqa xmm5, xmm1
|
||||
|
||||
movdqa xmm6, xmm1
|
||||
movdqa xmm7, xmm1
|
||||
|
||||
punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
|
||||
psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
|
||||
|
||||
pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
|
||||
punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
|
||||
|
||||
psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
|
||||
|
||||
|
||||
punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
|
||||
psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
|
||||
pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
|
||||
|
||||
punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
|
||||
psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
|
||||
pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
|
||||
|
||||
punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
|
||||
psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
|
||||
|
||||
pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
|
||||
|
||||
punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
|
||||
pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
|
||||
|
||||
|
||||
paddsw xmm4, xmm7
|
||||
paddsw xmm4, xmm5
|
||||
|
||||
paddsw xmm4, xmm3
|
||||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm1
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
packuswb xmm4, xmm0
|
||||
|
||||
movq QWORD PTR [rdi], xmm4 ; store the results in the destination
|
||||
lea rsi, [rsi + rax]
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rdi, DWORD Ptr arg(3) ;dst_pitch
|
||||
%else
|
||||
add rdi, r8
|
||||
%endif
|
||||
dec rcx
|
||||
|
||||
jnz filter_block1d8_h6_only_rowloop ; next row
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; Horizontal 6-tap filter producing 16 output bytes per row, written straight
; to the 8-bit destination as two 8-byte halves. Per row it loads 24 source
; bytes starting at src_ptr - 2; xmm1 feeds output pixels 0-7 and xmm2 feeds
; output pixels 8-15. Each half is built from six byte-shifted copies widened
; to words, multiplied by the tap vectors at vp8_filter + 16*k, summed with
; signed saturation, rounded with `rd`, shifted right by 7 and packed.
; NOTE(review): `rd` and the SAVE_XMM / GET_GOT macros are defined outside this
; block.
;void vp8_filter_block1d16_h6_only_sse2
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; unsigned int src_pixels_per_line,
|
||||
; unsigned char *output_ptr,
|
||||
; int dst_pitch,
|
||||
; unsigned int output_height,
|
||||
; const short *vp8_filter
|
||||
;)
|
||||
; First-pass filter only when yoffset==0
|
||||
global sym(vp8_filter_block1d16_h6_only_sse2)
|
||||
sym(vp8_filter_block1d16_h6_only_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rdx, arg(5) ;vp8_filter
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
|
||||
mov rdi, arg(2) ;output_ptr
|
||||
|
||||
movsxd rcx, dword ptr arg(4) ;output_height
|
||||
movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(3) ;dst_pitch
|
||||
%endif
|
||||
|
||||
pxor xmm0, xmm0 ; clear xmm0 for unpack
|
||||
|
||||
filter_block1d16_h6_only_sse2_rowloop:
|
||||
movq xmm3, MMWORD PTR [rsi - 2]
|
||||
movq xmm1, MMWORD PTR [rsi + 6]
|
||||
|
||||
movq xmm2, MMWORD PTR [rsi +14]
|
||||
pslldq xmm2, 8
|
||||
|
||||
; xmm2 = 16 contiguous source bytes starting at rsi + 6 (second half)
por xmm2, xmm1
|
||||
prefetcht2 [rsi+rax-2]
|
||||
|
||||
; xmm1 = 16 contiguous source bytes starting at rsi - 2 (first half)
pslldq xmm1, 8
|
||||
por xmm1, xmm3
|
||||
|
||||
movdqa xmm4, xmm1
|
||||
movdqa xmm5, xmm1
|
||||
|
||||
movdqa xmm6, xmm1
|
||||
movdqa xmm7, xmm1
|
||||
|
||||
punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
|
||||
psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
|
||||
|
||||
pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
|
||||
punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
|
||||
|
||||
psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
|
||||
|
||||
punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
|
||||
psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
|
||||
pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
|
||||
|
||||
punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
|
||||
psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
|
||||
pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
|
||||
|
||||
punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
|
||||
psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
|
||||
pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
|
||||
|
||||
punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
|
||||
pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
|
||||
|
||||
paddsw xmm4, xmm7
|
||||
paddsw xmm4, xmm5
|
||||
|
||||
paddsw xmm4, xmm3
|
||||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm1
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
packuswb xmm4, xmm0 ; lower 8 bytes
|
||||
|
||||
movq QWORD Ptr [rdi], xmm4 ; store the results in the destination
|
||||
|
||||
; Second half: same six taps applied to xmm2. The byte indices in the
; diagrams below are relative to rsi + 8.
movdqa xmm3, xmm2
|
||||
movdqa xmm4, xmm2
|
||||
|
||||
movdqa xmm5, xmm2
|
||||
movdqa xmm6, xmm2
|
||||
|
||||
movdqa xmm7, xmm2
|
||||
|
||||
punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx00 xx-1 xx-2
|
||||
psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
|
||||
|
||||
pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1
|
||||
punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
|
||||
|
||||
psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
|
||||
pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2
|
||||
|
||||
punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
|
||||
psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
|
||||
|
||||
pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3
|
||||
|
||||
punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
|
||||
psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
|
||||
|
||||
pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4
|
||||
|
||||
punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
|
||||
psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
|
||||
|
||||
pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5
|
||||
|
||||
punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
|
||||
pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6
|
||||
|
||||
paddsw xmm4, xmm7
|
||||
paddsw xmm4, xmm5
|
||||
|
||||
paddsw xmm4, xmm3
|
||||
paddsw xmm4, xmm6
|
||||
|
||||
paddsw xmm4, xmm2
|
||||
paddsw xmm4, [rd GLOBAL]
|
||||
|
||||
psraw xmm4, 7
|
||||
|
||||
packuswb xmm4, xmm0 ; higher 8 bytes
|
||||
|
||||
movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
%if ABI_IS_32BIT
|
||||
add rdi, DWORD Ptr arg(3) ;dst_pitch
|
||||
%else
|
||||
add rdi, r8
|
||||
%endif
|
||||
|
||||
dec rcx
|
||||
jnz filter_block1d16_h6_only_sse2_rowloop ; next row
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
; Vertical 6-tap filter reading 8-bit source rows directly and producing 8
; output bytes per row. For each output row it loads 8 bytes from six
; consecutive source rows starting at the current src pointer, widens them to
; words, multiplies each by its tap vector at vp8_filter + 16*k, sums with
; signed saturation, adds the rounding vector `rd`, shifts right by 7 and
; stores 8 saturated bytes. The source pointer is used as given (no rows are
; skipped back inside this routine).
; NOTE(review): `rd` and the SAVE_XMM / GET_GOT macros are defined outside this
; block.
;void vp8_filter_block1d8_v6_only_sse2
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; unsigned int src_pixels_per_line,
|
||||
; unsigned char *output_ptr,
|
||||
; int dst_pitch,
|
||||
; unsigned int output_height,
|
||||
; const short *vp8_filter
|
||||
;)
|
||||
; Second-pass filter only when xoffset==0
|
||||
global sym(vp8_filter_block1d8_v6_only_sse2)
|
||||
sym(vp8_filter_block1d8_v6_only_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(2) ;output_ptr
|
||||
|
||||
movsxd rcx, dword ptr arg(4) ;output_height
|
||||
movsxd rdx, dword ptr arg(1) ;src_pixels_per_line
|
||||
|
||||
mov rax, arg(5) ;vp8_filter
|
||||
|
||||
pxor xmm0, xmm0 ; clear xmm0
|
||||
|
||||
; rounding vector stays in xmm7 for the whole loop
movdqa xmm7, XMMWORD PTR [rd GLOBAL]
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(3) ; dst_pitch
|
||||
%endif
|
||||
|
||||
vp8_filter_block1d8_v6_only_sse2_loop:
|
||||
movq xmm1, MMWORD PTR [rsi] ; row 0
|
||||
movq xmm2, MMWORD PTR [rsi + rdx] ; row 1
|
||||
movq xmm3, MMWORD PTR [rsi + rdx * 2] ; row 2
|
||||
movq xmm5, MMWORD PTR [rsi + rdx * 4] ; row 4
|
||||
; advance one row so rows 3 and 5 are reachable as rsi + 2*rdx and
; rsi + 4*rdx; this also positions rsi for the next output row
add rsi, rdx
|
||||
movq xmm4, MMWORD PTR [rsi + rdx * 2] ; row 3
|
||||
movq xmm6, MMWORD PTR [rsi + rdx * 4] ; row 5
|
||||
|
||||
punpcklbw xmm1, xmm0
|
||||
pmullw xmm1, [rax]
|
||||
|
||||
punpcklbw xmm2, xmm0
|
||||
pmullw xmm2, [rax + 16]
|
||||
|
||||
punpcklbw xmm3, xmm0
|
||||
pmullw xmm3, [rax + 32]
|
||||
|
||||
punpcklbw xmm5, xmm0
|
||||
pmullw xmm5, [rax + 64]
|
||||
|
||||
punpcklbw xmm4, xmm0
|
||||
pmullw xmm4, [rax + 48]
|
||||
|
||||
punpcklbw xmm6, xmm0
|
||||
pmullw xmm6, [rax + 80]
|
||||
|
||||
paddsw xmm2, xmm5
|
||||
paddsw xmm2, xmm3
|
||||
|
||||
paddsw xmm2, xmm1
|
||||
paddsw xmm2, xmm4
|
||||
|
||||
paddsw xmm2, xmm6
|
||||
paddsw xmm2, xmm7
|
||||
|
||||
psraw xmm2, 7
|
||||
packuswb xmm2, xmm0 ; pack and saturate
|
||||
|
||||
movq QWORD PTR [rdi], xmm2 ; store the results in the destination
|
||||
%if ABI_IS_32BIT
|
||||
add rdi, DWORD PTR arg(3) ;[dst_pitch]
|
||||
%else
|
||||
add rdi, r8
|
||||
%endif
|
||||
dec rcx ; decrement count
|
||||
jnz vp8_filter_block1d8_v6_only_sse2_loop ; next row
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -415,6 +908,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
;SAVE_XMM ;xmm6, xmm7 are not used here.
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -454,164 +948,7 @@ unpack_block1d16_h6_sse2_rowloop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_unpack_block1d8_h6_sse2
|
||||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; unsigned short *output_ptr,
|
||||
; unsigned int src_pixels_per_line,
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width
|
||||
;)
|
||||
global sym(vp8_unpack_block1d8_h6_sse2)
|
||||
sym(vp8_unpack_block1d8_h6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
mov rdi, arg(1) ;output_ptr
|
||||
|
||||
movsxd rcx, dword ptr arg(3) ;output_height
|
||||
movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source
|
||||
|
||||
pxor xmm0, xmm0 ; clear xmm0 for unpack
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source
|
||||
%endif
|
||||
|
||||
unpack_block1d8_h6_sse2_rowloop:
|
||||
movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2
|
||||
lea rsi, [rsi + rax]
|
||||
|
||||
punpcklbw xmm1, xmm0
|
||||
movdqa XMMWORD Ptr [rdi], xmm1
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rdi, DWORD Ptr arg(4) ;[output_width]
|
||||
%else
|
||||
add rdi, r8
|
||||
%endif
|
||||
dec rcx
|
||||
jnz unpack_block1d8_h6_sse2_rowloop ; next row
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_pack_block1d8_v6_sse2
|
||||
;(
|
||||
; short *src_ptr,
|
||||
; unsigned char *output_ptr,
|
||||
; int dst_ptich,
|
||||
; unsigned int pixels_per_line,
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width
|
||||
;)
|
||||
global sym(vp8_pack_block1d8_v6_sse2)
|
||||
sym(vp8_pack_block1d8_v6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
movsxd rdx, dword ptr arg(3) ;pixels_per_line
|
||||
mov rdi, arg(1) ;output_ptr
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rcx, DWORD PTR arg(4) ;[output_height]
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(5) ;output_width ; Pitch for Source
|
||||
%endif
|
||||
|
||||
pack_block1d8_v6_sse2_loop:
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
packuswb xmm0, xmm0
|
||||
|
||||
movq QWORD PTR [rdi], xmm0 ; store the results in the destination
|
||||
lea rsi, [rsi+rdx]
|
||||
|
||||
%if ABI_IS_32BIT
|
||||
add rdi, DWORD Ptr arg(5) ;[output_width]
|
||||
%else
|
||||
add rdi, r8
|
||||
%endif
|
||||
dec rcx ; decrement count
|
||||
jnz pack_block1d8_v6_sse2_loop ; next row
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_pack_block1d16_v6_sse2
|
||||
;(
|
||||
; short *src_ptr,
|
||||
; unsigned char *output_ptr,
|
||||
; int dst_ptich,
|
||||
; unsigned int pixels_per_line,
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width
|
||||
;)
|
||||
global sym(vp8_pack_block1d16_v6_sse2)
|
||||
sym(vp8_pack_block1d16_v6_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
movsxd rdx, dword ptr arg(3) ;pixels_per_line
|
||||
mov rdi, arg(1) ;output_ptr
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rcx, DWORD PTR arg(4) ;[output_height]
|
||||
%if ABI_IS_32BIT=0
|
||||
movsxd r8, dword ptr arg(2) ;dst_pitch
|
||||
%endif
|
||||
|
||||
pack_block1d16_v6_sse2_loop:
|
||||
movdqa xmm0, XMMWORD PTR [rsi]
|
||||
movdqa xmm1, XMMWORD PTR [rsi+16]
|
||||
|
||||
packuswb xmm0, xmm1
|
||||
movdqa XMMWORD PTR [rdi], xmm0 ; store the results in the destination
|
||||
|
||||
add rsi, rdx
|
||||
%if ABI_IS_32BIT
|
||||
add rdi, DWORD Ptr arg(2) ;dst_pitch
|
||||
%else
|
||||
add rdi, r8
|
||||
%endif
|
||||
dec rcx ; decrement count
|
||||
jnz pack_block1d16_v6_sse2_loop ; next row
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
;RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -632,6 +969,7 @@ sym(vp8_bilinear_predict16x16_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -879,6 +1217,7 @@ done:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -899,6 +1238,7 @@ sym(vp8_bilinear_predict8x8_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1022,6 +1362,7 @@ next_row8x8:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
|
|
@ -0,0 +1,931 @@
|
|||
;
|
||||
; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
%define BLOCK_HEIGHT_WIDTH 4
|
||||
%define VP8_FILTER_WEIGHT 128
|
||||
%define VP8_FILTER_SHIFT 7
|
||||
|
||||
|
||||
;/************************************************************************************
; Notes: vp8_filter_block1d8_h6_ssse3 applies a 6 tap filter horizontally to the input
; pixels. It produces 8 output pixels per row and ONE row per loop iteration, using
; 128 bit operations.
;
; Each row is loaded from [src_ptr - 2] and shuffled with shuf1b / shuf2b / shuf3b,
; which pair source bytes (i, i+5), (i+2, i+4) and (i+1, i+3). The pairs are
; multiplied by the k0_k5 / k2_k4 / k1_k3 taps with pmaddubsw, summed with
; saturation, rounded with rd (0x40 per word), shifted right by 7 and packed to
; unsigned bytes.
;
; vp8_filter_index selects one 16 byte entry in each tap table (index << 4). When the
; first dword of the selected k0_k5 entry is zero, the outer taps are skipped and only
; the k2_k4 / k1_k3 pairs are applied (vp8_filter_block1d8_h4_ssse3 below).
;
; output_height must be non-zero: both row loops are dec/jnz loops that always run
; at least once.
;
; xmm6 and xmm7 are used, so the body is bracketed with SAVE_XMM / RESTORE_XMM in the
; same positions the SSE2 sub-pixel routines use. The macros come from
; vpx_ports/x86_abi_support.asm; presumably they preserve the registers the Win64 ABI
; treats as callee-saved - confirm against that file.
;
; This is an implementation of some of the SSE optimizations first seen in ffvp8
;
;*************************************************************************************/
;void vp8_filter_block1d8_h6_ssse3
;(
;    unsigned char  *src_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned char  *output_ptr,
;    unsigned int    output_pitch,
;    unsigned int    output_height,
;    unsigned int    vp8_filter_index
;)
global sym(vp8_filter_block1d8_h6_ssse3)
sym(vp8_filter_block1d8_h6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM                            ;xmm6, xmm7 are used below
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ;table index
    xor         rsi, rsi                ;zero, compared against the k0_k5 entry below
    shl         rdx, 4                  ;16 bytes per table entry

    movdqa      xmm7, [rd GLOBAL]       ;rounding constant

    lea         rax, [k0_k5 GLOBAL]
    add         rax, rdx
    mov         rdi, arg(2)             ;output_ptr

    cmp         esi, DWORD PTR [rax]    ;outer taps zero -> 4 tap path
    je          vp8_filter_block1d8_h4_ssse3

    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    mov         rsi, arg(0)             ;src_ptr
    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height

    movsxd      rdx, dword ptr arg(3)   ;output_pitch

    sub         rdi, rdx                ;pre-bias: the loop adds the pitch before storing
;xmm3 free
filter_block1d8_h6_rowloop_ssse3:
    movdqu      xmm0,   XMMWORD PTR [rsi - 2]

    movdqa      xmm1, xmm0
    pshufb      xmm0, [shuf1b GLOBAL]

    movdqa      xmm2, xmm1
    pshufb      xmm1, [shuf2b GLOBAL]
    pmaddubsw   xmm0, xmm4
    pmaddubsw   xmm1, xmm5

    pshufb      xmm2, [shuf3b GLOBAL]
    add         rdi, rdx
    pmaddubsw   xmm2, xmm6

    lea         rsi,    [rsi + rax]
    dec         rcx                     ;sets ZF for the jnz below; nothing in between alters flags
    paddsw      xmm0, xmm1
    paddsw      xmm0, xmm7
    paddsw      xmm0, xmm2
    psraw       xmm0, 7
    packuswb    xmm0, xmm0

    movq        MMWORD Ptr [rdi], xmm0
    jnz         filter_block1d8_h6_rowloop_ssse3

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

vp8_filter_block1d8_h4_ssse3:
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    movdqa      xmm3, XMMWORD PTR [shuf2b GLOBAL]
    movdqa      xmm4, XMMWORD PTR [shuf3b GLOBAL]

    mov         rsi, arg(0)             ;src_ptr

    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height

    movsxd      rdx, dword ptr arg(3)   ;output_pitch

    sub         rdi, rdx                ;pre-bias: the loop adds the pitch before storing
;xmm3 = shuf2b, xmm4 = shuf3b (kept in registers for this path)
filter_block1d8_h4_rowloop_ssse3:
    movdqu      xmm0,   XMMWORD PTR [rsi - 2]

    movdqa      xmm2, xmm0
    pshufb      xmm0, xmm3 ;[shuf2b GLOBAL]
    pshufb      xmm2, xmm4 ;[shuf3b GLOBAL]

    pmaddubsw   xmm0, xmm5
    add         rdi, rdx
    pmaddubsw   xmm2, xmm6

    lea         rsi,    [rsi + rax]
    dec         rcx                     ;sets ZF for the jnz below
    paddsw      xmm0, xmm7
    paddsw      xmm0, xmm2
    psraw       xmm0, 7
    packuswb    xmm0, xmm0

    movq        MMWORD Ptr [rdi], xmm0

    jnz         filter_block1d8_h4_rowloop_ssse3

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
;void vp8_filter_block1d16_h6_ssse3
;(
;    unsigned char  *src_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned char  *output_ptr,
;    unsigned int    output_pitch,
;    unsigned int    output_height,
;    unsigned int    vp8_filter_index
;)
; Horizontal 6 tap filter producing 16 output pixels per row, one row per loop
; iteration. Each row is computed as two 8 pixel halves, loaded from [src_ptr - 2]
; and [src_ptr + 6], then joined with punpcklqdq and written with a single movdqa,
; so output_ptr and output_pitch must keep every row 16 byte aligned.
;
; vp8_filter_index selects one 16 byte entry in each of the k0_k5 / k1_k3 / k2_k4
; tables (index << 4). output_height must be non-zero (dec/jnz loop).
;
; The prolog runs SAVE_XMM, so every exit path must run RESTORE_XMM before
; UNSHADOW_ARGS; both epilogs below do. The macros come from
; vpx_ports/x86_abi_support.asm.
;
; NOTE: the zero-outer-tap check is commented out, so the 4 tap path at
; vp8_filter_block1d16_h4_ssse3 is currently unreachable and all filters take the
; 6 tap loop.
global sym(vp8_filter_block1d16_h6_ssse3)
sym(vp8_filter_block1d16_h6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ;table index
    xor         rsi, rsi                ;only consumed by the disabled check below
    shl         rdx, 4                  ;16 bytes per table entry

    lea         rax, [k0_k5 GLOBAL]
    add         rax, rdx

    mov         rdi, arg(2)             ;output_ptr
    movdqa      xmm7, [rd GLOBAL]       ;rounding constant

;;
;;    cmp         esi, DWORD PTR [rax]
;;    je          vp8_filter_block1d16_h4_ssse3

    mov         rsi, arg(0)             ;src_ptr

    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height
    movsxd      rdx, dword ptr arg(3)   ;output_pitch

filter_block1d16_h6_rowloop_ssse3:
    movdqu      xmm0,   XMMWORD PTR [rsi - 2]   ;source for output pixels 0-7

    movdqa      xmm1, xmm0
    pshufb      xmm0, [shuf1b GLOBAL]
    movdqa      xmm2, xmm1
    pmaddubsw   xmm0, xmm4
    pshufb      xmm1, [shuf2b GLOBAL]
    pshufb      xmm2, [shuf3b GLOBAL]
    pmaddubsw   xmm1, xmm5

    movdqu      xmm3,   XMMWORD PTR [rsi + 6]   ;source for output pixels 8-15

    pmaddubsw   xmm2, xmm6
    paddsw      xmm0, xmm1
    movdqa      xmm1, xmm3
    pshufb      xmm3, [shuf1b GLOBAL]
    paddsw      xmm0, xmm7
    pmaddubsw   xmm3, xmm4
    paddsw      xmm0, xmm2
    movdqa      xmm2, xmm1
    pshufb      xmm1, [shuf2b GLOBAL]
    pshufb      xmm2, [shuf3b GLOBAL]
    pmaddubsw   xmm1, xmm5
    pmaddubsw   xmm2, xmm6

    psraw       xmm0, 7
    packuswb    xmm0, xmm0
    lea         rsi,    [rsi + rax]
    paddsw      xmm3, xmm1
    paddsw      xmm3, xmm7
    paddsw      xmm3, xmm2
    psraw       xmm3, 7
    packuswb    xmm3, xmm3

    punpcklqdq  xmm0, xmm3              ;low half | high half

    movdqa      XMMWORD Ptr [rdi], xmm0

    add         rdi, rdx
    dec         rcx
    jnz         filter_block1d16_h6_rowloop_ssse3


    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM                         ;pairs with SAVE_XMM in the prolog
    UNSHADOW_ARGS
    pop         rbp
    ret

; 4 tap variant. Unreachable while the check above stays commented out; if it is
; re-enabled, rax must still hold the k0_k5 entry address on entry here.
vp8_filter_block1d16_h4_ssse3:
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    mov         rsi, arg(0)             ;src_ptr
    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height
    movsxd      rdx, dword ptr arg(3)   ;output_pitch

filter_block1d16_h4_rowloop_ssse3:
    movdqu      xmm1,   XMMWORD PTR [rsi - 2]

    movdqa      xmm2, xmm1
    pshufb      xmm1, [shuf2b GLOBAL]
    pshufb      xmm2, [shuf3b GLOBAL]
    pmaddubsw   xmm1, xmm5

    movdqu      xmm3,   XMMWORD PTR [rsi + 6]

    pmaddubsw   xmm2, xmm6
    movdqa      xmm0, xmm3
    pshufb      xmm3, [shuf3b GLOBAL]
    pshufb      xmm0, [shuf2b GLOBAL]

    paddsw      xmm1, xmm7
    paddsw      xmm1, xmm2

    pmaddubsw   xmm0, xmm5
    pmaddubsw   xmm3, xmm6

    psraw       xmm1, 7
    packuswb    xmm1, xmm1
    lea         rsi,    [rsi + rax]
    paddsw      xmm3, xmm0
    paddsw      xmm3, xmm7
    psraw       xmm3, 7
    packuswb    xmm3, xmm3

    punpcklqdq  xmm1, xmm3

    movdqa      XMMWORD Ptr [rdi], xmm1

    add         rdi, rdx
    dec         rcx
    jnz         filter_block1d16_h4_rowloop_ssse3


    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM                         ;pairs with SAVE_XMM in the prolog
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
;void vp8_filter_block1d4_h6_ssse3
;(
;    unsigned char  *src_ptr,
;    unsigned int    src_pixels_per_line,
;    unsigned char  *output_ptr,
;    unsigned int    output_pitch,
;    unsigned int    output_height,
;    unsigned int    vp8_filter_index
;)
; Horizontal 6 tap filter writing 4 output pixels (one dword) per row, one row per
; loop iteration. The arithmetic is done on a full 16 byte load from [src_ptr - 2];
; only the low 4 packed results are stored.
;
; vp8_filter_index selects one 16 byte entry in each tap table (index << 4). When the
; first dword of the k0_k5 entry is zero, the 4 tap path at
; vp8_filter_block1d4_h4_ssse3 is used. output_height must be non-zero (dec/jnz loop).
;
; xmm6 and xmm7 are used, so the body is bracketed with SAVE_XMM / RESTORE_XMM
; (macros from vpx_ports/x86_abi_support.asm; presumably they preserve the Win64
; callee-saved XMM registers - confirm against that file).
global sym(vp8_filter_block1d4_h6_ssse3)
sym(vp8_filter_block1d4_h6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM                            ;xmm6, xmm7 are used below
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ;table index
    xor         rsi, rsi                ;zero, compared against the k0_k5 entry below
    shl         rdx, 4                  ;16 bytes per table entry

    lea         rax, [k0_k5 GLOBAL]
    add         rax, rdx
    movdqa      xmm7, [rd GLOBAL]       ;rounding constant

    cmp         esi, DWORD PTR [rax]    ;outer taps zero -> 4 tap path
    je          vp8_filter_block1d4_h4_ssse3

    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3

    mov         rsi, arg(0)             ;src_ptr
    mov         rdi, arg(2)             ;output_ptr
    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height

    movsxd      rdx, dword ptr arg(3)   ;output_pitch

;xmm3 free
filter_block1d4_h6_rowloop_ssse3:
    movdqu      xmm0,   XMMWORD PTR [rsi - 2]

    movdqa      xmm1, xmm0
    pshufb      xmm0, [shuf1b GLOBAL]

    movdqa      xmm2, xmm1
    pshufb      xmm1, [shuf2b GLOBAL]
    pmaddubsw   xmm0, xmm4
    pshufb      xmm2, [shuf3b GLOBAL]
    pmaddubsw   xmm1, xmm5

;--
    pmaddubsw   xmm2, xmm6

    lea         rsi,    [rsi + rax]
;--
    paddsw      xmm0, xmm1
    paddsw      xmm0, xmm7
    pxor        xmm1, xmm1
    paddsw      xmm0, xmm2
    psraw       xmm0, 7
    packuswb    xmm0, xmm0

    movd        DWORD PTR [rdi], xmm0

    add         rdi, rdx
    dec         rcx
    jnz         filter_block1d4_h6_rowloop_ssse3

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

vp8_filter_block1d4_h4_ssse3:
    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
    movdqa      xmm0, XMMWORD PTR [shuf2b GLOBAL]
    movdqa      xmm3, XMMWORD PTR [shuf3b GLOBAL]

    mov         rsi, arg(0)             ;src_ptr
    mov         rdi, arg(2)             ;output_ptr
    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
    movsxd      rcx, dword ptr arg(4)   ;output_height

    movsxd      rdx, dword ptr arg(3)   ;output_pitch

;xmm0 = shuf2b, xmm3 = shuf3b (kept in registers for this path)
filter_block1d4_h4_rowloop_ssse3:
    movdqu      xmm1,   XMMWORD PTR [rsi - 2]

    movdqa      xmm2, xmm1
    pshufb      xmm1, xmm0 ;;[shuf2b GLOBAL]
    pshufb      xmm2, xmm3 ;;[shuf3b GLOBAL]
    pmaddubsw   xmm1, xmm5

;--
    pmaddubsw   xmm2, xmm6

    lea         rsi,    [rsi + rax]
;--
    paddsw      xmm1, xmm7
    paddsw      xmm1, xmm2
    psraw       xmm1, 7
    packuswb    xmm1, xmm1

    movd        DWORD PTR [rdi], xmm1

    add         rdi, rdx
    dec         rcx
    jnz         filter_block1d4_h4_rowloop_ssse3

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
|
||||
;void vp8_filter_block1d16_v6_ssse3
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    unsigned int   vp8_filter_index
;)
; Vertical 6 tap filter producing 16 output pixels per row, one row per loop
; iteration. For each output row the six source rows A..F are read starting at
; src_ptr (A = src_ptr, F = src_ptr + 5 * src_pitch), interleaved as (A,F), (B,D),
; (C,E) and multiplied by the k0_k5 / k1_k3 / k2_k4 taps with pmaddubsw. The sums
; are rounded with rd, shifted right by 7 and packed to unsigned bytes.
;
; vp8_filter_index selects one 16 byte entry in each tap table (index << 4). When the
; first dword of the k0_k5 entry is zero, rows A and F are skipped
; (vp8_filter_block1d16_v4_ssse3). output_height must be non-zero (dec/jnz loop).
;
; The 6 tap loop stores each row as two 8 byte halves; the 4 tap loop stores it with
; one movdqa, so on that path every output row must be 16 byte aligned.
;
; xmm6 and xmm7 are used, so the body is bracketed with SAVE_XMM / RESTORE_XMM
; (macros from vpx_ports/x86_abi_support.asm; presumably they preserve the Win64
; callee-saved XMM registers - confirm against that file).
global sym(vp8_filter_block1d16_v6_ssse3)
sym(vp8_filter_block1d16_v6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM                            ;xmm6, xmm7 are used below
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ;table index
    xor         rsi, rsi                ;zero, compared against the k0_k5 entry below
    shl         rdx, 4                  ;16 bytes per table entry

    lea         rax, [k0_k5 GLOBAL]
    add         rax, rdx

    cmp         esi, DWORD PTR [rax]    ;outer taps zero -> 4 tap path
    je          vp8_filter_block1d16_v4_ssse3

    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3

    mov         rsi, arg(0)             ;src_ptr
    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
    mov         rdi, arg(2)             ;output_ptr

%if ABI_IS_32BIT=0
    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
%endif
    mov         rax, rsi
    movsxd      rcx, DWORD PTR arg(4)   ;output_height
    add         rax, rdx                ;rax = rsi + pitch, used to reach rows D and F


vp8_filter_block1d16_v6_ssse3_loop:
    ; left 8 pixels
    movq        xmm1, MMWORD PTR [rsi]                  ;A
    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E

    punpcklbw   xmm2, xmm4                  ;B D
    punpcklbw   xmm3, xmm0                  ;C E

    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F

    pmaddubsw   xmm3, xmm6
    punpcklbw   xmm1, xmm0                  ;A F
    pmaddubsw   xmm2, xmm7
    pmaddubsw   xmm1, xmm5

    paddsw      xmm2, xmm3
    paddsw      xmm2, xmm1
    paddsw      xmm2, [rd GLOBAL]
    psraw       xmm2, 7
    packuswb    xmm2, xmm2

    movq        MMWORD PTR [rdi], xmm2          ;store the results

    ; right 8 pixels
    movq        xmm1, MMWORD PTR [rsi + 8]                  ;A
    movq        xmm2, MMWORD PTR [rsi + rdx + 8]            ;B
    movq        xmm3, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E

    punpcklbw   xmm2, xmm4                  ;B D
    punpcklbw   xmm3, xmm0                  ;C E

    movq        xmm0, MMWORD PTR [rax + rdx * 4 + 8]        ;F
    pmaddubsw   xmm3, xmm6
    punpcklbw   xmm1, xmm0                  ;A F
    pmaddubsw   xmm2, xmm7
    pmaddubsw   xmm1, xmm5

    add         rsi,  rdx
    add         rax,  rdx
;--
;--
    paddsw      xmm2, xmm3
    paddsw      xmm2, xmm1
    paddsw      xmm2, [rd GLOBAL]
    psraw       xmm2, 7
    packuswb    xmm2, xmm2

    movq        MMWORD PTR [rdi+8], xmm2

%if ABI_IS_32BIT
    add         rdi,        DWORD PTR arg(3) ;out_pitch
%else
    add         rdi,        r8
%endif
    dec         rcx
    jnz         vp8_filter_block1d16_v6_ssse3_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

vp8_filter_block1d16_v4_ssse3:
    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3

    mov         rsi, arg(0)             ;src_ptr
    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
    mov         rdi, arg(2)             ;output_ptr

%if ABI_IS_32BIT=0
    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
%endif
    mov         rax, rsi
    movsxd      rcx, DWORD PTR arg(4)   ;output_height
    add         rax, rdx                ;rax = rsi + pitch, used to reach row D

vp8_filter_block1d16_v4_ssse3_loop:
    ; left 8 pixels
    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E

    punpcklbw   xmm2, xmm4                  ;B D
    punpcklbw   xmm3, xmm0                  ;C E

    pmaddubsw   xmm3, xmm6
    pmaddubsw   xmm2, xmm7
    ; right 8 pixels
    movq        xmm5, MMWORD PTR [rsi + rdx + 8]            ;B
    movq        xmm1, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E

    paddsw      xmm2, [rd GLOBAL]
    paddsw      xmm2, xmm3
    psraw       xmm2, 7
    packuswb    xmm2, xmm2

    punpcklbw   xmm5, xmm4                  ;B D
    punpcklbw   xmm1, xmm0                  ;C E

    pmaddubsw   xmm1, xmm6
    pmaddubsw   xmm5, xmm7

    movdqa      xmm4, [rd GLOBAL]
    add         rsi,  rdx
    add         rax,  rdx
;--
;--
    paddsw      xmm5, xmm1
    paddsw      xmm5, xmm4
    psraw       xmm5, 7
    packuswb    xmm5, xmm5

    punpcklqdq  xmm2, xmm5              ;left half | right half

    movdqa       XMMWORD PTR [rdi], xmm2

%if ABI_IS_32BIT
    add         rdi,        DWORD PTR arg(3) ;out_pitch
%else
    add         rdi,        r8
%endif
    dec         rcx
    jnz         vp8_filter_block1d16_v4_ssse3_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
;void vp8_filter_block1d8_v6_ssse3
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    unsigned int   vp8_filter_index
;)
; Vertical 6 tap filter producing 8 output pixels per row, one row per loop
; iteration. For each output row the six source rows A..F are read starting at
; src_ptr (A = src_ptr, F = src_ptr + 5 * src_pitch), interleaved as (A,F), (B,D),
; (C,E) and multiplied by the k0_k5 / k1_k3 / k2_k4 taps with pmaddubsw. The sums
; are rounded with rd, shifted right by 7 and packed to unsigned bytes.
;
; vp8_filter_index selects one 16 byte entry in each tap table (index << 4). When the
; first dword of the k0_k5 entry is zero, rows A and F are skipped
; (vp8_filter_block1d8_v4_ssse3). output_height must be non-zero (dec/jnz loop).
;
; xmm6 and xmm7 are used, so the body is bracketed with SAVE_XMM / RESTORE_XMM
; (macros from vpx_ports/x86_abi_support.asm; presumably they preserve the Win64
; callee-saved XMM registers - confirm against that file).
global sym(vp8_filter_block1d8_v6_ssse3)
sym(vp8_filter_block1d8_v6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    SAVE_XMM                            ;xmm6, xmm7 are used below
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movsxd      rdx, DWORD PTR arg(5)   ;table index
    xor         rsi, rsi                ;zero, compared against the k0_k5 entry below
    shl         rdx, 4                  ;16 bytes per table entry

    lea         rax, [k0_k5 GLOBAL]
    add         rax, rdx

    ; these loads are shared by the 6 tap and 4 tap paths
    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
    mov         rdi, arg(2)             ;output_ptr
%if ABI_IS_32BIT=0
    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
%endif
    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]

    cmp         esi, DWORD PTR [rax]    ;outer taps zero -> 4 tap path
    je          vp8_filter_block1d8_v4_ssse3

    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3

    mov         rsi, arg(0)             ;src_ptr

    mov         rax, rsi
    add         rax, rdx                ;rax = rsi + pitch, used to reach rows D and F

vp8_filter_block1d8_v6_ssse3_loop:
    movq        xmm1, MMWORD PTR [rsi]                  ;A
    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E

    punpcklbw   xmm2, xmm4                  ;B D
    punpcklbw   xmm3, xmm0                  ;C E

    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
    movdqa      xmm4, [rd GLOBAL]           ;rounding constant

    pmaddubsw   xmm3, xmm6
    punpcklbw   xmm1, xmm0                  ;A F
    pmaddubsw   xmm2, xmm7
    pmaddubsw   xmm1, xmm5
    add         rsi,  rdx
    add         rax,  rdx
;--
;--
    paddsw      xmm2, xmm3
    paddsw      xmm2, xmm1
    paddsw      xmm2, xmm4
    psraw       xmm2, 7
    packuswb    xmm2, xmm2

    movq        MMWORD PTR [rdi], xmm2

%if ABI_IS_32BIT
    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
%else
    add         rdi,        r8
%endif
    dec         rcx
    jnz         vp8_filter_block1d8_v6_ssse3_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret

vp8_filter_block1d8_v4_ssse3:
    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
    movdqa      xmm5, [rd GLOBAL]               ;rounding constant

    mov         rsi, arg(0)             ;src_ptr

    mov         rax, rsi
    add         rax, rdx                ;rax = rsi + pitch, used to reach row D

vp8_filter_block1d8_v4_ssse3_loop:
    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E

    punpcklbw   xmm2, xmm4                  ;B D
    punpcklbw   xmm3, xmm0                  ;C E

    pmaddubsw   xmm3, xmm6
    pmaddubsw   xmm2, xmm7
    add         rsi,  rdx
    add         rax,  rdx
;--
;--
    paddsw      xmm2, xmm3
    paddsw      xmm2, xmm5
    psraw       xmm2, 7
    packuswb    xmm2, xmm2

    movq        MMWORD PTR [rdi], xmm2

%if ABI_IS_32BIT
    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
%else
    add         rdi,        r8
%endif
    dec         rcx
    jnz         vp8_filter_block1d8_v4_ssse3_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
;void vp8_filter_block1d4_v6_ssse3
;(
;    unsigned char *src_ptr,
;    unsigned int   src_pitch,
;    unsigned char *output_ptr,
;    unsigned int   out_pitch,
;    unsigned int   output_height,
;    unsigned int   vp8_filter_index
;)
; Vertical 6 tap filter writing 4 output pixels (one dword) per row, one row per
; loop iteration, using the 64 bit MMX registers. Source rows A..F start at src_ptr
; (A = src_ptr, F = src_ptr + 5 * src_pitch) and are interleaved as (A,F), (B,D),
; (C,E) before being multiplied by the k0_k5 / k1_k3 / k2_k4 taps.
;
; vp8_filter_index selects one 16 byte entry in each tap table (index << 4); only the
; low 8 bytes of an entry are loaded here. When the first dword of the k0_k5 entry is
; zero, rows A and F are skipped (vp8_filter_block1d4_v4_ssse3). output_height must
; be non-zero (dec/jnz loop).
;
; No emms is issued in this routine.
global sym(vp8_filter_block1d4_v6_ssse3)
sym(vp8_filter_block1d4_v6_ssse3):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    ; rax = &k0_k5[vp8_filter_index]
    movsxd      rdx, DWORD PTR arg(5)   ;table index
    xor         rsi, rsi                ;zero for the tap test below
    shl         rdx, 4                  ;16 bytes per table entry

    lea         rax, [k0_k5 GLOBAL]
    add         rax, rdx

    ; arguments common to both paths
    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
    mov         rdi, arg(2)             ;output_ptr
%if ABI_IS_32BIT=0
    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
%endif
    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]

    cmp         esi, DWORD PTR [rax]    ;outer taps zero -> 4 tap path
    je          vp8_filter_block1d4_v4_ssse3

    movq        mm5, MMWORD PTR [rax]           ;k0_k5
    movq        mm6, MMWORD PTR [rax+256]       ;k2_k4
    movq        mm7, MMWORD PTR [rax+128]       ;k1_k3

    mov         rsi, arg(0)             ;src_ptr

    lea         rax, [rsi + rdx]        ;second row pointer, reaches rows D and F

vp8_filter_block1d4_v6_ssse3_loop:
    movd        mm1, DWORD PTR [rsi]                    ;A
    movd        mm2, DWORD PTR [rsi + rdx]              ;B
    movd        mm3, DWORD PTR [rsi + rdx * 2]          ;C
    movd        mm4, DWORD PTR [rax + rdx * 2]          ;D
    movd        mm0, DWORD PTR [rsi + rdx * 4]          ;E

    punpcklbw   mm2, mm4                    ;B D
    punpcklbw   mm3, mm0                    ;C E

    movd        mm0, DWORD PTR [rax + rdx * 4]          ;F

    movq        mm4, [rd GLOBAL]            ;rounding constant

    pmaddubsw   mm3, mm6
    punpcklbw   mm1, mm0                    ;A F
    pmaddubsw   mm2, mm7
    pmaddubsw   mm1, mm5
    add         rsi,  rdx
    add         rax,  rdx
;--
;--
    paddsw      mm2, mm3
    paddsw      mm2, mm1
    paddsw      mm2, mm4
    psraw       mm2, 7
    packuswb    mm2, mm2

    movd        DWORD PTR [rdi], mm2

%if ABI_IS_32BIT
    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
%else
    add         rdi,        r8
%endif
    dec         rcx
    jnz         vp8_filter_block1d4_v6_ssse3_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

vp8_filter_block1d4_v4_ssse3:
    movq        mm6, MMWORD PTR [rax+256]       ;k2_k4
    movq        mm7, MMWORD PTR [rax+128]       ;k1_k3
    movq        mm5, MMWORD PTR [rd GLOBAL]     ;rounding constant

    mov         rsi, arg(0)             ;src_ptr

    lea         rax, [rsi + rdx]        ;second row pointer, reaches row D

vp8_filter_block1d4_v4_ssse3_loop:
    movd        mm2, DWORD PTR [rsi + rdx]              ;B
    movd        mm3, DWORD PTR [rsi + rdx * 2]          ;C
    movd        mm4, DWORD PTR [rax + rdx * 2]          ;D
    movd        mm0, DWORD PTR [rsi + rdx * 4]          ;E

    punpcklbw   mm2, mm4                    ;B D
    punpcklbw   mm3, mm0                    ;C E

    pmaddubsw   mm3, mm6
    pmaddubsw   mm2, mm7
    add         rsi,  rdx
    add         rax,  rdx
;--
;--
    paddsw      mm2, mm3
    paddsw      mm2, mm5
    psraw       mm2, 7
    packuswb    mm2, mm2

    movd        DWORD PTR [rdi], mm2

%if ABI_IS_32BIT
    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
%else
    add         rdi,        r8
%endif
    dec         rcx
    jnz         vp8_filter_block1d4_v4_ssse3_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
shuf1b:
|
||||
db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
|
||||
shuf2b:
|
||||
db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
|
||||
shuf3b:
|
||||
db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
|
||||
|
||||
align 16
|
||||
rd:
|
||||
times 8 dw 0x40
|
||||
|
||||
align 16
|
||||
k0_k5:
|
||||
times 8 db 0, 0 ;placeholder
|
||||
times 8 db 0, 0
|
||||
times 8 db 2, 1
|
||||
times 8 db 0, 0
|
||||
times 8 db 3, 3
|
||||
times 8 db 0, 0
|
||||
times 8 db 1, 2
|
||||
times 8 db 0, 0
|
||||
k1_k3:
|
||||
times 8 db 0, 0 ;placeholder
|
||||
times 8 db -6, 12
|
||||
times 8 db -11, 36
|
||||
times 8 db -9, 50
|
||||
times 8 db -16, 77
|
||||
times 8 db -6, 93
|
||||
times 8 db -8, 108
|
||||
times 8 db -1, 123
|
||||
k2_k4:
|
||||
times 8 db 128, 0 ;placeholder
|
||||
times 8 db 123, -1
|
||||
times 8 db 108, -8
|
||||
times 8 db 93, -6
|
||||
times 8 db 77, -16
|
||||
times 8 db 50, -9
|
||||
times 8 db 36, -11
|
||||
times 8 db 12, -6
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -86,4 +86,37 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
|
||||
//extern prototype_subpixel_predict(vp8_bilinear_predict16x16_sse2);
|
||||
//extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_subpix_sixtap16x16
|
||||
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_ssse3
|
||||
|
||||
#undef vp8_subpix_sixtap8x8
|
||||
#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_ssse3
|
||||
|
||||
#undef vp8_subpix_sixtap8x4
|
||||
#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3
|
||||
|
||||
#undef vp8_subpix_sixtap4x4
|
||||
#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
|
||||
|
||||
|
||||
//#undef vp8_subpix_bilinear16x16
|
||||
//#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_sse2
|
||||
|
||||
//#undef vp8_subpix_bilinear8x8
|
||||
//#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_sse2
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -68,6 +68,17 @@ extern void vp8_filter_block1d8_v6_sse2
|
|||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1d16_v6_sse2
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_unpack_block1d16_h6_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
|
@ -76,31 +87,32 @@ extern void vp8_unpack_block1d16_h6_sse2
|
|||
unsigned int output_height,
|
||||
unsigned int output_width
|
||||
);
|
||||
extern void vp8_unpack_block1d8_h6_sse2
|
||||
extern void vp8_filter_block1d8_h6_only_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_pack_block1d8_v6_sse2
|
||||
extern void vp8_filter_block1d16_h6_only_sse2
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern void vp8_filter_block1d8_v6_only_sse2
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width
|
||||
);
|
||||
extern void vp8_pack_block1d16_v6_sse2
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int dst_ptich,
|
||||
unsigned int pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx);
|
||||
|
||||
|
@ -247,23 +259,26 @@ void vp8_sixtap_predict16x16_sse2
|
|||
|
||||
if (xoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
|
||||
if (yoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter);
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
// First-pass only
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
|
||||
}
|
||||
|
||||
if (yoffset)
|
||||
{
|
||||
// Second-pass only
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter);
|
||||
vp8_filter_block1d8_v6_sse2(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_pack_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16, 16);
|
||||
vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32);
|
||||
vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -283,25 +298,26 @@ void vp8_sixtap_predict8x8_sse2
|
|||
|
||||
if (xoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
|
||||
if (yoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter);
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
// First-pass only
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 13, 16);
|
||||
}
|
||||
|
||||
if (yoffset)
|
||||
{
|
||||
// Second-pass only
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
|
||||
vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8, dst_pitch);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -320,24 +336,218 @@ void vp8_sixtap_predict8x4_sse2
|
|||
|
||||
if (xoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
if (yoffset)
|
||||
{
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter);
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
// First-pass only
|
||||
HFilter = vp8_six_tap_mmx[xoffset];
|
||||
vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 9, 16);
|
||||
}
|
||||
|
||||
if (yoffset)
|
||||
{
|
||||
// Second-pass only
|
||||
VFilter = vp8_six_tap_mmx[yoffset];
|
||||
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
|
||||
vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
|
||||
extern void vp8_filter_block1d8_h6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d16_h6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d16_v6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int out_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d8_v6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int out_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d4_h6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
extern void vp8_filter_block1d4_v6_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int out_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int vp8_filter_index
|
||||
);
|
||||
|
||||
void vp8_sixtap_predict16x16_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
|
||||
vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
// First-pass only
|
||||
vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 4, dst_pitch);
|
||||
// Second-pass only
|
||||
vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x8_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
|
||||
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Second-pass only
|
||||
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict8x4_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
|
||||
vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
// First-pass only
|
||||
vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Second-pass only
|
||||
vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict4x4_ssse3
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
|
||||
|
||||
if (xoffset)
|
||||
{
|
||||
if (yoffset)
|
||||
{
|
||||
vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
|
||||
vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -27,6 +27,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
int mmx_enabled = flags & HAS_MMX;
|
||||
int xmm_enabled = flags & HAS_SSE;
|
||||
int wmt_enabled = flags & HAS_SSE2;
|
||||
int SSSE3Enabled = flags & HAS_SSSE3;
|
||||
|
||||
/* Note:
|
||||
*
|
||||
|
@ -42,7 +43,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
{
|
||||
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx;
|
||||
rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
|
||||
rtcd->idct.idct1_scalar = vp8_dc_only_idct_mmx;
|
||||
rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
|
||||
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx;
|
||||
rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx;
|
||||
|
||||
|
@ -114,5 +115,17 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
|
||||
if (SSSE3Enabled)
|
||||
{
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3;
|
||||
rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_ssse3;
|
||||
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
|||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
|
||||
{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
|
@ -26,86 +26,41 @@ DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
|
|||
};
|
||||
|
||||
|
||||
static void copy_in(BOOL_DECODER *br, unsigned int to_write)
|
||||
{
|
||||
if (to_write > br->user_buffer_sz)
|
||||
to_write = br->user_buffer_sz;
|
||||
|
||||
memcpy(br->write_ptr, br->user_buffer, to_write);
|
||||
br->user_buffer += to_write;
|
||||
br->user_buffer_sz -= to_write;
|
||||
br->write_ptr = br_ptr_advance(br->write_ptr, to_write);
|
||||
}
|
||||
|
||||
int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
|
||||
unsigned int source_sz)
|
||||
{
|
||||
br->lowvalue = 0;
|
||||
br->user_buffer_end = source+source_sz;
|
||||
br->user_buffer = source;
|
||||
br->value = 0;
|
||||
br->count = -8;
|
||||
br->range = 255;
|
||||
br->count = 0;
|
||||
br->user_buffer = source;
|
||||
br->user_buffer_sz = source_sz;
|
||||
|
||||
if (source_sz && !source)
|
||||
return 1;
|
||||
|
||||
/* Allocate the ring buffer backing store with alignment equal to the
|
||||
* buffer size*2 so that a single pointer can be used for wrapping rather
|
||||
* than a pointer+offset.
|
||||
*/
|
||||
br->decode_buffer = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
|
||||
VP8_BOOL_DECODER_SZ);
|
||||
|
||||
if (!br->decode_buffer)
|
||||
return 1;
|
||||
|
||||
/* Populate the buffer */
|
||||
br->read_ptr = br->decode_buffer;
|
||||
br->write_ptr = br->decode_buffer;
|
||||
copy_in(br, VP8_BOOL_DECODER_SZ);
|
||||
vp8dx_bool_decoder_fill_c(br);
|
||||
|
||||
/* Read the first byte */
|
||||
br->value = (*br->read_ptr++) << 8;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
|
||||
{
|
||||
int left, right;
|
||||
const unsigned char *bufptr;
|
||||
const unsigned char *bufend;
|
||||
VP8_BD_VALUE value;
|
||||
int count;
|
||||
bufend = br->user_buffer_end;
|
||||
bufptr = br->user_buffer;
|
||||
value = br->value;
|
||||
count = br->count;
|
||||
|
||||
/* Find available room in the buffer */
|
||||
left = 0;
|
||||
right = br->read_ptr - br->write_ptr;
|
||||
VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
|
||||
|
||||
if (right < 0)
|
||||
{
|
||||
/* Read pointer is behind the write pointer. We can write from the
|
||||
* write pointer to the end of the buffer.
|
||||
*/
|
||||
right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
|
||||
left = br->read_ptr - br->decode_buffer;
|
||||
}
|
||||
|
||||
if (right + left < 128)
|
||||
return;
|
||||
|
||||
if (right)
|
||||
copy_in(br, right);
|
||||
|
||||
if (left)
|
||||
{
|
||||
br->write_ptr = br->decode_buffer;
|
||||
copy_in(br, left);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8dx_stop_decode_c(BOOL_DECODER *bc)
|
||||
{
|
||||
vpx_free(bc->decode_buffer);
|
||||
bc->decode_buffer = 0;
|
||||
br->user_buffer = bufptr;
|
||||
br->value = value;
|
||||
br->count = count;
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
@ -120,13 +75,18 @@ void vp8dx_stop_decode_c(BOOL_DECODER *bc)
|
|||
int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
|
||||
{
|
||||
unsigned int bit=0;
|
||||
VP8_BD_VALUE value;
|
||||
unsigned int split;
|
||||
unsigned int bigsplit;
|
||||
register unsigned int range = br->range;
|
||||
register unsigned int value = br->value;
|
||||
VP8_BD_VALUE bigsplit;
|
||||
int count;
|
||||
unsigned int range;
|
||||
|
||||
value = br->value;
|
||||
count = br->count;
|
||||
range = br->range;
|
||||
|
||||
split = 1 + (((range-1) * probability) >> 8);
|
||||
bigsplit = (split<<8);
|
||||
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
|
||||
|
||||
range = split;
|
||||
if(value >= bigsplit)
|
||||
|
@ -144,21 +104,16 @@ int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
|
|||
}*/
|
||||
|
||||
{
|
||||
int count = br->count;
|
||||
register unsigned int shift = vp8dx_bitreader_norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
if(count <= 0)
|
||||
{
|
||||
value |= (*br->read_ptr) << (-count);
|
||||
br->read_ptr = br_ptr_advance(br->read_ptr, 1);
|
||||
count += 8 ;
|
||||
}
|
||||
br->count = count;
|
||||
}
|
||||
br->value = value;
|
||||
br->count = count;
|
||||
br->range = range;
|
||||
if (count < 0)
|
||||
vp8dx_bool_decoder_fill_c(br);
|
||||
return bit;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,61 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef DBOOLHUFF_H
|
||||
#define DBOOLHUFF_H
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
/* Size of the bool decoder backing storage
|
||||
*
|
||||
* This size was chosen to be greater than the worst case encoding of a
|
||||
* single macroblock. This was calcluated as follows (python):
|
||||
*
|
||||
* def max_cost(prob):
|
||||
* return max(prob_costs[prob], prob_costs[255-prob]) / 256;
|
||||
*
|
||||
* tree_nodes_cost = 7 * max_cost(255)
|
||||
* extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
|
||||
* sign_bit_cost = max_cost(128)
|
||||
* total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
|
||||
*
|
||||
* where the prob_costs table was taken from the C vp8_prob_cost table in
|
||||
* boolhuff.c and the extra_bits table was taken from the 11 extrabits for
|
||||
* a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
|
||||
*
|
||||
* This equation produced a maximum of 79 bits per coefficient. Scaling up
|
||||
* to the macroblock level:
|
||||
*
|
||||
* 79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
|
||||
*
|
||||
* 4096 bytes = 32768 bits > 31600
|
||||
*/
|
||||
#define VP8_BOOL_DECODER_SZ 4096
|
||||
#define VP8_BOOL_DECODER_MASK (VP8_BOOL_DECODER_SZ-1)
|
||||
#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
|
||||
typedef size_t VP8_BD_VALUE;
|
||||
|
||||
# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
|
||||
/*This is meant to be a large, positive constant that can still be efficiently
|
||||
loaded as an immediate (on platforms like ARM, for example).
|
||||
Even relatively modest values like 100 would work fine.*/
|
||||
# define VP8_LOTS_OF_BITS (0x40000000)
|
||||
|
||||
|
||||
|
||||
struct vp8_dboolhuff_rtcd_vtable;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned int lowvalue;
|
||||
unsigned int range;
|
||||
unsigned int value;
|
||||
int count;
|
||||
const unsigned char *user_buffer_end;
|
||||
const unsigned char *user_buffer;
|
||||
unsigned int user_buffer_sz;
|
||||
unsigned char *decode_buffer;
|
||||
const unsigned char *read_ptr;
|
||||
unsigned char *write_ptr;
|
||||
VP8_BD_VALUE value;
|
||||
int count;
|
||||
unsigned int range;
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
struct vp8_dboolhuff_rtcd_vtable *rtcd;
|
||||
#endif
|
||||
|
@ -63,7 +43,6 @@ typedef struct
|
|||
|
||||
#define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
|
||||
const unsigned char *source, unsigned int source_sz)
|
||||
#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
|
||||
#define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
|
||||
#define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
|
||||
#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
|
||||
|
@ -76,10 +55,6 @@ typedef struct
|
|||
#define vp8_dbool_start vp8dx_start_decode_c
|
||||
#endif
|
||||
|
||||
#ifndef vp8_dbool_stop
|
||||
#define vp8_dbool_stop vp8dx_stop_decode_c
|
||||
#endif
|
||||
|
||||
#ifndef vp8_dbool_fill
|
||||
#define vp8_dbool_fill vp8dx_bool_decoder_fill_c
|
||||
#endif
|
||||
|
@ -93,20 +68,17 @@ typedef struct
|
|||
#endif
|
||||
|
||||
extern prototype_dbool_start(vp8_dbool_start);
|
||||
extern prototype_dbool_stop(vp8_dbool_stop);
|
||||
extern prototype_dbool_fill(vp8_dbool_fill);
|
||||
extern prototype_dbool_debool(vp8_dbool_debool);
|
||||
extern prototype_dbool_devalue(vp8_dbool_devalue);
|
||||
|
||||
typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
|
||||
typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
|
||||
typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
|
||||
typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
|
||||
typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
|
||||
|
||||
typedef struct vp8_dboolhuff_rtcd_vtable {
|
||||
vp8_dbool_start_fn_t start;
|
||||
vp8_dbool_stop_fn_t stop;
|
||||
vp8_dbool_fill_fn_t fill;
|
||||
vp8_dbool_debool_fn_t debool;
|
||||
vp8_dbool_devalue_fn_t devalue;
|
||||
|
@ -123,18 +95,7 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
|
|||
#define IF_RTCD(x) NULL
|
||||
//#endif
|
||||
|
||||
static unsigned char *br_ptr_advance(const unsigned char *_ptr,
|
||||
unsigned int n)
|
||||
{
|
||||
uintptr_t ptr = (uintptr_t)_ptr;
|
||||
|
||||
ptr += n;
|
||||
ptr &= VP8_BOOL_DECODER_PTR_MASK;
|
||||
|
||||
return (void *)ptr;
|
||||
}
|
||||
|
||||
DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
|
||||
DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
|
||||
|
||||
/* wrapper functions to hide RTCD. static means inline means hopefully no
|
||||
* penalty
|
||||
|
@ -147,12 +108,34 @@ static int vp8dx_start_decode(BOOL_DECODER *br,
|
|||
#endif
|
||||
return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
|
||||
}
|
||||
static void vp8dx_stop_decode(BOOL_DECODER *br) {
|
||||
DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
|
||||
}
|
||||
static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
|
||||
DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
|
||||
}
|
||||
|
||||
/*The refill loop is used in several places, so define it in a macro to make
|
||||
sure they're all consistent.
|
||||
An inline function would be cleaner, but has a significant penalty, because
|
||||
multiple BOOL_DECODER fields must be modified, and the compiler is not smart
|
||||
enough to eliminate the stores to those fields and the subsequent reloads
|
||||
from them when inlining the function.*/
|
||||
#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
|
||||
do \
|
||||
{ \
|
||||
int shift; \
|
||||
for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
|
||||
{ \
|
||||
if((_bufptr) >= (_bufend)) { \
|
||||
(_count) = VP8_LOTS_OF_BITS; \
|
||||
break; \
|
||||
} \
|
||||
(_count) += 8; \
|
||||
(_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \
|
||||
shift -= 8; \
|
||||
} \
|
||||
} \
|
||||
while(0)
|
||||
|
||||
|
||||
static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
|
||||
/*
|
||||
* Until optimized versions of this function are available, we
|
||||
|
@ -161,13 +144,18 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
|
|||
*return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
|
||||
*/
|
||||
unsigned int bit = 0;
|
||||
VP8_BD_VALUE value;
|
||||
unsigned int split;
|
||||
unsigned int bigsplit;
|
||||
register unsigned int range = br->range;
|
||||
register unsigned int value = br->value;
|
||||
VP8_BD_VALUE bigsplit;
|
||||
int count;
|
||||
unsigned int range;
|
||||
|
||||
value = br->value;
|
||||
count = br->count;
|
||||
range = br->range;
|
||||
|
||||
split = 1 + (((range - 1) * probability) >> 8);
|
||||
bigsplit = (split << 8);
|
||||
bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
|
||||
|
||||
range = split;
|
||||
|
||||
|
@ -186,23 +174,16 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
|
|||
}*/
|
||||
|
||||
{
|
||||
int count = br->count;
|
||||
register unsigned int shift = vp8dx_bitreader_norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
|
||||
if (count <= 0)
|
||||
{
|
||||
value |= (*br->read_ptr) << (-count);
|
||||
br->read_ptr = br_ptr_advance(br->read_ptr, 1);
|
||||
count += 8 ;
|
||||
}
|
||||
|
||||
br->count = count;
|
||||
}
|
||||
br->value = value;
|
||||
br->count = count;
|
||||
br->range = range;
|
||||
if(count < 0)
|
||||
vp8dx_bool_decoder_fill(br);
|
||||
return bit;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -171,8 +171,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
|
|||
VP8_COMMON *const pc = &pbi->common;
|
||||
MACROBLOCKD *xd = &pbi->mb;
|
||||
|
||||
vp8dx_bool_decoder_fill(bc);
|
||||
|
||||
mbmi->need_to_clamp_mvs = 0;
|
||||
// Distance of Mb to the various image edges.
|
||||
// These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
|
||||
xd->mb_to_left_edge = -((mb_col * 16) << 3);
|
||||
|
@ -270,17 +269,16 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
|
|||
break;
|
||||
}
|
||||
|
||||
/* Clip the MV for this partition so that it does
|
||||
not extend to far out of image. */
|
||||
if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN + 7)
|
||||
mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN + 7;
|
||||
|
||||
if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN + 7)
|
||||
mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN + 7;
|
||||
if (mv->col < xd->mb_to_left_edge
|
||||
- LEFT_TOP_MARGIN
|
||||
|| mv->col > xd->mb_to_right_edge
|
||||
+ RIGHT_BOTTOM_MARGIN
|
||||
|| mv->row < xd->mb_to_top_edge
|
||||
- LEFT_TOP_MARGIN
|
||||
|| mv->row > xd->mb_to_bottom_edge
|
||||
+ RIGHT_BOTTOM_MARGIN
|
||||
)
|
||||
mbmi->need_to_clamp_mvs = 1;
|
||||
|
||||
/* Fill (uniform) modes, mvs of jth subset.
|
||||
Must do it here because ensuing subsets can
|
||||
|
@ -338,27 +336,18 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
|
|||
read_mv(bc, mv, (const MV_CONTEXT *) mvc);
|
||||
mv->row += best_mv.row;
|
||||
mv->col += best_mv.col;
|
||||
/* Encoder should not produce invalid motion vectors, but since
|
||||
* arbitrary length MVs can be parsed from the bitstream, we
|
||||
* need to clamp them here in case we're reading bad data to
|
||||
* avoid a crash.
|
||||
|
||||
/* Don't need to check this on NEARMV and NEARESTMV modes
|
||||
* since those modes clamp the MV. The NEWMV mode does not,
|
||||
* so signal to the prediction stage whether special
|
||||
* handling may be required.
|
||||
*/
|
||||
#if CONFIG_DEBUG
|
||||
assert(mv->col >= (xd->mb_to_left_edge - LEFT_TOP_MARGIN));
|
||||
assert(mv->col <= (xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN));
|
||||
assert(mv->row >= (xd->mb_to_top_edge - LEFT_TOP_MARGIN));
|
||||
assert(mv->row <= (xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN));
|
||||
#endif
|
||||
|
||||
if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
|
||||
if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
if (mv->col < xd->mb_to_left_edge - LEFT_TOP_MARGIN
|
||||
|| mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN
|
||||
|| mv->row < xd->mb_to_top_edge - LEFT_TOP_MARGIN
|
||||
|| mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN
|
||||
)
|
||||
mbmi->need_to_clamp_mvs = 1;
|
||||
|
||||
propagate_mv: /* same MV throughout */
|
||||
{
|
||||
|
@ -394,7 +383,6 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
|
|||
assert(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
|||
|
||||
extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
|
||||
MACROBLOCKD *xd);
|
||||
extern void vp8_mt_loop_filter_frame(VP8D_COMP *pbi);
|
||||
extern void vp8_stop_lfthread(VP8D_COMP *pbi);
|
||||
extern void vp8_start_lfthread(VP8D_COMP *pbi);
|
||||
extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче