Merge branch 'master' into nextgenv2
This commit is contained in:
Коммит
f73feedb9e
|
@ -394,7 +394,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
|||
sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V;
|
||||
} else {
|
||||
for (i = 0; i < BLOCK_SIZES; ++i)
|
||||
if (i >= BLOCK_16X16)
|
||||
if (i > BLOCK_16X16)
|
||||
sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
|
||||
else
|
||||
// Use H and V intra mode for block sizes <= 16X16.
|
||||
|
|
|
@ -79,20 +79,13 @@ SECTION .text
|
|||
|
||||
%macro INC_SRC_BY_SRC_STRIDE 0
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
lea srcq, [srcq + src_stridemp*2]
|
||||
add srcq, src_stridemp
|
||||
add srcq, src_stridemp
|
||||
%else
|
||||
lea srcq, [srcq + src_strideq*2]
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro INC_SRC_BY_SRC_2STRIDE 0
|
||||
%if ARCH_X86=1 && CONFIG_PIC=1
|
||||
lea srcq, [srcq + src_stridemp*4]
|
||||
%else
|
||||
lea srcq, [srcq + src_strideq*4]
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%macro SUBPEL_VARIANCE 1-2 0 ; W
|
||||
%define bilin_filter_m bilin_filter_m_sse2
|
||||
%define filter_idx_shift 5
|
||||
|
@ -984,8 +977,9 @@ SECTION .text
|
|||
.x_other_y_other_loop:
|
||||
movu m2, [srcq]
|
||||
movu m4, [srcq+2]
|
||||
movu m3, [srcq+src_strideq*2]
|
||||
movu m5, [srcq+src_strideq*2+2]
|
||||
INC_SRC_BY_SRC_STRIDE
|
||||
movu m3, [srcq]
|
||||
movu m5, [srcq+2]
|
||||
pmullw m2, filter_x_a
|
||||
pmullw m4, filter_x_b
|
||||
paddw m2, filter_rnd
|
||||
|
@ -1018,7 +1012,7 @@ SECTION .text
|
|||
SUM_SSE m0, m2, m4, m3, m6, m7
|
||||
mova m0, m5
|
||||
|
||||
INC_SRC_BY_SRC_2STRIDE
|
||||
INC_SRC_BY_SRC_STRIDE
|
||||
lea dstq, [dstq + dst_strideq * 4]
|
||||
%if %2 == 1 ; avg
|
||||
add secq, sec_str
|
||||
|
|
|
@ -243,13 +243,18 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
|
|||
}
|
||||
|
||||
#if CONFIG_USE_X86INC
|
||||
// The 2 unused parameters are placeholders for PIC-enabled builds.
|
||||
// These definitions are for functions defined in
|
||||
// highbd_subpel_variance_impl_sse2.asm
|
||||
#define DECL(w, opt) \
|
||||
int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
|
||||
ptrdiff_t src_stride, \
|
||||
int x_offset, int y_offset, \
|
||||
const uint16_t *dst, \
|
||||
ptrdiff_t dst_stride, \
|
||||
int height, unsigned int *sse);
|
||||
int height, \
|
||||
unsigned int *sse, \
|
||||
void *unused0, void *unused);
|
||||
#define DECLS(opt1, opt2) \
|
||||
DECL(8, opt1); \
|
||||
DECL(16, opt1)
|
||||
|
@ -274,7 +279,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
|
|||
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst, dst_stride, h, \
|
||||
&sse); \
|
||||
&sse, NULL, NULL); \
|
||||
if (w > wf) { \
|
||||
unsigned int sse2; \
|
||||
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
|
||||
|
@ -282,19 +287,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
|
|||
x_offset, y_offset, \
|
||||
dst + 16, \
|
||||
dst_stride, \
|
||||
h, &sse2); \
|
||||
h, &sse2, \
|
||||
NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
if (w > wf * 2) { \
|
||||
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 32, dst_stride, \
|
||||
h, &sse2); \
|
||||
h, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
||||
src + 48, src_stride, x_offset, y_offset, \
|
||||
dst + 48, dst_stride, h, &sse2); \
|
||||
dst + 48, dst_stride, h, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
} \
|
||||
|
@ -312,7 +318,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
|
|||
int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst, dst_stride, \
|
||||
h, &sse); \
|
||||
h, &sse, NULL, NULL); \
|
||||
if (w > wf) { \
|
||||
uint32_t sse2; \
|
||||
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
|
||||
|
@ -320,20 +326,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
|
|||
x_offset, y_offset, \
|
||||
dst + 16, \
|
||||
dst_stride, \
|
||||
h, &sse2); \
|
||||
h, &sse2, \
|
||||
NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
if (w > wf * 2) { \
|
||||
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 32, dst_stride, \
|
||||
h, &sse2); \
|
||||
h, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 48, dst_stride, \
|
||||
h, &sse2); \
|
||||
h, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
} \
|
||||
|
@ -359,27 +366,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
|
|||
int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
||||
src + (start_row * src_stride), src_stride, \
|
||||
x_offset, y_offset, dst + (start_row * dst_stride), \
|
||||
dst_stride, height, &sse2); \
|
||||
dst_stride, height, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
long_sse += sse2; \
|
||||
if (w > wf) { \
|
||||
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
||||
src + 16 + (start_row * src_stride), src_stride, \
|
||||
x_offset, y_offset, dst + 16 + (start_row * dst_stride), \
|
||||
dst_stride, height, &sse2); \
|
||||
dst_stride, height, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
long_sse += sse2; \
|
||||
if (w > wf * 2) { \
|
||||
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
||||
src + 32 + (start_row * src_stride), src_stride, \
|
||||
x_offset, y_offset, dst + 32 + (start_row * dst_stride), \
|
||||
dst_stride, height, &sse2); \
|
||||
dst_stride, height, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
long_sse += sse2; \
|
||||
se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
|
||||
src + 48 + (start_row * src_stride), src_stride, \
|
||||
x_offset, y_offset, dst + 48 + (start_row * dst_stride), \
|
||||
dst_stride, height, &sse2); \
|
||||
dst_stride, height, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
long_sse += sse2; \
|
||||
}\
|
||||
|
@ -410,6 +417,7 @@ FNS(sse2, sse);
|
|||
#undef FNS
|
||||
#undef FN
|
||||
|
||||
// The 2 unused parameters are placeholders for PIC-enabled builds.
|
||||
#define DECL(w, opt) \
|
||||
int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
|
||||
ptrdiff_t src_stride, \
|
||||
|
@ -419,7 +427,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
|
|||
const uint16_t *sec, \
|
||||
ptrdiff_t sec_stride, \
|
||||
int height, \
|
||||
unsigned int *sse);
|
||||
unsigned int *sse, \
|
||||
void *unused0, void *unused);
|
||||
#define DECLS(opt1) \
|
||||
DECL(16, opt1) \
|
||||
DECL(8, opt1)
|
||||
|
@ -439,23 +448,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|||
uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
|
||||
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src, src_stride, x_offset, \
|
||||
y_offset, dst, dst_stride, sec, w, h, &sse); \
|
||||
y_offset, dst, dst_stride, sec, w, h, &sse, NULL, NULL); \
|
||||
if (w > wf) { \
|
||||
uint32_t sse2; \
|
||||
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src + 16, src_stride, x_offset, y_offset, \
|
||||
dst + 16, dst_stride, sec + 16, w, h, &sse2); \
|
||||
dst + 16, dst_stride, sec + 16, w, h, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
if (w > wf * 2) { \
|
||||
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src + 32, src_stride, x_offset, y_offset, \
|
||||
dst + 32, dst_stride, sec + 32, w, h, &sse2); \
|
||||
dst + 32, dst_stride, sec + 32, w, h, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src + 48, src_stride, x_offset, y_offset, \
|
||||
dst + 48, dst_stride, sec + 48, w, h, &sse2); \
|
||||
dst + 48, dst_stride, sec + 48, w, h, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
} \
|
||||
|
@ -475,14 +484,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|||
int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src, src_stride, x_offset, \
|
||||
y_offset, dst, dst_stride, \
|
||||
sec, w, h, &sse); \
|
||||
sec, w, h, &sse, NULL, NULL); \
|
||||
if (w > wf) { \
|
||||
uint32_t sse2; \
|
||||
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src + 16, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 16, dst_stride, \
|
||||
sec + 16, w, h, &sse2); \
|
||||
sec + 16, w, h, &sse2, \
|
||||
NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
if (w > wf * 2) { \
|
||||
|
@ -490,14 +500,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|||
src + 32, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 32, dst_stride, \
|
||||
sec + 32, w, h, &sse2); \
|
||||
sec + 32, w, h, &sse2, \
|
||||
NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src + 48, src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 48, dst_stride, \
|
||||
sec + 48, w, h, &sse2); \
|
||||
sec + 48, w, h, &sse2, \
|
||||
NULL, NULL); \
|
||||
se += se2; \
|
||||
sse += sse2; \
|
||||
} \
|
||||
|
@ -525,7 +537,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|||
int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src + (start_row * src_stride), src_stride, x_offset, \
|
||||
y_offset, dst + (start_row * dst_stride), dst_stride, \
|
||||
sec + (start_row * w), w, height, &sse2); \
|
||||
sec + (start_row * w), w, height, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
long_sse += sse2; \
|
||||
if (w > wf) { \
|
||||
|
@ -533,7 +545,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|||
src + 16 + (start_row * src_stride), src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 16 + (start_row * dst_stride), dst_stride, \
|
||||
sec + 16 + (start_row * w), w, height, &sse2); \
|
||||
sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
long_sse += sse2; \
|
||||
if (w > wf * 2) { \
|
||||
|
@ -541,14 +553,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
|
|||
src + 32 + (start_row * src_stride), src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 32 + (start_row * dst_stride), dst_stride, \
|
||||
sec + 32 + (start_row * w), w, height, &sse2); \
|
||||
sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
long_sse += sse2; \
|
||||
se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
|
||||
src + 48 + (start_row * src_stride), src_stride, \
|
||||
x_offset, y_offset, \
|
||||
dst + 48 + (start_row * dst_stride), dst_stride, \
|
||||
sec + 48 + (start_row * w), w, height, &sse2); \
|
||||
sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \
|
||||
se += se2; \
|
||||
long_sse += sse2; \
|
||||
} \
|
||||
|
|
|
@ -17,7 +17,7 @@ SECTION .text
|
|||
%if %3 == 5
|
||||
cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, n_rows
|
||||
%else ; %3 == 7
|
||||
cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \
|
||||
cglobal sad%1x%2, 4, %3, 6, src, src_stride, ref, ref_stride, \
|
||||
src_stride3, ref_stride3, n_rows
|
||||
%endif ; %3 == 5/7
|
||||
%else ; avg
|
||||
|
@ -25,7 +25,7 @@ cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, \
|
|||
cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \
|
||||
second_pred, n_rows
|
||||
%else ; %3 == 7
|
||||
cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 5, src, src_stride, \
|
||||
cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \
|
||||
ref, ref_stride, \
|
||||
second_pred, \
|
||||
src_stride3, ref_stride3
|
||||
|
@ -244,9 +244,9 @@ SAD8XN 4, 1 ; sad8x4_avg_sse2
|
|||
movd m2, [srcq]
|
||||
movd m5, [srcq+src_strideq]
|
||||
movd m4, [srcq+src_strideq*2]
|
||||
movd m6, [srcq+src_stride3q]
|
||||
movd m3, [srcq+src_stride3q]
|
||||
punpckldq m2, m5
|
||||
punpckldq m4, m6
|
||||
punpckldq m4, m3
|
||||
movlhps m2, m4
|
||||
psadbw m1, m2
|
||||
lea refq, [refq+ref_strideq*4]
|
||||
|
|
Загрузка…
Ссылка в новой задаче