Merge "Reclasify optimized ssim calculations as SSE2."
Commit f8e3d23b99
@@ -94,16 +94,15 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
 #if !(CONFIG_REALTIME_ONLY)
     cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_c;
 #endif
+#if CONFIG_INTERNAL_STATS
+    cpi->rtcd.variance.ssimpf_8x8            = vp8_ssim_parms_8x8_c;
+    cpi->rtcd.variance.ssimpf_16x16          = vp8_ssim_parms_16x16_c;
+#endif
 #endif
 
     // Pure C:
     vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
 
-#if CONFIG_INTERNAL_STATS
-    cpi->rtcd.variance.ssimpf_8x8            = ssim_parms_8x8_c;
-    cpi->rtcd.variance.ssimpf                = ssim_parms_c;
-#endif
-
 #if ARCH_X86 || ARCH_X86_64
     vp8_arch_x86_encoder_init(cpi);
 #endif
@@ -9,18 +9,9 @@
  */
 
 
-#include "vpx_scale/yv12config.h"
-#include "math.h"
 #include "onyx_int.h"
 
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x)  (x)
-#else
-#define IF_RTCD(x)  NULL
-#endif
-
-
-void ssim_parms_c
+void vp8_ssim_parms_16x16_c
 (
     unsigned char *s,
     int sp,
@@ -46,7 +37,7 @@ void ssim_parms_c
         }
     }
 }
-void ssim_parms_8x8_c
+void vp8_ssim_parms_8x8_c
 (
     unsigned char *s,
     int sp,
@@ -107,14 +98,14 @@ static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp,
                          const vp8_variance_rtcd_vtable_t *rtcd)
 {
     unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
-    rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+    SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
     return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256);
 }
 static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp,
                        const vp8_variance_rtcd_vtable_t *rtcd)
 {
     unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0;
-    rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+    SSIMPF_INVOKE(rtcd,8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
     return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
 }
 
@@ -134,7 +125,7 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp,
     c1 = cc1*16;
     c2 = cc2*16;
 
-    rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
+    SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr);
     ssim_n1 = (2*sum_s*sum_r+ c1);
 
     ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2);
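(Editor's note, for context on the two ssim.c hunks above: the five accumulators a vp8_ssim_parms_* kernel fills in — sum_s, sum_r, sum_sq_s, sum_sq_r and sum_sxr — are all that the SSIM formula needs for one window. The sketch below is a hypothetical, self-contained illustration of that last step; the helper name ssim_from_parms and the textbook constants K1=0.01, K2=0.03, L=255 are assumptions, not the exact cc1/cc2 scaling used in ssim.c.)

/*
 * Editorial sketch only: turn the five per-window sums into an SSIM score.
 * Build with a C compiler and run; two identical flat blocks score 1.0.
 */
#include <stdio.h>

static double ssim_from_parms(unsigned long sum_s, unsigned long sum_r,
                              unsigned long sum_sq_s, unsigned long sum_sq_r,
                              unsigned long sum_sxr, int count)
{
    const double c1 = (0.01 * 255) * (0.01 * 255);   /* assumed K1=0.01, L=255 */
    const double c2 = (0.03 * 255) * (0.03 * 255);   /* assumed K2=0.03, L=255 */
    double mean_s = (double)sum_s / count;
    double mean_r = (double)sum_r / count;
    double var_s  = (double)sum_sq_s / count - mean_s * mean_s;
    double var_r  = (double)sum_sq_r / count - mean_r * mean_r;
    double cov    = (double)sum_sxr  / count - mean_s * mean_r;

    return ((2 * mean_s * mean_r + c1) * (2 * cov + c2)) /
           ((mean_s * mean_s + mean_r * mean_r + c1) * (var_s + var_r + c2));
}

int main(void)
{
    /* Two identical flat 8x8 blocks of value 128: SSIM should print 1.0. */
    unsigned long n = 64, v = 128;
    printf("%f\n", ssim_from_parms(n * v, n * v, n * v * v, n * v * v,
                                   n * v * v, (int)n));
    return 0;
}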
@@ -320,16 +320,16 @@ extern prototype_variance(vp8_variance_mse16x16);
 #endif
 extern prototype_get16x16prederror(vp8_variance_get4x4sse_cs);
 
-#ifndef vp8_ssimpf
-#define vp8_ssimpf ssim_parms_c
-#endif
-extern prototype_ssimpf(vp8_ssimpf)
-
 #ifndef vp8_ssimpf_8x8
-#define vp8_ssimpf_8x8 ssim_parms_8x8_c
+#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c
 #endif
 extern prototype_ssimpf(vp8_ssimpf_8x8)
 
+#ifndef vp8_ssimpf_16x16
+#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c
+#endif
+extern prototype_ssimpf(vp8_ssimpf_16x16)
+
 typedef prototype_sad(*vp8_sad_fn_t);
 typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
 typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
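(Editor's note: each extern prototype_ssimpf(...) declaration above uses the same nine-parameter shape that this commit deletes as a local "typedef void ssimpf(...)" from x86_csystemdependent.c further down in the diff. Below is a minimal sketch of that shared signature; the typedef name is invented here, and it is an assumption about what the prototype_ssimpf macro wraps, not a copy of it.)

/* Illustrative only: parameter list taken from the typedef removed below;
 * the name vp8_ssimpf_sketch_fn does not exist in libvpx. */
typedef void vp8_ssimpf_sketch_fn(unsigned char *s, int sp,   /* source block, stride    */
                                  unsigned char *r, int rp,   /* reference block, stride */
                                  unsigned long *sum_s, unsigned long *sum_r,
                                  unsigned long *sum_sq_s, unsigned long *sum_sq_r,
                                  unsigned long *sum_sxr);    /* sum of s*r products     */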
@@ -394,7 +394,7 @@ typedef struct
 
 #if CONFIG_INTERNAL_STATS
     vp8_ssimpf_fn_t          ssimpf_8x8;
-    vp8_ssimpf_fn_t          ssimpf;
+    vp8_ssimpf_fn_t          ssimpf_16x16;
 #endif
 
 } vp8_variance_rtcd_vtable_t;
@@ -417,8 +417,10 @@ typedef struct
 
 #if CONFIG_RUNTIME_CPU_DETECT
 #define VARIANCE_INVOKE(ctx,fn) (ctx)->fn
+#define SSIMPF_INVOKE(ctx,fn) (ctx)->ssimpf_##fn
 #else
 #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
+#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn
 #endif
 
 #endif
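(Editor's note: a tiny, compilable demo of how the two SSIMPF_INVOKE definitions added above dispatch. In variance.h a single macro name is selected by #if CONFIG_RUNTIME_CPU_DETECT; here the two variants get distinct names so one program can show both expansions. Every type and function below is a mock-up, not libvpx code.)

#include <stdio.h>

typedef void (*ssimpf_fn_t)(int count);                       /* stand-in signature */
typedef struct { ssimpf_fn_t ssimpf_8x8; } variance_vtable_t; /* stand-in vtable    */

static void parms_8x8_stub(int count) { printf("8x8 window, %d pixels\n", count); }

/* Runtime-detect flavor: pick the pointer out of the vtable. */
#define SSIMPF_INVOKE_RTCD(ctx, fn)   (ctx)->ssimpf_##fn
/* Static flavor: paste the compile-time default symbol name.  */
#define SSIMPF_INVOKE_STATIC(ctx, fn) vp8_ssimpf_##fn
#define vp8_ssimpf_8x8 parms_8x8_stub

int main(void)
{
    variance_vtable_t v = { parms_8x8_stub };

    SSIMPF_INVOKE_RTCD(&v, 8x8)(64);    /* expands to (&v)->ssimpf_8x8(64) */
    SSIMPF_INVOKE_STATIC(&v, 8x8)(64);  /* expands to vp8_ssimpf_8x8(64)   */
    return 0;
}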
@@ -44,7 +44,7 @@
     paddd %1, xmm1
     SUM_ACROSS_Q %1
 %endmacro
-;void ssim_parms_sse3(
+;void ssim_parms_sse2(
 ;    unsigned char *s,
 ;    int sp,
 ;    unsigned char *r,
@@ -61,8 +61,8 @@
 ; or pavgb At this point this is just meant to be first pass for calculating
 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
 ; in mode selection code.
-global sym(vp8_ssim_parms_16x16_sse3)
-sym(vp8_ssim_parms_16x16_sse3):
+global sym(vp8_ssim_parms_16x16_sse2)
+sym(vp8_ssim_parms_16x16_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 9
@@ -134,7 +134,7 @@ NextRow:
     pop         rbp
     ret
 
-;void ssim_parms_sse3(
+;void ssim_parms_sse2(
 ;    unsigned char *s,
 ;    int sp,
 ;    unsigned char *r,
@@ -151,8 +151,8 @@ NextRow:
 ; or pavgb At this point this is just meant to be first pass for calculating
 ; all the parms needed for 16x16 ssim so we can play with dssim as distortion
 ; in mode selection code.
-global sym(vp8_ssim_parms_8x8_sse3)
-sym(vp8_ssim_parms_8x8_sse3):
+global sym(vp8_ssim_parms_8x8_sse2)
+sym(vp8_ssim_parms_8x8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 9
@@ -140,6 +140,8 @@ extern prototype_getmbss(vp8_get_mb_ss_sse2);
 extern prototype_variance(vp8_mse16x16_wmt);
 extern prototype_variance2(vp8_get8x8var_sse2);
 extern prototype_variance2(vp8_get16x16var_sse2);
+extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2)
+extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2)
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_variance_sad4x4
@@ -208,6 +210,14 @@ extern prototype_variance2(vp8_get16x16var_sse2);
 #undef  vp8_variance_mse16x16
 #define vp8_variance_mse16x16 vp8_mse16x16_wmt
 
+#if ARCH_X86_64
+#undef  vp8_ssimpf_8x8
+#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2
+
+#undef  vp8_ssimpf_16x16
+#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2
+#endif
+
 #endif
 #endif
 
@@ -111,29 +111,6 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
 
 #endif
 
-#if HAVE_SSSE3
-#if CONFIG_INTERNAL_STATS
-#if ARCH_X86_64
-typedef void ssimpf
-(
-    unsigned char *s,
-    int sp,
-    unsigned char *r,
-    int rp,
-    unsigned long *sum_s,
-    unsigned long *sum_r,
-    unsigned long *sum_sq_s,
-    unsigned long *sum_sq_r,
-    unsigned long *sum_sxr
-);
-
-extern ssimpf vp8_ssim_parms_16x16_sse3;
-extern ssimpf vp8_ssim_parms_8x8_sse3;
-#endif
-#endif
-#endif
-
-
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 {
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -245,6 +222,13 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 
 #if !(CONFIG_REALTIME_ONLY)
         cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2;
+#endif
+
+#if CONFIG_INTERNAL_STATS
+#if ARCH_X86_64
+        cpi->rtcd.variance.ssimpf_8x8      = vp8_ssim_parms_8x8_sse2;
+        cpi->rtcd.variance.ssimpf_16x16    = vp8_ssim_parms_16x16_sse2;
+#endif
 #endif
     }
 #endif
@@ -280,14 +264,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.subpixvar16x16    = vp8_sub_pixel_variance16x16_ssse3;
 
         cpi->rtcd.quantize.fastquantb        = vp8_fast_quantize_b_ssse3;
-
-#if CONFIG_INTERNAL_STATS
-#if ARCH_X86_64
-        cpi->rtcd.variance.ssimpf_8x8        = vp8_ssim_parms_8x8_sse3;
-        cpi->rtcd.variance.ssimpf            = vp8_ssim_parms_16x16_sse3;
-#endif
-#endif
-
     }
 #endif
 