From 734b1b2041a209ace033481c0fc919164177a3fc Mon Sep 17 00:00:00 2001 From: Fritz Koenig Date: Mon, 22 Aug 2011 11:31:12 -0700 Subject: [PATCH] Revert "Reclasify optimized ssim calculations as SSE2." This reverts commit 01376858cd184d820ff4c2d8390361a8679c0e87 --- vp8/encoder/generic/csystemdependent.c | 9 +++--- vp8/encoder/ssim.c | 19 +++++++++---- vp8/encoder/variance.h | 16 +++++------ vp8/encoder/x86/ssim_opt.asm | 12 ++++---- vp8/encoder/x86/variance_x86.h | 10 ------- vp8/encoder/x86/x86_csystemdependent.c | 38 +++++++++++++++++++++----- 6 files changed, 63 insertions(+), 41 deletions(-) diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index a14843a80..990610554 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -94,15 +94,16 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) #if !(CONFIG_REALTIME_ONLY) cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; #endif -#if CONFIG_INTERNAL_STATS - cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_c; - cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_c; -#endif #endif // Pure C: vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame; +#if CONFIG_INTERNAL_STATS + cpi->rtcd.variance.ssimpf_8x8 = ssim_parms_8x8_c; + cpi->rtcd.variance.ssimpf = ssim_parms_c; +#endif + #if ARCH_X86 || ARCH_X86_64 vp8_arch_x86_encoder_init(cpi); #endif diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c index d0f8e490a..fea756f7b 100644 --- a/vp8/encoder/ssim.c +++ b/vp8/encoder/ssim.c @@ -9,9 +9,18 @@ */ +#include "vpx_scale/yv12config.h" +#include "math.h" #include "onyx_int.h" -void vp8_ssim_parms_16x16_c +#if CONFIG_RUNTIME_CPU_DETECT +#define IF_RTCD(x) (x) +#else +#define IF_RTCD(x) NULL +#endif + + +void ssim_parms_c ( unsigned char *s, int sp, @@ -37,7 +46,7 @@ void vp8_ssim_parms_16x16_c } } } -void vp8_ssim_parms_8x8_c +void ssim_parms_8x8_c ( unsigned char *s, int sp, @@ -98,14 +107,14 @@ static double ssim_16x16(unsigned char *s,int sp, unsigned char *r,int rp, const vp8_variance_rtcd_vtable_t *rtcd) { unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0; - SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); } static double ssim_8x8(unsigned char *s,int sp, unsigned char *r,int rp, const vp8_variance_rtcd_vtable_t *rtcd) { unsigned long sum_s=0,sum_r=0,sum_sq_s=0,sum_sq_r=0,sum_sxr=0; - SSIMPF_INVOKE(rtcd,8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + rtcd->ssimpf_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64); } @@ -125,7 +134,7 @@ long dssim(unsigned char *s,int sp, unsigned char *r,int rp, c1 = cc1*16; c2 = cc2*16; - SSIMPF_INVOKE(rtcd,16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + rtcd->ssimpf(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); ssim_n1 = (2*sum_s*sum_r+ c1); ssim_n2 =((int64_t) 2*256*sum_sxr-(int64_t) 2*sum_s*sum_r+c2); diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h index 0f35152e3..5fd6d3ae0 100644 --- a/vp8/encoder/variance.h +++ b/vp8/encoder/variance.h @@ -320,16 +320,16 @@ extern prototype_variance(vp8_variance_mse16x16); #endif extern prototype_get16x16prederror(vp8_variance_get4x4sse_cs); +#ifndef vp8_ssimpf +#define vp8_ssimpf ssim_parms_c +#endif +extern prototype_ssimpf(vp8_ssimpf) + #ifndef vp8_ssimpf_8x8 -#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c +#define vp8_ssimpf_8x8 ssim_parms_8x8_c #endif extern prototype_ssimpf(vp8_ssimpf_8x8) -#ifndef vp8_ssimpf_16x16 -#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c -#endif -extern prototype_ssimpf(vp8_ssimpf_16x16) - typedef prototype_sad(*vp8_sad_fn_t); typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t); typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t); @@ -394,7 +394,7 @@ typedef struct #if CONFIG_INTERNAL_STATS vp8_ssimpf_fn_t ssimpf_8x8; - vp8_ssimpf_fn_t ssimpf_16x16; + vp8_ssimpf_fn_t ssimpf; #endif } vp8_variance_rtcd_vtable_t; @@ -417,10 +417,8 @@ typedef struct #if CONFIG_RUNTIME_CPU_DETECT #define VARIANCE_INVOKE(ctx,fn) (ctx)->fn -#define SSIMPF_INVOKE(ctx,fn) (ctx)->fn #else #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn -#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn #endif #endif diff --git a/vp8/encoder/x86/ssim_opt.asm b/vp8/encoder/x86/ssim_opt.asm index 8af4b4533..d5d267a69 100644 --- a/vp8/encoder/x86/ssim_opt.asm +++ b/vp8/encoder/x86/ssim_opt.asm @@ -44,7 +44,7 @@ paddd %1, xmm1 SUM_ACROSS_Q %1 %endmacro -;void ssim_parms_sse2( +;void ssim_parms_sse3( ; unsigned char *s, ; int sp, ; unsigned char *r, @@ -61,8 +61,8 @@ ; or pavgb At this point this is just meant to be first pass for calculating ; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; in mode selection code. -global sym(vp8_ssim_parms_16x16_sse2) -sym(vp8_ssim_parms_16x16_sse2): +global sym(vp8_ssim_parms_16x16_sse3) +sym(vp8_ssim_parms_16x16_sse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 9 @@ -134,7 +134,7 @@ NextRow: pop rbp ret -;void ssim_parms_sse2( +;void ssim_parms_sse3( ; unsigned char *s, ; int sp, ; unsigned char *r, @@ -151,8 +151,8 @@ NextRow: ; or pavgb At this point this is just meant to be first pass for calculating ; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; in mode selection code. -global sym(vp8_ssim_parms_8x8_sse2) -sym(vp8_ssim_parms_8x8_sse2): +global sym(vp8_ssim_parms_8x8_sse3) +sym(vp8_ssim_parms_8x8_sse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 9 diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h index 4b41b5436..af6c4d27e 100644 --- a/vp8/encoder/x86/variance_x86.h +++ b/vp8/encoder/x86/variance_x86.h @@ -140,8 +140,6 @@ extern prototype_getmbss(vp8_get_mb_ss_sse2); extern prototype_variance(vp8_mse16x16_wmt); extern prototype_variance2(vp8_get8x8var_sse2); extern prototype_variance2(vp8_get16x16var_sse2); -extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2) -extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2) #if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_variance_sad4x4 @@ -210,14 +208,6 @@ extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2) #undef vp8_variance_mse16x16 #define vp8_variance_mse16x16 vp8_mse16x16_wmt -#if ARCH_X86_64 -#undef vp8_ssimpf_8x8 -#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2 - -#undef vp8_ssimpf_16x16 -#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2 -#endif - #endif #endif diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 36b7b7194..badb9f044 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -111,6 +111,29 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) #endif +#if HAVE_SSSE3 +#if CONFIG_INTERNAL_STATS +#if ARCH_X86_64 +typedef void ssimpf +( + unsigned char *s, + int sp, + unsigned char *r, + int rp, + unsigned long *sum_s, + unsigned long *sum_r, + unsigned long *sum_sq_s, + unsigned long *sum_sq_r, + unsigned long *sum_sxr +); + +extern ssimpf vp8_ssim_parms_16x16_sse3; +extern ssimpf vp8_ssim_parms_8x8_sse3; +#endif +#endif +#endif + + void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { #if CONFIG_RUNTIME_CPU_DETECT @@ -222,13 +245,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) #if !(CONFIG_REALTIME_ONLY) cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; -#endif - -#if CONFIG_INTERNAL_STATS -#if ARCH_X86_64 - cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse2; - cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_sse2; -#endif #endif } #endif @@ -264,6 +280,14 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3; cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3; + +#if CONFIG_INTERNAL_STATS +#if ARCH_X86_64 + cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse3; + cpi->rtcd.variance.ssimpf = vp8_ssim_parms_16x16_sse3; +#endif +#endif + } #endif