Merge remote branch 'internal/upstream' into HEAD
This commit is contained in:
Коммит
820b2b927f
|
@ -153,7 +153,7 @@ endif
|
|||
#
|
||||
obj_int_extract: build/make/obj_int_extract.c
|
||||
$(if $(quiet),echo " [HOSTCC] $@")
|
||||
$(qexec)$(HOSTCC) -I. -o $@ $<
|
||||
$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
|
||||
CLEAN-OBJS += obj_int_extract
|
||||
|
||||
#
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
#include "vpx_config.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#include <io.h>
|
||||
#include <share.h>
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
@ -816,7 +816,7 @@ bail:
|
|||
#endif
|
||||
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
/* See "Microsoft Portable Executable and Common Object File Format Specification"
|
||||
for reference.
|
||||
*/
|
||||
|
@ -830,7 +830,6 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
|||
unsigned int i;
|
||||
unsigned __int8 *ptr;
|
||||
unsigned __int32 symoffset;
|
||||
FILE *fp;
|
||||
|
||||
char **sectionlist; //this array holds all section names in their correct order.
|
||||
//it is used to check if the symbol is in .bss or .data section.
|
||||
|
@ -871,14 +870,6 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
|||
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
|
||||
//log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
|
||||
|
||||
fp = fopen("assembly_offsets.asm", "w");
|
||||
|
||||
if (fp == NULL)
|
||||
{
|
||||
perror("open file");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* The compiler puts the data with non-zero offset in .data section, but puts the data with
|
||||
zero offset in .bss section. So, if the data in in .bss section, set offset=0.
|
||||
Note from Wiki: In an object module compiled from C, the bss section contains
|
||||
|
@ -912,13 +903,13 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
|||
char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
strncpy(name, ptr, 8);
|
||||
//log_msg("COFF: Parsing symbol %s\n",name);
|
||||
fprintf(fp, "%-40s EQU ", name);
|
||||
printf("%-40s EQU ", name + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
//log_msg("COFF: Parsing symbol %s\n",
|
||||
// buf + strtab_ptr + get_le32(ptr+4));
|
||||
fprintf(fp, "%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
|
||||
printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4) + 1);
|
||||
}
|
||||
|
||||
if (!(strcmp(sectionlist[section-1], ".bss")))
|
||||
|
@ -935,14 +926,13 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
|||
//log_msg(" Address: %u\n",get_le32(ptr+8));
|
||||
//log_msg(" Offset: %u\n", symoffset);
|
||||
|
||||
fprintf(fp, "%5d\n", symoffset);
|
||||
printf("%5d\n", symoffset);
|
||||
}
|
||||
|
||||
ptr += 18;
|
||||
}
|
||||
|
||||
fprintf(fp, " END\n");
|
||||
fclose(fp);
|
||||
printf(" END\n");
|
||||
|
||||
for (i = 0; i < nsections; i++)
|
||||
{
|
||||
|
@ -992,11 +982,7 @@ int main(int argc, char **argv)
|
|||
else
|
||||
f = argv[1];
|
||||
|
||||
if (_sopen_s(&fd, f, _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE))
|
||||
{
|
||||
perror("Unable to open file");
|
||||
goto bail;
|
||||
}
|
||||
fd = _sopen(f, _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE);
|
||||
|
||||
if (_fstat(fd, &stat_buf))
|
||||
{
|
||||
|
|
|
@ -1583,165 +1583,18 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
|||
cpi->oxcf = *oxcf;
|
||||
|
||||
|
||||
switch (cpi->oxcf.Mode)
|
||||
{
|
||||
|
||||
case MODE_REALTIME:
|
||||
cpi->pass = 0;
|
||||
cpi->compressor_speed = 2;
|
||||
|
||||
if (cpi->oxcf.cpu_used < -16)
|
||||
{
|
||||
cpi->oxcf.cpu_used = -16;
|
||||
}
|
||||
|
||||
if (cpi->oxcf.cpu_used > 16)
|
||||
cpi->oxcf.cpu_used = 16;
|
||||
|
||||
break;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
case MODE_GOODQUALITY:
|
||||
cpi->pass = 0;
|
||||
cpi->compressor_speed = 1;
|
||||
|
||||
if (cpi->oxcf.cpu_used < -5)
|
||||
{
|
||||
cpi->oxcf.cpu_used = -5;
|
||||
}
|
||||
|
||||
if (cpi->oxcf.cpu_used > 5)
|
||||
cpi->oxcf.cpu_used = 5;
|
||||
|
||||
break;
|
||||
|
||||
case MODE_BESTQUALITY:
|
||||
cpi->pass = 0;
|
||||
cpi->compressor_speed = 0;
|
||||
break;
|
||||
|
||||
case MODE_FIRSTPASS:
|
||||
cpi->pass = 1;
|
||||
cpi->compressor_speed = 1;
|
||||
break;
|
||||
case MODE_SECONDPASS:
|
||||
cpi->pass = 2;
|
||||
cpi->compressor_speed = 1;
|
||||
|
||||
if (cpi->oxcf.cpu_used < -5)
|
||||
{
|
||||
cpi->oxcf.cpu_used = -5;
|
||||
}
|
||||
|
||||
if (cpi->oxcf.cpu_used > 5)
|
||||
cpi->oxcf.cpu_used = 5;
|
||||
|
||||
break;
|
||||
case MODE_SECONDPASS_BEST:
|
||||
cpi->pass = 2;
|
||||
cpi->compressor_speed = 0;
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (cpi->pass == 0)
|
||||
cpi->auto_worst_q = 1;
|
||||
|
||||
cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
|
||||
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
|
||||
cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
|
||||
|
||||
if (oxcf->fixed_q >= 0)
|
||||
{
|
||||
if (oxcf->worst_allowed_q < 0)
|
||||
cpi->oxcf.fixed_q = q_trans[0];
|
||||
else
|
||||
cpi->oxcf.fixed_q = q_trans[oxcf->worst_allowed_q];
|
||||
|
||||
if (oxcf->alt_q < 0)
|
||||
cpi->oxcf.alt_q = q_trans[0];
|
||||
else
|
||||
cpi->oxcf.alt_q = q_trans[oxcf->alt_q];
|
||||
|
||||
if (oxcf->key_q < 0)
|
||||
cpi->oxcf.key_q = q_trans[0];
|
||||
else
|
||||
cpi->oxcf.key_q = q_trans[oxcf->key_q];
|
||||
|
||||
if (oxcf->gold_q < 0)
|
||||
cpi->oxcf.gold_q = q_trans[0];
|
||||
else
|
||||
cpi->oxcf.gold_q = q_trans[oxcf->gold_q];
|
||||
|
||||
}
|
||||
|
||||
cpi->baseline_gf_interval = cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
|
||||
cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
|
||||
|
||||
//cpi->use_golden_frame_only = 0;
|
||||
//cpi->use_last_frame_only = 0;
|
||||
cm->refresh_golden_frame = 0;
|
||||
cm->refresh_last_frame = 1;
|
||||
cm->refresh_entropy_probs = 1;
|
||||
|
||||
if (cpi->oxcf.token_partitions >= 0 && cpi->oxcf.token_partitions <= 3)
|
||||
cm->multi_token_partition = (TOKEN_PARTITION) cpi->oxcf.token_partitions;
|
||||
|
||||
setup_features(cpi);
|
||||
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
|
||||
}
|
||||
|
||||
// At the moment the first order values may not be > MAXQ
|
||||
if (cpi->oxcf.fixed_q > MAXQ)
|
||||
cpi->oxcf.fixed_q = MAXQ;
|
||||
|
||||
// local file playback mode == really big buffer
|
||||
if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
|
||||
{
|
||||
cpi->oxcf.starting_buffer_level = 60000;
|
||||
cpi->oxcf.optimal_buffer_level = 60000;
|
||||
cpi->oxcf.maximum_buffer_size = 240000;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Convert target bandwidth from Kbit/s to Bit/s
|
||||
cpi->oxcf.target_bandwidth *= 1000;
|
||||
cpi->oxcf.starting_buffer_level =
|
||||
rescale(cpi->oxcf.starting_buffer_level,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
if (cpi->oxcf.optimal_buffer_level == 0)
|
||||
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
|
||||
else
|
||||
cpi->oxcf.optimal_buffer_level =
|
||||
rescale(cpi->oxcf.optimal_buffer_level,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
if (cpi->oxcf.maximum_buffer_size == 0)
|
||||
cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
|
||||
else
|
||||
cpi->oxcf.maximum_buffer_size =
|
||||
rescale(cpi->oxcf.maximum_buffer_size,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
||||
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
||||
|
||||
vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
|
||||
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||
cpi->best_quality = cpi->oxcf.best_allowed_q;
|
||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
|
||||
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
|
||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||
|
||||
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
|
@ -1751,84 +1604,8 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
|||
cpi->total_actual_bits = 0;
|
||||
cpi->total_target_vs_actual = 0;
|
||||
|
||||
// Only allow dropped frames in buffered mode
|
||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
||||
|
||||
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
||||
|
||||
if (!cm->use_bilinear_mc_filter)
|
||||
cm->mcomp_filter_type = SIXTAP;
|
||||
else
|
||||
cm->mcomp_filter_type = BILINEAR;
|
||||
|
||||
cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
|
||||
|
||||
cm->Width = cpi->oxcf.Width ;
|
||||
cm->Height = cpi->oxcf.Height ;
|
||||
|
||||
cpi->intra_frame_target = (4 * (cm->Width + cm->Height) / 15) * 1000; // As per VP8
|
||||
|
||||
cm->horiz_scale = cpi->horiz_scale;
|
||||
cm->vert_scale = cpi->vert_scale ;
|
||||
|
||||
// VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
|
||||
if (cpi->oxcf.Sharpness > 7)
|
||||
cpi->oxcf.Sharpness = 7;
|
||||
|
||||
cm->sharpness_level = cpi->oxcf.Sharpness;
|
||||
|
||||
if (cm->horiz_scale != NORMAL || cm->vert_scale != NORMAL)
|
||||
{
|
||||
int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs);
|
||||
int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs);
|
||||
|
||||
Scale2Ratio(cm->horiz_scale, &hr, &hs);
|
||||
Scale2Ratio(cm->vert_scale, &vr, &vs);
|
||||
|
||||
// always go to the next whole number
|
||||
cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs;
|
||||
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
|
||||
}
|
||||
|
||||
if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
||||
((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
|
||||
{
|
||||
alloc_raw_frame_buffers(cpi);
|
||||
vp8_alloc_compressor_data(cpi);
|
||||
}
|
||||
|
||||
// Clamp KF frame size to quarter of data rate
|
||||
if (cpi->intra_frame_target > cpi->target_bandwidth >> 2)
|
||||
cpi->intra_frame_target = cpi->target_bandwidth >> 2;
|
||||
|
||||
if (cpi->oxcf.fixed_q >= 0)
|
||||
{
|
||||
cpi->last_q[0] = cpi->oxcf.fixed_q;
|
||||
cpi->last_q[1] = cpi->oxcf.fixed_q;
|
||||
}
|
||||
|
||||
cpi->Speed = cpi->oxcf.cpu_used;
|
||||
|
||||
// force to allowlag to 0 if lag_in_frames is 0;
|
||||
if (cpi->oxcf.lag_in_frames == 0)
|
||||
{
|
||||
cpi->oxcf.allow_lag = 0;
|
||||
}
|
||||
// Limit on lag buffers as these are not currently dynamically allocated
|
||||
else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
|
||||
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
|
||||
|
||||
// YX Temp
|
||||
cpi->last_alt_ref_sei = -1;
|
||||
cpi->is_src_frame_alt_ref = 0;
|
||||
cpi->is_next_src_alt_ref = 0;
|
||||
|
||||
#if 0
|
||||
// Experimental RD Code
|
||||
cpi->frame_distortion = 0;
|
||||
cpi->last_frame_distortion = 0;
|
||||
#endif
|
||||
// change includes all joint functionality
|
||||
vp8_change_config(ptr, oxcf);
|
||||
|
||||
#if VP8_TEMPORAL_ALT_REF
|
||||
|
||||
|
@ -1845,12 +1622,6 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
|||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* This function needs more clean up, i.e. be more tuned torwards
|
||||
* change_config rather than init_config !!!!!!!!!!!!!!!!
|
||||
* YX - 5/28/2009
|
||||
*
|
||||
*/
|
||||
|
||||
void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||
{
|
||||
|
@ -2001,10 +1772,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
|||
// Convert target bandwidth from Kbit/s to Bit/s
|
||||
cpi->oxcf.target_bandwidth *= 1000;
|
||||
|
||||
cpi->oxcf.starting_buffer_level =
|
||||
rescale(cpi->oxcf.starting_buffer_level,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
if (cpi->oxcf.optimal_buffer_level == 0)
|
||||
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
|
||||
else
|
||||
|
@ -2019,27 +1786,34 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
|||
rescale(cpi->oxcf.maximum_buffer_size,
|
||||
cpi->oxcf.target_bandwidth, 1000);
|
||||
|
||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
||||
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
||||
|
||||
vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
|
||||
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||
cpi->best_quality = cpi->oxcf.best_allowed_q;
|
||||
|
||||
// active values should only be modified if out of new range
|
||||
if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q)
|
||||
{
|
||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||
}
|
||||
// less likely
|
||||
else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q)
|
||||
{
|
||||
cpi->active_worst_quality = cpi->oxcf.best_allowed_q;
|
||||
}
|
||||
if (cpi->active_best_quality < cpi->oxcf.best_allowed_q)
|
||||
{
|
||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||
}
|
||||
// less likely
|
||||
else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q)
|
||||
{
|
||||
cpi->active_best_quality = cpi->oxcf.worst_allowed_q;
|
||||
}
|
||||
|
||||
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
|
||||
|
||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||
|
||||
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||
|
||||
cpi->total_actual_bits = 0;
|
||||
cpi->total_target_vs_actual = 0;
|
||||
|
||||
// Only allow dropped frames in buffered mode
|
||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
||||
|
||||
|
@ -3613,6 +3387,7 @@ static void encode_frame_to_data_rate
|
|||
int drop_mark50 = drop_mark / 4;
|
||||
int drop_mark25 = drop_mark / 8;
|
||||
|
||||
|
||||
// Clear down mmx registers to allow floating point in what follows
|
||||
vp8_clear_system_state();
|
||||
|
||||
|
|
|
@ -790,7 +790,7 @@ filter_block2d_bil_variance:
|
|||
ret
|
||||
|
||||
|
||||
;void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
;void vp8_half_horiz_vert_variance8x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
|
@ -800,8 +800,8 @@ filter_block2d_bil_variance:
|
|||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_vert_variance16x_h_sse2)
|
||||
sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
||||
global sym(vp8_half_horiz_vert_variance8x_h_sse2)
|
||||
sym(vp8_half_horiz_vert_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
|
@ -835,7 +835,7 @@ sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
|||
add rsi, r8
|
||||
%endif
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_1:
|
||||
vp8_half_horiz_vert_variance8x_h_1:
|
||||
|
||||
movq xmm1, QWORD PTR [rsi] ;
|
||||
movq xmm2, QWORD PTR [rsi+1] ;
|
||||
|
@ -863,7 +863,7 @@ vp8_half_horiz_vert_variance16x_h_1:
|
|||
%endif
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_horiz_vert_variance16x_h_1 ;
|
||||
jnz vp8_half_horiz_vert_variance8x_h_1 ;
|
||||
|
||||
movdq2q mm6, xmm6 ;
|
||||
movdq2q mm7, xmm7 ;
|
||||
|
@ -910,8 +910,7 @@ vp8_half_horiz_vert_variance16x_h_1:
|
|||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_vert_variance16x_h_sse2
|
||||
;void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
|
@ -921,8 +920,124 @@ vp8_half_horiz_vert_variance16x_h_1:
|
|||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_vert_variance16x_h_sse2)
|
||||
sym(vp8_half_vert_variance16x_h_sse2):
|
||||
global sym(vp8_half_horiz_vert_variance16x_h_sse2)
|
||||
sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
pxor xmm6, xmm6 ; error accumulator
|
||||
pxor xmm7, xmm7 ; sse eaccumulator
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
|
||||
movdqu xmm5, XMMWORD PTR [rsi]
|
||||
movdqu xmm3, XMMWORD PTR [rsi+1]
|
||||
pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_1:
|
||||
movdqu xmm1, XMMWORD PTR [rsi] ;
|
||||
movdqu xmm2, XMMWORD PTR [rsi+1] ;
|
||||
pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
|
||||
|
||||
pavgb xmm5, xmm1 ; xmm = vertical average of the above
|
||||
|
||||
movdqa xmm4, xmm5
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = words of above
|
||||
punpckhbw xmm4, xmm0
|
||||
|
||||
movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = words of above
|
||||
psubw xmm5, xmm3 ; xmm5 -= xmm3
|
||||
|
||||
movq xmm3, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm3, xmm0
|
||||
psubw xmm4, xmm3
|
||||
|
||||
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||
paddw xmm6, xmm4
|
||||
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||
pmaddwd xmm4, xmm4
|
||||
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||
paddd xmm7, xmm4
|
||||
|
||||
movdqa xmm5, xmm1 ; save xmm1 for use on the next row
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
lea rdi, [rdi + rdx]
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_horiz_vert_variance16x_h_1 ;
|
||||
|
||||
pxor xmm1, xmm1
|
||||
pxor xmm5, xmm5
|
||||
|
||||
punpcklwd xmm0, xmm6
|
||||
punpckhwd xmm1, xmm6
|
||||
psrad xmm0, 16
|
||||
psrad xmm1, 16
|
||||
paddd xmm0, xmm1
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
punpckhdq xmm7, xmm5
|
||||
paddd xmm6, xmm7
|
||||
|
||||
punpckldq xmm0, xmm5
|
||||
punpckhdq xmm1, xmm5
|
||||
paddd xmm0, xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
psrldq xmm7, 8
|
||||
psrldq xmm1, 8
|
||||
|
||||
paddd xmm6, xmm7
|
||||
paddd xmm0, xmm1
|
||||
|
||||
mov rsi, arg(5) ;[Sum]
|
||||
mov rdi, arg(6) ;[SSE]
|
||||
|
||||
movd [rsi], xmm0
|
||||
movd [rdi], xmm6
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_vert_variance8x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_vert_variance8x_h_sse2)
|
||||
sym(vp8_half_vert_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
|
@ -945,7 +1060,7 @@ sym(vp8_half_vert_variance16x_h_sse2):
|
|||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
vp8_half_vert_variance16x_h_1:
|
||||
vp8_half_vert_variance8x_h_1:
|
||||
movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
|
||||
movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9
|
||||
|
||||
|
@ -969,7 +1084,7 @@ vp8_half_vert_variance16x_h_1:
|
|||
%endif
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_vert_variance16x_h_1 ;
|
||||
jnz vp8_half_vert_variance8x_h_1 ;
|
||||
|
||||
movdq2q mm6, xmm6 ;
|
||||
movdq2q mm7, xmm7 ;
|
||||
|
@ -1016,8 +1131,7 @@ vp8_half_vert_variance16x_h_1:
|
|||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_horiz_variance16x_h_sse2
|
||||
;void vp8_half_vert_variance16x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
|
@ -1027,8 +1141,116 @@ vp8_half_vert_variance16x_h_1:
|
|||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_variance16x_h_sse2)
|
||||
sym(vp8_half_horiz_variance16x_h_sse2):
|
||||
global sym(vp8_half_vert_variance16x_h_sse2)
|
||||
sym(vp8_half_vert_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
pxor xmm6, xmm6 ; error accumulator
|
||||
pxor xmm7, xmm7 ; sse eaccumulator
|
||||
mov rsi, arg(0) ;ref_ptr
|
||||
|
||||
mov rdi, arg(2) ;src_ptr
|
||||
movsxd rcx, dword ptr arg(4) ;Height
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||
|
||||
movdqu xmm5, XMMWORD PTR [rsi]
|
||||
lea rsi, [rsi + rax ]
|
||||
pxor xmm0, xmm0
|
||||
|
||||
vp8_half_vert_variance16x_h_1:
|
||||
movdqu xmm3, XMMWORD PTR [rsi]
|
||||
|
||||
pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
|
||||
movdqa xmm4, xmm5
|
||||
punpcklbw xmm5, xmm0
|
||||
punpckhbw xmm4, xmm0
|
||||
|
||||
movq xmm2, QWORD PTR [rdi]
|
||||
punpcklbw xmm2, xmm0
|
||||
psubw xmm5, xmm2
|
||||
movq xmm2, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm2, xmm0
|
||||
psubw xmm4, xmm2
|
||||
|
||||
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||
paddw xmm6, xmm4
|
||||
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||
pmaddwd xmm4, xmm4
|
||||
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||
paddd xmm7, xmm4
|
||||
|
||||
movdqa xmm5, xmm3
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
lea rdi, [rdi + rdx]
|
||||
|
||||
sub rcx, 1
|
||||
jnz vp8_half_vert_variance16x_h_1
|
||||
|
||||
pxor xmm1, xmm1
|
||||
pxor xmm5, xmm5
|
||||
|
||||
punpcklwd xmm0, xmm6
|
||||
punpckhwd xmm1, xmm6
|
||||
psrad xmm0, 16
|
||||
psrad xmm1, 16
|
||||
paddd xmm0, xmm1
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
punpckhdq xmm7, xmm5
|
||||
paddd xmm6, xmm7
|
||||
|
||||
punpckldq xmm0, xmm5
|
||||
punpckhdq xmm1, xmm5
|
||||
paddd xmm0, xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
psrldq xmm7, 8
|
||||
psrldq xmm1, 8
|
||||
|
||||
paddd xmm6, xmm7
|
||||
paddd xmm0, xmm1
|
||||
|
||||
mov rsi, arg(5) ;[Sum]
|
||||
mov rdi, arg(6) ;[SSE]
|
||||
|
||||
movd [rsi], xmm0
|
||||
movd [rdi], xmm6
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_half_horiz_variance8x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_variance8x_h_sse2)
|
||||
sym(vp8_half_horiz_variance8x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
|
@ -1050,7 +1272,7 @@ sym(vp8_half_horiz_variance16x_h_sse2):
|
|||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
vp8_half_horiz_variance16x16_1:
|
||||
vp8_half_horiz_variance8x_h_1:
|
||||
movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
|
||||
movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
|
||||
|
||||
|
@ -1073,7 +1295,7 @@ vp8_half_horiz_variance16x16_1:
|
|||
add rdi, r9
|
||||
%endif
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_horiz_variance16x16_1 ;
|
||||
jnz vp8_half_horiz_variance8x_h_1 ;
|
||||
|
||||
movdq2q mm6, xmm6 ;
|
||||
movdq2q mm7, xmm7 ;
|
||||
|
@ -1120,6 +1342,109 @@ vp8_half_horiz_variance16x16_1:
|
|||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_half_horiz_variance16x_h_sse2
|
||||
;(
|
||||
; unsigned char *ref_ptr,
|
||||
; int ref_pixels_per_line,
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixels_per_line,
|
||||
; unsigned int Height,
|
||||
; int *sum,
|
||||
; unsigned int *sumsquared
|
||||
;)
|
||||
global sym(vp8_half_horiz_variance16x_h_sse2)
|
||||
sym(vp8_half_horiz_variance16x_h_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
pxor xmm6, xmm6 ; error accumulator
|
||||
pxor xmm7, xmm7 ; sse eaccumulator
|
||||
mov rsi, arg(0) ;ref_ptr ;
|
||||
|
||||
mov rdi, arg(2) ;src_ptr ;
|
||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||
|
||||
pxor xmm0, xmm0 ;
|
||||
|
||||
vp8_half_horiz_variance16x_h_1:
|
||||
movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15
|
||||
movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16
|
||||
|
||||
pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
|
||||
movdqa xmm1, xmm5
|
||||
punpcklbw xmm5, xmm0 ; xmm5 = words of above
|
||||
punpckhbw xmm1, xmm0
|
||||
|
||||
movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
|
||||
punpcklbw xmm3, xmm0 ; xmm3 = words of above
|
||||
movq xmm2, QWORD PTR [rdi+8]
|
||||
punpcklbw xmm2, xmm0
|
||||
|
||||
psubw xmm5, xmm3 ; xmm5 -= xmm3
|
||||
psubw xmm1, xmm2
|
||||
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||
paddw xmm6, xmm1
|
||||
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||
pmaddwd xmm1, xmm1
|
||||
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||
paddd xmm7, xmm1
|
||||
|
||||
lea rsi, [rsi + rax]
|
||||
lea rdi, [rdi + rdx]
|
||||
|
||||
sub rcx, 1 ;
|
||||
jnz vp8_half_horiz_variance16x_h_1 ;
|
||||
|
||||
pxor xmm1, xmm1
|
||||
pxor xmm5, xmm5
|
||||
|
||||
punpcklwd xmm0, xmm6
|
||||
punpckhwd xmm1, xmm6
|
||||
psrad xmm0, 16
|
||||
psrad xmm1, 16
|
||||
paddd xmm0, xmm1
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
movdqa xmm6, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
punpckhdq xmm7, xmm5
|
||||
paddd xmm6, xmm7
|
||||
|
||||
punpckldq xmm0, xmm5
|
||||
punpckhdq xmm1, xmm5
|
||||
paddd xmm0, xmm1
|
||||
|
||||
movdqa xmm7, xmm6
|
||||
movdqa xmm1, xmm0
|
||||
|
||||
psrldq xmm7, 8
|
||||
psrldq xmm1, 8
|
||||
|
||||
paddd xmm6, xmm7
|
||||
paddd xmm0, xmm1
|
||||
|
||||
mov rsi, arg(5) ;[Sum]
|
||||
mov rdi, arg(6) ;[SSE]
|
||||
|
||||
movd [rsi], xmm0
|
||||
movd [rdi], xmm6
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
SECTION_RODATA
|
||||
; short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64};
|
||||
|
|
|
@ -456,146 +456,6 @@ unsigned int vp8_sub_pixel_variance8x16_mmx
|
|||
return (xxsum - ((xsum * xsum) >> 7));
|
||||
}
|
||||
|
||||
unsigned int vp8_i_variance16x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, sse2, sse3, var;
|
||||
int sum0, sum1, sum2, sum3, avg;
|
||||
|
||||
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2) ;
|
||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);
|
||||
|
||||
var = sse0 + sse1 + sse2 + sse3;
|
||||
avg = sum0 + sum1 + sum2 + sum3;
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 8));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_i_variance8x16_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1) ;
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_i_sub_pixel_variance16x16_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int f2soffset = (src_pixels_per_line >> 1);
|
||||
int f2doffset = (dst_pixels_per_line >> 1);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum0, &xxsum0
|
||||
);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr + f2soffset, src_pixels_per_line,
|
||||
dst_ptr + f2doffset, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr + f2soffset + 8, src_pixels_per_line,
|
||||
dst_ptr + f2doffset + 8, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_i_sub_pixel_variance8x16_mmx
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int f2soffset = (src_pixels_per_line >> 1);
|
||||
int f2doffset = (dst_pixels_per_line >> 1);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum0, &xxsum0
|
||||
);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_mmx(
|
||||
src_ptr + f2soffset, src_pixels_per_line,
|
||||
dst_ptr + f2doffset, dst_pixels_per_line, 8,
|
||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
|
||||
const unsigned char *src_ptr,
|
||||
|
|
|
@ -81,6 +81,16 @@ void vp8_filter_block2d_bil_var_sse2
|
|||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_horiz_vert_variance8x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_horiz_vert_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
|
@ -91,6 +101,16 @@ void vp8_half_horiz_vert_variance16x_h_sse2
|
|||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_horiz_variance8x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_horiz_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
|
@ -101,6 +121,16 @@ void vp8_half_horiz_variance16x_h_sse2
|
|||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_vert_variance8x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
int ref_pixels_per_line,
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
unsigned int Height,
|
||||
int *sum,
|
||||
unsigned int *sumsquared
|
||||
);
|
||||
void vp8_half_vert_variance16x_h_sse2
|
||||
(
|
||||
const unsigned char *ref_ptr,
|
||||
|
@ -262,21 +292,21 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
|
|||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
vp8_half_horiz_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
vp8_half_vert_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
vp8_half_horiz_vert_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum, &xxsum);
|
||||
|
@ -317,11 +347,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
|
@ -329,11 +354,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
|
@ -341,11 +361,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -356,17 +371,16 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
|||
&xsum0, &xxsum0
|
||||
);
|
||||
|
||||
|
||||
vp8_filter_block2d_bil_var_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
xoffset, yoffset,
|
||||
&xsum1, &xxsum1
|
||||
);
|
||||
}
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
}
|
||||
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
@ -406,11 +420,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
|
@ -418,11 +427,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
|
@ -430,11 +434,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -449,10 +448,9 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
|||
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||
xoffset, yoffset,
|
||||
&xsum1, &xxsum1);
|
||||
}
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
}
|
||||
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||
|
@ -474,21 +472,21 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
|
|||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
vp8_half_horiz_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
vp8_half_vert_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
vp8_half_horiz_vert_variance8x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum, &xxsum);
|
||||
|
@ -506,81 +504,6 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
|
|||
return (xxsum - ((xsum * xsum) >> 7));
|
||||
}
|
||||
|
||||
unsigned int vp8_i_variance16x16_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, sse2, sse3, var;
|
||||
int sum0, sum1, sum2, sum3, avg;
|
||||
|
||||
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2) ;
|
||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);
|
||||
|
||||
var = sse0 + sse1 + sse2 + sse3;
|
||||
avg = sum0 + sum1 + sum2 + sum3;
|
||||
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 8));
|
||||
|
||||
}
|
||||
|
||||
unsigned int vp8_i_variance8x16_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
int recon_stride,
|
||||
unsigned int *sse)
|
||||
{
|
||||
unsigned int sse0, sse1, var;
|
||||
int sum0, sum1, avg;
|
||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1) ;
|
||||
|
||||
var = sse0 + sse1;
|
||||
avg = sum0 + sum1;
|
||||
|
||||
*sse = var;
|
||||
return (var - ((avg * avg) >> 7));
|
||||
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_i_sub_pixel_variance16x16_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
return vp8_sub_pixel_variance16x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_i_sub_pixel_variance8x16_wmt
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
|
||||
return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
|
||||
const unsigned char *src_ptr,
|
||||
|
@ -589,21 +512,14 @@ unsigned int vp8_variance_halfpixvar16x16_h_wmt(
|
|||
int dst_pixels_per_line,
|
||||
unsigned int *sse)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
@ -616,21 +532,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_wmt(
|
|||
int dst_pixels_per_line,
|
||||
unsigned int *sse)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
@ -643,21 +551,14 @@ unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
|
|||
int dst_pixels_per_line,
|
||||
unsigned int *sse)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
|
|
@ -76,8 +76,8 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
|||
unsigned int *sse
|
||||
)
|
||||
{
|
||||
int xsum0, xsum1;
|
||||
unsigned int xxsum0, xxsum1;
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
|
||||
// note we could avoid these if statements if the calling function
|
||||
// just called the appropriate functions inside.
|
||||
|
@ -87,14 +87,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
|
@ -102,14 +94,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
|
@ -117,14 +101,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
|||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 16,
|
||||
&xsum0, &xxsum0);
|
||||
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr + 8, src_pixels_per_line,
|
||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||
&xsum1, &xxsum1);
|
||||
|
||||
xsum0 += xsum1;
|
||||
xxsum0 += xxsum1;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -138,3 +114,52 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
|||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||
}
|
||||
|
||||
unsigned int vp8_sub_pixel_variance16x8_ssse3
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
const unsigned char *dst_ptr,
|
||||
int dst_pixels_per_line,
|
||||
unsigned int *sse
|
||||
|
||||
)
|
||||
{
|
||||
int xsum0;
|
||||
unsigned int xxsum0;
|
||||
|
||||
if (xoffset == 4 && yoffset == 0)
|
||||
{
|
||||
vp8_half_horiz_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else if (xoffset == 0 && yoffset == 4)
|
||||
{
|
||||
vp8_half_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else if (xoffset == 4 && yoffset == 4)
|
||||
{
|
||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_bil_var_ssse3(
|
||||
src_ptr, src_pixels_per_line,
|
||||
dst_ptr, dst_pixels_per_line, 8,
|
||||
xoffset, yoffset,
|
||||
&xsum0, &xxsum0);
|
||||
}
|
||||
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||
}
|
||||
|
|
|
@ -286,6 +286,7 @@ extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3);
|
|||
#if HAVE_SSSE3
|
||||
extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3);
|
||||
extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -295,6 +296,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
|
|||
#undef vp8_variance_sad16x8x3
|
||||
#define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3
|
||||
|
||||
#undef vp8_variance_subpixvar16x8
|
||||
#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3
|
||||
|
||||
#undef vp8_variance_subpixvar16x16
|
||||
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3
|
||||
|
||||
|
|
|
@ -334,6 +334,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
|||
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
|
||||
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
|
||||
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3;
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
|
||||
|
||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
|
||||
|
|
|
@ -24,6 +24,7 @@ VP8_COMMON_SRCS-yes += common/entropymode.c
|
|||
VP8_COMMON_SRCS-yes += common/entropymv.c
|
||||
VP8_COMMON_SRCS-yes += common/extend.c
|
||||
VP8_COMMON_SRCS-yes += common/filter.c
|
||||
VP8_COMMON_SRCS-yes += common/filter.h
|
||||
VP8_COMMON_SRCS-yes += common/findnearmv.c
|
||||
VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
|
||||
VP8_COMMON_SRCS-yes += common/idctllm.c
|
||||
|
|
Загрузка…
Ссылка в новой задаче