Merge remote branch 'internal/upstream' into HEAD
This commit is contained in:
Коммит
820b2b927f
|
@ -153,7 +153,7 @@ endif
|
||||||
#
|
#
|
||||||
obj_int_extract: build/make/obj_int_extract.c
|
obj_int_extract: build/make/obj_int_extract.c
|
||||||
$(if $(quiet),echo " [HOSTCC] $@")
|
$(if $(quiet),echo " [HOSTCC] $@")
|
||||||
$(qexec)$(HOSTCC) -I. -o $@ $<
|
$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
|
||||||
CLEAN-OBJS += obj_int_extract
|
CLEAN-OBJS += obj_int_extract
|
||||||
|
|
||||||
#
|
#
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
|
|
||||||
#include "vpx_config.h"
|
#include "vpx_config.h"
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
#include <io.h>
|
#include <io.h>
|
||||||
#include <share.h>
|
#include <share.h>
|
||||||
#include "vpx/vpx_integer.h"
|
#include "vpx/vpx_integer.h"
|
||||||
|
@ -816,7 +816,7 @@ bail:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
/* See "Microsoft Portable Executable and Common Object File Format Specification"
|
/* See "Microsoft Portable Executable and Common Object File Format Specification"
|
||||||
for reference.
|
for reference.
|
||||||
*/
|
*/
|
||||||
|
@ -830,7 +830,6 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
unsigned __int8 *ptr;
|
unsigned __int8 *ptr;
|
||||||
unsigned __int32 symoffset;
|
unsigned __int32 symoffset;
|
||||||
FILE *fp;
|
|
||||||
|
|
||||||
char **sectionlist; //this array holds all section names in their correct order.
|
char **sectionlist; //this array holds all section names in their correct order.
|
||||||
//it is used to check if the symbol is in .bss or .data section.
|
//it is used to check if the symbol is in .bss or .data section.
|
||||||
|
@ -871,14 +870,6 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||||
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
|
//log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
|
||||||
//log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
|
//log_msg("COFF: raw data pointer ofset for section .data is %u\n", sectionrawdata_ptr);
|
||||||
|
|
||||||
fp = fopen("assembly_offsets.asm", "w");
|
|
||||||
|
|
||||||
if (fp == NULL)
|
|
||||||
{
|
|
||||||
perror("open file");
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The compiler puts the data with non-zero offset in .data section, but puts the data with
|
/* The compiler puts the data with non-zero offset in .data section, but puts the data with
|
||||||
zero offset in .bss section. So, if the data in in .bss section, set offset=0.
|
zero offset in .bss section. So, if the data in in .bss section, set offset=0.
|
||||||
Note from Wiki: In an object module compiled from C, the bss section contains
|
Note from Wiki: In an object module compiled from C, the bss section contains
|
||||||
|
@ -912,13 +903,13 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||||
char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
|
char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||||
strncpy(name, ptr, 8);
|
strncpy(name, ptr, 8);
|
||||||
//log_msg("COFF: Parsing symbol %s\n",name);
|
//log_msg("COFF: Parsing symbol %s\n",name);
|
||||||
fprintf(fp, "%-40s EQU ", name);
|
printf("%-40s EQU ", name + 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//log_msg("COFF: Parsing symbol %s\n",
|
//log_msg("COFF: Parsing symbol %s\n",
|
||||||
// buf + strtab_ptr + get_le32(ptr+4));
|
// buf + strtab_ptr + get_le32(ptr+4));
|
||||||
fprintf(fp, "%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
|
printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4) + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(strcmp(sectionlist[section-1], ".bss")))
|
if (!(strcmp(sectionlist[section-1], ".bss")))
|
||||||
|
@ -935,14 +926,13 @@ int parse_coff(unsigned __int8 *buf, size_t sz)
|
||||||
//log_msg(" Address: %u\n",get_le32(ptr+8));
|
//log_msg(" Address: %u\n",get_le32(ptr+8));
|
||||||
//log_msg(" Offset: %u\n", symoffset);
|
//log_msg(" Offset: %u\n", symoffset);
|
||||||
|
|
||||||
fprintf(fp, "%5d\n", symoffset);
|
printf("%5d\n", symoffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
ptr += 18;
|
ptr += 18;
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(fp, " END\n");
|
printf(" END\n");
|
||||||
fclose(fp);
|
|
||||||
|
|
||||||
for (i = 0; i < nsections; i++)
|
for (i = 0; i < nsections; i++)
|
||||||
{
|
{
|
||||||
|
@ -992,11 +982,7 @@ int main(int argc, char **argv)
|
||||||
else
|
else
|
||||||
f = argv[1];
|
f = argv[1];
|
||||||
|
|
||||||
if (_sopen_s(&fd, f, _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE))
|
fd = _sopen(f, _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE);
|
||||||
{
|
|
||||||
perror("Unable to open file");
|
|
||||||
goto bail;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_fstat(fd, &stat_buf))
|
if (_fstat(fd, &stat_buf))
|
||||||
{
|
{
|
||||||
|
|
|
@ -1583,252 +1583,29 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||||
cpi->oxcf = *oxcf;
|
cpi->oxcf = *oxcf;
|
||||||
|
|
||||||
|
|
||||||
switch (cpi->oxcf.Mode)
|
|
||||||
{
|
|
||||||
|
|
||||||
case MODE_REALTIME:
|
|
||||||
cpi->pass = 0;
|
|
||||||
cpi->compressor_speed = 2;
|
|
||||||
|
|
||||||
if (cpi->oxcf.cpu_used < -16)
|
|
||||||
{
|
|
||||||
cpi->oxcf.cpu_used = -16;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cpi->oxcf.cpu_used > 16)
|
|
||||||
cpi->oxcf.cpu_used = 16;
|
|
||||||
|
|
||||||
break;
|
|
||||||
|
|
||||||
#if !(CONFIG_REALTIME_ONLY)
|
|
||||||
case MODE_GOODQUALITY:
|
|
||||||
cpi->pass = 0;
|
|
||||||
cpi->compressor_speed = 1;
|
|
||||||
|
|
||||||
if (cpi->oxcf.cpu_used < -5)
|
|
||||||
{
|
|
||||||
cpi->oxcf.cpu_used = -5;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cpi->oxcf.cpu_used > 5)
|
|
||||||
cpi->oxcf.cpu_used = 5;
|
|
||||||
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MODE_BESTQUALITY:
|
|
||||||
cpi->pass = 0;
|
|
||||||
cpi->compressor_speed = 0;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case MODE_FIRSTPASS:
|
|
||||||
cpi->pass = 1;
|
|
||||||
cpi->compressor_speed = 1;
|
|
||||||
break;
|
|
||||||
case MODE_SECONDPASS:
|
|
||||||
cpi->pass = 2;
|
|
||||||
cpi->compressor_speed = 1;
|
|
||||||
|
|
||||||
if (cpi->oxcf.cpu_used < -5)
|
|
||||||
{
|
|
||||||
cpi->oxcf.cpu_used = -5;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cpi->oxcf.cpu_used > 5)
|
|
||||||
cpi->oxcf.cpu_used = 5;
|
|
||||||
|
|
||||||
break;
|
|
||||||
case MODE_SECONDPASS_BEST:
|
|
||||||
cpi->pass = 2;
|
|
||||||
cpi->compressor_speed = 0;
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cpi->pass == 0)
|
|
||||||
cpi->auto_worst_q = 1;
|
|
||||||
|
|
||||||
cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
|
|
||||||
cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
|
|
||||||
cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
|
|
||||||
|
|
||||||
if (oxcf->fixed_q >= 0)
|
|
||||||
{
|
|
||||||
if (oxcf->worst_allowed_q < 0)
|
|
||||||
cpi->oxcf.fixed_q = q_trans[0];
|
|
||||||
else
|
|
||||||
cpi->oxcf.fixed_q = q_trans[oxcf->worst_allowed_q];
|
|
||||||
|
|
||||||
if (oxcf->alt_q < 0)
|
|
||||||
cpi->oxcf.alt_q = q_trans[0];
|
|
||||||
else
|
|
||||||
cpi->oxcf.alt_q = q_trans[oxcf->alt_q];
|
|
||||||
|
|
||||||
if (oxcf->key_q < 0)
|
|
||||||
cpi->oxcf.key_q = q_trans[0];
|
|
||||||
else
|
|
||||||
cpi->oxcf.key_q = q_trans[oxcf->key_q];
|
|
||||||
|
|
||||||
if (oxcf->gold_q < 0)
|
|
||||||
cpi->oxcf.gold_q = q_trans[0];
|
|
||||||
else
|
|
||||||
cpi->oxcf.gold_q = q_trans[oxcf->gold_q];
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
cpi->baseline_gf_interval = cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
|
|
||||||
cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
|
|
||||||
|
|
||||||
//cpi->use_golden_frame_only = 0;
|
|
||||||
//cpi->use_last_frame_only = 0;
|
|
||||||
cm->refresh_golden_frame = 0;
|
|
||||||
cm->refresh_last_frame = 1;
|
|
||||||
cm->refresh_entropy_probs = 1;
|
|
||||||
|
|
||||||
if (cpi->oxcf.token_partitions >= 0 && cpi->oxcf.token_partitions <= 3)
|
|
||||||
cm->multi_token_partition = (TOKEN_PARTITION) cpi->oxcf.token_partitions;
|
|
||||||
|
|
||||||
setup_features(cpi);
|
|
||||||
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
|
||||||
cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout;
|
|
||||||
}
|
|
||||||
|
|
||||||
// At the moment the first order values may not be > MAXQ
|
|
||||||
if (cpi->oxcf.fixed_q > MAXQ)
|
|
||||||
cpi->oxcf.fixed_q = MAXQ;
|
|
||||||
|
|
||||||
// local file playback mode == really big buffer
|
|
||||||
if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
|
|
||||||
{
|
|
||||||
cpi->oxcf.starting_buffer_level = 60000;
|
|
||||||
cpi->oxcf.optimal_buffer_level = 60000;
|
|
||||||
cpi->oxcf.maximum_buffer_size = 240000;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Convert target bandwidth from Kbit/s to Bit/s
|
// Convert target bandwidth from Kbit/s to Bit/s
|
||||||
cpi->oxcf.target_bandwidth *= 1000;
|
cpi->oxcf.target_bandwidth *= 1000;
|
||||||
cpi->oxcf.starting_buffer_level =
|
cpi->oxcf.starting_buffer_level =
|
||||||
rescale(cpi->oxcf.starting_buffer_level,
|
rescale(cpi->oxcf.starting_buffer_level,
|
||||||
cpi->oxcf.target_bandwidth, 1000);
|
cpi->oxcf.target_bandwidth, 1000);
|
||||||
|
|
||||||
if (cpi->oxcf.optimal_buffer_level == 0)
|
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
||||||
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
|
|
||||||
else
|
|
||||||
cpi->oxcf.optimal_buffer_level =
|
|
||||||
rescale(cpi->oxcf.optimal_buffer_level,
|
|
||||||
cpi->oxcf.target_bandwidth, 1000);
|
|
||||||
|
|
||||||
if (cpi->oxcf.maximum_buffer_size == 0)
|
|
||||||
cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
|
|
||||||
else
|
|
||||||
cpi->oxcf.maximum_buffer_size =
|
|
||||||
rescale(cpi->oxcf.maximum_buffer_size,
|
|
||||||
cpi->oxcf.target_bandwidth, 1000);
|
|
||||||
|
|
||||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
|
||||||
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
||||||
|
|
||||||
vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
|
|
||||||
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
|
||||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
|
||||||
cpi->best_quality = cpi->oxcf.best_allowed_q;
|
|
||||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
||||||
|
|
||||||
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
|
|
||||||
|
|
||||||
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
|
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||||
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||||
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
|
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
|
||||||
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
||||||
|
|
||||||
cpi->total_actual_bits = 0;
|
cpi->total_actual_bits = 0;
|
||||||
cpi->total_target_vs_actual = 0;
|
cpi->total_target_vs_actual = 0;
|
||||||
|
|
||||||
// Only allow dropped frames in buffered mode
|
// change includes all joint functionality
|
||||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
vp8_change_config(ptr, oxcf);
|
||||||
|
|
||||||
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
|
||||||
|
|
||||||
if (!cm->use_bilinear_mc_filter)
|
|
||||||
cm->mcomp_filter_type = SIXTAP;
|
|
||||||
else
|
|
||||||
cm->mcomp_filter_type = BILINEAR;
|
|
||||||
|
|
||||||
cpi->target_bandwidth = cpi->oxcf.target_bandwidth;
|
|
||||||
|
|
||||||
cm->Width = cpi->oxcf.Width ;
|
|
||||||
cm->Height = cpi->oxcf.Height ;
|
|
||||||
|
|
||||||
cpi->intra_frame_target = (4 * (cm->Width + cm->Height) / 15) * 1000; // As per VP8
|
|
||||||
|
|
||||||
cm->horiz_scale = cpi->horiz_scale;
|
|
||||||
cm->vert_scale = cpi->vert_scale ;
|
|
||||||
|
|
||||||
// VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs)
|
|
||||||
if (cpi->oxcf.Sharpness > 7)
|
|
||||||
cpi->oxcf.Sharpness = 7;
|
|
||||||
|
|
||||||
cm->sharpness_level = cpi->oxcf.Sharpness;
|
|
||||||
|
|
||||||
if (cm->horiz_scale != NORMAL || cm->vert_scale != NORMAL)
|
|
||||||
{
|
|
||||||
int UNINITIALIZED_IS_SAFE(hr), UNINITIALIZED_IS_SAFE(hs);
|
|
||||||
int UNINITIALIZED_IS_SAFE(vr), UNINITIALIZED_IS_SAFE(vs);
|
|
||||||
|
|
||||||
Scale2Ratio(cm->horiz_scale, &hr, &hs);
|
|
||||||
Scale2Ratio(cm->vert_scale, &vr, &vs);
|
|
||||||
|
|
||||||
// always go to the next whole number
|
|
||||||
cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs;
|
|
||||||
cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
|
||||||
((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
|
||||||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
|
|
||||||
{
|
|
||||||
alloc_raw_frame_buffers(cpi);
|
|
||||||
vp8_alloc_compressor_data(cpi);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clamp KF frame size to quarter of data rate
|
|
||||||
if (cpi->intra_frame_target > cpi->target_bandwidth >> 2)
|
|
||||||
cpi->intra_frame_target = cpi->target_bandwidth >> 2;
|
|
||||||
|
|
||||||
if (cpi->oxcf.fixed_q >= 0)
|
|
||||||
{
|
|
||||||
cpi->last_q[0] = cpi->oxcf.fixed_q;
|
|
||||||
cpi->last_q[1] = cpi->oxcf.fixed_q;
|
|
||||||
}
|
|
||||||
|
|
||||||
cpi->Speed = cpi->oxcf.cpu_used;
|
|
||||||
|
|
||||||
// force to allowlag to 0 if lag_in_frames is 0;
|
|
||||||
if (cpi->oxcf.lag_in_frames == 0)
|
|
||||||
{
|
|
||||||
cpi->oxcf.allow_lag = 0;
|
|
||||||
}
|
|
||||||
// Limit on lag buffers as these are not currently dynamically allocated
|
|
||||||
else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
|
|
||||||
cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
|
|
||||||
|
|
||||||
// YX Temp
|
|
||||||
cpi->last_alt_ref_sei = -1;
|
|
||||||
cpi->is_src_frame_alt_ref = 0;
|
|
||||||
cpi->is_next_src_alt_ref = 0;
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
// Experimental RD Code
|
|
||||||
cpi->frame_distortion = 0;
|
|
||||||
cpi->last_frame_distortion = 0;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if VP8_TEMPORAL_ALT_REF
|
#if VP8_TEMPORAL_ALT_REF
|
||||||
|
|
||||||
|
@ -1845,12 +1622,6 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* This function needs more clean up, i.e. be more tuned torwards
|
|
||||||
* change_config rather than init_config !!!!!!!!!!!!!!!!
|
|
||||||
* YX - 5/28/2009
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||||
{
|
{
|
||||||
|
@ -2001,10 +1772,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||||
// Convert target bandwidth from Kbit/s to Bit/s
|
// Convert target bandwidth from Kbit/s to Bit/s
|
||||||
cpi->oxcf.target_bandwidth *= 1000;
|
cpi->oxcf.target_bandwidth *= 1000;
|
||||||
|
|
||||||
cpi->oxcf.starting_buffer_level =
|
|
||||||
rescale(cpi->oxcf.starting_buffer_level,
|
|
||||||
cpi->oxcf.target_bandwidth, 1000);
|
|
||||||
|
|
||||||
if (cpi->oxcf.optimal_buffer_level == 0)
|
if (cpi->oxcf.optimal_buffer_level == 0)
|
||||||
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
|
cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
|
||||||
else
|
else
|
||||||
|
@ -2019,29 +1786,36 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
|
||||||
rescale(cpi->oxcf.maximum_buffer_size,
|
rescale(cpi->oxcf.maximum_buffer_size,
|
||||||
cpi->oxcf.target_bandwidth, 1000);
|
cpi->oxcf.target_bandwidth, 1000);
|
||||||
|
|
||||||
cpi->buffer_level = cpi->oxcf.starting_buffer_level;
|
|
||||||
cpi->bits_off_target = cpi->oxcf.starting_buffer_level;
|
|
||||||
|
|
||||||
vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
|
vp8_new_frame_rate(cpi, cpi->oxcf.frame_rate);
|
||||||
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
cpi->worst_quality = cpi->oxcf.worst_allowed_q;
|
||||||
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
|
||||||
cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q;
|
|
||||||
cpi->best_quality = cpi->oxcf.best_allowed_q;
|
cpi->best_quality = cpi->oxcf.best_allowed_q;
|
||||||
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
|
||||||
|
// active values should only be modified if out of new range
|
||||||
|
if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q)
|
||||||
|
{
|
||||||
|
cpi->active_worst_quality = cpi->oxcf.worst_allowed_q;
|
||||||
|
}
|
||||||
|
// less likely
|
||||||
|
else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q)
|
||||||
|
{
|
||||||
|
cpi->active_worst_quality = cpi->oxcf.best_allowed_q;
|
||||||
|
}
|
||||||
|
if (cpi->active_best_quality < cpi->oxcf.best_allowed_q)
|
||||||
|
{
|
||||||
|
cpi->active_best_quality = cpi->oxcf.best_allowed_q;
|
||||||
|
}
|
||||||
|
// less likely
|
||||||
|
else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q)
|
||||||
|
{
|
||||||
|
cpi->active_best_quality = cpi->oxcf.worst_allowed_q;
|
||||||
|
}
|
||||||
|
|
||||||
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
|
cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
|
||||||
|
|
||||||
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
cpi->cq_target_quality = cpi->oxcf.cq_level;
|
||||||
|
|
||||||
cpi->rolling_target_bits = cpi->av_per_frame_bandwidth;
|
|
||||||
cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
|
||||||
cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth;
|
|
||||||
cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth;
|
|
||||||
|
|
||||||
cpi->total_actual_bits = 0;
|
|
||||||
cpi->total_target_vs_actual = 0;
|
|
||||||
|
|
||||||
// Only allow dropped frames in buffered mode
|
// Only allow dropped frames in buffered mode
|
||||||
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode;
|
||||||
|
|
||||||
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
cm->filter_type = (LOOPFILTERTYPE) cpi->filter_type;
|
||||||
|
|
||||||
|
@ -3613,6 +3387,7 @@ static void encode_frame_to_data_rate
|
||||||
int drop_mark50 = drop_mark / 4;
|
int drop_mark50 = drop_mark / 4;
|
||||||
int drop_mark25 = drop_mark / 8;
|
int drop_mark25 = drop_mark / 8;
|
||||||
|
|
||||||
|
|
||||||
// Clear down mmx registers to allow floating point in what follows
|
// Clear down mmx registers to allow floating point in what follows
|
||||||
vp8_clear_system_state();
|
vp8_clear_system_state();
|
||||||
|
|
||||||
|
|
|
@ -790,7 +790,7 @@ filter_block2d_bil_variance:
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
;void vp8_half_horiz_vert_variance16x_h_sse2
|
;void vp8_half_horiz_vert_variance8x_h_sse2
|
||||||
;(
|
;(
|
||||||
; unsigned char *ref_ptr,
|
; unsigned char *ref_ptr,
|
||||||
; int ref_pixels_per_line,
|
; int ref_pixels_per_line,
|
||||||
|
@ -800,8 +800,8 @@ filter_block2d_bil_variance:
|
||||||
; int *sum,
|
; int *sum,
|
||||||
; unsigned int *sumsquared
|
; unsigned int *sumsquared
|
||||||
;)
|
;)
|
||||||
global sym(vp8_half_horiz_vert_variance16x_h_sse2)
|
global sym(vp8_half_horiz_vert_variance8x_h_sse2)
|
||||||
sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
sym(vp8_half_horiz_vert_variance8x_h_sse2):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 7
|
SHADOW_ARGS_TO_STACK 7
|
||||||
|
@ -835,7 +835,7 @@ sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
||||||
add rsi, r8
|
add rsi, r8
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
vp8_half_horiz_vert_variance16x_h_1:
|
vp8_half_horiz_vert_variance8x_h_1:
|
||||||
|
|
||||||
movq xmm1, QWORD PTR [rsi] ;
|
movq xmm1, QWORD PTR [rsi] ;
|
||||||
movq xmm2, QWORD PTR [rsi+1] ;
|
movq xmm2, QWORD PTR [rsi+1] ;
|
||||||
|
@ -863,7 +863,7 @@ vp8_half_horiz_vert_variance16x_h_1:
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
sub rcx, 1 ;
|
sub rcx, 1 ;
|
||||||
jnz vp8_half_horiz_vert_variance16x_h_1 ;
|
jnz vp8_half_horiz_vert_variance8x_h_1 ;
|
||||||
|
|
||||||
movdq2q mm6, xmm6 ;
|
movdq2q mm6, xmm6 ;
|
||||||
movdq2q mm7, xmm7 ;
|
movdq2q mm7, xmm7 ;
|
||||||
|
@ -910,8 +910,7 @@ vp8_half_horiz_vert_variance16x_h_1:
|
||||||
pop rbp
|
pop rbp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
;void vp8_half_horiz_vert_variance16x_h_sse2
|
||||||
;void vp8_half_vert_variance16x_h_sse2
|
|
||||||
;(
|
;(
|
||||||
; unsigned char *ref_ptr,
|
; unsigned char *ref_ptr,
|
||||||
; int ref_pixels_per_line,
|
; int ref_pixels_per_line,
|
||||||
|
@ -921,8 +920,124 @@ vp8_half_horiz_vert_variance16x_h_1:
|
||||||
; int *sum,
|
; int *sum,
|
||||||
; unsigned int *sumsquared
|
; unsigned int *sumsquared
|
||||||
;)
|
;)
|
||||||
global sym(vp8_half_vert_variance16x_h_sse2)
|
global sym(vp8_half_horiz_vert_variance16x_h_sse2)
|
||||||
sym(vp8_half_vert_variance16x_h_sse2):
|
sym(vp8_half_horiz_vert_variance16x_h_sse2):
|
||||||
|
push rbp
|
||||||
|
mov rbp, rsp
|
||||||
|
SHADOW_ARGS_TO_STACK 7
|
||||||
|
SAVE_XMM
|
||||||
|
GET_GOT rbx
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
; end prolog
|
||||||
|
|
||||||
|
pxor xmm6, xmm6 ; error accumulator
|
||||||
|
pxor xmm7, xmm7 ; sse eaccumulator
|
||||||
|
mov rsi, arg(0) ;ref_ptr ;
|
||||||
|
|
||||||
|
mov rdi, arg(2) ;src_ptr ;
|
||||||
|
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||||
|
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||||
|
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||||
|
|
||||||
|
pxor xmm0, xmm0 ;
|
||||||
|
|
||||||
|
movdqu xmm5, XMMWORD PTR [rsi]
|
||||||
|
movdqu xmm3, XMMWORD PTR [rsi+1]
|
||||||
|
pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3) horizontal line 1
|
||||||
|
|
||||||
|
lea rsi, [rsi + rax]
|
||||||
|
|
||||||
|
vp8_half_horiz_vert_variance16x_h_1:
|
||||||
|
movdqu xmm1, XMMWORD PTR [rsi] ;
|
||||||
|
movdqu xmm2, XMMWORD PTR [rsi+1] ;
|
||||||
|
pavgb xmm1, xmm2 ; xmm1 = avg(xmm1,xmm3) horizontal line i+1
|
||||||
|
|
||||||
|
pavgb xmm5, xmm1 ; xmm = vertical average of the above
|
||||||
|
|
||||||
|
movdqa xmm4, xmm5
|
||||||
|
punpcklbw xmm5, xmm0 ; xmm5 = words of above
|
||||||
|
punpckhbw xmm4, xmm0
|
||||||
|
|
||||||
|
movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
|
||||||
|
punpcklbw xmm3, xmm0 ; xmm3 = words of above
|
||||||
|
psubw xmm5, xmm3 ; xmm5 -= xmm3
|
||||||
|
|
||||||
|
movq xmm3, QWORD PTR [rdi+8]
|
||||||
|
punpcklbw xmm3, xmm0
|
||||||
|
psubw xmm4, xmm3
|
||||||
|
|
||||||
|
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||||
|
paddw xmm6, xmm4
|
||||||
|
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||||
|
pmaddwd xmm4, xmm4
|
||||||
|
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||||
|
paddd xmm7, xmm4
|
||||||
|
|
||||||
|
movdqa xmm5, xmm1 ; save xmm1 for use on the next row
|
||||||
|
|
||||||
|
lea rsi, [rsi + rax]
|
||||||
|
lea rdi, [rdi + rdx]
|
||||||
|
|
||||||
|
sub rcx, 1 ;
|
||||||
|
jnz vp8_half_horiz_vert_variance16x_h_1 ;
|
||||||
|
|
||||||
|
pxor xmm1, xmm1
|
||||||
|
pxor xmm5, xmm5
|
||||||
|
|
||||||
|
punpcklwd xmm0, xmm6
|
||||||
|
punpckhwd xmm1, xmm6
|
||||||
|
psrad xmm0, 16
|
||||||
|
psrad xmm1, 16
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
movdqa xmm1, xmm0
|
||||||
|
|
||||||
|
movdqa xmm6, xmm7
|
||||||
|
punpckldq xmm6, xmm5
|
||||||
|
punpckhdq xmm7, xmm5
|
||||||
|
paddd xmm6, xmm7
|
||||||
|
|
||||||
|
punpckldq xmm0, xmm5
|
||||||
|
punpckhdq xmm1, xmm5
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
|
||||||
|
movdqa xmm7, xmm6
|
||||||
|
movdqa xmm1, xmm0
|
||||||
|
|
||||||
|
psrldq xmm7, 8
|
||||||
|
psrldq xmm1, 8
|
||||||
|
|
||||||
|
paddd xmm6, xmm7
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
|
||||||
|
mov rsi, arg(5) ;[Sum]
|
||||||
|
mov rdi, arg(6) ;[SSE]
|
||||||
|
|
||||||
|
movd [rsi], xmm0
|
||||||
|
movd [rdi], xmm6
|
||||||
|
|
||||||
|
; begin epilog
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
RESTORE_GOT
|
||||||
|
RESTORE_XMM
|
||||||
|
UNSHADOW_ARGS
|
||||||
|
pop rbp
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
;void vp8_half_vert_variance8x_h_sse2
|
||||||
|
;(
|
||||||
|
; unsigned char *ref_ptr,
|
||||||
|
; int ref_pixels_per_line,
|
||||||
|
; unsigned char *src_ptr,
|
||||||
|
; int src_pixels_per_line,
|
||||||
|
; unsigned int Height,
|
||||||
|
; int *sum,
|
||||||
|
; unsigned int *sumsquared
|
||||||
|
;)
|
||||||
|
global sym(vp8_half_vert_variance8x_h_sse2)
|
||||||
|
sym(vp8_half_vert_variance8x_h_sse2):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 7
|
SHADOW_ARGS_TO_STACK 7
|
||||||
|
@ -945,7 +1060,7 @@ sym(vp8_half_vert_variance16x_h_sse2):
|
||||||
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||||
|
|
||||||
pxor xmm0, xmm0 ;
|
pxor xmm0, xmm0 ;
|
||||||
vp8_half_vert_variance16x_h_1:
|
vp8_half_vert_variance8x_h_1:
|
||||||
movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
|
movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
|
||||||
movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9
|
movq xmm3, QWORD PTR [rsi+rax] ; xmm3 = s1,s2,s3..s9
|
||||||
|
|
||||||
|
@ -969,7 +1084,7 @@ vp8_half_vert_variance16x_h_1:
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
sub rcx, 1 ;
|
sub rcx, 1 ;
|
||||||
jnz vp8_half_vert_variance16x_h_1 ;
|
jnz vp8_half_vert_variance8x_h_1 ;
|
||||||
|
|
||||||
movdq2q mm6, xmm6 ;
|
movdq2q mm6, xmm6 ;
|
||||||
movdq2q mm7, xmm7 ;
|
movdq2q mm7, xmm7 ;
|
||||||
|
@ -1016,8 +1131,7 @@ vp8_half_vert_variance16x_h_1:
|
||||||
pop rbp
|
pop rbp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
;void vp8_half_vert_variance16x_h_sse2
|
||||||
;void vp8_half_horiz_variance16x_h_sse2
|
|
||||||
;(
|
;(
|
||||||
; unsigned char *ref_ptr,
|
; unsigned char *ref_ptr,
|
||||||
; int ref_pixels_per_line,
|
; int ref_pixels_per_line,
|
||||||
|
@ -1027,8 +1141,116 @@ vp8_half_vert_variance16x_h_1:
|
||||||
; int *sum,
|
; int *sum,
|
||||||
; unsigned int *sumsquared
|
; unsigned int *sumsquared
|
||||||
;)
|
;)
|
||||||
global sym(vp8_half_horiz_variance16x_h_sse2)
|
global sym(vp8_half_vert_variance16x_h_sse2)
|
||||||
sym(vp8_half_horiz_variance16x_h_sse2):
|
sym(vp8_half_vert_variance16x_h_sse2):
|
||||||
|
push rbp
|
||||||
|
mov rbp, rsp
|
||||||
|
SHADOW_ARGS_TO_STACK 7
|
||||||
|
SAVE_XMM
|
||||||
|
GET_GOT rbx
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
; end prolog
|
||||||
|
|
||||||
|
pxor xmm6, xmm6 ; error accumulator
|
||||||
|
pxor xmm7, xmm7 ; sse eaccumulator
|
||||||
|
mov rsi, arg(0) ;ref_ptr
|
||||||
|
|
||||||
|
mov rdi, arg(2) ;src_ptr
|
||||||
|
movsxd rcx, dword ptr arg(4) ;Height
|
||||||
|
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||||
|
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||||
|
|
||||||
|
movdqu xmm5, XMMWORD PTR [rsi]
|
||||||
|
lea rsi, [rsi + rax ]
|
||||||
|
pxor xmm0, xmm0
|
||||||
|
|
||||||
|
vp8_half_vert_variance16x_h_1:
|
||||||
|
movdqu xmm3, XMMWORD PTR [rsi]
|
||||||
|
|
||||||
|
pavgb xmm5, xmm3 ; xmm5 = avg(xmm1,xmm3)
|
||||||
|
movdqa xmm4, xmm5
|
||||||
|
punpcklbw xmm5, xmm0
|
||||||
|
punpckhbw xmm4, xmm0
|
||||||
|
|
||||||
|
movq xmm2, QWORD PTR [rdi]
|
||||||
|
punpcklbw xmm2, xmm0
|
||||||
|
psubw xmm5, xmm2
|
||||||
|
movq xmm2, QWORD PTR [rdi+8]
|
||||||
|
punpcklbw xmm2, xmm0
|
||||||
|
psubw xmm4, xmm2
|
||||||
|
|
||||||
|
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||||
|
paddw xmm6, xmm4
|
||||||
|
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||||
|
pmaddwd xmm4, xmm4
|
||||||
|
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||||
|
paddd xmm7, xmm4
|
||||||
|
|
||||||
|
movdqa xmm5, xmm3
|
||||||
|
|
||||||
|
lea rsi, [rsi + rax]
|
||||||
|
lea rdi, [rdi + rdx]
|
||||||
|
|
||||||
|
sub rcx, 1
|
||||||
|
jnz vp8_half_vert_variance16x_h_1
|
||||||
|
|
||||||
|
pxor xmm1, xmm1
|
||||||
|
pxor xmm5, xmm5
|
||||||
|
|
||||||
|
punpcklwd xmm0, xmm6
|
||||||
|
punpckhwd xmm1, xmm6
|
||||||
|
psrad xmm0, 16
|
||||||
|
psrad xmm1, 16
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
movdqa xmm1, xmm0
|
||||||
|
|
||||||
|
movdqa xmm6, xmm7
|
||||||
|
punpckldq xmm6, xmm5
|
||||||
|
punpckhdq xmm7, xmm5
|
||||||
|
paddd xmm6, xmm7
|
||||||
|
|
||||||
|
punpckldq xmm0, xmm5
|
||||||
|
punpckhdq xmm1, xmm5
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
|
||||||
|
movdqa xmm7, xmm6
|
||||||
|
movdqa xmm1, xmm0
|
||||||
|
|
||||||
|
psrldq xmm7, 8
|
||||||
|
psrldq xmm1, 8
|
||||||
|
|
||||||
|
paddd xmm6, xmm7
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
|
||||||
|
mov rsi, arg(5) ;[Sum]
|
||||||
|
mov rdi, arg(6) ;[SSE]
|
||||||
|
|
||||||
|
movd [rsi], xmm0
|
||||||
|
movd [rdi], xmm6
|
||||||
|
|
||||||
|
; begin epilog
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
RESTORE_GOT
|
||||||
|
RESTORE_XMM
|
||||||
|
UNSHADOW_ARGS
|
||||||
|
pop rbp
|
||||||
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
;void vp8_half_horiz_variance8x_h_sse2
|
||||||
|
;(
|
||||||
|
; unsigned char *ref_ptr,
|
||||||
|
; int ref_pixels_per_line,
|
||||||
|
; unsigned char *src_ptr,
|
||||||
|
; int src_pixels_per_line,
|
||||||
|
; unsigned int Height,
|
||||||
|
; int *sum,
|
||||||
|
; unsigned int *sumsquared
|
||||||
|
;)
|
||||||
|
global sym(vp8_half_horiz_variance8x_h_sse2)
|
||||||
|
sym(vp8_half_horiz_variance8x_h_sse2):
|
||||||
push rbp
|
push rbp
|
||||||
mov rbp, rsp
|
mov rbp, rsp
|
||||||
SHADOW_ARGS_TO_STACK 7
|
SHADOW_ARGS_TO_STACK 7
|
||||||
|
@ -1050,7 +1272,7 @@ sym(vp8_half_horiz_variance16x_h_sse2):
|
||||||
movsxd rcx, dword ptr arg(4) ;Height ;
|
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||||
|
|
||||||
pxor xmm0, xmm0 ;
|
pxor xmm0, xmm0 ;
|
||||||
vp8_half_horiz_variance16x16_1:
|
vp8_half_horiz_variance8x_h_1:
|
||||||
movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
|
movq xmm5, QWORD PTR [rsi] ; xmm5 = s0,s1,s2..s8
|
||||||
movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
|
movq xmm3, QWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s9
|
||||||
|
|
||||||
|
@ -1073,7 +1295,7 @@ vp8_half_horiz_variance16x16_1:
|
||||||
add rdi, r9
|
add rdi, r9
|
||||||
%endif
|
%endif
|
||||||
sub rcx, 1 ;
|
sub rcx, 1 ;
|
||||||
jnz vp8_half_horiz_variance16x16_1 ;
|
jnz vp8_half_horiz_variance8x_h_1 ;
|
||||||
|
|
||||||
movdq2q mm6, xmm6 ;
|
movdq2q mm6, xmm6 ;
|
||||||
movdq2q mm7, xmm7 ;
|
movdq2q mm7, xmm7 ;
|
||||||
|
@ -1120,6 +1342,109 @@ vp8_half_horiz_variance16x16_1:
|
||||||
pop rbp
|
pop rbp
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
;void vp8_half_horiz_variance16x_h_sse2
|
||||||
|
;(
|
||||||
|
; unsigned char *ref_ptr,
|
||||||
|
; int ref_pixels_per_line,
|
||||||
|
; unsigned char *src_ptr,
|
||||||
|
; int src_pixels_per_line,
|
||||||
|
; unsigned int Height,
|
||||||
|
; int *sum,
|
||||||
|
; unsigned int *sumsquared
|
||||||
|
;)
|
||||||
|
global sym(vp8_half_horiz_variance16x_h_sse2)
|
||||||
|
sym(vp8_half_horiz_variance16x_h_sse2):
|
||||||
|
push rbp
|
||||||
|
mov rbp, rsp
|
||||||
|
SHADOW_ARGS_TO_STACK 7
|
||||||
|
SAVE_XMM
|
||||||
|
GET_GOT rbx
|
||||||
|
push rsi
|
||||||
|
push rdi
|
||||||
|
; end prolog
|
||||||
|
|
||||||
|
pxor xmm6, xmm6 ; error accumulator
|
||||||
|
pxor xmm7, xmm7 ; sse accumulator
|
||||||
|
mov rsi, arg(0) ;ref_ptr ;
|
||||||
|
|
||||||
|
mov rdi, arg(2) ;src_ptr ;
|
||||||
|
movsxd rcx, dword ptr arg(4) ;Height ;
|
||||||
|
movsxd rax, dword ptr arg(1) ;ref_pixels_per_line
|
||||||
|
movsxd rdx, dword ptr arg(3) ;src_pixels_per_line
|
||||||
|
|
||||||
|
pxor xmm0, xmm0 ;
|
||||||
|
|
||||||
|
vp8_half_horiz_variance16x_h_1:
|
||||||
|
movdqu xmm5, XMMWORD PTR [rsi] ; xmm5 = s0,s1,s2..s15
|
||||||
|
movdqu xmm3, XMMWORD PTR [rsi+1] ; xmm3 = s1,s2,s3..s16
|
||||||
|
|
||||||
|
pavgb xmm5, xmm3 ; xmm5 = avg(xmm5,xmm3)
|
||||||
|
movdqa xmm1, xmm5
|
||||||
|
punpcklbw xmm5, xmm0 ; xmm5 = words of above
|
||||||
|
punpckhbw xmm1, xmm0
|
||||||
|
|
||||||
|
movq xmm3, QWORD PTR [rdi] ; xmm3 = d0,d1,d2..d7
|
||||||
|
punpcklbw xmm3, xmm0 ; xmm3 = words of above
|
||||||
|
movq xmm2, QWORD PTR [rdi+8]
|
||||||
|
punpcklbw xmm2, xmm0
|
||||||
|
|
||||||
|
psubw xmm5, xmm3 ; xmm5 -= xmm3
|
||||||
|
psubw xmm1, xmm2
|
||||||
|
paddw xmm6, xmm5 ; xmm6 += accumulated column differences
|
||||||
|
paddw xmm6, xmm1
|
||||||
|
pmaddwd xmm5, xmm5 ; xmm5 *= xmm5
|
||||||
|
pmaddwd xmm1, xmm1
|
||||||
|
paddd xmm7, xmm5 ; xmm7 += accumulated square column differences
|
||||||
|
paddd xmm7, xmm1
|
||||||
|
|
||||||
|
lea rsi, [rsi + rax]
|
||||||
|
lea rdi, [rdi + rdx]
|
||||||
|
|
||||||
|
sub rcx, 1 ;
|
||||||
|
jnz vp8_half_horiz_variance16x_h_1 ;
|
||||||
|
|
||||||
|
pxor xmm1, xmm1
|
||||||
|
pxor xmm5, xmm5
|
||||||
|
|
||||||
|
punpcklwd xmm0, xmm6
|
||||||
|
punpckhwd xmm1, xmm6
|
||||||
|
psrad xmm0, 16
|
||||||
|
psrad xmm1, 16
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
movdqa xmm1, xmm0
|
||||||
|
|
||||||
|
movdqa xmm6, xmm7
|
||||||
|
punpckldq xmm6, xmm5
|
||||||
|
punpckhdq xmm7, xmm5
|
||||||
|
paddd xmm6, xmm7
|
||||||
|
|
||||||
|
punpckldq xmm0, xmm5
|
||||||
|
punpckhdq xmm1, xmm5
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
|
||||||
|
movdqa xmm7, xmm6
|
||||||
|
movdqa xmm1, xmm0
|
||||||
|
|
||||||
|
psrldq xmm7, 8
|
||||||
|
psrldq xmm1, 8
|
||||||
|
|
||||||
|
paddd xmm6, xmm7
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
|
||||||
|
mov rsi, arg(5) ;[Sum]
|
||||||
|
mov rdi, arg(6) ;[SSE]
|
||||||
|
|
||||||
|
movd [rsi], xmm0
|
||||||
|
movd [rdi], xmm6
|
||||||
|
|
||||||
|
; begin epilog
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
RESTORE_GOT
|
||||||
|
RESTORE_XMM
|
||||||
|
UNSHADOW_ARGS
|
||||||
|
pop rbp
|
||||||
|
ret
|
||||||
|
|
||||||
SECTION_RODATA
|
SECTION_RODATA
|
||||||
; short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64};
|
; short xmm_bi_rd[8] = { 64, 64, 64, 64,64, 64, 64, 64};
|
||||||
|
|
|
@ -456,146 +456,6 @@ unsigned int vp8_sub_pixel_variance8x16_mmx
|
||||||
return (xxsum - ((xsum * xsum) >> 7));
|
return (xxsum - ((xsum * xsum) >> 7));
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int vp8_i_variance16x16_mmx(
|
|
||||||
const unsigned char *src_ptr,
|
|
||||||
int source_stride,
|
|
||||||
const unsigned char *ref_ptr,
|
|
||||||
int recon_stride,
|
|
||||||
unsigned int *sse)
|
|
||||||
{
|
|
||||||
unsigned int sse0, sse1, sse2, sse3, var;
|
|
||||||
int sum0, sum1, sum2, sum3, avg;
|
|
||||||
|
|
||||||
|
|
||||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
|
||||||
vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
|
||||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2) ;
|
|
||||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);
|
|
||||||
|
|
||||||
var = sse0 + sse1 + sse2 + sse3;
|
|
||||||
avg = sum0 + sum1 + sum2 + sum3;
|
|
||||||
*sse = var;
|
|
||||||
return (var - ((avg * avg) >> 8));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int vp8_i_variance8x16_mmx(
|
|
||||||
const unsigned char *src_ptr,
|
|
||||||
int source_stride,
|
|
||||||
const unsigned char *ref_ptr,
|
|
||||||
int recon_stride,
|
|
||||||
unsigned int *sse)
|
|
||||||
{
|
|
||||||
unsigned int sse0, sse1, var;
|
|
||||||
int sum0, sum1, avg;
|
|
||||||
vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
|
||||||
vp8_get8x8var_mmx(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1) ;
|
|
||||||
|
|
||||||
var = sse0 + sse1;
|
|
||||||
avg = sum0 + sum1;
|
|
||||||
|
|
||||||
*sse = var;
|
|
||||||
return (var - ((avg * avg) >> 7));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int vp8_i_sub_pixel_variance16x16_mmx
|
|
||||||
(
|
|
||||||
const unsigned char *src_ptr,
|
|
||||||
int src_pixels_per_line,
|
|
||||||
int xoffset,
|
|
||||||
int yoffset,
|
|
||||||
const unsigned char *dst_ptr,
|
|
||||||
int dst_pixels_per_line,
|
|
||||||
unsigned int *sse
|
|
||||||
)
|
|
||||||
{
|
|
||||||
int xsum0, xsum1;
|
|
||||||
unsigned int xxsum0, xxsum1;
|
|
||||||
int f2soffset = (src_pixels_per_line >> 1);
|
|
||||||
int f2doffset = (dst_pixels_per_line >> 1);
|
|
||||||
|
|
||||||
|
|
||||||
vp8_filter_block2d_bil_var_mmx(
|
|
||||||
src_ptr, src_pixels_per_line,
|
|
||||||
dst_ptr, dst_pixels_per_line, 8,
|
|
||||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
|
||||||
&xsum0, &xxsum0
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
vp8_filter_block2d_bil_var_mmx(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
|
||||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
|
||||||
&xsum1, &xxsum1
|
|
||||||
);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
|
|
||||||
vp8_filter_block2d_bil_var_mmx(
|
|
||||||
src_ptr + f2soffset, src_pixels_per_line,
|
|
||||||
dst_ptr + f2doffset, dst_pixels_per_line, 8,
|
|
||||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
|
||||||
&xsum1, &xxsum1
|
|
||||||
);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
|
|
||||||
vp8_filter_block2d_bil_var_mmx(
|
|
||||||
src_ptr + f2soffset + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + f2doffset + 8, dst_pixels_per_line, 8,
|
|
||||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
|
||||||
&xsum1, &xxsum1
|
|
||||||
);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
*sse = xxsum0;
|
|
||||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
unsigned int vp8_i_sub_pixel_variance8x16_mmx
|
|
||||||
(
|
|
||||||
const unsigned char *src_ptr,
|
|
||||||
int src_pixels_per_line,
|
|
||||||
int xoffset,
|
|
||||||
int yoffset,
|
|
||||||
const unsigned char *dst_ptr,
|
|
||||||
int dst_pixels_per_line,
|
|
||||||
unsigned int *sse
|
|
||||||
)
|
|
||||||
{
|
|
||||||
int xsum0, xsum1;
|
|
||||||
unsigned int xxsum0, xxsum1;
|
|
||||||
int f2soffset = (src_pixels_per_line >> 1);
|
|
||||||
int f2doffset = (dst_pixels_per_line >> 1);
|
|
||||||
|
|
||||||
|
|
||||||
vp8_filter_block2d_bil_var_mmx(
|
|
||||||
src_ptr, src_pixels_per_line,
|
|
||||||
dst_ptr, dst_pixels_per_line, 8,
|
|
||||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
|
||||||
&xsum0, &xxsum0
|
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
vp8_filter_block2d_bil_var_mmx(
|
|
||||||
src_ptr + f2soffset, src_pixels_per_line,
|
|
||||||
dst_ptr + f2doffset, dst_pixels_per_line, 8,
|
|
||||||
vp8_vp7_bilinear_filters_mmx[xoffset], vp8_vp7_bilinear_filters_mmx[yoffset],
|
|
||||||
&xsum1, &xxsum1
|
|
||||||
);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
*sse = xxsum0;
|
|
||||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
|
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
|
||||||
const unsigned char *src_ptr,
|
const unsigned char *src_ptr,
|
||||||
|
|
|
@ -81,6 +81,16 @@ void vp8_filter_block2d_bil_var_sse2
|
||||||
int *sum,
|
int *sum,
|
||||||
unsigned int *sumsquared
|
unsigned int *sumsquared
|
||||||
);
|
);
|
||||||
|
void vp8_half_horiz_vert_variance8x_h_sse2
|
||||||
|
(
|
||||||
|
const unsigned char *ref_ptr,
|
||||||
|
int ref_pixels_per_line,
|
||||||
|
const unsigned char *src_ptr,
|
||||||
|
int src_pixels_per_line,
|
||||||
|
unsigned int Height,
|
||||||
|
int *sum,
|
||||||
|
unsigned int *sumsquared
|
||||||
|
);
|
||||||
void vp8_half_horiz_vert_variance16x_h_sse2
|
void vp8_half_horiz_vert_variance16x_h_sse2
|
||||||
(
|
(
|
||||||
const unsigned char *ref_ptr,
|
const unsigned char *ref_ptr,
|
||||||
|
@ -91,6 +101,16 @@ void vp8_half_horiz_vert_variance16x_h_sse2
|
||||||
int *sum,
|
int *sum,
|
||||||
unsigned int *sumsquared
|
unsigned int *sumsquared
|
||||||
);
|
);
|
||||||
|
void vp8_half_horiz_variance8x_h_sse2
|
||||||
|
(
|
||||||
|
const unsigned char *ref_ptr,
|
||||||
|
int ref_pixels_per_line,
|
||||||
|
const unsigned char *src_ptr,
|
||||||
|
int src_pixels_per_line,
|
||||||
|
unsigned int Height,
|
||||||
|
int *sum,
|
||||||
|
unsigned int *sumsquared
|
||||||
|
);
|
||||||
void vp8_half_horiz_variance16x_h_sse2
|
void vp8_half_horiz_variance16x_h_sse2
|
||||||
(
|
(
|
||||||
const unsigned char *ref_ptr,
|
const unsigned char *ref_ptr,
|
||||||
|
@ -101,6 +121,16 @@ void vp8_half_horiz_variance16x_h_sse2
|
||||||
int *sum,
|
int *sum,
|
||||||
unsigned int *sumsquared
|
unsigned int *sumsquared
|
||||||
);
|
);
|
||||||
|
void vp8_half_vert_variance8x_h_sse2
|
||||||
|
(
|
||||||
|
const unsigned char *ref_ptr,
|
||||||
|
int ref_pixels_per_line,
|
||||||
|
const unsigned char *src_ptr,
|
||||||
|
int src_pixels_per_line,
|
||||||
|
unsigned int Height,
|
||||||
|
int *sum,
|
||||||
|
unsigned int *sumsquared
|
||||||
|
);
|
||||||
void vp8_half_vert_variance16x_h_sse2
|
void vp8_half_vert_variance16x_h_sse2
|
||||||
(
|
(
|
||||||
const unsigned char *ref_ptr,
|
const unsigned char *ref_ptr,
|
||||||
|
@ -262,21 +292,21 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
|
||||||
|
|
||||||
if (xoffset == 4 && yoffset == 0)
|
if (xoffset == 4 && yoffset == 0)
|
||||||
{
|
{
|
||||||
vp8_half_horiz_variance16x_h_sse2(
|
vp8_half_horiz_variance8x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 8,
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
&xsum, &xxsum);
|
&xsum, &xxsum);
|
||||||
}
|
}
|
||||||
else if (xoffset == 0 && yoffset == 4)
|
else if (xoffset == 0 && yoffset == 4)
|
||||||
{
|
{
|
||||||
vp8_half_vert_variance16x_h_sse2(
|
vp8_half_vert_variance8x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 8,
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
&xsum, &xxsum);
|
&xsum, &xxsum);
|
||||||
}
|
}
|
||||||
else if (xoffset == 4 && yoffset == 4)
|
else if (xoffset == 4 && yoffset == 4)
|
||||||
{
|
{
|
||||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
vp8_half_horiz_vert_variance8x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 8,
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
&xsum, &xxsum);
|
&xsum, &xxsum);
|
||||||
|
@ -317,11 +347,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_horiz_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
}
|
}
|
||||||
else if (xoffset == 0 && yoffset == 4)
|
else if (xoffset == 0 && yoffset == 4)
|
||||||
{
|
{
|
||||||
|
@ -329,11 +354,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_vert_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
}
|
}
|
||||||
else if (xoffset == 4 && yoffset == 4)
|
else if (xoffset == 4 && yoffset == 4)
|
||||||
{
|
{
|
||||||
|
@ -341,11 +361,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -356,17 +371,16 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
|
||||||
&xsum0, &xxsum0
|
&xsum0, &xxsum0
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
vp8_filter_block2d_bil_var_sse2(
|
vp8_filter_block2d_bil_var_sse2(
|
||||||
src_ptr + 8, src_pixels_per_line,
|
src_ptr + 8, src_pixels_per_line,
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
dst_ptr + 8, dst_pixels_per_line, 16,
|
||||||
xoffset, yoffset,
|
xoffset, yoffset,
|
||||||
&xsum1, &xxsum1
|
&xsum1, &xxsum1
|
||||||
);
|
);
|
||||||
|
xsum0 += xsum1;
|
||||||
|
xxsum0 += xxsum1;
|
||||||
}
|
}
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
*sse = xxsum0;
|
*sse = xxsum0;
|
||||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||||
}
|
}
|
||||||
|
@ -406,11 +420,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 8,
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_horiz_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
}
|
}
|
||||||
else if (xoffset == 0 && yoffset == 4)
|
else if (xoffset == 0 && yoffset == 4)
|
||||||
{
|
{
|
||||||
|
@ -418,11 +427,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 8,
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_vert_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
}
|
}
|
||||||
else if (xoffset == 4 && yoffset == 4)
|
else if (xoffset == 4 && yoffset == 4)
|
||||||
{
|
{
|
||||||
|
@ -430,11 +434,6 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 8,
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -449,11 +448,10 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
|
||||||
dst_ptr + 8, dst_pixels_per_line, 8,
|
dst_ptr + 8, dst_pixels_per_line, 8,
|
||||||
xoffset, yoffset,
|
xoffset, yoffset,
|
||||||
&xsum1, &xxsum1);
|
&xsum1, &xxsum1);
|
||||||
|
xsum0 += xsum1;
|
||||||
|
xxsum0 += xxsum1;
|
||||||
}
|
}
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
|
|
||||||
*sse = xxsum0;
|
*sse = xxsum0;
|
||||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||||
}
|
}
|
||||||
|
@ -474,21 +472,21 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
|
||||||
|
|
||||||
if (xoffset == 4 && yoffset == 0)
|
if (xoffset == 4 && yoffset == 0)
|
||||||
{
|
{
|
||||||
vp8_half_horiz_variance16x_h_sse2(
|
vp8_half_horiz_variance8x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum, &xxsum);
|
&xsum, &xxsum);
|
||||||
}
|
}
|
||||||
else if (xoffset == 0 && yoffset == 4)
|
else if (xoffset == 0 && yoffset == 4)
|
||||||
{
|
{
|
||||||
vp8_half_vert_variance16x_h_sse2(
|
vp8_half_vert_variance8x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum, &xxsum);
|
&xsum, &xxsum);
|
||||||
}
|
}
|
||||||
else if (xoffset == 4 && yoffset == 4)
|
else if (xoffset == 4 && yoffset == 4)
|
||||||
{
|
{
|
||||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
vp8_half_horiz_vert_variance8x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum, &xxsum);
|
&xsum, &xxsum);
|
||||||
|
@ -506,81 +504,6 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
|
||||||
return (xxsum - ((xsum * xsum) >> 7));
|
return (xxsum - ((xsum * xsum) >> 7));
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int vp8_i_variance16x16_wmt(
|
|
||||||
const unsigned char *src_ptr,
|
|
||||||
int source_stride,
|
|
||||||
const unsigned char *ref_ptr,
|
|
||||||
int recon_stride,
|
|
||||||
unsigned int *sse)
|
|
||||||
{
|
|
||||||
unsigned int sse0, sse1, sse2, sse3, var;
|
|
||||||
int sum0, sum1, sum2, sum3, avg;
|
|
||||||
|
|
||||||
|
|
||||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
|
||||||
vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
|
|
||||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse2, &sum2) ;
|
|
||||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1) + 8, source_stride, ref_ptr + (recon_stride >> 1) + 8, recon_stride, &sse3, &sum3);
|
|
||||||
|
|
||||||
var = sse0 + sse1 + sse2 + sse3;
|
|
||||||
avg = sum0 + sum1 + sum2 + sum3;
|
|
||||||
|
|
||||||
*sse = var;
|
|
||||||
return (var - ((avg * avg) >> 8));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int vp8_i_variance8x16_wmt(
|
|
||||||
const unsigned char *src_ptr,
|
|
||||||
int source_stride,
|
|
||||||
const unsigned char *ref_ptr,
|
|
||||||
int recon_stride,
|
|
||||||
unsigned int *sse)
|
|
||||||
{
|
|
||||||
unsigned int sse0, sse1, var;
|
|
||||||
int sum0, sum1, avg;
|
|
||||||
vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
|
|
||||||
vp8_get8x8var_sse2(src_ptr + (source_stride >> 1), source_stride, ref_ptr + (recon_stride >> 1), recon_stride, &sse1, &sum1) ;
|
|
||||||
|
|
||||||
var = sse0 + sse1;
|
|
||||||
avg = sum0 + sum1;
|
|
||||||
|
|
||||||
*sse = var;
|
|
||||||
return (var - ((avg * avg) >> 7));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
unsigned int vp8_i_sub_pixel_variance16x16_wmt
|
|
||||||
(
|
|
||||||
const unsigned char *src_ptr,
|
|
||||||
int src_pixels_per_line,
|
|
||||||
int xoffset,
|
|
||||||
int yoffset,
|
|
||||||
const unsigned char *dst_ptr,
|
|
||||||
int dst_pixels_per_line,
|
|
||||||
unsigned int *sse
|
|
||||||
)
|
|
||||||
{
|
|
||||||
return vp8_sub_pixel_variance16x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
unsigned int vp8_i_sub_pixel_variance8x16_wmt
|
|
||||||
(
|
|
||||||
const unsigned char *src_ptr,
|
|
||||||
int src_pixels_per_line,
|
|
||||||
int xoffset,
|
|
||||||
int yoffset,
|
|
||||||
const unsigned char *dst_ptr,
|
|
||||||
int dst_pixels_per_line,
|
|
||||||
unsigned int *sse
|
|
||||||
)
|
|
||||||
{
|
|
||||||
|
|
||||||
return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
|
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
|
||||||
const unsigned char *src_ptr,
|
const unsigned char *src_ptr,
|
||||||
|
@ -589,21 +512,14 @@ unsigned int vp8_variance_halfpixvar16x16_h_wmt(
|
||||||
int dst_pixels_per_line,
|
int dst_pixels_per_line,
|
||||||
unsigned int *sse)
|
unsigned int *sse)
|
||||||
{
|
{
|
||||||
int xsum0, xsum1;
|
int xsum0;
|
||||||
unsigned int xxsum0, xxsum1;
|
unsigned int xxsum0;
|
||||||
|
|
||||||
vp8_half_horiz_variance16x_h_sse2(
|
vp8_half_horiz_variance16x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_horiz_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
*sse = xxsum0;
|
*sse = xxsum0;
|
||||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||||
}
|
}
|
||||||
|
@ -616,21 +532,13 @@ unsigned int vp8_variance_halfpixvar16x16_v_wmt(
|
||||||
int dst_pixels_per_line,
|
int dst_pixels_per_line,
|
||||||
unsigned int *sse)
|
unsigned int *sse)
|
||||||
{
|
{
|
||||||
int xsum0, xsum1;
|
int xsum0;
|
||||||
unsigned int xxsum0, xxsum1;
|
unsigned int xxsum0;
|
||||||
|
|
||||||
vp8_half_vert_variance16x_h_sse2(
|
vp8_half_vert_variance16x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_vert_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
*sse = xxsum0;
|
*sse = xxsum0;
|
||||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||||
}
|
}
|
||||||
|
@ -643,21 +551,14 @@ unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
|
||||||
int dst_pixels_per_line,
|
int dst_pixels_per_line,
|
||||||
unsigned int *sse)
|
unsigned int *sse)
|
||||||
{
|
{
|
||||||
int xsum0, xsum1;
|
int xsum0;
|
||||||
unsigned int xxsum0, xxsum1;
|
unsigned int xxsum0;
|
||||||
|
|
||||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
*sse = xxsum0;
|
*sse = xxsum0;
|
||||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,8 +76,8 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
||||||
unsigned int *sse
|
unsigned int *sse
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
int xsum0, xsum1;
|
int xsum0;
|
||||||
unsigned int xxsum0, xxsum1;
|
unsigned int xxsum0;
|
||||||
|
|
||||||
// note we could avoid these if statements if the calling function
|
// note we could avoid these if statements if the calling function
|
||||||
// just called the appropriate functions inside.
|
// just called the appropriate functions inside.
|
||||||
|
@ -87,14 +87,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_horiz_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
}
|
}
|
||||||
else if (xoffset == 0 && yoffset == 4)
|
else if (xoffset == 0 && yoffset == 4)
|
||||||
{
|
{
|
||||||
|
@ -102,14 +94,6 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_vert_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
}
|
}
|
||||||
else if (xoffset == 4 && yoffset == 4)
|
else if (xoffset == 4 && yoffset == 4)
|
||||||
{
|
{
|
||||||
|
@ -117,24 +101,65 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
|
|
||||||
vp8_half_horiz_vert_variance16x_h_sse2(
|
|
||||||
src_ptr + 8, src_pixels_per_line,
|
|
||||||
dst_ptr + 8, dst_pixels_per_line, 16,
|
|
||||||
&xsum1, &xxsum1);
|
|
||||||
|
|
||||||
xsum0 += xsum1;
|
|
||||||
xxsum0 += xxsum1;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
vp8_filter_block2d_bil_var_ssse3(
|
vp8_filter_block2d_bil_var_ssse3(
|
||||||
src_ptr, src_pixels_per_line,
|
src_ptr, src_pixels_per_line,
|
||||||
dst_ptr, dst_pixels_per_line, 16,
|
dst_ptr, dst_pixels_per_line, 16,
|
||||||
xoffset, yoffset,
|
xoffset, yoffset,
|
||||||
&xsum0, &xxsum0);
|
&xsum0, &xxsum0);
|
||||||
}
|
}
|
||||||
|
|
||||||
*sse = xxsum0;
|
*sse = xxsum0;
|
||||||
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
return (xxsum0 - ((xsum0 * xsum0) >> 8));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unsigned int vp8_sub_pixel_variance16x8_ssse3
|
||||||
|
(
|
||||||
|
const unsigned char *src_ptr,
|
||||||
|
int src_pixels_per_line,
|
||||||
|
int xoffset,
|
||||||
|
int yoffset,
|
||||||
|
const unsigned char *dst_ptr,
|
||||||
|
int dst_pixels_per_line,
|
||||||
|
unsigned int *sse
|
||||||
|
|
||||||
|
)
|
||||||
|
{
|
||||||
|
int xsum0;
|
||||||
|
unsigned int xxsum0;
|
||||||
|
|
||||||
|
if (xoffset == 4 && yoffset == 0)
|
||||||
|
{
|
||||||
|
vp8_half_horiz_variance16x_h_sse2(
|
||||||
|
src_ptr, src_pixels_per_line,
|
||||||
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
|
&xsum0, &xxsum0);
|
||||||
|
}
|
||||||
|
else if (xoffset == 0 && yoffset == 4)
|
||||||
|
{
|
||||||
|
vp8_half_vert_variance16x_h_sse2(
|
||||||
|
src_ptr, src_pixels_per_line,
|
||||||
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
|
&xsum0, &xxsum0);
|
||||||
|
}
|
||||||
|
else if (xoffset == 4 && yoffset == 4)
|
||||||
|
{
|
||||||
|
vp8_half_horiz_vert_variance16x_h_sse2(
|
||||||
|
src_ptr, src_pixels_per_line,
|
||||||
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
|
&xsum0, &xxsum0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
vp8_filter_block2d_bil_var_ssse3(
|
||||||
|
src_ptr, src_pixels_per_line,
|
||||||
|
dst_ptr, dst_pixels_per_line, 8,
|
||||||
|
xoffset, yoffset,
|
||||||
|
&xsum0, &xxsum0);
|
||||||
|
}
|
||||||
|
|
||||||
|
*sse = xxsum0;
|
||||||
|
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||||
|
}
|
||||||
|
|
|
@ -286,6 +286,7 @@ extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3);
|
||||||
#if HAVE_SSSE3
|
#if HAVE_SSSE3
|
||||||
extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3);
|
extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3);
|
||||||
extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
|
extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
|
||||||
|
extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3);
|
||||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
|
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
|
||||||
|
|
||||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||||
|
@ -295,6 +296,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
|
||||||
#undef vp8_variance_sad16x8x3
|
#undef vp8_variance_sad16x8x3
|
||||||
#define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3
|
#define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3
|
||||||
|
|
||||||
|
#undef vp8_variance_subpixvar16x8
|
||||||
|
#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3
|
||||||
|
|
||||||
#undef vp8_variance_subpixvar16x16
|
#undef vp8_variance_subpixvar16x16
|
||||||
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3
|
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3
|
||||||
|
|
||||||
|
|
|
@ -334,6 +334,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||||
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
|
cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
|
||||||
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
|
cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
|
||||||
|
|
||||||
|
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3;
|
||||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
|
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
|
||||||
|
|
||||||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
|
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
|
||||||
|
|
|
@ -24,6 +24,7 @@ VP8_COMMON_SRCS-yes += common/entropymode.c
|
||||||
VP8_COMMON_SRCS-yes += common/entropymv.c
|
VP8_COMMON_SRCS-yes += common/entropymv.c
|
||||||
VP8_COMMON_SRCS-yes += common/extend.c
|
VP8_COMMON_SRCS-yes += common/extend.c
|
||||||
VP8_COMMON_SRCS-yes += common/filter.c
|
VP8_COMMON_SRCS-yes += common/filter.c
|
||||||
|
VP8_COMMON_SRCS-yes += common/filter.h
|
||||||
VP8_COMMON_SRCS-yes += common/findnearmv.c
|
VP8_COMMON_SRCS-yes += common/findnearmv.c
|
||||||
VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
|
VP8_COMMON_SRCS-yes += common/generic/systemdependent.c
|
||||||
VP8_COMMON_SRCS-yes += common/idctllm.c
|
VP8_COMMON_SRCS-yes += common/idctllm.c
|
||||||
|
|
Загрузка…
Ссылка в новой задаче