Merge branch 'master' into nextgenv2
This commit is contained in:
Коммит
49f5903dd2
|
@ -1208,14 +1208,20 @@ EOF
|
|||
soft_enable runtime_cpu_detect
|
||||
# We can't use 'check_cflags' until the compiler is configured and CC is
|
||||
# populated.
|
||||
check_gcc_machine_option mmx
|
||||
check_gcc_machine_option sse
|
||||
check_gcc_machine_option sse2
|
||||
check_gcc_machine_option sse3
|
||||
check_gcc_machine_option ssse3
|
||||
check_gcc_machine_option sse4 sse4_1
|
||||
check_gcc_machine_option avx
|
||||
check_gcc_machine_option avx2
|
||||
for ext in ${ARCH_EXT_LIST_X86}; do
|
||||
# disable higher order extensions to simplify asm dependencies
|
||||
if [ "$disable_exts" = "yes" ]; then
|
||||
if ! disabled $ext; then
|
||||
RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${ext} "
|
||||
disable_feature $ext
|
||||
fi
|
||||
elif disabled $ext; then
|
||||
disable_exts="yes"
|
||||
else
|
||||
# use the shortened version for the flag: sse4_1 -> sse4
|
||||
check_gcc_machine_option ${ext%_*} $ext
|
||||
fi
|
||||
done
|
||||
|
||||
if enabled external_build; then
|
||||
log_echo " skipping assembler detection"
|
||||
|
|
|
@ -234,6 +234,16 @@ ARCH_LIST="
|
|||
x86
|
||||
x86_64
|
||||
"
|
||||
ARCH_EXT_LIST_X86="
|
||||
mmx
|
||||
sse
|
||||
sse2
|
||||
sse3
|
||||
ssse3
|
||||
sse4_1
|
||||
avx
|
||||
avx2
|
||||
"
|
||||
ARCH_EXT_LIST="
|
||||
edsp
|
||||
media
|
||||
|
@ -245,14 +255,7 @@ ARCH_EXT_LIST="
|
|||
msa
|
||||
mips64
|
||||
|
||||
mmx
|
||||
sse
|
||||
sse2
|
||||
sse3
|
||||
ssse3
|
||||
sse4_1
|
||||
avx
|
||||
avx2
|
||||
${ARCH_EXT_LIST_X86}
|
||||
"
|
||||
HAVE_LIST="
|
||||
${ARCH_EXT_LIST}
|
||||
|
|
|
@ -194,6 +194,48 @@ class IntProColTest
|
|||
int16_t sum_c_;
|
||||
};
|
||||
|
||||
typedef int (*SatdFunc)(const int16_t *coeffs, int length);
|
||||
typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;
|
||||
|
||||
class SatdTest
|
||||
: public ::testing::Test,
|
||||
public ::testing::WithParamInterface<SatdTestParam> {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
satd_size_ = GET_PARAM(0);
|
||||
satd_func_ = GET_PARAM(1);
|
||||
rnd_.Reset(ACMRandom::DeterministicSeed());
|
||||
src_ = reinterpret_cast<int16_t*>(
|
||||
vpx_memalign(16, sizeof(*src_) * satd_size_));
|
||||
ASSERT_TRUE(src_ != NULL);
|
||||
}
|
||||
|
||||
virtual void TearDown() {
|
||||
libvpx_test::ClearSystemState();
|
||||
vpx_free(src_);
|
||||
}
|
||||
|
||||
void FillConstant(const int16_t val) {
|
||||
for (int i = 0; i < satd_size_; ++i) src_[i] = val;
|
||||
}
|
||||
|
||||
void FillRandom() {
|
||||
for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
|
||||
}
|
||||
|
||||
void Check(const int expected) {
|
||||
int total;
|
||||
ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_));
|
||||
EXPECT_EQ(expected, total);
|
||||
}
|
||||
|
||||
int satd_size_;
|
||||
|
||||
private:
|
||||
int16_t *src_;
|
||||
SatdFunc satd_func_;
|
||||
ACMRandom rnd_;
|
||||
};
|
||||
|
||||
uint8_t* AverageTestBase::source_data_ = NULL;
|
||||
|
||||
|
@ -246,6 +288,36 @@ TEST_P(IntProColTest, Random) {
|
|||
RunComparison();
|
||||
}
|
||||
|
||||
|
||||
TEST_P(SatdTest, MinValue) {
|
||||
const int kMin = -32640;
|
||||
const int expected = -kMin * satd_size_;
|
||||
FillConstant(kMin);
|
||||
Check(expected);
|
||||
}
|
||||
|
||||
TEST_P(SatdTest, MaxValue) {
|
||||
const int kMax = 32640;
|
||||
const int expected = kMax * satd_size_;
|
||||
FillConstant(kMax);
|
||||
Check(expected);
|
||||
}
|
||||
|
||||
TEST_P(SatdTest, Random) {
|
||||
int expected;
|
||||
switch (satd_size_) {
|
||||
case 16: expected = 205298; break;
|
||||
case 64: expected = 1113950; break;
|
||||
case 256: expected = 4268415; break;
|
||||
case 1024: expected = 16954082; break;
|
||||
default:
|
||||
FAIL() << "Invalid satd size (" << satd_size_
|
||||
<< ") valid: 16/64/256/1024";
|
||||
}
|
||||
FillRandom();
|
||||
Check(expected);
|
||||
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
|
@ -254,6 +326,14 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
|
||||
make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, SatdTest,
|
||||
::testing::Values(
|
||||
make_tuple(16, &vp9_satd_c),
|
||||
make_tuple(64, &vp9_satd_c),
|
||||
make_tuple(256, &vp9_satd_c),
|
||||
make_tuple(1024, &vp9_satd_c)));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, AverageTest,
|
||||
|
@ -276,6 +356,14 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(16, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
|
||||
make_tuple(32, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
|
||||
make_tuple(64, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, SatdTest,
|
||||
::testing::Values(
|
||||
make_tuple(16, &vp9_satd_sse2),
|
||||
make_tuple(64, &vp9_satd_sse2),
|
||||
make_tuple(256, &vp9_satd_sse2),
|
||||
make_tuple(1024, &vp9_satd_sse2)));
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
|
@ -297,6 +385,14 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(16, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
|
||||
make_tuple(32, &vp9_int_pro_col_neon, &vp9_int_pro_col_c),
|
||||
make_tuple(64, &vp9_int_pro_col_neon, &vp9_int_pro_col_c)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
NEON, SatdTest,
|
||||
::testing::Values(
|
||||
make_tuple(16, &vp9_satd_neon),
|
||||
make_tuple(64, &vp9_satd_neon),
|
||||
make_tuple(256, &vp9_satd_neon),
|
||||
make_tuple(1024, &vp9_satd_neon)));
|
||||
#endif
|
||||
|
||||
#if HAVE_MSA
|
||||
|
|
|
@ -36,20 +36,6 @@ const vpx_prob vp9_cat6_prob[] = {
|
|||
254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129
|
||||
};
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const vpx_prob vp9_cat1_prob_high10[] = { 159 };
|
||||
const vpx_prob vp9_cat2_prob_high10[] = { 165, 145 };
|
||||
const vpx_prob vp9_cat3_prob_high10[] = { 173, 148, 140 };
|
||||
const vpx_prob vp9_cat4_prob_high10[] = { 176, 155, 140, 135 };
|
||||
const vpx_prob vp9_cat5_prob_high10[] = { 180, 157, 141, 134, 130 };
|
||||
const vpx_prob vp9_cat6_prob_high10[] = {
|
||||
255, 255, 254, 254, 254, 252, 249, 243,
|
||||
230, 196, 177, 153, 140, 133, 130, 129
|
||||
};
|
||||
const vpx_prob vp9_cat1_prob_high12[] = { 159 };
|
||||
const vpx_prob vp9_cat2_prob_high12[] = { 165, 145 };
|
||||
const vpx_prob vp9_cat3_prob_high12[] = { 173, 148, 140 };
|
||||
const vpx_prob vp9_cat4_prob_high12[] = { 176, 155, 140, 135 };
|
||||
const vpx_prob vp9_cat5_prob_high12[] = { 180, 157, 141, 134, 130 };
|
||||
const vpx_prob vp9_cat6_prob_high12[] = {
|
||||
255, 255, 255, 255, 254, 254, 254, 252, 249,
|
||||
243, 230, 196, 177, 153, 140, 133, 130, 129
|
||||
|
|
|
@ -209,8 +209,8 @@ specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";
|
|||
add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
|
||||
specialize qw/vp9_hadamard_16x16 sse2/;
|
||||
|
||||
add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
|
||||
specialize qw/vp9_satd sse2/;
|
||||
add_proto qw/int vp9_satd/, "const int16_t *coeff, int length";
|
||||
specialize qw/vp9_satd sse2 neon/;
|
||||
|
||||
add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
|
||||
specialize qw/vp9_int_pro_row sse2 neon/;
|
||||
|
|
|
@ -65,52 +65,24 @@ static int decode_coefs(const MACROBLOCKD *xd,
|
|||
const int dq_shift = (tx_size == TX_32X32);
|
||||
int v, token;
|
||||
int16_t dqv = dq[0];
|
||||
const uint8_t *cat1_prob;
|
||||
const uint8_t *cat2_prob;
|
||||
const uint8_t *cat3_prob;
|
||||
const uint8_t *cat4_prob;
|
||||
const uint8_t *cat5_prob;
|
||||
const uint8_t *cat6_prob;
|
||||
const uint8_t *const cat6_prob =
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
(xd->bd == VPX_BITS_12) ? vp9_cat6_prob_high12 :
|
||||
(xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 :
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
vp9_cat6_prob;
|
||||
const int cat6_bits =
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
(xd->bd == VPX_BITS_12) ? 18 :
|
||||
(xd->bd == VPX_BITS_10) ? 16 :
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
14;
|
||||
|
||||
if (counts) {
|
||||
coef_counts = counts->coef[tx_size][type][ref];
|
||||
eob_branch_count = counts->eob_branch[tx_size][type][ref];
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->bd > VPX_BITS_8) {
|
||||
if (xd->bd == VPX_BITS_10) {
|
||||
cat1_prob = vp9_cat1_prob_high10;
|
||||
cat2_prob = vp9_cat2_prob_high10;
|
||||
cat3_prob = vp9_cat3_prob_high10;
|
||||
cat4_prob = vp9_cat4_prob_high10;
|
||||
cat5_prob = vp9_cat5_prob_high10;
|
||||
cat6_prob = vp9_cat6_prob_high10;
|
||||
} else {
|
||||
cat1_prob = vp9_cat1_prob_high12;
|
||||
cat2_prob = vp9_cat2_prob_high12;
|
||||
cat3_prob = vp9_cat3_prob_high12;
|
||||
cat4_prob = vp9_cat4_prob_high12;
|
||||
cat5_prob = vp9_cat5_prob_high12;
|
||||
cat6_prob = vp9_cat6_prob_high12;
|
||||
}
|
||||
} else {
|
||||
cat1_prob = vp9_cat1_prob;
|
||||
cat2_prob = vp9_cat2_prob;
|
||||
cat3_prob = vp9_cat3_prob;
|
||||
cat4_prob = vp9_cat4_prob;
|
||||
cat5_prob = vp9_cat5_prob;
|
||||
cat6_prob = vp9_cat6_prob;
|
||||
}
|
||||
#else
|
||||
cat1_prob = vp9_cat1_prob;
|
||||
cat2_prob = vp9_cat2_prob;
|
||||
cat3_prob = vp9_cat3_prob;
|
||||
cat4_prob = vp9_cat4_prob;
|
||||
cat5_prob = vp9_cat5_prob;
|
||||
cat6_prob = vp9_cat6_prob;
|
||||
#endif
|
||||
|
||||
while (c < max_eob) {
|
||||
int val = -1;
|
||||
band = *band_translate++;
|
||||
|
@ -149,39 +121,22 @@ static int decode_coefs(const MACROBLOCKD *xd,
|
|||
val = token;
|
||||
break;
|
||||
case CATEGORY1_TOKEN:
|
||||
val = CAT1_MIN_VAL + read_coeff(cat1_prob, 1, r);
|
||||
val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r);
|
||||
break;
|
||||
case CATEGORY2_TOKEN:
|
||||
val = CAT2_MIN_VAL + read_coeff(cat2_prob, 2, r);
|
||||
val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r);
|
||||
break;
|
||||
case CATEGORY3_TOKEN:
|
||||
val = CAT3_MIN_VAL + read_coeff(cat3_prob, 3, r);
|
||||
val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r);
|
||||
break;
|
||||
case CATEGORY4_TOKEN:
|
||||
val = CAT4_MIN_VAL + read_coeff(cat4_prob, 4, r);
|
||||
val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r);
|
||||
break;
|
||||
case CATEGORY5_TOKEN:
|
||||
val = CAT5_MIN_VAL + read_coeff(cat5_prob, 5, r);
|
||||
val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r);
|
||||
break;
|
||||
case CATEGORY6_TOKEN:
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
switch (xd->bd) {
|
||||
case VPX_BITS_8:
|
||||
val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r);
|
||||
break;
|
||||
case VPX_BITS_10:
|
||||
val = CAT6_MIN_VAL + read_coeff(cat6_prob, 16, r);
|
||||
break;
|
||||
case VPX_BITS_12:
|
||||
val = CAT6_MIN_VAL + read_coeff(cat6_prob, 18, r);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r);
|
||||
#endif
|
||||
val = CAT6_MIN_VAL + read_coeff(cat6_prob, cat6_bits, r);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,6 +50,33 @@ unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
|
|||
return (horizontal_add_u16x8(v_sum) + 32) >> 6;
|
||||
}
|
||||
|
||||
// coeff: 16 bits, dynamic range [-32640, 32640].
|
||||
// length: value range {16, 64, 256, 1024}.
|
||||
int vp9_satd_neon(const int16_t *coeff, int length) {
|
||||
const int16x4_t zero = vdup_n_s16(0);
|
||||
int32x4_t accum = vdupq_n_s32(0);
|
||||
|
||||
do {
|
||||
const int16x8_t src0 = vld1q_s16(coeff);
|
||||
const int16x8_t src8 = vld1q_s16(coeff + 8);
|
||||
accum = vabal_s16(accum, vget_low_s16(src0), zero);
|
||||
accum = vabal_s16(accum, vget_high_s16(src0), zero);
|
||||
accum = vabal_s16(accum, vget_low_s16(src8), zero);
|
||||
accum = vabal_s16(accum, vget_high_s16(src8), zero);
|
||||
length -= 16;
|
||||
coeff += 16;
|
||||
} while (length != 0);
|
||||
|
||||
{
|
||||
// satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
|
||||
const int64x2_t s0 = vpaddlq_s32(accum); // cascading summation of 'accum'.
|
||||
const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)),
|
||||
vreinterpret_s32_s64(vget_high_s64(s0)));
|
||||
const int satd = vget_lane_s32(s1, 0);
|
||||
return satd;
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
|
||||
const int ref_stride, const int height) {
|
||||
int i;
|
||||
|
|
|
@ -117,14 +117,14 @@ void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
|
|||
|
||||
// coeff: 16 bits, dynamic range [-32640, 32640].
|
||||
// length: value range {16, 64, 256, 1024}.
|
||||
int16_t vp9_satd_c(const int16_t *coeff, int length) {
|
||||
int vp9_satd_c(const int16_t *coeff, int length) {
|
||||
int i;
|
||||
int satd = 0;
|
||||
for (i = 0; i < length; ++i)
|
||||
satd += abs(coeff[i]);
|
||||
|
||||
// satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
|
||||
return (int16_t)satd;
|
||||
return satd;
|
||||
}
|
||||
|
||||
// Integer projection onto row vectors.
|
||||
|
|
|
@ -316,7 +316,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(VP9_DENOISER *denoiser,
|
|||
|
||||
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
|
||||
int mi_row, int mi_col, BLOCK_SIZE bs,
|
||||
PICK_MODE_CONTEXT *ctx) {
|
||||
PICK_MODE_CONTEXT *ctx,
|
||||
VP9_DENOISER_DECISION *denoiser_decision) {
|
||||
int mv_col, mv_row;
|
||||
int motion_magnitude = 0;
|
||||
VP9_DENOISER_DECISION decision = COPY_BLOCK;
|
||||
|
@ -380,6 +381,7 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
|
|||
num_4x4_blocks_wide_lookup[bs] << 2,
|
||||
num_4x4_blocks_high_lookup[bs] << 2);
|
||||
}
|
||||
*denoiser_decision = decision;
|
||||
}
|
||||
|
||||
static void copy_frame(YV12_BUFFER_CONFIG * const dest,
|
||||
|
@ -458,6 +460,7 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
|
|||
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
|
||||
ctx->zeromv_sse = UINT_MAX;
|
||||
ctx->newmv_sse = UINT_MAX;
|
||||
ctx->zeromv_lastref_sse = UINT_MAX;
|
||||
}
|
||||
|
||||
void vp9_denoiser_update_frame_stats(MB_MODE_INFO *mbmi, unsigned int sse,
|
||||
|
|
|
@ -54,7 +54,8 @@ void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser,
|
|||
|
||||
void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb,
|
||||
int mi_row, int mi_col, BLOCK_SIZE bs,
|
||||
PICK_MODE_CONTEXT *ctx);
|
||||
PICK_MODE_CONTEXT *ctx ,
|
||||
VP9_DENOISER_DECISION *denoiser_decision);
|
||||
|
||||
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
|
||||
|
||||
|
|
|
@ -1746,16 +1746,6 @@ static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
|
|||
set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
|
||||
update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);
|
||||
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 &&
|
||||
output_enabled &&
|
||||
cpi->common.frame_type != KEY_FRAME &&
|
||||
cpi->resize_pending == 0) {
|
||||
vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col,
|
||||
VPXMAX(BLOCK_8X8, bsize), ctx);
|
||||
}
|
||||
#endif
|
||||
|
||||
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
|
||||
update_stats(&cpi->common, td);
|
||||
|
||||
|
|
|
@ -673,7 +673,7 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
|
|||
if (*eob == 1)
|
||||
*rate += (int)abs(qcoeff[0]);
|
||||
else if (*eob > 1)
|
||||
*rate += (int)vp9_satd((const int16_t *)qcoeff, step << 4);
|
||||
*rate += vp9_satd((const int16_t *)qcoeff, step << 4);
|
||||
|
||||
*dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> shift;
|
||||
}
|
||||
|
@ -1143,6 +1143,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
int best_pred_sad = INT_MAX;
|
||||
int best_early_term = 0;
|
||||
int ref_frame_cost[MAX_REF_FRAMES];
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
int64_t zero_last_cost_orig = INT64_MAX;
|
||||
#endif
|
||||
|
||||
init_ref_frame_cost(cm, xd, ref_frame_cost);
|
||||
|
||||
|
@ -1252,7 +1255,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
if (const_motion[ref_frame] && this_mode == NEARMV)
|
||||
continue;
|
||||
|
||||
if (!(this_mode == ZEROMV && ref_frame == LAST_FRAME)) {
|
||||
if (!(frame_mv[this_mode][ref_frame].as_int == 0 &&
|
||||
ref_frame == LAST_FRAME)) {
|
||||
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
|
||||
if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
|
||||
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
|
||||
|
@ -1523,8 +1527,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
}
|
||||
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0)
|
||||
if (cpi->oxcf.noise_sensitivity > 0) {
|
||||
vp9_denoiser_update_frame_stats(mbmi, sse_y, this_mode, ctx);
|
||||
// Keep track of zero_last cost.
|
||||
if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0)
|
||||
zero_last_cost_orig = this_rdc.rdcost;
|
||||
}
|
||||
#else
|
||||
(void)ctx;
|
||||
#endif
|
||||
|
@ -1682,6 +1690,54 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 &&
|
||||
cpi->resize_pending == 0) {
|
||||
VP9_DENOISER_DECISION decision = COPY_BLOCK;
|
||||
vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col,
|
||||
VPXMAX(BLOCK_8X8, bsize), ctx, &decision);
|
||||
// If INTRA mode was selected, re-evaluate ZEROMV on denoised result.
|
||||
// Only do this under noise conditions, and if rdcost of ZEROMV on
|
||||
// original source is not significantly higher than rdcost of INTRA MODE.
|
||||
if (best_ref_frame == INTRA_FRAME &&
|
||||
decision == FILTER_BLOCK &&
|
||||
cpi->noise_estimate.enabled &&
|
||||
cpi->noise_estimate.level > kLow &&
|
||||
zero_last_cost_orig < (best_rdc.rdcost << 2) &&
|
||||
!reuse_inter_pred) {
|
||||
// Check if we should pick ZEROMV on denoised signal.
|
||||
int rate = 0;
|
||||
int64_t dist = 0;
|
||||
mbmi->mode = ZEROMV;
|
||||
mbmi->ref_frame[0] = LAST_FRAME;
|
||||
mbmi->ref_frame[1] = NONE;
|
||||
mbmi->mv[0].as_int = 0;
|
||||
mbmi->interp_filter = EIGHTTAP;
|
||||
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
|
||||
model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
|
||||
this_rdc.rate = rate + ref_frame_cost[LAST_FRAME] +
|
||||
cpi->inter_mode_cost[x->mbmi_ext->mode_context[LAST_FRAME]]
|
||||
[INTER_OFFSET(ZEROMV)];
|
||||
this_rdc.dist = dist;
|
||||
this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, rate, dist);
|
||||
// Switch to ZEROMV if the rdcost for ZEROMV on denoised source
|
||||
// is lower than INTRA (on original source).
|
||||
if (this_rdc.rdcost > best_rdc.rdcost) {
|
||||
this_rdc = best_rdc;
|
||||
mbmi->mode = best_mode;
|
||||
mbmi->ref_frame[0] = best_ref_frame;
|
||||
mbmi->mv[0].as_int = INVALID_MV;
|
||||
mbmi->interp_filter = best_pred_filter;
|
||||
mbmi->tx_size = best_tx_size;
|
||||
x->skip_txfm[0] = best_mode_skip_txfm;
|
||||
} else {
|
||||
best_ref_frame = LAST_FRAME;
|
||||
best_rdc = this_rdc;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (cpi->sf.adaptive_rd_thresh) {
|
||||
THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mbmi->mode)];
|
||||
|
||||
|
|
|
@ -380,6 +380,16 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
|
|||
sf->adaptive_rd_thresh = 2;
|
||||
// This feature is only enabled when partition search is disabled.
|
||||
sf->reuse_inter_pred_sby = 1;
|
||||
// TODO(marpan): When denoising, we may re-evaluate the mode selection and
|
||||
// this seems to cause problems when reuse_inter_pred_sby is enabled.
|
||||
// Disabling reuse_inter_pred_sby for now (under denoising conditions), and
|
||||
// will look into re-enabling it.
|
||||
#if CONFIG_VP9_TEMPORAL_DENOISING
|
||||
if (cpi->oxcf.noise_sensitivity > 0 &&
|
||||
cpi->noise_estimate.enabled &&
|
||||
cpi->noise_estimate.level > kLow)
|
||||
sf->reuse_inter_pred_sby = 0;
|
||||
#endif
|
||||
sf->partition_search_breakout_rate_thr = 200;
|
||||
sf->coeff_prob_appx_step = 4;
|
||||
sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED;
|
||||
|
|
|
@ -375,32 +375,32 @@ const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = {
|
|||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS] = {
|
||||
{0, 0, 0, zero_cost}, // ZERO
|
||||
{0, 0, 1, one_cost}, // ONE
|
||||
{0, 0, 2, two_cost}, // TWO
|
||||
{0, 0, 3, three_cost}, // THREE
|
||||
{0, 0, 4, four_cost}, // FOUR
|
||||
{vp9_cat1_prob_high10, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
|
||||
{vp9_cat2_prob_high10, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
|
||||
{vp9_cat3_prob_high10, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
|
||||
{vp9_cat4_prob_high10, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
|
||||
{vp9_cat5_prob_high10, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
|
||||
{vp9_cat6_prob_high10, 16, CAT6_MIN_VAL, 0}, // CAT6
|
||||
{0, 0, 0, zero_cost} // EOB
|
||||
{0, 0, 0, zero_cost}, // ZERO
|
||||
{0, 0, 1, one_cost}, // ONE
|
||||
{0, 0, 2, two_cost}, // TWO
|
||||
{0, 0, 3, three_cost}, // THREE
|
||||
{0, 0, 4, four_cost}, // FOUR
|
||||
{vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
|
||||
{vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
|
||||
{vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
|
||||
{vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
|
||||
{vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
|
||||
{vp9_cat6_prob_high12 + 2, 16, CAT6_MIN_VAL, 0}, // CAT6
|
||||
{0, 0, 0, zero_cost} // EOB
|
||||
};
|
||||
const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS] = {
|
||||
{0, 0, 0, zero_cost}, // ZERO
|
||||
{0, 0, 1, one_cost}, // ONE
|
||||
{0, 0, 2, two_cost}, // TWO
|
||||
{0, 0, 3, three_cost}, // THREE
|
||||
{0, 0, 4, four_cost}, // FOUR
|
||||
{vp9_cat1_prob_high12, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
|
||||
{vp9_cat2_prob_high12, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
|
||||
{vp9_cat3_prob_high12, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
|
||||
{vp9_cat4_prob_high12, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
|
||||
{vp9_cat5_prob_high12, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
|
||||
{vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6
|
||||
{0, 0, 0, zero_cost} // EOB
|
||||
{0, 0, 0, zero_cost}, // ZERO
|
||||
{0, 0, 1, one_cost}, // ONE
|
||||
{0, 0, 2, two_cost}, // TWO
|
||||
{0, 0, 3, three_cost}, // THREE
|
||||
{0, 0, 4, four_cost}, // FOUR
|
||||
{vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost}, // CAT1
|
||||
{vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost}, // CAT2
|
||||
{vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost}, // CAT3
|
||||
{vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost}, // CAT4
|
||||
{vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost}, // CAT5
|
||||
{vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0}, // CAT6
|
||||
{0, 0, 0, zero_cost} // EOB
|
||||
};
|
||||
#endif
|
||||
|
||||
|
|
|
@ -283,31 +283,30 @@ void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
|
|||
}
|
||||
}
|
||||
|
||||
int16_t vp9_satd_sse2(const int16_t *coeff, int length) {
|
||||
int vp9_satd_sse2(const int16_t *coeff, int length) {
|
||||
int i;
|
||||
__m128i sum = _mm_load_si128((const __m128i *)coeff);
|
||||
__m128i sign = _mm_srai_epi16(sum, 15);
|
||||
__m128i val = _mm_xor_si128(sum, sign);
|
||||
sum = _mm_sub_epi16(val, sign);
|
||||
coeff += 8;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
__m128i accum = zero;
|
||||
|
||||
for (i = 8; i < length; i += 8) {
|
||||
__m128i src_line = _mm_load_si128((const __m128i *)coeff);
|
||||
sign = _mm_srai_epi16(src_line, 15);
|
||||
val = _mm_xor_si128(src_line, sign);
|
||||
val = _mm_sub_epi16(val, sign);
|
||||
sum = _mm_add_epi16(sum, val);
|
||||
for (i = 0; i < length; i += 8) {
|
||||
const __m128i src_line = _mm_load_si128((const __m128i *)coeff);
|
||||
const __m128i inv = _mm_sub_epi16(zero, src_line);
|
||||
const __m128i abs = _mm_max_epi16(src_line, inv); // abs(src_line)
|
||||
const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero);
|
||||
const __m128i abs_hi = _mm_unpackhi_epi16(abs, zero);
|
||||
const __m128i sum = _mm_add_epi32(abs_lo, abs_hi);
|
||||
accum = _mm_add_epi32(accum, sum);
|
||||
coeff += 8;
|
||||
}
|
||||
|
||||
val = _mm_srli_si128(sum, 8);
|
||||
sum = _mm_add_epi16(sum, val);
|
||||
val = _mm_srli_epi64(sum, 32);
|
||||
sum = _mm_add_epi16(sum, val);
|
||||
val = _mm_srli_epi32(sum, 16);
|
||||
sum = _mm_add_epi16(sum, val);
|
||||
{ // cascading summation of accum
|
||||
__m128i hi = _mm_srli_si128(accum, 8);
|
||||
accum = _mm_add_epi32(accum, hi);
|
||||
hi = _mm_srli_epi64(accum, 32);
|
||||
accum = _mm_add_epi32(accum, hi);
|
||||
}
|
||||
|
||||
return _mm_extract_epi16(sum, 0);
|
||||
return _mm_cvtsi128_si32(accum);
|
||||
}
|
||||
|
||||
void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
|
||||
|
|
|
@ -98,7 +98,7 @@ static INLINE int vpx_read(vpx_reader *r, int prob) {
|
|||
}
|
||||
|
||||
{
|
||||
register unsigned int shift = vpx_norm[range];
|
||||
register int shift = vpx_norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
|
|
|
@ -35,7 +35,7 @@ static INLINE void vpx_write(vpx_writer *br, int bit, int probability) {
|
|||
int count = br->count;
|
||||
unsigned int range = br->range;
|
||||
unsigned int lowvalue = br->lowvalue;
|
||||
register unsigned int shift;
|
||||
register int shift;
|
||||
|
||||
split = 1 + (((range - 1) * probability) >> 8);
|
||||
|
||||
|
|
|
@ -545,33 +545,31 @@ cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left
|
|||
RET
|
||||
|
||||
INIT_XMM sse2
|
||||
cglobal tm_predictor_8x8, 4, 4, 4, dst, stride, above, left
|
||||
cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left
|
||||
pxor m1, m1
|
||||
movd m2, [aboveq-1]
|
||||
movq m0, [aboveq]
|
||||
punpcklbw m2, m1
|
||||
punpcklbw m0, m1
|
||||
pshuflw m2, m2, 0x0
|
||||
punpcklbw m0, m1 ; t1 t2 t3 t4 t5 t6 t7 t8 [word]
|
||||
pshuflw m2, m2, 0x0 ; [63:0] tl tl tl tl [word]
|
||||
DEFINE_ARGS dst, stride, line, left
|
||||
mov lineq, -4
|
||||
punpcklqdq m2, m2
|
||||
add leftq, 8
|
||||
psubw m0, m2
|
||||
.loop:
|
||||
movd m2, [leftq+lineq*2]
|
||||
movd m3, [leftq+lineq*2+1]
|
||||
punpcklbw m2, m1
|
||||
punpcklbw m3, m1
|
||||
pshuflw m2, m2, 0x0
|
||||
pshuflw m3, m3, 0x0
|
||||
punpcklqdq m2, m2
|
||||
punpcklqdq m3, m3
|
||||
paddw m2, m0
|
||||
punpcklqdq m2, m2 ; tl tl tl tl tl tl tl tl [word]
|
||||
psubw m0, m2 ; t1-tl t2-tl ... t8-tl [word]
|
||||
movq m2, [leftq]
|
||||
punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word]
|
||||
.loop
|
||||
pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word]
|
||||
pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word]
|
||||
punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word]
|
||||
punpcklqdq m3, m3 ; l2 l2 l2 l2 l2 l2 l2 l2 [word]
|
||||
paddw m4, m0
|
||||
paddw m3, m0
|
||||
packuswb m2, m3
|
||||
movq [dstq ], m2
|
||||
movhps [dstq+strideq], m2
|
||||
packuswb m4, m3
|
||||
movq [dstq ], m4
|
||||
movhps [dstq+strideq], m4
|
||||
lea dstq, [dstq+strideq*2]
|
||||
psrldq m2, 4
|
||||
inc lineq
|
||||
jnz .loop
|
||||
REP_RET
|
||||
|
|
Загрузка…
Ссылка в новой задаче