experimental : partition using 1/8 x 1/8 image

The concept:

There's too much noise in source pixels for variance and at low bitrate
the reconstructed looks nothing like the source so we have problems
getting good partitionings with either.   This skirts the issue by using
a box blur scaled down version for variance calculations.  To compare
against source_var_ moved keyframe to be rd based like source_var.

Change-Id: Ie3babdbfadae324b7b5a76bea192893af27f0624
This commit is contained in:
Jim Bankoski 2014-10-07 16:36:14 -07:00
Родитель dae97868da
Коммит 0ce51d823f
10 изменённых файлов: 255 добавлений и 17 удалений

Просмотреть файл

@ -129,6 +129,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9) += vp9_intrapred_test.cc
ifeq ($(CONFIG_VP9_ENCODER),yes)

150
test/vp9_avg_test.cc Normal file
Просмотреть файл

@ -0,0 +1,150 @@
/*
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include <limits.h>
#include <stdio.h>
#include "./vpx_config.h"
#if CONFIG_VP9_ENCODER
#include "./vp9_rtcd.h"
#endif
#include "vpx_mem/vpx_mem.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
using libvpx_test::ACMRandom;
namespace {
class AverageTestBase : public ::testing::Test {
public:
AverageTestBase(int width, int height) : width_(width), height_(height) {}
static void SetUpTestCase() {
source_data_ = reinterpret_cast<uint8_t*>(
vpx_memalign(kDataAlignment, kDataBlockSize));
}
static void TearDownTestCase() {
vpx_free(source_data_);
source_data_ = NULL;
}
virtual void TearDown() {
libvpx_test::ClearSystemState();
}
protected:
// Handle blocks up to 4 blocks 64x64 with stride up to 128
static const int kDataAlignment = 16;
static const int kDataBlockSize = 64 * 128;
virtual void SetUp() {
source_stride_ = (width_ + 31) & ~31;
rnd_.Reset(ACMRandom::DeterministicSeed());
}
// Sum Pixels
unsigned int ReferenceAverage(const uint8_t* source, int pitch ) {
unsigned int average = 0;
for (int h = 0; h < 8; ++h)
for (int w = 0; w < 8; ++w)
average += source[h * source_stride_ + w];
return ((average + 32) >> 6);
}
void FillConstant(uint8_t fill_constant) {
for (int i = 0; i < width_ * height_; ++i) {
source_data_[i] = fill_constant;
}
}
void FillRandom() {
for (int i = 0; i < width_ * height_; ++i) {
source_data_[i] = rnd_.Rand8();
}
}
int width_, height_;
static uint8_t* source_data_;
int source_stride_;
ACMRandom rnd_;
};
typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);
typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;
class AverageTest
: public AverageTestBase,
public ::testing::WithParamInterface<AvgFunc>{
public:
AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
protected:
void CheckAverages() {
unsigned int expected = ReferenceAverage(source_data_+ GET_PARAM(2),
source_stride_);
ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_+ GET_PARAM(2),
source_stride_));
unsigned int actual = GET_PARAM(3)(source_data_+ GET_PARAM(2),
source_stride_);
EXPECT_EQ(expected, actual);
}
};
uint8_t* AverageTestBase::source_data_ = NULL;
TEST_P(AverageTest, MinValue) {
FillConstant(0);
CheckAverages();
}
TEST_P(AverageTest, MaxValue) {
FillConstant(255);
CheckAverages();
}
TEST_P(AverageTest, Random) {
// The reference frame, but not the source frame, may be unaligned for
// certain types of searches.
for (int i = 0; i < 1000; i++) {
FillRandom();
CheckAverages();
}
}
using std::tr1::make_tuple;
INSTANTIATE_TEST_CASE_P(
C, AverageTest,
::testing::Values(
make_tuple(16, 16, 1, &vp9_avg_8x8_c)));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
SSE2, AverageTest,
::testing::Values(
make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),
make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),
make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));
#endif
} // namespace

Просмотреть файл

@ -1110,6 +1110,10 @@ specialize qw/vp9_mse8x8/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
specialize qw/vp9_avg_8x8/, "$sse2_x86inc";
# ENCODEMB INVOKE
add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";

19
vp9/encoder/vp9_avg.c Normal file
Просмотреть файл

@ -0,0 +1,19 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/mem.h"
unsigned int vp9_avg_8x8_c(const uint8_t *s, int p) {
int i, j;
int sum = 0;
for (i = 0; i < 8; ++i, s+=p)
for (j = 0; j < 8; sum += s[j], ++j) {}
return (sum + 32) >> 6;
}

Просмотреть файл

@ -396,10 +396,10 @@ static int set_vt_partitioning(VP9_COMP *cpi,
const int block_width = num_8x8_blocks_wide_lookup[bsize];
const int block_height = num_8x8_blocks_high_lookup[bsize];
// TODO(debargha): Choose this more intelligently.
const int64_t threshold_multiplier = 25;
int64_t threshold = threshold_multiplier * cpi->common.base_qindex;
const int64_t threshold_multiplier = cm->frame_type == KEY_FRAME ? 64 : 4;
int64_t threshold = threshold_multiplier *
vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
assert(block_height == block_width);
tree_to_node(data, bsize, &vt);
// Split none is available only if we have more than half a block size
@ -511,10 +511,17 @@ static void choose_partitioning(VP9_COMP *cpi,
int y_idx = y16_idx + ((k >> 1) << 3);
unsigned int sse = 0;
int sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high)
vp9_get8x8var(s + y_idx * sp + x_idx, sp,
d + y_idx * dp + x_idx, dp, &sse, &sum);
fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
if (x_idx < pixels_wide && y_idx < pixels_high) {
int s_avg = vp9_avg_8x8(s + y_idx * sp + x_idx, sp);
int d_avg = vp9_avg_8x8(d + y_idx * dp + x_idx, dp);
sum = s_avg - d_avg;
sse = sum * sum;
}
// For an 8x8 block we have just one value the average of all 64
// pixels, so use 1. This means of course that there is no variance
// in an 8x8 block.
fill_variance(sse, sum, 1, &vst->split[k].part_variances.none);
}
}
}
@ -530,8 +537,8 @@ static void choose_partitioning(VP9_COMP *cpi,
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold, or we
// hit 8x8.
if (!set_vt_partitioning(cpi, &vt, BLOCK_64X64,
mi_row, mi_col)) {
if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
!set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
@ -561,10 +568,10 @@ static void choose_partitioning(VP9_COMP *cpi,
}
}
#else
if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,
if (!set_vt_partitioning(cpi, &vt.split[i].split[j],
BLOCK_16X16,
(mi_row + y32_idx + y16_idx),
(mi_col + x32_idx + x16_idx), 2)) {
mi_row + y32_idx + y16_idx,
mi_col + x32_idx + x16_idx)) {
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
@ -2593,7 +2600,8 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
set_fixed_partitioning(cpi, tile, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);
} else if (sf->partition_search_type == VAR_BASED_PARTITION) {
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
cm->frame_type != KEY_FRAME ) {
choose_partitioning(cpi, tile, mi_row, mi_col);
rd_use_partition(cpi, tile, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, cpi->pc_root);

Просмотреть файл

@ -235,6 +235,10 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
else
xd->mi[0].src_mi->mbmi.tx_size = TX_8X8;
if (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
xd->mi[0].src_mi->mbmi.tx_size > TX_16X16)
xd->mi[0].src_mi->mbmi.tx_size = TX_16X16;
} else {
xd->mi[0].src_mi->mbmi.tx_size =
MIN(max_txsize_lookup[bsize],
@ -611,7 +615,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
if (this_mode == NEWMV) {
if (this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
this_rd < (int64_t)(1 << num_pels_log2_lookup[bsize]))
continue;
if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
&frame_mv[NEWMV][ref_frame],

Просмотреть файл

@ -249,6 +249,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->frame_parameter_update = 0;
sf->mv.search_method = FAST_HEX;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEAR_NEW;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST;
@ -278,12 +279,17 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
int i;
// Allow fancy modes at all sizes since SOURCE_VAR_BASED_PARTITION is used
for (i = 0; i < BLOCK_SIZES; ++i)
sf->inter_mode_mask[i] = INTER_ALL;
sf->inter_mode_mask[i] = INTER_NEAREST_NEAR_NEW;
}
// Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->partition_search_type = VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->mv.search_method = NSTEP;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO;
sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO;
sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
@ -291,7 +297,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->reuse_inter_pred_sby = 1;
// Increase mode checking threshold for NEWMV.
sf->elevate_newmv_thresh = 2000;
sf->elevate_newmv_thresh = 1000;
sf->mv.reduce_first_step_size = 1;
}

Просмотреть файл

@ -34,6 +34,9 @@ enum {
enum {
INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV),
INTER_NEAREST = (1 << NEARESTMV),
INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV),
INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV),
INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV),
INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV),
INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV),
};

Просмотреть файл

@ -0,0 +1,40 @@
/*
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <immintrin.h>
#include "vpx_ports/mem.h"
unsigned int vp9_avg_8x8_sse2(const uint8_t *s, int p) {
__m128i s0, s1, u0;
unsigned int avg = 0;
u0 = _mm_setzero_si128();
s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0);
s0 = _mm_adds_epu16(s0, s1);
s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8));
s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32));
s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16));
avg = _mm_extract_epi16(s0, 0);
return (avg + 32) >> 6;
}

Просмотреть файл

@ -17,6 +17,7 @@ VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no)
VP9_CX_SRCS-yes += vp9_cx_iface.c
VP9_CX_SRCS-yes += encoder/vp9_avg.c
VP9_CX_SRCS-yes += encoder/vp9_bitstream.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.c
VP9_CX_SRCS-yes += encoder/vp9_context_tree.h
@ -95,6 +96,7 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_sad4d_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_sad4d_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm