Merge "Extend variance based partitioning to 128x128 superblocks" into nextgenv2

This commit is contained in:
Debargha Mukherjee 2016-04-12 19:42:35 +00:00 коммит произвёл Gerrit Code Review
Родитель 027d12b7d6 61af8981b0
Коммит ec1365a0c9
9 изменённых файлов: 600 добавлений и 531 удалений

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -463,6 +463,9 @@ static void dealloc_compressor_data(VP10_COMP *cpi) {
vp10_free_pc_tree(&cpi->td);
if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
vp10_free_var_tree(&cpi->td);
if (cpi->common.allow_screen_content_tools)
vpx_free(cpi->td.mb.palette_buffer);
@ -1999,6 +2002,8 @@ void vp10_change_config(struct VP10_COMP *cpi, const VP10EncoderConfig *oxcf) {
CHECK_MEM_ERROR(cm, x->palette_buffer,
vpx_memalign(16, sizeof(*x->palette_buffer)));
}
// Reallocate the pc_tree, as its contents depend on
// the state of cm->allow_screen_content_tools
vp10_free_pc_tree(&cpi->td);
vp10_setup_pc_tree(&cpi->common, &cpi->td);
}
@ -2586,6 +2591,8 @@ void vp10_remove_compressor(VP10_COMP *cpi) {
vpx_free(thread_data->td->mb.palette_buffer);
vpx_free(thread_data->td->counts);
vp10_free_pc_tree(thread_data->td);
if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
vp10_free_var_tree(thread_data->td);
vpx_free(thread_data->td);
}
}

Просмотреть файл

@ -34,6 +34,7 @@
#include "vp10/encoder/rd.h"
#include "vp10/encoder/speed_features.h"
#include "vp10/encoder/tokenize.h"
#include "vp10/encoder/variance_tree.h"
#if CONFIG_VP9_TEMPORAL_DENOISING
#include "vp10/encoder/denoiser.h"
@ -267,6 +268,9 @@ typedef struct ThreadData {
PICK_MODE_CONTEXT *leaf_tree;
PC_TREE *pc_tree;
PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
VAR_TREE *var_tree;
VAR_TREE *var_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1];
} ThreadData;
struct EncWorkerData;
@ -568,9 +572,12 @@ typedef struct VP10_COMP {
int resize_count;
// VAR_BASED_PARTITION thresholds
// 0 - threshold_64x64; 1 - threshold_32x32;
// 2 - threshold_16x16; 3 - vbp_threshold_8x8;
int64_t vbp_thresholds[4];
// 0 - threshold_128x128;
// 1 - threshold_64x64;
// 2 - threshold_32x32;
// 3 - threshold_16x16;
// 4 - threshold_8x8;
int64_t vbp_thresholds[5];
int64_t vbp_threshold_minmax;
int64_t vbp_threshold_sad;
BLOCK_SIZE vbp_bsize_min;

Просмотреть файл

@ -93,6 +93,10 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
thread_data->td->pc_tree = NULL;
vp10_setup_pc_tree(cm, thread_data->td);
// Set up this worker's own variance tree if needed. The tree must be built
// on thread_data->td (the same ThreadData the pc_tree above was set up on,
// and the one vp10_remove_compressor() later frees it from), not on the
// main thread's cpi->td.
if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
  vp10_setup_var_tree(cm, thread_data->td);
// Allocate frame counters in thread data.
CHECK_MEM_ERROR(cm, thread_data->td->counts,
vpx_calloc(1, sizeof(*thread_data->td->counts)));

Просмотреть файл

@ -0,0 +1,63 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vp10/encoder/variance_tree.h"
#include "vp10/encoder/encoder.h"
// Builds the variance quad-tree owned by |td|.
//
// Any tree already attached to |td| is released first. All nodes live in a
// single zero-initialized array: the leaves occupy the first |leaf_nodes|
// entries, each higher level is stored right after the level below it, and
// the root is the last entry. td->var_root[] receives one entry point per
// supported superblock size.
//
// cm: passed to CHECK_MEM_ERROR for reporting an allocation failure.
// td: thread data whose var_tree / var_root members are (re)initialized.
void vp10_setup_var_tree(struct VP10Common *cm, ThreadData *td) {
#if CONFIG_EXT_PARTITION
  const int leaf_nodes = 1024;
  const int tree_nodes = 1024 + 256 + 64 + 16 + 4 + 1;
#else
  const int leaf_nodes = 256;
  const int tree_nodes = 256 + 64 + 16 + 4 + 1;
#endif  // CONFIG_EXT_PARTITION
  VAR_TREE *next_child;
  int node_idx;
  int level_size;
  int sb_idx;

  vpx_free(td->var_tree);
  CHECK_MEM_ERROR(cm, td->var_tree,
                  vpx_calloc(tree_nodes, sizeof(*td->var_tree)));

  // Clear the first child pointer of every leaf (the first |leaf_nodes|
  // entries of the array).
  for (node_idx = 0; node_idx < leaf_nodes; ++node_idx)
    td->var_tree[node_idx].split[0] = NULL;

  // Walk upwards one level at a time. Every parent adopts the next four
  // not-yet-assigned nodes, taken in array order starting from the first
  // leaf. node_idx already points at the first non-leaf entry here.
  next_child = td->var_tree;
  for (level_size = leaf_nodes >> 2; level_size > 0; level_size >>= 2) {
    const int level_end = node_idx + level_size;
    while (node_idx < level_end) {
      VAR_TREE *const parent = &td->var_tree[node_idx++];
      parent->split[0] = next_child++;
      parent->split[1] = next_child++;
      parent->split[2] = next_child++;
      parent->split[3] = next_child++;
    }
  }

  // The last array entry is the root for the largest superblock size.
  sb_idx = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2;
  td->var_root[sb_idx] = &td->var_tree[tree_nodes - 1];

  // Each smaller superblock size is rooted at the first child of the
  // next larger size's root.
  for (--sb_idx; sb_idx >= 0; --sb_idx)
    td->var_root[sb_idx] = td->var_root[sb_idx + 1]->split[0];
}
// Releases the variance tree array owned by |td| and resets the pointer to
// NULL so that a later vp10_setup_var_tree() or vp10_free_var_tree() call
// does not see a stale pointer. td->var_root[] is left untouched.
void vp10_free_var_tree(ThreadData *td) {
  VAR_TREE **const tree_slot = &td->var_tree;

  vpx_free(*tree_slot);
  *tree_slot = NULL;
}

Просмотреть файл

@ -0,0 +1,98 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef VP10_ENCODER_VARIANCE_TREE_H_
#define VP10_ENCODER_VARIANCE_TREE_H_
#include <assert.h>
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vp10/common/enums.h"
#ifdef __cplusplus
extern "C" {
#endif
struct VP10Common;
struct ThreadData;
// Accumulated statistics for one region, as written by fill_variance().
typedef struct {
// Sum of squared error values over the region.
int64_t sum_square_error;
// Sum of the error values over the region.
int64_t sum_error;
// log2 of the number of samples accumulated; used as a shift amount.
int log2_count;
// 256 * (sum_square_error - (sum_error^2 >> log2_count)) >> log2_count,
// truncated to int.
int variance;
} var;
// Variance statistics of a node for each way of partitioning it, as filled
// in by fill_variance_node() from the node's four children.
typedef struct {
// Whole node (sum of vert[0] and vert[1]).
var none;
// horz[0] combines children 0 and 1, horz[1] combines children 2 and 3.
var horz[2];
// vert[0] combines children 0 and 2, vert[1] combines children 1 and 3.
var vert[2];
} partition_variance;
// One node of the variance quad-tree allocated by vp10_setup_var_tree().
typedef struct VAR_TREE {
// NOTE(review): presumably set when this block must be split regardless of
// its variance -- not used in this header; confirm against the caller.
int force_split;
// Statistics for this node; see fill_variance_node().
partition_variance variances;
// The four child nodes. vp10_setup_var_tree() sets split[0] to NULL on
// leaf nodes.
struct VAR_TREE *split[4];
// NOTE(review): the fields below are not read or written in this header;
// they look like the block size plus the source/reference pixel buffers,
// strides and dimensions of the area covered by this node -- confirm.
BLOCK_SIZE bsize;
const uint8_t *src;
const uint8_t *ref;
int src_stride;
int ref_stride;
int width;
int height;
#if CONFIG_VP9_HIGHBITDEPTH
// NOTE(review): presumably non-zero when src/ref hold high bit-depth
// samples -- confirm.
int highbd;
#endif // CONFIG_VP9_HIGHBITDEPTH
} VAR_TREE;
// Frees any variance tree already held by |td|, allocates a new zeroed node
// array, links parents to children and fills td->var_root[]. |cm| is used
// for reporting an allocation failure.
void vp10_setup_var_tree(struct VP10Common *cm, struct ThreadData *td);
// Frees the variance tree held by |td| and sets td->var_tree to NULL.
void vp10_free_var_tree(struct ThreadData *td);
// Stores the raw sums in |v| and derives the scaled variance from them.
//   s2: sum of squared errors.
//   s:  sum of errors.
//   c:  log2 of the sample count (used as a shift amount).
//   v:  destination; all four fields are overwritten.
// The variance is 256 * (s2 - ((s * s) >> c)) >> c, truncated to int.
static INLINE void fill_variance(int64_t s2, int64_t s, int c, var *v) {
  const int64_t squared_sum_scaled = (s * s) >> c;

  v->sum_square_error = s2;
  v->sum_error = s;
  v->log2_count = c;
  v->variance = (int)((256 * (s2 - squared_sum_scaled)) >> c);
}
// Combines the statistics of two regions with the same sample count into
// |r|: the sums are added and the log2 sample count grows by one (i.e. the
// sample count doubles). Asserts that |a| and |b| have equal log2_count.
static INLINE void sum_2_variances(const var *a, const var *b, var *r) {
  int64_t total_sse;
  int64_t total_sum;
  int total_log2_count;

  assert(a->log2_count == b->log2_count);

  total_sse = a->sum_square_error + b->sum_square_error;
  total_sum = a->sum_error + b->sum_error;
  total_log2_count = a->log2_count + 1;
  fill_variance(total_sse, total_sum, total_log2_count, r);
}
// Derives every entry of vt->variances from the `none` statistics of the
// four children of |vt|:
//   horz[0] = child 0 + child 1,   horz[1] = child 2 + child 3
//   vert[0] = child 0 + child 2,   vert[1] = child 1 + child 3
//   none    = vert[0] + vert[1]
// All four vt->split[] pointers must be valid.
static INLINE void fill_variance_node(VAR_TREE *vt) {
  const var *const child0 = &vt->split[0]->variances.none;
  const var *const child1 = &vt->split[1]->variances.none;
  const var *const child2 = &vt->split[2]->variances.none;
  const var *const child3 = &vt->split[3]->variances.none;
  partition_variance *const out = &vt->variances;

  sum_2_variances(child0, child1, &out->horz[0]);
  sum_2_variances(child2, child3, &out->horz[1]);
  sum_2_variances(child0, child2, &out->vert[0]);
  sum_2_variances(child1, child3, &out->vert[1]);

  // The whole-node statistics are the two vertical halves combined.
  sum_2_variances(&out->vert[0], &out->vert[1], &out->none);
}
#ifdef __cplusplus
} // extern "C"
#endif
#endif /* VP10_ENCODER_VARIANCE_TREE_H_ */

Просмотреть файл

@ -21,6 +21,8 @@ VP10_CX_SRCS-yes += encoder/bitstream.c
VP10_CX_SRCS-yes += encoder/bitwriter.h
VP10_CX_SRCS-yes += encoder/context_tree.c
VP10_CX_SRCS-yes += encoder/context_tree.h
VP10_CX_SRCS-yes += encoder/variance_tree.c
VP10_CX_SRCS-yes += encoder/variance_tree.h
VP10_CX_SRCS-yes += encoder/cost.h
VP10_CX_SRCS-yes += encoder/cost.c
VP10_CX_SRCS-yes += encoder/dct.c

Просмотреть файл

@ -12,22 +12,22 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
unsigned int vpx_avg_8x8_c(const uint8_t *s, int p) {
unsigned int vpx_avg_8x8_c(const uint8_t *src, int stride) {
int i, j;
int sum = 0;
for (i = 0; i < 8; ++i, s+=p)
for (j = 0; j < 8; sum += s[j], ++j) {}
for (i = 0; i < 8; ++i, src += stride)
for (j = 0; j < 8; sum += src[j], ++j) {}
return (sum + 32) >> 6;
return ROUND_POWER_OF_TWO(sum, 6);
}
unsigned int vpx_avg_4x4_c(const uint8_t *s, int p) {
unsigned int vpx_avg_4x4_c(const uint8_t *src, int stride) {
int i, j;
int sum = 0;
for (i = 0; i < 4; ++i, s+=p)
for (j = 0; j < 4; sum += s[j], ++j) {}
for (i = 0; i < 4; ++i, src += stride)
for (j = 0; j < 4; sum += src[j], ++j) {}
return (sum + 8) >> 4;
return ROUND_POWER_OF_TWO(sum, 4);
}
// src_diff: first pass, 9 bit, dynamic range [-255, 255]
@ -176,14 +176,15 @@ int vpx_vector_var_c(int16_t const *ref, int16_t const *src,
return var;
}
void vpx_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
void vpx_minmax_8x8_c(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
int *min, int *max) {
int i, j;
*min = 255;
*max = 0;
for (i = 0; i < 8; ++i, s += p, d += dp) {
for (i = 0; i < 8; ++i, src += src_stride, ref += ref_stride) {
for (j = 0; j < 8; ++j) {
int diff = abs(s[j]-d[j]);
int diff = abs(src[j]-ref[j]);
*min = diff < *min ? diff : *min;
*max = diff > *max ? diff : *max;
}
@ -191,24 +192,24 @@ void vpx_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp,
}
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vpx_highbd_avg_8x8_c(const uint8_t *s8, int p) {
unsigned int vpx_highbd_avg_8x8_c(const uint8_t *src, int stride) {
int i, j;
int sum = 0;
const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
for (i = 0; i < 8; ++i, s+=p)
const uint16_t* s = CONVERT_TO_SHORTPTR(src);
for (i = 0; i < 8; ++i, s += stride)
for (j = 0; j < 8; sum += s[j], ++j) {}
return (sum + 32) >> 6;
return ROUND_POWER_OF_TWO(sum, 6);
}
unsigned int vpx_highbd_avg_4x4_c(const uint8_t *s8, int p) {
unsigned int vpx_highbd_avg_4x4_c(const uint8_t *src, int stride) {
int i, j;
int sum = 0;
const uint16_t* s = CONVERT_TO_SHORTPTR(s8);
for (i = 0; i < 4; ++i, s+=p)
const uint16_t* s = CONVERT_TO_SHORTPTR(src);
for (i = 0; i < 4; ++i, s+=stride)
for (j = 0; j < 4; sum += s[j], ++j) {}
return (sum + 8) >> 4;
return ROUND_POWER_OF_TWO(sum, 4);
}
void vpx_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8,

Просмотреть файл

@ -23,10 +23,10 @@ extern "C" {
#define FILTER_WEIGHT 128
typedef unsigned int(*vpx_sad_fn_t)(const uint8_t *a, int a_stride,
const uint8_t *b_ptr, int b_stride);
const uint8_t *b, int b_stride);
typedef unsigned int(*vpx_sad_avg_fn_t)(const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride,
typedef unsigned int(*vpx_sad_avg_fn_t)(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
const uint8_t *second_pred);
typedef void (*vp8_copy32xn_fn_t)(const uint8_t *a, int a_stride,
@ -50,10 +50,10 @@ typedef unsigned int (*vpx_subpixvariance_fn_t)(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
unsigned int *sse);
typedef unsigned int (*vpx_subp_avg_variance_fn_t)(const uint8_t *a_ptr,
typedef unsigned int (*vpx_subp_avg_variance_fn_t)(const uint8_t *a,
int a_stride,
int xoffset, int yoffset,
const uint8_t *b_ptr,
const uint8_t *b,
int b_stride,
unsigned int *sse,
const uint8_t *second_pred);
@ -75,26 +75,25 @@ typedef struct variance_vtable {
#endif // CONFIG_VP8
#if CONFIG_VP10 && CONFIG_EXT_INTER
typedef unsigned int(*vpx_masked_sad_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
typedef unsigned int(*vpx_masked_sad_fn_t)(const uint8_t *src,
int src_stride,
const uint8_t *ref,
int ref_stride,
const uint8_t *msk_ptr,
int msk_stride);
typedef unsigned int (*vpx_masked_variance_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
typedef unsigned int (*vpx_masked_variance_fn_t)(const uint8_t *src,
int src_stride,
const uint8_t *ref,
int ref_stride,
const uint8_t *msk_ptr,
const uint8_t *msk,
int msk_stride,
unsigned int *sse);
typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src_ptr,
int source_stride,
int xoffset,
int yoffset,
const uint8_t *ref_ptr,
int Refstride,
const uint8_t *msk_ptr,
typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src,
int src_stride,
int xoffset, int yoffset,
const uint8_t *ref,
int ref_stride,
const uint8_t *msk,
int msk_stride,
unsigned int *sse);
#endif // CONFIG_VP10 && CONFIG_EXT_INTER