Totally remove prev_mi in VP9 decoder.

This saves memory and improves decode speed by removing the
unnecessary memset of the big prev_mi array for
all key frames.

Decoding an all-key-frame 1080p video shows a speed improvement of around 2%.

Change-Id: I6284a445c1291056e3c15135c3c20d502f791c10
This commit is contained in:
hkuang 2014-11-03 11:23:22 -08:00
Родитель 343acaa8f2
Коммит 4cc7c5a17f
6 изменённых файлов: 79 добавлений и 82 удалений

Просмотреть файл

@ -17,17 +17,6 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_systemdependent.h"
// Zeroes the border entries of the MODE_INFO array |mi|: the first
// cm->mi_stride entries (top border row), followed by the first entry of
// each of the next cm->mi_rows rows (left border column).
static void clear_mi_border(const VP9_COMMON *cm, MODE_INFO *mi) {
  const int stride = cm->mi_stride;
  MODE_INFO *row_start = mi;
  int row;

  // Top border: one full row of |stride| entries.
  vpx_memset(mi, 0, sizeof(*mi) * stride);

  // Left border: a single entry at the start of every following row.
  for (row = 1; row <= cm->mi_rows; ++row) {
    row_start += stride;
    vpx_memset(row_start, 0, sizeof(*row_start));
  }
}
void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) {
const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2);
const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2);
@ -41,48 +30,6 @@ void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) {
cm->MBs = cm->mb_rows * cm->mb_cols;
}
// Prepares the current and previous mode-info arrays for use.
// The current array has its first (mi_rows + 1) rows zeroed and cm->mi set
// one row plus one entry past cm->mip. For the previous array only the
// border is cleared, and cm->prev_mi is set at the same offset from
// cm->prev_mip.
static void setup_mi(VP9_COMMON *cm) {
  // Offset that skips the extra top row and the extra left column.
  const int border_offset = cm->mi_stride + 1;

  // Current frame.
  vpx_memset(cm->mip, 0,
             cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
  cm->mi = cm->mip + border_offset;

  // Previous frame.
  cm->prev_mi = cm->prev_mip + border_offset;
  clear_mi_border(cm, cm->prev_mip);
}
// Allocates the two zero-initialised MODE_INFO arrays of |mi_size| entries
// held in cm->mip_array and makes slot 0 the current array (cm->mip) and
// slot 1 the previous one (cm->prev_mip).
// Returns 0 on success, or 1 as soon as an allocation fails; on failure
// nothing allocated so far is released here and the index/pointer fields
// are left untouched.
static int alloc_mi(VP9_COMMON *cm, int mi_size) {
  int slot = 0;

  while (slot < 2) {
    MODE_INFO *const buf = (MODE_INFO *)vpx_calloc(mi_size, sizeof(*buf));
    cm->mip_array[slot] = buf;
    if (!buf) {
      return 1;
    }
    ++slot;
  }

  cm->mi_alloc_size = mi_size;

  // Slot 0 starts as the current array, slot 1 as the previous one.
  cm->mi_idx = 0;
  cm->prev_mi_idx = 1;
  cm->mip = cm->mip_array[0];
  cm->prev_mip = cm->mip_array[1];

  return 0;
}
// Frees both arrays held in cm->mip_array and resets every pointer that
// referred to them (the two slots, cm->mip and cm->prev_mip) to NULL.
static void free_mi(VP9_COMMON *cm) {
  vpx_free(cm->mip_array[0]);
  cm->mip_array[0] = NULL;

  vpx_free(cm->mip_array[1]);
  cm->mip_array[1] = NULL;

  cm->mip = NULL;
  cm->prev_mip = NULL;
}
void vp9_free_ref_frame_buffers(VP9_COMMON *cm) {
int i;
@ -101,14 +48,11 @@ void vp9_free_ref_frame_buffers(VP9_COMMON *cm) {
}
// Releases the context buffers held by |cm|: the mode-info storage (through
// the free_mi calls below), last_frame_seg_map, above_context and
// above_seg_context. The three pointers freed directly here are reset to
// NULL afterwards.
void vp9_free_context_buffers(VP9_COMMON *cm) {
// NOTE(review): both the file-local free_mi() and the cm->free_mi hook are
// invoked here. This looks like the removed/added pair of a diff hunk whose
// +/- markers were lost -- confirm which call is the live one.
free_mi(cm);
cm->free_mi(cm);
vpx_free(cm->last_frame_seg_map);
cm->last_frame_seg_map = NULL;
vpx_free(cm->above_context);
cm->above_context = NULL;
vpx_free(cm->above_seg_context);
cm->above_seg_context = NULL;
}
@ -117,7 +61,7 @@ int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) {
vp9_free_context_buffers(cm);
vp9_set_mb_mi(cm, width, height);
if (alloc_mi(cm, cm->mi_stride * calc_mi_size(cm->mi_rows)))
if (cm->alloc_mi(cm, cm->mi_stride * calc_mi_size(cm->mi_rows)))
goto fail;
cm->last_frame_seg_map = (uint8_t *)vpx_calloc(cm->mi_rows * cm->mi_cols, 1);
@ -204,22 +148,7 @@ void vp9_remove_common(VP9_COMMON *cm) {
}
// Resets the per-frame context state of |cm|: sets up the mode-info arrays
// and, when a last-frame segmentation map exists, zeroes its
// mi_rows * mi_cols bytes.
void vp9_init_context_buffers(VP9_COMMON *cm) {
// NOTE(review): both the file-local setup_mi() and the cm->setup_mi hook are
// invoked here. This looks like the removed/added pair of a diff hunk whose
// +/- markers were lost -- confirm which call is the live one.
setup_mi(cm);
cm->setup_mi(cm);
// The map is optional; skip the clear when it is not present.
if (cm->last_frame_seg_map)
vpx_memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols);
}
// Exchanges the roles of the two arrays in cm->mip_array: the array that was
// current becomes the previous one and vice versa. The slot indices, the
// base pointers (mip / prev_mip) and the pointers to the upper-left visible
// entry (mi / prev_mi) are all updated to match.
void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
  const int new_cur_idx = cm->prev_mi_idx;
  const int new_prev_idx = cm->mi_idx;
  // Offset that skips the extra top row and the extra left column.
  const int border_offset = cm->mi_stride + 1;

  cm->mi_idx = new_cur_idx;
  cm->prev_mi_idx = new_prev_idx;

  // The array that was current is the previous one for the next frame.
  cm->mip = cm->mip_array[new_cur_idx];
  cm->prev_mip = cm->mip_array[new_prev_idx];

  cm->mi = cm->mip + border_offset;
  cm->prev_mi = cm->prev_mip + border_offset;
}

Просмотреть файл

@ -31,7 +31,6 @@ int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height);
void vp9_free_state_buffers(struct VP9Common *cm);
void vp9_set_mb_mi(struct VP9Common *cm, int width, int height);
void vp9_swap_mi_and_prev_mi(struct VP9Common *cm);
#ifdef __cplusplus
} // extern "C"

Просмотреть файл

@ -464,7 +464,8 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
if (frame_is_intra_only(cm))
// prev_mip will only be allocated in encoder.
if (frame_is_intra_only(cm) && cm->prev_mip)
vpx_memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) *
sizeof(*cm->prev_mip));

Просмотреть файл

@ -96,7 +96,6 @@ typedef struct VP9Common {
#endif
YV12_BUFFER_CONFIG *frame_to_show;
RefCntBuffer frame_bufs[FRAME_BUFFERS];
RefCntBuffer *prev_frame;
@ -149,17 +148,20 @@ typedef struct VP9Common {
/* We allocate a MODE_INFO struct for each macroblock, together with
an extra row on top and column on the left to simplify prediction. */
int mi_idx;
int prev_mi_idx;
int mi_alloc_size;
MODE_INFO *mip_array[2];
MODE_INFO *mip; /* Base of allocated array */
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
// TODO(agrange): Move prev_mi into encoder structure.
// prev_mip and prev_mi will only be allocated in VP9 encoder.
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
// Separate mi functions between encoder and decoder.
int (*alloc_mi)(struct VP9Common *cm, int mi_size);
void (*free_mi)(struct VP9Common *cm);
void (*setup_mi)(struct VP9Common *cm);
// Whether to use previous frame's motion vectors for prediction.
int use_prev_frame_mvs;

Просмотреть файл

@ -43,6 +43,24 @@ static void initialize_dec() {
}
}
// Decoder-side mode-info setup: zeroes the first (mi_rows + 1) rows of
// cm->mip and sets cm->mi one row plus one entry past the base of that
// array. No previous-frame array is touched.
static void vp9_dec_setup_mi(VP9_COMMON *cm) {
  const int rows_to_clear = cm->mi_rows + 1;

  vpx_memset(cm->mip, 0, cm->mi_stride * rows_to_clear * sizeof(*cm->mip));

  // Skip the extra top row and the extra left column.
  cm->mi = cm->mip + cm->mi_stride + 1;
}
// Decoder-side mode-info allocation: allocates a single zero-initialised
// array of |mi_size| entries into cm->mip.
// Returns 0 on success (and records |mi_size| in cm->mi_alloc_size), or 1
// if the allocation failed.
static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) {
  cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));

  if (cm->mip) {
    cm->mi_alloc_size = mi_size;
    return 0;
  }

  return 1;
}
// Decoder-side mode-info release: frees the array referenced by cm->mip and
// leaves cm->mip set to NULL.
static void vp9_dec_free_mi(VP9_COMMON *cm) {
  void *const owned = cm->mip;

  cm->mip = NULL;
  vpx_free(owned);
}
VP9Decoder *vp9_decoder_create() {
VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
VP9_COMMON *const cm = pbi ? &pbi->common : NULL;
@ -77,6 +95,10 @@ VP9Decoder *vp9_decoder_create() {
cm->bit_depth = VPX_BITS_8;
cm->dequant_bit_depth = VPX_BITS_8;
cm->alloc_mi = vp9_dec_alloc_mi;
cm->free_mi = vp9_dec_free_mi;
cm->setup_mi = vp9_dec_setup_mi;
// vp9_init_dequantizer() is first called here. Add check in
// frame_init_dequantizer() to avoid unnecessary calling of
// vp9_init_dequantizer() for every frame.

Просмотреть файл

@ -139,6 +139,47 @@ static void setup_frame(VP9_COMP *cpi) {
}
}
// Encoder-side mode-info setup.
// Current array: the first (mi_rows + 1) rows are zeroed and cm->mi is set
// one row plus one entry past cm->mip.
// Previous array: cm->prev_mi is set at the same offset from cm->prev_mip,
// and only the border (top row and first entry of each following row) is
// zeroed.
static void vp9_enc_setup_mi(VP9_COMMON *cm) {
  int row;
  int row_offset;

  cm->mi = cm->mip + cm->mi_stride + 1;
  vpx_memset(cm->mip, 0,
             cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));

  cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;

  // Top border row of the previous array.
  vpx_memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride);

  // Left border column: first entry of rows 1..mi_rows.
  row_offset = cm->mi_stride;
  for (row = 0; row < cm->mi_rows; ++row) {
    vpx_memset(cm->prev_mip + row_offset, 0, sizeof(*cm->prev_mip));
    row_offset += cm->mi_stride;
  }
}
// Encoder-side mode-info allocation: allocates zero-initialised arrays of
// |mi_size| entries for both cm->mip and cm->prev_mip.
// Returns 0 when both allocations succeed (and records |mi_size| in
// cm->mi_alloc_size), or 1 as soon as one fails. Nothing is released here
// on failure.
static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) {
  cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
  if (cm->mip) {
    cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip));
    if (cm->prev_mip) {
      cm->mi_alloc_size = mi_size;
      return 0;
    }
  }

  return 1;
}
// Encoder-side mode-info release: frees the arrays referenced by cm->mip and
// cm->prev_mip and leaves both pointers set to NULL.
static void vp9_enc_free_mi(VP9_COMMON *cm) {
  void *const cur = cm->mip;
  void *const prev = cm->prev_mip;

  cm->mip = NULL;
  cm->prev_mip = NULL;

  vpx_free(cur);
  vpx_free(prev);
}
// Exchanges cm->mip and cm->prev_mip so that the array that was current
// becomes the previous one, then recomputes cm->mi and cm->prev_mi as one
// row plus one entry past their respective base pointers.
static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
  MODE_INFO *const old_cur = cm->mip;
  // Offset that skips the extra top row and the extra left column.
  const int border_offset = cm->mi_stride + 1;

  cm->mip = cm->prev_mip;
  cm->prev_mip = old_cur;

  cm->mi = cm->mip + border_offset;
  cm->prev_mi = cm->prev_mip + border_offset;
}
void vp9_initialize_enc() {
static int init_done = 0;
@ -1380,6 +1421,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) {
}
cm->error.setjmp = 1;
cm->alloc_mi = vp9_enc_alloc_mi;
cm->free_mi = vp9_enc_free_mi;
cm->setup_mi = vp9_enc_setup_mi;
CHECK_MEM_ERROR(cm, cm->fc,
(FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));