From d8b4c7927075bfb2f29c54e6ff7d9fc3a52e2436 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Thu, 31 Oct 2013 12:21:49 -0700 Subject: [PATCH] Decouple macroblockd_plane buffer usage Make the macroblockd_plane contain dynamic buffer pointers instead of static pointers to the memory space allocated therein. The decoder uses the buffer allocated in pbi, while the encoder will use a dual buffer approach for rate-distortion optimization search. Change-Id: Ie6f24be2dcda35df7c15b4014e5ccf236fb3f76c --- vp9/common/vp9_blockd.h | 6 +++--- vp9/decoder/vp9_decodframe.c | 19 ++++++++++++++++++- vp9/decoder/vp9_onyxd_if.c | 14 ++++++++++++++ vp9/decoder/vp9_onyxd_int.h | 4 ++++ vp9/encoder/vp9_onyx_if.c | 13 +++++++++++++ vp9/encoder/vp9_onyx_int.h | 4 ++++ 6 files changed, 56 insertions(+), 4 deletions(-) diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index f52adfc97..a666d1d1d 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -170,9 +170,9 @@ struct buf_2d { }; struct macroblockd_plane { - DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]); - DECLARE_ALIGNED(16, uint16_t, eobs[256]); + int16_t *qcoeff; + int16_t *dqcoeff; + uint16_t *eobs; PLANE_TYPE plane_type; int subsampling_x; int subsampling_y; diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index aad400aa1..218fdd813 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -42,6 +42,9 @@ typedef struct TileWorkerData { vp9_reader bit_reader; DECLARE_ALIGNED(16, MACROBLOCKD, xd); DECLARE_ALIGNED(16, unsigned char, token_cache[1024]); + DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); } TileWorkerData; static int read_be32(const uint8_t *p) { @@ -931,6 +934,19 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { return end; } +static void 
setup_tile_macroblockd(TileWorkerData *const tile_data) { + MACROBLOCKD *xd = &tile_data->xd; + struct macroblockd_plane *const pd = xd->plane; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + pd[i].qcoeff = tile_data->qcoeff[i]; + pd[i].dqcoeff = tile_data->dqcoeff[i]; + pd[i].eobs = tile_data->eobs[i]; + vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); + } +} + static int tile_worker_hook(void *arg1, void *arg2) { TileWorkerData *tile_data = (TileWorkerData*)arg1; const TileInfo *const tile = (TileInfo*)arg2; @@ -1008,6 +1024,7 @@ static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { setup_token_decoder(data, data_end, size, &cm->error, &tile_data->bit_reader); setup_tile_context(pbi, &tile_data->xd, 0, tile_col); + setup_tile_macroblockd(tile_data); worker->had_error = 0; if (i == num_workers - 1 || tile_col == tile_cols - 1) { @@ -1319,7 +1336,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp9_zero(cm->counts); for (i = 0; i < MAX_MB_PLANE; ++i) - vp9_zero(xd->plane[i].dqcoeff); + vpx_memset(xd->plane[i].dqcoeff, 0, 64 * 64 * sizeof(int16_t)); xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); diff --git a/vp9/decoder/vp9_onyxd_if.c b/vp9/decoder/vp9_onyxd_if.c index 5f970a3d5..cb45d3702 100644 --- a/vp9/decoder/vp9_onyxd_if.c +++ b/vp9/decoder/vp9_onyxd_if.c @@ -107,6 +107,18 @@ void vp9_initialize_dec() { } } +static void init_macroblockd(VP9D_COMP *const pbi) { + MACROBLOCKD *xd = &pbi->mb; + struct macroblockd_plane *const pd = xd->plane; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + pd[i].qcoeff = pbi->qcoeff[i]; + pd[i].dqcoeff = pbi->dqcoeff[i]; + pd[i].eobs = pbi->eobs[i]; + } +} + VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP)); VP9_COMMON *const cm = pbi ? 
&pbi->common : NULL; @@ -141,6 +153,8 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { cm->error.setjmp = 0; pbi->decoded_key_frame = 0; + init_macroblockd(pbi); + vp9_worker_init(&pbi->lf_worker); return pbi; diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index 7c4c9db36..7ad05e6b2 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -22,6 +22,10 @@ typedef struct VP9Decompressor { DECLARE_ALIGNED(16, VP9_COMMON, common); + DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); + VP9D_CONFIG oxcf; const uint8_t *source; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index f922f900a..4f1357a2d 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1524,6 +1524,17 @@ static void free_pick_mode_context(MACROBLOCK *x) { } } +static void init_macroblock(VP9_COMP *const cpi) { + MACROBLOCKD *xd = &cpi->mb.e_mbd; + struct macroblockd_plane *const pd = xd->plane; + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) { + pd[i].qcoeff = cpi->qcoeff[i]; + pd[i].dqcoeff = cpi->dqcoeff[i]; + pd[i].eobs = cpi->eobs[i]; + } +} + VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { int i, j; volatile union { @@ -1562,6 +1573,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { init_pick_mode_context(cpi); + init_macroblock(cpi); + cm->current_video_frame = 0; cpi->kf_overspend_bits = 0; cpi->kf_bitrate_adjustment = 0; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 9429c7fed..839a92b7e 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -313,6 +313,10 @@ typedef struct VP9_COMP { VP9_CONFIG oxcf; struct rdcost_block_args rdcost_stack; + DECLARE_ALIGNED(16, int16_t, qcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[MAX_MB_PLANE][64 * 64]); + DECLARE_ALIGNED(16, uint16_t, eobs[MAX_MB_PLANE][256]); + 
struct lookahead_ctx *lookahead; struct lookahead_entry *source; #if CONFIG_MULTIPLE_ARF