Fix vp10_accumulate_frame_counts once and for all.

This ensures that the multi-threaded and single-threaded
encoder/decoder always use the same probability contexts.

Change-Id: I6f1e7c6bd8808c390c1dc0a628ae97db3acedf6d
Geza Lore 2016-05-04 11:30:36 +01:00
Parent e536a1cc07
Commit c959151fa2
5 changed files: 14 additions and 267 deletions

View file

@@ -116,6 +116,8 @@ typedef struct frame_contexts {
 } FRAME_CONTEXT;
 
 typedef struct FRAME_COUNTS {
+  // Note: This structure should only contain 'unsigned int' fields, or
+  // aggregates built solely from 'unsigned int' fields/elements
   unsigned int kf_y_mode[INTRA_MODES][INTRA_MODES][INTRA_MODES];
   unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES];
   unsigned int uv_mode[INTRA_MODES][INTRA_MODES];
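
The comment added above documents a layout invariant, not a style preference: a struct whose members are all 'unsigned int' (or aggregates of them) has no internal padding, so its size is an exact multiple of sizeof(unsigned int) and it can be traversed as a flat array. A minimal sketch of how that invariant could be checked at compile time, assuming C11 is available; TOY_COUNTS is a hypothetical stand-in for FRAME_COUNTS, not the codec's type:

#include <assert.h> /* C11 static_assert */

/* Hypothetical stand-in for FRAME_COUNTS: every member is an
   'unsigned int' or an array of them, so the struct packs into a
   whole number of 'unsigned int' slots. */
typedef struct {
  unsigned int y_mode[4][13];
  unsigned int uv_mode[13][13];
  unsigned int skip[3][2];
} TOY_COUNTS;

/* Compilation fails if the struct ever stops being an exact
   multiple of 'unsigned int' in size. */
static_assert(sizeof(TOY_COUNTS) % sizeof(unsigned int) == 0,
              "TOY_COUNTS must pack to a whole number of unsigned ints");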

View file

@@ -336,270 +336,15 @@ void vp10_loop_filter_dealloc(VP9LfSync *lf_sync) {
   }
 }
 
-// Accumulate frame counts.
-void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts,
-                                  int is_dec) {
-  int i, j, k, l, m;
+// Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int'
+// members, so we treat it as an array, and sum over the whole length.
+void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts) {
+  unsigned int *const acc = (unsigned int*)&cm->counts;
+  const unsigned int *const cnt = (unsigned int*)counts;
+
-  for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
-    for (j = 0; j < INTRA_MODES; j++)
-      cm->counts.y_mode[i][j] += counts->y_mode[i][j];
+  const unsigned int n_counts = sizeof(FRAME_COUNTS)/sizeof(unsigned int);
+  unsigned int i;
+
-  for (i = 0; i < INTRA_MODES; i++)
-    for (j = 0; j < INTRA_MODES; j++)
-      cm->counts.uv_mode[i][j] += counts->uv_mode[i][j];
-  for (i = 0; i < PARTITION_CONTEXTS; i++)
-#if CONFIG_EXT_PARTITION_TYPES
-    for (j = 0; j < (i ? EXT_PARTITION_TYPES : PARTITION_TYPES); j++)
-#else
-    for (j = 0; j < PARTITION_TYPES; j++)
-#endif
-      cm->counts.partition[i][j] += counts->partition[i][j];
-  if (is_dec) {
-    int n;
-    for (i = 0; i < TX_SIZES; i++)
-      for (j = 0; j < PLANE_TYPES; j++)
-        for (k = 0; k < REF_TYPES; k++)
-          for (l = 0; l < COEF_BANDS; l++)
-            for (m = 0; m < COEFF_CONTEXTS; m++) {
-              cm->counts.eob_branch[i][j][k][l][m] +=
-                  counts->eob_branch[i][j][k][l][m];
-              for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
-                cm->counts.coef[i][j][k][l][m][n] +=
-                    counts->coef[i][j][k][l][m][n];
-            }
-  } else {
-    for (i = 0; i < TX_SIZES; i++)
-      for (j = 0; j < PLANE_TYPES; j++)
-        for (k = 0; k < REF_TYPES; k++)
-          for (l = 0; l < COEF_BANDS; l++)
-            for (m = 0; m < COEFF_CONTEXTS; m++)
-              cm->counts.eob_branch[i][j][k][l][m] +=
-                  counts->eob_branch[i][j][k][l][m];
-    // In the encoder, cm->counts.coef is only updated at frame
-    // level, so not need to accumulate it here.
-    // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
-    //   cm->counts.coef[i][j][k][l][m][n] +=
-    //       counts->coef[i][j][k][l][m][n];
-  }
-  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
-    for (j = 0; j < SWITCHABLE_FILTERS; j++)
-      cm->counts.switchable_interp[i][j] += counts->switchable_interp[i][j];
-#if CONFIG_OBMC
-  for (i = 0; i < BLOCK_SIZES; i++)
-    for (j = 0; j < 2; j++)
-      cm->counts.obmc[i][j] += counts->obmc[i][j];
-#endif  // CONFIG_OBMC
-#if CONFIG_REF_MV
-  for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i)
-    for (j = 0; j < 2; ++j)
-      cm->counts.newmv_mode[i][j] += counts->newmv_mode[i][j];
-  for (i = 0; i < ZEROMV_MODE_CONTEXTS; ++i)
-    for (j = 0; j < 2; ++j)
-      cm->counts.zeromv_mode[i][j] += counts->zeromv_mode[i][j];
-  for (i = 0; i < REFMV_MODE_CONTEXTS; ++i)
-    for (j = 0; j < 2; ++j)
-      cm->counts.refmv_mode[i][j] += counts->refmv_mode[i][j];
-  for (i = 0; i < DRL_MODE_CONTEXTS; ++i)
-    for (j = 0; j < 2; ++j)
-      cm->counts.drl_mode[i][j] += counts->drl_mode[i][j];
-#if CONFIG_EXT_INTER
-  for (j = 0; j < 2; ++j)
-    cm->counts.new2mv_mode[j] += counts->new2mv_mode[j];
-#endif  // CONFIG_EXT_INTER
-#endif
-  for (i = 0; i < INTER_MODE_CONTEXTS; i++)
-    for (j = 0; j < INTER_MODES; j++)
-      cm->counts.inter_mode[i][j] += counts->inter_mode[i][j];
-#if CONFIG_EXT_INTER
-  for (i = 0; i < INTER_MODE_CONTEXTS; i++)
-    for (j = 0; j < INTER_COMPOUND_MODES; j++)
-      cm->counts.inter_compound_mode[i][j] +=
-          counts->inter_compound_mode[i][j];
-  for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
-    for (j = 0; j < 2; j++)
-      cm->counts.interintra[i][j] += counts->interintra[i][j];
-  for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
-    for (j = 0; j < INTERINTRA_MODES; j++)
-      cm->counts.interintra_mode[i][j] += counts->interintra_mode[i][j];
-  for (i = 0; i < BLOCK_SIZES; i++)
-    for (j = 0; j < 2; j++)
-      cm->counts.wedge_interintra[i][j] += counts->wedge_interintra[i][j];
-  for (i = 0; i < BLOCK_SIZES; i++)
-    for (j = 0; j < 2; j++)
-      cm->counts.wedge_interinter[i][j] += counts->wedge_interinter[i][j];
-#endif  // CONFIG_EXT_INTER
-  for (i = 0; i < INTRA_INTER_CONTEXTS; i++)
-    for (j = 0; j < 2; j++)
-      cm->counts.intra_inter[i][j] += counts->intra_inter[i][j];
-  for (i = 0; i < COMP_INTER_CONTEXTS; i++)
-    for (j = 0; j < 2; j++)
-      cm->counts.comp_inter[i][j] += counts->comp_inter[i][j];
-  for (i = 0; i < REF_CONTEXTS; i++)
-    for (j = 0; j < (SINGLE_REFS - 1); j++)
-      for (k = 0; k < 2; k++)
-        cm->counts.single_ref[i][j][k] += counts->single_ref[i][j][k];
-  for (i = 0; i < REF_CONTEXTS; i++)
-    for (j = 0; j < (COMP_REFS - 1); j++)
-      for (k = 0; k < 2; k++)
-        cm->counts.comp_ref[i][j][k] += counts->comp_ref[i][j][k];
-  for (i = 0; i < TX_SIZES - 1; ++i)
-    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
-      for (k = 0; k < i + 2; ++k)
-        cm->counts.tx_size[i][j][k] += counts->tx_size[i][j][k];
-  for (i = 0; i < TX_SIZES; ++i)
-    cm->counts.tx_size_totals[i] += counts->tx_size_totals[i];
-#if CONFIG_VAR_TX
-  for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
-    for (j = 0; j < 2; ++j)
-      cm->counts.txfm_partition[i][j] += counts->txfm_partition[i][j];
-#endif
-  for (i = 0; i < SKIP_CONTEXTS; i++)
-    for (j = 0; j < 2; j++)
-      cm->counts.skip[i][j] += counts->skip[i][j];
-#if CONFIG_REF_MV
-  for (m = 0; m < NMV_CONTEXTS; ++m) {
-    for (i = 0; i < MV_JOINTS; i++)
-      cm->counts.mv[m].joints[i] += counts->mv[m].joints[i];
-    for (k = 0; k < 2; k++) {
-      nmv_component_counts *comps = &cm->counts.mv[m].comps[k];
-      nmv_component_counts *comps_t = &counts->mv[m].comps[k];
-      for (i = 0; i < 2; i++) {
-        comps->sign[i] += comps_t->sign[i];
-        comps->class0_hp[i] += comps_t->class0_hp[i];
-        comps->hp[i] += comps_t->hp[i];
-      }
-      for (i = 0; i < MV_CLASSES; i++)
-        comps->classes[i] += comps_t->classes[i];
-      for (i = 0; i < CLASS0_SIZE; i++) {
-        comps->class0[i] += comps_t->class0[i];
-        for (j = 0; j < MV_FP_SIZE; j++)
-          comps->class0_fp[i][j] += comps_t->class0_fp[i][j];
-      }
-      for (i = 0; i < MV_OFFSET_BITS; i++)
-        for (j = 0; j < 2; j++)
-          comps->bits[i][j] += comps_t->bits[i][j];
-      for (i = 0; i < MV_FP_SIZE; i++)
-        comps->fp[i] += comps_t->fp[i];
-    }
-  }
-#else
-  for (i = 0; i < MV_JOINTS; i++)
-    cm->counts.mv.joints[i] += counts->mv.joints[i];
-  for (k = 0; k < 2; k++) {
-    nmv_component_counts *comps = &cm->counts.mv.comps[k];
-    nmv_component_counts *comps_t = &counts->mv.comps[k];
-    for (i = 0; i < 2; i++) {
-      comps->sign[i] += comps_t->sign[i];
-      comps->class0_hp[i] += comps_t->class0_hp[i];
-      comps->hp[i] += comps_t->hp[i];
-    }
-    for (i = 0; i < MV_CLASSES; i++)
-      comps->classes[i] += comps_t->classes[i];
-    for (i = 0; i < CLASS0_SIZE; i++) {
-      comps->class0[i] += comps_t->class0[i];
-      for (j = 0; j < MV_FP_SIZE; j++)
-        comps->class0_fp[i][j] += comps_t->class0_fp[i][j];
-    }
-    for (i = 0; i < MV_OFFSET_BITS; i++)
-      for (j = 0; j < 2; j++)
-        comps->bits[i][j] += comps_t->bits[i][j];
-    for (i = 0; i < MV_FP_SIZE; i++)
-      comps->fp[i] += comps_t->fp[i];
-  }
-#endif
-#if CONFIG_EXT_TX
-  for (i = 0; i < EXT_TX_SIZES; i++) {
-    int s, k;
-    for (s = 1; s < EXT_TX_SETS_INTER; ++s) {
-      if (use_inter_ext_tx_for_txsize[s][i]) {
-        for (k = 0; k < TX_TYPES; k++)
-          cm->counts.inter_ext_tx[s][i][k] += counts->inter_ext_tx[s][i][k];
-      }
-    }
-    for (s = 1; s < EXT_TX_SETS_INTRA; ++s) {
-      if (use_intra_ext_tx_for_txsize[s][i]) {
-        int j;
-        for (j = 0; j < INTRA_MODES; ++j)
-          for (k = 0; k < TX_TYPES; k++)
-            cm->counts.intra_ext_tx[s][i][j][k] +=
-                counts->intra_ext_tx[s][i][j][k];
-      }
-    }
-  }
-#else
-  for (i = 0; i < EXT_TX_SIZES; i++) {
-    int j;
-    for (j = 0; j < TX_TYPES; ++j)
-      for (k = 0; k < TX_TYPES; k++)
-        cm->counts.intra_ext_tx[i][j][k] += counts->intra_ext_tx[i][j][k];
-  }
-  for (i = 0; i < EXT_TX_SIZES; i++) {
-    for (k = 0; k < TX_TYPES; k++)
-      cm->counts.inter_ext_tx[i][k] += counts->inter_ext_tx[i][k];
-  }
-#endif  // CONFIG_EXT_TX
-#if CONFIG_SUPERTX
-  for (i = 0; i < PARTITION_SUPERTX_CONTEXTS; i++)
-    for (j = 0; j < TX_SIZES; j++)
-      for (k = 0; k < 2; k++)
-        cm->counts.supertx[i][j][k] += counts->supertx[i][j][k];
-  for (i = 0; i < TX_SIZES; i++)
-    cm->counts.supertx_size[i] += counts->supertx_size[i];
-#endif  // CONFIG_SUPERTX
-  for (i = 0; i < PREDICTION_PROBS; i++)
-    for (j = 0; j < 2; j++)
-      cm->counts.seg.pred[i][j] += counts->seg.pred[i][j];
-  for (i = 0; i < MAX_SEGMENTS; i++) {
-    cm->counts.seg.tree_total[i] += counts->seg.tree_total[i];
-    cm->counts.seg.tree_mispred[i] += counts->seg.tree_mispred[i];
-  }
-#if CONFIG_EXT_INTRA
-  for (i = 0; i < PLANE_TYPES; ++i)
-    for (j = 0; j < 2; ++j)
-      cm->counts.ext_intra[i][j] += counts->ext_intra[i][j];
-  for (i = 0; i < INTRA_FILTERS + 1; ++i)
-    for (j = 0; j < INTRA_FILTERS; ++j)
-      cm->counts.intra_filter[i][j] += counts->intra_filter[i][j];
-#endif  // CONFIG_EXT_INTRA
+  for (i = 0; i < n_counts; i++)
+    acc[i] += cnt[i];
 }
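
The rewrite relies on the invariant documented in the header hunk above: since FRAME_COUNTS contains only 'unsigned int' data, summing it field by field and summing the underlying array element by element give identical results, and the flat loop can never silently miss a newly added field the way the old hand-written loops (and their is_dec special case) could. A self-contained sketch of the technique, using hypothetical toy names (TOY_COUNTS, accumulate) rather than the codec's own:

#include <stdio.h>

/* Toy counterpart of FRAME_COUNTS: 'unsigned int' members only. */
typedef struct {
  unsigned int a[2][3];
  unsigned int b[4];
} TOY_COUNTS;

/* Element-wise sum over the struct viewed as a flat array; the same
   idea as the patched vp10_accumulate_frame_counts(). The casts are
   sound only because every member is an 'unsigned int', so there is
   no padding and no other member type. */
static void accumulate(TOY_COUNTS *dst, const TOY_COUNTS *src) {
  unsigned int *const acc = (unsigned int *)dst;
  const unsigned int *const cnt = (const unsigned int *)src;
  const unsigned int n = sizeof(TOY_COUNTS) / sizeof(unsigned int);
  unsigned int i;
  for (i = 0; i < n; i++) acc[i] += cnt[i];
}

int main(void) {
  TOY_COUNTS total = { { { 1, 2, 3 }, { 4, 5, 6 } }, { 7, 8, 9, 10 } };
  TOY_COUNTS tile = { { { 1, 1, 1 }, { 1, 1, 1 } }, { 1, 1, 1, 1 } };
  accumulate(&total, &tile);
  printf("%u %u\n", total.a[1][2], total.b[3]); /* prints "7 11" */
  return 0;
}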

View file

@@ -56,7 +56,7 @@ void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
                                VP9LfSync *lf_sync);
 
 void vp10_accumulate_frame_counts(struct VP10Common *cm,
-                                  struct FRAME_COUNTS *counts, int is_dec);
+                                  struct FRAME_COUNTS *counts);
 
 #ifdef __cplusplus
 }  // extern "C"

View file

@@ -3491,7 +3491,7 @@ static const uint8_t *decode_tiles_mt(VP10Decoder *pbi,
 
   if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
     for (i = 0; i < num_workers; ++i) {
       TileWorkerData *const twd = (TileWorkerData*)pbi->tile_workers[i].data1;
-      vp10_accumulate_frame_counts(cm, &twd->counts, 1);
+      vp10_accumulate_frame_counts(cm, &twd->counts);
     }
   }

View file

@@ -167,7 +167,7 @@ void vp10_encode_tiles_mt(VP10_COMP *cpi) {
 
       // Accumulate counters.
      if (i < cpi->num_workers - 1) {
-        vp10_accumulate_frame_counts(cm, thread_data->td->counts, 0);
+        vp10_accumulate_frame_counts(cm, thread_data->td->counts);
        accumulate_rd_opt(&cpi->td, thread_data->td);
      }
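
Both call sites follow the same pattern: each worker thread gathers statistics into a private FRAME_COUNTS, and the main thread folds every worker's counts into cm->counts once the work is done, so the decoder and encoder now accumulate exactly the same set of fields. A hedged sketch of that driver pattern, reusing the hypothetical TOY_COUNTS and accumulate() from the sketch above (fold_worker_counts, num_workers, and worker_counts are illustrative names, not the codec's):

/* Fold per-worker counts into one total, mirroring the loops at the
   two call sites above (minus the dropped is_dec argument). */
static void fold_worker_counts(TOY_COUNTS *total,
                               const TOY_COUNTS *worker_counts,
                               int num_workers) {
  int i;
  for (i = 0; i < num_workers; ++i)
    accumulate(total, &worker_counts[i]);
}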
}