Inline vp9_get_coef_context() (and remove vp9_ prefix).

Makes cost_coeffs() a lot faster:
4x4: 236 -> 181 cycles
8x8: 888 -> 588 cycles
16x16: 3550 -> 2483 cycles
32x32: 17392 -> 12010 cycles

Total encode time of first 50 frames of bus (speed 0) @ 1500kbps goes
from 2min51.6 to 2min43.9, i.e. 4.7% overall speedup.

Change-Id: I16b8d595946393c8dc661599550b3f37f5718896
This commit is contained in:
Ronald S. Bultje 2013-06-28 10:40:21 -07:00
Родитель e3ce2b2ab3
Коммит d00b8e5f82
6 изменённых файлов: 29 добавлений и 36 удалений

Просмотреть файл

@ -445,32 +445,6 @@ vp9_extra_bit vp9_extra_bits[12] = {
#include "vp9/common/vp9_default_coef_probs.h"
// Returns the entropy context for the coefficient at scan position c.
//
// This is currently a placeholder to allow experimentation with context
// models based on the energy of earlier tokens within the current block.
// token_cache is only read here; callers are responsible for storing the
// energy class of each token they process.
//
//   scan        maps a scan position to an index into token_cache.
//   neighbors   MAX_NEIGHBORS entries per scan position; each entry is itself
//               a scan position. The first entry must be >= 0; a negative
//               second entry means only one neighbor is used.
//   nb_pad      number of neighbor entries per position; must equal
//               MAX_NEIGHBORS.
//   token_cache per-coefficient values consulted for the neighbors.
//   c           scan position to compute the context for.
//   l           end-of-block position; c == l always yields context 0 and
//               does not touch neighbors or token_cache.
//
// With two neighbors the result is (1 + a + b) >> 1 of their cached values;
// with one neighbor it is that neighbor's cached value.
#define MAX_NEIGHBORS 2
int vp9_get_coef_context(const int *scan, const int *neighbors,
                         int nb_pad, uint8_t *token_cache, int c, int l) {
  int eob = l;
  (void)nb_pad;  // only consulted by the assert below; silences NDEBUG builds
  assert(nb_pad == MAX_NEIGHBORS);
  if (c == eob) {
    return 0;
  } else {
    int ctx;
    assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0);
    if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) {
      ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] +
             token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1;
    } else {
      ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]];
    }
    return ctx;
  }
}
void vp9_default_coef_probs(VP9_COMMON *pc) {
vpx_memcpy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4,
sizeof(pc->fc.coef_probs[TX_4X4]));

Просмотреть файл

@ -148,8 +148,27 @@ static int get_coef_band(const uint8_t * band_translate, int coef_index) {
? (COEF_BANDS-1) : band_translate[coef_index];
}
extern int vp9_get_coef_context(const int *scan, const int *neighbors,
int nb_pad, uint8_t *token_cache, int c, int l);
#define MAX_NEIGHBORS 2
// Computes the entropy context for the coefficient at scan position c.
//
// Each scan position owns MAX_NEIGHBORS consecutive entries in neighbors;
// every entry is a scan position that is translated through scan before
// indexing token_cache. The first entry must be non-negative. If the second
// entry is negative only the first neighbor contributes; otherwise the
// result is (1 + first + second) >> 1 of the two cached values.
//
// nb_pad must equal MAX_NEIGHBORS. When c equals l (the end-of-block
// position) the context is 0 and no table is read. token_cache is not
// modified.
static INLINE int get_coef_context(const int *scan, const int *neighbors,
                                   int nb_pad, uint8_t *token_cache,
                                   int c, int l) {
  const int *pair;
  int first_val;

  assert(nb_pad == MAX_NEIGHBORS);

  // At end-of-block there is nothing to look up.
  if (c == l)
    return 0;

  pair = &neighbors[MAX_NEIGHBORS * c];
  assert(pair[0] >= 0);
  first_val = token_cache[scan[pair[0]]];

  // A negative second entry marks a position with a single neighbor.
  if (pair[1] < 0)
    return first_val;

  return (1 + first_val + token_cache[scan[pair[1]]]) >> 1;
}
const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad);

Просмотреть файл

@ -172,8 +172,8 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd,
if (c >= seg_eob)
break;
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache,
c, default_eob);
pt = get_coef_context(scan, nb, pad, token_cache,
c, default_eob);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];
#if !CONFIG_BALANCED_COEFTREE
@ -186,8 +186,8 @@ SKIP_START:
if (c >= seg_eob)
break;
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache,
c, default_eob);
pt = get_coef_context(scan, nb, pad, token_cache,
c, default_eob);
band = get_coef_band(band_translate, c);
prob = coef_probs[band][pt];

Просмотреть файл

@ -116,7 +116,7 @@ static int trellis_get_coeff_context(const int *scan,
int pad, int l) {
int bak = token_cache[scan[idx]], pt;
token_cache[scan[idx]] = vp9_pt_energy_class[token];
pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
pt = get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
token_cache[scan[idx]] = bak;
return pt;
}

Просмотреть файл

@ -402,7 +402,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
v = qcoeff_ptr[rc];
t = vp9_dct_value_tokens_ptr[v].token;
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
token_cache[rc] = vp9_pt_energy_class[t];
prev_t = t;
@ -410,7 +410,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
// eob token
if (c < seg_eob) {
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
cost += token_costs[0][get_coef_band(band_translate, c)][pt]
[DCT_EOB_TOKEN];
}

Просмотреть файл

@ -193,7 +193,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
int v = 0;
rc = scan[c];
if (c)
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
if (c < eob) {
v = qcoeff_ptr[rc];
assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE);