Inline vp9_get_coef_context() (and remove vp9_ prefix).
Makes cost_coeffs() a lot faster:
  4x4:   236 -> 181 cycles
  8x8:   888 -> 588 cycles
  16x16: 3550 -> 2483 cycles
  32x32: 17392 -> 12010 cycles
Total encode time of the first 50 frames of bus (speed 0) @ 1500kbps goes from 2min51.6 to 2min43.9, i.e. a 4.7% overall speedup.
Change-Id: I16b8d595946393c8dc661599550b3f37f5718896
This commit is contained in:
Родитель
e3ce2b2ab3
Коммит
d00b8e5f82
|
@ -445,32 +445,6 @@ vp9_extra_bit vp9_extra_bits[12] = {
|
|||
|
||||
#include "vp9/common/vp9_default_coef_probs.h"
|
||||
|
||||
// Returns the coefficient context for scan position c.
//
//   scan        - lookup table; scan[p] is used as the index into token_cache.
//   neighbors   - MAX_NEIGHBORS entries per scan position. Entry 0 for
//                 position c must be non-negative; a negative entry 1 means
//                 that only entry 0 contributes to the context.
//   nb_pad      - number of neighbor entries per position; must equal
//                 MAX_NEIGHBORS (checked by assert only).
//   token_cache - per-coefficient values previously stored by the caller;
//                 read here, never written.
//   c           - scan position whose context is requested.
//   l           - end-of-block position; the context is 0 when c == l.
//
// With two valid neighbors the result is the rounded-up average of their
// cached values, (1 + a + b) >> 1; with one it is that neighbor's value.
#define MAX_NEIGHBORS 2
int vp9_get_coef_context(const int *scan, const int *neighbors,
                         int nb_pad, uint8_t *token_cache, int c, int l) {
  const int *nb;
  int ctx;

  assert(nb_pad == MAX_NEIGHBORS);
  (void)nb_pad;  // Only read by the assert; keeps NDEBUG builds warning-free.

  if (c == l) {
    return 0;
  }

  // Form the neighbor pointer only after the end-of-block check so that
  // c == l never indexes the table.
  nb = &neighbors[MAX_NEIGHBORS * c];
  assert(nb[0] >= 0);

  ctx = token_cache[scan[nb[0]]];
  if (nb[1] >= 0) {
    ctx = (1 + ctx + token_cache[scan[nb[1]]]) >> 1;
  }
  return ctx;
}
|
||||
|
||||
void vp9_default_coef_probs(VP9_COMMON *pc) {
|
||||
vpx_memcpy(pc->fc.coef_probs[TX_4X4], default_coef_probs_4x4,
|
||||
sizeof(pc->fc.coef_probs[TX_4X4]));
|
||||
|
|
|
@ -148,8 +148,27 @@ static int get_coef_band(const uint8_t * band_translate, int coef_index) {
|
|||
? (COEF_BANDS-1) : band_translate[coef_index];
|
||||
}
|
||||
|
||||
extern int vp9_get_coef_context(const int *scan, const int *neighbors,
|
||||
int nb_pad, uint8_t *token_cache, int c, int l);
|
||||
#define MAX_NEIGHBORS 2
|
||||
static INLINE int get_coef_context(const int *scan, const int *neighbors,
|
||||
int nb_pad, uint8_t *token_cache,
|
||||
int c, int l) {
|
||||
int eob = l;
|
||||
assert(nb_pad == MAX_NEIGHBORS);
|
||||
if (c == eob) {
|
||||
return 0;
|
||||
} else {
|
||||
int ctx;
|
||||
assert(neighbors[MAX_NEIGHBORS * c + 0] >= 0);
|
||||
if (neighbors[MAX_NEIGHBORS * c + 1] >= 0) {
|
||||
ctx = (1 + token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]] +
|
||||
token_cache[scan[neighbors[MAX_NEIGHBORS * c + 1]]]) >> 1;
|
||||
} else {
|
||||
ctx = token_cache[scan[neighbors[MAX_NEIGHBORS * c + 0]]];
|
||||
}
|
||||
return ctx;
|
||||
}
|
||||
}
|
||||
|
||||
const int *vp9_get_coef_neighbors_handle(const int *scan, int *pad);
|
||||
|
||||
|
||||
|
|
|
@ -172,8 +172,8 @@ static int decode_coefs(FRAME_CONTEXT *fc, const MACROBLOCKD *xd,
|
|||
if (c >= seg_eob)
|
||||
break;
|
||||
if (c)
|
||||
pt = vp9_get_coef_context(scan, nb, pad, token_cache,
|
||||
c, default_eob);
|
||||
pt = get_coef_context(scan, nb, pad, token_cache,
|
||||
c, default_eob);
|
||||
band = get_coef_band(band_translate, c);
|
||||
prob = coef_probs[band][pt];
|
||||
#if !CONFIG_BALANCED_COEFTREE
|
||||
|
@ -186,8 +186,8 @@ SKIP_START:
|
|||
if (c >= seg_eob)
|
||||
break;
|
||||
if (c)
|
||||
pt = vp9_get_coef_context(scan, nb, pad, token_cache,
|
||||
c, default_eob);
|
||||
pt = get_coef_context(scan, nb, pad, token_cache,
|
||||
c, default_eob);
|
||||
band = get_coef_band(band_translate, c);
|
||||
prob = coef_probs[band][pt];
|
||||
|
||||
|
|
|
@ -116,7 +116,7 @@ static int trellis_get_coeff_context(const int *scan,
|
|||
int pad, int l) {
|
||||
int bak = token_cache[scan[idx]], pt;
|
||||
token_cache[scan[idx]] = vp9_pt_energy_class[token];
|
||||
pt = vp9_get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
|
||||
pt = get_coef_context(scan, nb, pad, token_cache, idx + 1, l);
|
||||
token_cache[scan[idx]] = bak;
|
||||
return pt;
|
||||
}
|
||||
|
|
|
@ -402,7 +402,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
|
|||
|
||||
v = qcoeff_ptr[rc];
|
||||
t = vp9_dct_value_tokens_ptr[v].token;
|
||||
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
|
||||
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
|
||||
cost += token_costs[!prev_t][band][pt][t] + vp9_dct_value_cost_ptr[v];
|
||||
token_cache[rc] = vp9_pt_energy_class[t];
|
||||
prev_t = t;
|
||||
|
@ -410,7 +410,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
|
|||
|
||||
// eob token
|
||||
if (c < seg_eob) {
|
||||
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
|
||||
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
|
||||
cost += token_costs[0][get_coef_band(band_translate, c)][pt]
|
||||
[DCT_EOB_TOKEN];
|
||||
}
|
||||
|
|
|
@ -193,7 +193,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE_TYPE bsize,
|
|||
int v = 0;
|
||||
rc = scan[c];
|
||||
if (c)
|
||||
pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
|
||||
pt = get_coef_context(scan, nb, pad, token_cache, c, default_eob);
|
||||
if (c < eob) {
|
||||
v = qcoeff_ptr[rc];
|
||||
assert(-DCT_MAX_VALUE <= v && v < DCT_MAX_VALUE);
|
||||
|
|
Загрузка…
Ссылка в новой задаче