pvq: Remove support for disabling nodesync.

This was broken by commit 1238137c3c.

When the generic coder is called with a max, it truncates the CDF, but
then tries to encode/decode with the Q15 entropy coder functions
with built-in adaptation. That causes assertion failures, because the
total probability of the truncated CDF isn't 32768.

We could fix it by re-scaling the CDF, and then doing adaptation
assuming there was no max, but that requires several special-case
code paths.

Instead, since non-robust streams were the only thing that still
required calling the generic coder with a max, and since the gain
from them is very small (and they require doing more DSP just to be
able to parse the stream), we simply remove the option and force
the use of robust streams all the time.

With robust streams enabled, encoder output should not change, and
all streams should remain decodable without decoder changes.

Thanks to Nathan Egge for reporting the problem.

Change-Id: I6c81481abb796688bf703d68f164d208e6a69f20
This commit is contained in:
Timothy B. Terriberry 2017-04-07 15:44:14 -07:00 коммит произвёл Tim Terriberry
Родитель 281def7349
Коммит 44bb6d06ca
17 изменённых файлов: 79 добавлений и 185 удалений

Просмотреть файл

@ -87,7 +87,6 @@ static INLINE int is_inter_mode(PREDICTION_MODE mode) {
#if CONFIG_PVQ
typedef struct PVQ_INFO {
int theta[PVQ_MAX_PARTITIONS];
int max_theta[PVQ_MAX_PARTITIONS];
int qg[PVQ_MAX_PARTITIONS];
int k[PVQ_MAX_PARTITIONS];
od_coeff y[OD_TXSIZE_MAX * OD_TXSIZE_MAX];

Просмотреть файл

@ -19,8 +19,8 @@
# define GENERIC_TABLES 12
#define generic_decode(r, model, max, ex_q16, integration, ACCT_STR_NAME) \
generic_decode_(r, model, max, ex_q16, integration ACCT_STR_ARG(ACCT_STR_NAME))
#define generic_decode(r, model, ex_q16, integration, ACCT_STR_NAME) \
generic_decode_(r, model, ex_q16, integration ACCT_STR_ARG(ACCT_STR_NAME))
#define aom_decode_cdf_adapt_q15(r, cdf, n, count, rate, ACCT_STR_NAME) \
aom_decode_cdf_adapt_q15_(r, cdf, n, count, rate ACCT_STR_ARG(ACCT_STR_NAME))
#define aom_decode_cdf_adapt(r, cdf, n, increment, ACCT_STR_NAME) \
@ -70,17 +70,16 @@ void aom_encode_cdf_adapt(aom_writer *w, int val, uint16_t *cdf, int n,
int aom_decode_cdf_adapt_(aom_reader *r, uint16_t *cdf, int n,
int increment ACCT_STR_PARAM);
void generic_encode(aom_writer *w, generic_encoder *model, int x, int max,
void generic_encode(aom_writer *w, generic_encoder *model, int x,
int *ex_q16, int integration);
double generic_encode_cost(generic_encoder *model, int x, int max,
int *ex_q16);
double generic_encode_cost(generic_encoder *model, int x, int *ex_q16);
double od_encode_cdf_cost(int val, uint16_t *cdf, int n);
int aom_decode_cdf_adapt_q15_(aom_reader *r, uint16_t *cdf, int n,
int *count, int rate ACCT_STR_PARAM);
int generic_decode_(aom_reader *r, generic_encoder *model, int max,
int generic_decode_(aom_reader *r, generic_encoder *model,
int *ex_q16, int integration ACCT_STR_PARAM);
int log_ex(int ex_q16);

Просмотреть файл

@ -84,8 +84,6 @@ extern "C" {
# define OD_LIMIT_BSIZE_MIN (OD_BLOCK_4X4)
# define OD_LIMIT_BSIZE_MAX (OD_BLOCK_32X32)
# define OD_ROBUST_STREAM (1)
typedef int od_coeff;
#define OD_DIVU_DMAX (1024)

Просмотреть файл

@ -792,16 +792,14 @@ od_val32 od_pvq_compute_theta(int t, int max_theta) {
*
* @param [in] qcg quantized companded gain value
* @param [in] itheta quantized PVQ error angle theta
* @param [in] theta PVQ error angle theta
* @param [in] noref indicates presence or lack of reference
* (prediction)
* @param [in] n number of elements to be coded
* @param [in] beta activity masking beta param
* @param [in] nodesync do not use info that depends on the reference
* @return number of pulses to use for coding
*/
int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref, int n,
od_val16 beta, int nodesync) {
int od_pvq_compute_k(od_val32 qcg, int itheta, int noref, int n,
od_val16 beta) {
#if !defined(OD_FLOAT_PVQ)
/*Lookup table for sqrt(n+3/2) and sqrt(n+2/2) in Q10.
Real max values are 32792 and 32784, but clamped to stay within 16 bits.
@ -839,24 +837,17 @@ int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref, int n,
approximation for the fact that the coefficients aren't identically
distributed within a band so at low gain the number of dimensions that
are likely to have a pulse is less than n. */
if (nodesync) {
#if defined(OD_FLOAT_PVQ)
return OD_MAXI(1, (int)floor(.5 + (itheta - .2)*sqrt((n + 2)/2)));
return OD_MAXI(1, (int)floor(.5 + (itheta - .2)*sqrt((n + 2)/2)));
#else
od_val16 rt;
OD_ASSERT(OD_ILOG(n + 1) < 13);
rt = od_sqrt_table[0][OD_ILOG(n + 1)];
/*FIXME: get rid of 64-bit mul.*/
return OD_MAXI(1, OD_VSHR_ROUND(((OD_SHL(itheta, OD_ITHETA_SHIFT)
- OD_QCONST32(.2, OD_ITHETA_SHIFT)))*(int64_t)rt,
OD_SQRT_TBL_SHIFT + OD_ITHETA_SHIFT));
od_val16 rt;
OD_ASSERT(OD_ILOG(n + 1) < 13);
rt = od_sqrt_table[0][OD_ILOG(n + 1)];
/*FIXME: get rid of 64-bit mul.*/
return OD_MAXI(1, OD_VSHR_ROUND(((OD_SHL(itheta, OD_ITHETA_SHIFT)
- OD_QCONST32(.2, OD_ITHETA_SHIFT)))*(int64_t)rt,
OD_SQRT_TBL_SHIFT + OD_ITHETA_SHIFT));
#endif
}
else {
return OD_MAXI(1, (int)floor(.5 + (qcg*OD_CGAIN_SCALE_1*
od_pvq_sin(theta)*OD_TRIG_SCALE_1 - .2)*sqrt((n
+ 2)/2)/(beta*OD_BETA_SCALE_1)));
}
}
}

Просмотреть файл

@ -175,8 +175,7 @@ od_val32 od_pvq_compute_gain(const od_val16 *x, int n, int q0, od_val32 *g,
od_val16 beta, int bshift);
int od_pvq_compute_max_theta(od_val32 qcg, od_val16 beta);
od_val32 od_pvq_compute_theta(int t, int max_theta);
int od_pvq_compute_k(od_val32 qcg, int itheta, od_val32 theta, int noref,
int n, od_val16 beta, int nodesync);
int od_pvq_compute_k(od_val32 qcg, int itheta, int noref, int n, od_val16 beta);
int od_vector_is_null(const od_coeff *x, int len);
int od_qm_offset(int bs, int xydec);

Просмотреть файл

@ -388,14 +388,14 @@ static int av1_pvq_decode_helper(MACROBLOCKD *xd, tran_low_t *ref_coeff,
od_pvq_decode(dec, ref_int32, out_int32,
OD_MAXI(1, quant[1] << (OD_COEFF_SHIFT - 3) >> hbd_downshift),
pli, bs, OD_PVQ_BETA[use_activity_masking][pli][bs],
OD_ROBUST_STREAM, is_keyframe, &flags, ac_dc_coded,
dec->state.qm + off, dec->state.qm_inv + off);
is_keyframe, &flags, ac_dc_coded, dec->state.qm + off,
dec->state.qm_inv + off);
if (!has_dc_skip || out_int32[0]) {
out_int32[0] =
has_dc_skip + generic_decode(dec->r, &dec->state.adapt->model_dc[pli],
-1, &dec->state.adapt->ex_dc[pli][bs][0],
2, "dc:mag");
&dec->state.adapt->ex_dc[pli][bs][0], 2,
"dc:mag");
if (out_int32[0]) out_int32[0] *= aom_read_bit(dec->r, "dc:sign") ? -1 : 1;
}
out_int32[0] = out_int32[0] * pvq_dc_quant + ref_int32[0];

Просмотреть файл

@ -86,7 +86,7 @@ int aom_decode_cdf_adapt_(aom_reader *r, uint16_t *cdf, int n,
*
* @retval decoded variable x
*/
int generic_decode_(aom_reader *r, generic_encoder *model, int max,
int generic_decode_(aom_reader *r, generic_encoder *model,
int *ex_q16, int integration ACCT_STR_PARAM) {
int lg_q1;
int shift;
@ -95,9 +95,7 @@ int generic_decode_(aom_reader *r, generic_encoder *model, int max,
int xs;
int lsb;
int x;
int ms;
lsb = 0;
if (max == 0) return 0;
lg_q1 = log_ex(*ex_q16);
/* If expectation is too large, shift x to ensure that
all we have past xs=15 is the exponentially decaying tail
@ -106,9 +104,7 @@ int generic_decode_(aom_reader *r, generic_encoder *model, int max,
/* Choose the cdf to use: we have two per "octave" of ExQ16. */
id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
cdf = model->cdf[id];
ms = (max + (1 << shift >> 1)) >> shift;
if (max == -1) xs = aom_read_symbol_pvq(r, cdf, 16, ACCT_STR_NAME);
else xs = aom_read_symbol_pvq(r, cdf, OD_MINI(ms + 1, 16), ACCT_STR_NAME);
xs = aom_read_symbol_pvq(r, cdf, 16, ACCT_STR_NAME);
if (xs == 15) {
int e;
unsigned decay;
@ -119,7 +115,7 @@ int generic_decode_(aom_reader *r, generic_encoder *model, int max,
OD_ASSERT(*ex_q16 < INT_MAX >> 1);
e = ((2**ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
xs += aom_laplace_decode_special(r, decay, (max == -1) ? -1 : ms - 15, ACCT_STR_NAME);
xs += aom_laplace_decode_special(r, decay, ACCT_STR_NAME);
}
if (shift != 0) {
int special;

Просмотреть файл

@ -81,29 +81,24 @@ void aom_decode_band_pvq_splits(aom_reader *r, od_pvq_codeword_ctx *adapt,
*
* @param [dec] range decoder
* @param [decay] decay factor of the distribution, i.e. pdf ~= decay^x
* @param [max] maximum possible value of x (used to truncate the pdf)
*
* @retval decoded variable x
*/
int aom_laplace_decode_special_(aom_reader *r, unsigned decay,
int max ACCT_STR_PARAM) {
int aom_laplace_decode_special_(aom_reader *r, unsigned decay ACCT_STR_PARAM) {
int pos;
int shift;
int xs;
int ms;
int sym;
const uint16_t *cdf;
shift = 0;
if (max == 0) return 0;
/* We don't want a large decay value because that would require too many
symbols. However, it's OK if the max is below 15. */
while (((max >> shift) >= 15 || max == -1) && decay > 235) {
symbols. */
while (decay > 235) {
decay = (decay*decay + 128) >> 8;
shift++;
}
decay = OD_MINI(decay, 254);
decay = OD_MAXI(decay, 2);
ms = max >> shift;
cdf = EXP_CDF_TABLE[(decay + 1) >> 1];
OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d\n", decay));
xs = 0;
@ -111,32 +106,16 @@ int aom_laplace_decode_special_(aom_reader *r, unsigned decay,
sym = OD_MINI(xs, 15);
{
int i;
OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d %d", xs, shift, sym, max));
OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "%d %d %d", xs, shift, sym));
for (i = 0; i < 16; i++) {
OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "%d ", cdf[i]));
}
OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n"));
}
if (ms > 0 && ms < 15) {
/* Simple way of truncating the pdf when we have a bound. */
sym = aom_read_cdf_unscaled(r, cdf, ms + 1, ACCT_STR_NAME);
}
else sym = aom_read_cdf(r, cdf, 16, ACCT_STR_NAME);
sym = aom_read_cdf(r, cdf, 16, ACCT_STR_NAME);
xs += sym;
ms -= 15;
}
while (sym >= 15 && ms != 0);
} while (sym >= 15);
if (shift) pos = (xs << shift) + aom_read_literal(r, shift, ACCT_STR_NAME);
else pos = xs;
OD_ASSERT(pos >> shift <= max >> shift || max == -1);
if (max != -1 && pos > max) {
pos = max;
#if CONFIG_DAALA_EC
r->ec.error = 1;
#else
# error "CONFIG_PVQ currently requires CONFIG_DAALA_EC."
#endif
}
OD_ASSERT(pos <= max || max == -1);
return pos;
}

Просмотреть файл

@ -108,7 +108,6 @@ typedef struct {
* @param [out] out decoded partition
* @param [out] noref boolean indicating absence of reference
* @param [in] beta per-band activity masking beta param
* @param [in] nodesync stream is robust to error in the reference
* @param [in] is_keyframe whether we're encoding a keyframe
* @param [in] pli plane index
* @param [in] cdf_ctx selects which cdf context to use
@ -130,7 +129,6 @@ static void pvq_decode_partition(aom_reader *r,
od_coeff *out,
int *noref,
od_val16 beta,
int nodesync,
int is_keyframe,
int pli,
int cdf_ctx,
@ -143,7 +141,6 @@ static void pvq_decode_partition(aom_reader *r,
const int16_t *qm_inv) {
int k;
od_val32 qcg;
int max_theta;
int itheta;
od_val32 theta;
od_val32 gr;
@ -171,8 +168,7 @@ static void pvq_decode_partition(aom_reader *r,
}
else {
/* Jointly decode gain, itheta and noref for small values. Then we handle
larger gain. We need to wait for itheta because in the !nodesync case
it depends on max_theta, which depends on the gain. */
larger gain. */
id = aom_read_symbol_pvq(r, &adapt->pvq.pvq_gaintheta_cdf[cdf_ctx][0],
8 + 7*has_skip, "pvq:gaintheta");
if (!is_keyframe && id >= 10) id++;
@ -197,7 +193,7 @@ static void pvq_decode_partition(aom_reader *r,
if (qg > 0) {
int tmp;
tmp = *exg;
qg = 1 + generic_decode(r, &model[!*noref], -1, &tmp, 2, "pvq:gain");
qg = 1 + generic_decode(r, &model[!*noref], &tmp, 2, "pvq:gain");
OD_IIR_DIADIC(*exg, qg << 16, 2);
}
*skip = 0;
@ -239,15 +235,13 @@ static void pvq_decode_partition(aom_reader *r,
gain_offset = cgr - OD_SHL(icgr, OD_CGAIN_SHIFT);
qcg = OD_SHL(qg, OD_CGAIN_SHIFT) + gain_offset;
/* read and decode first-stage PVQ error theta */
max_theta = od_pvq_compute_max_theta(qcg, beta);
if (itheta > 1 && (nodesync || max_theta > 3)) {
if (itheta > 1) {
int tmp;
tmp = *ext;
itheta = 2 + generic_decode(r, &model[2],
nodesync ? -1 : max_theta - 3, &tmp, 2, "pvq:theta");
itheta = 2 + generic_decode(r, &model[2], &tmp, 2, "pvq:theta");
OD_IIR_DIADIC(*ext, itheta << 16, 2);
}
theta = od_pvq_compute_theta(itheta, max_theta);
theta = od_pvq_compute_theta(itheta, od_pvq_compute_max_theta(qcg, beta));
}
else{
itheta = 0;
@ -256,7 +250,7 @@ static void pvq_decode_partition(aom_reader *r,
if (qg == 0) *skip = OD_PVQ_SKIP_ZERO;
}
k = od_pvq_compute_k(qcg, itheta, theta, *noref, n, beta, nodesync);
k = od_pvq_compute_k(qcg, itheta, *noref, n, beta);
if (k != 0) {
/* when noref==0, y is actually size n-1 */
aom_decode_pvq_codeword(r, &adapt->pvq.pvq_codeword_ctx, y,
@ -287,7 +281,6 @@ static void pvq_decode_partition(aom_reader *r,
* @param [in] pli plane index
* @param [in] bs log of the block size minus two
* @param [in] beta per-band activity masking beta param
* @param [in] nodesync stream is robust to error in the reference
* @param [in] is_keyframe whether we're encoding a keyframe
* @param [out] flags bitmask of the per band skip and noref flags
* @param [in] ac_dc_coded skip flag for the block (range 0-3)
@ -301,7 +294,6 @@ void od_pvq_decode(daala_dec_ctx *dec,
int pli,
int bs,
const od_val16 *beta,
int nodesync,
int is_keyframe,
unsigned int *flags,
PVQ_SKIP_TYPE ac_dc_coded,
@ -362,7 +354,7 @@ void od_pvq_decode(daala_dec_ctx *dec,
pvq_decode_partition(dec->r, q, size[i],
model, dec->state.adapt, exg + i, ext + i, ref + off[i], out + off[i],
&noref[i], beta[i], nodesync, is_keyframe, pli,
&noref[i], beta[i], is_keyframe, pli,
(pli != 0)*OD_TXSIZES*PVQ_MAX_PARTITIONS + bs*PVQ_MAX_PARTITIONS + i,
&cfl, i == 0 && (i < nb_bands - 1), skip_rest, i, &skip[i],
qm + off[i], qm_inv + off[i]);

Просмотреть файл

@ -27,14 +27,13 @@ int aom_read_symbol_pvq_(aom_reader *r, aom_cdf_prob *cdf, int nsymbs
void aom_decode_band_pvq_splits(aom_reader *r, od_pvq_codeword_ctx *adapt,
od_coeff *y, int n, int k, int level);
#define aom_laplace_decode_special(r, decay, max, ACCT_STR_NAME) \
aom_laplace_decode_special_(r, decay, max ACCT_STR_ARG(ACCT_STR_NAME))
#define aom_laplace_decode_special(r, decay, ACCT_STR_NAME) \
aom_laplace_decode_special_(r, decay ACCT_STR_ARG(ACCT_STR_NAME))
int aom_laplace_decode_special_(aom_reader *r, unsigned decay,
int max ACCT_STR_PARAM);
int aom_laplace_decode_special_(aom_reader *r, unsigned decay ACCT_STR_PARAM);
void od_pvq_decode(daala_dec_ctx *dec, od_coeff *ref, od_coeff *out, int q0,
int pli, int bs, const od_val16 *beta, int nodesync, int is_keyframe,
int pli, int bs, const od_val16 *beta, int is_keyframe,
unsigned int *flags, PVQ_SKIP_TYPE ac_dc_coded, const int16_t *qm,
const int16_t *qm_inv);

Просмотреть файл

@ -1035,7 +1035,6 @@ static void pack_pvq_tokens(aom_writer *w, MACROBLOCK *const x,
const int is_keyframe = 0;
const int encode_flip = 0;
const int flip = 0;
const int nodesync = 1;
int i;
const int has_dc_skip = 1;
int *exg = &adapt->pvq.pvq_exg[plane][tx_size][0];
@ -1055,9 +1054,8 @@ static void pack_pvq_tokens(aom_writer *w, MACROBLOCK *const x,
if (i == 0 ||
(!pvq->skip_rest && !(pvq->skip_dir & (1 << ((i - 1) % 3))))) {
pvq_encode_partition(
w, pvq->qg[i], pvq->theta[i], pvq->max_theta[i],
pvq->y + pvq->off[i], pvq->size[i], pvq->k[i], model, adapt,
exg + i, ext + i, nodesync || is_keyframe,
w, pvq->qg[i], pvq->theta[i], pvq->y + pvq->off[i],
pvq->size[i], pvq->k[i], model, adapt, exg + i, ext + i,
(plane != 0) * OD_TXSIZES * PVQ_MAX_PARTITIONS +
pvq->bs * PVQ_MAX_PARTITIONS + i,
is_keyframe, i == 0 && (i < pvq->nb_bands - 1), pvq->skip_rest,
@ -1075,7 +1073,7 @@ static void pack_pvq_tokens(aom_writer *w, MACROBLOCK *const x,
// Encode residue of DC coeff, if exist.
if (!has_dc_skip || (pvq->ac_dc_coded & DC_CODED)) {
generic_encode(w, &adapt->model_dc[plane],
abs(pvq->dq_dc_residue) - has_dc_skip, -1,
abs(pvq->dq_dc_residue) - has_dc_skip,
&adapt->ex_dc[plane][pvq->bs][0], 2);
}
if ((pvq->ac_dc_coded & DC_CODED)) {

Просмотреть файл

@ -1218,7 +1218,6 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
hbd_downshift), // scale/quantizer
plane,
tx_size, OD_PVQ_BETA[use_activity_masking][plane][tx_size],
OD_ROBUST_STREAM,
0, // is_keyframe,
daala_enc->state.qm + off, daala_enc->state.qm_inv + off,
speed, // speed
@ -1227,7 +1226,7 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
// Encode residue of DC coeff, if required.
if (!has_dc_skip || out_int32[0]) {
generic_encode(&daala_enc->w, &daala_enc->state.adapt->model_dc[plane],
abs(out_int32[0]) - has_dc_skip, -1,
abs(out_int32[0]) - has_dc_skip,
&daala_enc->state.adapt->ex_dc[plane][tx_size][0], 2);
}
if (out_int32[0]) {
@ -1267,10 +1266,9 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
return ac_dc_coded;
}
void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta,
int *max_theta, int *k, od_coeff *y, int nb_bands,
const int *off, int *size, int skip_rest,
int skip_dir,
void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k,
od_coeff *y, int nb_bands, const int *off,
int *size, int skip_rest, int skip_dir,
int bs) { // block size in log_2 -2
int i;
const int tx_blk_size = tx_size_wide[bs];
@ -1278,7 +1276,6 @@ void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta,
for (i = 0; i < nb_bands; i++) {
pvq_info->qg[i] = qg[i];
pvq_info->theta[i] = theta[i];
pvq_info->max_theta[i] = max_theta[i];
pvq_info->k[i] = k[i];
pvq_info->off[i] = off[i];
pvq_info->size[i] = size[i];

Просмотреть файл

@ -80,10 +80,9 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff,
int tx_size, TX_TYPE tx_type, int *rate,
int speed, PVQ_INFO *pvq_info);
void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta,
int *max_theta, int *k, od_coeff *y, int nb_bands,
const int *off, int *size, int skip_rest,
int skip_dir, int bs);
void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k,
od_coeff *y, int nb_bands, const int *off,
int *size, int skip_rest, int skip_dir, int bs);
#endif
#ifdef __cplusplus

Просмотреть файл

@ -78,20 +78,17 @@ void aom_encode_cdf_adapt(aom_writer *w, int val, uint16_t *cdf, int n,
* @param [in,out] w multi-symbol entropy encoder
* @param [in,out] model generic probability model
* @param [in] x variable being encoded
* @param [in] max largest value possible
* @param [in,out] ExQ16 expectation of x (adapted)
* @param [in] integration integration period of ExQ16 (leaky average over
* 1<<integration samples)
*/
void generic_encode(aom_writer *w, generic_encoder *model, int x, int max,
void generic_encode(aom_writer *w, generic_encoder *model, int x,
int *ex_q16, int integration) {
int lg_q1;
int shift;
int id;
uint16_t *cdf;
int xs;
int ms;
if (max == 0) return;
lg_q1 = log_ex(*ex_q16);
OD_LOG((OD_LOG_ENTROPY_CODER, OD_LOG_DEBUG,
"%d %d", *ex_q16, lg_q1));
@ -103,12 +100,7 @@ void generic_encode(aom_writer *w, generic_encoder *model, int x, int max,
id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
cdf = model->cdf[id];
xs = (x + (1 << shift >> 1)) >> shift;
ms = (max + (1 << shift >> 1)) >> shift;
OD_ASSERT(max == -1 || xs <= ms);
if (max == -1) aom_write_symbol_pvq(w, OD_MINI(15, xs), cdf, 16);
else {
aom_write_symbol_pvq(w, OD_MINI(15, xs), cdf, OD_MINI(ms + 1, 16));
}
aom_write_symbol_pvq(w, OD_MINI(15, xs), cdf, 16);
if (xs >= 15) {
int e;
unsigned decay;
@ -120,7 +112,7 @@ void generic_encode(aom_writer *w, generic_encoder *model, int x, int max,
e = ((2**ex_q16 >> 8) + (1 << shift >> 1)) >> shift;
decay = OD_MAXI(2, OD_MINI(254, 256*e/(e + 256)));
/* Encode the tail of the distribution assuming exponential decay. */
aom_laplace_encode_special(w, xs - 15, decay, (max == -1) ? -1 : ms - 15);
aom_laplace_encode_special(w, xs - 15, decay);
}
if (shift != 0) {
int special;
@ -141,20 +133,16 @@ void generic_encode(aom_writer *w, generic_encoder *model, int x, int max,
*
* @param [in,out] model generic probability model
* @param [in] x variable being encoded
* @param [in] max largest value possible
* @param [in,out] ExQ16 expectation of x (adapted)
* @return number of bits (approximation)
*/
double generic_encode_cost(generic_encoder *model, int x, int max,
int *ex_q16) {
double generic_encode_cost(generic_encoder *model, int x, int *ex_q16) {
int lg_q1;
int shift;
int id;
uint16_t *cdf;
int xs;
int ms;
int extra;
if (max == 0) return 0;
lg_q1 = log_ex(*ex_q16);
/* If expectation is too large, shift x to ensure that
all we have past xs=15 is the exponentially decaying tail
@ -164,21 +152,13 @@ double generic_encode_cost(generic_encoder *model, int x, int max,
id = OD_MINI(GENERIC_TABLES - 1, lg_q1);
cdf = model->cdf[id];
xs = (x + (1 << shift >> 1)) >> shift;
ms = (max + (1 << shift >> 1)) >> shift;
OD_ASSERT(max == -1 || xs <= ms);
extra = 0;
if (shift) extra = shift - (xs == 0);
xs = OD_MINI(15, xs);
/* Shortcut: assume it's going to cost 2 bits for the Laplace coder. */
if (xs == 15) extra += 2;
if (max == -1) {
return extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ? 0 : cdf[xs - 1]))/
cdf[15]);
}
else {
return extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ? 0 : cdf[xs - 1]))/
cdf[OD_MINI(ms, 15)]);
}
return
extra - OD_LOG2((double)(cdf[xs] - (xs == 0 ? 0 : cdf[xs - 1]))/cdf[15]);
}
/*Estimates the cost of encoding a value with a given CDF.*/

Просмотреть файл

@ -71,28 +71,22 @@ void aom_encode_band_pvq_splits(aom_writer *w, od_pvq_codeword_ctx *adapt,
* @param [in] x variable to encode (has to be positive)
* @param [in] decay decay factor of the distribution in Q8 format,
* i.e. pdf ~= decay^x
* @param [in] max maximum possible value of x (used to truncate
* the pdf)
*/
void aom_laplace_encode_special(aom_writer *w, int x, unsigned decay, int max) {
void aom_laplace_encode_special(aom_writer *w, int x, unsigned decay) {
int shift;
int xs;
int ms;
int sym;
const uint16_t *cdf;
shift = 0;
if (max == 0) return;
/* We don't want a large decay value because that would require too many
symbols. However, it's OK if the max is below 15. */
while (((max >> shift) >= 15 || max == -1) && decay > 235) {
symbols. */
while (decay > 235) {
decay = (decay*decay + 128) >> 8;
shift++;
}
OD_ASSERT(x <= max || max == -1);
decay = OD_MINI(decay, 254);
decay = OD_MAXI(decay, 2);
xs = x >> shift;
ms = max >> shift;
cdf = EXP_CDF_TABLE[(decay + 1) >> 1];
OD_LOG((OD_LOG_PVQ, OD_LOG_DEBUG, "decay = %d", decay));
do {
@ -106,16 +100,8 @@ void aom_laplace_encode_special(aom_writer *w, int x, unsigned decay, int max) {
}
OD_LOG_PARTIAL((OD_LOG_PVQ, OD_LOG_DEBUG, "\n"));
}
if (ms > 0 && ms < 15) {
/* Simple way of truncating the pdf when we have a bound */
aom_write_cdf_unscaled(w, sym, cdf, ms + 1);
}
else {
aom_write_cdf(w, sym, cdf, 16);
}
aom_write_cdf(w, sym, cdf, 16);
xs -= 15;
ms -= 15;
}
while (sym >= 15 && ms != 0);
} while (sym >= 15);
if (shift) aom_write_literal(w, x & ((1 << shift) - 1), shift);
}

Просмотреть файл

@ -303,12 +303,10 @@ int items_compare(pvq_search_item *a, pvq_search_item *b) {
* @param [in] q0 quantization step size
* @param [out] y pulse vector (i.e. selected PVQ codevector)
* @param [out] itheta angle between input and reference (-1 if noref)
* @param [out] max_theta maximum value of itheta that could have been
* @param [out] vk total number of pulses
* @param [in] beta per-band activity masking beta param
* @param [out] skip_diff distortion cost of skipping this block
* (accumulated)
* @param [in] nodesync make stream robust to error in the reference
* @param [in] is_keyframe whether we're encoding a keyframe
* @param [in] pli plane index
* @param [in] adapt probability adaptation context
@ -319,8 +317,8 @@ int items_compare(pvq_search_item *a, pvq_search_item *b) {
* @return gain index of the quantized gain
*/
static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
int n, int q0, od_coeff *y, int *itheta, int *max_theta, int *vk,
od_val16 beta, double *skip_diff, int nodesync, int is_keyframe, int pli,
int n, int q0, od_coeff *y, int *itheta, int *vk,
od_val16 beta, double *skip_diff, int is_keyframe, int pli,
const od_adapt_ctx *adapt, const int16_t *qm, const int16_t *qm_inv,
double pvq_norm_lambda, int speed) {
od_val32 g;
@ -409,7 +407,6 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
noref = 1;
best_k = 0;
*itheta = -1;
*max_theta = 0;
OD_CLEAR(y, n);
best_qtheta = 0;
m = 0;
@ -435,7 +432,6 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
NULL, 0, n, speed);
best_qtheta = 0;
*itheta = 0;
*max_theta = 0;
noref = 0;
}
dist0 = best_dist;
@ -474,7 +470,7 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
for (j = theta_lower; j <= theta_upper; j++) {
od_val32 qtheta;
qtheta = od_pvq_compute_theta(j, ts);
k = od_pvq_compute_k(qcg, j, qtheta, 0, n, beta, nodesync);
k = od_pvq_compute_k(qcg, j, 0, n, beta);
items[idx].gain = i;
items[idx].theta = j;
items[idx].k = k;
@ -546,7 +542,6 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
best_k = k;
best_qtheta = qtheta;
*itheta = j;
*max_theta = ts;
noref = 0;
OD_COPY(y, y_tmp, n - 1);
}
@ -566,7 +561,7 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
double cost;
od_val32 qcg;
qcg = OD_SHL(i, OD_CGAIN_SHIFT);
k = od_pvq_compute_k(qcg, -1, -1, 1, n, beta, nodesync);
k = od_pvq_compute_k(qcg, -1, 1, n, beta);
/* Compute the minimal possible distortion by not taking the PVQ
cos_dist into account. */
dist = gain_weight*(qcg - cg)*(qcg - cg);
@ -589,7 +584,6 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
noref = 1;
best_k = k;
*itheta = -1;
*max_theta = 0;
OD_COPY(y, y_tmp, n);
}
}
@ -632,7 +626,6 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
* @param [in,out] w multi-symbol entropy encoder
* @param [in] qg quantized gain
* @param [in] theta quantized post-prediction theta
* @param [in] max_theta maximum possible quantized theta value
* @param [in] in coefficient vector to code
* @param [in] n number of coefficients in partition
* @param [in] k number of pulses in partition
@ -640,7 +633,6 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
* @param [in,out] adapt adaptation context
* @param [in,out] exg ExQ16 expectation of gain value
* @param [in,out] ext ExQ16 expectation of theta value
* @param [in] nodesync do not use info that depend on the reference
* @param [in] cdf_ctx selects which cdf context to use
* @param [in] is_keyframe whether we're encoding a keyframe
* @param [in] code_skip whether the "skip rest" flag is allowed
@ -651,7 +643,6 @@ static int pvq_theta(od_coeff *out, const od_coeff *x0, const od_coeff *r0,
void pvq_encode_partition(aom_writer *w,
int qg,
int theta,
int max_theta,
const od_coeff *in,
int n,
int k,
@ -659,7 +650,6 @@ void pvq_encode_partition(aom_writer *w,
od_adapt_ctx *adapt,
int *exg,
int *ext,
int nodesync,
int cdf_ctx,
int is_keyframe,
int code_skip,
@ -692,14 +682,13 @@ void pvq_encode_partition(aom_writer *w,
if (qg > 0) {
int tmp;
tmp = *exg;
generic_encode(w, &model[!noref], qg - 1, -1, &tmp, 2);
generic_encode(w, &model[!noref], qg - 1, &tmp, 2);
OD_IIR_DIADIC(*exg, qg << 16, 2);
}
if (theta > 1 && (nodesync || max_theta > 3)) {
if (theta > 1) {
int tmp;
tmp = *ext;
generic_encode(w, &model[2], theta - 2, nodesync ? -1 : max_theta - 3,
&tmp, 2);
generic_encode(w, &model[2], theta - 2, &tmp, 2);
OD_IIR_DIADIC(*ext, theta << 16, 2);
}
aom_encode_pvq_codeword(w, &adapt->pvq.pvq_codeword_ctx, in,
@ -736,7 +725,6 @@ int od_rdo_quant(od_coeff x, int q, double delta0, double pvq_norm_lambda) {
* @param [in] pli plane index
* @param [in] bs log of the block size minus two
* @param [in] beta per-band activity masking beta param
* @param [in] nodesync make stream robust to error in the reference
* @param [in] is_keyframe whether we're encoding a keyframe
* @param [in] qm QM with magnitude compensation
* @param [in] qm_inv Inverse of QM with magnitude compensation
@ -755,14 +743,12 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
int pli,
int bs,
const od_val16 *beta,
int nodesync,
int is_keyframe,
const int16_t *qm,
const int16_t *qm_inv,
int speed,
PVQ_INFO *pvq_info){
int theta[PVQ_MAX_PARTITIONS];
int max_theta[PVQ_MAX_PARTITIONS];
int qg[PVQ_MAX_PARTITIONS];
int k[PVQ_MAX_PARTITIONS];
od_coeff y[OD_TXSIZE_MAX*OD_TXSIZE_MAX];
@ -846,9 +832,9 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
q = OD_MAXI(1, q_ac);
qg[i] = pvq_theta(out + off[i], in + off[i], ref + off[i], size[i],
q, y + off[i], &theta[i], &max_theta[i],
&k[i], beta[i], &skip_diff, nodesync, is_keyframe, pli, enc->state.adapt,
qm + off[i], qm_inv + off[i], enc->pvq_norm_lambda, speed);
q, y + off[i], &theta[i], &k[i], beta[i], &skip_diff, is_keyframe,
pli, enc->state.adapt, qm + off[i], qm_inv + off[i],
enc->pvq_norm_lambda, speed);
}
od_encode_checkpoint(enc, &buf);
if (is_keyframe) out[0] = 0;
@ -872,7 +858,7 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
#endif
od_encode_checkpoint(enc, &dc_buf);
generic_encode(&enc->w, &enc->state.adapt->model_dc[pli],
n - 1, -1, &enc->state.adapt->ex_dc[pli][bs][0], 2);
n - 1, &enc->state.adapt->ex_dc[pli][bs][0], 2);
#if CONFIG_DAALA_EC
tell2 = od_ec_enc_tell_frac(&enc->w.ec) - tell2;
#else
@ -916,8 +902,7 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
/* NOTE: There was no other better place to put this function. */
if (pvq_info)
av1_store_pvq_enc_info(pvq_info, qg, theta, max_theta, k,
y, nb_bands, off, size,
av1_store_pvq_enc_info(pvq_info, qg, theta, k, y, nb_bands, off, size,
skip_rest, skip_dir, bs);
for (i = 0; i < nb_bands; i++) {
@ -925,11 +910,10 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
/* Encode CFL flip bit just after the first time it's used. */
encode_flip = pli != 0 && is_keyframe && theta[i] != -1 && !cfl_encoded;
if (i == 0 || (!skip_rest && !(skip_dir & (1 << ((i - 1)%3))))) {
pvq_encode_partition(&enc->w, qg[i], theta[i], max_theta[i], y + off[i],
pvq_encode_partition(&enc->w, qg[i], theta[i], y + off[i],
size[i], k[i], model, enc->state.adapt, exg + i, ext + i,
nodesync, (pli != 0)*OD_TXSIZES*PVQ_MAX_PARTITIONS
+ bs*PVQ_MAX_PARTITIONS + i, is_keyframe, i == 0 && (i < nb_bands - 1),
skip_rest, encode_flip, flip);
(pli != 0)*OD_TXSIZES*PVQ_MAX_PARTITIONS + bs*PVQ_MAX_PARTITIONS + i,
is_keyframe, i == 0 && (i < nb_bands - 1), skip_rest, encode_flip, flip);
}
if (i == 0 && !skip_rest && bs > 0) {
aom_write_symbol(&enc->w, skip_dir,
@ -978,7 +962,7 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc,
#endif
od_encode_checkpoint(enc, &dc_buf);
generic_encode(&enc->w, &enc->state.adapt->model_dc[pli],
n - 1, -1, &enc->state.adapt->ex_dc[pli][bs][0], 2);
n - 1, &enc->state.adapt->ex_dc[pli][bs][0], 2);
#if CONFIG_DAALA_EC
tell2 = od_ec_enc_tell_frac(&enc->w.ec) - tell2;
#else

Просмотреть файл

@ -25,12 +25,11 @@ void aom_write_symbol_pvq(aom_writer *w, int symb, aom_cdf_prob *cdf,
void aom_encode_band_pvq_splits(aom_writer *w, od_pvq_codeword_ctx *adapt,
const int *y, int n, int k, int level);
void aom_laplace_encode_special(aom_writer *w, int x, unsigned decay, int max);
void aom_laplace_encode_special(aom_writer *w, int x, unsigned decay);
void pvq_encode_partition(aom_writer *w,
int qg,
int theta,
int max_theta,
const od_coeff *in,
int n,
int k,
@ -38,7 +37,6 @@ void pvq_encode_partition(aom_writer *w,
od_adapt_ctx *adapt,
int *exg,
int *ext,
int nodesync,
int cdf_ctx,
int is_keyframe,
int code_skip,
@ -48,7 +46,7 @@ void pvq_encode_partition(aom_writer *w,
PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc, od_coeff *ref,
const od_coeff *in, od_coeff *out, int q_dc, int q_ac, int pli, int bs,
const od_val16 *beta, int nodesync, int is_keyframe,
const od_val16 *beta, int is_keyframe,
const int16_t *qm, const int16_t *qm_inv, int speed,
PVQ_INFO *pvq_info);