Add ec_smallmul experiment.
This reduces the multiplier width of daala_ec from 16x15->31 to 8x15->23, which reduces hardware latency by an estimated 20% (and area for this module by an estimated 40%). These are the smallest logical changes required to achieve this, but the approach will be optimized significantly in subsequent commits.

When enabled:

ec_smallmul1c_base@2017-03-08T00:49:01.830Z -> ec_smallmul1c@2017-03-08T00:49:45.091Z

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0203 |  0.0203 |  0.0204 |   0.0203 | 0.0203 |  0.0203 |     0.0202

Change-Id: Idbbd3743e9189146cb519d5b984bdabd69e3f4c0
This commit is contained in:
Родитель
7e32a4cc34
Коммит
b1c5760ed8
|
@ -202,7 +202,11 @@ int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned fz) {
|
|||
r = dec->rng;
|
||||
OD_ASSERT(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
|
||||
OD_ASSERT(32768U <= r);
|
||||
#if CONFIG_EC_SMALLMUL
|
||||
v = r - ((r >> 8) * (uint32_t)(32768U - fz) >> 7);
|
||||
#else
|
||||
v = fz * (uint32_t)r >> 15;
|
||||
#endif
|
||||
vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
|
||||
ret = 0;
|
||||
r_new = v;
|
||||
|
@ -381,7 +385,11 @@ int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *cdf, int nsyms) {
|
|||
ret = -1;
|
||||
do {
|
||||
u = v;
|
||||
#if CONFIG_EC_SMALLMUL
|
||||
v = r - ((r >> 8) * (uint32_t)(32768U - cdf[++ret]) >> 7);
|
||||
#else
|
||||
v = cdf[++ret] * (uint32_t)r >> 15;
|
||||
#endif
|
||||
} while (v <= c);
|
||||
OD_ASSERT(v <= r);
|
||||
r = v - u;
|
||||
|
|
|
@ -217,10 +217,21 @@ static void od_ec_encode_q15(od_ec_enc *enc, unsigned fl, unsigned fh) {
|
|||
l = enc->low;
|
||||
r = enc->rng;
|
||||
OD_ASSERT(32768U <= r);
|
||||
#if CONFIG_EC_SMALLMUL
|
||||
if (fl > 0) {
|
||||
u = (r >> 8) * (uint32_t)(32768U - fl) >> 7;
|
||||
v = (r >> 8) * (uint32_t)(32768U - fh) >> 7;
|
||||
l += r - u;
|
||||
r = u - v;
|
||||
} else {
|
||||
r -= (r >> 8) * (uint32_t)(32768U - fh) >> 7;
|
||||
}
|
||||
#else
|
||||
u = fl * (uint32_t)r >> 15;
|
||||
v = fh * (uint32_t)r >> 15;
|
||||
r = v - u;
|
||||
l += u;
|
||||
#endif
|
||||
od_ec_enc_normalize(enc, l, r);
|
||||
#if OD_MEASURE_EC_OVERHEAD
|
||||
enc->entropy -= OD_LOG2((double)(fh - fl) / 32768.);
|
||||
|
@ -306,7 +317,11 @@ void od_ec_encode_bool_q15(od_ec_enc *enc, int val, unsigned fz) {
|
|||
l = enc->low;
|
||||
r = enc->rng;
|
||||
OD_ASSERT(32768U <= r);
|
||||
#if CONFIG_EC_SMALLMUL
|
||||
v = r - ((r >> 8) * (uint32_t)(32768U - fz) >> 7);
|
||||
#else
|
||||
v = fz * (uint32_t)r >> 15;
|
||||
#endif
|
||||
if (val) l += v;
|
||||
r = val ? r - v : v;
|
||||
od_ec_enc_normalize(enc, l, r);
|
||||
|
|
|
@ -286,6 +286,7 @@ EXPERIMENT_LIST="
|
|||
palette
|
||||
daala_ec
|
||||
rawbits
|
||||
ec_smallmul
|
||||
pvq
|
||||
xiphrc
|
||||
cb4x4
|
||||
|
@ -501,6 +502,10 @@ post_process_cmdline() {
|
|||
log_echo "rawbits requires daala_ec, so disabling rawbits"
|
||||
disable_feature rawbits
|
||||
fi
|
||||
if enabled ec_smallmul && ! enabled daala_ec; then
|
||||
log_echo "ec_smallmul requires daala_ec, so disabling ec_smallmul"
|
||||
disable_feature ec_smallmul
|
||||
fi
|
||||
if enabled ext_tile; then
|
||||
log_echo "ext_tile not compatible with reference_buffer, so"
|
||||
log_echo "disabling reference_buffer"
|
||||
|
|
Загрузка…
Ссылка в новой задаче