1245 строки
48 KiB
C
1245 строки
48 KiB
C
/*
|
|
* Copyright (c) 2001-2017, Alliance for Open Media. All rights reserved
|
|
*
|
|
* This source code is subject to the terms of the BSD 2 Clause License and
|
|
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
|
* was not distributed with this source code in the LICENSE file, you can
|
|
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
|
* Media Patent License 1.0 was not distributed with this source code in the
|
|
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <limits.h>
|
|
#include "av1/common/odintrin.h"
|
|
#include "av1/encoder/ratectrl_xiph.h"
|
|
|
|
#define OD_Q57(v) ((int64_t)((uint64_t)(v) << 57))
|
|
#define OD_F_Q45(v) ((int64_t)(((v) * ((int64_t)1 << 45))))
|
|
#define OD_F_Q12(v) ((int32_t)(((v) * ((int32_t)1 << 12))))
|
|
|
|
/*A rough lookup table for tan(x), 0 <= x < pi/2.
  The values are Q12 fixed-point (tan(45 degrees) == 4096) and are spaced at
   5 degree intervals, so entry k approximates tan(5*k degrees).
  These decisions are somewhat arbitrary, but sufficient for the 2nd order
   Bessel follower below.
  Values of x larger than 85 degrees are extrapolated from the last interval,
   which is way off, but "good enough".
  The table is read-only, hence const.*/
static const uint16_t OD_ROUGH_TAN_LOOKUP[18] = {
  0,    358,  722,  1098, 1491,  1910,  2365,  2868,  3437,
  4096, 4881, 5850, 7094, 8784,  11254, 15286, 23230, 46817
};
|
|
|
|
/*alpha is Q24 in the range [0,0.5).
|
|
The return values is 5.12.*/
|
|
static int od_warp_alpha(int alpha) {
|
|
int i;
|
|
int d;
|
|
int t0;
|
|
int t1;
|
|
i = alpha * 36 >> 24;
|
|
if (i >= 17) i = 16;
|
|
t0 = OD_ROUGH_TAN_LOOKUP[i];
|
|
t1 = OD_ROUGH_TAN_LOOKUP[i + 1];
|
|
d = alpha * 36 - (i << 24);
|
|
return (int)((((int64_t)t0 << 32) + ((t1 - t0) << 8) * (int64_t)d) >> 32);
|
|
}
|
|
|
|
/*Per-iteration angle constants for the hyperbolic CORDIC loops in od_bexp64()
   and od_blog64().
  od_blog64() consumes entry i as OD_ATANH_LOG2[i] >> i, while od_bexp64()
   doubles its residual between iterations instead of shifting the constant.
  The entries decrease monotonically and stop changing at index 30; both users
   reuse entry 31 for every iteration past the end of the table.
  NOTE(review): numerically the entries match 2**i*atanh(2**-(i + 1))/log(2)
   in Q62 -- confirm against the original Theora derivation.*/
static const int64_t OD_ATANH_LOG2[32] = {
  /* 0*/ INT64_C(0x32B803473F7AD0F4), INT64_C(0x2F2A71BD4E25E916),
  /* 2*/ INT64_C(0x2E68B244BB93BA06), INT64_C(0x2E39FB9198CE62E4),
  /* 4*/ INT64_C(0x2E2E683F68565C8F), INT64_C(0x2E2B850BE2077FC1),
  /* 6*/ INT64_C(0x2E2ACC58FE7B78DB), INT64_C(0x2E2A9E2DE52FD5F2),
  /* 8*/ INT64_C(0x2E2A92A338D53EEC), INT64_C(0x2E2A8FC08F5E19B6),
  /*10*/ INT64_C(0x2E2A8F07E51A485E), INT64_C(0x2E2A8ED9BA8AF388),
  /*12*/ INT64_C(0x2E2A8ECE2FE7384A), INT64_C(0x2E2A8ECB4D3E4B1A),
  /*14*/ INT64_C(0x2E2A8ECA94940FE8), INT64_C(0x2E2A8ECA6669811D),
  /*16*/ INT64_C(0x2E2A8ECA5ADEDD6A), INT64_C(0x2E2A8ECA57FC347E),
  /*18*/ INT64_C(0x2E2A8ECA57438A43), INT64_C(0x2E2A8ECA57155FB4),
  /*20*/ INT64_C(0x2E2A8ECA5709D510), INT64_C(0x2E2A8ECA5706F267),
  /*22*/ INT64_C(0x2E2A8ECA570639BD), INT64_C(0x2E2A8ECA57060B92),
  /*24*/ INT64_C(0x2E2A8ECA57060008), INT64_C(0x2E2A8ECA5705FD25),
  /*26*/ INT64_C(0x2E2A8ECA5705FC6C), INT64_C(0x2E2A8ECA5705FC3E),
  /*28*/ INT64_C(0x2E2A8ECA5705FC33), INT64_C(0x2E2A8ECA5705FC30),
  /*30*/ INT64_C(0x2E2A8ECA5705FC2F), INT64_C(0x2E2A8ECA5705FC2F)
};
|
|
|
|
/*Integer binary logarithm of a 64-bit value.
  v: The value to measure.
  Return: The number of bits needed to represent v, i.e. the 1-based position
           of its highest set bit; 0 when v is 0.
  The highest set bit is first smeared into every lower position, and the
   resulting 2**k - 1 pattern is then located with a de Bruijn multiply and a
   64-entry lookup.*/
static int od_ilog64(int64_t v) {
  static const unsigned char DEBRUIJN_POS[64] = {
    0,  1,  2,  7,  3,  13, 8,  19, 4,  25, 14, 28, 9,  34, 20, 40,
    5,  17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
    63, 6,  12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
    62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58
  };
  int shift;
  int nbits;
  /*Propagate the top set bit downwards: shifts of 1, 2, 4, 8, 16 and 32.*/
  for (shift = 1; shift <= 32; shift <<= 1) {
    v |= v >> shift;
  }
  /*The low bit is set iff the input was non-zero.*/
  nbits = (int)v & 1;
  /*Turn 2**k - 1 into the single bit 2**(k - 1) (or 1 when v was 0).*/
  v = (v >> 1) + 1;
  nbits += DEBRUIJN_POS[v * UINT64_C(0x218A392CD3D5DBF) >> 58 & 0x3F];
  return nbits;
}
|
|
|
|
/*Computes the binary exponential of logq57.
|
|
input: a log base 2 in Q57 format
|
|
output: a 64 bit integer in Q0 (no fraction) */
|
|
static int64_t od_bexp64(int64_t logq57) {
|
|
int64_t w;
|
|
int64_t z;
|
|
int ipart;
|
|
ipart = (int)(logq57 >> 57);
|
|
if (ipart < 0) return 0;
|
|
if (ipart >= 63) return 0x7FFFFFFFFFFFFFFFLL;
|
|
z = logq57 - OD_Q57(ipart);
|
|
if (z) {
|
|
int64_t mask;
|
|
int64_t wlo;
|
|
int i;
|
|
/*C doesn't give us 64x64->128 muls, so we use CORDIC.
|
|
This is not particularly fast, but it's not being used in time-critical
|
|
code; it is very accurate.*/
|
|
/*z is the fractional part of the log in Q62 format.
|
|
We need 1 bit of headroom since the magnitude can get larger than 1
|
|
during the iteration, and a sign bit.*/
|
|
z <<= 5;
|
|
/*w is the exponential in Q61 format (since it also needs headroom and can
|
|
get as large as 2.0); we could get another bit if we dropped the sign,
|
|
but we'll recover that bit later anyway.
|
|
Ideally this should start out as
|
|
\lim_{n->\infty} 2^{61}/\product_{i=1}^n \sqrt{1-2^{-2i}}
|
|
but in order to guarantee convergence we have to repeat iterations 4,
|
|
13 (=3*4+1), and 40 (=3*13+1, etc.), so it winds up somewhat larger.*/
|
|
w = 0x26A3D0E401DD846DLL;
|
|
for (i = 0;; i++) {
|
|
mask = -(z < 0);
|
|
w += ((w >> (i + 1)) + mask) ^ mask;
|
|
z -= (OD_ATANH_LOG2[i] + mask) ^ mask;
|
|
/*Repeat iteration 4.*/
|
|
if (i >= 3) break;
|
|
z *= 2;
|
|
}
|
|
for (;; i++) {
|
|
mask = -(z < 0);
|
|
w += ((w >> (i + 1)) + mask) ^ mask;
|
|
z -= (OD_ATANH_LOG2[i] + mask) ^ mask;
|
|
/*Repeat iteration 13.*/
|
|
if (i >= 12) break;
|
|
z *= 2;
|
|
}
|
|
for (; i < 32; i++) {
|
|
mask = -(z < 0);
|
|
w += ((w >> (i + 1)) + mask) ^ mask;
|
|
z = (z - ((OD_ATANH_LOG2[i] + mask) ^ mask)) * 2;
|
|
}
|
|
wlo = 0;
|
|
/*Skip the remaining iterations unless we really require that much
|
|
precision.
|
|
We could have bailed out earlier for smaller iparts, but that would
|
|
require initializing w from a table, as the limit doesn't converge to
|
|
61-bit precision until n=30.*/
|
|
if (ipart > 30) {
|
|
/*For these iterations, we just update the low bits, as the high bits
|
|
can't possibly be affected.
|
|
OD_ATANH_LOG2 has also converged (it actually did so one iteration
|
|
earlier, but that's no reason for an extra special case).*/
|
|
for (;; i++) {
|
|
mask = -(z < 0);
|
|
wlo += ((w >> i) + mask) ^ mask;
|
|
z -= (OD_ATANH_LOG2[31] + mask) ^ mask;
|
|
/*Repeat iteration 40.*/
|
|
if (i >= 39) break;
|
|
z <<= 1;
|
|
}
|
|
for (; i < 61; i++) {
|
|
mask = -(z < 0);
|
|
wlo += ((w >> i) + mask) ^ mask;
|
|
z = (z - ((OD_ATANH_LOG2[31] + mask) ^ mask)) << 1;
|
|
}
|
|
}
|
|
w = (w << 1) + wlo;
|
|
} else {
|
|
w = (int64_t)1 << 62;
|
|
}
|
|
if (ipart < 62) {
|
|
w = ((w >> (61 - ipart)) + 1) >> 1;
|
|
}
|
|
return w;
|
|
}
|
|
|
|
/*Computes the binary log of w
|
|
input: a 64-bit integer in Q0 (no fraction)
|
|
output: a 64-bit log in Q57 */
|
|
static int64_t od_blog64(int64_t w) {
|
|
int64_t z;
|
|
int ipart;
|
|
if (w <= 0) return -1;
|
|
ipart = od_ilog64(w) - 1;
|
|
if (ipart > 61) {
|
|
w >>= ipart - 61;
|
|
} else {
|
|
w <<= 61 - ipart;
|
|
}
|
|
z = 0;
|
|
if (w & (w - 1)) {
|
|
int64_t x;
|
|
int64_t y;
|
|
int64_t u;
|
|
int64_t mask;
|
|
int i;
|
|
/*C doesn't give us 64x64->128 muls, so we use CORDIC.
|
|
This is not particularly fast, but it's not being used in time-critical
|
|
code; it is very accurate.*/
|
|
/*z is the fractional part of the log in Q61 format.*/
|
|
/*x and y are the cosh() and sinh(), respectively, in Q61 format.
|
|
We are computing z = 2*atanh(y/x) = 2*atanh((w - 1)/(w + 1)).*/
|
|
x = w + ((int64_t)1 << 61);
|
|
y = w - ((int64_t)1 << 61);
|
|
for (i = 0; i < 4; i++) {
|
|
mask = -(y < 0);
|
|
z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
|
|
u = x >> (i + 1);
|
|
x -= ((y >> (i + 1)) + mask) ^ mask;
|
|
y -= (u + mask) ^ mask;
|
|
}
|
|
/*Repeat iteration 4.*/
|
|
for (i--; i < 13; i++) {
|
|
mask = -(y < 0);
|
|
z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
|
|
u = x >> (i + 1);
|
|
x -= ((y >> (i + 1)) + mask) ^ mask;
|
|
y -= (u + mask) ^ mask;
|
|
}
|
|
/*Repeat iteration 13.*/
|
|
for (i--; i < 32; i++) {
|
|
mask = -(y < 0);
|
|
z += ((OD_ATANH_LOG2[i] >> i) + mask) ^ mask;
|
|
u = x >> (i + 1);
|
|
x -= ((y >> (i + 1)) + mask) ^ mask;
|
|
y -= (u + mask) ^ mask;
|
|
}
|
|
/*OD_ATANH_LOG2 has converged.*/
|
|
for (; i < 40; i++) {
|
|
mask = -(y < 0);
|
|
z += ((OD_ATANH_LOG2[31] >> i) + mask) ^ mask;
|
|
u = x >> (i + 1);
|
|
x -= ((y >> (i + 1)) + mask) ^ mask;
|
|
y -= (u + mask) ^ mask;
|
|
}
|
|
/*Repeat iteration 40.*/
|
|
for (i--; i < 62; i++) {
|
|
mask = -(y < 0);
|
|
z += ((OD_ATANH_LOG2[31] >> i) + mask) ^ mask;
|
|
u = x >> (i + 1);
|
|
x -= ((y >> (i + 1)) + mask) ^ mask;
|
|
y -= (u + mask) ^ mask;
|
|
}
|
|
z = (z + 8) >> 4;
|
|
}
|
|
return OD_Q57(ipart) + z;
|
|
}
|
|
|
|
/*Converts a Q57 value to a 32-bit Q24 value, rounding and clamping.
  in: Input in Q57 format.
  Return: The same number in Q24, clamped to the int32_t range.*/
static int32_t od_q57_to_q24(int64_t in) {
  /*33 fractional bits are dropped; adding 2**32 first rounds the result.*/
  const int64_t rounding = (int64_t)1 << 32;
  const int64_t q24 = (in + rounding) >> 33;
  /*0x80000000 is automatically converted to unsigned on 32-bit systems.
    -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
     unsigned.*/
  return (int32_t)OD_CLAMPI(-0x7FFFFFFF - 1, q24, 0x7FFFFFFF);
}
|
|
|
|
/*Binary exponential of log_scale with 24-bit fractional precision and
   saturation.
  log_scale: A binary logarithm in Q57 format.
  Return: The binary exponential in Q24 format, saturated to 2**31-1 if
           log_scale was too large.*/
static int32_t od_bexp64_q24(int64_t log_scale) {
  int64_t scale;
  /*2**8 in Q24 already needs 33 bits, so anything this large saturates.*/
  if (log_scale >= OD_Q57(8)) return 0x7FFFFFFF;
  /*Adding 24 to the log multiplies the result by 2**24, i.e. yields Q24.*/
  scale = od_bexp64(log_scale + OD_Q57(24));
  if (scale >= 0x7FFFFFFF) return 0x7FFFFFFF;
  return (int32_t)scale;
}
|
|
|
|
/*Re-initialize Bessel filter coefficients with the specified delay.
|
|
This does not alter the x/y state, but changes the reaction time of the
|
|
filter.
|
|
Altering the time constant of a reactive filter without alterning internal
|
|
state is something that has to be done carefuly, but our design operates at
|
|
high enough delays and with small enough time constant changes to make it
|
|
safe.*/
|
|
static void od_iir_bessel2_reinit(od_iir_bessel2 *f, int delay) {
|
|
int alpha;
|
|
int64_t one48;
|
|
int64_t warp;
|
|
int64_t k1;
|
|
int64_t k2;
|
|
int64_t d;
|
|
int64_t a;
|
|
int64_t ik2;
|
|
int64_t b1;
|
|
int64_t b2;
|
|
/*This borrows some code from an unreleased version of Postfish.
|
|
See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
|
|
on deriving the filter coefficients.*/
|
|
/*alpha is Q24*/
|
|
alpha = (1 << 24) / delay;
|
|
one48 = (int64_t)1 << 48;
|
|
/*warp is 7.12*/
|
|
warp = OD_MAXI(od_warp_alpha(alpha), 1);
|
|
/*k1 is 9.12*/
|
|
k1 = 3 * warp;
|
|
/*k2 is 16.24.*/
|
|
k2 = k1 * warp;
|
|
/*d is 16.15.*/
|
|
d = ((((1 << 12) + k1) << 12) + k2 + 256) >> 9;
|
|
/*a is 0.32, since d is larger than both 1.0 and k2.*/
|
|
a = (k2 << 23) / d;
|
|
/*ik2 is 25.24.*/
|
|
ik2 = one48 / k2;
|
|
/*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
|
|
b1 = 2 * a * (ik2 - (1 << 24));
|
|
/*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
|
|
b2 = (one48 << 8) - ((4 * a) << 24) - b1;
|
|
/*All of the filter parameters are Q24.*/
|
|
f->c[0] = (int32_t)((b1 + ((int64_t)1 << 31)) >> 32);
|
|
f->c[1] = (int32_t)((b2 + ((int64_t)1 << 31)) >> 32);
|
|
f->g = (int32_t)((a + 128) >> 8);
|
|
}
|
|
|
|
/*Initialize a 2nd order low-pass Bessel filter with the corresponding delay
|
|
and initial value.
|
|
value is Q24.*/
|
|
static void od_iir_bessel2_init(od_iir_bessel2 *f, int delay, int32_t value) {
|
|
od_iir_bessel2_reinit(f, delay);
|
|
f->y[1] = f->y[0] = f->x[1] = f->x[0] = value;
|
|
}
|
|
|
|
/*Feeds one sample through the filter and advances its history.
  f: The filter state; x[] holds the last two inputs and y[] the last two
      outputs, with index 0 being the most recent.
  x: The new input sample.
  Return: The new output, before it is truncated to 32 bits for storage in
           f->y[0].
  All products are formed in 64 bits; the sum is rounded and shifted down by
   the 24 fractional bits of the coefficients.*/
static int64_t od_iir_bessel2_update(od_iir_bessel2 *f, int32_t x) {
  const int64_t prev_x0 = f->x[0];
  const int64_t prev_x1 = f->x[1];
  const int64_t prev_y0 = f->y[0];
  const int64_t prev_y1 = f->y[1];
  const int64_t gain = f->g;
  const int64_t coef0 = f->c[0];
  const int64_t coef1 = f->c[1];
  /*Input taps are weighted 1, 2, 1 and share the single gain g.*/
  const int64_t feedforward = (x + prev_x0 * 2 + prev_x1) * gain;
  const int64_t feedback = prev_y0 * coef0 + prev_y1 * coef1;
  const int64_t ya = (feedforward + feedback + (1 << 23)) >> 24;
  /*Shift the history: the newest entries move to slot 1.*/
  f->x[1] = (int32_t)prev_x0;
  f->x[0] = x;
  f->y[1] = (int32_t)prev_y0;
  f->y[0] = (int32_t)ya;
  return ya;
}
|
|
|
|
/*Resets the rate control buffer model, the rate/quantizer model parameters
   and all followers from the configuration already stored in rc.
  Reads target_bitrate, framerate, reservoir_frame_delay, frame_width and
   frame_height; everything else touched here is overwritten.*/
static void od_enc_rc_reset(od_rc_state *rc) {
  /*Drop-frame tracking is concerned with more than just the basic frame
     types: it also tracks the boosted subtypes.*/
  const int drop_tracked[4] = { OD_I_FRAME, OD_P_FRAME, OD_GOLDEN_P_FRAME,
                                OD_ALTREF_P_FRAME };
  int64_t npixels;
  int64_t ibpp;
  int64_t i_scale;
  int64_t p_scale;
  int i_exp;
  int p_exp;
  int k;
  rc->bits_per_frame = (int64_t)(rc->target_bitrate / rc->framerate);
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if (rc->bits_per_frame > 0x400000000000LL) {
    rc->bits_per_frame = (int64_t)0x400000000000LL;
  } else if (rc->bits_per_frame < 32) {
    rc->bits_per_frame = 32;
  }
  /*The reservoir always spans at least 12 frames.*/
  rc->reservoir_frame_delay = OD_MAXI(rc->reservoir_frame_delay, 12);
  rc->reservoir_max = rc->bits_per_frame * rc->reservoir_frame_delay;
  /*Start with a buffer fullness and fullness target of 50%.*/
  rc->reservoir_target = (rc->reservoir_max + 1) >> 1;
  rc->reservoir_fullness = rc->reservoir_target;
  /*Pick exponents and initial scales for quantizer selection, keyed on the
     number of pixels per bit of per-frame budget.*/
  npixels = rc->frame_width * (int64_t)rc->frame_height;
  rc->log_npixels = od_blog64(npixels);
  ibpp = npixels / rc->bits_per_frame;
  /*All of these initial scale/exp values are from Theora, and have not yet
     been adapted to Daala, so they're certainly wrong.*/
  if (ibpp < 1) {
    i_exp = 59;
    i_scale = 1997;
  } else if (ibpp < 2) {
    i_exp = 55;
    i_scale = 1604;
  } else {
    i_exp = 48;
    i_scale = 834;
  }
  rc->exp[OD_I_FRAME] = i_exp;
  rc->log_scale[OD_I_FRAME] = od_blog64(i_scale) - OD_Q57(OD_COEFF_SHIFT);
  if (ibpp < 4) {
    p_exp = 100;
    p_scale = 2249;
  } else if (ibpp < 8) {
    p_exp = 95;
    p_scale = 1751;
  } else {
    p_exp = 73;
    p_scale = 1260;
  }
  rc->exp[OD_P_FRAME] = p_exp;
  rc->log_scale[OD_P_FRAME] = od_blog64(p_scale) - OD_Q57(OD_COEFF_SHIFT);
  /*Golden and altref P-frames use the same log_scale and exp modeling values
     as regular P-frames and the same scale follower.
    For convenience in the rate calculation code, we maintain a copy of the
     scale and exp values under their own subtypes.*/
  rc->exp[OD_GOLDEN_P_FRAME] = rc->exp[OD_P_FRAME];
  rc->log_scale[OD_GOLDEN_P_FRAME] = rc->log_scale[OD_P_FRAME];
  rc->exp[OD_ALTREF_P_FRAME] = rc->exp[OD_P_FRAME];
  rc->log_scale[OD_ALTREF_P_FRAME] = rc->log_scale[OD_P_FRAME];
  /*The P-frame follower starts at a delay of 10, to work within the range of
     values where later incrementing the delay works as designed.
    10 is not an exact choice, but rather a good working trade-off.*/
  rc->inter_p_delay = 10;
  rc->inter_delay_target = rc->reservoir_frame_delay >> 1;
  memset(rc->frame_count, 0, sizeof(rc->frame_count));
  /*No drops seen yet: a drop scale of 1.0, i.e. a log of zero.*/
  for (k = 0; k < 4; k++) {
    rc->prev_drop_count[drop_tracked[k]] = 0;
    rc->log_drop_scale[drop_tracked[k]] = OD_Q57(0);
  }
  /*Set up second order followers, initialized according to corresponding
     time constants.
    Only the I and P scale followers exist; see the note on golden/altref
     frames above.*/
  od_iir_bessel2_init(&rc->scalefilter[OD_I_FRAME], 4,
                      od_q57_to_q24(rc->log_scale[OD_I_FRAME]));
  od_iir_bessel2_init(&rc->scalefilter[OD_P_FRAME], rc->inter_p_delay,
                      od_q57_to_q24(rc->log_scale[OD_P_FRAME]));
  for (k = 0; k < 4; k++) {
    const int subtype = drop_tracked[k];
    od_iir_bessel2_init(&rc->vfrfilter[subtype], 4,
                        od_bexp64_q24(rc->log_drop_scale[subtype]));
  }
}
|
|
|
|
/*Re-derives the buffer bounds after the target bitrate, framerate or
   reservoir delay stored in rc has changed.
  Before the first frame has been coded this is a full reset; afterwards only
   the bounds and targets are updated, not the current fullness.
  Return: Always 0.*/
int od_enc_rc_resize(od_rc_state *rc) {
  int idt;
  /*If encoding has not yet begun, reset the buffer state.*/
  if (rc->cur_frame == 0) {
    od_enc_rc_reset(rc);
    return 0;
  }
  rc->bits_per_frame = (int64_t)(rc->target_bitrate / rc->framerate);
  /*Insane framerates or frame sizes mean insane bitrates.
    Let's not get carried away.*/
  if (rc->bits_per_frame > 0x400000000000LL) {
    rc->bits_per_frame = (int64_t)0x400000000000LL;
  } else if (rc->bits_per_frame < 32) {
    rc->bits_per_frame = 32;
  }
  rc->reservoir_frame_delay = OD_MAXI(rc->reservoir_frame_delay, 12);
  rc->reservoir_max = rc->bits_per_frame * rc->reservoir_frame_delay;
  /*Half the reservoir, plus a quarter frame's budget for each frame of the
     keyframe interval (capped at the reservoir length).*/
  rc->reservoir_target =
      ((rc->reservoir_max + 1) >> 1) +
      ((rc->bits_per_frame + 2) >> 2) *
          OD_MINI(rc->keyframe_rate, rc->reservoir_frame_delay);
  /*Update the INTER-frame scale filter delay.
    We jump to it immediately if we've already seen enough frames; otherwise
     it is simply set as the new target.*/
  idt = OD_MAXI(rc->reservoir_frame_delay >> 1, 10);
  rc->inter_delay_target = idt;
  if (idt < OD_MINI(rc->inter_p_delay, rc->frame_count[OD_P_FRAME])) {
    /*Restart the follower at the shorter delay, seeded with its current
       output.*/
    od_iir_bessel2_init(&rc->scalefilter[OD_P_FRAME], idt,
                        rc->scalefilter[OD_P_FRAME].y[0]);
    rc->inter_p_delay = idt;
  }
  return 0;
}
|
|
|
|
/*Initializes rate control for a target bitrate, or retargets it when it is
   already running.
  rc: The rate control state; rc->framerate must already be set.
  bitrate: The target bitrate; values <= 0 leave bitrate management inactive
            (no reservoir is set up).
  delay_ms: The requested buffer delay in milliseconds.
  Return: 1 if rc->framerate is not positive, otherwise 0 (or the result of
           od_enc_rc_resize() when retargeting).*/
int od_enc_rc_init(od_rc_state *rc, int64_t bitrate, int delay_ms) {
  if (rc->framerate <= 0) return 1;
  if (rc->target_bitrate > 0) {
    /*State has already been initialized; rather than reinitialize,
       adjust the buffering for the new target rate.*/
    rc->target_bitrate = bitrate;
    return od_enc_rc_resize(rc);
  }
  rc->target_quantizer = 0;
  rc->target_bitrate = bitrate;
  rc->rate_bias = 0;
  if (bitrate <= 0) return 0;
  /*The buffer size is clamped between [12, 256]; this interval is short
     enough to allow reaction, but long enough to allow looking into the next
     GOP (avoiding the case where the last frames before an I-frame get
     starved).
    The 12 frame minimum gives us some chance to distribute bit estimation
     errors in the worst case.
    The 256 frame maximum means we'll require 8-10 seconds of pre-buffering at
     24-30 fps, which is not unreasonable.
    Only the upper bound is applied here; od_enc_rc_reset() applies the lower
     one.*/
  /*NOTE(review): delay_ms / 1000 is an integer division, so the delay is
     truncated to whole seconds before being scaled by the framerate.
    Confirm whether that is intended.*/
  rc->reservoir_frame_delay =
      (int)OD_MINI((delay_ms / 1000) * rc->framerate, 256);
  rc->drop_frames = 1;
  rc->cap_overflow = 1;
  rc->cap_underflow = 0;
  rc->twopass_state = 0;
  od_enc_rc_reset(rc);
  return 0;
}
|
|
|
|
/*Scale the number of frames by the number of expected drops/duplicates.*/
|
|
static int od_rc_scale_drop(od_rc_state *rc, int frame_type, int nframes) {
|
|
if (rc->prev_drop_count[frame_type] > 0 ||
|
|
rc->log_drop_scale[frame_type] > OD_Q57(0)) {
|
|
int64_t dup_scale;
|
|
dup_scale = od_bexp64(((rc->log_drop_scale[frame_type] +
|
|
od_blog64(rc->prev_drop_count[frame_type] + 1)) >>
|
|
1) +
|
|
OD_Q57(8));
|
|
if (dup_scale < nframes << 8) {
|
|
int dup_scalei;
|
|
dup_scalei = (int)dup_scale;
|
|
if (dup_scalei > 0) {
|
|
nframes = ((nframes << 8) + dup_scalei - 1) / dup_scalei;
|
|
}
|
|
} else {
|
|
nframes = !!nframes;
|
|
}
|
|
}
|
|
return nframes;
|
|
}
|
|
|
|
/*Closed form version of frame determination code.
|
|
Used by rate control to predict frame types and subtypes into the future.
|
|
No side effects, may be called any number of times.
|
|
Note that it ignores end-of-file conditions; one-pass planning *should*
|
|
ignore end-of-file. */
|
|
int od_frame_type(od_rc_state *rc, int64_t coding_frame_count, int *is_golden,
|
|
int *is_altref, int64_t *ip_count) {
|
|
int frame_type;
|
|
if (coding_frame_count == 0) {
|
|
*is_golden = 1;
|
|
*is_altref = 1;
|
|
*ip_count = 0;
|
|
frame_type = OD_I_FRAME;
|
|
} else {
|
|
int keyrate = rc->keyframe_rate;
|
|
if (rc->closed_gop) {
|
|
int ip_per_gop;
|
|
int gop_n;
|
|
int gop_i;
|
|
ip_per_gop = (keyrate - 1) / 2;
|
|
gop_n = coding_frame_count / keyrate;
|
|
gop_i = coding_frame_count - gop_n * keyrate;
|
|
*ip_count = gop_n * ip_per_gop + (gop_i > 0) + (gop_i - 1);
|
|
frame_type = gop_i == 0 ? OD_I_FRAME : OD_P_FRAME;
|
|
} else {
|
|
int ip_per_gop;
|
|
int gop_n;
|
|
int gop_i;
|
|
ip_per_gop = (keyrate);
|
|
gop_n = (coding_frame_count - 1) / keyrate;
|
|
gop_i = coding_frame_count - gop_n * keyrate - 1;
|
|
*ip_count = (coding_frame_count > 0) + gop_n * ip_per_gop + (gop_i);
|
|
frame_type = gop_i / 1 < ip_per_gop - 1 ? OD_P_FRAME : OD_I_FRAME;
|
|
}
|
|
}
|
|
*is_golden =
|
|
(*ip_count % rc->goldenframe_rate) == 0 || frame_type == OD_I_FRAME;
|
|
*is_altref = (*ip_count % rc->altref_rate) == 0 || frame_type == OD_I_FRAME;
|
|
return frame_type;
|
|
}
|
|
|
|
/*Count frames types forward from the current frame up to but not including
|
|
the last I-frame in reservoir_frame_delay.
|
|
If reservoir_frame_delay contains no I-frames (or the current frame is the
|
|
only I-frame), count all reservoir_frame_delay frames.
|
|
Returns the number of frames counted.
|
|
Right now, this implementation is simple, brute-force, and expensive.
|
|
It is also easy to understand and debug.
|
|
TODO: replace with a virtual FIFO that keeps running totals as
|
|
repeating the counting over-and-over will have a performance impact on
|
|
whole-file 2pass usage.*/
|
|
static int frame_type_count(od_rc_state *rc, int nframes[OD_FRAME_NSUBTYPES]) {
|
|
int i;
|
|
int j;
|
|
int acc[OD_FRAME_NSUBTYPES];
|
|
int count;
|
|
int reservoir_frames;
|
|
int reservoir_frame_delay;
|
|
memset(nframes, 0, OD_FRAME_NSUBTYPES * sizeof(*nframes));
|
|
memset(acc, 0, sizeof(acc));
|
|
count = 0;
|
|
reservoir_frames = 0;
|
|
#if 1
|
|
/*Go ahead and count past end-of-stream.
|
|
We won't nail the exact bitrate on short files that end with a partial
|
|
GOP, but we also won't [potentially] destroy the quality of the last few
|
|
frames in that same case when we suddenly find out the stream is ending
|
|
before the original planning horizon.*/
|
|
reservoir_frame_delay = rc->reservoir_frame_delay;
|
|
#else
|
|
/*Don't count past the end of the stream (once we know where end-of-stream
|
|
is).*/
|
|
reservoir_frame_delay =
|
|
rc->end_of_input ? rc->input_size + 1 : rc->reservoir_frame_delay;
|
|
#endif
|
|
for (i = 0; i < reservoir_frame_delay; i++) {
|
|
int frame_type;
|
|
int is_golden;
|
|
int is_altref;
|
|
int64_t dummy;
|
|
frame_type =
|
|
od_frame_type(rc, rc->cur_frame + i, &is_golden, &is_altref, &dummy);
|
|
switch (frame_type) {
|
|
case OD_I_FRAME: {
|
|
for (j = 0; j < OD_FRAME_NSUBTYPES; j++) nframes[j] += acc[j];
|
|
reservoir_frames += count;
|
|
memset(acc, 0, sizeof(acc));
|
|
acc[OD_I_FRAME] = 1;
|
|
count = 1;
|
|
break;
|
|
}
|
|
case OD_P_FRAME: {
|
|
if (is_golden) {
|
|
++acc[OD_GOLDEN_P_FRAME];
|
|
++count;
|
|
} else if (is_altref) {
|
|
++acc[OD_ALTREF_P_FRAME];
|
|
++count;
|
|
} else {
|
|
++acc[OD_P_FRAME];
|
|
++count;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
/*If there were no I-frames at all, or only the first frame was an I-frame,
|
|
the accumulators never flushed and still contain the counts for the
|
|
entire buffer.
|
|
In both these cases, we return these counts.
|
|
Otherwise, we discard what remains in the accumulators as they contain
|
|
the counts from and past the last I-frame.*/
|
|
if (reservoir_frames == 0) {
|
|
for (i = 0; i < OD_FRAME_NSUBTYPES; i++) nframes[i] = acc[i];
|
|
reservoir_frames += count;
|
|
}
|
|
return reservoir_frames;
|
|
}
|
|
|
|
/*Maps a quantizer index to a quantizer value.
  q: The quantizer index, passed to av1_convert_qindex_to_q().
  bit_depth: The coding bit depth, passed through likewise.
  Return: The converted value, rounded to an integer with lrint().*/
static int convert_to_ac_quant(int q, int bit_depth) {
  const long rounded = lrint(av1_convert_qindex_to_q(q, bit_depth));
  return (int)rounded;
}
|
|
|
|
int od_enc_rc_select_quantizers_and_lambdas(od_rc_state *rc,
|
|
int is_golden_frame,
|
|
int is_altref_frame, int frame_type,
|
|
int *bottom_idx, int *top_idx) {
|
|
int frame_subtype;
|
|
int64_t log_cur_scale;
|
|
int lossy_quantizer_min;
|
|
int lossy_quantizer_max;
|
|
double mqp_i = OD_MQP_I;
|
|
double mqp_p = OD_MQP_P;
|
|
double mqp_gp = OD_MQP_GP;
|
|
double mqp_ap = OD_MQP_AP;
|
|
int reservoir_frames;
|
|
int nframes[OD_FRAME_NSUBTYPES];
|
|
int32_t mqp_Q12[OD_FRAME_NSUBTYPES];
|
|
int64_t dqp_Q45[OD_FRAME_NSUBTYPES];
|
|
/*Verify the closed-form frame type determination code matches what the
|
|
input queue set.*/
|
|
/*One pseudo-non-closed-form caveat:
|
|
Once we've seen end-of-input, the batched frame determination code
|
|
suppresses the last open-GOP's I-frame (since it would only be
|
|
useful for the next GOP, which doesn't exist).
|
|
Thus, don't check one the input queue is drained.*/
|
|
if (!rc->end_of_input) {
|
|
int closed_form_type;
|
|
int closed_form_golden;
|
|
int closed_form_altref;
|
|
int64_t closed_form_cur_frame;
|
|
closed_form_type =
|
|
od_frame_type(rc, rc->cur_frame, &closed_form_golden,
|
|
&closed_form_altref, &closed_form_cur_frame);
|
|
OD_UNUSED(closed_form_type);
|
|
OD_UNUSED(is_altref_frame);
|
|
assert(closed_form_type == frame_type);
|
|
assert(closed_form_cur_frame == rc->cur_frame);
|
|
assert(closed_form_altref == is_altref_frame);
|
|
assert(closed_form_golden == is_golden_frame);
|
|
}
|
|
|
|
log_cur_scale = (int64_t)rc->scalefilter[frame_type].y[0] << 33;
|
|
|
|
/*Count the various types and classes of frames.*/
|
|
reservoir_frames = frame_type_count(rc, nframes);
|
|
nframes[OD_I_FRAME] = od_rc_scale_drop(rc, OD_I_FRAME, nframes[OD_I_FRAME]);
|
|
nframes[OD_P_FRAME] = od_rc_scale_drop(rc, OD_P_FRAME, nframes[OD_P_FRAME]);
|
|
nframes[OD_GOLDEN_P_FRAME] =
|
|
od_rc_scale_drop(rc, OD_GOLDEN_P_FRAME, nframes[OD_GOLDEN_P_FRAME]);
|
|
nframes[OD_ALTREF_P_FRAME] =
|
|
od_rc_scale_drop(rc, OD_ALTREF_P_FRAME, nframes[OD_ALTREF_P_FRAME]);
|
|
|
|
switch (rc->twopass_state) {
|
|
default: break;
|
|
case 1: {
|
|
/*Pass 1 mode: use a fixed qi value.*/
|
|
return rc->firstpass_quant;
|
|
} break;
|
|
case 2: {
|
|
int i;
|
|
int64_t scale_sum[OD_FRAME_NSUBTYPES];
|
|
int qti;
|
|
/*Pass 2 mode: we know exactly how much of each frame type there is in
|
|
the current buffer window, and have estimates for the scales.*/
|
|
for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
|
|
nframes[i] = rc->nframes[i];
|
|
nframes[i] = rc->nframes[i];
|
|
scale_sum[i] = rc->scale_sum[i];
|
|
}
|
|
/*If we're not using the same frame type as in pass 1 (because someone
|
|
changed the keyframe interval), remove that scale estimate.
|
|
We'll add in a replacement for the correct frame type below.*/
|
|
qti = rc->cur_metrics.frame_type;
|
|
if (qti != frame_type) {
|
|
nframes[qti]--;
|
|
scale_sum[qti] -= od_bexp64_q24(rc->cur_metrics.log_scale);
|
|
}
|
|
/*Compute log_scale estimates for each frame type from the pass-1 scales
|
|
we measured in the current window.*/
|
|
for (qti = 0; qti < OD_FRAME_NSUBTYPES; qti++) {
|
|
rc->log_scale[qti] = nframes[qti] > 0
|
|
? od_blog64(scale_sum[qti]) -
|
|
od_blog64(nframes[qti]) - OD_Q57(24)
|
|
: -rc->log_npixels;
|
|
}
|
|
/*If we're not using the same frame type as in pass 1, add a scale
|
|
estimate for the corresponding frame using the current low-pass
|
|
filter value.
|
|
This is mostly to ensure we have a valid estimate even when pass 1 had
|
|
no frames of this type in the buffer window.
|
|
TODO: We could also plan ahead and figure out how many keyframes we'll
|
|
be forced to add in the current buffer window.*/
|
|
qti = rc->cur_metrics.frame_type;
|
|
if (qti != frame_type) {
|
|
int64_t scale;
|
|
scale = rc->log_scale[frame_type] < OD_Q57(23)
|
|
? od_bexp64(rc->log_scale[frame_type] + OD_Q57(24))
|
|
: 0x7FFFFFFFFFFFLL;
|
|
scale *= nframes[frame_type];
|
|
nframes[frame_type]++;
|
|
scale += od_bexp64_q24(log_cur_scale >> 33);
|
|
rc->log_scale[frame_type] =
|
|
od_blog64(scale) - od_blog64(nframes[qti]) - OD_Q57(24);
|
|
} else {
|
|
log_cur_scale = (int64_t)rc->cur_metrics.log_scale << 33;
|
|
}
|
|
} break;
|
|
}
|
|
|
|
/*Quantizer selection sticks to the codable, lossy portion of the quantizer
|
|
range.*/
|
|
lossy_quantizer_min = convert_to_ac_quant(rc->minq, rc->bit_depth);
|
|
lossy_quantizer_max = convert_to_ac_quant(rc->maxq, rc->bit_depth);
|
|
frame_subtype = frame_type;
|
|
/*Stash quantizer modulation by frame type.*/
|
|
mqp_Q12[OD_I_FRAME] = OD_F_Q12(mqp_i);
|
|
mqp_Q12[OD_P_FRAME] = OD_F_Q12(mqp_p);
|
|
mqp_Q12[OD_GOLDEN_P_FRAME] = OD_F_Q12(mqp_gp);
|
|
mqp_Q12[OD_ALTREF_P_FRAME] = OD_F_Q12(mqp_ap);
|
|
dqp_Q45[OD_I_FRAME] = OD_F_Q45(OD_DQP_I);
|
|
dqp_Q45[OD_P_FRAME] = OD_F_Q45(OD_DQP_P);
|
|
dqp_Q45[OD_GOLDEN_P_FRAME] = OD_F_Q45(OD_DQP_GP);
|
|
dqp_Q45[OD_ALTREF_P_FRAME] = OD_F_Q45(OD_DQP_AP);
|
|
/*Is rate control active?*/
|
|
if (rc->target_bitrate <= 0) {
|
|
/*Rate control is not active; derive quantizer directly from
|
|
quality parameter and frame type. */
|
|
/*Can't use the OD_LOSSLESS macro, as it uses state.quantizer to intuit,
|
|
and we've not set it yet.*/
|
|
if (rc->quality == 0) {
|
|
/*Lossless coding requested.*/
|
|
rc->base_quantizer = 0;
|
|
rc->target_quantizer = 0;
|
|
} else {
|
|
int64_t log_quantizer;
|
|
|
|
/* Adjust the modulation constants using the last frame's quantizer. */
|
|
double mqp_delta = (255 - rc->target_quantizer) / 2000.0f;
|
|
mqp_i -= mqp_delta;
|
|
mqp_p += mqp_delta;
|
|
mqp_gp -= mqp_delta;
|
|
mqp_Q12[OD_I_FRAME] = OD_F_Q12(mqp_i);
|
|
mqp_Q12[OD_P_FRAME] = OD_F_Q12(mqp_p);
|
|
mqp_Q12[OD_GOLDEN_P_FRAME] = OD_F_Q12(mqp_gp);
|
|
mqp_Q12[OD_ALTREF_P_FRAME] = OD_F_Q12(mqp_ap);
|
|
|
|
if (rc->quality == -1) {
|
|
/*A quality of -1 means quality was unset; use a default.*/
|
|
rc->base_quantizer = convert_to_ac_quant(10, rc->bit_depth);
|
|
} else {
|
|
rc->base_quantizer = convert_to_ac_quant(rc->quality, rc->bit_depth);
|
|
}
|
|
|
|
if (rc->periodic_boosts && !is_golden_frame) {
|
|
int pattern_rate = (rc->goldenframe_rate >> 1);
|
|
int dist_to_golden = rc->cur_frame % pattern_rate;
|
|
int dist_away_golden = pattern_rate - dist_to_golden;
|
|
int boost = dist_to_golden;
|
|
if (dist_away_golden > dist_to_golden) boost = dist_away_golden;
|
|
boost -= pattern_rate;
|
|
boost *= (rc->base_quantizer) / OD_PERIODIC_BOOST_DIV;
|
|
rc->base_quantizer = rc->base_quantizer + boost;
|
|
}
|
|
|
|
/*As originally written, qp modulation is applied to the coded quantizer.
|
|
Because we now have and use a more precise target quantizer for various
|
|
calculation, that needs to be modulated as well.
|
|
Calculate what is, effectively, a fractional coded quantizer. */
|
|
/*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
|
|
log_quantizer = od_blog64(rc->base_quantizer) - OD_Q57(OD_COEFF_SHIFT);
|
|
/*log_quantizer to Q21.*/
|
|
log_quantizer >>= 36;
|
|
/*scale log quantizer, result is Q33.*/
|
|
log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
|
|
/*Add Q33 offset to Q33 log_quantizer.*/
|
|
log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
|
|
/*Modulate quantizer according to frame type; result is Q45.*/
|
|
log_quantizer *= mqp_Q12[frame_subtype];
|
|
/*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
|
|
log_quantizer += dqp_Q45[frame_subtype];
|
|
/*Back to log2 quantizer in Q57.*/
|
|
log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
|
|
OD_LOG_QUANTIZER_EXP_Q12 +
|
|
OD_Q57(OD_COEFF_SHIFT);
|
|
/*Convert Q57 log2 quantizer to unclamped linear target quantizer value.*/
|
|
rc->target_quantizer = od_bexp64(log_quantizer);
|
|
}
|
|
} else {
|
|
int clamp;
|
|
int64_t rate_bias;
|
|
int64_t rate_total;
|
|
int base_quantizer;
|
|
int64_t log_quantizer;
|
|
int qlo;
|
|
int qhi;
|
|
int i;
|
|
/*We clamp the allowed amount of qi change (after initialization).*/
|
|
clamp = rc->cur_frame > 0;
|
|
/*Figure out how to re-distribute bits so that we hit our fullness target
|
|
before the last keyframe in our current buffer window (after the current
|
|
frame), or the end of the buffer window, whichever comes first.*/
|
|
/*Single pass only right now.*/
|
|
/*If we've been missing our target, add a penalty term.*/
|
|
rate_bias = (rc->rate_bias / (rc->cur_frame + 1000)) * reservoir_frames;
|
|
/*rate_total is the total bits available over the next
|
|
reservoir_frames frames.*/
|
|
rate_total = rc->reservoir_fullness - rc->reservoir_target + rate_bias +
|
|
reservoir_frames * rc->bits_per_frame;
|
|
/*Find a target quantizer that meets our rate target for the specific mix
|
|
of frame types we'll have over the next frame_delay frames.
|
|
We model the rate<->quantizer relationship as:
|
|
rate = scale*(quantizer**-exp)
|
|
In this case, we have our desired rate, an exponent selected in setup,
|
|
and a scale that's been measured over our frame history, so we're
|
|
solving for the quantizer.
|
|
Exponentiation with arbitrary exponents is expensive, so we work in
|
|
the binary log domain (binary exp and log aren't too bad):
|
|
rate = e2(log2_scale - log2_quantizer * exp)
|
|
There's no easy closed form solution, so we bisection search for it.*/
|
|
/*We do not currently allow rate control to select lossless encoding.*/
|
|
qlo = 1;
|
|
/*If there's a quality specified, it's used to select the
|
|
coarsest base quantizer we can select.
|
|
Otherwise we can use up to and including the coarsest codable
|
|
quantizer.*/
|
|
if (rc->quality > 0)
|
|
qhi = convert_to_ac_quant(rc->quality, rc->bit_depth);
|
|
else
|
|
qhi = lossy_quantizer_max;
|
|
base_quantizer = (qlo + qhi) >> 1;
|
|
while (qlo < qhi) {
|
|
volatile int64_t log_base_quantizer;
|
|
int64_t diff;
|
|
int64_t bits;
|
|
/*Count bits contributed by each frame type using the model.*/
|
|
bits = 0;
|
|
log_base_quantizer = od_blog64(base_quantizer);
|
|
for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
|
|
/*Modulate base quantizer by frame type.*/
|
|
/*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
|
|
log_quantizer = log_base_quantizer - OD_Q57(OD_COEFF_SHIFT);
|
|
/*log_quantizer to Q21.*/
|
|
log_quantizer >>= 36;
|
|
/*scale log quantizer, result is Q33.*/
|
|
log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
|
|
/*Add Q33 offset to Q33 log_quantizer.*/
|
|
log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
|
|
/*Modulate quantizer according to frame type; result is Q45.*/
|
|
log_quantizer *= mqp_Q12[i];
|
|
/*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
|
|
log_quantizer += dqp_Q45[i];
|
|
/*Back to log2 quantizer in Q57.*/
|
|
log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
|
|
OD_LOG_QUANTIZER_EXP_Q12 +
|
|
OD_Q57(OD_COEFF_SHIFT);
|
|
/*Clamp modulated quantizer values.*/
|
|
log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
|
|
od_blog64(lossy_quantizer_max));
|
|
/* All the fields here are Q57 except for the exponent which is Q6.*/
|
|
bits += nframes[i] * od_bexp64(rc->log_scale[i] + rc->log_npixels -
|
|
(log_quantizer >> 6) * rc->exp[i]);
|
|
}
|
|
diff = bits - rate_total;
|
|
if (diff > 0) {
|
|
qlo = base_quantizer + 1;
|
|
} else if (diff < 0) {
|
|
qhi = base_quantizer - 1;
|
|
} else {
|
|
break;
|
|
}
|
|
base_quantizer = (qlo + qhi) >> 1;
|
|
}
|
|
/*If this was not one of the initial frames, limit the change in base
|
|
quantizer to within [0.8*Q,1.2*Q], where Q is the previous frame's
|
|
base quantizer.*/
|
|
if (clamp) {
|
|
base_quantizer = OD_CLAMPI((rc->base_quantizer * 0x0CCCD + 0x8000) >> 16,
|
|
base_quantizer,
|
|
(rc->base_quantizer * 0x13333 + 0x8000) >> 16);
|
|
}
|
|
/*Modulate chosen base quantizer to produce target quantizer.*/
|
|
log_quantizer = od_blog64(base_quantizer);
|
|
/*Get the log2 quantizer in Q57 (normalized for coefficient shift).*/
|
|
log_quantizer -= OD_Q57(OD_COEFF_SHIFT);
|
|
/*log_quantizer to Q21.*/
|
|
log_quantizer >>= 36;
|
|
/*scale log quantizer, result is Q33.*/
|
|
log_quantizer *= OD_LOG_QUANTIZER_BASE_Q12;
|
|
/*Add Q33 offset to Q33 log_quantizer.*/
|
|
log_quantizer += OD_LOG_QUANTIZER_OFFSET_Q45 >> 12;
|
|
/*Modulate quantizer according to frame type; result is Q45.*/
|
|
log_quantizer *= mqp_Q12[frame_subtype];
|
|
/*Add Q45 boost/cut to Q45 fractional coded quantizer.*/
|
|
log_quantizer += dqp_Q45[frame_subtype];
|
|
/*Back to log2 quantizer in Q57.*/
|
|
log_quantizer = (log_quantizer - OD_LOG_QUANTIZER_OFFSET_Q45) *
|
|
OD_LOG_QUANTIZER_EXP_Q12 +
|
|
OD_Q57(OD_COEFF_SHIFT);
|
|
/*Clamp modulated quantizer values.*/
|
|
log_quantizer = OD_CLAMPI(od_blog64(lossy_quantizer_min), log_quantizer,
|
|
od_blog64(lossy_quantizer_max));
|
|
/*The above allocation looks only at the total rate we'll accumulate in
|
|
the next reservoir_frame_delay frames.
|
|
However we could overflow the bit reservoir on the very next frame, so
|
|
check for that here if we're not using a soft target.*/
|
|
if (rc->cap_overflow) {
|
|
int64_t margin;
|
|
int64_t soft_limit;
|
|
int64_t log_soft_limit;
|
|
int64_t log_scale_pixels;
|
|
int64_t exp;
|
|
int64_t log_qexp;
|
|
/*Allow 3% of the buffer for prediction error.
|
|
This should be plenty, and we don't mind if we go a bit over; we only
|
|
want to keep these bits from being completely wasted.*/
|
|
margin = (rc->reservoir_max + 31) >> 5;
|
|
/*We want to use at least this many bits next frame.*/
|
|
soft_limit = rc->reservoir_fullness + rc->bits_per_frame -
|
|
(rc->reservoir_max - margin);
|
|
log_soft_limit = od_blog64(soft_limit);
|
|
/*If we're predicting we won't use that many bits...*/
|
|
log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
|
|
exp = rc->exp[frame_subtype];
|
|
log_qexp = (log_quantizer >> 6) * exp;
|
|
if (log_scale_pixels - log_qexp < log_soft_limit) {
|
|
/*Scale the adjustment based on how far into the margin we are.*/
|
|
log_qexp += ((log_scale_pixels - log_soft_limit - log_qexp) >> 32) *
|
|
(OD_MINI(margin, soft_limit) << 32) / margin;
|
|
log_quantizer = (((log_qexp + (exp >> 1)) / exp) << 6);
|
|
}
|
|
}
|
|
/*We just checked we don't overflow the reservoir next frame, now check
|
|
we don't underflow and bust the budget (when not using a soft target).
|
|
Disabled when a quality bound is set; if we saturate quantizer to the
|
|
maximum possible size when we have a limiting max quality, the
|
|
resulting lambda can cause strange behavior.*/
|
|
if (rc->quality == -1) {
|
|
int64_t exp;
|
|
int64_t log_qexp;
|
|
int64_t log_scale_pixels;
|
|
int64_t log_hard_limit;
|
|
/*Compute the maximum number of bits we can use in the next frame.
|
|
Allow 50% of the rate for a single frame for prediction error.
|
|
This may not be enough for keyframes or sudden changes in
|
|
complexity.*/
|
|
log_hard_limit =
|
|
od_blog64(rc->reservoir_fullness + (rc->bits_per_frame >> 1));
|
|
/*If we're predicting we'll use more than this...*/
|
|
log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
|
|
exp = rc->exp[frame_subtype];
|
|
log_qexp = (log_quantizer >> 6) * exp;
|
|
if (log_scale_pixels - log_qexp > log_hard_limit) {
|
|
/*Force the target to hit our limit exactly.*/
|
|
log_qexp = log_scale_pixels - log_hard_limit;
|
|
log_quantizer = (log_qexp + (exp >> 1)) / exp << 6;
|
|
/*If that target is unreasonable, oh well; we'll have to drop.*/
|
|
log_quantizer = OD_MAXI(log_quantizer, od_blog64(lossy_quantizer_max));
|
|
}
|
|
}
|
|
/*Compute a final estimate of the number of bits we plan to use, update
|
|
the running rate bias measurement.*/
|
|
{
|
|
int64_t log_qexp;
|
|
int64_t log_scale_pixels;
|
|
log_scale_pixels = rc->log_scale[frame_subtype] + rc->log_npixels;
|
|
log_qexp = (log_quantizer >> 6) * rc->exp[frame_subtype];
|
|
rc->rate_bias += od_bexp64(log_scale_pixels - log_qexp);
|
|
}
|
|
rc->target_quantizer = od_bexp64(log_quantizer);
|
|
/*The various cappings and adjustments may have altered the log_quantizer
|
|
target significantly.
|
|
We can either update the base quantizer to be consistent with the
|
|
target or let it track separately.
|
|
Theora behavior effectively keeps them consistent, as it regenerates
|
|
the effective base quantizer from the target each frame rather than
|
|
saving both.
|
|
For Daala, it's easier to allow them to track separately.
|
|
For now, allow them to track separately and see how it behaves.*/
|
|
rc->base_quantizer = base_quantizer;
|
|
}
|
|
*bottom_idx = lossy_quantizer_min;
|
|
*top_idx = lossy_quantizer_max;
|
|
rc->target_quantizer = av1_qindex_from_ac(
|
|
OD_CLAMPI(lossy_quantizer_min, rc->target_quantizer, lossy_quantizer_max),
|
|
rc->bit_depth);
|
|
return rc->target_quantizer;
|
|
}
|
|
|
|
/*Feeds the result of the frame that was just coded back into the rate
   controller.
  rc: Rate control state to update.
  bits: Number of bits the frame used; a value <= 0 is handled as a frame in
   which nothing was coded.
  is_golden_frame, is_altref_frame: When frame_type is OD_P_FRAME, select the
   golden/altref P subtype used for the drop bookkeeping.
  frame_type: Index used for the per-type exponent, scale estimate, scale
   filter and frame count.
  droppable: Non-zero if the frame may be dropped when it busts the budget.
  Return: 1 if the frame must be dropped, 0 otherwise (always 0 when no
   target bitrate is set).*/
int od_enc_rc_update_state(od_rc_state *rc, int64_t bits, int is_golden_frame,
                           int is_altref_frame, int frame_type, int droppable) {
  int64_t frame_log_scale;
  int subtype;
  int was_dropped;
  /*Update rate control only if rate control is active.*/
  if (!(rc->target_bitrate > 0)) return 0;
  was_dropped = 0;
  /*Drops are tracked separately for plain, golden and altref P frames.*/
  subtype = frame_type;
  if (frame_type == OD_P_FRAME) {
    if (is_golden_frame) {
      subtype = OD_GOLDEN_P_FRAME;
    } else if (is_altref_frame) {
      subtype = OD_ALTREF_P_FRAME;
    }
  }
  if (bits > 0) {
    int64_t log_bits;
    int64_t log_quant;
    int64_t log_qexp;
    int64_t estimate;
    /*Compute the estimated scale factor for this frame type, capped at
       OD_Q57(16).*/
    log_bits = od_blog64(bits);
    log_quant = od_blog64(rc->target_quantizer);
    log_qexp = (log_quant >> 6) * (rc->exp[frame_type]);
    estimate = log_bits - rc->log_npixels + log_qexp;
    frame_log_scale = OD_MINI(estimate, OD_Q57(16));
  } else {
    /*We didn't code any blocks in this frame.*/
    frame_log_scale = OD_Q57(-64);
    bits = 0;
    rc->prev_drop_count[subtype]++;
  }

  if (rc->twopass_state == 1) {
    /*Pass 1 mode: save the metrics for this frame.*/
    int golden;
    int altref;
    int64_t ipc;
    rc->cur_metrics.frame_type =
        od_frame_type(rc, rc->cur_frame, &golden, &altref, &ipc);
    rc->cur_metrics.log_scale = od_q57_to_q24(frame_log_scale);
  } else if (rc->twopass_state == 2) {
    /*Pass 2 mode: take this frame out of the per-type frame count and scale
       sum loaded from the first pass.*/
    int metrics_type = rc->cur_metrics.frame_type;
    rc->nframes[metrics_type]--;
    rc->scale_sum[metrics_type] -= od_bexp64_q24(rc->cur_metrics.log_scale);
  }

  if (bits > 0) {
    od_iir_bessel2 *filter;
    filter = &rc->scalefilter[frame_type];
    if (rc->frame_count[frame_type] == 0) {
      /*First frame of this type: replace the default scale guess outright
         with the estimate we just computed.*/
      filter->y[1] = filter->y[0] = filter->x[1] = filter->x[0] =
          od_q57_to_q24(frame_log_scale);
      rc->log_scale[frame_type] = frame_log_scale;
    } else {
      /*Lengthen the time constant for the inter filters as we collect more
         frame statistics, until we reach our target.*/
      if (frame_type != OD_I_FRAME &&
          rc->inter_p_delay < rc->inter_delay_target &&
          rc->frame_count[frame_type] >= rc->inter_p_delay) {
        rc->inter_p_delay++;
        od_iir_bessel2_reinit(filter, rc->inter_p_delay);
      }
      /*The low-pass scale filter is updated whether or not the frame ends
         up being dropped below.*/
      rc->log_scale[frame_type] =
          od_iir_bessel2_update(filter, od_q57_to_q24(frame_log_scale)) << 33;
    }
    if (!droppable || !(rc->reservoir_fullness + rc->bits_per_frame < bits)) {
      uint32_t drop_count;
      /*The frame is kept: update the low-pass filter that estimates the
         "real" frame rate taking drops into account.
        This is only done for coded frames, as it needs the final count of
         dropped frames.*/
      drop_count = rc->prev_drop_count[subtype] + 1;
      drop_count = drop_count > 0x7F ? 0x7FFFFFFF : drop_count << 24;
      rc->log_drop_scale[subtype] =
          od_blog64(od_iir_bessel2_update(&rc->vfrfilter[subtype],
                                          drop_count)) -
          OD_Q57(24);
      /*Zero the drop count for this frame.
        It will be increased if we drop frames.*/
      rc->prev_drop_count[subtype] = 0;
    } else {
      /*This frame busts our budget, so it must be dropped.*/
      rc->prev_drop_count[subtype]++;
      bits = 0;
      was_dropped = 1;
    }
    /*Increment the frame count for filter adaptation purposes.*/
    if (rc->twopass_state == 0) rc->frame_count[frame_type]++;
  }
  rc->reservoir_fullness += rc->bits_per_frame - bits;
  /*If we're too quick filling the buffer and overflow is capped,
     that rate is lost forever.*/
  if (rc->cap_overflow && rc->reservoir_fullness > rc->reservoir_max) {
    rc->reservoir_fullness = rc->reservoir_max;
  }
  /*If we're too quick draining the buffer and underflow is capped,
     don't try to make up that rate later.*/
  if (rc->cap_underflow && rc->reservoir_fullness < 0) {
    rc->reservoir_fullness = 0;
  }
  /*Adjust the bias for the real bits we've used.*/
  rc->rate_bias -= bits;
  return was_dropped;
}
|
|
|
|
/*Appends `bytes` bytes of val to the two-pass buffer, least significant byte
   first, advancing rc->twopass_buffer_bytes by one per byte written.*/
static INLINE void od_rc_buffer_val(od_rc_state *rc, int64_t val, int bytes) {
  int remaining;
  for (remaining = bytes; remaining > 0; remaining--) {
    rc->twopass_buffer[rc->twopass_buffer_bytes] = (uint8_t)(val & 0xFF);
    rc->twopass_buffer_bytes++;
    val >>= 8;
  }
}
|
|
|
|
/*Reads `bytes` bytes from the two-pass buffer at the current offset and
   assembles them into an integer, least significant byte first (the byte
   order od_rc_buffer_val() writes), advancing rc->twopass_buffer_bytes past
   them.
  The value is accumulated as unsigned: left-shifting a byte whose top bit is
   set into bit 63 of a signed 64-bit integer is undefined behavior in C, and
   8-byte reads can do exactly that.
  No sign extension is performed for reads narrower than 8 bytes.*/
static INLINE int64_t od_rc_unbuffer_val(od_rc_state *rc, int bytes) {
  uint64_t ret = 0;
  int shift = 0;
  while (bytes-- > 0) {
    ret |= (uint64_t)rc->twopass_buffer[rc->twopass_buffer_bytes++] << shift;
    shift += 8;
  }
  /*Reinterpret the assembled bit pattern as signed, as OD_Q57() does.*/
  return (int64_t)ret;
}
|
|
|
|
/*Serializes first-pass statistics into rc->firstpass_buffer and appends them
   to pkt_list as an AOM_CODEC_STATS_PKT packet.
  rc: Rate control state; switched to pass 1 mode (with the per-type frame
   counts, exponents and scale sums zeroed) on the first call.
  pkt_list: Packet list that receives the stats packet.
  summary: Non-zero to emit the summary packet (magic, version, then the
   per-type frame count, exponent and scale sum); zero to accumulate and emit
   the metrics of the current frame (frame type and log scale).
  Return: Always 0.*/
int od_enc_rc_2pass_out(od_rc_state *rc, struct aom_codec_pkt_list *pkt_list,
                        int summary) {
  struct aom_codec_cx_pkt pkt;
  int subtype;
  /*Every packet is built from the start of the first-pass buffer.*/
  rc->twopass_buffer = rc->firstpass_buffer;
  rc->twopass_buffer_bytes = 0;
  if (rc->twopass_state == 0) {
    /*First call: enter pass 1 mode and clear the accumulators.*/
    rc->twopass_state = 1;
    for (subtype = 0; subtype < OD_FRAME_NSUBTYPES; subtype++) {
      rc->frame_count[subtype] = 0;
      rc->exp[subtype] = 0;
      rc->scale_sum[subtype] = 0;
    }
  }
  if (!summary) {
    /*Per-frame packet: fold the frame into the running totals, then write
       its type (1 byte) and log scale (4 bytes).*/
    int frame_type = rc->cur_metrics.frame_type;
    rc->scale_sum[frame_type] += od_bexp64_q24(rc->cur_metrics.log_scale);
    rc->frame_count[frame_type]++;
    od_rc_buffer_val(rc, rc->cur_metrics.frame_type, 1);
    od_rc_buffer_val(rc, rc->cur_metrics.log_scale, 4);
  } else {
    /*Summary packet: header followed by the per-type totals.*/
    od_rc_buffer_val(rc, OD_RC_2PASS_MAGIC, 4);
    od_rc_buffer_val(rc, OD_RC_2PASS_VERSION, 1);
    for (subtype = 0; subtype < OD_FRAME_NSUBTYPES; subtype++) {
      od_rc_buffer_val(rc, rc->frame_count[subtype], 4);
      od_rc_buffer_val(rc, rc->exp[subtype], 4);
      od_rc_buffer_val(rc, rc->scale_sum[subtype], 8);
    }
  }
  pkt.kind = AOM_CODEC_STATS_PKT;
  pkt.data.twopass_stats.buf = rc->firstpass_buffer;
  pkt.data.twopass_stats.sz = rc->twopass_buffer_bytes;
  aom_codec_pkt_list_add(pkt_list, &pkt);
  return 0;
}
|
|
|
|
/*Loads the next frame's first-pass metrics from the pass 1 stats buffer.
  On the first call (rc->twopass_state == 0) the summary packet stored at the
   end of rc->twopass_allframes_buf is validated and used to seed the per-type
   frame counts, exponents and scale sums; the state is then reset and
   switched to pass 2 mode with the read position at the start of the buffer.
  Every successful call reads one per-frame packet (1 byte frame type, 4 byte
   log scale) into rc->cur_metrics.
  Return: 0 on success, -1 if the stats buffer is missing or too small, the
   summary has a bad magic or version, or the summary describes a number of
   frames that is zero or cannot fit in the buffer.*/
int od_enc_rc_2pass_in(od_rc_state *rc) {
  /* Enable pass 2 mode if this is the first call. */
  if (rc->twopass_state == 0) {
    uint32_t i;
    /*The counts come from the stats buffer and are not trusted, so they are
       summed in 64 bits where OD_FRAME_NSUBTYPES 32-bit values cannot wrap.*/
    uint64_t total_frames = 0;

    if (!rc->twopass_allframes_buf ||
        rc->twopass_allframes_buf_size < OD_RC_2PASS_MIN)
      return -1;

    /* Find summary packet at the end */
    rc->twopass_buffer = rc->twopass_allframes_buf;
    rc->twopass_buffer +=
        rc->twopass_allframes_buf_size - OD_RC_2PASS_SUMMARY_SZ;
    rc->twopass_buffer_bytes = 0;

    if (od_rc_unbuffer_val(rc, 4) != OD_RC_2PASS_MAGIC) return -1;
    if (od_rc_unbuffer_val(rc, 1) != OD_RC_2PASS_VERSION) return -1;

    for (i = 0; i < OD_FRAME_NSUBTYPES; i++) {
      /*Raw stored count, in [0, 2^32).*/
      int64_t count = od_rc_unbuffer_val(rc, 4);
      rc->frame_count[i] = count;
      rc->exp[i] = od_rc_unbuffer_val(rc, 4);
      rc->scale_sum[i] = od_rc_unbuffer_val(rc, 8);
      rc->nframes[i] = rc->frame_count[i];
      total_frames += (uint64_t)count;
    }

    if (total_frames < 1) return -1;

    /*Compare by division rather than multiplying the frame count by the
       packet size, so an oversized count cannot wrap the product and slip
       past the check.*/
    if (total_frames >
        (uint64_t)rc->twopass_allframes_buf_size / (OD_RC_2PASS_PACKET_SZ))
      return -1;

    od_enc_rc_reset(rc);

    /* Everything looks ok */
    rc->twopass_buffer = rc->twopass_allframes_buf;
    rc->twopass_state = 2;
    rc->twopass_buffer_bytes = 0;
  }

  /*Read the per-frame packet at the current position.*/
  rc->cur_metrics.frame_type = od_rc_unbuffer_val(rc, 1);
  rc->cur_metrics.log_scale = od_rc_unbuffer_val(rc, 4);

  return 0;
}
|