Integerize warped motion computation

Integerizes computation of the least squares for warped motion.
The model is restricted to only Affine. Affine seems easiest
to compute and integerize since it can be split into two 3-dim
least squares problems, as opposed to rotation-zoom which needs
a 4-dim least-squares problem to be solved.
The current implementation requires only one division per block.

BDRATE impact is mminimal. The upgrade to the affine model improves
coding efficiency but integerization also degrades efficiency a
little. Overall there is a net gain of about -0.07% BDRATE on
the lowres set.
BDRATE lowres: -1.113% with ----enable-warped-motion vs. without
(up from -1.044%).

Change-Id: I6b9216ac0737d76f59054293eabee48e17739ec4
This commit is contained in:
Debargha Mukherjee 2017-02-26 08:50:56 -08:00
Родитель 4db04d3678
Коммит e6eb3b53c3
7 изменённых файлов: 915 добавлений и 838 удалений

Просмотреть файл

@ -1114,7 +1114,7 @@ void calc_projection_samples(MB_MODE_INFO *const mbmi,
#if CONFIG_GLOBAL_MOTION
MACROBLOCKD *xd,
#endif
int x, int y, double *pts_inref) {
int x, int y, int *pts_inref) {
if (mbmi->motion_mode == WARPED_CAUSAL
#if CONFIG_GLOBAL_MOTION
|| (mbmi->mode == ZEROMV &&
@ -1131,16 +1131,19 @@ void calc_projection_samples(MB_MODE_INFO *const mbmi,
&mbmi->wm_params[0];
project_points(wm, ipts, ipts_inref, 1, 2, 2, 0, 0);
pts_inref[0] = (double)ipts_inref[0] / (double)WARPEDPIXEL_PREC_SHIFTS;
pts_inref[1] = (double)ipts_inref[1] / (double)WARPEDPIXEL_PREC_SHIFTS;
pts_inref[0] =
ROUND_POWER_OF_TWO_SIGNED(ipts_inref[0], WARPEDPIXEL_PREC_BITS - 3);
pts_inref[1] =
ROUND_POWER_OF_TWO_SIGNED(ipts_inref[1], WARPEDPIXEL_PREC_BITS - 3);
} else {
pts_inref[0] = (double)x + (double)(mbmi->mv[0].as_mv.col) * 0.125;
pts_inref[1] = (double)y + (double)(mbmi->mv[0].as_mv.row) * 0.125;
pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
}
}
// Note: Samples returned are at 1/8-pel precision
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
double *pts, double *pts_inref) {
int *pts, int *pts_inref) {
MB_MODE_INFO *const mbmi0 = &(xd->mi[0]->mbmi);
int ref_frame = mbmi0->ref_frame[0];
int up_available = xd->up_available;
@ -1178,8 +1181,8 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int x = cc_offset + j % 2 + global_offset_c;
int y = cr_offset + j / 2 + global_offset_r;
pts[0] = (double)x;
pts[1] = (double)y;
pts[0] = (x * 8);
pts[1] = (y * 8);
calc_projection_samples(mbmi,
#if CONFIG_GLOBAL_MOTION
xd,
@ -1221,8 +1224,8 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int x = cc_offset + j % 2 + global_offset_c;
int y = cr_offset + j / 2 + global_offset_r;
pts[0] = (double)x;
pts[1] = (double)y;
pts[0] = (x * 8);
pts[1] = (y * 8);
calc_projection_samples(mbmi,
#if CONFIG_GLOBAL_MOTION
xd,
@ -1260,8 +1263,8 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int x = cc_offset + j % 2 + global_offset_c;
int y = cr_offset + j / 2 + global_offset_r;
pts[0] = (double)x;
pts[1] = (double)y;
pts[0] = (x * 8);
pts[1] = (y * 8);
calc_projection_samples(mbmi,
#if CONFIG_GLOBAL_MOTION
xd,
@ -1298,11 +1301,13 @@ int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
int r_offset = j / 2;
int c_offset = j % 2;
pts[0] = (double)(cc_offset + c_offset + global_offset_c);
pts[1] = (double)(cr_offset + r_offset + global_offset_r);
int x = (cc_offset + c_offset + global_offset_c);
int y = (cr_offset + r_offset + global_offset_r);
pts_inref[0] = pts[0] + (double)(mv_col)*0.125;
pts_inref[1] = pts[1] + (double)(mv_row)*0.125;
pts[0] = (x * 8);
pts[1] = (y * 8);
pts_inref[0] = pts[0] + mv_col;
pts_inref[1] = pts[1] + mv_row;
pts += 2;
pts_inref += 2;

Просмотреть файл

@ -515,7 +515,7 @@ void av1_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi,
#if CONFIG_WARPED_MOTION
int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
double *pts, double *pts_inref);
int *pts, int *pts_inref);
#endif // CONFIG_WARPED_MOTION
#ifdef __cplusplus

Просмотреть файл

@ -1213,826 +1213,147 @@ void av1_warp_plane(WarpedMotionParams *wm,
y_scale, ref_frm);
}
void av1_integerize_model(const double *model, TransformationType wmtype,
WarpedMotionParams *wm) {
wm->wmtype = wmtype;
switch (wmtype) {
case HOMOGRAPHY:
assert(fabs(model[8] - 1.0) < 1e-12);
wm->wmmat[6] =
(int32_t)lrint(model[6] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
wm->wmmat[7] =
(int32_t)lrint(model[7] * (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
/* fallthrough intended */
case AFFINE:
wm->wmmat[4] = (int32_t)lrint(model[4] * (1 << WARPEDMODEL_PREC_BITS));
wm->wmmat[5] = (int32_t)lrint(model[5] * (1 << WARPEDMODEL_PREC_BITS));
/* fallthrough intended */
case ROTZOOM:
wm->wmmat[2] = (int32_t)lrint(model[2] * (1 << WARPEDMODEL_PREC_BITS));
wm->wmmat[3] = (int32_t)lrint(model[3] * (1 << WARPEDMODEL_PREC_BITS));
/* fallthrough intended */
case TRANSLATION:
wm->wmmat[0] = (int32_t)lrint(model[0] * (1 << WARPEDMODEL_PREC_BITS));
wm->wmmat[1] = (int32_t)lrint(model[1] * (1 << WARPEDMODEL_PREC_BITS));
break;
default: assert(0 && "Invalid TransformationType");
}
}
///////////////////////////////////////////////////////////////////////////////
// svdcmp
// Adopted from Numerical Recipes in C
static const double TINY_NEAR_ZERO = 1.0E-12;
static INLINE double sign(double a, double b) {
return ((b) >= 0 ? fabs(a) : -fabs(a));
}
static INLINE double pythag(double a, double b) {
double ct;
const double absa = fabs(a);
const double absb = fabs(b);
if (absa > absb) {
ct = absb / absa;
return absa * sqrt(1.0 + ct * ct);
} else {
ct = absa / absb;
return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
}
}
static void multiply_mat(const double *m1, const double *m2, double *res,
const int m1_rows, const int inner_dim,
const int m2_cols) {
double sum;
int row, col, inner;
for (row = 0; row < m1_rows; ++row) {
for (col = 0; col < m2_cols; ++col) {
sum = 0;
for (inner = 0; inner < inner_dim; ++inner)
sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
*(res++) = sum;
}
}
}
static int svdcmp(double **u, int m, int n, double w[], double **v) {
const int max_its = 30;
int flag, i, its, j, jj, k, l, nm;
double anorm, c, f, g, h, s, scale, x, y, z;
double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
g = scale = anorm = 0.0;
for (i = 0; i < n; i++) {
l = i + 1;
rv1[i] = scale * g;
g = s = scale = 0.0;
if (i < m) {
for (k = i; k < m; k++) scale += fabs(u[k][i]);
if (scale != 0.) {
for (k = i; k < m; k++) {
u[k][i] /= scale;
s += u[k][i] * u[k][i];
}
f = u[i][i];
g = -sign(sqrt(s), f);
h = f * g - s;
u[i][i] = f - g;
for (j = l; j < n; j++) {
for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
f = s / h;
for (k = i; k < m; k++) u[k][j] += f * u[k][i];
}
for (k = i; k < m; k++) u[k][i] *= scale;
}
}
w[i] = scale * g;
g = s = scale = 0.0;
if (i < m && i != n - 1) {
for (k = l; k < n; k++) scale += fabs(u[i][k]);
if (scale != 0.) {
for (k = l; k < n; k++) {
u[i][k] /= scale;
s += u[i][k] * u[i][k];
}
f = u[i][l];
g = -sign(sqrt(s), f);
h = f * g - s;
u[i][l] = f - g;
for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
for (j = l; j < m; j++) {
for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
for (k = l; k < n; k++) u[j][k] += s * rv1[k];
}
for (k = l; k < n; k++) u[i][k] *= scale;
}
}
anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
}
for (i = n - 1; i >= 0; i--) {
if (i < n - 1) {
if (g != 0.) {
for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
for (k = l; k < n; k++) v[k][j] += s * v[k][i];
}
}
for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
}
v[i][i] = 1.0;
g = rv1[i];
l = i;
}
for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
l = i + 1;
g = w[i];
for (j = l; j < n; j++) u[i][j] = 0.0;
if (g != 0.) {
g = 1.0 / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
f = (s / u[i][i]) * g;
for (k = i; k < m; k++) u[k][j] += f * u[k][i];
}
for (j = i; j < m; j++) u[j][i] *= g;
} else {
for (j = i; j < m; j++) u[j][i] = 0.0;
}
++u[i][i];
}
for (k = n - 1; k >= 0; k--) {
for (its = 0; its < max_its; its++) {
flag = 1;
for (l = k; l >= 0; l--) {
nm = l - 1;
if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
flag = 0;
break;
}
if ((double)(fabs(w[nm]) + anorm) == anorm) break;
}
if (flag) {
c = 0.0;
s = 1.0;
for (i = l; i <= k; i++) {
f = s * rv1[i];
rv1[i] = c * rv1[i];
if ((double)(fabs(f) + anorm) == anorm) break;
g = w[i];
h = pythag(f, g);
w[i] = h;
h = 1.0 / h;
c = g * h;
s = -f * h;
for (j = 0; j < m; j++) {
y = u[j][nm];
z = u[j][i];
u[j][nm] = y * c + z * s;
u[j][i] = z * c - y * s;
}
}
}
z = w[k];
if (l == k) {
if (z < 0.0) {
w[k] = -z;
for (j = 0; j < n; j++) v[j][k] = -v[j][k];
}
break;
}
if (its == max_its - 1) {
aom_free(rv1);
return 1;
}
assert(k > 0);
x = w[l];
nm = k - 1;
y = w[nm];
g = rv1[nm];
h = rv1[k];
f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
g = pythag(f, 1.0);
f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
c = s = 1.0;
for (j = l; j <= nm; j++) {
i = j + 1;
g = rv1[i];
y = w[i];
h = s * g;
g = c * g;
z = pythag(f, h);
rv1[j] = z;
c = f / z;
s = h / z;
f = x * c + g * s;
g = g * c - x * s;
h = y * s;
y *= c;
for (jj = 0; jj < n; jj++) {
x = v[jj][j];
z = v[jj][i];
v[jj][j] = x * c + z * s;
v[jj][i] = z * c - x * s;
}
z = pythag(f, h);
w[j] = z;
if (z != 0.) {
z = 1.0 / z;
c = f * z;
s = h * z;
}
f = c * g + s * y;
x = c * y - s * g;
for (jj = 0; jj < m; jj++) {
y = u[jj][j];
z = u[jj][i];
u[jj][j] = y * c + z * s;
u[jj][i] = z * c - y * s;
}
}
rv1[l] = 0.0;
rv1[k] = f;
w[k] = x;
}
}
aom_free(rv1);
return 0;
}
static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
// Assumes allocation for U is MxN
double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
int problem, i;
problem = !(nrU && nrV);
if (!problem) {
for (i = 0; i < M; i++) {
nrU[i] = &U[i * N];
}
for (i = 0; i < N; i++) {
nrV[i] = &V[i * N];
}
} else {
if (nrU) aom_free(nrU);
if (nrV) aom_free(nrV);
return 1;
}
/* copy from given matx into nrU */
for (i = 0; i < M; i++) {
memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
}
/* HERE IT IS: do SVD */
if (svdcmp(nrU, M, N, W, nrV)) {
aom_free(nrU);
aom_free(nrV);
return 1;
}
/* aom_free Numerical Recipes arrays */
aom_free(nrU);
aom_free(nrV);
return 0;
}
int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
double ans;
int i, j, k;
double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
double *const W = (double *)aom_malloc(N * sizeof(*matx));
double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
if (!(U && W && V)) {
return 1;
}
if (SVD(U, W, V, matx, M, N)) {
aom_free(U);
aom_free(W);
aom_free(V);
return 1;
}
for (i = 0; i < N; i++) {
if (fabs(W[i]) < TINY_NEAR_ZERO) {
aom_free(U);
aom_free(W);
aom_free(V);
return 1;
}
}
for (i = 0; i < N; i++) {
for (j = 0; j < M; j++) {
ans = 0;
for (k = 0; k < N; k++) {
ans += V[k + N * i] * U[k + N * j] / W[k];
}
inv[j + M * i] = ans;
}
}
aom_free(U);
aom_free(W);
aom_free(V);
return 0;
}
static void normalize_homography(double *pts, int n, double *T) {
double *p = pts;
double mean[2] = { 0, 0 };
double msqe = 0;
double scale;
int i;
for (i = 0; i < n; ++i, p += 2) {
mean[0] += p[0];
mean[1] += p[1];
}
mean[0] /= n;
mean[1] /= n;
for (p = pts, i = 0; i < n; ++i, p += 2) {
p[0] -= mean[0];
p[1] -= mean[1];
msqe += sqrt(p[0] * p[0] + p[1] * p[1]);
}
msqe /= n;
scale = sqrt(2) / msqe;
T[0] = scale;
T[1] = 0;
T[2] = -scale * mean[0];
T[3] = 0;
T[4] = scale;
T[5] = -scale * mean[1];
T[6] = 0;
T[7] = 0;
T[8] = 1;
for (p = pts, i = 0; i < n; ++i, p += 2) {
p[0] *= scale;
p[1] *= scale;
}
}
static void invnormalize_mat(double *T, double *iT) {
double is = 1.0 / T[0];
double m0 = -T[2] * is;
double m1 = -T[5] * is;
iT[0] = is;
iT[1] = 0;
iT[2] = m0;
iT[3] = 0;
iT[4] = is;
iT[5] = m1;
iT[6] = 0;
iT[7] = 0;
iT[8] = 1;
}
static void denormalize_homography(double *params, double *T1, double *T2) {
double iT2[9];
double params2[9];
invnormalize_mat(T2, iT2);
multiply_mat(params, T1, params2, 3, 3, 3);
multiply_mat(iT2, params2, params, 3, 3, 3);
}
static void denormalize_homography_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
memcpy(params_denorm, params, sizeof(*params) * 8);
params_denorm[8] = 1.0;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = params_denorm[3];
params[5] = params_denorm[4];
params[6] = params_denorm[6];
params[7] = params_denorm[7];
}
static void denormalize_affine_reorder(double *params, double *T1, double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = params[0];
params_denorm[1] = params[1];
params_denorm[2] = params[4];
params_denorm[3] = params[2];
params_denorm[4] = params[3];
params_denorm[5] = params[5];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = params_denorm[3];
params[5] = params_denorm[4];
params[6] = params[7] = 0;
}
static void denormalize_rotzoom_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = params[0];
params_denorm[1] = params[1];
params_denorm[2] = params[2];
params_denorm[3] = -params[1];
params_denorm[4] = params[0];
params_denorm[5] = params[3];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = -params[3];
params[5] = params[2];
params[6] = params[7] = 0;
}
static void denormalize_translation_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = 1;
params_denorm[1] = 0;
params_denorm[2] = params[0];
params_denorm[3] = 0;
params_denorm[4] = 1;
params_denorm[5] = params[1];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params[5] = 1;
params[3] = params[4] = 0;
params[6] = params[7] = 0;
}
int find_translation(const int np, double *pts1, double *pts2, double *mat) {
int i;
double sx, sy, dx, dy;
double sumx, sumy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
sumx = 0;
sumy = 0;
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
sumx += dx - sx;
sumy += dy - sy;
}
mat[0] = sumx / np;
mat[1] = sumy / np;
denormalize_translation_reorder(mat, T1, T2);
return 0;
}
int find_rotzoom(const int np, double *pts1, double *pts2, double *mat) {
const int np2 = np * 2;
double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9);
double *b = a + np2 * 4;
double *temp = b + np2;
int i;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 2 * 4 + 0] = sx;
a[i * 2 * 4 + 1] = sy;
a[i * 2 * 4 + 2] = 1;
a[i * 2 * 4 + 3] = 0;
a[(i * 2 + 1) * 4 + 0] = sy;
a[(i * 2 + 1) * 4 + 1] = -sx;
a[(i * 2 + 1) * 4 + 2] = 0;
a[(i * 2 + 1) * 4 + 3] = 1;
b[2 * i] = dx;
b[2 * i + 1] = dy;
}
if (pseudo_inverse(temp, a, np2, 4)) {
aom_free(a);
return 1;
}
multiply_mat(temp, b, mat, 4, np2, 1);
denormalize_rotzoom_reorder(mat, T1, T2);
aom_free(a);
return 0;
}
int find_affine(const int np, double *pts1, double *pts2, double *mat) {
const int np2 = np * 2;
double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13);
double *b = a + np2 * 6;
double *temp = b + np2;
int i;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 2 * 6 + 0] = sx;
a[i * 2 * 6 + 1] = sy;
a[i * 2 * 6 + 2] = 0;
a[i * 2 * 6 + 3] = 0;
a[i * 2 * 6 + 4] = 1;
a[i * 2 * 6 + 5] = 0;
a[(i * 2 + 1) * 6 + 0] = 0;
a[(i * 2 + 1) * 6 + 1] = 0;
a[(i * 2 + 1) * 6 + 2] = sx;
a[(i * 2 + 1) * 6 + 3] = sy;
a[(i * 2 + 1) * 6 + 4] = 0;
a[(i * 2 + 1) * 6 + 5] = 1;
b[2 * i] = dx;
b[2 * i + 1] = dy;
}
if (pseudo_inverse(temp, a, np2, 6)) {
aom_free(a);
return 1;
}
multiply_mat(temp, b, mat, 6, np2, 1);
denormalize_affine_reorder(mat, T1, T2);
aom_free(a);
return 0;
}
int find_vertrapezoid(const int np, double *pts1, double *pts2, double *mat) {
const int np3 = np * 3;
double *a = (double *)aom_malloc(sizeof(*a) * np3 * 14);
double *U = a + np3 * 7;
double S[7], V[7 * 7], H[9];
int i, mini;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 3 * 7 + 0] = a[i * 3 * 7 + 1] = 0;
a[i * 3 * 7 + 2] = -sx;
a[i * 3 * 7 + 3] = -sy;
a[i * 3 * 7 + 4] = -1;
a[i * 3 * 7 + 5] = dy * sx;
a[i * 3 * 7 + 6] = dy;
a[(i * 3 + 1) * 7 + 0] = sx;
a[(i * 3 + 1) * 7 + 1] = 1;
a[(i * 3 + 1) * 7 + 2] = a[(i * 3 + 1) * 7 + 3] = a[(i * 3 + 1) * 7 + 4] =
0;
a[(i * 3 + 1) * 7 + 5] = -dx * sx;
a[(i * 3 + 1) * 7 + 6] = -dx;
a[(i * 3 + 2) * 7 + 0] = -dy * sx;
a[(i * 3 + 2) * 7 + 1] = -dy;
a[(i * 3 + 2) * 7 + 2] = dx * sx;
a[(i * 3 + 2) * 7 + 3] = dx * sy;
a[(i * 3 + 2) * 7 + 4] = dx;
a[(i * 3 + 2) * 7 + 5] = a[(i * 3 + 2) * 7 + 6] = 0;
}
if (SVD(U, S, V, a, np3, 7)) {
aom_free(a);
return 1;
} else {
double minS = 1e12;
mini = -1;
for (i = 0; i < 7; ++i) {
if (S[i] < minS) {
minS = S[i];
mini = i;
}
}
}
H[1] = H[7] = 0;
for (i = 0; i < 1; i++) H[i] = V[i * 7 + mini];
for (; i < 6; i++) H[i + 1] = V[i * 7 + mini];
for (; i < 7; i++) H[i + 2] = V[i * 7 + mini];
denormalize_homography_reorder(H, T1, T2);
aom_free(a);
if (H[8] == 0.0) {
return 1;
} else {
// normalize
double f = 1.0 / H[8];
for (i = 0; i < 8; i++) mat[i] = f * H[i];
}
return 0;
}
int find_hortrapezoid(const int np, double *pts1, double *pts2, double *mat) {
const int np3 = np * 3;
double *a = (double *)aom_malloc(sizeof(*a) * np3 * 14);
double *U = a + np3 * 7;
double S[7], V[7 * 7], H[9];
int i, mini;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 3 * 7 + 0] = a[i * 3 * 7 + 1] = a[i * 3 * 7 + 2] = 0;
a[i * 3 * 7 + 3] = -sy;
a[i * 3 * 7 + 4] = -1;
a[i * 3 * 7 + 5] = dy * sy;
a[i * 3 * 7 + 6] = dy;
a[(i * 3 + 1) * 7 + 0] = sx;
a[(i * 3 + 1) * 7 + 1] = sy;
a[(i * 3 + 1) * 7 + 2] = 1;
a[(i * 3 + 1) * 7 + 3] = a[(i * 3 + 1) * 7 + 4] = 0;
a[(i * 3 + 1) * 7 + 5] = -dx * sy;
a[(i * 3 + 1) * 7 + 6] = -dx;
a[(i * 3 + 2) * 7 + 0] = -dy * sx;
a[(i * 3 + 2) * 7 + 1] = -dy * sy;
a[(i * 3 + 2) * 7 + 2] = -dy;
a[(i * 3 + 2) * 7 + 3] = dx * sy;
a[(i * 3 + 2) * 7 + 4] = dx;
a[(i * 3 + 2) * 7 + 5] = a[(i * 3 + 2) * 7 + 6] = 0;
}
if (SVD(U, S, V, a, np3, 7)) {
aom_free(a);
return 1;
} else {
double minS = 1e12;
mini = -1;
for (i = 0; i < 7; ++i) {
if (S[i] < minS) {
minS = S[i];
mini = i;
}
}
}
H[3] = H[6] = 0;
for (i = 0; i < 3; i++) H[i] = V[i * 7 + mini];
for (; i < 5; i++) H[i + 1] = V[i * 7 + mini];
for (; i < 7; i++) H[i + 2] = V[i * 7 + mini];
denormalize_homography_reorder(H, T1, T2);
aom_free(a);
if (H[8] == 0.0) {
return 1;
} else {
// normalize
double f = 1.0 / H[8];
for (i = 0; i < 8; i++) mat[i] = f * H[i];
}
return 0;
}
int find_homography(const int np, double *pts1, double *pts2, double *mat) {
// Implemented from Peter Kovesi's normalized implementation
const int np3 = np * 3;
double *a = (double *)aom_malloc(sizeof(*a) * np3 * 18);
double *U = a + np3 * 9;
double S[9], V[9 * 9], H[9];
int i, mini;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 3 * 9 + 0] = a[i * 3 * 9 + 1] = a[i * 3 * 9 + 2] = 0;
a[i * 3 * 9 + 3] = -sx;
a[i * 3 * 9 + 4] = -sy;
a[i * 3 * 9 + 5] = -1;
a[i * 3 * 9 + 6] = dy * sx;
a[i * 3 * 9 + 7] = dy * sy;
a[i * 3 * 9 + 8] = dy;
a[(i * 3 + 1) * 9 + 0] = sx;
a[(i * 3 + 1) * 9 + 1] = sy;
a[(i * 3 + 1) * 9 + 2] = 1;
a[(i * 3 + 1) * 9 + 3] = a[(i * 3 + 1) * 9 + 4] = a[(i * 3 + 1) * 9 + 5] =
0;
a[(i * 3 + 1) * 9 + 6] = -dx * sx;
a[(i * 3 + 1) * 9 + 7] = -dx * sy;
a[(i * 3 + 1) * 9 + 8] = -dx;
a[(i * 3 + 2) * 9 + 0] = -dy * sx;
a[(i * 3 + 2) * 9 + 1] = -dy * sy;
a[(i * 3 + 2) * 9 + 2] = -dy;
a[(i * 3 + 2) * 9 + 3] = dx * sx;
a[(i * 3 + 2) * 9 + 4] = dx * sy;
a[(i * 3 + 2) * 9 + 5] = dx;
a[(i * 3 + 2) * 9 + 6] = a[(i * 3 + 2) * 9 + 7] = a[(i * 3 + 2) * 9 + 8] =
0;
}
if (SVD(U, S, V, a, np3, 9)) {
aom_free(a);
return 1;
} else {
double minS = 1e12;
mini = -1;
for (i = 0; i < 9; ++i) {
if (S[i] < minS) {
minS = S[i];
mini = i;
}
}
}
for (i = 0; i < 9; i++) H[i] = V[i * 9 + mini];
denormalize_homography_reorder(H, T1, T2);
aom_free(a);
if (H[8] == 0.0) {
return 1;
} else {
// normalize
double f = 1.0 / H[8];
for (i = 0; i < 8; i++) mat[i] = f * H[i];
}
return 0;
}
#if CONFIG_WARPED_MOTION
int find_projection(const int np, double *pts1, double *pts2,
WarpedMotionParams *wm_params) {
double H[9];
int result = 1;
#define IDET_PREC_BITS 48
#define IDET_WARPEDMODEL_DIFF_BITS (IDET_PREC_BITS - WARPEDMODEL_PREC_BITS)
static int find_affine_int(const int np, int *pts1, int *pts2,
WarpedMotionParams *wm) {
int64_t A[3][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
int64_t C[3][3] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
int64_t Bx[3] = { 0, 0, 0 };
int64_t By[3] = { 0, 0, 0 };
int64_t Px[3], Py[3];
int64_t Det, iDet;
int i, off;
// Offsets to make the values in the arrays smaller
const int ux = pts1[0], uy = pts1[1];
// Let source points (xi, yi) map to destimation points (xi', yi'),
// for i = 0, 1, 2, .... n-1
// Then if P = [x0, y0, 1,
// x1, y1, 1
// x2, y2, 1,
// ....
// ]
// q = [x0', x1', x2', ... ]'
// r = [y0', y1', y2', ... ]'
// the least squares problems that need to be solved are:
// [h1, h2, dx]' = inv(P'P)P'q and
// [h3, h4, dy]' = inv(P'P)P'r
// where the affine transformation is given by:
// x' = h1.x + h2.y + dx
// y' = h3.x + h4.y + dy
//
// The loop below computes: A = P'P, Bx = P'q, By = P'r
for (i = 0; i < np; ++i) {
const int dx = *(pts2++) - ux;
const int dy = *(pts2++) - uy;
const int sx = *(pts1++) - ux;
const int sy = *(pts1++) - uy;
A[0][0] += sx * sx;
A[0][1] += sx * sy;
A[0][2] += sx;
A[1][1] += sy * sy;
A[1][2] += sy;
A[2][2] += 1;
Bx[0] += sx * dx;
Bx[1] += sy * dx;
Bx[2] += dx;
By[0] += sx * dy;
By[1] += sy * dy;
By[2] += dy;
}
// Compute Cofactors of A
C[0][0] = A[1][1] * A[2][2] - A[1][2] * A[1][2];
C[0][1] = A[1][2] * A[0][2] - A[0][1] * A[2][2];
C[0][2] = A[0][1] * A[1][2] - A[0][2] * A[1][1];
C[1][1] = A[0][0] * A[2][2] - A[0][2] * A[0][2];
C[1][2] = A[0][1] * A[0][2] - A[0][0] * A[1][2];
C[2][2] = A[0][0] * A[1][1] - A[0][1] * A[0][1];
// Compute Determinant of A
Det = C[0][0] * A[0][0] + C[0][1] * A[0][1] + C[0][2] * A[0][2];
// These are the least squares solutions but need scaling.
Px[0] = C[0][0] * Bx[0] + C[0][1] * Bx[1] + C[0][2] * Bx[2];
Px[1] = C[0][1] * Bx[0] + C[1][1] * Bx[1] + C[1][2] * Bx[2];
Px[2] = C[0][2] * Bx[0] + C[1][2] * Bx[1] + C[2][2] * Bx[2];
Py[0] = C[0][0] * By[0] + C[0][1] * By[1] + C[0][2] * By[2];
Py[1] = C[0][1] * By[0] + C[1][1] * By[1] + C[1][2] * By[2];
Py[2] = C[0][2] * By[0] + C[1][2] * By[1] + C[2][2] * By[2];
// Scale by 1/16
Px[0] = ROUND_POWER_OF_TWO_SIGNED(Px[0], 4);
Px[1] = ROUND_POWER_OF_TWO_SIGNED(Px[1], 4);
Px[2] = ROUND_POWER_OF_TWO_SIGNED(Px[2], 4);
Py[0] = ROUND_POWER_OF_TWO_SIGNED(Py[0], 4);
Py[1] = ROUND_POWER_OF_TWO_SIGNED(Py[1], 4);
Py[2] = ROUND_POWER_OF_TWO_SIGNED(Py[2], 4);
Det = ROUND_POWER_OF_TWO_SIGNED(Det, 4);
if (Det == 0) return 1;
// Compute inverse of the Determinant
// TODO(debargha, yuec): Try to remove this only division
iDet = ((int64_t)1 << IDET_PREC_BITS) / Det;
wm->wmmat[2] = ((int64_t)Px[0] * (int64_t)iDet +
((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >>
IDET_WARPEDMODEL_DIFF_BITS;
wm->wmmat[3] = ((int64_t)Px[1] * (int64_t)iDet +
((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >>
IDET_WARPEDMODEL_DIFF_BITS;
wm->wmmat[0] = ((int64_t)Px[2] * (int64_t)iDet +
((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS + 2))) >>
(IDET_WARPEDMODEL_DIFF_BITS + 3);
// Adjust x displacement for the offset
off = (ux << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat[2] - uy * wm->wmmat[3];
wm->wmmat[0] += ROUND_POWER_OF_TWO_SIGNED(off, 3);
wm->wmmat[4] = ((int64_t)Py[0] * (int64_t)iDet +
((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >>
IDET_WARPEDMODEL_DIFF_BITS;
wm->wmmat[5] = ((int64_t)Py[1] * (int64_t)iDet +
((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS - 1))) >>
IDET_WARPEDMODEL_DIFF_BITS;
wm->wmmat[1] = ((int64_t)Py[2] * (int64_t)iDet +
((int64_t)1 << (IDET_WARPEDMODEL_DIFF_BITS + 2))) >>
(IDET_WARPEDMODEL_DIFF_BITS + 3);
// Adjust y displacement for the offset
off = (uy << WARPEDMODEL_PREC_BITS) - ux * wm->wmmat[4] - uy * wm->wmmat[5];
wm->wmmat[1] += ROUND_POWER_OF_TWO_SIGNED(off, 3);
wm->wmmat[6] = wm->wmmat[7] = 0;
return 0;
}
int find_projection(const int np, int *pts1, int *pts2,
WarpedMotionParams *wm_params) {
int result = 1;
switch (wm_params->wmtype) {
case AFFINE: result = find_affine(np, pts1, pts2, H); break;
case ROTZOOM: result = find_rotzoom(np, pts1, pts2, H); break;
case HOMOGRAPHY: result = find_homography(np, pts1, pts2, H); break;
case AFFINE: result = find_affine_int(np, pts1, pts2, wm_params); break;
default: assert(0 && "Invalid warped motion type!"); return 1;
}
if (result == 0) {
av1_integerize_model(H, wm_params->wmtype, wm_params);
if (wm_params->wmtype == ROTZOOM) {
wm_params->wmmat[5] = wm_params->wmmat[2];
wm_params->wmmat[4] = -wm_params->wmmat[3];
}
}
if (wm_params->wmtype == AFFINE || wm_params->wmtype == ROTZOOM) {
// check compatibility with the fast warp filter
int32_t *mat = wm_params->wmmat;
int32_t alpha, beta, gamma, delta;
if (wm_params->wmtype == AFFINE || wm_params->wmtype == ROTZOOM) {
// check compatibility with the fast warp filter
int32_t *mat = wm_params->wmmat;
int32_t alpha, beta, gamma, delta;
if (mat[2] == 0) return 1;
if (mat[2] == 0) return 1;
alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
beta = mat[3];
gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
(1 << WARPEDMODEL_PREC_BITS);
alpha = mat[2] - (1 << WARPEDMODEL_PREC_BITS);
beta = mat[3];
gamma = ((int64_t)mat[4] << WARPEDMODEL_PREC_BITS) / mat[2];
delta = mat[5] - (((int64_t)mat[3] * mat[4] + (mat[2] / 2)) / mat[2]) -
(1 << WARPEDMODEL_PREC_BITS);
if ((4 * abs(alpha) + 7 * abs(beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
(4 * abs(gamma) + 4 * abs(delta) > (1 << WARPEDMODEL_PREC_BITS))) {
return 1;
if ((4 * abs(alpha) + 7 * abs(beta) > (1 << WARPEDMODEL_PREC_BITS)) ||
(4 * abs(gamma) + 4 * abs(delta) > (1 << WARPEDMODEL_PREC_BITS))) {
return 1;
}
}
}

Просмотреть файл

@ -27,7 +27,7 @@
#if CONFIG_WARPED_MOTION
#define SAMPLES_PER_NEIGHBOR 4
#define SAMPLES_ARRAY_SIZE ((2 * MAX_MIB_SIZE + 2) * SAMPLES_PER_NEIGHBOR * 2)
#define DEFAULT_WMTYPE ROTZOOM
#define DEFAULT_WMTYPE AFFINE
#endif // CONFIG_WARPED_MOTION
const int16_t warped_filter[WARPEDPIXEL_PREC_SHIFTS * 3][8];
@ -86,16 +86,6 @@ void av1_warp_plane(WarpedMotionParams *wm,
int p_height, int p_stride, int subsampling_x,
int subsampling_y, int x_scale, int y_scale, int ref_frm);
// Integerize model into the WarpedMotionParams structure
void av1_integerize_model(const double *model, TransformationType wmtype,
WarpedMotionParams *wm);
int find_translation(const int np, double *pts1, double *pts2, double *mat);
int find_rotzoom(const int np, double *pts1, double *pts2, double *mat);
int find_affine(const int np, double *pts1, double *pts2, double *mat);
int find_hortrapezoid(const int np, double *pts1, double *pts2, double *mat);
int find_vertrapezoid(const int np, double *pts1, double *pts2, double *mat);
int find_homography(const int np, double *pts1, double *pts2, double *mat);
int find_projection(const int np, double *pts1, double *pts2,
int find_projection(const int np, int *pts1, int *pts2,
WarpedMotionParams *wm_params);
#endif // AV1_COMMON_WARPED_MOTION_H_

Просмотреть файл

@ -1577,7 +1577,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
#endif // CONFIG_REF_MV && CONFIG_EXT_INTER
int16_t mode_ctx = 0;
#if CONFIG_WARPED_MOTION
double pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#endif // CONFIG_WARPED_MOTION
#if CONFIG_EC_ADAPT
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;

Просмотреть файл

@ -133,6 +133,767 @@ static void project_points_double_homography(double *mat, double *points,
}
}
///////////////////////////////////////////////////////////////////////////////
// svdcmp
// Adopted from Numerical Recipes in C
static const double TINY_NEAR_ZERO = 1.0E-12;
static INLINE double sign(double a, double b) {
return ((b) >= 0 ? fabs(a) : -fabs(a));
}
static INLINE double pythag(double a, double b) {
double ct;
const double absa = fabs(a);
const double absb = fabs(b);
if (absa > absb) {
ct = absb / absa;
return absa * sqrt(1.0 + ct * ct);
} else {
ct = absa / absb;
return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct);
}
}
static void multiply_mat(const double *m1, const double *m2, double *res,
const int m1_rows, const int inner_dim,
const int m2_cols) {
double sum;
int row, col, inner;
for (row = 0; row < m1_rows; ++row) {
for (col = 0; col < m2_cols; ++col) {
sum = 0;
for (inner = 0; inner < inner_dim; ++inner)
sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col];
*(res++) = sum;
}
}
}
static int svdcmp(double **u, int m, int n, double w[], double **v) {
const int max_its = 30;
int flag, i, its, j, jj, k, l, nm;
double anorm, c, f, g, h, s, scale, x, y, z;
double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1));
g = scale = anorm = 0.0;
for (i = 0; i < n; i++) {
l = i + 1;
rv1[i] = scale * g;
g = s = scale = 0.0;
if (i < m) {
for (k = i; k < m; k++) scale += fabs(u[k][i]);
if (scale != 0.) {
for (k = i; k < m; k++) {
u[k][i] /= scale;
s += u[k][i] * u[k][i];
}
f = u[i][i];
g = -sign(sqrt(s), f);
h = f * g - s;
u[i][i] = f - g;
for (j = l; j < n; j++) {
for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j];
f = s / h;
for (k = i; k < m; k++) u[k][j] += f * u[k][i];
}
for (k = i; k < m; k++) u[k][i] *= scale;
}
}
w[i] = scale * g;
g = s = scale = 0.0;
if (i < m && i != n - 1) {
for (k = l; k < n; k++) scale += fabs(u[i][k]);
if (scale != 0.) {
for (k = l; k < n; k++) {
u[i][k] /= scale;
s += u[i][k] * u[i][k];
}
f = u[i][l];
g = -sign(sqrt(s), f);
h = f * g - s;
u[i][l] = f - g;
for (k = l; k < n; k++) rv1[k] = u[i][k] / h;
for (j = l; j < m; j++) {
for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k];
for (k = l; k < n; k++) u[j][k] += s * rv1[k];
}
for (k = l; k < n; k++) u[i][k] *= scale;
}
}
anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i])));
}
for (i = n - 1; i >= 0; i--) {
if (i < n - 1) {
if (g != 0.) {
for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j];
for (k = l; k < n; k++) v[k][j] += s * v[k][i];
}
}
for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0;
}
v[i][i] = 1.0;
g = rv1[i];
l = i;
}
for (i = AOMMIN(m, n) - 1; i >= 0; i--) {
l = i + 1;
g = w[i];
for (j = l; j < n; j++) u[i][j] = 0.0;
if (g != 0.) {
g = 1.0 / g;
for (j = l; j < n; j++) {
for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j];
f = (s / u[i][i]) * g;
for (k = i; k < m; k++) u[k][j] += f * u[k][i];
}
for (j = i; j < m; j++) u[j][i] *= g;
} else {
for (j = i; j < m; j++) u[j][i] = 0.0;
}
++u[i][i];
}
for (k = n - 1; k >= 0; k--) {
for (its = 0; its < max_its; its++) {
flag = 1;
for (l = k; l >= 0; l--) {
nm = l - 1;
if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) {
flag = 0;
break;
}
if ((double)(fabs(w[nm]) + anorm) == anorm) break;
}
if (flag) {
c = 0.0;
s = 1.0;
for (i = l; i <= k; i++) {
f = s * rv1[i];
rv1[i] = c * rv1[i];
if ((double)(fabs(f) + anorm) == anorm) break;
g = w[i];
h = pythag(f, g);
w[i] = h;
h = 1.0 / h;
c = g * h;
s = -f * h;
for (j = 0; j < m; j++) {
y = u[j][nm];
z = u[j][i];
u[j][nm] = y * c + z * s;
u[j][i] = z * c - y * s;
}
}
}
z = w[k];
if (l == k) {
if (z < 0.0) {
w[k] = -z;
for (j = 0; j < n; j++) v[j][k] = -v[j][k];
}
break;
}
if (its == max_its - 1) {
aom_free(rv1);
return 1;
}
assert(k > 0);
x = w[l];
nm = k - 1;
y = w[nm];
g = rv1[nm];
h = rv1[k];
f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y);
g = pythag(f, 1.0);
f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x;
c = s = 1.0;
for (j = l; j <= nm; j++) {
i = j + 1;
g = rv1[i];
y = w[i];
h = s * g;
g = c * g;
z = pythag(f, h);
rv1[j] = z;
c = f / z;
s = h / z;
f = x * c + g * s;
g = g * c - x * s;
h = y * s;
y *= c;
for (jj = 0; jj < n; jj++) {
x = v[jj][j];
z = v[jj][i];
v[jj][j] = x * c + z * s;
v[jj][i] = z * c - x * s;
}
z = pythag(f, h);
w[j] = z;
if (z != 0.) {
z = 1.0 / z;
c = f * z;
s = h * z;
}
f = c * g + s * y;
x = c * y - s * g;
for (jj = 0; jj < m; jj++) {
y = u[jj][j];
z = u[jj][i];
u[jj][j] = y * c + z * s;
u[jj][i] = z * c - y * s;
}
}
rv1[l] = 0.0;
rv1[k] = f;
w[k] = x;
}
}
aom_free(rv1);
return 0;
}
static int SVD(double *U, double *W, double *V, double *matx, int M, int N) {
// Assumes allocation for U is MxN
double **nrU = (double **)aom_malloc((M) * sizeof(*nrU));
double **nrV = (double **)aom_malloc((N) * sizeof(*nrV));
int problem, i;
problem = !(nrU && nrV);
if (!problem) {
for (i = 0; i < M; i++) {
nrU[i] = &U[i * N];
}
for (i = 0; i < N; i++) {
nrV[i] = &V[i * N];
}
} else {
if (nrU) aom_free(nrU);
if (nrV) aom_free(nrV);
return 1;
}
/* copy from given matx into nrU */
for (i = 0; i < M; i++) {
memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx));
}
/* HERE IT IS: do SVD */
if (svdcmp(nrU, M, N, W, nrV)) {
aom_free(nrU);
aom_free(nrV);
return 1;
}
/* aom_free Numerical Recipes arrays */
aom_free(nrU);
aom_free(nrV);
return 0;
}
int pseudo_inverse(double *inv, double *matx, const int M, const int N) {
double ans;
int i, j, k;
double *const U = (double *)aom_malloc(M * N * sizeof(*matx));
double *const W = (double *)aom_malloc(N * sizeof(*matx));
double *const V = (double *)aom_malloc(N * N * sizeof(*matx));
if (!(U && W && V)) {
return 1;
}
if (SVD(U, W, V, matx, M, N)) {
aom_free(U);
aom_free(W);
aom_free(V);
return 1;
}
for (i = 0; i < N; i++) {
if (fabs(W[i]) < TINY_NEAR_ZERO) {
aom_free(U);
aom_free(W);
aom_free(V);
return 1;
}
}
for (i = 0; i < N; i++) {
for (j = 0; j < M; j++) {
ans = 0;
for (k = 0; k < N; k++) {
ans += V[k + N * i] * U[k + N * j] / W[k];
}
inv[j + M * i] = ans;
}
}
aom_free(U);
aom_free(W);
aom_free(V);
return 0;
}
static void normalize_homography(double *pts, int n, double *T) {
double *p = pts;
double mean[2] = { 0, 0 };
double msqe = 0;
double scale;
int i;
for (i = 0; i < n; ++i, p += 2) {
mean[0] += p[0];
mean[1] += p[1];
}
mean[0] /= n;
mean[1] /= n;
for (p = pts, i = 0; i < n; ++i, p += 2) {
p[0] -= mean[0];
p[1] -= mean[1];
msqe += sqrt(p[0] * p[0] + p[1] * p[1]);
}
msqe /= n;
scale = sqrt(2) / msqe;
T[0] = scale;
T[1] = 0;
T[2] = -scale * mean[0];
T[3] = 0;
T[4] = scale;
T[5] = -scale * mean[1];
T[6] = 0;
T[7] = 0;
T[8] = 1;
for (p = pts, i = 0; i < n; ++i, p += 2) {
p[0] *= scale;
p[1] *= scale;
}
}
static void invnormalize_mat(double *T, double *iT) {
double is = 1.0 / T[0];
double m0 = -T[2] * is;
double m1 = -T[5] * is;
iT[0] = is;
iT[1] = 0;
iT[2] = m0;
iT[3] = 0;
iT[4] = is;
iT[5] = m1;
iT[6] = 0;
iT[7] = 0;
iT[8] = 1;
}
static void denormalize_homography(double *params, double *T1, double *T2) {
double iT2[9];
double params2[9];
invnormalize_mat(T2, iT2);
multiply_mat(params, T1, params2, 3, 3, 3);
multiply_mat(iT2, params2, params, 3, 3, 3);
}
static void denormalize_homography_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
memcpy(params_denorm, params, sizeof(*params) * 8);
params_denorm[8] = 1.0;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = params_denorm[3];
params[5] = params_denorm[4];
params[6] = params_denorm[6];
params[7] = params_denorm[7];
}
static void denormalize_affine_reorder(double *params, double *T1, double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = params[0];
params_denorm[1] = params[1];
params_denorm[2] = params[4];
params_denorm[3] = params[2];
params_denorm[4] = params[3];
params_denorm[5] = params[5];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = params_denorm[3];
params[5] = params_denorm[4];
params[6] = params[7] = 0;
}
static void denormalize_rotzoom_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = params[0];
params_denorm[1] = params[1];
params_denorm[2] = params[2];
params_denorm[3] = -params[1];
params_denorm[4] = params[0];
params_denorm[5] = params[3];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params_denorm[0];
params[3] = params_denorm[1];
params[4] = -params[3];
params[5] = params[2];
params[6] = params[7] = 0;
}
static void denormalize_translation_reorder(double *params, double *T1,
double *T2) {
double params_denorm[MAX_PARAMDIM];
params_denorm[0] = 1;
params_denorm[1] = 0;
params_denorm[2] = params[0];
params_denorm[3] = 0;
params_denorm[4] = 1;
params_denorm[5] = params[1];
params_denorm[6] = params_denorm[7] = 0;
params_denorm[8] = 1;
denormalize_homography(params_denorm, T1, T2);
params[0] = params_denorm[2];
params[1] = params_denorm[5];
params[2] = params[5] = 1;
params[3] = params[4] = 0;
params[6] = params[7] = 0;
}
static int find_translation(const int np, double *pts1, double *pts2,
double *mat) {
int i;
double sx, sy, dx, dy;
double sumx, sumy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
sumx = 0;
sumy = 0;
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
sumx += dx - sx;
sumy += dy - sy;
}
mat[0] = sumx / np;
mat[1] = sumy / np;
denormalize_translation_reorder(mat, T1, T2);
return 0;
}
static int find_rotzoom(const int np, double *pts1, double *pts2, double *mat) {
const int np2 = np * 2;
double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9);
double *b = a + np2 * 4;
double *temp = b + np2;
int i;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 2 * 4 + 0] = sx;
a[i * 2 * 4 + 1] = sy;
a[i * 2 * 4 + 2] = 1;
a[i * 2 * 4 + 3] = 0;
a[(i * 2 + 1) * 4 + 0] = sy;
a[(i * 2 + 1) * 4 + 1] = -sx;
a[(i * 2 + 1) * 4 + 2] = 0;
a[(i * 2 + 1) * 4 + 3] = 1;
b[2 * i] = dx;
b[2 * i + 1] = dy;
}
if (pseudo_inverse(temp, a, np2, 4)) {
aom_free(a);
return 1;
}
multiply_mat(temp, b, mat, 4, np2, 1);
denormalize_rotzoom_reorder(mat, T1, T2);
aom_free(a);
return 0;
}
static int find_affine(const int np, double *pts1, double *pts2, double *mat) {
const int np2 = np * 2;
double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13);
double *b = a + np2 * 6;
double *temp = b + np2;
int i;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 2 * 6 + 0] = sx;
a[i * 2 * 6 + 1] = sy;
a[i * 2 * 6 + 2] = 0;
a[i * 2 * 6 + 3] = 0;
a[i * 2 * 6 + 4] = 1;
a[i * 2 * 6 + 5] = 0;
a[(i * 2 + 1) * 6 + 0] = 0;
a[(i * 2 + 1) * 6 + 1] = 0;
a[(i * 2 + 1) * 6 + 2] = sx;
a[(i * 2 + 1) * 6 + 3] = sy;
a[(i * 2 + 1) * 6 + 4] = 0;
a[(i * 2 + 1) * 6 + 5] = 1;
b[2 * i] = dx;
b[2 * i + 1] = dy;
}
if (pseudo_inverse(temp, a, np2, 6)) {
aom_free(a);
return 1;
}
multiply_mat(temp, b, mat, 6, np2, 1);
denormalize_affine_reorder(mat, T1, T2);
aom_free(a);
return 0;
}
static int find_vertrapezoid(const int np, double *pts1, double *pts2,
double *mat) {
const int np3 = np * 3;
double *a = (double *)aom_malloc(sizeof(*a) * np3 * 14);
double *U = a + np3 * 7;
double S[7], V[7 * 7], H[9];
int i, mini;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 3 * 7 + 0] = a[i * 3 * 7 + 1] = 0;
a[i * 3 * 7 + 2] = -sx;
a[i * 3 * 7 + 3] = -sy;
a[i * 3 * 7 + 4] = -1;
a[i * 3 * 7 + 5] = dy * sx;
a[i * 3 * 7 + 6] = dy;
a[(i * 3 + 1) * 7 + 0] = sx;
a[(i * 3 + 1) * 7 + 1] = 1;
a[(i * 3 + 1) * 7 + 2] = a[(i * 3 + 1) * 7 + 3] = a[(i * 3 + 1) * 7 + 4] =
0;
a[(i * 3 + 1) * 7 + 5] = -dx * sx;
a[(i * 3 + 1) * 7 + 6] = -dx;
a[(i * 3 + 2) * 7 + 0] = -dy * sx;
a[(i * 3 + 2) * 7 + 1] = -dy;
a[(i * 3 + 2) * 7 + 2] = dx * sx;
a[(i * 3 + 2) * 7 + 3] = dx * sy;
a[(i * 3 + 2) * 7 + 4] = dx;
a[(i * 3 + 2) * 7 + 5] = a[(i * 3 + 2) * 7 + 6] = 0;
}
if (SVD(U, S, V, a, np3, 7)) {
aom_free(a);
return 1;
} else {
double minS = 1e12;
mini = -1;
for (i = 0; i < 7; ++i) {
if (S[i] < minS) {
minS = S[i];
mini = i;
}
}
}
H[1] = H[7] = 0;
for (i = 0; i < 1; i++) H[i] = V[i * 7 + mini];
for (; i < 6; i++) H[i + 1] = V[i * 7 + mini];
for (; i < 7; i++) H[i + 2] = V[i * 7 + mini];
denormalize_homography_reorder(H, T1, T2);
aom_free(a);
if (H[8] == 0.0) {
return 1;
} else {
// normalize
double f = 1.0 / H[8];
for (i = 0; i < 8; i++) mat[i] = f * H[i];
}
return 0;
}
static int find_hortrapezoid(const int np, double *pts1, double *pts2,
double *mat) {
const int np3 = np * 3;
double *a = (double *)aom_malloc(sizeof(*a) * np3 * 14);
double *U = a + np3 * 7;
double S[7], V[7 * 7], H[9];
int i, mini;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 3 * 7 + 0] = a[i * 3 * 7 + 1] = a[i * 3 * 7 + 2] = 0;
a[i * 3 * 7 + 3] = -sy;
a[i * 3 * 7 + 4] = -1;
a[i * 3 * 7 + 5] = dy * sy;
a[i * 3 * 7 + 6] = dy;
a[(i * 3 + 1) * 7 + 0] = sx;
a[(i * 3 + 1) * 7 + 1] = sy;
a[(i * 3 + 1) * 7 + 2] = 1;
a[(i * 3 + 1) * 7 + 3] = a[(i * 3 + 1) * 7 + 4] = 0;
a[(i * 3 + 1) * 7 + 5] = -dx * sy;
a[(i * 3 + 1) * 7 + 6] = -dx;
a[(i * 3 + 2) * 7 + 0] = -dy * sx;
a[(i * 3 + 2) * 7 + 1] = -dy * sy;
a[(i * 3 + 2) * 7 + 2] = -dy;
a[(i * 3 + 2) * 7 + 3] = dx * sy;
a[(i * 3 + 2) * 7 + 4] = dx;
a[(i * 3 + 2) * 7 + 5] = a[(i * 3 + 2) * 7 + 6] = 0;
}
if (SVD(U, S, V, a, np3, 7)) {
aom_free(a);
return 1;
} else {
double minS = 1e12;
mini = -1;
for (i = 0; i < 7; ++i) {
if (S[i] < minS) {
minS = S[i];
mini = i;
}
}
}
H[3] = H[6] = 0;
for (i = 0; i < 3; i++) H[i] = V[i * 7 + mini];
for (; i < 5; i++) H[i + 1] = V[i * 7 + mini];
for (; i < 7; i++) H[i + 2] = V[i * 7 + mini];
denormalize_homography_reorder(H, T1, T2);
aom_free(a);
if (H[8] == 0.0) {
return 1;
} else {
// normalize
double f = 1.0 / H[8];
for (i = 0; i < 8; i++) mat[i] = f * H[i];
}
return 0;
}
static int find_homography(const int np, double *pts1, double *pts2,
double *mat) {
// Implemented from Peter Kovesi's normalized implementation
const int np3 = np * 3;
double *a = (double *)aom_malloc(sizeof(*a) * np3 * 18);
double *U = a + np3 * 9;
double S[9], V[9 * 9], H[9];
int i, mini;
double sx, sy, dx, dy;
double T1[9], T2[9];
normalize_homography(pts1, np, T1);
normalize_homography(pts2, np, T2);
for (i = 0; i < np; ++i) {
dx = *(pts2++);
dy = *(pts2++);
sx = *(pts1++);
sy = *(pts1++);
a[i * 3 * 9 + 0] = a[i * 3 * 9 + 1] = a[i * 3 * 9 + 2] = 0;
a[i * 3 * 9 + 3] = -sx;
a[i * 3 * 9 + 4] = -sy;
a[i * 3 * 9 + 5] = -1;
a[i * 3 * 9 + 6] = dy * sx;
a[i * 3 * 9 + 7] = dy * sy;
a[i * 3 * 9 + 8] = dy;
a[(i * 3 + 1) * 9 + 0] = sx;
a[(i * 3 + 1) * 9 + 1] = sy;
a[(i * 3 + 1) * 9 + 2] = 1;
a[(i * 3 + 1) * 9 + 3] = a[(i * 3 + 1) * 9 + 4] = a[(i * 3 + 1) * 9 + 5] =
0;
a[(i * 3 + 1) * 9 + 6] = -dx * sx;
a[(i * 3 + 1) * 9 + 7] = -dx * sy;
a[(i * 3 + 1) * 9 + 8] = -dx;
a[(i * 3 + 2) * 9 + 0] = -dy * sx;
a[(i * 3 + 2) * 9 + 1] = -dy * sy;
a[(i * 3 + 2) * 9 + 2] = -dy;
a[(i * 3 + 2) * 9 + 3] = dx * sx;
a[(i * 3 + 2) * 9 + 4] = dx * sy;
a[(i * 3 + 2) * 9 + 5] = dx;
a[(i * 3 + 2) * 9 + 6] = a[(i * 3 + 2) * 9 + 7] = a[(i * 3 + 2) * 9 + 8] =
0;
}
if (SVD(U, S, V, a, np3, 9)) {
aom_free(a);
return 1;
} else {
double minS = 1e12;
mini = -1;
for (i = 0; i < 9; ++i) {
if (S[i] < minS) {
minS = S[i];
mini = i;
}
}
}
for (i = 0; i < 9; i++) H[i] = V[i * 9 + mini];
denormalize_homography_reorder(H, T1, T2);
aom_free(a);
if (H[8] == 0.0) {
return 1;
} else {
// normalize
double f = 1.0 / H[8];
for (i = 0; i < 8; i++) mat[i] = f * H[i];
}
return 0;
}
static int get_rand_indices(int npoints, int minpts, int *indices,
unsigned int *seed) {
int i, j;

Просмотреть файл

@ -7944,7 +7944,7 @@ static int64_t handle_inter_mode(
#endif // CONFIG_EXT_INTER
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
#if CONFIG_WARPED_MOTION
double pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
#endif // CONFIG_WARPED_MOTION
int64_t rd = INT64_MAX;
BUFFER_SET orig_dst, tmp_dst;