Speed up global motion determination
When global-motion is enabled, a considerable amount of encoder time is spent in the functions in corner_match.c. This patch optimizes those functions to be 3.5-4x as fast, leading to an end-to-end encoder speed improvement (on 20 frames of tempete_cif.y4m) of: 200kbps: ~26% faster 800kbps: ~19% faster 2800kbps: ~12% faster Change-Id: I04d3f87484c36c41eb5a1e86e814f2accbe86297
This commit is contained in:
Родитель
24f1a90441
Коммит
15338d5f40
|
@ -24,11 +24,13 @@
|
|||
|
||||
#define THRESHOLD_NCC 0.75
|
||||
|
||||
static double compute_variance(unsigned char *im, int stride, int x, int y,
|
||||
double *mean) {
|
||||
double sum = 0.0;
|
||||
double sumsq = 0.0;
|
||||
double var;
|
||||
/* Compute var(im) * MATCH_SZ_SQ over a MATCH_SZ by MATCH_SZ window of im,
|
||||
centered at (x, y).
|
||||
*/
|
||||
static double compute_variance(unsigned char *im, int stride, int x, int y) {
|
||||
int sum = 0.0;
|
||||
int sumsq = 0.0;
|
||||
int var;
|
||||
int i, j;
|
||||
for (i = 0; i < MATCH_SZ; ++i)
|
||||
for (j = 0; j < MATCH_SZ; ++j) {
|
||||
|
@ -36,39 +38,45 @@ static double compute_variance(unsigned char *im, int stride, int x, int y,
|
|||
sumsq += im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)] *
|
||||
im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)];
|
||||
}
|
||||
var = (sumsq * MATCH_SZ_SQ - sum * sum) / (MATCH_SZ_SQ * MATCH_SZ_SQ);
|
||||
if (mean) *mean = sum / MATCH_SZ_SQ;
|
||||
return var;
|
||||
var = sumsq * MATCH_SZ_SQ - sum * sum;
|
||||
return (double)var;
|
||||
}
|
||||
|
||||
/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the
|
||||
correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows
|
||||
of each image, centered at (x1, y1) and (x2, y2) respectively.
|
||||
*/
|
||||
static double compute_cross_correlation(unsigned char *im1, int stride1, int x1,
|
||||
int y1, unsigned char *im2, int stride2,
|
||||
int x2, int y2) {
|
||||
double sum1 = 0;
|
||||
double sum2 = 0;
|
||||
double cross = 0;
|
||||
double corr;
|
||||
int v1, v2;
|
||||
int sum1 = 0;
|
||||
int sum2 = 0;
|
||||
int sumsq2 = 0;
|
||||
int cross = 0;
|
||||
int var2, cov;
|
||||
int i, j;
|
||||
for (i = 0; i < MATCH_SZ; ++i)
|
||||
for (j = 0; j < MATCH_SZ; ++j) {
|
||||
sum1 += im1[(i + y1 - MATCH_SZ_BY2) * stride1 + (j + x1 - MATCH_SZ_BY2)];
|
||||
sum2 += im2[(i + y2 - MATCH_SZ_BY2) * stride2 + (j + x2 - MATCH_SZ_BY2)];
|
||||
cross +=
|
||||
im1[(i + y1 - MATCH_SZ_BY2) * stride1 + (j + x1 - MATCH_SZ_BY2)] *
|
||||
im2[(i + y2 - MATCH_SZ_BY2) * stride2 + (j + x2 - MATCH_SZ_BY2)];
|
||||
v1 = im1[(i + y1 - MATCH_SZ_BY2) * stride1 + (j + x1 - MATCH_SZ_BY2)];
|
||||
v2 = im2[(i + y2 - MATCH_SZ_BY2) * stride2 + (j + x2 - MATCH_SZ_BY2)];
|
||||
sum1 += v1;
|
||||
sum2 += v2;
|
||||
sumsq2 += v2 * v2;
|
||||
cross += v1 * v2;
|
||||
}
|
||||
corr = (cross * MATCH_SZ_SQ - sum1 * sum2) / (MATCH_SZ_SQ * MATCH_SZ_SQ);
|
||||
return corr;
|
||||
var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2;
|
||||
cov = cross * MATCH_SZ_SQ - sum1 * sum2;
|
||||
return cov / sqrt((double)var2);
|
||||
}
|
||||
|
||||
static int is_eligible_point(double pointx, double pointy, int width,
|
||||
int height) {
|
||||
static int is_eligible_point(int pointx, int pointy, int width, int height) {
|
||||
return (pointx >= MATCH_SZ_BY2 && pointy >= MATCH_SZ_BY2 &&
|
||||
pointx + MATCH_SZ_BY2 < width && pointy + MATCH_SZ_BY2 < height);
|
||||
}
|
||||
|
||||
static int is_eligible_distance(double point1x, double point1y, double point2x,
|
||||
double point2y, int width, int height) {
|
||||
static int is_eligible_distance(int point1x, int point1y, int point2x,
|
||||
int point2y, int width, int height) {
|
||||
const int thresh = (width < height ? height : width) >> 4;
|
||||
return ((point1x - point2x) * (point1x - point2x) +
|
||||
(point1y - point2y) * (point1y - point2y)) <= thresh * thresh;
|
||||
|
@ -81,32 +89,21 @@ static void improve_correspondence(unsigned char *frm, unsigned char *ref,
|
|||
int num_correspondences) {
|
||||
int i;
|
||||
for (i = 0; i < num_correspondences; ++i) {
|
||||
double template_norm =
|
||||
compute_variance(frm, frm_stride, (int)correspondences[i].x,
|
||||
(int)correspondences[i].y, NULL);
|
||||
int x, y, best_x = 0, best_y = 0;
|
||||
double best_match_ncc = 0.0;
|
||||
for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y) {
|
||||
for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) {
|
||||
double match_ncc;
|
||||
double subimage_norm;
|
||||
if (!is_eligible_point((int)correspondences[i].rx + x,
|
||||
(int)correspondences[i].ry + y, width, height))
|
||||
if (!is_eligible_point(correspondences[i].rx + x,
|
||||
correspondences[i].ry + y, width, height))
|
||||
continue;
|
||||
if (!is_eligible_distance(
|
||||
(int)correspondences[i].x, (int)correspondences[i].y,
|
||||
(int)correspondences[i].rx + x, (int)correspondences[i].ry + y,
|
||||
width, height))
|
||||
if (!is_eligible_distance(correspondences[i].x, correspondences[i].y,
|
||||
correspondences[i].rx + x,
|
||||
correspondences[i].ry + y, width, height))
|
||||
continue;
|
||||
subimage_norm =
|
||||
compute_variance(ref, ref_stride, (int)correspondences[i].rx + x,
|
||||
(int)correspondences[i].ry + y, NULL);
|
||||
match_ncc = compute_cross_correlation(
|
||||
frm, frm_stride, (int)correspondences[i].x,
|
||||
(int)correspondences[i].y, ref, ref_stride,
|
||||
(int)correspondences[i].rx + x,
|
||||
(int)correspondences[i].ry + y) /
|
||||
sqrt(template_norm * subimage_norm);
|
||||
frm, frm_stride, correspondences[i].x, correspondences[i].y, ref,
|
||||
ref_stride, correspondences[i].rx + x, correspondences[i].ry + y);
|
||||
if (match_ncc > best_match_ncc) {
|
||||
best_match_ncc = match_ncc;
|
||||
best_y = y;
|
||||
|
@ -114,36 +111,25 @@ static void improve_correspondence(unsigned char *frm, unsigned char *ref,
|
|||
}
|
||||
}
|
||||
}
|
||||
correspondences[i].rx += (double)best_x;
|
||||
correspondences[i].ry += (double)best_y;
|
||||
correspondences[i].rx += best_x;
|
||||
correspondences[i].ry += best_y;
|
||||
}
|
||||
for (i = 0; i < num_correspondences; ++i) {
|
||||
double template_norm =
|
||||
compute_variance(ref, ref_stride, (int)correspondences[i].rx,
|
||||
(int)correspondences[i].ry, NULL);
|
||||
int x, y, best_x = 0, best_y = 0;
|
||||
double best_match_ncc = 0.0;
|
||||
for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y)
|
||||
for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) {
|
||||
double match_ncc;
|
||||
double subimage_norm;
|
||||
if (!is_eligible_point((int)correspondences[i].x + x,
|
||||
(int)correspondences[i].y + y, width, height))
|
||||
if (!is_eligible_point(correspondences[i].x + x,
|
||||
correspondences[i].y + y, width, height))
|
||||
continue;
|
||||
if (!is_eligible_distance((int)correspondences[i].x + x,
|
||||
(int)correspondences[i].y + y,
|
||||
(int)correspondences[i].rx,
|
||||
(int)correspondences[i].ry, width, height))
|
||||
if (!is_eligible_distance(
|
||||
correspondences[i].x + x, correspondences[i].y + y,
|
||||
correspondences[i].rx, correspondences[i].ry, width, height))
|
||||
continue;
|
||||
subimage_norm =
|
||||
compute_variance(frm, frm_stride, (int)correspondences[i].x + x,
|
||||
(int)correspondences[i].y + y, NULL);
|
||||
match_ncc =
|
||||
compute_cross_correlation(
|
||||
frm, frm_stride, (int)correspondences[i].x + x,
|
||||
(int)correspondences[i].y + y, ref, ref_stride,
|
||||
(int)correspondences[i].rx, (int)correspondences[i].ry) /
|
||||
sqrt(template_norm * subimage_norm);
|
||||
match_ncc = compute_cross_correlation(
|
||||
ref, ref_stride, correspondences[i].rx, correspondences[i].ry, frm,
|
||||
frm_stride, correspondences[i].x + x, correspondences[i].y + y);
|
||||
if (match_ncc > best_match_ncc) {
|
||||
best_match_ncc = match_ncc;
|
||||
best_y = y;
|
||||
|
@ -159,7 +145,7 @@ int determine_correspondence(unsigned char *frm, int *frm_corners,
|
|||
int num_frm_corners, unsigned char *ref,
|
||||
int *ref_corners, int num_ref_corners, int width,
|
||||
int height, int frm_stride, int ref_stride,
|
||||
double *correspondence_pts) {
|
||||
int *correspondence_pts) {
|
||||
// TODO(sarahparker) Improve this to include 2-way match
|
||||
int i, j;
|
||||
Correspondence *correspondences = (Correspondence *)correspondence_pts;
|
||||
|
@ -171,11 +157,8 @@ int determine_correspondence(unsigned char *frm, int *frm_corners,
|
|||
if (!is_eligible_point(frm_corners[2 * i], frm_corners[2 * i + 1], width,
|
||||
height))
|
||||
continue;
|
||||
template_norm = compute_variance(frm, frm_stride, frm_corners[2 * i],
|
||||
frm_corners[2 * i + 1], NULL);
|
||||
for (j = 0; j < num_ref_corners; ++j) {
|
||||
double match_ncc;
|
||||
double subimage_norm;
|
||||
if (!is_eligible_point(ref_corners[2 * j], ref_corners[2 * j + 1], width,
|
||||
height))
|
||||
continue;
|
||||
|
@ -183,25 +166,24 @@ int determine_correspondence(unsigned char *frm, int *frm_corners,
|
|||
ref_corners[2 * j], ref_corners[2 * j + 1],
|
||||
width, height))
|
||||
continue;
|
||||
subimage_norm = compute_variance(ref, ref_stride, ref_corners[2 * j],
|
||||
ref_corners[2 * j + 1], NULL);
|
||||
match_ncc = compute_cross_correlation(frm, frm_stride, frm_corners[2 * i],
|
||||
frm_corners[2 * i + 1], ref,
|
||||
ref_stride, ref_corners[2 * j],
|
||||
ref_corners[2 * j + 1]) /
|
||||
sqrt(template_norm * subimage_norm);
|
||||
match_ncc = compute_cross_correlation(
|
||||
frm, frm_stride, frm_corners[2 * i], frm_corners[2 * i + 1], ref,
|
||||
ref_stride, ref_corners[2 * j], ref_corners[2 * j + 1]);
|
||||
if (match_ncc > best_match_ncc) {
|
||||
best_match_ncc = match_ncc;
|
||||
best_match_j = j;
|
||||
}
|
||||
}
|
||||
if (best_match_ncc > THRESHOLD_NCC) {
|
||||
correspondences[num_correspondences].x = (double)frm_corners[2 * i];
|
||||
correspondences[num_correspondences].y = (double)frm_corners[2 * i + 1];
|
||||
correspondences[num_correspondences].rx =
|
||||
(double)ref_corners[2 * best_match_j];
|
||||
// Note: We want to test if the best correlation is >= THRESHOLD_NCC,
|
||||
// but need to account for the normalization in compute_cross_correlation.
|
||||
template_norm = compute_variance(frm, frm_stride, frm_corners[2 * i],
|
||||
frm_corners[2 * i + 1]);
|
||||
if (best_match_ncc > THRESHOLD_NCC * sqrt(template_norm)) {
|
||||
correspondences[num_correspondences].x = frm_corners[2 * i];
|
||||
correspondences[num_correspondences].y = frm_corners[2 * i + 1];
|
||||
correspondences[num_correspondences].rx = ref_corners[2 * best_match_j];
|
||||
correspondences[num_correspondences].ry =
|
||||
(double)ref_corners[2 * best_match_j + 1];
|
||||
ref_corners[2 * best_match_j + 1];
|
||||
num_correspondences++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,14 +16,14 @@
|
|||
#include <memory.h>
|
||||
|
||||
typedef struct {
|
||||
double x, y;
|
||||
double rx, ry;
|
||||
int x, y;
|
||||
int rx, ry;
|
||||
} Correspondence;
|
||||
|
||||
int determine_correspondence(unsigned char *frm, int *frm_corners,
|
||||
int num_frm_corners, unsigned char *ref,
|
||||
int *ref_corners, int num_ref_corners, int width,
|
||||
int height, int frm_stride, int ref_stride,
|
||||
double *correspondence_pts);
|
||||
int *correspondence_pts);
|
||||
|
||||
#endif // AV1_ENCODER_CORNER_MATCH_H_
|
||||
|
|
|
@ -218,7 +218,7 @@ static INLINE RansacFunc get_ransac_type(TransformationType type) {
|
|||
|
||||
// computes global motion parameters by fitting a model using RANSAC
|
||||
static int compute_global_motion_params(TransformationType type,
|
||||
double *correspondences,
|
||||
int *correspondences,
|
||||
int num_correspondences, double *params,
|
||||
int *inlier_map) {
|
||||
int result;
|
||||
|
@ -259,7 +259,7 @@ int compute_global_motion_feature_based(TransformationType type,
|
|||
double *params) {
|
||||
int num_frm_corners, num_ref_corners;
|
||||
int num_correspondences;
|
||||
double *correspondences;
|
||||
int *correspondences;
|
||||
int num_inliers;
|
||||
int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS];
|
||||
int *inlier_map = NULL;
|
||||
|
@ -291,7 +291,7 @@ int compute_global_motion_feature_based(TransformationType type,
|
|||
|
||||
// find correspondences between the two images
|
||||
correspondences =
|
||||
(double *)malloc(num_frm_corners * 4 * sizeof(*correspondences));
|
||||
(int *)malloc(num_frm_corners * 4 * sizeof(*correspondences));
|
||||
num_correspondences = determine_correspondence(
|
||||
frm_buffer, (int *)frm_corners, num_frm_corners, ref_buffer,
|
||||
(int *)ref_corners, num_ref_corners, frm->y_width, frm->y_height,
|
||||
|
|
|
@ -116,7 +116,7 @@ static int get_rand_indices(int npoints, int minpts, int *indices,
|
|||
return 1;
|
||||
}
|
||||
|
||||
static int ransac(double *matched_points, int npoints, int *number_of_inliers,
|
||||
static int ransac(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_mask, double *best_params, const int minpts,
|
||||
IsDegenerateFunc is_degenerate,
|
||||
FindTransformationFunc find_transformation,
|
||||
|
@ -305,31 +305,29 @@ static int is_degenerate_homography(double *p) {
|
|||
is_collinear3(p, p + 4, p + 6) || is_collinear3(p + 2, p + 4, p + 6);
|
||||
}
|
||||
|
||||
int ransac_translation(double *matched_points, int npoints,
|
||||
int *number_of_inliers, int *best_inlier_mask,
|
||||
double *best_params) {
|
||||
int ransac_translation(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_mask, double *best_params) {
|
||||
return ransac(matched_points, npoints, number_of_inliers, best_inlier_mask,
|
||||
best_params, 3, is_degenerate_translation, find_translation,
|
||||
project_points_double_translation);
|
||||
}
|
||||
|
||||
int ransac_rotzoom(double *matched_points, int npoints, int *number_of_inliers,
|
||||
int ransac_rotzoom(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_mask, double *best_params) {
|
||||
return ransac(matched_points, npoints, number_of_inliers, best_inlier_mask,
|
||||
best_params, 3, is_degenerate_affine, find_rotzoom,
|
||||
project_points_double_rotzoom);
|
||||
}
|
||||
|
||||
int ransac_affine(double *matched_points, int npoints, int *number_of_inliers,
|
||||
int ransac_affine(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_mask, double *best_params) {
|
||||
return ransac(matched_points, npoints, number_of_inliers, best_inlier_mask,
|
||||
best_params, 3, is_degenerate_affine, find_affine,
|
||||
project_points_double_affine);
|
||||
}
|
||||
|
||||
int ransac_homography(double *matched_points, int npoints,
|
||||
int *number_of_inliers, int *best_inlier_mask,
|
||||
double *best_params) {
|
||||
int ransac_homography(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_mask, double *best_params) {
|
||||
return ransac(matched_points, npoints, number_of_inliers, best_inlier_mask,
|
||||
best_params, 4, is_degenerate_homography, find_homography,
|
||||
project_points_double_homography);
|
||||
|
|
|
@ -19,20 +19,18 @@
|
|||
|
||||
#include "av1/common/warped_motion.h"
|
||||
|
||||
typedef int (*RansacFunc)(double *matched_points, int npoints,
|
||||
typedef int (*RansacFunc)(int *matched_points, int npoints,
|
||||
int *number_of_inliers, int *best_inlier_mask,
|
||||
double *best_params);
|
||||
|
||||
/* Each of these functions fits a motion model from a set of
|
||||
corresponding points in 2 frames using RANSAC.*/
|
||||
int ransac_homography(double *matched_points, int npoints,
|
||||
int *number_of_inliers, int *best_inlier_indices,
|
||||
double *best_params);
|
||||
int ransac_affine(double *matched_points, int npoints, int *number_of_inliers,
|
||||
int ransac_homography(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_indices, double *best_params);
|
||||
int ransac_affine(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_indices, double *best_params);
|
||||
int ransac_rotzoom(double *matched_points, int npoints, int *number_of_inliers,
|
||||
int ransac_rotzoom(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_indices, double *best_params);
|
||||
int ransac_translation(double *matched_points, int npoints,
|
||||
int *number_of_inliers, int *best_inlier_indices,
|
||||
double *best_params);
|
||||
int ransac_translation(int *matched_points, int npoints, int *number_of_inliers,
|
||||
int *best_inlier_indices, double *best_params);
|
||||
#endif // AV1_ENCODER_RANSAC_H_
|
||||
|
|
Загрузка…
Ссылка в новой задаче