CDEF: computing MSE only on the filtered blocks
Change-Id: I16881cd6267922a3e156defb90577d6ad2b46d5b
This commit is contained in:
Родитель
32282f2c41
Коммит
d4fd4eef61
|
@ -77,21 +77,6 @@ static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
|
|||
return best_tot_mse;
|
||||
}
|
||||
|
||||
static double compute_dist(uint16_t *x, int xstride, uint16_t *y, int ystride,
|
||||
int nhb, int nvb, int coeff_shift, int bsize) {
|
||||
int i, j;
|
||||
double sum;
|
||||
sum = 0;
|
||||
for (i = 0; i < nvb << bsize; i++) {
|
||||
for (j = 0; j < nhb << bsize; j++) {
|
||||
double tmp;
|
||||
tmp = x[i * xstride + j] - y[i * ystride + j];
|
||||
sum += tmp * tmp;
|
||||
}
|
||||
}
|
||||
return sum / (double)(1 << 2 * coeff_shift);
|
||||
}
|
||||
|
||||
/* FIXME: SSE-optimize this. */
|
||||
static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src,
|
||||
int src_voffset, int src_hoffset, int sstride,
|
||||
|
@ -105,6 +90,56 @@ static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src,
|
|||
}
|
||||
}
|
||||
|
||||
static INLINE uint64_t mse_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src,
|
||||
int sstride) {
|
||||
uint64_t sum = 0;
|
||||
int i, j;
|
||||
for (i = 0; i < 8; i++) {
|
||||
for (j = 0; j < 8; j++) {
|
||||
int e = dst[i * dstride + j] - src[i * sstride + j];
|
||||
sum += e * e;
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
static INLINE uint64_t mse_4x4_16bit(uint16_t *dst, int dstride, uint16_t *src,
|
||||
int sstride) {
|
||||
uint64_t sum = 0;
|
||||
int i, j;
|
||||
for (i = 0; i < 4; i++) {
|
||||
for (j = 0; j < 4; j++) {
|
||||
int e = dst[i * dstride + j] - src[i * sstride + j];
|
||||
sum += e * e;
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* Compute MSE only on the blocks we filtered. */
|
||||
uint64_t compute_dering_mse(uint16_t *dst, int dstride, uint16_t *src,
|
||||
dering_list *dlist, int dering_count, int bsize,
|
||||
int coeff_shift) {
|
||||
uint64_t sum = 0;
|
||||
int bi, bx, by;
|
||||
if (bsize == 3) {
|
||||
for (bi = 0; bi < dering_count; bi++) {
|
||||
by = dlist[bi].by;
|
||||
bx = dlist[bi].bx;
|
||||
sum += mse_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
|
||||
&src[bi << 2 * bsize], 1 << bsize);
|
||||
}
|
||||
} else {
|
||||
for (bi = 0; bi < dering_count; bi++) {
|
||||
by = dlist[bi].by;
|
||||
bx = dlist[bi].bx;
|
||||
sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
|
||||
&src[bi << 2 * bsize], 1 << bsize);
|
||||
}
|
||||
}
|
||||
return sum >> 2 * coeff_shift;
|
||||
}
|
||||
|
||||
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
||||
AV1_COMMON *cm, MACROBLOCKD *xd) {
|
||||
int r, c;
|
||||
|
@ -139,7 +174,6 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
int nplanes = 3;
|
||||
DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]);
|
||||
uint16_t *in;
|
||||
DECLARE_ALIGNED(32, uint16_t, dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
|
||||
DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
|
||||
int chroma_dering =
|
||||
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
|
||||
|
@ -205,12 +239,6 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
sbc * MAX_MIB_SIZE, dlist);
|
||||
if (dering_count == 0) continue;
|
||||
for (pli = 0; pli < nplanes; pli++) {
|
||||
/* Copy the dst buffer only once since it will always be written at
|
||||
the same place. */
|
||||
copy_sb16_16(dst, MAX_MIB_SIZE << bsize[pli], src[pli],
|
||||
sbr * MAX_MIB_SIZE << bsize[pli],
|
||||
sbc * MAX_MIB_SIZE << bsize[pli], stride[pli],
|
||||
nvb << bsize[pli], nhb << bsize[pli]);
|
||||
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
|
||||
inbuf[i] = OD_DERING_VERY_LARGE;
|
||||
for (gi = 0; gi < TOTAL_STRENGTHS; gi++) {
|
||||
|
@ -238,13 +266,12 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
dering_count, threshold,
|
||||
clpf_strength + (clpf_strength == 3), clpf_damping,
|
||||
coeff_shift);
|
||||
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[pli], tmp_dst,
|
||||
dlist, dering_count, bsize[pli]);
|
||||
mse[pli][sb_count][gi] = (int)compute_dist(
|
||||
dst, MAX_MIB_SIZE << bsize[pli],
|
||||
&ref_coeff[pli][(sbr * stride[pli] * MAX_MIB_SIZE << bsize[pli]) +
|
||||
(sbc * MAX_MIB_SIZE << bsize[pli])],
|
||||
stride[pli], nhb, nvb, coeff_shift, bsize[pli]);
|
||||
mse[pli][sb_count][gi] = compute_dering_mse(
|
||||
ref_coeff[pli] +
|
||||
(sbr * MAX_MIB_SIZE << bsize[pli]) * stride[pli] +
|
||||
(sbc * MAX_MIB_SIZE << bsize[pli]),
|
||||
stride[pli], tmp_dst, dlist, dering_count, bsize[pli],
|
||||
coeff_shift);
|
||||
sb_index[sb_count] =
|
||||
MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче