CDEF encode buffering optimizations
Change-Id: I6b178d5ebf353bca98f18d8add2aa8b77e03cc4f
This commit is contained in:
Родитель
12ec6c6529
Коммит
deb1950bb3
|
@ -92,6 +92,19 @@ static double compute_dist(uint16_t *x, int xstride, uint16_t *y, int ystride,
|
|||
return sum / (double)(1 << 2 * coeff_shift);
|
||||
}
|
||||
|
||||
/* Copies a vsize x hsize rectangle of 16-bit samples from src into dst.
 *
 * The source rectangle begins src_voffset rows down and src_hoffset columns
 * across from src; consecutive source rows are sstride elements apart. The
 * rectangle is written starting at dst[0], with consecutive destination rows
 * dstride elements apart. Nothing is written when vsize or hsize is not
 * positive.
 *
 * FIXME: SSE-optimize this. */
static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src,
                         int src_voffset, int src_hoffset, int sstride,
                         int vsize, int hsize) {
  int r, c;
  /* Top-left sample of the source rectangle. */
  const uint16_t *base = &src[src_voffset * sstride + src_hoffset];
  for (r = 0; r < vsize; r++) {
    /* Row starts are recomputed from the row index rather than advanced
       incrementally, so no pointer past the last copied row is formed. */
    const uint16_t *src_row = base + r * sstride;
    uint16_t *dst_row = dst + r * dstride;
    for (c = 0; c < hsize; c++) {
      dst_row[c] = src_row[c];
    }
  }
}
|
||||
|
||||
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
||||
AV1_COMMON *cm, MACROBLOCKD *xd) {
|
||||
int r, c;
|
||||
|
@ -181,7 +194,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
int nvb, nhb;
|
||||
int gi;
|
||||
int dirinit = 0;
|
||||
DECLARE_ALIGNED(32, uint16_t, dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
|
||||
DECLARE_ALIGNED(32, uint16_t,
|
||||
dst[3][MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
|
||||
DECLARE_ALIGNED(32, uint16_t,
|
||||
tmp_dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8]);
|
||||
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
|
||||
|
@ -189,24 +203,23 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
|
||||
sbc * MAX_MIB_SIZE, dlist);
|
||||
if (dering_count == 0) continue;
|
||||
for (pli = 0; pli < nplanes; pli++) {
|
||||
/* Copy the dst buffer only once since it will always be written at
|
||||
the same place. */
|
||||
copy_sb16_16(dst[pli], MAX_MIB_SIZE << bsize[pli], src[pli],
|
||||
sbr * MAX_MIB_SIZE << bsize[pli],
|
||||
sbc * MAX_MIB_SIZE << bsize[pli], stride[pli],
|
||||
nvb << bsize[pli], nhb << bsize[pli]);
|
||||
}
|
||||
for (gi = 0; gi < TOTAL_STRENGTHS; gi++) {
|
||||
int threshold;
|
||||
int clpf_strength;
|
||||
DECLARE_ALIGNED(32, uint16_t, inbuf[OD_DERING_INBUF_SIZE]);
|
||||
uint16_t *in;
|
||||
int j;
|
||||
level = dering_level_table[gi / CLPF_STRENGTHS];
|
||||
threshold = level << coeff_shift;
|
||||
for (pli = 0; pli < nplanes; pli++) {
|
||||
if (pli > 0 && !chroma_dering) threshold = 0;
|
||||
for (r = 0; r < nvb << bsize[pli]; r++) {
|
||||
for (c = 0; c < nhb << bsize[pli]; c++) {
|
||||
dst[(r * MAX_MIB_SIZE << bsize[pli]) + c] =
|
||||
src[pli]
|
||||
[((sbr * MAX_MIB_SIZE << bsize[pli]) + r) * stride[pli] +
|
||||
(sbc * MAX_MIB_SIZE << bsize[pli]) + c];
|
||||
}
|
||||
}
|
||||
in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
|
||||
/* We avoid filtering the pixels for which some of the pixels to
|
||||
average
|
||||
|
@ -214,27 +227,25 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
would add special cases for any future vectorization. */
|
||||
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
|
||||
inbuf[i] = OD_DERING_VERY_LARGE;
|
||||
for (i = -OD_FILT_VBORDER * (sbr != 0);
|
||||
i < (nvb << bsize[pli]) + OD_FILT_VBORDER * (sbr != nvsb - 1);
|
||||
i++) {
|
||||
for (j = -OD_FILT_HBORDER * (sbc != 0);
|
||||
j < (nhb << bsize[pli]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
|
||||
j++) {
|
||||
uint16_t *x;
|
||||
x = &src[pli][(sbr * stride[pli] * MAX_MIB_SIZE << bsize[pli]) +
|
||||
(sbc * MAX_MIB_SIZE << bsize[pli])];
|
||||
in[i * OD_FILT_BSTRIDE + j] = x[i * stride[pli] + j];
|
||||
}
|
||||
}
|
||||
int yoff = OD_FILT_VBORDER * (sbr != 0);
|
||||
int xoff = OD_FILT_HBORDER * (sbc != 0);
|
||||
int ysize =
|
||||
(nvb << bsize[pli]) + OD_FILT_VBORDER * (sbr != nvsb - 1) + yoff;
|
||||
int xsize =
|
||||
(nhb << bsize[pli]) + OD_FILT_HBORDER * (sbc != nhsb - 1) + xoff;
|
||||
copy_sb16_16(&in[(-yoff * OD_FILT_BSTRIDE - xoff)], OD_FILT_BSTRIDE,
|
||||
src[pli], (sbr * MAX_MIB_SIZE << bsize[pli]) - yoff,
|
||||
(sbc * MAX_MIB_SIZE << bsize[pli]) - xoff, stride[pli],
|
||||
ysize, xsize);
|
||||
clpf_strength = gi % CLPF_STRENGTHS;
|
||||
od_dering(tmp_dst, in, dec[pli], dir, &dirinit, var, pli, dlist,
|
||||
dering_count, threshold,
|
||||
clpf_strength + (clpf_strength == 3), clpf_damping,
|
||||
coeff_shift);
|
||||
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[pli], tmp_dst,
|
||||
dlist, dering_count, bsize[pli]);
|
||||
copy_dering_16bit_to_16bit(dst[pli], MAX_MIB_SIZE << bsize[pli],
|
||||
tmp_dst, dlist, dering_count, bsize[pli]);
|
||||
mse[pli][sb_count][gi] = (int)compute_dist(
|
||||
dst, MAX_MIB_SIZE << bsize[pli],
|
||||
dst[pli], MAX_MIB_SIZE << bsize[pli],
|
||||
&ref_coeff[pli][(sbr * stride[pli] * MAX_MIB_SIZE << bsize[pli]) +
|
||||
(sbc * MAX_MIB_SIZE << bsize[pli])],
|
||||
stride[pli], nhb, nvb, coeff_shift, bsize[pli]);
|
||||
|
|
Загрузка…
Ссылка в новой задаче