Added another denoising threshold for finding DC shifts.

Compares the sum of differences between the input block and the averaged
block. If they differ too much the block will not be filtered. Negligible
perfomance hit.

Change-Id: Ib1c31a265efd4d100b3abc4a1ea6675038c8ddde
This commit is contained in:
Stefan Holmer 2012-05-30 12:17:06 +02:00
Родитель ffe79d61d4
Коммит d850034443
4 изменённых файлов: 75 добавлений и 24 удалений

Просмотреть файл

@ -505,7 +505,7 @@ specialize vp8_yv12_copy_partial_frame neon
# Denoiser filter # Denoiser filter
# #
if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
prototype void vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset" prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
specialize vp8_denoiser_filter sse2 specialize vp8_denoiser_filter sse2
fi fi

Просмотреть файл

@ -15,14 +15,11 @@
#include "vpx_mem/vpx_mem.h" #include "vpx_mem/vpx_mem.h"
#include "vpx_rtcd.h" #include "vpx_rtcd.h"
static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
static const unsigned int NOISE_MOTION_THRESHOLD = 20 * 20;
// SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100. // SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100.
static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20; static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
// The filtering coefficients used for denoizing are adjusted for static // The filtering coefficients used for denoizing are adjusted for static
// blocks, or blocks with very small motion vectors. This is done through // blocks, or blocks with very small motion vectors. This is done through
// the motion magnitude parameter. // the motion magnitude parameter.
@ -82,11 +79,15 @@ union coeff_pair *vp8_get_filter_coeff_LUT(unsigned int motion_magnitude)
void vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg, int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal, YV12_BUFFER_CONFIG *running_avg,
unsigned int motion_magnitude, int y_offset, MACROBLOCK *signal,
unsigned int motion_magnitude,
int y_offset,
int uv_offset) int uv_offset)
{ {
unsigned char filtered_buf[16*16];
unsigned char *filtered = filtered_buf;
unsigned char *sig = signal->thismb; unsigned char *sig = signal->thismb;
int sig_stride = 16; int sig_stride = 16;
unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
@ -95,6 +96,7 @@ void vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
int avg_y_stride = running_avg->y_stride; int avg_y_stride = running_avg->y_stride;
const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude); const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
int r, c; int r, c;
int sum_diff = 0;
for (r = 0; r < 16; ++r) for (r = 0; r < 16; ++r)
{ {
@ -133,22 +135,31 @@ void vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
for (c = 0; c < 16; ++c) for (c = 0; c < 16; ++c)
{ {
const int diff = sig[c] - running_avg_y[c]; const int diff = sig[c] - running_avg_y[c];
sum_diff += diff;
if (diff * diff < NOISE_DIFF2_THRESHOLD) if (diff * diff < NOISE_DIFF2_THRESHOLD)
{ {
sig[c] = running_avg_y[c]; filtered[c] = running_avg_y[c];
} }
else else
{ {
filtered[c] = sig[c];
running_avg_y[c] = sig[c]; running_avg_y[c] = sig[c];
} }
} }
// Update pointers for next iteration. // Update pointers for next iteration.
sig += sig_stride; sig += sig_stride;
filtered += 16;
mc_running_avg_y += mc_avg_y_stride; mc_running_avg_y += mc_avg_y_stride;
running_avg_y += avg_y_stride; running_avg_y += avg_y_stride;
} }
if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
{
return COPY_BLOCK;
}
vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
return FILTER_BLOCK;
} }
@ -215,8 +226,10 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
MV_REFERENCE_FRAME frame = x->best_reference_frame; MV_REFERENCE_FRAME frame = x->best_reference_frame;
MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame; MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
enum vp8_denoiser_decision decision = FILTER_BLOCK;
// Motion compensate the running average. // Motion compensate the running average.
if(zero_frame) if (zero_frame)
{ {
YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame]; YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame];
YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg; YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg;
@ -238,7 +251,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
mv_col = x->best_sse_mv.as_mv.col; mv_col = x->best_sse_mv.as_mv.col;
mv_row = x->best_sse_mv.as_mv.row; mv_row = x->best_sse_mv.as_mv.row;
if(frame == INTRA_FRAME || if (frame == INTRA_FRAME ||
(mv_row *mv_row + mv_col *mv_col <= NOISE_MOTION_THRESHOLD && (mv_row *mv_row + mv_col *mv_col <= NOISE_MOTION_THRESHOLD &&
sse_diff < SSE_DIFF_THRESHOLD)) sse_diff < SSE_DIFF_THRESHOLD))
{ {
@ -295,6 +308,20 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
if (best_sse > SSE_THRESHOLD || motion_magnitude2 if (best_sse > SSE_THRESHOLD || motion_magnitude2
> 8 * NOISE_MOTION_THRESHOLD) > 8 * NOISE_MOTION_THRESHOLD)
{
decision = COPY_BLOCK;
}
if (decision == FILTER_BLOCK)
{
// Filter.
decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
&denoiser->yv12_running_avg[LAST_FRAME],
x,
motion_magnitude2,
recon_yoffset, recon_uvoffset);
}
if (decision == COPY_BLOCK)
{ {
// No filtering of this block; it differs too much from the predictor, // No filtering of this block; it differs too much from the predictor,
// or the motion vector magnitude is considered too big. // or the motion vector magnitude is considered too big.
@ -302,12 +329,5 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
x->thismb, 16, x->thismb, 16,
denoiser->yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset, denoiser->yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset,
denoiser->yv12_running_avg[LAST_FRAME].y_stride); denoiser->yv12_running_avg[LAST_FRAME].y_stride);
return;
} }
// Filter.
vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
&denoiser->yv12_running_avg[LAST_FRAME], x,
motion_magnitude2,
recon_yoffset, recon_uvoffset);
} }

Просмотреть файл

@ -14,6 +14,13 @@
#include "block.h" #include "block.h"
#define NOISE_DIFF2_THRESHOLD (75) #define NOISE_DIFF2_THRESHOLD (75)
#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
enum vp8_denoiser_decision
{
COPY_BLOCK,
FILTER_BLOCK,
};
typedef struct vp8_denoiser typedef struct vp8_denoiser
{ {

Просмотреть файл

@ -17,11 +17,25 @@
#include <emmintrin.h> #include <emmintrin.h>
void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg, union sum_union {
__m128i v;
short e[8];
};
inline int sum_vec_128i(__m128i vec)
{
union sum_union s = { .v = vec };
return s.e[0] + s.e[1] + s.e[2] + s.e[3] +
s.e[4] + s.e[5] + s.e[6] + s.e[7];
}
int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
YV12_BUFFER_CONFIG *running_avg, YV12_BUFFER_CONFIG *running_avg,
MACROBLOCK *signal, unsigned int motion_magnitude, MACROBLOCK *signal, unsigned int motion_magnitude,
int y_offset, int uv_offset) int y_offset, int uv_offset)
{ {
unsigned char filtered_buf[16*16];
unsigned char *filtered = filtered_buf;
unsigned char *sig = signal->thismb; unsigned char *sig = signal->thismb;
int sig_stride = 16; int sig_stride = 16;
unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset; unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
@ -30,6 +44,7 @@ void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
int avg_y_stride = running_avg->y_stride; int avg_y_stride = running_avg->y_stride;
const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude); const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
int r, c; int r, c;
__m128i sum_diff = { 0 };
for (r = 0; r < 16; ++r) for (r = 0; r < 16; ++r)
{ {
@ -110,6 +125,8 @@ void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
// isn't classified as noise. // isn't classified as noise.
diff0 = _mm_sub_epi16(v_sig0, res0); diff0 = _mm_sub_epi16(v_sig0, res0);
diff1 = _mm_sub_epi16(v_sig1, res2); diff1 = _mm_sub_epi16(v_sig1, res2);
sum_diff = _mm_add_epi16(sum_diff, _mm_add_epi16(diff0, diff1));
diff0sq = _mm_mullo_epi16(diff0, diff0); diff0sq = _mm_mullo_epi16(diff0, diff0);
diff1sq = _mm_mullo_epi16(diff1, diff1); diff1sq = _mm_mullo_epi16(diff1, diff1);
diff_sq = _mm_packus_epi16(diff0sq, diff1sq); diff_sq = _mm_packus_epi16(diff0sq, diff1sq);
@ -118,11 +135,18 @@ void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
p1 = _mm_andnot_si128(take_running, v_sig); p1 = _mm_andnot_si128(take_running, v_sig);
p2 = _mm_or_si128(p0, p1); p2 = _mm_or_si128(p0, p1);
_mm_storeu_si128((__m128i *)(&running_avg_y[0]), p2); _mm_storeu_si128((__m128i *)(&running_avg_y[0]), p2);
_mm_storeu_si128((__m128i *)(&sig[0]), p2); _mm_storeu_si128((__m128i *)(&filtered[0]), p2);
// Update pointers for next iteration. // Update pointers for next iteration.
sig += sig_stride; sig += sig_stride;
filtered += 16;
mc_running_avg_y += mc_avg_y_stride; mc_running_avg_y += mc_avg_y_stride;
running_avg_y += avg_y_stride; running_avg_y += avg_y_stride;
} }
if (abs(sum_vec_128i(sum_diff)) > SUM_DIFF_THRESHOLD)
{
return COPY_BLOCK;
}
vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
return FILTER_BLOCK;
} }