Merge "Reduce partial frame copy in encoder's pick_filter_level_fast"
This commit is contained in:
Коммит
294777b915
|
@ -506,7 +506,8 @@ void vp8_loop_filter_partial_frame
|
|||
unsigned char *y_ptr;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
int mb_cols = post->y_width >> 4;
|
||||
int mb_cols = post->y_width >> 4;
|
||||
int mb_rows = post->y_height >> 4;
|
||||
|
||||
int linestocopy, i;
|
||||
|
||||
|
@ -521,15 +522,9 @@ void vp8_loop_filter_partial_frame
|
|||
|
||||
int lvl_seg[MAX_MB_SEGMENTS];
|
||||
|
||||
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
|
||||
|
||||
/* 3 is a magic number. 4 is probably magic too */
|
||||
linestocopy = (post->y_height >> (4 + 3));
|
||||
|
||||
if (linestocopy < 1)
|
||||
linestocopy = 1;
|
||||
|
||||
linestocopy <<= 4;
|
||||
/* number of MB rows to use in partial filtering */
|
||||
linestocopy = mb_rows / PARTIAL_FRAME_FRACTION;
|
||||
linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
/* See vp8_loop_filter_frame_init. Rather than call that for each change
|
||||
|
@ -554,8 +549,9 @@ void vp8_loop_filter_partial_frame
|
|||
}
|
||||
}
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
|
||||
/* Set up the buffer pointers; partial image starts at ~middle of frame */
|
||||
y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride;
|
||||
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
|
||||
|
||||
/* vp8_filter each macro block */
|
||||
for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
|
||||
|
|
|
@ -15,7 +15,10 @@
|
|||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_config.h"
|
||||
|
||||
#define MAX_LOOP_FILTER 63
|
||||
#define MAX_LOOP_FILTER 63
|
||||
/* fraction of total macroblock rows to be used in fast filter level picking */
|
||||
/* has to be > 2 */
|
||||
#define PARTIAL_FRAME_FRACTION 8
|
||||
|
||||
typedef enum
|
||||
{
|
||||
|
|
|
@ -14,9 +14,9 @@
|
|||
#include "vp8/encoder/variance.h"
|
||||
#include "vp8/encoder/onyx_int.h"
|
||||
|
||||
extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
|
||||
extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
|
||||
extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
|
||||
extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
|
||||
extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
|
||||
extern void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc);
|
||||
|
||||
void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
||||
{
|
||||
|
@ -123,15 +123,15 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
|||
cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;
|
||||
cpi->rtcd.quantize.fastquantb_pair = vp8_fast_quantize_b_pair_neon;
|
||||
}
|
||||
#endif
|
||||
#endif /* HAVE_ARMV7 */
|
||||
#endif /* CONFIG_RUNTIME_CPU_DETECT */
|
||||
|
||||
#if HAVE_ARMV7
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
if (flags & HAS_NEON)
|
||||
#endif
|
||||
{
|
||||
vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
|
||||
vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame_neon;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -8,20 +8,16 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
#include "vp8/encoder/onyx_int.h"
|
||||
#include "vp8/encoder/quantize.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "vpx_scale/yv12extend.h"
|
||||
#include "vpx_scale/vpxscale.h"
|
||||
#include "vp8/common/alloccommon.h"
|
||||
|
||||
extern void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
|
||||
extern void vp8_memcpy_partial_neon(unsigned char *dst_ptr,
|
||||
unsigned char *src_ptr,
|
||||
int sz);
|
||||
|
||||
|
||||
void
|
||||
vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction)
|
||||
void vp8_yv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc,
|
||||
YV12_BUFFER_CONFIG *dst_ybc)
|
||||
{
|
||||
unsigned char *src_y, *dst_y;
|
||||
int yheight;
|
||||
|
@ -34,17 +30,19 @@ vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG
|
|||
yheight = src_ybc->y_height;
|
||||
ystride = src_ybc->y_stride;
|
||||
|
||||
linestocopy = (yheight >> (Fraction + 4));
|
||||
/* number of MB rows to use in partial filtering */
|
||||
linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
|
||||
linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
|
||||
|
||||
if (linestocopy < 1)
|
||||
linestocopy = 1;
|
||||
|
||||
linestocopy <<= 4;
|
||||
|
||||
yoffset = ystride * ((yheight >> 5) * 16 - 8);
|
||||
/* Copy extra 4 so that full filter context is available if filtering done
|
||||
* on the copied partial frame and not original. Partial filter does mb
|
||||
* filtering for top row also, which can modify 3 pixels above.
|
||||
*/
|
||||
linestocopy += 4;
|
||||
/* partial image starts at ~middle of frame (macroblock border) */
|
||||
yoffset = ystride * (((yheight >> 5) * 16) - 4);
|
||||
src_y = src_ybc->y_buffer + yoffset;
|
||||
dst_y = dst_ybc->y_buffer + yoffset;
|
||||
|
||||
//vpx_memcpy (dst_y, src_y, ystride * (linestocopy +16));
|
||||
vp8_memcpy_neon((unsigned char *)dst_y, (unsigned char *)src_y, (int)(ystride *(linestocopy + 16)));
|
||||
vp8_memcpy_partial_neon(dst_y, src_y, ystride * linestocopy);
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
;
|
||||
|
||||
|
||||
EXPORT |vp8_memcpy_neon|
|
||||
EXPORT |vp8_memcpy_partial_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
|
@ -17,8 +17,10 @@
|
|||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;=========================================
|
||||
;void vp8_memcpy_neon(unsigned char *dst_ptr, unsigned char *src_ptr, int sz);
|
||||
|vp8_memcpy_neon| PROC
|
||||
;this is not a full memcpy function!!!
|
||||
;void vp8_memcpy_partial_neon(unsigned char *dst_ptr, unsigned char *src_ptr,
|
||||
; int sz);
|
||||
|vp8_memcpy_partial_neon| PROC
|
||||
;pld [r1] ;preload pred data
|
||||
;pld [r1, #128]
|
||||
;pld [r1, #256]
|
||||
|
|
|
@ -17,8 +17,10 @@
|
|||
void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
|
||||
void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
|
||||
|
||||
void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
|
||||
extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
|
||||
void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
|
||||
YV12_BUFFER_CONFIG *dst_ybc);
|
||||
extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
|
||||
YV12_BUFFER_CONFIG *dst_ybc);
|
||||
|
||||
void vp8_cmachine_specific_config(VP8_COMP *cpi)
|
||||
{
|
||||
|
|
|
@ -29,12 +29,11 @@ extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest,
|
|||
#define IF_RTCD(x) NULL
|
||||
#endif
|
||||
|
||||
extern void
|
||||
(*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
|
||||
YV12_BUFFER_CONFIG *dst_ybc,
|
||||
int Fraction);
|
||||
void
|
||||
vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction)
|
||||
extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc,
|
||||
YV12_BUFFER_CONFIG *dst_ybc);
|
||||
|
||||
void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc,
|
||||
YV12_BUFFER_CONFIG *dst_ybc)
|
||||
{
|
||||
unsigned char *src_y, *dst_y;
|
||||
int yheight;
|
||||
|
@ -47,21 +46,26 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst
|
|||
yheight = src_ybc->y_height;
|
||||
ystride = src_ybc->y_stride;
|
||||
|
||||
linestocopy = (yheight >> (Fraction + 4));
|
||||
/* number of MB rows to use in partial filtering */
|
||||
linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION;
|
||||
linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
|
||||
|
||||
if (linestocopy < 1)
|
||||
linestocopy = 1;
|
||||
|
||||
linestocopy <<= 4;
|
||||
|
||||
yoffset = ystride * ((yheight >> 5) * 16 - 8);
|
||||
/* Copy extra 4 so that full filter context is available if filtering done
|
||||
* on the copied partial frame and not original. Partial filter does mb
|
||||
* filtering for top row also, which can modify 3 pixels above.
|
||||
*/
|
||||
linestocopy += 4;
|
||||
/* partial image starts at ~middle of frame (macroblock border)*/
|
||||
yoffset = ystride * (((yheight >> 5) * 16) - 4);
|
||||
src_y = src_ybc->y_buffer + yoffset;
|
||||
dst_y = dst_ybc->y_buffer + yoffset;
|
||||
|
||||
vpx_memcpy(dst_y, src_y, ystride *(linestocopy + 16));
|
||||
vpx_memcpy(dst_y, src_y, ystride * linestocopy);
|
||||
}
|
||||
|
||||
static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, int Fraction, const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *dest,
|
||||
const vp8_variance_rtcd_vtable_t *rtcd)
|
||||
{
|
||||
int i, j;
|
||||
int Total = 0;
|
||||
|
@ -69,17 +73,16 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
|
|||
unsigned char *src = source->y_buffer;
|
||||
unsigned char *dst = dest->y_buffer;
|
||||
|
||||
int linestocopy = (source->y_height >> (Fraction + 4));
|
||||
(void)rtcd;
|
||||
int linestocopy;
|
||||
|
||||
if (linestocopy < 1)
|
||||
linestocopy = 1;
|
||||
|
||||
linestocopy <<= 4;
|
||||
/* number of MB rows to use in partial filtering */
|
||||
linestocopy = (source->y_height >> 4) / PARTIAL_FRAME_FRACTION;
|
||||
linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */
|
||||
|
||||
|
||||
srcoffset = source->y_stride * (dest->y_height >> 5) * 16;
|
||||
dstoffset = dest->y_stride * (dest->y_height >> 5) * 16;
|
||||
/* partial image starts at ~middle of frame (macroblock border)*/
|
||||
srcoffset = source->y_stride * ((dest->y_height >> 5) * 16);
|
||||
dstoffset = dest->y_stride * ((dest->y_height >> 5) * 16);
|
||||
|
||||
src += srcoffset;
|
||||
dst += dstoffset;
|
||||
|
@ -90,7 +93,9 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF
|
|||
for (j = 0; j < source->y_width; j += 16)
|
||||
{
|
||||
unsigned int sse;
|
||||
Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
|
||||
Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride,
|
||||
dst + j, dest->y_stride,
|
||||
&sse);
|
||||
}
|
||||
|
||||
src += 16 * source->y_stride;
|
||||
|
@ -105,7 +110,8 @@ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex)
|
|||
{
|
||||
int min_filter_level;
|
||||
|
||||
if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame && !cpi->common.refresh_alt_ref_frame)
|
||||
if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame &&
|
||||
!cpi->common.refresh_alt_ref_frame)
|
||||
min_filter_level = 0;
|
||||
else
|
||||
{
|
||||
|
@ -148,7 +154,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
|||
int best_filt_val = cm->filter_level;
|
||||
|
||||
// Make a copy of the unfiltered / processed recon buffer
|
||||
vp8_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf, 3);
|
||||
vp8_yv12_copy_partial_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
|
||||
|
||||
if (cm->frame_type == KEY_FRAME)
|
||||
cm->sharpness_level = 0;
|
||||
|
@ -173,10 +179,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
|||
// Get the err using the previous frame's filter value.
|
||||
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
|
||||
|
||||
best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
|
||||
best_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
// Re-instate the unfiltered frame
|
||||
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
|
||||
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
|
||||
filt_val -= (1 + ((filt_val > 10) ? 1 : 0));
|
||||
|
||||
|
@ -187,11 +193,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
|||
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
|
||||
|
||||
// Get the err for filtered frame
|
||||
filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
|
||||
filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
// Re-instate the unfiltered frame
|
||||
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
|
||||
|
||||
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
|
||||
// Update the best case record or exit loop.
|
||||
if (filt_err < best_err)
|
||||
|
@ -220,10 +225,10 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
|
|||
vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val);
|
||||
|
||||
// Get the err for filtered frame
|
||||
filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance));
|
||||
filt_err = calc_partial_ssl_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance));
|
||||
|
||||
// Re-instate the unfiltered frame
|
||||
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3);
|
||||
vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
|
||||
// Update the best case record or exit loop.
|
||||
if (filt_err < best_err)
|
||||
|
|
Загрузка…
Ссылка в новой задаче