Merge "Update armv6 loopfilter to new interface"
This commit is contained in:
Коммит
8f910594bd
|
@ -54,9 +54,11 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
|||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
|
||||
rtcd->loopfilter.simple_mb_v =
|
||||
vp8_loop_filter_simple_vertical_edge_armv6;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
|
||||
rtcd->loopfilter.simple_mb_h =
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
|
||||
|
||||
rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
|
||||
|
|
|
@ -53,14 +53,11 @@ count RN r5
|
|||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *flimit,
|
||||
;r2 const char *blimit,
|
||||
;r3 const char *limit,
|
||||
;stack const char *thresh,
|
||||
;stack int count
|
||||
|
||||
;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|
@ -72,14 +69,18 @@ count RN r5
|
|||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r9, [src], pstep ; p3
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r10, [src], pstep ; p2
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r11, [src], pstep ; p1
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r6], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r6] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|Hnext8|
|
||||
; vp8_filter_mask() function
|
||||
|
@ -275,14 +276,18 @@ count RN r5
|
|||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r9, [src], pstep ; p3
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r10, [src], pstep ; p2
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r11, [src], pstep ; p1
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r6], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r6] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|MBHnext8|
|
||||
|
||||
|
@ -584,15 +589,19 @@ count RN r5
|
|||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r6, [src], pstep ; load source data
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r7, [src], pstep
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r8, [src], pstep
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r12], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r12] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
ldr lr, [src], pstep
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|Vnext8|
|
||||
|
||||
|
@ -855,18 +864,22 @@ count RN r5
|
|||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r6, [src], pstep ; load source data
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
pld [src, #23]
|
||||
ldr r7, [src], pstep
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
pld [src, #23]
|
||||
ldr r8, [src], pstep
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r12], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r12] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
pld [src, #23]
|
||||
ldr lr, [src], pstep
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|MBVnext8|
|
||||
; vp8_filter_mask() function
|
||||
|
@ -906,6 +919,7 @@ count RN r5
|
|||
str lr, [sp, #8]
|
||||
ldr lr, [src], pstep
|
||||
|
||||
|
||||
TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12
|
||||
|
||||
ldr lr, [sp, #8] ; load back (f)limit accumulator
|
||||
|
@ -954,6 +968,7 @@ count RN r5
|
|||
beq mbvskip_filter ; skip filtering
|
||||
|
||||
|
||||
|
||||
;vp8_hevmask() function
|
||||
;calculate high edge variance
|
||||
|
||||
|
@ -1121,6 +1136,7 @@ count RN r5
|
|||
smlabb r8, r6, lr, r7
|
||||
smlatb r6, r6, lr, r7
|
||||
smlabb r9, r10, lr, r7
|
||||
|
||||
smlatb r10, r10, lr, r7
|
||||
ssat r8, #8, r8, asr #7
|
||||
ssat r6, #8, r6, asr #7
|
||||
|
|
|
@ -45,35 +45,28 @@
|
|||
MEND
|
||||
|
||||
|
||||
|
||||
src RN r0
|
||||
pstep RN r1
|
||||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *flimit,
|
||||
;r3 const char *limit,
|
||||
;stack const char *thresh,
|
||||
;stack int count
|
||||
|
||||
; All 16 elements in flimit are equal. So, in the code, only one load is needed
|
||||
; for flimit. Same applies to limit. thresh is not used in simple loopfilter
|
||||
;r2 const char *blimit
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r12, [r3] ; limit
|
||||
ldrb r12, [r2] ; blimit
|
||||
ldr r3, [src, -pstep, lsl #1] ; p1
|
||||
ldr r4, [src, -pstep] ; p0
|
||||
ldr r5, [src] ; q0
|
||||
ldr r6, [src, pstep] ; q1
|
||||
ldr r7, [r2] ; flimit
|
||||
orr r12, r12, r12, lsl #8 ; blimit
|
||||
ldr r2, c0x80808080
|
||||
ldr r9, [sp, #40] ; count for 8-in-parallel
|
||||
uadd8 r7, r7, r7 ; flimit * 2
|
||||
mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
|
||||
uadd8 r12, r7, r12 ; flimit * 2 + limit
|
||||
orr r12, r12, r12, lsl #16 ; blimit
|
||||
mov r9, #4 ; fixed loop count of 4. we're doing 4 at a time
|
||||
mov lr, #0 ; need 0 in a couple places
|
||||
|
||||
|simple_hnext8|
|
||||
|
@ -148,34 +141,32 @@ pstep RN r1
|
|||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r12, [r2] ; r12: flimit
|
||||
ldrb r12, [r2] ; r12: blimit
|
||||
ldr r2, c0x80808080
|
||||
ldr r7, [r3] ; limit
|
||||
orr r12, r12, r12, lsl #8
|
||||
|
||||
; load source data to r7, r8, r9, r10
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrh r4, [src], pstep
|
||||
uadd8 r12, r12, r12 ; flimit * 2
|
||||
orr r12, r12, r12, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
uadd8 r12, r12, r7 ; flimit * 2 + limit
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r4, [src], pstep
|
||||
ldr r11, [sp, #40] ; count (r11) for 8-in-parallel
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
mov r11, r11, lsl #1 ; 4-in-parallel
|
||||
mov r11, #4 ; fixed loop count of 4. we're doing 4 at a time
|
||||
|
||||
|simple_vnext8|
|
||||
; vp8_simple_filter_mask() function
|
||||
|
|
|
@ -18,8 +18,6 @@ extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
|||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
|
@ -55,15 +53,6 @@ void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
|||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
|
@ -77,15 +66,6 @@ void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
|||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
|
@ -101,15 +81,12 @@ void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
|
@ -127,15 +104,12 @@ void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsign
|
|||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -19,10 +19,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
|
|||
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_lf_normal_mb_v
|
||||
|
@ -38,13 +38,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
|
|||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6
|
||||
|
||||
#undef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_armv6
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_armv6
|
||||
|
||||
#undef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
|
||||
|
||||
#undef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_armv6
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_armv6
|
||||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
|
||||
|
|
Загрузка…
Ссылка в новой задаче