Follow-up ARM patch
Make the ARM asm detokenizer work with the new structures.
Change-Id: I7cd92c2a018ec24032bb1cfd1bb9739bc84b444a
This commit is contained in:
Родитель
e85e631504
Коммит
0b94f5d6e8
|
@ -55,11 +55,11 @@ DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_availab
|
|||
DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
|
||||
|
||||
DEFINE(detok_scan, offsetof(DETOK, scan));
|
||||
DEFINE(detok_ptr_onyxblock2context_leftabove, offsetof(DETOK, ptr_onyxblock2context_leftabove));
|
||||
DEFINE(detok_onyx_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
|
||||
DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
|
||||
DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
|
||||
DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
|
||||
DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
|
||||
DEFINE(detok_ptr_onyx_coef_bands_x, offsetof(DETOK, ptr_onyx_coef_bands_x));
|
||||
DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x));
|
||||
|
||||
DEFINE(detok_A, offsetof(DETOK, A));
|
||||
DEFINE(detok_L, offsetof(DETOK, L));
|
||||
|
|
|
@ -28,8 +28,7 @@ l_stacksize EQU 64
|
|||
|
||||
|
||||
;; constant offsets -- these should be created at build time
|
||||
c_onyxblock2left_offset EQU 25
|
||||
c_onyxblock2above_offset EQU 50
|
||||
c_block2above_offset EQU 25
|
||||
c_entropy_nodes EQU 11
|
||||
c_dct_eob_token EQU 11
|
||||
|
||||
|
@ -42,12 +41,12 @@ c_dct_eob_token EQU 11
|
|||
ldr r1, [r9, #detok_current_bc]
|
||||
ldr r0, [r9, #detok_qcoeff_start_ptr]
|
||||
mov r11, #0 ; i
|
||||
mov r3, #0x10 ; stop
|
||||
mov r3, #16 ; stop
|
||||
|
||||
cmp r7, #1 ; type ?= 1
|
||||
addeq r11, r11, #24 ; i = 24
|
||||
addeq r3, r3, #8 ; stop = 24
|
||||
addeq r0, r0, #3, 24 ; qcoefptr += 24*16 ?CHECKME
|
||||
addeq r0, r0, #3, 24 ; qcoefptr += 24*16
|
||||
|
||||
str r0, [sp, #l_qcoeff]
|
||||
str r11, [sp, #l_i]
|
||||
|
@ -59,61 +58,50 @@ c_dct_eob_token EQU 11
|
|||
|
||||
ldr r8, [r1, #bool_decoder_user_buffer]
|
||||
|
||||
ldr r10, [lr, #detok_coef_probs] ; coef_probs[type]
|
||||
ldr r10, [lr, #detok_coef_probs]
|
||||
ldr r5, [r1, #bool_decoder_count]
|
||||
ldr r6, [r1, #bool_decoder_range]
|
||||
ldr r4, [r1, #bool_decoder_value]
|
||||
|
||||
str r10, [sp, #l_coef_ptr]
|
||||
|
||||
;align 4
|
||||
BLOCK_LOOP
|
||||
ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove]
|
||||
ldr r2, [r9, #detok_A]
|
||||
ldr r3, [r9, #detok_ptr_block2leftabove]
|
||||
ldr r1, [r9, #detok_L]
|
||||
ldrb r12, [r3, r11]! ; onyxblock2context[i]
|
||||
ldr r2, [r9, #detok_A]
|
||||
ldrb r12, [r3, r11]! ; block2left[i]
|
||||
ldrb r3, [r3, #c_block2above_offset]; block2above[i]
|
||||
|
||||
cmp r7, #0 ; c = !type
|
||||
moveq r7, #1
|
||||
movne r7, #0
|
||||
|
||||
ldr r0, [r2, r12, lsl #2] ; A[onyxblock2context[i]]
|
||||
add r1, r1, r12, lsl #4 ; L + onyxblock2context[i] << 4
|
||||
; A is ptr to ptr (**)
|
||||
; L is ptr to data (*[4])
|
||||
|
||||
ldrb r2, [r3, #c_onyxblock2above_offset] ; + above offset
|
||||
ldrb r3, [r3, #c_onyxblock2left_offset] ; + left offset
|
||||
ldrb r0, [r1, r12]! ; *(L += block2left[i])
|
||||
ldrb r3, [r2, r3]! ; *(A += block2above[i])
|
||||
mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
|
||||
;; ;++
|
||||
|
||||
ldr r2, [r0, r2, lsl #2]! ; A + above offset
|
||||
ldr r3, [r1, r3, lsl #2]! ; L + left offset
|
||||
; VP8_COMBINEENTROPYCONTEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) != 0)
|
||||
cmp r2, #0 ; *a ?= 0
|
||||
movne r2, #1 ; haha if a == 0 no need to set up another var to state that pretty sweet :)
|
||||
cmp r3, #0 ; *l ?= 0
|
||||
addne r2, r2, #1 ; t
|
||||
cmp r0, #0 ; *l ?= 0
|
||||
movne r0, #1
|
||||
cmp r3, #0 ; *a ?= 0
|
||||
addne r0, r0, #1 ; t
|
||||
|
||||
str r1, [sp, #l_l_ptr] ; save &l
|
||||
str r0, [sp, #l_a_ptr] ; save &a
|
||||
smlabb r0, r2, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
|
||||
str r2, [sp, #l_a_ptr] ; save &a
|
||||
smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
|
||||
mov r1, #0 ; t = 0
|
||||
str r7, [sp, #l_c]
|
||||
|
||||
;align 4
|
||||
COEFF_LOOP
|
||||
ldr r3, [r9, #detok_ptr_onyx_coef_bands_x]
|
||||
ldr lr, [r9, #detok_onyx_coef_tree_ptr]
|
||||
|
||||
; onyx_coef_bands_x is UINT16
|
||||
add r3, r3, r7, lsl #1 ; coef_bands_x[c]
|
||||
ldrh r3, [r3] ; UINT16
|
||||
|
||||
;++
|
||||
ldr r3, [r9, #detok_ptr_coef_bands_x]
|
||||
ldr lr, [r9, #detok_coef_tree_ptr]
|
||||
;STALL
|
||||
ldrb r3, [r3, r7] ; coef_bands_x[c]
|
||||
;STALL
|
||||
;STALL
|
||||
add r0, r0, r3 ; Prob += coef_bands_x[c]
|
||||
|
||||
;align 4
|
||||
get_token_loop
|
||||
ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
|
||||
mov r3, r6, lsl #8 ; range << 8
|
||||
|
@ -221,14 +209,14 @@ SKIP_EXTRABITS
|
|||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
ldrleb r2, [r8], #1 ; *(bufptr++)
|
||||
addle r5, r5, #8 ; count += 8
|
||||
rsble r3, r5, #24 ; BR_COUNT - count
|
||||
rsble r3, r5, #24 ; BR_COUNT - count
|
||||
orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
|
||||
|
||||
add r0, r0, #0xB ; Prob += ENTROPY_NODES (11)
|
||||
add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
|
||||
|
||||
cmn r1, #1 ; t < -ONE_TOKEN
|
||||
|
||||
addlt r0, r0, #0xB ; Prob += ENTROPY_NODES (11)
|
||||
addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
|
||||
|
||||
mvn r1, #1 ; t = -1 ???? C is -2
|
||||
|
||||
|
@ -236,7 +224,7 @@ SKIP_EOB_CHECK
|
|||
ldr r7, [sp, #l_c] ; c
|
||||
ldr r3, [r9, #detok_scan]
|
||||
add r1, r1, #2 ; t+= 2
|
||||
cmp r7, #(0x10 - 1) ; c should will be one higher
|
||||
cmp r7, #15 ; c should will be one higher
|
||||
|
||||
ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value
|
||||
add r7, r7, #1 ; c++
|
||||
|
@ -247,7 +235,7 @@ SKIP_EOB_CHECK
|
|||
|
||||
blt COEFF_LOOP
|
||||
|
||||
sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c ; never stored! no condition!
|
||||
sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c
|
||||
|
||||
END_OF_BLOCK
|
||||
ldr r3, [sp, #l_type] ; type
|
||||
|
@ -269,50 +257,49 @@ END_OF_BLOCK
|
|||
movne r3, #1 ; t
|
||||
moveq r3, #0
|
||||
|
||||
add r0, r0, #0x20 ; qcoeff += 32 (16 * 2?)
|
||||
add r0, r0, #32 ; qcoeff += 32 (16 * 2?)
|
||||
add r11, r11, #1 ; i++
|
||||
str r3, [r7] ; *l = t
|
||||
str r3, [r2] ; *a = t
|
||||
strb r3, [r7] ; *l = t
|
||||
strb r3, [r2] ; *a = t
|
||||
str r0, [sp, #l_qcoeff] ; qcoeff
|
||||
str r11, [sp, #l_i] ; i
|
||||
|
||||
cmp r11, r12 ; i >= stop ? VERIFY should be strictly LT(<)?
|
||||
cmp r11, r12 ; i < stop
|
||||
ldr r7, [sp, #l_type] ; type
|
||||
mov lr, #0xB ; 11 (ENTORPY_NODES?)
|
||||
|
||||
blt BLOCK_LOOP
|
||||
|
||||
cmp r11, #0x19 ; i ?= 25
|
||||
cmp r11, #25 ; i ?= 25
|
||||
bne ln2_decode_mb_to
|
||||
|
||||
ldr r12, [r9, #detok_qcoeff_start_ptr]
|
||||
ldr r10, [r9, #detok_coef_probs]
|
||||
mov r7, #0 ; type/i = 0
|
||||
mov r3, #0x10 ; stop = 0
|
||||
mov r3, #16 ; stop = 16
|
||||
str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
|
||||
str r7, [sp, #l_i]
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type] (0)
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
|
||||
|
||||
b BLOCK_LOOP
|
||||
|
||||
ln2_decode_mb_to
|
||||
cmp r11, #0x10 ; i ?= 16
|
||||
cmp r11, #16 ; i ?= 16
|
||||
bne ln1_decode_mb_to
|
||||
|
||||
mov r10, #detok_coef_probs
|
||||
add r10, r10, #2*4 ; coef_probs[type]
|
||||
ldr r10, [r9, r10] ; detok + 48 - THIS IS PROBABLY THE ISSUE: NEW STRUCTURE
|
||||
ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
|
||||
|
||||
mov r7, #2 ; type = 2
|
||||
mov r3, #0x18 ; stop = 24
|
||||
mov r3, #24 ; stop = 24
|
||||
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type] - didn't want to add 2 to coef_probs
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
|
||||
b BLOCK_LOOP
|
||||
|
||||
ln1_decode_mb_to
|
||||
|
|
|
@ -75,11 +75,14 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
|
|||
}
|
||||
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
|
||||
// mashup of vp8_block2left and vp8_block2above so we only need one pointer
|
||||
// for the assembly version.
|
||||
DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
|
||||
{
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
|
||||
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0 //end of vp8_block2above
|
||||
//vp8_block2left
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
|
||||
//vp8_block2above
|
||||
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
|
||||
};
|
||||
|
||||
void vp8_init_detokenizer(VP8D_COMP *dx)
|
||||
|
@ -88,8 +91,8 @@ void vp8_init_detokenizer(VP8D_COMP *dx)
|
|||
MACROBLOCKD *x = & dx->mb;
|
||||
|
||||
dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
|
||||
dx->detoken.ptr_onyxblock2context_leftabove = vp8_block2context_leftabove;
|
||||
dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
|
||||
dx->detoken.ptr_block2leftabove = vp8_block2leftabove;
|
||||
dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x;
|
||||
dx->detoken.scan = vp8_default_zig_zag1d;
|
||||
dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
|
||||
dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
|
||||
|
|
|
@ -49,14 +49,14 @@ typedef struct
|
|||
typedef struct
|
||||
{
|
||||
int const *scan;
|
||||
UINT8 const *ptr_onyxblock2context_leftabove;
|
||||
UINT8 const *ptr_block2leftabove;
|
||||
vp8_tree_index const *vp8_coef_tree_ptr;
|
||||
TOKENEXTRABITS const *teb_base_ptr;
|
||||
unsigned char *norm_ptr;
|
||||
UINT16 *ptr_onyx_coef_bands_x;
|
||||
UINT8 *ptr_coef_bands_x;
|
||||
|
||||
ENTROPY_CONTEXT **A;
|
||||
ENTROPY_CONTEXT (*L)[4];
|
||||
ENTROPY_CONTEXT_PLANES *A;
|
||||
ENTROPY_CONTEXT_PLANES *L;
|
||||
|
||||
INT16 *qcoeff_start_ptr;
|
||||
BOOL_DECODER *current_bc;
|
||||
|
|
Загрузка…
Ссылка в новой задаче