ARC: String library
Hand optimised asm code for ARC700 pipeline. Originally written/optimized by Joern Rennecke Signed-off-by: Vineet Gupta <vgupta@synopsys.com> Cc: Joern Rennecke <joern.rennecke@embecosm.com>
This commit is contained in:
Родитель
6e35fa2d43
Коммит
5210d1e688
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* vineetg: May 2011
|
||||
* -We had half-optimised memset/memcpy, got better versions of those
|
||||
* -Added memcmp, strchr, strcpy, strcmp, strlen
|
||||
*
|
||||
* Amit Bhor: Codito Technologies 2004
|
||||
*/
|
||||
|
||||
#ifndef _ASM_ARC_STRING_H
#define _ASM_ARC_STRING_H

#include <linux/types.h>

#ifdef __KERNEL__

/*
 * Architecture-provided string/memory routines.
 *
 * Every __HAVE_ARCH_<NAME> macro is kept next to the prototype of the routine
 * it announces.
 * NOTE(review): presumably the generic string code tests these macros to
 * leave out its own C fallback - confirm against lib/string.c.
 */

#define __HAVE_ARCH_MEMSET
extern void *memset(void *ptr, int, __kernel_size_t);

#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *, const void *, __kernel_size_t);

#define __HAVE_ARCH_MEMCMP
extern int memcmp(const void *, const void *, __kernel_size_t);

#define __HAVE_ARCH_STRCHR
extern char *strchr(const char *s, int c);

#define __HAVE_ARCH_STRCPY
extern char *strcpy(char *dest, const char *src);

#define __HAVE_ARCH_STRCMP
extern int strcmp(const char *cs, const char *ct);

#define __HAVE_ARCH_STRLEN
extern __kernel_size_t strlen(const char *);

/* ARC-specific helper with no __HAVE_ARCH_* macro: zero n bytes at ptr. */
extern void memzero(void *ptr, __kernel_size_t n);

#endif /* __KERNEL__ */
#endif /* _ASM_ARC_STRING_H */
|
|
@ -0,0 +1,124 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/linkage.h>
|
||||
|
||||
/*
 * The word loop keeps the second word of the first buffer in WORD2 and the
 * tail shift count in SHIFT.  r2 (n on entry) and r3 (n - 1) swap these two
 * roles depending on endianness.
 */
#ifdef __LITTLE_ENDIAN__
#define WORD2 r2
#define SHIFT r3
#else /* BIG ENDIAN */
#define WORD2 r3
#define SHIFT r2
#endif

/*
 * int memcmp(const void *s1, const void *s2, size_t n)
 *   In:  r0 = s1, r1 = s2, r2 = n
 *   Out: r0 = 0 when no difference is found, otherwise a non-zero value
 *        computed from the first differing byte/word (see the return paths)
 *   Uses r1-r5, r12 and lp_count as scratch.
 *
 * Two paths: a word-at-a-time loop (two words per iteration) and a
 * byte-at-a-time loop (two bytes per iteration).
 */
ARC_ENTRY memcmp
	or	r12,r0,r1
	asl_s	r12,r12,30		; low 2 bits of (s1 | s2) moved to bits 31:30
	sub	r3,r2,1			; r3 = n - 1
	brls	r2,r12,.Lbyte_path	; n <= r12 (unsigned): n == 0, or a pointer is not word aligned
	ld	r4,[r0,0]
	ld	r5,[r1,0]
	lsr.f	lp_count,r3,3		; (n - 1) / 8 iterations; carry = bit 2 of n - 1
	lpne	.Lword_loop_end		; loop is skipped when the count is zero
	ld_s	WORD2,[r0,4]
	ld_s	r12,[r1,4]
	brne	r4,r5,.Ldiff_even
	ld.a	r4,[r0,8]		; advance both pointers by 8
	ld.a	r5,[r1,8]
	brne	WORD2,r12,.Ldiff_odd
.Lword_loop_end:
	asl_s	SHIFT,SHIFT,3		; byte count -> bit count
	bhs_s	.Ltail_cmp		; carry clear: r4/r5 already hold the last word
	brne	r4,r5,.Ldiff_even
	ld	r4,[r0,4]		; otherwise the last word is the next one
	ld	r5,[r1,4]
#ifdef __LITTLE_ENDIAN__
	nop_s				; one more load latency cycle
.Ltail_cmp:
	xor	r0,r4,r5		; difference bits of the last word
	bset	r0,r0,SHIFT		; sentinel bit so that a "difference" is always found
	sub_s	r1,r0,1
	bic_s	r1,r1,r0		; ones below the lowest set bit of r0
	norm	r1,r1
	b.d	.Ldiff_even_shift
	and	r1,r1,24		; (delay slot) shift count rounded to a byte multiple
.Ldiff_even:
	xor	r0,r4,r5		; difference bits
	sub_s	r1,r0,1
	bic_s	r1,r1,r0		; ones below the lowest differing bit
	norm	r1,r1			; slow track insn
	and	r1,r1,24
.Ldiff_even_shift:
	asl	r2,r4,r1		; move the byte of interest to the top of each word
	asl	r12,r5,r1
	lsr_s	r2,r2,1			; drop one bit so the subtraction cannot wrap the sign
	lsr_s	r12,r12,1
	j_s.d	[blink]
	sub	r0,r2,r12		; (delay slot) result
	.balign	4
.Ldiff_odd:
	xor	r0,WORD2,r12		; same sequence as .Ldiff_even for the second word pair
	sub_s	r1,r0,1
	bic_s	r1,r1,r0
	norm	r1,r1			; slow track insn
	and	r1,r1,24
	asl_s	r2,r2,r1
	asl_s	r12,r12,r1
	lsr_s	r2,r2,1
	lsr_s	r12,r12,1
	j_s.d	[blink]
	sub	r0,r2,r12
#else /* BIG ENDIAN */
.Ltail_cmp:
	neg_s	SHIFT,SHIFT
	lsr	r4,r4,SHIFT		; shift the bytes past the end out of both words
	lsr	r5,r5,SHIFT		; slow track insn
.Ldiff_even:
	sub.f	r0,r4,r5		; r0 = 0 when equal, carry when r4 < r5 (unsigned)
	mov.ne	r0,1
	j_s.d	[blink]
	bset.cs	r0,r0,31		; (delay slot) make the result negative on carry
.Ldiff_odd:
	cmp_s	WORD2,r12

	mov_s	r0,1
	j_s.d	[blink]
	bset.cs	r0,r0,31
#endif /* ENDIAN */
	.balign	4
.Lbyte_path:
	breq	r2,0,.Lret_zero		; n == 0: nothing to compare
	ldb	r4,[r0,0]
	ldb	r5,[r1,0]
	lsr.f	lp_count,r3		; (n - 1) / 2 iterations; carry = bit 0 of n - 1
	lpne	.Lbyte_loop_end
	ldb_s	r3,[r0,1]
	ldb	r12,[r1,1]
	brne	r4,r5,.Lret_even_byte
	ldb.a	r4,[r0,2]		; advance both pointers by 2
	ldb.a	r5,[r1,2]
	brne	r3,r12,.Lret_odd_byte
.Lbyte_loop_end:
	bcc	.Lret_even_byte		; carry clear: r4/r5 hold the last byte pair
	brne	r4,r5,.Lret_even_byte
	ldb_s	r3,[r0,1]		; one more byte pair remains
	ldb_s	r12,[r1,1]
.Lret_odd_byte:
	j_s.d	[blink]
	sub	r0,r3,r12
.Lret_even_byte:
	j_s.d	[blink]
	sub	r0,r4,r5
.Lret_zero:
	j_s.d	[blink]
	mov	r0,0
ARC_EXIT memcmp
|
|
@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/linkage.h>
|
||||
|
||||
/*
 * void *memcpy(void *dst, const void *src, size_t n)
 *   In:  r0 = dst, r1 = src, r2 = n
 *   Out: r0 = dst (r0 is never written; r5 is the moving store pointer)
 *   Uses r1-r3, r5, r12 and lp_count as scratch.
 *
 * Word path: two words per loop iteration; a partial last word is merged
 * with the current contents of the destination word before the final store.
 * Byte path: two bytes per loop iteration.
 */
ARC_ENTRY memcpy
	or	r3,r0,r1
	asl_s	r3,r3,30		; low 2 bits of (dst | src) moved to bits 31:30
	mov_s	r5,r0			; r5 = store pointer, r0 stays as return value
	brls.d	r2,r3,.Lbyte_copy	; n == 0, or a pointer is not word aligned
	sub.f	r3,r2,1			; (delay slot) r3 = n - 1, carry set when n == 0
	ld_s	r12,[r1,0]
	asr.f	lp_count,r3,3		; (n - 1) / 8 iterations of the paired loop
	bbit0.d	r3,2,.Lword_pairs	; bit 2 of n - 1 clear: no odd leading word
	bmsk_s	r2,r2,1			; (delay slot) r2 = n & 3, bytes in a partial last word
	st.ab	r12,[r5,4]		; copy the odd leading word
	ld.a	r12,[r1,4]
.Lword_pairs:
	lppnz	.Lword_loop_end
	ld_s	r3,[r1,4]
	st.ab	r12,[r5,4]
	ld.a	r12,[r1,8]
	st.ab	r3,[r5,4]
.Lword_loop_end:
	breq	r2,0,.Lstore_last_word	; n is a multiple of 4: store the whole word
	ld	r3,[r5,0]		; destination word to be partially overwritten
#ifdef __LITTLE_ENDIAN__
	add3	r2,-1,r2		; uses long immediate; r2 = 8 * (n & 3) - 1
	xor_s	r12,r12,r3
	bmsk	r12,r12,r2		; keep bits 0..r2 of (src ^ dst)
	xor_s	r12,r12,r3		; low bytes from src, remaining bytes from dst
#else /* BIG ENDIAN */
	sub3	r2,31,r2		; uses long immediate; r2 = 31 - 8 * (n & 3)
	xor_s	r3,r3,r12
	bmsk	r3,r3,r2		; keep bits 0..r2 of (dst ^ src)
	xor_s	r12,r12,r3		; high bytes from src, remaining bytes from dst
#endif /* ENDIAN */
.Lstore_last_word:
	j_s.d	[blink]
	st	r12,[r5,0]		; (delay slot) final store

	.balign	4
.Lbyte_copy:
	jcs	[blink]			; carry from sub.f above: n == 0, nothing to do
	ldb_s	r12,[r1,0]
	lsr.f	lp_count,r3		; (n - 1) / 2 iterations; carry = bit 0 of n - 1
	bhs_s	.Lbyte_pairs		; carry clear: no odd leading byte
	stb.ab	r12,[r5,1]		; copy the odd leading byte
	ldb.a	r12,[r1,1]
.Lbyte_pairs:
	lppnz	.Lbyte_loop_end
	ldb_s	r3,[r1,1]
	stb.ab	r12,[r5,1]
	ldb.a	r12,[r1,2]
	stb.ab	r3,[r5,1]
.Lbyte_loop_end:
	j_s.d	[blink]
	stb	r12,[r5,0]		; (delay slot) final byte
ARC_EXIT memcpy
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/linkage.h>
|
||||
|
||||
#define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */

/*
 * void *memset(void *ptr, int c, size_t n)
 *   In:  r0 = ptr, r1 = c (low byte is used), r2 = n
 *   Out: r0 = ptr (r0 is never written; r4 is the moving store pointer)
 *   Uses r1-r4, r12 and lp_count as scratch.
 *
 * - ptr and n both multiples of 4: straight word loop.
 * - otherwise, n <= SMALL: plain byte loop.
 * - otherwise: the unaligned head and tail are written with byte/halfword
 *   stores first, then the word loop covers the aligned middle.
 */
ARC_ENTRY memset
	mov_s	r4,r0			; r4 = store pointer
	or	r12,r0,r2
	bmsk.f	r12,r12,1		; Z set when (ptr | n) has its low 2 bits clear
	extb_s	r1,r1			; keep only the fill byte
	asl	r3,r1,8
	beq.d	.Lword_fill
	or_s	r1,r1,r3		; (delay slot) fill byte replicated to 16 bits
	brls	r2,SMALL,.Lbyte_fill
	add	r3,r2,r0		; r3 = end address
	stb	r1,[r3,-1]		; last byte
	bclr_s	r3,r3,0
	stw	r1,[r3,-2]		; last halfword below the (even-rounded) end
	bmsk.f	r12,r0,1		; r12 = ptr & 3
	add_s	r2,r2,r12
	sub.ne	r2,r2,4			; count adjusted for the head handled below
	stb.ab	r1,[r4,1]		; first byte
	and	r4,r4,-2
	stw.ab	r1,[r4,2]		; first aligned halfword
	and	r4,r4,-4		; r4 is word aligned from here on
.Lword_fill:	; This code address should be aligned for speed.
	asl	r3,r1,16
	lsr.f	lp_count,r2,2		; number of whole words
	or_s	r1,r1,r3		; fill pattern replicated to 32 bits
	lpne	.Lword_fill_end
	st.ab	r1,[r4,4]
.Lword_fill_end:
	j_s	[blink]

	.balign	4
.Lbyte_fill:
	mov.f	lp_count,r2		; n iterations, loop skipped when n == 0
	lpne	.Lbyte_fill_end
	stb.ab	r1,[r4,1]
.Lbyte_fill_end:
	j_s	[blink]
ARC_EXIT memset
|
||||
|
||||
/*
 * void memzero(void *ptr, size_t n) - zero n bytes by re-using memset.
 *
 * Argument registers:
 *   memzero: r0 = mem, r1 = size
 *   memset:  r0 = mem, r1 = char, r2 = size
 */
ARC_ENTRY memzero
	mov	r2, r1		; size becomes the third memset argument
	mov	r1, 0		; fill value is zero
	b	memset		; tail call: a plain branch, so no need to tinker with blink
ARC_EXIT memzero
|
|
@ -0,0 +1,123 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
/* ARC700 has a relatively long pipeline and branch prediction, so we want
|
||||
to avoid branches that are hard to predict. On the other hand, the
|
||||
presence of the norm instruction makes it easier to operate on whole
|
||||
words branch-free. */
|
||||
|
||||
#include <asm/linkage.h>
|
||||
|
||||
/*
 * char *strchr(const char *s, int c)
 *   In:  r0 = s, r1 = c (only the low byte is used)
 *   Out: r0 = address of the first byte equal to c, or 0 when the string
 *        terminator is reached first
 *   Uses r1-r7 and r12 as scratch.
 *
 * Register roles in the word loop:
 *   r3 = 0x01010101, r4 = 0x80808080, r5 = c replicated into all four bytes,
 *   r2 = current word, r6 = r2 ^ r5 (a zero byte marks a match),
 *   r12/r7 = per-byte hit flags in bit 7 of each byte.
 *
 * When s is not word aligned the first word is loaded from the aligned
 * address below s, and r7 holds 0x01 only in the byte lanes that belong to
 * the string, so that bytes in front of s are not tested.
 */
ARC_ENTRY strchr
	extb_s	r1,r1
	asl	r5,r1,8
	bmsk	r2,r0,1			; r2 = s & 3
	or	r5,r5,r1
	mov_s	r3,0x01010101
	; NOTE(review): this compares (s & 3) with s itself, so the branch is
	; taken only for s < 4; "breq.d r2,0" looks intended.  Falling through
	; is still correct for aligned s because r2 = 0 gives r7 = 0x01010101.
	breq.d	r2,r0,.Laligned
	asl	r4,r5,16
	sub_s	r0,r0,r2		; round s down to a word boundary
	asl	r7,r2,3			; bit offset of s inside that word
	ld_s	r2,[r0]
#ifdef __LITTLE_ENDIAN__
	asl	r7,r3,r7		; 0x01 only in the lanes at or after s
#else
	lsr	r7,r3,r7
#endif
	or	r5,r5,r4
	ror	r4,r3			; r4 = 0x80808080
	sub	r12,r2,r7
	bic_s	r12,r12,r2
	and	r12,r12,r4		; terminator flags for the first word
	brne.d	r12,0,.Lfound0_ua
	xor	r6,r2,r5		; (delay slot) zero byte where the word matches c
	ld.a	r2,[r0,4]
	sub	r12,r6,r7
	bic	r12,r12,r6
#ifdef __LITTLE_ENDIAN__
	and	r7,r12,r4
	breq	r7,0,.Loop	; For speed, we want this branch to be unaligned.
	b	.Lfound_char	; Likewise this one.
#else
	/*
	 * Big endian: bytes in front of s sit in the more significant lanes,
	 * so a borrow from a matching lane can flag a lane that lies before
	 * the string and happens to equal c.  Keep the start mask in r7 and
	 * use it to discard such lanes instead of going through
	 * .Lfound_char, which would drop the mask and could return a pointer
	 * before s.
	 */
	and	r12,r12,r4
	breq	r12,0,.Loop	; For speed, we want this branch to be unaligned.
	lsr_s	r12,r12,7		; match flags moved to bit 0 of each lane
	bic	r2,r7,r6		; in-string lanes whose r6 byte has bit 0 clear
	b.d	.Lfound_char_b
	and_s	r2,r2,r12		; (delay slot) ... that are also flagged as a match
#endif
; /* We require this code address to be unaligned for speed...  */
.Laligned:
	ld_s	r2,[r0]
	or	r5,r5,r4
	ror	r4,r3
; /* ... so that this code address is aligned, for itself and ...  */
.Loop:
	sub	r12,r2,r3
	bic_s	r12,r12,r2
	and	r12,r12,r4		; terminator flags
	brne.d	r12,0,.Lfound0
	xor	r6,r2,r5		; (delay slot)
	ld.a	r2,[r0,4]
	sub	r12,r6,r3
	bic	r12,r12,r6
	and	r7,r12,r4		; match flags
	breq	r7,0,.Loop /* ... so that this branch is unaligned.  */
	; Found searched-for character.  r0 has already advanced to next word.
#ifdef __LITTLE_ENDIAN__
/* We only need the information about the first matching byte
   (i.e. the least significant matching byte) to be exact,
   hence there is no problem with carry effects.  */
.Lfound_char:
	sub	r3,r7,1
	bic	r3,r3,r7		; ones below the lowest match flag
	norm	r2,r3
	sub_s	r0,r0,1
	asr_s	r2,r2,3
	j.d	[blink]
	sub_s	r0,r0,r2		; (delay slot) address of the matching byte

	.balign	4
.Lfound0_ua:
	mov	r3,r7			; use the start mask in place of 0x01010101
.Lfound0:
	sub	r3,r6,r3
	bic	r3,r3,r6
	and	r2,r3,r4		; match flags for the word holding the terminator
	or_s	r12,r12,r2
	sub_s	r3,r12,1
	bic_s	r3,r3,r12
	norm	r3,r3
	add_s	r0,r0,3
	asr_s	r12,r3,3
	asl.f	0,r2,r3			; was the first flagged byte a match?
	sub_s	r0,r0,r12
	j_s.d	[blink]
	mov.pl	r0,0			; (delay slot) no: terminator came first, return 0
#else /* BIG ENDIAN */
.Lfound_char:
	lsr	r7,r7,7

	bic	r2,r7,r6		; drop flags caused by borrow (r6 byte is 0x01 there)
.Lfound_char_b:
	norm	r2,r2
	sub_s	r0,r0,4
	asr_s	r2,r2,3
	j.d	[blink]
	add_s	r0,r0,r2		; (delay slot) address of the matching byte

.Lfound0_ua:
	mov_s	r3,r7			; use the start mask in place of 0x01010101
.Lfound0:
	asl_s	r2,r2,7
	or	r7,r6,r4
	bic_s	r12,r12,r2
	sub	r2,r7,r3
	or	r2,r2,r6
	bic	r12,r2,r12
	bic.f	r3,r4,r12
	norm	r3,r3

	add.pl	r3,r3,1
	asr_s	r12,r3,3
	asl.f	0,r2,r3
	add_s	r0,r0,r12
	j_s.d	[blink]
	mov.mi	r0,0			; (delay slot) terminator came first, return 0
#endif /* ENDIAN */
ARC_EXIT strchr
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
/* This is optimized primarily for the ARC700.
|
||||
It would be possible to speed up the loops by one cycle / word
|
||||
respective one cycle / byte by forcing double source 1 alignment, unrolling
|
||||
by a factor of two, and speculatively loading the second word / byte of
|
||||
source 1; however, that would increase the overhead for loop setup / finish,
|
||||
and strcmp might often terminate early. */
|
||||
|
||||
#include <asm/linkage.h>
|
||||
|
||||
/*
 * int strcmp(const char *cs, const char *ct)
 *   In:  r0 = cs, r1 = ct
 *   Out: r0 = 0 when the strings are equal, otherwise a non-zero value
 *        derived from the first differing position (see the return paths)
 *   Uses r1-r5 and r12 as scratch.
 *
 * Word loop when both pointers are word aligned, byte loop otherwise.
 * In the word loop r12 = 0x01010101 and r5 = 0x80808080; r4 gets bit 7 set
 * in the byte lanes of the cs word that look like a zero byte.
 */
ARC_ENTRY strcmp
	or	r2,r0,r1
	bmsk_s	r2,r2,1			; low 2 bits of (cs | ct)
	brne	r2,0,.Lbyte_loop	; not both word aligned
	mov_s	r12,0x01010101
	ror	r5,r12			; r5 = 0x80808080
.Lword_loop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5		; zero-byte flags for the cs word
	brne	r4,0,.Lzero_in_word
	breq	r2,r3,.Lword_loop
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3		; all differing bits
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; lowest differing bit only
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask covering the lowest differing byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31		; (delay slot) negative when r2 < r3 (unsigned)

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lzero_in_word:
	xor	r0,r2,r3		; all differing bits
	or	r0,r0,r4		; plus the zero-byte flags
	sub_s	r1,r0,1
	bic_s	r0,r0,r1		; lowest bit of difference-or-terminator
	sub	r1,r5,r0
	xor	r0,r5,r1		; mask covering that lowest byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry propagation from a less significant zero byte.
	   We can compensate for this by checking that bit0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lzero_in_word:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0		; low estimate for r2, and then ...
	bic_s	r0,r0,r1		; (adjusted mask for the zero bytes)
	or_s	r3,r3,r0		; ... high estimate for r3, so that r2 > r3 ...
	cmp_s	r3,r2			; ... does not depend on trailing garbage
	or_s	r2,r2,r0		; same idea for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0			; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */

	.balign	4
.Lbyte_loop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lbyte_done	; end of cs
	breq	r2,r3,.Lbyte_loop
.Lbyte_done:
	j_s.d	[blink]
	sub	r0,r2,r3		; (delay slot) difference of the last byte pair
ARC_EXIT strcmp
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
   it 8 byte aligned.  Thus, we can do a little read-ahead, without
   dereferencing a cache line that we should not touch.
   Note that short and long instructions have been scheduled to avoid
   branch stalls.
   The bne_s to .Lr3z could be made unaligned & long to avoid a stall
   there, but then it is not likely to be taken often, and it
   would also be likely to cost an unaligned mispredict at the next call.  */

#include <asm/linkage.h>

/*
 * char *strcpy(char *dest, const char *src)
 *   In:  r0 = dest, r1 = src
 *   Out: r0 = dest (r0 is never written; r10 is the moving store pointer)
 *   Uses r1-r4, r8, r10 and r12 as scratch.
 *
 * r8 = 0x01010101 and r12 = 0x80808080 drive the "does this word contain a
 * zero byte" test.  All branch targets are assembler-local (.L) labels, like
 * in the other string routines, so they are not emitted as symbols.
 */
ARC_ENTRY strcpy
	or	r2,r0,r1
	bmsk_s	r2,r2,1			; low 2 bits of (dest | src)
	brne.d	r2,0,.Lcharloop		; not both word aligned: byte copy
	mov_s	r10,r0			; (delay slot) r10 = store pointer
	ld_s	r3,[r1,0]
	mov	r8,0x01010101
	bbit0.d	r1,2,.Lwordloop_start	; src already 8 byte aligned
	ror	r12,r8			; (delay slot) r12 = 0x80808080
	sub	r2,r3,r8		; src is 4 but not 8 byte aligned: test first word
	bic_s	r2,r2,r3
	tst_s	r2,r12
	bne	.Lr3z
	mov_s	r4,r3
	.balign	4
.Lwordloop:
	ld.a	r3,[r1,4]
	st.ab	r4,[r10,4]
.Lwordloop_start:
	ld.a	r4,[r1,4]		; read-ahead of the second word
	sub	r2,r3,r8
	bic_s	r2,r2,r3
	tst_s	r2,r12
	bne_s	.Lr3z			; zero byte in r3
	st.ab	r3,[r10,4]
	sub	r2,r4,r8
	bic	r2,r2,r4
	tst	r2,r12
	beq	.Lwordloop		; no zero byte in r4 either
	mov_s	r3,r4
	/* r3 holds the word with the terminator: store it byte by byte. */
#ifdef __LITTLE_ENDIAN__
.Lr3z:
	bmsk.f	r1,r3,7			; next byte = low byte, flags tell whether it is 0
	lsr_s	r3,r3,8
#else
.Lr3z:
	lsr.f	r1,r3,24		; next byte = top byte, flags tell whether it is 0
	asl_s	r3,r3,8
#endif
	bne.d	.Lr3z
	stb.ab	r1,[r10,1]		; (delay slot) store the byte, terminator included
	j_s	[blink]

	.balign	4
.Lcharloop:
	ldb.ab	r3,[r1,1]


	brne.d	r3,0,.Lcharloop
	stb.ab	r3,[r10,1]		; (delay slot) store the byte, terminator included
	j	[blink]
ARC_EXIT strcpy
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/linkage.h>
|
||||
|
||||
/*
 * size_t strlen(const char *s)
 *   In:  r0 = s
 *   Out: r0 = number of bytes before the terminating zero byte
 *   Uses r1-r7 and r12 as scratch.
 *
 * The string is scanned two words at a time, starting at the 8 byte aligned
 * address at or below s.  r4 = 0x01010101 and r5 = 0x80808080 drive the
 * zero-byte test.  For the first pair, bytes in front of s are masked out
 * (r7), and the first word is ignored when s lies in the second word
 * (bit 2 of s set).
 */
ARC_ENTRY strlen
	or	r3,r0,7			; last byte of the aligned 8 byte group
	ld	r2,[r3,-7]		; first word of the group
	ld.a	r6,[r3,-3]		; second word; r3 now points at it
	mov	r4,0x01010101		; uses long immediate
#ifdef __LITTLE_ENDIAN__
	asl_s	r1,r0,3
	btst_s	r0,2			; Z set: s lies in the first word
	asl	r7,r4,r1		; 0x01 only in the lanes at or after s
	ror	r5,r4			; r5 = 0x80808080
	sub	r1,r2,r7
	bic_s	r1,r1,r2		; zero flags, first word
	mov.eq	r7,r4			; second word is tested in full in that case
	sub	r12,r6,r7
	bic	r12,r12,r6		; zero flags, second word
	or.eq	r12,r12,r1
	and	r12,r12,r5
	brne	r12,0,.Lfirst_pair_hit
#else /* BIG ENDIAN */
	ror	r5,r4			; r5 = 0x80808080
	btst_s	r0,2			; Z set: s lies in the first word
	mov_s	r1,31
	sub3	r7,r1,r0		; r7 = 31 - 8 * s, used as bmsk bit index
	sub	r1,r2,r4
	bic_s	r1,r1,r2		; zero flags, first word
	bmsk	r1,r1,r7		; drop the lanes in front of s
	sub	r12,r6,r4
	bic	r12,r12,r6		; zero flags, second word
	bmsk.ne	r12,r12,r7
	or.eq	r12,r12,r1
	and	r12,r12,r5
	brne	r12,0,.Lfirst_pair_hit
#endif /* ENDIAN */

.Lpair_loop:
	ld_s	r2,[r3,4]
	ld.a	r6,[r3,8]		; stall for load result
	sub	r1,r2,r4
	bic_s	r1,r1,r2		; zero flags, first word of the pair
	sub	r12,r6,r4
	bic	r12,r12,r6		; zero flags, second word of the pair
	or	r12,r12,r1
	and	r12,r12,r5
	breq	r12,0,.Lpair_loop
.Lfinish:
	and.f	r1,r1,r5		; was the hit in the first word of the pair?
	sub.ne	r3,r3,4			; yes: step back to that word
	mov.eq	r1,r12			; no: use the combined flags
#ifdef __LITTLE_ENDIAN__
	sub_s	r2,r1,1
	bic_s	r2,r2,r1		; ones below the lowest flag
	norm	r1,r2
	sub_s	r0,r0,3
	lsr_s	r1,r1,3
	sub	r0,r3,r0
	j_s.d	[blink]
	sub	r0,r0,r1		; (delay slot) final length
#else /* BIG ENDIAN */
	lsr_s	r1,r1,7
	mov.eq	r2,r6
	bic_s	r1,r1,r2
	norm	r1,r1
	sub	r0,r3,r0
	lsr_s	r1,r1,3
	j_s.d	[blink]
	add	r0,r0,r1		; (delay slot) final length
#endif /* ENDIAN */
.Lfirst_pair_hit:
	b.d	.Lfinish
	sub_s.ne	r1,r1,r1	; (delay slot) s in second word: discard first-word flags
ARC_EXIT strlen
|
Загрузка…
Ссылка в новой задаче