xtensa: clean up fixups in assembly code

Remove duplicate definitions of EX() and similar TRY/CATCH and SRC/DST
macros from assembly sources and put a single definition into asm/asmmacro.h.
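
A condensed before/after sketch of one fixup site, pieced together from the
memset hunks below (the per-file five-argument EX() that wrapped the whole
instruction gives way to the shared EX(handler) prefix from asm/asmmacro.h,
with a numeric local label such as 10: as the fixup handler):

    /* before: each file carried its own EX(), taking instruction and handler */
    #define EX(insn,reg1,reg2,offset,handler)	\
    9:	insn	reg1, reg2, offset;		\
    	.section __ex_table, "a";		\
    	.word	9b, handler;			\
    	.previous

    	EX(s32i, a3, a5, 0, memset_fixup)

    /* after: one definition in asm/asmmacro.h, used as an instruction prefix */
    #define EX(handler)			\
    	.section __ex_table, "a";	\
    	.word	97f, handler;		\
    	.previous			\
    97:

    EX(10f) s32i	a3, a5, 0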

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
Max Filippov 2017-12-09 21:18:47 -08:00
Parent 2da03d4114
Commit 0013aceb30
7 changed files with 133 additions and 194 deletions

View file

@@ -150,5 +150,12 @@
 	__endl	\ar \as
 .endm
 
+/* Load or store instructions that may cause exceptions use the EX macro. */
+
+#define EX(handler)			\
+	.section __ex_table, "a";	\
+	.word	97f, handler;		\
+	.previous			\
+97:
 
 #endif /* _XTENSA_ASMMACRO_H */

View file

@@ -14,6 +14,7 @@
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
 #include <asm/processor.h>
 #include <asm/coprocessor.h>
 #include <asm/thread_info.h>
@@ -1094,35 +1095,12 @@ ENDPROC(fast_syscall_unrecoverable)
  * < VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
  *
  * Note: we don't have to save a2; a2 holds the return value
- *
- * We use the two macros TRY and CATCH:
- *
- * TRY	 adds an entry to the __ex_table fixup table for the immediately
- *	 following instruction.
- *
- * CATCH catches any exception that occurred at one of the preceding TRY
- *	 statements and continues from there
- *
- * Usage	TRY	l32i	a0, a1, 0
- *		<other code>
- *	done:	rfe
- *	CATCH	<set return code>
- *		j done
  */
 	.literal_position
 #ifdef CONFIG_FAST_SYSCALL_XTENSA
-#define TRY								\
-	.section __ex_table, "a";					\
-	.word	66f, 67f;						\
-	.text;								\
-66:
-#define CATCH								\
-67:
 ENTRY(fast_syscall_xtensa)
 	s32i	a7, a2, PT_AREG7	# we need an additional register
@@ -1136,9 +1114,9 @@ ENTRY(fast_syscall_xtensa)
 .Lswp:	/* Atomic compare and swap */
-TRY	l32i	a0, a3, 0	# read old value
+EX(.Leac) l32i	a0, a3, 0	# read old value
 	bne	a0, a4, 1f	# same as old value? jump
-TRY	s32i	a5, a3, 0	# different, modify value
+EX(.Leac) s32i	a5, a3, 0	# different, modify value
 	l32i	a7, a2, PT_AREG7	# restore a7
 	l32i	a0, a2, PT_AREG0	# restore a0
 	movi	a2, 1		# and return 1
@@ -1151,12 +1129,12 @@ TRY	s32i	a5, a3, 0	# different, modify value
 .Lnswp:	/* Atomic set, add, and exg_add. */
-TRY	l32i	a7, a3, 0	# orig
+EX(.Leac) l32i	a7, a3, 0	# orig
 	addi	a6, a6, -SYS_XTENSA_ATOMIC_SET
 	add	a0, a4, a7	# + arg
 	moveqz	a0, a4, a6	# set
 	addi	a6, a6, SYS_XTENSA_ATOMIC_SET
-TRY	s32i	a0, a3, 0	# write new value
+EX(.Leac) s32i	a0, a3, 0	# write new value
 	mov	a0, a2
 	mov	a2, a7
@@ -1164,7 +1142,6 @@ TRY	s32i	a0, a3, 0	# write new value
 	l32i	a0, a0, PT_AREG0	# restore a0
 	rfe
-CATCH
 .Leac:	l32i	a7, a2, PT_AREG7	# restore a7
 	l32i	a0, a2, PT_AREG0	# restore a0
 	movi	a2, -EFAULT

View file

@@ -14,9 +14,10 @@
  * 2 of the License, or (at your option) any later version.
  */
-#include <asm/errno.h>
+#include <linux/errno.h>
 #include <linux/linkage.h>
 #include <variant/core.h>
+#include <asm/asmmacro.h>
 /*
  * computes a partial checksum, e.g. for TCP/UDP fragments
@@ -175,23 +176,8 @@ ENDPROC(csum_partial)
 /*
  * Copy from ds while checksumming, otherwise like csum_partial
- *
- * The macros SRC and DST specify the type of access for the instruction.
- * thus we can call a custom exception handler for each access type.
  */
-#define SRC(y...)			\
-	9999: y;			\
-	.section __ex_table, "a";	\
-	.long 9999b, 6001f	;	\
-	.previous
-#define DST(y...)			\
-	9999: y;			\
-	.section __ex_table, "a";	\
-	.long 9999b, 6002f	;	\
-	.previous
 /*
 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
 	int sum, int *src_err_ptr, int *dst_err_ptr)
@@ -244,28 +230,28 @@ ENTRY(csum_partial_copy_generic)
 	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
 .Loop5:
 #endif
-SRC(	l32i	a9, a2, 0	)
+EX(10f)	l32i	a9, a2, 0
-SRC(	l32i	a8, a2, 4	)
+EX(10f)	l32i	a8, a2, 4
-DST(	s32i	a9, a3, 0	)
+EX(11f)	s32i	a9, a3, 0
-DST(	s32i	a8, a3, 4	)
+EX(11f)	s32i	a8, a3, 4
 	ONES_ADD(a5, a9)
 	ONES_ADD(a5, a8)
-SRC(	l32i	a9, a2, 8	)
+EX(10f)	l32i	a9, a2, 8
-SRC(	l32i	a8, a2, 12	)
+EX(10f)	l32i	a8, a2, 12
-DST(	s32i	a9, a3, 8	)
+EX(11f)	s32i	a9, a3, 8
-DST(	s32i	a8, a3, 12	)
+EX(11f)	s32i	a8, a3, 12
 	ONES_ADD(a5, a9)
 	ONES_ADD(a5, a8)
-SRC(	l32i	a9, a2, 16	)
+EX(10f)	l32i	a9, a2, 16
-SRC(	l32i	a8, a2, 20	)
+EX(10f)	l32i	a8, a2, 20
-DST(	s32i	a9, a3, 16	)
+EX(11f)	s32i	a9, a3, 16
-DST(	s32i	a8, a3, 20	)
+EX(11f)	s32i	a8, a3, 20
 	ONES_ADD(a5, a9)
 	ONES_ADD(a5, a8)
-SRC(	l32i	a9, a2, 24	)
+EX(10f)	l32i	a9, a2, 24
-SRC(	l32i	a8, a2, 28	)
+EX(10f)	l32i	a8, a2, 28
-DST(	s32i	a9, a3, 24	)
+EX(11f)	s32i	a9, a3, 24
-DST(	s32i	a8, a3, 28	)
+EX(11f)	s32i	a8, a3, 28
 	ONES_ADD(a5, a9)
 	ONES_ADD(a5, a8)
 	addi	a2, a2, 32
@@ -284,8 +270,8 @@ DST(	s32i	a8, a3, 28	)
 	add	a10, a10, a2	/* a10 = end of last 4-byte src chunk */
 .Loop6:
 #endif
-SRC(	l32i	a9, a2, 0	)
+EX(10f)	l32i	a9, a2, 0
-DST(	s32i	a9, a3, 0	)
+EX(11f)	s32i	a9, a3, 0
 	ONES_ADD(a5, a9)
 	addi	a2, a2, 4
 	addi	a3, a3, 4
@@ -315,8 +301,8 @@ DST(	s32i	a9, a3, 0	)
 	add	a10, a10, a2	/* a10 = end of last 2-byte src chunk */
 .Loop7:
 #endif
-SRC(	l16ui	a9, a2, 0	)
+EX(10f)	l16ui	a9, a2, 0
-DST(	s16i	a9, a3, 0	)
+EX(11f)	s16i	a9, a3, 0
 	ONES_ADD(a5, a9)
 	addi	a2, a2, 2
 	addi	a3, a3, 2
@@ -326,8 +312,8 @@ DST(	s16i	a9, a3, 0	)
 4:
 	/* This section processes a possible trailing odd byte. */
 	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
-SRC(	l8ui	a9, a2, 0	)
+EX(10f)	l8ui	a9, a2, 0
-DST(	s8i	a9, a3, 0	)
+EX(11f)	s8i	a9, a3, 0
 #ifdef __XTENSA_EB__
 	slli	a9, a9, 8	/* shift byte to bits 8..15 */
 #endif
@@ -350,10 +336,10 @@ DST(	s8i	a9, a3, 0	)
 	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */
 .Loop8:
 #endif
-SRC(	l8ui	a9, a2, 0	)
+EX(10f)	l8ui	a9, a2, 0
-SRC(	l8ui	a8, a2, 1	)
+EX(10f)	l8ui	a8, a2, 1
-DST(	s8i	a9, a3, 0	)
+EX(11f)	s8i	a9, a3, 0
-DST(	s8i	a8, a3, 1	)
+EX(11f)	s8i	a8, a3, 1
 #ifdef __XTENSA_EB__
 	slli	a9, a9, 8	/* combine into a single 16-bit value */
 #else				/* for checksum computation */
@@ -381,7 +367,7 @@ ENDPROC(csum_partial_copy_generic)
    a12 = original dst for exception handling
 */
-6001:
+10:
 	_movi	a2, -EFAULT
 	s32i	a2, a6, 0	/* src_err_ptr */
@@ -403,7 +389,7 @@ ENDPROC(csum_partial_copy_generic)
 2:
 	retw
-6002:
+11:
 	movi	a2, -EFAULT
 	s32i	a2, a7, 0	/* dst_err_ptr */
 	movi	a2, 0

View file

@@ -12,6 +12,7 @@
  */
 #include <variant/core.h>
+#include <asm/asmmacro.h>
 /*
  * void *memset(void *dst, int c, size_t length)
@@ -28,15 +29,6 @@
  * the alignment labels).
  */
-/* Load or store instructions that may cause exceptions use the EX macro. */
-#define EX(insn,reg1,reg2,offset,handler)	\
-9:	insn	reg1, reg2, offset;		\
-	.section __ex_table, "a";		\
-	.word	9b, handler;			\
-	.previous
 .text
 .align	4
 .global	memset
@@ -73,10 +65,10 @@ memset:
 	add	a6, a6, a5	# a6 = end of last 16B chunk
 #endif /* !XCHAL_HAVE_LOOPS */
 .Loop1:
-	EX(s32i, a3, a5, 0, memset_fixup)
+EX(10f) s32i	a3, a5, 0
-	EX(s32i, a3, a5, 4, memset_fixup)
+EX(10f) s32i	a3, a5, 4
-	EX(s32i, a3, a5, 8, memset_fixup)
+EX(10f) s32i	a3, a5, 8
-	EX(s32i, a3, a5, 12, memset_fixup)
+EX(10f) s32i	a3, a5, 12
 	addi	a5, a5, 16
 #if !XCHAL_HAVE_LOOPS
 	blt	a5, a6, .Loop1
@@ -84,23 +76,23 @@ memset:
 .Loop1done:
 	bbci.l	a4, 3, .L2
 	# set 8 bytes
-	EX(s32i, a3, a5, 0, memset_fixup)
+EX(10f) s32i	a3, a5, 0
-	EX(s32i, a3, a5, 4, memset_fixup)
+EX(10f) s32i	a3, a5, 4
 	addi	a5, a5, 8
 .L2:
 	bbci.l	a4, 2, .L3
 	# set 4 bytes
-	EX(s32i, a3, a5, 0, memset_fixup)
+EX(10f) s32i	a3, a5, 0
 	addi	a5, a5, 4
 .L3:
 	bbci.l	a4, 1, .L4
 	# set 2 bytes
-	EX(s16i, a3, a5, 0, memset_fixup)
+EX(10f) s16i	a3, a5, 0
 	addi	a5, a5, 2
 .L4:
 	bbci.l	a4, 0, .L5
 	# set 1 byte
-	EX(s8i, a3, a5, 0, memset_fixup)
+EX(10f) s8i	a3, a5, 0
 .L5:
 .Lret1:
 	retw
@@ -114,7 +106,7 @@ memset:
 	bbci.l	a5, 0, .L20	# branch if dst alignment half-aligned
 	# dst is only byte aligned
 	# set 1 byte
-	EX(s8i, a3, a5, 0, memset_fixup)
+EX(10f) s8i	a3, a5, 0
 	addi	a5, a5, 1
 	addi	a4, a4, -1
 	# now retest if dst aligned
@@ -122,7 +114,7 @@ memset:
 .L20:
 	# dst half-aligned
 	# set 2 bytes
-	EX(s16i, a3, a5, 0, memset_fixup)
+EX(10f) s16i	a3, a5, 0
 	addi	a5, a5, 2
 	addi	a4, a4, -2
 	j	.L0	# dst is now aligned, return to main algorithm
@@ -141,7 +133,7 @@ memset:
 	add	a6, a5, a4	# a6 = ending address
 #endif /* !XCHAL_HAVE_LOOPS */
 .Lbyteloop:
-	EX(s8i, a3, a5, 0, memset_fixup)
+EX(10f) s8i	a3, a5, 0
 	addi	a5, a5, 1
 #if !XCHAL_HAVE_LOOPS
 	blt	a5, a6, .Lbyteloop
@@ -155,6 +147,6 @@ memset:
 /* We return zero if a failure occurred. */
-memset_fixup:
+10:
 	movi	a2, 0
 	retw

View file

@@ -11,16 +11,9 @@
  * Copyright (C) 2002 Tensilica Inc.
  */
-#include <variant/core.h>
 #include <linux/errno.h>
-/* Load or store instructions that may cause exceptions use the EX macro. */
-#define EX(insn,reg1,reg2,offset,handler)	\
-9:	insn	reg1, reg2, offset;		\
-	.section __ex_table, "a";		\
-	.word	9b, handler;			\
-	.previous
+#include <variant/core.h>
+#include <asm/asmmacro.h>
 /*
  * char *__strncpy_user(char *dst, const char *src, size_t len)
@@ -75,9 +68,9 @@ __strncpy_user:
 	j	.Ldstunaligned
 .Lsrc1mod2:	# src address is odd
-	EX(l8ui, a9, a3, 0, fixup_l)	# get byte 0
+EX(11f)	l8ui	a9, a3, 0		# get byte 0
 	addi	a3, a3, 1		# advance src pointer
-	EX(s8i, a9, a11, 0, fixup_s)	# store byte 0
+EX(10f)	s8i	a9, a11, 0		# store byte 0
 	beqz	a9, .Lret		# if byte 0 is zero
 	addi	a11, a11, 1		# advance dst pointer
 	addi	a4, a4, -1		# decrement len
@@ -85,16 +78,16 @@ __strncpy_user:
 	bbci.l	a3, 1, .Lsrcaligned	# if src is now word-aligned
 .Lsrc2mod4:	# src address is 2 mod 4
-	EX(l8ui, a9, a3, 0, fixup_l)	# get byte 0
+EX(11f)	l8ui	a9, a3, 0		# get byte 0
 	/* 1-cycle interlock */
-	EX(s8i, a9, a11, 0, fixup_s)	# store byte 0
+EX(10f)	s8i	a9, a11, 0		# store byte 0
 	beqz	a9, .Lret		# if byte 0 is zero
 	addi	a11, a11, 1		# advance dst pointer
 	addi	a4, a4, -1		# decrement len
 	beqz	a4, .Lret		# if len is zero
-	EX(l8ui, a9, a3, 1, fixup_l)	# get byte 0
+EX(11f)	l8ui	a9, a3, 1		# get byte 0
 	addi	a3, a3, 2		# advance src pointer
-	EX(s8i, a9, a11, 0, fixup_s)	# store byte 0
+EX(10f)	s8i	a9, a11, 0		# store byte 0
 	beqz	a9, .Lret		# if byte 0 is zero
 	addi	a11, a11, 1		# advance dst pointer
 	addi	a4, a4, -1		# decrement len
@@ -117,12 +110,12 @@ __strncpy_user:
 	add	a12, a12, a11	# a12 = end of last 4B chunck
 #endif
 .Loop1:
-	EX(l32i, a9, a3, 0, fixup_l)	# get word from src
+EX(11f)	l32i	a9, a3, 0		# get word from src
 	addi	a3, a3, 4		# advance src pointer
 	bnone	a9, a5, .Lz0		# if byte 0 is zero
 	bnone	a9, a6, .Lz1		# if byte 1 is zero
 	bnone	a9, a7, .Lz2		# if byte 2 is zero
-	EX(s32i, a9, a11, 0, fixup_s)	# store word to dst
+EX(10f)	s32i	a9, a11, 0		# store word to dst
 	bnone	a9, a8, .Lz3		# if byte 3 is zero
 	addi	a11, a11, 4		# advance dst pointer
 #if !XCHAL_HAVE_LOOPS
@@ -132,7 +125,7 @@ __strncpy_user:
 .Loop1done:
 	bbci.l	a4, 1, .L100
 	# copy 2 bytes
-	EX(l16ui, a9, a3, 0, fixup_l)
+EX(11f)	l16ui	a9, a3, 0
 	addi	a3, a3, 2		# advance src pointer
 #ifdef __XTENSA_EB__
 	bnone	a9, a7, .Lz0		# if byte 2 is zero
@@ -141,13 +134,13 @@ __strncpy_user:
 	bnone	a9, a5, .Lz0		# if byte 0 is zero
 	bnone	a9, a6, .Lz1		# if byte 1 is zero
 #endif
-	EX(s16i, a9, a11, 0, fixup_s)
+EX(10f)	s16i	a9, a11, 0
 	addi	a11, a11, 2		# advance dst pointer
 .L100:
 	bbci.l	a4, 0, .Lret
-	EX(l8ui, a9, a3, 0, fixup_l)
+EX(11f)	l8ui	a9, a3, 0
 	/* slot */
-	EX(s8i, a9, a11, 0, fixup_s)
+EX(10f)	s8i	a9, a11, 0
 	beqz	a9, .Lret		# if byte is zero
 	addi	a11, a11, 1-3		# advance dst ptr 1, but also cancel
 					# the effect of adding 3 in .Lz3 code
@@ -161,14 +154,14 @@ __strncpy_user:
 #ifdef __XTENSA_EB__
 	movi	a9, 0
 #endif /* __XTENSA_EB__ */
-	EX(s8i, a9, a11, 0, fixup_s)
+EX(10f)	s8i	a9, a11, 0
 	sub	a2, a11, a2		# compute strlen
 	retw
 .Lz1:	# byte 1 is zero
 #ifdef __XTENSA_EB__
 	extui	a9, a9, 16, 16
 #endif /* __XTENSA_EB__ */
-	EX(s16i, a9, a11, 0, fixup_s)
+EX(10f)	s16i	a9, a11, 0
 	addi	a11, a11, 1		# advance dst pointer
 	sub	a2, a11, a2		# compute strlen
 	retw
@@ -176,9 +169,9 @@ __strncpy_user:
 #ifdef __XTENSA_EB__
 	extui	a9, a9, 16, 16
 #endif /* __XTENSA_EB__ */
-	EX(s16i, a9, a11, 0, fixup_s)
+EX(10f)	s16i	a9, a11, 0
 	movi	a9, 0
-	EX(s8i, a9, a11, 2, fixup_s)
+EX(10f)	s8i	a9, a11, 2
 	addi	a11, a11, 2		# advance dst pointer
 	sub	a2, a11, a2		# compute strlen
 	retw
@@ -196,9 +189,9 @@ __strncpy_user:
 	add	a12, a11, a4		# a12 = ending address
 #endif /* XCHAL_HAVE_LOOPS */
 .Lnextbyte:
-	EX(l8ui, a9, a3, 0, fixup_l)
+EX(11f)	l8ui	a9, a3, 0
 	addi	a3, a3, 1
-	EX(s8i, a9, a11, 0, fixup_s)
+EX(10f)	s8i	a9, a11, 0
 	beqz	a9, .Lunalignedend
 	addi	a11, a11, 1
 #if !XCHAL_HAVE_LOOPS
@@ -218,8 +211,7 @@ __strncpy_user:
  * implementation in memset().  Thus, we differentiate between
 * load/store fixups. */
-fixup_s:
-fixup_l:
+10:
+11:
 	movi	a2, -EFAULT
 	retw

View file

@@ -12,14 +12,7 @@
  */
 #include <variant/core.h>
+#include <asm/asmmacro.h>
-/* Load or store instructions that may cause exceptions use the EX macro. */
-#define EX(insn,reg1,reg2,offset,handler)	\
-9:	insn	reg1, reg2, offset;		\
-	.section __ex_table, "a";		\
-	.word	9b, handler;			\
-	.previous
 /*
  * size_t __strnlen_user(const char *s, size_t len)
@@ -77,7 +70,7 @@ __strnlen_user:
 	add	a10, a10, a4	# a10 = end of last 4B chunk
 #endif /* XCHAL_HAVE_LOOPS */
 .Loop:
-	EX(l32i, a9, a4, 4, lenfixup)	# get next word of string
+EX(10f)	l32i	a9, a4, 4	# get next word of string
 	addi	a4, a4, 4	# advance string pointer
 	bnone	a9, a5, .Lz0	# if byte 0 is zero
 	bnone	a9, a6, .Lz1	# if byte 1 is zero
@@ -88,7 +81,7 @@ __strnlen_user:
 #endif
 .Ldone:
-	EX(l32i, a9, a4, 4, lenfixup)	# load 4 bytes for remaining checks
+EX(10f)	l32i	a9, a4, 4	# load 4 bytes for remaining checks
 	bbci.l	a3, 1, .L100
 	# check two more bytes (bytes 0, 1 of word)
@@ -125,14 +118,14 @@ __strnlen_user:
 	retw
 .L1mod2:	# address is odd
-	EX(l8ui, a9, a4, 4, lenfixup)	# get byte 0
+EX(10f)	l8ui	a9, a4, 4	# get byte 0
 	addi	a4, a4, 1	# advance string pointer
 	beqz	a9, .Lz3	# if byte 0 is zero
 	bbci.l	a4, 1, .Laligned # if string pointer is now word-aligned
 .L2mod4:	# address is 2 mod 4
 	addi	a4, a4, 2	# advance ptr for aligned access
-	EX(l32i, a9, a4, 0, lenfixup)	# get word with first two bytes of string
+EX(10f)	l32i	a9, a4, 0	# get word with first two bytes of string
 	bnone	a9, a7, .Lz2	# if byte 2 (of word, not string) is zero
 	bany	a9, a8, .Laligned # if byte 3 (of word, not string) is nonzero
 	# byte 3 is zero
@@ -142,6 +135,6 @@ __strnlen_user:
 	.section .fixup, "ax"
 	.align	4
-lenfixup:
+10:
 	movi	a2, 0
 	retw

View file

@@ -54,6 +54,7 @@
  */
 #include <variant/core.h>
+#include <asm/asmmacro.h>
 #ifdef __XTENSA_EB__
 #define ALIGN(R, W0, W1) src	R, W0, W1
@@ -63,15 +64,6 @@
 #define SSA8(R)	ssa8l R
 #endif
-/* Load or store instructions that may cause exceptions use the EX macro. */
-#define EX(insn,reg1,reg2,offset,handler)	\
-9:	insn	reg1, reg2, offset;		\
-	.section __ex_table, "a";		\
-	.word	9b, handler;			\
-	.previous
 	.text
 	.align	4
 	.global	__xtensa_copy_user
@@ -102,9 +94,9 @@ __xtensa_copy_user:
 	bltui	a4, 7, .Lbytecopy	# do short copies byte by byte
 	# copy 1 byte
-	EX(l8ui, a6, a3, 0, fixup)
+EX(10f)	l8ui	a6, a3, 0
 	addi	a3, a3, 1
-	EX(s8i, a6, a5, 0, fixup)
+EX(10f)	s8i	a6, a5, 0
 	addi	a5, a5, 1
 	addi	a4, a4, -1
 	bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
@@ -112,11 +104,11 @@ __xtensa_copy_user:
 .Ldst2mod4:	# dst 16-bit aligned
 	# copy 2 bytes
 	bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
-	EX(l8ui, a6, a3, 0, fixup)
+EX(10f)	l8ui	a6, a3, 0
-	EX(l8ui, a7, a3, 1, fixup)
+EX(10f)	l8ui	a7, a3, 1
 	addi	a3, a3, 2
-	EX(s8i, a6, a5, 0, fixup)
+EX(10f)	s8i	a6, a5, 0
-	EX(s8i, a7, a5, 1, fixup)
+EX(10f)	s8i	a7, a5, 1
 	addi	a5, a5, 2
 	addi	a4, a4, -2
 	j	.Ldstaligned	# dst is now aligned, return to main algorithm
@@ -135,9 +127,9 @@ __xtensa_copy_user:
 	add	a7, a3, a4	# a7 = end address for source
 #endif /* !XCHAL_HAVE_LOOPS */
 .Lnextbyte:
-	EX(l8ui, a6, a3, 0, fixup)
+EX(10f)	l8ui	a6, a3, 0
 	addi	a3, a3, 1
-	EX(s8i, a6, a5, 0, fixup)
+EX(10f)	s8i	a6, a5, 0
 	addi	a5, a5, 1
 #if !XCHAL_HAVE_LOOPS
 	blt	a3, a7, .Lnextbyte
@@ -161,15 +153,15 @@ __xtensa_copy_user:
 	add	a8, a8, a3	# a8 = end of last 16B source chunk
 #endif /* !XCHAL_HAVE_LOOPS */
 .Loop1:
-	EX(l32i, a6, a3, 0, fixup)
+EX(10f)	l32i	a6, a3, 0
-	EX(l32i, a7, a3, 4, fixup)
+EX(10f)	l32i	a7, a3, 4
-	EX(s32i, a6, a5, 0, fixup)
+EX(10f)	s32i	a6, a5, 0
-	EX(l32i, a6, a3, 8, fixup)
+EX(10f)	l32i	a6, a3, 8
-	EX(s32i, a7, a5, 4, fixup)
+EX(10f)	s32i	a7, a5, 4
-	EX(l32i, a7, a3, 12, fixup)
+EX(10f)	l32i	a7, a3, 12
-	EX(s32i, a6, a5, 8, fixup)
+EX(10f)	s32i	a6, a5, 8
 	addi	a3, a3, 16
-	EX(s32i, a7, a5, 12, fixup)
+EX(10f)	s32i	a7, a5, 12
 	addi	a5, a5, 16
 #if !XCHAL_HAVE_LOOPS
 	blt	a3, a8, .Loop1
@@ -177,31 +169,31 @@ __xtensa_copy_user:
 .Loop1done:
 	bbci.l	a4, 3, .L2
 	# copy 8 bytes
-	EX(l32i, a6, a3, 0, fixup)
+EX(10f)	l32i	a6, a3, 0
-	EX(l32i, a7, a3, 4, fixup)
+EX(10f)	l32i	a7, a3, 4
 	addi	a3, a3, 8
-	EX(s32i, a6, a5, 0, fixup)
+EX(10f)	s32i	a6, a5, 0
-	EX(s32i, a7, a5, 4, fixup)
+EX(10f)	s32i	a7, a5, 4
 	addi	a5, a5, 8
 .L2:
 	bbci.l	a4, 2, .L3
 	# copy 4 bytes
-	EX(l32i, a6, a3, 0, fixup)
+EX(10f)	l32i	a6, a3, 0
 	addi	a3, a3, 4
-	EX(s32i, a6, a5, 0, fixup)
+EX(10f)	s32i	a6, a5, 0
 	addi	a5, a5, 4
 .L3:
 	bbci.l	a4, 1, .L4
 	# copy 2 bytes
-	EX(l16ui, a6, a3, 0, fixup)
+EX(10f)	l16ui	a6, a3, 0
 	addi	a3, a3, 2
-	EX(s16i, a6, a5, 0, fixup)
+EX(10f)	s16i	a6, a5, 0
 	addi	a5, a5, 2
 .L4:
 	bbci.l	a4, 0, .L5
 	# copy 1 byte
-	EX(l8ui, a6, a3, 0, fixup)
+EX(10f)	l8ui	a6, a3, 0
-	EX(s8i, a6, a5, 0, fixup)
+EX(10f)	s8i	a6, a5, 0
 .L5:
 	movi	a2, 0		# return success for len bytes copied
 	retw
@@ -217,7 +209,7 @@ __xtensa_copy_user:
 	# copy 16 bytes per iteration for word-aligned dst and unaligned src
 	and	a10, a3, a8	# save unalignment offset for below
 	sub	a3, a3, a10	# align a3 (to avoid sim warnings only; not needed for hardware)
-	EX(l32i, a6, a3, 0, fixup)	# load first word
+EX(10f)	l32i	a6, a3, 0	# load first word
 #if XCHAL_HAVE_LOOPS
 	loopnez	a7, .Loop2done
 #else /* !XCHAL_HAVE_LOOPS */
@@ -226,19 +218,19 @@ __xtensa_copy_user:
 	add	a12, a12, a3	# a12 = end of last 16B source chunk
 #endif /* !XCHAL_HAVE_LOOPS */
 .Loop2:
-	EX(l32i, a7, a3, 4, fixup)
+EX(10f)	l32i	a7, a3, 4
-	EX(l32i, a8, a3, 8, fixup)
+EX(10f)	l32i	a8, a3, 8
 	ALIGN(	a6, a6, a7)
-	EX(s32i, a6, a5, 0, fixup)
+EX(10f)	s32i	a6, a5, 0
-	EX(l32i, a9, a3, 12, fixup)
+EX(10f)	l32i	a9, a3, 12
 	ALIGN(	a7, a7, a8)
-	EX(s32i, a7, a5, 4, fixup)
+EX(10f)	s32i	a7, a5, 4
-	EX(l32i, a6, a3, 16, fixup)
+EX(10f)	l32i	a6, a3, 16
 	ALIGN(	a8, a8, a9)
-	EX(s32i, a8, a5, 8, fixup)
+EX(10f)	s32i	a8, a5, 8
 	addi	a3, a3, 16
 	ALIGN(	a9, a9, a6)
-	EX(s32i, a9, a5, 12, fixup)
+EX(10f)	s32i	a9, a5, 12
 	addi	a5, a5, 16
 #if !XCHAL_HAVE_LOOPS
 	blt	a3, a12, .Loop2
@@ -246,39 +238,39 @@ __xtensa_copy_user:
 .Loop2done:
 	bbci.l	a4, 3, .L12
 	# copy 8 bytes
-	EX(l32i, a7, a3, 4, fixup)
+EX(10f)	l32i	a7, a3, 4
-	EX(l32i, a8, a3, 8, fixup)
+EX(10f)	l32i	a8, a3, 8
 	ALIGN(	a6, a6, a7)
-	EX(s32i, a6, a5, 0, fixup)
+EX(10f)	s32i	a6, a5, 0
 	addi	a3, a3, 8
 	ALIGN(	a7, a7, a8)
-	EX(s32i, a7, a5, 4, fixup)
+EX(10f)	s32i	a7, a5, 4
 	addi	a5, a5, 8
 	mov	a6, a8
 .L12:
 	bbci.l	a4, 2, .L13
 	# copy 4 bytes
-	EX(l32i, a7, a3, 4, fixup)
+EX(10f)	l32i	a7, a3, 4
 	addi	a3, a3, 4
 	ALIGN(	a6, a6, a7)
-	EX(s32i, a6, a5, 0, fixup)
+EX(10f)	s32i	a6, a5, 0
 	addi	a5, a5, 4
 	mov	a6, a7
 .L13:
 	add	a3, a3, a10	# readjust a3 with correct misalignment
 	bbci.l	a4, 1, .L14
 	# copy 2 bytes
-	EX(l8ui, a6, a3, 0, fixup)
+EX(10f)	l8ui	a6, a3, 0
-	EX(l8ui, a7, a3, 1, fixup)
+EX(10f)	l8ui	a7, a3, 1
 	addi	a3, a3, 2
-	EX(s8i, a6, a5, 0, fixup)
+EX(10f)	s8i	a6, a5, 0
-	EX(s8i, a7, a5, 1, fixup)
+EX(10f)	s8i	a7, a5, 1
 	addi	a5, a5, 2
 .L14:
 	bbci.l	a4, 0, .L15
 	# copy 1 byte
-	EX(l8ui, a6, a3, 0, fixup)
+EX(10f)	l8ui	a6, a3, 0
-	EX(s8i, a6, a5, 0, fixup)
+EX(10f)	s8i	a6, a5, 0
 .L15:
 	movi	a2, 0		# return success for len bytes copied
 	retw
@@ -294,7 +286,7 @@ __xtensa_copy_user:
 */
-fixup:
+10:
 	sub	a2, a5, a2	/* a2 <-- bytes copied */
 	sub	a2, a11, a2	/* a2 <-- bytes not copied */
 	retw