MIPS: memset: Limit excessive `noreorder' assembly mode use

Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction
due to a data dependency or a section switch (as is the case with the EX
macro).  No change in machine code produced.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com:
  Fix conflict with commit 932afdeec1 ("MIPS: Add Kconfig variable for
  CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>
This commit is contained in:
Maciej W. Rozycki 2018-10-02 12:50:16 +01:00 committed by Paul Burton
Parent 2f7619ae90
Commit 68dec269ee
No known key found for this signature
GPG key ID: 3EA79FACB57500DD
1 changed file: 24 additions and 24 deletions

View file

@@ -78,7 +78,6 @@
#endif
.endm
.set noreorder
.align 5
/*
@@ -94,13 +93,16 @@
.endif
sltiu t0, a2, STORSIZE /* very small region? */
.set noreorder
bnez t0, .Lsmall_memset\@
andi t0, a0, STORMASK /* aligned? */
.set reorder
#ifdef CONFIG_CPU_MICROMIPS
move t8, a1 /* used by 'swp' instruction */
move t9, a1
#endif
.set noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
beqz t0, 1f
PTR_SUBU t0, STORSIZE /* alignment in bytes */
@@ -111,6 +113,7 @@
PTR_SUBU t0, AT /* alignment in bytes */
.set at
#endif
.set reorder
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
R10KCBARRIER(0(ra))
@@ -125,8 +128,10 @@
#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define STORE_BYTE(N) \
EX(sb, a1, N(a0), .Lbyte_fixup\@); \
.set noreorder; \
beqz t0, 0f; \
PTR_ADDU t0, 1;
PTR_ADDU t0, 1; \
.set reorder;
PTR_ADDU a2, t0 /* correct size */
PTR_ADDU t0, 1
@@ -148,16 +153,14 @@
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
1: ori t1, a2, 0x3f /* # of full blocks */
xori t1, 0x3f
andi t0, a2, 0x40-STORSIZE
beqz t1, .Lmemset_partial\@ /* no block to fill */
andi t0, a2, 0x40-STORSIZE
PTR_ADDU t1, a0 /* end address */
.set reorder
1: PTR_ADDIU a0, 64
R10KCBARRIER(0(ra))
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
bne t1, a0, 1b
.set noreorder
.Lmemset_partial\@:
R10KCBARRIER(0(ra))
@@ -173,20 +176,18 @@
PTR_SUBU t1, AT
.set at
#endif
PTR_ADDU a0, t0 /* dest ptr */
jr t1
PTR_ADDU a0, t0 /* dest ptr */
.set push
.set noreorder
.set nomacro
/* ... but first do longs ... */
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
2: .set pop
andi a2, STORMASK /* At most one long to go */
2: andi a2, STORMASK /* At most one long to go */
.set noreorder
beqz a2, 1f
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
PTR_ADDU a0, a2 /* What's left */
.set reorder
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
@@ -195,6 +196,7 @@
#endif
#else
PTR_SUBU t0, $0, a2
.set reorder
move a2, zero /* No remaining longs */
PTR_ADDIU t0, 1
STORE_BYTE(0)
@@ -210,20 +212,22 @@
#endif
0:
#endif
1: jr ra
move a2, zero
1: move a2, zero
jr ra
.Lsmall_memset\@:
PTR_ADDU t1, a0, a2
beqz a2, 2f
PTR_ADDU t1, a0, a2
1: PTR_ADDIU a0, 1 /* fill bytewise */
R10KCBARRIER(0(ra))
.set noreorder
bne t1, a0, 1b
EX(sb, a1, -1(a0), .Lsmall_fixup\@)
.set reorder
2: jr ra /* done */
move a2, zero
2: move a2, zero
jr ra /* done */
.if __memset == 1
END(memset)
.set __memset, 0
@@ -237,14 +241,13 @@
* a2 = a2 - t0 + 1
*/
PTR_SUBU a2, t0
PTR_ADDIU a2, 1
jr ra
PTR_ADDIU a2, 1
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
.Lfirst_fixup\@:
/* unset_bytes already in a2 */
jr ra
nop
.Lfwd_fixup\@:
/*
@@ -255,8 +258,8 @@
andi a2, 0x3f
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, t1
LONG_SUBU a2, t0
jr ra
LONG_SUBU a2, t0
.Lpartial_fixup\@:
/*
@@ -267,24 +270,21 @@
andi a2, STORMASK
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, a0
LONG_SUBU a2, t0
jr ra
LONG_SUBU a2, t0
.Llast_fixup\@:
/* unset_bytes already in a2 */
jr ra
nop
.Lsmall_fixup\@:
/*
* unset_bytes = end_addr - current_addr + 1
* a2 = t1 - a0 + 1
*/
.set reorder
PTR_SUBU a2, t1, a0
PTR_ADDIU a2, 1
jr ra
.set noreorder
.endm
@@ -298,8 +298,8 @@
LEAF(memset)
EXPORT_SYMBOL(memset)
move v0, a0 /* result */
beqz a1, 1f
move v0, a0 /* result */
andi a1, 0xff /* spread fillword */
LONG_SLL t1, a1, 8