sparc64: speed up etrap/rtrap on NG2 and later processors
For many sun4v processor types, reading or writing a privileged register
has a latency of 40 to 70 cycles. Use a combination of the low-latency
allclean, otherw, normalw, and nop instructions in etrap and rtrap to
replace 2 rdpr and 5 wrpr instructions and improve etrap/rtrap
performance. allclean, otherw, and normalw are available on NG2 and
later processors.

The average ticks to execute the flush windows trap ("ta 0x3") with and
without this patch on select platforms:

    CPU          Not patched    Patched    % Latency Reduction
    NG2          1762           1558       -11.58
    NG4          3619           3204       -11.47
    M7           3015           2624       -12.97
    SPARC64-X    829            770        -7.12

Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
5bd0ea9107
Коммит
a7159a87a3
|
@ -73,6 +73,8 @@ struct sun4v_1insn_patch_entry {
|
||||||
};
|
};
|
||||||
extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
|
extern struct sun4v_1insn_patch_entry __sun4v_1insn_patch,
|
||||||
__sun4v_1insn_patch_end;
|
__sun4v_1insn_patch_end;
|
||||||
|
extern struct sun4v_1insn_patch_entry __fast_win_ctrl_1insn_patch,
|
||||||
|
__fast_win_ctrl_1insn_patch_end;
|
||||||
|
|
||||||
struct sun4v_2insn_patch_entry {
|
struct sun4v_2insn_patch_entry {
|
||||||
unsigned int addr;
|
unsigned int addr;
|
||||||
|
|
|
@ -38,7 +38,11 @@ etrap_syscall: TRAP_LOAD_THREAD_REG(%g6, %g1)
|
||||||
or %g1, %g3, %g1
|
or %g1, %g3, %g1
|
||||||
bne,pn %xcc, 1f
|
bne,pn %xcc, 1f
|
||||||
sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2
|
sub %sp, STACKFRAME_SZ+TRACEREG_SZ-STACK_BIAS, %g2
|
||||||
wrpr %g0, 7, %cleanwin
|
661: wrpr %g0, 7, %cleanwin
|
||||||
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
||||||
|
.word 661b
|
||||||
|
.word 0x85880000 ! allclean
|
||||||
|
.previous
|
||||||
|
|
||||||
sethi %hi(TASK_REGOFF), %g2
|
sethi %hi(TASK_REGOFF), %g2
|
||||||
sethi %hi(TSTATE_PEF), %g3
|
sethi %hi(TSTATE_PEF), %g3
|
||||||
|
@ -88,16 +92,30 @@ etrap_save: save %g2, -STACK_BIAS, %sp
|
||||||
|
|
||||||
bne,pn %xcc, 3f
|
bne,pn %xcc, 3f
|
||||||
mov PRIMARY_CONTEXT, %l4
|
mov PRIMARY_CONTEXT, %l4
|
||||||
rdpr %canrestore, %g3
|
661: rdpr %canrestore, %g3
|
||||||
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
||||||
|
.word 661b
|
||||||
|
nop
|
||||||
|
.previous
|
||||||
|
|
||||||
rdpr %wstate, %g2
|
rdpr %wstate, %g2
|
||||||
wrpr %g0, 0, %canrestore
|
661: wrpr %g0, 0, %canrestore
|
||||||
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
||||||
|
.word 661b
|
||||||
|
nop
|
||||||
|
.previous
|
||||||
sll %g2, 3, %g2
|
sll %g2, 3, %g2
|
||||||
|
|
||||||
/* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR. */
|
/* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR. */
|
||||||
mov 1, %l5
|
mov 1, %l5
|
||||||
sth %l5, [%l6 + TI_SYS_NOERROR]
|
sth %l5, [%l6 + TI_SYS_NOERROR]
|
||||||
|
|
||||||
wrpr %g3, 0, %otherwin
|
661: wrpr %g3, 0, %otherwin
|
||||||
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
||||||
|
.word 661b
|
||||||
|
.word 0x87880000 ! otherw
|
||||||
|
.previous
|
||||||
|
|
||||||
wrpr %g2, 0, %wstate
|
wrpr %g2, 0, %wstate
|
||||||
sethi %hi(sparc64_kern_pri_context), %g2
|
sethi %hi(sparc64_kern_pri_context), %g2
|
||||||
ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
|
ldx [%g2 + %lo(sparc64_kern_pri_context)], %g3
|
||||||
|
|
|
@ -224,10 +224,19 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
|
||||||
rdpr %otherwin, %l2
|
rdpr %otherwin, %l2
|
||||||
srl %l1, 3, %l1
|
srl %l1, 3, %l1
|
||||||
|
|
||||||
wrpr %l2, %g0, %canrestore
|
661: wrpr %l2, %g0, %canrestore
|
||||||
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
||||||
|
.word 661b
|
||||||
|
.word 0x89880000 ! normalw
|
||||||
|
.previous
|
||||||
|
|
||||||
wrpr %l1, %g0, %wstate
|
wrpr %l1, %g0, %wstate
|
||||||
brnz,pt %l2, user_rtt_restore
|
brnz,pt %l2, user_rtt_restore
|
||||||
wrpr %g0, %g0, %otherwin
|
661: wrpr %g0, %g0, %otherwin
|
||||||
|
.section .fast_win_ctrl_1insn_patch, "ax"
|
||||||
|
.word 661b
|
||||||
|
nop
|
||||||
|
.previous
|
||||||
|
|
||||||
ldx [%g6 + TI_FLAGS], %g3
|
ldx [%g6 + TI_FLAGS], %g3
|
||||||
wr %g0, ASI_AIUP, %asi
|
wr %g0, ASI_AIUP, %asi
|
||||||
|
|
|
@ -300,6 +300,11 @@ static void __init sun4v_patch(void)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sun4v_chip_type != SUN4V_CHIP_NIAGARA1) {
|
||||||
|
sun4v_patch_1insn_range(&__fast_win_ctrl_1insn_patch,
|
||||||
|
&__fast_win_ctrl_1insn_patch_end);
|
||||||
|
}
|
||||||
|
|
||||||
sun4v_hvapi_init();
|
sun4v_hvapi_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -159,6 +159,11 @@ SECTIONS
|
||||||
*(.pud_huge_patch)
|
*(.pud_huge_patch)
|
||||||
__pud_huge_patch_end = .;
|
__pud_huge_patch_end = .;
|
||||||
}
|
}
|
||||||
|
.fast_win_ctrl_1insn_patch : {
|
||||||
|
__fast_win_ctrl_1insn_patch = .;
|
||||||
|
*(.fast_win_ctrl_1insn_patch)
|
||||||
|
__fast_win_ctrl_1insn_patch_end = .;
|
||||||
|
}
|
||||||
PERCPU_SECTION(SMP_CACHE_BYTES)
|
PERCPU_SECTION(SMP_CACHE_BYTES)
|
||||||
|
|
||||||
#ifdef CONFIG_JUMP_LABEL
|
#ifdef CONFIG_JUMP_LABEL
|
||||||
|
|
Загрузка…
Ссылка в новой задаче