diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index a73553ae7e53..906b64ffdb1b 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -50,7 +50,8 @@ do_fpdis: add %g0, %g0, %g0 ba,a,pt %xcc, rtrap_clr_l6 -1: ldub [%g6 + TI_FPSAVED], %g5 +1: TRAP_LOAD_THREAD_REG + ldub [%g6 + TI_FPSAVED], %g5 wr %g0, FPRS_FEF, %fprs andcc %g5, FPRS_FEF, %g0 be,a,pt %icc, 1f @@ -189,6 +190,7 @@ fp_other_bounce: .globl do_fpother_check_fitos .align 32 do_fpother_check_fitos: + TRAP_LOAD_THREAD_REG sethi %hi(fp_other_bounce - 4), %g7 or %g7, %lo(fp_other_bounce - 4), %g7 @@ -353,8 +355,6 @@ do_fptrap_after_fsr: * * With this method we can do most of the cross-call tlb/cache * flushing very quickly. - * - * Current CPU's IRQ worklist table is locked into %g6, don't touch. */ .text .align 32 @@ -378,6 +378,8 @@ do_ivec: sllx %g2, %g4, %g2 sllx %g4, 2, %g4 + TRAP_LOAD_IRQ_WORK + lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */ stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */ stw %g3, [%g6 + %g4] /* irq_work(cpu, pil) = bucket */ @@ -488,9 +490,24 @@ setcc: retl stx %o1, [%o0 + PT_V9_TSTATE] - .globl utrap, utrap_ill -utrap: brz,pn %g1, etrap + .globl utrap_trap +utrap_trap: /* %g3=handler,%g4=level */ + TRAP_LOAD_THREAD_REG + ldx [%g6 + TI_UTRAPS], %g1 + brnz,pt %g1, invoke_utrap nop + + ba,pt %xcc, etrap + rd %pc, %g7 + mov %l4, %o1 + call bad_trap + add %sp, PTREGS_OFF, %o0 + ba,pt %xcc, rtrap + clr %l6 + +invoke_utrap: + sllx %g3, 3, %g3 + ldx [%g1 + %g3], %g1 save %sp, -128, %sp rdpr %tstate, %l6 rdpr %cwp, %l7 @@ -500,17 +517,6 @@ utrap: brz,pn %g1, etrap rdpr %tnpc, %l7 wrpr %g1, 0, %tnpc done -utrap_ill: - call bad_trap - add %sp, PTREGS_OFF, %o0 - ba,pt %xcc, rtrap - clr %l6 - - /* XXX Here is stuff we still need to write... -DaveM XXX */ - .globl netbsd_syscall -netbsd_syscall: - retl - nop /* We need to carefully read the error status, ACK * the errors, prevent recursive traps, and pass the @@ -1001,7 +1007,7 @@ dcpe_icpe_tl1_common: * %g3: scratch * %g4: AFSR * %g5: AFAR - * %g6: current thread ptr + * %g6: unused, will have current thread ptr after etrap * %g7: scratch */ __cheetah_log_error: @@ -1690,3 +1696,85 @@ __flushw_user: restore %g0, %g0, %g0 2: retl nop + + /* Read cpu ID from hardware, return in %g6. + * (callers_pc - 4) is in %g1. Patched at boot time. + * + * Default is spitfire implementation. + * + * The instruction sequence needs to be 5 instructions + * in order to fit the longest implementation, which is + * currently starfire. + */ + .align 32 + .globl __get_cpu_id +__get_cpu_id: + ldxa [%g0] ASI_UPA_CONFIG, %g6 + srlx %g6, 17, %g6 + jmpl %g1 + 0x4, %g0 + and %g6, 0x1f, %g6 + nop + +__get_cpu_id_cheetah_safari: + ldxa [%g0] ASI_SAFARI_CONFIG, %g6 + srlx %g6, 17, %g6 + jmpl %g1 + 0x4, %g0 + and %g6, 0x3ff, %g6 + nop + +__get_cpu_id_cheetah_jbus: + ldxa [%g0] ASI_JBUS_CONFIG, %g6 + srlx %g6, 17, %g6 + jmpl %g1 + 0x4, %g0 + and %g6, 0x1f, %g6 + nop + +__get_cpu_id_starfire: + sethi %hi(0x1fff40000d0 >> 9), %g6 + sllx %g6, 9, %g6 + or %g6, 0xd0, %g6 + jmpl %g1 + 0x4, %g0 + lduwa [%g6] ASI_PHYS_BYPASS_EC_E, %g6 + + .globl per_cpu_patch +per_cpu_patch: + sethi %hi(this_is_starfire), %o0 + lduw [%o0 + %lo(this_is_starfire)], %o1 + sethi %hi(__get_cpu_id_starfire), %o0 + brnz,pn %o1, 10f + or %o0, %lo(__get_cpu_id_starfire), %o0 + sethi %hi(tlb_type), %o0 + lduw [%o0 + %lo(tlb_type)], %o1 + brz,pt %o1, 11f + nop + rdpr %ver, %o0 + srlx %o0, 32, %o0 + sethi %hi(0x003e0016), %o1 + or %o1, %lo(0x003e0016), %o1 + cmp %o0, %o1 + sethi %hi(__get_cpu_id_cheetah_jbus), %o0 + be,pn %icc, 10f + or %o0, %lo(__get_cpu_id_cheetah_jbus), %o0 + sethi %hi(__get_cpu_id_cheetah_safari), %o0 + or %o0, %lo(__get_cpu_id_cheetah_safari), %o0 +10: + sethi %hi(__get_cpu_id), %o1 + or %o1, %lo(__get_cpu_id), %o1 + lduw [%o0 + 0x00], %o2 + stw %o2, [%o1 + 0x00] + flush %o1 + 0x00 + lduw [%o0 + 0x04], %o2 + stw %o2, [%o1 + 0x04] + flush %o1 + 0x04 + lduw [%o0 + 0x08], %o2 + stw %o2, [%o1 + 0x08] + flush %o1 + 0x08 + lduw [%o0 + 0x0c], %o2 + stw %o2, [%o1 + 0x0c] + flush %o1 + 0x0c + lduw [%o0 + 0x10], %o2 + stw %o2, [%o1 + 0x10] + flush %o1 + 0x10 +11: + retl + nop diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S index 567dbb765c34..8b3b6d720ed5 100644 --- a/arch/sparc64/kernel/etrap.S +++ b/arch/sparc64/kernel/etrap.S @@ -31,6 +31,7 @@ .globl etrap, etrap_irq, etraptl1 etrap: rdpr %pil, %g2 etrap_irq: + TRAP_LOAD_THREAD_REG rdpr %tstate, %g1 sllx %g2, 20, %g3 andcc %g1, TSTATE_PRIV, %g0 @@ -98,11 +99,7 @@ etrap_irq: stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] wrpr %g0, ETRAP_PSTATE2, %pstate mov %l6, %g6 -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g3 - ldxa [%g3] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g4, %g3) jmpl %l2 + 0x4, %g0 ldx [%g6 + TI_TASK], %g4 @@ -126,6 +123,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. * 0x58 TL4's TT * 0x60 TL */ + TRAP_LOAD_THREAD_REG sub %sp, ((4 * 8) * 4) + 8, %g2 rdpr %tl, %g1 @@ -179,7 +177,9 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself. .align 64 .globl scetrap -scetrap: rdpr %pil, %g2 +scetrap: + TRAP_LOAD_THREAD_REG + rdpr %pil, %g2 rdpr %tstate, %g1 sllx %g2, 20, %g3 andcc %g1, TSTATE_PRIV, %g0 @@ -248,11 +248,7 @@ scetrap: rdpr %pil, %g2 stx %i6, [%sp + PTREGS_OFF + PT_V9_I6] mov %l6, %g6 stx %i7, [%sp + PTREGS_OFF + PT_V9_I7] -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g3 - ldxa [%g3] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g4, %g3) ldx [%g6 + TI_TASK], %g4 done diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S index d00e20693be1..82ce5bced9c7 100644 --- a/arch/sparc64/kernel/head.S +++ b/arch/sparc64/kernel/head.S @@ -26,6 +26,7 @@ #include #include #include +#include /* This section from from _start to sparc64_boot_end should fit into * 0x0000000000404000 to 0x0000000000408000. @@ -421,24 +422,6 @@ setup_trap_table: stxa %g2, [%g1] ASI_DMMU membar #Sync - /* The Linux trap handlers expect various trap global registers - * to be setup with some fixed values. So here we set these - * up very carefully. These globals are: - * - * Alternate Globals (PSTATE_AG): - * - * %g6 --> current_thread_info() - * - * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()): - * - * %g6 --> __irq_work[smp_processor_id()] - */ - - rdpr %pstate, %o1 - mov %g6, %o2 - wrpr %o1, PSTATE_AG, %pstate - mov %o2, %g6 - /* Kill PROM timer */ sethi %hi(0x80000000), %o2 sllx %o2, 32, %o2 @@ -457,7 +440,6 @@ setup_trap_table: 2: wrpr %g0, %g0, %wstate - wrpr %o1, 0x0, %pstate call init_irqwork_curcpu nop diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c index f7490ef629b9..3e48af2769d4 100644 --- a/arch/sparc64/kernel/irq.c +++ b/arch/sparc64/kernel/irq.c @@ -848,33 +848,9 @@ static void kill_prom_timer(void) void init_irqwork_curcpu(void) { - register struct irq_work_struct *workp asm("o2"); - register unsigned long tmp asm("o3"); int cpu = hard_smp_processor_id(); - memset(__irq_work + cpu, 0, sizeof(*workp)); - - /* Make sure we are called with PSTATE_IE disabled. */ - __asm__ __volatile__("rdpr %%pstate, %0\n\t" - : "=r" (tmp)); - if (tmp & PSTATE_IE) { - prom_printf("BUG: init_irqwork_curcpu() called with " - "PSTATE_IE enabled, bailing.\n"); - __asm__ __volatile__("mov %%i7, %0\n\t" - : "=r" (tmp)); - prom_printf("BUG: Called from %lx\n", tmp); - prom_halt(); - } - - /* Set interrupt globals. */ - workp = &__irq_work[cpu]; - __asm__ __volatile__( - "rdpr %%pstate, %0\n\t" - "wrpr %0, %1, %%pstate\n\t" - "mov %2, %%g6\n\t" - "wrpr %0, 0x0, %%pstate\n\t" - : "=&r" (tmp) - : "i" (PSTATE_IG), "r" (workp)); + memset(__irq_work + cpu, 0, sizeof(struct irq_work_struct)); } /* Only invoked on boot processor. */ diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S index 213eb4a9d8a4..5a62ec5d531c 100644 --- a/arch/sparc64/kernel/rtrap.S +++ b/arch/sparc64/kernel/rtrap.S @@ -223,12 +223,10 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1 ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3 ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4 ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5 -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g6 - brnz,a,pn %l3, 1f - ldxa [%g6] ASI_IMMU, %g5 -#endif + brz,pt %l3, 1f + nop + /* Must do this before thread reg is clobbered below. */ + LOAD_PER_CPU_BASE(%g6, %g7) 1: ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6 ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7 diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 158bd31e15b7..59a70301a6cf 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -507,6 +507,11 @@ void __init setup_arch(char **cmdline_p) /* Work out if we are starfire early on */ check_if_starfire(); + /* Now we know enough to patch the __get_cpu_id() + * trampoline used by trap code. + */ + per_cpu_patch(); + boot_flags_init(*cmdline_p); idprom_init(); @@ -545,6 +550,9 @@ void __init setup_arch(char **cmdline_p) smp_setup_cpu_possible_map(); paging_init(); + + /* Get boot processor trap_block[] setup. */ + init_cur_cpu_trap(); } static int __init set_preferred_console(void) diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index d2d3369e7b5d..8c245859d212 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -38,6 +38,7 @@ #include #include #include +#include extern void calibrate_delay(void); @@ -87,10 +88,6 @@ void __init smp_store_cpu_info(int id) cpu_data(id).clock_tick = prom_getintdefault(cpu_node, "clock-frequency", 0); - cpu_data(id).pgcache_size = 0; - cpu_data(id).pte_cache[0] = NULL; - cpu_data(id).pte_cache[1] = NULL; - cpu_data(id).pgd_cache = NULL; cpu_data(id).idle_volume = 1; cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size", @@ -121,26 +118,15 @@ static volatile unsigned long callin_flag = 0; extern void inherit_locked_prom_mappings(int save_p); -static inline void cpu_setup_percpu_base(unsigned long cpu_id) -{ -#error IMMU TSB usage must be fixed - __asm__ __volatile__("mov %0, %%g5\n\t" - "stxa %0, [%1] %2\n\t" - "membar #Sync" - : /* no outputs */ - : "r" (__per_cpu_offset(cpu_id)), - "r" (TSB_REG), "i" (ASI_IMMU)); -} - void __init smp_callin(void) { int cpuid = hard_smp_processor_id(); inherit_locked_prom_mappings(0); - __flush_tlb_all(); + __local_per_cpu_offset = __per_cpu_offset(cpuid); - cpu_setup_percpu_base(cpuid); + __flush_tlb_all(); smp_setup_percpu_timer(); @@ -1107,12 +1093,15 @@ void __init smp_setup_cpu_possible_map(void) void __devinit smp_prepare_boot_cpu(void) { - if (hard_smp_processor_id() >= NR_CPUS) { + int cpu = hard_smp_processor_id(); + + if (cpu >= NR_CPUS) { prom_printf("Serious problem, boot cpu id >= NR_CPUS\n"); prom_halt(); } - current_thread_info()->cpu = hard_smp_processor_id(); + current_thread_info()->cpu = cpu; + __local_per_cpu_offset = __per_cpu_offset(cpu); cpu_set(smp_processor_id(), cpu_online_map); cpu_set(smp_processor_id(), phys_cpu_present_map); @@ -1173,12 +1162,9 @@ void __init setup_per_cpu_areas(void) { unsigned long goal, size, i; char *ptr; - /* Created by linker magic */ - extern char __per_cpu_start[], __per_cpu_end[]; /* Copy section for each CPU (we discard the original) */ - goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); - + goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); #ifdef CONFIG_MODULES if (goal < PERCPU_ENOUGH_ROOM) goal = PERCPU_ENOUGH_ROOM; @@ -1187,31 +1173,10 @@ void __init setup_per_cpu_areas(void) for (size = 1UL; size < goal; size <<= 1UL) __per_cpu_shift++; - /* Make sure the resulting __per_cpu_base value - * will fit in the 43-bit sign extended IMMU - * TSB register. - */ - ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE, - (unsigned long) __per_cpu_start); + ptr = alloc_bootmem(size * NR_CPUS); __per_cpu_base = ptr - __per_cpu_start; - if ((__per_cpu_shift < PAGE_SHIFT) || - (__per_cpu_base & ~PAGE_MASK) || - (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) { - prom_printf("PER_CPU: Invalid layout, " - "ptr[%p] shift[%lx] base[%lx]\n", - ptr, __per_cpu_shift, __per_cpu_base); - prom_halt(); - } - for (i = 0; i < NR_CPUS; i++, ptr += size) memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - - /* Finally, load in the boot cpu's base value. - * We abuse the IMMU TSB register for trap handler - * entry and exit loading of %g5. That is why it - * has to be page aligned. - */ - cpu_setup_percpu_base(hard_smp_processor_id()); } diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S index 782d8c4973e4..18c333f841e3 100644 --- a/arch/sparc64/kernel/trampoline.S +++ b/arch/sparc64/kernel/trampoline.S @@ -287,21 +287,18 @@ do_unlock: wrpr %g0, 0, %wstate wrpr %g0, 0, %tl - /* Setup the trap globals, then we can resurface. */ - rdpr %pstate, %o1 - mov %g6, %o2 - wrpr %o1, PSTATE_AG, %pstate + /* Load TBA, then we can resurface. */ sethi %hi(sparc64_ttable_tl0), %g5 wrpr %g5, %tba - mov %o2, %g6 - wrpr %o1, 0x0, %pstate ldx [%g6 + TI_TASK], %g4 wrpr %g0, 0, %wstate call init_irqwork_curcpu nop + call init_cur_cpu_trap + nop /* Start using proper page size encodings in ctx register. */ sethi %hi(sparc64_kern_pri_context), %g3 diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c index 8d44ae5a15e3..f47f4874253c 100644 --- a/arch/sparc64/kernel/traps.c +++ b/arch/sparc64/kernel/traps.c @@ -2130,7 +2130,22 @@ void do_getpsr(struct pt_regs *regs) } } +struct trap_per_cpu trap_block[NR_CPUS]; + +/* This can get invoked before sched_init() so play it super safe + * and use hard_smp_processor_id(). + */ +void init_cur_cpu_trap(void) +{ + int cpu = hard_smp_processor_id(); + struct trap_per_cpu *p = &trap_block[cpu]; + + p->thread = current_thread_info(); + p->pgd_paddr = 0; +} + extern void thread_info_offsets_are_bolixed_dave(void); +extern void trap_per_cpu_offsets_are_bolixed_dave(void); /* Only invoked on boot processor. */ void __init trap_init(void) @@ -2165,6 +2180,10 @@ void __init trap_init(void) (TI_FPREGS & (64 - 1))) thread_info_offsets_are_bolixed_dave(); + if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) || + TRAP_PER_CPU_PGD_PADDR != offsetof(struct trap_per_cpu, pgd_paddr)) + trap_per_cpu_offsets_are_bolixed_dave(); + /* Attach to the address space of init_task. On SMP we * do this in smp.c:smp_callin for other cpus. */ diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S index 44b9e6fed09f..50752c518773 100644 --- a/arch/sparc64/kernel/tsb.S +++ b/arch/sparc64/kernel/tsb.S @@ -36,6 +36,15 @@ tsb_miss_itlb: nop tsb_miss_page_table_walk: + /* This clobbers %g1 and %g6, preserve them... */ + mov %g1, %g5 + mov %g6, %g2 + + TRAP_LOAD_PGD_PHYS + + mov %g2, %g6 + mov %g5, %g1 + USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault) tsb_reload: @@ -112,15 +121,20 @@ winfix_trampoline: * %o0: page table physical address * %o1: TSB address */ + .align 32 .globl tsb_context_switch tsb_context_switch: - wrpr %g0, PSTATE_MG | PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV, %pstate + rdpr %pstate, %o5 + wrpr %o5, PSTATE_IE, %pstate - /* Set page table base alternate global. */ - mov %o0, %g7 + ldub [%g6 + TI_CPU], %o3 + sethi %hi(trap_block), %o4 + sllx %o3, TRAP_BLOCK_SZ_SHIFT, %o3 + or %o4, %lo(trap_block), %o4 + add %o4, %o3, %o4 + stx %o0, [%o4 + TRAP_PER_CPU_PGD_PADDR] - /* XXX can this happen? */ - brz,pn %o1, 9f + brgez %o1, 9f nop /* Lock TSB into D-TLB. */ @@ -163,7 +177,7 @@ tsb_context_switch: membar #Sync 9: - wrpr %g0, PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE, %pstate + wrpr %o5, %pstate retl mov %o2, %o0 diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S index 56f060c8fbf0..2fb7a33993c0 100644 --- a/arch/sparc64/kernel/ttable.S +++ b/arch/sparc64/kernel/ttable.S @@ -128,7 +128,7 @@ tl0_flushw: FLUSH_WINDOW_TRAP tl0_resv104: BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107) .globl tl0_solaris tl0_solaris: SOLARIS_SYSCALL_TRAP -tl0_netbsd: NETBSD_SYSCALL_TRAP +tl0_resv109: BTRAP(0x109) tl0_resv10a: BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d) BTRAP(0x10e) tl0_resv10f: BTRAP(0x10f) tl0_linux32: LINUX_32BIT_SYSCALL_TRAP diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S index f5d93aa99cbb..de588036df43 100644 --- a/arch/sparc64/kernel/winfixup.S +++ b/arch/sparc64/kernel/winfixup.S @@ -39,6 +39,7 @@ set_pcontext: */ .globl fill_fixup, spill_fixup fill_fixup: + TRAP_LOAD_THREAD_REG rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 or %g4, FAULT_CODE_WINFIXUP, %g4 @@ -84,11 +85,7 @@ fill_fixup: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ldx [%g6 + TI_TASK], %g4 -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g1, %g2) /* This is the same as below, except we handle this a bit special * since we must preserve %l5 and %l6, see comment above. @@ -107,6 +104,7 @@ fill_fixup: * do not touch %g7 or %g2 so we handle the two cases fine. */ spill_fixup: + TRAP_LOAD_THREAD_REG ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 @@ -182,6 +180,7 @@ winfix_mna: wrpr %g3, %tnpc done fill_fixup_mna: + TRAP_LOAD_THREAD_REG rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 be,pt %xcc, window_mna_from_user_common @@ -209,17 +208,14 @@ fill_fixup_mna: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g1, %g2) call mem_address_unaligned add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap nop ! yes, the nop is correct spill_fixup_mna: + TRAP_LOAD_THREAD_REG ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 @@ -287,6 +283,7 @@ winfix_dax: wrpr %g3, %tnpc done fill_fixup_dax: + TRAP_LOAD_THREAD_REG rdpr %tstate, %g1 andcc %g1, TSTATE_PRIV, %g0 be,pt %xcc, window_dax_from_user_common @@ -314,17 +311,14 @@ fill_fixup_dax: wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate mov %o7, %g6 ! Get current back. ldx [%g6 + TI_TASK], %g4 ! Finish it. -#ifdef CONFIG_SMP -#error IMMU TSB usage must be fixed - mov TSB_REG, %g1 - ldxa [%g1] ASI_IMMU, %g5 -#endif + LOAD_PER_CPU_BASE(%g1, %g2) call spitfire_data_access_exception add %sp, PTREGS_OFF, %o0 b,pt %xcc, rtrap nop ! yes, the nop is correct spill_fixup_dax: + TRAP_LOAD_THREAD_REG ldx [%g6 + TI_FLAGS], %g1 andcc %g1, _TIF_32BIT, %g0 ldub [%g6 + TI_WSAVED], %g1 diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S index 22791f29552e..a87394824ec2 100644 --- a/arch/sparc64/mm/ultra.S +++ b/arch/sparc64/mm/ultra.S @@ -295,12 +295,10 @@ cheetah_patch_cachetlbops: * %g1 address arg 1 (tlb page and range flushes) * %g7 address arg 2 (tlb range flush only) * - * %g6 ivector table, don't touch - * %g2 scratch 1 - * %g3 scratch 2 - * %g4 scratch 3 - * - * TODO: Make xcall TLB range flushes use the tricks above... -DaveM + * %g6 scratch 1 + * %g2 scratch 2 + * %g3 scratch 3 + * %g4 scratch 4 */ .align 32 .globl xcall_flush_tlb_mm diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h index f7c0faede8b8..6c57cbb9a7d1 100644 --- a/include/asm-sparc64/cpudata.h +++ b/include/asm-sparc64/cpudata.h @@ -1,12 +1,15 @@ /* cpudata.h: Per-cpu parameters. * - * Copyright (C) 2003, 2005 David S. Miller (davem@redhat.com) + * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net) */ #ifndef _SPARC64_CPUDATA_H #define _SPARC64_CPUDATA_H +#ifndef __ASSEMBLY__ + #include +#include typedef struct { /* Dcache line 1 */ @@ -32,4 +35,85 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu)) #define local_cpu_data() __get_cpu_var(__cpu_data) +/* Trap handling code needs to get at a few critical values upon + * trap entry and to process TSB misses. These cannot be in the + * per_cpu() area as we really need to lock them into the TLB and + * thus make them part of the main kernel image. As a result we + * try to make this as small as possible. + * + * This is padded out and aligned to 64-bytes to avoid false sharing + * on SMP. + */ + +/* If you modify the size of this structure, please update + * TRAP_BLOCK_SZ_SHIFT below. + */ +struct thread_info; +struct trap_per_cpu { +/* D-cache line 1 */ + struct thread_info *thread; + unsigned long pgd_paddr; + unsigned long __pad1[2]; + +/* D-cache line 2 */ + unsigned long __pad2[4]; +} __attribute__((aligned(64))); +extern struct trap_per_cpu trap_block[NR_CPUS]; +extern void init_cur_cpu_trap(void); +extern void per_cpu_patch(void); + +#endif /* !(__ASSEMBLY__) */ + +#define TRAP_PER_CPU_THREAD 0x00 +#define TRAP_PER_CPU_PGD_PADDR 0x08 + +#define TRAP_BLOCK_SZ_SHIFT 6 + +/* Clobbers %g1, loads %g6 with local processor's cpuid */ +#define __GET_CPUID \ + ba,pt %xcc, __get_cpu_id; \ + rd %pc, %g1; + +/* Clobbers %g1, current address space PGD phys address into %g7. */ +#define TRAP_LOAD_PGD_PHYS \ + __GET_CPUID \ + sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ + sethi %hi(trap_block), %g7; \ + or %g7, %lo(trap_block), %g7; \ + add %g7, %g6, %g7; \ + ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7; + +/* Clobbers %g1, loads local processor's IRQ work area into %g6. */ +#define TRAP_LOAD_IRQ_WORK \ + __GET_CPUID \ + sethi %hi(__irq_work), %g1; \ + sllx %g6, 6, %g6; \ + or %g1, %lo(__irq_work), %g1; \ + add %g1, %g6, %g6; + +/* Clobbers %g1, loads %g6 with current thread info pointer. */ +#define TRAP_LOAD_THREAD_REG \ + __GET_CPUID \ + sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \ + sethi %hi(trap_block), %g1; \ + or %g1, %lo(trap_block), %g1; \ + ldx [%g1 + %g6], %g6; + +/* Given the current thread info pointer in %g6, load the per-cpu + * area base of the current processor into %g5. REG1 and REG2 are + * clobbered. + */ +#ifdef CONFIG_SMP +#define LOAD_PER_CPU_BASE(REG1, REG2) \ + ldub [%g6 + TI_CPU], REG1; \ + sethi %hi(__per_cpu_shift), %g5; \ + sethi %hi(__per_cpu_base), REG2; \ + ldx [%g5 + %lo(__per_cpu_shift)], %g5; \ + ldx [REG2 + %lo(__per_cpu_base)], REG2; \ + sllx REG1, %g5, %g5; \ + add %g5, REG2, %g5; +#else +#define LOAD_PER_CPU_BASE(REG1, REG2) +#endif + #endif /* _SPARC64_CPUDATA_H */ diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index af254e581834..26c0807af3e4 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -209,6 +209,8 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \ /* so that ASI is only written if it changes, think again. */ \ __asm__ __volatile__("wr %%g0, %0, %%asi" \ : : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\ + trap_block[current_thread_info()->cpu].thread = \ + task_thread_info(next); \ __asm__ __volatile__( \ "mov %%g4, %%g7\n\t" \ "wrpr %%g0, 0x95, %%pstate\n\t" \ diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h index 2784f80094c3..f557db4faf84 100644 --- a/include/asm-sparc64/ttable.h +++ b/include/asm-sparc64/ttable.h @@ -109,14 +109,14 @@ nop;nop;nop; #define TRAP_UTRAP(handler,lvl) \ - ldx [%g6 + TI_UTRAPS], %g1; \ - sethi %hi(109f), %g7; \ - brz,pn %g1, utrap; \ - or %g7, %lo(109f), %g7; \ - ba,pt %xcc, utrap; \ -109: ldx [%g1 + handler*8], %g1; \ - ba,pt %xcc, utrap_ill; \ - mov lvl, %o1; + mov handler, %g3; \ + ba,pt %xcc, utrap_trap; \ + mov lvl, %g4; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; #ifdef CONFIG_SUNOS_EMUL #define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table) @@ -136,8 +136,6 @@ #else #define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall) #endif -/* FIXME: Write these actually */ -#define NETBSD_SYSCALL_TRAP TRAP(netbsd_syscall) #define BREAKPOINT_TRAP TRAP(breakpoint_trap) #define TRAP_IRQ(routine, level) \