powerpc/64s/hash: Use POWER9 SLBIA IH=3 variant in switch_slb
POWER9 introduces SLBIA IH=3, which invalidates all SLB entries and associated lookaside information that have a class value of 1, which Linux assigns to user addresses. This matches what switch_slb wants, and allows a simple, fast implementation that avoids the slb_cache complexity. As a side effect, the POWER5 < DD2.1 SLB invalidation workaround is also avoided on POWER9. The process context switching rate is improved by about 2.2% for a small process that hits the slb cache, which is the best case for the current code. Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
Родитель
5141c182d7
Коммит
82d8f4c22f
|
@ -279,7 +279,6 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
|
||||||
/* Flush all user entries from the segment table of the current processor. */
|
/* Flush all user entries from the segment table of the current processor. */
|
||||||
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
|
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
unsigned long offset;
|
|
||||||
unsigned long pc = KSTK_EIP(tsk);
|
unsigned long pc = KSTK_EIP(tsk);
|
||||||
unsigned long stack = KSTK_ESP(tsk);
|
unsigned long stack = KSTK_ESP(tsk);
|
||||||
unsigned long exec_base;
|
unsigned long exec_base;
|
||||||
|
@ -291,45 +290,56 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
|
||||||
* which would update the slb_cache/slb_cache_ptr fields in the PACA.
|
* which would update the slb_cache/slb_cache_ptr fields in the PACA.
|
||||||
*/
|
*/
|
||||||
hard_irq_disable();
|
hard_irq_disable();
|
||||||
offset = get_paca()->slb_cache_ptr;
|
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
|
||||||
if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
|
/*
|
||||||
offset <= SLB_CACHE_ENTRIES) {
|
* SLBIA IH=3 invalidates all Class=1 SLBEs and their
|
||||||
unsigned long slbie_data = 0;
|
* associated lookaside structures, which matches what
|
||||||
int i;
|
* switch_slb wants. So ARCH_300 does not use the slb
|
||||||
|
* cache.
|
||||||
|
*/
|
||||||
|
asm volatile("isync ; " PPC_SLBIA(3)" ; isync");
|
||||||
|
} else {
|
||||||
|
unsigned long offset = get_paca()->slb_cache_ptr;
|
||||||
|
|
||||||
asm volatile("isync" : : : "memory");
|
if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
|
||||||
for (i = 0; i < offset; i++) {
|
offset <= SLB_CACHE_ENTRIES) {
|
||||||
slbie_data = (unsigned long)get_paca()->slb_cache[i]
|
unsigned long slbie_data = 0;
|
||||||
<< SID_SHIFT; /* EA */
|
int i;
|
||||||
slbie_data |= user_segment_size(slbie_data)
|
|
||||||
<< SLBIE_SSIZE_SHIFT;
|
asm volatile("isync" : : : "memory");
|
||||||
slbie_data |= SLBIE_C; /* C set for user addresses */
|
for (i = 0; i < offset; i++) {
|
||||||
asm volatile("slbie %0" : : "r" (slbie_data));
|
/* EA */
|
||||||
|
slbie_data = (unsigned long)
|
||||||
|
get_paca()->slb_cache[i] << SID_SHIFT;
|
||||||
|
slbie_data |= user_segment_size(slbie_data)
|
||||||
|
<< SLBIE_SSIZE_SHIFT;
|
||||||
|
slbie_data |= SLBIE_C; /* user slbs have C=1 */
|
||||||
|
asm volatile("slbie %0" : : "r" (slbie_data));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Workaround POWER5 < DD2.1 issue */
|
||||||
|
if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
|
||||||
|
asm volatile("slbie %0" : : "r" (slbie_data));
|
||||||
|
|
||||||
|
asm volatile("isync" : : : "memory");
|
||||||
|
} else {
|
||||||
|
struct slb_shadow *p = get_slb_shadow();
|
||||||
|
unsigned long ksp_esid_data =
|
||||||
|
be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
|
||||||
|
unsigned long ksp_vsid_data =
|
||||||
|
be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
|
||||||
|
|
||||||
|
asm volatile("isync\n"
|
||||||
|
PPC_SLBIA(1) "\n"
|
||||||
|
"slbmte %0,%1\n"
|
||||||
|
"isync"
|
||||||
|
:: "r"(ksp_vsid_data),
|
||||||
|
"r"(ksp_esid_data));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Workaround POWER5 < DD2.1 issue */
|
get_paca()->slb_cache_ptr = 0;
|
||||||
if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
|
|
||||||
asm volatile("slbie %0" : : "r" (slbie_data));
|
|
||||||
|
|
||||||
asm volatile("isync" : : : "memory");
|
|
||||||
} else {
|
|
||||||
struct slb_shadow *p = get_slb_shadow();
|
|
||||||
unsigned long ksp_esid_data =
|
|
||||||
be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
|
|
||||||
unsigned long ksp_vsid_data =
|
|
||||||
be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
|
|
||||||
|
|
||||||
asm volatile("isync\n"
|
|
||||||
PPC_SLBIA(1) "\n"
|
|
||||||
"slbmte %0,%1\n"
|
|
||||||
"isync"
|
|
||||||
:: "r"(ksp_vsid_data),
|
|
||||||
"r"(ksp_esid_data));
|
|
||||||
|
|
||||||
asm volatile("isync" : : : "memory");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
get_paca()->slb_cache_ptr = 0;
|
|
||||||
copy_mm_to_paca(mm);
|
copy_mm_to_paca(mm);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -455,6 +465,9 @@ static void insert_slb_entry(unsigned long vsid, unsigned long ea,
|
||||||
enum slb_index index;
|
enum slb_index index;
|
||||||
int slb_cache_index;
|
int slb_cache_index;
|
||||||
|
|
||||||
|
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||||
|
return; /* ISAv3.0B and later does not use slb_cache */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We are irq disabled, hence should be safe to access PACA.
|
* We are irq disabled, hence should be safe to access PACA.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -2393,10 +2393,13 @@ static void dump_one_paca(int cpu)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
DUMP(p, vmalloc_sllp, "%#-*x");
|
DUMP(p, vmalloc_sllp, "%#-*x");
|
||||||
DUMP(p, slb_cache_ptr, "%#-*x");
|
|
||||||
for (i = 0; i < SLB_CACHE_ENTRIES; i++)
|
if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
|
||||||
printf(" %-*s[%d] = 0x%016x\n",
|
DUMP(p, slb_cache_ptr, "%#-*x");
|
||||||
22, "slb_cache", i, p->slb_cache[i]);
|
for (i = 0; i < SLB_CACHE_ENTRIES; i++)
|
||||||
|
printf(" %-*s[%d] = 0x%016x\n",
|
||||||
|
22, "slb_cache", i, p->slb_cache[i]);
|
||||||
|
}
|
||||||
|
|
||||||
DUMP(p, rfi_flush_fallback_area, "%-*px");
|
DUMP(p, rfi_flush_fallback_area, "%-*px");
|
||||||
#endif
|
#endif
|
||||||
|
|
Загрузка…
Ссылка в новой задаче