From 416f37d0816b9720b8227953e55954d81456f991 Mon Sep 17 00:00:00 2001 From: Darren Stevens Date: Wed, 27 Jul 2016 15:41:29 +0100 Subject: [PATCH 01/26] powerpc/pasemi: Fix coherent_dma_mask for dma engine Commit 817820b0226a ("powerpc/iommu: Support "hybrid" iommu/direct DMA ops for coherent_mask < dma_mask) adds a check of coherent_dma_mask for dma allocations. Unfortunately current PASemi code does not set this value for the DMA engine, which ends up with the default value of 0xffffffff, the result is on a PASemi system with >2Gb ram and iommu enabled the the onboard ethernet stops working due to an inability to allocate memory. Add an initialisation to pci_dma_dev_setup_pasemi(). Signed-off-by: Darren Stevens Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pasemi/iommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index 309d9ccccd50..c61667e8bb06 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -187,6 +187,11 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) if (dev->vendor == 0x1959 && dev->device == 0xa007 && !firmware_has_feature(FW_FEATURE_LPAR)) { dev->dev.archdata.dma_ops = &dma_direct_ops; + /* + * Set the coherent DMA mask to prevent the iommu + * being used unnecessarily + */ + dev->dev.coherent_dma_mask = DMA_BIT_MASK(44); return; } #endif From d2cf5be07ff7c7cde8bef8551a30e8e86e2037a7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 4 Aug 2016 16:38:15 +1000 Subject: [PATCH 02/26] crypto: crc32c-vpmsum - Convert to CPU feature based module autoloading This patch utilises the GENERIC_CPU_AUTOPROBE infrastructure to automatically load the crc32c-vpmsum module if the CPU supports it. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/crypto/crc32c-vpmsum_glue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c index bfe3d37a24ef..9fa046d56eba 100644 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #define CHKSUM_BLOCK_SIZE 1 @@ -157,7 +158,7 @@ static void __exit crc32c_vpmsum_mod_fini(void) crypto_unregister_shash(&alg); } -module_init(crc32c_vpmsum_mod_init); +module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init); module_exit(crc32c_vpmsum_mod_fini); MODULE_AUTHOR("Anton Blanchard "); From 880a3d6afd068682d6386a0528be1217541d3d8e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 2 Aug 2016 12:39:43 +1000 Subject: [PATCH 03/26] powerpc/xics: Properly set Edge/Level type and enable resend This sets the type of the interrupt appropriately. We set it as follow: - If not mapped from the device-tree, we use edge. This is the case of the virtual interrupts and PCI MSIs for example. - If mapped from the device-tree and #interrupt-cells is 2 (PAPR compliant), we use the second cell to set the appropriate type - If mapped from the device-tree and #interrupt-cells is 1 (current OPAL on P8 does that), we assume level sensitive since those are typically going to be the PSI LSIs which are level sensitive. Additionally, we mark the interrupts requested via the opal_interrupts property all level. This is a bit fishy but the best we can do until we fix OPAL to properly expose them with a complete descriptor. 
It is also correct for the current HW anyway as OPAL interrupts are currently PCI error and PSI interrupts which are level. Finally now that edge interrupts are properly identified, we can enable CONFIG_HARDIRQS_SW_RESEND which will make the core re-send them if they occur while masked, which some drivers rely upon. This fixes issues with lost interrupts on some Mellanox adapters. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/xics.h | 2 + arch/powerpc/platforms/powernv/opal-irqchip.c | 3 +- arch/powerpc/sysdev/xics/Kconfig | 1 + arch/powerpc/sysdev/xics/ics-opal.c | 4 +- arch/powerpc/sysdev/xics/ics-rtas.c | 4 +- arch/powerpc/sysdev/xics/xics-common.c | 59 ++++++++++++++++--- 6 files changed, 63 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index f5f729c11578..f0b238516e9b 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -159,6 +159,8 @@ extern void xics_teardown_cpu(void); extern void xics_kexec_teardown_cpu(int secondary); extern void xics_migrate_irqs_away(void); extern void icp_native_eoi(struct irq_data *d); +extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type); +extern int xics_retrigger(struct irq_data *data); #ifdef CONFIG_SMP extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, unsigned int strict_check); diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index e505223b4ec5..ed8bba68a162 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -228,7 +228,8 @@ int __init opal_event_init(void) } /* Install interrupt handler */ - rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW, + "opal", NULL); if (rc) { irq_dispose_mapping(virq); pr_warn("Error %d requesting irq %d (0x%x)\n", diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig index 0031eda320c3..385e7aa9e273 100644 --- a/arch/powerpc/sysdev/xics/Kconfig +++ b/arch/powerpc/sysdev/xics/Kconfig @@ -1,6 +1,7 @@ config PPC_XICS def_bool n select PPC_SMP_MUXED_IPI + select HARDIRQS_SW_RESEND config PPC_ICP_NATIVE def_bool n diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 27c936c080a6..1c6bf4b66f56 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -156,7 +156,9 @@ static struct irq_chip ics_opal_irq_chip = { .irq_mask = ics_opal_mask_irq, .irq_unmask = ics_opal_unmask_irq, .irq_eoi = NULL, /* Patched at init time */ - .irq_set_affinity = ics_opal_set_affinity + .irq_set_affinity = ics_opal_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, }; static int ics_opal_map(struct ics *ics, unsigned int virq); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index 3854dd41558d..78ee5c778ef8 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -163,7 +163,9 @@ static struct irq_chip ics_rtas_irq_chip = { .irq_mask = ics_rtas_mask_irq, .irq_unmask = ics_rtas_unmask_irq, .irq_eoi = NULL, /* Patched at init time */ - .irq_set_affinity = ics_rtas_set_affinity + .irq_set_affinity = ics_rtas_set_affinity, + .irq_set_type = xics_set_irq_type, + .irq_retrigger = xics_retrigger, }; static int ics_rtas_map(struct ics *ics, unsigned int virq) diff --git 
a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index a795a5f0301c..9d530f479588 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -328,8 +328,12 @@ static int xics_host_map(struct irq_domain *h, unsigned int virq, pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw); - /* They aren't all level sensitive but we just don't really know */ - irq_set_status_flags(virq, IRQ_LEVEL); + /* + * Mark interrupts as edge sensitive by default so that resend + * actually works. The device-tree parsing will turn the LSIs + * back to level. + */ + irq_clear_status_flags(virq, IRQ_LEVEL); /* Don't call into ICS for IPIs */ if (hw == XICS_IPI) { @@ -351,13 +355,54 @@ static int xics_host_xlate(struct irq_domain *h, struct device_node *ct, irq_hw_number_t *out_hwirq, unsigned int *out_flags) { - /* Current xics implementation translates everything - * to level. It is not technically right for MSIs but this - * is irrelevant at this point. We might get smarter in the future - */ *out_hwirq = intspec[0]; - *out_flags = IRQ_TYPE_LEVEL_LOW; + /* + * If intsize is at least 2, we look for the type in the second cell, + * we assume the LSB indicates a level interrupt. + */ + if (intsize > 1) { + if (intspec[1] & 1) + *out_flags = IRQ_TYPE_LEVEL_LOW; + else + *out_flags = IRQ_TYPE_EDGE_RISING; + } else + *out_flags = IRQ_TYPE_LEVEL_LOW; + + return 0; +} + +int xics_set_irq_type(struct irq_data *d, unsigned int flow_type) +{ + /* + * We only support these. This has really no effect other than setting + * the corresponding descriptor bits mind you but those will in turn + * affect the resend function when re-enabling an edge interrupt. + * + * Set set the default to edge as explained in map(). + */ + if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE) + flow_type = IRQ_TYPE_EDGE_RISING; + + if (flow_type != IRQ_TYPE_EDGE_RISING && + flow_type != IRQ_TYPE_LEVEL_LOW) + return -EINVAL; + + irqd_set_trigger_type(d, flow_type); + + return IRQ_SET_MASK_OK_NOCOPY; +} + +int xics_retrigger(struct irq_data *data) +{ + /* + * We need to push a dummy CPPR when retriggering, since the subsequent + * EOI will try to pop it. Passing 0 works, as the function hard codes + * the priority value anyway. + */ + xics_push_cppr(0); + + /* Tell the core to do a soft retrigger */ return 0; } From 54a94fcf2f00c9deb9491d5ab2b5a4022332f47e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2016 08:37:03 +0300 Subject: [PATCH 04/26] powerpc/cell: Add missing error code in spufs_mkgang() We should return -ENOMEM if alloc_spu_gang() fails. 
Signed-off-by: Dan Carpenter Acked-by: Arnd Bergmann Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/spufs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 5be15cff758d..2975754c65ea 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -496,8 +496,10 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) gang = alloc_spu_gang(); SPUFS_I(inode)->i_ctx = NULL; SPUFS_I(inode)->i_gang = gang; - if (!gang) + if (!gang) { + ret = -ENOMEM; goto out_iput; + } inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; From 4d9021957b5218310e28767f25551ca8f5eac797 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 3 Aug 2016 18:40:45 +1000 Subject: [PATCH 05/26] powerpc/powernv/ioda: Fix TCE invalidate to work in real mode again Commit fd141d1a99a3 ("powerpc/powernv/pci: Rework accessing the TCE invalidate register") broke TCE invalidation on IODA2/PHB3 for real mode. This makes invalidate work again. Fixes: fd141d1a99a3 ("powerpc/powernv/pci: Rework accessing the TCE invalidate register") Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6b9528307f62..dedbbe9785ba 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1877,7 +1877,7 @@ static void pnv_pci_phb3_tce_invalidate(struct pnv_ioda_pe *pe, bool rm, unsigned shift, unsigned long index, unsigned long npages) { - __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false); + __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm); unsigned long start, end, inc; /* We'll invalidate DMA address in PE scope */ From e325d76f8bd2d222a1f577aba00dfb43cece4cbc Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 5 Aug 2016 19:13:12 +0530 Subject: [PATCH 06/26] powerpc/powernv: Load correct TOC pointer while waking up from winkle. The function pnv_restore_hyp_resource() loads the TOC into r2 from the invalid PACA pointer before fixing r13 value. This do not affect POWER ISA 3.0 but it does have an impact on POWER ISA 2.07 or less leading CPU to get stuck forever. login: [ 471.830433] Processor 120 is stuck. This can be easily reproducible using following steps: - Turn off SMT $ ppc64_cpu --smt=off - offline/online any online cpu (Thread 0 of any core which is online) $ echo 0 > /sys/devices/system/cpu/cpu/online $ echo 1 > /sys/devices/system/cpu/cpu/online For POWER ISA 2.07 or less, the last bit of HSPRG0 is set indicating that thread is waking up from winkle. Hence, the last bit of HSPRG0(r13) needs to be clear before accessing it as PACA to avoid loading invalid values from invalid PACA pointer. Fix this by loading TOC after r13 register is corrected. 
Fixes: bcef83a00dc4 ("powerpc/powernv: Add platform support for stop instruction") Signed-off-by: Mahesh Salgaonkar Acked-by: Vaidyanathan Srinivasan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index ba79d15f4ddd..07a330ed7022 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -363,8 +363,8 @@ _GLOBAL(power9_idle_stop) * cr3 - set to gt if waking up with partial/complete hypervisor state loss */ _GLOBAL(pnv_restore_hyp_resource) - ld r2,PACATOC(r13); BEGIN_FTR_SECTION + ld r2,PACATOC(r13); /* * POWER ISA 3. Use PSSCR to determine if we * are waking up from deep idle state @@ -395,6 +395,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) */ clrldi r5,r13,63 clrrdi r13,r13,1 + + /* Now that we are sure r13 is corrected, load TOC */ + ld r2,PACATOC(r13); cmpwi cr4,r5,1 mtspr SPRN_HSPRG0,r13 From 98d8821a47f3fd7354d3ab87adb50b10c9adb937 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 5 Aug 2016 17:34:04 +0530 Subject: [PATCH 07/26] powerpc/powernv: Move IDLE_STATE_ENTER_SEQ macro to cpuidle.h Move IDLE_STATE_ENTER_SEQ macro to cpuidle.h so that MCE handler changes in subsequent patch can use it. No functionality change. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 13 +++++++++++++ arch/powerpc/kernel/idle_book3s.S | 12 ------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 3d7fc06532a1..01b8a13f0224 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -19,4 +19,17 @@ extern u64 pnv_first_deep_stop_state; #endif +/* Idle state entry routines */ +#ifdef CONFIG_PPC_P7_NAP +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ + /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ + std r0,0(r1); \ + ptesync; \ + ld r0,0(r1); \ +1: cmp cr0,r0,r0; \ + bne 1b; \ + IDLE_INST; \ + b . +#endif /* CONFIG_PPC_P7_NAP */ + #endif diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 07a330ed7022..2265c6398a17 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -44,18 +44,6 @@ PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ PSSCR_MTL_MASK -/* Idle state entry routines */ - -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ - /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r0,0(r1); \ - ptesync; \ - ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ - bne 1b; \ - IDLE_INST; \ - b . - .text /* From bc14c49195e49b3231c01e4c44e3e5456c940b94 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 5 Aug 2016 17:34:13 +0530 Subject: [PATCH 08/26] powerpc/powernv: Fix MCE handler to avoid trashing CR0/CR1 registers. The current implementation of MCE early handling modifies CR0/1 registers without saving its old values. Fix this by moving early check for powersaving mode to machine_check_handle_early(). The power architecture 2.06 or later allows the possibility of getting machine check while in nap/sleep/winkle. The last bit of HSPRG0 is set to 1, if thread is woken up from winkle. Hence, clear the last bit of HSPRG0 (r13) before MCE handler starts using it as paca pointer. Also, the current code always puts the thread into nap state irrespective of whatever idle state it woke up from. 
Fix that by looking at paca->thread_idle_state and put the thread back into same state where it came from. Fixes: 1c51089f777b ("powerpc/book3s: Return from interrupt if coming from evil context.") Reported-by: Paul Mackerras Signed-off-by: Mahesh Salgaonkar Reviewed-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/exceptions-64s.S | 69 ++++++++++++++++------------ 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 41091fdf9bd8..df6d45eb4115 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -144,29 +144,14 @@ machine_check_pSeries_1: * vector */ SET_SCRATCH0(r13) /* save r13 */ -#ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are - * waking up from nap. We only handle no state loss and - * supervisor state loss. We do -not- handle hypervisor - * state loss at this time. + /* + * Running native on arch 2.06 or later, we may wakeup from winkle + * inside machine check. If yes, then last bit of HSPGR0 would be set + * to 1. Hence clear it unconditionally. */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) - beq 9f - - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - /* waking up from powersave (nap) state */ - cmpwi cr1,r13,2 - /* Total loss of HV state is fatal. let's just stay stuck here */ - OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) - bgt cr1,. -9: - OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ + GET_PACA(r13) + clrrdi r13,r13,1 + SET_PACA(r13) EXCEPTION_PROLOG_0(PACA_EXMC) BEGIN_FTR_SECTION b machine_check_powernv_early @@ -1273,25 +1258,51 @@ machine_check_handle_early: * Check if thread was in power saving mode. We come here when any * of the following is true: * a. thread wasn't in power saving mode - * b. thread was in power saving mode with no state loss or - * supervisor state loss + * b. thread was in power saving mode with no state loss, + * supervisor state loss or hypervisor state loss. * - * Go back to nap again if (b) is true. + * Go back to nap/sleep/winkle mode again if (b) is true. */ rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */ beq 4f /* No, it wasn;t */ /* Thread was in power saving mode. Go back to nap again. */ cmpwi r11,2 - bne 3f - /* Supervisor state loss */ + blt 3f + /* Supervisor/Hypervisor state loss */ li r0,1 stb r0,PACA_NAPSTATELOST(r13) 3: bl machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP GET_PACA(r13) ld r1,PACAR1(r13) - li r3,PNV_THREAD_NAP - b pnv_enter_arch207_idle_mode + /* + * Check what idle state this CPU was in and go back to same mode + * again. + */ + lbz r3,PACA_THREAD_IDLE_STATE(r13) + cmpwi r3,PNV_THREAD_NAP + bgt 10f + IDLE_STATE_ENTER_SEQ(PPC_NAP) + /* No return */ +10: + cmpwi r3,PNV_THREAD_SLEEP + bgt 2f + IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + /* No return */ + +2: + /* + * Go back to winkle. Please note that this thread was woken up in + * machine check from winkle and have not restored the per-subcore + * state. Hence before going back to winkle, set last bit of HSPGR0 + * to 1. This will make sure that if this thread gets woken up + * again at reset vector 0x100 then it will get chance to restore + * the subcore state. 
+ */ + ori r13,r13,1 + SET_PACA(r13) + IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + /* No return */ 4: #endif /* From 9dc512819e4b723856773f5a018865c20441072f Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Mon, 1 Aug 2016 16:32:51 +1000 Subject: [PATCH 09/26] powerpc: Fix unused function warning 'lmb_to_memblock' This patch fixes the following warning: arch/powerpc/platforms/pseries/hotplug-memory.c:323:29: error: 'lmb_to_memblock' defined but not used [-Werror=unused-function] static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) ^~~~~~~~~~~~~~~ The only consumer of this function is 'dlpar_remove_lmb', which is enabled with CONFIG_MEMORY_HOTREMOVE, so move it into the same ifdef block. Signed-off-by: Michael Ellerman --- .../platforms/pseries/hotplug-memory.c | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 43f7beb2902d..76ec104e88be 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -320,19 +320,6 @@ static int dlpar_remove_device_tree_lmb(struct of_drconf_cell *lmb) return dlpar_update_device_tree_lmb(lmb); } -static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) -{ - unsigned long section_nr; - struct mem_section *mem_sect; - struct memory_block *mem_block; - - section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); - mem_sect = __nr_to_section(section_nr); - - mem_block = find_memory_block(mem_sect); - return mem_block; -} - #ifdef CONFIG_MEMORY_HOTREMOVE static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { @@ -420,6 +407,19 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) static int dlpar_add_lmb(struct of_drconf_cell *); +static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) +{ + unsigned long section_nr; + struct mem_section *mem_sect; + struct memory_block *mem_block; + + section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr)); + mem_sect = __nr_to_section(section_nr); + + mem_block = find_memory_block(mem_sect); + return mem_block; +} + static int dlpar_remove_lmb(struct of_drconf_cell *lmb) { struct memory_block *mem_block; From 546c4402c57497a6ffdf5373aff75bc89f0866bc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 6 Aug 2016 15:54:12 -0700 Subject: [PATCH 10/26] powerpc/vdso: Add missing include file Some powerpc builds fail with the following buld error. In file included from ./arch/powerpc/include/asm/mmu_context.h:11:0, from arch/powerpc/kernel/vdso.c:28: arch/powerpc/include/asm/cputhreads.h: In function 'get_tensr': arch/powerpc/include/asm/cputhreads.h:101:2: error: implicit declaration of function 'cpu_has_feature' Fixes: b92a226e5284 ("powerpc: Move cpu_has_feature() to a separate file") Signed-off-by: Guenter Roeck Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6767605ea8da..4111d30badfa 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include From cbd74e1bc8129efb9908f130a8a6e60fd95d2106 Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Fri, 5 Aug 2016 14:02:00 +0200 Subject: [PATCH 11/26] cxl: Use fixed width predefined types in data structure. 
This patch fixes a regression introduced by commit b810253bd934 ("cxl: Add mechanism for delivering AFU driver specific events"). It changes the type u8 to __u8 in the uapi header cxl.h, because the former is a kernel internal type, and may not be defined in userland build environments, in particular when cross-compiling libcxl on x86_64 linux machines (RHEL6.7 and Ubuntu 16.04). This patch also changes the size of the field data_size, and makes it constant, to support 32-bit userland applications running on big-endian ppc64 kernels transparently. mpe: This is an ABI change, however the ABI was only added during the 4.8 merge window so has never been part of a released kernel - therefore we give ourselves permission to change it. Fixes: b810253bd934 ("cxl: Add mechanism for delivering AFU driver specific events") Signed-off-by: Philippe Bergheaud Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman --- include/uapi/misc/cxl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index cbae529b7ce0..180d526a55c3 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -136,8 +136,8 @@ struct cxl_event_afu_driver_reserved { * * Of course the contents will be ABI, but that's up the AFU driver. */ - size_t data_size; - u8 data[]; + __u32 data_size; + __u8 data[]; }; struct cxl_event { From 164793379ad3b7ef5fc5a28260c111358892dff3 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Thu, 28 Jul 2016 15:39:41 +1000 Subject: [PATCH 12/26] cxl: Fix NULL dereference in cxl_context_init() on PowerVM guests Commit f67a6722d650 ("cxl: Workaround PE=0 hardware limitation in Mellanox CX4") added a "min_pe" field to struct cxl_service_layer_ops, to allow us to work around a Mellanox CX-4 hardware limitation. When allocating the PE number in cxl_context_init(), we read from ctx->afu->adapter->native->sl_ops->min_pe to get the minimum PE number. Unsurprisingly, in a PowerVM guest ctx->afu->adapter->native is NULL, and guests don't have a cxl_service_layer_ops struct anywhere. Move min_pe from struct cxl_service_layer_ops to struct cxl so it's accessible in both native and PowerVM environments. For the Mellanox CX-4, set the min_pe value in set_sl_ops(). 
Fixes: f67a6722d650 ("cxl: Workaround PE=0 hardware limitation in Mellanox CX4") Reported-by: Frederic Barrat Signed-off-by: Andrew Donnellan Acked-by: Ian Munsie Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 3 +-- drivers/misc/cxl/cxl.h | 2 +- drivers/misc/cxl/pci.c | 3 ++- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index bdee9a01ef35..c466ee2b0c97 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -90,8 +90,7 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master, */ mutex_lock(&afu->contexts_lock); idr_preload(GFP_KERNEL); - i = idr_alloc(&ctx->afu->contexts_idr, ctx, - ctx->afu->adapter->native->sl_ops->min_pe, + i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe, ctx->afu->num_procs, GFP_NOWAIT); idr_preload_end(); mutex_unlock(&afu->contexts_lock); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index de090533f18c..344a0ff8f8c7 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -561,7 +561,6 @@ struct cxl_service_layer_ops { u64 (*timebase_read)(struct cxl *adapter); int capi_mode; bool needs_reset_before_disable; - int min_pe; }; struct cxl_native { @@ -603,6 +602,7 @@ struct cxl { struct bin_attribute cxl_attr; int adapter_num; int user_irqs; + int min_pe; u64 ps_size; u16 psl_rev; u16 base_image; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index d152e2de8c93..1d0347c36e6d 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1521,14 +1521,15 @@ static const struct cxl_service_layer_ops xsl_ops = { .write_timebase_ctrl = write_timebase_ctrl_xsl, .timebase_read = timebase_read_xsl, .capi_mode = OPAL_PHB_CAPI_MODE_DMA, - .min_pe = 1, /* Workaround for Mellanox CX4 HW bug */ }; static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev) { if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) { + /* Mellanox CX-4 */ dev_info(&adapter->dev, "Device uses an XSL\n"); adapter->native->sl_ops = &xsl_ops; + adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */ } else { dev_info(&adapter->dev, "Device uses a PSL\n"); adapter->native->sl_ops = &psl_ops; From 6fd40f192a9dba391b2d84882f1ed3169c52b714 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Fri, 22 Jul 2016 19:01:36 +1000 Subject: [PATCH 13/26] cxl: Fix sparse warnings Make native_irq_wait() static and use NULL rather than 0 to initialise phb->cfg_data in cxl_pci_vphb_add() to remove sparse warnings. Signed-off-by: Andrew Donnellan Reviewed-by: Matthew R. 
Ochs Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/native.c | 2 +- drivers/misc/cxl/vphb.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 3bcdaee11ba1..e606fdc4bc9c 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -924,7 +924,7 @@ static irqreturn_t native_irq_multiplexed(int irq, void *data) return fail_psl_irq(afu, &irq_info); } -void native_irq_wait(struct cxl_context *ctx) +static void native_irq_wait(struct cxl_context *ctx) { u64 dsisr; int timeout = 1000; diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index dee8def1c193..7ada5f1b7bb6 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -221,7 +221,7 @@ int cxl_pci_vphb_add(struct cxl_afu *afu) /* Setup the PHB using arch provided callback */ phb->ops = &cxl_pcie_pci_ops; phb->cfg_addr = NULL; - phb->cfg_data = 0; + phb->cfg_data = NULL; phb->private_data = afu; phb->controller_ops = cxl_pci_controller_ops; From 10560b9afc8abf349843dff88c45dd43223e803e Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 22 Jul 2016 14:05:29 -0300 Subject: [PATCH 14/26] powerpc/eeh: Switch to conventional PCI address output in EEH log This is a very minor/trivial fix for the output of PCI address on EEH logs. The PCI address on "OF node" field currently is using ":" as a separator for the function, but the usual separator is ".". This patch changes the separator to dot, so the PCI address is printed as usual. Signed-off-by: Guilherme G. Piccoli Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c9bc78e9c610..7429556eb8df 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -168,10 +168,10 @@ static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) int n = 0, l = 0; char buffer[128]; - n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", + n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", edev->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); - pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", + pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", edev->phb->global_number, pdn->busno, PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); From 61e8a0d5a0270b91581f6c715036844b2ea98da1 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 5 Aug 2016 16:40:56 +1000 Subject: [PATCH 15/26] powerpc/pci: Fix endian bug in fixed PHB numbering The recent commit 63a72284b159 ("powerpc/pci: Assign fixed PHB number based on device-tree properties"), added code to read a 64-bit property from the device tree, and if not found read a 32-bit property (reg). There was a bug in the 32-bit case, on big endian machines, due to the use of the 64-bit value to read the 32-bit property. The cast of &prop means we end up writing to the high 32-bit of prop, leaving the low 32-bits containing whatever junk was on the stack. If that junk value was non-zero, and < MAX_PHBS, we would end up using it as the PHB id. This results in users seeing what appear to be random PHB ids. Fix it by reading into a u32 property and then assigning that to the u64 value, letting the CPU do the correct conversions for us. 
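As a user-space sketch of the underlying aliasing problem (hypothetical code, not the kernel fix itself): writing a 32-bit value through a pointer cast of a 64-bit object fills the first four bytes in memory, which on big-endian are the most significant half, so the low bits later masked for the PHB id are uninitialised.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		uint64_t prop;
		uint32_t reg = 0x1234;

		memset(&prop, 0xAA, sizeof(prop));	/* stand-in for stack junk */
		memcpy(&prop, &reg, sizeof(reg));	/* what *(u32 *)&prop effectively does */
		/* big-endian result:    0x00001234aaaaaaaa - junk in the low 32 bits
		 * little-endian result: 0xaaaaaaaa00001234 - accidentally usable,
		 * which is why the bug only shows on big-endian machines */
		printf("via cast:  %#llx\n", (unsigned long long)prop);

		uint32_t prop_32 = reg;
		prop = prop_32;			/* fixed pattern: widen by assignment */
		printf("via widen: %#llx\n", (unsigned long long)prop);
		return 0;
	}
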
Fixes: 63a72284b159 ("powerpc/pci: Assign fixed PHB number based on device-tree properties") Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci-common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index a5c0153ede37..7fdf324d5b51 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -78,6 +78,7 @@ EXPORT_SYMBOL(get_pci_dma_ops); static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; + u32 prop_32; u64 prop; /* @@ -86,8 +87,10 @@ static int get_phb_number(struct device_node *dn) * reading "ibm,opal-phbid", only present in OPAL environment. */ ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); - if (ret) - ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop); + if (ret) { + ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); + prop = prop_32; + } if (!ret) phb_id = (int)(prop & (MAX_PHBS - 1)); From c74dd88e77d3ecbc9e55c78796d82c9aa21cabad Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 9 Aug 2016 10:39:13 +0530 Subject: [PATCH 16/26] powerpc/book3s: Fix MCE console messages for unrecoverable MCE. When machine check occurs with MSR(RI=0), it means MC interrupt is unrecoverable and kernel goes down to panic path. But the console message still shows it as recovered. This patch fixes the MCE console messages. Fixes: 36df96f8acaf ("powerpc/book3s: Decode and save machine check event.") Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/mce.c | 3 ++- arch/powerpc/platforms/powernv/opal.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index ef267fd9dd22..5e7ece0fda9f 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -92,7 +92,8 @@ void save_mce_event(struct pt_regs *regs, long handled, mce->in_use = 1; mce->initiator = MCE_INITIATOR_CPU; - if (handled) + /* Mark it recovered if we have handled it and MSR(RI=1). */ + if (handled && (regs->msr & MSR_RI)) mce->disposition = MCE_DISPOSITION_RECOVERED; else mce->disposition = MCE_DISPOSITION_NOT_RECOVERED; diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 8b4fc68cebcb..6c9a65b52e63 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -399,6 +399,7 @@ static int opal_recover_mce(struct pt_regs *regs, if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ + pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n"); recovered = 0; } else if (evt->disposition == MCE_DISPOSITION_RECOVERED) { /* Platform corrected itself */ From 5958d19a143eb229e9ece20bd4c781ad41cb7d24 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 8 Jul 2016 15:55:43 +1000 Subject: [PATCH 17/26] powerpc/pnv/pci: Fix incorrect PE reservation attempt on some 64-bit BARs The generic allocation code may sometimes decide to assign a prefetchable 64-bit BAR to the M32 window. In fact it may also decide to allocate a 64-bit non-prefetchable BAR to the M64 one ! So using the resource flags as a test to decide which window was used for PE allocation is just wrong and leads to insane PE numbers. Instead, compare the addresses to figure it out. 
Signed-off-by: Benjamin Herrenschmidt [mpe: Rename the function as agreed by Ben & Gavin] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 30 +++++++++++++++-------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index dedbbe9785ba..fd9444f9fb0c 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -111,10 +111,17 @@ static int __init iommu_setup(char *str) } early_param("iommu", iommu_setup); -static inline bool pnv_pci_is_mem_pref_64(unsigned long flags) +static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r) { - return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) == - (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)); + /* + * WARNING: We cannot rely on the resource flags. The Linux PCI + * allocation code sometimes decides to put a 64-bit prefetchable + * BAR in the 32-bit window, so we have to compare the addresses. + * + * For simplicity we only test resource start. + */ + return (r->start >= phb->ioda.m64_base && + r->start < (phb->ioda.m64_base + phb->ioda.m64_size)); } static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no) @@ -229,7 +236,7 @@ static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev, sgsz = phb->ioda.m64_segsize; for (i = 0; i <= PCI_ROM_RESOURCE; i++) { r = &pdev->resource[i]; - if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags)) + if (!r->parent || !pnv_pci_is_m64(phb, r)) continue; start = _ALIGN_DOWN(r->start - base, sgsz); @@ -2863,7 +2870,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) { + if (!pnv_pci_is_m64(phb, res)) { dev_warn(&pdev->dev, "Don't support SR-IOV with" " non M64 VF BAR%d: %pR. \n", i, res); @@ -2958,7 +2965,7 @@ static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe, index++; } } else if ((res->flags & IORESOURCE_MEM) && - !pnv_pci_is_mem_pref_64(res->flags)) { + !pnv_pci_is_m64(phb, res)) { region.start = res->start - phb->hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -3083,9 +3090,12 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, bridge = bridge->bus->self; } - /* We fail back to M32 if M64 isn't supported */ - if (phb->ioda.m64_segsize && - pnv_pci_is_mem_pref_64(type)) + /* + * We fall back to M32 if M64 isn't supported. We enforce the M64 + * alignment for any 64-bit resource, PCIe doesn't care and + * bridges only do 64-bit prefetchable anyway. + */ + if (phb->ioda.m64_segsize && (type & IORESOURCE_MEM_64)) return phb->ioda.m64_segsize; if (type & IORESOURCE_MEM) return phb->ioda.m32_segsize; @@ -3125,7 +3135,7 @@ static void pnv_pci_fixup_bridge_resources(struct pci_bus *bus, w = NULL; if (r->flags & type & IORESOURCE_IO) w = &hose->io_resource; - else if (pnv_pci_is_mem_pref_64(r->flags) && + else if (pnv_pci_is_m64(phb, r) && (type & IORESOURCE_PREFETCH) && phb->ioda.m64_segsize) w = &hose->mem_resources[1]; From c6d2ee09c2fffd3efdd31be2b2811d081a45bb99 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Mon, 8 Aug 2016 11:57:48 +0200 Subject: [PATCH 18/26] cxl: Set psl_fir_cntl to production environment value Switch the setting of psl_fir_cntl from debug to production environment recommended value. It mostly affects the PSL behavior when an error is raised in psl_fir1/2. Tested with cxlflash. 
Signed-off-by: Frederic Barrat Reviewed-by: Uma Krishnan Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 1d0347c36e6d..6f0c4ac4b649 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -379,7 +379,7 @@ static int calc_capp_routing(struct pci_dev *dev, u64 *chipid, u64 *capp_unit_id static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev) { - u64 psl_dsnctl; + u64 psl_dsnctl, psl_fircntl; u64 chipid; u64 capp_unit_id; int rc; @@ -398,8 +398,11 @@ static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_ cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL); /* snoop write mask */ cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL); - /* set fir_accum */ - cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL); + /* set fir_cntl to recommended value for production env */ + psl_fircntl = (0x2ULL << (63-3)); /* ce_report */ + psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */ + psl_fircntl |= 0x1ULL; /* ce_thresh */ + cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl); /* for debugging with trace arrays */ cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL); From 1bc8b816cb8058c31f61fe78442f10a43209e582 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 2 Aug 2016 10:07:05 +0200 Subject: [PATCH 19/26] powerpc/32: Fix csum_partial_copy_generic() Commit 7aef4136566b0 ("powerpc32: rewrite csum_partial_copy_generic() based on copy_tofrom_user()") introduced a bug when destination address is odd and initial csum is not null In that (rare) case the initial csum value has to be rotated one byte as well as the resulting value is This patch also fixes related comments Fixes: 7aef4136566b0 ("powerpc32: rewrite csum_partial_copy_generic() based on copy_tofrom_user()") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/lib/checksum_32.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index d90870a66b60..0a57fe6d49cc 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -127,8 +127,9 @@ _GLOBAL(csum_partial_copy_generic) stw r7,12(r1) stw r8,8(r1) - andi. r0,r4,1 /* is destination address even ? */ - cmplwi cr7,r0,0 + rlwinm r0,r4,3,0x8 + rlwnm r6,r6,r0,0,31 /* odd destination address: rotate one byte */ + cmplwi cr7,r0,0 /* is destination address even ? */ addic r12,r6,0 addi r6,r4,-4 neg r0,r4 @@ -237,7 +238,7 @@ _GLOBAL(csum_partial_copy_generic) 66: addze r3,r12 addi r1,r1,16 beqlr+ cr7 - rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */ + rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */ blr /* read fault */ From c7a318ba868c61fc9be710a4970172d8c2eeb8b9 Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Wed, 10 Aug 2016 15:44:46 +1000 Subject: [PATCH 20/26] powerpc/ptrace: Fix coredump since ptrace TM changes Commit 8d460f6156cd ("powerpc/process: Add the function flush_tmregs_to_thread") added flush_tmregs_to_thread() and included the assumption that it would only be called for a task which is not current. Although this is correct for ptrace, when generating a core dump, some of the routines which call flush_tmregs_to_thread() are called. 
This leads to a WARNing such as: Not expecting ptrace on self: TM regs may be incorrect ------------[ cut here ]------------ WARNING: CPU: 123 PID: 7727 at arch/powerpc/kernel/process.c:1088 flush_tmregs_to_thread+0x78/0x80 CPU: 123 PID: 7727 Comm: libvirtd Not tainted 4.8.0-rc1-gcc6x-g61e8a0d #1 task: c000000fe631b600 task.stack: c000000fe63b0000 NIP: c00000000001a1a8 LR: c00000000001a1a4 CTR: c000000000717780 REGS: c000000fe63b3420 TRAP: 0700 Not tainted (4.8.0-rc1-gcc6x-g61e8a0d) MSR: 900000010282b033 CR: 28004222 XER: 20000000 ... NIP [c00000000001a1a8] flush_tmregs_to_thread+0x78/0x80 LR [c00000000001a1a4] flush_tmregs_to_thread+0x74/0x80 Call Trace: flush_tmregs_to_thread+0x74/0x80 (unreliable) vsr_get+0x64/0x1a0 elf_core_dump+0x604/0x1430 do_coredump+0x5fc/0x1200 get_signal+0x398/0x740 do_signal+0x54/0x2b0 do_notify_resume+0x98/0xb0 ret_from_except_lite+0x70/0x74 So fix flush_tmregs_to_thread() to detect the case where it is called on current, and a transaction is active, and in that case flush the TM regs to the thread_struct. This patch also moves flush_tmregs_to_thread() into ptrace.c as it is only called from that file. Fixes: 8d460f6156cd ("powerpc/process: Add the function flush_tmregs_to_thread") Signed-off-by: Cyril Bur [mpe: Flesh out change log] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/switch_to.h | 8 -------- arch/powerpc/kernel/process.c | 20 -------------------- arch/powerpc/kernel/ptrace.c | 19 +++++++++++++++++++ 3 files changed, 19 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index 0a74ebe934e1..17c8380673a6 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -75,14 +75,6 @@ static inline void disable_kernel_spe(void) static inline void __giveup_spe(struct task_struct *t) { } #endif -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -extern void flush_tmregs_to_thread(struct task_struct *); -#else -static inline void flush_tmregs_to_thread(struct task_struct *t) -{ -} -#endif - static inline void clear_task_ebb(struct task_struct *t) { #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 58ccf86415b4..9ee2623e0f67 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1074,26 +1074,6 @@ static inline void restore_sprs(struct thread_struct *old_thread, #endif } -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void flush_tmregs_to_thread(struct task_struct *tsk) -{ - /* - * Process self tracing is not yet supported through - * ptrace interface. Ptrace generic code should have - * prevented this from happening in the first place. - * Warn once here with the message, if some how it - * is attempted. - */ - WARN_ONCE(tsk == current, - "Not expecting ptrace on self: TM regs may be incorrect\n"); - - /* - * If task is not current, it should have been flushed - * already to it's thread_struct during __switch_to(). 
- */ -} -#endif - struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *new) { diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 4f3c5756cc09..bf91658a8a40 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -38,6 +38,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -118,6 +119,24 @@ static const struct pt_regs_offset regoffset_table[] = { REG_OFFSET_END, }; +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static void flush_tmregs_to_thread(struct task_struct *tsk) +{ + /* + * If task is not current, it will have been flushed already to + * it's thread_struct during __switch_to(). + * + * A reclaim flushes ALL the state. + */ + + if (tsk == current && MSR_TM_SUSPENDED(mfmsr())) + tm_reclaim_current(TM_CAUSE_SIGNAL); + +} +#else +static inline void flush_tmregs_to_thread(struct task_struct *tsk) { } +#endif + /** * regs_query_register_offset() - query register offset from its name * @name: the name of a register From 7d70c63c7132eb95e428e945242360233ab23d82 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Aug 2016 17:29:29 +1000 Subject: [PATCH 21/26] powerpc: Print the kernel load address at the end of prom_init() This makes it easier to debug crashes that happen very early before the kernel takes over Open Firmware by allowing us to relate the OF reported crashing addresses to offsets within the kernel. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 6ee4b72cda42..4e74fc588a3f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2940,7 +2940,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, /* Don't print anything after quiesce under OPAL, it crashes OFW */ if (of_platform != PLATFORM_OPAL) { - prom_printf("Booting Linux via __start() ...\n"); + prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase); prom_debug("->dt_header_start=0x%x\n", hdr); } From f9cc1d1f808dbdfd56978259d262b879504efaed Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Aug 2016 17:32:38 +1000 Subject: [PATCH 22/26] powerpc: Update obsolete comment in setup_32.c about early_init() We don't identify the machine type anymore... Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_32.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index c3e861df4b20..f10a975baa5e 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -93,10 +93,8 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * and we are running with enough of the MMU enabled to have our * proper kernel virtual addresses * - * Find out what kind of machine we're on and save any data we need - * from the early boot process (devtree is copied on pmac by prom_init()). - * This is called very early on the boot process, after a minimal - * MMU environment has been set up but before MMU_init is called. + * We do the initial parsing of the flat device-tree and prepares + * for the MMU to be fully initialized. 
*/ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ From 97f6e0cc35026a2a09147a6da636d901525e1969 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 10 Aug 2016 17:27:34 +1000 Subject: [PATCH 23/26] powerpc/32: Fix crash during static key init We cannot do those initializations from apply_feature_fixups() as this function runs in a very restricted environment on 32-bit where the kernel isn't running at its linked address and the PTRRELOC() macro must be used for any global accesss. Instead, split them into a separtate steup_feature_keys() function which is called in a more suitable spot on ppc32. Fixes: 309b315b6ec6 ("powerpc: Call jump_label_init() in apply_feature_fixups()") Reported-and-tested-by: Christian Kujau Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/feature-fixups.h | 1 + arch/powerpc/kernel/setup_32.c | 3 +++ arch/powerpc/kernel/setup_64.c | 1 + arch/powerpc/lib/feature-fixups.c | 3 +++ 4 files changed, 8 insertions(+) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 57fec8ac7b92..ddf54f5bbdd1 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -186,6 +186,7 @@ label##3: \ #ifndef __ASSEMBLY__ void apply_feature_fixups(void); +void setup_feature_keys(void); #endif #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index f10a975baa5e..24ec3ea4b3a2 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -100,6 +100,9 @@ extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */ notrace void __init machine_init(u64 dt_ptr) { + /* Configure static keys first, now that we're relocated. */ + setup_feature_keys(); + /* Enable early debugging if any specified (see udbg.h) */ udbg_early_init(); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index eafb9a79e011..7ac8e6eaab5b 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -300,6 +300,7 @@ void __init early_setup(unsigned long dt_ptr) /* Apply all the dynamic patching */ apply_feature_fixups(); + setup_feature_keys(); /* Initialize the hash table or TLB handling */ early_init_mmu(); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 74145f02ad41..043415f0bdb1 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -188,7 +188,10 @@ void __init apply_feature_fixups(void) &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); #endif do_final_fixups(); +} +void __init setup_feature_keys(void) +{ /* * Initialise jump label. This causes all the cpu/mmu_has_feature() * checks to take on their correct polarity based on the current set of From 164af597ce945751e2dcd53d0a86e84203a6d117 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2016 22:43:46 +1000 Subject: [PATCH 24/26] powerpc/Makefile: Use cflags-y/aflags-y for setting endian options When we introduced the little endian support, we added the endian flags to CC directly using override. I don't know the history of why we did that, I suspect no one does. Although this mostly works, it has one bug, which is that CROSS32CC doesn't get -mbig-endian. That means when the compiler is little endian by default and the user is building big endian, vdso32 is incorrectly compiled as little endian and the kernel fails to build. 
Instead we can add the endian flags to cflags-y/aflags-y, and then append those to KBUILD_CFLAGS/KBUILD_AFLAGS. This has the advantage of being 1) less ugly, 2) the documented way of adding flags in the arch Makefile and 3) it fixes building vdso32 with a LE toolchain. Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index ca254546cd05..1934707bf321 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -66,29 +66,28 @@ endif UTS_MACHINE := $(OLDARCH) ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -override CC += -mlittle-endian -ifneq ($(cc-name),clang) -override CC += -mno-strict-align -endif -override AS += -mlittle-endian override LD += -EL -override CROSS32CC += -mlittle-endian override CROSS32AS += -mlittle-endian LDEMULATION := lppc GNUTARGET := powerpcle MULTIPLEWORD := -mno-multiple KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect) else -ifeq ($(call cc-option-yn,-mbig-endian),y) -override CC += -mbig-endian -override AS += -mbig-endian -endif override LD += -EB LDEMULATION := ppc GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +ifneq ($(cc-name),clang) + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align +endif + +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian + ifeq ($(HAS_BIARCH),y) override AS += -a$(CONFIG_WORD_SIZE) override LD += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION) @@ -232,6 +231,9 @@ cpu-as-$(CONFIG_E200) += -Wa,-me200 KBUILD_AFLAGS += $(cpu-as-y) KBUILD_CFLAGS += $(cpu-as-y) +KBUILD_AFLAGS += $(aflags-y) +KBUILD_CFLAGS += $(cflags-y) + head-y := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o From b9a4a0d02c5b8d9a1397c11d741d2a1a56381178 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 9 Aug 2016 22:17:29 +1000 Subject: [PATCH 25/26] powerpc/vdso: Fix build rules to rebuild vdsos correctly When using if_changed, we need to add FORCE as a dependency (see Documentation/kbuild/makefiles.txt) otherwise we don't get command line change checking amongst other things. This has resulted in vdsos not being rebuilt when switching between big and little endian. The vdso64/32ld commands have to be changed around to avoid pulling FORCE into the linker command line (code copied from x86). 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/vdso32/Makefile | 6 +++--- arch/powerpc/kernel/vdso64/Makefile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index cbabd143acae..78a7449bf489 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -30,7 +30,7 @@ CPPFLAGS_vdso32.lds += -P -C -Upowerpc $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE $(call if_changed,vdso32ld) # strip rule for the .so file @@ -39,12 +39,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso32): %.o: %.S +$(obj-vdso32): %.o: %.S FORCE $(call if_changed_dep,vdso32as) # actual build commands quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index c710802b8fb6..366ae09b14c1 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -23,7 +23,7 @@ CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE $(call if_changed,vdso64ld) # strip rule for the .so file @@ -32,12 +32,12 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # assembly rules for the .S files -$(obj-vdso64): %.o: %.S +$(obj-vdso64): %.o: %.S FORCE $(call if_changed_dep,vdso64as) # actual build commands quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< From ca49e64f0cb1368fc666a53b16b45d4505763d9c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 29 Jul 2016 20:48:09 +1000 Subject: [PATCH 26/26] selftests/powerpc: Specify we expect to build with std=gnu99 We have some tests that assume we're using std=gnu99, which is fine on most compilers, but some old compilers use a different default. So make it explicit that we want to use std=gnu99. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 3c40c9d0e6c7..1cc6d64c39b7 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) +CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) export CFLAGS
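For context on why the explicit -std=gnu99 matters: some older compilers default to gnu89, where C99-only constructs are rejected. A minimal illustration (not taken from the selftests) is a declaration inside a for loop:

	/* Accepted with -std=gnu99, rejected under gnu89 because for-loop
	 * initial declarations are a C99 feature. */
	int sum_to(int n)
	{
		int sum = 0;
		for (int i = 1; i <= n; i++)
			sum += i;
		return sum;
	}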