From 73984137d32bb6e48646daea60ebe1457a01a061 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Wed, 10 Aug 2016 16:27:38 -0700
Subject: [PATCH 1/8] rapidio: dereferencing an error pointer

Original patch: https://lkml.org/lkml/2016/8/4/32

If riocm_ch_alloc() fails then we end up dereferencing the error
pointer.

The problem is that we're not unwinding in the reverse order from how we
allocate things, so it gets confusing.  I've changed this around so that
"ch" is NULL once we are done with it after we call riocm_put_channel().
That way we can check whether it is NULL and avoid calling
riocm_put_channel() on it twice.  I renamed err_nodev to err_put_new_ch
so that it better reflects what the goto does.  Then, because we flipped
things around, we no longer need to initialize the pointers to NULL, and
we can remove an if statement and pull things in an indent level.
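The rule the fix restores is that error labels should release resources
in the reverse order of their acquisition, so each failure site jumps to
a label that cleans up exactly what exists at that point.  A minimal
sketch of that shape, using hypothetical resources rather than the
rio_cm objects:

	#include <stdio.h>
	#include <stdlib.h>

	/* Two-step setup: the unwind labels mirror the allocation
	 * order, so each failure releases exactly what was acquired
	 * before it and nothing else. */
	static int setup(int fail_late)
	{
		char *a, *b;
		int err;

		a = malloc(16);
		if (!a)
			return -1;

		b = malloc(32);
		if (!b) {
			err = -1;
			goto err_free_a;	/* only "a" exists so far */
		}

		if (fail_late) {
			err = -1;
			goto err_free_b;	/* frees "b", falls through to "a" */
		}

		free(b);
		free(a);
		return 0;

	err_free_b:
		free(b);
	err_free_a:
		free(a);
		return err;
	}

	int main(void)
	{
		return setup(0) ? EXIT_FAILURE : EXIT_SUCCESS;
	}

Where a resource can also be released early on the success path, as with
"ch" here, setting the pointer to NULL after the put and testing it in
the unwind path keeps the two exits from double-releasing.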
Link: http://lkml.kernel.org/r/20160805152406.20713-1-alexandre.bounine@idt.com
Signed-off-by: Dan Carpenter
Signed-off-by: Alexandre Bounine
Cc: Matt Porter
Cc: Andre van Herk
Cc: Barry Wood
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 drivers/rapidio/rio_cm.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index cecc15a880de..3fa17ac8df54 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -1080,8 +1080,8 @@ static int riocm_send_ack(struct rio_channel *ch)
 static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 					   long timeout)
 {
-	struct rio_channel *ch = NULL;
-	struct rio_channel *new_ch = NULL;
+	struct rio_channel *ch;
+	struct rio_channel *new_ch;
 	struct conn_req *req;
 	struct cm_peer *peer;
 	int found = 0;
@@ -1155,6 +1155,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 
 	spin_unlock_bh(&ch->lock);
 	riocm_put_channel(ch);
+	ch = NULL;
 	kfree(req);
 
 	down_read(&rdev_sem);
@@ -1172,7 +1173,7 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 	if (!found) {
 		/* If peer device object not found, simply ignore the request */
 		err = -ENODEV;
-		goto err_nodev;
+		goto err_put_new_ch;
 	}
 
 	new_ch->rdev = peer->rdev;
@@ -1184,15 +1185,16 @@ static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 
 	*new_ch_id = new_ch->id;
 	return new_ch;
+
+err_put_new_ch:
+	spin_lock_bh(&idr_lock);
+	idr_remove(&ch_idr, new_ch->id);
+	spin_unlock_bh(&idr_lock);
+	riocm_put_channel(new_ch);
+
 err_put:
-	riocm_put_channel(ch);
-err_nodev:
-	if (new_ch) {
-		spin_lock_bh(&idr_lock);
-		idr_remove(&ch_idr, new_ch->id);
-		spin_unlock_bh(&idr_lock);
-		riocm_put_channel(new_ch);
-	}
+	if (ch)
+		riocm_put_channel(ch);
 	*new_ch_id = 0;
 	return ERR_PTR(err);
 }

From a545de5ce2ef3abc4db0b9331840acf59c8f9efa Mon Sep 17 00:00:00 2001
From: Andrew Morton
Date: Wed, 10 Aug 2016 16:27:41 -0700
Subject: [PATCH 2/8] revert "ARM: keystone: dts: add psci command definition"

Revert commit 51d5d12b8f3d ("ARM: keystone: dts: add psci command
definition"), which was inadvertently added twice.

Cc: Russell King - ARM Linux
Cc: Vitaly Andrianov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/arm/boot/dts/keystone.dtsi | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi
index 00cb314d5e4d..e23f46d15c80 100644
--- a/arch/arm/boot/dts/keystone.dtsi
+++ b/arch/arm/boot/dts/keystone.dtsi
@@ -70,14 +70,6 @@
 		cpu_on		= <0x84000003>;
 	};
 
-	psci {
-		compatible	= "arm,psci";
-		method		= "smc";
-		cpu_suspend	= <0x84000001>;
-		cpu_off		= <0x84000002>;
-		cpu_on		= <0x84000003>;
-	};
-
 	soc {
 		#address-cells = <1>;
 		#size-cells = <1>;

From 3b33719c9b741066f7d2bc6036409752f8e0478d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 10 Aug 2016 16:27:44 -0700
Subject: [PATCH 3/8] thp: move shmem_huge_enabled() outside of SYSFS ifdef

The newly introduced shmem_huge_enabled() function has two definitions,
but neither of them is visible if CONFIG_SYSFS is disabled, leading to a
build error:

  mm/khugepaged.o: In function `khugepaged':
  khugepaged.c:(.text.khugepaged+0x3ca): undefined reference to `shmem_huge_enabled'

This changes the #ifdef guards around the definition to match those that
are used in the header file.
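The failure mode is generic: when a declaration is guarded by one config
symbol but the definition sits inside a block guarded by two, any
configuration that enables the first symbol and disables the second
still compiles the callers and then fails at link time.  In miniature,
with hypothetical config symbols rather than the shmem ones:

	/* feature.h */
	#ifdef CONFIG_FEATURE
	bool feature_enabled(void);	/* callers see this whenever FEATURE=y */
	#endif

	/* feature.c */
	#if defined(CONFIG_FEATURE) && defined(CONFIG_SYSFS)
	/* ... sysfs attribute boilerplate ... */
	#endif /* CONFIG_FEATURE && CONFIG_SYSFS */

	#ifdef CONFIG_FEATURE		/* must match the header's guard, */
	bool feature_enabled(void)	/* not the sysfs-only block above */
	{
		return true;
	}
	#endif /* CONFIG_FEATURE */

Keeping the definition under exactly the same guard as the declaration
makes the two fail, or succeed, together in every configuration.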
Fixes: e496cf3d7821 ("thp: introduce CONFIG_TRANSPARENT_HUGE_PAGECACHE")
Link: http://lkml.kernel.org/r/20160809123638.1357593-1-arnd@arndb.de
Signed-off-by: Arnd Bergmann
Acked-by: Kirill A. Shutemov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/shmem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 7f7748a0f9e1..fd8b2b5741b1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3975,7 +3975,9 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
 
 struct kobj_attribute shmem_enabled_attr =
 	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
 
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
 bool shmem_huge_enabled(struct vm_area_struct *vma)
 {
 	struct inode *inode = file_inode(vma->vm_file);
@@ -4006,7 +4008,7 @@ bool shmem_huge_enabled(struct vm_area_struct *vma)
 			return false;
 	}
 }
-#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
 
 #else /* !CONFIG_SHMEM */

From 81cbcbc2d810c0ce49fba81f864302e1afe5ff27 Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 10 Aug 2016 16:27:46 -0700
Subject: [PATCH 4/8] mm/page_alloc.c: fix wrong initialization when
 sysctl_min_unmapped_ratio changes

Before recalculating min_unmapped_pages in the sysctl handler, we need
to reset min_unmapped_pages, not min_slab_pages; zeroing the wrong field
leaves min_unmapped_pages accumulating stale values on every write.

Fixes: a5f5f91da6 ("mm: convert zone_reclaim to node_reclaim")
Link: http://lkml.kernel.org/r/1470724248-26780-1-git-send-email-iamjoonsoo.kim@lge.com
Signed-off-by: Joonsoo Kim
Acked-by: Mel Gorman
Cc: Vlastimil Babka
Cc: Johannes Weiner
Cc: Minchan Kim
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ee744fa3b93d..9a92718b1103 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6854,7 +6854,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
 		return rc;
 
 	for_each_online_pgdat(pgdat)
-		pgdat->min_slab_pages = 0;
+		pgdat->min_unmapped_pages = 0;
 
 	for_each_zone(zone)
 		zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *

From 6423aa8192c596848e1b23bd4193dc0924e7274d Mon Sep 17 00:00:00 2001
From: Joonsoo Kim
Date: Wed, 10 Aug 2016 16:27:49 -0700
Subject: [PATCH 5/8] mm/page_alloc.c: recalculate some of node threshold when
 on/offline memory

Some of the node thresholds depend on the number of managed pages in the
node.  When memory goes on/offline, that number can change, and we need
to adjust the thresholds accordingly.  Add recalculation at the
appropriate places and clean up the related functions for better
maintenance.
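The cleanup works because each threshold is a pure function of the
current managed-page count and the ratio, so a single
recompute-from-scratch helper can serve both the sysctl write path and
the on/offline path.  A condensed sketch of that structure, with a
simplified single-node state rather than the page_alloc types:

	#include <stdio.h>

	/* Simplified stand-in for one node's state. */
	struct node_state {
		unsigned long managed_pages;
		unsigned long min_unmapped_pages;
	};

	static struct node_state node = { .managed_pages = 1000 };
	static int min_unmapped_ratio = 1;	/* percent, like the sysctl */

	/* Recompute the threshold from current state; called from
	 * every path that changes one of its inputs. */
	static void setup_min_unmapped_ratio(void)
	{
		node.min_unmapped_pages =
			node.managed_pages * min_unmapped_ratio / 100;
	}

	/* sysctl write path: the ratio changed */
	static void sysctl_changed(int ratio)
	{
		min_unmapped_ratio = ratio;
		setup_min_unmapped_ratio();
	}

	/* memory on/offline path: managed_pages changed */
	static void hotplug_changed(long delta)
	{
		node.managed_pages += delta;
		setup_min_unmapped_ratio();
	}

	int main(void)
	{
		sysctl_changed(5);
		hotplug_changed(+1000);
		printf("%lu\n", node.min_unmapped_pages);	/* 100 */
		return 0;
	}

The old code instead accumulated each zone's contribution once at init
time, which goes stale as soon as the managed-page count changes.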
Link: http://lkml.kernel.org/r/1470724248-26780-2-git-send-email-iamjoonsoo.kim@lge.com
Signed-off-by: Joonsoo Kim
Acked-by: Mel Gorman
Cc: Vlastimil Babka
Cc: Johannes Weiner
Cc: Minchan Kim
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/page_alloc.c | 50 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9a92718b1103..ab2c0ff8c2e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4757,6 +4757,8 @@ int local_memory_node(int node)
 }
 #endif
 
+static void setup_min_unmapped_ratio(void);
+static void setup_min_slab_ratio(void);
 #else /* CONFIG_NUMA */
 
 static void set_zonelist_order(void)
@@ -5878,9 +5880,6 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
 		zone->node = nid;
-		pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
-						/ 100;
-		pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		zone->zone_pgdat = pgdat;
@@ -6801,6 +6800,12 @@ int __meminit init_per_zone_wmark_min(void)
 	setup_per_zone_wmarks();
 	refresh_zone_stat_thresholds();
 	setup_per_zone_lowmem_reserve();
+
+#ifdef CONFIG_NUMA
+	setup_min_unmapped_ratio();
+	setup_min_slab_ratio();
+#endif
+
 	return 0;
 }
 core_initcall(init_per_zone_wmark_min)
@@ -6842,16 +6847,10 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
 }
 
 #ifdef CONFIG_NUMA
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+static void setup_min_unmapped_ratio(void)
 {
-	struct pglist_data *pgdat;
+	pg_data_t *pgdat;
 	struct zone *zone;
-	int rc;
-
-	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
-	if (rc)
-		return rc;
 
 	for_each_online_pgdat(pgdat)
 		pgdat->min_unmapped_pages = 0;
@@ -6859,26 +6858,47 @@ int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
 	for_each_zone(zone)
 		zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
 				sysctl_min_unmapped_ratio) / 100;
-	return 0;
 }
 
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	struct pglist_data *pgdat;
-	struct zone *zone;
 	int rc;
 
 	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (rc)
 		return rc;
 
+	setup_min_unmapped_ratio();
+
+	return 0;
+}
+
+static void setup_min_slab_ratio(void)
+{
+	pg_data_t *pgdat;
+	struct zone *zone;
+
 	for_each_online_pgdat(pgdat)
 		pgdat->min_slab_pages = 0;
 
 	for_each_zone(zone)
 		zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
 				sysctl_min_slab_ratio) / 100;
+}
+
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+	void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int rc;
+
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (rc)
+		return rc;
+
+	setup_min_slab_ratio();
+
 	return 0;
 }
 #endif

From c8efc390c1e0eca195ae59a2f7cec46773620e0c Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov"
Date: Wed, 10 Aug 2016 16:27:52 -0700
Subject: [PATCH 6/8] mm, rmap: fix false positive VM_BUG() in
 page_add_file_rmap()

PageTransCompound() doesn't distinguish THP from any other type of
compound page.  This can lead to a false-positive VM_BUG_ON() in
page_add_file_rmap() if it is called on a compound page from a
driver[1].

I think we can exclude such cases by checking if the page belongs to a
mapping.

The VM_BUG_ON_PAGE() is downgraded to VM_WARN_ON_ONCE().  This path
should not cause any harm to a non-THP page, but it is good to know if
we step on anything else.

[1] http://lkml.kernel.org/r/c711e067-0bff-a6cb-3c37-04dfe77d2db1@redhat.com
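The heart of the fix is a gating question: is this compound page one the
rmap code owns, meaning it belongs to a mapping, or an arbitrary driver
allocation that merely happens to be compound?  A stripped-down sketch
of that predicate, with hypothetical types rather than the kernel's
struct page:

	#include <stdbool.h>
	#include <stddef.h>

	/* Hypothetical stand-ins for the fields involved. */
	struct page {
		bool compound;
		void *mapping;	/* NULL for driver-allocated compound pages */
	};

	/* True for THP *and* for any other compound page. */
	static bool page_trans_compound(const struct page *page)
	{
		return page->compound;
	}

	/* Only a compound page that belongs to a mapping can be
	 * file-THP; anything else must not trip the THP-only
	 * assertions. */
	static bool file_thp_candidate(const struct page *page)
	{
		return page_trans_compound(page) && page->mapping != NULL;
	}

Downgrading the assertion to a one-shot warning fits the same
reasoning: an unexpected page here is worth a report, not a crash.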
Link: http://lkml.kernel.org/r/20160810161345.GA67522@black.fi.intel.com
Signed-off-by: Kirill A. Shutemov
Reported-by: Laura Abbott
Tested-by: Laura Abbott
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 709bc83703b1..d4f56060ba3f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1284,8 +1284,9 @@ void page_add_file_rmap(struct page *page, bool compound)
 		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 		__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
 	} else {
-		if (PageTransCompound(page)) {
-			VM_BUG_ON_PAGE(!PageLocked(page), page);
+		if (PageTransCompound(page) && page_mapping(page)) {
+			VM_WARN_ON_ONCE(!PageLocked(page));
+
 			SetPageDoubleMap(compound_head(page));
 			if (PageMlocked(page))
 				clear_page_mlock(compound_head(page));

From 57dea93ac42d341e1fe902528b348ef6763fa485 Mon Sep 17 00:00:00 2001
From: Steve Capper
Date: Wed, 10 Aug 2016 16:27:55 -0700
Subject: [PATCH 7/8] rmap: fix compound check logic in page_remove_file_rmap

In page_remove_file_rmap(.) we have the following check:

	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);

This is meant to check for either HugeTLB pages or THP when a compound
page is passed in.

Unfortunately, if one disables CONFIG_TRANSPARENT_HUGEPAGE, then
PageTransHuge(.) will always return false, provoking BUGs when one runs
the libhugetlbfs test suite.

This patch replaces PageTransHuge() with PageHead(), which will work
for both HugeTLB and THP.
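The trap is that PageTransHuge() is a config-dependent predicate: with
CONFIG_TRANSPARENT_HUGEPAGE=n it is stubbed out to a constant false,
while PageHead() answers the underlying question, "is this a compound
head?", in every configuration.  In miniature, with hypothetical stubs
rather than the real page-flag macros:

	#include <stdbool.h>

	struct page {
		bool head;	/* set on the head of any compound page */
	};

	/* Meaningful in all configurations: HugeTLB and THP alike. */
	static bool PageHead(const struct page *page)
	{
		return page->head;
	}

	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	static bool PageTransHuge(const struct page *page)
	{
		return PageHead(page);
	}
	#else
	/* Stubbed to constant false, so any assertion of the form
	 * "compound implies PageTransHuge" fires on valid HugeTLB
	 * head pages. */
	static bool PageTransHuge(const struct page *page)
	{
		(void)page;
		return false;
	}
	#endif

Asserting "compound implies PageHead" instead holds in both
configurations, which is why the one-word change is enough.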
Fixes: dd78fedde4b9 ("rmap: support file thp")
Link: http://lkml.kernel.org/r/1470838217-5889-1-git-send-email-steve.capper@arm.com
Signed-off-by: Steve Capper
Acked-by: Kirill A. Shutemov
Cc: Huang Shijie
Cc: Will Deacon
Cc: Catalin Marinas
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/rmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index d4f56060ba3f..1ef36404e7b2 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1304,7 +1304,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
 {
 	int i, nr = 1;
 
-	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
+	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
 	lock_page_memcg(page);
 
 	/* Hugepages are not counted in NR_FILE_MAPPED for now. */

From 6039892396d845b18228935561960441900cffca Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Wed, 10 Aug 2016 16:27:58 -0700
Subject: [PATCH 8/8] mm/slub.c: run free_partial() outside of the
 kmem_cache_node->list_lock

With debugobjects enabled and using SLAB_DESTROY_BY_RCU, when a
kmem_cache_node is destroyed the call_rcu() may trigger a slab
allocation to fill the debug object pool (__debug_object_init:fill_pool).

Everywhere but during kmem_cache_destroy(), discard_slab() is performed
outside of the kmem_cache_node->list_lock and avoids a lockdep warning
about potential recursion:

  =============================================
  [ INFO: possible recursive locking detected ]
  4.8.0-rc1-gfxbench+ #1 Tainted: G U
  ---------------------------------------------
  rmmod/8895 is trying to acquire lock:
   (&(&n->list_lock)->rlock){-.-...}, at: [] get_partial_node.isra.63+0x47/0x430

  but task is already holding lock:
   (&(&n->list_lock)->rlock){-.-...}, at: [] __kmem_cache_shutdown+0x54/0x320

  other info that might help us debug this:
   Possible unsafe locking scenario:

         CPU0
         ----
    lock(&(&n->list_lock)->rlock);
    lock(&(&n->list_lock)->rlock);

   *** DEADLOCK ***

   May be due to missing lock nesting notation

  5 locks held by rmmod/8895:
   #0:  (&dev->mutex){......}, at: driver_detach+0x42/0xc0
   #1:  (&dev->mutex){......}, at: driver_detach+0x50/0xc0
   #2:  (cpu_hotplug.dep_map){++++++}, at: get_online_cpus+0x2d/0x80
   #3:  (slab_mutex){+.+.+.}, at: kmem_cache_destroy+0x3c/0x220
   #4:  (&(&n->list_lock)->rlock){-.-...}, at: __kmem_cache_shutdown+0x54/0x320

  stack backtrace:
  CPU: 6 PID: 8895 Comm: rmmod Tainted: G U 4.8.0-rc1-gfxbench+ #1
  Hardware name: Gigabyte Technology Co., Ltd. H87M-D3H/H87M-D3H, BIOS F11 08/18/2015
  Call Trace:
   __lock_acquire+0x1646/0x1ad0
   lock_acquire+0xb2/0x200
   _raw_spin_lock+0x36/0x50
   get_partial_node.isra.63+0x47/0x430
   ___slab_alloc.constprop.67+0x1a7/0x3b0
   __slab_alloc.isra.64.constprop.66+0x43/0x80
   kmem_cache_alloc+0x236/0x2d0
   __debug_object_init+0x2de/0x400
   debug_object_activate+0x109/0x1e0
   __call_rcu.constprop.63+0x32/0x2f0
   call_rcu+0x12/0x20
   discard_slab+0x3d/0x40
   __kmem_cache_shutdown+0xdb/0x320
   shutdown_cache+0x19/0x60
   kmem_cache_destroy+0x1ae/0x220
   i915_gem_load_cleanup+0x14/0x40 [i915]
   i915_driver_unload+0x151/0x180 [i915]
   i915_pci_remove+0x14/0x20 [i915]
   pci_device_remove+0x34/0xb0
   __device_release_driver+0x95/0x140
   driver_detach+0xb6/0xc0
   bus_remove_driver+0x53/0xd0
   driver_unregister+0x27/0x50
   pci_unregister_driver+0x25/0x70
   i915_exit+0x1a/0x1e2 [i915]
   SyS_delete_module+0x193/0x1f0
   entry_SYSCALL_64_fastpath+0x1c/0xac

Fixes: 52b4b950b507 ("mm: slab: free kmem_cache_node after destroy sysfs file")
Link: http://lkml.kernel.org/r/1470759070-18743-1-git-send-email-chris@chris-wilson.co.uk
Reported-by: Dave Gordon
Signed-off-by: Chris Wilson
Reviewed-by: Vladimir Davydov
Acked-by: Christoph Lameter
Cc: Pekka Enberg
Cc: David Rientjes
Cc: Joonsoo Kim
Cc: Dmitry Safonov
Cc: Daniel Vetter
Cc: Dave Gordon
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/slub.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index cead06394e9e..9adae58462f8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3629,6 +3629,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page,
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
+	LIST_HEAD(discard);
 	struct page *page, *h;
 
 	BUG_ON(irqs_disabled());
@@ -3636,13 +3637,16 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 	list_for_each_entry_safe(page, h, &n->partial, lru) {
 		if (!page->inuse) {
 			remove_partial(n, page);
-			discard_slab(s, page);
+			list_add(&page->lru, &discard);
 		} else {
 			list_slab_objects(s, page,
 			"Objects remaining in %s on __kmem_cache_shutdown()");
 		}
 	}
 	spin_unlock_irq(&n->list_lock);
+
+	list_for_each_entry_safe(page, h, &discard, lru)
+		discard_slab(s, page);
 }

 /*
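The fix is an instance of a general pattern: while holding a lock that a
callee might indirectly re-acquire, only unlink items onto a private
local list, then process that list after the lock is dropped.  A minimal
user-space sketch of the pattern, using a toy singly-linked list rather
than the slub internals:

	#include <pthread.h>
	#include <stdlib.h>

	struct node {
		struct node *next;
		int dead;
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct node *partial;		/* protected by list_lock */

	/* May recurse into paths that take list_lock again (as
	 * discard_slab() can via debugobjects), so it must never be
	 * called with list_lock held. */
	static void free_one(struct node *n)
	{
		free(n);
	}

	static void free_partial(void)
	{
		struct node **pp, *n, *discard = NULL;

		pthread_mutex_lock(&list_lock);
		for (pp = &partial; (n = *pp) != NULL; ) {
			if (n->dead) {
				*pp = n->next;		/* unlink under the lock */
				n->next = discard;	/* park on a private list */
				discard = n;
			} else {
				pp = &n->next;
			}
		}
		pthread_mutex_unlock(&list_lock);

		while ((n = discard) != NULL) {		/* lock dropped: safe */
			discard = n->next;
			free_one(n);
		}
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++) {
			struct node *n = calloc(1, sizeof(*n));
			if (!n)
				return 1;
			n->dead = (i != 1);	/* mark two nodes for discard */
			n->next = partial;
			partial = n;
		}
		free_partial();
		return 0;
	}

Deferring the frees costs one extra list walk but removes any
possibility of the free path re-entering the locked section, which is
exactly what the lockdep report above complains about.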