swap: make each swap partition have one address_space
When I use several fast SSDs for swap, swapper_space.tree_lock is heavily contended. This patch gives each swap partition its own address_space to reduce the lock contention. There is an array of address_space structures for swap, and the swap entry type is the index into that array. In my test with 3 SSDs, this increases swapout throughput by 20%. [akpm@linux-foundation.org: revert unneeded change to __add_to_swap_cache] Signed-off-by: Shaohua Li <shli@fusionio.com> Cc: Hugh Dickins <hughd@google.com> Acked-by: Rik van Riel <riel@redhat.com> Acked-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Родитель
9800339b5e
Коммит
33806f06da
|
@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
|
|||
* sysctl_overcommit_ratio / 100) + total_swap_pages;
|
||||
|
||||
cached = global_page_state(NR_FILE_PAGES) -
|
||||
total_swapcache_pages - i.bufferram;
|
||||
total_swapcache_pages() - i.bufferram;
|
||||
if (cached < 0)
|
||||
cached = 0;
|
||||
|
||||
|
@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
|
|||
K(i.freeram),
|
||||
K(i.bufferram),
|
||||
K(cached),
|
||||
K(total_swapcache_pages),
|
||||
K(total_swapcache_pages()),
|
||||
K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]),
|
||||
K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]),
|
||||
K(pages[LRU_ACTIVE_ANON]),
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#include <linux/memcontrol.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/node.h>
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
|
@ -330,8 +330,9 @@ int generic_swapfile_activate(struct swap_info_struct *, struct file *,
|
|||
sector_t *);
|
||||
|
||||
/* linux/mm/swap_state.c */
|
||||
extern struct address_space swapper_space;
|
||||
#define total_swapcache_pages swapper_space.nrpages
|
||||
extern struct address_space swapper_spaces[];
|
||||
#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
|
||||
extern unsigned long total_swapcache_pages(void);
|
||||
extern void show_swap_cache_info(void);
|
||||
extern int add_to_swap(struct page *);
|
||||
extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
|
||||
|
@ -382,7 +383,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
|
|||
|
||||
#define nr_swap_pages 0L
|
||||
#define total_swap_pages 0L
|
||||
#define total_swapcache_pages 0UL
|
||||
#define total_swapcache_pages() 0UL
|
||||
|
||||
#define si_swapinfo(val) \
|
||||
do { (val)->freeswap = (val)->totalswap = 0; } while (0)
|
||||
|
|
|
@ -6307,7 +6307,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
|
|||
* Because lookup_swap_cache() updates some statistics counter,
|
||||
* we call find_get_page() with swapper_space directly.
|
||||
*/
|
||||
page = find_get_page(&swapper_space, ent.val);
|
||||
page = find_get_page(swap_address_space(ent), ent.val);
|
||||
if (do_swap_account)
|
||||
entry->val = ent.val;
|
||||
|
||||
|
@ -6348,7 +6348,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
|
|||
swp_entry_t swap = radix_to_swp_entry(page);
|
||||
if (do_swap_account)
|
||||
*entry = swap;
|
||||
page = find_get_page(&swapper_space, swap.val);
|
||||
page = find_get_page(swap_address_space(swap), swap.val);
|
||||
}
|
||||
#endif
|
||||
return page;
|
||||
|
|
|
@ -75,7 +75,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
|
|||
/* shmem/tmpfs may return swap: account for swapcache page too. */
|
||||
if (radix_tree_exceptional_entry(page)) {
|
||||
swp_entry_t swap = radix_to_swp_entry(page);
|
||||
page = find_get_page(&swapper_space, swap.val);
|
||||
page = find_get_page(swap_address_space(swap), swap.val);
|
||||
}
|
||||
#endif
|
||||
if (page) {
|
||||
|
@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
|
|||
} else {
|
||||
#ifdef CONFIG_SWAP
|
||||
pgoff = entry.val;
|
||||
*vec = mincore_page(&swapper_space, pgoff);
|
||||
*vec = mincore_page(swap_address_space(entry),
|
||||
pgoff);
|
||||
#else
|
||||
WARN_ON(1);
|
||||
*vec = 1;
|
||||
|
|
|
@ -855,9 +855,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag);
|
|||
void __init swap_setup(void)
|
||||
{
|
||||
unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
|
||||
|
||||
#ifdef CONFIG_SWAP
|
||||
bdi_init(swapper_space.backing_dev_info);
|
||||
int i;
|
||||
|
||||
bdi_init(swapper_spaces[0].backing_dev_info);
|
||||
for (i = 0; i < MAX_SWAPFILES; i++) {
|
||||
spin_lock_init(&swapper_spaces[i].tree_lock);
|
||||
INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Use a smaller cluster for small-memory machines */
|
||||
|
|
|
@ -36,12 +36,12 @@ static struct backing_dev_info swap_backing_dev_info = {
|
|||
.capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED,
|
||||
};
|
||||
|
||||
struct address_space swapper_space = {
|
||||
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
|
||||
.tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock),
|
||||
.a_ops = &swap_aops,
|
||||
.i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
|
||||
.backing_dev_info = &swap_backing_dev_info,
|
||||
struct address_space swapper_spaces[MAX_SWAPFILES] = {
|
||||
[0 ... MAX_SWAPFILES - 1] = {
|
||||
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
|
||||
.a_ops = &swap_aops,
|
||||
.backing_dev_info = &swap_backing_dev_info,
|
||||
}
|
||||
};
|
||||
|
||||
#define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0)
|
||||
|
@ -53,9 +53,19 @@ static struct {
|
|||
unsigned long find_total;
|
||||
} swap_cache_info;
|
||||
|
||||
unsigned long total_swapcache_pages(void)
|
||||
{
|
||||
int i;
|
||||
unsigned long ret = 0;
|
||||
|
||||
for (i = 0; i < MAX_SWAPFILES; i++)
|
||||
ret += swapper_spaces[i].nrpages;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void show_swap_cache_info(void)
|
||||
{
|
||||
printk("%lu pages in swap cache\n", total_swapcache_pages);
|
||||
printk("%lu pages in swap cache\n", total_swapcache_pages());
|
||||
printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
|
||||
swap_cache_info.add_total, swap_cache_info.del_total,
|
||||
swap_cache_info.find_success, swap_cache_info.find_total);
|
||||
|
@ -70,6 +80,7 @@ void show_swap_cache_info(void)
|
|||
static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
|
||||
{
|
||||
int error;
|
||||
struct address_space *address_space;
|
||||
|
||||
VM_BUG_ON(!PageLocked(page));
|
||||
VM_BUG_ON(PageSwapCache(page));
|
||||
|
@ -79,14 +90,16 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry)
|
|||
SetPageSwapCache(page);
|
||||
set_page_private(page, entry.val);
|
||||
|
||||
spin_lock_irq(&swapper_space.tree_lock);
|
||||
error = radix_tree_insert(&swapper_space.page_tree, entry.val, page);
|
||||
address_space = swap_address_space(entry);
|
||||
spin_lock_irq(&address_space->tree_lock);
|
||||
error = radix_tree_insert(&address_space->page_tree,
|
||||
entry.val, page);
|
||||
if (likely(!error)) {
|
||||
total_swapcache_pages++;
|
||||
address_space->nrpages++;
|
||||
__inc_zone_page_state(page, NR_FILE_PAGES);
|
||||
INC_CACHE_INFO(add_total);
|
||||
}
|
||||
spin_unlock_irq(&swapper_space.tree_lock);
|
||||
spin_unlock_irq(&address_space->tree_lock);
|
||||
|
||||
if (unlikely(error)) {
|
||||
/*
|
||||
|
@ -122,14 +135,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
|
|||
*/
|
||||
void __delete_from_swap_cache(struct page *page)
|
||||
{
|
||||
swp_entry_t entry;
|
||||
struct address_space *address_space;
|
||||
|
||||
VM_BUG_ON(!PageLocked(page));
|
||||
VM_BUG_ON(!PageSwapCache(page));
|
||||
VM_BUG_ON(PageWriteback(page));
|
||||
|
||||
radix_tree_delete(&swapper_space.page_tree, page_private(page));
|
||||
entry.val = page_private(page);
|
||||
address_space = swap_address_space(entry);
|
||||
radix_tree_delete(&address_space->page_tree, page_private(page));
|
||||
set_page_private(page, 0);
|
||||
ClearPageSwapCache(page);
|
||||
total_swapcache_pages--;
|
||||
address_space->nrpages--;
|
||||
__dec_zone_page_state(page, NR_FILE_PAGES);
|
||||
INC_CACHE_INFO(del_total);
|
||||
}
|
||||
|
@ -195,12 +213,14 @@ int add_to_swap(struct page *page)
|
|||
void delete_from_swap_cache(struct page *page)
|
||||
{
|
||||
swp_entry_t entry;
|
||||
struct address_space *address_space;
|
||||
|
||||
entry.val = page_private(page);
|
||||
|
||||
spin_lock_irq(&swapper_space.tree_lock);
|
||||
address_space = swap_address_space(entry);
|
||||
spin_lock_irq(&address_space->tree_lock);
|
||||
__delete_from_swap_cache(page);
|
||||
spin_unlock_irq(&swapper_space.tree_lock);
|
||||
spin_unlock_irq(&address_space->tree_lock);
|
||||
|
||||
swapcache_free(entry, page);
|
||||
page_cache_release(page);
|
||||
|
@ -263,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry)
|
|||
{
|
||||
struct page *page;
|
||||
|
||||
page = find_get_page(&swapper_space, entry.val);
|
||||
page = find_get_page(swap_address_space(entry), entry.val);
|
||||
|
||||
if (page)
|
||||
INC_CACHE_INFO(find_success);
|
||||
|
@ -290,7 +310,8 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
|
|||
* called after lookup_swap_cache() failed, re-calling
|
||||
* that would confuse statistics.
|
||||
*/
|
||||
found_page = find_get_page(&swapper_space, entry.val);
|
||||
found_page = find_get_page(swap_address_space(entry),
|
||||
entry.val);
|
||||
if (found_page)
|
||||
break;
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset)
|
|||
struct page *page;
|
||||
int ret = 0;
|
||||
|
||||
page = find_get_page(&swapper_space, entry.val);
|
||||
page = find_get_page(swap_address_space(entry), entry.val);
|
||||
if (!page)
|
||||
return 0;
|
||||
/*
|
||||
|
@ -699,7 +699,8 @@ int free_swap_and_cache(swp_entry_t entry)
|
|||
p = swap_info_get(entry);
|
||||
if (p) {
|
||||
if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
|
||||
page = find_get_page(&swapper_space, entry.val);
|
||||
page = find_get_page(swap_address_space(entry),
|
||||
entry.val);
|
||||
if (page && !trylock_page(page)) {
|
||||
page_cache_release(page);
|
||||
page = NULL;
|
||||
|
|
10
mm/util.c
10
mm/util.c
|
@ -6,6 +6,7 @@
|
|||
#include <linux/sched.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/swapops.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
@ -389,9 +390,12 @@ struct address_space *page_mapping(struct page *page)
|
|||
|
||||
VM_BUG_ON(PageSlab(page));
|
||||
#ifdef CONFIG_SWAP
|
||||
if (unlikely(PageSwapCache(page)))
|
||||
mapping = &swapper_space;
|
||||
else
|
||||
if (unlikely(PageSwapCache(page))) {
|
||||
swp_entry_t entry;
|
||||
|
||||
entry.val = page_private(page);
|
||||
mapping = swap_address_space(entry);
|
||||
} else
|
||||
#endif
|
||||
if ((unsigned long)mapping & PAGE_MAPPING_ANON)
|
||||
mapping = NULL;
|
||||
|
|
Загрузка…
Ссылка в новой задаче