memcg: move charges of anonymous swap
This patch is another core part of this move-charge-at-task-migration feature. It enables moving charges of anonymous swaps. To move the charge of swap, we need to exchange swap_cgroup's record. In the current implementation, swap_cgroup's record is protected by: - page lock: if the entry is on swap cache. - swap_lock: if the entry is not on swap cache. This works well in usual swap-in/out activity. But this behavior makes the feature of moving swap charge check many conditions to exchange swap_cgroup's record safely. So I changed modification of swap_cgroup's record (swap_cgroup_record()) to use xchg, and defined a new function to cmpxchg swap_cgroup's record. This patch also enables moving charge of non pte_present but not uncharged swap caches, which can exist on the swap-out path, by getting the target pages via find_get_page() as do_mincore() does. [kosaki.motohiro@jp.fujitsu.com: fix ia64 build] [akpm@linux-foundation.org: fix typos] Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Li Zefan <lizf@cn.fujitsu.com> Cc: Paul Menage <menage@google.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Родитель
8033b97c9b
Коммит
024914477e
|
@ -420,6 +420,8 @@ NOTE2: It is recommended to set the soft limit always below the hard limit,
|
|||
|
||||
Users can move charges associated with a task along with task migration, that
|
||||
is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
|
||||
This feature is not supported in !CONFIG_MMU environments because of lack of
|
||||
page tables.
|
||||
|
||||
8.1 Interface
|
||||
|
||||
|
|
|
@ -118,6 +118,8 @@ static inline void __init page_cgroup_init_flatmem(void)
|
|||
#include <linux/swap.h>
|
||||
|
||||
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
|
||||
extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
|
||||
unsigned short old, unsigned short new);
|
||||
extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
|
||||
extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
|
||||
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
|
||||
|
|
|
@ -355,6 +355,7 @@ static inline void disable_swap_token(void)
|
|||
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
|
||||
extern void
|
||||
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
|
||||
extern int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep);
|
||||
#else
|
||||
static inline void
|
||||
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
|
||||
|
@ -485,6 +486,14 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
|
|||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
|
||||
static inline int
|
||||
mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_SWAP */
|
||||
#endif /* __KERNEL__*/
|
||||
#endif /* _LINUX_SWAP_H */
|
||||
|
|
187
mm/memcontrol.c
187
mm/memcontrol.c
|
@ -33,6 +33,7 @@
|
|||
#include <linux/rbtree.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/swapops.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
@ -2270,6 +2271,54 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
|
|||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_move_swap_account - move swap charge and swap_cgroup's record.
|
||||
* @entry: swap entry to be moved
|
||||
* @from: mem_cgroup which the entry is moved from
|
||||
* @to: mem_cgroup which the entry is moved to
|
||||
*
|
||||
* It succeeds only when the swap_cgroup's record for this entry is the same
|
||||
* as the mem_cgroup's id of @from.
|
||||
*
|
||||
* Returns 0 on success, -EINVAL on failure.
|
||||
*
|
||||
* The caller must have charged to @to, IOW, called res_counter_charge() on
|
||||
* both res and memsw, and called css_get().
|
||||
*/
|
||||
static int mem_cgroup_move_swap_account(swp_entry_t entry,
|
||||
struct mem_cgroup *from, struct mem_cgroup *to)
|
||||
{
|
||||
unsigned short old_id, new_id;
|
||||
|
||||
old_id = css_id(&from->css);
|
||||
new_id = css_id(&to->css);
|
||||
|
||||
if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
|
||||
if (!mem_cgroup_is_root(from))
|
||||
res_counter_uncharge(&from->memsw, PAGE_SIZE);
|
||||
mem_cgroup_swap_statistics(from, false);
|
||||
mem_cgroup_put(from);
|
||||
/*
|
||||
* we charged both to->res and to->memsw, so we should uncharge
|
||||
* to->res.
|
||||
*/
|
||||
if (!mem_cgroup_is_root(to))
|
||||
res_counter_uncharge(&to->res, PAGE_SIZE);
|
||||
mem_cgroup_swap_statistics(to, true);
|
||||
mem_cgroup_get(to);
|
||||
css_put(&to->css);
|
||||
|
||||
return 0;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
#else
|
||||
static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
|
||||
struct mem_cgroup *from, struct mem_cgroup *to)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -2949,6 +2998,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
|
|||
return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
|
||||
struct cftype *cft, u64 val)
|
||||
{
|
||||
|
@ -2967,6 +3017,13 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
|
|||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
|
||||
struct cftype *cft, u64 val)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* For read statistics */
|
||||
|
@ -3489,6 +3546,7 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
|
|||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
/* Handlers for move charge at task migration. */
|
||||
#define PRECHARGE_COUNT_AT_ONCE 256
|
||||
static int mem_cgroup_do_precharge(unsigned long count)
|
||||
|
@ -3544,77 +3602,124 @@ one_by_one:
|
|||
}
|
||||
return ret;
|
||||
}
|
||||
#else /* !CONFIG_MMU */
|
||||
static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
|
||||
struct cgroup *cgroup,
|
||||
struct task_struct *p,
|
||||
bool threadgroup)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
|
||||
struct cgroup *cgroup,
|
||||
struct task_struct *p,
|
||||
bool threadgroup)
|
||||
{
|
||||
}
|
||||
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
|
||||
struct cgroup *cont,
|
||||
struct cgroup *old_cont,
|
||||
struct task_struct *p,
|
||||
bool threadgroup)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* is_target_pte_for_mc - check a pte whether it is valid for move charge
|
||||
* @vma: the vma the pte to be checked belongs to
|
||||
* @addr: the address corresponding to the pte to be checked
|
||||
* @ptent: the pte to be checked
|
||||
* @target: the pointer the target page will be stored(can be NULL)
|
||||
* @target: the pointer where the target page or swap ent will be stored(can be NULL)
|
||||
*
|
||||
* Returns
|
||||
* 0(MC_TARGET_NONE): if the pte is not a target for move charge.
|
||||
* 1(MC_TARGET_PAGE): if the page corresponding to this pte is a target for
|
||||
* move charge. if @target is not NULL, the page is stored in target->page
|
||||
* with extra refcnt got(Callers should handle it).
|
||||
* 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
|
||||
* target for charge migration. if @target is not NULL, the entry is stored
|
||||
* in target->ent.
|
||||
*
|
||||
* Called with pte lock held.
|
||||
*/
|
||||
/* We add a new member later. */
|
||||
union mc_target {
|
||||
struct page *page;
|
||||
swp_entry_t ent;
|
||||
};
|
||||
|
||||
/* We add a new type later. */
|
||||
enum mc_target_type {
|
||||
MC_TARGET_NONE, /* not used */
|
||||
MC_TARGET_PAGE,
|
||||
MC_TARGET_SWAP,
|
||||
};
|
||||
|
||||
static int is_target_pte_for_mc(struct vm_area_struct *vma,
|
||||
unsigned long addr, pte_t ptent, union mc_target *target)
|
||||
{
|
||||
struct page *page;
|
||||
struct page *page = NULL;
|
||||
struct page_cgroup *pc;
|
||||
int ret = 0;
|
||||
swp_entry_t ent = { .val = 0 };
|
||||
int usage_count = 0;
|
||||
bool move_anon = test_bit(MOVE_CHARGE_TYPE_ANON,
|
||||
&mc.to->move_charge_at_immigrate);
|
||||
|
||||
if (!pte_present(ptent))
|
||||
return 0;
|
||||
|
||||
page = vm_normal_page(vma, addr, ptent);
|
||||
if (!page || !page_mapped(page))
|
||||
return 0;
|
||||
/*
|
||||
* TODO: We don't move charges of file(including shmem/tmpfs) pages for
|
||||
* now.
|
||||
*/
|
||||
if (!move_anon || !PageAnon(page))
|
||||
return 0;
|
||||
/*
|
||||
* TODO: We don't move charges of shared(used by multiple processes)
|
||||
* pages for now.
|
||||
*/
|
||||
if (page_mapcount(page) > 1)
|
||||
return 0;
|
||||
if (!get_page_unless_zero(page))
|
||||
return 0;
|
||||
|
||||
pc = lookup_page_cgroup(page);
|
||||
/*
|
||||
* Do only loose check w/o page_cgroup lock. mem_cgroup_move_account()
|
||||
* checks the pc is valid or not under the lock.
|
||||
*/
|
||||
if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
|
||||
ret = MC_TARGET_PAGE;
|
||||
if (target)
|
||||
target->page = page;
|
||||
if (!pte_present(ptent)) {
|
||||
/* TODO: handle swap of shmem/tmpfs */
|
||||
if (pte_none(ptent) || pte_file(ptent))
|
||||
return 0;
|
||||
else if (is_swap_pte(ptent)) {
|
||||
ent = pte_to_swp_entry(ptent);
|
||||
if (!move_anon || non_swap_entry(ent))
|
||||
return 0;
|
||||
usage_count = mem_cgroup_count_swap_user(ent, &page);
|
||||
}
|
||||
} else {
|
||||
page = vm_normal_page(vma, addr, ptent);
|
||||
if (!page || !page_mapped(page))
|
||||
return 0;
|
||||
/*
|
||||
* TODO: We don't move charges of file(including shmem/tmpfs)
|
||||
* pages for now.
|
||||
*/
|
||||
if (!move_anon || !PageAnon(page))
|
||||
return 0;
|
||||
if (!get_page_unless_zero(page))
|
||||
return 0;
|
||||
usage_count = page_mapcount(page);
|
||||
}
|
||||
if (usage_count > 1) {
|
||||
/*
|
||||
* TODO: We don't move charges of shared(used by multiple
|
||||
* processes) pages for now.
|
||||
*/
|
||||
if (page)
|
||||
put_page(page);
|
||||
return 0;
|
||||
}
|
||||
if (page) {
|
||||
pc = lookup_page_cgroup(page);
|
||||
/*
|
||||
* Do only loose check w/o page_cgroup lock.
|
||||
* mem_cgroup_move_account() checks the pc is valid or not under
|
||||
* the lock.
|
||||
*/
|
||||
if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
|
||||
ret = MC_TARGET_PAGE;
|
||||
if (target)
|
||||
target->page = page;
|
||||
}
|
||||
if (!ret || !target)
|
||||
put_page(page);
|
||||
}
|
||||
/* fall through: there is a swap entry and no page was selected, so check the swap_cgroup record */
|
||||
if (ent.val && do_swap_account && !ret &&
|
||||
css_id(&mc.from->css) == lookup_swap_cgroup(ent)) {
|
||||
ret = MC_TARGET_SWAP;
|
||||
if (target)
|
||||
target->ent = ent;
|
||||
}
|
||||
|
||||
if (!ret || !target)
|
||||
put_page(page);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -3754,6 +3859,7 @@ retry:
|
|||
int type;
|
||||
struct page *page;
|
||||
struct page_cgroup *pc;
|
||||
swp_entry_t ent;
|
||||
|
||||
if (!mc.precharge)
|
||||
break;
|
||||
|
@ -3775,6 +3881,11 @@ retry:
|
|||
put: /* is_target_pte_for_mc() gets the page */
|
||||
put_page(page);
|
||||
break;
|
||||
case MC_TARGET_SWAP:
|
||||
ent = target.ent;
|
||||
if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to))
|
||||
mc.precharge--;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -334,6 +334,37 @@ not_enough_page:
|
|||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/**
|
||||
* swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
|
||||
* @ent: swap entry to be cmpxchged
|
||||
* @old: old id
|
||||
* @new: new id
|
||||
*
|
||||
* Returns old id at success, 0 at failure.
|
||||
* (There is no mem_cgroup using 0 as its id)
|
||||
*/
|
||||
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
|
||||
unsigned short old, unsigned short new)
|
||||
{
|
||||
int type = swp_type(ent);
|
||||
unsigned long offset = swp_offset(ent);
|
||||
unsigned long idx = offset / SC_PER_PAGE;
|
||||
unsigned long pos = offset & SC_POS_MASK;
|
||||
struct swap_cgroup_ctrl *ctrl;
|
||||
struct page *mappage;
|
||||
struct swap_cgroup *sc;
|
||||
|
||||
ctrl = &swap_cgroup_ctrl[type];
|
||||
|
||||
mappage = ctrl->map[idx];
|
||||
sc = page_address(mappage);
|
||||
sc += pos;
|
||||
if (cmpxchg(&sc->id, old, new) == old)
|
||||
return old;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* swap_cgroup_record - record mem_cgroup for this swp_entry.
|
||||
* @ent: swap entry to be recorded into
|
||||
|
@ -358,8 +389,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
|
|||
mappage = ctrl->map[idx];
|
||||
sc = page_address(mappage);
|
||||
sc += pos;
|
||||
old = sc->id;
|
||||
sc->id = id;
|
||||
old = xchg(&sc->id, id);
|
||||
|
||||
return old;
|
||||
}
|
||||
|
|
|
@ -723,6 +723,37 @@ int free_swap_and_cache(swp_entry_t entry)
|
|||
return p != NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
|
||||
/**
|
||||
* mem_cgroup_count_swap_user - count the users of a swap entry
|
||||
* @ent: the swap entry to be checked
|
||||
* @pagep: the pointer for the swap cache page of the entry to be stored
|
||||
*
|
||||
* Returns the number of users of the swap entry. The number is valid only
|
||||
* for swaps of anonymous pages.
|
||||
* If the entry is found on swap cache, the page is stored to pagep with
|
||||
* refcount of it being incremented.
|
||||
*/
|
||||
int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
|
||||
{
|
||||
struct page *page;
|
||||
struct swap_info_struct *p;
|
||||
int count = 0;
|
||||
|
||||
page = find_get_page(&swapper_space, ent.val);
|
||||
if (page)
|
||||
count += page_mapcount(page);
|
||||
p = swap_info_get(ent);
|
||||
if (p) {
|
||||
count += swap_count(p->swap_map[swp_offset(ent)]);
|
||||
spin_unlock(&swap_lock);
|
||||
}
|
||||
|
||||
*pagep = page;
|
||||
return count;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
/*
|
||||
* Find the swap type that corresponds to given device (if any).
|
||||
|
|
Загрузка…
Ссылка в новой задаче