Merge branch 'akpm' (patches from Andrew Morton)
Merge fixes from Andrew Morton:
 "Bunch of fixes.  And a reversion of mhocko's "Soft limit rework" patch
  series.  This is actually your fault for opening the merge window when
  I was off racing ;)  I didn't read the email thread before sending
  everything off.

  Johannes Weiner raised significant issues:

    http://www.spinics.net/lists/cgroups/msg08813.html

  and we agreed to back it all out"

I clearly need to be more aware of Andrew's racing schedule.

* akpm:
  MAINTAINERS: update mach-bcm related email address
  checkpatch: make extern in .h prototypes quieter
  cciss: fix info leak in cciss_ioctl32_passthru()
  cpqarray: fix info leak in ida_locked_ioctl()
  kernel/reboot.c: re-enable the function of variable reboot_default
  audit: fix endless wait in audit_log_start()
  revert "memcg, vmscan: integrate soft reclaim tighter with zone shrinking code"
  revert "memcg: get rid of soft-limit tree infrastructure"
  revert "vmscan, memcg: do softlimit reclaim also for targeted reclaim"
  revert "memcg: enhance memcg iterator to support predicates"
  revert "memcg: track children in soft limit excess to improve soft limit"
  revert "memcg, vmscan: do not attempt soft limit reclaim if it would not scan anything"
  revert "memcg: track all children over limit in the root"
  revert "memcg, vmscan: do not fall into reclaim-all pass too quickly"
  fs/ocfs2/super.c: use a bigger nodestr in ocfs2_dismount_volume
  watchdog: update watchdog_thresh properly
  watchdog: update watchdog attributes atomically
Commit a153e67bda
MAINTAINERS

@@ -1812,7 +1812,8 @@ S:	Supported
 F:	drivers/net/ethernet/broadcom/bnx2x/
 
 BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE
-M:	Christian Daudt <csd@broadcom.com>
+M:	Christian Daudt <bcm@fixthebug.org>
+L:	bcm-kernel-feedback-list@broadcom.com
 T:	git git://git.github.com/broadcom/bcm11351
 S:	Maintained
 F:	arch/arm/mach-bcm/
drivers/block/cciss.c

@@ -1189,6 +1189,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
 	int err;
 	u32 cp;
 
+	memset(&arg64, 0, sizeof(arg64));
 	err = 0;
 	err |=
 	    copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
drivers/block/cpqarray.c

@@ -1193,6 +1193,7 @@ out_passthru:
 		ida_pci_info_struct pciinfo;
 
 		if (!arg) return -EINVAL;
+		memset(&pciinfo, 0, sizeof(pciinfo));
 		pciinfo.bus = host->pci_dev->bus->number;
 		pciinfo.dev_fn = host->pci_dev->devfn;
 		pciinfo.board_id = host->board_id;
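The two info-leak fixes above share one pattern: a struct built on the kernel stack is later copied to user space, and any padding bytes or fields the driver forgot to set would otherwise leak stale stack contents. A minimal illustrative sketch of that pattern (hypothetical ioctl reply and handler, not the driver code):

```c
#include <linux/string.h>
#include <linux/uaccess.h>

/* Hypothetical ioctl reply; the named fields do not necessarily cover
 * every byte of the struct because of alignment padding. */
struct example_info {
	unsigned char	bus;		/* 3 padding bytes follow on most ABIs */
	unsigned int	dev_fn;
	unsigned int	board_id;
};

static int example_get_info(void __user *arg, unsigned char bus,
			    unsigned int dev_fn, unsigned int board_id)
{
	struct example_info info;

	/* Zero the whole struct first so padding (and any field left
	 * unset on some code path) cannot leak old stack data. */
	memset(&info, 0, sizeof(info));
	info.bus = bus;
	info.dev_fn = dev_fn;
	info.board_id = board_id;

	if (copy_to_user(arg, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}
```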
fs/ocfs2/super.c

@@ -1924,7 +1924,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 {
 	int tmp, hangup_needed = 0;
 	struct ocfs2_super *osb = NULL;
-	char nodestr[8];
+	char nodestr[12];
 
 	trace_ocfs2_dismount_volume(sb);
 
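For context on the size bump: the buffer holds a node number formatted with "%u", and a 32-bit value can need up to ten decimal digits plus the terminating NUL, so 8 bytes can truncate it (12 presumably leaves a little headroom). A quick self-contained illustration of the worst case, in plain userspace C rather than the ocfs2 code:

```c
#include <stdio.h>

int main(void)
{
	char small[8], big[12];
	unsigned int node = 4294967295u;	/* UINT_MAX: 10 digits */

	/* snprintf returns the length it wanted to write; with an 8-byte
	 * buffer the number is silently truncated, with 12 it fits. */
	int want = snprintf(small, sizeof(small), "%u", node);

	printf("needed %d chars, 8-byte buffer holds \"%s\"\n", want, small);
	snprintf(big, sizeof(big), "%u", node);
	printf("12-byte buffer holds \"%s\"\n", big);
	return 0;
}
```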
include/linux/memcontrol.h

@@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie {
 	unsigned int generation;
 };
 
-enum mem_cgroup_filter_t {
-	VISIT,		/* visit current node */
-	SKIP,		/* skip the current node and continue traversal */
-	SKIP_TREE,	/* skip the whole subtree and continue traversal */
-};
-
-/*
- * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
- * iterate through the hierarchy tree. Each tree element is checked by the
- * predicate before it is returned by the iterator. If a filter returns
- * SKIP or SKIP_TREE then the iterator code continues traversal (with the
- * next node down the hierarchy or the next node that doesn't belong under the
- * memcg's subtree).
- */
-typedef enum mem_cgroup_filter_t
-(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
-
 #ifdef CONFIG_MEMCG
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or

@@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 	struct page *oldpage, struct page *newpage, bool migration_ok);
 
-struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
-				   struct mem_cgroup *prev,
-				   struct mem_cgroup_reclaim_cookie *reclaim,
-				   mem_cgroup_iter_filter cond);
-
-static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
-				   struct mem_cgroup *prev,
-				   struct mem_cgroup_reclaim_cookie *reclaim)
-{
-	return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
-}
-
+struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
+				   struct mem_cgroup *,
+				   struct mem_cgroup_reclaim_cookie *);
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
 /*

@@ -260,9 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
-		struct mem_cgroup *root);
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+						gfp_t gfp_mask,
+						unsigned long *total_scanned);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,

@@ -376,15 +350,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
 		struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
-static inline struct mem_cgroup *
-mem_cgroup_iter_cond(struct mem_cgroup *root,
-		struct mem_cgroup *prev,
-		struct mem_cgroup_reclaim_cookie *reclaim,
-		mem_cgroup_iter_filter cond)
-{
-	/* first call must return non-NULL, second return NULL */
-	return (struct mem_cgroup *)(unsigned long)!prev;
-}
 
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,

@@ -471,11 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-enum mem_cgroup_filter_t
-mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
-		struct mem_cgroup *root)
+unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+					    gfp_t gfp_mask,
+					    unsigned long *total_scanned)
 {
-	return VISIT;
+	return 0;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
include/linux/smp.h

@@ -155,6 +155,12 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
 
 static inline void kick_all_cpus_sync(void) {  }
 
+static inline void __smp_call_function_single(int cpuid,
+		struct call_single_data *data, int wait)
+{
+	on_each_cpu(data->func, data->info, wait);
+}
+
 #endif /* !SMP */
 
 /*
kernel/audit.c

@@ -1117,9 +1117,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
 
 			sleep_time = timeout_start + audit_backlog_wait_time -
 					jiffies;
-			if ((long)sleep_time > 0)
+			if ((long)sleep_time > 0) {
 				wait_for_auditd(sleep_time);
-			continue;
+				continue;
+			}
 		}
 		if (audit_rate_check() && printk_ratelimit())
 			printk(KERN_WARNING
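The audit change turns an unconditional continue into one taken only after the task has actually slept: once the computed sleep_time is no longer positive, the loop now falls through to the rate-limited warning/drop path instead of retrying forever. A rough, self-contained sketch of that control-flow shape (generic retry loop with stub helpers, not the audit code):

```c
#include <stdbool.h>
#include <stdio.h>

/* Stubs standing in for the real backlog/wait primitives. */
static int backlog = 5, limit = 3, budget = 2;
static bool backlog_full(void)   { return backlog > limit; }
static void wait_for_space(void) { backlog--; budget--; }

int main(void)
{
	for (;;) {
		if (!backlog_full())
			break;			/* room available, proceed */
		if (budget > 0) {
			wait_for_space();
			continue;		/* woke up: re-check the backlog */
		}
		/* Budget exhausted: the old code effectively kept looping
		 * here; the fix is to fall through, warn and give up. */
		fprintf(stderr, "backlog still full, giving up\n");
		return 1;
	}
	printf("submitted\n");
	return 0;
}
```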
kernel/reboot.c

@@ -32,7 +32,14 @@ EXPORT_SYMBOL(cad_pid);
 #endif
 enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
 
-int reboot_default;
+/*
+ * This variable is used privately to keep track of whether or not
+ * reboot_type is still set to its default value (i.e., reboot= hasn't
+ * been set on the command line).  This is needed so that we can
+ * suppress DMI scanning for reboot quirks.  Without it, it's
+ * impossible to override a faulty reboot quirk without recompiling.
+ */
+int reboot_default = 1;
 int reboot_cpu;
 enum reboot_type reboot_type = BOOT_ACPI;
 int reboot_force;
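For context, reboot_default records whether a reboot= option was seen: the command-line parser clears it, and the x86 quirk code only runs its DMI scan while it is still set. A simplified sketch of the two consumers follows (names modeled on kernel/reboot.c and arch/x86/kernel/reboot.c, bodies reduced to the parts relevant to reboot_default, not copied verbatim from the tree):

```c
/* Sketch only: illustrates how reboot_default gates DMI reboot quirks. */

static int __init reboot_setup(char *str)
{
	/* The user passed reboot=..., so the type is no longer "default";
	 * this also suppresses DMI reboot-quirk overrides. */
	reboot_default = 0;
	/* ... parse warm/cold/bios/acpi/... into reboot_mode/reboot_type ... */
	return 1;
}
__setup("reboot=", reboot_setup);

static int __init reboot_quirks_init(void)
{
	/* Apply board-specific reboot quirks only while the user has not
	 * overridden reboot_type.  With reboot_default stuck at 0 (the bug
	 * fixed above), this scan never ran. */
	if (reboot_default)
		dmi_check_system(reboot_dmi_table);
	return 0;
}
core_initcall(reboot_quirks_init);
```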
kernel/watchdog.c

@@ -486,7 +486,52 @@ static struct smp_hotplug_thread watchdog_threads = {
 	.unpark			= watchdog_enable,
 };
 
-static int watchdog_enable_all_cpus(void)
+static void restart_watchdog_hrtimer(void *info)
+{
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
+	int ret;
+
+	/*
+	 * No need to cancel and restart hrtimer if it is currently executing
+	 * because it will reprogram itself with the new period now.
+	 * We should never see it unqueued here because we are running per-cpu
+	 * with interrupts disabled.
+	 */
+	ret = hrtimer_try_to_cancel(hrtimer);
+	if (ret == 1)
+		hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+				HRTIMER_MODE_REL_PINNED);
+}
+
+static void update_timers(int cpu)
+{
+	struct call_single_data data = {.func = restart_watchdog_hrtimer};
+	/*
+	 * Make sure that perf event counter will adopt to a new
+	 * sampling period. Updating the sampling period directly would
+	 * be much nicer but we do not have an API for that now so
+	 * let's use a big hammer.
+	 * Hrtimer will adopt the new period on the next tick but this
+	 * might be late already so we have to restart the timer as well.
+	 */
+	watchdog_nmi_disable(cpu);
+	__smp_call_function_single(cpu, &data, 1);
+	watchdog_nmi_enable(cpu);
+}
+
+static void update_timers_all_cpus(void)
+{
+	int cpu;
+
+	get_online_cpus();
+	preempt_disable();
+	for_each_online_cpu(cpu)
+		update_timers(cpu);
+	preempt_enable();
+	put_online_cpus();
+}
+
+static int watchdog_enable_all_cpus(bool sample_period_changed)
 {
 	int err = 0;
 

@@ -496,6 +541,8 @@ static int watchdog_enable_all_cpus(void)
 			pr_err("Failed to create watchdog threads, disabled\n");
 		else
 			watchdog_running = 1;
+	} else if (sample_period_changed) {
+		update_timers_all_cpus();
 	}
 
 	return err;

@@ -520,13 +567,15 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int err, old_thresh, old_enabled;
+	static DEFINE_MUTEX(watchdog_proc_mutex);
 
+	mutex_lock(&watchdog_proc_mutex);
 	old_thresh = ACCESS_ONCE(watchdog_thresh);
 	old_enabled = ACCESS_ONCE(watchdog_user_enabled);
 
 	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (err || !write)
-		return err;
+		goto out;
 
 	set_sample_period();
 	/*

@@ -535,7 +584,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 	 * watchdog_*_all_cpus() function takes care of this.
 	 */
 	if (watchdog_user_enabled && watchdog_thresh)
-		err = watchdog_enable_all_cpus();
+		err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
 	else
 		watchdog_disable_all_cpus();
 

@@ -544,7 +593,8 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 		watchdog_thresh = old_thresh;
 		watchdog_user_enabled = old_enabled;
 	}
-
+out:
+	mutex_unlock(&watchdog_proc_mutex);
 	return err;
 }
 #endif /* CONFIG_SYSCTL */

@@ -554,5 +604,5 @@ void __init lockup_detector_init(void)
 	set_sample_period();
 
 	if (watchdog_user_enabled)
-		watchdog_enable_all_cpus();
+		watchdog_enable_all_cpus(false);
 }
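The proc handler above is what a userspace write to the watchdog sysctls lands in; with this series the write also reprograms the per-cpu hrtimers and perf events rather than only storing the new value. A small illustrative userspace snippet exercising that path (needs root; path and value range as exposed on a typical kernel of this era):

```c
#include <stdio.h>

int main(void)
{
	/* Writing here invokes proc_dowatchdog(), which now updates the
	 * running per-cpu timers when the sample period changes. */
	FILE *f = fopen("/proc/sys/kernel/watchdog_thresh", "w");

	if (!f) {
		perror("watchdog_thresh");
		return 1;
	}
	fprintf(f, "%d\n", 20);		/* threshold in seconds */
	return fclose(f) ? 1 : 0;
}
```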
554	mm/memcontrol.c

@@ -39,6 +39,7 @@
#include <linux/limits.h>
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>

@@ -160,6 +161,10 @@ struct mem_cgroup_per_zone {

	struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];

	struct rb_node		tree_node;	/* RB tree node */
	unsigned long long	usage_in_excess;/* Set to the value by which */
						/* the soft limit is exceeded*/
	bool			on_tree;
	struct mem_cgroup	*memcg;		/* Back pointer, we cannot */
						/* use container_of	   */
};

@@ -168,6 +173,26 @@ struct mem_cgroup_per_node {
	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
};

/*
 * Cgroups above their limits are maintained in a RB-Tree, independent of
 * their hierarchy representation
 */

struct mem_cgroup_tree_per_zone {
	struct rb_root rb_root;
	spinlock_t lock;
};

struct mem_cgroup_tree_per_node {
	struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
};

struct mem_cgroup_tree {
	struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
};

static struct mem_cgroup_tree soft_limit_tree __read_mostly;

struct mem_cgroup_threshold {
	struct eventfd_ctx *eventfd;
	u64 threshold;

@@ -303,22 +328,6 @@ struct mem_cgroup {
	atomic_t	numainfo_events;
	atomic_t	numainfo_updating;
#endif
	/*
	 * Protects soft_contributed transitions.
	 * See mem_cgroup_update_soft_limit
	 */
	spinlock_t soft_lock;

	/*
	 * If true then this group has increased parents' children_in_excess
	 * when it got over the soft limit.
	 * When a group falls bellow the soft limit, parents' children_in_excess
	 * is decreased and soft_contributed changed to false.
	 */
	bool soft_contributed;

	/* Number of children that are in soft limit excess */
	atomic_t children_in_excess;

	struct mem_cgroup_per_node *nodeinfo[0];
	/* WARNING: nodeinfo must be the last member here */

@@ -422,6 +431,7 @@ static bool move_file(void)
 * limit reclaim to prevent infinite loops, if they ever occur.
 */
#define	MEM_CGROUP_MAX_RECLAIM_LOOPS		100
#define	MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS	2

enum charge_type {
	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,

@@ -648,6 +658,164 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
	return mem_cgroup_zoneinfo(memcg, nid, zid);
}

static struct mem_cgroup_tree_per_zone *
soft_limit_tree_node_zone(int nid, int zid)
{
	return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
}

static struct mem_cgroup_tree_per_zone *
soft_limit_tree_from_page(struct page *page)
{
	int nid = page_to_nid(page);
	int zid = page_zonenum(page);

	return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
}

static void
__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
				struct mem_cgroup_per_zone *mz,
				struct mem_cgroup_tree_per_zone *mctz,
				unsigned long long new_usage_in_excess)
{
	struct rb_node **p = &mctz->rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct mem_cgroup_per_zone *mz_node;

	if (mz->on_tree)
		return;

	mz->usage_in_excess = new_usage_in_excess;
	if (!mz->usage_in_excess)
		return;
	while (*p) {
		parent = *p;
		mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
					tree_node);
		if (mz->usage_in_excess < mz_node->usage_in_excess)
			p = &(*p)->rb_left;
		/*
		 * We can't avoid mem cgroups that are over their soft
		 * limit by the same amount
		 */
		else if (mz->usage_in_excess >= mz_node->usage_in_excess)
			p = &(*p)->rb_right;
	}
	rb_link_node(&mz->tree_node, parent, p);
	rb_insert_color(&mz->tree_node, &mctz->rb_root);
	mz->on_tree = true;
}

static void
__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
				struct mem_cgroup_per_zone *mz,
				struct mem_cgroup_tree_per_zone *mctz)
{
	if (!mz->on_tree)
		return;
	rb_erase(&mz->tree_node, &mctz->rb_root);
	mz->on_tree = false;
}

static void
mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
				struct mem_cgroup_per_zone *mz,
				struct mem_cgroup_tree_per_zone *mctz)
{
	spin_lock(&mctz->lock);
	__mem_cgroup_remove_exceeded(memcg, mz, mctz);
	spin_unlock(&mctz->lock);
}


static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
{
	unsigned long long excess;
	struct mem_cgroup_per_zone *mz;
	struct mem_cgroup_tree_per_zone *mctz;
	int nid = page_to_nid(page);
	int zid = page_zonenum(page);
	mctz = soft_limit_tree_from_page(page);

	/*
	 * Necessary to update all ancestors when hierarchy is used.
	 * because their event counter is not touched.
	 */
	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
		mz = mem_cgroup_zoneinfo(memcg, nid, zid);
		excess = res_counter_soft_limit_excess(&memcg->res);
		/*
		 * We have to update the tree if mz is on RB-tree or
		 * mem is over its softlimit.
		 */
		if (excess || mz->on_tree) {
			spin_lock(&mctz->lock);
			/* if on-tree, remove it */
			if (mz->on_tree)
				__mem_cgroup_remove_exceeded(memcg, mz, mctz);
			/*
			 * Insert again. mz->usage_in_excess will be updated.
			 * If excess is 0, no tree ops.
			 */
			__mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
			spin_unlock(&mctz->lock);
		}
	}
}

static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
{
	int node, zone;
	struct mem_cgroup_per_zone *mz;
	struct mem_cgroup_tree_per_zone *mctz;

	for_each_node(node) {
		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
			mz = mem_cgroup_zoneinfo(memcg, node, zone);
			mctz = soft_limit_tree_node_zone(node, zone);
			mem_cgroup_remove_exceeded(memcg, mz, mctz);
		}
	}
}

static struct mem_cgroup_per_zone *
__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
{
	struct rb_node *rightmost = NULL;
	struct mem_cgroup_per_zone *mz;

retry:
	mz = NULL;
	rightmost = rb_last(&mctz->rb_root);
	if (!rightmost)
		goto done;		/* Nothing to reclaim from */

	mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
	/*
	 * Remove the node now but someone else can add it back,
	 * we will to add it back at the end of reclaim to its correct
	 * position in the tree.
	 */
	__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
	if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
		!css_tryget(&mz->memcg->css))
		goto retry;
done:
	return mz;
}

static struct mem_cgroup_per_zone *
mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
{
	struct mem_cgroup_per_zone *mz;

	spin_lock(&mctz->lock);
	mz = __mem_cgroup_largest_soft_limit_node(mctz);
	spin_unlock(&mctz->lock);
	return mz;
}

/*
 * Implementation Note: reading percpu statistics for memcg.
 *

@@ -821,48 +989,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
	return false;
}

/*
 * Called from rate-limited memcg_check_events when enough
 * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure
 * that all the parents up the hierarchy will be notified that this group
 * is in excess or that it is not in excess anymore. mmecg->soft_contributed
 * makes the transition a single action whenever the state flips from one to
 * the other.
 */
static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
{
	unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
	struct mem_cgroup *parent = memcg;
	int delta = 0;

	spin_lock(&memcg->soft_lock);
	if (excess) {
		if (!memcg->soft_contributed) {
			delta = 1;
			memcg->soft_contributed = true;
		}
	} else {
		if (memcg->soft_contributed) {
			delta = -1;
			memcg->soft_contributed = false;
		}
	}

	/*
	 * Necessary to update all ancestors when hierarchy is used
	 * because their event counter is not touched.
	 * We track children even outside the hierarchy for the root
	 * cgroup because tree walk starting at root should visit
	 * all cgroups and we want to prevent from pointless tree
	 * walk if no children is below the limit.
	 */
	while (delta && (parent = parent_mem_cgroup(parent)))
		atomic_add(delta, &parent->children_in_excess);
	if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
		atomic_add(delta, &root_mem_cgroup->children_in_excess);
	spin_unlock(&memcg->soft_lock);
}

/*
 * Check events in order.
 *

@@ -886,7 +1012,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)

		mem_cgroup_threshold(memcg);
		if (unlikely(do_softlimit))
			mem_cgroup_update_soft_limit(memcg);
			mem_cgroup_update_tree(memcg, page);
#if MAX_NUMNODES > 1
		if (unlikely(do_numainfo))
			atomic_inc(&memcg->numainfo_events);

@@ -929,15 +1055,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
	return memcg;
}

static enum mem_cgroup_filter_t
mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
		mem_cgroup_iter_filter cond)
{
	if (!cond)
		return VISIT;
	return cond(memcg, root);
}

/*
 * Returns a next (in a pre-order walk) alive memcg (with elevated css
 * ref. count) or NULL if the whole root's subtree has been visited.

@@ -945,7 +1062,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
 * helper function to be used by mem_cgroup_iter
 */
static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
		struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
		struct mem_cgroup *last_visited)
{
	struct cgroup_subsys_state *prev_css, *next_css;


@@ -963,31 +1080,11 @@ skip_node:
	if (next_css) {
		struct mem_cgroup *mem = mem_cgroup_from_css(next_css);

		switch (mem_cgroup_filter(mem, root, cond)) {
		case SKIP:
		if (css_tryget(&mem->css))
			return mem;
		else {
			prev_css = next_css;
			goto skip_node;
		case SKIP_TREE:
			if (mem == root)
				return NULL;
			/*
			 * css_rightmost_descendant is not an optimal way to
			 * skip through a subtree (especially for imbalanced
			 * trees leaning to right) but that's what we have right
			 * now. More effective solution would be traversing
			 * right-up for first non-NULL without calling
			 * css_next_descendant_pre afterwards.
			 */
			prev_css = css_rightmost_descendant(next_css);
			goto skip_node;
		case VISIT:
			if (css_tryget(&mem->css))
				return mem;
			else {
				prev_css = next_css;
				goto skip_node;
			}
			break;
		}
	}


@@ -1051,7 +1148,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
 * @root: hierarchy root
 * @prev: previously returned memcg, NULL on first invocation
 * @reclaim: cookie for shared reclaim walks, NULL for full walks
 * @cond: filter for visited nodes, NULL for no filter
 *
 * Returns references to children of the hierarchy below @root, or
 * @root itself, or %NULL after a full round-trip.

@@ -1064,18 +1160,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
 * divide up the memcgs in the hierarchy among all concurrent
 * reclaimers operating on the same zone and priority.
 */
struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
				   struct mem_cgroup *prev,
				   struct mem_cgroup_reclaim_cookie *reclaim,
				   mem_cgroup_iter_filter cond)
				   struct mem_cgroup_reclaim_cookie *reclaim)
{
	struct mem_cgroup *memcg = NULL;
	struct mem_cgroup *last_visited = NULL;

	if (mem_cgroup_disabled()) {
		/* first call must return non-NULL, second return NULL */
		return (struct mem_cgroup *)(unsigned long)!prev;
	}
	if (mem_cgroup_disabled())
		return NULL;

	if (!root)
		root = root_mem_cgroup;

@@ -1086,9 +1179,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
	if (!root->use_hierarchy && root != root_mem_cgroup) {
		if (prev)
			goto out_css_put;
		if (mem_cgroup_filter(root, root, cond) == VISIT)
			return root;
		return NULL;
		return root;
	}

	rcu_read_lock();

@@ -1111,7 +1202,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
			last_visited = mem_cgroup_iter_load(iter, root, &seq);
		}

		memcg = __mem_cgroup_iter_next(root, last_visited, cond);
		memcg = __mem_cgroup_iter_next(root, last_visited);

		if (reclaim) {
			mem_cgroup_iter_update(iter, last_visited, memcg, seq);

@@ -1122,11 +1213,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
			reclaim->generation = iter->generation;
		}

		/*
		 * We have finished the whole tree walk or no group has been
		 * visited because filter told us to skip the root node.
		 */
		if (!memcg && (prev || (cond && !last_visited)))
		if (prev && !memcg)
			goto out_unlock;
	}
out_unlock:

@@ -1767,7 +1854,6 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
	return total;
}

#if MAX_NUMNODES > 1
/**
 * test_mem_cgroup_node_reclaimable
 * @memcg: the target memcg

@@ -1790,6 +1876,7 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
	return false;

}
#if MAX_NUMNODES > 1

/*
 * Always updating the nodemask is not very good - even if we have an empty

@@ -1857,50 +1944,104 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
	return node;
}

/*
 * Check all nodes whether it contains reclaimable pages or not.
 * For quick scan, we make use of scan_nodes. This will allow us to skip
 * unused nodes. But scan_nodes is lazily updated and may not cotain
 * enough new information. We need to do double check.
 */
static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
{
	int nid;

	/*
	 * quick check...making use of scan_node.
	 * We can skip unused nodes.
	 */
	if (!nodes_empty(memcg->scan_nodes)) {
		for (nid = first_node(memcg->scan_nodes);
		     nid < MAX_NUMNODES;
		     nid = next_node(nid, memcg->scan_nodes)) {

			if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
				return true;
		}
	}
	/*
	 * Check rest of nodes.
	 */
	for_each_node_state(nid, N_MEMORY) {
		if (node_isset(nid, memcg->scan_nodes))
			continue;
		if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
			return true;
	}
	return false;
}

#else
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
{
	return 0;
}

static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
{
	return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
}
#endif

/*
 * A group is eligible for the soft limit reclaim under the given root
 * hierarchy if
 *	a) it is over its soft limit
 *	b) any parent up the hierarchy is over its soft limit
 *
 * If the given group doesn't have any children over the limit then it
 * doesn't make any sense to iterate its subtree.
 */
enum mem_cgroup_filter_t
mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
		struct mem_cgroup *root)
static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
				   struct zone *zone,
				   gfp_t gfp_mask,
				   unsigned long *total_scanned)
{
	struct mem_cgroup *parent;
	struct mem_cgroup *victim = NULL;
	int total = 0;
	int loop = 0;
	unsigned long excess;
	unsigned long nr_scanned;
	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = 0,
	};

	if (!memcg)
		memcg = root_mem_cgroup;
	parent = memcg;
	excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;

	if (res_counter_soft_limit_excess(&memcg->res))
		return VISIT;

	/*
	 * If any parent up to the root in the hierarchy is over its soft limit
	 * then we have to obey and reclaim from this group as well.
	 */
	while ((parent = parent_mem_cgroup(parent))) {
		if (res_counter_soft_limit_excess(&parent->res))
			return VISIT;
		if (parent == root)
	while (1) {
		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
		if (!victim) {
			loop++;
			if (loop >= 2) {
				/*
				 * If we have not been able to reclaim
				 * anything, it might because there are
				 * no reclaimable pages under this hierarchy
				 */
				if (!total)
					break;
				/*
				 * We want to do more targeted reclaim.
				 * excess >> 2 is not to excessive so as to
				 * reclaim too much, nor too less that we keep
				 * coming back to reclaim from this cgroup
				 */
				if (total >= (excess >> 2) ||
					(loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
					break;
			}
			continue;
		}
		if (!mem_cgroup_reclaimable(victim, false))
			continue;
		total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
						     zone, &nr_scanned);
		*total_scanned += nr_scanned;
		if (!res_counter_soft_limit_excess(&root_memcg->res))
			break;
	}

	if (!atomic_read(&memcg->children_in_excess))
		return SKIP_TREE;
	return SKIP;
	mem_cgroup_iter_break(root_memcg, victim);
	return total;
}

static DEFINE_SPINLOCK(memcg_oom_lock);

@@ -2812,7 +2953,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
	unlock_page_cgroup(pc);

	/*
	 * "charge_statistics" updated event counter.
	 * "charge_statistics" updated event counter. Then, check it.
	 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
	 * if they exceeds softlimit.
	 */
	memcg_check_events(memcg, page);
}

@@ -4647,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
	return ret;
}

unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
					    gfp_t gfp_mask,
					    unsigned long *total_scanned)
{
	unsigned long nr_reclaimed = 0;
	struct mem_cgroup_per_zone *mz, *next_mz = NULL;
	unsigned long reclaimed;
	int loop = 0;
	struct mem_cgroup_tree_per_zone *mctz;
	unsigned long long excess;
	unsigned long nr_scanned;

	if (order > 0)
		return 0;

	mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
	/*
	 * This loop can run a while, specially if mem_cgroup's continuously
	 * keep exceeding their soft limit and putting the system under
	 * pressure
	 */
	do {
		if (next_mz)
			mz = next_mz;
		else
			mz = mem_cgroup_largest_soft_limit_node(mctz);
		if (!mz)
			break;

		nr_scanned = 0;
		reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
						    gfp_mask, &nr_scanned);
		nr_reclaimed += reclaimed;
		*total_scanned += nr_scanned;
		spin_lock(&mctz->lock);

		/*
		 * If we failed to reclaim anything from this memory cgroup
		 * it is time to move on to the next cgroup
		 */
		next_mz = NULL;
		if (!reclaimed) {
			do {
				/*
				 * Loop until we find yet another one.
				 *
				 * By the time we get the soft_limit lock
				 * again, someone might have aded the
				 * group back on the RB tree. Iterate to
				 * make sure we get a different mem.
				 * mem_cgroup_largest_soft_limit_node returns
				 * NULL if no other cgroup is present on
				 * the tree
				 */
				next_mz =
				__mem_cgroup_largest_soft_limit_node(mctz);
				if (next_mz == mz)
					css_put(&next_mz->memcg->css);
				else /* next_mz == NULL or other memcg */
					break;
			} while (1);
		}
		__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
		excess = res_counter_soft_limit_excess(&mz->memcg->res);
		/*
		 * One school of thought says that we should not add
		 * back the node to the tree if reclaim returns 0.
		 * But our reclaim could return 0, simply because due
		 * to priority we are exposing a smaller subset of
		 * memory to reclaim from. Consider this as a longer
		 * term TODO.
		 */
		/* If excess == 0, no tree ops */
		__mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
		spin_unlock(&mctz->lock);
		css_put(&mz->memcg->css);
		loop++;
		/*
		 * Could not reclaim anything and there are no more
		 * mem cgroups to try or we seem to be looping without
		 * reclaiming anything.
		 */
		if (!nr_reclaimed &&
			(next_mz == NULL ||
			loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
			break;
	} while (!nr_reclaimed);
	if (next_mz)
		css_put(&next_mz->memcg->css);
	return nr_reclaimed;
}

/**
 * mem_cgroup_force_empty_list - clears LRU of a group
 * @memcg: group to clear

@@ -5911,6 +6146,8 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
		mz = &pn->zoneinfo[zone];
		lruvec_init(&mz->lruvec);
		mz->usage_in_excess = 0;
		mz->on_tree = false;
		mz->memcg = memcg;
	}
	memcg->nodeinfo[node] = pn;

@@ -5966,6 +6203,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
	int node;
	size_t size = memcg_size();

	mem_cgroup_remove_from_trees(memcg);
	free_css_id(&mem_cgroup_subsys, &memcg->css);

	for_each_node(node)

@@ -6002,6 +6240,29 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
}
EXPORT_SYMBOL(parent_mem_cgroup);

static void __init mem_cgroup_soft_limit_tree_init(void)
{
	struct mem_cgroup_tree_per_node *rtpn;
	struct mem_cgroup_tree_per_zone *rtpz;
	int tmp, node, zone;

	for_each_node(node) {
		tmp = node;
		if (!node_state(node, N_NORMAL_MEMORY))
			tmp = -1;
		rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
		BUG_ON(!rtpn);

		soft_limit_tree.rb_tree_per_node[node] = rtpn;

		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
			rtpz = &rtpn->rb_tree_per_zone[zone];
			rtpz->rb_root = RB_ROOT;
			spin_lock_init(&rtpz->lock);
		}
	}
}

static struct cgroup_subsys_state * __ref
mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{

@@ -6031,7 +6292,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
	mutex_init(&memcg->thresholds_lock);
	spin_lock_init(&memcg->move_lock);
	vmpressure_init(&memcg->vmpressure);
	spin_lock_init(&memcg->soft_lock);

	return &memcg->css;


@@ -6109,13 +6369,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)

	mem_cgroup_invalidate_reclaim_iterators(memcg);
	mem_cgroup_reparent_charges(memcg);
	if (memcg->soft_contributed) {
		while ((memcg = parent_mem_cgroup(memcg)))
			atomic_dec(&memcg->children_in_excess);

		if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
			atomic_dec(&root_mem_cgroup->children_in_excess);
	}
	mem_cgroup_destroy_all_caches(memcg);
	vmpressure_cleanup(&memcg->vmpressure);
}

@@ -6790,6 +7043,7 @@ static int __init mem_cgroup_init(void)
{
	hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
	enable_swap_cgroup();
	mem_cgroup_soft_limit_tree_init();
	memcg_stock_init();
	return 0;
}
83	mm/vmscan.c

@@ -139,23 +139,11 @@ static bool global_reclaim(struct scan_control *sc)
{
	return !sc->target_mem_cgroup;
}

static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
{
	struct mem_cgroup *root = sc->target_mem_cgroup;
	return !mem_cgroup_disabled() &&
		mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE;
}
#else
static bool global_reclaim(struct scan_control *sc)
{
	return true;
}

static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
{
	return false;
}
#endif

unsigned long zone_reclaimable_pages(struct zone *zone)

@@ -2176,11 +2164,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
	}
}

static int
__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
static void shrink_zone(struct zone *zone, struct scan_control *sc)
{
	unsigned long nr_reclaimed, nr_scanned;
	int groups_scanned = 0;

	do {
		struct mem_cgroup *root = sc->target_mem_cgroup;

@@ -2188,17 +2174,15 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
			.zone = zone,
			.priority = sc->priority,
		};
		struct mem_cgroup *memcg = NULL;
		mem_cgroup_iter_filter filter = (soft_reclaim) ?
			mem_cgroup_soft_reclaim_eligible : NULL;
		struct mem_cgroup *memcg;

		nr_reclaimed = sc->nr_reclaimed;
		nr_scanned = sc->nr_scanned;

		while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
		memcg = mem_cgroup_iter(root, NULL, &reclaim);
		do {
			struct lruvec *lruvec;

			groups_scanned++;
			lruvec = mem_cgroup_zone_lruvec(zone, memcg);

			shrink_lruvec(lruvec, sc);

@@ -2218,7 +2202,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
				mem_cgroup_iter_break(root, memcg);
				break;
			}
		}
			memcg = mem_cgroup_iter(root, memcg, &reclaim);
		} while (memcg);

		vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
			   sc->nr_scanned - nr_scanned,

@@ -2226,37 +2211,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)

	} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
					 sc->nr_scanned - nr_scanned, sc));

	return groups_scanned;
}


static void shrink_zone(struct zone *zone, struct scan_control *sc)
{
	bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
	unsigned long nr_scanned = sc->nr_scanned;
	int scanned_groups;

	scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim);
	/*
	 * memcg iterator might race with other reclaimer or start from
	 * a incomplete tree walk so the tree walk in __shrink_zone
	 * might have missed groups that are above the soft limit. Try
	 * another loop to catch up with others. Do it just once to
	 * prevent from reclaim latencies when other reclaimers always
	 * preempt this one.
	 */
	if (do_soft_reclaim && !scanned_groups)
		__shrink_zone(zone, sc, do_soft_reclaim);

	/*
	 * No group is over the soft limit or those that are do not have
	 * pages in the zone we are reclaiming so we have to reclaim everybody
	 */
	if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
		__shrink_zone(zone, sc, false);
		return;
	}
}

/* Returns true if compaction should go ahead for a high-order request */

@@ -2320,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
{
	struct zoneref *z;
	struct zone *zone;
	unsigned long nr_soft_reclaimed;
	unsigned long nr_soft_scanned;
	bool aborted_reclaim = false;

	/*

@@ -2359,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
				continue;
			}
		}
		/*
		 * This steals pages from memory cgroups over softlimit
		 * and returns the number of reclaimed pages and
		 * scanned pages. This works for global memory pressure
		 * and balancing, not for a memcg's limit.
		 */
		nr_soft_scanned = 0;
		nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
					sc->order, sc->gfp_mask,
					&nr_soft_scanned);
		sc->nr_reclaimed += nr_soft_reclaimed;
		sc->nr_scanned += nr_soft_scanned;
		/* need some check for avoid more shrink_zone() */
	}


@@ -2952,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
{
	int i;
	int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
	unsigned long nr_soft_reclaimed;
	unsigned long nr_soft_scanned;
	struct scan_control sc = {
		.gfp_mask = GFP_KERNEL,
		.priority = DEF_PRIORITY,

@@ -3066,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,

			sc.nr_scanned = 0;

			nr_soft_scanned = 0;
			/*
			 * Call soft limit reclaim before calling shrink_zone.
			 */
			nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
							order, sc.gfp_mask,
							&nr_soft_scanned);
			sc.nr_reclaimed += nr_soft_reclaimed;

			/*
			 * There should be no need to raise the scanning
			 * priority if enough pages are already being scanned
scripts/checkpatch.pl

@@ -3975,8 +3975,8 @@ sub string_find_replace {
 # check for new externs in .h files.
 		if ($realfile =~ /\.h$/ &&
 		    $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) {
-			if (WARN("AVOID_EXTERNS",
-				 "extern prototypes should be avoided in .h files\n" . $herecurr) &&
+			if (CHK("AVOID_EXTERNS",
+				"extern prototypes should be avoided in .h files\n" . $herecurr) &&
 			    $fix) {
 				$fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
 			}
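The checkpatch change only downgrades the message severity: an extern function prototype added to a header is now reported via CHK (shown only with --strict) rather than WARN, and --fix still strips the keyword. An illustrative header fragment of the kind the regex matches (hypothetical names):

```c
/* example.h -- illustrative only */
struct device;

/* Flagged by checkpatch's AVOID_EXTERNS check (now a --strict CHK,
 * no longer a WARN): the 'extern' keyword is redundant on a function
 * prototype in a header. */
extern int example_probe(struct device *dev);

/* What the --fix option rewrites it to (the extern is simply dropped): */
int example_probe(struct device *dev);
```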