vmstat: create separate function to fold per cpu diffs into local counters
The main idea behind this patchset is to reduce the vmstat update overhead
by avoiding interrupt enable/disable and the use of per cpu atomics.

This patch (of 3): It is better to have a separate folding function because
refresh_cpu_vm_stats() also does other things, such as expiring pages in the
page allocator caches. With a separate function, refresh_cpu_vm_stats() is
only called from the local cpu, which allows additional optimizations. The
folding function is only called when a cpu is being downed, so no other
processor will be accessing the counters; this also simplifies
synchronization.

[akpm@linux-foundation.org: fix UP build]
Signed-off-by: Christoph Lameter <cl@linux.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Parent: d2cf5ad631
Commit: 2bb921e526
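Before reading the diff, it may help to see the folding pattern in isolation. Below is a minimal, self-contained userspace sketch, not kernel code: the names, array sizes, and sample data are invented for illustration, C11 atomics stand in for the kernel's atomic_long_t, and only the global counter array is modeled (the kernel version also folds into per-zone counters along the way). The key point is the two passes: the per cpu diffs are drained into a plain local array first, so each global counter takes at most one atomic add per fold.

#include <stdatomic.h>
#include <stdio.h>

#define NR_ITEMS 4	/* stand-in for NR_VM_ZONE_STAT_ITEMS */
#define NR_ZONES 3	/* diffs are kept per zone, per cpu */

/* Leftover diffs of the cpu being folded (sample data). */
static int vm_stat_diff[NR_ZONES][NR_ITEMS] = {
	{ 3, 0, -1, 2 },
	{ 0, 5,  0, 1 },
	{ 1, 0,  2, 0 },
};

/* Global counters, stand-in for the kernel's vm_stat[] array. */
static atomic_long vm_stat[NR_ITEMS];

static void stats_fold(void)
{
	long global_diff[NR_ITEMS] = { 0 };

	/* Pass 1: drain the diffs into a plain local array. No locking
	 * or atomics are needed on the source side because, in the
	 * kernel scenario, the owning cpu is already offline. */
	for (int z = 0; z < NR_ZONES; z++)
		for (int i = 0; i < NR_ITEMS; i++)
			if (vm_stat_diff[z][i]) {
				global_diff[i] += vm_stat_diff[z][i];
				vm_stat_diff[z][i] = 0;
			}

	/* Pass 2: at most one atomic add per counter, not one per zone. */
	for (int i = 0; i < NR_ITEMS; i++)
		if (global_diff[i])
			atomic_fetch_add(&vm_stat[i], global_diff[i]);
}

int main(void)
{
	stats_fold();
	for (int i = 0; i < NR_ITEMS; i++)
		printf("vm_stat[%d] = %ld\n", i, atomic_load(&vm_stat[i]));
	return 0;
}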
include/linux/vmstat.h
@@ -198,7 +198,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
 extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
-void refresh_cpu_vm_stats(int);
+void cpu_vm_stats_fold(int cpu);
 void refresh_zone_stat_thresholds(void);
 
 void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
@@ -255,6 +255,7 @@ static inline void __dec_zone_page_state(struct page *page,
 
 static inline void refresh_cpu_vm_stats(int cpu) { }
 static inline void refresh_zone_stat_thresholds(void) { }
+static inline void cpu_vm_stats_fold(int cpu) { }
 
 static inline void drain_zonestat(struct zone *zone,
 			struct per_cpu_pageset *pset) { }
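(The no-op cpu_vm_stats_fold() stub added above is the "[akpm@linux-foundation.org: fix UP build]" part of the changelog: on !CONFIG_SMP kernels the real implementation in mm/vmstat.c is not built, so the new caller introduced in the next hunk presumably needs this empty inline, mirroring the existing refresh_cpu_vm_stats() stub, for the kernel to compile.)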
mm/page_alloc.c
@@ -5435,7 +5435,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
 		 * This is only okay since the processor is dead and cannot
 		 * race with what we are doing.
 		 */
-		refresh_cpu_vm_stats(cpu);
+		cpu_vm_stats_fold(cpu);
 	}
 	return NOTIFY_OK;
 }
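Design note on this call site: page_alloc_cpu_notify() handles the notification for a cpu that has already been taken down, so it runs on a surviving processor. As the in-code comment says, the dead cpu cannot race with the fold, and that is exactly the property that lets cpu_vm_stats_fold() read and clear the dead cpu's vm_stat_diff[] entries without disabling interrupts or using per cpu atomics.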
mm/vmstat.c (40 changed lines)
@@ -415,11 +415,7 @@ EXPORT_SYMBOL(dec_zone_page_state);
 #endif
 
 /*
- * Update the zone counters for one cpu.
- *
- * The cpu specified must be either the current cpu or a processor that
- * is not online. If it is the current cpu then the execution thread must
- * be pinned to the current cpu.
+ * Update the zone counters for the current cpu.
  *
  * Note that refresh_cpu_vm_stats strives to only access
  * node local memory. The per cpu pagesets on remote zones are placed
@@ -432,7 +428,7 @@ EXPORT_SYMBOL(dec_zone_page_state);
  * with the global counters. These could cause remote node cache line
  * bouncing and will have to be only done when necessary.
  */
-void refresh_cpu_vm_stats(int cpu)
+static void refresh_cpu_vm_stats(int cpu)
 {
 	struct zone *zone;
 	int i;
@@ -493,6 +489,38 @@ void refresh_cpu_vm_stats(int cpu)
 		atomic_long_add(global_diff[i], &vm_stat[i]);
 }
 
+/*
+ * Fold the data for an offline cpu into the global array.
+ * There cannot be any access by the offline cpu and therefore
+ * synchronization is simplified.
+ */
+void cpu_vm_stats_fold(int cpu)
+{
+	struct zone *zone;
+	int i;
+	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+
+	for_each_populated_zone(zone) {
+		struct per_cpu_pageset *p;
+
+		p = per_cpu_ptr(zone->pageset, cpu);
+
+		for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+			if (p->vm_stat_diff[i]) {
+				int v;
+
+				v = p->vm_stat_diff[i];
+				p->vm_stat_diff[i] = 0;
+				atomic_long_add(v, &zone->vm_stat[i]);
+				global_diff[i] += v;
+			}
+	}
+
+	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+		if (global_diff[i])
+			atomic_long_add(global_diff[i], &vm_stat[i]);
+}
+
 /*
  * this is only called if !populated_zone(zone), which implies no other users of
  * pset->vm_stat_diff[] exsist.
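A note on the shape of the new function: the per-zone counters (zone->vm_stat[]) are updated with one atomic add per zone as it is visited, while the updates to the hot global vm_stat[] array are first accumulated in the on-stack global_diff[] and applied in a single pass at the end. With Z populated zones, folding naively would touch each global counter up to Z times; batching cuts that to at most one atomic add per counter, which matters because of the remote-node cache line bouncing called out in the comment above refresh_cpu_vm_stats().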