diff --git a/include/linux/sched.h b/include/linux/sched.h index 50d04b92ceda..4f163a8ffabf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1034,6 +1034,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; + struct task_struct *last_wakee; + unsigned long wakee_flips; + unsigned long wakee_flip_decay_ts; #endif int on_rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 765d87acdf05..860063a8c849 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3017,6 +3017,23 @@ static unsigned long cpu_avg_load_per_task(int cpu) return 0; } +static void record_wakee(struct task_struct *p) +{ + /* + * Rough decay (wiping) for cost saving, don't worry + * about the boundary, really active task won't care + * about the loss. + */ + if (jiffies > current->wakee_flip_decay_ts + HZ) { + current->wakee_flips = 0; + current->wakee_flip_decay_ts = jiffies; + } + + if (current->last_wakee != p) { + current->last_wakee = p; + current->wakee_flips++; + } +} static void task_waking_fair(struct task_struct *p) { @@ -3037,6 +3054,7 @@ static void task_waking_fair(struct task_struct *p) #endif se->vruntime -= min_vruntime; + record_wakee(p); } #ifdef CONFIG_FAIR_GROUP_SCHED @@ -3155,6 +3173,28 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, #endif +static int wake_wide(struct task_struct *p) +{ + int factor = nr_cpus_node(cpu_to_node(smp_processor_id())); + + /* + * Yeah, it's the switching-frequency, could means many wakee or + * rapidly switch, use factor here will just help to automatically + * adjust the loose-degree, so bigger node will lead to more pull. + */ + if (p->wakee_flips > factor) { + /* + * wakee is somewhat hot, it needs certain amount of cpu + * resource, so if waker is far more hot, prefer to leave + * it alone. + */ + if (current->wakee_flips > (factor * p->wakee_flips)) + return 1; + } + + return 0; +} + static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) { s64 this_load, load; @@ -3164,6 +3204,13 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) unsigned long weight; int balanced; + /* + * If we wake multiple tasks be careful to not bounce + * ourselves around too much. + */ + if (wake_wide(p)) + return 0; + idx = sd->wake_idx; this_cpu = smp_processor_id(); prev_cpu = task_cpu(p);