From 5e8869bb699d50be5c0733edfc71cfcd5b43e10a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Mon, 26 Nov 2007 21:21:49 +0100
Subject: [PATCH 1/6] sched: don't forget to unlock uids_mutex on error paths

The commit

 commit 5cb350baf580017da38199625b7365b1763d7180
 Author: Dhaval Giani <dhaval@linux.vnet.ibm.com>
 Date:   Mon Oct 15 17:00:14 2007 +0200

    sched: group scheduling, sysfs tunables

introduced the uids_mutex and the helpers to lock/unlock it.
Unfortunately, the error paths of alloc_uid() were not patched
to unlock it.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Acked-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/user.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/kernel/user.c b/kernel/user.c
index 0f3aa0234107..8320a87f3e5a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -337,8 +337,11 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 		struct user_struct *new;
 
 		new = kmem_cache_alloc(uid_cachep, GFP_KERNEL);
-		if (!new)
+		if (!new) {
+			uids_mutex_unlock();
 			return NULL;
+		}
+
 		new->uid = uid;
 		atomic_set(&new->__count, 1);
 		atomic_set(&new->processes, 0);
@@ -355,6 +358,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 
 		if (alloc_uid_keyring(new, current) < 0) {
 			kmem_cache_free(uid_cachep, new);
+			uids_mutex_unlock();
 			return NULL;
 		}
 
@@ -362,6 +366,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 			key_put(new->uid_keyring);
 			key_put(new->session_keyring);
 			kmem_cache_free(uid_cachep, new);
+			uids_mutex_unlock();
 			return NULL;
 		}
 

From 08e4570a4a393bcc241f78dfc444cb0b07995fc0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 26 Nov 2007 21:21:49 +0100
Subject: [PATCH 2/6] sched: fix prev_stime calculation

Srivatsa Vaddagiri noticed occasionally incorrect CPU usage
values in top and tracked it down to stime going below 0 in
task_stime(). Negative values are possible there due to the
sampled nature of stime/utime.

Fix suggested by Balbir Singh: only update p->prev_stime when the
computed stime is non-negative.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Reviewed-by: Balbir Singh <balbir@linux.vnet.ibm.com>
---
 fs/proc/array.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/proc/array.c b/fs/proc/array.c
index eba339ecba27..65c62e1bfd6f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -374,7 +374,9 @@ static cputime_t task_stime(struct task_struct *p)
 	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
 			cputime_to_clock_t(task_utime(p));
 
-	p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+	if (stime >= 0)
+		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+
 	return p->prev_stime;
 }
 #endif

From bcbe4a076609e15ea84cbebd9cd8f317ed70ce92 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 26 Nov 2007 21:21:49 +0100
Subject: [PATCH 3/6] sched: fix kernel/acct.c comment

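Fix the comment describing acct_process() in kernel/acct.c: the
function is called not only from do_exit() but also when switching
to a different output file.

Noticed by Lin Tan. Comment suggested by Olaf Kirch.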

also see:

  http://bugzilla.kernel.org/show_bug.cgi?id=8220

Reported-by: tammy000@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/acct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/acct.c b/kernel/acct.c
index fce53d8df8a7..cf19547cc9e4 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -413,7 +413,7 @@ static u32 encode_float(u64 value)
  *  The acct_process() call is the workhorse of the process
  *  accounting system. The struct acct is built here and then written
  *  into the accounting file. This function should only be called from
- *  do_exit().
+ *  do_exit() or when switching to a different output file.
  */
 
 /*

From 58e1010da3c15e7bdf426b0a3d4b13dba1b7d055 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 26 Nov 2007 21:21:49 +0100
Subject: [PATCH 4/6] sched: fix RLIMIT_CPU comment

Devan Lippman noticed that the RLIMIT_CPU comment in resource.h is
incorrect: the field is in seconds, not msecs. We used msecs in
earlier versions of the patch but that got changed.

Found-by: Devan Lippman <devan.lippman@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-generic/resource.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h
index cfe3692b23e5..a4a22cc35898 100644
--- a/include/asm-generic/resource.h
+++ b/include/asm-generic/resource.h
@@ -12,7 +12,7 @@
  *   then it defines them prior including asm-generic/resource.h. )
  */
 
-#define RLIMIT_CPU		0	/* CPU time in ms */
+#define RLIMIT_CPU		0	/* CPU time in sec */
 #define RLIMIT_FSIZE		1	/* Maximum filesize */
 #define RLIMIT_DATA		2	/* max data size */
 #define RLIMIT_STACK		3	/* max stack size */

From 722aab0c3bbd7648d66790515c14d95d10a15bf3 Mon Sep 17 00:00:00 2001
From: Zou Nan hai <nanhai.zou@intel.com>
Date: Mon, 26 Nov 2007 21:21:49 +0100
Subject: [PATCH 5/6] sched: fix minimum granularity tunings

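Increase the default minimum granularity from 1 msec to 4 msec - this
gives us more performance in aim7 benchmarks. sched_nr_latency changes
from 20 to 5 accordingly, so that it stays at
sysctl_sched_latency / sysctl_sched_min_granularity.

Also correct some comments: we scale with 1 + ilog(ncpus).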

Signed-off-by: Zou Nan hai <nanhai.zou@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched_fair.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ee00da284b12..2f16e15c022c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -22,7 +22,7 @@
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
- * (default: 20ms * ilog(ncpus), units: nanoseconds)
+ * (default: 20ms * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * NOTE: this latency value is not the same as the concept of
  * 'timeslice length' - timeslices in CFS are of variable length
@@ -36,14 +36,14 @@ unsigned int sysctl_sched_latency = 20000000ULL;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
- * (default: 1 msec * ilog(ncpus), units: nanoseconds)
+ * (default: 4 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
-unsigned int sysctl_sched_min_granularity = 1000000ULL;
+unsigned int sysctl_sched_min_granularity = 4000000ULL;
 
 /*
  * is kept at sysctl_sched_latency / sysctl_sched_min_granularity
  */
-static unsigned int sched_nr_latency = 20;
+static unsigned int sched_nr_latency = 5;
 
 /*
  * After fork, child runs first. (default) If set to 0 then
@@ -61,7 +61,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
 
 /*
  * SCHED_BATCH wake-up granularity.
- * (default: 10 msec * ilog(ncpus), units: nanoseconds)
+ * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
@@ -71,7 +71,7 @@ unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 10 msec * ilog(ncpus), units: nanoseconds)
+ * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still

From f7b9329e556a8bdb9e07292cddbbe484c7a2b8c5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 26 Nov 2007 21:21:49 +0100
Subject: [PATCH 6/6] sched: bump version of kernel/sched_debug.c

Bump the version string printed by kernel/sched_debug.c from
v0.06-v22 to v0.07, removing the CFS version information from it.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/sched_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ca198a797bfa..5d0d623a5465 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -199,7 +199,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Sched Debug Version: v0.06-v22, %s %.*s\n",
+	SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);