From 42f8570f437b65aaf3ef176a38ad7d7fc5847d8b Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Mon, 17 Dec 2012 10:01:23 -0500 Subject: [PATCH 01/37] workqueue: use new hashtable implementation Switch workqueues to use the new hashtable implementation. This reduces the amount of generic unrelated code in the workqueues. This patch depends on d9b482c ("hashtable: introduce a small and naive hashtable") which was merged in v3.6. Acked-by: Tejun Heo Signed-off-by: Sasha Levin Signed-off-by: Tejun Heo --- kernel/workqueue.c | 86 ++++++++-------------------------------------- 1 file changed, 15 insertions(+), 71 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fbc6576a83c3..acd417be8199 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "workqueue_sched.h" @@ -82,8 +83,6 @@ enum { NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ - BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, - BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1, MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ @@ -180,7 +179,7 @@ struct global_cwq { unsigned int flags; /* L: GCWQ_* flags */ /* workers are chained either in busy_hash or pool idle_list */ - struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; + DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ struct worker_pool pools[NR_WORKER_POOLS]; @@ -285,8 +284,7 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) #define for_each_busy_worker(worker, i, pos, gcwq) \ - for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ - hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) + hash_for_each(gcwq->busy_hash, i, pos, worker, hentry) static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, unsigned int sw) @@ -858,63 +856,6 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) atomic_inc(get_pool_nr_running(pool)); } -/** - * busy_worker_head - return the busy hash head for a work - * @gcwq: gcwq of interest - * @work: work to be hashed - * - * Return hash head of @gcwq for @work. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - * - * RETURNS: - * Pointer to the hash head. - */ -static struct hlist_head *busy_worker_head(struct global_cwq *gcwq, - struct work_struct *work) -{ - const int base_shift = ilog2(sizeof(struct work_struct)); - unsigned long v = (unsigned long)work; - - /* simple shift and fold hash, do we need something better? */ - v >>= base_shift; - v += v >> BUSY_WORKER_HASH_ORDER; - v &= BUSY_WORKER_HASH_MASK; - - return &gcwq->busy_hash[v]; -} - -/** - * __find_worker_executing_work - find worker which is executing a work - * @gcwq: gcwq of interest - * @bwh: hash head as returned by busy_worker_head() - * @work: work to find worker for - * - * Find a worker which is executing @work on @gcwq. @bwh should be - * the hash head obtained by calling busy_worker_head() with the same - * work. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - * - * RETURNS: - * Pointer to worker which is executing @work if found, NULL - * otherwise. 
- */ -static struct worker *__find_worker_executing_work(struct global_cwq *gcwq, - struct hlist_head *bwh, - struct work_struct *work) -{ - struct worker *worker; - struct hlist_node *tmp; - - hlist_for_each_entry(worker, tmp, bwh, hentry) - if (worker->current_work == work) - return worker; - return NULL; -} - /** * find_worker_executing_work - find worker which is executing a work * @gcwq: gcwq of interest @@ -934,8 +875,14 @@ static struct worker *__find_worker_executing_work(struct global_cwq *gcwq, static struct worker *find_worker_executing_work(struct global_cwq *gcwq, struct work_struct *work) { - return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work), - work); + struct worker *worker; + struct hlist_node *tmp; + + hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry, (unsigned long)work) + if (worker->current_work == work) + return worker; + + return NULL; } /** @@ -2166,7 +2113,6 @@ __acquires(&gcwq->lock) struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct worker_pool *pool = worker->pool; struct global_cwq *gcwq = pool->gcwq; - struct hlist_head *bwh = busy_worker_head(gcwq, work); bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; work_func_t f = work->func; int work_color; @@ -2198,7 +2144,7 @@ __acquires(&gcwq->lock) * already processing the work. If so, defer the work to the * currently executing one. */ - collision = __find_worker_executing_work(gcwq, bwh, work); + collision = find_worker_executing_work(gcwq, work); if (unlikely(collision)) { move_linked_works(work, &collision->scheduled, NULL); return; @@ -2206,7 +2152,7 @@ __acquires(&gcwq->lock) /* claim and dequeue */ debug_work_deactivate(work); - hlist_add_head(&worker->hentry, bwh); + hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)worker); worker->current_work = work; worker->current_cwq = cwq; work_color = get_work_color(work); @@ -2264,7 +2210,7 @@ __acquires(&gcwq->lock) worker_clr_flags(worker, WORKER_CPU_INTENSIVE); /* we're done with it, release */ - hlist_del_init(&worker->hentry); + hash_del(&worker->hentry); worker->current_work = NULL; worker->current_cwq = NULL; cwq_dec_nr_in_flight(cwq, work_color); @@ -3831,7 +3777,6 @@ out_unlock: static int __init init_workqueues(void) { unsigned int cpu; - int i; /* make sure we have enough bits for OFFQ CPU number */ BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) < @@ -3849,8 +3794,7 @@ static int __init init_workqueues(void) gcwq->cpu = cpu; gcwq->flags |= GCWQ_DISASSOCIATED; - for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) - INIT_HLIST_HEAD(&gcwq->busy_hash[i]); + hash_init(gcwq->busy_hash); for_each_worker_pool(pool, gcwq) { pool->gcwq = gcwq; From a2c1c57be8d9fd5b716113c8991d3d702eeacf77 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 18 Dec 2012 10:35:02 -0800 Subject: [PATCH 02/37] workqueue: consider work function when searching for busy work items To avoid executing the same work item concurrently, workqueue hashes currently busy workers according to their current work items and looks up the table when it wants to execute a new work item. If there already is a worker which is executing the new work item, the new item is queued to the found worker so that it gets executed only after the current execution finishes. Unfortunately, a work item may be freed while being executed and thus recycled for different purposes.
If it gets recycled for a different work item and queued while the previous execution is still in progress, workqueue may make the new work item wait for the old one although the two aren't really related in any way. In extreme cases, this false dependency may lead to deadlock although it's extremely unlikely given that there aren't too many self-freeing work item users and they usually don't wait for other work items. To alleviate the problem, record the current work function in each busy worker and match it together with the work item address in find_worker_executing_work(). While this isn't complete, it ensures that unrelated work items don't interact with each other and in the very unlikely case where a twisted wq user triggers it, it's always onto itself making the culprit easy to spot. Signed-off-by: Tejun Heo Reported-by: Andrey Isakov Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=51701 Cc: stable@vger.kernel.org --- kernel/workqueue.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index acd417be8199..d24a41101838 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -137,6 +137,7 @@ struct worker { }; struct work_struct *current_work; /* L: work being processed */ + work_func_t current_func; /* L: current_work's fn */ struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ struct list_head scheduled; /* L: scheduled works */ struct task_struct *task; /* I: worker task */ @@ -861,9 +862,27 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) * @gcwq: gcwq of interest * @work: work to find worker for * - * Find a worker which is executing @work on @gcwq. This function is - * identical to __find_worker_executing_work() except that this - * function calculates @bwh itself. + * Find a worker which is executing @work on @gcwq by searching + * @gcwq->busy_hash which is keyed by the address of @work. For a worker + * to match, its current execution should match the address of @work and + * its work function. This is to avoid unwanted dependency between + * unrelated work executions through a work item being recycled while still + * being executed. + * + * This is a bit tricky. A work item may be freed once its execution + * starts and nothing prevents the freed area from being recycled for + * another work item. If the same work item address ends up being reused + * before the original execution finishes, workqueue will identify the + * recycled work item as currently executing and make it wait until the + * current execution finishes, introducing an unwanted dependency. + * + * This function checks the work item address, work function and workqueue + * to avoid false positives. Note that this isn't complete as one may + * construct a work function which can introduce dependency onto itself + * through a recycled work item. Well, if somebody wants to shoot oneself + * in the foot that badly, there's only so much we can do, and if such + * deadlock actually occurs, it should be easy to locate the culprit work + * function. * * CONTEXT: * spin_lock_irq(gcwq->lock). 
@@ -878,8 +897,10 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq, struct worker *worker; struct hlist_node *tmp; - hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry, (unsigned long)work) - if (worker->current_work == work) + hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry, + (unsigned long)work) + if (worker->current_work == work && + worker->current_func == work->func) return worker; return NULL; @@ -2114,7 +2135,6 @@ __acquires(&gcwq->lock) struct worker_pool *pool = worker->pool; struct global_cwq *gcwq = pool->gcwq; bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; - work_func_t f = work->func; int work_color; struct worker *collision; #ifdef CONFIG_LOCKDEP @@ -2154,6 +2174,7 @@ __acquires(&gcwq->lock) debug_work_deactivate(work); hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)worker); worker->current_work = work; + worker->current_func = work->func; worker->current_cwq = cwq; work_color = get_work_color(work); @@ -2186,7 +2207,7 @@ __acquires(&gcwq->lock) lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); trace_workqueue_execute_start(work); - f(work); + worker->current_func(work); /* * While we must be careful to not use "work" after this, the trace * point will only record its address. */ @@ -2198,7 +2219,8 @@ __acquires(&gcwq->lock) if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" " last function: %pf\n", - current->comm, preempt_count(), task_pid_nr(current), f); + current->comm, preempt_count(), task_pid_nr(current), + worker->current_func); debug_show_held_locks(current); dump_stack(); } @@ -2212,6 +2234,7 @@ __acquires(&gcwq->lock) /* we're done with it, release */ hash_del(&worker->hentry); worker->current_work = NULL; + worker->current_func = NULL; worker->current_cwq = NULL; cwq_dec_nr_in_flight(cwq, work_color); } From 023f27d3d6fcc9048754d879fe5e7d63402a5b16 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 19 Dec 2012 11:24:06 -0800 Subject: [PATCH 03/37] workqueue: fix find_worker_executing_work() breakage from hashtable conversion 42f8570f43 ("workqueue: use new hashtable implementation") incorrectly made busy workers hashed by the pointer value of worker instead of work. This broke find_worker_executing_work() which in turn broke a lot of fundamental operations of workqueue - non-reentrancy and flushing among others. The flush malfunction triggered a warning in the disk event code in Fengguang's automated test.
write_dev_root_ (3265) used greatest stack depth: 2704 bytes left ------------[ cut here ]------------ WARNING: at /c/kernel-tests/src/stable/block/genhd.c:1574 disk_clear_events+0x\ cf/0x108() Hardware name: Bochs Modules linked in: Pid: 3328, comm: ata_id Not tainted 3.7.0-01930-gbff6343 #1167 Call Trace: [] warn_slowpath_common+0x83/0x9c [] warn_slowpath_null+0x1a/0x1c [] disk_clear_events+0xcf/0x108 [] check_disk_change+0x27/0x59 [] cdrom_open+0x49/0x68b [] idecd_open+0x88/0xb7 [] __blkdev_get+0x102/0x3ec [] blkdev_get+0x18f/0x30f [] blkdev_open+0x75/0x80 [] do_dentry_open+0x1ea/0x295 [] finish_open+0x35/0x41 [] do_last+0x878/0xa25 [] path_openat+0xc6/0x333 [] do_filp_open+0x38/0x86 [] do_sys_open+0x6c/0xf9 [] sys_open+0x21/0x23 [] system_call_fastpath+0x16/0x1b Signed-off-by: Tejun Heo Reported-by: Fengguang Wu Cc: Sasha Levin --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d24a41101838..7967f3476393 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2172,7 +2172,7 @@ __acquires(&gcwq->lock) /* claim and dequeue */ debug_work_deactivate(work); - hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)worker); + hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)work); worker->current_work = work; worker->current_func = work->func; worker->current_cwq = cwq; From 111c225a5f8d872bc9327ada18d13b75edaa34be Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 17 Jan 2013 17:16:24 -0800 Subject: [PATCH 04/37] workqueue: set PF_WQ_WORKER on rescuers PF_WQ_WORKER is used to tell the scheduler that the task is a workqueue worker and needs wq_worker_sleeping/waking_up() invoked on it for concurrency management. As rescuers never participate in concurrency management, PF_WQ_WORKER wasn't set on them. There's a need for an interface which can query whether %current is executing a work item and if so which. Such an interface requires a way to identify all tasks which may execute work items and PF_WQ_WORKER will be used for that. As all normal workers always have PF_WQ_WORKER set, we only need to add it to rescuers. As rescuers start with WORKER_PREP but never clear it, they're always NOT_RUNNING and there's no need to worry about them interfering with concurrency management even if PF_WQ_WORKER is set; however, unlike normal workers, rescuers currently don't have their worker struct as kthread_data(). They use the associated workqueue_struct instead. This is problematic as wq_worker_sleeping/waking_up() expect struct worker at kthread_data(). This patch adds worker->rescue_wq, starts rescuer kthreads with the worker struct as kthread_data, and sets PF_WQ_WORKER on rescuers.
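The key mechanical point is that kthread_data() returns whatever pointer was passed to kthread_create(), so handing it the rescuer's struct worker (instead of the workqueue) is what lets the scheduler hooks treat rescuers like any other worker. A condensed before/after sketch of that wiring (illustrative only, not part of the diff below):

        /* before: the rescuer task carried the workqueue as its kthread data */
        rescuer->task = kthread_create(rescuer_thread, wq, "%s", wq->name);

        /* after: it carries its own struct worker, like normal workers do */
        rescuer->rescue_wq = wq;
        rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);

        /* ...so wq_worker_sleeping/waking_up() can simply do */
        struct worker *worker = kthread_data(task);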
Signed-off-by: Tejun Heo Cc: Linus Torvalds --- kernel/workqueue.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7967f3476393..6b99ac7b19f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -149,6 +149,9 @@ struct worker { /* for rebinding worker to CPU */ struct work_struct rebind_work; /* L: for busy worker */ + + /* used only by rescuers to point to the target workqueue */ + struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ }; struct worker_pool { @@ -763,12 +766,20 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, unsigned int cpu) { struct worker *worker = kthread_data(task), *to_wakeup = NULL; - struct worker_pool *pool = worker->pool; - atomic_t *nr_running = get_pool_nr_running(pool); + struct worker_pool *pool; + atomic_t *nr_running; + /* + * Rescuers, which may not have all the fields set up like normal + * workers, also reach here, let's not access anything before + * checking NOT_RUNNING. + */ if (worker->flags & WORKER_NOT_RUNNING) return NULL; + pool = worker->pool; + nr_running = get_pool_nr_running(pool); + /* this can only happen on the local cpu */ BUG_ON(cpu != raw_smp_processor_id()); @@ -2357,7 +2368,7 @@ sleep: /** * rescuer_thread - the rescuer thread function - * @__wq: the associated workqueue + * @__rescuer: self * * Workqueue rescuer thread function. There's one rescuer for each * workqueue which has WQ_RESCUER set. @@ -2374,20 +2385,27 @@ sleep: * * This should happen rarely. */ -static int rescuer_thread(void *__wq) +static int rescuer_thread(void *__rescuer) { - struct workqueue_struct *wq = __wq; - struct worker *rescuer = wq->rescuer; + struct worker *rescuer = __rescuer; + struct workqueue_struct *wq = rescuer->rescue_wq; struct list_head *scheduled = &rescuer->scheduled; bool is_unbound = wq->flags & WQ_UNBOUND; unsigned int cpu; set_user_nice(current, RESCUER_NICE_LEVEL); + + /* + * Mark rescuer as worker too. As WORKER_PREP is never cleared, it + * doesn't participate in concurrency management. + */ + rescuer->task->flags |= PF_WQ_WORKER; repeat: set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) { __set_current_state(TASK_RUNNING); + rescuer->task->flags &= ~PF_WQ_WORKER; return 0; } @@ -2431,6 +2449,8 @@ repeat: spin_unlock_irq(&gcwq->lock); } + /* rescuers should never participate in concurrency management */ + WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); schedule(); goto repeat; } @@ -3266,7 +3286,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, if (!rescuer) goto err; - rescuer->task = kthread_create(rescuer_thread, wq, "%s", + rescuer->rescue_wq = wq; + rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name); if (IS_ERR(rescuer->task)) goto err; From ea138446e51f7bfe55cdeffa3f1dd9cafc786bd8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Jan 2013 14:05:55 -0800 Subject: [PATCH 05/37] workqueue: rename kernel/workqueue_sched.h to kernel/workqueue_internal.h Workqueue wants to expose more interface internal to kernel/. Instead of adding a new header file, repurpose kernel/workqueue_sched.h. Rename it to workqueue_internal.h and add include protector. This patch doesn't introduce any functional changes. 
Signed-off-by: Tejun Heo Cc: Linus Torvalds Cc: Ingo Molnar Cc: Peter Zijlstra --- kernel/sched/core.c | 2 +- kernel/workqueue.c | 2 +- kernel/workqueue_internal.h | 18 ++++++++++++++++++ kernel/workqueue_sched.h | 9 --------- 4 files changed, 20 insertions(+), 11 deletions(-) create mode 100644 kernel/workqueue_internal.h delete mode 100644 kernel/workqueue_sched.h diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 257002c13bb0..c6737f4fb63b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -83,7 +83,7 @@ #endif #include "sched.h" -#include "../workqueue_sched.h" +#include "../workqueue_internal.h" #include "../smpboot.h" #define CREATE_TRACE_POINTS diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6b99ac7b19f6..b4e92061a934 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -43,7 +43,7 @@ #include #include -#include "workqueue_sched.h" +#include "workqueue_internal.h" enum { /* diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h new file mode 100644 index 000000000000..b3ea6ad5566b --- /dev/null +++ b/kernel/workqueue_internal.h @@ -0,0 +1,18 @@ +/* + * kernel/workqueue_internal.h + * + * Workqueue internal header file. Only to be included by workqueue and + * core kernel subsystems. + */ +#ifndef _KERNEL_WORKQUEUE_INTERNAL_H +#define _KERNEL_WORKQUEUE_INTERNAL_H + +/* + * Scheduler hooks for concurrency managed workqueue. Only to be used from + * sched.c and workqueue.c. + */ +void wq_worker_waking_up(struct task_struct *task, unsigned int cpu); +struct task_struct *wq_worker_sleeping(struct task_struct *task, + unsigned int cpu); + +#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */ diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h deleted file mode 100644 index 2d10fc98dc79..000000000000 --- a/kernel/workqueue_sched.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * kernel/workqueue_sched.h - * - * Scheduler hooks for concurrency managed workqueue. Only to be - * included from sched.c and workqueue.c. - */ -void wq_worker_waking_up(struct task_struct *task, unsigned int cpu); -struct task_struct *wq_worker_sleeping(struct task_struct *task, - unsigned int cpu); From 2eaebdb33e1911c0cf3d44fd3596c42c6f502fab Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Jan 2013 14:05:55 -0800 Subject: [PATCH 06/37] workqueue: move struct worker definition to workqueue_internal.h This will be used to implement an inline function to query whether %current is a workqueue worker and, if so, allow determining which work item it's executing. Signed-off-by: Tejun Heo Cc: Linus Torvalds --- kernel/workqueue.c | 32 +------------------------------- kernel/workqueue_internal.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b4e92061a934..2ffa240052fa 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -122,37 +122,7 @@ enum { * W: workqueue_lock protected. */ -struct global_cwq; -struct worker_pool; - -/* - * The poor guys doing the actual heavy lifting. All on-duty workers - * are either serving the manager role, on idle list or on busy hash. 
- */ -struct worker { - /* on idle list while idle, on busy hash table while busy */ - union { - struct list_head entry; /* L: while idle */ - struct hlist_node hentry; /* L: while busy */ - }; - - struct work_struct *current_work; /* L: work being processed */ - work_func_t current_func; /* L: current_work's fn */ - struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ - struct list_head scheduled; /* L: scheduled works */ - struct task_struct *task; /* I: worker task */ - struct worker_pool *pool; /* I: the associated pool */ - /* 64 bytes boundary on 64bit, 32 on 32bit */ - unsigned long last_active; /* L: last active timestamp */ - unsigned int flags; /* X: flags */ - int id; /* I: worker id */ - - /* for rebinding worker to CPU */ - struct work_struct rebind_work; /* L: for busy worker */ - - /* used only by rescuers to point to the target workqueue */ - struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ -}; +/* struct worker is defined in workqueue_internal.h */ struct worker_pool { struct global_cwq *gcwq; /* I: the owning gcwq */ diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index b3ea6ad5566b..02549fa04587 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -7,6 +7,43 @@ #ifndef _KERNEL_WORKQUEUE_INTERNAL_H #define _KERNEL_WORKQUEUE_INTERNAL_H +#include + +struct global_cwq; +struct worker_pool; + +/* + * The poor guys doing the actual heavy lifting. All on-duty workers are + * either serving the manager role, on idle list or on busy hash. For + * details on the locking annotation (L, I, X...), refer to workqueue.c. + * + * Only to be used in workqueue and async. + */ +struct worker { + /* on idle list while idle, on busy hash table while busy */ + union { + struct list_head entry; /* L: while idle */ + struct hlist_node hentry; /* L: while busy */ + }; + + struct work_struct *current_work; /* L: work being processed */ + work_func_t current_func; /* L: current_work's fn */ + struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ + struct list_head scheduled; /* L: scheduled works */ + struct task_struct *task; /* I: worker task */ + struct worker_pool *pool; /* I: the associated pool */ + /* 64 bytes boundary on 64bit, 32 on 32bit */ + unsigned long last_active; /* L: last active timestamp */ + unsigned int flags; /* X: flags */ + int id; /* I: worker id */ + + /* for rebinding worker to CPU */ + struct work_struct rebind_work; /* L: for busy worker */ + + /* used only by rescuers to point to the target workqueue */ + struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ +}; + /* * Scheduler hooks for concurrency managed workqueue. Only to be used from * sched.c and workqueue.c. From 84b233adcca3cacd5cfa8013a5feda7a3db4a9af Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Jan 2013 14:05:56 -0800 Subject: [PATCH 07/37] workqueue: implement current_is_async() This function queries whether %current is an async worker executing an async item. This will be used to implement warning on synchronous request_module() from async workers. 
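As a hedged illustration of the intended use (the call site and exact condition are assumptions here, not part of this series), the module loader could warn on synchronous requests issued from async context roughly like this:

        #include <linux/async.h>

        /* e.g. in __request_module(): a synchronous module load from an
         * async worker can deadlock against async synchronization, so warn */
        WARN_ON_ONCE(wait && current_is_async());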
Signed-off-by: Tejun Heo --- include/linux/async.h | 1 + kernel/async.c | 14 ++++++++++++++ kernel/workqueue_internal.h | 11 +++++++++++ 3 files changed, 26 insertions(+) diff --git a/include/linux/async.h b/include/linux/async.h index 7a24fe9b44b4..345169cfa304 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -52,4 +52,5 @@ extern void async_synchronize_full_domain(struct async_domain *domain); extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain); +extern bool current_is_async(void); #endif diff --git a/kernel/async.c b/kernel/async.c index 9d3118384858..d9bf2a9b5cee 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel. #include #include +#include "workqueue_internal.h" + static async_cookie_t next_cookie = 1; #define MAX_WORK 32768 @@ -337,3 +339,15 @@ void async_synchronize_cookie(async_cookie_t cookie) async_synchronize_cookie_domain(cookie, &async_running); } EXPORT_SYMBOL_GPL(async_synchronize_cookie); + +/** + * current_is_async - is %current an async worker task? + * + * Returns %true if %current is an async worker task. + */ +bool current_is_async(void) +{ + struct worker *worker = current_wq_worker(); + + return worker && worker->current_func == async_run_entry_fn; +} diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 02549fa04587..cc35e7e62091 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -8,6 +8,7 @@ #define _KERNEL_WORKQUEUE_INTERNAL_H #include +#include struct global_cwq; struct worker_pool; @@ -44,6 +45,16 @@ struct worker { struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */ }; +/** + * current_wq_worker - return struct worker if %current is a workqueue worker + */ +static inline struct worker *current_wq_worker(void) +{ + if (current->flags & PF_WQ_WORKER) + return kthread_data(current); + return NULL; +} + /* * Scheduler hooks for concurrency managed workqueue. Only to be used from * sched.c and workqueue.c. From e2905b29122173b72b612c962b138e3fa07476b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:32 -0800 Subject: [PATCH 08/37] workqueue: unexport work_cpu() This function no longer has any external users. Unexport it. It will be removed later on. 
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 1 - kernel/workqueue.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2b58905d3504..ff68b1d95b41 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -426,7 +426,6 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq); -extern unsigned int work_cpu(struct work_struct *work); extern unsigned int work_busy(struct work_struct *work); /* diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2ffa240052fa..fe0745f54fcd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -449,6 +449,7 @@ static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = { }; static int worker_thread(void *__worker); +static unsigned int work_cpu(struct work_struct *work); static int worker_pool_pri(struct worker_pool *pool) { @@ -3419,13 +3420,12 @@ EXPORT_SYMBOL_GPL(workqueue_congested); * RETURNS: * CPU number if @work was ever queued. WORK_CPU_NONE otherwise. */ -unsigned int work_cpu(struct work_struct *work) +static unsigned int work_cpu(struct work_struct *work) { struct global_cwq *gcwq = get_work_gcwq(work); return gcwq ? gcwq->cpu : WORK_CPU_NONE; } -EXPORT_SYMBOL_GPL(work_cpu); /** * work_busy - test whether a work is currently pending or running From e34cdddb03bdfe98f20c58934fd4c45019f13ae5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 09/37] workqueue: use std_ prefix for the standard per-cpu pools There are currently two worker pools per cpu (including the unbound cpu) and they are the only pools in use. A new class of pools is scheduled to be added and some pool related APIs will be added in between. Call the existing pools the standard pools and prefix them with std_. Do this early so that new APIs can use the std_ prefix from the beginning. This patch doesn't introduce any functional difference. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fe0745f54fcd..634251572fdd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -80,7 +80,7 @@ enum { WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | WORKER_CPU_INTENSIVE, - NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ + NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */ BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ @@ -156,7 +156,7 @@ struct global_cwq { DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); /* L: hash of busy workers */ - struct worker_pool pools[NR_WORKER_POOLS]; + struct worker_pool pools[NR_STD_WORKER_POOLS]; /* normal and highpri pools */ } ____cacheline_aligned_in_smp; @@ -255,7 +255,7 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); #define for_each_worker_pool(pool, gcwq) \ for ((pool) = &(gcwq)->pools[0]; \ - (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) + (pool) < &(gcwq)->pools[NR_STD_WORKER_POOLS]; (pool)++) #define for_each_busy_worker(worker, i, pos, gcwq) \ hash_for_each(gcwq->busy_hash, i, pos, worker, hentry) @@ -436,7 +436,7 @@ static bool workqueue_freezing; /* W: have wqs started freezing? */ * try_to_wake_up(). Put it in a separate cacheline.
*/ static DEFINE_PER_CPU(struct global_cwq, global_cwq); -static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]); +static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_STD_WORKER_POOLS]); /* * Global cpu workqueue and nr_running counter for unbound gcwq. The @@ -444,14 +444,14 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]) * workers have WORKER_UNBOUND set. */ static struct global_cwq unbound_global_cwq; -static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = { - [0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ +static atomic_t unbound_pool_nr_running[NR_STD_WORKER_POOLS] = { + [0 ... NR_STD_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ }; static int worker_thread(void *__worker); static unsigned int work_cpu(struct work_struct *work); -static int worker_pool_pri(struct worker_pool *pool) +static int std_worker_pool_pri(struct worker_pool *pool) { return pool - pool->gcwq->pools; } @@ -467,7 +467,7 @@ static struct global_cwq *get_gcwq(unsigned int cpu) static atomic_t *get_pool_nr_running(struct worker_pool *pool) { int cpu = pool->gcwq->cpu; - int idx = worker_pool_pri(pool); + int idx = std_worker_pool_pri(pool); if (cpu != WORK_CPU_UNBOUND) return &per_cpu(pool_nr_running, cpu)[idx]; @@ -1688,7 +1688,7 @@ static void rebind_workers(struct global_cwq *gcwq) * wq doesn't really matter but let's keep @worker->pool * and @cwq->pool consistent for sanity. */ - if (worker_pool_pri(worker->pool)) + if (std_worker_pool_pri(worker->pool)) wq = system_highpri_wq; else wq = system_wq; @@ -1731,7 +1731,7 @@ static struct worker *alloc_worker(void) static struct worker *create_worker(struct worker_pool *pool) { struct global_cwq *gcwq = pool->gcwq; - const char *pri = worker_pool_pri(pool) ? "H" : ""; + const char *pri = std_worker_pool_pri(pool) ? "H" : ""; struct worker *worker = NULL; int id = -1; @@ -1761,7 +1761,7 @@ static struct worker *create_worker(struct worker_pool *pool) if (IS_ERR(worker->task)) goto fail; - if (worker_pool_pri(pool)) + if (std_worker_pool_pri(pool)) set_user_nice(worker->task, HIGHPRI_NICE_LEVEL); /* From 2464757086b4de0591738d5e30f069d068d70ec0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 10/37] workqueue: make GCWQ_DISASSOCIATED a pool flag Make GCWQ_DISASSOCIATED a pool flag POOL_DISASSOCIATED. This patch doesn't change locking - DISASSOCIATED on both pools of a CPU are set or clear together while holding gcwq->lock. It shouldn't cause any functional difference. This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 66 ++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 634251572fdd..1b8af92cc2c9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -48,26 +48,28 @@ enum { /* * global_cwq flags + */ + GCWQ_FREEZING = 1 << 1, /* freeze in progress */ + + /* + * worker_pool flags * - * A bound gcwq is either associated or disassociated with its CPU. + * A bound pool is either associated or disassociated with its CPU. * While associated (!DISASSOCIATED), all workers are bound to the * CPU and none has %WORKER_UNBOUND set and concurrency management * is in effect. 
* * While DISASSOCIATED, the cpu may be offline and all workers have * %WORKER_UNBOUND set and concurrency management disabled, and may - * be executing on any CPU. The gcwq behaves as an unbound one. + * be executing on any CPU. The pool behaves as an unbound one. * * Note that DISASSOCIATED can be flipped only while holding - * assoc_mutex of all pools on the gcwq to avoid changing binding - * state while create_worker() is in progress. + * assoc_mutex to avoid changing binding state while + * create_worker() is in progress. */ - GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */ - GCWQ_FREEZING = 1 << 1, /* freeze in progress */ - - /* pool flags */ POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ + POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ /* worker flags */ WORKER_STARTED = 1 << 0, /* started */ @@ -115,7 +117,7 @@ enum { * X: During normal operation, modification requires gcwq->lock and * should be done only from local cpu. Either disabling preemption * on local cpu or grabbing gcwq->lock is enough for read access. - * If GCWQ_DISASSOCIATED is set, it's identical to L. + * If POOL_DISASSOCIATED is set, it's identical to L. * * F: wq->flush_mutex protected. * @@ -138,7 +140,7 @@ struct worker_pool { struct timer_list idle_timer; /* L: worker idle timeout */ struct timer_list mayday_timer; /* L: SOS timer for workers */ - struct mutex assoc_mutex; /* protect GCWQ_DISASSOCIATED */ + struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ struct ida worker_ida; /* L: for worker IDs */ }; @@ -439,9 +441,9 @@ static DEFINE_PER_CPU(struct global_cwq, global_cwq); static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_STD_WORKER_POOLS]); /* - * Global cpu workqueue and nr_running counter for unbound gcwq. The - * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its - * workers have WORKER_UNBOUND set. + * Global cpu workqueue and nr_running counter for unbound gcwq. The pools + * for online CPUs have POOL_DISASSOCIATED set, and all their workers have + * WORKER_UNBOUND set. */ static struct global_cwq unbound_global_cwq; static atomic_t unbound_pool_nr_running[NR_STD_WORKER_POOLS] = { @@ -1474,7 +1476,6 @@ EXPORT_SYMBOL_GPL(mod_delayed_work); static void worker_enter_idle(struct worker *worker) { struct worker_pool *pool = worker->pool; - struct global_cwq *gcwq = pool->gcwq; BUG_ON(worker->flags & WORKER_IDLE); BUG_ON(!list_empty(&worker->entry) && @@ -1497,7 +1498,7 @@ static void worker_enter_idle(struct worker *worker) * nr_running, the warning may trigger spuriously. Check iff * unbind is not in progress. */ - WARN_ON_ONCE(!(gcwq->flags & GCWQ_DISASSOCIATED) && + WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && pool->nr_workers == pool->nr_idle && atomic_read(get_pool_nr_running(pool))); } @@ -1538,7 +1539,7 @@ static void worker_leave_idle(struct worker *worker) * [dis]associated in the meantime. * * This function tries set_cpus_allowed() and locks gcwq and verifies the - * binding against %GCWQ_DISASSOCIATED which is set during + * binding against %POOL_DISASSOCIATED which is set during * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker * enters idle state or fetches works without dropping lock, it can * guarantee the scheduling requirement described in the first paragraph. 
@@ -1554,7 +1555,8 @@ static void worker_leave_idle(struct worker *worker) static bool worker_maybe_bind_and_lock(struct worker *worker) __acquires(&gcwq->lock) { - struct global_cwq *gcwq = worker->pool->gcwq; + struct worker_pool *pool = worker->pool; + struct global_cwq *gcwq = pool->gcwq; struct task_struct *task = worker->task; while (true) { @@ -1562,13 +1564,13 @@ __acquires(&gcwq->lock) * The following call may fail, succeed or succeed * without actually migrating the task to the cpu if * it races with cpu hotunplug operation. Verify - * against GCWQ_DISASSOCIATED. + * against POOL_DISASSOCIATED. */ - if (!(gcwq->flags & GCWQ_DISASSOCIATED)) + if (!(pool->flags & POOL_DISASSOCIATED)) set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); spin_lock_irq(&gcwq->lock); - if (gcwq->flags & GCWQ_DISASSOCIATED) + if (pool->flags & POOL_DISASSOCIATED) return false; if (task_cpu(task) == gcwq->cpu && cpumask_equal(¤t->cpus_allowed, @@ -1766,14 +1768,14 @@ static struct worker *create_worker(struct worker_pool *pool) /* * Determine CPU binding of the new worker depending on - * %GCWQ_DISASSOCIATED. The caller is responsible for ensuring the + * %POOL_DISASSOCIATED. The caller is responsible for ensuring the * flag remains stable across this function. See the comments * above the flag definition for details. * * As an unbound worker may later become a regular one if CPU comes * online, make sure every worker has %PF_THREAD_BOUND set. */ - if (!(gcwq->flags & GCWQ_DISASSOCIATED)) { + if (!(pool->flags & POOL_DISASSOCIATED)) { kthread_bind(worker->task, gcwq->cpu); } else { worker->task->flags |= PF_THREAD_BOUND; @@ -2134,10 +2136,10 @@ __acquires(&gcwq->lock) /* * Ensure we're on the correct CPU. DISASSOCIATED test is * necessary to avoid spurious warnings from rescuers servicing the - * unbound or a disassociated gcwq. + * unbound or a disassociated pool. */ WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && - !(gcwq->flags & GCWQ_DISASSOCIATED) && + !(pool->flags & POOL_DISASSOCIATED) && raw_smp_processor_id() != gcwq->cpu); /* @@ -3472,7 +3474,7 @@ EXPORT_SYMBOL_GPL(work_busy); * gcwqs serve mix of short, long and very long running works making * blocked draining impractical. * - * This is solved by allowing a gcwq to be disassociated from the CPU + * This is solved by allowing the pools to be disassociated from the CPU * running as an unbound one and allowing it to be reattached later if the * cpu comes back online. 
*/ @@ -3522,7 +3524,8 @@ static void gcwq_unbind_fn(struct work_struct *work) for_each_busy_worker(worker, i, pos, gcwq) worker->flags |= WORKER_UNBOUND; - gcwq->flags |= GCWQ_DISASSOCIATED; + for_each_worker_pool(pool, gcwq) + pool->flags |= POOL_DISASSOCIATED; gcwq_release_assoc_and_unlock(gcwq); @@ -3581,7 +3584,8 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: gcwq_claim_assoc_and_lock(gcwq); - gcwq->flags &= ~GCWQ_DISASSOCIATED; + for_each_worker_pool(pool, gcwq) + pool->flags &= ~POOL_DISASSOCIATED; rebind_workers(gcwq); gcwq_release_assoc_and_unlock(gcwq); break; @@ -3806,12 +3810,12 @@ static int __init init_workqueues(void) spin_lock_init(&gcwq->lock); gcwq->cpu = cpu; - gcwq->flags |= GCWQ_DISASSOCIATED; hash_init(gcwq->busy_hash); for_each_worker_pool(pool, gcwq) { pool->gcwq = gcwq; + pool->flags |= POOL_DISASSOCIATED; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); @@ -3832,12 +3836,12 @@ static int __init init_workqueues(void) struct global_cwq *gcwq = get_gcwq(cpu); struct worker_pool *pool; - if (cpu != WORK_CPU_UNBOUND) - gcwq->flags &= ~GCWQ_DISASSOCIATED; - for_each_worker_pool(pool, gcwq) { struct worker *worker; + if (cpu != WORK_CPU_UNBOUND) + pool->flags &= ~POOL_DISASSOCIATED; + worker = create_worker(pool); BUG_ON(!worker); spin_lock_irq(&gcwq->lock); From 35b6bb63b8a288f90e07948867941a553b3d97bc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 11/37] workqueue: make GCWQ_FREEZING a pool flag Make GCWQ_FREEZING a pool flag POOL_FREEZING. This patch doesn't change locking - FREEZING on both pools of a CPU are set or clear together while holding gcwq->lock. It shouldn't cause any functional difference. This leaves gcwq->flags w/o any flags. Removed. While at it, convert BUG_ON()s in freeze_workqueue_begin() and thaw_workqueues() to WARN_ON_ONCE(). This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. 
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1b8af92cc2c9..1a686e481132 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -46,11 +46,6 @@ #include "workqueue_internal.h" enum { - /* - * global_cwq flags - */ - GCWQ_FREEZING = 1 << 1, /* freeze in progress */ - /* * worker_pool flags * @@ -70,6 +65,7 @@ enum { POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ + POOL_FREEZING = 1 << 3, /* freeze in progress */ /* worker flags */ WORKER_STARTED = 1 << 0, /* started */ @@ -152,7 +148,6 @@ struct worker_pool { struct global_cwq { spinlock_t lock; /* the gcwq lock */ unsigned int cpu; /* I: the associated cpu */ - unsigned int flags; /* L: GCWQ_* flags */ /* workers are chained either in busy_hash or pool idle_list */ DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); @@ -3380,13 +3375,15 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) wq->saved_max_active = max_active; for_each_cwq_cpu(cpu, wq) { - struct global_cwq *gcwq = get_gcwq(cpu); + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct worker_pool *pool = cwq->pool; + struct global_cwq *gcwq = pool->gcwq; spin_lock_irq(&gcwq->lock); if (!(wq->flags & WQ_FREEZABLE) || - !(gcwq->flags & GCWQ_FREEZING)) - cwq_set_max_active(get_cwq(gcwq->cpu, wq), max_active); + !(pool->flags & POOL_FREEZING)) + cwq_set_max_active(cwq, max_active); spin_unlock_irq(&gcwq->lock); } @@ -3676,12 +3673,15 @@ void freeze_workqueues_begin(void) for_each_gcwq_cpu(cpu) { struct global_cwq *gcwq = get_gcwq(cpu); + struct worker_pool *pool; struct workqueue_struct *wq; spin_lock_irq(&gcwq->lock); - BUG_ON(gcwq->flags & GCWQ_FREEZING); - gcwq->flags |= GCWQ_FREEZING; + for_each_worker_pool(pool, gcwq) { + WARN_ON_ONCE(pool->flags & POOL_FREEZING); + pool->flags |= POOL_FREEZING; + } list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); @@ -3767,8 +3767,10 @@ void thaw_workqueues(void) spin_lock_irq(&gcwq->lock); - BUG_ON(!(gcwq->flags & GCWQ_FREEZING)); - gcwq->flags &= ~GCWQ_FREEZING; + for_each_worker_pool(pool, gcwq) { + WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); + pool->flags &= ~POOL_FREEZING; + } list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); From 715b06b864c99a18cb8368dfb187da4f569788cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 12/37] workqueue: introduce WORK_OFFQ_CPU_NONE Currently, when a work item is off queue, high bits of its data encodes the last CPU it was on. This is scheduled to be changed to pool ID, which will make it impossible to use WORK_CPU_NONE to indicate no association. This patch limits the number of bits which are used for off-queue cpu number to 31 (so that the max fits in an int) and uses the highest possible value - WORK_OFFQ_CPU_NONE - to indicate no association. 
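To make the encoding concrete, here is a small decode sketch mirroring the get_work_gcwq() hunk below (illustrative only; the numeric shift value is configuration dependent):

        unsigned long data = atomic_long_read(&work->data);
        unsigned long cpu = data >> WORK_OFFQ_CPU_SHIFT;

        if (cpu == WORK_OFFQ_CPU_NONE)  /* all ones in the capped field */
                return NULL;            /* never associated with a CPU  */
        return get_gcwq(cpu);

On a 64-bit kernel, WORK_OFFQ_LEFT is well above 31, so WORK_OFFQ_CPU_BITS is capped at 31 and WORK_OFFQ_CPU_NONE works out to 0x7fffffff.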
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 10 +++++++++- kernel/workqueue.c | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ff68b1d95b41..f8b35763e55f 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -73,13 +73,21 @@ enum { WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), + /* + * When a work item is off queue, its high bits point to the last + * cpu it was on. Cap at 31 bits and use the highest number to + * indicate that no cpu is associated. + */ WORK_OFFQ_FLAG_BITS = 1, WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT, + WORK_OFFQ_CPU_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, + WORK_OFFQ_CPU_NONE = (1LU << WORK_OFFQ_CPU_BITS) - 1, /* convenience constants */ WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_CPU = (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT, + WORK_STRUCT_NO_CPU = (unsigned long)WORK_OFFQ_CPU_NONE << WORK_OFFQ_CPU_SHIFT, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1a686e481132..405282d30046 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -573,7 +573,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; cpu = data >> WORK_OFFQ_CPU_SHIFT; - if (cpu == WORK_CPU_NONE) + if (cpu == WORK_OFFQ_CPU_NONE) return NULL; BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); @@ -583,7 +583,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) static void mark_work_canceling(struct work_struct *work) { struct global_cwq *gcwq = get_work_gcwq(work); - unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE; + unsigned long cpu = gcwq ? gcwq->cpu : WORK_OFFQ_CPU_NONE; set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING); From 9daf9e678d18585433a4ad90ec51a448e5fd054c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 13/37] workqueue: add worker_pool->id Add worker_pool->id which is allocated from worker_pool_idr. This will be used to record the last associated worker_pool in work->data. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 405282d30046..9c6ad974bb9e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -124,6 +124,7 @@ enum { struct worker_pool { struct global_cwq *gcwq; /* I: the owning gcwq */ + int id; /* I: pool ID */ unsigned int flags; /* X: flags */ struct list_head worklist; /* L: list of pending works */ @@ -445,6 +446,10 @@ static atomic_t unbound_pool_nr_running[NR_STD_WORKER_POOLS] = { [0 ... 
NR_STD_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ }; +/* idr of all pools */ +static DEFINE_MUTEX(worker_pool_idr_mutex); +static DEFINE_IDR(worker_pool_idr); + static int worker_thread(void *__worker); static unsigned int work_cpu(struct work_struct *work); @@ -461,6 +466,19 @@ static struct global_cwq *get_gcwq(unsigned int cpu) return &unbound_global_cwq; } +/* allocate ID and assign it to @pool */ +static int worker_pool_assign_id(struct worker_pool *pool) +{ + int ret; + + mutex_lock(&worker_pool_idr_mutex); + idr_pre_get(&worker_pool_idr, GFP_KERNEL); + ret = idr_get_new(&worker_pool_idr, pool, &pool->id); + mutex_unlock(&worker_pool_idr_mutex); + + return ret; +} + static atomic_t *get_pool_nr_running(struct worker_pool *pool) { int cpu = pool->gcwq->cpu; @@ -3830,6 +3848,9 @@ static int __init init_workqueues(void) mutex_init(&pool->assoc_mutex); ida_init(&pool->worker_ida); + + /* alloc pool ID */ + BUG_ON(worker_pool_assign_id(pool)); } } From 7c3eed5cd60d0f736516e6ade77d90c6255860bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 14/37] workqueue: record pool ID instead of CPU in work->data when off-queue Currently, when a work item is off-queue, work->data records the CPU it was last on, which is used to locate the last executing instance for non-reentrance, flushing, etc. We're in the process of removing global_cwq and making worker_pool the top level abstraction. This patch makes work->data point to the pool it was last associated with instead of CPU. After the previous WORK_OFFQ_POOL_CPU and worker_poo->id additions, the conversion is fairly straight-forward. WORK_OFFQ constants and functions are modified to record and read back pool ID instead. worker_pool_by_id() is added to allow looking up pool from ID. get_work_pool() replaces get_work_gcwq(), which is reimplemented using get_work_pool(). get_work_pool_id() replaces work_cpu(). This patch shouldn't introduce any observable behavior changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 18 +++---- kernel/workqueue.c | 111 +++++++++++++++++++++++--------------- 2 files changed, 76 insertions(+), 53 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f8b35763e55f..a94e4e84e7b1 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -75,19 +75,19 @@ enum { /* * When a work item is off queue, its high bits point to the last - * cpu it was on. Cap at 31 bits and use the highest number to - * indicate that no cpu is associated. + * pool it was on. Cap at 31 bits and use the highest number to + * indicate that no pool is associated. */ WORK_OFFQ_FLAG_BITS = 1, - WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, - WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT, - WORK_OFFQ_CPU_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - WORK_OFFQ_CPU_NONE = (1LU << WORK_OFFQ_CPU_BITS) - 1, + WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, + WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? 
WORK_OFFQ_LEFT : 31, + WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1, /* convenience constants */ WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_CPU = (unsigned long)WORK_OFFQ_CPU_NONE << WORK_OFFQ_CPU_SHIFT, + WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, @@ -103,9 +103,9 @@ struct work_struct { #endif }; -#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) +#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL) #define WORK_DATA_STATIC_INIT() \ - ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC) + ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC) struct delayed_work { struct work_struct work; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9c6ad974bb9e..a4d7e3f0a874 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -451,7 +451,6 @@ static DEFINE_MUTEX(worker_pool_idr_mutex); static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); -static unsigned int work_cpu(struct work_struct *work); static int std_worker_pool_pri(struct worker_pool *pool) { @@ -479,6 +478,15 @@ static int worker_pool_assign_id(struct worker_pool *pool) return ret; } +/* + * Lookup worker_pool by id. The idr currently is built during boot and + * never modified. Don't worry about locking for now. + */ +static struct worker_pool *worker_pool_by_id(int pool_id) +{ + return idr_find(&worker_pool_idr, pool_id); +} + static atomic_t *get_pool_nr_running(struct worker_pool *pool) { int cpu = pool->gcwq->cpu; @@ -520,17 +528,17 @@ static int work_next_color(int color) /* * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data * contain the pointer to the queued cwq. Once execution starts, the flag - * is cleared and the high bits contain OFFQ flags and CPU number. + * is cleared and the high bits contain OFFQ flags and pool ID. * - * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling() - * and clear_work_data() can be used to set the cwq, cpu or clear + * set_work_cwq(), set_work_pool_and_clear_pending(), mark_work_canceling() + * and clear_work_data() can be used to set the cwq, pool or clear * work->data. These functions should only be called while the work is * owned - ie. while the PENDING bit is set. * - * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to - * a work. gcwq is available once the work has been queued anywhere after - * initialization until it is sync canceled. cwq is available only while - * the work item is queued. + * get_work_pool() and get_work_cwq() can be used to obtain the pool or cwq + * corresponding to a work. Pool is available once the work has been + * queued anywhere after initialization until it is sync canceled. cwq is + * available only while the work item is queued. * * %WORK_OFFQ_CANCELING is used to mark a work item which is being * canceled. While being canceled, a work item may have its PENDING set @@ -552,8 +560,8 @@ static void set_work_cwq(struct work_struct *work, WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); } -static void set_work_cpu_and_clear_pending(struct work_struct *work, - unsigned int cpu) +static void set_work_pool_and_clear_pending(struct work_struct *work, + int pool_id) { /* * The following wmb is paired with the implied mb in @@ -562,13 +570,13 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work, * owner. 
*/ smp_wmb(); - set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); + set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0); } static void clear_work_data(struct work_struct *work) { - smp_wmb(); /* see set_work_cpu_and_clear_pending() */ - set_work_data(work, WORK_STRUCT_NO_CPU, 0); + smp_wmb(); /* see set_work_pool_and_clear_pending() */ + set_work_data(work, WORK_STRUCT_NO_POOL, 0); } static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) @@ -581,30 +589,58 @@ static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) return NULL; } -static struct global_cwq *get_work_gcwq(struct work_struct *work) +/** + * get_work_pool - return the worker_pool a given work was associated with + * @work: the work item of interest + * + * Return the worker_pool @work was last associated with. %NULL if none. + */ +static struct worker_pool *get_work_pool(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - unsigned int cpu; + struct worker_pool *pool; + int pool_id; if (data & WORK_STRUCT_CWQ) return ((struct cpu_workqueue_struct *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; + (data & WORK_STRUCT_WQ_DATA_MASK))->pool; - cpu = data >> WORK_OFFQ_CPU_SHIFT; - if (cpu == WORK_OFFQ_CPU_NONE) + pool_id = data >> WORK_OFFQ_POOL_SHIFT; + if (pool_id == WORK_OFFQ_POOL_NONE) return NULL; - BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); - return get_gcwq(cpu); + pool = worker_pool_by_id(pool_id); + WARN_ON_ONCE(!pool); + return pool; +} + +/** + * get_work_pool_id - return the worker pool ID a given work is associated with + * @work: the work item of interest + * + * Return the worker_pool ID @work was last associated with. + * %WORK_OFFQ_POOL_NONE if none. + */ +static int get_work_pool_id(struct work_struct *work) +{ + struct worker_pool *pool = get_work_pool(work); + + return pool ? pool->id : WORK_OFFQ_POOL_NONE; +} + +static struct global_cwq *get_work_gcwq(struct work_struct *work) +{ + struct worker_pool *pool = get_work_pool(work); + + return pool ? pool->gcwq : NULL; } static void mark_work_canceling(struct work_struct *work) { - struct global_cwq *gcwq = get_work_gcwq(work); - unsigned long cpu = gcwq ? gcwq->cpu : WORK_OFFQ_CPU_NONE; + unsigned long pool_id = get_work_pool_id(work); - set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, - WORK_STRUCT_PENDING); + pool_id <<= WORK_OFFQ_POOL_SHIFT; + set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING); } static bool work_is_canceling(struct work_struct *work) @@ -2192,12 +2228,12 @@ __acquires(&gcwq->lock) wake_up_worker(pool); /* - * Record the last CPU and clear PENDING which should be the last + * Record the last pool and clear PENDING which should be the last * update to @work. Also, do this inside @gcwq->lock so that * PENDING and queued state changes happen together while IRQ is * disabled. 
*/ - set_work_cpu_and_clear_pending(work, gcwq->cpu); + set_work_pool_and_clear_pending(work, pool->id); spin_unlock_irq(&gcwq->lock); @@ -2967,7 +3003,8 @@ bool cancel_delayed_work(struct delayed_work *dwork) if (unlikely(ret < 0)) return false; - set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work)); + set_work_pool_and_clear_pending(&dwork->work, + get_work_pool_id(&dwork->work)); local_irq_restore(flags); return ret; } @@ -3430,20 +3467,6 @@ bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(workqueue_congested); -/** - * work_cpu - return the last known associated cpu for @work - * @work: the work of interest - * - * RETURNS: - * CPU number if @work was ever queued. WORK_CPU_NONE otherwise. - */ -static unsigned int work_cpu(struct work_struct *work) -{ - struct global_cwq *gcwq = get_work_gcwq(work); - - return gcwq ? gcwq->cpu : WORK_CPU_NONE; -} - /** * work_busy - test whether a work is currently pending or running * @work: the work to be tested @@ -3816,9 +3839,9 @@ static int __init init_workqueues(void) { unsigned int cpu; - /* make sure we have enough bits for OFFQ CPU number */ - BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) < - WORK_CPU_LAST); + /* make sure we have enough bits for OFFQ pool ID */ + BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < + WORK_CPU_LAST * NR_STD_WORKER_POOLS); cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); From c9e7cf273fa1876dee8effdb201a6f65eefab3a7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 15/37] workqueue: move busy_hash from global_cwq to worker_pool There's no functional necessity for the two pools on the same CPU to share the busy hash table. It's also likely to be a bottleneck when implementing pools with user-specified attributes. This patch makes busy_hash per-pool. The conversion is mostly straight-forward. Changes worth noting are, * Large block of changes in rebind_workers() is moving the block inside for_each_worker_pool() as now there are separate hash tables for each pool. This changes the order of operations but doesn't break anything. * Thre for_each_worker_pool() loops in gcwq_unbind_fn() are combined into one. This again changes the order of operaitons but doesn't break anything. This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. 
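As a reading aid, this is roughly what the busy-worker lookup looks like once the hash lives in the pool; the field and helper names below mirror the patch, everything else is trimmed:

struct worker_pool {
	...
	/* workers are chained either in busy_hash or idle_list */
	DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
	...
};

static struct worker *find_worker_executing_work(struct worker_pool *pool,
						 struct work_struct *work)
{
	struct worker *worker;
	struct hlist_node *tmp;

	/* buckets are keyed by the address of the work item */
	hash_for_each_possible(pool->busy_hash, worker, tmp, hentry,
			       (unsigned long)work)
		/* match the function too so a recycled work item isn't confused */
		if (worker->current_work == work &&
		    worker->current_func == work->func)
			return worker;
	return NULL;
}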
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 109 ++++++++++++++++++++++++--------------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a4d7e3f0a874..99c30116d291 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -137,6 +137,10 @@ struct worker_pool { struct timer_list idle_timer; /* L: worker idle timeout */ struct timer_list mayday_timer; /* L: SOS timer for workers */ + /* workers are chained either in busy_hash or idle_list */ + DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); + /* L: hash of busy workers */ + struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ struct ida worker_ida; /* L: for worker IDs */ }; @@ -150,10 +154,6 @@ struct global_cwq { spinlock_t lock; /* the gcwq lock */ unsigned int cpu; /* I: the associated cpu */ - /* workers are chained either in busy_hash or pool idle_list */ - DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER); - /* L: hash of busy workers */ - struct worker_pool pools[NR_STD_WORKER_POOLS]; /* normal and highpri pools */ } ____cacheline_aligned_in_smp; @@ -255,8 +255,8 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); for ((pool) = &(gcwq)->pools[0]; \ (pool) < &(gcwq)->pools[NR_STD_WORKER_POOLS]; (pool)++) -#define for_each_busy_worker(worker, i, pos, gcwq) \ - hash_for_each(gcwq->busy_hash, i, pos, worker, hentry) +#define for_each_busy_worker(worker, i, pos, pool) \ + hash_for_each(pool->busy_hash, i, pos, worker, hentry) static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, unsigned int sw) @@ -892,11 +892,11 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) /** * find_worker_executing_work - find worker which is executing a work - * @gcwq: gcwq of interest + * @pool: pool of interest * @work: work to find worker for * - * Find a worker which is executing @work on @gcwq by searching - * @gcwq->busy_hash which is keyed by the address of @work. For a worker + * Find a worker which is executing @work on @pool by searching + * @pool->busy_hash which is keyed by the address of @work. For a worker * to match, its current execution should match the address of @work and * its work function. This is to avoid unwanted dependency between * unrelated work executions through a work item being recycled while still @@ -924,13 +924,13 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) * Pointer to worker which is executing @work if found, NULL * otherwise. 
*/ -static struct worker *find_worker_executing_work(struct global_cwq *gcwq, +static struct worker *find_worker_executing_work(struct worker_pool *pool, struct work_struct *work) { struct worker *worker; struct hlist_node *tmp; - hash_for_each_possible(gcwq->busy_hash, worker, tmp, hentry, + hash_for_each_possible(pool->busy_hash, worker, tmp, hentry, (unsigned long)work) if (worker->current_work == work && worker->current_func == work->func) @@ -1191,13 +1191,15 @@ static bool is_chained_work(struct workqueue_struct *wq) unsigned int cpu; for_each_gcwq_cpu(cpu) { - struct global_cwq *gcwq = get_gcwq(cpu); + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct worker_pool *pool = cwq->pool; + struct global_cwq *gcwq = pool->gcwq; struct worker *worker; struct hlist_node *pos; int i; spin_lock_irqsave(&gcwq->lock, flags); - for_each_busy_worker(worker, i, pos, gcwq) { + for_each_busy_worker(worker, i, pos, pool) { if (worker->task != current) continue; spin_unlock_irqrestore(&gcwq->lock, flags); @@ -1238,7 +1240,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, /* determine gcwq to use */ if (!(wq->flags & WQ_UNBOUND)) { - struct global_cwq *last_gcwq; + struct worker_pool *last_pool; if (cpu == WORK_CPU_UNBOUND) cpu = raw_smp_processor_id(); @@ -1250,14 +1252,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, * non-reentrancy. */ gcwq = get_gcwq(cpu); - last_gcwq = get_work_gcwq(work); + last_pool = get_work_pool(work); - if (last_gcwq && last_gcwq != gcwq) { + if (last_pool && last_pool->gcwq != gcwq) { + struct global_cwq *last_gcwq = last_pool->gcwq; struct worker *worker; spin_lock(&last_gcwq->lock); - worker = find_worker_executing_work(last_gcwq, work); + worker = find_worker_executing_work(last_pool, work); if (worker && worker->current_cwq->wq == wq) gcwq = last_gcwq; @@ -1722,31 +1725,32 @@ static void rebind_workers(struct global_cwq *gcwq) */ wake_up_process(worker->task); } - } - /* rebind busy workers */ - for_each_busy_worker(worker, i, pos, gcwq) { - struct work_struct *rebind_work = &worker->rebind_work; - struct workqueue_struct *wq; + /* rebind busy workers */ + for_each_busy_worker(worker, i, pos, pool) { + struct work_struct *rebind_work = &worker->rebind_work; + struct workqueue_struct *wq; - if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, - work_data_bits(rebind_work))) - continue; + if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, + work_data_bits(rebind_work))) + continue; - debug_work_activate(rebind_work); + debug_work_activate(rebind_work); - /* - * wq doesn't really matter but let's keep @worker->pool - * and @cwq->pool consistent for sanity. - */ - if (std_worker_pool_pri(worker->pool)) - wq = system_highpri_wq; - else - wq = system_wq; + /* + * wq doesn't really matter but let's keep + * @worker->pool and @cwq->pool consistent for + * sanity. + */ + if (std_worker_pool_pri(worker->pool)) + wq = system_highpri_wq; + else + wq = system_wq; - insert_work(get_cwq(gcwq->cpu, wq), rebind_work, - worker->scheduled.next, - work_color_to_flags(WORK_NO_COLOR)); + insert_work(get_cwq(gcwq->cpu, wq), rebind_work, + worker->scheduled.next, + work_color_to_flags(WORK_NO_COLOR)); + } } } @@ -2197,7 +2201,7 @@ __acquires(&gcwq->lock) * already processing the work. If so, defer the work to the * currently executing one. 
*/ - collision = find_worker_executing_work(gcwq, work); + collision = find_worker_executing_work(pool, work); if (unlikely(collision)) { move_linked_works(work, &collision->scheduled, NULL); return; @@ -2205,7 +2209,7 @@ __acquires(&gcwq->lock) /* claim and dequeue */ debug_work_deactivate(work); - hash_add(gcwq->busy_hash, &worker->hentry, (unsigned long)work); + hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work); worker->current_work = work; worker->current_func = work->func; worker->current_cwq = cwq; @@ -2833,13 +2837,15 @@ EXPORT_SYMBOL_GPL(drain_workqueue); static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) { struct worker *worker = NULL; + struct worker_pool *pool; struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; might_sleep(); - gcwq = get_work_gcwq(work); - if (!gcwq) + pool = get_work_pool(work); + if (!pool) return false; + gcwq = pool->gcwq; spin_lock_irq(&gcwq->lock); if (!list_empty(&work->entry)) { @@ -2853,7 +2859,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) if (unlikely(!cwq || gcwq != cwq->pool->gcwq)) goto already_gone; } else { - worker = find_worker_executing_work(gcwq, work); + worker = find_worker_executing_work(pool, work); if (!worker) goto already_gone; cwq = worker->current_cwq; @@ -3482,18 +3488,20 @@ EXPORT_SYMBOL_GPL(workqueue_congested); */ unsigned int work_busy(struct work_struct *work) { - struct global_cwq *gcwq = get_work_gcwq(work); + struct worker_pool *pool = get_work_pool(work); + struct global_cwq *gcwq; unsigned long flags; unsigned int ret = 0; - if (!gcwq) + if (!pool) return 0; + gcwq = pool->gcwq; spin_lock_irqsave(&gcwq->lock, flags); if (work_pending(work)) ret |= WORK_BUSY_PENDING; - if (find_worker_executing_work(gcwq, work)) + if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; spin_unlock_irqrestore(&gcwq->lock, flags); @@ -3555,15 +3563,15 @@ static void gcwq_unbind_fn(struct work_struct *work) * ones which are still executing works from before the last CPU * down must be on the cpu. After this, they may become diasporas. */ - for_each_worker_pool(pool, gcwq) + for_each_worker_pool(pool, gcwq) { list_for_each_entry(worker, &pool->idle_list, entry) worker->flags |= WORKER_UNBOUND; - for_each_busy_worker(worker, i, pos, gcwq) - worker->flags |= WORKER_UNBOUND; + for_each_busy_worker(worker, i, pos, pool) + worker->flags |= WORKER_UNBOUND; - for_each_worker_pool(pool, gcwq) pool->flags |= POOL_DISASSOCIATED; + } gcwq_release_assoc_and_unlock(gcwq); @@ -3854,13 +3862,12 @@ static int __init init_workqueues(void) spin_lock_init(&gcwq->lock); gcwq->cpu = cpu; - hash_init(gcwq->busy_hash); - for_each_worker_pool(pool, gcwq) { pool->gcwq = gcwq; pool->flags |= POOL_DISASSOCIATED; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); + hash_init(pool->busy_hash); init_timer_deferrable(&pool->idle_timer); pool->idle_timer.function = idle_worker_timeout; From ec22ca5eab0bd225588c69ccd06b16504cb05adf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 16/37] workqueue: move global_cwq->cpu to worker_pool Move gcwq->cpu to pool->cpu. This introduces a couple places where gcwq->pools[0].cpu is used. These will soon go away as gcwq is further reduced. This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. 
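For orientation, the two structures end up shaped roughly like this after the move (heavily trimmed sketch, not the full definitions):

struct worker_pool {
	struct global_cwq	*gcwq;		/* I: the owning gcwq */
	unsigned int		cpu;		/* I: the associated cpu */
	int			id;		/* I: pool ID */
	...
};

struct global_cwq {
	spinlock_t		lock;		/* still shared by both pools */
	struct worker_pool	pools[NR_STD_WORKER_POOLS];
} ____cacheline_aligned_in_smp;

The interim gcwq->pools[0].cpu accesses mentioned above show up in __queue_work() and gcwq_unbind_fn() and are removed by the following patches as global_cwq is dismantled further.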
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/trace/events/workqueue.h | 2 +- kernel/workqueue.c | 42 ++++++++++++++++---------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index f28d1b65f178..4e798e384a6a 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -54,7 +54,7 @@ TRACE_EVENT(workqueue_queue_work, __entry->function = work->func; __entry->workqueue = cwq->wq; __entry->req_cpu = req_cpu; - __entry->cpu = cwq->pool->gcwq->cpu; + __entry->cpu = cwq->pool->cpu; ), TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 99c30116d291..366132bd226f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -124,6 +124,7 @@ enum { struct worker_pool { struct global_cwq *gcwq; /* I: the owning gcwq */ + unsigned int cpu; /* I: the associated cpu */ int id; /* I: pool ID */ unsigned int flags; /* X: flags */ @@ -152,7 +153,6 @@ struct worker_pool { */ struct global_cwq { spinlock_t lock; /* the gcwq lock */ - unsigned int cpu; /* I: the associated cpu */ struct worker_pool pools[NR_STD_WORKER_POOLS]; /* normal and highpri pools */ @@ -489,7 +489,7 @@ static struct worker_pool *worker_pool_by_id(int pool_id) static atomic_t *get_pool_nr_running(struct worker_pool *pool) { - int cpu = pool->gcwq->cpu; + int cpu = pool->cpu; int idx = std_worker_pool_pri(pool); if (cpu != WORK_CPU_UNBOUND) @@ -764,7 +764,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) struct worker *worker = kthread_data(task); if (!(worker->flags & WORKER_NOT_RUNNING)) { - WARN_ON_ONCE(worker->pool->gcwq->cpu != cpu); + WARN_ON_ONCE(worker->pool->cpu != cpu); atomic_inc(get_pool_nr_running(worker->pool)); } } @@ -1278,7 +1278,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, } /* gcwq determined, get cwq and queue */ - cwq = get_cwq(gcwq->cpu, wq); + cwq = get_cwq(gcwq->pools[0].cpu, wq); trace_workqueue_queue_work(req_cpu, cwq, work); if (WARN_ON(!list_empty(&work->entry))) { @@ -1385,20 +1385,20 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, /* * This stores cwq for the moment, for the timer_fn. Note that the - * work's gcwq is preserved to allow reentrance detection for + * work's pool is preserved to allow reentrance detection for * delayed works. */ if (!(wq->flags & WQ_UNBOUND)) { - struct global_cwq *gcwq = get_work_gcwq(work); + struct worker_pool *pool = get_work_pool(work); /* - * If we cannot get the last gcwq from @work directly, + * If we cannot get the last pool from @work directly, * select the last CPU such that it avoids unnecessarily * triggering non-reentrancy check in __queue_work(). */ lcpu = cpu; - if (gcwq) - lcpu = gcwq->cpu; + if (pool) + lcpu = pool->cpu; if (lcpu == WORK_CPU_UNBOUND) lcpu = raw_smp_processor_id(); } else { @@ -1619,14 +1619,14 @@ __acquires(&gcwq->lock) * against POOL_DISASSOCIATED. 
*/ if (!(pool->flags & POOL_DISASSOCIATED)) - set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); + set_cpus_allowed_ptr(task, get_cpu_mask(pool->cpu)); spin_lock_irq(&gcwq->lock); if (pool->flags & POOL_DISASSOCIATED) return false; - if (task_cpu(task) == gcwq->cpu && + if (task_cpu(task) == pool->cpu && cpumask_equal(¤t->cpus_allowed, - get_cpu_mask(gcwq->cpu))) + get_cpu_mask(pool->cpu))) return true; spin_unlock_irq(&gcwq->lock); @@ -1747,7 +1747,7 @@ static void rebind_workers(struct global_cwq *gcwq) else wq = system_wq; - insert_work(get_cwq(gcwq->cpu, wq), rebind_work, + insert_work(get_cwq(pool->cpu, wq), rebind_work, worker->scheduled.next, work_color_to_flags(WORK_NO_COLOR)); } @@ -1806,10 +1806,10 @@ static struct worker *create_worker(struct worker_pool *pool) worker->pool = pool; worker->id = id; - if (gcwq->cpu != WORK_CPU_UNBOUND) + if (pool->cpu != WORK_CPU_UNBOUND) worker->task = kthread_create_on_node(worker_thread, - worker, cpu_to_node(gcwq->cpu), - "kworker/%u:%d%s", gcwq->cpu, id, pri); + worker, cpu_to_node(pool->cpu), + "kworker/%u:%d%s", pool->cpu, id, pri); else worker->task = kthread_create(worker_thread, worker, "kworker/u:%d%s", id, pri); @@ -1829,7 +1829,7 @@ static struct worker *create_worker(struct worker_pool *pool) * online, make sure every worker has %PF_THREAD_BOUND set. */ if (!(pool->flags & POOL_DISASSOCIATED)) { - kthread_bind(worker->task, gcwq->cpu); + kthread_bind(worker->task, pool->cpu); } else { worker->task->flags |= PF_THREAD_BOUND; worker->flags |= WORKER_UNBOUND; @@ -1936,7 +1936,7 @@ static bool send_mayday(struct work_struct *work) return false; /* mayday mayday mayday */ - cpu = cwq->pool->gcwq->cpu; + cpu = cwq->pool->cpu; /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ if (cpu == WORK_CPU_UNBOUND) cpu = 0; @@ -2193,7 +2193,7 @@ __acquires(&gcwq->lock) */ WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && !(pool->flags & POOL_DISASSOCIATED) && - raw_smp_processor_id() != gcwq->cpu); + raw_smp_processor_id() != pool->cpu); /* * A single work shouldn't be executed concurrently by @@ -3553,7 +3553,7 @@ static void gcwq_unbind_fn(struct work_struct *work) struct hlist_node *pos; int i; - BUG_ON(gcwq->cpu != smp_processor_id()); + BUG_ON(gcwq->pools[0].cpu != smp_processor_id()); gcwq_claim_assoc_and_lock(gcwq); @@ -3860,10 +3860,10 @@ static int __init init_workqueues(void) struct worker_pool *pool; spin_lock_init(&gcwq->lock); - gcwq->cpu = cpu; for_each_worker_pool(pool, gcwq) { pool->gcwq = gcwq; + pool->cpu = cpu; pool->flags |= POOL_DISASSOCIATED; INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->idle_list); From d565ed6309300304de4a865a04adef07a85edc45 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 17/37] workqueue: move global_cwq->lock to worker_pool Move gcwq->lock to pool->lock. The conversion is mostly straight-forward. Things worth noting are * In many places, this removes the need to use gcwq completely. pool is used directly instead. get_std_worker_pool() is added to help some of these conversions. This also leaves get_work_gcwq() without any user. Removed. * In hotplug and freezer paths, the pools belonging to a CPU are often processed together. This patch makes those paths hold locks of all pools, with highpri lock nested inside, to keep the conversion straight-forward. These nested lockings will be removed by following patches. 
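As a condensed sketch of that nested locking, taken from the hotplug/freezer hunks below (the pool index doubles as the lockdep subclass so the highpri lock can nest inside the normal one):

	local_irq_disable();
	for_each_worker_pool(pool, gcwq)
		spin_lock_nested(&pool->lock, pool - gcwq->pools);

	/* ... operate on all pools of this CPU ... */

	for_each_worker_pool(pool, gcwq)
		spin_unlock(&pool->lock);
	local_irq_enable();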
This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 316 ++++++++++++++++++++++----------------------- 1 file changed, 154 insertions(+), 162 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 366132bd226f..c93651208760 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -108,12 +108,12 @@ enum { * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. * - * L: gcwq->lock protected. Access with gcwq->lock held. + * L: pool->lock protected. Access with pool->lock held. * - * X: During normal operation, modification requires gcwq->lock and - * should be done only from local cpu. Either disabling preemption - * on local cpu or grabbing gcwq->lock is enough for read access. - * If POOL_DISASSOCIATED is set, it's identical to L. + * X: During normal operation, modification requires pool->lock and should + * be done only from local cpu. Either disabling preemption on local + * cpu or grabbing pool->lock is enough for read access. If + * POOL_DISASSOCIATED is set, it's identical to L. * * F: wq->flush_mutex protected. * @@ -124,6 +124,7 @@ enum { struct worker_pool { struct global_cwq *gcwq; /* I: the owning gcwq */ + spinlock_t lock; /* the pool lock */ unsigned int cpu; /* I: the associated cpu */ int id; /* I: pool ID */ unsigned int flags; /* X: flags */ @@ -152,8 +153,6 @@ struct worker_pool { * target workqueues. */ struct global_cwq { - spinlock_t lock; /* the gcwq lock */ - struct worker_pool pools[NR_STD_WORKER_POOLS]; /* normal and highpri pools */ } ____cacheline_aligned_in_smp; @@ -487,6 +486,13 @@ static struct worker_pool *worker_pool_by_id(int pool_id) return idr_find(&worker_pool_idr, pool_id); } +static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) +{ + struct global_cwq *gcwq = get_gcwq(cpu); + + return &gcwq->pools[highpri]; +} + static atomic_t *get_pool_nr_running(struct worker_pool *pool) { int cpu = pool->cpu; @@ -628,13 +634,6 @@ static int get_work_pool_id(struct work_struct *work) return pool ? pool->id : WORK_OFFQ_POOL_NONE; } -static struct global_cwq *get_work_gcwq(struct work_struct *work) -{ - struct worker_pool *pool = get_work_pool(work); - - return pool ? pool->gcwq : NULL; -} - static void mark_work_canceling(struct work_struct *work) { unsigned long pool_id = get_work_pool_id(work); @@ -653,7 +652,7 @@ static bool work_is_canceling(struct work_struct *work) /* * Policy functions. These define the policies on how the global worker * pools are managed. Unless noted otherwise, these functions assume that - * they're being called with gcwq->lock held. + * they're being called with pool->lock held. */ static bool __need_more_worker(struct worker_pool *pool) @@ -738,7 +737,7 @@ static struct worker *first_worker(struct worker_pool *pool) * Wake up the first idle worker of @pool. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void wake_up_worker(struct worker_pool *pool) { @@ -813,7 +812,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, * NOT_RUNNING is clear. 
This means that we're bound to and * running on the local cpu w/ rq lock held and preemption * disabled, which in turn means that none else could be - * manipulating idle_list, so dereferencing idle_list without gcwq + * manipulating idle_list, so dereferencing idle_list without pool * lock is safe. */ if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist)) @@ -832,7 +831,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, * woken up. * * CONTEXT: - * spin_lock_irq(gcwq->lock) + * spin_lock_irq(pool->lock) */ static inline void worker_set_flags(struct worker *worker, unsigned int flags, bool wakeup) @@ -869,7 +868,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, * Clear @flags in @worker->flags and adjust nr_running accordingly. * * CONTEXT: - * spin_lock_irq(gcwq->lock) + * spin_lock_irq(pool->lock) */ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) { @@ -918,7 +917,7 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) * function. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). * * RETURNS: * Pointer to worker which is executing @work if found, NULL @@ -954,7 +953,7 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool, * nested inside outer list_for_each_entry_safe(). * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void move_linked_works(struct work_struct *work, struct list_head *head, struct work_struct **nextp) @@ -1007,7 +1006,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * decrement nr_in_flight of its cwq and handle workqueue flushing. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) { @@ -1071,7 +1070,7 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) static int try_to_grab_pending(struct work_struct *work, bool is_dwork, unsigned long *flags) { - struct global_cwq *gcwq; + struct worker_pool *pool; local_irq_save(*flags); @@ -1096,19 +1095,19 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, * The queueing is in progress, or it is already queued. Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. */ - gcwq = get_work_gcwq(work); - if (!gcwq) + pool = get_work_pool(work); + if (!pool) goto fail; - spin_lock(&gcwq->lock); + spin_lock(&pool->lock); if (!list_empty(&work->entry)) { /* - * This work is queued, but perhaps we locked the wrong gcwq. - * In that case we must see the new value after rmb(), see - * insert_work()->wmb(). + * This work is queued, but perhaps we locked the wrong + * pool. In that case we must see the new value after + * rmb(), see insert_work()->wmb(). */ smp_rmb(); - if (gcwq == get_work_gcwq(work)) { + if (pool == get_work_pool(work)) { debug_work_deactivate(work); /* @@ -1126,11 +1125,11 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work)); - spin_unlock(&gcwq->lock); + spin_unlock(&pool->lock); return 1; } } - spin_unlock(&gcwq->lock); + spin_unlock(&pool->lock); fail: local_irq_restore(*flags); if (work_is_canceling(work)) @@ -1150,7 +1149,7 @@ fail: * @extra_flags is or'd to work_struct flags. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). 
*/ static void insert_work(struct cpu_workqueue_struct *cwq, struct work_struct *work, struct list_head *head, @@ -1193,23 +1192,22 @@ static bool is_chained_work(struct workqueue_struct *wq) for_each_gcwq_cpu(cpu) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct worker_pool *pool = cwq->pool; - struct global_cwq *gcwq = pool->gcwq; struct worker *worker; struct hlist_node *pos; int i; - spin_lock_irqsave(&gcwq->lock, flags); + spin_lock_irqsave(&pool->lock, flags); for_each_busy_worker(worker, i, pos, pool) { if (worker->task != current) continue; - spin_unlock_irqrestore(&gcwq->lock, flags); + spin_unlock_irqrestore(&pool->lock, flags); /* * I'm @worker, no locking necessary. See if @work * is headed to the same workqueue. */ return worker->current_cwq->wq == wq; } - spin_unlock_irqrestore(&gcwq->lock, flags); + spin_unlock_irqrestore(&pool->lock, flags); } return false; } @@ -1217,7 +1215,8 @@ static bool is_chained_work(struct workqueue_struct *wq) static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { - struct global_cwq *gcwq; + bool highpri = wq->flags & WQ_HIGHPRI; + struct worker_pool *pool; struct cpu_workqueue_struct *cwq; struct list_head *worklist; unsigned int work_flags; @@ -1238,7 +1237,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; - /* determine gcwq to use */ + /* determine pool to use */ if (!(wq->flags & WQ_UNBOUND)) { struct worker_pool *last_pool; @@ -1251,38 +1250,37 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, * work needs to be queued on that cpu to guarantee * non-reentrancy. */ - gcwq = get_gcwq(cpu); + pool = get_std_worker_pool(cpu, highpri); last_pool = get_work_pool(work); - if (last_pool && last_pool->gcwq != gcwq) { - struct global_cwq *last_gcwq = last_pool->gcwq; + if (last_pool && last_pool != pool) { struct worker *worker; - spin_lock(&last_gcwq->lock); + spin_lock(&last_pool->lock); worker = find_worker_executing_work(last_pool, work); if (worker && worker->current_cwq->wq == wq) - gcwq = last_gcwq; + pool = last_pool; else { /* meh... not running there, queue here */ - spin_unlock(&last_gcwq->lock); - spin_lock(&gcwq->lock); + spin_unlock(&last_pool->lock); + spin_lock(&pool->lock); } } else { - spin_lock(&gcwq->lock); + spin_lock(&pool->lock); } } else { - gcwq = get_gcwq(WORK_CPU_UNBOUND); - spin_lock(&gcwq->lock); + pool = get_std_worker_pool(WORK_CPU_UNBOUND, highpri); + spin_lock(&pool->lock); } - /* gcwq determined, get cwq and queue */ - cwq = get_cwq(gcwq->pools[0].cpu, wq); + /* pool determined, get cwq and queue */ + cwq = get_cwq(pool->cpu, wq); trace_workqueue_queue_work(req_cpu, cwq, work); if (WARN_ON(!list_empty(&work->entry))) { - spin_unlock(&gcwq->lock); + spin_unlock(&pool->lock); return; } @@ -1300,7 +1298,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, insert_work(cwq, work, worklist, work_flags); - spin_unlock(&gcwq->lock); + spin_unlock(&pool->lock); } /** @@ -1523,7 +1521,7 @@ EXPORT_SYMBOL_GPL(mod_delayed_work); * necessary. * * LOCKING: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void worker_enter_idle(struct worker *worker) { @@ -1546,7 +1544,7 @@ static void worker_enter_idle(struct worker *worker) /* * Sanity check nr_running. 
Because gcwq_unbind_fn() releases - * gcwq->lock between setting %WORKER_UNBOUND and zapping + * pool->lock between setting %WORKER_UNBOUND and zapping * nr_running, the warning may trigger spuriously. Check iff * unbind is not in progress. */ @@ -1562,7 +1560,7 @@ static void worker_enter_idle(struct worker *worker) * @worker is leaving idle state. Update stats. * * LOCKING: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void worker_leave_idle(struct worker *worker) { @@ -1597,7 +1595,7 @@ static void worker_leave_idle(struct worker *worker) * guarantee the scheduling requirement described in the first paragraph. * * CONTEXT: - * Might sleep. Called without any lock but returns with gcwq->lock + * Might sleep. Called without any lock but returns with pool->lock * held. * * RETURNS: @@ -1605,10 +1603,9 @@ static void worker_leave_idle(struct worker *worker) * bound), %false if offline. */ static bool worker_maybe_bind_and_lock(struct worker *worker) -__acquires(&gcwq->lock) +__acquires(&pool->lock) { struct worker_pool *pool = worker->pool; - struct global_cwq *gcwq = pool->gcwq; struct task_struct *task = worker->task; while (true) { @@ -1621,14 +1618,14 @@ __acquires(&gcwq->lock) if (!(pool->flags & POOL_DISASSOCIATED)) set_cpus_allowed_ptr(task, get_cpu_mask(pool->cpu)); - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); if (pool->flags & POOL_DISASSOCIATED) return false; if (task_cpu(task) == pool->cpu && cpumask_equal(¤t->cpus_allowed, get_cpu_mask(pool->cpu))) return true; - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); /* * We've raced with CPU hot[un]plug. Give it a breather @@ -1647,15 +1644,13 @@ __acquires(&gcwq->lock) */ static void idle_worker_rebind(struct worker *worker) { - struct global_cwq *gcwq = worker->pool->gcwq; - /* CPU may go down again inbetween, clear UNBOUND only on success */ if (worker_maybe_bind_and_lock(worker)) worker_clr_flags(worker, WORKER_UNBOUND); /* rebind complete, become available again */ list_add(&worker->entry, &worker->pool->idle_list); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&worker->pool->lock); } /* @@ -1667,12 +1662,11 @@ static void idle_worker_rebind(struct worker *worker) static void busy_worker_rebind_fn(struct work_struct *work) { struct worker *worker = container_of(work, struct worker, rebind_work); - struct global_cwq *gcwq = worker->pool->gcwq; if (worker_maybe_bind_and_lock(worker)) worker_clr_flags(worker, WORKER_UNBOUND); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&worker->pool->lock); } /** @@ -1704,10 +1698,10 @@ static void rebind_workers(struct global_cwq *gcwq) struct hlist_node *pos; int i; - lockdep_assert_held(&gcwq->lock); - - for_each_worker_pool(pool, gcwq) + for_each_worker_pool(pool, gcwq) { lockdep_assert_held(&pool->assoc_mutex); + lockdep_assert_held(&pool->lock); + } /* dequeue and kick idle ones */ for_each_worker_pool(pool, gcwq) { @@ -1785,19 +1779,18 @@ static struct worker *alloc_worker(void) */ static struct worker *create_worker(struct worker_pool *pool) { - struct global_cwq *gcwq = pool->gcwq; const char *pri = std_worker_pool_pri(pool) ? 
"H" : ""; struct worker *worker = NULL; int id = -1; - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); while (ida_get_new(&pool->worker_ida, &id)) { - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL)) goto fail; - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); } - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); worker = alloc_worker(); if (!worker) @@ -1838,9 +1831,9 @@ static struct worker *create_worker(struct worker_pool *pool) return worker; fail: if (id >= 0) { - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); ida_remove(&pool->worker_ida, id); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); } kfree(worker); return NULL; @@ -1853,7 +1846,7 @@ fail: * Make the gcwq aware of @worker and start it. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). */ static void start_worker(struct worker *worker) { @@ -1870,12 +1863,11 @@ static void start_worker(struct worker *worker) * Destroy @worker and adjust @gcwq stats accordingly. * * CONTEXT: - * spin_lock_irq(gcwq->lock) which is released and regrabbed. + * spin_lock_irq(pool->lock) which is released and regrabbed. */ static void destroy_worker(struct worker *worker) { struct worker_pool *pool = worker->pool; - struct global_cwq *gcwq = pool->gcwq; int id = worker->id; /* sanity check frenzy */ @@ -1890,21 +1882,20 @@ static void destroy_worker(struct worker *worker) list_del_init(&worker->entry); worker->flags |= WORKER_DIE; - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); kthread_stop(worker->task); kfree(worker); - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); ida_remove(&pool->worker_ida, id); } static void idle_worker_timeout(unsigned long __pool) { struct worker_pool *pool = (void *)__pool; - struct global_cwq *gcwq = pool->gcwq; - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); if (too_many_workers(pool)) { struct worker *worker; @@ -1923,7 +1914,7 @@ static void idle_worker_timeout(unsigned long __pool) } } - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); } static bool send_mayday(struct work_struct *work) @@ -1948,10 +1939,9 @@ static bool send_mayday(struct work_struct *work) static void gcwq_mayday_timeout(unsigned long __pool) { struct worker_pool *pool = (void *)__pool; - struct global_cwq *gcwq = pool->gcwq; struct work_struct *work; - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); if (need_to_create_worker(pool)) { /* @@ -1964,7 +1954,7 @@ static void gcwq_mayday_timeout(unsigned long __pool) send_mayday(work); } - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); } @@ -1983,24 +1973,22 @@ static void gcwq_mayday_timeout(unsigned long __pool) * may_start_working() true. * * LOCKING: - * spin_lock_irq(gcwq->lock) which may be released and regrabbed + * spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. Called only from * manager. * * RETURNS: - * false if no action was taken and gcwq->lock stayed locked, true + * false if no action was taken and pool->lock stayed locked, true * otherwise. 
*/ static bool maybe_create_worker(struct worker_pool *pool) -__releases(&gcwq->lock) -__acquires(&gcwq->lock) +__releases(&pool->lock) +__acquires(&pool->lock) { - struct global_cwq *gcwq = pool->gcwq; - if (!need_to_create_worker(pool)) return false; restart: - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); @@ -2011,7 +1999,7 @@ restart: worker = create_worker(pool); if (worker) { del_timer_sync(&pool->mayday_timer); - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); start_worker(worker); BUG_ON(need_to_create_worker(pool)); return true; @@ -2028,7 +2016,7 @@ restart: } del_timer_sync(&pool->mayday_timer); - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); if (need_to_create_worker(pool)) goto restart; return true; @@ -2042,11 +2030,11 @@ restart: * IDLE_WORKER_TIMEOUT. * * LOCKING: - * spin_lock_irq(gcwq->lock) which may be released and regrabbed + * spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Called only from manager. * * RETURNS: - * false if no action was taken and gcwq->lock stayed locked, true + * false if no action was taken and pool->lock stayed locked, true * otherwise. */ static bool maybe_destroy_workers(struct worker_pool *pool) @@ -2085,12 +2073,12 @@ static bool maybe_destroy_workers(struct worker_pool *pool) * and may_start_working() is true. * * CONTEXT: - * spin_lock_irq(gcwq->lock) which may be released and regrabbed + * spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. Does GFP_KERNEL allocations. * * RETURNS: - * false if no action was taken and gcwq->lock stayed locked, true if - * some action was taken. + * spin_lock_irq(pool->lock) which may be released and regrabbed + * multiple times. Does GFP_KERNEL allocations. */ static bool manage_workers(struct worker *worker) { @@ -2112,10 +2100,10 @@ static bool manage_workers(struct worker *worker) * manager against CPU hotplug. * * assoc_mutex would always be free unless CPU hotplug is in - * progress. trylock first without dropping @gcwq->lock. + * progress. trylock first without dropping @pool->lock. */ if (unlikely(!mutex_trylock(&pool->assoc_mutex))) { - spin_unlock_irq(&pool->gcwq->lock); + spin_unlock_irq(&pool->lock); mutex_lock(&pool->assoc_mutex); /* * CPU hotplug could have happened while we were waiting @@ -2162,15 +2150,14 @@ static bool manage_workers(struct worker *worker) * call this function to process a work. * * CONTEXT: - * spin_lock_irq(gcwq->lock) which is released and regrabbed. + * spin_lock_irq(pool->lock) which is released and regrabbed. */ static void process_one_work(struct worker *worker, struct work_struct *work) -__releases(&gcwq->lock) -__acquires(&gcwq->lock) +__releases(&pool->lock) +__acquires(&pool->lock) { struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct worker_pool *pool = worker->pool; - struct global_cwq *gcwq = pool->gcwq; bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; int work_color; struct worker *collision; @@ -2225,7 +2212,7 @@ __acquires(&gcwq->lock) worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); /* - * Unbound gcwq isn't concurrency managed and work items should be + * Unbound pool isn't concurrency managed and work items should be * executed ASAP. Wake up another worker if necessary. 
*/ if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) @@ -2233,13 +2220,13 @@ __acquires(&gcwq->lock) /* * Record the last pool and clear PENDING which should be the last - * update to @work. Also, do this inside @gcwq->lock so that + * update to @work. Also, do this inside @pool->lock so that * PENDING and queued state changes happen together while IRQ is * disabled. */ set_work_pool_and_clear_pending(work, pool->id); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); @@ -2262,7 +2249,7 @@ __acquires(&gcwq->lock) dump_stack(); } - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); /* clear cpu intensive status */ if (unlikely(cpu_intensive)) @@ -2285,7 +2272,7 @@ __acquires(&gcwq->lock) * fetches a work from the top and executes it. * * CONTEXT: - * spin_lock_irq(gcwq->lock) which may be released and regrabbed + * spin_lock_irq(pool->lock) which may be released and regrabbed * multiple times. */ static void process_scheduled_works(struct worker *worker) @@ -2311,16 +2298,15 @@ static int worker_thread(void *__worker) { struct worker *worker = __worker; struct worker_pool *pool = worker->pool; - struct global_cwq *gcwq = pool->gcwq; /* tell the scheduler that this is a workqueue worker */ worker->task->flags |= PF_WQ_WORKER; woke_up: - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); /* we are off idle list if destruction or rebind is requested */ if (unlikely(list_empty(&worker->entry))) { - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); /* if DIE is set, destruction is requested */ if (worker->flags & WORKER_DIE) { @@ -2379,15 +2365,15 @@ sleep: goto recheck; /* - * gcwq->lock is held and there's no work to process and no - * need to manage, sleep. Workers are woken up only while - * holding gcwq->lock or from local cpu, so setting the - * current state before releasing gcwq->lock is enough to - * prevent losing any event. + * pool->lock is held and there's no work to process and no need to + * manage, sleep. Workers are woken up only while holding + * pool->lock or from local cpu, so setting the current state + * before releasing pool->lock is enough to prevent losing any + * event. */ worker_enter_idle(worker); __set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); schedule(); goto woke_up; } @@ -2443,7 +2429,6 @@ repeat: unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); struct worker_pool *pool = cwq->pool; - struct global_cwq *gcwq = pool->gcwq; struct work_struct *work, *n; __set_current_state(TASK_RUNNING); @@ -2465,14 +2450,14 @@ repeat: process_scheduled_works(rescuer); /* - * Leave this gcwq. If keep_working() is %true, notify a + * Leave this pool. If keep_working() is %true, notify a * regular worker; otherwise, we end up with 0 concurrency * and stalling the execution. */ if (keep_working(pool)) wake_up_worker(pool); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); } /* rescuers should never participate in concurrency management */ @@ -2514,7 +2499,7 @@ static void wq_barrier_func(struct work_struct *work) * underneath us, so we can't reliably determine cwq from @target. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). 
*/ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, struct wq_barrier *barr, @@ -2524,7 +2509,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, unsigned int linked = 0; /* - * debugobject calls are safe here even with gcwq->lock locked + * debugobject calls are safe here even with pool->lock locked * as we know for sure that this will not trigger any of the * checks and call back into the fixup functions where we * might deadlock. @@ -2597,9 +2582,9 @@ static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - struct global_cwq *gcwq = cwq->pool->gcwq; + struct worker_pool *pool = cwq->pool; - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); if (flush_color >= 0) { BUG_ON(cwq->flush_color != -1); @@ -2616,7 +2601,7 @@ static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, cwq->work_color = work_color; } - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); } if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush)) @@ -2813,9 +2798,9 @@ reflush: struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); bool drained; - spin_lock_irq(&cwq->pool->gcwq->lock); + spin_lock_irq(&cwq->pool->lock); drained = !cwq->nr_active && list_empty(&cwq->delayed_works); - spin_unlock_irq(&cwq->pool->gcwq->lock); + spin_unlock_irq(&cwq->pool->lock); if (drained) continue; @@ -2838,25 +2823,23 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) { struct worker *worker = NULL; struct worker_pool *pool; - struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; might_sleep(); pool = get_work_pool(work); if (!pool) return false; - gcwq = pool->gcwq; - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); if (!list_empty(&work->entry)) { /* * See the comment near try_to_grab_pending()->smp_rmb(). - * If it was re-queued to a different gcwq under us, we + * If it was re-queued to a different pool under us, we * are not going to wait. */ smp_rmb(); cwq = get_work_cwq(work); - if (unlikely(!cwq || gcwq != cwq->pool->gcwq)) + if (unlikely(!cwq || pool != cwq->pool)) goto already_gone; } else { worker = find_worker_executing_work(pool, work); @@ -2866,7 +2849,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) } insert_wq_barrier(cwq, barr, work, worker); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); /* * If @max_active is 1 or rescuer is in use, flushing another work @@ -2882,7 +2865,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) return true; already_gone: - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); return false; } @@ -3404,7 +3387,7 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); * increased. * * CONTEXT: - * spin_lock_irq(gcwq->lock). + * spin_lock_irq(pool->lock). 
*/ static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active) { @@ -3438,15 +3421,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct worker_pool *pool = cwq->pool; - struct global_cwq *gcwq = pool->gcwq; - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); if (!(wq->flags & WQ_FREEZABLE) || !(pool->flags & POOL_FREEZING)) cwq_set_max_active(cwq, max_active); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); } spin_unlock(&workqueue_lock); @@ -3489,22 +3471,20 @@ EXPORT_SYMBOL_GPL(workqueue_congested); unsigned int work_busy(struct work_struct *work) { struct worker_pool *pool = get_work_pool(work); - struct global_cwq *gcwq; unsigned long flags; unsigned int ret = 0; if (!pool) return 0; - gcwq = pool->gcwq; - spin_lock_irqsave(&gcwq->lock, flags); + spin_lock_irqsave(&pool->lock, flags); if (work_pending(work)) ret |= WORK_BUSY_PENDING; if (find_worker_executing_work(pool, work)) ret |= WORK_BUSY_RUNNING; - spin_unlock_irqrestore(&gcwq->lock, flags); + spin_unlock_irqrestore(&pool->lock, flags); return ret; } @@ -3532,7 +3512,10 @@ static void gcwq_claim_assoc_and_lock(struct global_cwq *gcwq) for_each_worker_pool(pool, gcwq) mutex_lock_nested(&pool->assoc_mutex, pool - gcwq->pools); - spin_lock_irq(&gcwq->lock); + + local_irq_disable(); + for_each_worker_pool(pool, gcwq) + spin_lock_nested(&pool->lock, pool - gcwq->pools); } /* release manager positions */ @@ -3540,7 +3523,10 @@ static void gcwq_release_assoc_and_unlock(struct global_cwq *gcwq) { struct worker_pool *pool; - spin_unlock_irq(&gcwq->lock); + for_each_worker_pool(pool, gcwq) + spin_unlock(&pool->lock); + local_irq_enable(); + for_each_worker_pool(pool, gcwq) mutex_unlock(&pool->assoc_mutex); } @@ -3621,9 +3607,9 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, if (!worker) return NOTIFY_BAD; - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); start_worker(worker); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); } break; @@ -3709,7 +3695,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu); * gcwq->worklist. * * CONTEXT: - * Grabs and releases workqueue_lock and gcwq->lock's. + * Grabs and releases workqueue_lock and pool->lock's. */ void freeze_workqueues_begin(void) { @@ -3725,9 +3711,11 @@ void freeze_workqueues_begin(void) struct worker_pool *pool; struct workqueue_struct *wq; - spin_lock_irq(&gcwq->lock); + local_irq_disable(); for_each_worker_pool(pool, gcwq) { + spin_lock_nested(&pool->lock, pool - gcwq->pools); + WARN_ON_ONCE(pool->flags & POOL_FREEZING); pool->flags |= POOL_FREEZING; } @@ -3739,7 +3727,9 @@ void freeze_workqueues_begin(void) cwq->max_active = 0; } - spin_unlock_irq(&gcwq->lock); + for_each_worker_pool(pool, gcwq) + spin_unlock(&pool->lock); + local_irq_enable(); } spin_unlock(&workqueue_lock); @@ -3798,7 +3788,7 @@ out_unlock: * frozen works are transferred to their respective gcwq worklists. * * CONTEXT: - * Grabs and releases workqueue_lock and gcwq->lock's. + * Grabs and releases workqueue_lock and pool->lock's. 
*/ void thaw_workqueues(void) { @@ -3814,9 +3804,11 @@ void thaw_workqueues(void) struct worker_pool *pool; struct workqueue_struct *wq; - spin_lock_irq(&gcwq->lock); + local_irq_disable(); for_each_worker_pool(pool, gcwq) { + spin_lock_nested(&pool->lock, pool - gcwq->pools); + WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); pool->flags &= ~POOL_FREEZING; } @@ -3831,10 +3823,11 @@ void thaw_workqueues(void) cwq_set_max_active(cwq, wq->saved_max_active); } - for_each_worker_pool(pool, gcwq) + for_each_worker_pool(pool, gcwq) { wake_up_worker(pool); - - spin_unlock_irq(&gcwq->lock); + spin_unlock(&pool->lock); + } + local_irq_enable(); } workqueue_freezing = false; @@ -3859,10 +3852,9 @@ static int __init init_workqueues(void) struct global_cwq *gcwq = get_gcwq(cpu); struct worker_pool *pool; - spin_lock_init(&gcwq->lock); - for_each_worker_pool(pool, gcwq) { pool->gcwq = gcwq; + spin_lock_init(&pool->lock); pool->cpu = cpu; pool->flags |= POOL_DISASSOCIATED; INIT_LIST_HEAD(&pool->worklist); @@ -3897,9 +3889,9 @@ static int __init init_workqueues(void) worker = create_worker(pool); BUG_ON(!worker); - spin_lock_irq(&gcwq->lock); + spin_lock_irq(&pool->lock); start_worker(worker); - spin_unlock_irq(&gcwq->lock); + spin_unlock_irq(&pool->lock); } } From 94cf58bb2907bd2702fce2266955e29ab5261f53 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 18/37] workqueue: make hotplug processing per-pool Instead of holding locks from both pools and then processing the pools together, make hotplug processing per-pool - grab locks of one pool, process it, release it and then proceed to the next pool. rebind_workers() is updated to take and process @pool instead of @gcwq which results in a lot of de-indentation. gcwq_claim_assoc_and_lock() and its counterpart are replaced with in-line per-pool locking. While this patch changes processing order across pools, order within each pool remains the same. As each pool is independent, this shouldn't break anything. This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 149 +++++++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 87 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c93651208760..fd400f8c9514 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1670,10 +1670,10 @@ static void busy_worker_rebind_fn(struct work_struct *work) } /** - * rebind_workers - rebind all workers of a gcwq to the associated CPU - * @gcwq: gcwq of interest + * rebind_workers - rebind all workers of a pool to the associated CPU + * @pool: pool of interest * - * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding + * @pool->cpu is coming online. Rebind all workers to the CPU. Rebinding * is different for idle and busy ones. * * Idle ones will be removed from the idle_list and woken up. They will @@ -1691,60 +1691,53 @@ static void busy_worker_rebind_fn(struct work_struct *work) * including the manager will not appear on @idle_list until rebind is * complete, making local wake-ups safe. 
*/ -static void rebind_workers(struct global_cwq *gcwq) +static void rebind_workers(struct worker_pool *pool) { - struct worker_pool *pool; struct worker *worker, *n; struct hlist_node *pos; int i; - for_each_worker_pool(pool, gcwq) { - lockdep_assert_held(&pool->assoc_mutex); - lockdep_assert_held(&pool->lock); - } + lockdep_assert_held(&pool->assoc_mutex); + lockdep_assert_held(&pool->lock); /* dequeue and kick idle ones */ - for_each_worker_pool(pool, gcwq) { - list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { - /* - * idle workers should be off @pool->idle_list - * until rebind is complete to avoid receiving - * premature local wake-ups. - */ - list_del_init(&worker->entry); + list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { + /* + * idle workers should be off @pool->idle_list until rebind + * is complete to avoid receiving premature local wake-ups. + */ + list_del_init(&worker->entry); - /* - * worker_thread() will see the above dequeuing - * and call idle_worker_rebind(). - */ - wake_up_process(worker->task); - } + /* + * worker_thread() will see the above dequeuing and call + * idle_worker_rebind(). + */ + wake_up_process(worker->task); + } - /* rebind busy workers */ - for_each_busy_worker(worker, i, pos, pool) { - struct work_struct *rebind_work = &worker->rebind_work; - struct workqueue_struct *wq; + /* rebind busy workers */ + for_each_busy_worker(worker, i, pos, pool) { + struct work_struct *rebind_work = &worker->rebind_work; + struct workqueue_struct *wq; - if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, - work_data_bits(rebind_work))) - continue; + if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, + work_data_bits(rebind_work))) + continue; - debug_work_activate(rebind_work); + debug_work_activate(rebind_work); - /* - * wq doesn't really matter but let's keep - * @worker->pool and @cwq->pool consistent for - * sanity. - */ - if (std_worker_pool_pri(worker->pool)) - wq = system_highpri_wq; - else - wq = system_wq; + /* + * wq doesn't really matter but let's keep @worker->pool + * and @cwq->pool consistent for sanity. + */ + if (std_worker_pool_pri(worker->pool)) + wq = system_highpri_wq; + else + wq = system_wq; - insert_work(get_cwq(pool->cpu, wq), rebind_work, - worker->scheduled.next, - work_color_to_flags(WORK_NO_COLOR)); - } + insert_work(get_cwq(pool->cpu, wq), rebind_work, + worker->scheduled.next, + work_color_to_flags(WORK_NO_COLOR)); } } @@ -3497,7 +3490,7 @@ EXPORT_SYMBOL_GPL(work_busy); * are a lot of assumptions on strong associations among work, cwq and * gcwq which make migrating pending and scheduled works very * difficult to implement without impacting hot paths. Secondly, - * gcwqs serve mix of short, long and very long running works making + * worker pools serve mix of short, long and very long running works making * blocked draining impractical. * * This is solved by allowing the pools to be disassociated from the CPU @@ -3505,32 +3498,6 @@ EXPORT_SYMBOL_GPL(work_busy); * cpu comes back online. 
*/ -/* claim manager positions of all pools */ -static void gcwq_claim_assoc_and_lock(struct global_cwq *gcwq) -{ - struct worker_pool *pool; - - for_each_worker_pool(pool, gcwq) - mutex_lock_nested(&pool->assoc_mutex, pool - gcwq->pools); - - local_irq_disable(); - for_each_worker_pool(pool, gcwq) - spin_lock_nested(&pool->lock, pool - gcwq->pools); -} - -/* release manager positions */ -static void gcwq_release_assoc_and_unlock(struct global_cwq *gcwq) -{ - struct worker_pool *pool; - - for_each_worker_pool(pool, gcwq) - spin_unlock(&pool->lock); - local_irq_enable(); - - for_each_worker_pool(pool, gcwq) - mutex_unlock(&pool->assoc_mutex); -} - static void gcwq_unbind_fn(struct work_struct *work) { struct global_cwq *gcwq = get_gcwq(smp_processor_id()); @@ -3539,17 +3506,19 @@ static void gcwq_unbind_fn(struct work_struct *work) struct hlist_node *pos; int i; - BUG_ON(gcwq->pools[0].cpu != smp_processor_id()); - - gcwq_claim_assoc_and_lock(gcwq); - - /* - * We've claimed all manager positions. Make all workers unbound - * and set DISASSOCIATED. Before this, all workers except for the - * ones which are still executing works from before the last CPU - * down must be on the cpu. After this, they may become diasporas. - */ for_each_worker_pool(pool, gcwq) { + BUG_ON(pool->cpu != smp_processor_id()); + + mutex_lock(&pool->assoc_mutex); + spin_lock_irq(&pool->lock); + + /* + * We've claimed all manager positions. Make all workers + * unbound and set DISASSOCIATED. Before this, all workers + * except for the ones which are still executing works from + * before the last CPU down must be on the cpu. After + * this, they may become diasporas. + */ list_for_each_entry(worker, &pool->idle_list, entry) worker->flags |= WORKER_UNBOUND; @@ -3557,9 +3526,10 @@ static void gcwq_unbind_fn(struct work_struct *work) worker->flags |= WORKER_UNBOUND; pool->flags |= POOL_DISASSOCIATED; - } - gcwq_release_assoc_and_unlock(gcwq); + spin_unlock_irq(&pool->lock); + mutex_unlock(&pool->assoc_mutex); + } /* * Call schedule() so that we cross rq->lock and thus can guarantee @@ -3615,11 +3585,16 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: - gcwq_claim_assoc_and_lock(gcwq); - for_each_worker_pool(pool, gcwq) + for_each_worker_pool(pool, gcwq) { + mutex_lock(&pool->assoc_mutex); + spin_lock_irq(&pool->lock); + pool->flags &= ~POOL_DISASSOCIATED; - rebind_workers(gcwq); - gcwq_release_assoc_and_unlock(gcwq); + rebind_workers(pool); + + spin_unlock_irq(&pool->lock); + mutex_unlock(&pool->assoc_mutex); + } break; } return NOTIFY_OK; From a1056305fa98c7e13b38718658a8b07a5d926460 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:33 -0800 Subject: [PATCH 19/37] workqueue: make freezing/thawing per-pool Instead of holding locks from both pools and then processing the pools together, make freezing/thwaing per-pool - grab locks of one pool, process it, release it and then proceed to the next pool. While this patch changes processing order across pools, order within each pool remains the same. As each pool is independent, this shouldn't break anything. This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. 
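Schematically, freeze_workqueues_begin() turns into a plain per-pool loop (condensed from the hunk below; thaw_workqueues() follows the same shape):

	for_each_worker_pool(pool, gcwq) {
		spin_lock_irq(&pool->lock);

		pool->flags |= POOL_FREEZING;

		list_for_each_entry(wq, &workqueues, list) {
			struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);

			if (cwq && cwq->pool == pool &&
			    (wq->flags & WQ_FREEZABLE))
				cwq->max_active = 0;
		}

		spin_unlock_irq(&pool->lock);
	}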
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 50 ++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index fd400f8c9514..b609bfba134b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3686,25 +3686,22 @@ void freeze_workqueues_begin(void) struct worker_pool *pool; struct workqueue_struct *wq; - local_irq_disable(); - for_each_worker_pool(pool, gcwq) { - spin_lock_nested(&pool->lock, pool - gcwq->pools); + spin_lock_irq(&pool->lock); WARN_ON_ONCE(pool->flags & POOL_FREEZING); pool->flags |= POOL_FREEZING; + + list_for_each_entry(wq, &workqueues, list) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + + if (cwq && cwq->pool == pool && + (wq->flags & WQ_FREEZABLE)) + cwq->max_active = 0; + } + + spin_unlock_irq(&pool->lock); } - - list_for_each_entry(wq, &workqueues, list) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - - if (cwq && wq->flags & WQ_FREEZABLE) - cwq->max_active = 0; - } - - for_each_worker_pool(pool, gcwq) - spin_unlock(&pool->lock); - local_irq_enable(); } spin_unlock(&workqueue_lock); @@ -3779,30 +3776,27 @@ void thaw_workqueues(void) struct worker_pool *pool; struct workqueue_struct *wq; - local_irq_disable(); - for_each_worker_pool(pool, gcwq) { - spin_lock_nested(&pool->lock, pool - gcwq->pools); + spin_lock_irq(&pool->lock); WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); pool->flags &= ~POOL_FREEZING; - } - list_for_each_entry(wq, &workqueues, list) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + list_for_each_entry(wq, &workqueues, list) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZABLE)) - continue; + if (!cwq || cwq->pool != pool || + !(wq->flags & WQ_FREEZABLE)) + continue; - /* restore max_active and repopulate worklist */ - cwq_set_max_active(cwq, wq->saved_max_active); - } + /* restore max_active and repopulate worklist */ + cwq_set_max_active(cwq, wq->saved_max_active); + } - for_each_worker_pool(pool, gcwq) { wake_up_worker(pool); - spin_unlock(&pool->lock); + + spin_unlock_irq(&pool->lock); } - local_irq_enable(); } workqueue_freezing = false; From 38db41d984f17938631420ff78160dda7f182d24 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:34 -0800 Subject: [PATCH 20/37] workqueue: replace for_each_worker_pool() with for_each_std_worker_pool() for_each_std_worker_pool() takes @cpu instead of @gcwq. This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. 
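In callers the conversion is mechanical; a minimal before/after sketch (loop bodies elided):

    /* before: look up the gcwq, then walk its pools */
    struct global_cwq *gcwq = get_gcwq(cpu);

    for_each_worker_pool(pool, gcwq) {
        /* ... per-pool work ... */
    }

    /* after: walk the standard pools of a CPU directly */
    for_each_std_worker_pool(pool, cpu) {
        /* ... same body, no gcwq lookup needed ... */
    }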
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 39 +++++++++++++++++---------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b609bfba134b..bd639c185da1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -250,9 +250,9 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); #define CREATE_TRACE_POINTS #include -#define for_each_worker_pool(pool, gcwq) \ - for ((pool) = &(gcwq)->pools[0]; \ - (pool) < &(gcwq)->pools[NR_STD_WORKER_POOLS]; (pool)++) +#define for_each_std_worker_pool(pool, cpu) \ + for ((pool) = &get_gcwq((cpu))->pools[0]; \ + (pool) < &get_gcwq((cpu))->pools[NR_STD_WORKER_POOLS]; (pool)++) #define for_each_busy_worker(worker, i, pos, pool) \ hash_for_each(pool->busy_hash, i, pos, worker, hentry) @@ -3500,14 +3500,14 @@ EXPORT_SYMBOL_GPL(work_busy); static void gcwq_unbind_fn(struct work_struct *work) { - struct global_cwq *gcwq = get_gcwq(smp_processor_id()); + int cpu = smp_processor_id(); struct worker_pool *pool; struct worker *worker; struct hlist_node *pos; int i; - for_each_worker_pool(pool, gcwq) { - BUG_ON(pool->cpu != smp_processor_id()); + for_each_std_worker_pool(pool, cpu) { + BUG_ON(cpu != smp_processor_id()); mutex_lock(&pool->assoc_mutex); spin_lock_irq(&pool->lock); @@ -3541,15 +3541,15 @@ static void gcwq_unbind_fn(struct work_struct *work) /* * Sched callbacks are disabled now. Zap nr_running. After this, * nr_running stays zero and need_more_worker() and keep_working() - * are always true as long as the worklist is not empty. @gcwq now - * behaves as unbound (in terms of concurrency management) gcwq - * which is served by workers tied to the CPU. + * are always true as long as the worklist is not empty. Pools on + * @cpu now behave as unbound (in terms of concurrency management) + * pools which are served by workers tied to the CPU. * * On return from this function, the current worker would trigger * unbound chain execution of pending work items if other workers * didn't already. 
*/ - for_each_worker_pool(pool, gcwq) + for_each_std_worker_pool(pool, cpu) atomic_set(get_pool_nr_running(pool), 0); } @@ -3562,12 +3562,11 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct global_cwq *gcwq = get_gcwq(cpu); struct worker_pool *pool; switch (action & ~CPU_TASKS_FROZEN) { case CPU_UP_PREPARE: - for_each_worker_pool(pool, gcwq) { + for_each_std_worker_pool(pool, cpu) { struct worker *worker; if (pool->nr_workers) @@ -3585,7 +3584,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb, case CPU_DOWN_FAILED: case CPU_ONLINE: - for_each_worker_pool(pool, gcwq) { + for_each_std_worker_pool(pool, cpu) { mutex_lock(&pool->assoc_mutex); spin_lock_irq(&pool->lock); @@ -3682,11 +3681,10 @@ void freeze_workqueues_begin(void) workqueue_freezing = true; for_each_gcwq_cpu(cpu) { - struct global_cwq *gcwq = get_gcwq(cpu); struct worker_pool *pool; struct workqueue_struct *wq; - for_each_worker_pool(pool, gcwq) { + for_each_std_worker_pool(pool, cpu) { spin_lock_irq(&pool->lock); WARN_ON_ONCE(pool->flags & POOL_FREEZING); @@ -3772,11 +3770,10 @@ void thaw_workqueues(void) goto out_unlock; for_each_gcwq_cpu(cpu) { - struct global_cwq *gcwq = get_gcwq(cpu); struct worker_pool *pool; struct workqueue_struct *wq; - for_each_worker_pool(pool, gcwq) { + for_each_std_worker_pool(pool, cpu) { spin_lock_irq(&pool->lock); WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); @@ -3818,11 +3815,10 @@ static int __init init_workqueues(void) /* initialize gcwqs */ for_each_gcwq_cpu(cpu) { - struct global_cwq *gcwq = get_gcwq(cpu); struct worker_pool *pool; - for_each_worker_pool(pool, gcwq) { - pool->gcwq = gcwq; + for_each_std_worker_pool(pool, cpu) { + pool->gcwq = get_gcwq(cpu); spin_lock_init(&pool->lock); pool->cpu = cpu; pool->flags |= POOL_DISASSOCIATED; @@ -3847,10 +3843,9 @@ static int __init init_workqueues(void) /* create the initial worker */ for_each_online_gcwq_cpu(cpu) { - struct global_cwq *gcwq = get_gcwq(cpu); struct worker_pool *pool; - for_each_worker_pool(pool, gcwq) { + for_each_std_worker_pool(pool, cpu) { struct worker *worker; if (cpu != WORK_CPU_UNBOUND) From 4e8f0a609677a25f504527e50981df146c5b3d08 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:34 -0800 Subject: [PATCH 21/37] workqueue: remove worker_pool->gcwq The only remaining user of pool->gcwq is std_worker_pool_pri(). Reimplement it using get_gcwq() and remove worker_pool->gcwq. This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. 
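The replacement is plain pointer arithmetic against the per-CPU pool array; roughly (the explanatory comment is added here for illustration):

    static int std_worker_pool_pri(struct worker_pool *pool)
    {
        /* index of @pool within its gcwq's pools[]: 0 = normal, 1 = highpri */
        return pool - get_gcwq(pool->cpu)->pools;
    }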
Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index bd639c185da1..475a447aa6d8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -123,7 +123,6 @@ enum { /* struct worker is defined in workqueue_internal.h */ struct worker_pool { - struct global_cwq *gcwq; /* I: the owning gcwq */ spinlock_t lock; /* the pool lock */ unsigned int cpu; /* I: the associated cpu */ int id; /* I: pool ID */ @@ -451,11 +450,6 @@ static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); -static int std_worker_pool_pri(struct worker_pool *pool) -{ - return pool - pool->gcwq->pools; -} - static struct global_cwq *get_gcwq(unsigned int cpu) { if (cpu != WORK_CPU_UNBOUND) @@ -464,6 +458,11 @@ static struct global_cwq *get_gcwq(unsigned int cpu) return &unbound_global_cwq; } +static int std_worker_pool_pri(struct worker_pool *pool) +{ + return pool - get_gcwq(pool->cpu)->pools; +} + /* allocate ID and assign it to @pool */ static int worker_pool_assign_id(struct worker_pool *pool) { @@ -3818,7 +3817,6 @@ static int __init init_workqueues(void) struct worker_pool *pool; for_each_std_worker_pool(pool, cpu) { - pool->gcwq = get_gcwq(cpu); spin_lock_init(&pool->lock); pool->cpu = cpu; pool->flags |= POOL_DISASSOCIATED; From a60dc39c016a65bfdbd05c43b3707962d5ed04c7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:34 -0800 Subject: [PATCH 22/37] workqueue: remove global_cwq global_cwq is now nothing but a container for per-cpu standard worker_pools. Declare the worker pools directly as cpu/unbound_std_worker_pools[] and remove global_cwq. * ____cacheline_aligned_in_smp moved from global_cwq to worker_pool. This probably would have made sense even before this change as we want each pool to be aligned. * get_gcwq() is replaced with std_worker_pools() which returns the pointer to the standard pool array for a given CPU. * __alloc_workqueue_key() updated to use get_std_worker_pool() instead of open-coding pool determination. This is part of an effort to remove global_cwq and make worker_pool the top level abstraction, which in turn will help implementing worker pools with user-specified attributes. v2: Joonsoo pointed out that it'd better to align struct worker_pool rather than the array so that every pool is aligned. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan Cc: Joonsoo Kim --- kernel/workqueue.c | 46 ++++++++++++++----------------------- kernel/workqueue_internal.h | 1 - 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 475a447aa6d8..224580f7459c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -144,16 +144,6 @@ struct worker_pool { struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ struct ida worker_ida; /* L: for worker IDs */ -}; - -/* - * Global per-cpu workqueue. There's one and only one for each cpu - * and all works are queued and processed here regardless of their - * target workqueues. 
- */ -struct global_cwq { - struct worker_pool pools[NR_STD_WORKER_POOLS]; - /* normal and highpri pools */ } ____cacheline_aligned_in_smp; /* @@ -250,8 +240,8 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); #include #define for_each_std_worker_pool(pool, cpu) \ - for ((pool) = &get_gcwq((cpu))->pools[0]; \ - (pool) < &get_gcwq((cpu))->pools[NR_STD_WORKER_POOLS]; (pool)++) + for ((pool) = &std_worker_pools(cpu)[0]; \ + (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++) #define for_each_busy_worker(worker, i, pos, pool) \ hash_for_each(pool->busy_hash, i, pos, worker, hentry) @@ -427,19 +417,19 @@ static LIST_HEAD(workqueues); static bool workqueue_freezing; /* W: have wqs started freezing? */ /* - * The almighty global cpu workqueues. nr_running is the only field - * which is expected to be used frequently by other cpus via - * try_to_wake_up(). Put it in a separate cacheline. + * The CPU standard worker pools. nr_running is the only field which is + * expected to be used frequently by other cpus via try_to_wake_up(). Put + * it in a separate cacheline. */ -static DEFINE_PER_CPU(struct global_cwq, global_cwq); +static DEFINE_PER_CPU(struct worker_pool [NR_STD_WORKER_POOLS], + cpu_std_worker_pools); static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_STD_WORKER_POOLS]); /* - * Global cpu workqueue and nr_running counter for unbound gcwq. The pools - * for online CPUs have POOL_DISASSOCIATED set, and all their workers have - * WORKER_UNBOUND set. + * Standard worker pools and nr_running counter for unbound CPU. The pools + * have POOL_DISASSOCIATED set, and all workers have WORKER_UNBOUND set. */ -static struct global_cwq unbound_global_cwq; +static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS]; static atomic_t unbound_pool_nr_running[NR_STD_WORKER_POOLS] = { [0 ... 
NR_STD_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ }; @@ -450,17 +440,17 @@ static DEFINE_IDR(worker_pool_idr); static int worker_thread(void *__worker); -static struct global_cwq *get_gcwq(unsigned int cpu) +static struct worker_pool *std_worker_pools(int cpu) { if (cpu != WORK_CPU_UNBOUND) - return &per_cpu(global_cwq, cpu); + return per_cpu(cpu_std_worker_pools, cpu); else - return &unbound_global_cwq; + return unbound_std_worker_pools; } static int std_worker_pool_pri(struct worker_pool *pool) { - return pool - get_gcwq(pool->cpu)->pools; + return pool - std_worker_pools(pool->cpu); } /* allocate ID and assign it to @pool */ @@ -487,9 +477,9 @@ static struct worker_pool *worker_pool_by_id(int pool_id) static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) { - struct global_cwq *gcwq = get_gcwq(cpu); + struct worker_pool *pools = std_worker_pools(cpu); - return &gcwq->pools[highpri]; + return &pools[highpri]; } static atomic_t *get_pool_nr_running(struct worker_pool *pool) @@ -3269,11 +3259,9 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - struct global_cwq *gcwq = get_gcwq(cpu); - int pool_idx = (bool)(flags & WQ_HIGHPRI); BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); - cwq->pool = &gcwq->pools[pool_idx]; + cwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI); cwq->wq = wq; cwq->flush_color = -1; cwq->max_active = max_active; diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index cc35e7e62091..328be4a269aa 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -10,7 +10,6 @@ #include #include -struct global_cwq; struct worker_pool; /* From e6e380ed92555533740d5f670640f6f1868132de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:34 -0800 Subject: [PATCH 23/37] workqueue: rename nr_running variables Rename per-cpu and unbound nr_running variables such that they match the pool variables. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 224580f7459c..db8d4b7471ac 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -423,14 +423,15 @@ static bool workqueue_freezing; /* W: have wqs started freezing? */ */ static DEFINE_PER_CPU(struct worker_pool [NR_STD_WORKER_POOLS], cpu_std_worker_pools); -static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_STD_WORKER_POOLS]); +static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t [NR_STD_WORKER_POOLS], + cpu_std_pool_nr_running); /* * Standard worker pools and nr_running counter for unbound CPU. The pools * have POOL_DISASSOCIATED set, and all workers have WORKER_UNBOUND set. */ static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS]; -static atomic_t unbound_pool_nr_running[NR_STD_WORKER_POOLS] = { +static atomic_t unbound_std_pool_nr_running[NR_STD_WORKER_POOLS] = { [0 ... 
NR_STD_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ }; @@ -488,9 +489,9 @@ static atomic_t *get_pool_nr_running(struct worker_pool *pool) int idx = std_worker_pool_pri(pool); if (cpu != WORK_CPU_UNBOUND) - return &per_cpu(pool_nr_running, cpu)[idx]; + return &per_cpu(cpu_std_pool_nr_running, cpu)[idx]; else - return &unbound_pool_nr_running[idx]; + return &unbound_std_pool_nr_running[idx]; } static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, From 706026c2141113886f61e1ad2738c9a7723ec69c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:01:34 -0800 Subject: [PATCH 24/37] workqueue: post global_cwq removal cleanups Remove remaining references to gcwq. * __next_gcwq_cpu() steals __next_wq_cpu() name. The original __next_wq_cpu() became __next_cwq_cpu(). * s/for_each_gcwq_cpu/for_each_wq_cpu/ s/for_each_online_gcwq_cpu/for_each_online_wq_cpu/ * s/gcwq_mayday_timeout/pool_mayday_timeout/ * s/gcwq_unbind_fn/wq_unbind_fn/ * Drop references to gcwq in comments. This patch doesn't introduce any functional changes. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- kernel/workqueue.c | 104 ++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index db8d4b7471ac..577de1073f24 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -246,8 +246,8 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); #define for_each_busy_worker(worker, i, pos, pool) \ hash_for_each(pool->busy_hash, i, pos, worker, hentry) -static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, - unsigned int sw) +static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, + unsigned int sw) { if (cpu < nr_cpu_ids) { if (sw & 1) { @@ -261,39 +261,39 @@ static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, return WORK_CPU_NONE; } -static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, - struct workqueue_struct *wq) +static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, + struct workqueue_struct *wq) { - return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); + return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); } /* * CPU iterators * - * An extra gcwq is defined for an invalid cpu number + * An extra cpu number is defined using an invalid cpu number * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any - * specific CPU. The following iterators are similar to - * for_each_*_cpu() iterators but also considers the unbound gcwq. + * specific CPU. The following iterators are similar to for_each_*_cpu() + * iterators but also considers the unbound CPU. 
* - * for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND - * for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND + * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND + * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND * for_each_cwq_cpu() : possible CPUs for bound workqueues, * WORK_CPU_UNBOUND for unbound workqueues */ -#define for_each_gcwq_cpu(cpu) \ - for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \ +#define for_each_wq_cpu(cpu) \ + for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \ (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3)) + (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3)) -#define for_each_online_gcwq_cpu(cpu) \ - for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \ +#define for_each_online_wq_cpu(cpu) \ + for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \ (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3)) + (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) #define for_each_cwq_cpu(cpu, wq) \ - for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \ + for ((cpu) = __next_cwq_cpu(-1, cpu_possible_mask, (wq)); \ (cpu) < WORK_CPU_NONE; \ - (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) + (cpu) = __next_cwq_cpu((cpu), cpu_possible_mask, (wq))) #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -655,7 +655,7 @@ static bool __need_more_worker(struct worker_pool *pool) * running workers. * * Note that, because unbound workers never contribute to nr_running, this - * function will always return %true for unbound gcwq as long as the + * function will always return %true for unbound pools as long as the * worklist isn't empty. */ static bool need_more_worker(struct worker_pool *pool) @@ -1129,14 +1129,14 @@ fail: } /** - * insert_work - insert a work into gcwq + * insert_work - insert a work into a pool * @cwq: cwq @work belongs to * @work: work to insert * @head: insertion point * @extra_flags: extra WORK_STRUCT_* flags to set * - * Insert @work which belongs to @cwq into @gcwq after @head. - * @extra_flags is or'd to work_struct flags. + * Insert @work which belongs to @cwq after @head. @extra_flags is or'd to + * work_struct flags. * * CONTEXT: * spin_lock_irq(pool->lock). @@ -1179,7 +1179,7 @@ static bool is_chained_work(struct workqueue_struct *wq) unsigned long flags; unsigned int cpu; - for_each_gcwq_cpu(cpu) { + for_each_wq_cpu(cpu) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct worker_pool *pool = cwq->pool; struct worker *worker; @@ -1533,7 +1533,7 @@ static void worker_enter_idle(struct worker *worker) mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); /* - * Sanity check nr_running. Because gcwq_unbind_fn() releases + * Sanity check nr_running. Because wq_unbind_fn() releases * pool->lock between setting %WORKER_UNBOUND and zapping * nr_running, the warning may trigger spuriously. Check iff * unbind is not in progress. @@ -1563,7 +1563,7 @@ static void worker_leave_idle(struct worker *worker) } /** - * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq + * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock pool * @worker: self * * Works which are scheduled while the cpu is online must at least be @@ -1575,10 +1575,10 @@ static void worker_leave_idle(struct worker *worker) * themselves to the target cpu and may race with cpu going down or * coming online. 
kthread_bind() can't be used because it may put the * worker to already dead cpu and set_cpus_allowed_ptr() can't be used - * verbatim as it's best effort and blocking and gcwq may be + * verbatim as it's best effort and blocking and pool may be * [dis]associated in the meantime. * - * This function tries set_cpus_allowed() and locks gcwq and verifies the + * This function tries set_cpus_allowed() and locks pool and verifies the * binding against %POOL_DISASSOCIATED which is set during * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker * enters idle state or fetches works without dropping lock, it can @@ -1589,7 +1589,7 @@ static void worker_leave_idle(struct worker *worker) * held. * * RETURNS: - * %true if the associated gcwq is online (@worker is successfully + * %true if the associated pool is online (@worker is successfully * bound), %false if offline. */ static bool worker_maybe_bind_and_lock(struct worker *worker) @@ -1826,7 +1826,7 @@ fail: * start_worker - start a newly created worker * @worker: worker to start * - * Make the gcwq aware of @worker and start it. + * Make the pool aware of @worker and start it. * * CONTEXT: * spin_lock_irq(pool->lock). @@ -1843,7 +1843,7 @@ static void start_worker(struct worker *worker) * destroy_worker - destroy a workqueue worker * @worker: worker to be destroyed * - * Destroy @worker and adjust @gcwq stats accordingly. + * Destroy @worker and adjust @pool stats accordingly. * * CONTEXT: * spin_lock_irq(pool->lock) which is released and regrabbed. @@ -1919,7 +1919,7 @@ static bool send_mayday(struct work_struct *work) return true; } -static void gcwq_mayday_timeout(unsigned long __pool) +static void pool_mayday_timeout(unsigned long __pool) { struct worker_pool *pool = (void *)__pool; struct work_struct *work; @@ -2047,9 +2047,9 @@ static bool maybe_destroy_workers(struct worker_pool *pool) * manage_workers - manage worker pool * @worker: self * - * Assume the manager role and manage gcwq worker pool @worker belongs + * Assume the manager role and manage the worker pool @worker belongs * to. At any given time, there can be only zero or one manager per - * gcwq. The exclusion is handled automatically by this function. + * pool. The exclusion is handled automatically by this function. * * The caller can safely start processing works on false return. On * true return, it's guaranteed that need_to_create_worker() is false @@ -2092,11 +2092,11 @@ static bool manage_workers(struct worker *worker) * CPU hotplug could have happened while we were waiting * for assoc_mutex. Hotplug itself can't handle us * because manager isn't either on idle or busy list, and - * @gcwq's state and ours could have deviated. + * @pool's state and ours could have deviated. * * As hotplug is now excluded via assoc_mutex, we can * simply try to bind. It will succeed or fail depending - * on @gcwq's current state. Try it and adjust + * on @pool's current state. Try it and adjust * %WORKER_UNBOUND accordingly. */ if (worker_maybe_bind_and_lock(worker)) @@ -2271,8 +2271,8 @@ static void process_scheduled_works(struct worker *worker) * worker_thread - the worker thread function * @__worker: self * - * The gcwq worker thread function. There's a single dynamic pool of - * these per each cpu. These workers process all works regardless of + * The worker thread function. There are NR_CPU_WORKER_POOLS dynamic pools + * of these per each cpu. These workers process all works regardless of * their specific target workqueue. 
The only exception is works which * belong to workqueues with a rescuer which will be explained in * rescuer_thread(). @@ -2368,14 +2368,14 @@ sleep: * Workqueue rescuer thread function. There's one rescuer for each * workqueue which has WQ_RESCUER set. * - * Regular work processing on a gcwq may block trying to create a new + * Regular work processing on a pool may block trying to create a new * worker which uses GFP_KERNEL allocation which has slight chance of * developing into deadlock if some works currently on the same queue * need to be processed to satisfy the GFP_KERNEL allocation. This is * the problem rescuer solves. * - * When such condition is possible, the gcwq summons rescuers of all - * workqueues which have works queued on the gcwq and let them process + * When such condition is possible, the pool summons rescuers of all + * workqueues which have works queued on the pool and let them process * those works so that forward progress can be guaranteed. * * This should happen rarely. @@ -3476,7 +3476,7 @@ EXPORT_SYMBOL_GPL(work_busy); * * There are two challenges in supporting CPU hotplug. Firstly, there * are a lot of assumptions on strong associations among work, cwq and - * gcwq which make migrating pending and scheduled works very + * pool which make migrating pending and scheduled works very * difficult to implement without impacting hot paths. Secondly, * worker pools serve mix of short, long and very long running works making * blocked draining impractical. @@ -3486,7 +3486,7 @@ EXPORT_SYMBOL_GPL(work_busy); * cpu comes back online. */ -static void gcwq_unbind_fn(struct work_struct *work) +static void wq_unbind_fn(struct work_struct *work) { int cpu = smp_processor_id(); struct worker_pool *pool; @@ -3601,7 +3601,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb, switch (action & ~CPU_TASKS_FROZEN) { case CPU_DOWN_PREPARE: /* unbinding should happen on the local CPU */ - INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn); + INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); queue_work_on(cpu, system_highpri_wq, &unbind_work); flush_work(&unbind_work); break; @@ -3654,7 +3654,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu); * * Start freezing workqueues. After this function returns, all freezable * workqueues will queue new works to their frozen_works list instead of - * gcwq->worklist. + * pool->worklist. * * CONTEXT: * Grabs and releases workqueue_lock and pool->lock's. @@ -3668,7 +3668,7 @@ void freeze_workqueues_begin(void) BUG_ON(workqueue_freezing); workqueue_freezing = true; - for_each_gcwq_cpu(cpu) { + for_each_wq_cpu(cpu) { struct worker_pool *pool; struct workqueue_struct *wq; @@ -3715,7 +3715,7 @@ bool freeze_workqueues_busy(void) BUG_ON(!workqueue_freezing); - for_each_gcwq_cpu(cpu) { + for_each_wq_cpu(cpu) { struct workqueue_struct *wq; /* * nr_active is monotonically decreasing. It's safe @@ -3743,7 +3743,7 @@ out_unlock: * thaw_workqueues - thaw workqueues * * Thaw workqueues. Normal queueing is restored and all collected - * frozen works are transferred to their respective gcwq worklists. + * frozen works are transferred to their respective pool worklists. * * CONTEXT: * Grabs and releases workqueue_lock and pool->lock's. 
@@ -3757,7 +3757,7 @@ void thaw_workqueues(void) if (!workqueue_freezing) goto out_unlock; - for_each_gcwq_cpu(cpu) { + for_each_wq_cpu(cpu) { struct worker_pool *pool; struct workqueue_struct *wq; @@ -3801,8 +3801,8 @@ static int __init init_workqueues(void) cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); - /* initialize gcwqs */ - for_each_gcwq_cpu(cpu) { + /* initialize CPU pools */ + for_each_wq_cpu(cpu) { struct worker_pool *pool; for_each_std_worker_pool(pool, cpu) { @@ -3817,7 +3817,7 @@ static int __init init_workqueues(void) pool->idle_timer.function = idle_worker_timeout; pool->idle_timer.data = (unsigned long)pool; - setup_timer(&pool->mayday_timer, gcwq_mayday_timeout, + setup_timer(&pool->mayday_timer, pool_mayday_timeout, (unsigned long)pool); mutex_init(&pool->assoc_mutex); @@ -3829,7 +3829,7 @@ static int __init init_workqueues(void) } /* create the initial worker */ - for_each_online_gcwq_cpu(cpu) { + for_each_online_wq_cpu(cpu) { struct worker_pool *pool; for_each_std_worker_pool(pool, cpu) { From 6be195886ac26abe0194ed1bc7a9224f8a97c310 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 6 Feb 2013 18:04:53 -0800 Subject: [PATCH 25/37] workqueue: replace WORK_CPU_NONE/LAST with WORK_CPU_END Now that workqueue has moved away from gcwqs, workqueue no longer has the need to have a CPU identifier indicating "no cpu associated" - we now use WORK_OFFQ_POOL_NONE instead - and most uses of WORK_CPU_NONE are gone. The only left usage is as the end marker for for_each_*wq*() iterators, where the name WORK_CPU_NONE is confusing w/o actual WORK_CPU_NONE usages. Similarly, WORK_CPU_LAST which equals WORK_CPU_NONE no longer makes sense. Replace both WORK_CPU_NONE and LAST with WORK_CPU_END. This patch doesn't introduce any functional difference. tj: s/WORK_CPU_LAST/WORK_CPU_END/ and rewrote the description. 
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 3 +-- kernel/workqueue.c | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a94e4e84e7b1..426c39c2aaa4 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -57,8 +57,7 @@ enum { /* special cpu IDs */ WORK_CPU_UNBOUND = NR_CPUS, - WORK_CPU_NONE = NR_CPUS + 1, - WORK_CPU_LAST = WORK_CPU_NONE, + WORK_CPU_END = NR_CPUS + 1, /* * Reserve 7 bits off of cwq pointer w/ debugobjects turned diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 577de1073f24..7e11334a119f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -258,7 +258,7 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, if (sw & 2) return WORK_CPU_UNBOUND; } - return WORK_CPU_NONE; + return WORK_CPU_END; } static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, @@ -282,17 +282,17 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, */ #define for_each_wq_cpu(cpu) \ for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \ - (cpu) < WORK_CPU_NONE; \ + (cpu) < WORK_CPU_END; \ (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3)) #define for_each_online_wq_cpu(cpu) \ for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \ - (cpu) < WORK_CPU_NONE; \ + (cpu) < WORK_CPU_END; \ (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) #define for_each_cwq_cpu(cpu, wq) \ for ((cpu) = __next_cwq_cpu(-1, cpu_possible_mask, (wq)); \ - (cpu) < WORK_CPU_NONE; \ + (cpu) < WORK_CPU_END; \ (cpu) = __next_cwq_cpu((cpu), cpu_possible_mask, (wq))) #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -3796,7 +3796,7 @@ static int __init init_workqueues(void) /* make sure we have enough bits for OFFQ pool ID */ BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < - WORK_CPU_LAST * NR_STD_WORKER_POOLS); + WORK_CPU_END * NR_STD_WORKER_POOLS); cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); From 038366c5cf23ae737b9f72169dd8ade2d105755b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 6 Feb 2013 18:04:53 -0800 Subject: [PATCH 26/37] workqueue: make work_busy() test WORK_STRUCT_PENDING first Currently, work_busy() first tests whether the work has a pool associated with it and if not, considers it idle. This works fine even for delayed_work.work queued on timer, as __queue_delayed_work() sets cwq on delayed_work.work - a queued delayed_work always has its cwq and thus pool associated with it. However, we're about to update delayed_work queueing and this won't hold. Update work_busy() such that it tests WORK_STRUCT_PENDING before the associated pool. This doesn't make any noticeable behavior difference now. With work_pending() test moved, the function read a lot better with "if (!pool)" test flipped to positive. Flip it. While at it, lose the comment about now non-existent reentrant workqueues. tj: Reorganized the function and rewrote the description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7e11334a119f..a229a56f3a32 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3443,8 +3443,6 @@ EXPORT_SYMBOL_GPL(workqueue_congested); * Test whether @work is currently pending or running. 
There is no * synchronization around this function and the test result is * unreliable and only useful as advisory hints or for debugging. - * Especially for reentrant wqs, the pending state might hide the - * running state. * * RETURNS: * OR'd bitmask of WORK_BUSY_* bits. @@ -3455,17 +3453,15 @@ unsigned int work_busy(struct work_struct *work) unsigned long flags; unsigned int ret = 0; - if (!pool) - return 0; - - spin_lock_irqsave(&pool->lock, flags); - if (work_pending(work)) ret |= WORK_BUSY_PENDING; - if (find_worker_executing_work(pool, work)) - ret |= WORK_BUSY_RUNNING; - spin_unlock_irqrestore(&pool->lock, flags); + if (pool) { + spin_lock_irqsave(&pool->lock, flags); + if (find_worker_executing_work(pool, work)) + ret |= WORK_BUSY_RUNNING; + spin_unlock_irqrestore(&pool->lock, flags); + } return ret; } From 60c057bca22285efefbba033624763a778f243bf Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 6 Feb 2013 18:04:53 -0800 Subject: [PATCH 27/37] workqueue: add delayed_work->wq to simplify reentrancy handling To avoid executing the same work item from multiple CPUs concurrently, a work_struct records the last pool it was on in its ->data so that, on the next queueing, the pool can be queried to determine whether the work item is still executing or not. A delayed_work goes through timer before actually being queued on the target workqueue and the timer needs to know the target workqueue and CPU. This is currently achieved by modifying delayed_work->work.data such that it points to the cwq which points to the target workqueue and the last CPU the work item was on. __queue_delayed_work() extracts the last CPU from delayed_work->work.data and then combines it with the target workqueue to create new work.data. The only thing this rather ugly hack achieves is encoding the target workqueue into delayed_work->work.data without using a separate field, which could be a trade off one can make; unfortunately, this entangles work->data management between regular workqueue and delayed_work code by setting cwq pointer before the work item is actually queued and becomes a hindrance for further improvements of work->data handling. This can be easily made sane by adding a target workqueue field to delayed_work. While delayed_work is used widely in the kernel and this does make it a bit larger (<5%), I think this is the right trade-off especially given the prospect of much saner handling of work->data which currently involves quite tricky memory barrier dancing, and don't expect to see any measureable effect. Add delayed_work->wq and drop the delayed_work->work.data overloading. tj: Rewrote the description. 
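With the field in place the timer path no longer needs to decode a cwq out of work->data; the resulting callback is roughly:

    void delayed_work_timer_fn(unsigned long __data)
    {
        struct delayed_work *dwork = (struct delayed_work *)__data;

        /* called from an irqsafe timer with irqs already off */
        __queue_work(dwork->cpu, dwork->wq, &dwork->work);
    }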
Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 3 +++ kernel/workqueue.c | 32 +++----------------------------- 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 426c39c2aaa4..a3d7556510c3 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -109,6 +109,9 @@ struct work_struct { struct delayed_work { struct work_struct work; struct timer_list timer; + + /* target workqueue and CPU ->timer uses to queue ->work */ + struct workqueue_struct *wq; int cpu; }; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a229a56f3a32..41a502ce3802 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1339,10 +1339,9 @@ EXPORT_SYMBOL_GPL(queue_work); void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; - struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); /* should have been called from irqsafe timer with irq already off */ - __queue_work(dwork->cpu, cwq->wq, &dwork->work); + __queue_work(dwork->cpu, dwork->wq, &dwork->work); } EXPORT_SYMBOL_GPL(delayed_work_timer_fn); @@ -1351,7 +1350,6 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, { struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; - unsigned int lcpu; WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); @@ -1371,30 +1369,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, timer_stats_timer_set_start_info(&dwork->timer); - /* - * This stores cwq for the moment, for the timer_fn. Note that the - * work's pool is preserved to allow reentrance detection for - * delayed works. - */ - if (!(wq->flags & WQ_UNBOUND)) { - struct worker_pool *pool = get_work_pool(work); - - /* - * If we cannot get the last pool from @work directly, - * select the last CPU such that it avoids unnecessarily - * triggering non-reentrancy check in __queue_work(). - */ - lcpu = cpu; - if (pool) - lcpu = pool->cpu; - if (lcpu == WORK_CPU_UNBOUND) - lcpu = raw_smp_processor_id(); - } else { - lcpu = WORK_CPU_UNBOUND; - } - - set_work_cwq(work, get_cwq(lcpu, wq), 0); - + dwork->wq = wq; dwork->cpu = cpu; timer->expires = jiffies + delay; @@ -2944,8 +2919,7 @@ bool flush_delayed_work(struct delayed_work *dwork) { local_irq_disable(); if (del_timer_sync(&dwork->timer)) - __queue_work(dwork->cpu, - get_work_cwq(&dwork->work)->wq, &dwork->work); + __queue_work(dwork->cpu, dwork->wq, &dwork->work); local_irq_enable(); return flush_work(&dwork->work); } From 4468a00fd9a274fe1b30c886370d662e4a439efb Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 6 Feb 2013 18:04:53 -0800 Subject: [PATCH 28/37] workqueue: make work->data point to pool after try_to_grab_pending() We plan to use work->data pointing to cwq as the synchronization invariant when determining whether a given work item is on a locked pool or not, which requires work->data pointing to cwq only while the work item is queued on the associated pool. With delayed_work updated not to overload work->data for target workqueue recording, the only case where we still have off-queue work->data pointing to cwq is try_to_grab_pending() which doesn't update work->data after stealing a queued work item. There's no reason for try_to_grab_pending() to not update work->data to point to the pool instead of cwq, like the normal execution does. 
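For comparison, what "the normal execution does" amounts to, roughly, when a worker picks an item up (simplified sketch of the execution path):

    /* ownership passes to the worker: data identifies the pool, PENDING cleared */
    set_work_pool_and_clear_pending(work, pool->id);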
This patch adds set_work_pool_and_keep_pending() which makes work->data point to pool instead of cwq but keeps the pending bit unlike set_work_pool_and_clear_pending() (surprise!). After this patch, it's guaranteed that only queued work items point to cwqs. This patch doesn't introduce any visible behavior change. tj: Renamed the new helper function to match set_work_pool_and_clear_pending() and rewrote the description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 41a502ce3802..1a442c301ddb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -556,6 +556,13 @@ static void set_work_cwq(struct work_struct *work, WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); } +static void set_work_pool_and_keep_pending(struct work_struct *work, + int pool_id) +{ + set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, + WORK_STRUCT_PENDING); +} + static void set_work_pool_and_clear_pending(struct work_struct *work, int pool_id) { @@ -1115,6 +1122,9 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work)); + /* work->data points to cwq iff queued, point to pool */ + set_work_pool_and_keep_pending(work, pool->id); + spin_unlock(&pool->lock); return 1; } From 0b3dae68ac199fac224fea9a31907b44f0d257b3 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 6 Feb 2013 18:04:53 -0800 Subject: [PATCH 29/37] workqueue: simplify is-work-item-queued-here test Currently, determining whether a work item is queued on a locked pool involves somewhat convoluted memory barrier dancing. It goes like the following. * When a work item is queued on a pool, work->data is updated before work->entry is linked to the pending list with a wmb() inbetween. * When trying to determine whether a work item is currently queued on a pool pointed to by work->data, it locks the pool and looks at work->entry. If work->entry is linked, we then do rmb() and then check whether work->data points to the current pool. This works because, work->data can only point to a pool if it currently is or were on the pool and, * If it currently is on the pool, the tests would obviously succeed. * It it left the pool, its work->entry was cleared under pool->lock, so if we're seeing non-empty work->entry, it has to be from the work item being linked on another pool. Because work->data is updated before work->entry is linked with wmb() inbetween, work->data update from another pool is guaranteed to be visible if we do rmb() after seeing non-empty work->entry. So, we either see empty work->entry or we see updated work->data pointin to another pool. While this works, it's convoluted, to put it mildly. With recent updates, it's now guaranteed that work->data points to cwq only while the work item is queued and that updating work->data to point to cwq or back to pool is done under pool->lock, so we can simply test whether work->data points to cwq which is associated with the currently locked pool instead of the convoluted memory barrier dancing. This patch replaces the memory barrier based "are you still here, really?" test with much simpler "does work->data points to me?" 
test - if work->data points to a cwq which is associated with the currently locked pool, the work item is guaranteed to be queued on the pool as work->data can start and stop pointing to such cwq only under pool->lock and the start and stop coincide with queue and dequeue. tj: Rewrote the comments and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1a442c301ddb..251f00914295 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1068,6 +1068,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, unsigned long *flags) { struct worker_pool *pool; + struct cpu_workqueue_struct *cwq; local_irq_save(*flags); @@ -1097,14 +1098,17 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, goto fail; spin_lock(&pool->lock); - if (!list_empty(&work->entry)) { - /* - * This work is queued, but perhaps we locked the wrong - * pool. In that case we must see the new value after - * rmb(), see insert_work()->wmb(). - */ - smp_rmb(); - if (pool == get_work_pool(work)) { + /* + * work->data is guaranteed to point to cwq only while the work + * item is queued on cwq->wq, and both updating work->data to point + * to cwq on queueing and to pool on dequeueing are done under + * cwq->pool->lock. This in turn guarantees that, if work->data + * points to cwq which is associated with a locked pool, the work + * item is currently queued on that pool. + */ + cwq = get_work_cwq(work); + if (cwq) { + if (cwq->pool == pool) { debug_work_deactivate(work); /* @@ -1159,13 +1163,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq, /* we own @work, set data and link */ set_work_cwq(work, cwq, extra_flags); - - /* - * Ensure that we get the right work->data if we see the - * result of list_add() below, see try_to_grab_pending(). - */ - smp_wmb(); - list_add_tail(&work->entry, head); /* @@ -2799,15 +2796,10 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) return false; spin_lock_irq(&pool->lock); - if (!list_empty(&work->entry)) { - /* - * See the comment near try_to_grab_pending()->smp_rmb(). - * If it was re-queued to a different pool under us, we - * are not going to wait. - */ - smp_rmb(); - cwq = get_work_cwq(work); - if (unlikely(!cwq || pool != cwq->pool)) + /* see the comment in try_to_grab_pending() with the same code */ + cwq = get_work_cwq(work); + if (cwq) { + if (unlikely(cwq->pool != pool)) goto already_gone; } else { worker = find_worker_executing_work(pool, work); From 1606283622689bdc460052b4a1281c36de13fe49 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 6 Feb 2013 18:04:53 -0800 Subject: [PATCH 30/37] workqueue: cosmetic update in try_to_grab_pending() With the recent is-work-queued-here test simplification, the nested if() in try_to_grab_pending() can be collapsed. Collapse it. This patch is purely cosmetic. Signed-off-by: Tejun Heo Cc: Lai Jiangshan --- kernel/workqueue.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 251f00914295..e2dd61861fbd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1107,31 +1107,27 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, * item is currently queued on that pool. 
*/ cwq = get_work_cwq(work); - if (cwq) { - if (cwq->pool == pool) { - debug_work_deactivate(work); + if (cwq && cwq->pool == pool) { + debug_work_deactivate(work); - /* - * A delayed work item cannot be grabbed directly - * because it might have linked NO_COLOR work items - * which, if left on the delayed_list, will confuse - * cwq->nr_active management later on and cause - * stall. Make sure the work item is activated - * before grabbing. - */ - if (*work_data_bits(work) & WORK_STRUCT_DELAYED) - cwq_activate_delayed_work(work); + /* + * A delayed work item cannot be grabbed directly because + * it might have linked NO_COLOR work items which, if left + * on the delayed_list, will confuse cwq->nr_active + * management later on and cause stall. Make sure the work + * item is activated before grabbing. + */ + if (*work_data_bits(work) & WORK_STRUCT_DELAYED) + cwq_activate_delayed_work(work); - list_del_init(&work->entry); - cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work)); + list_del_init(&work->entry); + cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work)); - /* work->data points to cwq iff queued, point to pool */ - set_work_pool_and_keep_pending(work, pool->id); + /* work->data points to cwq iff queued, point to pool */ + set_work_pool_and_keep_pending(work, pool->id); - spin_unlock(&pool->lock); - return 1; - } + spin_unlock(&pool->lock); + return 1; } spin_unlock(&pool->lock); fail: From e19e397a85f33100bfa4210e256bec82fe22e167 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Jan 2013 11:39:44 -0800 Subject: [PATCH 31/37] workqueue: move nr_running into worker_pool As nr_running is likely to be accessed from other CPUs during try_to_wake_up(), it was kept outside worker_pool; however, while less frequent, other fields in worker_pool are accessed from other CPUs for, e.g., non-reentrancy check. Also, with recent pool related changes, accessing nr_running matching the worker_pool isn't as simple as it used to be. Move nr_running inside worker_pool. Keep it aligned to cacheline and define CPU pools using DEFINE_PER_CPU_SHARED_ALIGNED(). This should give at least the same cacheline behavior. get_pool_nr_running() is replaced with direct pool->nr_running accesses. Signed-off-by: Tejun Heo Cc: Joonsoo Kim --- kernel/workqueue.c | 63 ++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e2dd61861fbd..91ce7a984c22 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -144,6 +144,13 @@ struct worker_pool { struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */ struct ida worker_ida; /* L: for worker IDs */ + + /* + * The current concurrency level. As it's likely to be accessed + * from other CPUs during try_to_wake_up(), put it in a separate + * cacheline. + */ + atomic_t nr_running ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp; /* @@ -417,23 +424,12 @@ static LIST_HEAD(workqueues); static bool workqueue_freezing; /* W: have wqs started freezing? */ /* - * The CPU standard worker pools. nr_running is the only field which is - * expected to be used frequently by other cpus via try_to_wake_up(). Put - * it in a separate cacheline. - */ -static DEFINE_PER_CPU(struct worker_pool [NR_STD_WORKER_POOLS], - cpu_std_worker_pools); -static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t [NR_STD_WORKER_POOLS], - cpu_std_pool_nr_running); - -/* - * Standard worker pools and nr_running counter for unbound CPU. 
The pools - * have POOL_DISASSOCIATED set, and all workers have WORKER_UNBOUND set. + * The CPU and unbound standard worker pools. The unbound ones have + * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set. */ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], + cpu_std_worker_pools); static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS]; -static atomic_t unbound_std_pool_nr_running[NR_STD_WORKER_POOLS] = { - [0 ... NR_STD_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ -}; /* idr of all pools */ static DEFINE_MUTEX(worker_pool_idr_mutex); @@ -483,17 +479,6 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) return &pools[highpri]; } -static atomic_t *get_pool_nr_running(struct worker_pool *pool) -{ - int cpu = pool->cpu; - int idx = std_worker_pool_pri(pool); - - if (cpu != WORK_CPU_UNBOUND) - return &per_cpu(cpu_std_pool_nr_running, cpu)[idx]; - else - return &unbound_std_pool_nr_running[idx]; -} - static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, struct workqueue_struct *wq) { @@ -654,7 +639,7 @@ static bool work_is_canceling(struct work_struct *work) static bool __need_more_worker(struct worker_pool *pool) { - return !atomic_read(get_pool_nr_running(pool)); + return !atomic_read(&pool->nr_running); } /* @@ -679,9 +664,8 @@ static bool may_start_working(struct worker_pool *pool) /* Do I need to keep working? Called from currently running workers. */ static bool keep_working(struct worker_pool *pool) { - atomic_t *nr_running = get_pool_nr_running(pool); - - return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1; + return !list_empty(&pool->worklist) && + atomic_read(&pool->nr_running) <= 1; } /* Do we need a new worker? Called from manager. */ @@ -761,7 +745,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) if (!(worker->flags & WORKER_NOT_RUNNING)) { WARN_ON_ONCE(worker->pool->cpu != cpu); - atomic_inc(get_pool_nr_running(worker->pool)); + atomic_inc(&worker->pool->nr_running); } } @@ -785,7 +769,6 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, { struct worker *worker = kthread_data(task), *to_wakeup = NULL; struct worker_pool *pool; - atomic_t *nr_running; /* * Rescuers, which may not have all the fields set up like normal @@ -796,7 +779,6 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, return NULL; pool = worker->pool; - nr_running = get_pool_nr_running(pool); /* this can only happen on the local cpu */ BUG_ON(cpu != raw_smp_processor_id()); @@ -812,7 +794,8 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, * manipulating idle_list, so dereferencing idle_list without pool * lock is safe. */ - if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist)) + if (atomic_dec_and_test(&pool->nr_running) && + !list_empty(&pool->worklist)) to_wakeup = first_worker(pool); return to_wakeup ? 
to_wakeup->task : NULL; } @@ -844,14 +827,12 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, */ if ((flags & WORKER_NOT_RUNNING) && !(worker->flags & WORKER_NOT_RUNNING)) { - atomic_t *nr_running = get_pool_nr_running(pool); - if (wakeup) { - if (atomic_dec_and_test(nr_running) && + if (atomic_dec_and_test(&pool->nr_running) && !list_empty(&pool->worklist)) wake_up_worker(pool); } else - atomic_dec(nr_running); + atomic_dec(&pool->nr_running); } worker->flags |= flags; @@ -883,7 +864,7 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) */ if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) if (!(worker->flags & WORKER_NOT_RUNNING)) - atomic_inc(get_pool_nr_running(pool)); + atomic_inc(&pool->nr_running); } /** @@ -1518,7 +1499,7 @@ static void worker_enter_idle(struct worker *worker) */ WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && pool->nr_workers == pool->nr_idle && - atomic_read(get_pool_nr_running(pool))); + atomic_read(&pool->nr_running)); } /** @@ -3506,7 +3487,7 @@ static void wq_unbind_fn(struct work_struct *work) * didn't already. */ for_each_std_worker_pool(pool, cpu) - atomic_set(get_pool_nr_running(pool), 0); + atomic_set(&pool->nr_running, 0); } /* From 54d5b7d079dffa74597715a892473b474babd5b5 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 7 Feb 2013 13:14:20 -0800 Subject: [PATCH 32/37] workqueue: make get_work_pool_id() cheaper get_work_pool_id() currently first obtains pool using get_work_pool() and then return pool->id. For an off-queue work item, this involves obtaining pool ID from worker->data, performing idr_find() to find the matching pool and then returning its pool->id which of course is the same as the one which went into idr_find(). Just open code WORK_STRUCT_CWQ case and directly return pool ID from work->data. tj: The original patch dropped on-queue work item handling and renamed the function to offq_work_pool_id(). There isn't much benefit in doing so. Handling it only requires a single if() and we need at least BUG_ON(), which is also a branch, even if we drop on-queue handling. Open code WORK_STRUCT_CWQ case and keep the function in line with get_work_pool(). Rewrote the description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 91ce7a984c22..1801c37b28c4 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -611,9 +611,13 @@ static struct worker_pool *get_work_pool(struct work_struct *work) */ static int get_work_pool_id(struct work_struct *work) { - struct worker_pool *pool = get_work_pool(work); + unsigned long data = atomic_long_read(&work->data); - return pool ? pool->id : WORK_OFFQ_POOL_NONE; + if (data & WORK_STRUCT_CWQ) + return ((struct cpu_workqueue_struct *) + (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id; + + return data >> WORK_OFFQ_POOL_SHIFT; } static void mark_work_canceling(struct work_struct *work) From 8594fade39d3ad02ef856b8c53b5d7cc538a55f5 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 7 Feb 2013 13:14:20 -0800 Subject: [PATCH 33/37] workqueue: pick cwq instead of pool in __queue_work() Currently, __queue_work() chooses the pool to queue a work item to and then determines cwq from the target wq and the chosen pool. This is a bit backwards in that we can determine cwq first and simply use cwq->pool. 
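Ignoring the non-reentrancy (last pool) handling, the inversion looks roughly like this:

    /* before: pick the pool, lock it, then derive the cwq from it */
    pool = get_std_worker_pool(cpu, wq->flags & WQ_HIGHPRI);
    spin_lock(&pool->lock);
    cwq = get_cwq(pool->cpu, wq);

    /* after: pick the cwq up front and use cwq->pool throughout */
    cwq = get_cwq(cpu, wq);
    spin_lock(&cwq->pool->lock);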
This way, we can skip get_std_worker_pool() in queueing path which will be a hurdle when implementing custom worker pools. Update __queue_work() such that it chooses the target cwq and then use cwq->pool instead of the other way around. While at it, add missing {} in an if statement. This patch doesn't introduce any functional changes. tj: The original patch had two get_cwq() calls - the first to determine the pool by doing get_cwq(cpu, wq)->pool and the second to determine the matching cwq from get_cwq(pool->cpu, wq). Updated the function such that it chooses cwq instead of pool and removed the second call. Rewrote the description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1801c37b28c4..d6fdce12ca7e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1193,8 +1193,6 @@ static bool is_chained_work(struct workqueue_struct *wq) static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { - bool highpri = wq->flags & WQ_HIGHPRI; - struct worker_pool *pool; struct cpu_workqueue_struct *cwq; struct list_head *worklist; unsigned int work_flags; @@ -1215,7 +1213,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; - /* determine pool to use */ + /* determine the cwq to use */ if (!(wq->flags & WQ_UNBOUND)) { struct worker_pool *last_pool; @@ -1228,37 +1226,36 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, * work needs to be queued on that cpu to guarantee * non-reentrancy. */ - pool = get_std_worker_pool(cpu, highpri); + cwq = get_cwq(cpu, wq); last_pool = get_work_pool(work); - if (last_pool && last_pool != pool) { + if (last_pool && last_pool != cwq->pool) { struct worker *worker; spin_lock(&last_pool->lock); worker = find_worker_executing_work(last_pool, work); - if (worker && worker->current_cwq->wq == wq) - pool = last_pool; - else { + if (worker && worker->current_cwq->wq == wq) { + cwq = get_cwq(last_pool->cpu, wq); + } else { /* meh... not running there, queue here */ spin_unlock(&last_pool->lock); - spin_lock(&pool->lock); + spin_lock(&cwq->pool->lock); } } else { - spin_lock(&pool->lock); + spin_lock(&cwq->pool->lock); } } else { - pool = get_std_worker_pool(WORK_CPU_UNBOUND, highpri); - spin_lock(&pool->lock); + cwq = get_cwq(WORK_CPU_UNBOUND, wq); + spin_lock(&cwq->pool->lock); } - /* pool determined, get cwq and queue */ - cwq = get_cwq(pool->cpu, wq); + /* cwq determined, queue */ trace_workqueue_queue_work(req_cpu, cwq, work); if (WARN_ON(!list_empty(&work->entry))) { - spin_unlock(&pool->lock); + spin_unlock(&cwq->pool->lock); return; } @@ -1276,7 +1273,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, insert_work(cwq, work, worklist, work_flags); - spin_unlock(&pool->lock); + spin_unlock(&cwq->pool->lock); } /** From 1dd638149f1f9d7d7dbb32591d5c7c2a0ea36264 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Feb 2013 19:29:07 -0800 Subject: [PATCH 34/37] workqueue: fix is_chained_work() regression c9e7cf273f ("workqueue: move busy_hash from global_cwq to worker_pool") incorrectly converted is_chained_work() to use get_gcwq() inside for_each_gcwq_cpu() while removing get_gcwq(). As cwq might not exist for all possible workqueue CPUs, @cwq can be NULL and the following cwq deferences can lead to oops. 
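In sketch form, the problematic pattern was:

    for_each_wq_cpu(cpu) {                  /* walks every possible CPU + WORK_CPU_UNBOUND */
        struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
        struct worker_pool *pool = cwq->pool;   /* cwq may be NULL here -> oops */
        /* ... */
    }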
Fix it by using for_each_cwq_cpu() instead, which is the better one to use anyway as we only need to check pools that the wq is associated with. Signed-off-by: Tejun Heo --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d6fdce12ca7e..0d26ab3aee59 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1167,7 +1167,7 @@ static bool is_chained_work(struct workqueue_struct *wq) unsigned long flags; unsigned int cpu; - for_each_wq_cpu(cpu) { + for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct worker_pool *pool = cwq->pool; struct worker *worker; From 8d03ecfe471802d6afe97da97722b6924533aa82 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Feb 2013 19:29:10 -0800 Subject: [PATCH 35/37] workqueue: reimplement is_chained_work() using current_wq_worker() is_chained_work() was added before current_wq_worker() and implemented its own ham-fisted way of finding out whether %current is a workqueue worker - it iterates through all possible workers. Drop the custom implementation and reimplement using current_wq_worker(). Signed-off-by: Tejun Heo --- kernel/workqueue.c | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0d26ab3aee59..ea7f696f1060 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1159,35 +1159,18 @@ static void insert_work(struct cpu_workqueue_struct *cwq, /* * Test whether @work is being queued from another work executing on the - * same workqueue. This is rather expensive and should only be used from - * cold paths. + * same workqueue. */ static bool is_chained_work(struct workqueue_struct *wq) { - unsigned long flags; - unsigned int cpu; + struct worker *worker; - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - struct worker_pool *pool = cwq->pool; - struct worker *worker; - struct hlist_node *pos; - int i; - - spin_lock_irqsave(&pool->lock, flags); - for_each_busy_worker(worker, i, pos, pool) { - if (worker->task != current) - continue; - spin_unlock_irqrestore(&pool->lock, flags); - /* - * I'm @worker, no locking necessary. See if @work - * is headed to the same workqueue. - */ - return worker->current_cwq->wq == wq; - } - spin_unlock_irqrestore(&pool->lock, flags); - } - return false; + worker = current_wq_worker(); + /* + * Return %true iff I'm a worker execuing a work item on @wq. If + * I'm @worker, it's safe to dereference it without locking. + */ + return worker && worker->current_cwq->wq == wq; } static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, From 112202d9098aae2c36436e5178c0cf3ced423c7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 13 Feb 2013 19:29:12 -0800 Subject: [PATCH 36/37] workqueue: rename cpu_workqueue to pool_workqueue workqueue has moved away from global_cwqs to worker_pools and with the scheduled custom worker pools, workqueues will be associated with pools which don't have anything to do with CPUs. The workqueue code went through a significant amount of changes recently and mass renaming isn't likely to hurt much additionally. Let's replace 'cpu' with 'pool' so that it reflects the current design. * s/struct cpu_workqueue_struct/struct pool_workqueue/ * s/cpu_wq/pool_wq/ * s/cwq/pwq/ This patch is purely cosmetic.
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 12 +- include/trace/events/workqueue.h | 10 +- kernel/workqueue.c | 433 +++++++++++++++---------------- kernel/workqueue_internal.h | 2 +- 4 files changed, 228 insertions(+), 229 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a3d7556510c3..8afab27cdbc2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ - WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */ WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ @@ -40,7 +40,7 @@ enum { WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, - WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, + WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, @@ -60,14 +60,14 @@ enum { WORK_CPU_END = NR_CPUS + 1, /* - * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. This makes cwqs aligned to 256 bytes and allows 15 - * workqueue flush colors. + * Reserve 7 bits off of pwq pointer w/ debugobjects turned off. + * This makes pwqs aligned to 256 bytes and allows 15 workqueue + * flush colors. */ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, - /* data contains off-queue information when !WORK_STRUCT_CWQ */ + /* data contains off-queue information when !WORK_STRUCT_PWQ */ WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 4e798e384a6a..bf0e18ba6cfb 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work, /** * workqueue_queue_work - called when a work gets queued * @req_cpu: the requested cpu - * @cwq: pointer to struct cpu_workqueue_struct + * @pwq: pointer to struct pool_workqueue * @work: pointer to struct work_struct * * This event occurs when a work is queued immediately or once a @@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work, */ TRACE_EVENT(workqueue_queue_work, - TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq, + TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq, struct work_struct *work), - TP_ARGS(req_cpu, cwq, work), + TP_ARGS(req_cpu, pwq, work), TP_STRUCT__entry( __field( void *, work ) @@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work, TP_fast_assign( __entry->work = work; __entry->function = work->func; - __entry->workqueue = cwq->wq; + __entry->workqueue = pwq->wq; __entry->req_cpu = req_cpu; - __entry->cpu = cwq->pool->cpu; + __entry->cpu = pwq->pool->cpu; ), TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ea7f696f1060..0f1a264d0f22 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -154,11 +154,12 @@ struct worker_pool { } ____cacheline_aligned_in_smp; /* - * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of - * work_struct->data are used for flags and thus cwqs need to be - * aligned at two's power of the number of flag bits. 
+ * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS + * of work_struct->data are used for flags and the remaining high bits + * point to the pwq; thus, pwqs need to be aligned at two's power of the + * number of flag bits. */ -struct cpu_workqueue_struct { +struct pool_workqueue { struct worker_pool *pool; /* I: the associated pool */ struct workqueue_struct *wq; /* I: the owning workqueue */ int work_color; /* L: current color */ @@ -207,16 +208,16 @@ typedef unsigned long mayday_mask_t; struct workqueue_struct { unsigned int flags; /* W: WQ_* flags */ union { - struct cpu_workqueue_struct __percpu *pcpu; - struct cpu_workqueue_struct *single; + struct pool_workqueue __percpu *pcpu; + struct pool_workqueue *single; unsigned long v; - } cpu_wq; /* I: cwq's */ + } pool_wq; /* I: pwq's */ struct list_head list; /* W: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ int work_color; /* F: current work color */ int flush_color; /* F: current flush color */ - atomic_t nr_cwqs_to_flush; /* flush in progress */ + atomic_t nr_pwqs_to_flush; /* flush in progress */ struct wq_flusher *first_flusher; /* F: first flusher */ struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ @@ -225,7 +226,7 @@ struct workqueue_struct { struct worker *rescuer; /* I: rescue worker */ int nr_drainers; /* W: drain in progress */ - int saved_max_active; /* W: saved cwq max_active */ + int saved_max_active; /* W: saved pwq max_active */ #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -268,7 +269,7 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, return WORK_CPU_END; } -static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, +static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask, struct workqueue_struct *wq) { return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 
1 : 2); @@ -284,7 +285,7 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, * * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND - * for_each_cwq_cpu() : possible CPUs for bound workqueues, + * for_each_pwq_cpu() : possible CPUs for bound workqueues, * WORK_CPU_UNBOUND for unbound workqueues */ #define for_each_wq_cpu(cpu) \ @@ -297,10 +298,10 @@ static inline int __next_cwq_cpu(int cpu, const struct cpumask *mask, (cpu) < WORK_CPU_END; \ (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3)) -#define for_each_cwq_cpu(cpu, wq) \ - for ((cpu) = __next_cwq_cpu(-1, cpu_possible_mask, (wq)); \ +#define for_each_pwq_cpu(cpu, wq) \ + for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \ (cpu) < WORK_CPU_END; \ - (cpu) = __next_cwq_cpu((cpu), cpu_possible_mask, (wq))) + (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq))) #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -479,14 +480,14 @@ static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) return &pools[highpri]; } -static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, - struct workqueue_struct *wq) +static struct pool_workqueue *get_pwq(unsigned int cpu, + struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) { if (likely(cpu < nr_cpu_ids)) - return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); + return per_cpu_ptr(wq->pool_wq.pcpu, cpu); } else if (likely(cpu == WORK_CPU_UNBOUND)) - return wq->cpu_wq.single; + return wq->pool_wq.single; return NULL; } @@ -507,18 +508,18 @@ static int work_next_color(int color) } /* - * While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data - * contain the pointer to the queued cwq. Once execution starts, the flag + * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data + * contain the pointer to the queued pwq. Once execution starts, the flag * is cleared and the high bits contain OFFQ flags and pool ID. * - * set_work_cwq(), set_work_pool_and_clear_pending(), mark_work_canceling() - * and clear_work_data() can be used to set the cwq, pool or clear + * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling() + * and clear_work_data() can be used to set the pwq, pool or clear * work->data. These functions should only be called while the work is * owned - ie. while the PENDING bit is set. * - * get_work_pool() and get_work_cwq() can be used to obtain the pool or cwq + * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq * corresponding to a work. Pool is available once the work has been - * queued anywhere after initialization until it is sync canceled. cwq is + * queued anywhere after initialization until it is sync canceled. pwq is * available only while the work item is queued. 
* * %WORK_OFFQ_CANCELING is used to mark a work item which is being @@ -533,12 +534,11 @@ static inline void set_work_data(struct work_struct *work, unsigned long data, atomic_long_set(&work->data, data | flags | work_static(work)); } -static void set_work_cwq(struct work_struct *work, - struct cpu_workqueue_struct *cwq, +static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq, unsigned long extra_flags) { - set_work_data(work, (unsigned long)cwq, - WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); + set_work_data(work, (unsigned long)pwq, + WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags); } static void set_work_pool_and_keep_pending(struct work_struct *work, @@ -567,11 +567,11 @@ static void clear_work_data(struct work_struct *work) set_work_data(work, WORK_STRUCT_NO_POOL, 0); } -static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) +static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - if (data & WORK_STRUCT_CWQ) + if (data & WORK_STRUCT_PWQ) return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); else return NULL; @@ -589,8 +589,8 @@ static struct worker_pool *get_work_pool(struct work_struct *work) struct worker_pool *pool; int pool_id; - if (data & WORK_STRUCT_CWQ) - return ((struct cpu_workqueue_struct *) + if (data & WORK_STRUCT_PWQ) + return ((struct pool_workqueue *) (data & WORK_STRUCT_WQ_DATA_MASK))->pool; pool_id = data >> WORK_OFFQ_POOL_SHIFT; @@ -613,8 +613,8 @@ static int get_work_pool_id(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - if (data & WORK_STRUCT_CWQ) - return ((struct cpu_workqueue_struct *) + if (data & WORK_STRUCT_PWQ) + return ((struct pool_workqueue *) (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id; return data >> WORK_OFFQ_POOL_SHIFT; @@ -632,7 +632,7 @@ static bool work_is_canceling(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); - return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); + return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING); } /* @@ -961,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, *nextp = n; } -static void cwq_activate_delayed_work(struct work_struct *work) +static void pwq_activate_delayed_work(struct work_struct *work) { - struct cpu_workqueue_struct *cwq = get_work_cwq(work); + struct pool_workqueue *pwq = get_work_pwq(work); trace_workqueue_activate_work(work); - move_linked_works(work, &cwq->pool->worklist, NULL); + move_linked_works(work, &pwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); - cwq->nr_active++; + pwq->nr_active++; } -static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +static void pwq_activate_first_delayed(struct pool_workqueue *pwq) { - struct work_struct *work = list_first_entry(&cwq->delayed_works, + struct work_struct *work = list_first_entry(&pwq->delayed_works, struct work_struct, entry); - cwq_activate_delayed_work(work); + pwq_activate_delayed_work(work); } /** - * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight - * @cwq: cwq of interest + * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight + * @pwq: pwq of interest * @color: color of work which left the queue * * A work either has completed or is removed from pending queue, - * decrement nr_in_flight of its cwq and handle workqueue flushing. + * decrement nr_in_flight of its pwq and handle workqueue flushing. 
* * CONTEXT: * spin_lock_irq(pool->lock). */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) return; - cwq->nr_in_flight[color]--; + pwq->nr_in_flight[color]--; - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { + pwq->nr_active--; + if (!list_empty(&pwq->delayed_works)) { /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (pwq->nr_active < pwq->max_active) + pwq_activate_first_delayed(pwq); } /* is flush in progress and are we at the flushing tip? */ - if (likely(cwq->flush_color != color)) + if (likely(pwq->flush_color != color)) return; /* are there still in-flight works? */ - if (cwq->nr_in_flight[color]) + if (pwq->nr_in_flight[color]) return; - /* this cwq is done, clear flush_color */ - cwq->flush_color = -1; + /* this pwq is done, clear flush_color */ + pwq->flush_color = -1; /* - * If this was the last cwq, wake up the first flusher. It + * If this was the last pwq, wake up the first flusher. It * will handle the rest. */ - if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) - complete(&cwq->wq->first_flusher->done); + if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush)) + complete(&pwq->wq->first_flusher->done); } /** @@ -1053,7 +1053,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, unsigned long *flags) { struct worker_pool *pool; - struct cpu_workqueue_struct *cwq; + struct pool_workqueue *pwq; local_irq_save(*flags); @@ -1084,31 +1084,31 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, spin_lock(&pool->lock); /* - * work->data is guaranteed to point to cwq only while the work - * item is queued on cwq->wq, and both updating work->data to point - * to cwq on queueing and to pool on dequeueing are done under - * cwq->pool->lock. This in turn guarantees that, if work->data - * points to cwq which is associated with a locked pool, the work + * work->data is guaranteed to point to pwq only while the work + * item is queued on pwq->wq, and both updating work->data to point + * to pwq on queueing and to pool on dequeueing are done under + * pwq->pool->lock. This in turn guarantees that, if work->data + * points to pwq which is associated with a locked pool, the work * item is currently queued on that pool. */ - cwq = get_work_cwq(work); - if (cwq && cwq->pool == pool) { + pwq = get_work_pwq(work); + if (pwq && pwq->pool == pool) { debug_work_deactivate(work); /* * A delayed work item cannot be grabbed directly because * it might have linked NO_COLOR work items which, if left - * on the delayed_list, will confuse cwq->nr_active + * on the delayed_list, will confuse pwq->nr_active * management later on and cause stall. Make sure the work * item is activated before grabbing. 
*/ if (*work_data_bits(work) & WORK_STRUCT_DELAYED) - cwq_activate_delayed_work(work); + pwq_activate_delayed_work(work); list_del_init(&work->entry); - cwq_dec_nr_in_flight(get_work_cwq(work), get_work_color(work)); + pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work)); - /* work->data points to cwq iff queued, point to pool */ + /* work->data points to pwq iff queued, point to pool */ set_work_pool_and_keep_pending(work, pool->id); spin_unlock(&pool->lock); @@ -1125,25 +1125,24 @@ fail: /** * insert_work - insert a work into a pool - * @cwq: cwq @work belongs to + * @pwq: pwq @work belongs to * @work: work to insert * @head: insertion point * @extra_flags: extra WORK_STRUCT_* flags to set * - * Insert @work which belongs to @cwq after @head. @extra_flags is or'd to + * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to * work_struct flags. * * CONTEXT: * spin_lock_irq(pool->lock). */ -static void insert_work(struct cpu_workqueue_struct *cwq, - struct work_struct *work, struct list_head *head, - unsigned int extra_flags) +static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, + struct list_head *head, unsigned int extra_flags) { - struct worker_pool *pool = cwq->pool; + struct worker_pool *pool = pwq->pool; /* we own @work, set data and link */ - set_work_cwq(work, cwq, extra_flags); + set_work_pwq(work, pwq, extra_flags); list_add_tail(&work->entry, head); /* @@ -1170,13 +1169,13 @@ static bool is_chained_work(struct workqueue_struct *wq) * Return %true iff I'm a worker execuing a work item on @wq. If * I'm @worker, it's safe to dereference it without locking. */ - return worker && worker->current_cwq->wq == wq; + return worker && worker->current_pwq->wq == wq; } static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { - struct cpu_workqueue_struct *cwq; + struct pool_workqueue *pwq; struct list_head *worklist; unsigned int work_flags; unsigned int req_cpu = cpu; @@ -1196,7 +1195,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; - /* determine the cwq to use */ + /* determine the pwq to use */ if (!(wq->flags & WQ_UNBOUND)) { struct worker_pool *last_pool; @@ -1209,54 +1208,54 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, * work needs to be queued on that cpu to guarantee * non-reentrancy. */ - cwq = get_cwq(cpu, wq); + pwq = get_pwq(cpu, wq); last_pool = get_work_pool(work); - if (last_pool && last_pool != cwq->pool) { + if (last_pool && last_pool != pwq->pool) { struct worker *worker; spin_lock(&last_pool->lock); worker = find_worker_executing_work(last_pool, work); - if (worker && worker->current_cwq->wq == wq) { - cwq = get_cwq(last_pool->cpu, wq); + if (worker && worker->current_pwq->wq == wq) { + pwq = get_pwq(last_pool->cpu, wq); } else { /* meh... 
not running there, queue here */ spin_unlock(&last_pool->lock); - spin_lock(&cwq->pool->lock); + spin_lock(&pwq->pool->lock); } } else { - spin_lock(&cwq->pool->lock); + spin_lock(&pwq->pool->lock); } } else { - cwq = get_cwq(WORK_CPU_UNBOUND, wq); - spin_lock(&cwq->pool->lock); + pwq = get_pwq(WORK_CPU_UNBOUND, wq); + spin_lock(&pwq->pool->lock); } - /* cwq determined, queue */ - trace_workqueue_queue_work(req_cpu, cwq, work); + /* pwq determined, queue */ + trace_workqueue_queue_work(req_cpu, pwq, work); if (WARN_ON(!list_empty(&work->entry))) { - spin_unlock(&cwq->pool->lock); + spin_unlock(&pwq->pool->lock); return; } - cwq->nr_in_flight[cwq->work_color]++; - work_flags = work_color_to_flags(cwq->work_color); + pwq->nr_in_flight[pwq->work_color]++; + work_flags = work_color_to_flags(pwq->work_color); - if (likely(cwq->nr_active < cwq->max_active)) { + if (likely(pwq->nr_active < pwq->max_active)) { trace_workqueue_activate_work(work); - cwq->nr_active++; - worklist = &cwq->pool->worklist; + pwq->nr_active++; + worklist = &pwq->pool->worklist; } else { work_flags |= WORK_STRUCT_DELAYED; - worklist = &cwq->delayed_works; + worklist = &pwq->delayed_works; } - insert_work(cwq, work, worklist, work_flags); + insert_work(pwq, work, worklist, work_flags); - spin_unlock(&cwq->pool->lock); + spin_unlock(&pwq->pool->lock); } /** @@ -1661,14 +1660,14 @@ static void rebind_workers(struct worker_pool *pool) /* * wq doesn't really matter but let's keep @worker->pool - * and @cwq->pool consistent for sanity. + * and @pwq->pool consistent for sanity. */ if (std_worker_pool_pri(worker->pool)) wq = system_highpri_wq; else wq = system_wq; - insert_work(get_cwq(pool->cpu, wq), rebind_work, + insert_work(get_pwq(pool->cpu, wq), rebind_work, worker->scheduled.next, work_color_to_flags(WORK_NO_COLOR)); } @@ -1845,15 +1844,15 @@ static void idle_worker_timeout(unsigned long __pool) static bool send_mayday(struct work_struct *work) { - struct cpu_workqueue_struct *cwq = get_work_cwq(work); - struct workqueue_struct *wq = cwq->wq; + struct pool_workqueue *pwq = get_work_pwq(work); + struct workqueue_struct *wq = pwq->wq; unsigned int cpu; if (!(wq->flags & WQ_RESCUER)) return false; /* mayday mayday mayday */ - cpu = cwq->pool->cpu; + cpu = pwq->pool->cpu; /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ if (cpu == WORK_CPU_UNBOUND) cpu = 0; @@ -2082,9 +2081,9 @@ static void process_one_work(struct worker *worker, struct work_struct *work) __releases(&pool->lock) __acquires(&pool->lock) { - struct cpu_workqueue_struct *cwq = get_work_cwq(work); + struct pool_workqueue *pwq = get_work_pwq(work); struct worker_pool *pool = worker->pool; - bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; + bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE; int work_color; struct worker *collision; #ifdef CONFIG_LOCKDEP @@ -2125,7 +2124,7 @@ __acquires(&pool->lock) hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work); worker->current_work = work; worker->current_func = work->func; - worker->current_cwq = cwq; + worker->current_pwq = pwq; work_color = get_work_color(work); list_del_init(&work->entry); @@ -2154,7 +2153,7 @@ __acquires(&pool->lock) spin_unlock_irq(&pool->lock); - lock_map_acquire_read(&cwq->wq->lockdep_map); + lock_map_acquire_read(&pwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); trace_workqueue_execute_start(work); worker->current_func(work); @@ -2164,7 +2163,7 @@ __acquires(&pool->lock) */ trace_workqueue_execute_end(work); lock_map_release(&lockdep_map); - 
lock_map_release(&cwq->wq->lockdep_map); + lock_map_release(&pwq->wq->lockdep_map); if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" @@ -2185,8 +2184,8 @@ __acquires(&pool->lock) hash_del(&worker->hentry); worker->current_work = NULL; worker->current_func = NULL; - worker->current_cwq = NULL; - cwq_dec_nr_in_flight(cwq, work_color); + worker->current_pwq = NULL; + pwq_dec_nr_in_flight(pwq, work_color); } /** @@ -2353,8 +2352,8 @@ repeat: */ for_each_mayday_cpu(cpu, wq->mayday_mask) { unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; - struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); - struct worker_pool *pool = cwq->pool; + struct pool_workqueue *pwq = get_pwq(tcpu, wq); + struct worker_pool *pool = pwq->pool; struct work_struct *work, *n; __set_current_state(TASK_RUNNING); @@ -2370,7 +2369,7 @@ repeat: */ BUG_ON(!list_empty(&rescuer->scheduled)); list_for_each_entry_safe(work, n, &pool->worklist, entry) - if (get_work_cwq(work) == cwq) + if (get_work_pwq(work) == pwq) move_linked_works(work, scheduled, &n); process_scheduled_works(rescuer); @@ -2405,7 +2404,7 @@ static void wq_barrier_func(struct work_struct *work) /** * insert_wq_barrier - insert a barrier work - * @cwq: cwq to insert barrier into + * @pwq: pwq to insert barrier into * @barr: wq_barrier to insert * @target: target work to attach @barr to * @worker: worker currently executing @target, NULL if @target is not executing @@ -2422,12 +2421,12 @@ static void wq_barrier_func(struct work_struct *work) * after a work with LINKED flag set. * * Note that when @worker is non-NULL, @target may be modified - * underneath us, so we can't reliably determine cwq from @target. + * underneath us, so we can't reliably determine pwq from @target. * * CONTEXT: * spin_lock_irq(pool->lock). */ -static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, +static void insert_wq_barrier(struct pool_workqueue *pwq, struct wq_barrier *barr, struct work_struct *target, struct worker *worker) { @@ -2460,23 +2459,23 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, } debug_work_activate(&barr->work); - insert_work(cwq, &barr->work, head, + insert_work(pwq, &barr->work, head, work_color_to_flags(WORK_NO_COLOR) | linked); } /** - * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing + * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing * @wq: workqueue being flushed * @flush_color: new flush color, < 0 for no-op * @work_color: new work color, < 0 for no-op * - * Prepare cwqs for workqueue flushing. + * Prepare pwqs for workqueue flushing. * - * If @flush_color is non-negative, flush_color on all cwqs should be - * -1. If no cwq has in-flight commands at the specified color, all - * cwq->flush_color's stay at -1 and %false is returned. If any cwq - * has in flight commands, its cwq->flush_color is set to - * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq + * If @flush_color is non-negative, flush_color on all pwqs should be + * -1. If no pwq has in-flight commands at the specified color, all + * pwq->flush_color's stay at -1 and %false is returned. If any pwq + * has in flight commands, its pwq->flush_color is set to + * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq * wakeup logic is armed and %true is returned. 
* * The caller should have initialized @wq->first_flusher prior to @@ -2484,7 +2483,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, * @flush_color is negative, no flush color update is done and %false * is returned. * - * If @work_color is non-negative, all cwqs should have the same + * If @work_color is non-negative, all pwqs should have the same * work_color which is previous to @work_color and all will be * advanced to @work_color. * @@ -2495,42 +2494,42 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, * %true if @flush_color >= 0 and there's something to flush. %false * otherwise. */ -static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, +static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq, int flush_color, int work_color) { bool wait = false; unsigned int cpu; if (flush_color >= 0) { - BUG_ON(atomic_read(&wq->nr_cwqs_to_flush)); - atomic_set(&wq->nr_cwqs_to_flush, 1); + BUG_ON(atomic_read(&wq->nr_pwqs_to_flush)); + atomic_set(&wq->nr_pwqs_to_flush, 1); } - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - struct worker_pool *pool = cwq->pool; + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); + struct worker_pool *pool = pwq->pool; spin_lock_irq(&pool->lock); if (flush_color >= 0) { - BUG_ON(cwq->flush_color != -1); + BUG_ON(pwq->flush_color != -1); - if (cwq->nr_in_flight[flush_color]) { - cwq->flush_color = flush_color; - atomic_inc(&wq->nr_cwqs_to_flush); + if (pwq->nr_in_flight[flush_color]) { + pwq->flush_color = flush_color; + atomic_inc(&wq->nr_pwqs_to_flush); wait = true; } } if (work_color >= 0) { - BUG_ON(work_color != work_next_color(cwq->work_color)); - cwq->work_color = work_color; + BUG_ON(work_color != work_next_color(pwq->work_color)); + pwq->work_color = work_color; } spin_unlock_irq(&pool->lock); } - if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush)) + if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush)) complete(&wq->first_flusher->done); return wait; @@ -2581,7 +2580,7 @@ void flush_workqueue(struct workqueue_struct *wq) wq->first_flusher = &this_flusher; - if (!flush_workqueue_prep_cwqs(wq, wq->flush_color, + if (!flush_workqueue_prep_pwqs(wq, wq->flush_color, wq->work_color)) { /* nothing to flush, done */ wq->flush_color = next_color; @@ -2592,7 +2591,7 @@ void flush_workqueue(struct workqueue_struct *wq) /* wait in queue */ BUG_ON(wq->flush_color == this_flusher.flush_color); list_add_tail(&this_flusher.list, &wq->flusher_queue); - flush_workqueue_prep_cwqs(wq, -1, wq->work_color); + flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } } else { /* @@ -2659,7 +2658,7 @@ void flush_workqueue(struct workqueue_struct *wq) list_splice_tail_init(&wq->flusher_overflow, &wq->flusher_queue); - flush_workqueue_prep_cwqs(wq, -1, wq->work_color); + flush_workqueue_prep_pwqs(wq, -1, wq->work_color); } if (list_empty(&wq->flusher_queue)) { @@ -2669,7 +2668,7 @@ void flush_workqueue(struct workqueue_struct *wq) /* * Need to flush more colors. Make the next flusher - * the new first flusher and arm cwqs. + * the new first flusher and arm pwqs. 
*/ BUG_ON(wq->flush_color == wq->work_color); BUG_ON(wq->flush_color != next->flush_color); @@ -2677,7 +2676,7 @@ void flush_workqueue(struct workqueue_struct *wq) list_del_init(&next->list); wq->first_flusher = next; - if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1)) + if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1)) break; /* @@ -2720,13 +2719,13 @@ void drain_workqueue(struct workqueue_struct *wq) reflush: flush_workqueue(wq); - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); bool drained; - spin_lock_irq(&cwq->pool->lock); - drained = !cwq->nr_active && list_empty(&cwq->delayed_works); - spin_unlock_irq(&cwq->pool->lock); + spin_lock_irq(&pwq->pool->lock); + drained = !pwq->nr_active && list_empty(&pwq->delayed_works); + spin_unlock_irq(&pwq->pool->lock); if (drained) continue; @@ -2749,7 +2748,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) { struct worker *worker = NULL; struct worker_pool *pool; - struct cpu_workqueue_struct *cwq; + struct pool_workqueue *pwq; might_sleep(); pool = get_work_pool(work); @@ -2758,18 +2757,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) spin_lock_irq(&pool->lock); /* see the comment in try_to_grab_pending() with the same code */ - cwq = get_work_cwq(work); - if (cwq) { - if (unlikely(cwq->pool != pool)) + pwq = get_work_pwq(work); + if (pwq) { + if (unlikely(pwq->pool != pool)) goto already_gone; } else { worker = find_worker_executing_work(pool, work); if (!worker) goto already_gone; - cwq = worker->current_cwq; + pwq = worker->current_pwq; } - insert_wq_barrier(cwq, barr, work, worker); + insert_wq_barrier(pwq, barr, work, worker); spin_unlock_irq(&pool->lock); /* @@ -2778,11 +2777,11 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) * flusher is not running on the same workqueue by verifying write * access. */ - if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER) - lock_map_acquire(&cwq->wq->lockdep_map); + if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER) + lock_map_acquire(&pwq->wq->lockdep_map); else - lock_map_acquire_read(&cwq->wq->lockdep_map); - lock_map_release(&cwq->wq->lockdep_map); + lock_map_acquire_read(&pwq->wq->lockdep_map); + lock_map_release(&pwq->wq->lockdep_map); return true; already_gone: @@ -3092,46 +3091,46 @@ int keventd_up(void) return system_wq != NULL; } -static int alloc_cwqs(struct workqueue_struct *wq) +static int alloc_pwqs(struct workqueue_struct *wq) { /* - * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. + * pwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. * Make sure that the alignment isn't lower than that of * unsigned long long. */ - const size_t size = sizeof(struct cpu_workqueue_struct); + const size_t size = sizeof(struct pool_workqueue); const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, __alignof__(unsigned long long)); if (!(wq->flags & WQ_UNBOUND)) - wq->cpu_wq.pcpu = __alloc_percpu(size, align); + wq->pool_wq.pcpu = __alloc_percpu(size, align); else { void *ptr; /* - * Allocate enough room to align cwq and put an extra + * Allocate enough room to align pwq and put an extra * pointer at the end pointing back to the originally * allocated pointer which will be used for free. 
*/ ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); if (ptr) { - wq->cpu_wq.single = PTR_ALIGN(ptr, align); - *(void **)(wq->cpu_wq.single + 1) = ptr; + wq->pool_wq.single = PTR_ALIGN(ptr, align); + *(void **)(wq->pool_wq.single + 1) = ptr; } } /* just in case, make sure it's actually aligned */ - BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); - return wq->cpu_wq.v ? 0 : -ENOMEM; + BUG_ON(!IS_ALIGNED(wq->pool_wq.v, align)); + return wq->pool_wq.v ? 0 : -ENOMEM; } -static void free_cwqs(struct workqueue_struct *wq) +static void free_pwqs(struct workqueue_struct *wq) { if (!(wq->flags & WQ_UNBOUND)) - free_percpu(wq->cpu_wq.pcpu); - else if (wq->cpu_wq.single) { - /* the pointer to free is stored right after the cwq */ - kfree(*(void **)(wq->cpu_wq.single + 1)); + free_percpu(wq->pool_wq.pcpu); + else if (wq->pool_wq.single) { + /* the pointer to free is stored right after the pwq */ + kfree(*(void **)(wq->pool_wq.single + 1)); } } @@ -3185,25 +3184,25 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, wq->flags = flags; wq->saved_max_active = max_active; mutex_init(&wq->flush_mutex); - atomic_set(&wq->nr_cwqs_to_flush, 0); + atomic_set(&wq->nr_pwqs_to_flush, 0); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); INIT_LIST_HEAD(&wq->list); - if (alloc_cwqs(wq) < 0) + if (alloc_pwqs(wq) < 0) goto err; - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); - BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); - cwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI); - cwq->wq = wq; - cwq->flush_color = -1; - cwq->max_active = max_active; - INIT_LIST_HEAD(&cwq->delayed_works); + BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK); + pwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI); + pwq->wq = wq; + pwq->flush_color = -1; + pwq->max_active = max_active; + INIT_LIST_HEAD(&pwq->delayed_works); } if (flags & WQ_RESCUER) { @@ -3234,8 +3233,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, spin_lock(&workqueue_lock); if (workqueue_freezing && wq->flags & WQ_FREEZABLE) - for_each_cwq_cpu(cpu, wq) - get_cwq(cpu, wq)->max_active = 0; + for_each_pwq_cpu(cpu, wq) + get_pwq(cpu, wq)->max_active = 0; list_add(&wq->list, &workqueues); @@ -3244,7 +3243,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, return wq; err: if (wq) { - free_cwqs(wq); + free_pwqs(wq); free_mayday_mask(wq->mayday_mask); kfree(wq->rescuer); kfree(wq); @@ -3275,14 +3274,14 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock(&workqueue_lock); /* sanity check */ - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); int i; for (i = 0; i < WORK_NR_COLORS; i++) - BUG_ON(cwq->nr_in_flight[i]); - BUG_ON(cwq->nr_active); - BUG_ON(!list_empty(&cwq->delayed_works)); + BUG_ON(pwq->nr_in_flight[i]); + BUG_ON(pwq->nr_active); + BUG_ON(!list_empty(&pwq->delayed_works)); } if (wq->flags & WQ_RESCUER) { @@ -3291,29 +3290,29 @@ void destroy_workqueue(struct workqueue_struct *wq) kfree(wq->rescuer); } - free_cwqs(wq); + free_pwqs(wq); kfree(wq); } EXPORT_SYMBOL_GPL(destroy_workqueue); /** - * cwq_set_max_active - adjust max_active of a cwq - * @cwq: target cpu_workqueue_struct + * pwq_set_max_active - adjust max_active of a pwq + * @pwq: target pool_workqueue * @max_active: new 
max_active value. * - * Set @cwq->max_active to @max_active and activate delayed works if + * Set @pwq->max_active to @max_active and activate delayed works if * increased. * * CONTEXT: * spin_lock_irq(pool->lock). */ -static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active) +static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active) { - cwq->max_active = max_active; + pwq->max_active = max_active; - while (!list_empty(&cwq->delayed_works) && - cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + while (!list_empty(&pwq->delayed_works) && + pwq->nr_active < pwq->max_active) + pwq_activate_first_delayed(pwq); } /** @@ -3336,15 +3335,15 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) wq->saved_max_active = max_active; - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - struct worker_pool *pool = cwq->pool; + for_each_pwq_cpu(cpu, wq) { + struct pool_workqueue *pwq = get_pwq(cpu, wq); + struct worker_pool *pool = pwq->pool; spin_lock_irq(&pool->lock); if (!(wq->flags & WQ_FREEZABLE) || !(pool->flags & POOL_FREEZING)) - cwq_set_max_active(cwq, max_active); + pwq_set_max_active(pwq, max_active); spin_unlock_irq(&pool->lock); } @@ -3367,9 +3366,9 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active); */ bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct pool_workqueue *pwq = get_pwq(cpu, wq); - return !list_empty(&cwq->delayed_works); + return !list_empty(&pwq->delayed_works); } EXPORT_SYMBOL_GPL(workqueue_congested); @@ -3408,7 +3407,7 @@ EXPORT_SYMBOL_GPL(work_busy); * CPU hotplug. * * There are two challenges in supporting CPU hotplug. Firstly, there - * are a lot of assumptions on strong associations among work, cwq and + * are a lot of assumptions on strong associations among work, pwq and * pool which make migrating pending and scheduled works very * difficult to implement without impacting hot paths. Secondly, * worker pools serve mix of short, long and very long running works making @@ -3612,11 +3611,11 @@ void freeze_workqueues_begin(void) pool->flags |= POOL_FREEZING; list_for_each_entry(wq, &workqueues, list) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct pool_workqueue *pwq = get_pwq(cpu, wq); - if (cwq && cwq->pool == pool && + if (pwq && pwq->pool == pool && (wq->flags & WQ_FREEZABLE)) - cwq->max_active = 0; + pwq->max_active = 0; } spin_unlock_irq(&pool->lock); @@ -3655,13 +3654,13 @@ bool freeze_workqueues_busy(void) * to peek without lock. 
*/ list_for_each_entry(wq, &workqueues, list) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct pool_workqueue *pwq = get_pwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZABLE)) + if (!pwq || !(wq->flags & WQ_FREEZABLE)) continue; - BUG_ON(cwq->nr_active < 0); - if (cwq->nr_active) { + BUG_ON(pwq->nr_active < 0); + if (pwq->nr_active) { busy = true; goto out_unlock; } } @@ -3701,14 +3700,14 @@ void thaw_workqueues(void) pool->flags &= ~POOL_FREEZING; list_for_each_entry(wq, &workqueues, list) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct pool_workqueue *pwq = get_pwq(cpu, wq); - if (!cwq || cwq->pool != pool || + if (!pwq || pwq->pool != pool || !(wq->flags & WQ_FREEZABLE)) continue; /* restore max_active and repopulate worklist */ - cwq_set_max_active(cwq, wq->saved_max_active); + pwq_set_max_active(pwq, wq->saved_max_active); } wake_up_worker(pool); diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h index 328be4a269aa..07650264ec15 100644 --- a/kernel/workqueue_internal.h +++ b/kernel/workqueue_internal.h @@ -28,7 +28,7 @@ struct worker { struct work_struct *current_work; /* L: work being processed */ work_func_t current_func; /* L: current_work's fn */ - struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ + struct pool_workqueue *current_pwq; /* L: current_work's pwq */ struct list_head scheduled; /* L: scheduled works */ struct task_struct *task; /* I: worker task */ struct worker_pool *pool; /* I: the associated pool */ From 1438ade5670b56d5386c220e1ad4b5a824a1e585 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 24 Jan 2013 16:36:31 +0400 Subject: [PATCH 37/37] workqueue: un-GPL function delayed_work_timer_fn() commit d8e794dfd51c368ed3f686b7f4172830b60ae47b ("workqueue: set delayed_work->timer function on initialization") exports delayed_work_timer_fn() only to GPL modules. This makes delayed works unusable from non-GPL modules, because the initialization macro now requires a GPL-only symbol even though, for example, schedule_delayed_work() is available to non-GPL code. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # 3.7 --- kernel/workqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0f1a264d0f22..f4feacad3812 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1310,7 +1310,7 @@ void delayed_work_timer_fn(unsigned long __data) /* should have been called from irqsafe timer with irq already off */ __queue_work(dwork->cpu, dwork->wq, &dwork->work); } -EXPORT_SYMBOL_GPL(delayed_work_timer_fn); +EXPORT_SYMBOL(delayed_work_timer_fn); static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay)
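
[Editor's note] To illustrate the situation the last patch addresses, below is a minimal sketch of a non-GPL out-of-tree module that uses only the delayed-work API; the module, function, and variable names are hypothetical. Because DECLARE_DELAYED_WORK()/INIT_DELAYED_WORK() have statically wired the timer to delayed_work_timer_fn() since d8e794dfd51c, merely initializing a delayed work makes the module reference that symbol, so the module cannot load while the symbol is exported with EXPORT_SYMBOL_GPL().

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/workqueue.h>

/* hypothetical example module; all names here are illustrative only */
static void example_work_fn(struct work_struct *work)
{
	pr_info("example delayed work ran\n");
}

/* the static initializer references delayed_work_timer_fn() behind the scenes */
static DECLARE_DELAYED_WORK(example_dwork, example_work_fn);

static int __init example_init(void)
{
	/* queue on system_wq, firing roughly one second from now */
	schedule_delayed_work(&example_dwork, HZ);
	return 0;
}

static void __exit example_exit(void)
{
	cancel_delayed_work_sync(&example_dwork);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("Proprietary"); /* non-GPL: GPL-only exports are unavailable */

With delayed_work_timer_fn() exported via EXPORT_SYMBOL(), a module like the above links and runs; with EXPORT_SYMBOL_GPL() it is rejected at load time because the GPL-only symbol cannot be resolved for a non-GPL module.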