diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 7dcef3317689..13d55206ccf6 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -73,7 +73,6 @@ struct kthread_worker { struct kthread_work { struct list_head node; kthread_work_func_t func; - wait_queue_head_t done; struct kthread_worker *worker; }; @@ -85,7 +84,6 @@ struct kthread_work { #define KTHREAD_WORK_INIT(work, fn) { \ .node = LIST_HEAD_INIT((work).node), \ .func = (fn), \ - .done = __WAIT_QUEUE_HEAD_INITIALIZER((work).done), \ } #define DEFINE_KTHREAD_WORKER(worker) \ @@ -95,22 +93,16 @@ struct kthread_work { struct kthread_work work = KTHREAD_WORK_INIT(work, fn) /* - * kthread_worker.lock and kthread_work.done need their own lockdep class - * keys if they are defined on stack with lockdep enabled. Use the - * following macros when defining them on stack. + * kthread_worker.lock needs its own lockdep class key when defined on + * stack with lockdep enabled. Use the following macros in such cases. */ #ifdef CONFIG_LOCKDEP # define KTHREAD_WORKER_INIT_ONSTACK(worker) \ ({ init_kthread_worker(&worker); worker; }) # define DEFINE_KTHREAD_WORKER_ONSTACK(worker) \ struct kthread_worker worker = KTHREAD_WORKER_INIT_ONSTACK(worker) -# define KTHREAD_WORK_INIT_ONSTACK(work, fn) \ - ({ init_kthread_work((&work), fn); work; }) -# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) \ - struct kthread_work work = KTHREAD_WORK_INIT_ONSTACK(work, fn) #else # define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker) -# define DEFINE_KTHREAD_WORK_ONSTACK(work, fn) DEFINE_KTHREAD_WORK(work, fn) #endif extern void __init_kthread_worker(struct kthread_worker *worker, @@ -127,7 +119,6 @@ extern void __init_kthread_worker(struct kthread_worker *worker, memset((work), 0, sizeof(struct kthread_work)); \ INIT_LIST_HEAD(&(work)->node); \ (work)->func = (fn); \ - init_waitqueue_head(&(work)->done); \ } while (0) int kthread_worker_fn(void *worker_ptr); diff --git a/kernel/kthread.c b/kernel/kthread.c index c2390f41307b..ef483220e855 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -591,7 +591,7 @@ static void insert_kthread_work(struct kthread_worker *worker, list_add_tail(&work->node, pos); work->worker = worker; - if (likely(worker->task)) + if (!worker->current_work && likely(worker->task)) wake_up_process(worker->task); } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 35974ac69600..7a2e449a96b1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -265,7 +265,6 @@ struct workqueue_struct { static struct kmem_cache *pwq_cache; -static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */ static cpumask_var_t *wq_numa_possible_cpumask; /* possible CPUs of each node */ @@ -758,13 +757,6 @@ static bool too_many_workers(struct worker_pool *pool) int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ int nr_busy = pool->nr_workers - nr_idle; - /* - * nr_idle and idle_list may disagree if idle rebinding is in - * progress. Never return %true if idle_list is empty. - */ - if (list_empty(&pool->idle_list)) - return false; - return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; } @@ -850,7 +842,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) pool = worker->pool; /* this can only happen on the local cpu */ - if (WARN_ON_ONCE(cpu != raw_smp_processor_id())) + if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu)) return NULL; /* @@ -874,35 +866,22 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) * worker_set_flags - set worker flags and adjust nr_running accordingly * @worker: self * @flags: flags to set - * @wakeup: wakeup an idle worker if necessary * - * Set @flags in @worker->flags and adjust nr_running accordingly. If - * nr_running becomes zero and @wakeup is %true, an idle worker is - * woken up. + * Set @flags in @worker->flags and adjust nr_running accordingly. * * CONTEXT: * spin_lock_irq(pool->lock) */ -static inline void worker_set_flags(struct worker *worker, unsigned int flags, - bool wakeup) +static inline void worker_set_flags(struct worker *worker, unsigned int flags) { struct worker_pool *pool = worker->pool; WARN_ON_ONCE(worker->task != current); - /* - * If transitioning into NOT_RUNNING, adjust nr_running and - * wake up an idle worker as necessary if requested by - * @wakeup. - */ + /* If transitioning into NOT_RUNNING, adjust nr_running. */ if ((flags & WORKER_NOT_RUNNING) && !(worker->flags & WORKER_NOT_RUNNING)) { - if (wakeup) { - if (atomic_dec_and_test(&pool->nr_running) && - !list_empty(&pool->worklist)) - wake_up_worker(pool); - } else - atomic_dec(&pool->nr_running); + atomic_dec(&pool->nr_running); } worker->flags |= flags; @@ -1232,7 +1211,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, pwq_activate_delayed_work(work); list_del_init(&work->entry); - pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work)); + pwq_dec_nr_in_flight(pwq, get_work_color(work)); /* work->data points to pwq iff queued, point to pool */ set_work_pool_and_keep_pending(work, pool->id); @@ -1560,7 +1539,7 @@ static void worker_enter_idle(struct worker *worker) (worker->hentry.next || worker->hentry.pprev))) return; - /* can't use worker_set_flags(), also called from start_worker() */ + /* can't use worker_set_flags(), also called from create_worker() */ worker->flags |= WORKER_IDLE; pool->nr_idle++; worker->last_active = jiffies; @@ -1602,11 +1581,11 @@ static void worker_leave_idle(struct worker *worker) list_del_init(&worker->entry); } -static struct worker *alloc_worker(void) +static struct worker *alloc_worker(int node) { struct worker *worker; - worker = kzalloc(sizeof(*worker), GFP_KERNEL); + worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node); if (worker) { INIT_LIST_HEAD(&worker->entry); INIT_LIST_HEAD(&worker->scheduled); @@ -1670,6 +1649,9 @@ static void worker_detach_from_pool(struct worker *worker, detach_completion = pool->detach_completion; mutex_unlock(&pool->attach_mutex); + /* clear leftover flags without pool->lock after it is detached */ + worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND); + if (detach_completion) complete(detach_completion); } @@ -1678,8 +1660,7 @@ static void worker_detach_from_pool(struct worker *worker, * create_worker - create a new workqueue worker * @pool: pool the new worker will belong to * - * Create a new worker which is attached to @pool. The new worker must be - * started by start_worker(). + * Create and start a new worker which is attached to @pool. * * CONTEXT: * Might sleep. Does GFP_KERNEL allocations. @@ -1698,7 +1679,7 @@ static struct worker *create_worker(struct worker_pool *pool) if (id < 0) goto fail; - worker = alloc_worker(); + worker = alloc_worker(pool->node); if (!worker) goto fail; @@ -1724,6 +1705,13 @@ static struct worker *create_worker(struct worker_pool *pool) /* successful, attach the worker to the pool */ worker_attach_to_pool(worker, pool); + /* start the newly created worker */ + spin_lock_irq(&pool->lock); + worker->pool->nr_workers++; + worker_enter_idle(worker); + wake_up_process(worker->task); + spin_unlock_irq(&pool->lock); + return worker; fail: @@ -1733,44 +1721,6 @@ fail: return NULL; } -/** - * start_worker - start a newly created worker - * @worker: worker to start - * - * Make the pool aware of @worker and start it. - * - * CONTEXT: - * spin_lock_irq(pool->lock). - */ -static void start_worker(struct worker *worker) -{ - worker->pool->nr_workers++; - worker_enter_idle(worker); - wake_up_process(worker->task); -} - -/** - * create_and_start_worker - create and start a worker for a pool - * @pool: the target pool - * - * Grab the managership of @pool and create and start a new worker for it. - * - * Return: 0 on success. A negative error code otherwise. - */ -static int create_and_start_worker(struct worker_pool *pool) -{ - struct worker *worker; - - worker = create_worker(pool); - if (worker) { - spin_lock_irq(&pool->lock); - start_worker(worker); - spin_unlock_irq(&pool->lock); - } - - return worker ? 0 : -ENOMEM; -} - /** * destroy_worker - destroy a workqueue worker * @worker: worker to be destroyed @@ -1909,23 +1859,10 @@ restart: mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); while (true) { - struct worker *worker; - - worker = create_worker(pool); - if (worker) { - del_timer_sync(&pool->mayday_timer); - spin_lock_irq(&pool->lock); - start_worker(worker); - if (WARN_ON_ONCE(need_to_create_worker(pool))) - goto restart; - return true; - } - - if (!need_to_create_worker(pool)) + if (create_worker(pool) || !need_to_create_worker(pool)) break; - __set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(CREATE_COOLDOWN); + schedule_timeout_interruptible(CREATE_COOLDOWN); if (!need_to_create_worker(pool)) break; @@ -1933,6 +1870,11 @@ restart: del_timer_sync(&pool->mayday_timer); spin_lock_irq(&pool->lock); + /* + * This is necessary even after a new worker was just successfully + * created as @pool->lock was dropped and the new worker might have + * already become busy. + */ if (need_to_create_worker(pool)) goto restart; return true; @@ -2020,13 +1962,7 @@ __acquires(&pool->lock) lockdep_copy_map(&lockdep_map, &work->lockdep_map); #endif - /* - * Ensure we're on the correct CPU. DISASSOCIATED test is - * necessary to avoid spurious warnings from rescuers servicing the - * unbound or a disassociated pool. - */ - WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && - !(pool->flags & POOL_DISASSOCIATED) && + WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && raw_smp_processor_id() != pool->cpu); /* @@ -2052,17 +1988,22 @@ __acquires(&pool->lock) list_del_init(&work->entry); /* - * CPU intensive works don't participate in concurrency - * management. They're the scheduler's responsibility. + * CPU intensive works don't participate in concurrency management. + * They're the scheduler's responsibility. This takes @worker out + * of concurrency management and the next code block will chain + * execution of the pending work items. */ if (unlikely(cpu_intensive)) - worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); + worker_set_flags(worker, WORKER_CPU_INTENSIVE); /* - * Unbound pool isn't concurrency managed and work items should be - * executed ASAP. Wake up another worker if necessary. + * Wake up another worker if necessary. The condition is always + * false for normal per-cpu workers since nr_running would always + * be >= 1 at this point. This is used to chain execution of the + * pending work items for WORKER_NOT_RUNNING workers such as the + * UNBOUND and CPU_INTENSIVE ones. */ - if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) + if (need_more_worker(pool)) wake_up_worker(pool); /* @@ -2218,7 +2159,7 @@ recheck: } } while (keep_working(pool)); - worker_set_flags(worker, WORKER_PREP, false); + worker_set_flags(worker, WORKER_PREP); sleep: /* * pool->lock is held and there's no work to process and no need to @@ -2311,29 +2252,27 @@ repeat: move_linked_works(work, scheduled, &n); process_scheduled_works(rescuer); - spin_unlock_irq(&pool->lock); - - worker_detach_from_pool(rescuer, pool); - - spin_lock_irq(&pool->lock); /* * Put the reference grabbed by send_mayday(). @pool won't - * go away while we're holding its lock. + * go away while we're still attached to it. */ put_pwq(pwq); /* - * Leave this pool. If keep_working() is %true, notify a + * Leave this pool. If need_more_worker() is %true, notify a * regular worker; otherwise, we end up with 0 concurrency * and stalling the execution. */ - if (keep_working(pool)) + if (need_more_worker(pool)) wake_up_worker(pool); rescuer->pool = NULL; - spin_unlock(&pool->lock); - spin_lock(&wq_mayday_lock); + spin_unlock_irq(&pool->lock); + + worker_detach_from_pool(rescuer, pool); + + spin_lock_irq(&wq_mayday_lock); } spin_unlock_irq(&wq_mayday_lock); @@ -3458,7 +3397,7 @@ static void put_unbound_pool(struct worker_pool *pool) return; /* sanity checks */ - if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) || + if (WARN_ON(!(pool->cpu < 0)) || WARN_ON(!list_empty(&pool->worklist))) return; @@ -3524,7 +3463,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) { if (wqattrs_equal(pool->attrs, attrs)) { pool->refcnt++; - goto out_unlock; + return pool; } } @@ -3557,12 +3496,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) goto fail; /* create and start the initial worker */ - if (create_and_start_worker(pool) < 0) + if (!create_worker(pool)) goto fail; /* install */ hash_add(unbound_pool_hash, &pool->hash_node, hash); -out_unlock: + return pool; fail: if (pool) @@ -3591,11 +3530,6 @@ static void pwq_unbound_release_workfn(struct work_struct *work) if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) return; - /* - * Unlink @pwq. Synchronization against wq->mutex isn't strictly - * necessary on release but do it anyway. It's easier to verify - * and consistent with the linking path. - */ mutex_lock(&wq->mutex); list_del_rcu(&pwq->pwqs_node); is_last = list_empty(&wq->pwqs); @@ -3692,10 +3626,7 @@ static void link_pwq(struct pool_workqueue *pwq) if (!list_empty(&pwq->pwqs_node)) return; - /* - * Set the matching work_color. This is synchronized with - * wq->mutex to avoid confusing flush_workqueue(). - */ + /* set the matching work_color */ pwq->work_color = wq->work_color; /* sync max_active to the current setting */ @@ -3832,7 +3763,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq, if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) return -EINVAL; - pwq_tbl = kzalloc(wq_numa_tbl_len * sizeof(pwq_tbl[0]), GFP_KERNEL); + pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL); new_attrs = alloc_workqueue_attrs(GFP_KERNEL); tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); if (!pwq_tbl || !new_attrs || !tmp_attrs) @@ -4080,7 +4011,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, /* allocate wq and format name */ if (flags & WQ_UNBOUND) - tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]); + tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]); wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL); if (!wq) @@ -4122,7 +4053,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, if (flags & WQ_MEM_RECLAIM) { struct worker *rescuer; - rescuer = alloc_worker(); + rescuer = alloc_worker(NUMA_NO_NODE); if (!rescuer) goto err_destroy; @@ -4470,8 +4401,6 @@ static void wq_unbind_fn(struct work_struct *work) struct worker *worker; for_each_cpu_worker_pool(pool, cpu) { - WARN_ON_ONCE(cpu != smp_processor_id()); - mutex_lock(&pool->attach_mutex); spin_lock_irq(&pool->lock); @@ -4543,6 +4472,7 @@ static void rebind_workers(struct worker_pool *pool) pool->attrs->cpumask) < 0); spin_lock_irq(&pool->lock); + pool->flags &= ~POOL_DISASSOCIATED; for_each_pool_worker(worker, pool) { unsigned int worker_flags = worker->flags; @@ -4632,7 +4562,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, for_each_cpu_worker_pool(pool, cpu) { if (pool->nr_workers) continue; - if (create_and_start_worker(pool) < 0) + if (!create_worker(pool)) return NOTIFY_BAD; } break; @@ -4645,10 +4575,6 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, mutex_lock(&pool->attach_mutex); if (pool->cpu == cpu) { - spin_lock_irq(&pool->lock); - pool->flags &= ~POOL_DISASSOCIATED; - spin_unlock_irq(&pool->lock); - rebind_workers(pool); } else if (pool->cpu < 0) { restore_unbound_workers_cpumask(pool, cpu); @@ -4856,10 +4782,6 @@ static void __init wq_numa_init(void) cpumask_var_t *tbl; int node, cpu; - /* determine NUMA pwq table len - highest node id + 1 */ - for_each_node(node) - wq_numa_tbl_len = max(wq_numa_tbl_len, node + 1); - if (num_possible_nodes() <= 1) return; @@ -4876,7 +4798,7 @@ static void __init wq_numa_init(void) * available. Build one from cpu_to_node() which should have been * fully initialized by now. */ - tbl = kzalloc(wq_numa_tbl_len * sizeof(tbl[0]), GFP_KERNEL); + tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL); BUG_ON(!tbl); for_each_node(node) @@ -4936,7 +4858,7 @@ static int __init init_workqueues(void) for_each_cpu_worker_pool(pool, cpu) { pool->flags &= ~POOL_DISASSOCIATED; - BUG_ON(create_and_start_worker(pool) < 0); + BUG_ON(!create_worker(pool)); } }