summaryrefslogtreecommitdiffstats
path: root/kernel/workqueue.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--kernel/workqueue.c686
1 files changed, 243 insertions, 443 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 193e977a10ea..5dbe22aa3efd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -65,15 +65,12 @@ enum {
* be executing on any CPU. The pool behaves as an unbound one.
*
* Note that DISASSOCIATED should be flipped only while holding
- * manager_mutex to avoid changing binding state while
- * create_worker() is in progress.
+ * attach_mutex to avoid changing binding state while
+ * worker_attach_to_pool() is in progress.
*/
- POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
- POOL_FREEZING = 1 << 3, /* freeze in progress */
/* worker flags */
- WORKER_STARTED = 1 << 0, /* started */
WORKER_DIE = 1 << 1, /* die die die */
WORKER_IDLE = 1 << 2, /* is idle */
WORKER_PREP = 1 << 3, /* preparing to run works */
@@ -100,10 +97,10 @@ enum {
/*
* Rescue workers are used only on emergencies and shared by
- * all cpus. Give -20.
+ * all cpus. Give MIN_NICE.
*/
- RESCUER_NICE_LEVEL = -20,
- HIGHPRI_NICE_LEVEL = -20,
+ RESCUER_NICE_LEVEL = MIN_NICE,
+ HIGHPRI_NICE_LEVEL = MIN_NICE,
WQ_NAME_LEN = 24,
};
@@ -124,8 +121,7 @@ enum {
* cpu or grabbing pool->lock is enough for read access. If
* POOL_DISASSOCIATED is set, it's identical to L.
*
- * MG: pool->manager_mutex and pool->lock protected. Writes require both
- * locks. Reads can happen under either lock.
+ * A: pool->attach_mutex protected.
*
* PL: wq_pool_mutex protected.
*
@@ -163,8 +159,11 @@ struct worker_pool {
/* see manage_workers() for details on the two manager mutexes */
struct mutex manager_arb; /* manager arbitration */
- struct mutex manager_mutex; /* manager exclusion */
- struct idr worker_idr; /* MG: worker IDs and iteration */
+ struct mutex attach_mutex; /* attach/detach exclusion */
+ struct list_head workers; /* A: attached workers */
+ struct completion *detach_completion; /* all workers detached */
+
+ struct ida worker_ida; /* worker IDs for task name */
struct workqueue_attrs *attrs; /* I: worker attributes */
struct hlist_node hash_node; /* PL: unbound_pool_hash node */
@@ -266,7 +265,6 @@ struct workqueue_struct {
static struct kmem_cache *pwq_cache;
-static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */
static cpumask_var_t *wq_numa_possible_cpumask;
/* possible CPUs of each node */
@@ -340,16 +338,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
lockdep_is_held(&wq->mutex), \
"sched RCU or wq->mutex should be held")
-#ifdef CONFIG_LOCKDEP
-#define assert_manager_or_pool_lock(pool) \
- WARN_ONCE(debug_locks && \
- !lockdep_is_held(&(pool)->manager_mutex) && \
- !lockdep_is_held(&(pool)->lock), \
- "pool->manager_mutex or ->lock should be held")
-#else
-#define assert_manager_or_pool_lock(pool) do { } while (0)
-#endif
-
#define for_each_cpu_worker_pool(pool, cpu) \
for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
(pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
@@ -375,17 +363,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
/**
* for_each_pool_worker - iterate through all workers of a worker_pool
* @worker: iteration cursor
- * @wi: integer used for iteration
* @pool: worker_pool to iterate workers of
*
- * This must be called with either @pool->manager_mutex or ->lock held.
+ * This must be called with @pool->attach_mutex.
*
* The if/else clause exists only for the lockdep assertion and can be
* ignored.
*/
-#define for_each_pool_worker(worker, wi, pool) \
- idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \
- if (({ assert_manager_or_pool_lock((pool)); false; })) { } \
+#define for_each_pool_worker(worker, pool) \
+ list_for_each_entry((worker), &(pool)->workers, node) \
+ if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
else
/**
@@ -516,6 +503,13 @@ void destroy_work_on_stack(struct work_struct *work)
}
EXPORT_SYMBOL_GPL(destroy_work_on_stack);
+void destroy_delayed_work_on_stack(struct delayed_work *work)
+{
+ destroy_timer_on_stack(&work->timer);
+ debug_object_free(&work->work, &work_debug_descr);
+}
+EXPORT_SYMBOL_GPL(destroy_delayed_work_on_stack);
+
#else
static inline void debug_work_activate(struct work_struct *work) { }
static inline void debug_work_deactivate(struct work_struct *work) { }
@@ -756,13 +750,6 @@ static bool need_to_create_worker(struct worker_pool *pool)
return need_more_worker(pool) && !may_start_working(pool);
}
-/* Do I need to be the manager? */
-static bool need_to_manage_workers(struct worker_pool *pool)
-{
- return need_to_create_worker(pool) ||
- (pool->flags & POOL_MANAGE_WORKERS);
-}
-
/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
@@ -770,13 +757,6 @@ static bool too_many_workers(struct worker_pool *pool)
int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
int nr_busy = pool->nr_workers - nr_idle;
- /*
- * nr_idle and idle_list may disagree if idle rebinding is in
- * progress. Never return %true if idle_list is empty.
- */
- if (list_empty(&pool->idle_list))
- return false;
-
return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
}
@@ -784,8 +764,8 @@ static bool too_many_workers(struct worker_pool *pool)
* Wake up functions.
*/
-/* Return the first worker. Safe with preemption disabled */
-static struct worker *first_worker(struct worker_pool *pool)
+/* Return the first idle worker. Safe with preemption disabled */
+static struct worker *first_idle_worker(struct worker_pool *pool)
{
if (unlikely(list_empty(&pool->idle_list)))
return NULL;
@@ -804,7 +784,7 @@ static struct worker *first_worker(struct worker_pool *pool)
*/
static void wake_up_worker(struct worker_pool *pool)
{
- struct worker *worker = first_worker(pool);
+ struct worker *worker = first_idle_worker(pool);
if (likely(worker))
wake_up_process(worker->task);
@@ -862,7 +842,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
pool = worker->pool;
/* this can only happen on the local cpu */
- if (WARN_ON_ONCE(cpu != raw_smp_processor_id()))
+ if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
return NULL;
/*
@@ -878,7 +858,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
*/
if (atomic_dec_and_test(&pool->nr_running) &&
!list_empty(&pool->worklist))
- to_wakeup = first_worker(pool);
+ to_wakeup = first_idle_worker(pool);
return to_wakeup ? to_wakeup->task : NULL;
}
@@ -886,35 +866,22 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
* worker_set_flags - set worker flags and adjust nr_running accordingly
* @worker: self
* @flags: flags to set
- * @wakeup: wakeup an idle worker if necessary
*
- * Set @flags in @worker->flags and adjust nr_running accordingly. If
- * nr_running becomes zero and @wakeup is %true, an idle worker is
- * woken up.
+ * Set @flags in @worker->flags and adjust nr_running accordingly.
*
* CONTEXT:
* spin_lock_irq(pool->lock)
*/
-static inline void worker_set_flags(struct worker *worker, unsigned int flags,
- bool wakeup)
+static inline void worker_set_flags(struct worker *worker, unsigned int flags)
{
struct worker_pool *pool = worker->pool;
WARN_ON_ONCE(worker->task != current);
- /*
- * If transitioning into NOT_RUNNING, adjust nr_running and
- * wake up an idle worker as necessary if requested by
- * @wakeup.
- */
+ /* If transitioning into NOT_RUNNING, adjust nr_running. */
if ((flags & WORKER_NOT_RUNNING) &&
!(worker->flags & WORKER_NOT_RUNNING)) {
- if (wakeup) {
- if (atomic_dec_and_test(&pool->nr_running) &&
- !list_empty(&pool->worklist))
- wake_up_worker(pool);
- } else
- atomic_dec(&pool->nr_running);
+ atomic_dec(&pool->nr_running);
}
worker->flags |= flags;
@@ -1244,7 +1211,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
pwq_activate_delayed_work(work);
list_del_init(&work->entry);
- pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work));
+ pwq_dec_nr_in_flight(pwq, get_work_color(work));
/* work->data points to pwq iff queued, point to pool */
set_work_pool_and_keep_pending(work, pool->id);
@@ -1572,7 +1539,7 @@ static void worker_enter_idle(struct worker *worker)
(worker->hentry.next || worker->hentry.pprev)))
return;
- /* can't use worker_set_flags(), also called from start_worker() */
+ /* can't use worker_set_flags(), also called from create_worker() */
worker->flags |= WORKER_IDLE;
pool->nr_idle++;
worker->last_active = jiffies;
@@ -1614,78 +1581,15 @@ static void worker_leave_idle(struct worker *worker)
list_del_init(&worker->entry);
}
-/**
- * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it
- * @pool: target worker_pool
- *
- * Bind %current to the cpu of @pool if it is associated and lock @pool.
- *
- * Works which are scheduled while the cpu is online must at least be
- * scheduled to a worker which is bound to the cpu so that if they are
- * flushed from cpu callbacks while cpu is going down, they are
- * guaranteed to execute on the cpu.
- *
- * This function is to be used by unbound workers and rescuers to bind
- * themselves to the target cpu and may race with cpu going down or
- * coming online. kthread_bind() can't be used because it may put the
- * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
- * verbatim as it's best effort and blocking and pool may be
- * [dis]associated in the meantime.
- *
- * This function tries set_cpus_allowed() and locks pool and verifies the
- * binding against %POOL_DISASSOCIATED which is set during
- * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
- * enters idle state or fetches works without dropping lock, it can
- * guarantee the scheduling requirement described in the first paragraph.
- *
- * CONTEXT:
- * Might sleep. Called without any lock but returns with pool->lock
- * held.
- *
- * Return:
- * %true if the associated pool is online (@worker is successfully
- * bound), %false if offline.
- */
-static bool worker_maybe_bind_and_lock(struct worker_pool *pool)
-__acquires(&pool->lock)
-{
- while (true) {
- /*
- * The following call may fail, succeed or succeed
- * without actually migrating the task to the cpu if
- * it races with cpu hotunplug operation. Verify
- * against POOL_DISASSOCIATED.
- */
- if (!(pool->flags & POOL_DISASSOCIATED))
- set_cpus_allowed_ptr(current, pool->attrs->cpumask);
-
- spin_lock_irq(&pool->lock);
- if (pool->flags & POOL_DISASSOCIATED)
- return false;
- if (task_cpu(current) == pool->cpu &&
- cpumask_equal(&current->cpus_allowed, pool->attrs->cpumask))
- return true;
- spin_unlock_irq(&pool->lock);
-
- /*
- * We've raced with CPU hot[un]plug. Give it a breather
- * and retry migration. cond_resched() is required here;
- * otherwise, we might deadlock against cpu_stop trying to
- * bring down the CPU on non-preemptive kernel.
- */
- cpu_relax();
- cond_resched();
- }
-}
-
-static struct worker *alloc_worker(void)
+static struct worker *alloc_worker(int node)
{
struct worker *worker;
- worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+ worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
if (worker) {
INIT_LIST_HEAD(&worker->entry);
INIT_LIST_HEAD(&worker->scheduled);
+ INIT_LIST_HEAD(&worker->node);
/* on creation a worker is in !idle && prep state */
worker->flags = WORKER_PREP;
}
@@ -1693,12 +1597,70 @@ static struct worker *alloc_worker(void)
}
/**
+ * worker_attach_to_pool() - attach a worker to a pool
+ * @worker: worker to be attached
+ * @pool: the target pool
+ *
+ * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and
+ * cpu-binding of @worker are kept coordinated with the pool across
+ * cpu-[un]hotplugs.
+ */
+static void worker_attach_to_pool(struct worker *worker,
+ struct worker_pool *pool)
+{
+ mutex_lock(&pool->attach_mutex);
+
+ /*
+ * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
+ * online CPUs. It'll be re-applied when any of the CPUs come up.
+ */
+ set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
+
+ /*
+ * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
+ * stable across this function. See the comments above the
+ * flag definition for details.
+ */
+ if (pool->flags & POOL_DISASSOCIATED)
+ worker->flags |= WORKER_UNBOUND;
+
+ list_add_tail(&worker->node, &pool->workers);
+
+ mutex_unlock(&pool->attach_mutex);
+}
+
+/**
+ * worker_detach_from_pool() - detach a worker from its pool
+ * @worker: worker which is attached to its pool
+ * @pool: the pool @worker is attached to
+ *
+ * Undo the attaching which had been done in worker_attach_to_pool(). The
+ * caller worker shouldn't access to the pool after detached except it has
+ * other reference to the pool.
+ */
+static void worker_detach_from_pool(struct worker *worker,
+ struct worker_pool *pool)
+{
+ struct completion *detach_completion = NULL;
+
+ mutex_lock(&pool->attach_mutex);
+ list_del(&worker->node);
+ if (list_empty(&pool->workers))
+ detach_completion = pool->detach_completion;
+ mutex_unlock(&pool->attach_mutex);
+
+ /* clear leftover flags without pool->lock after it is detached */
+ worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
+
+ if (detach_completion)
+ complete(detach_completion);
+}
+
+/**
* create_worker - create a new workqueue worker
* @pool: pool the new worker will belong to
*
- * Create a new worker which is bound to @pool. The returned worker
- * can be started by calling start_worker() or destroyed using
- * destroy_worker().
+ * Create and start a new worker which is attached to @pool.
*
* CONTEXT:
* Might sleep. Does GFP_KERNEL allocations.
@@ -1712,23 +1674,12 @@ static struct worker *create_worker(struct worker_pool *pool)
int id = -1;
char id_buf[16];
- lockdep_assert_held(&pool->manager_mutex);
-
- /*
- * ID is needed to determine kthread name. Allocate ID first
- * without installing the pointer.
- */
- idr_preload(GFP_KERNEL);
- spin_lock_irq(&pool->lock);
-
- id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT);
-
- spin_unlock_irq(&pool->lock);
- idr_preload_end();
+ /* ID is needed to determine kthread name */
+ id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
if (id < 0)
goto fail;
- worker = alloc_worker();
+ worker = alloc_worker(pool->node);
if (!worker)
goto fail;
@@ -1751,124 +1702,53 @@ static struct worker *create_worker(struct worker_pool *pool)
/* prevent userland from meddling with cpumask of workqueue workers */
worker->task->flags |= PF_NO_SETAFFINITY;
- /*
- * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
- * online CPUs. It'll be re-applied when any of the CPUs come up.
- */
- set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
+ /* successful, attach the worker to the pool */
+ worker_attach_to_pool(worker, pool);
- /*
- * The caller is responsible for ensuring %POOL_DISASSOCIATED
- * remains stable across this function. See the comments above the
- * flag definition for details.
- */
- if (pool->flags & POOL_DISASSOCIATED)
- worker->flags |= WORKER_UNBOUND;
-
- /* successful, commit the pointer to idr */
+ /* start the newly created worker */
spin_lock_irq(&pool->lock);
- idr_replace(&pool->worker_idr, worker, worker->id);
+ worker->pool->nr_workers++;
+ worker_enter_idle(worker);
+ wake_up_process(worker->task);
spin_unlock_irq(&pool->lock);
return worker;
fail:
- if (id >= 0) {
- spin_lock_irq(&pool->lock);
- idr_remove(&pool->worker_idr, id);
- spin_unlock_irq(&pool->lock);
- }
+ if (id >= 0)
+ ida_simple_remove(&pool->worker_ida, id);
kfree(worker);
return NULL;
}
/**
- * start_worker - start a newly created worker
- * @worker: worker to start
- *
- * Make the pool aware of @worker and start it.
- *
- * CONTEXT:
- * spin_lock_irq(pool->lock).
- */
-static void start_worker(struct worker *worker)
-{
- worker->flags |= WORKER_STARTED;
- worker->pool->nr_workers++;
- worker_enter_idle(worker);
- wake_up_process(worker->task);
-}
-
-/**
- * create_and_start_worker - create and start a worker for a pool
- * @pool: the target pool
- *
- * Grab the managership of @pool and create and start a new worker for it.
- *
- * Return: 0 on success. A negative error code otherwise.
- */
-static int create_and_start_worker(struct worker_pool *pool)
-{
- struct worker *worker;
-
- mutex_lock(&pool->manager_mutex);
-
- worker = create_worker(pool);
- if (worker) {
- spin_lock_irq(&pool->lock);
- start_worker(worker);
- spin_unlock_irq(&pool->lock);
- }
-
- mutex_unlock(&pool->manager_mutex);
-
- return worker ? 0 : -ENOMEM;
-}
-
-/**
* destroy_worker - destroy a workqueue worker
* @worker: worker to be destroyed
*
- * Destroy @worker and adjust @pool stats accordingly.
+ * Destroy @worker and adjust @pool stats accordingly. The worker should
+ * be idle.
*
* CONTEXT:
- * spin_lock_irq(pool->lock) which is released and regrabbed.
+ * spin_lock_irq(pool->lock).
*/
static void destroy_worker(struct worker *worker)
{
struct worker_pool *pool = worker->pool;
- lockdep_assert_held(&pool->manager_mutex);
lockdep_assert_held(&pool->lock);
/* sanity check frenzy */
if (WARN_ON(worker->current_work) ||
- WARN_ON(!list_empty(&worker->scheduled)))
+ WARN_ON(!list_empty(&worker->scheduled)) ||
+ WARN_ON(!(worker->flags & WORKER_IDLE)))
return;
- if (worker->flags & WORKER_STARTED)
- pool->nr_workers--;
- if (worker->flags & WORKER_IDLE)
- pool->nr_idle--;
-
- /*
- * Once WORKER_DIE is set, the kworker may destroy itself at any
- * point. Pin to ensure the task stays until we're done with it.
- */
- get_task_struct(worker->task);
+ pool->nr_workers--;
+ pool->nr_idle--;
list_del_init(&worker->entry);
worker->flags |= WORKER_DIE;
-
- idr_remove(&pool->worker_idr, worker->id);
-
- spin_unlock_irq(&pool->lock);
-
- kthread_stop(worker->task);
- put_task_struct(worker->task);
- kfree(worker);
-
- spin_lock_irq(&pool->lock);
+ wake_up_process(worker->task);
}
static void idle_worker_timeout(unsigned long __pool)
@@ -1877,7 +1757,7 @@ static void idle_worker_timeout(unsigned long __pool)
spin_lock_irq(&pool->lock);
- if (too_many_workers(pool)) {
+ while (too_many_workers(pool)) {
struct worker *worker;
unsigned long expires;
@@ -1885,13 +1765,12 @@ static void idle_worker_timeout(unsigned long __pool)
worker = list_entry(pool->idle_list.prev, struct worker, entry);
expires = worker->last_active + IDLE_WORKER_TIMEOUT;
- if (time_before(jiffies, expires))
+ if (time_before(jiffies, expires)) {
mod_timer(&pool->idle_timer, expires);
- else {
- /* it's been idle for too long, wake up manager */
- pool->flags |= POOL_MANAGE_WORKERS;
- wake_up_worker(pool);
+ break;
}
+
+ destroy_worker(worker);
}
spin_unlock_irq(&pool->lock);
@@ -1909,6 +1788,12 @@ static void send_mayday(struct work_struct *work)
/* mayday mayday mayday */
if (list_empty(&pwq->mayday_node)) {
+ /*
+ * If @pwq is for an unbound wq, its base ref may be put at
+ * any time due to an attribute change. Pin @pwq until the
+ * rescuer is done with it.
+ */
+ get_pwq(pwq);
list_add_tail(&pwq->mayday_node, &wq->maydays);
wake_up_process(wq->rescuer->task);
}
@@ -1974,23 +1859,10 @@ restart:
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
while (true) {
- struct worker *worker;
-
- worker = create_worker(pool);
- if (worker) {
- del_timer_sync(&pool->mayday_timer);
- spin_lock_irq(&pool->lock);
- start_worker(worker);
- if (WARN_ON_ONCE(need_to_create_worker(pool)))
- goto restart;
- return true;
- }
-
- if (!need_to_create_worker(pool))
+ if (create_worker(pool) || !need_to_create_worker(pool))
break;
- __set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(CREATE_COOLDOWN);
+ schedule_timeout_interruptible(CREATE_COOLDOWN);
if (!need_to_create_worker(pool))
break;
@@ -1998,50 +1870,17 @@ restart:
del_timer_sync(&pool->mayday_timer);
spin_lock_irq(&pool->lock);
+ /*
+ * This is necessary even after a new worker was just successfully
+ * created as @pool->lock was dropped and the new worker might have
+ * already become busy.
+ */
if (need_to_create_worker(pool))
goto restart;
return true;
}
/**
- * maybe_destroy_worker - destroy workers which have been idle for a while
- * @pool: pool to destroy workers for
- *
- * Destroy @pool workers which have been idle for longer than
- * IDLE_WORKER_TIMEOUT.
- *
- * LOCKING:
- * spin_lock_irq(pool->lock) which may be released and regrabbed
- * multiple times. Called only from manager.
- *
- * Return:
- * %false if no action was taken and pool->lock stayed locked, %true
- * otherwise.
- */
-static bool maybe_destroy_workers(struct worker_pool *pool)
-{
- bool ret = false;
-
- while (too_many_workers(pool)) {
- struct worker *worker;
- unsigned long expires;
-
- worker = list_entry(pool->idle_list.prev, struct worker, entry);
- expires = worker->last_active + IDLE_WORKER_TIMEOUT;
-
- if (time_before(jiffies, expires)) {
- mod_timer(&pool->idle_timer, expires);
- break;
- }
-
- destroy_worker(worker);
- ret = true;
- }
-
- return ret;
-}
-
-/**
* manage_workers - manage worker pool
* @worker: self
*
@@ -2070,8 +1909,6 @@ static bool manage_workers(struct worker *worker)
bool ret = false;
/*
- * Managership is governed by two mutexes - manager_arb and
- * manager_mutex. manager_arb handles arbitration of manager role.
* Anyone who successfully grabs manager_arb wins the arbitration
* and becomes the manager. mutex_trylock() on pool->manager_arb
* failure while holding pool->lock reliably indicates that someone
@@ -2080,40 +1917,12 @@ static bool manage_workers(struct worker *worker)
* grabbing manager_arb is responsible for actually performing
* manager duties. If manager_arb is grabbed and released without
* actual management, the pool may stall indefinitely.
- *
- * manager_mutex is used for exclusion of actual management
- * operations. The holder of manager_mutex can be sure that none
- * of management operations, including creation and destruction of
- * workers, won't take place until the mutex is released. Because
- * manager_mutex doesn't interfere with manager role arbitration,
- * it is guaranteed that the pool's management, while may be
- * delayed, won't be disturbed by someone else grabbing
- * manager_mutex.
*/
if (!mutex_trylock(&pool->manager_arb))
return ret;
- /*
- * With manager arbitration won, manager_mutex would be free in
- * most cases. trylock first without dropping @pool->lock.
- */
- if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
- spin_unlock_irq(&pool->lock);
- mutex_lock(&pool->manager_mutex);
- spin_lock_irq(&pool->lock);
- ret = true;
- }
-
- pool->flags &= ~POOL_MANAGE_WORKERS;
-
- /*
- * Destroy and then create so that may_start_working() is true
- * on return.
- */
- ret |= maybe_destroy_workers(pool);
ret |= maybe_create_worker(pool);
- mutex_unlock(&pool->manager_mutex);
mutex_unlock(&pool->manager_arb);
return ret;
}
@@ -2153,13 +1962,8 @@ __acquires(&pool->lock)
lockdep_copy_map(&lockdep_map, &work->lockdep_map);
#endif
- /*
- * Ensure we're on the correct CPU. DISASSOCIATED test is
- * necessary to avoid spurious warnings from rescuers servicing the
- * unbound or a disassociated pool.
- */
- WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) &&
- !(pool->flags & POOL_DISASSOCIATED) &&
+ /* ensure we're on the correct CPU */
+ WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
raw_smp_processor_id() != pool->cpu);
/*
@@ -2185,17 +1989,22 @@ __acquires(&pool->lock)
list_del_init(&work->entry);
/*
- * CPU intensive works don't participate in concurrency
- * management. They're the scheduler's responsibility.
+ * CPU intensive works don't participate in concurrency management.
+ * They're the scheduler's responsibility. This takes @worker out
+ * of concurrency management and the next code block will chain
+ * execution of the pending work items.
*/
if (unlikely(cpu_intensive))
- worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
+ worker_set_flags(worker, WORKER_CPU_INTENSIVE);
/*
- * Unbound pool isn't concurrency managed and work items should be
- * executed ASAP. Wake up another worker if necessary.
+ * Wake up another worker if necessary. The condition is always
+ * false for normal per-cpu workers since nr_running would always
+ * be >= 1 at this point. This is used to chain execution of the
+ * pending work items for WORKER_NOT_RUNNING workers such as the
+ * UNBOUND and CPU_INTENSIVE ones.
*/
- if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
+ if (need_more_worker(pool))
wake_up_worker(pool);
/*
@@ -2301,6 +2110,11 @@ woke_up:
spin_unlock_irq(&pool->lock);
WARN_ON_ONCE(!list_empty(&worker->entry));
worker->task->flags &= ~PF_WQ_WORKER;
+
+ set_task_comm(worker->task, "kworker/dying");
+ ida_simple_remove(&pool->worker_ida, worker->id);
+ worker_detach_from_pool(worker, pool);
+ kfree(worker);
return 0;
}
@@ -2346,11 +2160,8 @@ recheck:
}
} while (keep_working(pool));
- worker_set_flags(worker, WORKER_PREP, false);
+ worker_set_flags(worker, WORKER_PREP);
sleep:
- if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker))
- goto recheck;
-
/*
* pool->lock is held and there's no work to process and no need to
* manage, sleep. Workers are woken up only while holding
@@ -2391,6 +2202,7 @@ static int rescuer_thread(void *__rescuer)
struct worker *rescuer = __rescuer;
struct workqueue_struct *wq = rescuer->rescue_wq;
struct list_head *scheduled = &rescuer->scheduled;
+ bool should_stop;
set_user_nice(current, RESCUER_NICE_LEVEL);
@@ -2402,11 +2214,15 @@ static int rescuer_thread(void *__rescuer)
repeat:
set_current_state(TASK_INTERRUPTIBLE);
- if (kthread_should_stop()) {
- __set_current_state(TASK_RUNNING);
- rescuer->task->flags &= ~PF_WQ_WORKER;
- return 0;
- }
+ /*
+ * By the time the rescuer is requested to stop, the workqueue
+ * shouldn't have any work pending, but @wq->maydays may still have
+ * pwq(s) queued. This can happen by non-rescuer workers consuming
+ * all the work items before the rescuer got to them. Go through
+ * @wq->maydays processing before acting on should_stop so that the
+ * list is always empty on exit.
+ */
+ should_stop = kthread_should_stop();
/* see whether any pwq is asking for help */
spin_lock_irq(&wq_mayday_lock);
@@ -2422,8 +2238,9 @@ repeat:
spin_unlock_irq(&wq_mayday_lock);
- /* migrate to the target cpu if possible */
- worker_maybe_bind_and_lock(pool);
+ worker_attach_to_pool(rescuer, pool);
+
+ spin_lock_irq(&pool->lock);
rescuer->pool = pool;
/*
@@ -2438,20 +2255,35 @@ repeat:
process_scheduled_works(rescuer);
/*
- * Leave this pool. If keep_working() is %true, notify a
+ * Put the reference grabbed by send_mayday(). @pool won't
+ * go away while we're still attached to it.
+ */
+ put_pwq(pwq);
+
+ /*
+ * Leave this pool. If need_more_worker() is %true, notify a
* regular worker; otherwise, we end up with 0 concurrency
* and stalling the execution.
*/
- if (keep_working(pool))
+ if (need_more_worker(pool))
wake_up_worker(pool);
rescuer->pool = NULL;
- spin_unlock(&pool->lock);
- spin_lock(&wq_mayday_lock);
+ spin_unlock_irq(&pool->lock);
+
+ worker_detach_from_pool(rescuer, pool);
+
+ spin_lock_irq(&wq_mayday_lock);
}
spin_unlock_irq(&wq_mayday_lock);
+ if (should_stop) {
+ __set_current_state(TASK_RUNNING);
+ rescuer->task->flags &= ~PF_WQ_WORKER;
+ return 0;
+ }
+
/* rescuers should never participate in concurrency management */
WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
schedule();
@@ -3225,7 +3057,7 @@ static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
return -ENOMEM;
if (sscanf(buf, "%d", &attrs->nice) == 1 &&
- attrs->nice >= -20 && attrs->nice <= 19)
+ attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
ret = apply_workqueue_attrs(wq, attrs);
else
ret = -EINVAL;
@@ -3392,6 +3224,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
}
}
+ dev_set_uevent_suppress(&wq_dev->dev, false);
kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
return 0;
}
@@ -3520,9 +3353,10 @@ static int init_worker_pool(struct worker_pool *pool)
(unsigned long)pool);
mutex_init(&pool->manager_arb);
- mutex_init(&pool->manager_mutex);
- idr_init(&pool->worker_idr);
+ mutex_init(&pool->attach_mutex);
+ INIT_LIST_HEAD(&pool->workers);
+ ida_init(&pool->worker_ida);
INIT_HLIST_NODE(&pool->hash_node);
pool->refcnt = 1;
@@ -3537,7 +3371,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
{
struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
- idr_destroy(&pool->worker_idr);
+ ida_destroy(&pool->worker_ida);
free_workqueue_attrs(pool->attrs);
kfree(pool);
}
@@ -3555,6 +3389,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
*/
static void put_unbound_pool(struct worker_pool *pool)
{
+ DECLARE_COMPLETION_ONSTACK(detach_completion);
struct worker *worker;
lockdep_assert_held(&wq_pool_mutex);
@@ -3563,7 +3398,7 @@ static void put_unbound_pool(struct worker_pool *pool)
return;
/* sanity checks */
- if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) ||
+ if (WARN_ON(!(pool->cpu < 0)) ||
WARN_ON(!list_empty(&pool->worklist)))
return;
@@ -3575,18 +3410,24 @@ static void put_unbound_pool(struct worker_pool *pool)
/*
* Become the manager and destroy all workers. Grabbing
* manager_arb prevents @pool's workers from blocking on
- * manager_mutex.
+ * attach_mutex.
*/
mutex_lock(&pool->manager_arb);
- mutex_lock(&pool->manager_mutex);
- spin_lock_irq(&pool->lock);
- while ((worker = first_worker(pool)))
+ spin_lock_irq(&pool->lock);
+ while ((worker = first_idle_worker(pool)))
destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle);
-
spin_unlock_irq(&pool->lock);
- mutex_unlock(&pool->manager_mutex);
+
+ mutex_lock(&pool->attach_mutex);
+ if (!list_empty(&pool->workers))
+ pool->detach_completion = &detach_completion;
+ mutex_unlock(&pool->attach_mutex);
+
+ if (pool->detach_completion)
+ wait_for_completion(pool->detach_completion);
+
mutex_unlock(&pool->manager_arb);
/* shut down the timers */
@@ -3623,7 +3464,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
if (wqattrs_equal(pool->attrs, attrs)) {
pool->refcnt++;
- goto out_unlock;
+ return pool;
}
}
@@ -3632,9 +3473,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
if (!pool || init_worker_pool(pool) < 0)
goto fail;
- if (workqueue_freezing)
- pool->flags |= POOL_FREEZING;
-
lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
copy_workqueue_attrs(pool->attrs, attrs);
@@ -3659,12 +3497,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
goto fail;
/* create and start the initial worker */
- if (create_and_start_worker(pool) < 0)
+ if (!create_worker(pool))
goto fail;
/* install */
hash_add(unbound_pool_hash, &pool->hash_node, hash);
-out_unlock:
+
return pool;
fail:
if (pool)
@@ -3693,11 +3531,6 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
return;
- /*
- * Unlink @pwq. Synchronization against wq->mutex isn't strictly
- * necessary on release but do it anyway. It's easier to verify
- * and consistent with the linking path.
- */
mutex_lock(&wq->mutex);
list_del_rcu(&pwq->pwqs_node);
is_last = list_empty(&wq->pwqs);
@@ -3741,7 +3574,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
spin_lock_irq(&pwq->pool->lock);
- if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) {
+ /*
+ * During [un]freezing, the caller is responsible for ensuring that
+ * this function is called at least once after @workqueue_freezing
+ * is updated and visible.
+ */
+ if (!freezable || !workqueue_freezing) {
pwq->max_active = wq->saved_max_active;
while (!list_empty(&pwq->delayed_works) &&
@@ -3789,10 +3627,7 @@ static void link_pwq(struct pool_workqueue *pwq)
if (!list_empty(&pwq->pwqs_node))
return;
- /*
- * Set the matching work_color. This is synchronized with
- * wq->mutex to avoid confusing flush_workqueue().
- */
+ /* set the matching work_color */
pwq->work_color = wq->work_color;
/* sync max_active to the current setting */
@@ -3929,7 +3764,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
return -EINVAL;
- pwq_tbl = kzalloc(wq_numa_tbl_len * sizeof(pwq_tbl[0]), GFP_KERNEL);
+ pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL);
new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
if (!pwq_tbl || !new_attrs || !tmp_attrs)
@@ -4073,17 +3908,13 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
* Let's determine what needs to be done. If the target cpumask is
* different from wq's, we need to compare it to @pwq's and create
* a new one if they don't match. If the target cpumask equals
- * wq's, the default pwq should be used. If @pwq is already the
- * default one, nothing to do; otherwise, install the default one.
+ * wq's, the default pwq should be used.
*/
if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
goto out_unlock;
} else {
- if (pwq == wq->dfl_pwq)
- goto out_unlock;
- else
- goto use_dfl_pwq;
+ goto use_dfl_pwq;
}
mutex_unlock(&wq->mutex);
@@ -4091,9 +3922,10 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
/* create a new pwq */
pwq = alloc_unbound_pwq(wq, target_attrs);
if (!pwq) {
- pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
- wq->name);
- goto out_unlock;
+ pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
+ wq->name);
+ mutex_lock(&wq->mutex);
+ goto use_dfl_pwq;
}
/*
@@ -4180,7 +4012,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
/* allocate wq and format name */
if (flags & WQ_UNBOUND)
- tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
+ tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
if (!wq)
@@ -4222,7 +4054,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
if (flags & WQ_MEM_RECLAIM) {
struct worker *rescuer;
- rescuer = alloc_worker();
+ rescuer = alloc_worker(NUMA_NO_NODE);
if (!rescuer)
goto err_destroy;
@@ -4568,28 +4400,25 @@ static void wq_unbind_fn(struct work_struct *work)
int cpu = smp_processor_id();
struct worker_pool *pool;
struct worker *worker;
- int wi;
for_each_cpu_worker_pool(pool, cpu) {
- WARN_ON_ONCE(cpu != smp_processor_id());
-
- mutex_lock(&pool->manager_mutex);
+ mutex_lock(&pool->attach_mutex);
spin_lock_irq(&pool->lock);
/*
- * We've blocked all manager operations. Make all workers
+ * We've blocked all attach/detach operations. Make all workers
* unbound and set DISASSOCIATED. Before this, all workers
* except for the ones which are still executing works from
* before the last CPU down must be on the cpu. After
* this, they may become diasporas.
*/
- for_each_pool_worker(worker, wi, pool)
+ for_each_pool_worker(worker, pool)
worker->flags |= WORKER_UNBOUND;
pool->flags |= POOL_DISASSOCIATED;
spin_unlock_irq(&pool->lock);
- mutex_unlock(&pool->manager_mutex);
+ mutex_unlock(&pool->attach_mutex);
/*
* Call schedule() so that we cross rq->lock and thus can
@@ -4629,9 +4458,8 @@ static void wq_unbind_fn(struct work_struct *work)
static void rebind_workers(struct worker_pool *pool)
{
struct worker *worker;
- int wi;
- lockdep_assert_held(&pool->manager_mutex);
+ lockdep_assert_held(&pool->attach_mutex);
/*
* Restore CPU affinity of all workers. As all idle workers should
@@ -4640,13 +4468,14 @@ static void rebind_workers(struct worker_pool *pool)
* of all workers first and then clear UNBOUND. As we're called
* from CPU_ONLINE, the following shouldn't fail.
*/
- for_each_pool_worker(worker, wi, pool)
+ for_each_pool_worker(worker, pool)
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0);
spin_lock_irq(&pool->lock);
+ pool->flags &= ~POOL_DISASSOCIATED;
- for_each_pool_worker(worker, wi, pool) {
+ for_each_pool_worker(worker, pool) {
unsigned int worker_flags = worker->flags;
/*
@@ -4698,9 +4527,8 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
{
static cpumask_t cpumask;
struct worker *worker;
- int wi;
- lockdep_assert_held(&pool->manager_mutex);
+ lockdep_assert_held(&pool->attach_mutex);
/* is @cpu allowed for @pool? */
if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
@@ -4712,7 +4540,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
return;
/* as we're called from CPU_ONLINE, the following shouldn't fail */
- for_each_pool_worker(worker, wi, pool)
+ for_each_pool_worker(worker, pool)
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0);
}
@@ -4735,7 +4563,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
for_each_cpu_worker_pool(pool, cpu) {
if (pool->nr_workers)
continue;
- if (create_and_start_worker(pool) < 0)
+ if (!create_worker(pool))
return NOTIFY_BAD;
}
break;
@@ -4745,19 +4573,14 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
mutex_lock(&wq_pool_mutex);
for_each_pool(pool, pi) {
- mutex_lock(&pool->manager_mutex);
-
- if (pool->cpu == cpu) {
- spin_lock_irq(&pool->lock);
- pool->flags &= ~POOL_DISASSOCIATED;
- spin_unlock_irq(&pool->lock);
+ mutex_lock(&pool->attach_mutex);
+ if (pool->cpu == cpu)
rebind_workers(pool);
- } else if (pool->cpu < 0) {
+ else if (pool->cpu < 0)
restore_unbound_workers_cpumask(pool, cpu);
- }
- mutex_unlock(&pool->manager_mutex);
+ mutex_unlock(&pool->attach_mutex);
}
/* update NUMA affinity of unbound workqueues */
@@ -4856,24 +4679,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
*/
void freeze_workqueues_begin(void)
{
- struct worker_pool *pool;
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
- int pi;
mutex_lock(&wq_pool_mutex);
WARN_ON_ONCE(workqueue_freezing);
workqueue_freezing = true;
- /* set FREEZING */
- for_each_pool(pool, pi) {
- spin_lock_irq(&pool->lock);
- WARN_ON_ONCE(pool->flags & POOL_FREEZING);
- pool->flags |= POOL_FREEZING;
- spin_unlock_irq(&pool->lock);
- }
-
list_for_each_entry(wq, &workqueues, list) {
mutex_lock(&wq->mutex);
for_each_pwq(pwq, wq)
@@ -4943,21 +4756,13 @@ void thaw_workqueues(void)
{
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
- struct worker_pool *pool;
- int pi;
mutex_lock(&wq_pool_mutex);
if (!workqueue_freezing)
goto out_unlock;
- /* clear FREEZING */
- for_each_pool(pool, pi) {
- spin_lock_irq(&pool->lock);
- WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
- pool->flags &= ~POOL_FREEZING;
- spin_unlock_irq(&pool->lock);
- }
+ workqueue_freezing = false;
/* restore max_active and repopulate worklist */
list_for_each_entry(wq, &workqueues, list) {
@@ -4967,7 +4772,6 @@ void thaw_workqueues(void)
mutex_unlock(&wq->mutex);
}
- workqueue_freezing = false;
out_unlock:
mutex_unlock(&wq_pool_mutex);
}
@@ -4978,10 +4782,6 @@ static void __init wq_numa_init(void)
cpumask_var_t *tbl;
int node, cpu;
- /* determine NUMA pwq table len - highest node id + 1 */
- for_each_node(node)
- wq_numa_tbl_len = max(wq_numa_tbl_len, node + 1);
-
if (num_possible_nodes() <= 1)
return;
@@ -4998,11 +4798,11 @@ static void __init wq_numa_init(void)
* available. Build one from cpu_to_node() which should have been
* fully initialized by now.
*/
- tbl = kzalloc(wq_numa_tbl_len * sizeof(tbl[0]), GFP_KERNEL);
+ tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL);
BUG_ON(!tbl);
for_each_node(node)
- BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
+ BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
node_online(node) ? node : NUMA_NO_NODE));
for_each_possible_cpu(cpu) {
@@ -5058,7 +4858,7 @@ static int __init init_workqueues(void)
for_each_cpu_worker_pool(pool, cpu) {
pool->flags &= ~POOL_DISASSOCIATED;
- BUG_ON(create_and_start_worker(pool) < 0);
+ BUG_ON(!create_worker(pool));
}
}
OpenPOWER on IntegriCloud