summaryrefslogtreecommitdiffstats
path: root/kernel/workqueue.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2012-07-13 22:16:45 -0700
committerTejun Heo <tj@kernel.org>2012-07-13 22:24:45 -0700
commit3270476a6c0ce322354df8679652f060d66526dc (patch)
treedb58846beb7c5e1c1b50b7e2f1c2538320408c26 /kernel/workqueue.c
parent4ce62e9e30cacc26885cab133ad1de358dd79f21 (diff)
downloadtalos-op-linux-3270476a6c0ce322354df8679652f060d66526dc.tar.gz
talos-op-linux-3270476a6c0ce322354df8679652f060d66526dc.zip
workqueue: reimplement WQ_HIGHPRI using a separate worker_pool
WQ_HIGHPRI was implemented by queueing highpri work items at the head of the global worklist. Other than queueing at the head, they weren't handled differently; unfortunately, this could lead to execution latency of a few seconds on heavily loaded systems. Now that workqueue code has been updated to deal with multiple worker_pools per global_cwq, this patch reimplements WQ_HIGHPRI using a separate worker_pool. NR_WORKER_POOLS is bumped to two and gcwq->pools[0] is used for normal pri work items and ->pools[1] for highpri. Highpri workers get -20 nice level and has 'H' suffix in their names. Note that this change increases the number of kworkers per cpu. POOL_HIGHPRI_PENDING, pool_determine_ins_pos() and highpri chain wakeup code in process_one_work() are no longer used and removed. This allows proper prioritization of highpri work items and removes high execution latency of highpri work items. v2: nr_running indexing bug in get_pool_nr_running() fixed. v3: Refreshed for the get_pool_nr_running() update in the previous patch. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Josh Hunt <joshhunt00@gmail.com> LKML-Reference: <CAKA=qzaHqwZ8eqpLNFjxnO2fX-tgAOjmpvxgBFjv6dJeQaOW1w@mail.gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Fengguang Wu <fengguang.wu@intel.com>
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--kernel/workqueue.c100
1 files changed, 27 insertions, 73 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b0daaea44eaa..4fa9e3552f1e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -52,7 +52,6 @@ enum {
/* pool flags */
POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
- POOL_HIGHPRI_PENDING = 1 << 2, /* highpri works on queue */
/* worker flags */
WORKER_STARTED = 1 << 0, /* started */
@@ -74,7 +73,7 @@ enum {
TRUSTEE_RELEASE = 3, /* release workers */
TRUSTEE_DONE = 4, /* trustee is done */
- NR_WORKER_POOLS = 1, /* # worker pools per gcwq */
+ NR_WORKER_POOLS = 2, /* # worker pools per gcwq */
BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
@@ -95,6 +94,7 @@ enum {
* all cpus. Give -20.
*/
RESCUER_NICE_LEVEL = -20,
+ HIGHPRI_NICE_LEVEL = -20,
};
/*
@@ -174,7 +174,7 @@ struct global_cwq {
struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
/* L: hash of busy workers */
- struct worker_pool pool; /* the worker pools */
+ struct worker_pool pools[2]; /* normal and highpri pools */
struct task_struct *trustee; /* L: for gcwq shutdown */
unsigned int trustee_state; /* L: trustee state */
@@ -277,7 +277,8 @@ EXPORT_SYMBOL_GPL(system_nrt_freezable_wq);
#include <trace/events/workqueue.h>
#define for_each_worker_pool(pool, gcwq) \
- for ((pool) = &(gcwq)->pool; (pool); (pool) = NULL)
+ for ((pool) = &(gcwq)->pools[0]; \
+ (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++)
#define for_each_busy_worker(worker, i, pos, gcwq) \
for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \
@@ -473,6 +474,11 @@ static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
static int worker_thread(void *__worker);
+static int worker_pool_pri(struct worker_pool *pool)
+{
+ return pool - pool->gcwq->pools;
+}
+
static struct global_cwq *get_gcwq(unsigned int cpu)
{
if (cpu != WORK_CPU_UNBOUND)
@@ -484,7 +490,7 @@ static struct global_cwq *get_gcwq(unsigned int cpu)
static atomic_t *get_pool_nr_running(struct worker_pool *pool)
{
int cpu = pool->gcwq->cpu;
- int idx = 0;
+ int idx = worker_pool_pri(pool);
if (cpu != WORK_CPU_UNBOUND)
return &per_cpu(pool_nr_running, cpu)[idx];
@@ -586,15 +592,14 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
}
/*
- * Policy functions. These define the policies on how the global
- * worker pool is managed. Unless noted otherwise, these functions
- * assume that they're being called with gcwq->lock held.
+ * Policy functions. These define the policies on how the global worker
+ * pools are managed. Unless noted otherwise, these functions assume that
+ * they're being called with gcwq->lock held.
*/
static bool __need_more_worker(struct worker_pool *pool)
{
- return !atomic_read(get_pool_nr_running(pool)) ||
- (pool->flags & POOL_HIGHPRI_PENDING);
+ return !atomic_read(get_pool_nr_running(pool));
}
/*
@@ -621,9 +626,7 @@ static bool keep_working(struct worker_pool *pool)
{
atomic_t *nr_running = get_pool_nr_running(pool);
- return !list_empty(&pool->worklist) &&
- (atomic_read(nr_running) <= 1 ||
- (pool->flags & POOL_HIGHPRI_PENDING));
+ return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1;
}
/* Do we need a new worker? Called from manager. */
@@ -892,43 +895,6 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
}
/**
- * pool_determine_ins_pos - find insertion position
- * @pool: pool of interest
- * @cwq: cwq a work is being queued for
- *
- * A work for @cwq is about to be queued on @pool, determine insertion
- * position for the work. If @cwq is for HIGHPRI wq, the work is
- * queued at the head of the queue but in FIFO order with respect to
- * other HIGHPRI works; otherwise, at the end of the queue. This
- * function also sets POOL_HIGHPRI_PENDING flag to hint @pool that
- * there are HIGHPRI works pending.
- *
- * CONTEXT:
- * spin_lock_irq(gcwq->lock).
- *
- * RETURNS:
- * Pointer to inserstion position.
- */
-static inline struct list_head *pool_determine_ins_pos(struct worker_pool *pool,
- struct cpu_workqueue_struct *cwq)
-{
- struct work_struct *twork;
-
- if (likely(!(cwq->wq->flags & WQ_HIGHPRI)))
- return &pool->worklist;
-
- list_for_each_entry(twork, &pool->worklist, entry) {
- struct cpu_workqueue_struct *tcwq = get_work_cwq(twork);
-
- if (!(tcwq->wq->flags & WQ_HIGHPRI))
- break;
- }
-
- pool->flags |= POOL_HIGHPRI_PENDING;
- return &twork->entry;
-}
-
-/**
* insert_work - insert a work into gcwq
* @cwq: cwq @work belongs to
* @work: work to insert
@@ -1068,7 +1034,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
if (likely(cwq->nr_active < cwq->max_active)) {
trace_workqueue_activate_work(work);
cwq->nr_active++;
- worklist = pool_determine_ins_pos(cwq->pool, cwq);
+ worklist = &cwq->pool->worklist;
} else {
work_flags |= WORK_STRUCT_DELAYED;
worklist = &cwq->delayed_works;
@@ -1385,6 +1351,7 @@ static struct worker *create_worker(struct worker_pool *pool, bool bind)
{
struct global_cwq *gcwq = pool->gcwq;
bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND;
+ const char *pri = worker_pool_pri(pool) ? "H" : "";
struct worker *worker = NULL;
int id = -1;
@@ -1406,15 +1373,17 @@ static struct worker *create_worker(struct worker_pool *pool, bool bind)
if (!on_unbound_cpu)
worker->task = kthread_create_on_node(worker_thread,
- worker,
- cpu_to_node(gcwq->cpu),
- "kworker/%u:%d", gcwq->cpu, id);
+ worker, cpu_to_node(gcwq->cpu),
+ "kworker/%u:%d%s", gcwq->cpu, id, pri);
else
worker->task = kthread_create(worker_thread, worker,
- "kworker/u:%d", id);
+ "kworker/u:%d%s", id, pri);
if (IS_ERR(worker->task))
goto fail;
+ if (worker_pool_pri(pool))
+ set_user_nice(worker->task, HIGHPRI_NICE_LEVEL);
+
/*
* A rogue worker will become a regular one if CPU comes
* online later on. Make sure every worker has
@@ -1761,10 +1730,9 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
{
struct work_struct *work = list_first_entry(&cwq->delayed_works,
struct work_struct, entry);
- struct list_head *pos = pool_determine_ins_pos(cwq->pool, cwq);
trace_workqueue_activate_work(work);
- move_linked_works(work, pos, NULL);
+ move_linked_works(work, &cwq->pool->worklist, NULL);
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
cwq->nr_active++;
}
@@ -1880,21 +1848,6 @@ __acquires(&gcwq->lock)
list_del_init(&work->entry);
/*
- * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI,
- * wake up another worker; otherwise, clear HIGHPRI_PENDING.
- */
- if (unlikely(pool->flags & POOL_HIGHPRI_PENDING)) {
- struct work_struct *nwork = list_first_entry(&pool->worklist,
- struct work_struct, entry);
-
- if (!list_empty(&pool->worklist) &&
- get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI)
- wake_up_worker(pool);
- else
- pool->flags &= ~POOL_HIGHPRI_PENDING;
- }
-
- /*
* CPU intensive works don't participate in concurrency
* management. They're the scheduler's responsibility.
*/
@@ -3047,9 +3000,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
for_each_cwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
struct global_cwq *gcwq = get_gcwq(cpu);
+ int pool_idx = (bool)(flags & WQ_HIGHPRI);
BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
- cwq->pool = &gcwq->pool;
+ cwq->pool = &gcwq->pools[pool_idx];
cwq->wq = wq;
cwq->flush_color = -1;
cwq->max_active = max_active;
OpenPOWER on IntegriCloud