diff options
author | Tejun Heo <tj@kernel.org> | 2012-07-13 22:16:45 -0700 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2012-07-13 22:24:45 -0700 |
commit | 3270476a6c0ce322354df8679652f060d66526dc (patch) | |
tree | db58846beb7c5e1c1b50b7e2f1c2538320408c26 /kernel/workqueue.c | |
parent | 4ce62e9e30cacc26885cab133ad1de358dd79f21 (diff) | |
download | talos-op-linux-3270476a6c0ce322354df8679652f060d66526dc.tar.gz talos-op-linux-3270476a6c0ce322354df8679652f060d66526dc.zip |
workqueue: reimplement WQ_HIGHPRI using a separate worker_pool
WQ_HIGHPRI was implemented by queueing highpri work items at the head
of the global worklist. Other than queueing at the head, they weren't
handled differently; unfortunately, this could lead to execution
latency of a few seconds on heavily loaded systems.
Now that workqueue code has been updated to deal with multiple
worker_pools per global_cwq, this patch reimplements WQ_HIGHPRI using
a separate worker_pool. NR_WORKER_POOLS is bumped to two and
gcwq->pools[0] is used for normal pri work items and ->pools[1] for
highpri. Highpri workers get -20 nice level and have an 'H' suffix in
their names. Note that this change increases the number of kworkers
per cpu.
POOL_HIGHPRI_PENDING, pool_determine_ins_pos() and highpri chain
wakeup code in process_one_work() are no longer used and have been removed.
This allows proper prioritization of highpri work items and removes
high execution latency of highpri work items.
v2: nr_running indexing bug in get_pool_nr_running() fixed.
v3: Refreshed for the get_pool_nr_running() update in the previous
patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Josh Hunt <joshhunt00@gmail.com>
LKML-Reference: <CAKA=qzaHqwZ8eqpLNFjxnO2fX-tgAOjmpvxgBFjv6dJeQaOW1w@mail.gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r-- | kernel/workqueue.c | 100 |
1 files changed, 27 insertions, 73 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b0daaea44eaa..4fa9e3552f1e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -52,7 +52,6 @@ enum { /* pool flags */ POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ - POOL_HIGHPRI_PENDING = 1 << 2, /* highpri works on queue */ /* worker flags */ WORKER_STARTED = 1 << 0, /* started */ @@ -74,7 +73,7 @@ enum { TRUSTEE_RELEASE = 3, /* release workers */ TRUSTEE_DONE = 4, /* trustee is done */ - NR_WORKER_POOLS = 1, /* # worker pools per gcwq */ + NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, @@ -95,6 +94,7 @@ enum { * all cpus. Give -20. */ RESCUER_NICE_LEVEL = -20, + HIGHPRI_NICE_LEVEL = -20, }; /* @@ -174,7 +174,7 @@ struct global_cwq { struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; /* L: hash of busy workers */ - struct worker_pool pool; /* the worker pools */ + struct worker_pool pools[2]; /* normal and highpri pools */ struct task_struct *trustee; /* L: for gcwq shutdown */ unsigned int trustee_state; /* L: trustee state */ @@ -277,7 +277,8 @@ EXPORT_SYMBOL_GPL(system_nrt_freezable_wq); #include <trace/events/workqueue.h> #define for_each_worker_pool(pool, gcwq) \ - for ((pool) = &(gcwq)->pool; (pool); (pool) = NULL) + for ((pool) = &(gcwq)->pools[0]; \ + (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) #define for_each_busy_worker(worker, i, pos, gcwq) \ for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ @@ -473,6 +474,11 @@ static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = { static int worker_thread(void *__worker); +static int worker_pool_pri(struct worker_pool *pool) +{ + return pool - pool->gcwq->pools; +} + static struct global_cwq *get_gcwq(unsigned int cpu) { if (cpu != WORK_CPU_UNBOUND) @@ -484,7 +490,7 @@ static struct global_cwq *get_gcwq(unsigned int cpu) static atomic_t *get_pool_nr_running(struct 
worker_pool *pool) { int cpu = pool->gcwq->cpu; - int idx = 0; + int idx = worker_pool_pri(pool); if (cpu != WORK_CPU_UNBOUND) return &per_cpu(pool_nr_running, cpu)[idx]; @@ -586,15 +592,14 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) } /* - * Policy functions. These define the policies on how the global - * worker pool is managed. Unless noted otherwise, these functions - * assume that they're being called with gcwq->lock held. + * Policy functions. These define the policies on how the global worker + * pools are managed. Unless noted otherwise, these functions assume that + * they're being called with gcwq->lock held. */ static bool __need_more_worker(struct worker_pool *pool) { - return !atomic_read(get_pool_nr_running(pool)) || - (pool->flags & POOL_HIGHPRI_PENDING); + return !atomic_read(get_pool_nr_running(pool)); } /* @@ -621,9 +626,7 @@ static bool keep_working(struct worker_pool *pool) { atomic_t *nr_running = get_pool_nr_running(pool); - return !list_empty(&pool->worklist) && - (atomic_read(nr_running) <= 1 || - (pool->flags & POOL_HIGHPRI_PENDING)); + return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1; } /* Do we need a new worker? Called from manager. */ @@ -892,43 +895,6 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq, } /** - * pool_determine_ins_pos - find insertion position - * @pool: pool of interest - * @cwq: cwq a work is being queued for - * - * A work for @cwq is about to be queued on @pool, determine insertion - * position for the work. If @cwq is for HIGHPRI wq, the work is - * queued at the head of the queue but in FIFO order with respect to - * other HIGHPRI works; otherwise, at the end of the queue. This - * function also sets POOL_HIGHPRI_PENDING flag to hint @pool that - * there are HIGHPRI works pending. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - * - * RETURNS: - * Pointer to inserstion position. 
- */ -static inline struct list_head *pool_determine_ins_pos(struct worker_pool *pool, - struct cpu_workqueue_struct *cwq) -{ - struct work_struct *twork; - - if (likely(!(cwq->wq->flags & WQ_HIGHPRI))) - return &pool->worklist; - - list_for_each_entry(twork, &pool->worklist, entry) { - struct cpu_workqueue_struct *tcwq = get_work_cwq(twork); - - if (!(tcwq->wq->flags & WQ_HIGHPRI)) - break; - } - - pool->flags |= POOL_HIGHPRI_PENDING; - return &twork->entry; -} - -/** * insert_work - insert a work into gcwq * @cwq: cwq @work belongs to * @work: work to insert @@ -1068,7 +1034,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, if (likely(cwq->nr_active < cwq->max_active)) { trace_workqueue_activate_work(work); cwq->nr_active++; - worklist = pool_determine_ins_pos(cwq->pool, cwq); + worklist = &cwq->pool->worklist; } else { work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; @@ -1385,6 +1351,7 @@ static struct worker *create_worker(struct worker_pool *pool, bool bind) { struct global_cwq *gcwq = pool->gcwq; bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND; + const char *pri = worker_pool_pri(pool) ? "H" : ""; struct worker *worker = NULL; int id = -1; @@ -1406,15 +1373,17 @@ static struct worker *create_worker(struct worker_pool *pool, bool bind) if (!on_unbound_cpu) worker->task = kthread_create_on_node(worker_thread, - worker, - cpu_to_node(gcwq->cpu), - "kworker/%u:%d", gcwq->cpu, id); + worker, cpu_to_node(gcwq->cpu), + "kworker/%u:%d%s", gcwq->cpu, id, pri); else worker->task = kthread_create(worker_thread, worker, - "kworker/u:%d", id); + "kworker/u:%d%s", id, pri); if (IS_ERR(worker->task)) goto fail; + if (worker_pool_pri(pool)) + set_user_nice(worker->task, HIGHPRI_NICE_LEVEL); + /* * A rogue worker will become a regular one if CPU comes * online later on. 
Make sure every worker has @@ -1761,10 +1730,9 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) { struct work_struct *work = list_first_entry(&cwq->delayed_works, struct work_struct, entry); - struct list_head *pos = pool_determine_ins_pos(cwq->pool, cwq); trace_workqueue_activate_work(work); - move_linked_works(work, pos, NULL); + move_linked_works(work, &cwq->pool->worklist, NULL); __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); cwq->nr_active++; } @@ -1880,21 +1848,6 @@ __acquires(&gcwq->lock) list_del_init(&work->entry); /* - * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI, - * wake up another worker; otherwise, clear HIGHPRI_PENDING. - */ - if (unlikely(pool->flags & POOL_HIGHPRI_PENDING)) { - struct work_struct *nwork = list_first_entry(&pool->worklist, - struct work_struct, entry); - - if (!list_empty(&pool->worklist) && - get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI) - wake_up_worker(pool); - else - pool->flags &= ~POOL_HIGHPRI_PENDING; - } - - /* * CPU intensive works don't participate in concurrency * management. They're the scheduler's responsibility. */ @@ -3047,9 +3000,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct global_cwq *gcwq = get_gcwq(cpu); + int pool_idx = (bool)(flags & WQ_HIGHPRI); BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); - cwq->pool = &gcwq->pool; + cwq->pool = &gcwq->pools[pool_idx]; cwq->wq = wq; cwq->flush_color = -1; cwq->max_active = max_active; |