diff options
-rw-r--r-- | Documentation/sysctl/vm.txt | 22 | ||||
-rw-r--r-- | kernel/sysctl.c | 9 | ||||
-rw-r--r-- | mm/oom_kill.c | 49 |
3 files changed, 75 insertions, 5 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 24eac1bc735d..8a4863c4edd4 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -32,6 +32,7 @@ Currently, these files are in /proc/sys/vm: - min_unmapped_ratio - min_slab_ratio - panic_on_oom +- oom_dump_tasks - oom_kill_allocating_task - mmap_min_address - numa_zonelist_order @@ -232,6 +233,27 @@ according to your policy of failover. ============================================================= +oom_dump_tasks + +Enables a system-wide task dump (excluding kernel threads) to be +produced when the kernel performs an OOM-killing and includes such +information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and +name. This is helpful to determine why the OOM killer was invoked +and to identify the rogue task that caused it. + +If this is set to zero, this information is suppressed. On very +large systems with thousands of tasks it may not be feasible to dump +the memory state information for each one. Such systems should not +be forced to incur a performance penalty in OOM conditions when the +information may not be desired. + +If this is set to non-zero, this information is shown whenever the +OOM killer actually kills a memory-hogging task. + +The default value is 0. + +============================================================= + oom_kill_allocating_task This enables or disables killing the OOM-triggering task in diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 86daaa26d120..8c98d8147d88 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -67,6 +67,7 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern int sysctl_panic_on_oom; extern int sysctl_oom_kill_allocating_task; +extern int sysctl_oom_dump_tasks; extern int max_threads; extern int core_uses_pid; extern int suid_dumpable; @@ -871,6 +872,14 @@ static struct ctl_table vm_table[] = { .proc_handler = &proc_dointvec, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "oom_dump_tasks", + .data = &sysctl_oom_dump_tasks, + .maxlen = sizeof(sysctl_oom_dump_tasks), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = VM_OVERCOMMIT_RATIO, .procname = "overcommit_ratio", .data = &sysctl_overcommit_ratio, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ef5084dbc793..4194b9db0104 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -29,6 +29,7 @@ int sysctl_panic_on_oom; int sysctl_oom_kill_allocating_task; +int sysctl_oom_dump_tasks; static DEFINE_SPINLOCK(zone_scan_mutex); /* #define DEBUG */ @@ -263,6 +264,41 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, } /** + * Dumps the current memory state of all system tasks, excluding kernel threads. + * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj + * score, and name. + * + * If the actual is non-NULL, only tasks that are a member of the mem_cgroup are + * shown. + * + * Call with tasklist_lock read-locked. + */ +static void dump_tasks(const struct mem_cgroup *mem) +{ + struct task_struct *g, *p; + + printk(KERN_INFO "[ pid ] uid tgid total_vm rss cpu oom_adj " + "name\n"); + do_each_thread(g, p) { + /* + * total_vm and rss sizes do not exist for tasks with a + * detached mm so there's no need to report them. + */ + if (!p->mm) + continue; + if (mem && !task_in_mem_cgroup(p, mem)) + continue; + + task_lock(p); + printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", + p->pid, p->uid, p->tgid, p->mm->total_vm, + get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, + p->comm); + task_unlock(p); + } while_each_thread(g, p); +} + +/** * Send SIGKILL to the selected process irrespective of CAP_SYS_RAW_IO * flag though it's unlikely that we select a process with CAP_SYS_RAW_IO * set. @@ -339,7 +375,8 @@ static int oom_kill_task(struct task_struct *p) } static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, - unsigned long points, const char *message) + unsigned long points, struct mem_cgroup *mem, + const char *message) { struct task_struct *c; @@ -349,6 +386,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, current->comm, gfp_mask, order, current->oomkilladj); dump_stack(); show_mem(); + if (sysctl_oom_dump_tasks) + dump_tasks(mem); } /* @@ -389,7 +428,7 @@ retry: if (!p) p = current; - if (oom_kill_process(p, gfp_mask, 0, points, + if (oom_kill_process(p, gfp_mask, 0, points, mem, "Memory cgroup out of memory")) goto retry; out: @@ -495,7 +534,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) switch (constraint) { case CONSTRAINT_MEMORY_POLICY: - oom_kill_process(current, gfp_mask, order, points, + oom_kill_process(current, gfp_mask, order, points, NULL, "No available memory (MPOL_BIND)"); break; @@ -505,7 +544,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) /* Fall-through */ case CONSTRAINT_CPUSET: if (sysctl_oom_kill_allocating_task) { - oom_kill_process(current, gfp_mask, order, points, + oom_kill_process(current, gfp_mask, order, points, NULL, "Out of memory (oom_kill_allocating_task)"); break; } @@ -525,7 +564,7 @@ retry: panic("Out of memory and no killable processes...\n"); } - if (oom_kill_process(p, gfp_mask, order, points, + if (oom_kill_process(p, gfp_mask, order, points, NULL, "Out of memory")) goto retry; |