diff options
author | Michal Hocko <mhocko@suse.cz> | 2011-08-09 11:56:26 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-08-09 17:04:43 -0700 |
commit | 9f50fad65b87a8776ae989ca059ad6c17925dfc3 (patch) | |
tree | 9d15fc968323357da6b3a534c4c40ff3192cf2f9 /mm | |
parent | 47e180d6523081605c970f806572beab8a205537 (diff) | |
download | talos-obmc-linux-9f50fad65b87a8776ae989ca059ad6c17925dfc3.tar.gz talos-obmc-linux-9f50fad65b87a8776ae989ca059ad6c17925dfc3.zip |
Revert "memcg: get rid of percpu_charge_mutex lock"
This reverts commit 8521fc50d433507a7cdc96bec280f9e5888a54cc.
The patch incorrectly assumes that using atomic FLUSHING_CACHED_CHARGE
bit operations is sufficient but that is not true. Johannes Weiner has
reported a crash during parallel memory cgroup removal:
BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
IP: [<ffffffff81083b70>] css_is_ancestor+0x20/0x70
Oops: 0000 [#1] PREEMPT SMP
Pid: 19677, comm: rmdir Tainted: G W 3.0.0-mm1-00188-gf38d32b #35 ECS MCP61M-M3/MCP61M-M3
RIP: 0010:[<ffffffff81083b70>] css_is_ancestor+0x20/0x70
RSP: 0018:ffff880077b09c88 EFLAGS: 00010202
Process rmdir (pid: 19677, threadinfo ffff880077b08000, task ffff8800781bb310)
Call Trace:
[<ffffffff810feba3>] mem_cgroup_same_or_subtree+0x33/0x40
[<ffffffff810feccf>] drain_all_stock+0x11f/0x170
[<ffffffff81103211>] mem_cgroup_force_empty+0x231/0x6d0
[<ffffffff811036c4>] mem_cgroup_pre_destroy+0x14/0x20
[<ffffffff81080559>] cgroup_rmdir+0xb9/0x500
[<ffffffff81114d26>] vfs_rmdir+0x86/0xe0
[<ffffffff81114e7b>] do_rmdir+0xfb/0x110
[<ffffffff81114ea6>] sys_rmdir+0x16/0x20
[<ffffffff8154d76b>] system_call_fastpath+0x16/0x1b
We are crashing because we try to dereference cached memcg when we are
checking whether we should wait for draining on the cache. The cache is
already cleaned up, though.
There is also a theoretical chance that the cached memcg gets freed
between we test for the FLUSHING_CACHED_CHARGE and dereference it in
mem_cgroup_same_or_subtree:
CPU0 CPU1 CPU2
mem=stock->cached
stock->cached=NULL
clear_bit
test_and_set_bit
test_bit() ...
<preempted> mem_cgroup_destroy
use after free
The percpu_charge_mutex protected from this race because sync draining
is exclusive.
It is safer to revert now and come up with a more parallel
implementation later.
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Reported-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f4ec4e7ca4cd..930de9437271 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2091,6 +2091,7 @@ struct memcg_stock_pcp { #define FLUSHING_CACHED_CHARGE (0) }; static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); +static DEFINE_MUTEX(percpu_charge_mutex); /* * Try to consume stocked charge on this cpu. If success, one page is consumed @@ -2197,8 +2198,7 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) for_each_online_cpu(cpu) { struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); - if (mem_cgroup_same_or_subtree(root_mem, stock->cached) && - test_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) + if (test_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) flush_work(&stock->work); } out: @@ -2213,14 +2213,22 @@ out: */ static void drain_all_stock_async(struct mem_cgroup *root_mem) { + /* + * If someone calls draining, avoid adding more kworker runs. + */ + if (!mutex_trylock(&percpu_charge_mutex)) + return; drain_all_stock(root_mem, false); + mutex_unlock(&percpu_charge_mutex); } /* This is a synchronous drain interface. */ static void drain_all_stock_sync(struct mem_cgroup *root_mem) { /* called when force_empty is called */ + mutex_lock(&percpu_charge_mutex); drain_all_stock(root_mem, true); + mutex_unlock(&percpu_charge_mutex); } /* |