From 93e6f119c0ce8a1bba6e81dc8dd97d67be360844 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 31 May 2012 16:26:28 -0700 Subject: ipc/mqueue: cleanup definition names and locations Since commit b231cca4381e ("message queues: increase range limits") on Oct 18, 2008, calls to mq_open() that did not pass in an attribute struct and expected to get default values for the size of the queue and the max message size now get the system wide maximums instead of hardwired defaults like they used to get. This was uncovered when one of the earlier patches in this patch set increased the default system wide maximums at the same time it increased the hard ceiling on the system wide maximums (a customer specifically needed the hard ceiling brought back up, the new ceiling that commit b231cca4381e introduced was too low for their production systems). By increasing the default maximums and not realising they were tied to any attempt to create a message queue without an attribute struct, I had inadvertently made it such that all message queue creation attempts without an attribute struct were failing because the new default maximums would create a queue that exceeded the default rlimit for message queue bytes. As a result, the system wide defaults were brought back down to their previous levels, and the system wide ceilings on the maximums were raised to meet the customer's needs. However, the fact that the no attribute struct behavior of mq_open() could be broken by changing the system wide maximums for message queues was seen as fundamentally broken itself. So we hardwired the no attribute case back like it used to be. But, then we realized that on the very off chance that some piece of software in the wild depended on that behavior, we could work around that issue by adding two new knobs to /proc that allowed setting the defaults for message queues created without an attr struct separately from the system wide maximums. What is not an option IMO is to leave the current behavior in place. No piece of software should ever rely on setting the system wide maximums in order to get a desired message queue. Such a reliance would be so fundamentally multitasking OS unfriendly as to not really be tolerable. Fortunately, we don't know of any software in the wild that uses this except for a regression test program that caught the issue in the first place. If there is though, we have made accommodations with the two new /proc knobs (and that's all the accommodations such fundamentally broken software can be allowed).. This patch: The various defines for minimums and maximums of the sysctl controllable mqueue values are scattered amongst different files and named inconsistently. Move them all into ipc_namespace.h and make them have consistent names. Additionally, make the number of queues per namespace also have a minimum and maximum and use the same sysctl function as the other two settable variables. Signed-off-by: Doug Ledford Acked-by: Serge E. Hallyn Cc: Amerigo Wang Cc: Joe Korty Cc: Jiri Slaby Acked-by: KOSAKI Motohiro Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux/ipc_namespace.h') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 8a297a5e794c..1372b566e1e1 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -91,10 +91,15 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {} #ifdef CONFIG_POSIX_MQUEUE extern int mq_init_ns(struct ipc_namespace *ns); /* default values */ +#define MIN_QUEUESMAX 1 #define DFLT_QUEUESMAX 256 /* max number of message queues */ +#define HARD_QUEUESMAX 1024 +#define MIN_MSGMAX 1 #define DFLT_MSGMAX 10 /* max number of messages in each queue */ #define HARD_MSGMAX (32768*sizeof(void *)/4) +#define MIN_MSGSIZEMAX 128 #define DFLT_MSGSIZEMAX 8192 /* max message size */ +#define HARD_MSGSIZEMAX (8192*128) #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif -- cgit v1.2.3 From 858ee3784e8105467f1f3017f4ece51cb51d4830 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 31 May 2012 16:26:29 -0700 Subject: ipc/mqueue: switch back to using non-max values on create Commit b231cca4381e ("message queues: increase range limits") changed how we create a queue that does not include an attr struct passed to open so that it creates the queue with whatever the maximum values are. However, if the admin has set the maximums to allow flexibility in creating a queue (aka, both a large size and large queue are allowed, but combined they create a queue too large for the RLIMIT_MSGQUEUE of the user), then attempts to create a queue without an attr struct will fail. Switch back to using acceptable defaults regardless of what the maximums are. Note: so far, we only know of a few applications that rely on this behavior (specifically, set the maximums in /proc, then run the application which calls mq_open() without passing in an attr struct, and the application expects the newly created message queue to have the maximum sizes that were set in /proc used on the mq_open() call, and all of those applications that we know of are actually part of regression test suites that were coded to do something like this: for size in 4096 65536 $((1024 * 1024)) $((16 * 1024 * 1024)); do echo $size > /proc/sys/fs/mqueue/msgsize_max mq_open || echo "Error opening mq with size $size" done These test suites that depend on any behavior like this are broken. The concept that programs should rely upon the system wide maximum in order to get their desired results instead of simply using a attr struct to specify what they want is fundamentally unfriendly programming practice for any multi-tasking OS. Fixing this will break those few apps that we know of (and those app authors recognize the brokenness of their code and the need to fix it). However, the following patch "mqueue: separate mqueue default value" allows a workaround in the form of new knobs for the default msg queue creation parameters for any software out there that we don't already know about that might rely on this behavior at the moment. Signed-off-by: Doug Ledford Cc: Serge E. Hallyn Cc: Amerigo Wang Cc: Joe Korty Cc: Jiri Slaby Acked-by: KOSAKI Motohiro Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 2 ++ ipc/mqueue.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux/ipc_namespace.h') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 1372b566e1e1..bde094ee7b0e 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -95,9 +95,11 @@ extern int mq_init_ns(struct ipc_namespace *ns); #define DFLT_QUEUESMAX 256 /* max number of message queues */ #define HARD_QUEUESMAX 1024 #define MIN_MSGMAX 1 +#define DFLT_MSG 10U #define DFLT_MSGMAX 10 /* max number of messages in each queue */ #define HARD_MSGMAX (32768*sizeof(void *)/4) #define MIN_MSGSIZEMAX 128 +#define DFLT_MSGSIZE 8192U #define DFLT_MSGSIZEMAX 8192 /* max message size */ #define HARD_MSGSIZEMAX (8192*128) #else diff --git a/ipc/mqueue.c b/ipc/mqueue.c index a2757d4ab773..b103022179a3 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -144,8 +144,9 @@ static struct inode *mqueue_get_inode(struct super_block *sb, info->qsize = 0; info->user = NULL; /* set when all is ok */ memset(&info->attr, 0, sizeof(info->attr)); - info->attr.mq_maxmsg = ipc_ns->mq_msg_max; - info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; + info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, DFLT_MSG); + info->attr.mq_msgsize = + min(ipc_ns->mq_msgsize_max, DFLT_MSGSIZE); if (attr) { info->attr.mq_maxmsg = attr->mq_maxmsg; info->attr.mq_msgsize = attr->mq_msgsize; -- cgit v1.2.3 From 5b5c4d1a1440e94994c73dddbad7be0676cd8b9a Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Thu, 31 May 2012 16:26:30 -0700 Subject: ipc/mqueue: update maximums for the mqueue subsystem Commit b231cca4381e ("message queues: increase range limits") changed the maximum size of a message in a message queue from INT_MAX to 8192*128. Unfortunately, we had customers that relied on a size much larger than 8192*128 on their production systems. After reviewing POSIX, we found that it is silent on the maximum message size. We did find a couple other areas in which it was not silent. Fix up the mqueue maximums so that the customer's system can continue to work, and document both the POSIX and real world requirements in ipc_namespace.h so that we don't have this issue crop back up. Also, commit 9cf18e1dd74cd0 ("ipc: HARD_MSGMAX should be higher not lower on 64bit") fiddled with HARD_MSGMAX without realizing that the number was intentionally in place to limit the msg queue depth to one that was small enough to kmalloc an array of pointers (hence why we divided 128k by sizeof(long)). If we wish to meet POSIX requirements, we have no choice but to change our allocation to a vmalloc instead (at least for the large queue size case). With that, it's possible to increase our allowed maximum to the POSIX requirements (or more if we choose). [sfr@canb.auug.org.au: using vmalloc requires including vmalloc.h] Signed-off-by: Doug Ledford Cc: Serge E. Hallyn Cc: Amerigo Wang Cc: Joe Korty Cc: Jiri Slaby Acked-by: KOSAKI Motohiro Cc: Manfred Spraul Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 47 ++++++++++++++++++++++++++++++++----------- ipc/mqueue.c | 11 ++++++++-- 2 files changed, 44 insertions(+), 14 deletions(-) (limited to 'include/linux/ipc_namespace.h') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index bde094ee7b0e..6e1dd08194fd 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -90,18 +90,41 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {} #ifdef CONFIG_POSIX_MQUEUE extern int mq_init_ns(struct ipc_namespace *ns); -/* default values */ -#define MIN_QUEUESMAX 1 -#define DFLT_QUEUESMAX 256 /* max number of message queues */ -#define HARD_QUEUESMAX 1024 -#define MIN_MSGMAX 1 -#define DFLT_MSG 10U -#define DFLT_MSGMAX 10 /* max number of messages in each queue */ -#define HARD_MSGMAX (32768*sizeof(void *)/4) -#define MIN_MSGSIZEMAX 128 -#define DFLT_MSGSIZE 8192U -#define DFLT_MSGSIZEMAX 8192 /* max message size */ -#define HARD_MSGSIZEMAX (8192*128) +/* + * POSIX Message Queue default values: + * + * MIN_*: Lowest value an admin can set the maximum unprivileged limit to + * DFLT_*MAX: Default values for the maximum unprivileged limits + * DFLT_{MSG,MSGSIZE}: Default values used when the user doesn't supply + * an attribute to the open call and the queue must be created + * HARD_*: Highest value the maximums can be set to. These are enforced + * on CAP_SYS_RESOURCE apps as well making them inviolate (so make them + * suitably high) + * + * POSIX Requirements: + * Per app minimum openable message queues - 8. This does not map well + * to the fact that we limit the number of queues on a per namespace + * basis instead of a per app basis. So, make the default high enough + * that no given app should have a hard time opening 8 queues. + * Minimum maximum for HARD_MSGMAX - 32767. I bumped this to 65536. + * Minimum maximum for HARD_MSGSIZEMAX - POSIX is silent on this. However, + * we have run into a situation where running applications in the wild + * require this to be at least 5MB, and preferably 10MB, so I set the + * value to 16MB in hopes that this user is the worst of the bunch and + * the new maximum will handle anyone else. I may have to revisit this + * in the future. + */ +#define MIN_QUEUESMAX 1 +#define DFLT_QUEUESMAX 256 +#define HARD_QUEUESMAX 1024 +#define MIN_MSGMAX 1 +#define DFLT_MSG 64U +#define DFLT_MSGMAX 1024 +#define HARD_MSGMAX 65536 +#define MIN_MSGSIZEMAX 128 +#define DFLT_MSGSIZE 8192U +#define DFLT_MSGSIZEMAX (1024*1024) +#define HARD_MSGSIZEMAX (16*1024*1024) #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 6e10a55a78c5..f8eba5e46c5a 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -152,7 +153,10 @@ static struct inode *mqueue_get_inode(struct super_block *sb, info->attr.mq_msgsize = attr->mq_msgsize; } mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *); - info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); + if (mq_msg_tblsz > KMALLOC_MAX_SIZE) + info->messages = vmalloc(mq_msg_tblsz); + else + info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); if (!info->messages) goto out_inode; @@ -262,7 +266,10 @@ static void mqueue_evict_inode(struct inode *inode) spin_lock(&info->lock); for (i = 0; i < info->attr.mq_curmsgs; i++) free_msg(info->messages[i]); - kfree(info->messages); + if (info->attr.mq_maxmsg * sizeof(struct msg_msg *) > KMALLOC_MAX_SIZE) + vfree(info->messages); + else + kfree(info->messages); spin_unlock(&info->lock); /* Total amount of bytes accounted for the mqueue */ -- cgit v1.2.3 From e6315bb154e778391ce64b194756bd3d108dadf6 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 31 May 2012 16:26:31 -0700 Subject: mqueue: revert bump up DFLT_*MAX Mqueue limitation is slightly naieve parameter likes other ipcs because unprivileged user can consume kernel memory by using ipcs. Thus, too aggressive raise bring us security issue. Example, current setting allow evil unprivileged user use 256GB (= 256 * 1024 * 1024*1024) and it's enough large to system will belome unresponsive. Don't do that. Instead, every admin should adjust the knobs for their own systems. Signed-off-by: KOSAKI Motohiro Acked-by: Doug Ledford Acked-by: Joe Korty Cc: Amerigo Wang Acked-by: Serge E. Hallyn Cc: Jiri Slaby Cc: Manfred Spraul Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/ipc_namespace.h') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 6e1dd08194fd..2488535a32a3 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -118,12 +118,12 @@ extern int mq_init_ns(struct ipc_namespace *ns); #define DFLT_QUEUESMAX 256 #define HARD_QUEUESMAX 1024 #define MIN_MSGMAX 1 -#define DFLT_MSG 64U -#define DFLT_MSGMAX 1024 +#define DFLT_MSG 10U +#define DFLT_MSGMAX 10 #define HARD_MSGMAX 65536 #define MIN_MSGSIZEMAX 128 #define DFLT_MSGSIZE 8192U -#define DFLT_MSGSIZEMAX (1024*1024) +#define DFLT_MSGSIZEMAX 8192 #define HARD_MSGSIZEMAX (16*1024*1024) #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } -- cgit v1.2.3 From cef0184c115e5e4e10498f6548d9526465e72478 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 31 May 2012 16:26:33 -0700 Subject: mqueue: separate mqueue default value from maximum value Commit b231cca4381e ("message queues: increase range limits") changed mqueue default value when attr parameter is specified NULL from hard coded value to fs.mqueue.{msg,msgsize}_max sysctl value. This made large side effect. When user need to use two mqueue applications 1) using !NULL attr parameter and it require big message size and 2) using NULL attr parameter and only need small size message, app (1) require to raise fs.mqueue.msgsize_max and app (2) consume large memory size even though it doesn't need. Doug Ledford propsed to switch back it to static hard coded value. However it also has a compatibility problem. Some applications might started depend on the default value is tunable. The solution is to separate default value from maximum value. Signed-off-by: KOSAKI Motohiro Signed-off-by: Doug Ledford Acked-by: Doug Ledford Acked-by: Joe Korty Cc: Amerigo Wang Acked-by: Serge E. Hallyn Cc: Jiri Slaby Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/fs.txt | 7 +++++++ include/linux/ipc_namespace.h | 2 ++ ipc/mq_sysctl.c | 18 ++++++++++++++++++ ipc/mqueue.c | 9 ++++++--- 4 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include/linux/ipc_namespace.h') diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 88fd7f5c8dcd..13d6166d7a27 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -225,6 +225,13 @@ a queue must be less or equal then msg_max. maximum message size value (it is every message queue's attribute set during its creation). +/proc/sys/fs/mqueue/msg_default is a read/write file for setting/getting the +default number of messages in a queue value if attr parameter of mq_open(2) is +NULL. If it exceed msg_max, the default value is initialized msg_max. + +/proc/sys/fs/mqueue/msgsize_default is a read/write file for setting/getting +the default message size value if attr parameter of mq_open(2) is NULL. If it +exceed msgsize_max, the default value is initialized msgsize_max. 4. /proc/sys/fs/epoll - Configuration options for the epoll interface -------------------------------------------------------- diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 2488535a32a3..5499c92a9153 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -62,6 +62,8 @@ struct ipc_namespace { unsigned int mq_queues_max; /* initialized to DFLT_QUEUESMAX */ unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */ unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */ + unsigned int mq_msg_default; + unsigned int mq_msgsize_default; /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index e22336a09b4a..383d638340b8 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -73,6 +73,24 @@ static ctl_table mq_sysctls[] = { .extra1 = &msg_maxsize_limit_min, .extra2 = &msg_maxsize_limit_max, }, + { + .procname = "msg_default", + .data = &init_ipc_ns.mq_msg_default, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_mq_dointvec_minmax, + .extra1 = &msg_max_limit_min, + .extra2 = &msg_max_limit_max, + }, + { + .procname = "msgsize_default", + .data = &init_ipc_ns.mq_msgsize_default, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_mq_dointvec_minmax, + .extra1 = &msg_maxsize_limit_min, + .extra2 = &msg_maxsize_limit_max, + }, {} }; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 6828e2c93cef..609d53f7a915 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -145,9 +145,10 @@ static struct inode *mqueue_get_inode(struct super_block *sb, info->qsize = 0; info->user = NULL; /* set when all is ok */ memset(&info->attr, 0, sizeof(info->attr)); - info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, DFLT_MSG); - info->attr.mq_msgsize = - min(ipc_ns->mq_msgsize_max, DFLT_MSGSIZE); + info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, + ipc_ns->mq_msg_default); + info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max, + ipc_ns->mq_msgsize_default); if (attr) { info->attr.mq_maxmsg = attr->mq_maxmsg; info->attr.mq_msgsize = attr->mq_msgsize; @@ -1261,6 +1262,8 @@ int mq_init_ns(struct ipc_namespace *ns) ns->mq_queues_max = DFLT_QUEUESMAX; ns->mq_msg_max = DFLT_MSGMAX; ns->mq_msgsize_max = DFLT_MSGSIZEMAX; + ns->mq_msg_default = DFLT_MSG; + ns->mq_msgsize_default = DFLT_MSGSIZE; ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); if (IS_ERR(ns->mq_mnt)) { -- cgit v1.2.3