From aefad9593ec5ad4aae5346253a8b646364cd7317 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 20:52:43 -0500 Subject: sem/security: Pass kern_ipc_perm not sem_array into the sem security hooks All of the implementations of security hooks that take sem_array only access sem_perm the struct kern_ipc_perm member. This means the dependencies of the sem security hooks can be simplified by passing the kern_ipc_perm member of sem_array. Making this change will allow struct sem and struct sem_array to become private to ipc/sem.c. Signed-off-by: "Eric W. Biederman" --- ipc/sem.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index a4af04979fd2..01f5c63670ae 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -265,7 +265,7 @@ static void sem_rcu_free(struct rcu_head *head) struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu); struct sem_array *sma = container_of(p, struct sem_array, sem_perm); - security_sem_free(sma); + security_sem_free(&sma->sem_perm); kvfree(sma); } @@ -495,7 +495,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) sma->sem_perm.key = key; sma->sem_perm.security = NULL; - retval = security_sem_alloc(sma); + retval = security_sem_alloc(&sma->sem_perm); if (retval) { kvfree(sma); return retval; @@ -535,10 +535,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) */ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) { - struct sem_array *sma; - - sma = container_of(ipcp, struct sem_array, sem_perm); - return security_sem_associate(sma, semflg); + return security_sem_associate(ipcp, semflg); } /* @@ -1209,7 +1206,7 @@ static int semctl_stat(struct ipc_namespace *ns, int semid, if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) goto out_unlock; - err = security_sem_semctl(sma, cmd); + err = security_sem_semctl(&sma->sem_perm, cmd); if (err) goto out_unlock; @@ -1300,7 +1297,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, return -EACCES; } - err = security_sem_semctl(sma, SETVAL); + err = security_sem_semctl(&sma->sem_perm, SETVAL); if (err) { rcu_read_unlock(); return -EACCES; @@ -1354,7 +1351,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, if (ipcperms(ns, &sma->sem_perm, cmd == SETALL ? S_IWUGO : S_IRUGO)) goto out_rcu_wakeup; - err = security_sem_semctl(sma, cmd); + err = security_sem_semctl(&sma->sem_perm, cmd); if (err) goto out_rcu_wakeup; @@ -1545,7 +1542,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, sma = container_of(ipcp, struct sem_array, sem_perm); - err = security_sem_semctl(sma, cmd); + err = security_sem_semctl(&sma->sem_perm, cmd); if (err) goto out_unlock1; @@ -1962,7 +1959,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, goto out_free; } - error = security_sem_semop(sma, sops, nsops, alter); + error = security_sem_semop(&sma->sem_perm, sops, nsops, alter); if (error) { rcu_read_unlock(); goto out_free; -- cgit v1.2.3 From 7191adff2a5566efb139c79ea03eda3d0520d44a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:08:27 -0500 Subject: shm/security: Pass kern_ipc_perm not shmid_kernel into the shm security hooks All of the implementations of security hooks that take shmid_kernel only access shm_perm the struct kern_ipc_perm member. This means the dependencies of the shm security hooks can be simplified by passing the kern_ipc_perm member of shmid_kernel.. Making this change will allow struct shmid_kernel to become private to ipc/shm.c. Signed-off-by: "Eric W. Biederman" --- ipc/shm.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 4643865e9171..387a786e7be1 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -181,7 +181,7 @@ static void shm_rcu_free(struct rcu_head *head) rcu); struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel, shm_perm); - security_shm_free(shp); + security_shm_free(&shp->shm_perm); kvfree(shp); } @@ -554,7 +554,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->mlock_user = NULL; shp->shm_perm.security = NULL; - error = security_shm_alloc(shp); + error = security_shm_alloc(&shp->shm_perm); if (error) { kvfree(shp); return error; @@ -635,10 +635,7 @@ no_file: */ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) { - struct shmid_kernel *shp; - - shp = container_of(ipcp, struct shmid_kernel, shm_perm); - return security_shm_associate(shp, shmflg); + return security_shm_associate(ipcp, shmflg); } /* @@ -835,7 +832,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd, shp = container_of(ipcp, struct shmid_kernel, shm_perm); - err = security_shm_shmctl(shp, cmd); + err = security_shm_shmctl(&shp->shm_perm, cmd); if (err) goto out_unlock1; @@ -934,7 +931,7 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid, if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) goto out_unlock; - err = security_shm_shmctl(shp, cmd); + err = security_shm_shmctl(&shp->shm_perm, cmd); if (err) goto out_unlock; @@ -978,7 +975,7 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) } audit_ipc_obj(&(shp->shm_perm)); - err = security_shm_shmctl(shp, cmd); + err = security_shm_shmctl(&shp->shm_perm, cmd); if (err) goto out_unlock1; @@ -1348,7 +1345,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, if (ipcperms(ns, &shp->shm_perm, acc_mode)) goto out_unlock; - err = security_shm_shmat(shp, shmaddr, shmflg); + err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg); if (err) goto out_unlock; -- cgit v1.2.3 From d8c6e8543294428426578d74dc7aaf121e762d58 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:22:26 -0500 Subject: msg/security: Pass kern_ipc_perm not msg_queue into the msg_queue security hooks All of the implementations of security hooks that take msg_queue only access q_perm the struct kern_ipc_perm member. This means the dependencies of the msg_queue security hooks can be simplified by passing the kern_ipc_perm member of msg_queue. Making this change will allow struct msg_queue to become private to ipc/msg.c. Signed-off-by: "Eric W. Biederman" --- ipc/msg.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'ipc') diff --git a/ipc/msg.c b/ipc/msg.c index 0dcc6699dc53..cdfab0825fce 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -101,7 +101,7 @@ static void msg_rcu_free(struct rcu_head *head) struct kern_ipc_perm *p = container_of(head, struct kern_ipc_perm, rcu); struct msg_queue *msq = container_of(p, struct msg_queue, q_perm); - security_msg_queue_free(msq); + security_msg_queue_free(&msq->q_perm); kvfree(msq); } @@ -127,7 +127,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) msq->q_perm.key = key; msq->q_perm.security = NULL; - retval = security_msg_queue_alloc(msq); + retval = security_msg_queue_alloc(&msq->q_perm); if (retval) { kvfree(msq); return retval; @@ -258,9 +258,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) */ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) { - struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); - - return security_msg_queue_associate(msq, msgflg); + return security_msg_queue_associate(ipcp, msgflg); } SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) @@ -380,7 +378,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd, msq = container_of(ipcp, struct msg_queue, q_perm); - err = security_msg_queue_msgctl(msq, cmd); + err = security_msg_queue_msgctl(&msq->q_perm, cmd); if (err) goto out_unlock1; @@ -502,7 +500,7 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid, if (ipcperms(ns, &msq->q_perm, S_IRUGO)) goto out_unlock; - err = security_msg_queue_msgctl(msq, cmd); + err = security_msg_queue_msgctl(&msq->q_perm, cmd); if (err) goto out_unlock; @@ -718,7 +716,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg, list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) { if (testmsg(msg, msr->r_msgtype, msr->r_mode) && - !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, + !security_msg_queue_msgrcv(&msq->q_perm, msg, msr->r_tsk, msr->r_msgtype, msr->r_mode)) { list_del(&msr->r_list); @@ -784,7 +782,7 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext, goto out_unlock0; } - err = security_msg_queue_msgsnd(msq, msg, msgflg); + err = security_msg_queue_msgsnd(&msq->q_perm, msg, msgflg); if (err) goto out_unlock0; @@ -960,7 +958,7 @@ static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode) list_for_each_entry(msg, &msq->q_messages, m_list) { if (testmsg(msg, *msgtyp, mode) && - !security_msg_queue_msgrcv(msq, msg, current, + !security_msg_queue_msgrcv(&msq->q_perm, msg, current, *msgtyp, mode)) { if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) { *msgtyp = msg->m_type - 1; -- cgit v1.2.3 From 1a5c1349d105df5196ad9025e271b02a4dc05aee Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:30:56 -0500 Subject: sem: Move struct sem and struct sem_array into ipc/sem.c All of the users are now in ipc/sem.c so make the definitions local to that file to make code maintenance easier. AKA to prevent rebuilding the entire kernel when one of these files is changed. Signed-off-by: "Eric W. Biederman" --- ipc/sem.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index 01f5c63670ae..d661c491b0a5 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -88,6 +88,40 @@ #include #include "util.h" +/* One semaphore structure for each semaphore in the system. */ +struct sem { + int semval; /* current value */ + /* + * PID of the process that last modified the semaphore. For + * Linux, specifically these are: + * - semop + * - semctl, via SETVAL and SETALL. + * - at task exit when performing undo adjustments (see exit_sem). + */ + int sempid; + spinlock_t lock; /* spinlock for fine-grained semtimedop */ + struct list_head pending_alter; /* pending single-sop operations */ + /* that alter the semaphore */ + struct list_head pending_const; /* pending single-sop operations */ + /* that do not alter the semaphore*/ + time_t sem_otime; /* candidate for sem_otime */ +} ____cacheline_aligned_in_smp; + +/* One sem_array data structure for each set of semaphores in the system. */ +struct sem_array { + struct kern_ipc_perm sem_perm; /* permissions .. see ipc.h */ + time64_t sem_ctime; /* create/last semctl() time */ + struct list_head pending_alter; /* pending operations */ + /* that alter the array */ + struct list_head pending_const; /* pending complex operations */ + /* that do not alter semvals */ + struct list_head list_id; /* undo requests on this array */ + int sem_nsems; /* no. of semaphores in array */ + int complex_count; /* pending complex operations */ + unsigned int use_global_lock;/* >0: global lock required */ + + struct sem sems[]; +} __randomize_layout; /* One queue for each sleeping process in the system. */ struct sem_queue { -- cgit v1.2.3 From a2e102cd3cdd8b7a14e08716510707b15802073f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:34:44 -0500 Subject: shm: Move struct shmid_kernel into ipc/shm.c All of the users are now in ipc/shm.c so make the definition local to that file to make code maintenance easier. AKA to prevent rebuilding the entire kernel when struct shmid_kernel changes. Signed-off-by: "Eric W. Biederman" --- ipc/shm.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 387a786e7be1..0565669ebe5c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -48,6 +48,28 @@ #include "util.h" +struct shmid_kernel /* private to the kernel */ +{ + struct kern_ipc_perm shm_perm; + struct file *shm_file; + unsigned long shm_nattch; + unsigned long shm_segsz; + time64_t shm_atim; + time64_t shm_dtim; + time64_t shm_ctim; + pid_t shm_cprid; + pid_t shm_lprid; + struct user_struct *mlock_user; + + /* The task created the shm object. NULL if the task is dead. */ + struct task_struct *shm_creator; + struct list_head shm_clist; /* list by creator */ +} __randomize_layout; + +/* shm_mode upper byte flags */ +#define SHM_DEST 01000 /* segment will be destroyed on last detach */ +#define SHM_LOCKED 02000 /* segment will not be swapped */ + struct shm_file_data { int id; struct ipc_namespace *ns; -- cgit v1.2.3 From 34b56df922b10ac2876f268c522951785bf333fd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:37:34 -0500 Subject: msg: Move struct msg_queue into ipc/msg.c All of the users are now in ipc/msg.c so make the definition local to that file to make code maintenance easier. AKA to prevent rebuilding the entire kernel when struct msg_queue changes. Signed-off-by: "Eric W. Biederman" --- ipc/msg.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'ipc') diff --git a/ipc/msg.c b/ipc/msg.c index cdfab0825fce..af5a963306c4 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -43,6 +43,23 @@ #include #include "util.h" +/* one msq_queue structure for each present queue on the system */ +struct msg_queue { + struct kern_ipc_perm q_perm; + time64_t q_stime; /* last msgsnd time */ + time64_t q_rtime; /* last msgrcv time */ + time64_t q_ctime; /* last change time */ + unsigned long q_cbytes; /* current number of bytes on queue */ + unsigned long q_qnum; /* number of messages in queue */ + unsigned long q_qbytes; /* max number of bytes on queue */ + pid_t q_lspid; /* pid of last msgsnd */ + pid_t q_lrpid; /* last receive pid */ + + struct list_head q_messages; + struct list_head q_receivers; + struct list_head q_senders; +} __randomize_layout; + /* one msg_receiver structure for each sleeping receiver */ struct msg_receiver { struct list_head r_list; -- cgit v1.2.3 From f83a396d06d499029fe6d32e326605a2b5ca4eff Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 22 Mar 2018 21:45:50 -0500 Subject: ipc: Move IPCMNI from include/ipc.h into ipc/util.h The definition IPCMNI is only used in ipc/util.h and ipc/util.c. So there is no reason to keep it in a header file that the whole kernel can see. Move it into util.h to simplify future maintenance. Signed-off-by: "Eric W. Biederman" --- ipc/util.h | 1 + 1 file changed, 1 insertion(+) (limited to 'ipc') diff --git a/ipc/util.h b/ipc/util.h index 89b8ec176fc4..959c10eb9cc1 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -15,6 +15,7 @@ #include #include +#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ #define SEQ_MULTIPLIER (IPCMNI) int sem_init(void); -- cgit v1.2.3 From 03f1fc09180b345582889a344b012d069b3a6dbe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 23 Mar 2018 00:22:05 -0500 Subject: ipc/util: Helpers for making the sysvipc operations pid namespace aware Capture the pid namespace when /proc/sysvipc/msg /proc/sysvipc/shm and /proc/sysvipc/sem are opened, and make it available through the new helper ipc_seq_pid_ns. This makes it possible to report the pids in these files in the pid namespace of the opener of the files. Implement ipc_update_pid. A simple impline helper that will only update a struct pid pointer if the new value does not equal the old value. This removes the need for wordy code sequences like: old = object->pid; object->pid = new; put_pid(old); and old = object->pid; if (old != new) { object->pid = new; put_pid(old); } Allowing the following to be written instead: ipc_update_pid(&object->pid, new); Which is easier to read and ensures that the pid reference count is not touched the old and the new values are the same. Not touching the reference count in this case is important to help avoid issues like af_unix experienced, where multiple threads of the same process managed to bounce the struct pid between cpu cache lines, but updating the pids reference count. Signed-off-by: "Eric W. Biederman" --- ipc/util.c | 9 +++++++++ ipc/util.h | 11 +++++++++++ 2 files changed, 20 insertions(+) (limited to 'ipc') diff --git a/ipc/util.c b/ipc/util.c index 4ed5a17dd06f..3783b7991cc7 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -747,9 +747,16 @@ int ipc_parse_version(int *cmd) #ifdef CONFIG_PROC_FS struct ipc_proc_iter { struct ipc_namespace *ns; + struct pid_namespace *pid_ns; struct ipc_proc_iface *iface; }; +struct pid_namespace *ipc_seq_pid_ns(struct seq_file *s) +{ + struct ipc_proc_iter *iter = s->private; + return iter->pid_ns; +} + /* * This routine locks the ipc structure found at least at position pos. */ @@ -872,6 +879,7 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file) iter->iface = PDE_DATA(inode); iter->ns = get_ipc_ns(current->nsproxy->ipc_ns); + iter->pid_ns = get_pid_ns(task_active_pid_ns(current)); return 0; } @@ -881,6 +889,7 @@ static int sysvipc_proc_release(struct inode *inode, struct file *file) struct seq_file *seq = file->private_data; struct ipc_proc_iter *iter = seq->private; put_ipc_ns(iter->ns); + put_pid_ns(iter->pid_ns); return seq_release_private(inode, file); } diff --git a/ipc/util.h b/ipc/util.h index 959c10eb9cc1..e39ed9705f99 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -23,6 +23,7 @@ int msg_init(void); void shm_init(void); struct ipc_namespace; +struct pid_namespace; #ifdef CONFIG_POSIX_MQUEUE extern void mq_clear_sbinfo(struct ipc_namespace *ns); @@ -86,6 +87,7 @@ int ipc_init_ids(struct ipc_ids *); #ifdef CONFIG_PROC_FS void __init ipc_init_proc_interface(const char *path, const char *header, int ids, int (*show)(struct seq_file *, void *)); +struct pid_namespace *ipc_seq_pid_ns(struct seq_file *); #else #define ipc_init_proc_interface(path, header, ids, show) do {} while (0) #endif @@ -150,6 +152,15 @@ struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns, struct ipc_ids *ids, int id, int cmd, struct ipc64_perm *perm, int extra_perm); +static inline void ipc_update_pid(struct pid **pos, struct pid *pid) +{ + struct pid *old = *pos; + if (old != pid) { + *pos = get_pid(pid); + put_pid(old); + } +} + #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION /* On IA-64, we always use the "64-bit version" of the IPC structures. */ # define ipc_parse_version(cmd) IPC_64 -- cgit v1.2.3 From 98f929b1bd4d0b7c7a77d0d9776d1b924db2e454 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 23 Mar 2018 00:29:57 -0500 Subject: ipc/shm: Fix shmctl(..., IPC_STAT, ...) between pid namespaces. Today shm_cpid and shm_lpid are remembered in the pid namespace of the creator and the processes that last touched a sysvipc shared memory segment. If you have processes in multiple pid namespaces that is just wrong, and I don't know how this has been over-looked for so long. As only creation and shared memory attach and shared memory detach update the pids I do not expect there to be a repeat of the issues when struct pid was attached to each af_unix skb, which in some notable cases cut the performance in half. The problem was threads of the same process updating same struct pid from different cpus causing the cache line to be highly contended and bounce between cpus. As creation, attach, and detach are expected to be rare operations for sysvipc shared memory segments I do not expect that kind of cache line ping pong to cause probems. In addition because the pid is at a fixed location in the structure instead of being dynamic on a skb, the reference count of the pid does not need to be updated on each operation if the pid is the same. This ability to simply skip the pid reference count changes if the pid is unchanging further reduces the likelihood of the a cache line holding a pid reference count ping-ponging between cpus. Fixes: b488893a390e ("pid namespaces: changes to show virtual ids to user") Reviewed-by: Nagarathnam Muthusamy Signed-off-by: "Eric W. Biederman" --- ipc/shm.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 0565669ebe5c..932b7e411c6c 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -57,8 +57,8 @@ struct shmid_kernel /* private to the kernel */ time64_t shm_atim; time64_t shm_dtim; time64_t shm_ctim; - pid_t shm_cprid; - pid_t shm_lprid; + struct pid *shm_cprid; + struct pid *shm_lprid; struct user_struct *mlock_user; /* The task created the shm object. NULL if the task is dead. */ @@ -226,7 +226,7 @@ static int __shm_open(struct vm_area_struct *vma) return PTR_ERR(shp); shp->shm_atim = ktime_get_real_seconds(); - shp->shm_lprid = task_tgid_vnr(current); + ipc_update_pid(&shp->shm_lprid, task_tgid(current)); shp->shm_nattch++; shm_unlock(shp); return 0; @@ -267,6 +267,8 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) user_shm_unlock(i_size_read(file_inode(shm_file)), shp->mlock_user); fput(shm_file); + ipc_update_pid(&shp->shm_cprid, NULL); + ipc_update_pid(&shp->shm_lprid, NULL); ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); } @@ -311,7 +313,7 @@ static void shm_close(struct vm_area_struct *vma) if (WARN_ON_ONCE(IS_ERR(shp))) goto done; /* no-op */ - shp->shm_lprid = task_tgid_vnr(current); + ipc_update_pid(&shp->shm_lprid, task_tgid(current)); shp->shm_dtim = ktime_get_real_seconds(); shp->shm_nattch--; if (shm_may_destroy(ns, shp)) @@ -614,8 +616,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (IS_ERR(file)) goto no_file; - shp->shm_cprid = task_tgid_vnr(current); - shp->shm_lprid = 0; + shp->shm_cprid = get_pid(task_tgid(current)); + shp->shm_lprid = NULL; shp->shm_atim = shp->shm_dtim = 0; shp->shm_ctim = ktime_get_real_seconds(); shp->shm_segsz = size; @@ -648,6 +650,8 @@ no_id: user_shm_unlock(size, shp->mlock_user); fput(file); no_file: + ipc_update_pid(&shp->shm_cprid, NULL); + ipc_update_pid(&shp->shm_lprid, NULL); call_rcu(&shp->shm_perm.rcu, shm_rcu_free); return error; } @@ -970,8 +974,8 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid, tbuf->shm_atime = shp->shm_atim; tbuf->shm_dtime = shp->shm_dtim; tbuf->shm_ctime = shp->shm_ctim; - tbuf->shm_cpid = shp->shm_cprid; - tbuf->shm_lpid = shp->shm_lprid; + tbuf->shm_cpid = pid_vnr(shp->shm_cprid); + tbuf->shm_lpid = pid_vnr(shp->shm_lprid); tbuf->shm_nattch = shp->shm_nattch; ipc_unlock_object(&shp->shm_perm); @@ -1605,6 +1609,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) #ifdef CONFIG_PROC_FS static int sysvipc_shm_proc_show(struct seq_file *s, void *it) { + struct pid_namespace *pid_ns = ipc_seq_pid_ns(s); struct user_namespace *user_ns = seq_user_ns(s); struct kern_ipc_perm *ipcp = it; struct shmid_kernel *shp; @@ -1627,8 +1632,8 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it) shp->shm_perm.id, shp->shm_perm.mode, shp->shm_segsz, - shp->shm_cprid, - shp->shm_lprid, + pid_nr_ns(shp->shm_cprid, pid_ns), + pid_nr_ns(shp->shm_lprid, pid_ns), shp->shm_nattch, from_kuid_munged(user_ns, shp->shm_perm.uid), from_kgid_munged(user_ns, shp->shm_perm.gid), -- cgit v1.2.3 From 39a4940eaa185910bb802ca9829c12268fd2c855 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 23 Mar 2018 00:42:21 -0500 Subject: ipc/msg: Fix msgctl(..., IPC_STAT, ...) between pid namespaces Today msg_lspid and msg_lrpid are remembered in the pid namespace of the creator and the processes that last send or received a sysvipc message. If you have processes in multiple pid namespaces that is just wrong. The process ids reported will not make the least bit of sense. This fix is slightly more susceptible to a performance problem than the related fix for System V shared memory. By definition the pids are updated by msgsnd and msgrcv, the fast path of System V message queues. The only concern over the previous implementation is the incrementing and decrementing of the pid reference count. As that is the only difference and multiple updates by of the task_tgid by threads in the same process have been shown in af_unix sockets to create a cache line ping-pong between cpus of the same processor. In this case I don't expect cache lines holding pid reference counts to ping pong between cpus. As senders and receivers update different pids there is a natural separation there. Further if multiple threads of the same process either send or receive messages the pid will be updated to the same value and ipc_update_pid will avoid the reference count update. Which means in the common case I expect msg_lspid and msg_lrpid to remain constant, and reference counts not to be updated when messages are sent. In rare cases it may be possible to trigger the issue which was observed for af_unix sockets, but it will require multiple processes with multiple threads to be either sending or receiving messages. It just does not feel likely that anyone would do that in practice. This change updates msgctl(..., IPC_STAT, ...) to return msg_lspid and msg_lrpid in the pid namespace of the process calling stat. This change also updates cat /proc/sysvipc/msg to return print msg_lspid and msg_lrpid in the pid namespace of the process that opened the proc file. Fixes: b488893a390e ("pid namespaces: changes to show virtual ids to user") Reviewed-by: Nagarathnam Muthusamy Signed-off-by: "Eric W. Biederman" --- ipc/msg.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'ipc') diff --git a/ipc/msg.c b/ipc/msg.c index af5a963306c4..825ad585a6ff 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -52,8 +52,8 @@ struct msg_queue { unsigned long q_cbytes; /* current number of bytes on queue */ unsigned long q_qnum; /* number of messages in queue */ unsigned long q_qbytes; /* max number of bytes on queue */ - pid_t q_lspid; /* pid of last msgsnd */ - pid_t q_lrpid; /* last receive pid */ + struct pid *q_lspid; /* pid of last msgsnd */ + struct pid *q_lrpid; /* last receive pid */ struct list_head q_messages; struct list_head q_receivers; @@ -154,7 +154,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) msq->q_ctime = ktime_get_real_seconds(); msq->q_cbytes = msq->q_qnum = 0; msq->q_qbytes = ns->msg_ctlmnb; - msq->q_lspid = msq->q_lrpid = 0; + msq->q_lspid = msq->q_lrpid = NULL; INIT_LIST_HEAD(&msq->q_messages); INIT_LIST_HEAD(&msq->q_receivers); INIT_LIST_HEAD(&msq->q_senders); @@ -267,6 +267,8 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) free_msg(msg); } atomic_sub(msq->q_cbytes, &ns->msg_bytes); + ipc_update_pid(&msq->q_lspid, NULL); + ipc_update_pid(&msq->q_lrpid, NULL); ipc_rcu_putref(&msq->q_perm, msg_rcu_free); } @@ -536,8 +538,8 @@ static int msgctl_stat(struct ipc_namespace *ns, int msqid, p->msg_cbytes = msq->q_cbytes; p->msg_qnum = msq->q_qnum; p->msg_qbytes = msq->q_qbytes; - p->msg_lspid = msq->q_lspid; - p->msg_lrpid = msq->q_lrpid; + p->msg_lspid = pid_vnr(msq->q_lspid); + p->msg_lrpid = pid_vnr(msq->q_lrpid); ipc_unlock_object(&msq->q_perm); rcu_read_unlock(); @@ -741,7 +743,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg, wake_q_add(wake_q, msr->r_tsk); WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG)); } else { - msq->q_lrpid = task_pid_vnr(msr->r_tsk); + ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk)); msq->q_rtime = get_seconds(); wake_q_add(wake_q, msr->r_tsk); @@ -842,7 +844,7 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext, } - msq->q_lspid = task_tgid_vnr(current); + ipc_update_pid(&msq->q_lspid, task_tgid(current)); msq->q_stime = get_seconds(); if (!pipelined_send(msq, msg, &wake_q)) { @@ -1060,7 +1062,7 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); - msq->q_lrpid = task_tgid_vnr(current); + ipc_update_pid(&msq->q_lrpid, task_tgid(current)); msq->q_cbytes -= msg->m_ts; atomic_sub(msg->m_ts, &ns->msg_bytes); atomic_dec(&ns->msg_hdrs); @@ -1202,6 +1204,7 @@ void msg_exit_ns(struct ipc_namespace *ns) #ifdef CONFIG_PROC_FS static int sysvipc_msg_proc_show(struct seq_file *s, void *it) { + struct pid_namespace *pid_ns = ipc_seq_pid_ns(s); struct user_namespace *user_ns = seq_user_ns(s); struct kern_ipc_perm *ipcp = it; struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm); @@ -1213,8 +1216,8 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it) msq->q_perm.mode, msq->q_cbytes, msq->q_qnum, - msq->q_lspid, - msq->q_lrpid, + pid_nr_ns(msq->q_lspid, pid_ns), + pid_nr_ns(msq->q_lrpid, pid_ns), from_kuid_munged(user_ns, msq->q_perm.uid), from_kgid_munged(user_ns, msq->q_perm.gid), from_kuid_munged(user_ns, msq->q_perm.cuid), -- cgit v1.2.3 From 51d6f2635b39709ee5e62479be23d423b760292c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 23 Mar 2018 01:11:29 -0500 Subject: ipc/sem: Fix semctl(..., GETPID, ...) between pid namespaces Today the last process to update a semaphore is remembered and reported in the pid namespace of that process. If there are processes in any other pid namespace querying that process id with GETPID the result will be unusable nonsense as it does not make any sense in your own pid namespace. Due to ipc_update_pid I don't think you will be able to get System V ipc semaphores into a troublesome cache line ping-pong. Using struct pids from separate process are not a problem because they do not share a cache line. Using struct pid from different threads of the same process are unlikely to be a problem as the reference count update can be avoided. Further linux futexes are a much better tool for the job of mutual exclusion between processes than System V semaphores. So I expect programs that are performance limited by their interprocess mutual exclusion primitive will be using futexes. So while it is possible that enhancing the storage of the last rocess of a System V semaphore from an integer to a struct pid will cause a performance regression because of the effect of frequently updating the pid reference count. I don't expect that to happen in practice. This change updates semctl(..., GETPID, ...) to return the process id of the last process to update a semphore inthe pid namespace of the calling process. Fixes: b488893a390e ("pid namespaces: changes to show virtual ids to user") Signed-off-by: "Eric W. Biederman" --- ipc/sem.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'ipc') diff --git a/ipc/sem.c b/ipc/sem.c index d661c491b0a5..47b263960524 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -98,7 +98,7 @@ struct sem { * - semctl, via SETVAL and SETALL. * - at task exit when performing undo adjustments (see exit_sem). */ - int sempid; + struct pid *sempid; spinlock_t lock; /* spinlock for fine-grained semtimedop */ struct list_head pending_alter; /* pending single-sop operations */ /* that alter the semaphore */ @@ -128,7 +128,7 @@ struct sem_queue { struct list_head list; /* queue of pending operations */ struct task_struct *sleeper; /* this process */ struct sem_undo *undo; /* undo structure */ - int pid; /* process id of requesting process */ + struct pid *pid; /* process id of requesting process */ int status; /* completion status of operation */ struct sembuf *sops; /* array of pending operations */ struct sembuf *blocking; /* the operation that blocked */ @@ -628,7 +628,8 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) */ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) { - int result, sem_op, nsops, pid; + int result, sem_op, nsops; + struct pid *pid; struct sembuf *sop; struct sem *curr; struct sembuf *sops; @@ -666,7 +667,7 @@ static int perform_atomic_semop_slow(struct sem_array *sma, struct sem_queue *q) sop--; pid = q->pid; while (sop >= sops) { - sma->sems[sop->sem_num].sempid = pid; + ipc_update_pid(&sma->sems[sop->sem_num].sempid, pid); sop--; } @@ -753,7 +754,7 @@ static int perform_atomic_semop(struct sem_array *sma, struct sem_queue *q) un->semadj[sop->sem_num] = undo; } curr->semval += sem_op; - curr->sempid = q->pid; + ipc_update_pid(&curr->sempid, q->pid); } return 0; @@ -1160,6 +1161,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) unlink_queue(sma, q); wake_up_sem_queue_prepare(q, -EIDRM, &wake_q); } + ipc_update_pid(&sem->sempid, NULL); } /* Remove the semaphore set from the IDR */ @@ -1352,7 +1354,7 @@ static int semctl_setval(struct ipc_namespace *ns, int semid, int semnum, un->semadj[semnum] = 0; curr->semval = val; - curr->sempid = task_tgid_vnr(current); + ipc_update_pid(&curr->sempid, task_tgid(current)); sma->sem_ctime = ktime_get_real_seconds(); /* maybe some queued-up processes were waiting for this */ do_smart_update(sma, NULL, 0, 0, &wake_q); @@ -1473,7 +1475,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < nsems; i++) { sma->sems[i].semval = sem_io[i]; - sma->sems[i].sempid = task_tgid_vnr(current); + ipc_update_pid(&sma->sems[i].sempid, task_tgid(current)); } ipc_assert_locked_object(&sma->sem_perm); @@ -1505,7 +1507,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, err = curr->semval; goto out_unlock; case GETPID: - err = curr->sempid; + err = pid_vnr(curr->sempid); goto out_unlock; case GETNCNT: err = count_semcnt(sma, semnum, 0); @@ -2024,7 +2026,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops, queue.sops = sops; queue.nsops = nsops; queue.undo = un; - queue.pid = task_tgid_vnr(current); + queue.pid = task_tgid(current); queue.alter = alter; queue.dupsop = dupsop; @@ -2318,7 +2320,7 @@ void exit_sem(struct task_struct *tsk) semaphore->semval = 0; if (semaphore->semval > SEMVMX) semaphore->semval = SEMVMX; - semaphore->sempid = task_tgid_vnr(current); + ipc_update_pid(&semaphore->sempid, task_tgid(current)); } } /* maybe some queued-up processes were waiting for this */ -- cgit v1.2.3 From 50ab44b1c5d1b13305ce8acb74c8e50e0dcbaedc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 23 Mar 2018 23:41:55 -0500 Subject: ipc: Directly call the security hook in ipc_ops.associate After the last round of cleanups the shm, sem, and msg associate operations just became trivial wrappers around the appropriate security method. Simplify things further by just calling the security method directly. Signed-off-by: "Eric W. Biederman" --- ipc/msg.c | 10 +--------- ipc/sem.c | 10 +--------- ipc/shm.c | 10 +--------- 3 files changed, 3 insertions(+), 27 deletions(-) (limited to 'ipc') diff --git a/ipc/msg.c b/ipc/msg.c index 825ad585a6ff..d667dd8e97ab 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -272,20 +272,12 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) ipc_rcu_putref(&msq->q_perm, msg_rcu_free); } -/* - * Called with msg_ids.rwsem and ipcp locked. - */ -static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) -{ - return security_msg_queue_associate(ipcp, msgflg); -} - SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) { struct ipc_namespace *ns; static const struct ipc_ops msg_ops = { .getnew = newque, - .associate = msg_security, + .associate = security_msg_queue_associate, }; struct ipc_params msg_params; diff --git a/ipc/sem.c b/ipc/sem.c index 47b263960524..09d54af076a4 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -564,14 +564,6 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) } -/* - * Called with sem_ids.rwsem and ipcp locked. - */ -static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg) -{ - return security_sem_associate(ipcp, semflg); -} - /* * Called with sem_ids.rwsem and ipcp locked. */ @@ -592,7 +584,7 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) struct ipc_namespace *ns; static const struct ipc_ops sem_ops = { .getnew = newary, - .associate = sem_security, + .associate = security_sem_associate, .more_checks = sem_more_checks, }; struct ipc_params sem_params; diff --git a/ipc/shm.c b/ipc/shm.c index 932b7e411c6c..018db3d0e70e 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -656,14 +656,6 @@ no_file: return error; } -/* - * Called with shm_ids.rwsem and ipcp locked. - */ -static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg) -{ - return security_shm_associate(ipcp, shmflg); -} - /* * Called with shm_ids.rwsem and ipcp locked. */ @@ -684,7 +676,7 @@ SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) struct ipc_namespace *ns; static const struct ipc_ops shm_ops = { .getnew = newseg, - .associate = shm_security, + .associate = security_shm_associate, .more_checks = shm_more_checks, }; struct ipc_params shm_params; -- cgit v1.2.3 From 2236d4d39035b9839944603ec4b65ce71180a9ea Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 28 Mar 2018 13:38:55 -0500 Subject: ipc/shm: Fix pid freeing. The 0day kernel test build report reported an oops: > > IP: put_pid+0x22/0x5c > PGD 19efa067 P4D 19efa067 PUD 0 > Oops: 0000 [#1] > CPU: 0 PID: 727 Comm: trinity Not tainted 4.16.0-rc2-00010-g98f929b #1 > RIP: 0010:put_pid+0x22/0x5c > RSP: 0018:ffff986719f73e48 EFLAGS: 00010202 > RAX: 00000006d765f710 RBX: ffff98671a4fa4d0 RCX: ffff986719f73d40 > RDX: 000000006f6e6125 RSI: 0000000000000000 RDI: ffffffffa01e6d21 > RBP: ffffffffa0955fe0 R08: 0000000000000020 R09: 0000000000000000 > R10: 0000000000000078 R11: ffff986719f73e76 R12: 0000000000001000 > R13: 00000000ffffffea R14: 0000000054000fb0 R15: 0000000000000000 > FS: 00000000028c2880(0000) GS:ffffffffa06ad000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 0000000677846439 CR3: 0000000019fc1005 CR4: 00000000000606b0 > Call Trace: > ? ipc_update_pid+0x36/0x3e > ? newseg+0x34c/0x3a6 > ? ipcget+0x5d/0x528 > ? entry_SYSCALL_64_after_hwframe+0x52/0xb7 > ? SyS_shmget+0x5a/0x84 > ? do_syscall_64+0x194/0x1b3 > ? entry_SYSCALL_64_after_hwframe+0x42/0xb7 > Code: ff 05 e7 20 9b 03 58 c9 c3 48 ff 05 85 21 9b 03 48 85 ff 74 4f 8b 47 04 8b 17 48 ff 05 7c 21 9b 03 48 83 c0 03 48 c1 e0 04 ff ca <48> 8b 44 07 08 74 1f 48 ff 05 6c 21 9b 03 ff 0f 0f 94 c2 48 ff > RIP: put_pid+0x22/0x5c RSP: ffff986719f73e48 > CR2: 0000000677846439 > ---[ end trace ab8c5cb4389d37c5 ]--- > Kernel panic - not syncing: Fatal exception In newseg when changing shm_cprid and shm_lprid from pid_t to struct pid* I misread the kvmalloc as kvzalloc and thought shp was initialized to 0. As that is not the case it is not safe to for the error handling to address shm_cprid and shm_lprid before they are initialized. Therefore move the cleanup of shm_cprid and shm_lprid from the no_file error cleanup path to the no_id error cleanup path. Ensuring that an early error exit won't cause the oops above. Reported-by: kernel test robot Reviewed-by: Nagarathnam Muthusamy Signed-off-by: Eric W. Biederman --- ipc/shm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'ipc') diff --git a/ipc/shm.c b/ipc/shm.c index 018db3d0e70e..35bdfe76d11b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -646,12 +646,12 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) return error; no_id: + ipc_update_pid(&shp->shm_cprid, NULL); + ipc_update_pid(&shp->shm_lprid, NULL); if (is_file_hugepages(file) && shp->mlock_user) user_shm_unlock(size, shp->mlock_user); fput(file); no_file: - ipc_update_pid(&shp->shm_cprid, NULL); - ipc_update_pid(&shp->shm_lprid, NULL); call_rcu(&shp->shm_perm.rcu, shm_rcu_free); return error; } -- cgit v1.2.3