aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/async.c3
-rw-r--r--kernel/audit.c40
-rw-r--r--kernel/audit_tree.c26
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/auditfilter.c1
-rw-r--r--kernel/auditsc.c20
-rw-r--r--kernel/fork.c8
-rw-r--r--kernel/module.c27
-rw-r--r--kernel/pid.c15
-rw-r--r--kernel/pid_namespace.c4
-rw-r--r--kernel/printk.c5
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/rwsem.c10
-rw-r--r--kernel/signal.c9
-rw-r--r--kernel/trace/trace.c17
15 files changed, 143 insertions, 46 deletions
diff --git a/kernel/async.c b/kernel/async.c
index 9d311838485..a1d585c351d 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -196,6 +196,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
atomic_inc(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);
+ /* mark that this task has queued an async job, used by module init */
+ current->flags |= PF_USED_ASYNC;
+
/* schedule for execution */
queue_work(system_unbound_wq, &entry->work);
diff --git a/kernel/audit.c b/kernel/audit.c
index 40414e9143d..d596e5355f1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -272,6 +272,8 @@ static int audit_log_config_change(char *function_name, int new, int old,
int rc = 0;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ if (unlikely(!ab))
+ return rc;
audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
old, from_kuid(&init_user_ns, loginuid), sessionid);
if (sid) {
@@ -619,6 +621,8 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
}
*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+ if (unlikely(!*ab))
+ return rc;
audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",
task_tgid_vnr(current),
from_kuid(&init_user_ns, current_uid()),
@@ -1097,6 +1101,23 @@ static inline void audit_get_stamp(struct audit_context *ctx,
}
}
+/*
+ * Wait for auditd to drain the queue a little
+ */
+static void wait_for_auditd(unsigned long sleep_time)
+{
+ DECLARE_WAITQUEUE(wait, current);
+ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&audit_backlog_wait, &wait);
+
+ if (audit_backlog_limit &&
+ skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
+ schedule_timeout(sleep_time);
+
+ __set_current_state(TASK_RUNNING);
+ remove_wait_queue(&audit_backlog_wait, &wait);
+}
+
/* Obtain an audit buffer. This routine does locking to obtain the
* audit buffer, but then no locking is required for calls to
* audit_log_*format. If the tsk is a task that is currently in a
@@ -1142,20 +1163,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
while (audit_backlog_limit
&& skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
- if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time
- && time_before(jiffies, timeout_start + audit_backlog_wait_time)) {
+ if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
+ unsigned long sleep_time;
- /* Wait for auditd to drain the queue a little */
- DECLARE_WAITQUEUE(wait, current);
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&audit_backlog_wait, &wait);
-
- if (audit_backlog_limit &&
- skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
- schedule_timeout(timeout_start + audit_backlog_wait_time - jiffies);
-
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&audit_backlog_wait, &wait);
+ sleep_time = timeout_start + audit_backlog_wait_time -
+ jiffies;
+ if ((long)sleep_time > 0)
+ wait_for_auditd(sleep_time);
continue;
}
if (audit_rate_check() && printk_ratelimit())
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index e81175ef25f..642a89c4f3d 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -449,11 +449,26 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
return 0;
}
+static void audit_log_remove_rule(struct audit_krule *rule)
+{
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ if (unlikely(!ab))
+ return;
+ audit_log_format(ab, "op=");
+ audit_log_string(ab, "remove rule");
+ audit_log_format(ab, " dir=");
+ audit_log_untrustedstring(ab, rule->tree->pathname);
+ audit_log_key(ab, rule->filterkey);
+ audit_log_format(ab, " list=%d res=1", rule->listnr);
+ audit_log_end(ab);
+}
+
static void kill_rules(struct audit_tree *tree)
{
struct audit_krule *rule, *next;
struct audit_entry *entry;
- struct audit_buffer *ab;
list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
entry = container_of(rule, struct audit_entry, rule);
@@ -461,14 +476,7 @@ static void kill_rules(struct audit_tree *tree)
list_del_init(&rule->rlist);
if (rule->tree) {
/* not a half-baked one */
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
- audit_log_format(ab, "op=");
- audit_log_string(ab, "remove rule");
- audit_log_format(ab, " dir=");
- audit_log_untrustedstring(ab, rule->tree->pathname);
- audit_log_key(ab, rule->filterkey);
- audit_log_format(ab, " list=%d res=1", rule->listnr);
- audit_log_end(ab);
+ audit_log_remove_rule(rule);
rule->tree = NULL;
list_del_rcu(&entry->list);
list_del(&entry->rule.list);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 4a599f699ad..22831c4d369 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -240,6 +240,8 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
if (audit_enabled) {
struct audit_buffer *ab;
ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ if (unlikely(!ab))
+ return;
audit_log_format(ab, "auid=%u ses=%u op=",
from_kuid(&init_user_ns, audit_get_loginuid(current)),
audit_get_sessionid(current));
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 7f19f23d38a..f9fc54bbe06 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1144,7 +1144,6 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
* audit_receive_filter - apply all rules to the specified message type
* @type: audit message type
* @pid: target pid for netlink audit messages
- * @uid: target uid for netlink audit messages
* @seq: netlink audit message sequence (serial) number
* @data: payload data
* @datasz: size of payload data
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e37e6a12c5e..a371f857a0a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1464,14 +1464,14 @@ static void show_special(struct audit_context *context, int *call_panic)
audit_log_end(ab);
ab = audit_log_start(context, GFP_KERNEL,
AUDIT_IPC_SET_PERM);
+ if (unlikely(!ab))
+ return;
audit_log_format(ab,
"qbytes=%lx ouid=%u ogid=%u mode=%#ho",
context->ipc.qbytes,
context->ipc.perm_uid,
context->ipc.perm_gid,
context->ipc.perm_mode);
- if (!ab)
- return;
}
break; }
case AUDIT_MQ_OPEN: {
@@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags)
context->type = AUDIT_MMAP;
}
-static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+static void audit_log_task(struct audit_buffer *ab)
{
kuid_t auid, uid;
kgid_t gid;
@@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
audit_log_task_context(ab);
audit_log_format(ab, " pid=%d comm=", current->pid);
audit_log_untrustedstring(ab, current->comm);
+}
+
+static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
+{
+ audit_log_task(ab);
audit_log_format(ab, " reason=");
audit_log_string(ab, reason);
audit_log_format(ab, " sig=%ld", signr);
@@ -2715,6 +2720,8 @@ void audit_core_dumps(long signr)
return;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+ if (unlikely(!ab))
+ return;
audit_log_abend(ab, "memory violation", signr);
audit_log_end(ab);
}
@@ -2723,8 +2730,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
{
struct audit_buffer *ab;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
- audit_log_abend(ab, "seccomp", signr);
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
+ if (unlikely(!ab))
+ return;
+ audit_log_task(ab);
+ audit_log_format(ab, " sig=%ld", signr);
audit_log_format(ab, " syscall=%ld", syscall);
audit_log_format(ab, " compat=%d", is_compat_task());
audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current));
diff --git a/kernel/fork.c b/kernel/fork.c
index e05cff2429b..c535f33bbb9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1166,6 +1166,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
current->signal->flags & SIGNAL_UNKILLABLE)
return ERR_PTR(-EINVAL);
+ /*
+ * If the new process will be in a different pid namespace
+ * don't allow the creation of threads.
+ */
+ if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
+ (task_active_pid_ns(current) != current->nsproxy->pid_ns))
+ return ERR_PTR(-EINVAL);
+
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
diff --git a/kernel/module.c b/kernel/module.c
index 250092c1d57..b10b048367e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3013,6 +3013,12 @@ static int do_init_module(struct module *mod)
{
int ret = 0;
+ /*
+ * We want to find out whether @mod uses async during init. Clear
+ * PF_USED_ASYNC. async_schedule*() will set it.
+ */
+ current->flags &= ~PF_USED_ASYNC;
+
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_COMING, mod);
@@ -3058,8 +3064,25 @@ static int do_init_module(struct module *mod)
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_LIVE, mod);
- /* We need to finish all async code before the module init sequence is done */
- async_synchronize_full();
+ /*
+ * We need to finish all async code before the module init sequence
+ * is done. This has potential to deadlock. For example, a newly
+ * detected block device can trigger request_module() of the
+ * default iosched from async probing task. Once userland helper
+ * reaches here, async_synchronize_full() will wait on the async
+ * task waiting on request_module() and deadlock.
+ *
+ * This deadlock is avoided by perfomring async_synchronize_full()
+ * iff module init queued any async jobs. This isn't a full
+ * solution as it will deadlock the same if module loading from
+ * async jobs nests more than once; however, due to the various
+ * constraints, this hack seems to be the best option for now.
+ * Please refer to the following thread for details.
+ *
+ * http://thread.gmane.org/gmane.linux.kernel/1420814
+ */
+ if (current->flags & PF_USED_ASYNC)
+ async_synchronize_full();
mutex_lock(&module_mutex);
/* Drop initial reference. */
diff --git a/kernel/pid.c b/kernel/pid.c
index 36aa02ff17d..de9af600006 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -270,7 +270,6 @@ void free_pid(struct pid *pid)
wake_up_process(ns->child_reaper);
break;
case 0:
- ns->nr_hashed = -1;
schedule_work(&ns->proc_work);
break;
}
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
- if (ns->nr_hashed < 0)
+ if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
goto out_unlock;
for ( ; upid >= pid->numbers; --upid) {
hlist_add_head_rcu(&upid->pid_chain,
@@ -342,6 +341,13 @@ out_free:
goto out;
}
+void disable_pid_allocation(struct pid_namespace *ns)
+{
+ spin_lock_irq(&pidmap_lock);
+ ns->nr_hashed &= ~PIDNS_HASH_ADDING;
+ spin_unlock_irq(&pidmap_lock);
+}
+
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
struct hlist_node *elem;
@@ -573,6 +579,9 @@ void __init pidhash_init(void)
void __init pidmap_init(void)
{
+ /* Veryify no one has done anything silly */
+ BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
+
/* bump default and minimum pid_max based on number of cpus */
pid_max = min(pid_max_max, max_t(int, pid_max,
PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
@@ -584,7 +593,7 @@ void __init pidmap_init(void)
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
- init_pid_ns.nr_hashed = 1;
+ init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index fdbd0cdf271..c1c3dc1c602 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->level = level;
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
+ ns->nr_hashed = PIDNS_HASH_ADDING;
INIT_WORK(&ns->proc_work, proc_cleanup_work);
set_bit(0, ns->pidmap[0].page);
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
int rc;
struct task_struct *task, *me = current;
+ /* Don't allow any more processes into the pid namespace */
+ disable_pid_allocation(pid_ns);
+
/* Ignore SIGCHLD causing any terminated children to autoreap */
spin_lock_irq(&me->sighand->siglock);
me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
diff --git a/kernel/printk.c b/kernel/printk.c
index 19c0d7bcf24..357f714ddd4 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -870,10 +870,11 @@ static size_t print_time(u64 ts, char *buf)
if (!printk_time)
return 0;
+ rem_nsec = do_div(ts, 1000000000);
+
if (!buf)
- return 15;
+ return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
- rem_nsec = do_div(ts, 1000000000);
return sprintf(buf, "[%5lu.%06lu] ",
(unsigned long)ts, rem_nsec / 1000);
}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 1599157336a..612a5612685 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -139,7 +139,7 @@ void __ptrace_unlink(struct task_struct *child)
* RETURNS:
* 0 on success, -ESRCH if %child is not ready.
*/
-int ptrace_check_attach(struct task_struct *child, bool ignore_state)
+static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
{
int ret = -ESRCH;
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index 6850f53e02d..b3c6c3fcd84 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
EXPORT_SYMBOL(down_read_nested);
+void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
+{
+ might_sleep();
+ rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
+
+ LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+}
+
+EXPORT_SYMBOL(_down_write_nest_lock);
+
void down_write_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();
diff --git a/kernel/signal.c b/kernel/signal.c
index 00b4a6d4449..53cd5c4d117 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2528,11 +2528,8 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
*/
void set_current_blocked(sigset_t *newset)
{
- struct task_struct *tsk = current;
sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP));
- spin_lock_irq(&tsk->sighand->siglock);
- __set_task_blocked(tsk, newset);
- spin_unlock_irq(&tsk->sighand->siglock);
+ __set_current_blocked(newset);
}
void __set_current_blocked(const sigset_t *newset)
@@ -3205,7 +3202,6 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
if (nset) {
if (copy_from_user(&new_set, nset, sizeof(*nset)))
return -EFAULT;
- new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
new_blocked = current->blocked;
@@ -3223,7 +3219,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
return -EINVAL;
}
- __set_current_blocked(&new_blocked);
+ set_current_blocked(&new_blocked);
}
if (oset) {
@@ -3287,6 +3283,7 @@ SYSCALL_DEFINE1(ssetmask, int, newmask)
int old = current->blocked.sig[0];
sigset_t newset;
+ siginitset(&newset, newmask);
set_current_blocked(&newset);
return old;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e5125677efa..3c13e46d7d2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2899,6 +2899,8 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
+ buf[cnt] = 0;
+
trace_set_options(buf);
*ppos += cnt;
@@ -3452,7 +3454,7 @@ static int tracing_wait_pipe(struct file *filp)
return -EINTR;
/*
- * We block until we read something and tracing is enabled.
+ * We block until we read something and tracing is disabled.
* We still block if tracing is disabled, but we have never
* read anything. This allows a user to cat this file, and
* then enable tracing. But after we have read something,
@@ -3460,7 +3462,7 @@ static int tracing_wait_pipe(struct file *filp)
*
* iter->pos will be 0 if we haven't read anything.
*/
- if (tracing_is_enabled() && iter->pos)
+ if (!tracing_is_enabled() && iter->pos)
break;
}
@@ -4815,10 +4817,17 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
return ret;
if (buffer) {
- if (val)
+ mutex_lock(&trace_types_lock);
+ if (val) {
ring_buffer_record_on(buffer);
- else
+ if (current_trace->start)
+ current_trace->start(tr);
+ } else {
ring_buffer_record_off(buffer);
+ if (current_trace->stop)
+ current_trace->stop(tr);
+ }
+ mutex_unlock(&trace_types_lock);
}
(*ppos)++;