From c6ecf7ed3131961e5aeedb0efd217afa0808798f Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 14 Oct 2005 15:59:03 -0700
Subject: [PATCH] Add missing export of getnstimeofday()

Adds the missing EXPORT_SYMBOL_GPL for getnstimeofday() when
CONFIG_TIME_INTERPOLATION isn't set.  Needed by drivers/char/mmtimer.c

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/time.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/time.c b/kernel/time.c
index dd5ae1162a8..40c2410ac99 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -570,6 +570,7 @@ void getnstimeofday(struct timespec *tv)
 	tv->tv_sec = x.tv_sec;
 	tv->tv_nsec = x.tv_usec * NSEC_PER_USEC;
 }
+EXPORT_SYMBOL_GPL(getnstimeofday);
 #endif
 
 #if (BITS_PER_LONG < 64)
-- 
cgit v1.2.3


From 2cc78eb52bc1ae89f0a4fa5a00eb998dffde4a9f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Mon, 17 Oct 2005 09:10:15 -0700
Subject: Increase default RCU batching sharply

Dipankar made RCU limit the batch size to improve latency, but that
approach is unworkable: it can cause the RCU queues to grow without
bounds, since the batch limiter ended up limiting the callbacks.

So make the limit much higher, and start planning on instead limiting
the batch size by doing RCU callbacks more often if the queue looks like
it might be growing too long.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcupdate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index bef3b6901b7..dd99415b155 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -71,7 +71,7 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 
 /* Fake initialization required by compiler */
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
-static int maxbatch = 10;
+static int maxbatch = 10000;
 
 #ifndef __HAVE_ARCH_CMPXCHG
 /*
-- 
cgit v1.2.3


From 47d6b08334a43fafa61a587f721fa21ef65d81be Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Mon, 17 Oct 2005 18:49:42 +0400
Subject: [PATCH] posix-timers: fix task accounting

Make sure we release the task struct properly when releasing pending
timers.

release_task() does write_lock_irq(&tasklist_lock), so it can't race
with run_posix_cpu_timers() on any cpu.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/posix-cpu-timers.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index ad85d3f0dcc..7a51a5597c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -424,6 +424,7 @@ static void cleanup_timers(struct list_head *head,
 	cputime_t ptime = cputime_add(utime, stime);
 
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (cputime_lt(timer->expires.cpu, ptime)) {
@@ -436,6 +437,7 @@ static void cleanup_timers(struct list_head *head,
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (cputime_lt(timer->expires.cpu, utime)) {
@@ -448,6 +450,7 @@ static void cleanup_timers(struct list_head *head,
 
 	++head;
 	list_for_each_entry_safe(timer, next, head, entry) {
+		put_task_struct(timer->task);
 		timer->task = NULL;
 		list_del_init(&timer->entry);
 		if (timer->expires.sched < sched_time) {
-- 
cgit v1.2.3


From 5ee832dbc6770135ec8d63296af0a4374557bb79 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 17 Oct 2005 20:01:21 +0200
Subject: [PATCH] rcu: keep rcu callback event counter

This makes call_rcu() keep track of how many events there are on the RCU
list, and cause a reschedule event when the list gets too long.

This helps keep RCU event lists down.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/rcupdate.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'kernel')

diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index dd99415b155..2559d4b8f23 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -109,6 +109,10 @@ void fastcall call_rcu(struct rcu_head *head,
 	rdp = &__get_cpu_var(rcu_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
+
+	if (unlikely(++rdp->count > 10000))
+		set_need_resched();
+
 	local_irq_restore(flags);
 }
 
@@ -140,6 +144,12 @@ void fastcall call_rcu_bh(struct rcu_head *head,
 	rdp = &__get_cpu_var(rcu_bh_data);
 	*rdp->nxttail = head;
 	rdp->nxttail = &head->next;
+	rdp->count++;
+/*
+ *  Should we directly call rcu_do_batch() here ?
+ *  if (unlikely(rdp->count > 10000))
+ *      rcu_do_batch(rdp);
+ */
 	local_irq_restore(flags);
 }
 
@@ -157,6 +167,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		next = rdp->donelist = list->next;
 		list->func(list);
 		list = next;
+		rdp->count--;
 		if (++count >= maxbatch)
 			break;
 	}
-- 
cgit v1.2.3


From e03d13e985d48ac4885382c9e3b1510c78bd047f Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Wed, 19 Oct 2005 22:21:23 -0700
Subject: [PATCH] Fix cpu timers exit deadlock and races

Oleg Nesterov reported an SMP deadlock.  If there is a running timer
tracking a different process's CPU time clock when the process owning
the timer exits, we deadlock on tasklist_lock in posix_cpu_timer_del via
exit_itimers.

That code was using tasklist_lock to check for a race with __exit_signal
being called on the timer-target task and clearing its ->signal.
However, there is actually no such race.  __exit_signal will have called
posix_cpu_timers_exit and posix_cpu_timers_exit_group before it does
that.  Those will clear those k_itimer's association with the dying
task, so posix_cpu_timer_del will return early and never reach the code
in question.

In addition, posix_cpu_timer_del called from exit_itimers during execve
or directly from timer_delete in the process owning the timer can race
with an exiting timer-target task to cause a double put on timer-target
task struct.  Make sure we always access cpu_timers lists with sighand
lock held.

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Chris Wright <chrisw@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/posix-cpu-timers.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

(limited to 'kernel')

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 7a51a5597c3..b3f3edc475d 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -387,25 +387,19 @@ int posix_cpu_timer_del(struct k_itimer *timer)
 	if (unlikely(p == NULL))
 		return 0;
 
+	spin_lock(&p->sighand->siglock);
 	if (!list_empty(&timer->it.cpu.entry)) {
-		read_lock(&tasklist_lock);
-		if (unlikely(p->signal == NULL)) {
-			/*
-			 * We raced with the reaping of the task.
-			 * The deletion should have cleared us off the list.
-			 */
-			BUG_ON(!list_empty(&timer->it.cpu.entry));
-		} else {
-			/*
-			 * Take us off the task's timer list.
-			 */
-			spin_lock(&p->sighand->siglock);
-			list_del(&timer->it.cpu.entry);
-			spin_unlock(&p->sighand->siglock);
-		}
-		read_unlock(&tasklist_lock);
+		/*
+		 * Take us off the task's timer list.  We don't need to
+		 * take tasklist_lock and check for the task being reaped.
+		 * If it was reaped, it already called posix_cpu_timers_exit
+		 * and posix_cpu_timers_exit_group to clear all the timers
+		 * that pointed to it.
+		 */
+		list_del(&timer->it.cpu.entry);
+		put_task_struct(p);
 	}
-	put_task_struct(p);
+	spin_unlock(&p->sighand->siglock);
 
 	return 0;
 }
-- 
cgit v1.2.3


From d1209d049bbc3df66650f8417637be4f7b57b604 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 19 Oct 2005 21:23:51 -0700
Subject: [PATCH] Threads shouldn't inherit PF_NOFREEZE

The PF_NOFREEZE process flag should not be inherited when a thread is
forked.  This patch (as585) removes the flag from the child.

This problem is starting to show up more and more as drivers turn to the
kthread API instead of using kernel_thread().  As a result, their kernel
threads are now children of the kthread worker instead of modprobe, and
they inherit the PF_NOFREEZE flag.  This can cause problems during system
suspend; the kernel threads are not getting frozen as they ought to be.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 533ce27f4b2..280bd44ac44 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -848,7 +848,7 @@ static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
 
-	new_flags &= ~PF_SUPERPRIV;
+	new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
 	new_flags |= PF_FORKNOEXEC;
 	if (!(clone_flags & CLONE_PTRACE))
 		p->ptrace = 0;
-- 
cgit v1.2.3


From 9465bee863bc4c6cf1566c12d6f92a8133e3da5c Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 21 Oct 2005 15:36:00 -0700
Subject: Revert "Fix cpu timers exit deadlock and races"

Revert commit e03d13e985d48ac4885382c9e3b1510c78bd047f, to be replaced
by a much nicer fix from Roland.
---
 kernel/posix-cpu-timers.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'kernel')

diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index b3f3edc475d..7a51a5597c3 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -387,19 +387,25 @@ int posix_cpu_timer_del(struct k_itimer *timer)
 	if (unlikely(p == NULL))
 		return 0;
 
-	spin_lock(&p->sighand->siglock);
 	if (!list_empty(&timer->it.cpu.entry)) {
-		/*
-		 * Take us off the task's timer list.  We don't need to
-		 * take tasklist_lock and check for the task being reaped.
-		 * If it was reaped, it already called posix_cpu_timers_exit
-		 * and posix_cpu_timers_exit_group to clear all the timers
-		 * that pointed to it.
-		 */
-		list_del(&timer->it.cpu.entry);
-		put_task_struct(p);
+		read_lock(&tasklist_lock);
+		if (unlikely(p->signal == NULL)) {
+			/*
+			 * We raced with the reaping of the task.
+			 * The deletion should have cleared us off the list.
+			 */
+			BUG_ON(!list_empty(&timer->it.cpu.entry));
+		} else {
+			/*
+			 * Take us off the task's timer list.
+			 */
+			spin_lock(&p->sighand->siglock);
+			list_del(&timer->it.cpu.entry);
+			spin_unlock(&p->sighand->siglock);
+		}
+		read_unlock(&tasklist_lock);
 	}
-	spin_unlock(&p->sighand->siglock);
+	put_task_struct(p);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 25f407f0b668f5e4ebd5d13e1fb4306ba6427ead Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 21 Oct 2005 15:03:29 -0700
Subject: [PATCH] Call exit_itimers from do_exit, not __exit_signal

When I originally moved exit_itimers into __exit_signal, that was the only
place where we could reliably know it was the last thread in the group
dying, without races.  Since then we've gotten the signal_struct.live
counter, and do_exit can reliably do group-wide cleanup work.

This patch moves the call to do_exit, where it's made without locks.  This
avoids the deadlock issues that the old __exit_signal code's comment talks
about, and the one that Oleg found recently with process CPU timers.

[ This replaces e03d13e985d48ac4885382c9e3b1510c78bd047f, which is why
  it was just reverted. ]

Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c         |  1 +
 kernel/posix-timers.c |  2 +-
 kernel/signal.c       | 14 +-------------
 3 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 43077732619..3b25b182d2b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -843,6 +843,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
  		del_timer_sync(&tsk->signal->real_timer);
+		exit_itimers(tsk->signal);
 		acct_process(code);
 	}
 	exit_mm(tsk);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index b7b532acd9f..dda3cda73c7 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -1157,7 +1157,7 @@ retry_delete:
 }
 
 /*
- * This is called by __exit_signal, only when there are no more
+ * This is called by do_exit or de_thread, only when there are no more
  * references to the shared signal_struct.
  */
 void exit_itimers(struct signal_struct *sig)
diff --git a/kernel/signal.c b/kernel/signal.c
index 50c99264377..f2b96b08fb4 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -397,20 +397,8 @@ void __exit_signal(struct task_struct *tsk)
 	flush_sigqueue(&tsk->pending);
 	if (sig) {
 		/*
-		 * We are cleaning up the signal_struct here.  We delayed
-		 * calling exit_itimers until after flush_sigqueue, just in
-		 * case our thread-local pending queue contained a queued
-		 * timer signal that would have been cleared in
-		 * exit_itimers.  When that called sigqueue_free, it would
-		 * attempt to re-take the tasklist_lock and deadlock.  This
-		 * can never happen if we ensure that all queues the
-		 * timer's signal might be queued on have been flushed
-		 * first.  The shared_pending queue, and our own pending
-		 * queue are the only queues the timer could be on, since
-		 * there are no other threads left in the group and timer
-		 * signals are constrained to threads inside the group.
+		 * We are cleaning up the signal_struct here.
 		 */
-		exit_itimers(sig);
 		exit_thread_group_keys(sig);
 		kmem_cache_free(signal_cachep, sig);
 	}
-- 
cgit v1.2.3