Patches contributed by Eötvös Loránd University


commit 50dd26ba0947aa653f0e42897aad7a4adce4e620
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Jan 8 01:01:42 2006 -0800

    [PATCH] DEBUG_SLAB depends on SLAB
    
    Make DEBUG_SLAB depend on SLAB.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Cc: Matt Mackall <mpm@selenic.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 80598cfd728c..c48260fb8fd9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -79,7 +79,7 @@ config SCHEDSTATS
 
 config DEBUG_SLAB
 	bool "Debug memory allocations"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && SLAB
 	help
 	  Say Y here to have the kernel do limited verification on memory
 	  allocation as well as poisoning memory on free to catch use of freed

commit e56d090310d7625ecb43a1eeebd479f04affb48b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Jan 8 01:01:37 2006 -0800

    [PATCH] RCU signal handling
    
    RCU tasklist_lock and RCU signal handling: send signals RCU-read-locked
    instead of tasklist_lock read-locked.  This is a scalability improvement on
    SMP and a preemption-latency improvement under PREEMPT_RCU.
    
    Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: William Irwin <wli@holomorphy.com>
    Cc: Roland McGrath <roland@redhat.com>
    Cc: Oleg Nesterov <oleg@tv-sign.ru>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/fs/exec.c b/fs/exec.c
index e75a9548da8e..e9650cd22a3b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@ static inline int de_thread(struct task_struct *tsk)
 		spin_lock(&oldsighand->siglock);
 		spin_lock(&newsighand->siglock);
 
-		current->sighand = newsighand;
+		rcu_assign_pointer(current->sighand, newsighand);
 		recalc_sigpending();
 
 		spin_unlock(&newsighand->siglock);
@@ -768,7 +768,7 @@ static inline int de_thread(struct task_struct *tsk)
 		write_unlock_irq(&tasklist_lock);
 
 		if (atomic_dec_and_test(&oldsighand->count))
-			kmem_cache_free(sighand_cachep, oldsighand);
+			sighand_free(oldsighand);
 	}
 
 	BUG_ON(!thread_group_leader(current));
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a74662077d60..a6af77e9b4cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -34,6 +34,7 @@
 #include <linux/percpu.h>
 #include <linux/topology.h>
 #include <linux/seccomp.h>
+#include <linux/rcupdate.h>
 
 #include <linux/auxvec.h>	/* For AT_VECTOR_SIZE */
 
@@ -350,8 +351,16 @@ struct sighand_struct {
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
 	spinlock_t		siglock;
+	struct rcu_head		rcu;
 };
 
+extern void sighand_free_cb(struct rcu_head *rhp);
+
+static inline void sighand_free(struct sighand_struct *sp)
+{
+	call_rcu(&sp->rcu, sighand_free_cb);
+}
+
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -844,6 +853,7 @@ struct task_struct {
 	int cpuset_mems_generation;
 #endif
 	atomic_t fs_excl;	/* holding fs exclusive resources */
+	struct rcu_head rcu;
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -867,8 +877,26 @@ static inline int pid_alive(struct task_struct *p)
 extern void free_task(struct task_struct *tsk);
 extern void __put_task_struct(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
-#define put_task_struct(tsk) \
-do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
+
+static inline int get_task_struct_rcu(struct task_struct *t)
+{
+	int oldusage;
+
+	do {
+		oldusage = atomic_read(&t->usage);
+		if (oldusage == 0)
+			return 0;
+	} while (cmpxchg(&t->usage.counter, oldusage, oldusage+1) != oldusage);
+	return 1;
+}
+
+extern void __put_task_struct_cb(struct rcu_head *rhp);
+
+static inline void put_task_struct(struct task_struct *t)
+{
+	if (atomic_dec_and_test(&t->usage))
+		call_rcu(&t->rcu, __put_task_struct_cb);
+}
 
 /*
  * Per process flags
diff --git a/kernel/exit.c b/kernel/exit.c
index ee515683b92d..c73a7eb26de3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,7 +72,6 @@ void release_task(struct task_struct * p)
 		__ptrace_unlink(p);
 	BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
 	__exit_signal(p);
-	__exit_sighand(p);
 	/*
 	 * Note that the fastpath in sys_times depends on __exit_signal having
 	 * updated the counters before a task is removed from the tasklist of
diff --git a/kernel/fork.c b/kernel/fork.c
index fb8572a42297..7fe3adfa65cb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -743,6 +743,14 @@ int unshare_files(void)
 
 EXPORT_SYMBOL(unshare_files);
 
+void sighand_free_cb(struct rcu_head *rhp)
+{
+	struct sighand_struct *sp;
+
+	sp = container_of(rhp, struct sighand_struct, rcu);
+	kmem_cache_free(sighand_cachep, sp);
+}
+
 static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 {
 	struct sighand_struct *sig;
@@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
 		return 0;
 	}
 	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-	tsk->sighand = sig;
+	rcu_assign_pointer(tsk->sighand, sig);
 	if (!sig)
 		return -ENOMEM;
 	spin_lock_init(&sig->siglock);
diff --git a/kernel/pid.c b/kernel/pid.c
index edba31c681ac..1acc07246991 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
 	struct hlist_node *elem;
 	struct pid *pid;
 
-	hlist_for_each_entry(pid, elem,
+	hlist_for_each_entry_rcu(pid, elem,
 			&pid_hash[type][pid_hashfn(nr)], pid_chain) {
 		if (pid->nr == nr)
 			return pid;
@@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 
 	task_pid = &task->pids[type];
 	pid = find_pid(type, nr);
+	task_pid->nr = nr;
 	if (pid == NULL) {
-		hlist_add_head(&task_pid->pid_chain,
-				&pid_hash[type][pid_hashfn(nr)]);
 		INIT_LIST_HEAD(&task_pid->pid_list);
+		hlist_add_head_rcu(&task_pid->pid_chain,
+				   &pid_hash[type][pid_hashfn(nr)]);
 	} else {
 		INIT_HLIST_NODE(&task_pid->pid_chain);
-		list_add_tail(&task_pid->pid_list, &pid->pid_list);
+		list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
 	}
-	task_pid->nr = nr;
 
 	return 0;
 }
@@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type)
 
 	pid = &task->pids[type];
 	if (!hlist_unhashed(&pid->pid_chain)) {
-		hlist_del(&pid->pid_chain);
 
-		if (list_empty(&pid->pid_list))
+		if (list_empty(&pid->pid_list)) {
 			nr = pid->nr;
-		else {
+			hlist_del_rcu(&pid->pid_chain);
+		} else {
 			pid_next = list_entry(pid->pid_list.next,
 						struct pid, pid_list);
 			/* insert next pid from pid_list to hash */
-			hlist_add_head(&pid_next->pid_chain,
-				&pid_hash[type][pid_hashfn(pid_next->nr)]);
+			hlist_replace_rcu(&pid->pid_chain,
+					  &pid_next->pid_chain);
 		}
 	}
 
-	list_del(&pid->pid_list);
+	list_del_rcu(&pid->pid_list);
 	pid->nr = 0;
 
 	return nr;
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c9afc61240e4..0a669bd2f6d1 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <asm/atomic.h>
diff --git a/kernel/sched.c b/kernel/sched.c
index 6f46c94cc29e..92733091154c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p)
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)	\
 				< (long long) (sd)->cache_hot_time)
 
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+	__put_task_struct(container_of(rhp, struct task_struct, rcu));
+}
+
+EXPORT_SYMBOL_GPL(__put_task_struct_cb);
+
 /*
  * These are the runqueue data structures:
  */
diff --git a/kernel/signal.c b/kernel/signal.c
index d7611f189ef7..64737c72dadd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk)
 	/* Ok, we're done with the signal handlers */
 	tsk->sighand = NULL;
 	if (atomic_dec_and_test(&sighand->count))
-		kmem_cache_free(sighand_cachep, sighand);
+		sighand_free(sighand);
 }
 
 void exit_sighand(struct task_struct *tsk)
 {
 	write_lock_irq(&tasklist_lock);
-	__exit_sighand(tsk);
+	rcu_read_lock();
+	if (tsk->sighand != NULL) {
+		struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
+		spin_lock(&sighand->siglock);
+		__exit_sighand(tsk);
+		spin_unlock(&sighand->siglock);
+	}
+	rcu_read_unlock();
 	write_unlock_irq(&tasklist_lock);
 }
 
@@ -345,12 +352,14 @@ void exit_sighand(struct task_struct *tsk)
 void __exit_signal(struct task_struct *tsk)
 {
 	struct signal_struct * sig = tsk->signal;
-	struct sighand_struct * sighand = tsk->sighand;
+	struct sighand_struct * sighand;
 
 	if (!sig)
 		BUG();
 	if (!atomic_read(&sig->count))
 		BUG();
+	rcu_read_lock();
+	sighand = rcu_dereference(tsk->sighand);
 	spin_lock(&sighand->siglock);
 	posix_cpu_timers_exit(tsk);
 	if (atomic_dec_and_test(&sig->count)) {
@@ -358,6 +367,7 @@ void __exit_signal(struct task_struct *tsk)
 		if (tsk == sig->curr_target)
 			sig->curr_target = next_thread(tsk);
 		tsk->signal = NULL;
+		__exit_sighand(tsk);
 		spin_unlock(&sighand->siglock);
 		flush_sigqueue(&sig->shared_pending);
 	} else {
@@ -389,9 +399,11 @@ void __exit_signal(struct task_struct *tsk)
 		sig->nvcsw += tsk->nvcsw;
 		sig->nivcsw += tsk->nivcsw;
 		sig->sched_time += tsk->sched_time;
+		__exit_sighand(tsk);
 		spin_unlock(&sighand->siglock);
 		sig = NULL;	/* Marker for below.  */
 	}
+	rcu_read_unlock();
 	clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
 	flush_sigqueue(&tsk->pending);
 	if (sig) {
@@ -1080,18 +1092,28 @@ void zap_other_threads(struct task_struct *p)
 }
 
 /*
- * Must be called with the tasklist_lock held for reading!
+ * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
 	unsigned long flags;
+	struct sighand_struct *sp;
 	int ret;
 
+retry:
 	ret = check_kill_permission(sig, info, p);
-	if (!ret && sig && p->sighand) {
-		spin_lock_irqsave(&p->sighand->siglock, flags);
+	if (!ret && sig && (sp = p->sighand)) {
+		if (!get_task_struct_rcu(p))
+			return -ESRCH;
+		spin_lock_irqsave(&sp->siglock, flags);
+		if (p->sighand != sp) {
+			spin_unlock_irqrestore(&sp->siglock, flags);
+			put_task_struct(p);
+			goto retry;
+		}
 		ret = __group_send_sig_info(sig, info, p);
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+		spin_unlock_irqrestore(&sp->siglock, flags);
+		put_task_struct(p);
 	}
 
 	return ret;
@@ -1136,14 +1158,21 @@ int
 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 {
 	int error;
+	int acquired_tasklist_lock = 0;
 	struct task_struct *p;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
+	if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) {
+		read_lock(&tasklist_lock);
+		acquired_tasklist_lock = 1;
+	}
 	p = find_task_by_pid(pid);
 	error = -ESRCH;
 	if (p)
 		error = group_send_sig_info(sig, info, p);
-	read_unlock(&tasklist_lock);
+	if (unlikely(acquired_tasklist_lock))
+		read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return error;
 }
 
@@ -1355,16 +1384,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 {
 	unsigned long flags;
 	int ret = 0;
+	struct sighand_struct *sh;
 
 	BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
-	read_lock(&tasklist_lock);
+
+	/*
+	 * The rcu based delayed sighand destroy makes it possible to
+	 * run this without tasklist lock held. The task struct itself
+	 * cannot go away as create_timer did get_task_struct().
+	 *
+	 * We return -1, when the task is marked exiting, so
+	 * posix_timer_event can redirect it to the group leader
+	 */
+	rcu_read_lock();
 
 	if (unlikely(p->flags & PF_EXITING)) {
 		ret = -1;
 		goto out_err;
 	}
 
-	spin_lock_irqsave(&p->sighand->siglock, flags);
+retry:
+	sh = rcu_dereference(p->sighand);
+
+	spin_lock_irqsave(&sh->siglock, flags);
+	if (p->sighand != sh) {
+		/* We raced with exec() in a multithreaded process... */
+		spin_unlock_irqrestore(&sh->siglock, flags);
+		goto retry;
+	}
+
+	/*
+	 * We do the check here again to handle the following scenario:
+	 *
+	 * CPU 0		CPU 1
+	 * send_sigqueue
+	 * check PF_EXITING
+	 * interrupt		exit code running
+	 *			__exit_signal
+	 *			lock sighand->siglock
+	 *			unlock sighand->siglock
+	 * lock sh->siglock
+	 * add(tsk->pending) 	flush_sigqueue(tsk->pending)
+	 *
+	 */
+
+	if (unlikely(p->flags & PF_EXITING)) {
+		ret = -1;
+		goto out;
+	}
 
 	if (unlikely(!list_empty(&q->list))) {
 		/*
@@ -1388,9 +1455,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 		signal_wake_up(p, sig == SIGKILL);
 
 out:
-	spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	spin_unlock_irqrestore(&sh->siglock, flags);
 out_err:
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -1402,7 +1469,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 	int ret = 0;
 
 	BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
+
 	read_lock(&tasklist_lock);
+	/* Since it_lock is held, p->sighand cannot be NULL. */
 	spin_lock_irqsave(&p->sighand->siglock, flags);
 	handle_stop_signal(sig, p);
 
@@ -1436,7 +1505,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 out:
 	spin_unlock_irqrestore(&p->sighand->siglock, flags);
 	read_unlock(&tasklist_lock);
-	return(ret);
+	return ret;
 }
 
 /*
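
An aside on the helper added in sched.h above: get_task_struct_rcu() is the
"increment the refcount unless it has already reached zero" idiom. An RCU
reader may find a task whose last reference is being dropped concurrently,
and a plain atomic increment would resurrect the dying object, so the
cmpxchg loop refuses once the count is zero. A minimal user-space sketch of
the same loop in C11 atomics (illustrative names, not kernel API):

    /*
     * User-space sketch of the increment-unless-zero idiom used by
     * get_task_struct_rcu() above; names are illustrative, not kernel API.
     */
    #include <stdatomic.h>
    #include <stdio.h>

    /* Take a reference only if the object is still alive (count > 0). */
    static int get_unless_zero(atomic_int *count)
    {
        int old = atomic_load(count);

        do {
            if (old == 0)
                return 0;   /* already released: too late to grab it */
        } while (!atomic_compare_exchange_weak(count, &old, old + 1));
        return 1;
    }

    int main(void)
    {
        atomic_int usage = ATOMIC_VAR_INIT(1);

        if (get_unless_zero(&usage))
            printf("reference taken, count is now %d\n",
                   atomic_load(&usage));
        return 0;
    }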

commit bb44f116a14c4c932f15c79acfafd46bcb43ca9a
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Dec 20 11:54:17 2005 +0100

    [PATCH] fix spinlock-debugging smp_processor_id() usage
    
    When a spinlock debugging check hits, we print the CPU number as an
    informational thing - but there is no guarantee that preemption is off
    at that point - hence we should use raw_smp_processor_id().  Otherwise
    DEBUG_PREEMPT will print a warning.
    
    With this fix the warning goes away and only the spinlock-debugging info
    is printed.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
index 906ad101eab3..dcd4be9bd4e5 100644
--- a/lib/spinlock_debug.c
+++ b/lib/spinlock_debug.c
@@ -20,7 +20,8 @@ static void spin_bug(spinlock_t *lock, const char *msg)
 		if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
 			owner = lock->owner;
 		printk("BUG: spinlock %s on CPU#%d, %s/%d\n",
-			msg, smp_processor_id(), current->comm, current->pid);
+			msg, raw_smp_processor_id(),
+			current->comm, current->pid);
 		printk(" lock: %p, .magic: %08x, .owner: %s/%d, .owner_cpu: %d\n",
 			lock, lock->magic,
 			owner ? owner->comm : "<none>",
@@ -78,8 +79,8 @@ static void __spin_lock_debug(spinlock_t *lock)
 		if (print_once) {
 			print_once = 0;
 			printk("BUG: spinlock lockup on CPU#%d, %s/%d, %p\n",
-				smp_processor_id(), current->comm, current->pid,
-					lock);
+				raw_smp_processor_id(), current->comm,
+				current->pid, lock);
 			dump_stack();
 		}
 	}
@@ -120,7 +121,8 @@ static void rwlock_bug(rwlock_t *lock, const char *msg)
 
 	if (xchg(&print_once, 0)) {
 		printk("BUG: rwlock %s on CPU#%d, %s/%d, %p\n", msg,
-			smp_processor_id(), current->comm, current->pid, lock);
+			raw_smp_processor_id(), current->comm,
+			current->pid, lock);
 		dump_stack();
 #ifdef CONFIG_SMP
 		/*
@@ -148,8 +150,8 @@ static void __read_lock_debug(rwlock_t *lock)
 		if (print_once) {
 			print_once = 0;
 			printk("BUG: read-lock lockup on CPU#%d, %s/%d, %p\n",
-				smp_processor_id(), current->comm, current->pid,
-					lock);
+				raw_smp_processor_id(), current->comm,
+				current->pid, lock);
 			dump_stack();
 		}
 	}
@@ -220,8 +222,8 @@ static void __write_lock_debug(rwlock_t *lock)
 		if (print_once) {
 			print_once = 0;
 			printk("BUG: write-lock lockup on CPU#%d, %s/%d, %p\n",
-				smp_processor_id(), current->comm, current->pid,
-					lock);
+				raw_smp_processor_id(), current->comm,
+				current->pid, lock);
 			dump_stack();
 		}
 	}
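
The rationale above generalises: when a CPU number is purely informational,
a racy read is acceptable. A user-space sketch of the same idea, assuming
glibc's sched_getcpu():

    /*
     * Sketch only: a racy CPU-number read is fine for diagnostics.
     * The value may be stale the instant it is read, just like the
     * informational CPU# printed by spin_bug().
     */
    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>

    int main(void)
    {
        /* Advisory only: the task may migrate right after the read. */
        printf("BUG-style report from CPU#%d\n", sched_getcpu());
        return 0;
    }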

commit b88cb42428f14fabdaf947150c00d65891820635
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Dec 12 00:37:11 2005 -0800

    [PATCH] add hlist_replace_rcu()
    
    Add list_replace_rcu: replace old entry by new one.
    
    Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/list.h b/include/linux/list.h
index fbfca73355a3..8e3388284530 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -202,12 +202,15 @@ static inline void list_del_rcu(struct list_head *entry)
  *
  * The old entry will be replaced with the new entry atomically.
  */
-static inline void list_replace_rcu(struct list_head *old, struct list_head *new){
+static inline void list_replace_rcu(struct list_head *old,
+				struct list_head *new)
+{
 	new->next = old->next;
 	new->prev = old->prev;
 	smp_wmb();
 	new->next->prev = new;
 	new->prev->next = new;
+	old->prev = LIST_POISON2;
 }
 
 /**
@@ -578,6 +581,27 @@ static inline void hlist_del_init(struct hlist_node *n)
 	}
 }
 
+/*
+ * hlist_replace_rcu - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * The old entry will be replaced with the new entry atomically.
+ */
+static inline void hlist_replace_rcu(struct hlist_node *old,
+					struct hlist_node *new)
+{
+	struct hlist_node *next = old->next;
+
+	new->next = next;
+	new->pprev = old->pprev;
+	smp_wmb();
+	if (next)
+		new->next->pprev = &new->next;
+	*new->pprev = new;
+	old->pprev = LIST_POISON2;
+}
+
 static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
 {
 	struct hlist_node *first = h->first;
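
The pprev field (the address of whatever pointer currently points at the
node) is what lets hlist_replace_rcu() work even when the old entry is
first in its bucket: the final *new->pprev store atomically swings readers
over. A single-threaded sketch of that pointer surgery (illustration only;
the kernel version also needs the smp_wmb() and the LIST_POISON2 store
shown in the diff):

    /*
     * Single-threaded sketch of the pointer surgery in hlist_replace_rcu().
     */
    #include <stdio.h>
    #include <stddef.h>

    struct hnode {
        struct hnode *next;
        struct hnode **pprev;   /* address of the pointer pointing at us */
    };

    static void hlist_replace(struct hnode *old, struct hnode *new)
    {
        struct hnode *next = old->next;

        new->next = next;
        new->pprev = old->pprev;
        /* kernel: smp_wmb() here, so 'new' is fully initialised
           before it becomes reachable */
        if (next)
            next->pprev = &new->next;
        *new->pprev = new;      /* the single store readers observe */
    }

    int main(void)
    {
        struct hnode a, b;
        struct hnode *head = &a;

        a.next = NULL;
        a.pprev = &head;
        hlist_replace(&a, &b);
        printf("head now points at b: %s\n", head == &b ? "yes" : "no");
        return 0;
    }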

commit dbdf65b1b7f8ec48bda1604cfea7ac09ce583d6b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Nov 13 16:07:22 2005 -0800

    [PATCH] rcutorture: renice to low priority
    
    Make the box usable for interactive work when running the RCU torture test,
    by renicing the RCU torture-test threads to +19 by default.  Kthreads run
    at nice -5 by default.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: "Paul E. McKenney" <paulmck@us.ibm.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9b58f1eff3ca..eb6719c50b4e 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -195,6 +195,8 @@ rcu_torture_writer(void *arg)
 	static DEFINE_RCU_RANDOM(rand);
 
 	VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
+	set_user_nice(current, 19);
+
 	do {
 		schedule_timeout_uninterruptible(1);
 		if (rcu_batches_completed() == oldbatch)
@@ -238,6 +240,8 @@ rcu_torture_reader(void *arg)
 	int pipe_count;
 
 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
+	set_user_nice(current, 19);
+
 	do {
 		rcu_read_lock();
 		completed = rcu_batches_completed();
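
For reference, the user-space analogue of set_user_nice(current, 19) is
setpriority(); a minimal sketch assuming Linux's per-thread nice semantics:

    /*
     * Sketch: demote the caller to nice +19, as the torture threads above
     * do.  Under Linux the nice value is per-thread, so setpriority()
     * with who == 0 affects only the calling thread (POSIX nominally
     * makes this per-process).
     */
    #include <sys/resource.h>
    #include <stdio.h>

    int main(void)
    {
        if (setpriority(PRIO_PROCESS, 0, 19) == -1)
            perror("setpriority");
        printf("now running at nice %d\n", getpriority(PRIO_PROCESS, 0));
        return 0;
    }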

commit 28ef35845f2c8da8e1bed068277d2fab1e8c8979
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Nov 7 00:59:29 2005 -0800

    [PATCH] small kernel_stat.h cleanup
    
    cleanup: use for_each_cpu() instead of an open-coded NR_CPUS loop.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index dba27749b428..a484572c302e 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -6,6 +6,7 @@
 #include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/percpu.h>
+#include <linux/cpumask.h>
 #include <asm/cputime.h>
 
 /*
@@ -43,11 +44,10 @@ extern unsigned long long nr_context_switches(void);
  */
 static inline int kstat_irqs(int irq)
 {
-	int i, sum=0;
+	int cpu, sum = 0;
 
-	for (i = 0; i < NR_CPUS; i++)
-		if (cpu_possible(i))
-			sum += kstat_cpu(i).irqs[irq];
+	for_each_cpu(cpu)
+		sum += kstat_cpu(cpu).irqs[irq];
 
 	return sum;
 }
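
The win of for_each_cpu() is that the "skip CPUs that are not possible"
test moves into the loop header rather than being open-coded at every call
site. A self-contained sketch of the same macro pattern over a plain
bitmask (hypothetical names, not the kernel's cpumask API):

    /*
     * Sketch of the iterator-macro idea behind for_each_cpu().
     * All names are hypothetical user-space stand-ins.
     */
    #include <stdio.h>

    #define NR_CPUS 8
    static const unsigned long possible_mask = 0x2d;    /* CPUs 0,2,3,5 */

    #define cpu_possible(cpu)   (possible_mask & (1UL << (cpu)))
    #define for_each_possible(cpu) \
        for ((cpu) = 0; (cpu) < NR_CPUS; (cpu)++) \
            if (!cpu_possible(cpu)) continue; else

    int main(void)
    {
        int irqs[NR_CPUS] = { 3, 0, 1, 4, 0, 1, 0, 0 };
        int cpu, sum = 0;

        for_each_possible(cpu)
            sum += irqs[cpu];       /* only CPUs 0,2,3,5 contribute */
        printf("sum = %d\n", sum);  /* 3 + 1 + 4 + 1 = 9 */
        return 0;
    }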

commit 6d6f9156209892bc0cdc3354394ac947db5e32f1
Author: Karoly Lorentey <lorentey@elte.hu>
Date:   Tue Oct 25 11:50:25 2005 +0200

    [ALSA] intel8x0 - Add ac97_quirk for Dell Inspiron 8600
    
    Modules: Intel8x0 driver
    
    Add AC97_TUNE_HP_ONLY quirk for Dell Inspiron 8600.
    
    Signed-off-by: Karoly Lorentey <lorentey@elte.hu>
    Signed-off-by: Takashi Iwai <tiwai@suse.de>

diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c
index c2f050215321..8eb966505d2d 100644
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -1765,6 +1765,12 @@ static struct ac97_quirk ac97_quirks[] __devinitdata = {
 		.name = "Dell Unknown",	/* STAC9750/51 */
 		.type = AC97_TUNE_HP_ONLY
 	},
+	{
+		.subvendor = 0x1028,
+		.subdevice = 0x0191,
+		.name = "Dell Inspiron 8600",
+		.type = AC97_TUNE_HP_ONLY
+	},
 	{
 		.subvendor = 0x103c,
 		.subdevice = 0x006d,
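
For context, a quirk table like this is scanned at probe time for the
first exact subvendor/subdevice match. A minimal sketch of such a lookup
(structure and names are illustrative, not the ALSA driver's code):

    /*
     * Sketch of a subsystem-ID quirk lookup: first exact match wins.
     */
    #include <stdio.h>
    #include <stddef.h>

    struct ac97_quirk {
        unsigned short subvendor, subdevice;
        const char *name;
        int type;
    };

    enum { TUNE_DEFAULT, TUNE_HP_ONLY };

    static const struct ac97_quirk quirks[] = {
        { 0x1028, 0x0191, "Dell Inspiron 8600", TUNE_HP_ONLY },
        { 0, 0, NULL, 0 }   /* terminator */
    };

    static const struct ac97_quirk *find_quirk(unsigned short sv,
                                               unsigned short sd)
    {
        const struct ac97_quirk *q;

        for (q = quirks; q->name; q++)
            if (q->subvendor == sv && q->subdevice == sd)
                return q;
        return NULL;
    }

    int main(void)
    {
        const struct ac97_quirk *q = find_quirk(0x1028, 0x0191);

        printf("%s\n", q ? q->name : "no quirk");
        return 0;
    }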

commit bda98685b855f71f7e2fc5378aa3cdfb24a9db65
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Oct 30 14:59:44 2005 -0800

    [PATCH] x86: inline spin_unlock if !CONFIG_DEBUG_SPINLOCK and !CONFIG_PREEMPT
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index cdc99a27840d..0e9682c9def5 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -171,23 +171,42 @@ extern int __lockfunc generic__raw_read_trylock(raw_rwlock_t *lock);
 #define write_lock_irq(lock)		_write_lock_irq(lock)
 #define write_lock_bh(lock)		_write_lock_bh(lock)
 
-#define spin_unlock(lock)		_spin_unlock(lock)
-#define write_unlock(lock)		_write_unlock(lock)
-#define read_unlock(lock)		_read_unlock(lock)
+/*
+ * We inline the unlock functions in the nondebug case:
+ */
+#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
+# define spin_unlock(lock)		_spin_unlock(lock)
+# define read_unlock(lock)		_read_unlock(lock)
+# define write_unlock(lock)		_write_unlock(lock)
+#else
+# define spin_unlock(lock)		__raw_spin_unlock(&(lock)->raw_lock)
+# define read_unlock(lock)		__raw_read_unlock(&(lock)->raw_lock)
+# define write_unlock(lock)		__raw_write_unlock(&(lock)->raw_lock)
+#endif
+
+#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
+# define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
+# define read_unlock_irq(lock)		_read_unlock_irq(lock)
+# define write_unlock_irq(lock)		_write_unlock_irq(lock)
+#else
+# define spin_unlock_irq(lock) \
+    do { __raw_spin_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0)
+# define read_unlock_irq(lock) \
+    do { __raw_read_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0)
+# define write_unlock_irq(lock) \
+    do { __raw_write_unlock(&(lock)->raw_lock); local_irq_enable(); } while (0)
+#endif
 
 #define spin_unlock_irqrestore(lock, flags) \
 					_spin_unlock_irqrestore(lock, flags)
-#define spin_unlock_irq(lock)		_spin_unlock_irq(lock)
 #define spin_unlock_bh(lock)		_spin_unlock_bh(lock)
 
 #define read_unlock_irqrestore(lock, flags) \
 					_read_unlock_irqrestore(lock, flags)
-#define read_unlock_irq(lock)		_read_unlock_irq(lock)
 #define read_unlock_bh(lock)		_read_unlock_bh(lock)
 
 #define write_unlock_irqrestore(lock, flags) \
 					_write_unlock_irqrestore(lock, flags)
-#define write_unlock_irq(lock)		_write_unlock_irq(lock)
 #define write_unlock_bh(lock)		_write_unlock_bh(lock)
 
 #define spin_trylock_bh(lock)		__cond_lock(_spin_trylock_bh(lock))
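
The inlining pays off because, with debugging and preemption disabled,
releasing a lock reduces to a single store with release semantics, which
is cheaper than calling the out-of-line _spin_unlock(). A user-space C11
sketch of a lock whose unlock is exactly one store (not the kernel's
raw-lock implementation):

    /*
     * Sketch: a test-and-set lock whose unlock is one releasing store.
     */
    #include <stdatomic.h>

    typedef struct {
        atomic_flag locked;
    } tiny_lock_t;

    static inline void tiny_lock(tiny_lock_t *l)
    {
        while (atomic_flag_test_and_set_explicit(&l->locked,
                                                 memory_order_acquire))
            ;   /* spin */
    }

    static inline void tiny_unlock(tiny_lock_t *l)
    {
        /* The entire unlock: one releasing store. */
        atomic_flag_clear_explicit(&l->locked, memory_order_release);
    }

    int main(void)
    {
        tiny_lock_t l = { ATOMIC_FLAG_INIT };

        tiny_lock(&l);
        tiny_unlock(&l);
        return 0;
    }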

commit da04c035039b5288039a5bf2d340866114ae994b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Sep 13 11:17:59 2005 +0200

    [PATCH] Fix spinlock owner debugging
    
    fix up the runqueue lock owner only if we truly did a context-switch
    with the runqueue lock held. Impacts ia64, mips, sparc64 and arm.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index 81b3a96ed2d0..1f31a528fdba 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -294,6 +294,10 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
 
 static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
 {
+#ifdef CONFIG_DEBUG_SPINLOCK
+	/* this is a valid case when another task releases the spinlock */
+	rq->lock.owner = current;
+#endif
 	spin_unlock_irq(&rq->lock);
 }
 
@@ -1529,10 +1533,6 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
 	 *		Manfred Spraul <manfred@colorfullife.com>
 	 */
 	prev_task_flags = prev->flags;
-#ifdef CONFIG_DEBUG_SPINLOCK
-	/* this is a valid case when another task releases the spinlock */
-	rq->lock.owner = current;
-#endif
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
 	if (mm)

commit 67f9a619e7460b7d07284a9d0745727a77d3ade6
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Sep 10 00:26:16 2005 -0700

    [PATCH] sched: fix SMT scheduler latency bug
    
    William Weston reported unusually high scheduling latencies on his x86 HT
    box, on the -RT kernel.  I managed to reproduce it on my HT box and the
    latency tracer shows the incident in action:
    
                     _------=> CPU#
                    / _-----=> irqs-off
                   | / _----=> need-resched
                   || / _---=> hardirq/softirq
                   ||| / _--=> preempt-depth
                   |||| /
                   |||||     delay
       cmd     pid ||||| time  |   caller
          \   /    |||||   \   |   /
          du-2803  3Dnh2    0us : __trace_start_sched_wakeup (try_to_wake_up)
            ..............................................................
            ... we are running on CPU#3, PID 2778 gets woken to CPU#1: ...
            ..............................................................
          du-2803  3Dnh2    0us : __trace_start_sched_wakeup <<...>-2778> (73 1)
          du-2803  3Dnh2    0us : _raw_spin_unlock (try_to_wake_up)
            ................................................
            ... still on CPU#3, we send an IPI to CPU#1: ...
            ................................................
          du-2803  3Dnh1    0us : resched_task (try_to_wake_up)
          du-2803  3Dnh1    1us : smp_send_reschedule (try_to_wake_up)
          du-2803  3Dnh1    1us : send_IPI_mask_bitmask (smp_send_reschedule)
          du-2803  3Dnh1    2us : _raw_spin_unlock_irqrestore (try_to_wake_up)
            ...............................................
            ... 1 usec later, the IPI arrives on CPU#1: ...
            ...............................................
      <idle>-0     1Dnh.    2us : smp_reschedule_interrupt (c0100c5a 0 0)
    
    So far so good, this is the normal wakeup/preemption mechanism.  But here
    comes the scheduler anomaly on CPU#1:
    
      <idle>-0     1Dnh.    2us : preempt_schedule_irq (need_resched)
      <idle>-0     1Dnh.    2us : preempt_schedule_irq (need_resched)
      <idle>-0     1Dnh.    3us : __schedule (preempt_schedule_irq)
      <idle>-0     1Dnh.    3us : profile_hit (__schedule)
      <idle>-0     1Dnh1    3us : sched_clock (__schedule)
      <idle>-0     1Dnh1    4us : _raw_spin_lock_irq (__schedule)
      <idle>-0     1Dnh1    4us : _raw_spin_lock_irqsave (__schedule)
      <idle>-0     1Dnh2    5us : _raw_spin_unlock (__schedule)
      <idle>-0     1Dnh1    5us : preempt_schedule (__schedule)
      <idle>-0     1Dnh1    6us : _raw_spin_lock (__schedule)
      <idle>-0     1Dnh2    6us : find_next_bit (__schedule)
      <idle>-0     1Dnh2    6us : _raw_spin_lock (__schedule)
      <idle>-0     1Dnh3    7us : find_next_bit (__schedule)
      <idle>-0     1Dnh3    7us : find_next_bit (__schedule)
      <idle>-0     1Dnh3    8us : _raw_spin_unlock (__schedule)
      <idle>-0     1Dnh2    8us : preempt_schedule (__schedule)
      <idle>-0     1Dnh2    8us : find_next_bit (__schedule)
      <idle>-0     1Dnh2    9us : trace_stop_sched_switched (__schedule)
      <idle>-0     1Dnh2    9us : _raw_spin_lock (trace_stop_sched_switched)
      <idle>-0     1Dnh3   10us : trace_stop_sched_switched <<...>-2778> (73 8c)
      <idle>-0     1Dnh3   10us : _raw_spin_unlock (trace_stop_sched_switched)
      <idle>-0     1Dnh1   10us : _raw_spin_unlock (__schedule)
      <idle>-0     1Dnh.   11us : local_irq_enable_noresched (preempt_schedule_irq)
      <idle>-0     1Dnh.   11us < (0)
    
    we didn't pick up pid 2778! It only gets scheduled much later:
    
       <...>-2778  1Dnh2  412us : __switch_to (__schedule)
       <...>-2778  1Dnh2  413us : __schedule <<idle>-0> (8c 73)
       <...>-2778  1Dnh2  413us : _raw_spin_unlock (__schedule)
       <...>-2778  1Dnh1  413us : trace_stop_sched_switched (__schedule)
       <...>-2778  1Dnh1  414us : _raw_spin_lock (trace_stop_sched_switched)
       <...>-2778  1Dnh2  414us : trace_stop_sched_switched <<...>-2778> (73 1)
       <...>-2778  1Dnh2  414us : _raw_spin_unlock (trace_stop_sched_switched)
       <...>-2778  1Dnh1  415us : trace_stop_sched_switched (__schedule)
    
    the reason for this anomaly is the following code in dependent_sleeper():
    
                    /*
                     * If a user task with lower static priority than the
                     * running task on the SMT sibling is trying to schedule,
                     * delay it till there is proportionately less timeslice
                     * left of the sibling task to prevent a lower priority
                     * task from using an unfair proportion of the
                     * physical cpu's resources. -ck
                     */
    [...]
                            if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) /
                                    100) > task_timeslice(p)))
                                            ret = 1;
    
    Note that in contrast to the comment above, we don't actually do the check
    based on static priority, we do the check based on timeslices.  But
    timeslices go up and down, and even highprio tasks can randomly have very
    low timeslices (just before their next refill) and can thus be judged as
    'lowprio' by the above piece of code.  This condition is clearly buggy.
    The correct test is to check for static_prio _and_ to check for the
    preemption priority.  Even on different static priority levels, a
    higher-prio interactive task should not be delayed due to a
    higher-static-prio CPU hog.
    
    There is a symmetric bug in the 'kick SMT sibling' code of this function as
    well, which can be solved in a similar way.
    
    The patch below (against the current scheduler queue in -mm) fixes both
    bugs.  I have build and boot-tested this on x86 SMT, and nice +20 tasks
    still get properly throttled - so the dependent-sleeper logic is still in
    action.
    
    btw., these bugs pessimised the SMT scheduler because the 'delay wakeup'
    property was applied too liberally, so this fix is likely a throughput
    improvement as well.
    
    I separated out a smt_slice() function to make the code easier to read.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index 6da13bba3e23..c61ee3451a04 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2650,6 +2650,16 @@ static inline void wake_sleeping_dependent(int this_cpu, runqueue_t *this_rq)
 	 */
 }
 
+/*
+ * number of 'lost' timeslices this task wont be able to fully
+ * utilize, if another task runs on a sibling. This models the
+ * slowdown effect of other tasks running on siblings:
+ */
+static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
+{
+	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
+}
+
 static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 {
 	struct sched_domain *tmp, *sd = NULL;
@@ -2714,8 +2724,9 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					ret = 1;
 		} else
-			if (((smt_curr->time_slice * (100 - sd->per_cpu_gain) /
-				100) > task_timeslice(p)))
+			if (smt_curr->static_prio < p->static_prio &&
+				!TASK_PREEMPTS_CURR(p, smt_rq) &&
+				smt_slice(smt_curr, sd) > task_timeslice(p))
 					ret = 1;
 
 check_smt_task:
@@ -2737,8 +2748,8 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
 				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
 					resched_task(smt_curr);
 		} else {
-			if ((p->time_slice * (100 - sd->per_cpu_gain) / 100) >
-				task_timeslice(smt_curr))
+			if (TASK_PREEMPTS_CURR(p, smt_rq) &&
+				smt_slice(p, sd) > task_timeslice(smt_curr))
 					resched_task(smt_curr);
 			else
 				wakeup_busy_runqueue(smt_rq);
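
To make the smt_slice() arithmetic concrete, a tiny worked example with
assumed numbers (per_cpu_gain of 25 and the 100 ms slice are illustrative):

    /*
     * Worked example of smt_slice(): per_cpu_gain of 25 (a typical SMT
     * sibling-domain value) models a task keeping 75% of its slice while
     * the sibling is busy.
     */
    #include <stdio.h>

    static unsigned long smt_slice(unsigned long time_slice, int per_cpu_gain)
    {
        return time_slice * (100 - per_cpu_gain) / 100;
    }

    int main(void)
    {
        /* a 100 ms slice with per_cpu_gain == 25 -> 75 ms effective */
        printf("smt_slice = %lu ms\n", smt_slice(100, 25));
        return 0;
    }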