Patches contributed by Eötvös Lorand University

<<Prev 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351[352]353 Next>>

commit 00a5dfdb93f74e4d95fb0d83c890728e331f8810
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Aug 5 23:05:27 2005 +0200

    [PATCH] Fix semundo lock leakage
    
    semundo->lock can leak if semundo->refcount goes from 2 to 1 while
    another thread has it locked.  This causes major problems for PREEMPT
    kernels.
    
    The simplest fix for now is to undo the single-thread optimization.
    
    This bug was found via relentless testing by Dominik Karall.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/ipc/sem.c b/ipc/sem.c
index 7e8a25c82ef3..70975ce0784a 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -895,7 +895,7 @@ static inline void lock_semundo(void)
 	struct sem_undo_list *undo_list;
 
 	undo_list = current->sysvsem.undo_list;
-	if ((undo_list != NULL) && (atomic_read(&undo_list->refcnt) != 1))
+	if (undo_list)
 		spin_lock(&undo_list->lock);
 }
 
@@ -915,7 +915,7 @@ static inline void unlock_semundo(void)
 	struct sem_undo_list *undo_list;
 
 	undo_list = current->sysvsem.undo_list;
-	if ((undo_list != NULL) && (atomic_read(&undo_list->refcnt) != 1))
+	if (undo_list)
 		spin_unlock(&undo_list->lock);
 }
 
@@ -943,9 +943,7 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 		if (undo_list == NULL)
 			return -ENOMEM;
 		memset(undo_list, 0, size);
-		/* don't initialize unodhd->lock here.  It's done
-		 * in copy_semundo() instead.
-		 */
+		spin_lock_init(&undo_list->lock);
 		atomic_set(&undo_list->refcnt, 1);
 		current->sysvsem.undo_list = undo_list;
 	}
@@ -1231,8 +1229,6 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
 		error = get_undo_list(&undo_list);
 		if (error)
 			return error;
-		if (atomic_read(&undo_list->refcnt) == 1)
-			spin_lock_init(&undo_list->lock);
 		atomic_inc(&undo_list->refcnt);
 		tsk->sysvsem.undo_list = undo_list;
 	} else

commit 6cb54819d7b1867053e2dfd8c0ca3a8dc65a7eff
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Aug 1 13:39:13 2005 +0200

    [PATCH] remove sys_set_zone_reclaim()
    
    This removes sys_set_zone_reclaim() for now.  While i'm sure Martin is
    trying to solve a real problem, we must not hard-code an incomplete and
    insufficient approach into a syscall, because syscalls are pretty much
    for eternity.  I am quite strongly convinced that this syscall must not
    hit v2.6.13 in its current form.
    
    Firstly, the syscall lacks basic syscall design: e.g. it allows the
    global setting of VM policy for unprivileged users. (!) [ Imagine an
    Oracle installation and a SAP installation on the same NUMA box fighting
    over the 'optimal' setting for this flag. What will they do? Will they
    try to set the flag to their own preferred value every second or so? ]
    
    Secondly, it was added based on a single datapoint from Martin:
    
     http://marc.theaimsgroup.com/?l=linux-mm&m=111763597218177&w=2
    
    where Martin characterizes the numbers the following way:
    
     ' Run-to-run variability for "make -j" is huge, so these numbers aren't
       terribly useful except to see that with reclaim the benchmark still
       finishes in a reasonable amount of time. '
    
    in other words: the fundamental problem has likely not been solved, only
    a tendential move into the right direction has been observed, and a
    handful of numbers were picked out of a set of hugely variable results,
    without showing the variability data. How much variance is there
    run-to-run?
    
    I'd really suggest to first walk the walk and see what's needed to get
    stable & predictable kernel compilation numbers on that NUMA box, before
    adding random syscalls to tune a particular aspect of the VM ... which
    approach might not even matter once the whole picture has been analyzed
    and understood!
    
    The third, most important point is that the syscall exposes VM tuning
    internals in a completely unstructured way. What sense does it make to
    have a _GLOBAL_ per-node setting for 'should we go to another node for
    reclaim'? If then it might make sense to do this per-app, via numalib or
    so.
    
    The change is minimalistic in that it doesnt remove the syscall and the
    underlying infrastructure changes, only the user-visible changes.  We
    could perhaps add a CAP_SYS_ADMIN-only sysctl for this hack, a'ka
    /proc/sys/vm/swappiness, but even that looks quite counterproductive
    when the generic approach is that we are trying to reduce the number of
    external factors in the VM balance picture.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index 468500a7e894..9b21a31d4f4e 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -251,7 +251,7 @@ ENTRY(sys_call_table)
 	.long sys_io_submit
 	.long sys_io_cancel
 	.long sys_fadvise64	/* 250 */
-	.long sys_set_zone_reclaim
+	.long sys_ni_syscall
 	.long sys_exit_group
 	.long sys_lookup_dcookie
 	.long sys_epoll_create
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 66946f3fdac7..9be53e1ea404 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1573,7 +1573,7 @@ sys_call_table:
 	data8 sys_keyctl
 	data8 sys_ioprio_set
 	data8 sys_ioprio_get			// 1275
-	data8 sys_set_zone_reclaim
+	data8 sys_ni_syscall
 	data8 sys_inotify_init
 	data8 sys_inotify_add_watch
 	data8 sys_inotify_rm_watch
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 42b40ae5eada..1ab2370e2efa 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -79,7 +79,6 @@ cond_syscall(sys_request_key);
 cond_syscall(sys_keyctl);
 cond_syscall(compat_sys_keyctl);
 cond_syscall(compat_sys_socketcall);
-cond_syscall(sys_set_zone_reclaim);
 cond_syscall(sys_inotify_init);
 cond_syscall(sys_inotify_add_watch);
 cond_syscall(sys_inotify_rm_watch);

commit 5bbcfd9000887c0da7d57cc7b3ac869fc0dd5aa9
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Jul 7 17:57:04 2005 -0700

    [PATCH] cond_resched(): fix bogus might_sleep() warning
    
    The BKS might be reacquired before we have dropped PREEMPT_ACTIVE, which
    could trigger a second could trigger a second cond_resched() call.  Bug
    found by Hirofumi Ogawa.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index 5f2182d42241..4107db0dc091 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3877,6 +3877,13 @@ asmlinkage long sys_sched_yield(void)
 
 static inline void __cond_resched(void)
 {
+	/*
+	 * The BKS might be reacquired before we have dropped
+	 * PREEMPT_ACTIVE, which could trigger a second
+	 * cond_resched() call.
+	 */
+	if (unlikely(preempt_count()))
+		return;
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		schedule();

commit 306e440daf5f40b195afd83d05dee89fa63189e7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Jun 30 02:58:55 2005 -0700

    [PATCH] x86: i8253/i8259A lock cleanup
    
    Introduce proper declarations for i8253_lock and i8259A_lock.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 93df90bbb87e..bd1dbf3bd223 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -35,6 +35,7 @@
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/i8253.h>
 
 #include <mach_apic.h>
 
@@ -879,7 +880,6 @@ void __init init_apic_mappings(void)
  */
 static unsigned int __devinit get_8254_timer_count(void)
 {
-	extern spinlock_t i8253_lock;
 	unsigned long flags;
 
 	unsigned int count;
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index d48ce9290963..064211d5f41b 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -228,10 +228,10 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/desc.h>
+#include <asm/i8253.h>
 
 #include "io_ports.h"
 
-extern spinlock_t i8253_lock;
 extern unsigned long get_cmos_time(void);
 extern void machine_real_restart(unsigned char *, int);
 
@@ -1168,8 +1168,7 @@ static void get_time_diff(void)
 static void reinit_timer(void)
 {
 #ifdef INIT_TIMER_AFTER_SUSPEND
-	unsigned long	flags;
-	extern spinlock_t i8253_lock;
+	unsigned long flags;
 
 	spin_lock_irqsave(&i8253_lock, flags);
 	/* set the clock to 100 Hz */
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 35eb8e29c485..6578f40bd501 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -37,6 +37,7 @@
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/timer.h>
+#include <asm/i8259.h>
 
 #include <mach_apic.h>
 
@@ -1566,7 +1567,6 @@ void print_all_local_APICs (void)
 
 void /*__init*/ print_PIC(void)
 {
-	extern spinlock_t i8259A_lock;
 	unsigned int v;
 	unsigned long flags;
 
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index e68d9fdb0759..2854c357377f 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -68,7 +68,8 @@
 
 #include "io_ports.h"
 
-extern spinlock_t i8259A_lock;
+#include <asm/i8259.h>
+
 int pit_latch_buggy;              /* extern */
 
 #include "do_timer.h"
@@ -85,6 +86,8 @@ extern unsigned long wall_jiffies;
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL(rtc_lock);
 
+#include <asm/i8253.h>
+
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
diff --git a/arch/i386/kernel/timers/timer_cyclone.c b/arch/i386/kernel/timers/timer_cyclone.c
index f6f1206a11bb..13892a65c941 100644
--- a/arch/i386/kernel/timers/timer_cyclone.c
+++ b/arch/i386/kernel/timers/timer_cyclone.c
@@ -17,9 +17,9 @@
 #include <asm/io.h>
 #include <asm/pgtable.h>
 #include <asm/fixmap.h>
-#include "io_ports.h"
+#include <asm/i8253.h>
 
-extern spinlock_t i8253_lock;
+#include "io_ports.h"
 
 /* Number of usecs that the last interrupt was delayed */
 static int delay_at_last_interrupt;
diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c
index 967d5453cd0e..06de036a820c 100644
--- a/arch/i386/kernel/timers/timer_pit.c
+++ b/arch/i386/kernel/timers/timer_pit.c
@@ -15,9 +15,8 @@
 #include <asm/smp.h>
 #include <asm/io.h>
 #include <asm/arch_hooks.h>
+#include <asm/i8253.h>
 
-extern spinlock_t i8259A_lock;
-extern spinlock_t i8253_lock;
 #include "do_timer.h"
 #include "io_ports.h"
 
@@ -166,7 +165,6 @@ struct init_timer_opts __initdata timer_pit_init = {
 
 void setup_pit_timer(void)
 {
-	extern spinlock_t i8253_lock;
 	unsigned long flags;
 
 	spin_lock_irqsave(&i8253_lock, flags);
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index f46e625bab67..8f4e4d5bc560 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -24,6 +24,7 @@
 #include "mach_timer.h"
 
 #include <asm/hpet.h>
+#include <asm/i8253.h>
 
 #ifdef CONFIG_HPET_TIMER
 static unsigned long hpet_usec_quotient;
@@ -35,8 +36,6 @@ static inline void cpufreq_delayed_get(void);
 
 int tsc_disable __devinitdata = 0;
 
-extern spinlock_t i8253_lock;
-
 static int use_tsc;
 /* Number of usecs that the last interrupt was delayed */
 static int delay_at_last_interrupt;
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c
index 602aea240e9b..3e439ce5e1b2 100644
--- a/arch/i386/mach-voyager/voyager_basic.c
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -30,6 +30,7 @@
 #include <linux/irq.h>
 #include <asm/tlbflush.h>
 #include <asm/arch_hooks.h>
+#include <asm/i8253.h>
 
 /*
  * Power off function, if any
@@ -182,7 +183,6 @@ voyager_timer_interrupt(struct pt_regs *regs)
 		 * and swiftly introduce it to something sharp and
 		 * pointy.  */
 		__u16 val;
-		extern spinlock_t i8253_lock;
 
 		spin_lock(&i8253_lock);
 		
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 157190d986bb..d206d7e49cf5 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -1064,7 +1064,6 @@ void print_all_local_APICs (void)
 
 void __apicdebuginit print_PIC(void)
 {
-	extern spinlock_t i8259A_lock;
 	unsigned int v;
 	unsigned long flags;
 
diff --git a/drivers/ide/legacy/hd.c b/drivers/ide/legacy/hd.c
index e884cd4b22fd..242029c9c0ca 100644
--- a/drivers/ide/legacy/hd.c
+++ b/drivers/ide/legacy/hd.c
@@ -156,11 +156,13 @@ else \
 
 
 #if (HD_DELAY > 0)
+
+#include <asm/i8253.h>
+
 unsigned long last_req;
 
 unsigned long read_timer(void)
 {
-        extern spinlock_t i8253_lock;
 	unsigned long t, flags;
 	int i;
 
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index 3e72c9b1461e..ab09cf4093e3 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -60,12 +60,13 @@ static void gameport_disconnect_port(struct gameport *gameport);
 
 #if defined(__i386__)
 
+#include <asm/i8253.h>
+
 #define DELTA(x,y)      ((y)-(x)+((y)<(x)?1193182/HZ:0))
 #define GET_TIME(x)     do { x = get_time_pit(); } while (0)
 
 static unsigned int get_time_pit(void)
 {
-	extern spinlock_t i8253_lock;
 	unsigned long flags;
 	unsigned int count;
 
diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
index 504b7d550567..c3a5739030c3 100644
--- a/drivers/input/joystick/analog.c
+++ b/drivers/input/joystick/analog.c
@@ -140,12 +140,14 @@ struct analog_port {
  */
 
 #ifdef __i386__
+
+#include <asm/i8253.h>
+
 #define GET_TIME(x)	do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0)
 #define DELTA(x,y)	(cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? CLOCK_TICK_RATE / HZ : 0)))
 #define TIME_NAME	(cpu_has_tsc?"TSC":"PIT")
 static unsigned int get_time_pit(void)
 {
-        extern spinlock_t i8253_lock;
         unsigned long flags;
         unsigned int count;
 
diff --git a/include/asm-i386/i8253.h b/include/asm-i386/i8253.h
new file mode 100644
index 000000000000..015d8df07690
--- /dev/null
+++ b/include/asm-i386/i8253.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_I8253_H__
+#define __ASM_I8253_H__
+
+extern spinlock_t i8253_lock;
+
+#endif	/* __ASM_I8253_H__ */
diff --git a/include/asm-i386/mach-default/do_timer.h b/include/asm-i386/mach-default/do_timer.h
index 03dd13a48a8c..56211414fc95 100644
--- a/include/asm-i386/mach-default/do_timer.h
+++ b/include/asm-i386/mach-default/do_timer.h
@@ -1,6 +1,7 @@
 /* defines for inline arch setup functions */
 
 #include <asm/apic.h>
+#include <asm/i8259.h>
 
 /**
  * do_timer_interrupt_hook - hook into timer tick
diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h
index 32573749004c..a8babd2bbe84 100644
--- a/include/asm-x86_64/io_apic.h
+++ b/include/asm-x86_64/io_apic.h
@@ -217,4 +217,6 @@ extern int assign_irq_vector(int irq);
 
 void enable_NMI_through_LVT0 (void * dummy);
 
+extern spinlock_t i8259A_lock;
+
 #endif

commit f340c0d1a3f40fdcba69cd291530a4debc58748f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Jun 28 16:40:42 2005 +0200

    [PATCH] Tweak idle thread setup semantics
    
    This patch tweaks idle thread setup semantics a bit: instead of setting
    NEED_RESCHED in init_idle(), we do an explicit schedule() before calling
    into cpu_idle().
    
    This patch, while having no negative side-effects, enables wider use of
    cond_resched()s.  (which might happen in the stock kernel too, but it's
    particulary important for voluntary-preempt)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/init/main.c b/init/main.c
index d324801729ba..b5e421e39ede 100644
--- a/init/main.c
+++ b/init/main.c
@@ -383,6 +383,13 @@ static void noinline rest_init(void)
 	numa_default_policy();
 	unlock_kernel();
 	preempt_enable_no_resched();
+
+	/*
+	 * The boot idle thread must execute schedule()
+	 * at least one to get things moving:
+	 */
+	schedule();
+
 	cpu_idle();
 } 
 
diff --git a/kernel/sched.c b/kernel/sched.c
index e2b0d3e4dd06..5f2182d42241 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4166,6 +4166,14 @@ void show_state(void)
 	read_unlock(&tasklist_lock);
 }
 
+/**
+ * init_idle - set up an idle thread for a given CPU
+ * @idle: task in question
+ * @cpu: cpu the idle task belongs to
+ *
+ * NOTE: this function does not set the idle thread's NEED_RESCHED
+ * flag, to make booting more robust.
+ */
 void __devinit init_idle(task_t *idle, int cpu)
 {
 	runqueue_t *rq = cpu_rq(cpu);
@@ -4183,7 +4191,6 @@ void __devinit init_idle(task_t *idle, int cpu)
 #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	idle->oncpu = 1;
 #endif
-	set_tsk_need_resched(idle);
 	spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */

commit f8cbd99bd3a023db8d6356d19a5f6f539d367327
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Jun 25 14:57:39 2005 -0700

    [PATCH] sched: voluntary kernel preemption
    
    This patch adds a new preemption model: 'Voluntary Kernel Preemption'.  The
    3 models can be selected from a new menu:
    
                (X) No Forced Preemption (Server)
                ( ) Voluntary Kernel Preemption (Desktop)
                ( ) Preemptible Kernel (Low-Latency Desktop)
    
    we still default to the stock (Server) preemption model.
    
    Voluntary preemption works by adding a cond_resched()
    (reschedule-if-needed) call to every might_sleep() check.  It is lighter
    than CONFIG_PREEMPT - at the cost of not having as tight latencies.  It
    represents a different latency/complexity/overhead tradeoff.
    
    It has no runtime impact at all if disabled.  Here are size stats that show
    how the various preemption models impact the kernel's size:
    
        text    data     bss     dec     hex filename
     3618774  547184  179896 4345854  424ffe vmlinux.stock
     3626406  547184  179896 4353486  426dce vmlinux.voluntary   +0.2%
     3748414  548640  179896 4476950  445016 vmlinux.preempt     +3.5%
    
    voluntary-preempt is +0.2% of .text, preempt is +3.5%.
    
    This feature has been tested for many months by lots of people (and it's
    also included in the RHEL4 distribution and earlier variants were in Fedora
    as well), and it's intended for users and distributions who dont want to
    use full-blown CONFIG_PREEMPT for one reason or another.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e25b97062ce1..687ba8c9973d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -58,15 +58,23 @@ struct completion;
  * be biten later when the calling function happens to sleep when it is not
  * supposed to.
  */
+#ifdef CONFIG_PREEMPT_VOLUNTARY
+extern int cond_resched(void);
+# define might_resched() cond_resched()
+#else
+# define might_resched() do { } while (0)
+#endif
+
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
-#define might_sleep() __might_sleep(__FILE__, __LINE__)
-#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0)
-void __might_sleep(char *file, int line);
+  void __might_sleep(char *file, int line);
+# define might_sleep() \
+	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
 #else
-#define might_sleep() do {} while(0)
-#define might_sleep_if(cond) do {} while (0)
+# define might_sleep() do { might_resched(); } while (0)
 #endif
 
+#define might_sleep_if(cond) do { if (unlikely(cond)) might_sleep(); } while (0)
+
 #define abs(x) ({				\
 		int __x = (x);			\
 		(__x < 0) ? -__x : __x;		\
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 34c631221aa3..0b46a5dff4c0 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,15 +1,56 @@
 
-config PREEMPT
-	bool "Preemptible Kernel"
+choice
+	prompt "Preemption Model"
+	default PREEMPT_NONE
+
+config PREEMPT_NONE
+	bool "No Forced Preemption (Server)"
+	help
+	  This is the traditional Linux preemption model, geared towards
+	  throughput. It will still provide good latencies most of the
+	  time, but there are no guarantees and occasional longer delays
+	  are possible.
+
+	  Select this option if you are building a kernel for a server or
+	  scientific/computation system, or if you want to maximize the
+	  raw processing power of the kernel, irrespective of scheduling
+	  latencies.
+
+config PREEMPT_VOLUNTARY
+	bool "Voluntary Kernel Preemption (Desktop)"
 	help
-	  This option reduces the latency of the kernel when reacting to
-	  real-time or interactive events by allowing a low priority process to
-	  be preempted even if it is in kernel mode executing a system call.
-	  This allows applications to run more reliably even when the system is
+	  This option reduces the latency of the kernel by adding more
+	  "explicit preemption points" to the kernel code. These new
+	  preemption points have been selected to reduce the maximum
+	  latency of rescheduling, providing faster application reactions,
+	  at the cost of slighly lower throughput.
+
+	  This allows reaction to interactive events by allowing a
+	  low priority process to voluntarily preempt itself even if it
+	  is in kernel mode executing a system call. This allows
+	  applications to run more 'smoothly' even when the system is
 	  under load.
 
-	  Say Y here if you are building a kernel for a desktop, embedded
-	  or real-time system.  Say N if you are unsure.
+	  Select this if you are building a kernel for a desktop system.
+
+config PREEMPT
+	bool "Preemptible Kernel (Low-Latency Desktop)"
+	help
+	  This option reduces the latency of the kernel by making
+	  all kernel code (that is not executing in a critical section)
+	  preemptible.  This allows reaction to interactive events by
+	  permitting a low priority process to be preempted involuntarily
+	  even if it is in kernel mode executing a system call and would
+	  otherwise not be about to reach a natural preemption point.
+	  This allows applications to run more 'smoothly' even when the
+	  system is under load, at the cost of slighly lower throughput
+	  and a slight runtime overhead to kernel code.
+
+	  Select this if you are building a kernel for a desktop or
+	  embedded system with latency requirements in the milliseconds
+	  range.
+
+endchoice
 
 config PREEMPT_BKL
 	bool "Preempt The Big Kernel Lock"

commit f704f56af95bec3c1ca719d64d0becef74d40899
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Jun 25 14:57:38 2005 -0700

    [PATCH] enable PREEMPT_BKL on !PREEMPT+SMP too
    
    The only sane way to clean up the current 3 lock_kernel() variants seems to
    be to remove the spinlock-based BKL implementations altogether, and to keep
    the semaphore-based one only.  If we dont want to do that for whatever
    reason then i'm afraid we have to live with the current complexity.  (but
    i'm open for other cleanup suggestions as well.)
    
    To explore this possibility we'll (at a minimum) have to know whether the
    semaphore-based BKL works fine on plain SMP too.  The patch below enables
    this.
    
    The patch may make sense in isolation as well, as it might bring
    performance benefits: code that would formerly spin on the BKL spinlock
    will now schedule away and give up the CPU.  It might introduce performance
    regressions as well, if any performance-critical code uses the BKL heavily
    and gets overscheduled due to the semaphore.  I very much hope there is no
    such performance-critical codepath left though.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 587328be8216..34c631221aa3 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -13,7 +13,7 @@ config PREEMPT
 
 config PREEMPT_BKL
 	bool "Preempt The Big Kernel Lock"
-	depends on PREEMPT
+	depends on SMP || PREEMPT
 	default y
 	help
 	  This option reduces the latency of the kernel by making the

commit cc19ca86a023fcd552c78e77a7be6ce271f92a28
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Jun 25 14:57:36 2005 -0700

    [PATCH] consolidate PREEMPT options into kernel/Kconfig.preempt
    
    This patch consolidates the CONFIG_PREEMPT and CONFIG_PREEMPT_BKL
    preemption options into kernel/Kconfig.preempt.  This, besides reducing
    source-code, also enables more centralized tweaking of preemption related
    options.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index b4cd11e58451..961ab20032f5 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -510,28 +510,7 @@ config SCHED_SMT
 	  cost of slightly increased overhead in some places. If unsure say
 	  N here.
 
-config PREEMPT
-	bool "Preemptible Kernel"
-	help
-	  This option reduces the latency of the kernel when reacting to
-	  real-time or interactive events by allowing a low priority process to
-	  be preempted even if it is in kernel mode executing a system call.
-	  This allows applications to run more reliably even when the system is
-	  under load.
-
-	  Say Y here if you are building a kernel for a desktop, embedded
-	  or real-time system.  Say N if you are unsure.
-
-config PREEMPT_BKL
-	bool "Preempt The Big Kernel Lock"
-	depends on PREEMPT
-	default y
-	help
-	  This option reduces the latency of the kernel by making the
-	  big kernel lock preemptible.
-
-	  Say Y here if you are building a kernel for a desktop system.
-	  Say N if you are unsure.
+source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig
index cb27068bfcd4..5f40b438b584 100644
--- a/arch/ppc64/Kconfig
+++ b/arch/ppc64/Kconfig
@@ -270,26 +270,7 @@ config SCHED_SMT
 	  when dealing with POWER5 cpus at a cost of slightly increased
 	  overhead in some places. If unsure say N here.
 
-config PREEMPT
-	bool "Preemptible Kernel"
-	help
-	  This option reduces the latency of the kernel when reacting to
-	  real-time or interactive events by allowing a low priority process to
-	  be preempted even if it is in kernel mode executing a system call.
-
-	  Say Y here if you are building a kernel for a desktop, embedded
-	  or real-time system.  Say N if you are unsure.
-
-config PREEMPT_BKL
-	bool "Preempt The Big Kernel Lock"
-	depends on PREEMPT
-	default y
-	help
-	  This option reduces the latency of the kernel by making the
-	  big kernel lock preemptible.
-
-	  Say Y here if you are building a kernel for a desktop system.
-	  Say N if you are unsure.
+source "kernel/Kconfig.preempt"
 
 config EEH
 	bool "PCI Extended Error Handling (EEH)" if EMBEDDED
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 92f5a5266023..a853d87ff7e3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -207,33 +207,6 @@ config SMP
 
 	  If you don't know what to do here, say N.
 
-config PREEMPT
-	bool "Preemptible Kernel"
-	---help---
-	  This option reduces the latency of the kernel when reacting to
-	  real-time or interactive events by allowing a low priority process to
-	  be preempted even if it is in kernel mode executing a system call.
-	  This allows applications to run more reliably even when the system is
-	  under load. On contrary it may also break your drivers and add
-	  priority inheritance problems to your system. Don't select it if
-	  you rely on a stable system or have slightly obscure hardware.
-	  It's also not very well tested on x86-64 currently.
-	  You have been warned.
-
-	  Say Y here if you are feeling brave and building a kernel for a
-	  desktop, embedded or real-time system.  Say N if you are unsure.
-
-config PREEMPT_BKL
-	bool "Preempt The Big Kernel Lock"
-	depends on PREEMPT
-	default y
-	help
-	  This option reduces the latency of the kernel by making the
-	  big kernel lock preemptible.
-
-	  Say Y here if you are building a kernel for a desktop system.
-	  Say N if you are unsure.
-
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on SMP
@@ -244,6 +217,8 @@ config SCHED_SMT
 	  cost of slightly increased overhead in some places. If unsure say
 	  N here.
 
+source "kernel/Kconfig.preempt"
+
 config K8_NUMA
        bool "K8 NUMA support"
        select NUMA
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
new file mode 100644
index 000000000000..587328be8216
--- /dev/null
+++ b/kernel/Kconfig.preempt
@@ -0,0 +1,24 @@
+
+config PREEMPT
+	bool "Preemptible Kernel"
+	help
+	  This option reduces the latency of the kernel when reacting to
+	  real-time or interactive events by allowing a low priority process to
+	  be preempted even if it is in kernel mode executing a system call.
+	  This allows applications to run more reliably even when the system is
+	  under load.
+
+	  Say Y here if you are building a kernel for a desktop, embedded
+	  or real-time system.  Say N if you are unsure.
+
+config PREEMPT_BKL
+	bool "Preempt The Big Kernel Lock"
+	depends on PREEMPT
+	default y
+	help
+	  This option reduces the latency of the kernel by making the
+	  big kernel lock preemptible.
+
+	  Say Y here if you are building a kernel for a desktop system.
+	  Say N if you are unsure.
+

commit 48c08d3f8ff94fa118187e4d8d4a5707bb85e59d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Jun 25 14:57:22 2005 -0700

    [PATCH] sched: uninline task_timeslice
    
          "Chen, Kenneth W" <kenneth.w.chen@intel.com>
    
    uninline task_timeslice() - reduces code footprint noticeably, and it's
    slowpath code.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index 2711130cd973..98bf1c091da5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -166,7 +166,7 @@
 #define SCALE_PRIO(x, prio) \
 	max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO/2), MIN_TIMESLICE)
 
-static inline unsigned int task_timeslice(task_t *p)
+static unsigned int task_timeslice(task_t *p)
 {
 	if (p->static_prio < NICE_TO_PRIO(0))
 		return SCALE_PRIO(DEF_TIMESLICE*4, p->static_prio);

commit 39c715b71740c4a78ba4769fb54826929bac03cb
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Jun 21 17:14:34 2005 -0700

    [PATCH] smp_processor_id() cleanup
    
    This patch implements a number of smp_processor_id() cleanup ideas that
    Arjan van de Ven and I came up with.
    
    The previous __smp_processor_id/_smp_processor_id/smp_processor_id API
    spaghetti was hard to follow both on the implementational and on the
    usage side.
    
    Some of the complexity arose from picking wrong names, some of the
    complexity comes from the fact that not all architectures defined
    __smp_processor_id.
    
    In the new code, there are two externally visible symbols:
    
     - smp_processor_id(): debug variant.
    
     - raw_smp_processor_id(): nondebug variant. Replaces all existing
       uses of _smp_processor_id() and __smp_processor_id(). Defined
       by every SMP architecture in include/asm-*/smp.h.
    
    There is one new internal symbol, dependent on DEBUG_PREEMPT:
    
     - debug_smp_processor_id(): internal debug variant, mapped to
                                 smp_processor_id().
    
    Also, i moved debug_smp_processor_id() from lib/kernel_lock.c into a new
    lib/smp_processor_id.c file.  All related comments got updated and/or
    clarified.
    
    I have build/boot tested the following 8 .config combinations on x86:
    
     {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT}
    
    I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT.  (Other
    architectures are untested, but should work just fine.)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@infradead.org>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 00c63419c06f..83c579e82a81 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -306,7 +306,7 @@ void die(const char * str, struct pt_regs * regs, long err)
 	};
 	static int die_counter;
 
-	if (die.lock_owner != _smp_processor_id()) {
+	if (die.lock_owner != raw_smp_processor_id()) {
 		console_verbose();
 		spin_lock_irq(&die.lock);
 		die.lock_owner = smp_processor_id();
diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c
index 080639f262b1..eb0cdfe9280f 100644
--- a/arch/i386/lib/delay.c
+++ b/arch/i386/lib/delay.c
@@ -34,7 +34,7 @@ inline void __const_udelay(unsigned long xloops)
 	xloops *= 4;
 	__asm__("mull %0"
 		:"=d" (xloops), "=&a" (d0)
-		:"1" (xloops),"0" (cpu_data[_smp_processor_id()].loops_per_jiffy * (HZ/4)));
+		:"1" (xloops),"0" (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (HZ/4)));
         __delay(++xloops);
 }
 
diff --git a/arch/ppc/lib/locks.c b/arch/ppc/lib/locks.c
index 694163d696d8..c450dc4b766e 100644
--- a/arch/ppc/lib/locks.c
+++ b/arch/ppc/lib/locks.c
@@ -130,7 +130,7 @@ void _raw_read_lock(rwlock_t *rw)
 		while (!read_can_lock(rw)) {
 			if (--stuck == 0) {
 				printk("_read_lock(%p) CPU#%d lock %d\n",
-				       rw, _smp_processor_id(), rw->lock);
+				       rw, raw_smp_processor_id(), rw->lock);
 				stuck = INIT_STUCK;
 			}
 		}
@@ -158,7 +158,7 @@ void _raw_write_lock(rwlock_t *rw)
 		while (!write_can_lock(rw)) {
 			if (--stuck == 0) {
 				printk("write_lock(%p) CPU#%d lock %d)\n",
-				       rw, _smp_processor_id(), rw->lock);
+				       rw, raw_smp_processor_id(), rw->lock);
 				stuck = INIT_STUCK;
 			}
 		}
diff --git a/arch/ppc64/kernel/idle.c b/arch/ppc64/kernel/idle.c
index f24ce2b87200..ff8a7db142d3 100644
--- a/arch/ppc64/kernel/idle.c
+++ b/arch/ppc64/kernel/idle.c
@@ -292,7 +292,7 @@ static int native_idle(void)
 		if (need_resched())
 			schedule();
 
-		if (cpu_is_offline(_smp_processor_id()) &&
+		if (cpu_is_offline(raw_smp_processor_id()) &&
 		    system_state == SYSTEM_RUNNING)
 			cpu_die();
 	}
diff --git a/arch/sh/lib/delay.c b/arch/sh/lib/delay.c
index 50b36037d86b..351714694d6d 100644
--- a/arch/sh/lib/delay.c
+++ b/arch/sh/lib/delay.c
@@ -24,7 +24,7 @@ inline void __const_udelay(unsigned long xloops)
 	__asm__("dmulu.l	%0, %2\n\t"
 		"sts	mach, %0"
 		: "=r" (xloops)
-		: "0" (xloops), "r" (cpu_data[_smp_processor_id()].loops_per_jiffy)
+		: "0" (xloops), "r" (cpu_data[raw_smp_processor_id()].loops_per_jiffy)
 		: "macl", "mach");
 	__delay(xloops * HZ);
 }
diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c
index f6b4c784d53e..e8808727617a 100644
--- a/arch/sparc64/lib/delay.c
+++ b/arch/sparc64/lib/delay.c
@@ -31,7 +31,7 @@ void __const_udelay(unsigned long n)
 {
 	n *= 4;
 
-	n *= (cpu_data(_smp_processor_id()).udelay_val * (HZ/4));
+	n *= (cpu_data(raw_smp_processor_id()).udelay_val * (HZ/4));
 	n >>= 32;
 
 	__delay(n + 1);
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
index 6e2d66472eb1..aed61a668a1b 100644
--- a/arch/x86_64/lib/delay.c
+++ b/arch/x86_64/lib/delay.c
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)
 
 inline void __const_udelay(unsigned long xloops)
 {
-	__delay(((xloops * cpu_data[_smp_processor_id()].loops_per_jiffy) >> 32) * HZ);
+	__delay(((xloops * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) * HZ);
 }
 
 void __udelay(unsigned long usecs)
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index ff64d333e95f..c9d671cf7857 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -171,7 +171,7 @@ static void acpi_processor_idle (void)
 	int			sleep_ticks = 0;
 	u32			t1, t2 = 0;
 
-	pr = processors[_smp_processor_id()];
+	pr = processors[raw_smp_processor_id()];
 	if (!pr)
 		return;
 
diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c
index 9b8ff396e6f8..e152d0fa0cdd 100644
--- a/drivers/input/gameport/gameport.c
+++ b/drivers/input/gameport/gameport.c
@@ -134,7 +134,7 @@ static int gameport_measure_speed(struct gameport *gameport)
 	}
 
 	gameport_close(gameport);
-	return (cpu_data[_smp_processor_id()].loops_per_jiffy * (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx);
+	return (cpu_data[raw_smp_processor_id()].loops_per_jiffy * (unsigned long)HZ / (1000 / 50)) / (tx < 1 ? 1 : tx);
 
 #else
 
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index 55720dc6ec43..745a14183634 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -62,7 +62,7 @@ static int task_exit_notify(struct notifier_block * self, unsigned long val, voi
 	/* To avoid latency problems, we only process the current CPU,
 	 * hoping that most samples for the task are on this CPU
 	 */
-	sync_buffer(_smp_processor_id());
+	sync_buffer(raw_smp_processor_id());
   	return 0;
 }
 
@@ -86,7 +86,7 @@ static int munmap_notify(struct notifier_block * self, unsigned long val, void *
 		/* To avoid latency problems, we only process the current CPU,
 		 * hoping that most samples for the task are on this CPU
 		 */
-		sync_buffer(_smp_processor_id());
+		sync_buffer(raw_smp_processor_id());
 		return 0;
 	}
 
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 71bb41019a12..7d7c8788ea75 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -145,10 +145,10 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define xfs_inherit_nosymlinks	xfs_params.inherit_nosym.val
 #define xfs_rotorstep		xfs_params.rotorstep.val
 
-#ifndef __smp_processor_id
-#define __smp_processor_id()	smp_processor_id()
+#ifndef raw_smp_processor_id
+#define raw_smp_processor_id()	smp_processor_id()
 #endif
-#define current_cpu()		__smp_processor_id()
+#define current_cpu()		raw_smp_processor_id()
 #define current_pid()		(current->pid)
 #define current_fsuid(cred)	(current->fsuid)
 #define current_fsgid(cred)	(current->fsgid)
diff --git a/include/asm-alpha/smp.h b/include/asm-alpha/smp.h
index cbc173ae45aa..9950706abdf8 100644
--- a/include/asm-alpha/smp.h
+++ b/include/asm-alpha/smp.h
@@ -43,7 +43,7 @@ extern struct cpuinfo_alpha cpu_data[NR_CPUS];
 #define PROC_CHANGE_PENALTY     20
 
 #define hard_smp_processor_id()	__hard_smp_processor_id()
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern cpumask_t cpu_present_mask;
 extern cpumask_t cpu_online_map;
diff --git a/include/asm-arm/smp.h b/include/asm-arm/smp.h
index bd44f894690f..6c6c60adbbaa 100644
--- a/include/asm-arm/smp.h
+++ b/include/asm-arm/smp.h
@@ -21,7 +21,7 @@
 # error "<asm-arm/smp.h> included in non-SMP build"
 #endif
 
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 
 extern cpumask_t cpu_present_mask;
 #define cpu_possible_map cpu_present_mask
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index e03a206dfa36..55ef31f66bbe 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -51,7 +51,7 @@ extern u8 x86_cpu_to_apicid[];
  * from the initial startup. We map APIC_BASE very early in page_setup(),
  * so this is correct in the x86 case.
  */
-#define __smp_processor_id() (current_thread_info()->cpu)
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
 extern cpumask_t cpu_callin_map;
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 3ba1a061e4ae..a3914352c995 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -46,7 +46,7 @@ ia64_get_lid (void)
 #define SMP_IRQ_REDIRECTION	(1 << 0)
 #define SMP_IPI_REDIRECTION	(1 << 1)
 
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 
 extern struct smp_boot_data {
 	int cpu_count;
diff --git a/include/asm-m32r/smp.h b/include/asm-m32r/smp.h
index 8cd4d0da4be1..b9a20cdad65f 100644
--- a/include/asm-m32r/smp.h
+++ b/include/asm-m32r/smp.h
@@ -66,7 +66,7 @@ extern volatile int cpu_2_physid[NR_CPUS];
 #define physid_to_cpu(physid)	physid_2_cpu[physid]
 #define cpu_to_physid(cpu_id)	cpu_2_physid[cpu_id]
 
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern cpumask_t cpu_callout_map;
 #define cpu_possible_map cpu_callout_map
diff --git a/include/asm-mips/smp.h b/include/asm-mips/smp.h
index 8ba370ecfd4c..5618f1e12f40 100644
--- a/include/asm-mips/smp.h
+++ b/include/asm-mips/smp.h
@@ -21,7 +21,7 @@
 #include <linux/cpumask.h>
 #include <asm/atomic.h>
 
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 
 /* Map from cpu id to sequential logical cpu number.  This will only
    not be idempotent when cpus failed to come on-line.  */
diff --git a/include/asm-parisc/smp.h b/include/asm-parisc/smp.h
index fde77ac35463..9413f67a540b 100644
--- a/include/asm-parisc/smp.h
+++ b/include/asm-parisc/smp.h
@@ -51,7 +51,7 @@ extern void smp_send_reschedule(int cpu);
 
 extern unsigned long cpu_present_mask;
 
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 #endif /* CONFIG_SMP */
 
diff --git a/include/asm-ppc/smp.h b/include/asm-ppc/smp.h
index ebfb614f55f6..17530c232c76 100644
--- a/include/asm-ppc/smp.h
+++ b/include/asm-ppc/smp.h
@@ -44,7 +44,7 @@ extern void smp_message_recv(int, struct pt_regs *);
 #define NO_PROC_ID		0xFF            /* No processor magic marker */
 #define PROC_CHANGE_PENALTY	20
 
-#define smp_processor_id() (current_thread_info()->cpu)
+#define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 extern int __cpu_up(unsigned int cpu);
 
diff --git a/include/asm-ppc64/smp.h b/include/asm-ppc64/smp.h
index c8646fa999c2..8115ecb8feee 100644
--- a/include/asm-ppc64/smp.h
+++ b/include/asm-ppc64/smp.h
@@ -45,7 +45,7 @@ void generic_cpu_die(unsigned int cpu);
 void generic_mach_cpu_die(void);
 #endif
 
-#define __smp_processor_id() (get_paca()->paca_index)
+#define raw_smp_processor_id()	(get_paca()->paca_index)
 #define hard_smp_processor_id() (get_paca()->hw_cpu_id)
 
 extern cpumask_t cpu_sibling_map[NR_CPUS];
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 9473786387a3..dd50e57a928f 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -47,7 +47,7 @@ extern int smp_call_function_on(void (*func) (void *info), void *info,
  
 #define PROC_CHANGE_PENALTY	20		/* Schedule penalty */
 
-#define smp_processor_id() (S390_lowcore.cpu_data.cpu_nr)
+#define raw_smp_processor_id()	(S390_lowcore.cpu_data.cpu_nr)
 
 extern int smp_get_cpu(cpumask_t cpu_map);
 extern void smp_put_cpu(int cpu);
diff --git a/include/asm-sh/smp.h b/include/asm-sh/smp.h
index 38b54469d7d1..f19a8b3b69a6 100644
--- a/include/asm-sh/smp.h
+++ b/include/asm-sh/smp.h
@@ -25,7 +25,7 @@ extern cpumask_t cpu_possible_map;
 
 #define cpu_online(cpu)		cpu_isset(cpu, cpu_online_map)
 
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define raw_smp_processor_id()	(current_thread_info()->cpu)
 
 /* I've no idea what the real meaning of this is */
 #define PROC_CHANGE_PENALTY	20
diff --git a/include/asm-sparc/smp.h b/include/asm-sparc/smp.h
index f986c0d0922a..4f96d8333a12 100644
--- a/include/asm-sparc/smp.h
+++ b/include/asm-sparc/smp.h
@@ -148,7 +148,7 @@ extern __inline__ int hard_smp_processor_id(void)
 }
 #endif
 
-#define smp_processor_id()	(current_thread_info()->cpu)
+#define raw_smp_processor_id()		(current_thread_info()->cpu)
 
 #define prof_multiplier(__cpu)		cpu_data(__cpu).multiplier
 #define prof_counter(__cpu)		cpu_data(__cpu).counter
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index 5e3e06d908fe..110a2de89123 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -64,7 +64,7 @@ static __inline__ int hard_smp_processor_id(void)
 	}
 }
 
-#define smp_processor_id() (current_thread_info()->cpu)
+#define raw_smp_processor_id() (current_thread_info()->cpu)
 
 #endif /* !(__ASSEMBLY__) */
 
diff --git a/include/asm-um/smp.h b/include/asm-um/smp.h
index 4412d5d9c26b..d879eba2b52c 100644
--- a/include/asm-um/smp.h
+++ b/include/asm-um/smp.h
@@ -8,7 +8,8 @@
 #include "asm/current.h"
 #include "linux/cpumask.h"
 
-#define smp_processor_id() (current_thread->cpu)
+#define raw_smp_processor_id() (current_thread->cpu)
+
 #define cpu_logical_map(n) (n)
 #define cpu_number_map(n) (n)
 #define PROC_CHANGE_PENALTY	15 /* Pick a number, any number */
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index 96844fecbde8..a7425aa5a3b7 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -68,7 +68,7 @@ static inline int num_booting_cpus(void)
 	return cpus_weight(cpu_callout_map);
 }
 
-#define __smp_processor_id() read_pda(cpunumber)
+#define raw_smp_processor_id() read_pda(cpunumber)
 
 extern __inline int hard_smp_processor_id(void)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e530c6c092f1..beacd931b606 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -381,7 +381,7 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
-#define numa_node_id()		(cpu_to_node(_smp_processor_id()))
+#define numa_node_id()		(cpu_to_node(raw_smp_processor_id()))
 
 #ifndef CONFIG_DISCONTIGMEM
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index dcf1db3b35d3..9dfa3ee769ae 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -92,10 +92,7 @@ void smp_prepare_boot_cpu(void);
 /*
  *	These macros fold the SMP functionality into a single CPU system
  */
-
-#if !defined(__smp_processor_id) || !defined(CONFIG_PREEMPT)
-# define smp_processor_id()			0
-#endif
+#define raw_smp_processor_id()			0
 #define hard_smp_processor_id()			0
 #define smp_call_function(func,info,retry,wait)	({ 0; })
 #define on_each_cpu(func,info,retry,wait)	({ func(info); 0; })
@@ -106,30 +103,25 @@ static inline void smp_send_reschedule(int cpu) { }
 #endif /* !SMP */
 
 /*
- * DEBUG_PREEMPT support: check whether smp_processor_id() is being
- * used in a preemption-safe way.
+ * smp_processor_id(): get the current CPU ID.
  *
- * An architecture has to enable this debugging code explicitly.
- * It can do so by renaming the smp_processor_id() macro to
- * __smp_processor_id().  This should only be done after some minimal
- * testing, because usually there are a number of false positives
- * that an architecture will trigger.
+ * if DEBUG_PREEMPT is enabled the we check whether it is
+ * used in a preemption-safe way. (smp_processor_id() is safe
+ * if it's used in a preemption-off critical section, or in
+ * a thread that is bound to the current CPU.)
  *
- * To fix a false positive (i.e. smp_processor_id() use that the
- * debugging code reports but which use for some reason is legal),
- * change the smp_processor_id() reference to _smp_processor_id(),
- * which is the nondebug variant.  NOTE: don't use this to hack around
- * real bugs.
+ * NOTE: raw_smp_processor_id() is for internal use only
+ * (smp_processor_id() is the preferred variant), but in rare
+ * instances it might also be used to turn off false positives
+ * (i.e. smp_processor_id() use that the debugging code reports but
+ * which use for some reason is legal). Don't use this to hack around
+ * the warning message, as your code might not work under PREEMPT.
  */
-#ifdef __smp_processor_id
-# if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
-   extern unsigned int smp_processor_id(void);
-# else
-#  define smp_processor_id() __smp_processor_id()
-# endif
-# define _smp_processor_id() __smp_processor_id()
+#ifdef CONFIG_DEBUG_PREEMPT
+  extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
 #else
-# define _smp_processor_id() smp_processor_id()
+# define smp_processor_id() raw_smp_processor_id()
 #endif
 
 #define get_cpu()		({ preempt_disable(); smp_processor_id(); })
diff --git a/include/net/route.h b/include/net/route.h
index d34ca8fc6756..c3cd069a9aca 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -107,7 +107,7 @@ struct rt_cache_stat
 
 extern struct rt_cache_stat *rt_cache_stat;
 #define RT_CACHE_STAT_INC(field)					  \
-		(per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++)
+		(per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
 
 extern struct ip_rt_acct *ip_rt_acct;
 
diff --git a/include/net/snmp.h b/include/net/snmp.h
index a15ab256276e..a36bed8ea210 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -128,18 +128,18 @@ struct linux_mib {
 #define SNMP_STAT_USRPTR(name)	(name[1])
 
 #define SNMP_INC_STATS_BH(mib, field) 	\
-	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset)	\
-	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++)
+	(per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field + (offset)]++)
 #define SNMP_INC_STATS_USER(mib, field) \
-	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[1], raw_smp_processor_id())->mibs[field]++)
 #define SNMP_INC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++)
+	(per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id())->mibs[field]++)
 #define SNMP_DEC_STATS(mib, field) 	\
-	(per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--)
+	(per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id())->mibs[field]--)
 #define SNMP_ADD_STATS_BH(mib, field, addend) 	\
-	(per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field] += addend)
 #define SNMP_ADD_STATS_USER(mib, field, addend) 	\
-	(per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend)
+	(per_cpu_ptr(mib[1], raw_smp_processor_id())->mibs[field] += addend)
 
 #endif
diff --git a/kernel/module.c b/kernel/module.c
index 83b3d376708c..a566745dde62 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -379,7 +379,7 @@ static void module_unload_init(struct module *mod)
 	for (i = 0; i < NR_CPUS; i++)
 		local_set(&mod->ref[i].count, 0);
 	/* Hold reference count during initialization. */
-	local_set(&mod->ref[_smp_processor_id()].count, 1);
+	local_set(&mod->ref[raw_smp_processor_id()].count, 1);
 	/* Backwards compatibility macros put refcount during init. */
 	mod->waiter = current;
 }
diff --git a/kernel/power/smp.c b/kernel/power/smp.c
index cba3584b80fe..457c2302ed42 100644
--- a/kernel/power/smp.c
+++ b/kernel/power/smp.c
@@ -48,11 +48,11 @@ void disable_nonboot_cpus(void)
 {
 	oldmask = current->cpus_allowed;
 	set_cpus_allowed(current, cpumask_of_cpu(0));
-	printk("Freezing CPUs (at %d)", _smp_processor_id());
+	printk("Freezing CPUs (at %d)", raw_smp_processor_id());
 	current->state = TASK_INTERRUPTIBLE;
 	schedule_timeout(HZ);
 	printk("...");
-	BUG_ON(_smp_processor_id() != 0);
+	BUG_ON(raw_smp_processor_id() != 0);
 
 	/* FIXME: for this to work, all the CPUs must be running
 	 * "idle" thread (or we deadlock). Is that guaranteed? */
diff --git a/kernel/sched.c b/kernel/sched.c
index f12a0c8a7d98..deca041fc364 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3814,7 +3814,7 @@ EXPORT_SYMBOL(yield);
  */
 void __sched io_schedule(void)
 {
-	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
+	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
 
 	atomic_inc(&rq->nr_iowait);
 	schedule();
@@ -3825,7 +3825,7 @@ EXPORT_SYMBOL(io_schedule);
 
 long __sched io_schedule_timeout(long timeout)
 {
-	struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
+	struct runqueue *rq = &per_cpu(runqueues, raw_smp_processor_id());
 	long ret;
 
 	atomic_inc(&rq->nr_iowait);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 6116b25aa7cf..84a9d18aa8da 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -100,7 +100,7 @@ static int stop_machine(void)
 	stopmachine_state = STOPMACHINE_WAIT;
 
 	for_each_online_cpu(i) {
-		if (i == _smp_processor_id())
+		if (i == raw_smp_processor_id())
 			continue;
 		ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
 		if (ret < 0)
@@ -182,7 +182,7 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
 
 	/* If they don't care which CPU fn runs on, bind to any online one. */
 	if (cpu == NR_CPUS)
-		cpu = _smp_processor_id();
+		cpu = raw_smp_processor_id();
 
 	p = kthread_create(do_stop, &smdata, "kstopmachine");
 	if (!IS_ERR(p)) {
diff --git a/lib/Makefile b/lib/Makefile
index 9eccea9429a7..5f10cb898407 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -20,6 +20,7 @@ lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
 obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 
 ifneq ($(CONFIG_HAVE_DEC_LOCK),y) 
   lib-y += dec_and_lock.o
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 99b0ae3d51dd..bd2bc5d887b8 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -9,61 +9,6 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 
-#if defined(CONFIG_PREEMPT) && defined(__smp_processor_id) && \
-		defined(CONFIG_DEBUG_PREEMPT)
-
-/*
- * Debugging check.
- */
-unsigned int smp_processor_id(void)
-{
-	unsigned long preempt_count = preempt_count();
-	int this_cpu = __smp_processor_id();
-	cpumask_t this_mask;
-
-	if (likely(preempt_count))
-		goto out;
-
-	if (irqs_disabled())
-		goto out;
-
-	/*
-	 * Kernel threads bound to a single CPU can safely use
-	 * smp_processor_id():
-	 */
-	this_mask = cpumask_of_cpu(this_cpu);
-
-	if (cpus_equal(current->cpus_allowed, this_mask))
-		goto out;
-
-	/*
-	 * It is valid to assume CPU-locality during early bootup:
-	 */
-	if (system_state != SYSTEM_RUNNING)
-		goto out;
-
-	/*
-	 * Avoid recursion:
-	 */
-	preempt_disable();
-
-	if (!printk_ratelimit())
-		goto out_enable;
-
-	printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] code: %s/%d\n", preempt_count(), current->comm, current->pid);
-	print_symbol("caller is %s\n", (long)__builtin_return_address(0));
-	dump_stack();
-
-out_enable:
-	preempt_enable_no_resched();
-out:
-	return this_cpu;
-}
-
-EXPORT_SYMBOL(smp_processor_id);
-
-#endif /* PREEMPT && __smp_processor_id && DEBUG_PREEMPT */
-
 #ifdef CONFIG_PREEMPT_BKL
 /*
  * The 'big kernel semaphore'
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
new file mode 100644
index 000000000000..42c08ef828c5
--- /dev/null
+++ b/lib/smp_processor_id.c
@@ -0,0 +1,55 @@
+/*
+ * lib/smp_processor_id.c
+ *
+ * DEBUG_PREEMPT variant of smp_processor_id().
+ */
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+
+unsigned int debug_smp_processor_id(void)
+{
+	unsigned long preempt_count = preempt_count();
+	int this_cpu = raw_smp_processor_id();
+	cpumask_t this_mask;
+
+	if (likely(preempt_count))
+		goto out;
+
+	if (irqs_disabled())
+		goto out;
+
+	/*
+	 * Kernel threads bound to a single CPU can safely use
+	 * smp_processor_id():
+	 */
+	this_mask = cpumask_of_cpu(this_cpu);
+
+	if (cpus_equal(current->cpus_allowed, this_mask))
+		goto out;
+
+	/*
+	 * It is valid to assume CPU-locality during early bootup:
+	 */
+	if (system_state != SYSTEM_RUNNING)
+		goto out;
+
+	/*
+	 * Avoid recursion:
+	 */
+	preempt_disable();
+
+	if (!printk_ratelimit())
+		goto out_enable;
+
+	printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] code: %s/%d\n", preempt_count(), current->comm, current->pid);
+	print_symbol("caller is %s\n", (long)__builtin_return_address(0));
+	dump_stack();
+
+out_enable:
+	preempt_enable_no_resched();
+out:
+	return this_cpu;
+}
+
+EXPORT_SYMBOL(debug_smp_processor_id);
+