Patches contributed by Eötvös Lorand University

<<Prev 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318[319]320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 Next>>

commit 8018c27b26af56af18eb8b2dc600eba825792d8f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 29 16:50:01 2006 -0800

    [PATCH] kvm: fix GFP_KERNEL allocation in atomic section in kvm_dev_ioctl_create_vcpu()
    
    fix a GFP_KERNEL allocation in atomic section: kvm_dev_ioctl_create_vcpu()
    called kvm_mmu_init(), which calls alloc_pages(), while holding the vcpu.
    
    The fix is to set up the MMU state in two phases: kvm_mmu_create() and
    kvm_mmu_setup().
    
    (NOTE: free_vcpus does a kvm_mmu_destroy() call so there's no need for any
    extra teardown branch on allocation/init failure here.)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Cc: Avi Kivity <avi@qumranet.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 2670219a9264..100df6f38d92 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -319,7 +319,8 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module);
 void kvm_exit_arch(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
-int kvm_mmu_init(struct kvm_vcpu *vcpu);
+int kvm_mmu_create(struct kvm_vcpu *vcpu);
+int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 973544553cba..ce7fe640f18d 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -522,12 +522,14 @@ static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n)
 	if (r < 0)
 		goto out_free_vcpus;
 
-	kvm_arch_ops->vcpu_load(vcpu);
+	r = kvm_mmu_create(vcpu);
+	if (r < 0)
+		goto out_free_vcpus;
 
-	r = kvm_arch_ops->vcpu_setup(vcpu);
+	kvm_arch_ops->vcpu_load(vcpu);
+	r = kvm_mmu_setup(vcpu);
 	if (r >= 0)
-		r = kvm_mmu_init(vcpu);
-
+		r = kvm_arch_ops->vcpu_setup(vcpu);
 	vcpu_put(vcpu);
 
 	if (r < 0)
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 85887fcd584f..790423c5f23d 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -639,28 +639,22 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 	return -ENOMEM;
 }
 
-int kvm_mmu_init(struct kvm_vcpu *vcpu)
+int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
-	int r;
-
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
 	ASSERT(list_empty(&vcpu->free_pages));
 
-	r = alloc_mmu_pages(vcpu);
-	if (r)
-		goto out;
-
-	r = init_kvm_mmu(vcpu);
-	if (r)
-		goto out_free_pages;
+	return alloc_mmu_pages(vcpu);
+}
 
-	return 0;
+int kvm_mmu_setup(struct kvm_vcpu *vcpu)
+{
+	ASSERT(vcpu);
+	ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
+	ASSERT(!list_empty(&vcpu->free_pages));
 
-out_free_pages:
-	free_mmu_pages(vcpu);
-out:
-	return r;
+	return init_kvm_mmu(vcpu);
 }
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu)

commit 9414232fa0cc28e2f51b8c76d260f2748f7953fc
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 29 16:48:13 2006 -0800

    [PATCH] sched: fix cond_resched_softirq() offset
    
    Remove the __resched_legal() check: it is conceptually broken.  The biggest
    problem it had is that it can mask buggy cond_resched() calls.  A
    cond_resched() call is only legal if we are not in an atomic context, with
    two narrow exceptions:
    
     - if the system is booting
     - a reacquire_kernel_lock() down() done while PREEMPT_ACTIVE is set
    
    But __resched_legal() hid this and just silently returned whenever
    these primitives were called from invalid contexts. (Same goes for
    cond_resched_locked() and cond_resched_softirq()).
    
    Furthermore, the __resched_legal(0) call was buggy in that it caused
    unnecessarily long softirq latencies via cond_resched_softirq().  (which is
    only called from softirq-off sections, hence the code did nothing.)
    
    The fix is to resurrect the efficiency of the might_sleep checks and to
    only allow the narrow exceptions.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index b515e3caad7f..3df33da0dafc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4617,17 +4617,6 @@ asmlinkage long sys_sched_yield(void)
 	return 0;
 }
 
-static inline int __resched_legal(int expected_preempt_count)
-{
-#ifdef CONFIG_PREEMPT
-	if (unlikely(preempt_count() != expected_preempt_count))
-		return 0;
-#endif
-	if (unlikely(system_state != SYSTEM_RUNNING))
-		return 0;
-	return 1;
-}
-
 static void __cond_resched(void)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -4647,7 +4636,8 @@ static void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-	if (need_resched() && __resched_legal(0)) {
+	if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
+					system_state == SYSTEM_RUNNING) {
 		__cond_resched();
 		return 1;
 	}
@@ -4673,7 +4663,7 @@ int cond_resched_lock(spinlock_t *lock)
 		ret = 1;
 		spin_lock(lock);
 	}
-	if (need_resched() && __resched_legal(1)) {
+	if (need_resched() && system_state == SYSTEM_RUNNING) {
 		spin_release(&lock->dep_map, 1, _THIS_IP_);
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
@@ -4689,7 +4679,7 @@ int __sched cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (need_resched() && __resched_legal(0)) {
+	if (need_resched() && system_state == SYSTEM_RUNNING) {
 		raw_local_irq_disable();
 		_local_bh_enable();
 		raw_local_irq_enable();

commit e4e6bdbb426d1ecd9e4587f22115f8d0d426d21f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 29 16:47:14 2006 -0800

    [PATCH] rcu: rcutorture suspend fix
    
    Fix suspend hang: rcutorture threads need to be nofreeze.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index c52f981ea008..482b11ff65cb 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -522,6 +522,7 @@ rcu_torture_writer(void *arg)
 
 	VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
 	set_user_nice(current, 19);
+	current->flags |= PF_NOFREEZE;
 
 	do {
 		schedule_timeout_uninterruptible(1);
@@ -561,6 +562,7 @@ rcu_torture_fakewriter(void *arg)
 
 	VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
 	set_user_nice(current, 19);
+	current->flags |= PF_NOFREEZE;
 
 	do {
 		schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
@@ -591,6 +593,7 @@ rcu_torture_reader(void *arg)
 
 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
 	set_user_nice(current, 19);
+	current->flags |= PF_NOFREEZE;
 
 	do {
 		idx = cur_ops->readlock();

commit 52e88f5d4a6b06f3a945728dd3bc403632afe069
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 29 16:47:10 2006 -0800

    [PATCH] change WARN_ON back to "BUG: at ..."
    
    WARN_ON() ever triggering is a kernel bug.  Do not try to paper over this
    fact by suggesting to the user that this is 'only' a warning, as the
    following recent commit does:
    
      commit 30e25b71e725b150585e17888b130e3324f8cf7c
      Author: Jeremy Fitzhardinge <jeremy@goop.org>
      Date:   Fri Dec 8 02:36:24 2006 -0800
    
        [PATCH] Fix generic WARN_ON message
    
        A warning is a warning, not a BUG.
    
    ( it might make sense to rename BUG() to CRASH() and BUG_ON() to
      CRASH_ON(), but that does not change the fact that WARN_ON()
      signals a kernel bug. )
    
    i and others objected to this change during lkml review:
    
      http://marc.theaimsgroup.com/?l=linux-kernel&m=116115160710533&w=2
    
    still the change slipped upstream - grumble :)
    
    Also, use the standard "BUG: " format to make it easier to grep logs and
    to make it easier to google for kernel bugs.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Cc: Jeremy Fitzhardinge <jeremy@goop.org>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index a06eecd48292..14fae1fa87df 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -35,7 +35,7 @@ struct bug_entry {
 #define WARN_ON(condition) ({						\
 	typeof(condition) __ret_warn_on = (condition);			\
 	if (unlikely(__ret_warn_on)) {					\
-		printk("WARNING at %s:%d %s()\n", __FILE__,	\
+		printk("BUG: at %s:%d %s()\n", __FILE__,		\
 			__LINE__, __FUNCTION__);			\
 		dump_stack();						\
 	}								\

commit e1d9fd2e3d33b2fec3207171ec8ca6e71d5c81c7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Dec 23 16:55:29 2006 +0100

    [PATCH] suspend: fix suspend on single-CPU systems
    
    Clark Williams reported that suspend doesn't work on his laptop on
    2.6.20-rc1-rt kernels. The bug was introduced by the following cleanup
    commit:
    
     commit 112cecb2cc0e7341db92281ba04b26c41bb8146d
     Author: Siddha, Suresh B <suresh.b.siddha@intel.com>
     Date:   Wed Dec 6 20:34:31 2006 -0800
    
        [PATCH] suspend: don't change cpus_allowed for task initiating the suspend
    
    because with this change 'error' is not initialized to 0 anymore, if
    there are no other online CPUs. (i.e. if the system is single-CPU).
    
    the fix is to initialize it to 0. The really weird thing is that my
    version of gcc does not warn about this non-initialized variable
    situation ...
    
    (also fix the kernel printk in the error branch, it was missing a
     newline)
    
    Reported-by: Clark Williams <williams@redhat.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9124669f4586..241064a32241 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -258,7 +258,7 @@ static cpumask_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
 {
-	int cpu, first_cpu, error;
+	int cpu, first_cpu, error = 0;
 
 	mutex_lock(&cpu_add_remove_lock);
 	first_cpu = first_cpu(cpu_present_map);
@@ -294,7 +294,7 @@ int disable_nonboot_cpus(void)
 		/* Make sure the CPUs won't be enabled by someone else */
 		cpu_hotplug_disabled = 1;
 	} else {
-		printk(KERN_ERR "Non-boot CPUs are not disabled");
+		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
 out:
 	mutex_unlock(&cpu_add_remove_lock);

commit 0888f06ac99f993df2bb4c479f5b9306dafe154f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 22 01:11:56 2006 -0800

    [PATCH] sched: fix bad missed wakeups in the i386, x86_64, ia64, ACPI and APM idle code
    
    Fernando Lopez-Lezcano reported frequent scheduling latencies and audio
    xruns starting at the 2.6.18-rt kernel, and those problems persisted all
    until current -rt kernels. The latencies were serious and unjustified by
    system load, often in the milliseconds range.
    
    After a patient and heroic multi-month effort of Fernando, where he
    tested dozens of kernels, tried various configs, boot options,
    test-patches of mine and provided latency traces of those incidents, the
    following 'smoking gun' trace was captured by him:
    
                     _------=> CPU#
                    / _-----=> irqs-off
                   | / _----=> need-resched
                   || / _---=> hardirq/softirq
                   ||| / _--=> preempt-depth
                   |||| /
                   |||||     delay
       cmd     pid ||||| time  |   caller
          \   /    |||||   \   |   /
      IRQ_19-1479  1D..1    0us : __trace_start_sched_wakeup (try_to_wake_up)
      IRQ_19-1479  1D..1    0us : __trace_start_sched_wakeup <<...>-5856> (37 0)
      IRQ_19-1479  1D..1    0us : __trace_start_sched_wakeup (c01262ba 0 0)
      IRQ_19-1479  1D..1    0us : resched_task (try_to_wake_up)
      IRQ_19-1479  1D..1    0us : __spin_unlock_irqrestore (try_to_wake_up)
      ...
      <idle>-0     1...1   11us!: default_idle (cpu_idle)
      ...
      <idle>-0     0Dn.1  602us : smp_apic_timer_interrupt (c0103baf 1 0)
      ...
       <...>-5856  0D..2  618us : __switch_to (__schedule)
       <...>-5856  0D..2  618us : __schedule <<idle>-0> (20 162)
       <...>-5856  0D..2  619us : __spin_unlock_irq (__schedule)
       <...>-5856  0...1  619us : trace_stop_sched_switched (__schedule)
       <...>-5856  0D..1  619us : trace_stop_sched_switched <<...>-5856> (37 0)
    
    what is visible in this trace is that CPU#1 ran try_to_wake_up() for
    PID:5856, it placed PID:5856 on CPU#0's runqueue and ran resched_task()
    for CPU#0. But it decided not to send an IPI to that CPU - due to
    TS_POLLING. But CPU#0 never woke up after its NEED_RESCHED bit was set,
    and only rescheduled to PID:5856 upon the next lapic timer IRQ. The
    result was a 600+ usecs latency and a missed wakeup!
    
    the bug turned out to be an idle-wakeup bug introduced into the mainline
    kernel this summer via an optimization in the x86_64 tree:
    
        commit 495ab9c045e1b0e5c82951b762257fe1c9d81564
        Author: Andi Kleen <ak@suse.de>
        Date:   Mon Jun 26 13:59:11 2006 +0200
    
        [PATCH] i386/x86-64/ia64: Move polling flag into thread_info_status
    
        During some profiling I noticed that default_idle causes a lot of
        memory traffic. I think that is caused by the atomic operations
        to clear/set the polling flag in thread_info. There is actually
        no reason to make this atomic - only the idle thread does it
        to itself, other CPUs only read it. So I moved it into ti->status.
    
    the problem is this type of change:
    
            if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
    -               clear_thread_flag(TIF_POLLING_NRFLAG);
    +               current_thread_info()->status &= ~TS_POLLING;
                    smp_mb__after_clear_bit();
                    while (!need_resched()) {
                            local_irq_disable();
    
    this changes clear_thread_flag() to an explicit clearing of TS_POLLING.
    clear_thread_flag() is defined as:
    
            clear_bit(flag, &ti->flags);
    
    and clear_bit() is a LOCK-ed atomic instruction on all x86 platforms:
    
      static inline void clear_bit(int nr, volatile unsigned long * addr)
      {
              __asm__ __volatile__( LOCK_PREFIX
                      "btrl %1,%0"
    
    hence smp_mb__after_clear_bit() is defined as a simple compiler barrier:
    
      #define smp_mb__after_clear_bit()       barrier()
    
    but the explicit TS_POLLING clearing introduced by the patch:
    
    +               current_thread_info()->status &= ~TS_POLLING;
    
    is not an atomic op! So the clearing of the TS_POLLING bit is freely
    reorderable with the reading of the NEED_RESCHED bit - and both now
    reside in different memory addresses.
    
    CPU idle wakeup very much depends on ordered memory ops, the clearing of
    the TS_POLLING flag must always be done before we test need_resched()
    and hit the idle instruction(s). [Symmetrically, the wakeup code needs
    to set NEED_RESCHED before it tests the TS_POLLING flag, so memory
    ordering is paramount.]
    
    Fernando's dual-core Athlon64 system has a sufficiently advanced memory
    ordering model so that it triggered this scenario very often.
    
    ( And it also turned out that the reason why these latencies never
      triggered on my testsystems is that i routinely use idle=poll, which
      was the only idle variant not affected by this bug. )
    
    The fix is to change the smp_mb__after_clear_bit() to an smp_mb(), to
    act as an absolute barrier between the TS_POLLING write and the
    NEED_RESCHED read. This affects almost all idling methods (default,
    ACPI, APM), on all 3 x86 architectures: i386, x86_64, ia64.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Tested-by: Fernando Lopez-Lezcano <nando@ccrma.Stanford.EDU>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index b75cff25de4b..199016927541 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -785,7 +785,11 @@ static int apm_do_idle(void)
 	polling = !!(current_thread_info()->status & TS_POLLING);
 	if (polling) {
 		current_thread_info()->status &= ~TS_POLLING;
-		smp_mb__after_clear_bit();
+		/*
+		 * TS_POLLING-cleared state must be visible before we
+		 * test NEED_RESCHED:
+		 */
+		smp_mb();
 	}
 	if (!need_resched()) {
 		idled = 1;
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 99308510a17c..c641056233a6 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -102,7 +102,12 @@ void default_idle(void)
 {
 	if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
 		current_thread_info()->status &= ~TS_POLLING;
-		smp_mb__after_clear_bit();
+		/*
+		 * TS_POLLING-cleared state must be visible before we
+		 * test NEED_RESCHED:
+		 */
+		smp_mb();
+
 		local_irq_disable();
 		if (!need_resched())
 			safe_halt();	/* enables interrupts racelessly */
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 51922b98086a..17685abaf496 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -268,10 +268,16 @@ cpu_idle (void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		if (can_do_pal_halt)
+		if (can_do_pal_halt) {
 			current_thread_info()->status &= ~TS_POLLING;
-		else
+			/*
+			 * TS_POLLING-cleared state must be visible before we
+			 * test NEED_RESCHED:
+			 */
+			smp_mb();
+		} else {
 			current_thread_info()->status |= TS_POLLING;
+		}
 
 		if (!need_resched()) {
 			void (*idle)(void);
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index a418ee4c8c62..cbbc6adc1a92 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -109,7 +109,11 @@ void exit_idle(void)
 static void default_idle(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
-	smp_mb__after_clear_bit();
+	/*
+	 * TS_POLLING-cleared state must be visible before we
+	 * test NEED_RESCHED:
+	 */
+	smp_mb();
 	local_irq_disable();
 	if (!need_resched()) {
 		/* Enables interrupts one instruction before HLT.
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 65b3f056ad89..6dac6050bb5a 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -211,7 +211,11 @@ acpi_processor_power_activate(struct acpi_processor *pr,
 static void acpi_safe_halt(void)
 {
 	current_thread_info()->status &= ~TS_POLLING;
-	smp_mb__after_clear_bit();
+	/*
+	 * TS_POLLING-cleared state must be visible before we
+	 * test NEED_RESCHED:
+	 */
+	smp_mb();
 	if (!need_resched())
 		safe_halt();
 	current_thread_info()->status |= TS_POLLING;
@@ -345,7 +349,11 @@ static void acpi_processor_idle(void)
 	 */
 	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
 		current_thread_info()->status &= ~TS_POLLING;
-		smp_mb__after_clear_bit();
+		/*
+		 * TS_POLLING-cleared state must be visible before we
+		 * test NEED_RESCHED:
+		 */
+		smp_mb();
 		if (need_resched()) {
 			current_thread_info()->status |= TS_POLLING;
 			local_irq_enable();

commit 9127d4b1d9b2e8fba8e7fbc7f88ea93e5eb01396
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 22 01:08:52 2006 -0800

    [PATCH] lock debugging: fix DEBUG_LOCKS_WARN_ON() & debug_locks_silent
    
    Matthew Wilcox noticed that the debug_locks_silent use should be inverted
    in DEBUG_LOCKS_WARN_ON().  This bug was causing spurious stacktraces and
    incorrect failures in the locking self-test on the parisc kernel.
    
    Bug-found-by: Matthew Wilcox <matthew@wil.cx>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index a1c10b0c4cf0..1678a5de7013 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -24,7 +24,7 @@ extern int debug_locks_off(void);
 	int __ret = 0;							\
 									\
 	if (unlikely(c)) {						\
-		if (debug_locks_silent || debug_locks_off())		\
+		if (debug_locks_off() && !debug_locks_silent)		\
 			WARN_ON(1);					\
 		__ret = 1;						\
 	}								\

commit 9bfb18392ef586467277fa25d8f3a7a93611f6df
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Dec 18 20:05:09 2006 +0100

    [PATCH] workqueue: fix schedule_on_each_cpu()
    
    fix the schedule_on_each_cpu() implementation: __queue_work() is now
    stricter, hence set the work-pending bit before passing in the new work.
    
    (found in the -rt tree, using Peter Zijlstra's files-lock scalability
    patchset)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 742cbbe49bdc..180a8ce11535 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -637,9 +637,11 @@ int schedule_on_each_cpu(work_func_t func)
 
 	mutex_lock(&workqueue_mutex);
 	for_each_online_cpu(cpu) {
-		INIT_WORK(per_cpu_ptr(works, cpu), func);
-		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
-				per_cpu_ptr(works, cpu));
+		struct work_struct *work = per_cpu_ptr(works, cpu);
+
+		INIT_WORK(work, func);
+		set_bit(WORK_STRUCT_PENDING, work_data_bits(work));
+		__queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), work);
 	}
 	mutex_unlock(&workqueue_mutex);
 	flush_workqueue(keventd_wq);

commit 136f1e7a8cb7d17ff91706518549697071640ae4
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Dec 20 11:53:32 2006 +0100

    [PATCH] x86_64: fix boot time hang in detect_calgary()
    
    if CONFIG_CALGARY_IOMMU is built into the kernel via
    CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT, or is enabled via the
    iommu=calgary boot option, then the detect_calgary() function runs to
    detect the presence of a Calgary IOMMU.
    
    detect_calgary() first searches the BIOS EBDA area for a "rio_table_hdr"
    BIOS table. It has this parsing algorithm for the EBDA:
    
            while (offset) {
                    ...
                    /* The next offset is stored in the 1st word. 0 means no more */
                    offset = *((unsigned short *)(ptr + offset));
            }
    
    got that? Let's repeat it slowly: we've got a BIOS-supplied data
    structure, plus Linux kernel code that will only break out of an
    infinite parsing loop once the BIOS gives a zero offset. Ok?
    
    Translation: what an excellent opportunity for BIOS writers to lock up
    the Linux boot process in an utterly hard to debug place! Indeed the
    BIOS jumped on that opportunity on my box, which has the following EBDA
    chaining layout:
    
      384, 65282, 65535, 65535, 65535, 65535, 65535, 65535 ...
    
    see the pattern? So my definitely non-Calgary system happily locks up
    in detect_calgary()!
    
    the patch below fixes the boot hang by trusting the BIOS-supplied data
    structure a bit less: the parser always has to make forward progress,
    and if it doesn't, we break out of the loop and i get the expected kernel
    message:
    
      Calgary: Unable to locate Rio Grande Table in EBDA - bailing!
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Muli Ben-Yehuda <muli@il.ibm.com>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 3215675ab128..87d90cb68a74 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -1052,7 +1052,7 @@ void __init detect_calgary(void)
 	void *tbl;
 	int calgary_found = 0;
 	unsigned long ptr;
-	int offset;
+	unsigned int offset, prev_offset;
 	int ret;
 
 	/*
@@ -1071,15 +1071,20 @@ void __init detect_calgary(void)
 	ptr = (unsigned long)phys_to_virt(get_bios_ebda());
 
 	rio_table_hdr = NULL;
+	prev_offset = 0;
 	offset = 0x180;
-	while (offset) {
+	/*
+	 * The next offset is stored in the 1st word.
+	 * Only parse up until the offset increases:
+	 */
+	while (offset > prev_offset) {
 		/* The block id is stored in the 2nd word */
 		if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
 			/* set the pointer past the offset & block id */
 			rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
 			break;
 		}
-		/* The next offset is stored in the 1st word. 0 means no more */
+		prev_offset = offset;
 		offset = *((unsigned short *)(ptr + offset));
 	}
 	if (!rio_table_hdr) {

commit a9622f6219ce58faba1417743bf3078501eb3434
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Dec 20 11:28:46 2006 +0100

    [PATCH] x86_64: fix boot hang caused by CALGARY_IOMMU_ENABLED_BY_DEFAULT
    
    one of my boxes didn't boot the 2.6.20-rc1-rt0 kernel rpm, it hung during
    early bootup. After an hour or two of happy debugging i narrowed it down
    to the CALGARY_IOMMU_ENABLED_BY_DEFAULT option, which was freshly added
    to 2.6.20 via the x86_64 tree and /enabled by default/.
    
    commit bff6547bb6a4e82c399d74e7fba78b12d2f162ed claims:
    
        [PATCH] Calgary: allow compiling Calgary in but not using it by default
    
        This patch makes it possible to compile Calgary in but not use it by
        default. In this mode, use 'iommu=calgary' to activate it.
    
    but the change does not actually practice it:
    
     config CALGARY_IOMMU_ENABLED_BY_DEFAULT
            bool "Should Calgary be enabled by default?"
            default y
            depends on CALGARY_IOMMU
            help
              Should Calgary be enabled by default? if you choose 'y', Calgary
              will be used (if it exists). If you choose 'n', Calgary will not be
              used even if it exists. If you choose 'n' and would like to use
              Calgary anyway, pass 'iommu=calgary' on the kernel command line.
              If unsure, say Y.
    
    it's both 'default y', and says "If unsure, say Y". Clearly not a typo.
    
    disabling this option makes my box boot again. The patch below fixes the
    Kconfig entry. Grumble.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index d4275537b25b..ef6672455695 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -480,14 +480,13 @@ config CALGARY_IOMMU
 
 config CALGARY_IOMMU_ENABLED_BY_DEFAULT
 	bool "Should Calgary be enabled by default?"
-	default y
 	depends on CALGARY_IOMMU
 	help
-	  Should Calgary be enabled by default? if you choose 'y', Calgary
+	  Should Calgary be enabled by default? If you choose 'y', Calgary
 	  will be used (if it exists). If you choose 'n', Calgary will not be
 	  used even if it exists. If you choose 'n' and would like to use
 	  Calgary anyway, pass 'iommu=calgary' on the kernel command line.
-	  If unsure, say Y.
+	  If unsure, say N.
 
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB