Patches contributed by Eötvös Loránd University


commit f18d397e6aa5cde638d164b1d519c3ee903f4867
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed May 2 19:27:04 2007 +0200

    [PATCH] x86-64: optimize & fix APIC mode setup
    
    Fix a couple of inconsistencies/problems I found while reviewing the x86_64
    genapic code (when I was chasing mysterious eth0 timeouts that would only
    trigger if CPU_HOTPLUG was enabled):
    
     - AMD systems defaulted to the slower flat-physical mode instead
       of the flat-logical mode. The only restriction on AMD systems
       is that they should not use clustered APIC mode.
    
     - removed the CPU hotplug hacks, switching the default for small
       systems back from phys-flat to logical-flat. Switching to
       logical-flat mode on small systems fixed sporadic ethernet driver
       timeouts I was getting on a dual-core Athlon64 system:
    
        NETDEV WATCHDOG: eth0: transmit timed out
        eth0: Transmit timeout, status 0c 0005 c07f media 80.
        eth0: Tx queue start entry 32  dirty entry 28.
        eth0:  Tx descriptor 0 is 0008a04a. (queue head)
        eth0:  Tx descriptor 1 is 0008a04a.
        eth0:  Tx descriptor 2 is 0008a04a.
        eth0:  Tx descriptor 3 is 0008a04a.
        eth0: link up, 100Mbps, full-duplex, lpa 0xC5E1
    
     - The use of '<= 8' was a bug by itself (the valid APIC IDs
       for logical flat mode go from 0 to 7, not 0 to 8). The new logic
       is to use logical flat mode on both AMD and Intel systems, and
       to only switch to physical mode when logical mode cannot be used.
       If CPU hotplug is racy wrt. APIC shutdown then CPU hotplug needs
       fixing; the whole IRQ system should not be made inconsistent and
       slowed down.
    
     - minor cleanups: simplified some code constructs
    
    Built & booted on a couple of AMD and Intel SMP systems.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andi Kleen <ak@suse.de>
    Cc: Suresh Siddha <suresh.b.siddha@intel.com>
    Cc: Andi Kleen <ak@suse.de>
    Cc: "Li, Shaohua" <shaohua.li@intel.com>
    Cc: "Eric W. Biederman" <ebiederm@xmission.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>

diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 7312ddb84fb4..2f2b8fc6e2f3 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -32,21 +32,20 @@ extern struct genapic apic_cluster;
 extern struct genapic apic_flat;
 extern struct genapic apic_physflat;
 
-struct genapic *genapic = &apic_flat;
-
+struct genapic __read_mostly *genapic = &apic_flat;
 
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
  */
 void __init clustered_apic_check(void)
 {
-	long i;
+	int i;
 	u8 clusters, max_cluster;
 	u8 id;
 	u8 cluster_cnt[NUM_APIC_CLUSTERS];
 	int max_apic = 0;
 
-#if defined(CONFIG_ACPI)
+#ifdef CONFIG_ACPI
 	/*
 	 * Some x86_64 machines use physical APIC mode regardless of how many
 	 * procs/clusters are present (x86_64 ES7000 is an example).
@@ -68,20 +67,17 @@ void __init clustered_apic_check(void)
 		cluster_cnt[APIC_CLUSTERID(id)]++;
 	}
 
-	/* Don't use clustered mode on AMD platforms. */
+	/*
+	 * Don't use clustered mode on AMD platforms, default
+	 * to flat logical mode.
+	 */
  	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-		genapic = &apic_physflat;
-#ifndef CONFIG_HOTPLUG_CPU
-		/* In the CPU hotplug case we cannot use broadcast mode
-		   because that opens a race when a CPU is removed.
-		   Stay at physflat mode in this case.
-		   It is bad to do this unconditionally though. Once
-		   we have ACPI platform support for CPU hotplug
-		   we should detect hotplug capablity from ACPI tables and
-		   only do this when really needed. -AK */
-		if (max_apic <= 8)
-			genapic = &apic_flat;
-#endif
+		/*
+		 * Switch to physical flat mode if more than 8 APICs
+		 * (In the case of 8 CPUs APIC ID goes from 0 to 7):
+		 */
+		if (max_apic >= 8)
+			genapic = &apic_physflat;
  		goto print;
  	}
 
@@ -103,14 +99,9 @@ void __init clustered_apic_check(void)
 	 * (We don't use lowest priority delivery + HW APIC IRQ steering, so
 	 * can ignore the clustered logical case and go straight to physical.)
 	 */
-	if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) {
-#ifdef CONFIG_HOTPLUG_CPU
-		/* Don't use APIC shortcuts in CPU hotplug to avoid races */
-		genapic = &apic_physflat;
-#else
+	if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
 		genapic = &apic_flat;
-#endif
-	} else
+	else
 		genapic = &apic_cluster;
 
 print:

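As background for the '<= 8' fix: in flat logical mode each CPU is
addressed by one bit of an 8-bit destination bitmap, so only APIC IDs
0..7 fit. A minimal userspace sketch of the boundary condition (the
function name is illustrative, not kernel code):

    #include <stdio.h>

    /* IDs 0..7 map to bits 0..7 of the destination bitmap; the old
     * '<= 8' test wrongly accepted ID 8 as well. */
    static int fits_flat_logical(int max_apic_id)
    {
        return max_apic_id < 8;
    }

    int main(void)
    {
        int id;

        for (id = 6; id <= 9; id++)
            printf("max APIC id %d -> %s\n", id,
                   fits_flat_logical(id) ? "flat logical" : "physflat");
        return 0;
    }
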
commit 39bc89fd4019b164002adaacef92c4140e37955a
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Apr 25 20:50:03 2007 -0700

    make SysRq-T show all tasks again
    
    show_state() (SysRq-T) developed the buggy habit of not showing
    TASK_RUNNING tasks.  This was due to the mistaken belief that state_filter
    == -1 would be a pass-through filter - while in reality it did not let
    TASK_RUNNING == 0 p->state values through.
    
    Fix this by restoring the original '!state_filter means all tasks'
    special-case I had in the original version.  Test-built and test-booted
    on i686; SysRq-T now works as intended.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 49fe2997a016..a1707583de49 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -196,13 +196,13 @@ extern void init_idle(struct task_struct *idle, int cpu);
 extern cpumask_t nohz_cpu_mask;
 
 /*
- * Only dump TASK_* tasks. (-1 for all tasks)
+ * Only dump TASK_* tasks. (0 for all tasks)
  */
 extern void show_state_filter(unsigned long state_filter);
 
 static inline void show_state(void)
 {
-	show_state_filter(-1);
+	show_state_filter(0);
 }
 
 extern void show_regs(struct pt_regs *);
diff --git a/kernel/sched.c b/kernel/sched.c
index b9a683730148..960d7c5fca39 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4746,7 +4746,7 @@ void show_state_filter(unsigned long state_filter)
 		 * console might take alot of time:
 		 */
 		touch_nmi_watchdog();
-		if (p->state & state_filter)
+		if (!state_filter || (p->state & state_filter))
 			show_task(p);
 	} while_each_thread(g, p);
 

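To see why state_filter == -1 missed runnable tasks: TASK_RUNNING is 0,
so 'p->state & state_filter' is 0 for running tasks no matter what the
filter is. A small standalone sketch of the old and new tests (the
constants mirror the kernel's values; the rest is illustrative):

    #include <stdio.h>

    #define TASK_RUNNING          0
    #define TASK_INTERRUPTIBLE    1
    #define TASK_UNINTERRUPTIBLE  2

    static int shown_old(unsigned long state)   /* state_filter == -1 */
    {
        return (state & (unsigned long)-1) != 0;
    }

    static int shown_new(unsigned long state)   /* state_filter == 0 */
    {
        unsigned long filter = 0;
        return !filter || (state & filter);
    }

    int main(void)
    {
        unsigned long states[] = {
            TASK_RUNNING, TASK_INTERRUPTIBLE, TASK_UNINTERRUPTIBLE
        };
        int i;

        for (i = 0; i < 3; i++)
            printf("state %lu: old=%d new=%d\n", states[i],
                   shown_old(states[i]), shown_new(states[i]));
        return 0;
    }
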
commit 35f6f753b79705bc4b62da5dcc218d75ffa88370
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Apr 6 21:18:06 2007 +0200

    [PATCH] sched: get rid of p->children use in show_task()
    
    The p->parent PID printout gives us all the information about the
    task tree that we need - the eldest_child()/older_sibling()/
    younger_sibling() printouts are mostly historic and I do not
    remember ever having used those fields. (IMO in fact they confuse
    the SysRq-T output.) So remove them.
    
    This code has sentimental value though, those fields and
    printouts are one of the oldest ones still surviving from
    Linux v0.95's kernel/sched.c:
    
            if (p->p_ysptr || p->p_osptr)
                    printk("   Younger sib=%d, older sib=%d\n\r",
                            p->p_ysptr ? p->p_ysptr->pid : -1,
                            p->p_osptr ? p->p_osptr->pid : -1);
            else
                    printk("\n\r");
    
    written 15 years ago, in early 1992.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus 'snif' Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/sched.c b/kernel/sched.c
index a4ca632c477c..cdad3b04242a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4687,27 +4687,6 @@ long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
 	return retval;
 }
 
-static inline struct task_struct *eldest_child(struct task_struct *p)
-{
-	if (list_empty(&p->children))
-		return NULL;
-	return list_entry(p->children.next,struct task_struct,sibling);
-}
-
-static inline struct task_struct *older_sibling(struct task_struct *p)
-{
-	if (p->sibling.prev==&p->parent->children)
-		return NULL;
-	return list_entry(p->sibling.prev,struct task_struct,sibling);
-}
-
-static inline struct task_struct *younger_sibling(struct task_struct *p)
-{
-	if (p->sibling.next==&p->parent->children)
-		return NULL;
-	return list_entry(p->sibling.next,struct task_struct,sibling);
-}
-
 static const char stat_nam[] = "RSDTtZX";
 
 static void show_task(struct task_struct *p)
@@ -4738,19 +4717,7 @@ static void show_task(struct task_struct *p)
 		free = (unsigned long)n - (unsigned long)end_of_stack(p);
 	}
 #endif
-	printk("%5lu %5d %6d ", free, p->pid, p->parent->pid);
-	if ((relative = eldest_child(p)))
-		printk("%5d ", relative->pid);
-	else
-		printk("      ");
-	if ((relative = younger_sibling(p)))
-		printk("%7d", relative->pid);
-	else
-		printk("       ");
-	if ((relative = older_sibling(p)))
-		printk(" %5d", relative->pid);
-	else
-		printk("      ");
+	printk("%5lu %5d %6d", free, p->pid, p->parent->pid);
 	if (!p->mm)
 		printk(" (L-TLB)\n");
 	else

commit 995f054f2a342f8505fed4f8395d12c0f5966414
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Apr 7 12:05:00 2007 +0200

    [PATCH] high-res timers: resume fix
    
    Soeren Sonnenburg reported that upon resume he was getting
    this backtrace:
    
     [<c0119637>] smp_apic_timer_interrupt+0x57/0x90
     [<c0142d30>] retrigger_next_event+0x0/0xb0
     [<c0104d30>] apic_timer_interrupt+0x28/0x30
     [<c0142d30>] retrigger_next_event+0x0/0xb0
     [<c0140068>] __kfifo_put+0x8/0x90
     [<c0130fe5>] on_each_cpu+0x35/0x60
     [<c0143538>] clock_was_set+0x18/0x20
     [<c0135cdc>] timekeeping_resume+0x7c/0xa0
     [<c02aabe1>] __sysdev_resume+0x11/0x80
     [<c02ab0c7>] sysdev_resume+0x47/0x80
     [<c02b0b05>] device_power_up+0x5/0x10
    
    It turns out that on resume we mistakenly re-enable interrupts too
    early.  Do the timer retrigger only on the current CPU.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Thomas Gleixner <tglx@linutronix.de>
    Acked-by: Soeren Sonnenburg <kernel@nn7.de>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5bdbc744e773..17c29dca8354 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -206,6 +206,7 @@ struct hrtimer_cpu_base {
 struct clock_event_device;
 
 extern void clock_was_set(void);
+extern void hres_timers_resume(void);
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
 /*
@@ -236,6 +237,8 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
  */
 static inline void clock_was_set(void) { }
 
+static inline void hres_timers_resume(void) { }
+
 /*
  * In non high resolution mode the time reference is taken from
  * the base softirq time variable.
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 067ba2c05328..b74860aaf5f1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -458,6 +458,18 @@ void clock_was_set(void)
 	on_each_cpu(retrigger_next_event, NULL, 0, 1);
 }
 
+/*
+ * During resume we might have to reprogram the high resolution timer
+ * interrupt (on the local CPU):
+ */
+void hres_timers_resume(void)
+{
+	WARN_ON_ONCE(num_online_cpus() > 1);
+
+	/* Retrigger the CPU local events: */
+	retrigger_next_event(NULL);
+}
+
 /*
  * Check, whether the timer is on the callback pending list
  */
diff --git a/kernel/timer.c b/kernel/timer.c
index 440048acaea1..dd6c2c1c561b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1016,7 +1016,7 @@ static int timekeeping_resume(struct sys_device *dev)
 	clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
 
 	/* Resume hrtimers */
-	clock_was_set();
+	hres_timers_resume();
 
 	return 0;
 }

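The crux is that clock_was_set() broadcasts via on_each_cpu(), which
sends IPIs and therefore cannot run with interrupts disabled - yet the
resume path runs exactly there, with only one CPU online. A stub sketch
of the two call shapes (a userspace model, not the kernel implementation):

    #include <stdio.h>

    static void retrigger_next_event(void *info)
    {
        printf("reprogramming the local clockevents device\n");
    }

    /* Models the broadcast: would IPI other CPUs and briefly enable
     * interrupts on the caller - unsafe during early resume. */
    static void on_each_cpu(void (*func)(void *), void *info)
    {
        func(info);
    }

    static void clock_was_set(void)       /* unsafe during resume */
    {
        on_each_cpu(retrigger_next_event, NULL);
    }

    static void hres_timers_resume(void)  /* safe: local CPU only */
    {
        retrigger_next_event(NULL);
    }

    int main(void)
    {
        hres_timers_resume();   /* what timekeeping_resume() now calls */
        (void)clock_was_set;    /* kept only for comparison */
        return 0;
    }
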
commit 935c631db827cc3a96df4dcc6fec374b994fdbd1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Mar 28 13:17:18 2007 +0200

    [PATCH] hrtimers: fix reprogramming SMP race
    
    hrtimer_start() incorrectly set the 'reprogram' flag passed to
    enqueue_hrtimer(); it should only be 1 if the hrtimer is queued to
    the current CPU.
    
    Doing otherwise could result in a reprogramming of the current CPU's
    clockevents device, with a timer that is not queued to it - resulting in a
    bogus next expiry value.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Cc: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
    Acked-by: Thomas Gleixner <tglx@linutronix.de>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 6a7938a0d513..067ba2c05328 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -814,7 +814,12 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 
 	timer_stats_hrtimer_set_start_info(timer);
 
-	enqueue_hrtimer(timer, new_base, base == new_base);
+	/*
+	 * Only allow reprogramming if the new base is on this CPU.
+	 * (it might still be on another CPU if the timer was pending)
+	 */
+	enqueue_hrtimer(timer, new_base,
+			new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
 
 	unlock_hrtimer_base(timer, &flags);
 

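The subtlety: 'base == new_base' only says the timer did not migrate;
it does not say the timer landed on the CPU we are executing on, and only
in the latter case may this CPU's clockevents device be reprogrammed.
A minimal sketch of the corrected test (types and names are illustrative):

    #include <stdio.h>

    struct hrtimer_cpu_base { int cpu; };

    static int may_reprogram(const struct hrtimer_cpu_base *new_base,
                             const struct hrtimer_cpu_base *this_cpu_base)
    {
        return new_base == this_cpu_base;
    }

    int main(void)
    {
        struct hrtimer_cpu_base cpu0 = { 0 }, cpu1 = { 1 };
        const struct hrtimer_cpu_base *local = &cpu0;  /* we run on CPU 0 */

        printf("timer on cpu0: reprogram=%d\n", may_reprogram(&cpu0, local));
        printf("timer on cpu1: reprogram=%d\n", may_reprogram(&cpu1, local));
        return 0;
    }
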
commit 6d9658df07789a124e5c1f8677afcd7773439f3e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Mar 11 13:52:33 2007 +0100

    KVM: always reload segment selectors
    
    A failed VM entry on VMX might still change %fs or %gs, so make sure
    that KVM always reloads the segment selectors. This is crucial on both
    x86 and x86_64: x86 has __KERNEL_PDA in %fs, on which things like
    'current' depend, and x86_64 has 0 there and needs MSR_GS_BASE to work.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 25b247199224..fbbf9d6b299f 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1888,6 +1888,27 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		[cr2]"i"(offsetof(struct kvm_vcpu, cr2))
 	      : "cc", "memory" );
 
+	/*
+	 * Reload segment selectors ASAP. (it's needed for a functional
+	 * kernel: x86 relies on having __KERNEL_PDA in %fs and x86_64
+	 * relies on having 0 in %gs for the CPU PDA to work.)
+	 */
+	if (fs_gs_ldt_reload_needed) {
+		load_ldt(ldt_sel);
+		load_fs(fs_sel);
+		/*
+		 * If we have to reload gs, we must take care to
+		 * preserve our gs base.
+		 */
+		local_irq_disable();
+		load_gs(gs_sel);
+#ifdef CONFIG_X86_64
+		wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
+#endif
+		local_irq_enable();
+
+		reload_tss();
+	}
 	++kvm_stat.exits;
 
 	save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
@@ -1905,22 +1926,6 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR);
 		r = 0;
 	} else {
-		if (fs_gs_ldt_reload_needed) {
-			load_ldt(ldt_sel);
-			load_fs(fs_sel);
-			/*
-			 * If we have to reload gs, we must take care to
-			 * preserve our gs base.
-			 */
-			local_irq_disable();
-			load_gs(gs_sel);
-#ifdef CONFIG_X86_64
-			wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE));
-#endif
-			local_irq_enable();
-
-			reload_tss();
-		}
 		/*
 		 * Profile KVM exit RIPs:
 		 */

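The reason the %gs reload is delicate: on x86_64, writing the %gs
selector clears the hidden GS base, and the kernel's per-CPU data is
reached through that base. A userspace stub model of the required
ordering (the selector and base values are hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t gs_base;               /* models the hidden GS base */

    static void load_gs(uint16_t sel)
    {
        (void)sel;
        gs_base = 0;                       /* a selector load clears it */
    }

    static void wrmsrl_gs_base(uint64_t val)
    {
        gs_base = val;                     /* wrmsrl(MSR_GS_BASE, val) */
    }

    int main(void)
    {
        uint64_t host_gs_base = 0xffff810000000000ULL;  /* hypothetical */

        /* Interrupts must stay off between these two steps: an interrupt
         * in between would see gs_base == 0 and dereference a bogus
         * per-CPU area. */
        load_gs(0x18);                     /* hypothetical selector */
        wrmsrl_gs_base(host_gs_base);

        printf("gs base restored: 0x%llx\n", (unsigned long long)gs_base);
        return 0;
    }
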
commit 4edc5db83f574dfcc8be35b7b96760ded543b360
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Mar 22 10:31:19 2007 +0100

    [PATCH] setup_boot_APIC_clock() irq-enable fix
    
    The latest -git triggers an irqtrace/lockdep warning of a leaked
    irqs-off condition:
    
      BUG: at kernel/fork.c:1033 copy_process()
    
    After some debugging it turns out that commit ca1b940c accidentally left
    interrupts disabled - which trickled down all the way to the first time
    we fork a kernel thread and triggered the warning.
    
    The fix is to re-enable interrupts in the 'else' branch of
    setup_boot_APIC_clock()'s pmtimer-based calibration path.
    
    Reported-by: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Thomas Gleixner <tglx@brown.paperbag.linutronix.de>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 36825117835d..244c3fe9b8c3 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -506,7 +506,8 @@ void __init setup_boot_APIC_clock(void)
 			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
 		else
 			local_apic_timer_verify_ok = 0;
-	}
+	} else
+		local_irq_enable();
 
 	if (!local_apic_timer_verify_ok) {
 		printk(KERN_WARNING

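The general pattern behind this bug: a function disables interrupts and
only one branch re-enables them, so the other branch leaks an irqs-off
state to the caller. A stub sketch of the shape of the fix (a userspace
model, not the actual calibration code):

    #include <stdio.h>

    static int irqs_disabled;

    static void local_irq_disable(void) { irqs_disabled = 1; }
    static void local_irq_enable(void)  { irqs_disabled = 0; }

    static void calibrate(int pm_timer_path)
    {
        local_irq_disable();
        if (!pm_timer_path) {
            /* verification path: already re-enabled interrupts */
            local_irq_enable();
        } else {
            /* before the fix there was no else branch, so interrupts
             * stayed off all the way into the first kernel fork */
            local_irq_enable();
        }
    }

    int main(void)
    {
        calibrate(1);
        printf("irqs still disabled: %d\n", irqs_disabled);
        return 0;
    }
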
commit 21778867b1c8e0feb567addb6dc0a7e2ca6ecdec
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Mar 16 13:38:31 2007 -0800

    [PATCH] futex: PI state locking fix
    
    Testing of -rt by IBM uncovered a locking bug in wake_futex_pi(): the PI
    state needs to be locked before we access it.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Thomas Gleixner <tglx@linutronix.de>
    Cc: Chuck Ebbert <cebbert@redhat.com>
    Cc: <stable@kernel.org>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/kernel/futex.c b/kernel/futex.c
index e749e7df14b1..5a270b5e3f95 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -565,6 +565,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	if (!pi_state)
 		return -EINVAL;
 
+	spin_lock(&pi_state->pi_mutex.wait_lock);
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
@@ -604,6 +605,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	pi_state->owner = new_owner;
 	spin_unlock_irq(&new_owner->pi_lock);
 
+	spin_unlock(&pi_state->pi_mutex.wait_lock);
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;

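The invariant the patch restores: the next-owner lookup and the owner
update must both happen under the rt-mutex's wait_lock, otherwise a
concurrent waiter change can invalidate the looked-up task. A pthread
stub of that critical section (the structures are a model, not the
kernel's):

    #include <pthread.h>
    #include <stdio.h>

    struct pi_state {
        pthread_mutex_t wait_lock;    /* models pi_mutex.wait_lock */
        int next_owner_pid;           /* models rt_mutex_next_owner() */
    };

    static void wake_pi(struct pi_state *pi_state)
    {
        int new_owner;

        pthread_mutex_lock(&pi_state->wait_lock);    /* the added lock */
        /* Safe: the waiter list cannot change while wait_lock is held,
         * so the looked-up next owner stays valid. */
        new_owner = pi_state->next_owner_pid;
        printf("handing PI ownership to pid %d\n", new_owner);
        pthread_mutex_unlock(&pi_state->wait_lock);  /* the added unlock */
    }

    int main(void)
    {
        struct pi_state pi_state = { PTHREAD_MUTEX_INITIALIZER, 42 };

        wake_pi(&pi_state);
        return 0;
    }
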
commit d04f41e35343f1d788551fd3f753f51794f4afcf
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Mar 7 18:12:31 2007 +0100

    [PATCH] CPU hotplug: call check_tsc_sync_source() with irqs off
    
    check_tsc_sync_source() depends on being called with irqs disabled (it
    checks whether the TSC is coherent across two specific CPUs). This is
    incidentally true during bootup, but not during cpu hotplug __cpu_up().
    This got found via smp_processor_id() debugging.
    
    Disable irqs explicitly and remove the unconditional enabling of
    interrupts. Add touch_nmi_watchdog() to the cpu_online_map busy loop.
    
    This bug is present on both i386 and x86_64.
    
    Reported-by: Michal Piotrowski <michal.k.k.piotrowski@gmail.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 9b0dd2744c82..4ff55e675576 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -45,6 +45,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/percpu.h>
+#include <linux/nmi.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -1278,8 +1279,9 @@ void __cpu_die(unsigned int cpu)
 
 int __cpuinit __cpu_up(unsigned int cpu)
 {
+	unsigned long flags;
 #ifdef CONFIG_HOTPLUG_CPU
-	int ret=0;
+	int ret = 0;
 
 	/*
 	 * We do warm boot only on cpus that had booted earlier
@@ -1297,23 +1299,25 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	/* In case one didn't come up */
 	if (!cpu_isset(cpu, cpu_callin_map)) {
 		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
-		local_irq_enable();
 		return -EIO;
 	}
 
-	local_irq_enable();
-
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 	/* Unleash the CPU! */
 	cpu_set(cpu, smp_commenced_mask);
 
 	/*
-	 * Check TSC synchronization with the AP:
+	 * Check TSC synchronization with the AP (keep irqs disabled
+	 * while doing so):
 	 */
+	local_irq_save(flags);
 	check_tsc_sync_source(cpu);
+	local_irq_restore(flags);
 
-	while (!cpu_isset(cpu, cpu_online_map))
+	while (!cpu_isset(cpu, cpu_online_map)) {
 		cpu_relax();
+		touch_nmi_watchdog();
+	}
 
 #ifdef CONFIG_X86_GENERICARCH
 	if (num_online_cpus() > 8 && genapic == &apic_default)
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 35443729aad8..cd4643a37022 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -923,8 +923,9 @@ void __init smp_prepare_boot_cpu(void)
  */
 int __cpuinit __cpu_up(unsigned int cpu)
 {
-	int err;
 	int apicid = cpu_present_to_apicid(cpu);
+	unsigned long flags;
+	int err;
 
 	WARN_ON(irqs_disabled());
 
@@ -958,7 +959,9 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	/*
   	 * Make sure and check TSC sync:
  	 */
+	local_irq_save(flags);
 	check_tsc_sync_source(cpu);
+	local_irq_restore(flags);
 
 	while (!cpu_isset(cpu, cpu_online_map))
 		cpu_relax();

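The pattern both hunks apply: wrap a section that is only valid with
interrupts off in local_irq_save()/local_irq_restore(), instead of
assuming anything about the caller's irq state. A stub model of the
idiom (the flag handling is simplified; the kernel's local_irq_save()
is a macro taking the variable directly):

    #include <stdio.h>

    static int irqs_disabled;

    static void local_irq_save(unsigned long *flags)
    {
        *flags = irqs_disabled;
        irqs_disabled = 1;
    }

    static void local_irq_restore(unsigned long flags)
    {
        irqs_disabled = (int)flags;
    }

    /* Models a measurement that an interrupt in the middle would skew. */
    static void check_tsc_sync_source(int cpu)
    {
        printf("measuring TSC of cpu%d, irqs_disabled=%d\n",
               cpu, irqs_disabled);
    }

    int main(void)
    {
        unsigned long flags;

        local_irq_save(&flags);
        check_tsc_sync_source(1);
        local_irq_restore(flags);
        return 0;
    }
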
commit c3442e296517aee733d62fc3fe03211598902c7d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Mar 5 14:46:30 2007 +0100

    [PATCH] paravirt: re-enable COMPAT_VDSO
    
    CONFIG_PARAVIRT broke old glibc bootup: it silently turned off the
    selectability of CONFIG_COMPAT_VDSO and thus rendered distro kernels
    unbootable on old-style VDSO glibc setups.
    
    The proper solution is to keep COMPAT_VDSO available - if a hypervisor
    needs any modification of that concept then we'll judge those changes
    in full context, once they are submitted.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index cee4ff679d3c..27e8453274e6 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -893,7 +893,6 @@ config HOTPLUG_CPU
 config COMPAT_VDSO
 	bool "Compat VDSO support"
 	default y
-	depends on !PARAVIRT
 	help
 	  Map the VDSO to the predictable old-style address too.
 	---help---