Patches contributed by Eötvös Loránd University


commit 1b49061d400c9e51e3ac2aac026a099fe599b9bb
Merge: 14131f2f98ac 83ce40092868
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 21:21:59 2009 +0100

    Merge branch 'sched/clock' into tracing/ftrace
    
    Conflicts:
            kernel/sched_clock.c

diff --cc kernel/sched_clock.c
index db69174b1178,a755d023805a..7ec82c1c61c5
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@@ -24,12 -24,11 +24,12 @@@
   * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
   * consistent between cpus (never more than 2 jiffies difference).
   */
- #include <linux/sched.h>
- #include <linux/percpu.h>
  #include <linux/spinlock.h>
- #include <linux/ktime.h>
- #include <linux/module.h>
 +#include <linux/hardirq.h>
+ #include <linux/module.h>
+ #include <linux/percpu.h>
+ #include <linux/ktime.h>
+ #include <linux/sched.h>
  
  /*
   * Scheduler clock - returns current time in nanosec units.
@@@ -149,19 -155,13 +156,24 @@@ static void lock_double_clock(struct sc
  
  u64 sched_clock_cpu(int cpu)
  {
- 	struct sched_clock_data *scd = cpu_sdc(cpu);
  	u64 now, clock, this_clock, remote_clock;
+ 	struct sched_clock_data *scd;
+ 
+ 	if (sched_clock_stable)
+ 		return sched_clock();
+ 
+ 	scd = cpu_sdc(cpu);
 +
 +	/*
 +	 * Normally this is not called in NMI context - but if it is,
 +	 * trying to do any locking here is totally lethal.
 +	 */
 +	if (unlikely(in_nmi()))
 +		return scd->clock;
 +
 +	if (unlikely(!sched_clock_running))
 +		return 0ull;
 +
  	WARN_ON_ONCE(!irqs_disabled());
  	now = sched_clock();
  

commit ba1d755a36f66101aa88ac9ebb54694def6ec38d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Oct 18 21:24:45 2008 +0200

    fix warning in arch/x86/kernel/cpu/intel_cacheinfo.c
    
    fix these warnings:
    
      arch/x86/kernel/cpu/intel_cacheinfo.c:139: warning: ‘k8_nb_id’ defined but not used
      arch/x86/kernel/cpu/intel_cacheinfo.c:527: warning: ‘free_cache_attributes’ defined but not used
      arch/x86/kernel/cpu/intel_cacheinfo.c:538: warning: ‘detect_cache_attributes’ defined but not used
    
    Unused variables in the !CONFIG_SYSFS case.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 51b5dfd67163..03f93c5dcfb3 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -144,7 +144,7 @@ struct _cpuid4_info_regs {
 	unsigned long can_disable;
 };
 
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
 static struct pci_device_id k8_nb_id[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
@@ -484,6 +484,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	return l2;
 }
 
+#ifdef CONFIG_SYSFS
+
 /* pointer to _cpuid4_info array (for each cache leaf) */
 static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
 #define CPUID4_INFO_IDX(x, y)	(&((per_cpu(cpuid4_info, x))[y]))
@@ -597,8 +599,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 	return retval;
 }
 
-#ifdef CONFIG_SYSFS
-
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 

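The fix works by moving the sysfs-only helpers under the same guard as their only callers: definitions referenced solely from conditionally compiled code must live under the same #ifdef, or "defined but not used" warnings fire whenever that code is compiled out. A minimal standalone sketch of the pattern (hypothetical CONFIG_FOO macro, not kernel code):

    #include <stdio.h>

    #define CONFIG_FOO 1		/* drop this line to mimic !CONFIG_SYSFS */

    #ifdef CONFIG_FOO
    /* sole caller is also under CONFIG_FOO, so no unused-function warning */
    static int helper(int x)
    {
    	return x * 2;
    }
    #endif

    int main(void)
    {
    #ifdef CONFIG_FOO
    	printf("%d\n", helper(21));
    #endif
    	return 0;
    }
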
commit 5d0859cef29167d45dc6cf89d19712145e6005d6
Merge: 14131f2f98ac 83ce40092868
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 21:21:59 2009 +0100

    Merge branch 'sched/clock' into tracing/ftrace
    
    Conflicts:
            kernel/sched_clock.c

diff --cc kernel/sched_clock.c
index db69174b1178,a755d023805a..f7602da84c40
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@@ -24,12 -24,11 +24,12 @@@
   * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
   * consistent between cpus (never more than 2 jiffies difference).
   */
- #include <linux/sched.h>
- #include <linux/percpu.h>
  #include <linux/spinlock.h>
- #include <linux/ktime.h>
- #include <linux/module.h>
 +#include <linux/hardirq.h>
+ #include <linux/module.h>
+ #include <linux/percpu.h>
+ #include <linux/ktime.h>
+ #include <linux/sched.h>
  
  /*
   * Scheduler clock - returns current time in nanosec units.
@@@ -149,19 -155,13 +156,23 @@@ static void lock_double_clock(struct sc
  
  u64 sched_clock_cpu(int cpu)
  {
- 	struct sched_clock_data *scd = cpu_sdc(cpu);
  	u64 now, clock, this_clock, remote_clock;
+ 	struct sched_clock_data *scd;
+ 
+ 	if (sched_clock_stable)
+ 		return sched_clock();
  
 +	/*
 +	 * Normally this is not called in NMI context - but if it is,
 +	 * trying to do any locking here is totally lethal.
 +	 */
 +	if (unlikely(in_nmi()))
 +		return scd->clock;
 +
 +	if (unlikely(!sched_clock_running))
 +		return 0ull;
 +
+ 	scd = cpu_sdc(cpu);
  	WARN_ON_ONCE(!irqs_disabled());
  	now = sched_clock();
  

commit 83ce400928680a6c8123d492684b27857f5a2d95
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 20:16:58 2009 +0100

    x86: set X86_FEATURE_TSC_RELIABLE
    
    If the TSC is constant and non-stop, also set it reliable.
    
    (We will turn this off in DMI quirks for multi-chassis systems)
    
    The performance number on a 16-way Nehalem system running
    32 tasks that context-switch between each other is significant:
    
       sched_clock_stable=0         sched_clock_stable=1
       ....................         ....................
       22.456925 million/sec        24.306972 million/sec   [+8.2%]
    
    lmbench's "lat_ctx -s 0 2" goes from 0.63 microseconds to
    0.59 microseconds - a 6.7% increase in context-switching
    performance.
    
    Perfstat of 1 million pipe context switches between two tasks:
    
     Performance counter stats for './pipe-test-1m':
    
           [before]           [after]
       ............      ............
       37621.421089      36436.848378    task clock ticks     (msecs)
    
                  0                 0    CPU migrations       (events)
            2000274           2000189    context switches     (events)
                194               193    pagefaults           (events)
         8433799643        8171016416    CPU cycles           (events) -3.21%
         8370133368        8180999694    instructions         (events) -2.31%
            4158565           3895941    cache references     (events) -6.74%
              44312             46264    cache misses         (events)
    
        2349.287976       2279.362465    wall-time            (msecs)  -3.06%
    
    The speedup comes straight from the reduction in the instruction
    count. sched_clock_cpu() got simpler and the whole workload thus
    executes faster.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ade..5fff00c70de0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/smp.h>
+#include <linux/sched.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
 
@@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 
 	/*
 	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
-	 * with P/T states and does not stop in deep C-states
+	 * with P/T states and does not stop in deep C-states.
+	 *
+	 * It is also reliable across cores and sockets. (but not across
+	 * cabinets - we turn it off in that case explicitly.)
 	 */
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+		set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+		sched_clock_stable = 1;
 	}
 
 }

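The bit tested above, c->x86_power & (1 << 8), is EDX bit 8 of CPUID leaf 0x80000007 - the invariant-TSC flag. As a rough userspace illustration (not kernel code), the same bit can be read with GCC's <cpuid.h> helper:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
    	unsigned int eax, ebx, ecx, edx;

    	/* leaf 0x80000007, EDX bit 8: TSC runs at a constant rate
    	 * and does not stop in deep C-states */
    	if (__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx))
    		printf("invariant TSC: %s\n",
    		       (edx & (1 << 8)) ? "yes" : "no");
    	return 0;
    }
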
commit b342501cd31e5546d0c9ca8ceff5ded1832f9e5b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 20:20:29 2009 +0100

    sched: allow architectures to specify sched_clock_stable
    
    Allow CONFIG_HAVE_UNSTABLE_SCHED_CLOCK architectures to still specify
    that their sched_clock() implementation is reliable.
    
    This will be used by x86 to switch on a faster sched_clock_cpu()
    implementation on certain CPU types.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8981e52c714f..a063d19b7a7d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1670,6 +1670,16 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 	return set_cpus_allowed_ptr(p, &new_mask);
 }
 
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+extern int sched_clock_stable;
+#endif
+
 extern unsigned long long sched_clock(void);
 
 extern void sched_clock_init(void);
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..a755d023805a 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -24,11 +24,11 @@
  * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
  * consistent between cpus (never more than 2 jiffies difference).
  */
-#include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <linux/ktime.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -43,6 +43,10 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 static __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__read_mostly int sched_clock_stable;
+#else
+static const int sched_clock_stable = 1;
+#endif
 
 struct sched_clock_data {
 	/*
@@ -87,7 +91,7 @@ void sched_clock_init(void)
 }
 
 /*
- * min,max except they take wrapping into account
+ * min, max except they take wrapping into account
  */
 
 static inline u64 wrap_min(u64 x, u64 y)
@@ -116,10 +120,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
 	if (unlikely(delta < 0))
 		delta = 0;
 
+	if (unlikely(!sched_clock_running))
+		return 0ull;
+
 	/*
 	 * scd->clock = clamp(scd->tick_gtod + delta,
-	 * 		      max(scd->tick_gtod, scd->clock),
-	 * 		      scd->tick_gtod + TICK_NSEC);
+	 *		      max(scd->tick_gtod, scd->clock),
+	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
 	clock = scd->tick_gtod + delta;
@@ -148,12 +155,13 @@ static void lock_double_clock(struct sched_clock_data *data1,
 
 u64 sched_clock_cpu(int cpu)
 {
-	struct sched_clock_data *scd = cpu_sdc(cpu);
 	u64 now, clock, this_clock, remote_clock;
+	struct sched_clock_data *scd;
 
-	if (unlikely(!sched_clock_running))
-		return 0ull;
+	if (sched_clock_stable)
+		return sched_clock();
 
+	scd = cpu_sdc(cpu);
 	WARN_ON_ONCE(!irqs_disabled());
 	now = sched_clock();
 
@@ -193,6 +201,8 @@ u64 sched_clock_cpu(int cpu)
 	return clock;
 }
 
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+
 void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd = this_scd();
@@ -235,22 +245,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
-
-void sched_clock_init(void)
-{
-	sched_clock_running = 1;
-}
-
-u64 sched_clock_cpu(int cpu)
-{
-	if (unlikely(!sched_clock_running))
-		return 0;
-
-	return sched_clock();
-}
-
-#endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 unsigned long long cpu_clock(int cpu)
 {

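Note the trick in the kernel/sched_clock.c hunk: when CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is off, sched_clock_stable is a static const 1, so the compiler folds the test away and the slow path disappears entirely. A standalone sketch of the same pattern, with illustrative names:

    #ifdef HAVE_UNSTABLE_CLOCK
    int clock_stable;			/* the arch may set this at boot */
    #else
    static const int clock_stable = 1;	/* branch folds away at compile time */
    #endif

    extern unsigned long long fast_clock(void);
    extern unsigned long long slow_clock(int cpu);

    unsigned long long clock_cpu(int cpu)
    {
    	if (clock_stable)	/* constant 1 when !HAVE_UNSTABLE_CLOCK */
    		return fast_clock();
    	return slow_clock(cpu);
    }
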
commit 14131f2f98ac350ee9e73faed916d2238a8b6a0d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 18:47:11 2009 +0100

    tracing: implement trace_clock_*() APIs
    
    Impact: implement new tracing timestamp APIs
    
    Add three trace clock variants, with differing scalability/precision
    tradeoffs:
    
     -   local: CPU-local trace clock
     -  medium: scalable global clock with some jitter
     -  global: globally monotonic, serialized clock
    
    Make the ring-buffer use the local trace clock internally.
    
    Acked-by: Peter Zijlstra <peterz@infradead.org>
    Acked-by: Steven Rostedt <rostedt@goodmis.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
new file mode 100644
index 000000000000..7a8130384087
--- /dev/null
+++ b/include/linux/trace_clock.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_TRACE_CLOCK_H
+#define _LINUX_TRACE_CLOCK_H
+
+/*
+ * 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  -   local: CPU-local trace clock
+ *  -  medium: scalable global clock with some jitter
+ *  -  global: globally monotonic, serialized clock
+ */
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+extern u64 notrace trace_clock_local(void);
+extern u64 notrace trace_clock(void);
+extern u64 notrace trace_clock_global(void);
+
+#endif /* _LINUX_TRACE_CLOCK_H */
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 664b6c0dc75a..c931fe0560cb 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8f19f1aa42b0..a8c275c01e83 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
 #include <linux/ring_buffer.h>
+#include <linux/trace_clock.h>
 #include <linux/ftrace_irq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
@@ -12,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>	/* used for sched_clock() (for now) */
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
@@ -112,14 +112,13 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-/* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
 	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = sched_clock() << DEBUG_SHIFT;
+	time = trace_clock_local() << DEBUG_SHIFT;
 	preempt_enable_no_resched_notrace();
 
 	return time;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
new file mode 100644
index 000000000000..2d4953f93560
--- /dev/null
+++ b/kernel/trace/trace_clock.c
@@ -0,0 +1,101 @@
+/*
+ * tracing clocks
+ *
+ *  Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Implements 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  -   local: CPU-local trace clock
+ *  -  medium: scalable global clock with some jitter
+ *  -  global: globally monotonic, serialized clock
+ *
+ * Tracer plugins will choose a default from these clocks.
+ */
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/ktime.h>
+
+/*
+ * trace_clock_local(): the simplest and least coherent tracing clock.
+ *
+ * Useful for tracing that does not cross to other CPUs nor
+ * does it go through idle events.
+ */
+u64 notrace trace_clock_local(void)
+{
+	/*
+	 * sched_clock() is an architecture implemented, fast, scalable,
+	 * lockless clock. It is not guaranteed to be coherent across
+	 * CPUs, nor across CPU idle events.
+	 */
+	return sched_clock();
+}
+
+/*
+ * trace_clock(): 'in-between' trace clock. Not completely serialized,
+ * but not completely incorrect when crossing CPUs either.
+ *
+ * This is based on cpu_clock(), which will allow at most ~1 jiffy of
+ * jitter between CPUs. So it's a pretty scalable clock, but there
+ * can be offsets in the trace data.
+ */
+u64 notrace trace_clock(void)
+{
+	return cpu_clock(raw_smp_processor_id());
+}
+
+
+/*
+ * trace_clock_global(): special globally coherent trace clock
+ *
+ * It has higher overhead than the other trace clocks but is still
+ * an order of magnitude faster than GTOD derived hardware clocks.
+ *
+ * Used by plugins that need globally coherent timestamps.
+ */
+
+static u64 prev_trace_clock_time;
+
+static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+u64 notrace trace_clock_global(void)
+{
+	unsigned long flags;
+	int this_cpu;
+	u64 now;
+
+	raw_local_irq_save(flags);
+
+	this_cpu = raw_smp_processor_id();
+	now = cpu_clock(this_cpu);
+	/*
+	 * If in an NMI context then don't risk lockups and return the
+	 * cpu_clock() time:
+	 */
+	if (unlikely(in_nmi()))
+		goto out;
+
+	__raw_spin_lock(&trace_clock_lock);
+
+	/*
+	 * TODO: if this happens often then maybe we should reset
+	 * my_scd->clock to prev_trace_clock_time+1, to make sure
+	 * we start ticking with the local clock from now on?
+	 */
+	if ((s64)(now - prev_trace_clock_time) < 0)
+		now = prev_trace_clock_time + 1;
+
+	prev_trace_clock_time = now;
+
+	__raw_spin_unlock(&trace_clock_lock);
+
+ out:
+	raw_local_irq_restore(flags);
+
+	return now;
+}

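The heart of trace_clock_global() is a serialize-and-clamp step: take a global lock, and if the local clock reads earlier than the last timestamp handed out globally, nudge it forward so the global clock never moves backwards. A minimal userspace sketch of just that idea, with a pthread mutex standing in for the raw spinlock and clock_gettime() for cpu_clock():

    #include <pthread.h>
    #include <stdint.h>
    #include <time.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static uint64_t prev_ns;

    static uint64_t local_ns(void)
    {
    	struct timespec ts;

    	clock_gettime(CLOCK_MONOTONIC, &ts);
    	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
    }

    uint64_t global_ns(void)
    {
    	uint64_t now = local_ns();

    	pthread_mutex_lock(&lock);
    	/* wrap-safe compare: never return a value behind the previous one */
    	if ((int64_t)(now - prev_ns) < 0)
    		now = prev_ns + 1;
    	prev_ns = now;
    	pthread_mutex_unlock(&lock);

    	return now;
    }
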
commit 6409c4da289d6905f7ae2bd0630438368439bda2
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:21:14 2008 +0200

    sched: sched_clock() improvement: use in_nmi()
    
    make sure we don't execute more complex sched_clock() code in NMI context.
    
    Acked-by: Peter Zijlstra <peterz@infradead.org>
    Acked-by: Steven Rostedt <rostedt@goodmis.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..db69174b1178 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -29,6 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/ktime.h>
 #include <linux/module.h>
+#include <linux/hardirq.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -151,6 +152,13 @@ u64 sched_clock_cpu(int cpu)
 	struct sched_clock_data *scd = cpu_sdc(cpu);
 	u64 now, clock, this_clock, remote_clock;
 
+	/*
+	 * Normally this is not called in NMI context - but if it is,
+	 * trying to do any locking here is totally lethal.
+	 */
+	if (unlikely(in_nmi()))
+		return scd->clock;
+
 	if (unlikely(!sched_clock_running))
 		return 0ull;
 

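The rule this patch encodes: an NMI can fire while the interrupted CPU already holds scd->lock, so taking any lock from NMI context can self-deadlock, and the only safe option is to return the last value published under the lock. A compact sketch of the fallback pattern (illustrative, not the kernel API):

    #include <stdint.h>

    static uint64_t last_clock;	/* last value published under the lock */

    /* in_nmi_ctx stands in for in_nmi(); the lock comments stand in for
     * the per-CPU scd->lock taken on the normal path */
    uint64_t read_clock(int in_nmi_ctx)
    {
    	if (in_nmi_ctx)
    		return last_clock;	/* lock-free, possibly stale */

    	/* spin_lock(&scd->lock); */
    	last_clock += 1;		/* placeholder for the real update */
    	/* spin_unlock(&scd->lock); */
    	return last_clock;
    }
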
commit 3b900d44190c7da8681101c57a5be6b354dab2c7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 14:34:08 2009 +0100

    x86: fix !ACPI build for es7000_32.c
    
    arch/x86/kernel/apic/es7000_32.c:702: error: 'es7000_acpi_madt_oem_check_cluster' undeclared here (not in a function)
    
    Provide an es7000_acpi_madt_oem_check_cluster() definition in the !ACPI
    case too.
    
    Cc: Yinghai Lu <yinghai@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index b4838ed3f26a..da37e2c59fe1 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -332,8 +332,9 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 	return ret && !es7000_apic_is_cluster();
 }
-static int __init es7000_acpi_madt_oem_check_cluster(char *oem_id,
-						     char *oem_table_id)
+
+static int __init
+es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
 {
 	int ret = es7000_acpi_ret;
 
@@ -345,6 +346,12 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	return 0;
 }
+
+static int __init
+es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
+{
+	return 0;
+}
 #endif /* !CONFIG_ACPI */
 
 static void es7000_spin(int n)

commit 0b1da1c8fc1a0cb71f17701efad06855a059f752
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 14:10:10 2009 +0100

    x86: apic: simplify secondary CPU wakeup methods, fix
    
    Impact: build fix
    
    init_deasserted is only available on SMP. Make the secondary-wakeup
    function conditional on SMP.
    
    Also clean up the file some.
    
    Cc: Yinghai Lu <yinghai@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 7151de74a396..1bd6da1f8fad 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -7,28 +7,28 @@
  *
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
 #include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/proc_fs.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
-#include <linux/init.h>
 #include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/timer.h>
-#include <linux/proc_fs.h>
-#include <asm/current.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/pgtable.h>
-#include <asm/uv/uv.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
 #include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/smp.h>
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -93,6 +93,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
 
 static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
+#ifdef CONFIG_SMP
 	unsigned long val;
 	int pnode;
 
@@ -111,7 +112,7 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 
 	atomic_set(&init_deasserted, 1);
-
+#endif
 	return 0;
 }
 
@@ -368,7 +369,7 @@ static __init void map_high(char *id, unsigned long base, int shift,
 	paddr = base << shift;
 	bytes = (1UL << shift) * (max_pnode + 1);
 	printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
-	       					paddr + bytes);
+						paddr + bytes);
 	if (map_type == map_uc)
 		init_extra_mapping_uc(paddr, bytes);
 	else
@@ -531,7 +532,7 @@ late_initcall(uv_init_heartbeat);
 
 /*
  * Called on each cpu to initialize the per_cpu UV data area.
- * 	ZZZ hotplug not supported yet
+ * FIXME: hotplug not supported yet
  */
 void __cpuinit uv_cpu_init(void)
 {

commit 1f5bcabf1b997d6b76a09114b5a79423495a1263
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 13:51:40 2009 +0100

    x86: apic: simplify secondary CPU wakeup methods
    
    Impact: cleanup
    
    - rename apic->wakeup_cpu  to apic->wakeup_secondary_cpu, to
      make it apparent that this is an SMP-only method
    
    - handle NULL ->wakeup_secondary_cpu to mean the default INIT
      wakeup sequence - this allows simplification of the APIC
      driver templates.
    
    Cc: Yinghai Lu <yinghai@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 0fbf6f1520fa..4ef949c1972e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -313,7 +313,7 @@ struct apic {
 	void (*send_IPI_self)(int vector);
 
 	/* wakeup_secondary_cpu */
-	int (*wakeup_cpu)(int apicid, unsigned long start_eip);
+	int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
 
 	int trampoline_phys_low;
 	int trampoline_phys_high;
@@ -344,13 +344,6 @@ extern struct apic *apic;
 #ifdef CONFIG_SMP
 extern atomic_t init_deasserted;
 extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
-extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
-#else
-static inline int
-wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip)
-{
-	return 0;
-}
 #endif
 
 static inline u32 apic_read(u32 reg)
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 00595bc2da8d..f933822dba18 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -222,7 +222,6 @@ struct apic apic_flat =  {
 	.send_IPI_all			= flat_send_IPI_all,
 	.send_IPI_self			= apic_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
@@ -373,7 +372,6 @@ struct apic apic_physflat =  {
 	.send_IPI_all			= physflat_send_IPI_all,
 	.send_IPI_self			= apic_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 8c25917b51a0..69c512e23a9f 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -256,7 +256,6 @@ struct apic apic_bigsmp = {
 	.send_IPI_all			= bigsmp_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 9f6102fc87a1..b4838ed3f26a 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -741,7 +741,7 @@ struct apic apic_es7000_cluster = {
 	.send_IPI_all			= es7000_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_mip,
+	.wakeup_secondary_cpu		= wakeup_secondary_cpu_via_mip,
 
 	.trampoline_phys_low		= 0x467,
 	.trampoline_phys_high		= 0x469,
@@ -806,8 +806,6 @@ struct apic apic_es7000 = {
 	.send_IPI_all			= es7000_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
-
 	.trampoline_phys_low		= 0x467,
 	.trampoline_phys_high		= 0x469,
 
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index c503c1799d63..a7f711f5110a 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -538,7 +538,7 @@ struct apic apic_numaq = {
 	.send_IPI_all			= numaq_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_nmi,
+	.wakeup_secondary_cpu		= wakeup_secondary_cpu_via_nmi,
 	.trampoline_phys_low		= NUMAQ_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= NUMAQ_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 13c6fc7dff99..141c99a1c264 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -138,7 +138,6 @@ struct apic apic_default = {
 	.send_IPI_all			= default_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 5a75d563f676..0a1135c5a6de 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -574,7 +574,6 @@ struct apic apic_summit = {
 	.send_IPI_all			= summit_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 561a6b1042ae..8fb87b6dd633 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -224,7 +224,6 @@ struct apic apic_x2apic_cluster = {
 	.send_IPI_all			= x2apic_send_IPI_all,
 	.send_IPI_self			= x2apic_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 785f8ee4b1df..23625b9f98b2 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -213,7 +213,6 @@ struct apic apic_x2apic_phys = {
 	.send_IPI_all			= x2apic_send_IPI_all,
 	.send_IPI_self			= x2apic_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 6d7b9d960ddc..7151de74a396 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -288,7 +288,7 @@ struct apic apic_x2apic_uv_x = {
 	.send_IPI_all			= uv_send_IPI_all,
 	.send_IPI_self			= uv_send_IPI_self,
 
-	.wakeup_cpu			= uv_wakeup_secondary,
+	.wakeup_secondary_cpu		= uv_wakeup_secondary,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9b338aa03b40..249334f5080a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -742,7 +742,8 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
+ * Returns zero if CPU booted OK, else error code from
+ * ->wakeup_secondary_cpu.
  */
 static int __cpuinit do_boot_cpu(int apicid, int cpu)
 {
@@ -829,9 +830,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 	}
 
 	/*
-	 * Starting actual IPI sequence...
+	 * Kick the secondary CPU. Use the method in the APIC driver
+	 * if it's defined - or use an INIT boot APIC message otherwise:
 	 */
-	boot_error = apic->wakeup_cpu(apicid, start_ip);
+	if (apic->wakeup_secondary_cpu)
+		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
+	else
+		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
 
 	if (!boot_error) {
 		/*
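
The smpboot.c hunk above shows the convention this cleanup introduces: a NULL method pointer selects the built-in default, so most APIC templates can simply drop the field. A small sketch of the optional-callback-with-default pattern (illustrative struct, not the kernel's struct apic):

    #include <stdio.h>

    struct wake_ops {
    	int (*wakeup)(int id);		/* NULL means: use the default */
    };

    static int default_wakeup(int id)
    {
    	printf("default INIT wakeup for CPU %d\n", id);
    	return 0;
    }

    static int do_boot(const struct wake_ops *ops, int id)
    {
    	if (ops->wakeup)
    		return ops->wakeup(id);
    	return default_wakeup(id);
    }

    int main(void)
    {
    	struct wake_ops plain = { 0 };	/* most drivers: no override */

    	return do_boot(&plain, 1);
    }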