Patches contributed by Eötvös Lorand University

<<Prev 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112[113]114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 Next>>

commit 1b49061d400c9e51e3ac2aac026a099fe599b9bb
Merge: 14131f2f98ac 83ce40092868
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 21:21:59 2009 +0100

    Merge branch 'sched/clock' into tracing/ftrace
    
    Conflicts:
            kernel/sched_clock.c

diff --cc kernel/sched_clock.c
index db69174b1178,a755d023805a..7ec82c1c61c5
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@@ -24,12 -24,11 +24,12 @@@
   * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
   * consistent between cpus (never more than 2 jiffies difference).
   */
- #include <linux/sched.h>
- #include <linux/percpu.h>
  #include <linux/spinlock.h>
- #include <linux/ktime.h>
- #include <linux/module.h>
 +#include <linux/hardirq.h>
+ #include <linux/module.h>
+ #include <linux/percpu.h>
+ #include <linux/ktime.h>
+ #include <linux/sched.h>
  
  /*
   * Scheduler clock - returns current time in nanosec units.
@@@ -149,19 -155,13 +156,24 @@@ static void lock_double_clock(struct sc
  
  u64 sched_clock_cpu(int cpu)
  {
- 	struct sched_clock_data *scd = cpu_sdc(cpu);
  	u64 now, clock, this_clock, remote_clock;
+ 	struct sched_clock_data *scd;
+ 
+ 	if (sched_clock_stable)
+ 		return sched_clock();
+ 
+ 	scd = cpu_sdc(cpu);
 +
 +	/*
 +	 * Normally this is not called in NMI context - but if it is,
 +	 * trying to do any locking here is totally lethal.
 +	 */
 +	if (unlikely(in_nmi()))
 +		return scd->clock;
 +
 +	if (unlikely(!sched_clock_running))
 +		return 0ull;
 +
  	WARN_ON_ONCE(!irqs_disabled());
  	now = sched_clock();

commit ba1d755a36f66101aa88ac9ebb54694def6ec38d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Oct 18 21:24:45 2008 +0200

    fix warning in arch/x86/kernel/cpu/intel_cacheinfo.c
    
    fix this warning:
    
      arch/x86/kernel/cpu/intel_cacheinfo.c:139: warning: ‘k8_nb_id’ defined but not used
      arch/x86/kernel/cpu/intel_cacheinfo.c:527: warning: ‘free_cache_attributes’ defined but not used
      arch/x86/kernel/cpu/intel_cacheinfo.c:538: warning: ‘detect_cache_attributes’ defined but not used
    
    Unused variables in the !CONFIG_SYSCTL case.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 51b5dfd67163..03f93c5dcfb3 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -144,7 +144,7 @@ struct _cpuid4_info_regs {
 	unsigned long can_disable;
 };
 
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
 static struct pci_device_id k8_nb_id[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
@@ -484,6 +484,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	return l2;
 }
 
+#ifdef CONFIG_SYSFS
+
 /* pointer to _cpuid4_info array (for each cache leaf) */
 static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
 #define CPUID4_INFO_IDX(x, y)	(&((per_cpu(cpuid4_info, x))[y]))
@@ -597,8 +599,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
 	return retval;
 }
 
-#ifdef CONFIG_SYSFS
-
 #include <linux/kobject.h>
 #include <linux/sysfs.h>

commit 5d0859cef29167d45dc6cf89d19712145e6005d6
Merge: 14131f2f98ac 83ce40092868
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 21:21:59 2009 +0100

    Merge branch 'sched/clock' into tracing/ftrace
    
    Conflicts:
            kernel/sched_clock.c

diff --cc kernel/sched_clock.c
index db69174b1178,a755d023805a..f7602da84c40
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@@ -24,12 -24,11 +24,12 @@@
   * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
   * consistent between cpus (never more than 2 jiffies difference).
   */
- #include <linux/sched.h>
- #include <linux/percpu.h>
  #include <linux/spinlock.h>
- #include <linux/ktime.h>
- #include <linux/module.h>
 +#include <linux/hardirq.h>
+ #include <linux/module.h>
+ #include <linux/percpu.h>
+ #include <linux/ktime.h>
+ #include <linux/sched.h>
  
  /*
   * Scheduler clock - returns current time in nanosec units.
@@@ -149,19 -155,13 +156,23 @@@ static void lock_double_clock(struct sc
  
  u64 sched_clock_cpu(int cpu)
  {
- 	struct sched_clock_data *scd = cpu_sdc(cpu);
  	u64 now, clock, this_clock, remote_clock;
+ 	struct sched_clock_data *scd;
+ 
+ 	if (sched_clock_stable)
+ 		return sched_clock();
  
 +	/*
 +	 * Normally this is not called in NMI context - but if it is,
 +	 * trying to do any locking here is totally lethal.
 +	 */
 +	if (unlikely(in_nmi()))
 +		return scd->clock;
 +
 +	if (unlikely(!sched_clock_running))
 +		return 0ull;
 +
+ 	scd = cpu_sdc(cpu);
  	WARN_ON_ONCE(!irqs_disabled());
  	now = sched_clock();

commit 83ce400928680a6c8123d492684b27857f5a2d95
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 20:16:58 2009 +0100

    x86: set X86_FEATURE_TSC_RELIABLE
    
    If the TSC is constant and non-stop, also set it reliable.
    
    (We will turn this off in DMI quirks for multi-chassis systems)
    
    The performance number on a 16-way Nehalem system running
    32 tasks that context-switch between each other is significant:
    
       sched_clock_stable=0         sched_clock_stable=1
       ....................         ....................
       22.456925 million/sec        24.306972 million/sec   [+8.2%]
    
    lmbench's "lat_ctx -s 0 2" goes from 0.63 microseconds to
    0.59 microseconds - a 6.7% increase in context-switching
    performance.
    
    Perfstat of 1 million pipe context switches between two tasks:
    
     Performance counter stats for './pipe-test-1m':
    
           [before]           [after]
       ............      ............
       37621.421089      36436.848378    task clock ticks     (msecs)
    
                  0                 0    CPU migrations       (events)
            2000274           2000189    context switches     (events)
                194               193    pagefaults           (events)
         8433799643        8171016416    CPU cycles           (events) -3.21%
         8370133368        8180999694    instructions         (events) -2.31%
            4158565           3895941    cache references     (events) -6.74%
              44312             46264    cache misses         (events)
    
        2349.287976       2279.362465    wall-time            (msecs)  -3.06%
    
    The speedup comes straight from the reduction in the instruction
    count. sched_clock_cpu() got simpler and the whole workload thus
    executes faster.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ade..5fff00c70de0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/smp.h>
+#include <linux/sched.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
 
@@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 
 	/*
 	 * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
-	 * with P/T states and does not stop in deep C-states
+	 * with P/T states and does not stop in deep C-states.
+	 *
+	 * It is also reliable across cores and sockets. (but not across
+	 * cabinets - we turn it off in that case explicitly.)
 	 */
 	if (c->x86_power & (1 << 8)) {
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 		set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+		set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+		sched_clock_stable = 1;
 	}
 
 }

commit b342501cd31e5546d0c9ca8ceff5ded1832f9e5b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 20:20:29 2009 +0100

    sched: allow architectures to specify sched_clock_stable
    
    Allow CONFIG_HAVE_UNSTABLE_SCHED_CLOCK architectures to still specify
    that their sched_clock() implementation is reliable.
    
    This will be used by x86 to switch on a faster sched_clock_cpu()
    implementation on certain CPU types.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8981e52c714f..a063d19b7a7d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1670,6 +1670,16 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 	return set_cpus_allowed_ptr(p, &new_mask);
 }
 
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+extern int sched_clock_stable;
+#endif
+
 extern unsigned long long sched_clock(void);
 
 extern void sched_clock_init(void);
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..a755d023805a 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -24,11 +24,11 @@
  * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
  * consistent between cpus (never more than 2 jiffies difference).
  */
-#include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/spinlock.h>
-#include <linux/ktime.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -43,6 +43,10 @@ unsigned long long __attribute__((weak)) sched_clock(void)
 static __read_mostly int sched_clock_running;
 
 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__read_mostly int sched_clock_stable;
+#else
+static const int sched_clock_stable = 1;
+#endif
 
 struct sched_clock_data {
 	/*
@@ -87,7 +91,7 @@ void sched_clock_init(void)
 }
 
 /*
- * min,max except they take wrapping into account
+ * min, max except they take wrapping into account
  */
 
 static inline u64 wrap_min(u64 x, u64 y)
@@ -116,10 +120,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
 	if (unlikely(delta < 0))
 		delta = 0;
 
+	if (unlikely(!sched_clock_running))
+		return 0ull;
+
 	/*
 	 * scd->clock = clamp(scd->tick_gtod + delta,
-	 * 		      max(scd->tick_gtod, scd->clock),
-	 * 		      scd->tick_gtod + TICK_NSEC);
+	 *		      max(scd->tick_gtod, scd->clock),
+	 *		      scd->tick_gtod + TICK_NSEC);
 	 */
 
 	clock = scd->tick_gtod + delta;
@@ -148,12 +155,13 @@ static void lock_double_clock(struct sched_clock_data *data1,
 
 u64 sched_clock_cpu(int cpu)
 {
-	struct sched_clock_data *scd = cpu_sdc(cpu);
 	u64 now, clock, this_clock, remote_clock;
+	struct sched_clock_data *scd;
 
-	if (unlikely(!sched_clock_running))
-		return 0ull;
+	if (sched_clock_stable)
+		return sched_clock();
 
+	scd = cpu_sdc(cpu);
 	WARN_ON_ONCE(!irqs_disabled());
 	now = sched_clock();
 
@@ -193,6 +201,8 @@ u64 sched_clock_cpu(int cpu)
 	return clock;
 }
 
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+
 void sched_clock_tick(void)
 {
 	struct sched_clock_data *scd = this_scd();
@@ -235,22 +245,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
-#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
-
-void sched_clock_init(void)
-{
-	sched_clock_running = 1;
-}
-
-u64 sched_clock_cpu(int cpu)
-{
-	if (unlikely(!sched_clock_running))
-		return 0;
-
-	return sched_clock();
-}
-
-#endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 
 unsigned long long cpu_clock(int cpu)
 {

commit 14131f2f98ac350ee9e73faed916d2238a8b6a0d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 18:47:11 2009 +0100

    tracing: implement trace_clock_*() APIs
    
    Impact: implement new tracing timestamp APIs
    
    Add three trace clock variants, with differing scalability/precision
    tradeoffs:
    
     -   local: CPU-local trace clock
     -  medium: scalable global clock with some jitter
     -  global: globally monotonic, serialized clock
    
    Make the ring-buffer use the local trace clock internally.
    
    Acked-by: Peter Zijlstra <peterz@infradead.org>
    Acked-by: Steven Rostedt <rostedt@goodmis.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
new file mode 100644
index 000000000000..7a8130384087
--- /dev/null
+++ b/include/linux/trace_clock.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_TRACE_CLOCK_H
+#define _LINUX_TRACE_CLOCK_H
+
+/*
+ * 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  -   local: CPU-local trace clock
+ *  -  medium: scalable global clock with some jitter
+ *  -  global: globally monotonic, serialized clock
+ */
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+extern u64 notrace trace_clock_local(void);
+extern u64 notrace trace_clock(void);
+extern u64 notrace trace_clock_global(void);
+
+#endif /* _LINUX_TRACE_CLOCK_H */
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 664b6c0dc75a..c931fe0560cb 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
 
 obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8f19f1aa42b0..a8c275c01e83 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
  */
 #include <linux/ring_buffer.h>
+#include <linux/trace_clock.h>
 #include <linux/ftrace_irq.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
@@ -12,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>	/* used for sched_clock() (for now) */
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/list.h>
@@ -112,14 +112,13 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
-/* FIXME!!! */
 u64 ring_buffer_time_stamp(int cpu)
 {
 	u64 time;
 
 	preempt_disable_notrace();
 	/* shift to debug/test normalization and TIME_EXTENTS */
-	time = sched_clock() << DEBUG_SHIFT;
+	time = trace_clock_local() << DEBUG_SHIFT;
 	preempt_enable_no_resched_notrace();
 
 	return time;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
new file mode 100644
index 000000000000..2d4953f93560
--- /dev/null
+++ b/kernel/trace/trace_clock.c
@@ -0,0 +1,101 @@
+/*
+ * tracing clocks
+ *
+ *  Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Implements 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ *  -   local: CPU-local trace clock
+ *  -  medium: scalable global clock with some jitter
+ *  -  global: globally monotonic, serialized clock
+ *
+ * Tracer plugins will chose a default from these clocks.
+ */
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/ktime.h>
+
+/*
+ * trace_clock_local(): the simplest and least coherent tracing clock.
+ *
+ * Useful for tracing that does not cross to other CPUs nor
+ * does it go through idle events.
+ */
+u64 notrace trace_clock_local(void)
+{
+	/*
+	 * sched_clock() is an architecture implemented, fast, scalable,
+	 * lockless clock. It is not guaranteed to be coherent across
+	 * CPUs, nor across CPU idle events.
+	 */
+	return sched_clock();
+}
+
+/*
+ * trace_clock(): 'inbetween' trace clock. Not completely serialized,
+ * but not completely incorrect when crossing CPUs either.
+ *
+ * This is based on cpu_clock(), which will allow at most ~1 jiffy of
+ * jitter between CPUs. So it's a pretty scalable clock, but there
+ * can be offsets in the trace data.
+ */
+u64 notrace trace_clock(void)
+{
+	return cpu_clock(raw_smp_processor_id());
+}
+
+
+/*
+ * trace_clock_global(): special globally coherent trace clock
+ *
+ * It has higher overhead than the other trace clocks but is still
+ * an order of magnitude faster than GTOD derived hardware clocks.
+ *
+ * Used by plugins that need globally coherent timestamps.
+ */
+
+static u64 prev_trace_clock_time;
+
+static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+u64 notrace trace_clock_global(void)
+{
+	unsigned long flags;
+	int this_cpu;
+	u64 now;
+
+	raw_local_irq_save(flags);
+
+	this_cpu = raw_smp_processor_id();
+	now = cpu_clock(this_cpu);
+	/*
+	 * If in an NMI context then dont risk lockups and return the
+	 * cpu_clock() time:
+	 */
+	if (unlikely(in_nmi()))
+		goto out;
+
+	__raw_spin_lock(&trace_clock_lock);
+
+	/*
+	 * TODO: if this happens often then maybe we should reset
+	 * my_scd->clock to prev_trace_clock_time+1, to make sure
+	 * we start ticking with the local clock from now on?
+	 */
+	if ((s64)(now - prev_trace_clock_time) < 0)
+		now = prev_trace_clock_time + 1;
+
+	prev_trace_clock_time = now;
+
+	__raw_spin_unlock(&trace_clock_lock);
+
+ out:
+	raw_local_irq_restore(flags);
+
+	return now;
+}

commit 6409c4da289d6905f7ae2bd0630438368439bda2
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:21:14 2008 +0200

    sched: sched_clock() improvement: use in_nmi()
    
    make sure we dont execute more complex sched_clock() code in NMI context.
    
    Acked-by: Peter Zijlstra <peterz@infradead.org>
    Acked-by: Steven Rostedt <rostedt@goodmis.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..db69174b1178 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -29,6 +29,7 @@
 #include <linux/spinlock.h>
 #include <linux/ktime.h>
 #include <linux/module.h>
+#include <linux/hardirq.h>
 
 /*
  * Scheduler clock - returns current time in nanosec units.
@@ -151,6 +152,13 @@ u64 sched_clock_cpu(int cpu)
 	struct sched_clock_data *scd = cpu_sdc(cpu);
 	u64 now, clock, this_clock, remote_clock;
 
+	/*
+	 * Normally this is not called in NMI context - but if it is,
+	 * trying to do any locking here is totally lethal.
+	 */
+	if (unlikely(in_nmi()))
+		return scd->clock;
+
 	if (unlikely(!sched_clock_running))
 		return 0ull;

commit 3b900d44190c7da8681101c57a5be6b354dab2c7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 14:34:08 2009 +0100

    x86: fix !ACPI build for es7000_32.c
    
    arch/x86/kernel/apic/es7000_32.c:702: error: 'es7000_acpi_madt_oem_check_cluster' undeclared here (not in a function)
    
    Provide a es7000_acpi_madt_oem_check_cluster() definition in the !ACPI
    case too.
    
    Cc: Yinghai Lu <yinghai@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index b4838ed3f26a..da37e2c59fe1 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -332,8 +332,9 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 
 	return ret && !es7000_apic_is_cluster();
 }
-static int __init es7000_acpi_madt_oem_check_cluster(char *oem_id,
-						     char *oem_table_id)
+
+static int __init
+es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
 {
 	int ret = es7000_acpi_ret;
 
@@ -345,6 +346,12 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
 	return 0;
 }
+
+static int __init
+es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
+{
+	return 0;
+}
 #endif /* !CONFIG_ACPI */
 
 static void es7000_spin(int n)

commit 0b1da1c8fc1a0cb71f17701efad06855a059f752
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 14:10:10 2009 +0100

    x86: apic: simplify secondary CPU wakeup methods, fix
    
    Impact: build fix
    
    init_deasserted is only available on SMP. Make the secondary-wakeup
    function conditional on SMP.
    
    Also clean up the file some.
    
    Cc: Yinghai Lu <yinghai@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 7151de74a396..1bd6da1f8fad 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -7,28 +7,28 @@
  *
  * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
  */
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
 #include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/proc_fs.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
-#include <linux/init.h>
 #include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/hardirq.h>
 #include <linux/timer.h>
-#include <linux/proc_fs.h>
-#include <asm/current.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/pgtable.h>
-#include <asm/uv/uv.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
 #include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/smp.h>
 
 DEFINE_PER_CPU(int, x2apic_extra_bits);
 
@@ -93,6 +93,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
 
 static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
+#ifdef CONFIG_SMP
 	unsigned long val;
 	int pnode;
 
@@ -111,7 +112,7 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 
 	atomic_set(&init_deasserted, 1);
-
+#endif
 	return 0;
 }
 
@@ -368,7 +369,7 @@ static __init void map_high(char *id, unsigned long base, int shift,
 	paddr = base << shift;
 	bytes = (1UL << shift) * (max_pnode + 1);
 	printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
-	       					paddr + bytes);
+						paddr + bytes);
 	if (map_type == map_uc)
 		init_extra_mapping_uc(paddr, bytes);
 	else
@@ -531,7 +532,7 @@ late_initcall(uv_init_heartbeat);
 
 /*
  * Called on each cpu to initialize the per_cpu UV data area.
- * 	ZZZ hotplug not supported yet
+ * FIXME: hotplug not supported yet
  */
 void __cpuinit uv_cpu_init(void)
 {

commit 1f5bcabf1b997d6b76a09114b5a79423495a1263
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 26 13:51:40 2009 +0100

    x86: apic: simplify secondary CPU wakeup methods
    
    Impact: cleanup
    
    - rename apic->wakeup_cpu  to apic->wakeup_secondary_cpu, to
      make it apparent that this is an SMP-only method
    
    - handle NULL ->wakeup_secondary_cpus to mean the default INIT
      wakeup sequence - this allows simplification of the APIC
      driver templates.
    
    Cc: Yinghai Lu <yinghai@kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 0fbf6f1520fa..4ef949c1972e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -313,7 +313,7 @@ struct apic {
 	void (*send_IPI_self)(int vector);
 
 	/* wakeup_secondary_cpu */
-	int (*wakeup_cpu)(int apicid, unsigned long start_eip);
+	int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
 
 	int trampoline_phys_low;
 	int trampoline_phys_high;
@@ -344,13 +344,6 @@ extern struct apic *apic;
 #ifdef CONFIG_SMP
 extern atomic_t init_deasserted;
 extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
-extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
-#else
-static inline int
-wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip)
-{
-	return 0;
-}
 #endif
 
 static inline u32 apic_read(u32 reg)
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 00595bc2da8d..f933822dba18 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -222,7 +222,6 @@ struct apic apic_flat =  {
 	.send_IPI_all			= flat_send_IPI_all,
 	.send_IPI_self			= apic_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
@@ -373,7 +372,6 @@ struct apic apic_physflat =  {
 	.send_IPI_all			= physflat_send_IPI_all,
 	.send_IPI_self			= apic_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 8c25917b51a0..69c512e23a9f 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -256,7 +256,6 @@ struct apic apic_bigsmp = {
 	.send_IPI_all			= bigsmp_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 9f6102fc87a1..b4838ed3f26a 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -741,7 +741,7 @@ struct apic apic_es7000_cluster = {
 	.send_IPI_all			= es7000_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_mip,
+	.wakeup_secondary_cpu		= wakeup_secondary_cpu_via_mip,
 
 	.trampoline_phys_low		= 0x467,
 	.trampoline_phys_high		= 0x469,
@@ -806,8 +806,6 @@ struct apic apic_es7000 = {
 	.send_IPI_all			= es7000_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
-
 	.trampoline_phys_low		= 0x467,
 	.trampoline_phys_high		= 0x469,
 
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index c503c1799d63..a7f711f5110a 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -538,7 +538,7 @@ struct apic apic_numaq = {
 	.send_IPI_all			= numaq_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_nmi,
+	.wakeup_secondary_cpu		= wakeup_secondary_cpu_via_nmi,
 	.trampoline_phys_low		= NUMAQ_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= NUMAQ_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 13c6fc7dff99..141c99a1c264 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -138,7 +138,6 @@ struct apic apic_default = {
 	.send_IPI_all			= default_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 5a75d563f676..0a1135c5a6de 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -574,7 +574,6 @@ struct apic apic_summit = {
 	.send_IPI_all			= summit_send_IPI_all,
 	.send_IPI_self			= default_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 561a6b1042ae..8fb87b6dd633 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -224,7 +224,6 @@ struct apic apic_x2apic_cluster = {
 	.send_IPI_all			= x2apic_send_IPI_all,
 	.send_IPI_self			= x2apic_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 785f8ee4b1df..23625b9f98b2 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -213,7 +213,6 @@ struct apic apic_x2apic_phys = {
 	.send_IPI_all			= x2apic_send_IPI_all,
 	.send_IPI_self			= x2apic_send_IPI_self,
 
-	.wakeup_cpu			= wakeup_secondary_cpu_via_init,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 6d7b9d960ddc..7151de74a396 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -288,7 +288,7 @@ struct apic apic_x2apic_uv_x = {
 	.send_IPI_all			= uv_send_IPI_all,
 	.send_IPI_self			= uv_send_IPI_self,
 
-	.wakeup_cpu			= uv_wakeup_secondary,
+	.wakeup_secondary_cpu		= uv_wakeup_secondary,
 	.trampoline_phys_low		= DEFAULT_TRAMPOLINE_PHYS_LOW,
 	.trampoline_phys_high		= DEFAULT_TRAMPOLINE_PHYS_HIGH,
 	.wait_for_init_deassert		= NULL,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9b338aa03b40..249334f5080a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -742,7 +742,8 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
+ * Returns zero if CPU booted OK, else error code from
+ * ->wakeup_secondary_cpu.
  */
 static int __cpuinit do_boot_cpu(int apicid, int cpu)
 {
@@ -829,9 +830,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 	}
 
 	/*
-	 * Starting actual IPI sequence...
+	 * Kick the secondary CPU. Use the method in the APIC driver
+	 * if it's defined - or use an INIT boot APIC message otherwise:
 	 */
-	boot_error = apic->wakeup_cpu(apicid, start_ip);
+	if (apic->wakeup_secondary_cpu)
+		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
+	else
+		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
 
 	if (!boot_error) {
 		/*