Patches contributed by Eötvös Lorand University
commit 1b49061d400c9e51e3ac2aac026a099fe599b9bb
Merge: 14131f2f98ac 83ce40092868
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 26 21:21:59 2009 +0100
Merge branch 'sched/clock' into tracing/ftrace
Conflicts:
kernel/sched_clock.c
diff --cc kernel/sched_clock.c
index db69174b1178,a755d023805a..7ec82c1c61c5
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@@ -24,12 -24,11 +24,12 @@@
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
* consistent between cpus (never more than 2 jiffies difference).
*/
- #include <linux/sched.h>
- #include <linux/percpu.h>
#include <linux/spinlock.h>
- #include <linux/ktime.h>
- #include <linux/module.h>
+#include <linux/hardirq.h>
+ #include <linux/module.h>
+ #include <linux/percpu.h>
+ #include <linux/ktime.h>
+ #include <linux/sched.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@@ -149,19 -155,13 +156,24 @@@ static void lock_double_clock(struct sc
u64 sched_clock_cpu(int cpu)
{
- struct sched_clock_data *scd = cpu_sdc(cpu);
u64 now, clock, this_clock, remote_clock;
+ struct sched_clock_data *scd;
+
+ if (sched_clock_stable)
+ return sched_clock();
+
+ scd = cpu_sdc(cpu);
+
+ /*
+ * Normally this is not called in NMI context - but if it is,
+ * trying to do any locking here is totally lethal.
+ */
+ if (unlikely(in_nmi()))
+ return scd->clock;
+
+ if (unlikely(!sched_clock_running))
+ return 0ull;
+
WARN_ON_ONCE(!irqs_disabled());
now = sched_clock();
commit ba1d755a36f66101aa88ac9ebb54694def6ec38d
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Oct 18 21:24:45 2008 +0200
fix warning in arch/x86/kernel/cpu/intel_cacheinfo.c
fix this warning:
arch/x86/kernel/cpu/intel_cacheinfo.c:139: warning: ‘k8_nb_id’ defined but not used
arch/x86/kernel/cpu/intel_cacheinfo.c:527: warning: ‘free_cache_attributes’ defined but not used
arch/x86/kernel/cpu/intel_cacheinfo.c:538: warning: ‘detect_cache_attributes’ defined but not used
Unused variables in the !CONFIG_SYSCTL case.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 51b5dfd67163..03f93c5dcfb3 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -144,7 +144,7 @@ struct _cpuid4_info_regs {
unsigned long can_disable;
};
-#ifdef CONFIG_PCI
+#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
static struct pci_device_id k8_nb_id[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
@@ -484,6 +484,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
return l2;
}
+#ifdef CONFIG_SYSFS
+
/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
@@ -597,8 +599,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
return retval;
}
-#ifdef CONFIG_SYSFS
-
#include <linux/kobject.h>
#include <linux/sysfs.h>
commit 5d0859cef29167d45dc6cf89d19712145e6005d6
Merge: 14131f2f98ac 83ce40092868
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 26 21:21:59 2009 +0100
Merge branch 'sched/clock' into tracing/ftrace
Conflicts:
kernel/sched_clock.c
diff --cc kernel/sched_clock.c
index db69174b1178,a755d023805a..f7602da84c40
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@@ -24,12 -24,11 +24,12 @@@
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
* consistent between cpus (never more than 2 jiffies difference).
*/
- #include <linux/sched.h>
- #include <linux/percpu.h>
#include <linux/spinlock.h>
- #include <linux/ktime.h>
- #include <linux/module.h>
+#include <linux/hardirq.h>
+ #include <linux/module.h>
+ #include <linux/percpu.h>
+ #include <linux/ktime.h>
+ #include <linux/sched.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@@ -149,19 -155,13 +156,23 @@@ static void lock_double_clock(struct sc
u64 sched_clock_cpu(int cpu)
{
- struct sched_clock_data *scd = cpu_sdc(cpu);
u64 now, clock, this_clock, remote_clock;
+ struct sched_clock_data *scd;
+
+ if (sched_clock_stable)
+ return sched_clock();
+ /*
+ * Normally this is not called in NMI context - but if it is,
+ * trying to do any locking here is totally lethal.
+ */
+ if (unlikely(in_nmi()))
+ return scd->clock;
+
+ if (unlikely(!sched_clock_running))
+ return 0ull;
+
+ scd = cpu_sdc(cpu);
WARN_ON_ONCE(!irqs_disabled());
now = sched_clock();
commit 83ce400928680a6c8123d492684b27857f5a2d95
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 26 20:16:58 2009 +0100
x86: set X86_FEATURE_TSC_RELIABLE
If the TSC is constant and non-stop, also set it reliable.
(We will turn this off in DMI quirks for multi-chassis systems)
The performance number on a 16-way Nehalem system running
32 tasks that context-switch between each other is significant:
sched_clock_stable=0 sched_clock_stable=1
.................... ....................
22.456925 million/sec 24.306972 million/sec [+8.2%]
lmbench's "lat_ctx -s 0 2" goes from 0.63 microseconds to
0.59 microseconds - a 6.7% increase in context-switching
performance.
Perfstat of 1 million pipe context switches between two tasks:
Performance counter stats for './pipe-test-1m':
[before] [after]
............ ............
37621.421089 36436.848378 task clock ticks (msecs)
0 0 CPU migrations (events)
2000274 2000189 context switches (events)
194 193 pagefaults (events)
8433799643 8171016416 CPU cycles (events) -3.21%
8370133368 8180999694 instructions (events) -2.31%
4158565 3895941 cache references (events) -6.74%
44312 46264 cache misses (events)
2349.287976 2279.362465 wall-time (msecs) -3.06%
The speedup comes straight from the reduction in the instruction
count. sched_clock_cpu() got simpler and the whole workload thus
executes faster.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ade..5fff00c70de0 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -4,6 +4,7 @@
#include <linux/string.h>
#include <linux/bitops.h>
#include <linux/smp.h>
+#include <linux/sched.h>
#include <linux/thread_info.h>
#include <linux/module.h>
@@ -56,11 +57,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
/*
* c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate
- * with P/T states and does not stop in deep C-states
+ * with P/T states and does not stop in deep C-states.
+ *
+ * It is also reliable across cores and sockets. (but not across
+ * cabinets - we turn it off in that case explicitly.)
*/
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
+ sched_clock_stable = 1;
}
}
commit b342501cd31e5546d0c9ca8ceff5ded1832f9e5b
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 26 20:20:29 2009 +0100
sched: allow architectures to specify sched_clock_stable
Allow CONFIG_HAVE_UNSTABLE_SCHED_CLOCK architectures to still specify
that their sched_clock() implementation is reliable.
This will be used by x86 to switch on a faster sched_clock_cpu()
implementation on certain CPU types.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8981e52c714f..a063d19b7a7d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1670,6 +1670,16 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
return set_cpus_allowed_ptr(p, &new_mask);
}
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+extern int sched_clock_stable;
+#endif
+
extern unsigned long long sched_clock(void);
extern void sched_clock_init(void);
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..a755d023805a 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -24,11 +24,11 @@
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
* consistent between cpus (never more than 2 jiffies difference).
*/
-#include <linux/sched.h>
-#include <linux/percpu.h>
#include <linux/spinlock.h>
-#include <linux/ktime.h>
#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/ktime.h>
+#include <linux/sched.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@ -43,6 +43,10 @@ unsigned long long __attribute__((weak)) sched_clock(void)
static __read_mostly int sched_clock_running;
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+__read_mostly int sched_clock_stable;
+#else
+static const int sched_clock_stable = 1;
+#endif
struct sched_clock_data {
/*
@@ -87,7 +91,7 @@ void sched_clock_init(void)
}
/*
- * min,max except they take wrapping into account
+ * min, max except they take wrapping into account
*/
static inline u64 wrap_min(u64 x, u64 y)
@@ -116,10 +120,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
if (unlikely(delta < 0))
delta = 0;
+ if (unlikely(!sched_clock_running))
+ return 0ull;
+
/*
* scd->clock = clamp(scd->tick_gtod + delta,
- * max(scd->tick_gtod, scd->clock),
- * scd->tick_gtod + TICK_NSEC);
+ * max(scd->tick_gtod, scd->clock),
+ * scd->tick_gtod + TICK_NSEC);
*/
clock = scd->tick_gtod + delta;
@@ -148,12 +155,13 @@ static void lock_double_clock(struct sched_clock_data *data1,
u64 sched_clock_cpu(int cpu)
{
- struct sched_clock_data *scd = cpu_sdc(cpu);
u64 now, clock, this_clock, remote_clock;
+ struct sched_clock_data *scd;
- if (unlikely(!sched_clock_running))
- return 0ull;
+ if (sched_clock_stable)
+ return sched_clock();
+ scd = cpu_sdc(cpu);
WARN_ON_ONCE(!irqs_disabled());
now = sched_clock();
@@ -193,6 +201,8 @@ u64 sched_clock_cpu(int cpu)
return clock;
}
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+
void sched_clock_tick(void)
{
struct sched_clock_data *scd = this_scd();
@@ -235,22 +245,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
-
-void sched_clock_init(void)
-{
- sched_clock_running = 1;
-}
-
-u64 sched_clock_cpu(int cpu)
-{
- if (unlikely(!sched_clock_running))
- return 0;
-
- return sched_clock();
-}
-
-#endif
+#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
unsigned long long cpu_clock(int cpu)
{
commit 14131f2f98ac350ee9e73faed916d2238a8b6a0d
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 26 18:47:11 2009 +0100
tracing: implement trace_clock_*() APIs
Impact: implement new tracing timestamp APIs
Add three trace clock variants, with differing scalability/precision
tradeoffs:
- local: CPU-local trace clock
- medium: scalable global clock with some jitter
- global: globally monotonic, serialized clock
Make the ring-buffer use the local trace clock internally.
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
new file mode 100644
index 000000000000..7a8130384087
--- /dev/null
+++ b/include/linux/trace_clock.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_TRACE_CLOCK_H
+#define _LINUX_TRACE_CLOCK_H
+
+/*
+ * 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ * - local: CPU-local trace clock
+ * - medium: scalable global clock with some jitter
+ * - global: globally monotonic, serialized clock
+ */
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+extern u64 notrace trace_clock_local(void);
+extern u64 notrace trace_clock(void);
+extern u64 notrace trace_clock_global(void);
+
+#endif /* _LINUX_TRACE_CLOCK_H */
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 664b6c0dc75a..c931fe0560cb 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 8f19f1aa42b0..a8c275c01e83 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4,6 +4,7 @@
* Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
*/
#include <linux/ring_buffer.h>
+#include <linux/trace_clock.h>
#include <linux/ftrace_irq.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
@@ -12,7 +13,6 @@
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/mutex.h>
-#include <linux/sched.h> /* used for sched_clock() (for now) */
#include <linux/init.h>
#include <linux/hash.h>
#include <linux/list.h>
@@ -112,14 +112,13 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
/* Up this if you want to test the TIME_EXTENTS and normalization */
#define DEBUG_SHIFT 0
-/* FIXME!!! */
u64 ring_buffer_time_stamp(int cpu)
{
u64 time;
preempt_disable_notrace();
/* shift to debug/test normalization and TIME_EXTENTS */
- time = sched_clock() << DEBUG_SHIFT;
+ time = trace_clock_local() << DEBUG_SHIFT;
preempt_enable_no_resched_notrace();
return time;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
new file mode 100644
index 000000000000..2d4953f93560
--- /dev/null
+++ b/kernel/trace/trace_clock.c
@@ -0,0 +1,101 @@
+/*
+ * tracing clocks
+ *
+ * Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Implements 3 trace clock variants, with differing scalability/precision
+ * tradeoffs:
+ *
+ * - local: CPU-local trace clock
+ * - medium: scalable global clock with some jitter
+ * - global: globally monotonic, serialized clock
+ *
+ * Tracer plugins will chose a default from these clocks.
+ */
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/ktime.h>
+
+/*
+ * trace_clock_local(): the simplest and least coherent tracing clock.
+ *
+ * Useful for tracing that does not cross to other CPUs nor
+ * does it go through idle events.
+ */
+u64 notrace trace_clock_local(void)
+{
+ /*
+ * sched_clock() is an architecture implemented, fast, scalable,
+ * lockless clock. It is not guaranteed to be coherent across
+ * CPUs, nor across CPU idle events.
+ */
+ return sched_clock();
+}
+
+/*
+ * trace_clock(): 'inbetween' trace clock. Not completely serialized,
+ * but not completely incorrect when crossing CPUs either.
+ *
+ * This is based on cpu_clock(), which will allow at most ~1 jiffy of
+ * jitter between CPUs. So it's a pretty scalable clock, but there
+ * can be offsets in the trace data.
+ */
+u64 notrace trace_clock(void)
+{
+ return cpu_clock(raw_smp_processor_id());
+}
+
+
+/*
+ * trace_clock_global(): special globally coherent trace clock
+ *
+ * It has higher overhead than the other trace clocks but is still
+ * an order of magnitude faster than GTOD derived hardware clocks.
+ *
+ * Used by plugins that need globally coherent timestamps.
+ */
+
+static u64 prev_trace_clock_time;
+
+static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
+ (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+u64 notrace trace_clock_global(void)
+{
+ unsigned long flags;
+ int this_cpu;
+ u64 now;
+
+ raw_local_irq_save(flags);
+
+ this_cpu = raw_smp_processor_id();
+ now = cpu_clock(this_cpu);
+ /*
+ * If in an NMI context then dont risk lockups and return the
+ * cpu_clock() time:
+ */
+ if (unlikely(in_nmi()))
+ goto out;
+
+ __raw_spin_lock(&trace_clock_lock);
+
+ /*
+ * TODO: if this happens often then maybe we should reset
+ * my_scd->clock to prev_trace_clock_time+1, to make sure
+ * we start ticking with the local clock from now on?
+ */
+ if ((s64)(now - prev_trace_clock_time) < 0)
+ now = prev_trace_clock_time + 1;
+
+ prev_trace_clock_time = now;
+
+ __raw_spin_unlock(&trace_clock_lock);
+
+ out:
+ raw_local_irq_restore(flags);
+
+ return now;
+}
commit 6409c4da289d6905f7ae2bd0630438368439bda2
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon May 12 21:21:14 2008 +0200
sched: sched_clock() improvement: use in_nmi()
make sure we dont execute more complex sched_clock() code in NMI context.
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index a0b0852414cc..db69174b1178 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -29,6 +29,7 @@
#include <linux/spinlock.h>
#include <linux/ktime.h>
#include <linux/module.h>
+#include <linux/hardirq.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@ -151,6 +152,13 @@ u64 sched_clock_cpu(int cpu)
struct sched_clock_data *scd = cpu_sdc(cpu);
u64 now, clock, this_clock, remote_clock;
+ /*
+ * Normally this is not called in NMI context - but if it is,
+ * trying to do any locking here is totally lethal.
+ */
+ if (unlikely(in_nmi()))
+ return scd->clock;
+
if (unlikely(!sched_clock_running))
return 0ull;
commit 3b900d44190c7da8681101c57a5be6b354dab2c7
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 26 14:34:08 2009 +0100
x86: fix !ACPI build for es7000_32.c
arch/x86/kernel/apic/es7000_32.c:702: error: 'es7000_acpi_madt_oem_check_cluster' undeclared here (not in a function)
Provide a es7000_acpi_madt_oem_check_cluster() definition in the !ACPI
case too.
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index b4838ed3f26a..da37e2c59fe1 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -332,8 +332,9 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return ret && !es7000_apic_is_cluster();
}
-static int __init es7000_acpi_madt_oem_check_cluster(char *oem_id,
- char *oem_table_id)
+
+static int __init
+es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
{
int ret = es7000_acpi_ret;
@@ -345,6 +346,12 @@ static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
return 0;
}
+
+static int __init
+es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id)
+{
+ return 0;
+}
#endif /* !CONFIG_ACPI */
static void es7000_spin(int n)
commit 0b1da1c8fc1a0cb71f17701efad06855a059f752
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 26 14:10:10 2009 +0100
x86: apic: simplify secondary CPU wakeup methods, fix
Impact: build fix
init_deasserted is only available on SMP. Make the secondary-wakeup
function conditional on SMP.
Also clean up the file some.
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 7151de74a396..1bd6da1f8fad 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -7,28 +7,28 @@
*
* Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
*/
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/cpu.h>
#include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/proc_fs.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/string.h>
#include <linux/ctype.h>
-#include <linux/init.h>
#include <linux/sched.h>
-#include <linux/module.h>
-#include <linux/hardirq.h>
#include <linux/timer.h>
-#include <linux/proc_fs.h>
-#include <asm/current.h>
-#include <asm/smp.h>
-#include <asm/apic.h>
-#include <asm/ipi.h>
-#include <asm/pgtable.h>
-#include <asm/uv/uv.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/smp.h>
DEFINE_PER_CPU(int, x2apic_extra_bits);
@@ -93,6 +93,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
{
+#ifdef CONFIG_SMP
unsigned long val;
int pnode;
@@ -111,7 +112,7 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
atomic_set(&init_deasserted, 1);
-
+#endif
return 0;
}
@@ -368,7 +369,7 @@ static __init void map_high(char *id, unsigned long base, int shift,
paddr = base << shift;
bytes = (1UL << shift) * (max_pnode + 1);
printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
- paddr + bytes);
+ paddr + bytes);
if (map_type == map_uc)
init_extra_mapping_uc(paddr, bytes);
else
@@ -531,7 +532,7 @@ late_initcall(uv_init_heartbeat);
/*
* Called on each cpu to initialize the per_cpu UV data area.
- * ZZZ hotplug not supported yet
+ * FIXME: hotplug not supported yet
*/
void __cpuinit uv_cpu_init(void)
{
commit 1f5bcabf1b997d6b76a09114b5a79423495a1263
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 26 13:51:40 2009 +0100
x86: apic: simplify secondary CPU wakeup methods
Impact: cleanup
- rename apic->wakeup_cpu to apic->wakeup_secondary_cpu, to
make it apparent that this is an SMP-only method
- handle NULL ->wakeup_secondary_cpus to mean the default INIT
wakeup sequence - this allows simplification of the APIC
driver templates.
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 0fbf6f1520fa..4ef949c1972e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -313,7 +313,7 @@ struct apic {
void (*send_IPI_self)(int vector);
/* wakeup_secondary_cpu */
- int (*wakeup_cpu)(int apicid, unsigned long start_eip);
+ int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip);
int trampoline_phys_low;
int trampoline_phys_high;
@@ -344,13 +344,6 @@ extern struct apic *apic;
#ifdef CONFIG_SMP
extern atomic_t init_deasserted;
extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
-extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip);
-#else
-static inline int
-wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip)
-{
- return 0;
-}
#endif
static inline u32 apic_read(u32 reg)
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 00595bc2da8d..f933822dba18 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -222,7 +222,6 @@ struct apic apic_flat = {
.send_IPI_all = flat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_init,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
@@ -373,7 +372,6 @@ struct apic apic_physflat = {
.send_IPI_all = physflat_send_IPI_all,
.send_IPI_self = apic_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_init,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 8c25917b51a0..69c512e23a9f 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -256,7 +256,6 @@ struct apic apic_bigsmp = {
.send_IPI_all = bigsmp_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_init,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 9f6102fc87a1..b4838ed3f26a 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -741,7 +741,7 @@ struct apic apic_es7000_cluster = {
.send_IPI_all = es7000_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_mip,
+ .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip,
.trampoline_phys_low = 0x467,
.trampoline_phys_high = 0x469,
@@ -806,8 +806,6 @@ struct apic apic_es7000 = {
.send_IPI_all = es7000_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_init,
-
.trampoline_phys_low = 0x467,
.trampoline_phys_high = 0x469,
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index c503c1799d63..a7f711f5110a 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -538,7 +538,7 @@ struct apic apic_numaq = {
.send_IPI_all = numaq_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_nmi,
+ .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi,
.trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 13c6fc7dff99..141c99a1c264 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -138,7 +138,6 @@ struct apic apic_default = {
.send_IPI_all = default_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_init,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index 5a75d563f676..0a1135c5a6de 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -574,7 +574,6 @@ struct apic apic_summit = {
.send_IPI_all = summit_send_IPI_all,
.send_IPI_self = default_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_init,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 561a6b1042ae..8fb87b6dd633 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -224,7 +224,6 @@ struct apic apic_x2apic_cluster = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_init,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 785f8ee4b1df..23625b9f98b2 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -213,7 +213,6 @@ struct apic apic_x2apic_phys = {
.send_IPI_all = x2apic_send_IPI_all,
.send_IPI_self = x2apic_send_IPI_self,
- .wakeup_cpu = wakeup_secondary_cpu_via_init,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 6d7b9d960ddc..7151de74a396 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -288,7 +288,7 @@ struct apic apic_x2apic_uv_x = {
.send_IPI_all = uv_send_IPI_all,
.send_IPI_self = uv_send_IPI_self,
- .wakeup_cpu = uv_wakeup_secondary,
+ .wakeup_secondary_cpu = uv_wakeup_secondary,
.trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
.trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
.wait_for_init_deassert = NULL,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9b338aa03b40..249334f5080a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -742,7 +742,8 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
+ * Returns zero if CPU booted OK, else error code from
+ * ->wakeup_secondary_cpu.
*/
static int __cpuinit do_boot_cpu(int apicid, int cpu)
{
@@ -829,9 +830,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
}
/*
- * Starting actual IPI sequence...
+ * Kick the secondary CPU. Use the method in the APIC driver
+ * if it's defined - or use an INIT boot APIC message otherwise:
*/
- boot_error = apic->wakeup_cpu(apicid, start_ip);
+ if (apic->wakeup_secondary_cpu)
+ boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
+ else
+ boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
if (!boot_error) {
/*