Patches contributed by Eötvös Loránd University


commit 9c44bc03fff44ff04237a7d92e35304a0e50c331
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:21:04 2008 +0200

    softlockup: allow panic on lockup
    
    allow users to configure the softlockup detector to generate a panic
    instead of a warning message.
    
    high-availability systems might opt for this strict method (combined
    with panic_timeout= boot option/sysctl), instead of generating
    softlockup warnings ad infinitum.
    
    also, automated tests work better if the system reboots reliably (into
    a safe kernel) in case of a lockup.
    
    The full spectrum of configurability is supported: boot option, sysctl
    option and Kconfig option.
    
    it's default-disabled.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

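For illustration, here is a minimal user-space sketch (not part of the patch) of exercising the new runtime knob, assuming procfs is mounted at /proc and the kernel carries this change; the same behaviour can be selected at boot time with softlockup_panic=1 on the command line, or at build time via CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y:

/*
 * Hedged example: enable panic-on-softlockup at runtime through the
 * sysctl added by this patch, and give the panic a 30 second reboot
 * timeout via the existing kernel.panic sysctl.
 */
#include <stdio.h>
#include <stdlib.h>

static int write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%s\n", val);
	fclose(f);
	return 0;
}

int main(void)
{
	/* 1 = panic on soft lockup, 0 = only warn (the default) */
	if (write_sysctl("/proc/sys/kernel/softlockup_panic", "1"))
		return EXIT_FAILURE;
	/* reboot 30 seconds after the panic (panic_timeout) */
	if (write_sysctl("/proc/sys/kernel/panic", "30"))
		return EXIT_FAILURE;

	return EXIT_SUCCESS;
}

The boot-time equivalents of this pair are softlockup_panic=1 and panic=30; both writes above require root.
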
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e07c432c731f..042588fa12e5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1971,6 +1971,9 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	snd-ymfpci=	[HW,ALSA]
 
+	softlockup_panic=
+			[KNL] Should the soft-lockup detector generate panics.
+
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/sonypi.txt
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5395a6176f4b..71f5972dc48e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -294,7 +294,8 @@ extern void softlockup_tick(void);
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern unsigned long  softlockup_thresh;
+extern unsigned int  softlockup_panic;
+extern unsigned long softlockup_thresh;
 extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_warnings;
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 01b6522fd92b..78e0ad21cb0c 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -27,6 +27,21 @@ static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
 static int __read_mostly did_panic;
 unsigned long __read_mostly softlockup_thresh = 60;
 
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * soft-lockup occurs:
+ */
+unsigned int __read_mostly softlockup_panic =
+				CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static int __init softlockup_panic_setup(char *str)
+{
+	softlockup_panic = simple_strtoul(str, NULL, 0);
+
+	return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
+
 static int
 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
 {
@@ -120,6 +135,9 @@ void softlockup_tick(void)
 	else
 		dump_stack();
 	spin_unlock(&print_lock);
+
+	if (softlockup_panic)
+		panic("softlockup: hung tasks");
 }
 
 /*
@@ -172,6 +190,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
 
 	t->last_switch_timestamp = now;
 	touch_nmi_watchdog();
+
+	if (softlockup_panic)
+		panic("softlockup: blocked tasks");
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 29116652dca8..2d3b388c402d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -727,6 +727,17 @@ static struct ctl_table kern_table[] = {
 	},
 #endif
 #ifdef CONFIG_DETECT_SOFTLOCKUP
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "softlockup_panic",
+		.data		= &softlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_doulongvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "softlockup_thresh",
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..509ae35a9ef5 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -147,7 +147,7 @@ config DETECT_SOFTLOCKUP
 	help
 	  Say Y here to enable the kernel to detect "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
-	  mode for more than 10 seconds, without giving other tasks a
+	  mode for more than 60 seconds, without giving other tasks a
 	  chance to run.
 
 	  When a soft-lockup is detected, the kernel will print the
@@ -159,6 +159,30 @@ config DETECT_SOFTLOCKUP
 	   can be detected via the NMI-watchdog, on platforms that
 	   support it.)
 
+config BOOTPARAM_SOFTLOCKUP_PANIC
+	bool "Panic (Reboot) On Soft Lockups"
+	depends on DETECT_SOFTLOCKUP
+	help
+	  Say Y here to enable the kernel to panic on "soft lockups",
+	  which are bugs that cause the kernel to loop in kernel
+	  mode for more than 60 seconds, without giving other tasks a
+	  chance to run.
+
+	  The panic can be used in combination with panic_timeout,
+	  to cause the system to reboot automatically after a
+	  lockup has been detected. This feature is useful for
+	  high-availability systems that have uptime guarantees and
+	  where a lockup must be resolved ASAP.
+
+	  Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+	int
+	depends on DETECT_SOFTLOCKUP
+	range 0 1
+	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
 config SCHED_DEBUG
 	bool "Collect scheduler debugging info"
 	depends on DEBUG_KERNEL && PROC_FS

commit 49023168261a7f9a2fd4a1ca1adbfea922556015
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:58 2008 +0200

    mmiotrace: cleanup
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/mmiotrace/kmmio.c b/arch/x86/kernel/mmiotrace/kmmio.c
index cd0d95fe4fe6..3ad27b8504a5 100644
--- a/arch/x86/kernel/mmiotrace/kmmio.c
+++ b/arch/x86/kernel/mmiotrace/kmmio.c
@@ -228,7 +228,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 
 	ctx->fpage = faultpage;
 	ctx->probe = get_kmmio_probe(addr);
-	ctx->saved_flags = (regs->flags & (TF_MASK|IF_MASK));
+	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
 	ctx->addr = addr;
 
 	if (ctx->probe && ctx->probe->pre_handler)
@@ -238,8 +238,8 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	 * Enable single-stepping and disable interrupts for the faulting
 	 * context. Local interrupts must not get enabled during stepping.
 	 */
-	regs->flags |= TF_MASK;
-	regs->flags &= ~IF_MASK;
+	regs->flags |= X86_EFLAGS_TF;
+	regs->flags &= ~X86_EFLAGS_IF;
 
 	/* Now we set present bit in PTE and single step. */
 	disarm_kmmio_fault_page(ctx->fpage->page, NULL);
@@ -283,7 +283,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 
 	arm_kmmio_fault_page(ctx->fpage->page, NULL);
 
-	regs->flags &= ~TF_MASK;
+	regs->flags &= ~X86_EFLAGS_TF;
 	regs->flags |= ctx->saved_flags;
 
 	/* These were acquired in kmmio_handler(). */
@@ -297,7 +297,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	 * will have TF set, in which case, continue the remaining processing
 	 * of do_debug, as if this is not a probe hit.
 	 */
-	if (!(regs->flags & TF_MASK))
+	if (!(regs->flags & X86_EFLAGS_TF))
 		ret = 1;
 out:
 	put_cpu_var(kmmio_ctx);

commit 801a175bf601f9a9d5e86e92dee9adeeb6625da8
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:58 2008 +0200

    mmiotrace: ftrace fix
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d14fe49e9638..4dcc4e85c5d6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -838,12 +838,16 @@ void __trace_mmiotrace_rw(struct trace_array *tr, struct trace_array_cpu *data,
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	spin_lock_irqsave(&data->lock, irq_flags);
+	raw_local_irq_save(irq_flags);
+	__raw_spin_lock(&data->lock);
+
 	entry			= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, 0);
 	entry->type		= TRACE_MMIO_RW;
 	entry->mmiorw		= *rw;
-	spin_unlock_irqrestore(&data->lock, irq_flags);
+
+	__raw_spin_unlock(&data->lock);
+	raw_local_irq_restore(irq_flags);
 
 	trace_wake_up();
 }
@@ -854,12 +858,16 @@ void __trace_mmiotrace_map(struct trace_array *tr, struct trace_array_cpu *data,
 	struct trace_entry *entry;
 	unsigned long irq_flags;
 
-	spin_lock_irqsave(&data->lock, irq_flags);
+	raw_local_irq_save(irq_flags);
+	__raw_spin_lock(&data->lock);
+
 	entry			= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, 0);
 	entry->type		= TRACE_MMIO_MAP;
 	entry->mmiomap		= *map;
-	spin_unlock_irqrestore(&data->lock, irq_flags);
+
+	__raw_spin_unlock(&data->lock);
+	raw_local_irq_restore(irq_flags);
 
 	trace_wake_up();
 }

commit 8a9e94c1fbfdac45a3b6811b880777c4116aa309
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:54 2008 +0200

    sysprof: update copyrights
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index ebcb66d054cc..fe23d6dba7f1 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -1,9 +1,9 @@
 /*
  * trace stack traces
  *
+ * Copyright (C) 2004-2008, Soeren Sandmann
  * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2004, 2005, Soeren Sandmann
  */
 #include <linux/kallsyms.h>
 #include <linux/debugfs.h>

commit d618b3e6e50970a6248ac857653fdd49bcd3c045
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:49 2008 +0200

    ftrace: sysprof updates
    
    make the sample period configurable.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

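As a rough usage sketch (not part of the patch), the new debugfs file can be read and written from user space; the path below assumes the tracing debugfs directory is mounted at /sys/kernel/debug/tracing, and values are in microseconds, with requests below 100 usecs clamped to 100 by the write handler:

/*
 * Hedged example: query the current sysprof sample period and request
 * a 500 usec period. TRACING_DIR is an assumption about the debugfs
 * mount point; adjust it to match the local setup.
 */
#include <stdio.h>
#include <stdlib.h>

#define TRACING_DIR "/sys/kernel/debug/tracing"

int main(void)
{
	const char *path = TRACING_DIR "/sysprof_sample_period";
	unsigned long usecs;
	FILE *f;

	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%lu", &usecs) == 1)
		printf("current sample period: %lu usecs\n", usecs);
	fclose(f);

	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	/* requests below 100 usecs are raised to 100 by the kernel side */
	fprintf(f, "500\n");
	fclose(f);

	return EXIT_SUCCESS;
}

The default sample_period of 1000000 ns corresponds to a 1000 usec (1 msec) period, matching the updated comment in the diff below.
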
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3271916ff033..95b7c48a9a1d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2800,6 +2800,9 @@ static __init void tracer_init_debugfs(void)
 		pr_warning("Could not create debugfs "
 			   "'dyn_ftrace_total_info' entry\n");
 #endif
+#ifdef CONFIG_SYSPROF_TRACER
+	init_tracer_sysprof_debugfs(d_tracer);
+#endif
 }
 
 static int trace_alloc_page(void)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b2198bc830ae..b7f85d9c80d7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -188,6 +188,8 @@ struct trace_iterator {
 void tracing_reset(struct trace_array_cpu *data);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 struct dentry *tracing_init_dentry(void);
+void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
+
 void ftrace(struct trace_array *tr,
 			    struct trace_array_cpu *data,
 			    unsigned long ip,
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index f9a09fe705b0..19406236b67b 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -20,11 +20,12 @@ static struct trace_array	*sysprof_trace;
 static int __read_mostly	tracer_enabled;
 
 /*
- * 10 msecs for now:
+ * 1 msec sample interval by default:
  */
-static const unsigned long sample_period = 1000000;
+static unsigned long sample_period = 1000000;
 static const unsigned int sample_max_depth = 512;
 
+static DEFINE_MUTEX(sample_timer_lock);
 /*
  * Per CPU hrtimers that do the profiling:
  */
@@ -166,15 +167,19 @@ static notrace void stack_reset(struct trace_array *tr)
 
 static notrace void start_stack_trace(struct trace_array *tr)
 {
+	mutex_lock(&sample_timer_lock);
 	stack_reset(tr);
 	start_stack_timers();
 	tracer_enabled = 1;
+	mutex_unlock(&sample_timer_lock);
 }
 
 static notrace void stop_stack_trace(struct trace_array *tr)
 {
+	mutex_lock(&sample_timer_lock);
 	stop_stack_timers();
 	tracer_enabled = 0;
+	mutex_unlock(&sample_timer_lock);
 }
 
 static notrace void stack_trace_init(struct trace_array *tr)
@@ -216,3 +221,64 @@ __init static int init_stack_trace(void)
 	return register_tracer(&stack_trace);
 }
 device_initcall(init_stack_trace);
+
+#define MAX_LONG_DIGITS 22
+
+static ssize_t
+sysprof_sample_read(struct file *filp, char __user *ubuf,
+		    size_t cnt, loff_t *ppos)
+{
+	char buf[MAX_LONG_DIGITS];
+	int r;
+
+	r = sprintf(buf, "%ld\n", nsecs_to_usecs(sample_period));
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
+sysprof_sample_write(struct file *filp, const char __user *ubuf,
+		     size_t cnt, loff_t *ppos)
+{
+	char buf[MAX_LONG_DIGITS];
+	unsigned long val;
+
+	if (cnt > MAX_LONG_DIGITS-1)
+		cnt = MAX_LONG_DIGITS-1;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	val = simple_strtoul(buf, NULL, 10);
+	/*
+	 * Enforce a minimum sample period of 100 usecs:
+	 */
+	if (val < 100)
+		val = 100;
+
+	mutex_lock(&sample_timer_lock);
+	stop_stack_timers();
+	sample_period = val * 1000;
+	start_stack_timers();
+	mutex_unlock(&sample_timer_lock);
+
+	return cnt;
+}
+
+static struct file_operations sysprof_sample_fops = {
+	.read		= sysprof_sample_read,
+	.write		= sysprof_sample_write,
+};
+
+void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
+{
+	struct dentry *entry;
+
+	entry = debugfs_create_file("sysprof_sample_period", 0644,
+			d_tracer, NULL, &sysprof_sample_fops);
+	if (entry)
+		return;
+	pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n");
+}

commit 9f6b4e3f4a24f2590f1c96f117fc45fbea9b0fa4
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:48 2008 +0200

    ftrace: sysprof fix
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index 7f6fcccffb88..f9a09fe705b0 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -37,21 +37,26 @@ struct stack_frame {
 
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
+	int ret;
+
 	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
 		return 0;
 
-	if (__copy_from_user_inatomic(frame, frame_pointer, sizeof(*frame)))
-		return 0;
+	ret = 1;
+	pagefault_disable();
+	if (__copy_from_user_inatomic(frame, fp, sizeof(*frame)))
+		ret = 0;
+	pagefault_enable();
 
-	return 1;
+	return ret;
 }
 
 static void timer_notify(struct pt_regs *regs, int cpu)
 {
-	const void __user *frame_pointer;
 	struct trace_array_cpu *data;
 	struct stack_frame frame;
 	struct trace_array *tr;
+	const void __user *fp;
 	int is_user;
 	int i;
 
@@ -77,21 +82,26 @@ static void timer_notify(struct pt_regs *regs, int cpu)
 
 	trace_special(tr, data, 0, current->pid, regs->ip);
 
-	frame_pointer = (void __user *)regs->bp;
+	fp = (void __user *)regs->bp;
 
 	for (i = 0; i < sample_max_depth; i++) {
-		if (!copy_stack_frame(frame_pointer, &frame))
+		frame.next_fp = 0;
+		frame.return_address = 0;
+		if (!copy_stack_frame(fp, &frame))
 			break;
-		if ((unsigned long)frame_pointer < regs->sp)
+		if ((unsigned long)fp < regs->sp)
 			break;
 
 		trace_special(tr, data, 1, frame.return_address,
-			      (unsigned long)frame_pointer);
-		frame_pointer = frame.next_fp;
+			      (unsigned long)fp);
+		fp = frame.next_fp;
 	}
 
 	trace_special(tr, data, 2, current->pid, i);
 
+	/*
+	 * Special trace entry if we overflow the max depth:
+	 */
 	if (i == sample_max_depth)
 		trace_special(tr, data, -1, -1, -1);
 }

commit ef4ab15ff34fd9c65e92bee70f58e7179da881c5
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:48 2008 +0200

    ftrace: make sysprof dependent on x86 for now
    
    that's the only tested platform for now. If there's interest we
    can make it generic easily.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e101c9a85f0f..9b49526ac0b5 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -77,7 +77,7 @@ config PREEMPT_TRACER
 
 config SYSPROF_TRACER
 	bool "Sysprof Tracer"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_KERNEL && X86
 	select TRACING
 	help
 	  This tracer provides the trace needed by the 'Sysprof' userspace
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 033a6fb2e5ff..5588ecc40985 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -557,11 +557,6 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
 	ret = trace_test_buffer(tr, &count);
 	trace->reset(tr);
 
-	if (!ret && !count) {
-		printk(KERN_CONT ".. no entries found ..");
-		ret = -1;
-	}
-
 	return ret;
 }
 #endif /* CONFIG_SYSPROF_TRACER */

commit 842af315e8b0adad58fc642eaa5e6f53525e0534
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:47 2008 +0200

    ftrace: sysprof plugin improvement
    
    add sample maximum depth.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index b78f12f77fca..7f6fcccffb88 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -23,6 +23,7 @@ static int __read_mostly	tracer_enabled;
  * 10 msecs for now:
  */
 static const unsigned long sample_period = 1000000;
+static const unsigned int sample_max_depth = 512;
 
 /*
  * Per CPU hrtimers that do the profiling:
@@ -45,8 +46,6 @@ static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 	return 1;
 }
 
-#define SYSPROF_MAX_ADDRESSES	512
-
 static void timer_notify(struct pt_regs *regs, int cpu)
 {
 	const void __user *frame_pointer;
@@ -80,7 +79,7 @@ static void timer_notify(struct pt_regs *regs, int cpu)
 
 	frame_pointer = (void __user *)regs->bp;
 
-	for (i = 0; i < SYSPROF_MAX_ADDRESSES; i++) {
+	for (i = 0; i < sample_max_depth; i++) {
 		if (!copy_stack_frame(frame_pointer, &frame))
 			break;
 		if ((unsigned long)frame_pointer < regs->sp)
@@ -93,7 +92,7 @@ static void timer_notify(struct pt_regs *regs, int cpu)
 
 	trace_special(tr, data, 2, current->pid, i);
 
-	if (i == SYSPROF_MAX_ADDRESSES)
+	if (i == sample_max_depth)
 		trace_special(tr, data, -1, -1, -1);
 }
 
@@ -126,7 +125,6 @@ static void start_stack_timers(void)
 	for_each_online_cpu(cpu) {
 		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 		start_stack_timer(cpu);
-		printk(KERN_INFO "started sysprof timer on cpu%d\n", cpu);
 	}
 	set_cpus_allowed_ptr(current, &saved_mask);
 }
@@ -136,7 +134,6 @@ static void stop_stack_timer(int cpu)
 	struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
 
 	hrtimer_cancel(hrtimer);
-	printk(KERN_INFO "cancelled sysprof timer on cpu%d\n", cpu);
 }
 
 static void stop_stack_timers(void)

commit a6dd24f8d00cbccb560b19a723e6fb9bdfb20799
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:47 2008 +0200

    ftrace: sysprof-plugin, add self-tests
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index c460e85e94ed..b2198bc830ae 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -280,6 +280,10 @@ extern int trace_selftest_startup_wakeup(struct tracer *trace,
 extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 					       struct trace_array *tr);
 #endif
+#ifdef CONFIG_SYSPROF_TRACER
+extern int trace_selftest_startup_sysprof(struct tracer *trace,
+					       struct trace_array *tr);
+#endif
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 3877dd9102f1..033a6fb2e5ff 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -537,3 +537,31 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
 	return ret;
 }
 #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
+
+#ifdef CONFIG_SYSPROF_TRACER
+int
+trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr)
+{
+	unsigned long count;
+	int ret;
+
+	/* start the tracing */
+	tr->ctrl = 1;
+	trace->init(tr);
+	/* Sleep for a 1/10 of a second */
+	msleep(100);
+	/* stop the tracing. */
+	tr->ctrl = 0;
+	trace->ctrl_update(tr);
+	/* check the trace buffer */
+	ret = trace_test_buffer(tr, &count);
+	trace->reset(tr);
+
+	if (!ret && !count) {
+		printk(KERN_CONT ".. no entries found ..");
+		ret = -1;
+	}
+
+	return ret;
+}
+#endif /* CONFIG_SYSPROF_TRACER */
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index b1137c11ef8b..b78f12f77fca 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -126,7 +126,7 @@ static void start_stack_timers(void)
 	for_each_online_cpu(cpu) {
 		set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
 		start_stack_timer(cpu);
-		printk("started timer on cpu%d\n", cpu);
+		printk(KERN_INFO "started sysprof timer on cpu%d\n", cpu);
 	}
 	set_cpus_allowed_ptr(current, &saved_mask);
 }
@@ -136,7 +136,7 @@ static void stop_stack_timer(int cpu)
 	struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
 
 	hrtimer_cancel(hrtimer);
-	printk("cancelled timer on cpu%d\n", cpu);
+	printk(KERN_INFO "cancelled sysprof timer on cpu%d\n", cpu);
 }
 
 static void stop_stack_timers(void)
@@ -200,7 +200,7 @@ static struct tracer stack_trace __read_mostly =
 	.reset		= stack_trace_reset,
 	.ctrl_update	= stack_trace_ctrl_update,
 #ifdef CONFIG_FTRACE_SELFTEST
-	.selftest    = trace_selftest_startup_stack,
+	.selftest    = trace_selftest_startup_sysprof,
 #endif
 };
 

commit 56a08bdcff20f0022bd9160c1093e56f763499aa
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:47 2008 +0200

    ftrace: extend sysprof plugin some more
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index ba55b871b3d9..b1137c11ef8b 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
- *
+ * Copyright (C) 2004, 2005, Soeren Sandmann
  */
 #include <linux/kallsyms.h>
 #include <linux/debugfs.h>
@@ -11,13 +11,17 @@
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/module.h>
+#include <linux/irq.h>
 #include <linux/fs.h>
 
 #include "trace.h"
 
-static struct trace_array	*ctx_trace;
+static struct trace_array	*sysprof_trace;
 static int __read_mostly	tracer_enabled;
 
+/*
+ * 10 msecs for now:
+ */
 static const unsigned long sample_period = 1000000;
 
 /*
@@ -25,10 +29,78 @@ static const unsigned long sample_period = 1000000;
  */
 static DEFINE_PER_CPU(struct hrtimer, stack_trace_hrtimer);
 
+struct stack_frame {
+	const void __user	*next_fp;
+	unsigned long		return_address;
+};
+
+static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+{
+	if (!access_ok(VERIFY_READ, fp, sizeof(*frame)))
+		return 0;
+
+	if (__copy_from_user_inatomic(frame, frame_pointer, sizeof(*frame)))
+		return 0;
+
+	return 1;
+}
+
+#define SYSPROF_MAX_ADDRESSES	512
+
+static void timer_notify(struct pt_regs *regs, int cpu)
+{
+	const void __user *frame_pointer;
+	struct trace_array_cpu *data;
+	struct stack_frame frame;
+	struct trace_array *tr;
+	int is_user;
+	int i;
+
+	if (!regs)
+		return;
+
+	tr = sysprof_trace;
+	data = tr->data[cpu];
+	is_user = user_mode(regs);
+
+	if (!current || current->pid == 0)
+		return;
+
+	if (is_user && current->state != TASK_RUNNING)
+		return;
+
+	if (!is_user) {
+		/* kernel */
+		ftrace(tr, data, current->pid, 1, 0);
+		return;
+
+	}
+
+	trace_special(tr, data, 0, current->pid, regs->ip);
+
+	frame_pointer = (void __user *)regs->bp;
+
+	for (i = 0; i < SYSPROF_MAX_ADDRESSES; i++) {
+		if (!copy_stack_frame(frame_pointer, &frame))
+			break;
+		if ((unsigned long)frame_pointer < regs->sp)
+			break;
+
+		trace_special(tr, data, 1, frame.return_address,
+			      (unsigned long)frame_pointer);
+		frame_pointer = frame.next_fp;
+	}
+
+	trace_special(tr, data, 2, current->pid, i);
+
+	if (i == SYSPROF_MAX_ADDRESSES)
+		trace_special(tr, data, -1, -1, -1);
+}
+
 static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
 {
 	/* trace here */
-	panic_timeout++;
+	timer_notify(get_irq_regs(), smp_processor_id());
 
 	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
 
@@ -100,7 +172,7 @@ static notrace void stop_stack_trace(struct trace_array *tr)
 
 static notrace void stack_trace_init(struct trace_array *tr)
 {
-	ctx_trace = tr;
+	sysprof_trace = tr;
 
 	if (tr->ctrl)
 		start_stack_trace(tr);