Patches contributed by Eötvös Loránd University


commit f9896bf30928922a3913a3810a4ab7908da6cfe7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:47 2008 +0200

    ftrace: add raw output
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 42f1926acf73..bebd263f582f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -153,6 +153,7 @@ enum trace_iterator_flags {
 	TRACE_ITER_SYM_OFFSET		= 0x02,
 	TRACE_ITER_SYM_ADDR		= 0x04,
 	TRACE_ITER_VERBOSE		= 0x08,
+	TRACE_ITER_RAW			= 0x10,
 };
 
 #define TRACE_ITER_SYM_MASK \
@@ -164,6 +165,7 @@ static const char *trace_options[] = {
 	"sym-offset",
 	"sym-addr",
 	"verbose",
+	"raw",
 	NULL
 };
 
@@ -1099,7 +1101,7 @@ lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
 
 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 
-static notrace void
+static notrace int
 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 {
 	struct trace_seq *s = &iter->seq;
@@ -1154,10 +1156,10 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	default:
 		trace_seq_printf(s, "Unknown type %d\n", entry->type);
 	}
+	return 1;
 }
 
-static notrace int
-print_trace_fmt(struct trace_iterator *iter)
+static notrace int print_trace_fmt(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -1222,6 +1224,43 @@ print_trace_fmt(struct trace_iterator *iter)
 	return 1;
 }
 
+static notrace int print_raw_fmt(struct trace_iterator *iter)
+{
+	struct trace_seq *s = &iter->seq;
+	struct trace_entry *entry;
+	int ret;
+	int S;
+
+	entry = iter->ent;
+
+	ret = trace_seq_printf(s, "%d %d %llu ",
+		entry->pid, iter->cpu, entry->t);
+	if (!ret)
+		return 0;
+
+	switch (entry->type) {
+	case TRACE_FN:
+		ret = trace_seq_printf(s, "%x %x\n",
+					entry->fn.ip, entry->fn.parent_ip);
+		if (!ret)
+			return 0;
+		break;
+	case TRACE_CTX:
+		S = entry->ctx.prev_state < sizeof(state_to_char) ?
+			state_to_char[entry->ctx.prev_state] : 'X';
+		ret = trace_seq_printf(s, "%d %d %c %d %d\n",
+				       entry->ctx.prev_pid,
+				       entry->ctx.prev_prio,
+				       S,
+				       entry->ctx.next_pid,
+				       entry->ctx.next_prio);
+		if (!ret)
+			return 0;
+		break;
+	}
+	return 1;
+}
+
 static int trace_empty(struct trace_iterator *iter)
 {
 	struct trace_array_cpu *data;
@@ -1238,6 +1277,17 @@ static int trace_empty(struct trace_iterator *iter)
 	return 1;
 }
 
+static int print_trace_line(struct trace_iterator *iter)
+{
+	if (trace_flags & TRACE_ITER_RAW)
+		return print_raw_fmt(iter);
+
+	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
+		return print_lat_fmt(iter, iter->idx, iter->cpu);
+
+	return print_trace_fmt(iter);
+}
+
 static int s_show(struct seq_file *m, void *v)
 {
 	struct trace_iterator *iter = v;
@@ -1259,10 +1309,7 @@ static int s_show(struct seq_file *m, void *v)
 				print_func_help_header(m);
 		}
 	} else {
-		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
-			print_lat_fmt(iter, iter->idx, iter->cpu);
-		else
-			print_trace_fmt(iter);
+		print_trace_line(iter);
 		trace_print_seq(m, &iter->seq);
 	}
 
@@ -1870,7 +1917,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 	}
 
 	while ((entry = find_next_entry_inc(iter)) != NULL) {
-		ret = print_trace_fmt(iter);
+		ret = print_trace_line(iter);
 		if (!ret)
 			break;
 

commit 8c523a9d82dbc4f3f7d972df8c0f1eacd83d0d55
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:46 2008 +0200

    ftrace: clean-up-pipe-iteration
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ce8ceb8aea6a..42f1926acf73 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -770,12 +770,12 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 	return next;
 }
 
-static notrace void
-trace_iterator_increment(struct trace_iterator *iter)
+static notrace void trace_iterator_increment(struct trace_iterator *iter)
 {
 	iter->idx++;
 	iter->next_idx[iter->cpu]++;
 	iter->next_page_idx[iter->cpu]++;
+
 	if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
 		struct trace_array_cpu *data = iter->tr->data[iter->cpu];
 
@@ -785,8 +785,7 @@ trace_iterator_increment(struct trace_iterator *iter)
 	}
 }
 
-static notrace void
-trace_consume(struct trace_iterator *iter)
+static notrace void trace_consume(struct trace_iterator *iter)
 {
 	struct trace_array_cpu *data = iter->tr->data[iter->cpu];
 
@@ -802,8 +801,7 @@ trace_consume(struct trace_iterator *iter)
 		data->trace_idx = 0;
 }
 
-static notrace void *
-find_next_entry_inc(struct trace_iterator *iter)
+static notrace void *find_next_entry_inc(struct trace_iterator *iter)
 {
 	struct trace_entry *next;
 	int next_cpu = -1;
@@ -1871,14 +1869,7 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 		cpu_set(cpu, mask);
 	}
 
-	while ((entry = find_next_entry(iter, &cpu))) {
-
-		if (!entry)
-			break;
-
-		iter->ent = entry;
-		iter->cpu = cpu;
-
+	while ((entry = find_next_entry_inc(iter)) != NULL) {
 		ret = print_trace_fmt(iter);
 		if (!ret)
 			break;
@@ -1887,7 +1878,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 
 		if (iter->seq.len >= cnt)
 			break;
-
 	}
 
 	for_each_cpu_mask(cpu, mask) {

commit cdd31cd2d7a0dcbec2cce3974f7129dd4fc8c879
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:46 2008 +0200

    ftrace: remove-idx-sync
    
    remove idx syncing - it's expensive on SMP.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 9a931c7c2da3..ce8ceb8aea6a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -99,7 +99,6 @@ notrace cycle_t ftrace_now(int cpu)
 	return time;
 }
 
-static atomic_t			tracer_counter;
 static struct trace_array	global_trace;
 
 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
@@ -661,7 +660,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 
 	pc = preempt_count();
 
-	entry->idx		= atomic_inc_return(&tracer_counter);
 	entry->preempt_count	= pc & 0xff;
 	entry->pid		= tsk->pid;
 	entry->t		= ftrace_now(raw_smp_processor_id());
@@ -757,8 +755,10 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 		if (!head_page(tr->data[cpu]))
 			continue;
 		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
-		if (ent &&
-		    (!next || (long)(next->idx - ent->idx) > 0)) {
+		/*
+		 * Pick the entry with the smallest timestamp:
+		 */
+		if (ent && (!next || ent->t < next->t)) {
 			next = ent;
 			next_cpu = cpu;
 		}
@@ -800,8 +800,6 @@ trace_consume(struct trace_iterator *iter)
 	if (data->trace_head == data->trace_tail &&
 	    data->trace_head_idx == data->trace_tail_idx)
 		data->trace_idx = 0;
-
-	trace_iterator_increment(iter);
 }
 
 static notrace void *
@@ -1160,33 +1158,6 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
 	}
 }
 
-static notrace void sync_time_offset(struct trace_iterator *iter)
-{
-	struct trace_array_cpu *prev_array, *array;
-	struct trace_entry *prev_entry, *entry;
-	cycle_t prev_t, t;
-
-	entry = iter->ent;
-	prev_entry = iter->prev_ent;
-	if (!prev_entry)
-		return;
-
-	prev_array = iter->tr->data[iter->prev_cpu];
-	array = iter->tr->data[iter->cpu];
-
-	prev_t = prev_entry->t + prev_array->time_offset;
-	t = entry->t + array->time_offset;
-
-	/*
-	 * If time goes backwards we increase the offset of
-	 * the current array, to not have observable time warps.
-	 * This will quickly synchronize the time offsets of
-	 * multiple CPUs:
-	 */
-	if (t < prev_t)
-		array->time_offset += prev_t - t;
-}
-
 static notrace int
 print_trace_fmt(struct trace_iterator *iter)
 {
@@ -1200,12 +1171,11 @@ print_trace_fmt(struct trace_iterator *iter)
 	int S;
 	int ret;
 
-	sync_time_offset(iter);
 	entry = iter->ent;
 
 	comm = trace_find_cmdline(iter->ent->pid);
 
-	t = ns2usecs(entry->t + iter->tr->data[iter->cpu]->time_offset);
+	t = ns2usecs(entry->t);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 30cad677e9d0..27fa2d06f499 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -38,7 +38,6 @@ struct trace_entry {
 	char			preempt_count;
 	int			pid;
 	cycle_t			t;
-	unsigned long		idx;
 	union {
 		struct ftrace_entry		fn;
 		struct ctx_switch_entry		ctx;
@@ -57,7 +56,6 @@ struct trace_array_cpu {
 	atomic_t		disabled;
 	spinlock_t		lock;
 	struct lock_class_key	lock_key;
-	cycle_t			time_offset;
 
 	/* these fields get copied into max-trace: */
 	unsigned		trace_head_idx;

commit 53c37c17aafcf50f7c6fddaf01dda8f9d7e31ddf
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:46 2008 +0200

    ftrace: fast, scalable, synchronized timestamps
    
    implement globally synchronized, fast and scalable time source for tracing.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e3778ab0d3f7..9a931c7c2da3 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -42,9 +42,61 @@ ns2usecs(cycle_t nsec)
 	return nsec;
 }
 
+static const int time_sync_freq_max = 128;
+static const cycle_t time_sync_thresh = 100000;
+
+static DEFINE_PER_CPU(cycle_t, time_offset);
+static DEFINE_PER_CPU(cycle_t, prev_cpu_time);
+static DEFINE_PER_CPU(int, time_sync_count);
+static DEFINE_PER_CPU(int, time_sync_freq);
+
+/*
+ * Global lock which we take every now and then to synchronize
+ * the CPUs time. This method is not warp-safe, but it's good
+ * enough to synchronize slowly diverging time sources and thus
+ * it's good enough for tracing:
+ */
+static DEFINE_SPINLOCK(time_sync_lock);
+static cycle_t prev_global_time;
+
+static notrace cycle_t __ftrace_now_sync(cycles_t time, int cpu)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&time_sync_lock, flags);
+
+	/*
+	 * Update the synchronization frequency:
+	 */
+	if (per_cpu(time_sync_freq, cpu) < time_sync_freq_max)
+		per_cpu(time_sync_freq, cpu) *= 2;
+	per_cpu(time_sync_count, cpu) = per_cpu(time_sync_freq, cpu);
+
+	if (time < prev_global_time) {
+		per_cpu(time_offset, cpu) += prev_global_time - time;
+		time = prev_global_time;
+	} else {
+		prev_global_time = time;
+	}
+
+	spin_unlock_irqrestore(&time_sync_lock, flags);
+
+	return time;
+}
+
 notrace cycle_t ftrace_now(int cpu)
 {
-	return cpu_clock(cpu);
+	cycle_t prev_cpu_time, time, delta_time;
+
+	prev_cpu_time = per_cpu(prev_cpu_time, cpu);
+	time = sched_clock() + per_cpu(time_offset, cpu);
+	delta_time = time-prev_cpu_time;
+
+	if (unlikely(delta_time > time_sync_thresh ||
+				--per_cpu(time_sync_count, cpu) <= 0))
+		time = __ftrace_now_sync(time, cpu);
+
+	return time;
 }
 
 static atomic_t			tracer_counter;

commit 750ed1a40783432d0dcb0e6c2e813a12615d7664
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:46 2008 +0200

    ftrace: timestamp syncing, prepare
    
    rename and uninline now() to ftrace_now().
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 97c40865a93e..a15e068535f8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -531,7 +531,7 @@ static int notrace __ftrace_update_code(void *ignore)
 	save_ftrace_enabled = ftrace_enabled;
 	ftrace_enabled = 0;
 
-	start = now(raw_smp_processor_id());
+	start = ftrace_now(raw_smp_processor_id());
 	ftrace_update_cnt = 0;
 
 	/* No locks needed, the machine is stopped! */
@@ -550,7 +550,7 @@ static int notrace __ftrace_update_code(void *ignore)
 
 	}
 
-	stop = now(raw_smp_processor_id());
+	stop = ftrace_now(raw_smp_processor_id());
 	ftrace_update_time = stop - start;
 	ftrace_update_tot_cnt += ftrace_update_cnt;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4550afda9607..e3778ab0d3f7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -42,6 +42,11 @@ ns2usecs(cycle_t nsec)
 	return nsec;
 }
 
+notrace cycle_t ftrace_now(int cpu)
+{
+	return cpu_clock(cpu);
+}
+
 static atomic_t			tracer_counter;
 static struct trace_array	global_trace;
 
@@ -607,7 +612,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 	entry->idx		= atomic_inc_return(&tracer_counter);
 	entry->preempt_count	= pc & 0xff;
 	entry->pid		= tsk->pid;
-	entry->t		= now(raw_smp_processor_id());
+	entry->t		= ftrace_now(raw_smp_processor_id());
 	entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b0408356f0e0..30cad677e9d0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -171,10 +171,7 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
 void update_max_tr_single(struct trace_array *tr,
 			  struct task_struct *tsk, int cpu);
 
-static inline notrace cycle_t now(int cpu)
-{
-	return cpu_clock(cpu);
-}
+extern notrace cycle_t ftrace_now(int cpu);
 
 #ifdef CONFIG_SCHED_TRACER
 extern void notrace
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 5d8ad7a09605..e5d34b78fc99 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -20,7 +20,7 @@ static notrace void function_reset(struct trace_array *tr)
 {
 	int cpu;
 
-	tr->time_start = now(tr->cpu);
+	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
 		tracing_reset(tr->data[cpu]);
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 2dfebb67fdfb..d2a6e6f1ad2d 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -136,7 +136,7 @@ check_critical_timing(struct trace_array *tr,
 	 * as long as possible:
 	 */
 	T0 = data->preempt_timestamp;
-	T1 = now(cpu);
+	T1 = ftrace_now(cpu);
 	delta = T1-T0;
 
 	local_save_flags(flags);
@@ -186,7 +186,7 @@ check_critical_timing(struct trace_array *tr,
 
 out:
 	data->critical_sequence = max_sequence;
-	data->preempt_timestamp = now(cpu);
+	data->preempt_timestamp = ftrace_now(cpu);
 	tracing_reset(data);
 	ftrace(tr, data, CALLER_ADDR0, parent_ip, flags);
 }
@@ -215,7 +215,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 	atomic_inc(&data->disabled);
 
 	data->critical_sequence = max_sequence;
-	data->preempt_timestamp = now(cpu);
+	data->preempt_timestamp = ftrace_now(cpu);
 	data->critical_start = parent_ip ? : ip;
 	tracing_reset(data);
 
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index 6c9284103a62..8d656672da93 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -61,7 +61,7 @@ static notrace void sched_switch_reset(struct trace_array *tr)
 {
 	int cpu;
 
-	tr->time_start = now(tr->cpu);
+	tr->time_start = ftrace_now(tr->cpu);
 
 	for_each_online_cpu(cpu)
 		tracing_reset(tr->data[cpu]);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 688df965f3f2..b7df825c3af9 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -92,7 +92,7 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next)
 	 * as long as possible:
 	 */
 	T0 = data->preempt_timestamp;
-	T1 = now(cpu);
+	T1 = ftrace_now(cpu);
 	delta = T1-T0;
 
 	if (!report_latency(delta))
@@ -191,7 +191,7 @@ wakeup_check_start(struct trace_array *tr, struct task_struct *p,
 
 	local_save_flags(flags);
 
-	tr->data[wakeup_cpu]->preempt_timestamp = now(cpu);
+	tr->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu);
 	ftrace(tr, tr->data[wakeup_cpu], CALLER_ADDR1, CALLER_ADDR2, flags);
 
 out_locked:

commit 4bf39a9411a4ce8712954e03a9bd1592ee345919
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:46 2008 +0200

    ftrace: cleanups
    
    no code changed.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 5e9389faaf75..97c40865a93e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -756,9 +756,11 @@ ftrace_avail_open(struct inode *inode, struct file *file)
 	ret = seq_open(file, &show_ftrace_seq_ops);
 	if (!ret) {
 		struct seq_file *m = file->private_data;
+
 		m->private = iter;
-	} else
+	} else {
 		kfree(iter);
+	}
 
 	return ret;
 }
@@ -770,6 +772,7 @@ int ftrace_avail_release(struct inode *inode, struct file *file)
 
 	seq_release(inode, file);
 	kfree(iter);
+
 	return 0;
 }
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index b3811ca74071..4550afda9607 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1245,10 +1245,10 @@ static int s_show(struct seq_file *m, void *v)
 }
 
 static struct seq_operations tracer_seq_ops = {
-	.start = s_start,
-	.next = s_next,
-	.stop = s_stop,
-	.show = s_show,
+	.start		= s_start,
+	.next		= s_next,
+	.stop		= s_stop,
+	.show		= s_show,
 };
 
 static struct trace_iterator notrace *
@@ -1397,10 +1397,10 @@ static int t_show(struct seq_file *m, void *v)
 }
 
 static struct seq_operations show_traces_seq_ops = {
-	.start = t_start,
-	.next = t_next,
-	.stop = t_stop,
-	.show = t_show,
+	.start		= t_start,
+	.next		= t_next,
+	.stop		= t_stop,
+	.show		= t_show,
 };
 
 static int show_traces_open(struct inode *inode, struct file *file)
@@ -1420,17 +1420,17 @@ static int show_traces_open(struct inode *inode, struct file *file)
 }
 
 static struct file_operations tracing_fops = {
-	.open = tracing_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = tracing_release,
+	.open		= tracing_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= tracing_release,
 };
 
 static struct file_operations tracing_lt_fops = {
-	.open = tracing_lt_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = tracing_release,
+	.open		= tracing_lt_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= tracing_release,
 };
 
 static struct file_operations show_traces_fops = {
@@ -1620,8 +1620,7 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf,
 		r = sprintf(buf, "\n");
 	mutex_unlock(&trace_types_lock);
 
-	return simple_read_from_buffer(ubuf, cnt, ppos,
-				       buf, r);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
 
 static ssize_t
@@ -1680,8 +1679,7 @@ tracing_max_lat_read(struct file *filp, char __user *ubuf,
 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
 	if (r > 64)
 		r = 64;
-	return simple_read_from_buffer(ubuf, cnt, ppos,
-				       buf, r);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
 
 static ssize_t
@@ -1891,27 +1889,27 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 }
 
 static struct file_operations tracing_max_lat_fops = {
-	.open = tracing_open_generic,
-	.read = tracing_max_lat_read,
-	.write = tracing_max_lat_write,
+	.open		= tracing_open_generic,
+	.read		= tracing_max_lat_read,
+	.write		= tracing_max_lat_write,
 };
 
 static struct file_operations tracing_ctrl_fops = {
-	.open = tracing_open_generic,
-	.read = tracing_ctrl_read,
-	.write = tracing_ctrl_write,
+	.open		= tracing_open_generic,
+	.read		= tracing_ctrl_read,
+	.write		= tracing_ctrl_write,
 };
 
 static struct file_operations set_tracer_fops = {
-	.open = tracing_open_generic,
-	.read = tracing_set_trace_read,
-	.write = tracing_set_trace_write,
+	.open		= tracing_open_generic,
+	.read		= tracing_set_trace_read,
+	.write		= tracing_set_trace_write,
 };
 
 static struct file_operations tracing_pipe_fops = {
-	.open = tracing_open_pipe,
-	.read = tracing_read_pipe,
-	.release = tracing_release_pipe,
+	.open		= tracing_open_pipe,
+	.read		= tracing_read_pipe,
+	.release	= tracing_release_pipe,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -1925,13 +1923,13 @@ tracing_read_long(struct file *filp, char __user *ubuf,
 	int r;
 
 	r = sprintf(buf, "%ld\n", *p);
-	return simple_read_from_buffer(ubuf, cnt, ppos,
-				       buf, r);
+
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
 
 static struct file_operations tracing_read_long_fops = {
-	.open = tracing_open_generic,
-	.read = tracing_read_long,
+	.open		= tracing_open_generic,
+	.read		= tracing_read_long,
 };
 #endif
 
@@ -2033,7 +2031,7 @@ static __init void tracer_init_debugfs(void)
 /* dummy trace to disable tracing */
 static struct tracer no_tracer __read_mostly =
 {
-	.name = "none",
+	.name		= "none",
 };
 
 static int trace_alloc_page(void)

commit d4c5a2f5870939d837293de87b41dda0012a4572
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:46 2008 +0200

    ftrace: fix locking
    
    we can hold all cpu trace buffer locks at once - put each into a
    separate lock class.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a40687a4413a..b3811ca74071 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1865,11 +1865,8 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
 
 	}
 
-	for_each_possible_cpu(cpu) {
+	for_each_cpu_mask(cpu, mask) {
 		data = iter->tr->data[cpu];
-
-		if (!cpu_isset(cpu, mask))
-			continue;
 		spin_unlock(&data->lock);
 		atomic_dec(&data->disabled);
 	}
@@ -2076,6 +2073,7 @@ static int trace_alloc_page(void)
 	for_each_possible_cpu(i) {
 		data = global_trace.data[i];
 		spin_lock_init(&data->lock);
+		lockdep_set_class(&data->lock, &data->lock_key);
 		page = list_entry(pages.next, struct page, lru);
 		list_del_init(&page->lru);
 		list_add_tail(&page->lru, &data->trace_pages);
@@ -2084,6 +2082,7 @@ static int trace_alloc_page(void)
 #ifdef CONFIG_TRACER_MAX_TRACE
 		data = max_tr.data[i];
 		spin_lock_init(&data->lock);
+		lockdep_set_class(&data->lock, &data->lock_key);
 		page = list_entry(pages.next, struct page, lru);
 		list_del_init(&page->lru);
 		list_add_tail(&page->lru, &data->trace_pages);
@@ -2203,5 +2202,4 @@ __init static int tracer_alloc_buffers(void)
 	}
 	return ret;
 }
-
 fs_initcall(tracer_alloc_buffers);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 29a7ea59de50..b0408356f0e0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -56,6 +56,7 @@ struct trace_array_cpu {
 	struct list_head	trace_pages;
 	atomic_t		disabled;
 	spinlock_t		lock;
+	struct lock_class_key	lock_key;
 	cycle_t			time_offset;
 
 	/* these fields get copied into max-trace: */

commit 4e3c3333f3bd7eedfd21b1155b3c7cd24fc7f754
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:45 2008 +0200

    ftrace: fix time offset
    
    fix time offset calculations and ordering, plus make code more consistent.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c736dd2e068d..8755a4370484 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -120,7 +120,7 @@ static DEFINE_SPINLOCK(ftrace_max_lock);
  * structure. (this way the maximum trace is permanently saved,
  * for later retrieval via /debugfs/tracing/latency_trace)
  */
-static void notrace
+static notrace void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
 	struct trace_array_cpu *data = tr->data[cpu];
@@ -333,15 +333,16 @@ void unregister_tracer(struct tracer *type)
 	mutex_unlock(&trace_types_lock);
 }
 
-void notrace tracing_reset(struct trace_array_cpu *data)
+notrace void tracing_reset(struct trace_array_cpu *data)
 {
 	data->trace_idx = 0;
 	data->trace_current = head_page(data);
 	data->trace_current_idx = 0;
+	data->time_offset = 0;
 }
 
 #ifdef CONFIG_FTRACE
-static void notrace
+static notrace void
 function_trace_call(unsigned long ip, unsigned long parent_ip)
 {
 	struct trace_array *tr = &global_trace;
@@ -398,7 +399,7 @@ static void trace_init_cmdlines(void)
 
 notrace void trace_stop_cmdline_recording(void);
 
-static void notrace trace_save_cmdline(struct task_struct *tsk)
+static notrace void trace_save_cmdline(struct task_struct *tsk)
 {
 	unsigned map;
 	unsigned idx;
@@ -624,6 +625,7 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
 		iter->idx++;
 		iter->next_idx[next_cpu]++;
 		iter->next_page_idx[next_cpu]++;
+
 		if (iter->next_page_idx[next_cpu] >= ENTRIES_PER_PAGE) {
 			struct trace_array_cpu *data = iter->tr->data[next_cpu];
 
@@ -635,19 +637,21 @@ static void *find_next_entry_inc(struct trace_iterator *iter)
 					data->trace_pages.next;
 		}
 	}
+	iter->prev_ent = iter->ent;
+	iter->prev_cpu = iter->cpu;
+
 	iter->ent = next;
 	iter->cpu = next_cpu;
 
 	return next ? iter : NULL;
 }
 
-static void notrace *
-s_next(struct seq_file *m, void *v, loff_t *pos)
+static notrace void *s_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct trace_iterator *iter = m->private;
-	void *ent;
 	void *last_ent = iter->ent;
 	int i = (int)*pos;
+	void *ent;
 
 	(*pos)++;
 
@@ -693,6 +697,8 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 		iter->ent = NULL;
 		iter->cpu = 0;
 		iter->idx = -1;
+		iter->prev_ent = NULL;
+		iter->prev_cpu = -1;
 
 		for_each_possible_cpu(i) {
 			iter->next_idx[i] = 0;
@@ -752,7 +758,7 @@ seq_print_sym_offset(struct seq_file *m, const char *fmt, unsigned long address)
 # define IP_FMT "%016lx"
 #endif
 
-static void notrace
+static notrace void
 seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags)
 {
 	if (!ip) {
@@ -769,7 +775,7 @@ seq_print_ip_sym(struct seq_file *m, unsigned long ip, unsigned long sym_flags)
 		seq_printf(m, " <" IP_FMT ">", ip);
 }
 
-static void notrace print_lat_help_header(struct seq_file *m)
+static notrace void print_lat_help_header(struct seq_file *m)
 {
 	seq_puts(m, "#                _------=> CPU#            \n");
 	seq_puts(m, "#               / _-----=> irqs-off        \n");
@@ -782,14 +788,14 @@ static void notrace print_lat_help_header(struct seq_file *m)
 	seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
 }
 
-static void notrace print_func_help_header(struct seq_file *m)
+static notrace void print_func_help_header(struct seq_file *m)
 {
 	seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
 	seq_puts(m, "#              | |      |          |         |\n");
 }
 
 
-static void notrace
+static notrace void
 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 {
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -858,7 +864,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 	seq_puts(m, "\n");
 }
 
-static void notrace
+static notrace void
 lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu)
 {
 	int hardirq, softirq;
@@ -895,7 +901,7 @@ lat_print_generic(struct seq_file *m, struct trace_entry *entry, int cpu)
 
 unsigned long preempt_mark_thresh = 100;
 
-static void notrace
+static notrace void
 lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs,
 		    unsigned long rel_usecs)
 {
@@ -910,7 +916,7 @@ lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs,
 
 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
 
-static void notrace
+static notrace void
 print_lat_fmt(struct seq_file *m, struct trace_iterator *iter,
 	      unsigned int trace_idx, int cpu)
 {
@@ -966,20 +972,50 @@ print_lat_fmt(struct seq_file *m, struct trace_iterator *iter,
 	}
 }
 
-static void notrace
+static notrace void sync_time_offset(struct trace_iterator *iter)
+{
+	struct trace_array_cpu *prev_array, *array;
+	struct trace_entry *prev_entry, *entry;
+	cycle_t prev_t, t;
+
+	entry = iter->ent;
+	prev_entry = iter->prev_ent;
+	if (!prev_entry)
+		return;
+
+	prev_array = iter->tr->data[iter->prev_cpu];
+	array = iter->tr->data[iter->cpu];
+
+	prev_t = prev_entry->t + prev_array->time_offset;
+	t = entry->t + array->time_offset;
+
+	/*
+	 * If time goes backwards we increase the offset of
+	 * the current array, to not have observable time warps.
+	 * This will quickly synchronize the time offsets of
+	 * multiple CPUs:
+	 */
+	if (t < prev_t)
+		array->time_offset += prev_t - t;
+}
+
+static notrace void
 print_trace_fmt(struct seq_file *m, struct trace_iterator *iter)
 {
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
-	struct trace_entry *entry = iter->ent;
+	struct trace_entry *entry;
 	unsigned long usec_rem;
 	unsigned long long t;
 	unsigned long secs;
 	char *comm;
 	int S;
 
+	sync_time_offset(iter);
+	entry = iter->ent;
+
 	comm = trace_find_cmdline(iter->ent->pid);
 
-	t = ns2usecs(entry->t);
+	t = ns2usecs(entry->t + iter->tr->data[iter->cpu]->time_offset);
 	usec_rem = do_div(t, 1000000ULL);
 	secs = (unsigned long)t;
 
@@ -1158,7 +1194,7 @@ static int tracing_lt_open(struct inode *inode, struct file *file)
 }
 
 
-static void notrace *
+static notrace void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct tracer *t = m->private;
@@ -1374,8 +1410,7 @@ tracing_ctrl_read(struct file *filp, char __user *ubuf,
 	int r;
 
 	r = sprintf(buf, "%ld\n", tr->ctrl);
-	return simple_read_from_buffer(ubuf, cnt, ppos,
-				       buf, r);
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 }
 
 static ssize_t
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cc1d34b8b771..5df8ff2b84a7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -56,6 +56,8 @@ struct trace_array_cpu {
 	void			*trace_current;
 	struct list_head	trace_pages;
 	atomic_t		disabled;
+	cycle_t			time_offset;
+
 	/* these fields get copied into max-trace: */
 	unsigned		trace_current_idx;
 	unsigned long		trace_idx;
@@ -114,14 +116,19 @@ struct tracer {
 struct trace_iterator {
 	struct trace_array	*tr;
 	struct tracer		*trace;
+
 	struct trace_entry	*ent;
+	int			cpu;
+
+	struct trace_entry	*prev_ent;
+	int			prev_cpu;
+
 	unsigned long		iter_flags;
 	loff_t			pos;
 	unsigned long		next_idx[NR_CPUS];
 	struct list_head	*next_page[NR_CPUS];
 	unsigned		next_page_idx[NR_CPUS];
 	long			idx;
-	int			cpu;
 };
 
 void notrace tracing_reset(struct trace_array_cpu *data);

commit 7bd2f24c2f769e3f8f1d4fc8b9fddf689825f6a7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:45 2008 +0200

    ftrace: add README
    
    make it easier for newbies to find their way around.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 61d2f0228866..c736dd2e068d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -995,6 +995,7 @@ print_trace_fmt(struct seq_file *m, struct trace_iterator *iter)
 			seq_printf(m, " <-");
 			seq_print_ip_sym(m, entry->fn.parent_ip, sym_flags);
 		}
+		seq_printf(m, "\n");
 		break;
 	case TRACE_CTX:
 		S = entry->ctx.prev_state < sizeof(state_to_char) ?
@@ -1007,7 +1008,6 @@ print_trace_fmt(struct seq_file *m, struct trace_iterator *iter)
 			   entry->ctx.next_prio);
 		break;
 	}
-	seq_printf(m, "\n");
 }
 
 static int trace_empty(struct trace_iterator *iter)
@@ -1332,6 +1332,39 @@ static struct file_operations tracing_iter_fops = {
 	.write = tracing_iter_ctrl_write,
 };
 
+static const char readme_msg[] =
+	"tracing mini-HOWTO:\n\n"
+	"# mkdir /debug\n"
+	"# mount -t debugfs nodev /debug\n\n"
+	"# cat /debug/tracing/available_tracers\n"
+	"wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
+	"# cat /debug/tracing/current_tracer\n"
+	"none\n"
+	"# echo sched_switch > /debug/tracing/current_tracer\n"
+	"# cat /debug/tracing/current_tracer\n"
+	"sched_switch\n"
+	"# cat /debug/tracing/iter_ctrl\n"
+	"noprint-parent nosym-offset nosym-addr noverbose\n"
+	"# echo print-parent > /debug/tracing/iter_ctrl\n"
+	"# echo 1 > /debug/tracing/tracing_enabled\n"
+	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
+	"echo 0 > /debug/tracing/tracing_enabled\n"
+;
+
+static ssize_t
+tracing_readme_read(struct file *filp, char __user *ubuf,
+		       size_t cnt, loff_t *ppos)
+{
+	return simple_read_from_buffer(ubuf, cnt, ppos,
+					readme_msg, strlen(readme_msg));
+}
+
+static struct file_operations tracing_readme_fops = {
+	.open = tracing_open_generic,
+	.read = tracing_readme_read,
+};
+
+
 static ssize_t
 tracing_ctrl_read(struct file *filp, char __user *ubuf,
 		  size_t cnt, loff_t *ppos)
@@ -1598,6 +1631,11 @@ static __init void tracer_init_debugfs(void)
 	if (!entry)
 		pr_warning("Could not create debugfs "
 			   "'tracing_threash' entry\n");
+	entry = debugfs_create_file("README", 0644, d_tracer,
+				    NULL, &tracing_readme_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'README' entry\n");
+
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 	entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,

commit c7aafc549766b87819285d3480648fc652a47bc4
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon May 12 21:20:45 2008 +0200

    ftrace: cleanups
    
    factor out code and clean it up.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2c1670c65236..953a36d6a199 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -69,7 +69,7 @@ extern void ftrace_caller(void);
 extern void ftrace_call(void);
 extern void mcount_call(void);
 #else
-# define ftrace_force_update() do { } while (0)
+# define ftrace_force_update() ({ 0; })
 #endif
 
 static inline void tracer_disable(void)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4facf5ceeb86..6d4d2e86debc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1152,10 +1152,10 @@ static int __init notrace ftrace_dynamic_init(void)
 
 core_initcall(ftrace_dynamic_init);
 #else
-# define ftrace_startup()	  do { } while (0)
-# define ftrace_shutdown()	  do { } while (0)
-# define ftrace_startup_sysctl()  do { } while (0)
-# define ftrace_shutdown_sysctl() do { } while (0)
+# define ftrace_startup()		do { } while (0)
+# define ftrace_shutdown()		do { } while (0)
+# define ftrace_startup_sysctl()	do { } while (0)
+# define ftrace_shutdown_sysctl()	do { } while (0)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f6d026f17dbb..61d2f0228866 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -142,12 +142,59 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	tracing_record_cmdline(current);
 }
 
+void check_pages(struct trace_array_cpu *data)
+{
+	struct page *page, *tmp;
+
+	BUG_ON(data->trace_pages.next->prev != &data->trace_pages);
+	BUG_ON(data->trace_pages.prev->next != &data->trace_pages);
+
+	list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
+		BUG_ON(page->lru.next->prev != &page->lru);
+		BUG_ON(page->lru.prev->next != &page->lru);
+	}
+}
+
+void *head_page(struct trace_array_cpu *data)
+{
+	struct page *page;
+
+	check_pages(data);
+	if (list_empty(&data->trace_pages))
+		return NULL;
+
+	page = list_entry(data->trace_pages.next, struct page, lru);
+	BUG_ON(&page->lru == &data->trace_pages);
+
+	return page_address(page);
+}
+
+notrace static void
+flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
+{
+	struct list_head flip_pages;
+
+	INIT_LIST_HEAD(&flip_pages);
+
+	tr1->trace_current = NULL;
+	memcpy(&tr1->trace_current_idx, &tr2->trace_current_idx,
+		sizeof(struct trace_array_cpu) -
+		offsetof(struct trace_array_cpu, trace_current_idx));
+
+	check_pages(tr1);
+	check_pages(tr2);
+	list_splice_init(&tr1->trace_pages, &flip_pages);
+	list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
+	list_splice_init(&flip_pages, &tr2->trace_pages);
+	BUG_ON(!list_empty(&flip_pages));
+	check_pages(tr1);
+	check_pages(tr2);
+}
+
 notrace void
 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
 	struct trace_array_cpu *data;
-	void *save_trace;
-	struct list_head save_pages;
 	int i;
 
 	WARN_ON_ONCE(!irqs_disabled());
@@ -155,11 +202,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	/* clear out all the previous traces */
 	for_each_possible_cpu(i) {
 		data = tr->data[i];
-		save_trace = max_tr.data[i]->trace;
-		save_pages = max_tr.data[i]->trace_pages;
-		memcpy(max_tr.data[i], data, sizeof(*data));
-		data->trace = save_trace;
-		data->trace_pages = save_pages;
+		flip_trace(max_tr.data[i], data);
 		tracing_reset(data);
 	}
 
@@ -177,8 +220,6 @@ notrace void
 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
 	struct trace_array_cpu *data = tr->data[cpu];
-	void *save_trace;
-	struct list_head save_pages;
 	int i;
 
 	WARN_ON_ONCE(!irqs_disabled());
@@ -186,11 +227,8 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	for_each_possible_cpu(i)
 		tracing_reset(max_tr.data[i]);
 
-	save_trace = max_tr.data[cpu]->trace;
-	save_pages = max_tr.data[cpu]->trace_pages;
-	memcpy(max_tr.data[cpu], data, sizeof(*data));
-	data->trace = save_trace;
-	data->trace_pages = save_pages;
+	flip_trace(max_tr.data[cpu], data);
+
 	tracing_reset(data);
 
 	__update_max_tr(tr, tsk, cpu);
@@ -234,9 +272,9 @@ int register_tracer(struct tracer *type)
 		 * If we fail, we do not register this tracer.
 		 */
 		for_each_possible_cpu(i) {
-			if (!data->trace)
-				continue;
 			data = tr->data[i];
+			if (!head_page(data))
+				continue;
 			tracing_reset(data);
 		}
 		current_trace = type;
@@ -298,7 +336,7 @@ void unregister_tracer(struct tracer *type)
 void notrace tracing_reset(struct trace_array_cpu *data)
 {
 	data->trace_idx = 0;
-	data->trace_current = data->trace;
+	data->trace_current = head_page(data);
 	data->trace_current_idx = 0;
 }
 
@@ -425,26 +463,31 @@ notrace void tracing_record_cmdline(struct task_struct *tsk)
 }
 
 static inline notrace struct trace_entry *
-tracing_get_trace_entry(struct trace_array *tr,
-			struct trace_array_cpu *data)
+tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
 {
 	unsigned long idx, idx_next;
 	struct trace_entry *entry;
-	struct page *page;
 	struct list_head *next;
+	struct page *page;
 
 	data->trace_idx++;
 	idx = data->trace_current_idx;
 	idx_next = idx + 1;
 
+	BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
+
 	entry = data->trace_current + idx * TRACE_ENTRY_SIZE;
 
 	if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
 		page = virt_to_page(data->trace_current);
-		if (unlikely(&page->lru == data->trace_pages.prev))
-			next = data->trace_pages.next;
-		else
-			next = page->lru.next;
+		/*
+		 * Roundrobin - but skip the head (which is not a real page):
+		 */
+		next = page->lru.next;
+		if (unlikely(next == &data->trace_pages))
+			next = next->next;
+		BUG_ON(next == &data->trace_pages);
+
 		page = list_entry(next, struct page, lru);
 		data->trace_current = page_address(page);
 		idx_next = 0;
@@ -456,18 +499,17 @@ tracing_get_trace_entry(struct trace_array *tr,
 }
 
 static inline notrace void
-tracing_generic_entry_update(struct trace_entry *entry,
-			     unsigned long flags)
+tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
 {
 	struct task_struct *tsk = current;
 	unsigned long pc;
 
 	pc = preempt_count();
 
-	entry->idx	= atomic_inc_return(&tracer_counter);
-	entry->preempt_count = pc & 0xff;
-	entry->pid	 = tsk->pid;
-	entry->t	 = now(raw_smp_processor_id());
+	entry->idx		= atomic_inc_return(&tracer_counter);
+	entry->preempt_count	= pc & 0xff;
+	entry->pid		= tsk->pid;
+	entry->t		= now(raw_smp_processor_id());
 	entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
 		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
 		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
@@ -476,16 +518,15 @@ tracing_generic_entry_update(struct trace_entry *entry,
 
 notrace void
 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
-		       unsigned long ip, unsigned long parent_ip,
-		       unsigned long flags)
+       unsigned long ip, unsigned long parent_ip, unsigned long flags)
 {
 	struct trace_entry *entry;
 
-	entry = tracing_get_trace_entry(tr, data);
+	entry			= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
-	entry->type	    = TRACE_FN;
-	entry->fn.ip	    = ip;
-	entry->fn.parent_ip = parent_ip;
+	entry->type		= TRACE_FN;
+	entry->fn.ip		= ip;
+	entry->fn.parent_ip	= parent_ip;
 }
 
 notrace void
@@ -496,7 +537,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
 {
 	struct trace_entry *entry;
 
-	entry = tracing_get_trace_entry(tr, data);
+	entry			= tracing_get_trace_entry(tr, data);
 	tracing_generic_entry_update(entry, flags);
 	entry->type		= TRACE_CTX;
 	entry->ctx.prev_pid	= prev->pid;
@@ -540,6 +581,8 @@ trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
 	}
 
 	page = list_entry(iter->next_page[cpu], struct page, lru);
+	BUG_ON(&data->trace_pages == &page->lru);
+
 	array = page_address(page);
 
 	return &array[iter->next_page_idx[cpu]];
@@ -554,7 +597,7 @@ find_next_entry(struct trace_iterator *iter, int *ent_cpu)
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		if (!tr->data[cpu]->trace)
+		if (!head_page(tr->data[cpu]))
 			continue;
 		ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
 		if (ent &&
@@ -762,7 +805,7 @@ print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 		name = type->name;
 
 	for_each_possible_cpu(cpu) {
-		if (tr->data[cpu]->trace) {
+		if (head_page(tr->data[cpu])) {
 			total += tr->data[cpu]->trace_idx;
 			if (tr->data[cpu]->trace_idx > tr->entries)
 				entries += tr->entries;
@@ -975,8 +1018,7 @@ static int trace_empty(struct trace_iterator *iter)
 	for_each_possible_cpu(cpu) {
 		data = iter->tr->data[cpu];
 
-		if (data->trace &&
-		    data->trace_idx)
+		if (head_page(data) && data->trace_idx)
 			return 0;
 	}
 	return 1;
@@ -1576,9 +1618,9 @@ static struct tracer no_tracer __read_mostly =
 static int trace_alloc_page(void)
 {
 	struct trace_array_cpu *data;
-	void *array;
 	struct page *page, *tmp;
 	LIST_HEAD(pages);
+	void *array;
 	int i;
 
 	/* first allocate a page for each CPU */
@@ -1610,14 +1652,14 @@ static int trace_alloc_page(void)
 	for_each_possible_cpu(i) {
 		data = global_trace.data[i];
 		page = list_entry(pages.next, struct page, lru);
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 		list_add_tail(&page->lru, &data->trace_pages);
 		ClearPageLRU(page);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 		data = max_tr.data[i];
 		page = list_entry(pages.next, struct page, lru);
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 		list_add_tail(&page->lru, &data->trace_pages);
 		SetPageLRU(page);
 #endif
@@ -1628,7 +1670,7 @@ static int trace_alloc_page(void)
 
  free_pages:
 	list_for_each_entry_safe(page, tmp, &pages, lru) {
-		list_del(&page->lru);
+		list_del_init(&page->lru);
 		__free_page(page);
 	}
 	return -ENOMEM;
@@ -1654,7 +1696,6 @@ __init static int tracer_alloc_buffers(void)
 			       "for trace buffer!\n");
 			goto free_buffers;
 		}
-		data->trace = array;
 
 		/* set the array to the list */
 		INIT_LIST_HEAD(&data->trace_pages);
@@ -1671,7 +1712,6 @@ __init static int tracer_alloc_buffers(void)
 			       "for trace buffer!\n");
 			goto free_buffers;
 		}
-		max_tr.data[i]->trace = array;
 
 		INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
 		page = virt_to_page(array);
@@ -1716,24 +1756,22 @@ __init static int tracer_alloc_buffers(void)
 		struct page *page, *tmp;
 		struct trace_array_cpu *data = global_trace.data[i];
 
-		if (data && data->trace) {
+		if (data) {
 			list_for_each_entry_safe(page, tmp,
 						 &data->trace_pages, lru) {
-				list_del(&page->lru);
+				list_del_init(&page->lru);
 				__free_page(page);
 			}
-			data->trace = NULL;
 		}
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 		data = max_tr.data[i];
-		if (data && data->trace) {
+		if (data) {
 			list_for_each_entry_safe(page, tmp,
 						 &data->trace_pages, lru) {
-				list_del(&page->lru);
+				list_del_init(&page->lru);
 				__free_page(page);
 			}
-			data->trace = NULL;
 		}
 #endif
 	}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 88edbf1f6788..cc1d34b8b771 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -53,12 +53,12 @@ struct trace_entry {
  * the trace, etc.)
  */
 struct trace_array_cpu {
-	void			*trace;
 	void			*trace_current;
-	unsigned		trace_current_idx;
 	struct list_head	trace_pages;
-	unsigned long		trace_idx;
 	atomic_t		disabled;
+	/* these fields get copied into max-trace: */
+	unsigned		trace_current_idx;
+	unsigned long		trace_idx;
 	unsigned long		saved_latency;
 	unsigned long		critical_start;
 	unsigned long		critical_end;
@@ -216,4 +216,6 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
 #endif
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
+extern void *head_page(struct trace_array_cpu *data);
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 14183b8f79c5..2dfebb67fdfb 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -144,7 +144,7 @@ check_critical_timing(struct trace_array *tr,
 	if (!report_latency(delta))
 		goto out;
 
-	spin_lock(&max_trace_lock);
+	spin_lock_irqsave(&max_trace_lock, flags);
 
 	/* check if we are still the max latency */
 	if (!report_latency(delta))
@@ -165,32 +165,24 @@ check_critical_timing(struct trace_array *tr,
 
 	update_max_tr_single(tr, current, cpu);
 
-	if (tracing_thresh)
-		printk(KERN_INFO "(%16s-%-5d|#%d): %lu us critical section "
-		       "violates %lu us threshold.\n"
-		       " => started at timestamp %lu: ",
+	if (tracing_thresh) {
+		printk(KERN_INFO "(%16s-%-5d|#%d):"
+			" %lu us critical section violates %lu us threshold.\n",
 				current->comm, current->pid,
 				raw_smp_processor_id(),
-				latency, nsecs_to_usecs(tracing_thresh), t0);
-	else
+				latency, nsecs_to_usecs(tracing_thresh));
+	} else {
 		printk(KERN_INFO "(%16s-%-5d|#%d):"
-		       " new %lu us maximum-latency "
-		       "critical section.\n => started at timestamp %lu: ",
+		       " new %lu us maximum-latency critical section.\n",
 				current->comm, current->pid,
 				raw_smp_processor_id(),
-				latency, t0);
-
-	print_symbol(KERN_CONT "<%s>\n", data->critical_start);
-	printk(KERN_CONT " =>   ended at timestamp %lu: ", t1);
-	print_symbol(KERN_CONT "<%s>\n", data->critical_end);
-	dump_stack();
-	t1 = nsecs_to_usecs(now(cpu));
-	printk(KERN_CONT " =>   dump-end timestamp %lu\n\n", t1);
+				latency);
+	}
 
 	max_sequence++;
 
 out_unlock:
-	spin_unlock(&max_trace_lock);
+	spin_unlock_irqrestore(&max_trace_lock, flags);
 
 out:
 	data->critical_sequence = max_sequence;
@@ -216,7 +208,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(!data) || unlikely(!data->trace) ||
+	if (unlikely(!data) || unlikely(!head_page(data)) ||
 	    atomic_read(&data->disabled))
 		return;
 
@@ -256,7 +248,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 	cpu = raw_smp_processor_id();
 	data = tr->data[cpu];
 
-	if (unlikely(!data) || unlikely(!data->trace) ||
+	if (unlikely(!data) || unlikely(!head_page(data)) ||
 	    !data->critical_start || atomic_read(&data->disabled))
 		return;
 
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3d10ff01f805..688df965f3f2 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -107,24 +107,18 @@ wakeup_sched_switch(struct task_struct *prev, struct task_struct *next)
 	update_max_tr(tr, wakeup_task, wakeup_cpu);
 
 	if (tracing_thresh) {
-		printk(KERN_INFO "(%16s-%-5d|#%d): %lu us wakeup latency "
-		       "violates %lu us threshold.\n"
-		       " => started at timestamp %lu: ",
+		printk(KERN_INFO "(%16s-%-5d|#%d):"
+			" %lu us wakeup latency violates %lu us threshold.\n",
 				wakeup_task->comm, wakeup_task->pid,
 				raw_smp_processor_id(),
-				latency, nsecs_to_usecs(tracing_thresh), t0);
+				latency, nsecs_to_usecs(tracing_thresh));
 	} else {
-		printk(KERN_INFO "(%16s-%-5d|#%d): new %lu us maximum "
-		       "wakeup latency.\n => started at timestamp %lu: ",
+		printk(KERN_INFO "(%16s-%-5d|#%d):"
+			" new %lu us maximum wakeup latency.\n",
 				wakeup_task->comm, wakeup_task->pid,
-				cpu, latency, t0);
+				cpu, latency);
 	}
 
-	printk(KERN_CONT "   ended at timestamp %lu: ", t1);
-	dump_stack();
-	t1 = nsecs_to_usecs(now(cpu));
-	printk(KERN_CONT "   dump-end timestamp %lu\n\n", t1);
-
 out_unlock:
 	__wakeup_reset(tr);
 	spin_unlock_irqrestore(&wakeup_lock, flags);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index ef4d3cc009f5..c01874c3b1f9 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -1,6 +1,7 @@
 /* Include in trace.c */
 
 #include <linux/kthread.h>
+#include <linux/delay.h>
 
 static inline int trace_valid_entry(struct trace_entry *entry)
 {
@@ -15,28 +16,29 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 static int
 trace_test_buffer_cpu(struct trace_array *tr, struct trace_array_cpu *data)
 {
-	struct page *page;
 	struct trace_entry *entries;
+	struct page *page;
 	int idx = 0;
 	int i;
 
+	BUG_ON(list_empty(&data->trace_pages));
 	page = list_entry(data->trace_pages.next, struct page, lru);
 	entries = page_address(page);
 
-	if (data->trace != entries)
+	if (head_page(data) != entries)
 		goto failed;
 
 	/*
 	 * The starting trace buffer always has valid elements,
-	 * if any element exits.
+	 * if any element exists.
 	 */
-	entries = data->trace;
+	entries = head_page(data);
 
 	for (i = 0; i < tr->entries; i++) {
 
-		if (i < data->trace_idx &&
-		    !trace_valid_entry(&entries[idx])) {
-			printk(KERN_CONT ".. invalid entry %d ", entries[idx].type);
+		if (i < data->trace_idx && !trace_valid_entry(&entries[idx])) {
+			printk(KERN_CONT ".. invalid entry %d ",
+				entries[idx].type);
 			goto failed;
 		}
 
@@ -80,11 +82,10 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
 	int ret = 0;
 
 	for_each_possible_cpu(cpu) {
-		if (!tr->data[cpu]->trace)
+		if (!head_page(tr->data[cpu]))
 			continue;
 
 		cnt += tr->data[cpu]->trace_idx;
-		printk("%d: count = %ld\n", cpu, cnt);
 
 		ret = trace_test_buffer_cpu(tr, tr->data[cpu]);
 		if (ret)
@@ -117,6 +118,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 	}
 
 	/* start the tracing */
+	ftrace_enabled = 1;
+
 	tr->ctrl = 1;
 	trace->init(tr);
 	/* Sleep for a 1/10 of a second */
@@ -124,6 +127,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 	/* stop the tracing. */
 	tr->ctrl = 0;
 	trace->ctrl_update(tr);
+	ftrace_enabled = 0;
+
 	/* check the trace buffer */
 	ret = trace_test_buffer(tr, &count);
 	trace->reset(tr);
@@ -328,7 +333,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
 
 	/* create a high prio thread */
 	p = kthread_run(trace_wakeup_test_thread, &isrt, "ftrace-test");
-	if (!IS_ERR(p)) {
+	if (IS_ERR(p)) {
 		printk(KERN_CONT "Failed to create ftrace wakeup test thread ");
 		return -1;
 	}