Patches contributed by Eötvös Lorand University


commit 8adf65cfae2d6c2ec5c06e4521f089c62f9eff05
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Sep 3 21:26:34 2009 +0200

    x86, msr: Fix msr-reg.S compilation with gas 2.16.1, on 32-bit too
    
    The macro was defined in the 32-bit path as well - breaking the
    build on 32-bit platforms:
    
      arch/x86/lib/msr-reg.S: Assembler messages:
      arch/x86/lib/msr-reg.S:53: Error: Bad macro parameter list
      arch/x86/lib/msr-reg.S:100: Error: invalid character '_' in mnemonic
      arch/x86/lib/msr-reg.S:101: Error: invalid character '_' in mnemonic
    
    Cc: Borislav Petkov <petkovbb@googlemail.com>
    Cc: H. Peter Anvin <hpa@zytor.com>
    LKML-Reference: <tip-f6909f394c2d4a0a71320797df72d54c49c5927e@git.kernel.org>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index d5eaf53aa670..69fa10623f21 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -50,7 +50,7 @@ ENDPROC(native_\op\()_safe_regs)
 
 #else /* X86_32 */
 
-.macro op_safe_regs op:req
+.macro op_safe_regs op
 ENTRY(native_\op\()_safe_regs)
 	CFI_STARTPROC
 	pushl_cfi %ebx

commit dc86cabe4b242446ea9aa8492c727e1220817898
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Sep 3 18:03:00 2009 +0200

    perf_counter: Fix output-sharing error path
    
    We forget to release the fd in the PERF_FLAG_FD_OUTPUT
    error path.
    
    Reorganize the error flow here to be a clean fall-through
    logic.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0aa609f69103..e0d91fdf0c3c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -4316,15 +4316,15 @@ SYSCALL_DEFINE5(perf_counter_open,
 	struct file *group_file = NULL;
 	int fput_needed = 0;
 	int fput_needed2 = 0;
-	int ret;
+	int err;
 
 	/* for future expandability... */
 	if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT))
 		return -EINVAL;
 
-	ret = perf_copy_attr(attr_uptr, &attr);
-	if (ret)
-		return ret;
+	err = perf_copy_attr(attr_uptr, &attr);
+	if (err)
+		return err;
 
 	if (!attr.exclude_kernel) {
 		if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
@@ -4348,7 +4348,7 @@ SYSCALL_DEFINE5(perf_counter_open,
 	 */
 	group_leader = NULL;
 	if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
-		ret = -EINVAL;
+		err = -EINVAL;
 		group_file = fget_light(group_fd, &fput_needed);
 		if (!group_file)
 			goto err_put_context;
@@ -4377,22 +4377,22 @@ SYSCALL_DEFINE5(perf_counter_open,
 
 	counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
 				     NULL, GFP_KERNEL);
-	ret = PTR_ERR(counter);
+	err = PTR_ERR(counter);
 	if (IS_ERR(counter))
 		goto err_put_context;
 
-	ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
-	if (ret < 0)
+	err = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
+	if (err < 0)
 		goto err_free_put_context;
 
-	counter_file = fget_light(ret, &fput_needed2);
+	counter_file = fget_light(err, &fput_needed2);
 	if (!counter_file)
 		goto err_free_put_context;
 
 	if (flags & PERF_FLAG_FD_OUTPUT) {
-		ret = perf_counter_set_output(counter, group_fd);
-		if (ret)
-			goto err_free_put_context;
+		err = perf_counter_set_output(counter, group_fd);
+		if (err)
+			goto err_fput_free_put_context;
 	}
 
 	counter->filp = counter_file;
@@ -4408,20 +4408,20 @@ SYSCALL_DEFINE5(perf_counter_open,
 	list_add_tail(&counter->owner_entry, &current->perf_counter_list);
 	mutex_unlock(&current->perf_counter_mutex);
 
+err_fput_free_put_context:
 	fput_light(counter_file, fput_needed2);
 
-out_fput:
-	fput_light(group_file, fput_needed);
-
-	return ret;
-
 err_free_put_context:
-	kfree(counter);
+	if (err < 0)
+		kfree(counter);
 
 err_put_context:
-	put_ctx(ctx);
+	if (err < 0)
+		put_ctx(ctx);
+
+	fput_light(group_file, fput_needed);
 
-	goto out_fput;
+	return err;
 }
 
 /*

commit 6f4596d9312ba5fbf5f3231ef484823c4e684d2e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Sep 3 16:22:45 2009 +0200

    perf trace: Fix read_string()
    
    We did not account for the terminating \0. Depending on what malloc()
    gave us, this resulted in corrupted version string printouts.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index b12e4903739a..a1217a10632f 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -113,8 +113,11 @@ static char *read_string(void)
 		}
 	}
 
+	/* trailing \0: */
+	i++;
+
 	/* move the file descriptor to the end of the string */
-	r = lseek(input_fd, -(r - (i+1)), SEEK_CUR);
+	r = lseek(input_fd, -(r - i), SEEK_CUR);
 	if (r < 0)
 		die("lseek");
 

commit 00fc97863c21c41e257a941e83410c56341e2a5d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Sep 3 16:22:02 2009 +0200

    perf trace: Print out in nanoseconds
    
    Print out more accurate timestamps - usecs does not cut it
    anymore on fast enough boxes ;-)
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 37b10c2cd3c9..629e602d9405 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -2708,9 +2708,9 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs,
 		return pretty_print_func_graph(data, size, event, cpu,
 					       pid, comm, secs, usecs);
 
-	printf("%16s-%-5d [%03d] %5lu.%06lu: %s: ",
+	printf("%16s-%-5d [%03d] %5lu.%09Lu: %s: ",
 	       comm, pid,  cpu,
-	       secs, usecs, event->name);
+	       secs, nsecs, event->name);
 
 	pretty_print(data, size, event);
 	printf("\n");

commit 2e01d1791168bb824226d8cc70e50374767dcc42
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Sep 3 16:21:11 2009 +0200

    perf tools: Seek to the end of the header area
    
    Leave the input fd at the data area.
    
    It does not matter right now - but seeking to the end of the
    data area certainly did not make sense.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a37a2221a0c3..ec4d4c2f9522 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -237,7 +237,7 @@ struct perf_header *perf_header__read(int fd)
 	self->data_offset = f_header.data.offset;
 	self->data_size   = f_header.data.size;
 
-	lseek(fd, self->data_offset + self->data_size, SEEK_SET);
+	lseek(fd, self->data_offset, SEEK_SET);
 
 	self->frozen = 1;
 

commit 8886f42d6d8dabeb488c706c339634a0e3e08df4
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Sep 3 16:19:57 2009 +0200

    perf trace: Fix parsing of perf.data
    
    We started parsing perf.data at head 0. This caused -D to
    segfault and it could possibly also cause incorrect trace
    entries to be displayed.
    
    Parse it at data_offset instead.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d59bf8a86743..914ab366e369 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -196,6 +196,7 @@ static int __cmd_trace(void)
 		exit(0);
 	}
 	header = perf_header__read(input);
+	head = header->data_offset;
 	sample_type = perf_header__sample_type(header);
 
 	if (!(sample_type & PERF_SAMPLE_RAW))

commit 6ddf259da76cab6555c2086386f8bcd10bbb86d2
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Sep 3 12:00:22 2009 +0200

    perf trace: Sample timestamps as well
    
    Before:
    
                perf-21082 [013]     0.000000: sched_wakeup_new: task perf:21083 [120] success=1 [015]
                perf-21082 [013]     0.000000: sched_migrate_task: task perf:21082 [120] from: 13  to: 15
                perf-21082 [013]     0.000000: sched_process_fork: parent perf:21082  child perf:21083
                true-21083 [015]     0.000000: sched_wakeup: task migration/15:33 [0] success=1 [015]
                perf-21082 [013]     0.000000: sched_switch: task perf:21082 [120] (S) ==> swapper:0 [140]
                true-21083 [015]     0.000000: sched_switch: task perf:21083 [120] (R) ==> migration/15:33 [0]
                true-21083 [011]     0.000000: sched_process_exit: task true:21083 [120]
    
    After:
    
                perf-21082 [013] 14674.797613: sched_wakeup_new: task perf:21083 [120] success=1 [015]
                perf-21082 [013] 14674.797506: sched_migrate_task: task perf:21082 [120] from: 13  to: 15
                perf-21082 [013] 14674.797610: sched_process_fork: parent perf:21082  child perf:21083
                true-21083 [015] 14674.797725: sched_wakeup: task migration/15:33 [0] success=1 [015]
                perf-21082 [013] 14674.797722: sched_switch: task perf:21082 [120] (S) ==> swapper:0 [140]
                true-21083 [015] 14674.797729: sched_switch: task perf:21083 [120] (R) ==> migration/15:33 [0]
                true-21083 [011] 14674.798159: sched_process_exit: task true:21083 [120]
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ff93f8ecba28..99a12fe86e9f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -404,6 +404,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
 	if (raw_samples) {
+		attr->sample_type	|= PERF_SAMPLE_TIME;
 		attr->sample_type	|= PERF_SAMPLE_RAW;
 		attr->sample_type	|= PERF_SAMPLE_CPU;
 	}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index bbe4c444ef8f..d59bf8a86743 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -58,6 +58,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 	struct dso *dso = NULL;
 	struct thread *thread;
 	u64 ip = event->ip.ip;
+	u64 timestamp = -1;
 	u32 cpu = -1;
 	u64 period = 1;
 	void *more_data = event->ip.__more_data;
@@ -65,6 +66,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 
 	thread = threads__findnew(event->ip.pid, &threads, &last_match);
 
+	if (sample_type & PERF_SAMPLE_TIME) {
+		timestamp = *(u64 *)more_data;
+		more_data += sizeof(u64);
+	}
+
 	if (sample_type & PERF_SAMPLE_CPU) {
 		cpu = *(u32 *)more_data;
 		more_data += sizeof(u32);
@@ -127,7 +133,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		 * field, although it should be the same than this perf
 		 * event pid
 		 */
-		print_event(cpu, raw->data, raw->size, 0, thread->comm);
+		print_event(cpu, raw->data, raw->size, timestamp, thread->comm);
 	}
 	total += period;
 

commit 0fbdea19e9394a5cb5f2f5081b028c50b558910a
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 2 21:46:00 2009 +0200

    perf_counter: Introduce new (non-)paranoia level to allow raw tracepoint access
    
    I want to sample inherited tracepoint workloads as a normal
    user and the CAP_SYS_ADMIN check prevents me from doing that
    right now.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d988dfb4bbab..0aa609f69103 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -46,12 +46,18 @@ static atomic_t nr_task_counters __read_mostly;
 
 /*
  * perf counter paranoia level:
- *  0 - not paranoid
- *  1 - disallow cpu counters to unpriv
- *  2 - disallow kernel profiling to unpriv
+ *  -1 - not paranoid at all
+ *   0 - disallow raw tracepoint access for unpriv
+ *   1 - disallow cpu counters for unpriv
+ *   2 - disallow kernel profiling for unpriv
  */
 int sysctl_perf_counter_paranoid __read_mostly = 1;
 
+static inline bool perf_paranoid_tracepoint_raw(void)
+{
+	return sysctl_perf_counter_paranoid > -1;
+}
+
 static inline bool perf_paranoid_cpu(void)
 {
 	return sysctl_perf_counter_paranoid > 0;
@@ -3971,6 +3977,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 	 * have these.
 	 */
 	if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
+			perf_paranoid_tracepoint_raw() &&
 			!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
 

commit f76bd108e5031202bb40849306f98c4afebe4ef6
Merge: cd6feeeafddb eced1dfcfcf6
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 2 21:42:56 2009 +0200

    Merge branch 'perfcounters/urgent' into perfcounters/core
    
    Merge reason: We are going to modify a place modified by
                  perfcounters/urgent.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit cd6feeeafddbef6abfe4d90fb26e42fd844d34ed
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 2 20:20:38 2009 +0200

    perf trace: Sample the CPU too
    
    Sample, record, parse and print the CPU field - it had all zeroes before.
    
    Before (watch the second column, the CPU values):
    
                perf-32685 [000]     0.000000: sched_wakeup_new: task perf:32686 [120] success=1 [011]
                perf-32685 [000]     0.000000: sched_migrate_task: task perf:32685 [120] from: 1  to: 11
                perf-32685 [000]     0.000000: sched_process_fork: parent perf:32685  child perf:32686
                true-32686 [000]     0.000000: sched_wakeup: task migration/11:25 [0] success=1 [011]
                true-32686 [000]     0.000000: sched_wakeup: task distccd:12793 [125] success=1 [015]
                true-32686 [000]     0.000000: sched_wakeup: task distccd:12793 [125] success=1 [015]
                perf-32685 [000]     0.000000: sched_switch: task perf:32685 [120] (S) ==> swapper:0 [140]
                true-32686 [000]     0.000000: sched_switch: task perf:32686 [120] (R) ==> migration/11:25 [0]
                true-32686 [000]     0.000000: sched_switch: task perf:32686 [120] (R) ==> distccd:12793 [125]
                true-32686 [000]     0.000000: sched_switch: task true:32686 [120] (R) ==> distccd:12793 [125]
                true-32686 [000]     0.000000: sched_process_exit: task true:32686 [120]
                true-32686 [000]     0.000000: sched_stat_wait: task: distccd:12793 wait: 6767985949080 [ns]
                true-32686 [000]     0.000000: sched_stat_wait: task: distccd:12793 wait: 6767986139446 [ns]
                true-32686 [000]     0.000000: sched_stat_sleep: task: distccd:12793 sleep: 132844 [ns]
                true-32686 [000]     0.000000: sched_stat_sleep: task: distccd:12793 sleep: 131724 [ns]
    
    After:
    
                perf-32685 [001]     0.000000: sched_wakeup_new: task perf:32686 [120] success=1 [011]
                perf-32685 [001]     0.000000: sched_migrate_task: task perf:32685 [120] from: 1  to: 11
                perf-32685 [001]     0.000000: sched_process_fork: parent perf:32685  child perf:32686
                true-32686 [011]     0.000000: sched_wakeup: task migration/11:25 [0] success=1 [011]
                true-32686 [015]     0.000000: sched_wakeup: task distccd:12793 [125] success=1 [015]
                true-32686 [015]     0.000000: sched_wakeup: task distccd:12793 [125] success=1 [015]
                perf-32685 [001]     0.000000: sched_switch: task perf:32685 [120] (S) ==> swapper:0 [140]
                true-32686 [011]     0.000000: sched_switch: task perf:32686 [120] (R) ==> migration/11:25 [0]
                true-32686 [015]     0.000000: sched_switch: task perf:32686 [120] (R) ==> distccd:12793 [125]
                true-32686 [015]     0.000000: sched_switch: task true:32686 [120] (R) ==> distccd:12793 [125]
                true-32686 [015]     0.000000: sched_process_exit: task true:32686 [120]
                true-32686 [015]     0.000000: sched_stat_wait: task: distccd:12793 wait: 6767985949080 [ns]
                true-32686 [015]     0.000000: sched_stat_wait: task: distccd:12793 wait: 6767986139446 [ns]
                true-32686 [015]     0.000000: sched_stat_sleep: task: distccd:12793 sleep: 132844 [ns]
                true-32686 [015]     0.000000: sched_stat_sleep: task: distccd:12793 sleep: 131724 [ns]
    
    So we can now see how this workload migrated between CPUs.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Cc: Li Zefan <lizf@cn.fujitsu.com>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index add514d53d2e..ff93f8ecba28 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -403,8 +403,10 @@ static void create_counter(int counter, int cpu, pid_t pid)
 	if (call_graph)
 		attr->sample_type	|= PERF_SAMPLE_CALLCHAIN;
 
-	if (raw_samples)
+	if (raw_samples) {
 		attr->sample_type	|= PERF_SAMPLE_RAW;
+		attr->sample_type	|= PERF_SAMPLE_CPU;
+	}
 
 	attr->mmap		= track;
 	attr->comm		= track;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8247fd04745a..bbe4c444ef8f 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -58,12 +58,19 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 	struct dso *dso = NULL;
 	struct thread *thread;
 	u64 ip = event->ip.ip;
+	u32 cpu = -1;
 	u64 period = 1;
 	void *more_data = event->ip.__more_data;
 	int cpumode;
 
 	thread = threads__findnew(event->ip.pid, &threads, &last_match);
 
+	if (sample_type & PERF_SAMPLE_CPU) {
+		cpu = *(u32 *)more_data;
+		more_data += sizeof(u32);
+		more_data += sizeof(u32); /* reserved */
+	}
+
 	if (sample_type & PERF_SAMPLE_PERIOD) {
 		period = *(u64 *)more_data;
 		more_data += sizeof(u64);
@@ -120,7 +127,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
 		 * field, although it should be the same than this perf
 		 * event pid
 		 */
-		print_event(0, raw->data, raw->size, 0, thread->comm);
+		print_event(cpu, raw->data, raw->size, 0, thread->comm);
 	}
 	total += period;