Patches contributed by Eötvös Loránd University
commit 038e836e97e70c4ad2b5058b07fc7207f50b59dd
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jun 15 09:57:59 2009 +0200
perf_counter, x86: Fix kernel-space call-chains
Kernel-space call-chains were trimmed at the first entry because
we never processed anything beyond the first stack context.
Allow the backtrace to jump from the NMI stack to the IRQ stack, then to the
task stack, and finally to the user-space stack.
Also calculate the stack and bp variables correctly so that the
stack walker does not exit early.
We can get deep traces as a result, visible in perf report -D output:
0x32af0 [0xe0]: PERF_EVENT (IP, 5): 15134: 0xffffffff815225fd period: 1
... chain: u:2, k:22, nr:24
..... 0: 0xffffffff815225fd
..... 1: 0xffffffff810ac51c
..... 2: 0xffffffff81018e29
..... 3: 0xffffffff81523939
..... 4: 0xffffffff81524b8f
..... 5: 0xffffffff81524bd9
..... 6: 0xffffffff8105e498
..... 7: 0xffffffff8152315a
..... 8: 0xffffffff81522c3a
..... 9: 0xffffffff810d9b74
..... 10: 0xffffffff810dbeec
..... 11: 0xffffffff810dc3fb
This is a 22-entry kernel-space chain.
(We still only record reliable stack entries.)
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 09d8cb69c3f3..6d5e7cfd97e7 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1575,8 +1575,8 @@ static void backtrace_warning(void *data, char *msg)
static int backtrace_stack(void *data, char *name)
{
- /* Don't bother with IRQ stacks for now */
- return -1;
+ /* Process all stacks: */
+ return 0;
}
static void backtrace_address(void *data, unsigned long addr, int reliable)
@@ -1594,6 +1594,8 @@ static const struct stacktrace_ops backtrace_ops = {
.address = backtrace_address,
};
+#include "../dumpstack.h"
+
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
@@ -1601,26 +1603,20 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
char *stack;
int nr = entry->nr;
- callchain_store(entry, instruction_pointer(regs));
+ callchain_store(entry, regs->ip);
stack = ((char *)regs + sizeof(struct pt_regs));
#ifdef CONFIG_FRAME_POINTER
- bp = frame_pointer(regs);
+ get_bp(bp);
#else
bp = 0;
#endif
- dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry);
+ dump_trace(NULL, regs, (void *)&stack, bp, &backtrace_ops, entry);
entry->kernel = entry->nr - nr;
}
-
-struct stack_frame {
- const void __user *next_fp;
- unsigned long return_address;
-};
-
static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
{
int ret;
@@ -1652,7 +1648,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
callchain_store(entry, regs->ip);
while (entry->nr < MAX_STACK_DEPTH) {
- frame.next_fp = NULL;
+ frame.next_frame = NULL;
frame.return_address = 0;
if (!copy_stack_frame(fp, &frame))
@@ -1662,7 +1658,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
break;
callchain_store(entry, frame.return_address);
- fp = frame.next_fp;
+ fp = frame.next_frame;
}
entry->user = entry->nr - nr;
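For reference, the user-space half of this walker relies on the classic frame-pointer
layout: each frame starts with the saved caller frame pointer, immediately followed by
the return address, so the chain is recovered by following saved frame pointers until
they stop moving up the stack. The following is a minimal, stand-alone user-space
sketch of that technique (show_callchain(), level1() and level2() are made-up names;
it only works in binaries built with frame pointers, e.g. -fno-omit-frame-pointer on
x86-64):

/*
 * Minimal frame-pointer call-chain walk in user space, mirroring the
 * struct stack_frame layout used by perf_callchain_user() above.
 * Hypothetical example; build with: gcc -O0 -fno-omit-frame-pointer
 */
#include <stdio.h>

struct stack_frame {
	struct stack_frame	*next_frame;	/* saved caller frame pointer */
	unsigned long		return_address;	/* saved return address */
};

static void __attribute__((noinline)) show_callchain(void)
{
	struct stack_frame *fp = __builtin_frame_address(0);
	int depth = 0;

	while (fp && depth < 16) {
		printf("..... %2d: %p\n", depth, (void *)fp->return_address);
		/* frames must move up the stack, otherwise stop: */
		if (fp->next_frame <= fp)
			break;
		fp = fp->next_frame;
		depth++;
	}
}

static void __attribute__((noinline)) level2(void) { show_callchain(); }
static void __attribute__((noinline)) level1(void) { level2(); }

int main(void)
{
	level1();
	return 0;
}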
commit 5a6cec3abbdb74244caab68db100825a8c4ac02d
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri May 29 11:25:09 2009 +0200
perf_counter, x86: Fix call-chain walking
Fix the ptregs variant when we hit user-mode tasks.
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 77a59a5566a8..09d8cb69c3f3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1644,7 +1644,9 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
const void __user *fp;
int nr = entry->nr;
- regs = (struct pt_regs *)current->thread.sp0 - 1;
+ if (!user_mode(regs))
+ regs = task_pt_regs(current);
+
fp = (void __user *)regs->bp;
callchain_store(entry, regs->ip);
@@ -1656,7 +1658,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
if (!copy_stack_frame(fp, &frame))
break;
- if ((unsigned long)fp < user_stack_pointer(regs))
+ if ((unsigned long)fp < regs->sp)
break;
callchain_store(entry, frame.return_address);
commit 3efa1cc99ec51bc7a7ae0011a16619fd20dbe6ea
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Jun 14 15:04:15 2009 +0200
perf record/report: Add call graph / call chain profiling
Add the first steps of call-graph profiling:
- add the -g (--call-graph) option to perf record
- parse the call-graph record and print it out under -D (--dump-trace)
The call-graph data is not put into the histogram yet, but it
can be seen that it's being processed correctly:
0x3ce0 [0x38]: event: 35
.
. ... raw event: size 56 bytes
. 0000: 23 00 00 00 05 00 38 00 d4 df 0e 81 ff ff ff ff #.....8........
. 0010: 60 0b 00 00 60 0b 00 00 03 00 00 00 01 00 02 00 `...`..........
. 0020: d4 df 0e 81 ff ff ff ff a0 61 ed 41 36 00 00 00 .........a.A6..
. 0030: 04 92 e6 41 36 00 00 00 .a.A6..
.
0x3ce0 [0x38]: PERF_EVENT (IP, 5): 2912: 0xffffffff810edfd4 period: 1
... chain: u:2, k:1, nr:3
..... 0: 0xffffffff810edfd4
..... 1: 0x3641ed61a0
..... 2: 0x3641e69204
... thread: perf:2912
...... dso: [kernel]
This shows a 3-entry call-graph, with one kernel-space and two user-space
entries.
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0f5771f615da..a177a591b52c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -37,6 +37,7 @@ static pid_t target_pid = -1;
static int inherit = 1;
static int force = 0;
static int append_file = 0;
+static int call_graph = 0;
static int verbose = 0;
static long samples;
@@ -351,11 +352,16 @@ static void create_counter(int counter, int cpu, pid_t pid)
int track = 1;
attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
+
if (freq) {
attr->sample_type |= PERF_SAMPLE_PERIOD;
attr->freq = 1;
attr->sample_freq = freq;
}
+
+ if (call_graph)
+ attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
+
attr->mmap = track;
attr->comm = track;
attr->inherit = (cpu < 0) && inherit;
@@ -555,6 +561,8 @@ static const struct option options[] = {
"profile at this frequency"),
OPT_INTEGER('m', "mmap-pages", &mmap_pages,
"number of mmap data pages"),
+ OPT_BOOLEAN('g', "call-graph", &call_graph,
+ "do call-graph (stack chain/backtrace) recording"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_END()
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 37515da637f7..aebba5659345 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -36,6 +36,7 @@ static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
static int dump_trace = 0;
#define dprintf(x...) do { if (dump_trace) printf(x); } while (0)
+#define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0)
static int verbose;
static int full_paths;
@@ -43,11 +44,19 @@ static int full_paths;
static unsigned long page_size;
static unsigned long mmap_window = 32;
+struct ip_chain_event {
+ __u16 nr;
+ __u16 hv;
+ __u16 kernel;
+ __u16 user;
+ __u64 ips[];
+};
+
struct ip_event {
struct perf_event_header header;
__u64 ip;
__u32 pid, tid;
- __u64 period;
+ unsigned char __more_data[];
};
struct mmap_event {
@@ -944,9 +953,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
__u64 ip = event->ip.ip;
__u64 period = 1;
struct map *map = NULL;
+ void *more_data = event->ip.__more_data;
+ struct ip_chain_event *chain;
- if (event->header.type & PERF_SAMPLE_PERIOD)
- period = event->ip.period;
+ if (event->header.type & PERF_SAMPLE_PERIOD) {
+ period = *(__u64 *)more_data;
+ more_data += sizeof(__u64);
+ }
dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n",
(void *)(offset + head),
@@ -956,6 +969,22 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head)
(void *)(long)ip,
(long long)period);
+ if (event->header.type & PERF_SAMPLE_CALLCHAIN) {
+ int i;
+
+ chain = (void *)more_data;
+
+ if (dump_trace) {
+ dprintf("... chain: u:%d, k:%d, nr:%d\n",
+ chain->user,
+ chain->kernel,
+ chain->nr);
+
+ for (i = 0; i < chain->nr; i++)
+ dprintf("..... %2d: %p\n", i, (void *)chain->ips[i]);
+ }
+ }
+
dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid);
if (thread == NULL) {
@@ -1098,30 +1127,34 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head)
static void trace_event(event_t *event)
{
unsigned char *raw_event = (void *)event;
+ char *color = PERF_COLOR_BLUE;
int i, j;
if (!dump_trace)
return;
- dprintf(".\n. ... raw event: size %d bytes\n", event->header.size);
+ dprintf(".");
+ cdprintf("\n. ... raw event: size %d bytes\n", event->header.size);
for (i = 0; i < event->header.size; i++) {
- if ((i & 15) == 0)
- dprintf(". %04x: ", i);
+ if ((i & 15) == 0) {
+ dprintf(".");
+ cdprintf(" %04x: ", i);
+ }
- dprintf(" %02x", raw_event[i]);
+ cdprintf(" %02x", raw_event[i]);
if (((i & 15) == 15) || i == event->header.size-1) {
- dprintf(" ");
+ cdprintf(" ");
for (j = 0; j < 15-(i & 15); j++)
- dprintf(" ");
+ cdprintf(" ");
for (j = 0; j < (i & 15); j++) {
if (isprint(raw_event[i-15+j]))
- dprintf("%c", raw_event[i-15+j]);
+ cdprintf("%c", raw_event[i-15+j]);
else
- dprintf(".");
+ cdprintf(".");
}
- dprintf("\n");
+ cdprintf("\n");
}
}
dprintf(".\n");
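The sample record decoded here is variable-length: the fixed ip/pid/tid header is
followed by an optional 64-bit period and then the call-chain block with its own
entry counters, which is what perf record -g adds. Below is a stand-alone sketch of
the same parsing pattern against a synthetic buffer; parse_sample(), the boolean
flags and the test data are made up for the example, and the u16-counter layout of
struct ip_chain_event is the one introduced in the diff above (it does not match
later perf ABIs):

/*
 * Stand-alone sketch of decoding the variable-length sample payload,
 * in the spirit of process_overflow_event() above. parse_sample(),
 * the flags and the synthetic buffer are hypothetical; the
 * ip_chain_event layout is the one from this patch, not later ABIs.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

struct ip_chain_event {
	uint16_t nr;
	uint16_t hv;
	uint16_t kernel;
	uint16_t user;
	uint64_t ips[];
};

static void parse_sample(const unsigned char *more_data,
			 int have_period, int have_callchain)
{
	uint64_t period = 1;
	int i;

	if (have_period) {
		period = *(const uint64_t *)more_data;
		more_data += sizeof(uint64_t);
	}
	printf("period: %llu\n", (unsigned long long)period);

	if (have_callchain) {
		const struct ip_chain_event *chain =
			(const struct ip_chain_event *)more_data;

		printf("... chain: u:%u, k:%u, nr:%u\n",
		       chain->user, chain->kernel, chain->nr);
		for (i = 0; i < chain->nr; i++)
			printf("..... %2d: %#llx\n", i,
			       (unsigned long long)chain->ips[i]);
	}
}

int main(void)
{
	/* Synthetic payload: period = 1, then a 3-entry chain. */
	uint64_t ips[3] = { 0xffffffff810edfd4ULL, 0x3641ed61a0ULL,
			    0x3641e69204ULL };
	size_t size = sizeof(uint64_t) + sizeof(struct ip_chain_event) +
		      sizeof(ips);
	unsigned char *buf = malloc(size);
	struct ip_chain_event *chain;

	*(uint64_t *)buf = 1;
	chain = (struct ip_chain_event *)(buf + sizeof(uint64_t));
	chain->nr = 3;
	chain->hv = 0;
	chain->kernel = 1;
	chain->user = 2;
	memcpy(chain->ips, ips, sizeof(ips));

	parse_sample(buf, 1, 1);
	free(buf);
	return 0;
}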
commit 8465b05046652cfde3d47692cab2e8ba962f140f
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Jun 14 14:44:07 2009 +0200
perf report: Print out raw events in hexa
Print out events in hexa dump format when -D is specified:
0x4868 [0x48]: event: 1
.
. ... raw event: size 72 bytes
. 0000: 01 00 00 00 00 00 48 00 d4 72 00 00 d4 72 00 00 ......H..r...r.
. 0010: 00 00 40 f2 3e 00 00 00 00 30 01 00 00 00 00 00 ..@.>....0.....
. 0020: 00 00 00 00 00 00 00 00 2f 75 73 72 2f 6c 69 62 ......../usr/li
. 0030: 36 34 2f 6c 69 62 65 6c 66 2d 30 2e 31 34 31 2e 64/libelf-0.141
. 0040: 73 6f 00 00 00 00 00 00 f-0.141
.
0x4868 [0x48]: PERF_EVENT_MMAP 29396: [0x3ef2400000(0x13000) @ (nil)]: /usr/lib64/libelf-0.141.so
This helps debug mis-parsed data files and makes it easier to add
new sample/trace formats.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 82fa93b4db99..37515da637f7 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1095,9 +1095,43 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}
+static void trace_event(event_t *event)
+{
+ unsigned char *raw_event = (void *)event;
+ int i, j;
+
+ if (!dump_trace)
+ return;
+
+ dprintf(".\n. ... raw event: size %d bytes\n", event->header.size);
+
+ for (i = 0; i < event->header.size; i++) {
+ if ((i & 15) == 0)
+ dprintf(". %04x: ", i);
+
+ dprintf(" %02x", raw_event[i]);
+
+ if (((i & 15) == 15) || i == event->header.size-1) {
+ dprintf(" ");
+ for (j = 0; j < 15-(i & 15); j++)
+ dprintf(" ");
+ for (j = 0; j < (i & 15); j++) {
+ if (isprint(raw_event[i-15+j]))
+ dprintf("%c", raw_event[i-15+j]);
+ else
+ dprintf(".");
+ }
+ dprintf("\n");
+ }
+ }
+ dprintf(".\n");
+}
+
static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
+ trace_event(event);
+
if (event->header.misc & PERF_EVENT_MISC_OVERFLOW)
return process_overflow_event(event, offset, head);
@@ -1204,7 +1238,7 @@ static int __cmd_report(void)
size = event->header.size;
- dprintf("%p [%p]: event: %d\n",
+ dprintf("\n%p [%p]: event: %d\n",
(void *)(offset + head),
(void *)(long)event->header.size,
event->header.type);
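The dump format above interleaves the raw bytes with their printable-ASCII rendering,
sixteen bytes per row. For experimenting with that format outside perf, here is a
simplified stand-alone version of the same idea; hex_dump() and the sample string are
made up for the example, and the output is not byte-for-byte identical to
trace_event() above:

/*
 * Stand-alone hex + ASCII dump in the style of the -D raw-event output
 * above (simplified; not identical to trace_event()).
 */
#include <stdio.h>
#include <ctype.h>

static void hex_dump(const unsigned char *buf, unsigned int size)
{
	unsigned int i, j;

	printf(".\n. ... raw event: size %u bytes\n", size);

	for (i = 0; i < size; i += 16) {
		printf(". %04x: ", i);

		for (j = 0; j < 16; j++) {
			if (i + j < size)
				printf(" %02x", buf[i + j]);
			else
				printf("   ");
		}

		printf("  ");
		for (j = 0; j < 16 && i + j < size; j++)
			putchar(isprint(buf[i + j]) ? buf[i + j] : '.');
		putchar('\n');
	}
	printf(".\n");
}

int main(void)
{
	const char sample[] = "/usr/lib64/libelf-0.141.so";

	hex_dump((const unsigned char *)sample, sizeof(sample));
	return 0;
}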
commit bc3bf8fd330ce981ce632a1a4a283eee46838f32
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Jun 13 08:29:33 2009 +0200
[SCSI] cnic: fix error: implicit declaration of function ‘__symbol_get’
drivers/net/cnic.c: In function ‘init_bnx2_cnic’:
drivers/net/cnic.c:2520: error: implicit declaration of function ‘__symbol_get’
drivers/net/cnic.c:2520: warning: assignment makes pointer from integer without a cast
make[1]: *** [drivers/net/cnic.o] Error 1
make: *** [drivers/net/cnic.o] Error 2
Caused by not including linux/module.h
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index a9e2fd35bb41..44f77eb1180f 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -25,6 +25,8 @@
#include <linux/delay.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
+#include <linux/module.h>
+
#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
#define BCM_VLAN 1
#endif
commit ef281a196d66b8bc2d067a3704712e5b93691fbc
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Jun 13 15:40:35 2009 +0200
perf stat: Enable raw data to be printed
If -vv (very verbose) is specified, print out raw data
in the following format:
$ perf stat -vv -r 3 ./loop_1b_instructions
[ perf stat: executing run #1 ... ]
[ perf stat: executing run #2 ... ]
[ perf stat: executing run #3 ... ]
debug: runtime[0]: 235871872
debug: walltime[0]: 236646752
debug: runtime_cycles[0]: 755150182
debug: counter/0[0]: 235871872
debug: counter/1[0]: 235871872
debug: counter/2[0]: 235871872
debug: scaled[0]: 0
debug: counter/0[1]: 2
debug: counter/1[1]: 235870662
debug: counter/2[1]: 235870662
debug: scaled[1]: 0
debug: counter/0[2]: 1
debug: counter/1[2]: 235870437
debug: counter/2[2]: 235870437
debug: scaled[2]: 0
debug: counter/0[3]: 140
debug: counter/1[3]: 235870298
debug: counter/2[3]: 235870298
debug: scaled[3]: 0
debug: counter/0[4]: 755150182
debug: counter/1[4]: 235870145
debug: counter/2[4]: 235870145
debug: scaled[4]: 0
debug: counter/0[5]: 1001411258
debug: counter/1[5]: 235868838
debug: counter/2[5]: 235868838
debug: scaled[5]: 0
debug: counter/0[6]: 27897
debug: counter/1[6]: 235868560
debug: counter/2[6]: 235868560
debug: scaled[6]: 0
debug: counter/0[7]: 2910
debug: counter/1[7]: 235868151
debug: counter/2[7]: 235868151
debug: scaled[7]: 0
debug: runtime[0]: 235980257
debug: walltime[0]: 236770942
debug: runtime_cycles[0]: 755114546
debug: counter/0[0]: 235980257
debug: counter/1[0]: 235980257
debug: counter/2[0]: 235980257
debug: scaled[0]: 0
debug: counter/0[1]: 3
debug: counter/1[1]: 235980049
debug: counter/2[1]: 235980049
debug: scaled[1]: 0
debug: counter/0[2]: 1
debug: counter/1[2]: 235979907
debug: counter/2[2]: 235979907
debug: scaled[2]: 0
debug: counter/0[3]: 135
debug: counter/1[3]: 235979780
debug: counter/2[3]: 235979780
debug: scaled[3]: 0
debug: counter/0[4]: 755114546
debug: counter/1[4]: 235979652
debug: counter/2[4]: 235979652
debug: scaled[4]: 0
debug: counter/0[5]: 1001439771
debug: counter/1[5]: 235979304
debug: counter/2[5]: 235979304
debug: scaled[5]: 0
debug: counter/0[6]: 23723
debug: counter/1[6]: 235979050
debug: counter/2[6]: 235979050
debug: scaled[6]: 0
debug: counter/0[7]: 2213
debug: counter/1[7]: 235978820
debug: counter/2[7]: 235978820
debug: scaled[7]: 0
debug: runtime[0]: 235888002
debug: walltime[0]: 236700533
debug: runtime_cycles[0]: 754881504
debug: counter/0[0]: 235888002
debug: counter/1[0]: 235888002
debug: counter/2[0]: 235888002
debug: scaled[0]: 0
debug: counter/0[1]: 2
debug: counter/1[1]: 235887793
debug: counter/2[1]: 235887793
debug: scaled[1]: 0
debug: counter/0[2]: 1
debug: counter/1[2]: 235887645
debug: counter/2[2]: 235887645
debug: scaled[2]: 0
debug: counter/0[3]: 135
debug: counter/1[3]: 235887499
debug: counter/2[3]: 235887499
debug: scaled[3]: 0
debug: counter/0[4]: 754881504
debug: counter/1[4]: 235887368
debug: counter/2[4]: 235887368
debug: scaled[4]: 0
debug: counter/0[5]: 1001401731
debug: counter/1[5]: 235887024
debug: counter/2[5]: 235887024
debug: scaled[5]: 0
debug: counter/0[6]: 24212
debug: counter/1[6]: 235886786
debug: counter/2[6]: 235886786
debug: scaled[6]: 0
debug: counter/0[7]: 1824
debug: counter/1[7]: 235886560
debug: counter/2[7]: 235886560
debug: scaled[7]: 0
Performance counter stats for '/home/mingo/loop_1b_instructions' (3 runs):
235.913377 task-clock-msecs # 0.997 CPUs ( +- 0.011% )
2 context-switches # 0.000 M/sec ( +- 0.000% )
1 CPU-migrations # 0.000 M/sec ( +- 0.000% )
136 page-faults # 0.001 M/sec ( +- 0.730% )
755048744 cycles # 3200.534 M/sec ( +- 0.009% )
1001417586 instructions # 1.326 IPC ( +- 0.001% )
25277 cache-references # 0.107 M/sec ( +- 3.988% )
2315 cache-misses # 0.010 M/sec ( +- 9.845% )
0.236706075 seconds time elapsed.
This allows the summary stats to be validated.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 0cbd5d6874ec..e8346f95fbb0 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -160,7 +160,7 @@ uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
# CFLAGS and LDFLAGS are for the users to override from the command line.
CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6
-LDFLAGS = -lpthread -lrt -lelf
+LDFLAGS = -lpthread -lrt -lelf -lm
ALL_CFLAGS = $(CFLAGS)
ALL_LDFLAGS = $(LDFLAGS)
STRIP ?= strip
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9eb42b1ae784..e5b3c0ff03a9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -324,9 +324,9 @@ static void print_counter(int counter)
}
/*
- * Normalize noise values down to stddev:
+ * normalize_noise noise values down to stddev:
*/
-static void normalize(__u64 *val)
+static void normalize_noise(__u64 *val)
{
double res;
@@ -335,6 +335,13 @@ static void normalize(__u64 *val)
*val = (__u64)res;
}
+static void update_avg(const char *name, int idx, __u64 *avg, __u64 *val)
+{
+ *avg += *val;
+
+ if (verbose > 1)
+ fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val);
+}
/*
* Calculate the averages and noises:
*/
@@ -342,16 +349,23 @@ static void calc_avg(void)
{
int i, j;
+ if (verbose > 1)
+ fprintf(stderr, "\n");
+
for (i = 0; i < run_count; i++) {
- runtime_nsecs_avg += runtime_nsecs[i];
- walltime_nsecs_avg += walltime_nsecs[i];
- runtime_cycles_avg += runtime_cycles[i];
+ update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i);
+ update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i);
+ update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i);
for (j = 0; j < nr_counters; j++) {
- event_res_avg[j][0] += event_res[i][j][0];
- event_res_avg[j][1] += event_res[i][j][1];
- event_res_avg[j][2] += event_res[i][j][2];
- event_scaled_avg[j] += event_scaled[i][j];
+ update_avg("counter/0", j,
+ event_res_avg[j]+0, event_res[i][j]+0);
+ update_avg("counter/1", j,
+ event_res_avg[j]+1, event_res[i][j]+1);
+ update_avg("counter/2", j,
+ event_res_avg[j]+2, event_res[i][j]+2);
+ update_avg("scaled", j,
+ event_scaled_avg + j, event_scaled[i]+j);
}
}
runtime_nsecs_avg /= run_count;
@@ -382,14 +396,14 @@ static void calc_avg(void)
}
}
- normalize(&runtime_nsecs_noise);
- normalize(&walltime_nsecs_noise);
- normalize(&runtime_cycles_noise);
+ normalize_noise(&runtime_nsecs_noise);
+ normalize_noise(&walltime_nsecs_noise);
+ normalize_noise(&runtime_cycles_noise);
for (j = 0; j < nr_counters; j++) {
- normalize(&event_res_noise[j][0]);
- normalize(&event_res_noise[j][1]);
- normalize(&event_res_noise[j][2]);
+ normalize_noise(&event_res_noise[j][0]);
+ normalize_noise(&event_res_noise[j][1]);
+ normalize_noise(&event_res_noise[j][2]);
}
}
@@ -399,8 +413,6 @@ static void print_stat(int argc, const char **argv)
calc_avg();
- run_idx = 0;
-
fflush(stdout);
fprintf(stderr, "\n");
commit 42202dd56c717f173cd0bf2390249e1bf5cf210b
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Jun 13 14:57:28 2009 +0200
perf stat: Add feature to run and measure a command multiple times
Add the --repeat <n> feature to perf stat, which repeats a given
command up to 100 times, collects the stats and calculates an
average and a stddev.
For example, the following one-liner 'perf stat' command runs hackbench
5 times and prints a tabulated result of all metrics, with averages
and noise levels (as percentages):
aldebaran:~/linux/linux/tools/perf> ./perf stat --repeat 5 ~/hackbench 10
Time: 0.117
Time: 0.108
Time: 0.089
Time: 0.088
Time: 0.100
Performance counter stats for '/home/mingo/hackbench 10' (5 runs):
1243.989586 task-clock-msecs # 10.460 CPUs ( +- 4.720% )
47706 context-switches # 0.038 M/sec ( +- 19.706% )
387 CPU-migrations # 0.000 M/sec ( +- 3.608% )
17793 page-faults # 0.014 M/sec ( +- 0.354% )
3770941606 cycles # 3031.329 M/sec ( +- 4.621% )
1566372416 instructions # 0.415 IPC ( +- 2.703% )
16783421 cache-references # 13.492 M/sec ( +- 5.202% )
7128590 cache-misses # 5.730 M/sec ( +- 7.420% )
0.118924455 seconds time elapsed.
The goal of this feature is to make it possible to rely on these
statistics, and to show how many times a command has to be repeated
for the noise to drop to an acceptable level.
(The -v option can be used to see a line printed out as each run progresses.)
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c12804853eab..9eb42b1ae784 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -43,6 +43,7 @@
#include "util/parse-events.h"
#include <sys/prctl.h>
+#include <math.h>
static struct perf_counter_attr default_attrs[MAX_COUNTERS] = {
@@ -79,12 +80,34 @@ static const unsigned int default_count[] = {
10000,
};
-static __u64 event_res[MAX_COUNTERS][3];
-static __u64 event_scaled[MAX_COUNTERS];
+#define MAX_RUN 100
-static __u64 runtime_nsecs;
-static __u64 walltime_nsecs;
-static __u64 runtime_cycles;
+static int run_count = 1;
+static int run_idx = 0;
+
+static __u64 event_res[MAX_RUN][MAX_COUNTERS][3];
+static __u64 event_scaled[MAX_RUN][MAX_COUNTERS];
+
+//static __u64 event_hist[MAX_RUN][MAX_COUNTERS][3];
+
+
+static __u64 runtime_nsecs[MAX_RUN];
+static __u64 walltime_nsecs[MAX_RUN];
+static __u64 runtime_cycles[MAX_RUN];
+
+static __u64 event_res_avg[MAX_COUNTERS][3];
+static __u64 event_res_noise[MAX_COUNTERS][3];
+
+static __u64 event_scaled_avg[MAX_COUNTERS];
+
+static __u64 runtime_nsecs_avg;
+static __u64 runtime_nsecs_noise;
+
+static __u64 walltime_nsecs_avg;
+static __u64 walltime_nsecs_noise;
+
+static __u64 runtime_cycles_avg;
+static __u64 runtime_cycles_noise;
static void create_perf_stat_counter(int counter)
{
@@ -140,7 +163,7 @@ static void read_counter(int counter)
int cpu, nv;
int scaled;
- count = event_res[counter];
+ count = event_res[run_idx][counter];
count[0] = count[1] = count[2] = 0;
@@ -151,6 +174,8 @@ static void read_counter(int counter)
res = read(fd[cpu][counter], single_count, nv * sizeof(__u64));
assert(res == nv * sizeof(__u64));
+ close(fd[cpu][counter]);
+ fd[cpu][counter] = -1;
count[0] += single_count[0];
if (scale) {
@@ -162,13 +187,13 @@ static void read_counter(int counter)
scaled = 0;
if (scale) {
if (count[2] == 0) {
- event_scaled[counter] = -1;
+ event_scaled[run_idx][counter] = -1;
count[0] = 0;
return;
}
if (count[2] < count[1]) {
- event_scaled[counter] = 1;
+ event_scaled[run_idx][counter] = 1;
count[0] = (unsigned long long)
((double)count[0] * count[1] / count[2] + 0.5);
}
@@ -178,13 +203,62 @@ static void read_counter(int counter)
*/
if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
- runtime_nsecs = count[0];
+ runtime_nsecs[run_idx] = count[0];
if (attrs[counter].type == PERF_TYPE_HARDWARE &&
attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
- runtime_cycles = count[0];
+ runtime_cycles[run_idx] = count[0];
}
-static void nsec_printout(int counter, __u64 *count)
+static int run_perf_stat(int argc, const char **argv)
+{
+ unsigned long long t0, t1;
+ int status = 0;
+ int counter;
+ int pid;
+
+ if (!system_wide)
+ nr_cpus = 1;
+
+ for (counter = 0; counter < nr_counters; counter++)
+ create_perf_stat_counter(counter);
+
+ /*
+ * Enable counters and exec the command:
+ */
+ t0 = rdclock();
+ prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+
+ if ((pid = fork()) < 0)
+ perror("failed to fork");
+
+ if (!pid) {
+ if (execvp(argv[0], (char **)argv)) {
+ perror(argv[0]);
+ exit(-1);
+ }
+ }
+
+ wait(&status);
+
+ prctl(PR_TASK_PERF_COUNTERS_DISABLE);
+ t1 = rdclock();
+
+ walltime_nsecs[run_idx] = t1 - t0;
+
+ for (counter = 0; counter < nr_counters; counter++)
+ read_counter(counter);
+
+ return WEXITSTATUS(status);
+}
+
+static void print_noise(__u64 *count, __u64 *noise)
+{
+ if (run_count > 1)
+ fprintf(stderr, " ( +- %7.3f%% )",
+ (double)noise[0]/(count[0]+1)*100.0);
+}
+
+static void nsec_printout(int counter, __u64 *count, __u64 *noise)
{
double msecs = (double)count[0] / 1000000;
@@ -193,29 +267,30 @@ static void nsec_printout(int counter, __u64 *count)
if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
- if (walltime_nsecs)
- fprintf(stderr, " # %10.3f CPUs",
- (double)count[0] / (double)walltime_nsecs);
+ if (walltime_nsecs_avg)
+ fprintf(stderr, " # %10.3f CPUs ",
+ (double)count[0] / (double)walltime_nsecs_avg);
}
+ print_noise(count, noise);
}
-static void abs_printout(int counter, __u64 *count)
+static void abs_printout(int counter, __u64 *count, __u64 *noise)
{
fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter));
- if (runtime_cycles &&
+ if (runtime_cycles_avg &&
attrs[counter].type == PERF_TYPE_HARDWARE &&
attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
- fprintf(stderr, " # %10.3f IPC",
- (double)count[0] / (double)runtime_cycles);
-
- return;
+ fprintf(stderr, " # %10.3f IPC ",
+ (double)count[0] / (double)runtime_cycles_avg);
+ } else {
+ if (runtime_nsecs_avg) {
+ fprintf(stderr, " # %10.3f M/sec",
+ (double)count[0]/runtime_nsecs_avg*1000.0);
+ }
}
-
- if (runtime_nsecs)
- fprintf(stderr, " # %10.3f M/sec",
- (double)count[0]/runtime_nsecs*1000.0);
+ print_noise(count, noise);
}
/*
@@ -223,11 +298,12 @@ static void abs_printout(int counter, __u64 *count)
*/
static void print_counter(int counter)
{
- __u64 *count;
+ __u64 *count, *noise;
int scaled;
- count = event_res[counter];
- scaled = event_scaled[counter];
+ count = event_res_avg[counter];
+ noise = event_res_noise[counter];
+ scaled = event_scaled_avg[counter];
if (scaled == -1) {
fprintf(stderr, " %14s %-20s\n",
@@ -236,9 +312,9 @@ static void print_counter(int counter)
}
if (nsec_counter(counter))
- nsec_printout(counter, count);
+ nsec_printout(counter, count, noise);
else
- abs_printout(counter, count);
+ abs_printout(counter, count, noise);
if (scaled)
fprintf(stderr, " (scaled from %.2f%%)",
@@ -247,43 +323,83 @@ static void print_counter(int counter)
fprintf(stderr, "\n");
}
-static int do_perf_stat(int argc, const char **argv)
+/*
+ * Normalize noise values down to stddev:
+ */
+static void normalize(__u64 *val)
{
- unsigned long long t0, t1;
- int counter;
- int status;
- int pid;
- int i;
-
- if (!system_wide)
- nr_cpus = 1;
+ double res;
- for (counter = 0; counter < nr_counters; counter++)
- create_perf_stat_counter(counter);
+ res = (double)*val / (run_count * sqrt((double)run_count));
- /*
- * Enable counters and exec the command:
- */
- t0 = rdclock();
- prctl(PR_TASK_PERF_COUNTERS_ENABLE);
+ *val = (__u64)res;
+}
- if ((pid = fork()) < 0)
- perror("failed to fork");
+/*
+ * Calculate the averages and noises:
+ */
+static void calc_avg(void)
+{
+ int i, j;
+
+ for (i = 0; i < run_count; i++) {
+ runtime_nsecs_avg += runtime_nsecs[i];
+ walltime_nsecs_avg += walltime_nsecs[i];
+ runtime_cycles_avg += runtime_cycles[i];
+
+ for (j = 0; j < nr_counters; j++) {
+ event_res_avg[j][0] += event_res[i][j][0];
+ event_res_avg[j][1] += event_res[i][j][1];
+ event_res_avg[j][2] += event_res[i][j][2];
+ event_scaled_avg[j] += event_scaled[i][j];
+ }
+ }
+ runtime_nsecs_avg /= run_count;
+ walltime_nsecs_avg /= run_count;
+ runtime_cycles_avg /= run_count;
+
+ for (j = 0; j < nr_counters; j++) {
+ event_res_avg[j][0] /= run_count;
+ event_res_avg[j][1] /= run_count;
+ event_res_avg[j][2] /= run_count;
+ }
- if (!pid) {
- if (execvp(argv[0], (char **)argv)) {
- perror(argv[0]);
- exit(-1);
+ for (i = 0; i < run_count; i++) {
+ runtime_nsecs_noise +=
+ abs((__s64)(runtime_nsecs[i] - runtime_nsecs_avg));
+ walltime_nsecs_noise +=
+ abs((__s64)(walltime_nsecs[i] - walltime_nsecs_avg));
+ runtime_cycles_noise +=
+ abs((__s64)(runtime_cycles[i] - runtime_cycles_avg));
+
+ for (j = 0; j < nr_counters; j++) {
+ event_res_noise[j][0] +=
+ abs((__s64)(event_res[i][j][0] - event_res_avg[j][0]));
+ event_res_noise[j][1] +=
+ abs((__s64)(event_res[i][j][1] - event_res_avg[j][1]));
+ event_res_noise[j][2] +=
+ abs((__s64)(event_res[i][j][2] - event_res_avg[j][2]));
}
}
- while (wait(&status) >= 0)
- ;
+ normalize(&runtime_nsecs_noise);
+ normalize(&walltime_nsecs_noise);
+ normalize(&runtime_cycles_noise);
- prctl(PR_TASK_PERF_COUNTERS_DISABLE);
- t1 = rdclock();
+ for (j = 0; j < nr_counters; j++) {
+ normalize(&event_res_noise[j][0]);
+ normalize(&event_res_noise[j][1]);
+ normalize(&event_res_noise[j][2]);
+ }
+}
+
+static void print_stat(int argc, const char **argv)
+{
+ int i, counter;
+
+ calc_avg();
- walltime_nsecs = t1 - t0;
+ run_idx = 0;
fflush(stdout);
@@ -293,21 +409,19 @@ static int do_perf_stat(int argc, const char **argv)
for (i = 1; i < argc; i++)
fprintf(stderr, " %s", argv[i]);
- fprintf(stderr, "\':\n");
- fprintf(stderr, "\n");
-
- for (counter = 0; counter < nr_counters; counter++)
- read_counter(counter);
+ fprintf(stderr, "\'");
+ if (run_count > 1)
+ fprintf(stderr, " (%d runs)", run_count);
+ fprintf(stderr, ":\n\n");
for (counter = 0; counter < nr_counters; counter++)
print_counter(counter);
fprintf(stderr, "\n");
- fprintf(stderr, " %14.9f seconds time elapsed.\n", (double)(t1-t0)/1e9);
+ fprintf(stderr, " %14.9f seconds time elapsed.\n",
+ (double)walltime_nsecs_avg/1e9);
fprintf(stderr, "\n");
-
- return 0;
}
static volatile int signr = -1;
@@ -345,11 +459,15 @@ static const struct option options[] = {
"scale/normalize counters"),
OPT_BOOLEAN('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
+ OPT_INTEGER('r', "repeat", &run_count,
+ "repeat command and print average + stddev (max: 100)"),
OPT_END()
};
int cmd_stat(int argc, const char **argv, const char *prefix)
{
+ int status;
+
page_size = sysconf(_SC_PAGE_SIZE);
memcpy(attrs, default_attrs, sizeof(attrs));
@@ -357,6 +475,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
argc = parse_options(argc, argv, options, stat_usage, 0);
if (!argc)
usage_with_options(stat_usage, options);
+ if (run_count <= 0 || run_count > MAX_RUN)
+ usage_with_options(stat_usage, options);
if (!nr_counters)
nr_counters = 8;
@@ -376,5 +496,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix)
signal(SIGALRM, skip_signal);
signal(SIGABRT, skip_signal);
- return do_perf_stat(argc, argv);
+ status = 0;
+ for (run_idx = 0; run_idx < run_count; run_idx++) {
+ if (run_count != 1 && verbose)
+ fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1);
+ status = run_perf_stat(argc, argv);
+ }
+
+ print_stat(argc, argv);
+
+ return status;
}
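The " +- x% " column printed when --repeat is used comes from a simple spread
estimate: each metric's runs are averaged, the absolute deviations from that average
are summed, and the sum is scaled by run_count * sqrt(run_count) in normalize(),
which the code describes as normalizing the noise down to a stddev-like figure;
print_noise() then shows it as a percentage of the average. A self-contained sketch
of that arithmetic follows (noise_of() is a made-up helper, the three sample values
are walltimes in nanoseconds like those in the -vv output earlier, and the program
needs -lm to link):

/*
 * Stand-alone sketch of the --repeat averaging and noise arithmetic,
 * modelled on calc_avg()/normalize()/print_noise() above.
 * noise_of() and the sample data are hypothetical; link with -lm.
 */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

static unsigned long long
noise_of(const unsigned long long *val, int run_count,
	 unsigned long long *avg_out)
{
	unsigned long long avg = 0, noise = 0;
	int i;

	for (i = 0; i < run_count; i++)
		avg += val[i];
	avg /= run_count;

	for (i = 0; i < run_count; i++)
		noise += llabs((long long)val[i] - (long long)avg);

	*avg_out = avg;
	return (unsigned long long)
		((double)noise / (run_count * sqrt((double)run_count)));
}

int main(void)
{
	/* Three walltime samples, in nanoseconds: */
	unsigned long long walltime[] = { 236646752, 236770942, 236700533 };
	unsigned long long avg, noise;

	noise = noise_of(walltime, 3, &avg);

	printf(" %14llu walltime-nsecs       ( +- %7.3f%% )\n",
	       avg, (double)noise / (avg + 1) * 100.0);
	return 0;
}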
commit 44175b6f397a6724121eeaf0f072e2c912a46614
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Jun 13 13:35:00 2009 +0200
perf stat: Reorganize output
- use IPC for the instruction normalization output
- use CPUs for the CPU utilization factor value
- print out time elapsed like the other rows
- tidy up the task-clocks/cpu-clocks printout
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c43e4a97dc42..c12804853eab 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -184,6 +184,40 @@ static void read_counter(int counter)
runtime_cycles = count[0];
}
+static void nsec_printout(int counter, __u64 *count)
+{
+ double msecs = (double)count[0] / 1000000;
+
+ fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter));
+
+ if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
+ attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
+
+ if (walltime_nsecs)
+ fprintf(stderr, " # %10.3f CPUs",
+ (double)count[0] / (double)walltime_nsecs);
+ }
+}
+
+static void abs_printout(int counter, __u64 *count)
+{
+ fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter));
+
+ if (runtime_cycles &&
+ attrs[counter].type == PERF_TYPE_HARDWARE &&
+ attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
+
+ fprintf(stderr, " # %10.3f IPC",
+ (double)count[0] / (double)runtime_cycles);
+
+ return;
+ }
+
+ if (runtime_nsecs)
+ fprintf(stderr, " # %10.3f M/sec",
+ (double)count[0]/runtime_nsecs*1000.0);
+}
+
/*
* Print out the results of a single counter:
*/
@@ -201,35 +235,15 @@ static void print_counter(int counter)
return;
}
- if (nsec_counter(counter)) {
- double msecs = (double)count[0] / 1000000;
-
- fprintf(stderr, " %14.6f %-20s",
- msecs, event_name(counter));
- if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
- attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
+ if (nsec_counter(counter))
+ nsec_printout(counter, count);
+ else
+ abs_printout(counter, count);
- if (walltime_nsecs)
- fprintf(stderr, " # %11.3f CPU utilization factor",
- (double)count[0] / (double)walltime_nsecs);
- }
- } else {
- fprintf(stderr, " %14Ld %-20s",
- count[0], event_name(counter));
- if (runtime_nsecs)
- fprintf(stderr, " # %11.3f M/sec",
- (double)count[0]/runtime_nsecs*1000.0);
- if (runtime_cycles &&
- attrs[counter].type == PERF_TYPE_HARDWARE &&
- attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
-
- fprintf(stderr, " # %1.3f per cycle",
- (double)count[0] / (double)runtime_cycles);
- }
- }
if (scaled)
fprintf(stderr, " (scaled from %.2f%%)",
(double) count[2] / count[1] * 100);
+
fprintf(stderr, "\n");
}
@@ -290,8 +304,7 @@ static int do_perf_stat(int argc, const char **argv)
fprintf(stderr, "\n");
- fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
- (double)(t1-t0)/1e6);
+ fprintf(stderr, " %14.9f seconds time elapsed.\n", (double)(t1-t0)/1e9);
fprintf(stderr, "\n");
return 0;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5a72586e1df0..f0c9f2627fe1 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -63,8 +63,8 @@ static char *hw_event_names[] = {
};
static char *sw_event_names[] = {
- "cpu-clock-ticks",
- "task-clock-ticks",
+ "cpu-clock-msecs",
+ "task-clock-msecs",
"page-faults",
"context-switches",
"CPU-migrations",
commit 0d5959723e1db3fd7323c198a50c16cecf96c7a9
Merge: 62fdac5913f7 512626a04e72
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Jun 11 23:31:52 2009 +0200
Merge branch 'linus' into x86/mce3
Conflicts:
arch/x86/kernel/cpu/mcheck/mce_64.c
arch/x86/kernel/irq.c
Merge reason: Resolve the conflicts above.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --cc arch/x86/include/asm/entry_arch.h
index 69f886805ecb,d750a10ccad6..ff8cbfa07851
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@@ -50,10 -49,10 +50,10 @@@ BUILD_INTERRUPT(error_interrupt,ERROR_A
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
#ifdef CONFIG_PERF_COUNTERS
- BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR)
+ BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
#endif
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_THERMAL_VECTOR
BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
#endif
diff --cc arch/x86/include/asm/irq_vectors.h
index 1b35c4357ea8,e997be98c9b9..5b21f0ec3df2
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@@ -104,22 -108,15 +104,22 @@@
#define LOCAL_TIMER_VECTOR 0xef
/*
- * Performance monitoring interrupt vector:
+ * Generic system vector for platform specific use
*/
- #define LOCAL_PERF_VECTOR 0xee
+ #define GENERIC_INTERRUPT_VECTOR 0xed
/*
- * Generic system vector for platform specific use
+ * Performance monitoring pending work vector:
*/
- #define GENERIC_INTERRUPT_VECTOR 0xed
+ #define LOCAL_PENDING_VECTOR 0xec
+#define UV_BAU_MESSAGE 0xec
+
+/*
+ * Self IPI vector for machine checks
+ */
+#define MCE_SELF_VECTOR 0xeb
+
/*
* First APIC vector available to drivers: (vectors 0x30-0xee) we
* start at 0x31(0x41) to spread out vectors evenly between priority
diff --cc arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 046087e9808f,65a0fceedcd7..f2ef6952c400
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@@ -15,10 -15,7 +15,9 @@@
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>
- #include <asm/apic.h>
+#include "mce.h"
+
asmlinkage void smp_thermal_interrupt(void)
{
__u64 msr_val;
diff --cc arch/x86/kernel/irq.c
index 9773395aa758,38287b5f116e..b0cdde6932f5
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@@ -12,7 -12,7 +12,8 @@@
#include <asm/io_apic.h>
#include <asm/irq.h>
#include <asm/idle.h>
+#include <asm/mce.h>
+ #include <asm/hw_irq.h>
atomic_t irq_err_count;
commit 940010c5a314a7bd9b498593bc6ba1718ac5aec5
Merge: 8dc8e5e8bc0c 991ec02cdca3
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Jun 11 17:55:42 2009 +0200
Merge branch 'linus' into perfcounters/core
Conflicts:
arch/x86/kernel/irqinit.c
arch/x86/kernel/irqinit_64.c
arch/x86/kernel/traps.c
arch/x86/mm/fault.c
include/linux/sched.h
kernel/exit.c
diff --cc arch/x86/kernel/irqinit.c
index 205bdd880d31,2e08b10ad51a..267c6624c77f
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@@ -154,7 -181,12 +181,17 @@@ static void __init apic_intr_init(void
{
smp_intr_init();
- #ifdef CONFIG_X86_LOCAL_APIC
-#ifdef CONFIG_X86_64
++#ifdef CONFIG_X86_THERMAL_VECTOR
+ alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
++#endif
++#ifdef CONFIG_X86_THRESHOLD
+ alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+ #endif
++#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
++ alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
++#endif
+
+ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
/* self generated IPI for local APIC timer */
alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
@@@ -164,14 -196,20 +201,12 @@@
/* IPI vectors for APIC spurious and error interrupts */
alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+ /* Performance monitoring interrupts: */
# ifdef CONFIG_PERF_COUNTERS
- alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
# endif
- # ifdef CONFIG_X86_MCE_P4THERMAL
-#endif
-
-#ifdef CONFIG_X86_32
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
-- /* thermal monitor LVT interrupt */
-- alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
- # endif
-#endif
#endif
}
diff --cc arch/x86/mm/fault.c
index 5c6d816f30b4,5ec7ae366615..c6acc6326374
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@@ -3,41 -3,16 +3,17 @@@
* Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
* Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
*/
- #include <linux/interrupt.h>
- #include <linux/mmiotrace.h>
- #include <linux/bootmem.h>
- #include <linux/compiler.h>
- #include <linux/highmem.h>
- #include <linux/kprobes.h>
- #include <linux/uaccess.h>
- #include <linux/vmalloc.h>
- #include <linux/vt_kern.h>
- #include <linux/signal.h>
- #include <linux/kernel.h>
- #include <linux/ptrace.h>
- #include <linux/string.h>
- #include <linux/module.h>
- #include <linux/kdebug.h>
- #include <linux/errno.h>
- #include <linux/magic.h>
- #include <linux/sched.h>
- #include <linux/types.h>
- #include <linux/init.h>
- #include <linux/mman.h>
- #include <linux/tty.h>
- #include <linux/smp.h>
- #include <linux/mm.h>
- #include <linux/perf_counter.h>
-
- #include <asm-generic/sections.h>
-
- #include <asm/tlbflush.h>
- #include <asm/pgalloc.h>
- #include <asm/segment.h>
- #include <asm/system.h>
- #include <asm/proto.h>
- #include <asm/traps.h>
- #include <asm/desc.h>
+ #include <linux/magic.h> /* STACK_END_MAGIC */
+ #include <linux/sched.h> /* test_thread_flag(), ... */
+ #include <linux/kdebug.h> /* oops_begin/end, ... */
+ #include <linux/module.h> /* search_exception_table */
+ #include <linux/bootmem.h> /* max_low_pfn */
+ #include <linux/kprobes.h> /* __kprobes, ... */
+ #include <linux/mmiotrace.h> /* kmmio_handler, ... */
++#include <linux/perf_counter.h> /* perf_swcounter_event */
+
+ #include <asm/traps.h> /* dotraplinkage, ... */
+ #include <asm/pgalloc.h> /* pgd_*(), ... */
/*
* Page fault error code bits:
diff --cc include/linux/sched.h
index bc9326dcdde1,d1399660b776..28c774ff3cc7
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -96,9 -97,8 +97,9 @@@ struct exec_domain
struct futex_pi_state;
struct robust_list_head;
struct bio;
- struct bts_tracer;
struct fs_struct;
+ struct bts_context;
+struct perf_counter_context;
/*
* List of flags we want to share for kernel threads,
@@@ -136,9 -137,8 +138,9 @@@ DECLARE_PER_CPU(unsigned long, process_
extern int nr_processes(void);
extern unsigned long nr_running(void);
extern unsigned long nr_uninterruptible(void);
- extern unsigned long nr_active(void);
extern unsigned long nr_iowait(void);
+ extern void calc_global_load(void);
+extern u64 cpu_nr_migrations(int cpu);
extern unsigned long get_parent_ip(unsigned long addr);
diff --cc kernel/Makefile
index e914ca992d70,a35eee3436de..90b53f6dc226
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@@ -93,9 -93,9 +93,10 @@@ obj-$(CONFIG_LATENCYTOP) += latencytop.
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
+ obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
+obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --cc kernel/exit.c
index 99ad4063ee4a,cab535c427b8..49cdf6946f34
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@@ -48,8 -48,7 +48,8 @@@
#include <linux/tracehook.h>
#include <linux/fs_struct.h>
#include <linux/init_task.h>
+#include <linux/perf_counter.h>
- #include <trace/sched.h>
+ #include <trace/events/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
diff --cc kernel/fork.c
index f4466ca37ece,bb762b4dd217..4430eb1376f2
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@@ -61,9 -61,7 +61,8 @@@
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <linux/fs_struct.h>
- #include <trace/sched.h>
#include <linux/magic.h>
+#include <linux/perf_counter.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
diff --cc kernel/sched.c
index 8d43347a0c0d,14c447ae5d53..5b3f6ec1b0b3
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@@ -2885,30 -2912,74 +2941,83 @@@ unsigned long nr_iowait(void
return sum;
}
- unsigned long nr_active(void)
+ /* Variables and functions for calc_load */
+ static atomic_long_t calc_load_tasks;
+ static unsigned long calc_load_update;
+ unsigned long avenrun[3];
+ EXPORT_SYMBOL(avenrun);
+
+ /**
+ * get_avenrun - get the load average array
+ * @loads: pointer to dest load array
+ * @offset: offset to add
+ * @shift: shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+ {
+ loads[0] = (avenrun[0] + offset) << shift;
+ loads[1] = (avenrun[1] + offset) << shift;
+ loads[2] = (avenrun[2] + offset) << shift;
+ }
+
+ static unsigned long
+ calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
- unsigned long i, running = 0, uninterruptible = 0;
+ load *= exp;
+ load += active * (FIXED_1 - exp);
+ return load >> FSHIFT;
+ }
- for_each_online_cpu(i) {
- running += cpu_rq(i)->nr_running;
- uninterruptible += cpu_rq(i)->nr_uninterruptible;
- }
+ /*
+ * calc_load - update the avenrun load estimates 10 ticks after the
+ * CPUs have updated calc_load_tasks.
+ */
+ void calc_global_load(void)
+ {
+ unsigned long upd = calc_load_update + 10;
+ long active;
- if (unlikely((long)uninterruptible < 0))
- uninterruptible = 0;
+ if (time_before(jiffies, upd))
+ return;
- return running + uninterruptible;
+ active = atomic_long_read(&calc_load_tasks);
+ active = active > 0 ? active * FIXED_1 : 0;
+
+ avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+ avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+ avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+ calc_load_update += LOAD_FREQ;
+ }
+
+ /*
+ * Either called from update_cpu_load() or from a cpu going idle
+ */
+ static void calc_load_account_active(struct rq *this_rq)
+ {
+ long nr_active, delta;
+
+ nr_active = this_rq->nr_running;
+ nr_active += (long) this_rq->nr_uninterruptible;
+
+ if (nr_active != this_rq->calc_load_active) {
+ delta = nr_active - this_rq->calc_load_active;
+ this_rq->calc_load_active = nr_active;
+ atomic_long_add(delta, &calc_load_tasks);
+ }
}
+/*
+ * Externally visible per-cpu scheduler statistics:
+ * cpu_nr_migrations(cpu) - number of migrations into that cpu
+ */
+u64 cpu_nr_migrations(int cpu)
+{
+ return cpu_rq(cpu)->nr_migrations_in;
+}
+
/*
* Update rq->cpu_load[] statistics. This function is usually called every
* scheduler tick (TICK_NSEC).