Patches contributed by Eötvös Lorand University
commit 370faf1dd0461ad811852c8abbbcd3d73b1e4fc4
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Apr 29 16:11:03 2011 +0200
perf stat: Fail softly on unsupported events
David Ahern reported this perf stat failure:
> # /tmp/build-perf/perf stat -- sleep 1
> Error: stalled-cycles-frontend event is not supported.
> Fatal: Not all events could be opened.
>
> This is a Dell R410 with an E5620 processor.
Fail in a softer fashion on unknown/unsupported events.
Reported-by: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n006io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9e596ab98d05..c8b535bc27bd 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -377,7 +377,7 @@ static int run_perf_stat(int argc __used, const char **argv)
list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter) < 0) {
- if (errno == EINVAL || errno == ENOSYS)
+ if (errno == EINVAL || errno == ENOSYS || errno == ENOENT)
continue;
if (errno == EPERM || errno == EACCES) {
@@ -385,8 +385,6 @@ static int run_perf_stat(int argc __used, const char **argv)
"\t Consider tweaking"
" /proc/sys/kernel/perf_event_paranoid or running as root.",
system_wide ? "system-wide " : "");
- } else if (errno == ENOENT) {
- error("%s event is not supported. ", event_name(counter));
} else {
error("open_counter returned with %d (%s). "
"/bin/dmesg may provide additional information.\n",
commit fce3c786d3a49eff397583b4b62fa38df90db937
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Apr 30 09:03:15 2011 +0200
perf stat: Leave more room for percentages
Triple digit percentages do not fit otherwise.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n005io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2492a0efa4d8..9e596ab98d05 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -499,8 +499,8 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us
else if (ratio > 10.0)
color = PERF_COLOR_YELLOW;
- fprintf(stderr, " # ");
- color_fprintf(stderr, color, "%5.2f%%", ratio);
+ fprintf(stderr, " # ");
+ color_fprintf(stderr, color, "%6.2f%%", ratio);
fprintf(stderr, " frontend cycles idle ");
}
@@ -522,9 +522,9 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use
else if (ratio > 20.0)
color = PERF_COLOR_YELLOW;
- fprintf(stderr, " # ");
- color_fprintf(stderr, color, "%5.2f%%", ratio);
- fprintf(stderr, " backend cycles idle ");
+ fprintf(stderr, " # ");
+ color_fprintf(stderr, color, "%6.2f%%", ratio);
+ fprintf(stderr, " backend cycles idle ");
}
static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
@@ -545,8 +545,8 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double
else if (ratio > 5.0)
color = PERF_COLOR_YELLOW;
- fprintf(stderr, " # ");
- color_fprintf(stderr, color, "%5.2f%%", ratio);
+ fprintf(stderr, " # ");
+ color_fprintf(stderr, color, "%6.2f%%", ratio);
fprintf(stderr, " of all branches ");
}
@@ -568,8 +568,8 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou
else if (ratio > 5.0)
color = PERF_COLOR_YELLOW;
- fprintf(stderr, " # ");
- color_fprintf(stderr, color, "%5.2f%%", ratio);
+ fprintf(stderr, " # ");
+ color_fprintf(stderr, color, "%6.2f%%", ratio);
fprintf(stderr, " of all L1-dcache hits ");
}
@@ -607,14 +607,14 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (total)
ratio = avg / total;
- fprintf(stderr, " # %4.2f insns per cycle ", ratio);
+ fprintf(stderr, " # %5.2f insns per cycle ", ratio);
total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
if (total && avg) {
ratio = total / avg;
- fprintf(stderr, "\n # %4.2f stalled cycles per insn", ratio);
+ fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
commit 2b427e14b77dbf3e05f1bd0785f1d07ea5fe924e
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Apr 29 14:16:18 2011 +0200
perf stat: Adjust stall cycles warning percentages
Adjust to color thresholds to better match the percentages seen in
real workloads. Both are now a bit more sensitive.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n004io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e45449938b80..2492a0efa4d8 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -492,11 +492,11 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us
ratio = avg / total * 100.0;
color = PERF_COLOR_NORMAL;
- if (ratio > 75.0)
+ if (ratio > 50.0)
color = PERF_COLOR_RED;
- else if (ratio > 50.0)
+ else if (ratio > 30.0)
color = PERF_COLOR_MAGENTA;
- else if (ratio > 20.0)
+ else if (ratio > 10.0)
color = PERF_COLOR_YELLOW;
fprintf(stderr, " # ");
@@ -519,7 +519,7 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use
color = PERF_COLOR_RED;
else if (ratio > 50.0)
color = PERF_COLOR_MAGENTA;
- else if (ratio > 25.0)
+ else if (ratio > 20.0)
color = PERF_COLOR_YELLOW;
fprintf(stderr, " # ");
commit d3d1e86da07b4565815e3dbcd082f53017d215f8
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Apr 29 13:49:08 2011 +0200
perf stat: Analyze front-end and back-end stall counts
Sample output:
Performance counter stats for './loop_1b':
873.691065 task-clock # 1.000 CPUs utilized
1 context-switches # 0.000 M/sec
1 CPU-migrations # 0.000 M/sec
96 page-faults # 0.000 M/sec
2,012,637,222 cycles # 2.304 GHz (66.58%)
1,001,397,911 stalled-cycles-frontend # 49.76% frontend cycles idle (66.58%)
7,523,398 stalled-cycles-backend # 0.37% backend cycles idle (66.76%)
2,004,551,046 instructions # 1.00 insns per cycle
# 0.50 stalled cycles per insn (66.80%)
1,001,304,992 branches # 1146.063 M/sec (66.76%)
39,453 branch-misses # 0.00% of all branches (66.64%)
0.874046121 seconds time elapsed
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6a4a8a399d95..e45449938b80 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -201,7 +201,8 @@ static double stddev_stats(struct stats *stats)
struct stats runtime_nsecs_stats[MAX_NR_CPUS];
struct stats runtime_cycles_stats[MAX_NR_CPUS];
-struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS];
+struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
+struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
struct stats runtime_branches_stats[MAX_NR_CPUS];
struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
@@ -251,8 +252,10 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
update_stats(&runtime_nsecs_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_stats(&runtime_cycles_stats[0], count[0]);
+ else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+ update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
- update_stats(&runtime_stalled_cycles_stats[0], count[0]);
+ update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
@@ -478,7 +481,30 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats));
}
-static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+
+ total = avg_stats(&runtime_cycles_stats[cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = PERF_COLOR_NORMAL;
+ if (ratio > 75.0)
+ color = PERF_COLOR_RED;
+ else if (ratio > 50.0)
+ color = PERF_COLOR_MAGENTA;
+ else if (ratio > 20.0)
+ color = PERF_COLOR_YELLOW;
+
+ fprintf(stderr, " # ");
+ color_fprintf(stderr, color, "%5.2f%%", ratio);
+ fprintf(stderr, " frontend cycles idle ");
+}
+
+static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg)
{
double total, ratio = 0.0;
const char *color;
@@ -498,7 +524,7 @@ static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, doubl
fprintf(stderr, " # ");
color_fprintf(stderr, color, "%5.2f%%", ratio);
- fprintf(stderr, " of all cycles are idle ");
+ fprintf(stderr, " backend cycles idle ");
}
static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
@@ -583,7 +609,8 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
fprintf(stderr, " # %4.2f insns per cycle ", ratio);
- total = avg_stats(&runtime_stalled_cycles_stats[cpu]);
+ total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
+ total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
if (total && avg) {
ratio = total / avg;
@@ -609,8 +636,10 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
fprintf(stderr, " # %8.3f %% of all cache refs ", ratio);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+ print_stalled_cycles_frontend(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
- print_stalled_cycles(cpu, evsel, avg);
+ print_stalled_cycles_backend(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = avg_stats(&runtime_nsecs_stats[cpu]);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 04d2f0a96674..8a407f3e286f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -60,7 +60,7 @@ static struct event_symbol event_symbols[] = {
#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
-static const char *hw_event_names[] = {
+static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
"cycles",
"instructions",
"cache-references",
@@ -68,10 +68,11 @@ static const char *hw_event_names[] = {
"branches",
"branch-misses",
"bus-cycles",
- "stalled-cycles",
+ "stalled-cycles-frontend",
+ "stalled-cycles-backend",
};
-static const char *sw_event_names[] = {
+static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
"cpu-clock",
"task-clock",
"page-faults",
commit 129c04cb8ce2e4bf3f17223f58ef16aa8a2cb3b8
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Apr 29 14:41:28 2011 +0200
perf tools: Add front-end and back-end stalled cycles support
Update perf tooling to deal with front-end and back-end stalled cycles events.
Add both the default 'perf stat' output.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n002io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index da77077450cf..6a4a8a399d95 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -66,7 +66,8 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
@@ -84,7 +85,8 @@ static struct perf_event_attr detailed_attrs[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
@@ -249,7 +251,7 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
update_stats(&runtime_nsecs_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_stats(&runtime_cycles_stats[0], count[0]);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES))
+ else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_stats(&runtime_stalled_cycles_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[0], count[0]);
@@ -607,7 +609,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
fprintf(stderr, " # %8.3f %% of all cache refs ", ratio);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = avg_stats(&runtime_nsecs_stats[cpu]);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index bbbb735268ef..04d2f0a96674 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -31,24 +31,25 @@ char debugfs_path[MAXPATHLEN];
#define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
static struct event_symbol event_symbols[] = {
- { CHW(CPU_CYCLES), "cpu-cycles", "cycles" },
- { CHW(STALLED_CYCLES), "stalled-cycles", "idle-cycles" },
- { CHW(INSTRUCTIONS), "instructions", "" },
- { CHW(CACHE_REFERENCES), "cache-references", "" },
- { CHW(CACHE_MISSES), "cache-misses", "" },
- { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
- { CHW(BRANCH_MISSES), "branch-misses", "" },
- { CHW(BUS_CYCLES), "bus-cycles", "" },
-
- { CSW(CPU_CLOCK), "cpu-clock", "" },
- { CSW(TASK_CLOCK), "task-clock", "" },
- { CSW(PAGE_FAULTS), "page-faults", "faults" },
- { CSW(PAGE_FAULTS_MIN), "minor-faults", "" },
- { CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
- { CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
- { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
- { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
- { CSW(EMULATION_FAULTS), "emulation-faults", "" },
+ { CHW(CPU_CYCLES), "cpu-cycles", "cycles" },
+ { CHW(STALLED_CYCLES_FRONTEND), "stalled-cycles-frontend", "idle-cycles-frontend" },
+ { CHW(STALLED_CYCLES_BACKEND), "stalled-cycles-backend", "idle-cycles-backend" },
+ { CHW(INSTRUCTIONS), "instructions", "" },
+ { CHW(CACHE_REFERENCES), "cache-references", "" },
+ { CHW(CACHE_MISSES), "cache-misses", "" },
+ { CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
+ { CHW(BRANCH_MISSES), "branch-misses", "" },
+ { CHW(BUS_CYCLES), "bus-cycles", "" },
+
+ { CSW(CPU_CLOCK), "cpu-clock", "" },
+ { CSW(TASK_CLOCK), "task-clock", "" },
+ { CSW(PAGE_FAULTS), "page-faults", "faults" },
+ { CSW(PAGE_FAULTS_MIN), "minor-faults", "" },
+ { CSW(PAGE_FAULTS_MAJ), "major-faults", "" },
+ { CSW(CONTEXT_SWITCHES), "context-switches", "cs" },
+ { CSW(CPU_MIGRATIONS), "cpu-migrations", "migrations" },
+ { CSW(ALIGNMENT_FAULTS), "alignment-faults", "" },
+ { CSW(EMULATION_FAULTS), "emulation-faults", "" },
};
#define __PERF_EVENT_FIELD(config, name) \
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 406f613ee619..8b0eff8b8283 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -798,7 +798,6 @@ static struct {
{ "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
{ "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES },
{ "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES },
- { "COUNT_HW_STALLED_CYCLES", PERF_COUNT_HW_STALLED_CYCLES },
{ "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D },
{ "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I },
{ "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL },
@@ -811,6 +810,9 @@ static struct {
{ "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS },
{ "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS },
+ { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+ { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+
{ "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK },
{ "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK },
{ "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS },
commit 91fc4cc00099986bc1ba50e1f421c3548cffae42
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Apr 29 14:17:19 2011 +0200
perf, x86: Add new stalled cycles events for Intel and AMD CPUs
Extend the Intel and AMD event definitions with generic front-end and
back-end stall events.
( These are only approximations - suggestions are welcome for better events. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n001io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index cf4e369cea67..fe29c1d2219e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids
*/
static const u64 amd_perfmon_event_map[] =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */
};
static u64 amd_pmu_event_map(int hw_event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 393085b87a2c..7983b9a9533b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1413,7 +1413,9 @@ static __init int intel_pmu_init(void)
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
- /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+ /* UOPS_ISSUED.STALLED_CYCLES */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+ /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
if (ebx & 0x40) {
commit 8f62242246351b5a4bc0c1f00c0c7003edea128a
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Apr 29 13:19:47 2011 +0200
perf events: Add generic front-end and back-end stalled cycle event definitions
Add two generic hardware events: front-end and back-end stalled cycles.
These events measure conditions when the CPU is executing code but its
capabilities are not fully utilized. Understanding such situations and
analyzing them is an important sub-task of code optimization workflows.
Both events limit performance: most front end stalls tend to be caused
by branch misprediction or instruction fetch cachemisses, backend
stalls can be caused by various resource shortages or inefficient
instruction scheduling.
Front-end stalls are the more important ones: code cannot run fast
if the instruction stream is not being kept up.
An over-utilized back-end can cause front-end stalls and thus
has to be kept an eye on as well.
The exact composition is very program logic and instruction mix
dependent.
We use the terms 'stall', 'front-end' and 'back-end' loosely and
try to use the best available events from specific CPUs that
approximate these concepts.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 1ea94224f62e..393085b87a2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1414,7 +1414,7 @@ static __init int intel_pmu_init(void)
x86_pmu.extra_regs = intel_nehalem_extra_regs;
/* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1;
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
if (ebx & 0x40) {
/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ac636dd20a0c..4e2d7ae71499 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -52,7 +52,8 @@ enum perf_hw_id {
PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
PERF_COUNT_HW_BRANCH_MISSES = 5,
PERF_COUNT_HW_BUS_CYCLES = 6,
- PERF_COUNT_HW_STALLED_CYCLES = 7,
+ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
+ PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
PERF_COUNT_HW_MAX, /* non-ABI */
};
commit ede70290046043b2638204cab55e26ea1d0c6cd9
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Apr 28 08:48:42 2011 +0200
perf stat: Fix compatibility behavior
Instead of failing on an unknown event, when new perf stat is run on
older kernels:
$ ./perf stat true
Error: open_counter returned with 22 (Invalid argument). /bin/dmesg
may provide additional information.
Fatal: Not all events could be opened.
Just ignore EINVAL and ENOSYS, we'll print the results as not counted:
Performance counter stats for 'true':
0.239483 task-clock # 0.493 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
86 page-faults # 0.359 M/sec
704,766 cycles # 2.943 GHz
<not counted> stalled-cycles
381,961 instructions # 0.54 insns per cycle
69,626 branches # 290.735 M/sec
4,594 branch-misses # 6.60% of all branches
0.000485883 seconds time elapsed
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio5hjpn3dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5658a770dbd7..da77077450cf 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -372,7 +372,10 @@ static int run_perf_stat(int argc __used, const char **argv)
list_for_each_entry(counter, &evsel_list->entries, node) {
if (create_perf_stat_counter(counter) < 0) {
- if (errno == -EPERM || errno == -EACCES) {
+ if (errno == EINVAL || errno == ENOSYS)
+ continue;
+
+ if (errno == EPERM || errno == EACCES) {
error("You may not have permission to collect %sstats.\n"
"\t Consider tweaking"
" /proc/sys/kernel/perf_event_paranoid or running as root.",
commit f9cef0a90c4e7637f1ec98474a1a099aec45eb65
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Apr 28 18:17:11 2011 +0200
perf stat: Add --sync/-S option
--sync will tell perf stat to run sync() before starting a command.
This allows IO-heavy tests to be used with --repeat, without one
iteration impacting the other.
Elapsed time will stabilize for example:
before: 3.971525714 seconds time elapsed ( +- 8.56% )
after: 3.211098537 seconds time elapsed ( +- 1.52% )
So measurements will be more accurate.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio7hjpn1dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 003caa857a44..5658a770dbd7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -128,6 +128,7 @@ static pid_t target_tid = -1;
static pid_t child_pid = -1;
static bool null_run = false;
static bool detailed_run = false;
+static bool sync_run = false;
static bool big_num = true;
static int big_num_opt = -1;
static const char *cpu_list;
@@ -819,6 +820,8 @@ static const struct option options[] = {
"null run - dont start any counters"),
OPT_BOOLEAN('d', "detailed", &detailed_run,
"detailed run - start a lot of events"),
+ OPT_BOOLEAN('S', "sync", &sync_run,
+ "call sync() before starting a run"),
OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
"print large numbers with thousands\' separators",
stat__set_big_num),
@@ -944,6 +947,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
for (run_idx = 0; run_idx < run_count; run_idx++) {
if (run_count != 1 && verbose)
fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
+
+ if (sync_run)
+ sync();
+
status = run_perf_stat(argc, argv);
}
commit 8a850cadca0e387c87a0911a61e99fd66aeb57ec
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Apr 28 11:16:44 2011 +0200
perf event, x86: Use better stalled cycles metric
Use the UOPS_EXECUTED.*,c=1,i=1 event on Intel CPUs - it is a rather
good indicator of CPU execution stalls, more sensitive and more inclusive
than the 0xa2 resource stalls event (which does not count nearly as many
stall types).
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio7hjpn2dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 067a48b13a76..1ea94224f62e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1413,8 +1413,8 @@ static __init int intel_pmu_init(void)
x86_pmu.enable_all = intel_pmu_nhm_enable_all;
x86_pmu.extra_regs = intel_nehalem_extra_regs;
- /* Install the stalled-cycles event: 0xff: All reasons, 0xa2: Resource stalls */
- intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0xffa2;
+ /* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+ intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1;
if (ebx & 0x40) {
/*