Patches contributed by Eötvös Lorand University
commit 9ceb1c3d1fe15c2f9b55eaa8978019ef0e0a06ac
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Apr 28 02:57:53 2011 +0200
perf stat: Fix printout vertical alignment
Before:
|
| Performance counter stats for '/home/mingo/hackbench 20' (5 runs):
|
| 71,321,607 instructions:u # 0.42 insns per cycle ( +- 0.00% )
| 168,040,009 cycles:u # 0.000 GHz ( +- 0.81% )
|
| 1.468002368 seconds time elapsed ( +- 1.33% )
|
After:
|
| Performance counter stats for '/home/mingo/hackbench 20' (5 runs):
|
| 71,321,607 instructions:u # 0.42 insns per cycle ( +- 0.00% )
| 168,040,009 cycles:u # 0.000 GHz ( +- 0.81% )
|
| 1.468002368 seconds time elapsed ( +- 1.33% )
|
The last column (stddev noise) is properly aligned, vertically.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio7hjpn0dsrm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6959fdecb203..003caa857a44 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -575,7 +575,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (total)
ratio = avg / total;
- fprintf(stderr, " # %4.2f insns per cycle", ratio);
+ fprintf(stderr, " # %4.2f insns per cycle ", ratio);
total = avg_stats(&runtime_stalled_cycles_stats[cpu]);
commit 32673822e440eb92eb334631eb0a199d0c532d13
Merge: fa7b69475a6c 5373db886b79
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 10:38:30 2011 +0200
Merge branch 'tip/perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/core
Conflicts:
include/linux/perf_event.h
Merge reason: pick up the latest jump-label enhancements, they are cooked ready.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
commit 6c8a7213278324f381cbcbf51510711ed745d8e6
Merge: ec75a71634da d20ac252821a
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 10:31:29 2011 +0200
Merge branch 'tip/perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/urgent
commit c6264deff7ea6125492b442edad885e5429679af
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 13:50:47 2011 +0200
perf stat: Add -d/--detailed flag to run with a lot of events
Add the new -d/--detailed flag, which generates a pretty detailed event list:
Performance counter stats for './hackbench 10' (10 runs):
1514.287888 task-clock # 10.897 CPUs utilized ( +- 3.05% )
39,698 context-switches # 0.026 M/sec ( +- 12.19% )
8,147 CPU-migrations # 0.005 M/sec ( +- 16.55% )
17,918 page-faults # 0.012 M/sec ( +- 0.37% )
2,944,504,050 cycles # 1.944 GHz ( +- 3.89% ) (32.60%)
1,043,971,283 stalled-cycles # 35.45% of all cycles are idle ( +- 5.22% ) (44.48%)
1,655,906,768 instructions # 0.56 insns per cycle
# 0.63 stalled cycles per insn ( +- 1.95% ) (55.09%)
338,832,373 branches # 223.757 M/sec ( +- 1.96% ) (64.47%)
3,892,416 branch-misses # 1.15% of all branches ( +- 5.49% ) (73.12%)
606,410,482 L1-dcache-loads # 400.459 M/sec ( +- 1.29% ) (71.21%)
31,204,395 L1-dcache-load-misses # 5.15% of all L1-dcache hits ( +- 3.04% ) (60.43%)
3,922,751 LLC-loads # 2.590 M/sec ( +- 6.80% ) (46.87%)
5,037,288 LLC-load-misses # 3.327 M/sec ( +- 3.56% ) (13.00%)
0.138966828 seconds time elapsed ( +- 4.11% )
This can be used "at a glance" for narrower analysis.
-d can also be used in addition to other -e events, to further expand an event list.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-cxs98quixs3qyvdqx3goojc4@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 03bac6aa014b..6959fdecb203 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -73,6 +73,47 @@ static struct perf_event_attr default_attrs[] = {
};
+/*
+ * Detailed stats:
+ */
+static struct perf_event_attr detailed_attrs[] = {
+
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
+
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1D << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1D << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_LL << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_LL << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
+};
+
struct perf_evlist *evsel_list;
static bool system_wide = false;
@@ -86,6 +127,7 @@ static pid_t target_pid = -1;
static pid_t target_tid = -1;
static pid_t child_pid = -1;
static bool null_run = false;
+static bool detailed_run = false;
static bool big_num = true;
static int big_num_opt = -1;
static const char *cpu_list;
@@ -550,7 +592,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
- runtime_branches_stats[cpu].n != 0) {
+ runtime_l1_dcache_stats[cpu].n != 0) {
print_l1_dcache_misses(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) {
@@ -625,8 +667,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
avg_enabled = avg_stats(&ps->res_stats[1]);
avg_running = avg_stats(&ps->res_stats[2]);
- fprintf(stderr, " (scaled from %.2f%%)",
- 100 * avg_running / avg_enabled);
+ fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled);
}
fprintf(stderr, "\n");
}
@@ -668,10 +709,8 @@ static void print_counter(struct perf_evsel *counter)
if (!csv_output) {
print_noise(counter, 1.0);
- if (run != ena) {
- fprintf(stderr, " (scaled from %.2f%%)",
- 100.0 * run / ena);
- }
+ if (run != ena)
+ fprintf(stderr, " (%.2f%%)", 100.0 * run / ena);
}
fputc('\n', stderr);
}
@@ -778,6 +817,8 @@ static const struct option options[] = {
"repeat command and print average + stddev (max: 100)"),
OPT_BOOLEAN('n', "null", &null_run,
"null run - dont start any counters"),
+ OPT_BOOLEAN('d', "detailed", &detailed_run,
+ "detailed run - start a lot of events"),
OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
"print large numbers with thousands\' separators",
stat__set_big_num),
@@ -839,7 +880,18 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
}
/* Set attrs and nr_counters if no event is selected and !null_run */
- if (!null_run && !evsel_list->nr_entries) {
+ if (detailed_run) {
+ size_t c;
+
+ for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) {
+ pos = perf_evsel__new(&detailed_attrs[c], c);
+ if (pos == NULL)
+ goto out;
+ perf_evlist__add(evsel_list, pos);
+ }
+ }
+ /* Set attrs and nr_counters if no event is selected and !null_run */
+ if (!detailed_run && !null_run && !evsel_list->nr_entries) {
size_t c;
for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
commit 8bb6c79f24e66538f606076915e918242c02ec7c
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 13:25:24 2011 +0200
perf stat: Print out miss/hit ratio for L1 data-cache events
Print out this kind of l1-dcache-misses percentage:
Performance counter stats for './bw_tcp localhost':
29,956,262,201 cycles # 3.002 GHz (scaled from 85.14%)
8,255,209,558 stalled-cycles # 27.56% of all cycles are idle (scaled from 86.56%)
1,206,130,308 l1-dcache-misses # 40.49% of all L1-dcache hits (scaled from 86.30%)
2,978,756,779 l1-dcache-refs # 298.512 M/sec (scaled from 70.02%)
8,861,956,159 instructions # 0.30 insns per cycle
# 0.93 stalled cycles per insn (scaled from 84.27%)
1,644,306,068 branches # 164.782 M/sec (scaled from 86.43%)
74,778,443 branch-misses # 4.55% of all branches (scaled from 70.69%)
9978.695711 task-clock # 0.693 CPUs utilized
14.404347983 seconds time elapsed
And color the result depending on the severity of cache-trashing.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-54gmz0zymaid84zcs7joq02p@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5d4e1b9b2d89..03bac6aa014b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -159,6 +159,7 @@ struct stats runtime_cycles_stats[MAX_NR_CPUS];
struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS];
struct stats runtime_branches_stats[MAX_NR_CPUS];
struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
+struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
struct stats walltime_nsecs_stats;
static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -211,6 +212,8 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
update_stats(&runtime_branches_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
update_stats(&runtime_cacherefs_stats[0], count[0]);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+ update_stats(&runtime_l1_dcache_stats[0], count[0]);
}
/*
@@ -473,6 +476,29 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double
fprintf(stderr, " of all branches ");
}
+static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+
+ total = avg_stats(&runtime_l1_dcache_stats[cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = PERF_COLOR_NORMAL;
+ if (ratio > 20.0)
+ color = PERF_COLOR_RED;
+ else if (ratio > 10.0)
+ color = PERF_COLOR_MAGENTA;
+ else if (ratio > 5.0)
+ color = PERF_COLOR_YELLOW;
+
+ fprintf(stderr, " # ");
+ color_fprintf(stderr, color, "%5.2f%%", ratio);
+ fprintf(stderr, " of all L1-dcache hits ");
+}
+
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{
double total, ratio = 0.0;
@@ -519,6 +545,13 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
runtime_branches_stats[cpu].n != 0) {
print_branch_misses(cpu, evsel, avg);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+ runtime_branches_stats[cpu].n != 0) {
+ print_l1_dcache_misses(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) {
total = avg_stats(&runtime_cacherefs_stats[cpu]);
commit c78df6c1d49b5d798f1579141e3a12be7c325d1e
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 12:16:10 2011 +0200
perf stat: Print branch misses warning colors
Print the missed-branches percentage with different warning level ASCII colors,
as the percentage passes the 5%/10%/20% thresholds.
These thresholds are set to relatively low levels, because on most CPUs even a
moderate percentage of branch-misses already shows up as a slowdown.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-ybqukg7p86leiup7gl03ecgk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e7d91f9cf789..5d4e1b9b2d89 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -450,6 +450,29 @@ static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, doubl
fprintf(stderr, " of all cycles are idle ");
}
+static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+
+ total = avg_stats(&runtime_branches_stats[cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = PERF_COLOR_NORMAL;
+ if (ratio > 20.0)
+ color = PERF_COLOR_RED;
+ else if (ratio > 10.0)
+ color = PERF_COLOR_MAGENTA;
+ else if (ratio > 5.0)
+ color = PERF_COLOR_YELLOW;
+
+ fprintf(stderr, " # ");
+ color_fprintf(stderr, color, "%5.2f%%", ratio);
+ fprintf(stderr, " of all branches ");
+}
+
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{
double total, ratio = 0.0;
@@ -495,13 +518,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
runtime_branches_stats[cpu].n != 0) {
- total = avg_stats(&runtime_branches_stats[cpu]);
-
- if (total)
- ratio = avg * 100 / total;
-
- fprintf(stderr, " # %5.2f %% of all branches ", ratio);
-
+ print_branch_misses(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) {
total = avg_stats(&runtime_cacherefs_stats[cpu]);
commit a5d243d04a150acbfa79d641154f49e5d920f64f
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 05:39:24 2011 +0200
perf stat: Print stalled cycles warning colors
Print the stalled-cycles percentage with different warning level ASCII colors,
as the percentage passes the 25%/50%/75% thresholds.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-e25zz44rcms7mu9az4fu5zp0@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 845ded8f15a2..e7d91f9cf789 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -46,6 +46,7 @@
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/debug.h"
+#include "util/color.h"
#include "util/header.h"
#include "util/cpumap.h"
#include "util/thread.h"
@@ -426,6 +427,29 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats));
}
+static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+ double total, ratio = 0.0;
+ const char *color;
+
+ total = avg_stats(&runtime_cycles_stats[cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = PERF_COLOR_NORMAL;
+ if (ratio > 75.0)
+ color = PERF_COLOR_RED;
+ else if (ratio > 50.0)
+ color = PERF_COLOR_MAGENTA;
+ else if (ratio > 25.0)
+ color = PERF_COLOR_YELLOW;
+
+ fprintf(stderr, " # ");
+ color_fprintf(stderr, color, "%5.2f%%", ratio);
+ fprintf(stderr, " of all cycles are idle ");
+}
+
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
{
double total, ratio = 0.0;
@@ -488,12 +512,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
fprintf(stderr, " # %8.3f %% of all cache refs ", ratio);
} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
- total = avg_stats(&runtime_cycles_stats[cpu]);
-
- if (total)
- ratio = avg / total * 100.0;
-
- fprintf(stderr, " # %5.2f%% of all cycles are idle ", ratio);
+ print_stalled_cycles(cpu, evsel, avg);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = avg_stats(&runtime_nsecs_stats[cpu]);
@@ -508,6 +527,8 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
ratio = 1000.0 * avg / total;
fprintf(stderr, " # %8.3f M/sec ", ratio);
+ } else {
+ fprintf(stderr, " ");
}
}
commit f99844cb76b7d347711c22cdcb94266b7214141f
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 05:35:39 2011 +0200
perf stat: Fix -nan% output in perf stat noise printouts
Before:
0 CPU-migrations # 0.000 M/sec ( +- -nan% )
After:
0 CPU-migrations # 0.000 M/sec ( +- 0.00% )
Also factor out the noise printing function.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-z89h2v1bk1mikcbsf7e6v34q@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 924d18c407b8..845ded8f15a2 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -382,6 +382,16 @@ static int run_perf_stat(int argc __used, const char **argv)
return WEXITSTATUS(status);
}
+static void print_noise_pct(double total, double avg)
+{
+ double pct = 0.0;
+
+ if (avg)
+ pct = 100.0*total/avg;
+
+ fprintf(stderr, " ( +-%6.2f%% )", pct);
+}
+
static void print_noise(struct perf_evsel *evsel, double avg)
{
struct perf_stat *ps;
@@ -390,8 +400,7 @@ static void print_noise(struct perf_evsel *evsel, double avg)
return;
ps = evsel->priv;
- fprintf(stderr, " ( +- %7.3f%% )",
- 100 * stddev_stats(&ps->res_stats[0]) / avg);
+ print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
}
static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
@@ -635,9 +644,8 @@ static void print_stat(int argc, const char **argv)
fprintf(stderr, " %18.9f seconds time elapsed",
avg_stats(&walltime_nsecs_stats)/1e9);
if (run_count > 1) {
- fprintf(stderr, " ( +-%5.2f%% )",
- 100*stddev_stats(&walltime_nsecs_stats) /
- avg_stats(&walltime_nsecs_stats));
+ print_noise_pct(stddev_stats(&walltime_nsecs_stats),
+ avg_stats(&walltime_nsecs_stats));
}
fprintf(stderr, "\n\n");
}
commit 1fc570ad89e55dc32dfa4dda1311948b38f26524
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 05:20:22 2011 +0200
perf stat: Add stalled cycles to the default output
The new default output looks like this:
Performance counter stats for './loop_1b_instructions':
236.010686 task-clock # 0.996 CPUs utilized
0 context-switches # 0.000 M/sec
0 CPU-migrations # 0.000 M/sec
99 page-faults # 0.000 M/sec
756,487,646 cycles # 3.205 GHz
354,938,996 stalled-cycles # 46.92% of all cycles are idle
1,001,403,797 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn
100,279,773 branches # 424.895 M/sec
12,646 branch-misses # 0.013 % of all branches
0.236902540 seconds time elapsed
We dropped cache-refs and cache-misses and added stalled-cycles - this is a
more generic "how well utilized is the CPU" metric.
If the stalled-cycles ratio is too high then more specific measurements can be
taken to figure out the source of the inefficiency.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-pbpl2l4mn797s69bclfpwkwn@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e881c2061381..924d18c407b8 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,11 +65,10 @@ static struct perf_event_attr default_attrs[] = {
{ .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
{ .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES },
- { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES },
};
@@ -468,7 +467,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (total)
ratio = avg * 100 / total;
- fprintf(stderr, " # %8.3f %% of all branches", ratio);
+ fprintf(stderr, " # %5.2f %% of all branches ", ratio);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index b5bfef12f399..bbbb735268ef 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -32,13 +32,13 @@ char debugfs_path[MAXPATHLEN];
static struct event_symbol event_symbols[] = {
{ CHW(CPU_CYCLES), "cpu-cycles", "cycles" },
+ { CHW(STALLED_CYCLES), "stalled-cycles", "idle-cycles" },
{ CHW(INSTRUCTIONS), "instructions", "" },
{ CHW(CACHE_REFERENCES), "cache-references", "" },
{ CHW(CACHE_MISSES), "cache-misses", "" },
{ CHW(BRANCH_INSTRUCTIONS), "branch-instructions", "branches" },
{ CHW(BRANCH_MISSES), "branch-misses", "" },
{ CHW(BUS_CYCLES), "bus-cycles", "" },
- { CHW(STALLED_CYCLES), "stalled-cycles", "" },
{ CSW(CPU_CLOCK), "cpu-clock", "" },
{ CSW(TASK_CLOCK), "task-clock", "" },
@@ -54,9 +54,9 @@ static struct event_symbol event_symbols[] = {
#define __PERF_EVENT_FIELD(config, name) \
((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
-#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW)
+#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW)
#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG)
-#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
+#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
static const char *hw_event_names[] = {
@@ -67,6 +67,7 @@ static const char *hw_event_names[] = {
"branches",
"branch-misses",
"bus-cycles",
+ "stalled-cycles",
};
static const char *sw_event_names[] = {
@@ -308,7 +309,7 @@ const char *__event_name(int type, u64 config)
switch (type) {
case PERF_TYPE_HARDWARE:
- if (config < PERF_COUNT_HW_MAX)
+ if (config < PERF_COUNT_HW_MAX && hw_event_names[config])
return hw_event_names[config];
return "unknown-hardware";
@@ -334,7 +335,7 @@ const char *__event_name(int type, u64 config)
}
case PERF_TYPE_SOFTWARE:
- if (config < PERF_COUNT_SW_MAX)
+ if (config < PERF_COUNT_SW_MAX && sw_event_names[config])
return sw_event_names[config];
return "unknown-software";
commit 481f988a016f7a0327a5537bde4794349fc4625c
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 27 04:34:16 2011 +0200
perf stat: Add stalled cycles accounting, prettify the resulting output
Add stalled cycles accounting and use it to print the "cycles stalled per
instruction" value.
Also change the unit of the cycles output from M/sec to GHz - this is more
intuitive.
Prettify the output to:
Performance counter stats for './loop_1b_instructions':
239.775036 task-clock # 0.997 CPUs utilized
761,903,912 cycles # 3.178 GHz
356,620,620 stalled-cycles # 46.81% of all cycles are idle
1,001,578,351 instructions # 1.31 insns per cycle
# 0.36 stalled cycles per insn
14,782 cache-references # 0.062 M/sec
5,694 cache-misses # 38.520 % of all cache refs
0.240493656 seconds time elapsed
Also adjust the --repeat output to make the percentages align vertically:
Performance counter stats for './loop_1b_instructions' (10 runs):
236.096793 task-clock # 0.997 CPUs utilized ( +- 0.011% )
756,553,086 cycles # 3.204 GHz ( +- 0.002% )
354,942,692 stalled-cycles # 46.92% of all cycles are idle ( +- 0.008% )
1,001,389,700 instructions # 1.32 insns per cycle
# 0.35 stalled cycles per insn ( +- 0.000% )
10,166 cache-references # 0.043 M/sec ( +- 0.742% )
468 cache-misses # 4.608 % of all cache refs ( +- 13.385% )
0.236874136 seconds time elapsed ( +- 0.01% )
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Link: http://lkml.kernel.org/n/tip-uapziqny39601apdmmhoz7hk@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e5e82f62c784..e881c2061381 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -156,6 +156,7 @@ static double stddev_stats(struct stats *stats)
struct stats runtime_nsecs_stats[MAX_NR_CPUS];
struct stats runtime_cycles_stats[MAX_NR_CPUS];
+struct stats runtime_stalled_cycles_stats[MAX_NR_CPUS];
struct stats runtime_branches_stats[MAX_NR_CPUS];
struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
struct stats walltime_nsecs_stats;
@@ -204,6 +205,8 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
update_stats(&runtime_nsecs_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_stats(&runtime_cycles_stats[0], count[0]);
+ else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES))
+ update_stats(&runtime_stalled_cycles_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[0], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
@@ -412,8 +415,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
return;
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
- fprintf(stderr, " # %10.3f CPUs",
- avg / avg_stats(&walltime_nsecs_stats));
+ fprintf(stderr, " # %8.3f CPUs utilized ", avg / avg_stats(&walltime_nsecs_stats));
}
static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
@@ -450,7 +452,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (total)
ratio = avg / total;
- fprintf(stderr, " # ( %4.2f instructions per cycle )", ratio);
+ fprintf(stderr, " # %4.2f insns per cycle", ratio);
+
+ total = avg_stats(&runtime_stalled_cycles_stats[cpu]);
+
+ if (total && avg) {
+ ratio = total / avg;
+ fprintf(stderr, "\n # %4.2f stalled cycles per insn", ratio);
+ }
+
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
runtime_branches_stats[cpu].n != 0) {
total = avg_stats(&runtime_branches_stats[cpu]);
@@ -458,7 +468,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (total)
ratio = avg * 100 / total;
- fprintf(stderr, " # %10.3f %%", ratio);
+ fprintf(stderr, " # %8.3f %% of all branches", ratio);
} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
runtime_cacherefs_stats[cpu].n != 0) {
@@ -467,22 +477,29 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
if (total)
ratio = avg * 100 / total;
- fprintf(stderr, " # %10.3f %%", ratio);
+ fprintf(stderr, " # %8.3f %% of all cache refs ", ratio);
- } else if (runtime_nsecs_stats[cpu].n != 0) {
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
+ total = avg_stats(&runtime_cycles_stats[cpu]);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ fprintf(stderr, " # %5.2f%% of all cycles are idle ", ratio);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = avg_stats(&runtime_nsecs_stats[cpu]);
if (total)
- ratio = 1000.0 * avg / total;
+ ratio = 1.0 * avg / total;
- fprintf(stderr, " # %10.3f M/sec", ratio);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
- total = avg_stats(&runtime_cycles_stats[cpu]);
+ fprintf(stderr, " # %8.3f GHz ", ratio);
+ } else if (runtime_nsecs_stats[cpu].n != 0) {
+ total = avg_stats(&runtime_nsecs_stats[cpu]);
if (total)
- ratio = avg / total * 100.0;
+ ratio = 1000.0 * avg / total;
- fprintf(stderr, " # (%5.2f%% of all cycles )", ratio);
+ fprintf(stderr, " # %8.3f M/sec ", ratio);
}
}
@@ -619,7 +636,7 @@ static void print_stat(int argc, const char **argv)
fprintf(stderr, " %18.9f seconds time elapsed",
avg_stats(&walltime_nsecs_stats)/1e9);
if (run_count > 1) {
- fprintf(stderr, " ( +- %7.3f%% )",
+ fprintf(stderr, " ( +-%5.2f%% )",
100*stddev_stats(&walltime_nsecs_stats) /
avg_stats(&walltime_nsecs_stats));
}