Patches contributed by Eötvös Lorand University


commit c3305257cd4df63e03e21e331a0140ae9c0faccc
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu May 19 14:01:42 2011 +0200

    perf stat: Add more cache-miss percentage printouts
    
    Print out the cache-miss percentage as well if the cache refs were
    collected, for all the generic cache event types.
    
    Before:
    
       11,103,723,230 dTLB-loads                #  622.471 M/sec                    ( +-  0.30% )
           87,065,337 dTLB-load-misses          #    4.881 M/sec                    ( +-  0.90% )
    
    After:
    
       11,353,713,242 dTLB-loads                #  626.020 M/sec                    ( +-  0.35% )
          113,393,472 dTLB-load-misses          #    1.00% of all dTLB cache hits   ( +-  0.49% )
    
    Also ASCII color highlight too high percentages, them when it's executed on the console.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Steven Rostedt <rostedt@goodmis.org>
    Link: http://lkml.kernel.org/n/tip-lkhwxsevdbd9a8nymx0vxc3y@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a89fc0835367..a9f06715e44d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -261,6 +261,10 @@ struct stats			runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
 struct stats			runtime_branches_stats[MAX_NR_CPUS];
 struct stats			runtime_cacherefs_stats[MAX_NR_CPUS];
 struct stats			runtime_l1_dcache_stats[MAX_NR_CPUS];
+struct stats			runtime_l1_icache_stats[MAX_NR_CPUS];
+struct stats			runtime_ll_cache_stats[MAX_NR_CPUS];
+struct stats			runtime_itlb_cache_stats[MAX_NR_CPUS];
+struct stats			runtime_dtlb_cache_stats[MAX_NR_CPUS];
 struct stats			walltime_nsecs_stats;
 
 static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -317,6 +321,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 		update_stats(&runtime_cacherefs_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
 		update_stats(&runtime_l1_dcache_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+		update_stats(&runtime_l1_icache_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+		update_stats(&runtime_ll_cache_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+		update_stats(&runtime_dtlb_cache_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+		update_stats(&runtime_itlb_cache_stats[0], count[0]);
 }
 
 /*
@@ -630,6 +642,98 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou
 	fprintf(stderr, " of all L1-dcache hits  ");
 }
 
+static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_l1_icache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all L1-icache hits  ");
+}
+
+static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all dTLB cache hits ");
+}
+
+static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_itlb_cache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all iTLB cache hits ");
+}
+
+static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_ll_cache_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 20.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 10.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 5.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " of all LL-cache hits   ");
+}
+
 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
 	double total, ratio = 0.0;
@@ -684,6 +788,34 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 			runtime_l1_dcache_stats[cpu].n != 0) {
 		print_l1_dcache_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_l1_icache_stats[cpu].n != 0) {
+		print_l1_icache_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_dtlb_cache_stats[cpu].n != 0) {
+		print_dtlb_cache_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_itlb_cache_stats[cpu].n != 0) {
+		print_itlb_cache_misses(cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_ll_cache_stats[cpu].n != 0) {
+		print_ll_cache_misses(cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
 			runtime_cacherefs_stats[cpu].n != 0) {
 		total = avg_stats(&runtime_cacherefs_stats[cpu]);
@@ -842,10 +974,12 @@ static void print_stat(int argc, const char **argv)
 	}
 
 	if (!csv_output) {
-		fprintf(stderr, "\n");
-		fprintf(stderr, " %18.9f  seconds time elapsed",
+		if (!null_run)
+			fprintf(stderr, "\n");
+		fprintf(stderr, " %17.9f seconds time elapsed",
 				avg_stats(&walltime_nsecs_stats)/1e9);
 		if (run_count > 1) {
+			fprintf(stderr, "                                        ");
 			print_noise_pct(stddev_stats(&walltime_nsecs_stats),
 					avg_stats(&walltime_nsecs_stats));
 		}

commit 2cba3ffb9a9db3874304a1739002d053d53c738b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu May 19 13:30:56 2011 +0200

    perf stat: Add -d -d and -d -d -d options to show more CPU events
    
    Print even more detailed statistics if requested via perf stat -d:
    
           -d:          detailed events, L1 and LLC data cache
        -d -d:     more detailed events, dTLB and iTLB events
     -d -d -d:     very detailed events, adding prefetch events
    
    Full output looks like this now:
    
     Performance counter stats for '/home/mingo/hackbench 10' (5 runs):
    
           1703.674707 task-clock                #    8.709 CPUs utilized            ( +-  4.19% )
                49,068 context-switches          #    0.029 M/sec                    ( +- 16.66% )
                 8,303 CPU-migrations            #    0.005 M/sec                    ( +- 24.90% )
                17,397 page-faults               #    0.010 M/sec                    ( +-  0.46% )
         2,345,389,239 cycles                    #    1.377 GHz                      ( +-  4.61% ) [55.90%]
         1,884,503,527 stalled-cycles-frontend   #   80.35% frontend cycles idle     ( +-  5.67% ) [50.39%]
           743,919,737 stalled-cycles-backend    #   31.72% backend  cycles idle     ( +-  8.75% ) [49.91%]
         1,314,416,379 instructions              #    0.56  insns per cycle
                                                 #    1.43  stalled cycles per insn  ( +-  2.53% ) [60.87%]
           272,592,567 branches                  #  160.003 M/sec                    ( +-  1.74% ) [56.56%]
             3,794,846 branch-misses             #    1.39% of all branches          ( +-  6.59% ) [58.50%]
           449,982,778 L1-dcache-loads           #  264.125 M/sec                    ( +-  2.47% ) [49.88%]
            22,404,961 L1-dcache-load-misses     #    4.98% of all L1-dcache hits    ( +-  6.08% ) [55.05%]
             6,204,750 LLC-loads                 #    3.642 M/sec                    ( +-  8.91% ) [43.75%]
             1,837,411 LLC-load-misses           #    1.078 M/sec                    ( +-  7.27% ) [12.07%]
           411,440,421 L1-icache-loads           #  241.502 M/sec                    ( +-  5.60% ) [36.52%]
            27,556,832 L1-icache-load-misses     #   16.175 M/sec                    ( +-  7.46% ) [46.72%]
           464,067,627 dTLB-loads                #  272.392 M/sec                    ( +-  4.46% ) [54.17%]
            10,765,648 dTLB-load-misses          #    6.319 M/sec                    ( +-  3.18% ) [48.68%]
         1,273,080,386 iTLB-loads                #  747.256 M/sec                    ( +-  3.38% ) [47.53%]
               117,481 iTLB-load-misses          #    0.069 M/sec                    ( +- 14.99% ) [47.01%]
             4,590,653 L1-dcache-prefetches      #    2.695 M/sec                    ( +-  4.49% ) [46.19%]
             1,712,660 L1-dcache-prefetch-misses #    1.005 M/sec                    ( +-  3.75% ) [44.82%]
    
            0.195622057  seconds time elapsed  ( +-  6.84% )
    
    Also clean up the attribute construction code to be appending, and factor
    it out into add_default_attributes().
    
    Tweak the coverage percentage printout a bit, so that it's easier to view it
    alongside the +- sttddev colum.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Steven Rostedt <rostedt@goodmis.org>
    Link: http://lkml.kernel.org/n/tip-to3kgu04449s64062val8b62@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 602c3c96fa1e..a89fc0835367 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -6,24 +6,28 @@
  *
  * Sample output:
 
-   $ perf stat ~/hackbench 10
-   Time: 0.104
+   $ perf stat ./hackbench 10
 
-    Performance counter stats for '/home/mingo/hackbench':
+  Time: 0.118
 
-       1255.538611  task clock ticks     #      10.143 CPU utilization factor
-             54011  context switches     #       0.043 M/sec
-               385  CPU migrations       #       0.000 M/sec
-             17755  pagefaults           #       0.014 M/sec
-        3808323185  CPU cycles           #    3033.219 M/sec
-        1575111190  instructions         #    1254.530 M/sec
-          17367895  cache references     #      13.833 M/sec
-           7674421  cache misses         #       6.112 M/sec
+  Performance counter stats for './hackbench 10':
 
-    Wall-clock time elapsed:   123.786620 msecs
+       1708.761321 task-clock                #   11.037 CPUs utilized
+            41,190 context-switches          #    0.024 M/sec
+             6,735 CPU-migrations            #    0.004 M/sec
+            17,318 page-faults               #    0.010 M/sec
+     5,205,202,243 cycles                    #    3.046 GHz
+     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
+     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
+     2,603,501,247 instructions              #    0.50  insns per cycle
+                                             #    1.48  stalled cycles per insn
+       484,357,498 branches                  #  283.455 M/sec
+         6,388,934 branch-misses             #    1.32% of all branches
+
+        0.154822978  seconds time elapsed
 
  *
- * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
  *
  * Improvements and fixes by:
  *
@@ -75,22 +79,10 @@ static struct perf_event_attr default_attrs[] = {
 };
 
 /*
- * Detailed stats:
+ * Detailed stats (-d), covering the L1 and last level data caches:
  */
 static struct perf_event_attr detailed_attrs[] = {
 
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
-  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
-
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
-
   { .type = PERF_TYPE_HW_CACHE,
     .config =
 	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
@@ -116,6 +108,69 @@ static struct perf_event_attr detailed_attrs[] = {
 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
 };
 
+/*
+ * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
+ */
+static struct perf_event_attr very_detailed_attrs[] = {
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+
+};
+
+/*
+ * Very, very detailed stats (-d -d -d), adding prefetch events:
+ */
+static struct perf_event_attr very_very_detailed_attrs[] = {
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
+
+  { .type = PERF_TYPE_HW_CACHE,
+    .config =
+	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
+	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
+	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
+};
+
+
+
 struct perf_evlist		*evsel_list;
 
 static bool			system_wide			=  false;
@@ -129,7 +184,7 @@ static pid_t			target_pid			= -1;
 static pid_t			target_tid			= -1;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
-static bool			detailed_run			=  false;
+static int			detailed_run			=  0;
 static bool			sync_run			=  false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
@@ -464,7 +519,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
 	double msecs = avg / 1e6;
 	char cpustr[16] = { '\0', };
-	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s";
+	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
 
 	if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
@@ -584,9 +639,9 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 	if (csv_output)
 		fmt = "%s%.0f%s%s";
 	else if (big_num)
-		fmt = "%s%'18.0f%s%-24s";
+		fmt = "%s%'18.0f%s%-25s";
 	else
-		fmt = "%s%18.0f%s%-24s";
+		fmt = "%s%18.0f%s%-25s";
 
 	if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
@@ -616,7 +671,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 
 		if (total && avg) {
 			ratio = total / avg;
-			fprintf(stderr, "\n                                            #   %5.2f  stalled cycles per insn", ratio);
+			fprintf(stderr, "\n                                             #   %5.2f  stalled cycles per insn", ratio);
 		}
 
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
@@ -704,7 +759,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
 		avg_enabled = avg_stats(&ps->res_stats[1]);
 		avg_running = avg_stats(&ps->res_stats[2]);
 
-		fprintf(stderr, "  (%.2f%%)", 100 * avg_running / avg_enabled);
+		fprintf(stderr, " [%5.2f%%]", 100 * avg_running / avg_enabled);
 	}
 	fprintf(stderr, "\n");
 }
@@ -854,7 +909,7 @@ static const struct option options[] = {
 		    "repeat command and print average + stddev (max: 100)"),
 	OPT_BOOLEAN('n', "null", &null_run,
 		    "null run - dont start any counters"),
-	OPT_BOOLEAN('d', "detailed", &detailed_run,
+	OPT_INCR('d', "detailed", &detailed_run,
 		    "detailed run - start a lot of events"),
 	OPT_BOOLEAN('S', "sync", &sync_run,
 		    "call sync() before starting a run"),
@@ -873,6 +928,70 @@ static const struct option options[] = {
 	OPT_END()
 };
 
+/*
+ * Add default attributes, if there were no attributes specified or
+ * if -d/--detailed, -d -d or -d -d -d is used:
+ */
+static int add_default_attributes(void)
+{
+	struct perf_evsel *pos;
+	size_t attr_nr = 0;
+	size_t c;
+
+	/* Set attrs if no event is selected and !null_run: */
+	if (null_run)
+		return 0;
+
+	if (!evsel_list->nr_entries) {
+		for (c = 0; c < ARRAY_SIZE(default_attrs); c++) {
+			pos = perf_evsel__new(default_attrs + c, c + attr_nr);
+			if (pos == NULL)
+				return -1;
+			perf_evlist__add(evsel_list, pos);
+		}
+		attr_nr += c;
+	}
+
+	/* Detailed events get appended to the event list: */
+
+	if (detailed_run <  1)
+		return 0;
+
+	/* Append detailed run extra attributes: */
+	for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) {
+		pos = perf_evsel__new(detailed_attrs + c, c + attr_nr);
+		if (pos == NULL)
+			return -1;
+		perf_evlist__add(evsel_list, pos);
+	}
+	attr_nr += c;
+
+	if (detailed_run < 2)
+		return 0;
+
+	/* Append very detailed run extra attributes: */
+	for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) {
+		pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr);
+		if (pos == NULL)
+			return -1;
+		perf_evlist__add(evsel_list, pos);
+	}
+
+	if (detailed_run < 3)
+		return 0;
+
+	/* Append very, very detailed run extra attributes: */
+	for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) {
+		pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr);
+		if (pos == NULL)
+			return -1;
+		perf_evlist__add(evsel_list, pos);
+	}
+
+
+	return 0;
+}
+
 int cmd_stat(int argc, const char **argv, const char *prefix __used)
 {
 	struct perf_evsel *pos;
@@ -918,28 +1037,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 		usage_with_options(stat_usage, options);
 	}
 
-	/* Set attrs and nr_counters if no event is selected and !null_run */
-	if (detailed_run) {
-		size_t c;
-
-		for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) {
-			pos = perf_evsel__new(&detailed_attrs[c], c);
-			if (pos == NULL)
-				goto out;
-			perf_evlist__add(evsel_list, pos);
-		}
-	}
-	/* Set attrs and nr_counters if no event is selected and !null_run */
-	if (!detailed_run && !null_run && !evsel_list->nr_entries) {
-		size_t c;
-
-		for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
-			pos = perf_evsel__new(&default_attrs[c], c);
-			if (pos == NULL)
-				goto out;
-			perf_evlist__add(evsel_list, pos);
-		}
-	}
+	if (add_default_attributes())
+		goto out;
 
 	if (target_pid != -1)
 		target_tid = target_pid;

commit b313207286a78abac19f1dd2721292eae598b0f5
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed May 18 21:00:44 2011 +0200

    perf bench, x86: Add alternatives-asm.h wrapper
    
    perf bench needs this to build the kernel's memcpy routine:
    
    In file included from bench/mem-memcpy-x86-64-asm.S:2:0:
    bench/../../../arch/x86/lib/memcpy_64.S:7:33: fatal error: asm/alternative-asm.h: No such file or directory
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Steven Rostedt <rostedt@goodmis.org>
    Link: http://lkml.kernel.org/n/tip-c5d41xibgullk8h2280q4gv0@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h
new file mode 100644
index 000000000000..6789d788d494
--- /dev/null
+++ b/tools/perf/util/include/asm/alternative-asm.h
@@ -0,0 +1,8 @@
+#ifndef _PERF_ASM_ALTERNATIVE_ASM_H
+#define _PERF_ASM_ALTERNATIVE_ASM_H
+
+/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
+
+#define altinstruction_entry #
+
+#endif

commit 01ed58abec07633791f03684b937a7e22e00c9bb
Merge: af2d03d4aaa8 26afb7c66108
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed May 18 20:59:27 2011 +0200

    Merge branch 'x86/mem' into perf/core
    
    Merge reason: memcpy_64.S changes an assumption perf bench has, so merge this
                  here so we can fix it.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

commit af2d03d4aaa847ef41a229dfee098a47908437c6
Merge: 94692349c4fc f29638868280
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed May 18 19:46:10 2011 +0200

    Merge branch 'tip/perf/core-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/core

commit 86b9523ab1517f6edeb87295329c901930d3732d
Merge: 2c14ddc135d9 fffcda1183e9
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue May 17 14:39:00 2011 +0200

    Merge branch 'gart/rename' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu into core/iommu

commit 52004ea7ca4c52a219362f973bfd1eb86ff668ce
Merge: 9bbeacf52f66 aece948f5ddd
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun May 15 19:41:00 2011 +0200

    Merge branch 'perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux-2.6 into perf/urgent

commit 411f05f123cbd7f8aa1edcae86970755a6e2a9d9
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu May 12 23:00:28 2011 +0200

    vsprintf: Turn kptr_restrict off by default
    
    kptr_restrict has been triggering bugs in apps such as perf, and it also makes
    the system less useful by default, so turn it off by default.
    
    This is how we generally handle security features that remove functionality,
    such as firewall code or SELinux - they have to be configured and activated
    from user-space.
    
    Distributions can turn kptr_restrict on again via this line in
    /etc/sysctrl.conf:
    
    kernel.kptr_restrict = 1
    
    ( Also mark the variable __read_mostly while at it, as it's typically modified
      only once per bootup, or not at all. )
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: David S. Miller <davem@davemloft.net>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index bc0ac6b333dc..dfd60192bc2e 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -797,7 +797,7 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, uuid, spec);
 }
 
-int kptr_restrict = 1;
+int kptr_restrict __read_mostly;
 
 /*
  * Show a '%p' thing.  A kernel extension is that the '%p' is followed

commit 9cb5baba5e3acba0994ad899ee908799104c9965
Merge: 7142d17e8f93 693d92a1bbc9
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu May 12 09:36:18 2011 +0200

    Merge commit 'v2.6.39-rc7' into sched/core

commit 2c14ddc135d93cb4b33ee4bd4a2b05e21a4a762c
Merge: 54b333529df2 72fe00f01f9a
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue May 10 22:15:57 2011 +0200

    Merge branch 'iommu/2.6.40' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu into core/iommu