Patches contributed by Eötvös Loránd University


commit 370faf1dd0461ad811852c8abbbcd3d73b1e4fc4
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Apr 29 16:11:03 2011 +0200

    perf stat: Fail softly on unsupported events
    
    David Ahern reported this perf stat failure:
    
    > # /tmp/build-perf/perf stat -- sleep 1
    >   Error: stalled-cycles-frontend event is not supported.
    >   Fatal: Not all events could be opened.
    >
    > This is a Dell R410 with an E5620 processor.
    
    Fail in a softer fashion on unknown/unsupported events.
    
    Reported-by: David Ahern <dsahern@gmail.com>
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n006io7hjpn1dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9e596ab98d05..c8b535bc27bd 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -377,7 +377,7 @@ static int run_perf_stat(int argc __used, const char **argv)
 
 	list_for_each_entry(counter, &evsel_list->entries, node) {
 		if (create_perf_stat_counter(counter) < 0) {
-			if (errno == EINVAL || errno == ENOSYS)
+			if (errno == EINVAL || errno == ENOSYS || errno == ENOENT)
 				continue;
 
 			if (errno == EPERM || errno == EACCES) {
@@ -385,8 +385,6 @@ static int run_perf_stat(int argc __used, const char **argv)
 				      "\t Consider tweaking"
 				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
 				      system_wide ? "system-wide " : "");
-			} else if (errno == ENOENT) {
-				error("%s event is not supported. ", event_name(counter));
 			} else {
 				error("open_counter returned with %d (%s). "
 				      "/bin/dmesg may provide additional information.\n",

commit fce3c786d3a49eff397583b4b62fa38df90db937
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Apr 30 09:03:15 2011 +0200

    perf stat: Leave more room for percentages
    
    Triple digit percentages do not fit otherwise.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n005io7hjpn1dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2492a0efa4d8..9e596ab98d05 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -499,8 +499,8 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us
 	else if (ratio > 10.0)
 		color = PERF_COLOR_YELLOW;
 
-	fprintf(stderr, " #   ");
-	color_fprintf(stderr, color, "%5.2f%%", ratio);
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
 	fprintf(stderr, " frontend cycles idle   ");
 }
 
@@ -522,9 +522,9 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use
 	else if (ratio > 20.0)
 		color = PERF_COLOR_YELLOW;
 
-	fprintf(stderr, " #   ");
-	color_fprintf(stderr, color, "%5.2f%%", ratio);
-	fprintf(stderr, "  backend cycles idle   ");
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
+	fprintf(stderr, " backend  cycles idle   ");
 }
 
 static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
@@ -545,8 +545,8 @@ static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double
 	else if (ratio > 5.0)
 		color = PERF_COLOR_YELLOW;
 
-	fprintf(stderr, " #   ");
-	color_fprintf(stderr, color, "%5.2f%%", ratio);
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
 	fprintf(stderr, " of all branches        ");
 }
 
@@ -568,8 +568,8 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou
 	else if (ratio > 5.0)
 		color = PERF_COLOR_YELLOW;
 
-	fprintf(stderr, " #   ");
-	color_fprintf(stderr, color, "%5.2f%%", ratio);
+	fprintf(stderr, " #  ");
+	color_fprintf(stderr, color, "%6.2f%%", ratio);
 	fprintf(stderr, " of all L1-dcache hits  ");
 }
 
@@ -607,14 +607,14 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 		if (total)
 			ratio = avg / total;
 
-		fprintf(stderr, " #    %4.2f  insns per cycle        ", ratio);
+		fprintf(stderr, " #   %5.2f  insns per cycle        ", ratio);
 
 		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
 
 		if (total && avg) {
 			ratio = total / avg;
-			fprintf(stderr, "\n                                            #    %4.2f  stalled cycles per insn", ratio);
+			fprintf(stderr, "\n                                            #   %5.2f  stalled cycles per insn", ratio);
 		}
 
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&

commit 2b427e14b77dbf3e05f1bd0785f1d07ea5fe924e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Apr 29 14:16:18 2011 +0200

    perf stat: Adjust stall cycles warning percentages
    
    Adjust the color thresholds to better match the percentages seen in
    real workloads. Both are now a bit more sensitive.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n004io7hjpn1dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e45449938b80..2492a0efa4d8 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -492,11 +492,11 @@ static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __us
 		ratio = avg / total * 100.0;
 
 	color = PERF_COLOR_NORMAL;
-	if (ratio > 75.0)
+	if (ratio > 50.0)
 		color = PERF_COLOR_RED;
-	else if (ratio > 50.0)
+	else if (ratio > 30.0)
 		color = PERF_COLOR_MAGENTA;
-	else if (ratio > 20.0)
+	else if (ratio > 10.0)
 		color = PERF_COLOR_YELLOW;
 
 	fprintf(stderr, " #   ");
@@ -519,7 +519,7 @@ static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __use
 		color = PERF_COLOR_RED;
 	else if (ratio > 50.0)
 		color = PERF_COLOR_MAGENTA;
-	else if (ratio > 25.0)
+	else if (ratio > 20.0)
 		color = PERF_COLOR_YELLOW;
 
 	fprintf(stderr, " #   ");

commit d3d1e86da07b4565815e3dbcd082f53017d215f8
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Apr 29 13:49:08 2011 +0200

    perf stat: Analyze front-end and back-end stall counts
    
    Sample output:
    
     Performance counter stats for './loop_1b':
    
            873.691065 task-clock               #    1.000 CPUs utilized
                     1 context-switches         #    0.000 M/sec
                     1 CPU-migrations           #    0.000 M/sec
                    96 page-faults              #    0.000 M/sec
         2,012,637,222 cycles                   #    2.304 GHz                      (66.58%)
         1,001,397,911 stalled-cycles-frontend  #   49.76% frontend cycles idle     (66.58%)
             7,523,398 stalled-cycles-backend   #    0.37%  backend cycles idle     (66.76%)
         2,004,551,046 instructions             #    1.00  insns per cycle
                                                #    0.50  stalled cycles per insn  (66.80%)
         1,001,304,992 branches                 # 1146.063 M/sec                    (66.76%)
                39,453 branch-misses            #    0.00% of all branches          (66.64%)
    
            0.874046121  seconds time elapsed
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n003io7hjpn1dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6a4a8a399d95..e45449938b80 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -201,7 +201,8 @@ static double stddev_stats(struct stats *stats)
 
 struct stats			runtime_nsecs_stats[MAX_NR_CPUS];
 struct stats			runtime_cycles_stats[MAX_NR_CPUS];
-struct stats			runtime_stalled_cycles_stats[MAX_NR_CPUS];
+struct stats			runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
+struct stats			runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
 struct stats			runtime_branches_stats[MAX_NR_CPUS];
 struct stats			runtime_cacherefs_stats[MAX_NR_CPUS];
 struct stats			runtime_l1_dcache_stats[MAX_NR_CPUS];
@@ -251,8 +252,10 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 		update_stats(&runtime_nsecs_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 		update_stats(&runtime_cycles_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-		update_stats(&runtime_stalled_cycles_stats[0], count[0]);
+		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
 		update_stats(&runtime_branches_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
@@ -478,7 +481,30 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
 		fprintf(stderr, " # %8.3f CPUs utilized          ", avg / avg_stats(&walltime_nsecs_stats));
 }
 
-static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, double avg)
+static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel __used, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+
+	total = avg_stats(&runtime_cycles_stats[cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = PERF_COLOR_NORMAL;
+	if (ratio > 75.0)
+		color = PERF_COLOR_RED;
+	else if (ratio > 50.0)
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > 20.0)
+		color = PERF_COLOR_YELLOW;
+
+	fprintf(stderr, " #   ");
+	color_fprintf(stderr, color, "%5.2f%%", ratio);
+	fprintf(stderr, " frontend cycles idle   ");
+}
+
+static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel __used, double avg)
 {
 	double total, ratio = 0.0;
 	const char *color;
@@ -498,7 +524,7 @@ static void print_stalled_cycles(int cpu, struct perf_evsel *evsel __used, doubl
 
 	fprintf(stderr, " #   ");
 	color_fprintf(stderr, color, "%5.2f%%", ratio);
-	fprintf(stderr, " of all cycles are idle ");
+	fprintf(stderr, "  backend cycles idle   ");
 }
 
 static void print_branch_misses(int cpu, struct perf_evsel *evsel __used, double avg)
@@ -583,7 +609,8 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 
 		fprintf(stderr, " #    %4.2f  insns per cycle        ", ratio);
 
-		total = avg_stats(&runtime_stalled_cycles_stats[cpu]);
+		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
+		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
 
 		if (total && avg) {
 			ratio = total / avg;
@@ -609,8 +636,10 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 
 		fprintf(stderr, " # %8.3f %% of all cache refs    ", ratio);
 
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+		print_stalled_cycles_frontend(cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles(cpu, evsel, avg);
+		print_stalled_cycles_backend(cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 04d2f0a96674..8a407f3e286f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -60,7 +60,7 @@ static struct event_symbol event_symbols[] = {
 #define PERF_EVENT_TYPE(config)		__PERF_EVENT_FIELD(config, TYPE)
 #define PERF_EVENT_ID(config)		__PERF_EVENT_FIELD(config, EVENT)
 
-static const char *hw_event_names[] = {
+static const char *hw_event_names[PERF_COUNT_HW_MAX] = {
 	"cycles",
 	"instructions",
 	"cache-references",
@@ -68,10 +68,11 @@ static const char *hw_event_names[] = {
 	"branches",
 	"branch-misses",
 	"bus-cycles",
-	"stalled-cycles",
+	"stalled-cycles-frontend",
+	"stalled-cycles-backend",
 };
 
-static const char *sw_event_names[] = {
+static const char *sw_event_names[PERF_COUNT_SW_MAX] = {
 	"cpu-clock",
 	"task-clock",
 	"page-faults",

commit 129c04cb8ce2e4bf3f17223f58ef16aa8a2cb3b8
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Apr 29 14:41:28 2011 +0200

    perf tools: Add front-end and back-end stalled cycles support
    
    Update perf tooling to deal with front-end and back-end stalled cycles events.
    
    Add both to the default 'perf stat' output.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n002io7hjpn1dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index da77077450cf..6a4a8a399d95 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -66,7 +66,8 @@ static struct perf_event_attr default_attrs[] = {
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
@@ -84,7 +85,8 @@ static struct perf_event_attr detailed_attrs[] = {
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
 
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
-  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
   { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
@@ -249,7 +251,7 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 		update_stats(&runtime_nsecs_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 		update_stats(&runtime_cycles_stats[0], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES))
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
 		update_stats(&runtime_stalled_cycles_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
 		update_stats(&runtime_branches_stats[0], count[0]);
@@ -607,7 +609,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 
 		fprintf(stderr, " # %8.3f %% of all cache refs    ", ratio);
 
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
 		print_stalled_cycles(cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index bbbb735268ef..04d2f0a96674 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -31,24 +31,25 @@ char debugfs_path[MAXPATHLEN];
 #define CSW(x) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##x
 
 static struct event_symbol event_symbols[] = {
-  { CHW(CPU_CYCLES),		"cpu-cycles",		"cycles"	},
-  { CHW(STALLED_CYCLES),	"stalled-cycles",	"idle-cycles"	},
-  { CHW(INSTRUCTIONS),		"instructions",		""		},
-  { CHW(CACHE_REFERENCES),	"cache-references",	""		},
-  { CHW(CACHE_MISSES),		"cache-misses",		""		},
-  { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	"branches"	},
-  { CHW(BRANCH_MISSES),		"branch-misses",	""		},
-  { CHW(BUS_CYCLES),		"bus-cycles",		""		},
-
-  { CSW(CPU_CLOCK),		"cpu-clock",		""		},
-  { CSW(TASK_CLOCK),		"task-clock",		""		},
-  { CSW(PAGE_FAULTS),		"page-faults",		"faults"	},
-  { CSW(PAGE_FAULTS_MIN),	"minor-faults",		""		},
-  { CSW(PAGE_FAULTS_MAJ),	"major-faults",		""		},
-  { CSW(CONTEXT_SWITCHES),	"context-switches",	"cs"		},
-  { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
-  { CSW(ALIGNMENT_FAULTS),	"alignment-faults",	""		},
-  { CSW(EMULATION_FAULTS),	"emulation-faults",	""		},
+  { CHW(CPU_CYCLES),			"cpu-cycles",			"cycles"		},
+  { CHW(STALLED_CYCLES_FRONTEND),	"stalled-cycles-frontend",	"idle-cycles-frontend"	},
+  { CHW(STALLED_CYCLES_BACKEND),	"stalled-cycles-backend",	"idle-cycles-backend"	},
+  { CHW(INSTRUCTIONS),			"instructions",			""			},
+  { CHW(CACHE_REFERENCES),		"cache-references",		""			},
+  { CHW(CACHE_MISSES),			"cache-misses",			""			},
+  { CHW(BRANCH_INSTRUCTIONS),		"branch-instructions",		"branches"		},
+  { CHW(BRANCH_MISSES),			"branch-misses",		""			},
+  { CHW(BUS_CYCLES),			"bus-cycles",			""			},
+
+  { CSW(CPU_CLOCK),			"cpu-clock",			""			},
+  { CSW(TASK_CLOCK),			"task-clock",			""			},
+  { CSW(PAGE_FAULTS),			"page-faults",			"faults"		},
+  { CSW(PAGE_FAULTS_MIN),		"minor-faults",			""			},
+  { CSW(PAGE_FAULTS_MAJ),		"major-faults",			""			},
+  { CSW(CONTEXT_SWITCHES),		"context-switches",		"cs"			},
+  { CSW(CPU_MIGRATIONS),		"cpu-migrations",		"migrations"		},
+  { CSW(ALIGNMENT_FAULTS),		"alignment-faults",		""			},
+  { CSW(EMULATION_FAULTS),		"emulation-faults",		""			},
 };
 
 #define __PERF_EVENT_FIELD(config, name) \
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 406f613ee619..8b0eff8b8283 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -798,7 +798,6 @@ static struct {
 	{ "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
 	{ "COUNT_HW_BRANCH_MISSES",	  PERF_COUNT_HW_BRANCH_MISSES },
 	{ "COUNT_HW_BUS_CYCLES",	  PERF_COUNT_HW_BUS_CYCLES },
-	{ "COUNT_HW_STALLED_CYCLES",	  PERF_COUNT_HW_STALLED_CYCLES },
 	{ "COUNT_HW_CACHE_L1D",		  PERF_COUNT_HW_CACHE_L1D },
 	{ "COUNT_HW_CACHE_L1I",		  PERF_COUNT_HW_CACHE_L1I },
 	{ "COUNT_HW_CACHE_LL",	  	  PERF_COUNT_HW_CACHE_LL },
@@ -811,6 +810,9 @@ static struct {
 	{ "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS },
 	{ "COUNT_HW_CACHE_RESULT_MISS",   PERF_COUNT_HW_CACHE_RESULT_MISS },
 
+	{ "COUNT_HW_STALLED_CYCLES_FRONTEND",	  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+	{ "COUNT_HW_STALLED_CYCLES_BACKEND",	  PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+
 	{ "COUNT_SW_CPU_CLOCK",	       PERF_COUNT_SW_CPU_CLOCK },
 	{ "COUNT_SW_TASK_CLOCK",       PERF_COUNT_SW_TASK_CLOCK },
 	{ "COUNT_SW_PAGE_FAULTS",      PERF_COUNT_SW_PAGE_FAULTS },

commit 91fc4cc00099986bc1ba50e1f421c3548cffae42
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Apr 29 14:17:19 2011 +0200

    perf, x86: Add new stalled cycles events for Intel and AMD CPUs
    
    Extend the Intel and AMD event definitions with generic front-end and
    back-end stall events.
    
    ( These are only approximations - suggestions are welcome for better events. )
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n001io7hjpn1dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index cf4e369cea67..fe29c1d2219e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -96,12 +96,14 @@ static __initconst const u64 amd_hw_cache_event_ids
  */
 static const u64 amd_perfmon_event_map[] =
 {
-  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
-  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
-  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
-  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
+  [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
+  [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]		= 0x0080,
+  [PERF_COUNT_HW_CACHE_MISSES]			= 0x0081,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]		= 0x00c2,
+  [PERF_COUNT_HW_BRANCH_MISSES]			= 0x00c3,
+  [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
+  [PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= 0x00d1, /* "Dispatch stalls" event */
 };
 
 static u64 amd_pmu_event_map(int hw_event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 393085b87a2c..7983b9a9533b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1413,7 +1413,9 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
-		/* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		/* UOPS_ISSUED.STALLED_CYCLES */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x180010e;
+		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
 
 		if (ebx & 0x40) {

commit 8f62242246351b5a4bc0c1f00c0c7003edea128a
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Apr 29 13:19:47 2011 +0200

    perf events: Add generic front-end and back-end stalled cycle event definitions
    
    Add two generic hardware events: front-end and back-end stalled cycles.
    
    These events measure conditions when the CPU is executing code but its
    capabilities are not fully utilized. Understanding such situations and
    analyzing them is an important sub-task of code optimization workflows.
    
    Both events limit performance: most front-end stalls tend to be caused
    by branch misprediction or instruction fetch cache misses; back-end
    stalls can be caused by various resource shortages or inefficient
    instruction scheduling.
    
    Front-end stalls are the more important ones: code cannot run fast
    if the instruction stream is not being kept up.
    
    An over-utilized back-end can cause front-end stalls and thus
    has to be watched as well.
    
    The exact composition depends heavily on program logic and
    instruction mix.
    
    We use the terms 'stall', 'front-end' and 'back-end' loosely and
    try to use the best available events from specific CPUs that
    approximate these concepts.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n000io7hjpn1dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 1ea94224f62e..393085b87a2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1414,7 +1414,7 @@ static __init int intel_pmu_init(void)
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
 		/* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1;
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
 
 		if (ebx & 0x40) {
 			/*
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ac636dd20a0c..4e2d7ae71499 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -52,7 +52,8 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
-	PERF_COUNT_HW_STALLED_CYCLES		= 7,
+	PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	= 7,
+	PERF_COUNT_HW_STALLED_CYCLES_BACKEND	= 8,
 
 	PERF_COUNT_HW_MAX,			/* non-ABI */
 };

commit ede70290046043b2638204cab55e26ea1d0c6cd9
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Apr 28 08:48:42 2011 +0200

    perf stat: Fix compatibility behavior
    
    Instead of failing on an unknown event, when new perf stat is run on
    older kernels:
    
      $ ./perf stat true
      Error: open_counter returned with 22 (Invalid argument). /bin/dmesg
      may provide additional information.
    
      Fatal: Not all events could be opened.
    
    Just ignore EINVAL and ENOSYS, we'll print the results as not counted:
    
     Performance counter stats for 'true':
    
              0.239483 task-clock               #    0.493 CPUs utilized
                     0 context-switches         #    0.000 M/sec
                     0 CPU-migrations           #    0.000 M/sec
                    86 page-faults              #    0.359 M/sec
               704,766 cycles                   #    2.943 GHz
         <not counted> stalled-cycles
               381,961 instructions             #    0.54  insns per cycle
                69,626 branches                 #  290.735 M/sec
                 4,594 branch-misses            #    6.60% of all branches
    
            0.000485883  seconds time elapsed
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio5hjpn3dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5658a770dbd7..da77077450cf 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -372,7 +372,10 @@ static int run_perf_stat(int argc __used, const char **argv)
 
 	list_for_each_entry(counter, &evsel_list->entries, node) {
 		if (create_perf_stat_counter(counter) < 0) {
-			if (errno == -EPERM || errno == -EACCES) {
+			if (errno == EINVAL || errno == ENOSYS)
+				continue;
+
+			if (errno == EPERM || errno == EACCES) {
 				error("You may not have permission to collect %sstats.\n"
 				      "\t Consider tweaking"
 				      " /proc/sys/kernel/perf_event_paranoid or running as root.",

commit f9cef0a90c4e7637f1ec98474a1a099aec45eb65
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Apr 28 18:17:11 2011 +0200

    perf stat: Add --sync/-S option
    
    --sync will tell perf stat to run sync() before starting a command.
    
    This allows IO-heavy tests to be used with --repeat, without one
    iteration impacting the other.
    
    Elapsed time will stabilize for example:
    
      before:        3.971525714  seconds time elapsed  ( +-  8.56% )
      after:         3.211098537  seconds time elapsed  ( +-  1.52% )
    
    So measurements will be more accurate.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio7hjpn1dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 003caa857a44..5658a770dbd7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -128,6 +128,7 @@ static pid_t			target_tid			= -1;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static bool			detailed_run			=  false;
+static bool			sync_run			=  false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*cpu_list;
@@ -819,6 +820,8 @@ static const struct option options[] = {
 		    "null run - dont start any counters"),
 	OPT_BOOLEAN('d', "detailed", &detailed_run,
 		    "detailed run - start a lot of events"),
+	OPT_BOOLEAN('S', "sync", &sync_run,
+		    "call sync() before starting a run"),
 	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, 
 			   "print large numbers with thousands\' separators",
 			   stat__set_big_num),
@@ -944,6 +947,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 	for (run_idx = 0; run_idx < run_count; run_idx++) {
 		if (run_count != 1 && verbose)
 			fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx + 1);
+
+		if (sync_run)
+			sync();
+
 		status = run_perf_stat(argc, argv);
 	}
 

commit 8a850cadca0e387c87a0911a61e99fd66aeb57ec
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Apr 28 11:16:44 2011 +0200

    perf event, x86: Use better stalled cycles metric
    
    Use the UOPS_EXECUTED.*,c=1,i=1 event on Intel CPUs - it is a rather
    good indicator of CPU execution stalls, more sensitive and more inclusive
    than the 0xa2 resource stalls event (which does not count nearly as many
    stall types).
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    Link: http://lkml.kernel.org/n/tip-7y40wib8n1eqio7hjpn2dsrm@git.kernel.org
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 067a48b13a76..1ea94224f62e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1413,8 +1413,8 @@ static __init int intel_pmu_init(void)
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
 		x86_pmu.extra_regs = intel_nehalem_extra_regs;
 
-		/* Install the stalled-cycles event: 0xff: All reasons, 0xa2: Resource stalls */
-		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0xffa2;
+		/* Install the stalled-cycles event: UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES] = 0x1803fb1;
 
 		if (ebx & 0x40) {
 			/*