Patches contributed by Eötvös Loránd University

<<Prev 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58[59]60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 Next>>

commit eb24073bc1fe3e569a855cf38d529fb650c35524
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 16 21:09:13 2009 +0200

    sched: Fix TASK_WAKING & loadaverage breakage
    
    Fix this:
    
    top - 21:54:00 up  2:59,  1 user,  load average: 432512.33, 426421.74, 417432.74
    
    Which happens because we now set TASK_WAKING before activate_task().
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 5049d959bb26..969dfaef2465 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2343,7 +2343,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	/*
 	 * In order to handle concurrent wakeups and release the rq->lock
 	 * we put the task in TASK_WAKING state.
+	 *
+	 * First fix up the nr_uninterruptible count:
 	 */
+	if (task_contributes_to_load(p))
+		rq->nr_uninterruptible--;
 	p->state = TASK_WAKING;
 	task_rq_unlock(rq, &flags);

commit 8d7ac69ffaf740cdf98bdd5073c2d70a8828200e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Aug 18 16:45:25 2009 +0000

    Blackfin: Fix link errors with binutils 2.19 and GCC 4.3
    
    Not sure whether this has been reported/fixed before.
    
    Today I built a Blackfin tool-chain from scratch for -tip testing,
    and it triggers:
    
     arch/blackfin/kernel/vmlinux.lds:1238: undefined section `.data_a_l1' referenced in expression
    
    and:
    
     arch/blackfin/kernel/vmlinux.lds:1238: undefined section `.text_data_l1' referenced in expression
    
    Now I don't have any way to test this linker script, but it now at
    least builds fine after fixing what appear to be typos in those
    assert statements.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Mike Frysinger <vapier@gentoo.org>

diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S
index d7ffe299b979..21ac7c26079e 100644
--- a/arch/blackfin/kernel/vmlinux.lds.S
+++ b/arch/blackfin/kernel/vmlinux.lds.S
@@ -221,7 +221,7 @@ SECTIONS
 		. = ALIGN(4);
 		__ebss_l1 = .;
 	}
-	ASSERT (SIZEOF(.data_a_l1) <= L1_DATA_A_LENGTH, "L1 data A overflow!")
+	ASSERT (SIZEOF(.data_l1) <= L1_DATA_A_LENGTH, "L1 data A overflow!")
 
 	.data_b_l1 L1_DATA_B_START : AT(LOADADDR(.data_l1) + SIZEOF(.data_l1))
 	{
@@ -262,7 +262,7 @@ SECTIONS
 		. = ALIGN(4);
 		__ebss_l2 = .;
 	}
-	ASSERT (SIZEOF(.text_data_l1) <= L2_LENGTH, "L2 overflow!")
+	ASSERT (SIZEOF(.text_data_l2) <= L2_LENGTH, "L2 overflow!")
 
 	/* Force trailing alignment of our init section so that when we
 	 * free our init memory, we don't leave behind a partial page.

commit 40d9d82c8ab8c4e2373a23a1e31dc8d84c53aa01
Merge: 983f2163e7fd b36461da2a03
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 16 21:16:37 2009 +0200

    Merge branch 'tip/tracing/core4' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into tracing/core

commit 0ec04e16d08b69d8da46abbcfa3e3f2cd9738852
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 16 17:40:48 2009 +0200

    perf sched: Add 'perf sched map' scheduling event map printout
    
    This prints a textual context-switching outline of workload
    captured via perf sched record.
    
    For example, on a 16 CPU box it outputs:
    
       N1  O1  .   .   .   S1  .   .   .   B0  .  *I0  C1  .   M1  .    23002.773423 secs
       N1  O1  .  *Q0  .   S1  .   .   .   B0  .   I0  C1  .   M1  .    23002.773423 secs
       N1  O1  .   Q0  .   S1  .   .   .   B0  .  *R1  C1  .   M1  .    23002.773485 secs
       N1  O1  .   Q0  .   S1  .  *S0  .   B0  .   R1  C1  .   M1  .    23002.773478 secs
      *L0  O1  .   Q0  .   S1  .   S0  .   B0  .   R1  C1  .   M1  .    23002.773523 secs
       L0  O1  .  *.   .   S1  .   S0  .   B0  .   R1  C1  .   M1  .    23002.773531 secs
       L0  O1  .   .   .   S1  .   S0  .   B0  .   R1  C1 *T1  M1  .    23002.773547 secs T1 => irqbalance:2089
       L0  O1  .   .   .   S1  .   S0  .  *P0  .   R1  C1  T1  M1  .    23002.773549 secs
      *N1  O1  .   .   .   S1  .   S0  .   P0  .   R1  C1  T1  M1  .    23002.773566 secs
       N1  O1  .   .   .  *J0  .   S0  .   P0  .   R1  C1  T1  M1  .    23002.773571 secs
       N1  O1  .   .   .   J0  .   S0 *B0  P0  .   R1  C1  T1  M1  .    23002.773592 secs
       N1  O1  .   .   .   J0  .  *U0  B0  P0  .   R1  C1  T1  M1  .    23002.773582 secs
       N1  O1  .   .   .  *S1  .   U0  B0  P0  .   R1  C1  T1  M1  .    23002.773604 secs
       N1  O1  .   .   .   S1  .   U0  B0 *.   .   R1  C1  T1  M1  .    23002.773615 secs
       N1  O1  .   .   .   S1  .   U0  B0  .   .  *K0  C1  T1  M1  .    23002.773631 secs
       N1  O1  .  *M0  .   S1  .   U0  B0  .   .   K0  C1  T1  M1  .    23002.773624 secs
       N1  O1  .   M0  .   S1  .   U0 *.   .   .   K0  C1  T1  M1  .    23002.773644 secs
       N1  O1  .   M0  .   S1  .   U0  .   .   .  *R1  C1  T1  M1  .    23002.773662 secs
       N1  O1  .   M0  .   S1  .  *.   .   .   .   R1  C1  T1  M1  .    23002.773648 secs
       N1  O1  .  *.   .   S1  .   .   .   .   .   R1  C1  T1  M1  .    23002.773680 secs
       N1  O1  .   .   .  *L0  .   .   .   .   .   R1  C1  T1  M1  .    23002.773717 secs
      *N0  O1  .   .   .   L0  .   .   .   .   .   R1  C1  T1  M1  .    23002.773709 secs
      *N1  O1  .   .   .   L0  .   .   .   .   .   R1  C1  T1  M1  .    23002.773747 secs
    
    Columns stand for individual CPUs, from CPU0 to CPU15, and the
    two-letter shortcuts stand for tasks that are running on a CPU.
    
    '*' denotes the CPU that had the event.
    
    A dot signals an idle CPU.
    
    New tasks are assigned new two-letter shortcuts - the mapping is printed
    when a shortcut first occurs. In the above example 'T1' stood for irqbalance:
    
          T1 => irqbalance:2089
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index da8f67483ae7..f67e351b050b 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -159,8 +159,6 @@ static struct rb_root		atom_root, sorted_atom_root;
 static u64			all_runtime;
 static u64			all_count;
 
-static int read_events(void);
-
 
 static u64 get_nsecs(void)
 {
@@ -634,38 +632,6 @@ static void test_calibrations(void)
 	printf("the sleep test took %Ld nsecs\n", T1-T0);
 }
 
-static void __cmd_replay(void)
-{
-	unsigned long i;
-
-	calibrate_run_measurement_overhead();
-	calibrate_sleep_measurement_overhead();
-
-	test_calibrations();
-
-	read_events();
-
-	printf("nr_run_events:        %ld\n", nr_run_events);
-	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
-	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);
-
-	if (targetless_wakeups)
-		printf("target-less wakeups:  %ld\n", targetless_wakeups);
-	if (multitarget_wakeups)
-		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
-	if (nr_run_events_optimized)
-		printf("run atoms optimized: %ld\n",
-			nr_run_events_optimized);
-
-	print_task_traces();
-	add_cross_task_wakeups();
-
-	create_tasks();
-	printf("------------------------------------------------------------\n");
-	for (i = 0; i < replay_repeat; i++)
-		run_one_test();
-}
-
 static int
 process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 {
@@ -1354,64 +1320,6 @@ static void sort_lat(void)
 	}
 }
 
-static void __cmd_lat(void)
-{
-	struct rb_node *next;
-
-	setup_pager();
-	read_events();
-	sort_lat();
-
-	printf("\n -----------------------------------------------------------------------------------------\n");
-	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms |\n");
-	printf(" -----------------------------------------------------------------------------------------\n");
-
-	next = rb_first(&sorted_atom_root);
-
-	while (next) {
-		struct work_atoms *work_list;
-
-		work_list = rb_entry(next, struct work_atoms, node);
-		output_lat_thread(work_list);
-		next = rb_next(next);
-	}
-
-	printf(" -----------------------------------------------------------------------------------------\n");
-	printf("  TOTAL:                |%11.3f ms |%9Ld |\n",
-		(double)all_runtime/1e6, all_count);
-
-	printf(" ---------------------------------------------------\n");
-	if (nr_unordered_timestamps && nr_timestamps) {
-		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
-			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
-			nr_unordered_timestamps, nr_timestamps);
-	} else {
-	}
-	if (nr_lost_events && nr_events) {
-		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
-			(double)nr_lost_events/(double)nr_events*100.0,
-			nr_lost_events, nr_events, nr_lost_chunks);
-	}
-	if (nr_state_machine_bugs && nr_timestamps) {
-		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
-			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
-			nr_state_machine_bugs, nr_timestamps);
-		if (nr_lost_events)
-			printf(" (due to lost events?)");
-		printf("\n");
-	}
-	if (nr_context_switch_bugs && nr_timestamps) {
-		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
-			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
-			nr_context_switch_bugs, nr_timestamps);
-		if (nr_lost_events)
-			printf(" (due to lost events?)");
-		printf("\n");
-	}
-	printf("\n");
-
-}
-
 static struct trace_sched_handler *trace_handler;
 
 static void
@@ -1431,19 +1339,106 @@ process_sched_wakeup_event(struct raw_event_sample *raw,
 	FILL_FIELD(wakeup_event, success, event, raw->data);
 	FILL_FIELD(wakeup_event, cpu, event, raw->data);
 
-	trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
+	if (trace_handler->wakeup_event)
+		trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
 }
 
 /*
  * Track the current task - that way we can know whether there's any
  * weird events, such as a task being switched away that is not current.
  */
+static int max_cpu = 15;
+
 static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };
 
+static struct thread *curr_thread[MAX_CPUS];
+
+static char next_shortname1 = 'A';
+static char next_shortname2 = '0';
+
+static void
+map_switch_event(struct trace_switch_event *switch_event,
+		 struct event *event __used,
+		 int this_cpu,
+		 u64 timestamp,
+		 struct thread *thread __used)
+{
+	struct thread *sched_out, *sched_in;
+	int new_shortname;
+	u64 timestamp0;
+	s64 delta;
+	int cpu;
+
+	BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
+
+	if (this_cpu > max_cpu)
+		max_cpu = this_cpu;
+
+	timestamp0 = cpu_last_switched[this_cpu];
+	cpu_last_switched[this_cpu] = timestamp;
+	if (timestamp0)
+		delta = timestamp - timestamp0;
+	else
+		delta = 0;
+
+	if (delta < 0)
+		die("hm, delta: %Ld < 0 ?\n", delta);
+
+
+	sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
+	sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
+
+	curr_thread[this_cpu] = sched_in;
+
+	printf("  ");
+
+	new_shortname = 0;
+	if (!sched_in->shortname[0]) {
+		sched_in->shortname[0] = next_shortname1;
+		sched_in->shortname[1] = next_shortname2;
+
+		if (next_shortname1 < 'Z') {
+			next_shortname1++;
+		} else {
+			next_shortname1='A';
+			if (next_shortname2 < '9') {
+				next_shortname2++;
+			} else {
+				next_shortname2='0';
+			}
+		}
+		new_shortname = 1;
+	}
+
+	for (cpu = 0; cpu <= max_cpu; cpu++) {
+		if (cpu != this_cpu)
+			printf(" ");
+		else
+			printf("*");
+
+		if (curr_thread[cpu]) {
+			if (curr_thread[cpu]->pid)
+				printf("%2s ", curr_thread[cpu]->shortname);
+			else
+				printf(".  ");
+		} else
+			printf("   ");
+	}
+
+	printf("  %12.6f secs ", (double)timestamp/1e9);
+	if (new_shortname) {
+		printf("%s => %s:%d\n",
+			sched_in->shortname, sched_in->comm, sched_in->pid);
+	} else {
+		printf("\n");
+	}
+}
+
+
 static void
 process_sched_switch_event(struct raw_event_sample *raw,
 			   struct event *event,
-			   int cpu,
+			   int this_cpu,
 			   u64 timestamp __used,
 			   struct thread *thread __used)
 {
@@ -1459,17 +1454,18 @@ process_sched_switch_event(struct raw_event_sample *raw,
 	FILL_FIELD(switch_event, next_pid, event, raw->data);
 	FILL_FIELD(switch_event, next_prio, event, raw->data);
 
-	if (curr_pid[cpu] != (u32)-1) {
+	if (curr_pid[this_cpu] != (u32)-1) {
 		/*
 		 * Are we trying to switch away a PID that is
 		 * not current?
 		 */
-		if (curr_pid[cpu] != switch_event.prev_pid)
+		if (curr_pid[this_cpu] != switch_event.prev_pid)
 			nr_context_switch_bugs++;
 	}
-	curr_pid[cpu] = switch_event.next_pid;
+	if (trace_handler->switch_event)
+		trace_handler->switch_event(&switch_event, event, this_cpu, timestamp, thread);
 
-	trace_handler->switch_event(&switch_event, event, cpu, timestamp, thread);
+	curr_pid[this_cpu] = switch_event.next_pid;
 }
 
 static void
@@ -1486,7 +1482,8 @@ process_sched_runtime_event(struct raw_event_sample *raw,
 	FILL_FIELD(runtime_event, runtime, event, raw->data);
 	FILL_FIELD(runtime_event, vruntime, event, raw->data);
 
-	trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
+	if (trace_handler->runtime_event)
+		trace_handler->runtime_event(&runtime_event, event, cpu, timestamp, thread);
 }
 
 static void
@@ -1505,7 +1502,8 @@ process_sched_fork_event(struct raw_event_sample *raw,
 	FILL_ARRAY(fork_event, child_comm, event, raw->data);
 	FILL_FIELD(fork_event, child_pid, event, raw->data);
 
-	trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
+	if (trace_handler->fork_event)
+		trace_handler->fork_event(&fork_event, event, cpu, timestamp, thread);
 }
 
 static void
@@ -1748,6 +1746,116 @@ static int read_events(void)
 	return rc;
 }
 
+static void print_bad_events(void)
+{
+	if (nr_unordered_timestamps && nr_timestamps) {
+		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
+			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
+			nr_unordered_timestamps, nr_timestamps);
+	}
+	if (nr_lost_events && nr_events) {
+		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
+			(double)nr_lost_events/(double)nr_events*100.0,
+			nr_lost_events, nr_events, nr_lost_chunks);
+	}
+	if (nr_state_machine_bugs && nr_timestamps) {
+		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
+			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
+			nr_state_machine_bugs, nr_timestamps);
+		if (nr_lost_events)
+			printf(" (due to lost events?)");
+		printf("\n");
+	}
+	if (nr_context_switch_bugs && nr_timestamps) {
+		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
+			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
+			nr_context_switch_bugs, nr_timestamps);
+		if (nr_lost_events)
+			printf(" (due to lost events?)");
+		printf("\n");
+	}
+}
+
+static void __cmd_lat(void)
+{
+	struct rb_node *next;
+
+	setup_pager();
+	read_events();
+	sort_lat();
+
+	printf("\n -----------------------------------------------------------------------------------------\n");
+	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms |\n");
+	printf(" -----------------------------------------------------------------------------------------\n");
+
+	next = rb_first(&sorted_atom_root);
+
+	while (next) {
+		struct work_atoms *work_list;
+
+		work_list = rb_entry(next, struct work_atoms, node);
+		output_lat_thread(work_list);
+		next = rb_next(next);
+	}
+
+	printf(" -----------------------------------------------------------------------------------------\n");
+	printf("  TOTAL:                |%11.3f ms |%9Ld |\n",
+		(double)all_runtime/1e6, all_count);
+
+	printf(" ---------------------------------------------------\n");
+
+	print_bad_events();
+	printf("\n");
+
+}
+
+static struct trace_sched_handler map_ops  = {
+	.wakeup_event		= NULL,
+	.switch_event		= map_switch_event,
+	.runtime_event		= NULL,
+	.fork_event		= NULL,
+};
+
+static void __cmd_map(void)
+{
+	setup_pager();
+	read_events();
+	print_bad_events();
+}
+
+static void __cmd_replay(void)
+{
+	unsigned long i;
+
+	calibrate_run_measurement_overhead();
+	calibrate_sleep_measurement_overhead();
+
+	test_calibrations();
+
+	read_events();
+
+	printf("nr_run_events:        %ld\n", nr_run_events);
+	printf("nr_sleep_events:      %ld\n", nr_sleep_events);
+	printf("nr_wakeup_events:     %ld\n", nr_wakeup_events);
+
+	if (targetless_wakeups)
+		printf("target-less wakeups:  %ld\n", targetless_wakeups);
+	if (multitarget_wakeups)
+		printf("multi-target wakeups: %ld\n", multitarget_wakeups);
+	if (nr_run_events_optimized)
+		printf("run atoms optimized: %ld\n",
+			nr_run_events_optimized);
+
+	print_task_traces();
+	add_cross_task_wakeups();
+
+	create_tasks();
+	printf("------------------------------------------------------------\n");
+	for (i = 0; i < replay_repeat; i++)
+		run_one_test();
+}
+
+
 static const char * const sched_usage[] = {
 	"perf sched [<options>] {record|latency|replay|trace}",
 	NULL
@@ -1867,6 +1975,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
 		}
 		setup_sorting();
 		__cmd_lat();
+	} else if (!strcmp(argv[0], "map")) {
+		trace_handler = &map_ops;
+		setup_sorting();
+		__cmd_map();
 	} else if (!strncmp(argv[0], "rep", 3)) {
 		trace_handler = &replay_ops;
 		if (argc) {
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 12c4341078f9..45efb5db0d19 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -8,7 +8,7 @@
 
 static struct thread *thread__new(pid_t pid)
 {
-	struct thread *self = malloc(sizeof(*self));
+	struct thread *self = calloc(1, sizeof(*self));
 
 	if (self != NULL) {
 		self->pid = pid;
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 665d1f3dc977..32aea3c1c2ad 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -7,6 +7,7 @@ struct thread {
 	struct rb_node		rb_node;
 	struct list_head	maps;
 	pid_t			pid;
+	char			shortname[3];
 	char			*comm;
 };

commit 80ed0987f363d7eb50193df3e6f6d71451f74bc3
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 16 14:12:36 2009 +0200

    perf sched: Make idle thread and comm/pid names more consistent
    
    Peter noticed that we have 3 ways of referring to the idle thread:
    
     [idle]:0
     swapper:0
     swapper-0
    
    Standardize on 'swapper:0'.
    
    Reported-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 2d542368de3c..da8f67483ae7 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1204,13 +1204,13 @@ static void output_lat_thread(struct work_atoms *work_list)
 	/*
 	 * Ignore idle threads:
 	 */
-	if (!work_list->thread->pid)
+	if (!strcmp(work_list->thread->comm, "swapper"))
 		return;
 
 	all_runtime += work_list->total_runtime;
 	all_count += work_list->nb_atoms;
 
-	ret = printf("  %s-%d ", work_list->thread->comm, work_list->thread->pid);
+	ret = printf("  %s:%d ", work_list->thread->comm, work_list->thread->pid);
 
 	for (i = 0; i < 24 - ret; i++)
 		printf(" ");
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 7635928ca278..12c4341078f9 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -85,7 +85,7 @@ register_idle_thread(struct rb_root *threads, struct thread **last_match)
 {
 	struct thread *thread = threads__findnew(0, threads, last_match);
 
-	if (!thread || thread__set_comm(thread, "[init]")) {
+	if (!thread || thread__set_comm(thread, "swapper")) {
 		fprintf(stderr, "problem inserting idle task.\n");
 		exit(-1);
 	}

commit c8a37751043427c6e4397a2cbfd617cb5f215c72
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 16 14:07:00 2009 +0200

    perf sched: Sanity check context switch events
    
    Use 'perf sched latency' to track the current task based on
    context-switch events, and flag the cases where there's some
    impossible transition: such as a PID being switched out that
    was not switched in.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 1f0f9be34faa..2d542368de3c 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -119,6 +119,7 @@ static unsigned long		replay_repeat = 10;
 static unsigned long		nr_timestamps;
 static unsigned long		nr_unordered_timestamps;
 static unsigned long		nr_state_machine_bugs;
+static unsigned long		nr_context_switch_bugs;
 static unsigned long		nr_events;
 static unsigned long		nr_lost_chunks;
 static unsigned long		nr_lost_events;
@@ -1399,6 +1400,14 @@ static void __cmd_lat(void)
 			printf(" (due to lost events?)");
 		printf("\n");
 	}
+	if (nr_context_switch_bugs && nr_timestamps) {
+		printf("  INFO: %.3f%% context switch bugs (%ld out of %ld)",
+			(double)nr_context_switch_bugs/(double)nr_timestamps*100.0,
+			nr_context_switch_bugs, nr_timestamps);
+		if (nr_lost_events)
+			printf(" (due to lost events?)");
+		printf("\n");
+	}
 	printf("\n");
 
 }
@@ -1425,10 +1434,16 @@ process_sched_wakeup_event(struct raw_event_sample *raw,
 	trace_handler->wakeup_event(&wakeup_event, event, cpu, timestamp, thread);
 }
 
+/*
+ * Track the current task - that way we can know whether there's any
+ * weird events, such as a task being switched away that is not current.
+ */
+static u32 curr_pid[MAX_CPUS] = { [0 ... MAX_CPUS-1] = -1 };
+
 static void
 process_sched_switch_event(struct raw_event_sample *raw,
 			   struct event *event,
-			   int cpu __used,
+			   int cpu,
 			   u64 timestamp __used,
 			   struct thread *thread __used)
 {
@@ -1444,6 +1459,16 @@ process_sched_switch_event(struct raw_event_sample *raw,
 	FILL_FIELD(switch_event, next_pid, event, raw->data);
 	FILL_FIELD(switch_event, next_prio, event, raw->data);
 
+	if (curr_pid[cpu] != (u32)-1) {
+		/*
+		 * Are we trying to switch away a PID that is
+		 * not current?
+		 */
+		if (curr_pid[cpu] != switch_event.prev_pid)
+			nr_context_switch_bugs++;
+	}
+	curr_pid[cpu] = switch_event.next_pid;
+
 	trace_handler->switch_event(&switch_event, event, cpu, timestamp, thread);
 }

commit dc02bf7178c8e2cb3d442ae19027b736d51c7dd5
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 16 13:45:00 2009 +0200

    perf sched: Account for lost events, increase default buffering
    
    Output lost-event and state-machine weirdness stats such as:
    
       TOTAL:                |  14974.910 ms |    46384 |
      ---------------------------------------------------
       INFO: 8.865% lost events (19132 out of 215819, in 8 chunks)
       INFO: 0.198% state machine bugs (49 out of 24708) (due to lost events?)
    
    And increase buffering to -m 1024 (4 MB) by default. Since we
    use output multiplexing that kind of space is needed.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    Cc: Paul Mackerras <paulus@samba.org>
    Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
    Cc: Frederic Weisbecker <fweisbec@gmail.com>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index adcb563ec4d2..1f0f9be34faa 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -117,7 +117,11 @@ static u64			run_avg;
 
 static unsigned long		replay_repeat = 10;
 static unsigned long		nr_timestamps;
-static unsigned long		unordered_timestamps;
+static unsigned long		nr_unordered_timestamps;
+static unsigned long		nr_state_machine_bugs;
+static unsigned long		nr_events;
+static unsigned long		nr_lost_chunks;
+static unsigned long		nr_lost_events;
 
 #define TASK_STATE_TO_CHAR_STR "RSDTtZX"
 
@@ -668,14 +672,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
 
 	thread = threads__findnew(event->comm.pid, &threads, &last_match);
 
-	dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n",
+	dump_printf("%p [%p]: perf_event_comm: %s:%d\n",
 		(void *)(offset + head),
 		(void *)(long)(event->header.size),
 		event->comm.comm, event->comm.pid);
 
 	if (thread == NULL ||
 	    thread__set_comm(thread, event->comm.comm)) {
-		dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n");
+		dump_printf("problem processing perf_event_comm, skipping event.\n");
 		return -1;
 	}
 	total_comm++;
@@ -1168,14 +1172,12 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
 
 	atom = list_entry(atoms->work_list.prev, struct work_atom, list);
 
-	if (atom->state != THREAD_SLEEPING) {
-		printf("boo2\n");
-		return;
-	}
+	if (atom->state != THREAD_SLEEPING)
+		nr_state_machine_bugs++;
 
 	nr_timestamps++;
 	if (atom->sched_out_time > timestamp) {
-		unordered_timestamps++;
+		nr_unordered_timestamps++;
 		return;
 	}
 
@@ -1214,7 +1216,7 @@ static void output_lat_thread(struct work_atoms *work_list)
 
 	avg = work_list->total_lat / work_list->nb_atoms;
 
-	printf("|%9.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n",
+	printf("|%11.3f ms |%9llu | avg:%9.3f ms | max:%9.3f ms |\n",
 	      (double)work_list->total_runtime / 1e6,
 		 work_list->nb_atoms, (double)avg / 1e6,
 		 (double)work_list->max_lat / 1e6);
@@ -1359,9 +1361,9 @@ static void __cmd_lat(void)
 	read_events();
 	sort_lat();
 
-	printf("\n ---------------------------------------------------------------------------------------\n");
-	printf("  Task                  |  Runtime ms | Switches | Average delay ms | Maximum delay ms |\n");
-	printf(" ---------------------------------------------------------------------------------------\n");
+	printf("\n -----------------------------------------------------------------------------------------\n");
+	printf("  Task                  |   Runtime ms  | Switches | Average delay ms | Maximum delay ms |\n");
+	printf(" -----------------------------------------------------------------------------------------\n");
 
 	next = rb_first(&sorted_atom_root);
 
@@ -1373,18 +1375,32 @@ static void __cmd_lat(void)
 		next = rb_next(next);
 	}
 
-	printf(" ---------------------------------------------------------------------------------------\n");
-	printf("  TOTAL:                |%9.3f ms |%9Ld |",
+	printf(" -----------------------------------------------------------------------------------------\n");
+	printf("  TOTAL:                |%11.3f ms |%9Ld |\n",
 		(double)all_runtime/1e6, all_count);
 
-	if (unordered_timestamps && nr_timestamps) {
-		printf(" INFO: %.2f%% unordered events.\n",
-			(double)unordered_timestamps/(double)nr_timestamps*100.0);
+	printf(" ---------------------------------------------------\n");
+	if (nr_unordered_timestamps && nr_timestamps) {
+		printf("  INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
+			(double)nr_unordered_timestamps/(double)nr_timestamps*100.0,
+			nr_unordered_timestamps, nr_timestamps);
 	} else {
+	}
+	if (nr_lost_events && nr_events) {
+		printf("  INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
+			(double)nr_lost_events/(double)nr_events*100.0,
+			nr_lost_events, nr_events, nr_lost_chunks);
+	}
+	if (nr_state_machine_bugs && nr_timestamps) {
+		printf("  INFO: %.3f%% state machine bugs (%ld out of %ld)",
+			(double)nr_state_machine_bugs/(double)nr_timestamps*100.0,
+			nr_state_machine_bugs, nr_timestamps);
+		if (nr_lost_events)
+			printf(" (due to lost events?)");
 		printf("\n");
 	}
+	printf("\n");
 
-	printf(" -------------------------------------------------\n\n");
 }
 
 static struct trace_sched_handler *trace_handler;
@@ -1585,8 +1601,13 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
 {
 	trace_event(event);
 
+	nr_events++;
 	switch (event->header.type) {
-	case PERF_EVENT_MMAP ... PERF_EVENT_LOST:
+	case PERF_EVENT_MMAP:
+		return 0;
+	case PERF_EVENT_LOST:
+		nr_lost_chunks++;
+		nr_lost_events += event->lost.lost;
 		return 0;
 
 	case PERF_EVENT_COMM:
@@ -1768,6 +1789,7 @@ static const char *record_args[] = {
 	"-R",
 	"-M",
 	"-f",
+	"-m", "1024",
 	"-c", "1",
 	"-e", "sched:sched_switch:r",
 	"-e", "sched:sched_stat_wait:r",
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index fa2d4e91d329..2495529cae7d 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -52,7 +52,7 @@ struct lost_event {
  */
 struct read_event {
 	struct perf_event_header header;
-	u32 pid,tid;
+	u32 pid, tid;
 	u64 value;
 	u64 time_enabled;
 	u64 time_running;

commit b9183f9b99a9bd3349aefbd51d22f7e1bdc4a087
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Sep 15 15:56:32 2009 +0200

    amd64_edac: build driver only on AMD hardware
    
    -tip testing found the following build failure (config attached):
    
    drivers/built-in.o: In function `amd64_check':
    amd64_edac.c:(.text+0x3e9491): undefined reference to `amd_decode_nb_mce'
    drivers/built-in.o: In function `amd64_init_2nd_stage':
    amd64_edac.c:(.text+0x3e9b46): undefined reference to `amd_report_gart_errors'
    amd64_edac.c:(.text+0x3e9b55): undefined reference to `amd_register_ecc_decoder'
    drivers/built-in.o: In function `amd64_nbea_store':
    amd64_edac_dbg.c:(.text+0x3ea22e): undefined reference to `amd_decode_nb_mce'
    drivers/built-in.o: In function `amd64_remove_one_instance':
    amd64_edac.c:(.devexit.text+0x3eea): undefined reference to `amd_report_gart_errors'
    amd64_edac.c:(.devexit.text+0x3ef6): undefined reference to `amd_unregister_ecc_decoder'
    
    The AMD EDAC code has a dependency on CONFIG_CPU_SUP_AMD facilities. The
    patch below solves the problem here.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 4339b1a879cd..a3ca18e2d7cf 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -59,7 +59,7 @@ config EDAC_MM_EDAC
 
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
-	depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI
+	depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD
 	help
 	  Support for error detection and correction on the AMD 64
 	  Families of Memory Controllers (K8, F10h and F11h)

commit 51e0304ce6e55a6e59658558916b4f74da085ff0
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 16 08:54:45 2009 +0200

    sched: Implement a gentler fair-sleepers feature
    
    Add back FAIR_SLEEPERS and GENTLE_FAIR_SLEEPERS.
    
    FAIR_SLEEPERS is the old logic: credit sleepers with their sleep time.
    
    GENTLE_FAIR_SLEEPERS dampens this a bit: 50% of their sleep time gets
    credited.
    
    The hope here is to still give the benefits of fair-sleepers logic
    (quick wakeups, etc.) while not allowing them to have 100% of their
    sleep time as if they were running.
    
    Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Cc: Mike Galbraith <efault@gmx.de>
    LKML-Reference: <new-submission>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a37f311f436e..acf16a8d934b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -711,7 +711,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS)) {
+		if (sched_feat(FAIR_SLEEPERS)) {
 			unsigned long thresh = sysctl_sched_latency;
 
 			/*
@@ -725,6 +725,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 					 task_of(se)->policy != SCHED_IDLE))
 				thresh = calc_delta_fair(thresh, se);
 
+			/*
+			 * Halve their sleep time's effect, to allow
+			 * for a gentler effect of sleepers:
+			 */
+			if (sched_feat(GENTLE_FAIR_SLEEPERS))
+				thresh >>= 1;
+
 			vruntime -= thresh;
 		}
 	}
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 70115c69c7a9..fd375675f834 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -3,7 +3,14 @@
  * considers the task to be running during that period. This gives it
  * a service deficit on wakeup, allowing it to run sooner.
  */
-SCHED_FEAT(NEW_FAIR_SLEEPERS, 0)
+SCHED_FEAT(FAIR_SLEEPERS, 1)
+
+/*
+ * Only give sleepers 50% of their service deficit. This allows
+ * them to run sooner, but does not allow tons of sleepers to
+ * rip the spread apart.
+ */
+SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
 
 /*
  * By not normalizing the sleep time, heavy tasks get an effective

commit fdaa45e95d2ef59a140d2fb2e487141f83f5a07c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Sep 15 11:00:26 2009 +0200

    slub: Fix build error in kmem_cache_open() with !CONFIG_SLUB_DEBUG
    
    This build bug:
    
     mm/slub.c: In function 'kmem_cache_open':
     mm/slub.c:2476: error: 'disable_higher_order_debug' undeclared (first use in this function)
     mm/slub.c:2476: error: (Each undeclared identifier is reported only once
     mm/slub.c:2476: error: for each function it appears in.)
    
    Triggers because there's no !CONFIG_SLUB_DEBUG definition for
    disable_higher_order_debug.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>

diff --git a/mm/slub.c b/mm/slub.c
index a5789b91d179..0a216aae227e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1071,6 +1071,8 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,
 }
 #define slub_debug 0
 
+#define disable_higher_order_debug 0
+
 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
 							{ return 0; }
 static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)