Patches contributed by Eötvös Loránd University


commit ef83a5714d9a817b2e9b97f04a6d070fbd6ecf80
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:08 2007 +0200

    sched: enhance debug output
    
    enhance debug output by changing 12345678 nsecs to 12.345678 output,
    which is more human-readable.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 22cf74c1dc03..e2c1e0dfdf50 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -28,6 +28,31 @@
 		printk(x);			\
  } while (0)
 
+/*
+ * Ease the printing of nsec fields:
+ */
+static long long nsec_high(long long nsec)
+{
+	if (nsec < 0) {
+		nsec = -nsec;
+		do_div(nsec, 1000000);
+		return -nsec;
+	}
+	do_div(nsec, 1000000);
+
+	return nsec;
+}
+
+static unsigned long nsec_low(long long nsec)
+{
+	if (nsec < 0)
+		nsec = -nsec;
+
+	return do_div(nsec, 1000000);
+}
+
+#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
+
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
@@ -36,19 +61,19 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	else
 		SEQ_printf(m, " ");
 
-	SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ",
+	SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
 		p->comm, p->pid,
-		(long long)p->se.vruntime,
+		SPLIT_NS(p->se.vruntime),
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 #ifdef CONFIG_SCHEDSTATS
-	SEQ_printf(m, "%15Ld %15Ld %15Ld\n",
-		(long long)p->se.vruntime,
-		(long long)p->se.sum_exec_runtime,
-		(long long)p->se.sum_sleep_runtime);
+	SEQ_printf(m, "%15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n",
+		SPLIT_NS(p->se.vruntime),
+		SPLIT_NS(p->se.sum_exec_runtime),
+		SPLIT_NS(p->se.sum_sleep_runtime));
 #else
-	SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n",
-		0LL, 0LL, 0LL, 0LL, 0LL);
+	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n",
+		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
 #endif
 }
 
@@ -85,10 +110,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 	SEQ_printf(m, "\ncfs_rq\n");
 
-#define P(x) \
-	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))
-
-	P(exec_clock);
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
+			SPLIT_NS(cfs_rq->exec_clock));
 
 	spin_lock_irqsave(&rq->lock, flags);
 	if (cfs_rq->rb_leftmost)
@@ -99,19 +122,18 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	min_vruntime = rq->cfs.min_vruntime;
 	rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
 	spin_unlock_irqrestore(&rq->lock, flags);
-	SEQ_printf(m, "  .%-30s: %Ld\n", "MIN_vruntime",
-			(long long)MIN_vruntime);
-	SEQ_printf(m, "  .%-30s: %Ld\n", "min_vruntime",
-			(long long)min_vruntime);
-	SEQ_printf(m, "  .%-30s: %Ld\n", "max_vruntime",
-			(long long)max_vruntime);
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
+			SPLIT_NS(MIN_vruntime));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
+			SPLIT_NS(min_vruntime));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
+			SPLIT_NS(max_vruntime));
 	spread = max_vruntime - MIN_vruntime;
-	SEQ_printf(m, "  .%-30s: %Ld\n", "spread",
-			(long long)spread);
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
+			SPLIT_NS(spread));
 	spread0 = min_vruntime - rq0_min_vruntime;
-	SEQ_printf(m, "  .%-30s: %Ld\n", "spread0",
-			(long long)spread0);
-#undef P
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
+			SPLIT_NS(spread0));
 }
 
 static void print_cpu(struct seq_file *m, int cpu)
@@ -131,6 +153,8 @@ static void print_cpu(struct seq_file *m, int cpu)
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x))
+#define PN(x) \
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
 
 	P(nr_running);
 	SEQ_printf(m, "  .%-30s: %lu\n", "load",
@@ -139,21 +163,22 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(nr_load_updates);
 	P(nr_uninterruptible);
 	SEQ_printf(m, "  .%-30s: %lu\n", "jiffies", jiffies);
-	P(next_balance);
+	PN(next_balance);
 	P(curr->pid);
-	P(clock);
-	P(idle_clock);
-	P(prev_clock_raw);
+	PN(clock);
+	PN(idle_clock);
+	PN(prev_clock_raw);
 	P(clock_warps);
 	P(clock_overflows);
 	P(clock_deep_idle_events);
-	P(clock_max_delta);
+	PN(clock_max_delta);
 	P(cpu_load[0]);
 	P(cpu_load[1]);
 	P(cpu_load[2]);
 	P(cpu_load[3]);
 	P(cpu_load[4]);
 #undef P
+#undef PN
 
 	print_cfs_stats(m, cpu);
 
@@ -170,7 +195,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
 
-	SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now);
+	SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now));
 
 	for_each_online_cpu(cpu)
 		print_cpu(m, cpu);
@@ -228,20 +253,22 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	SEQ_printf(m, "----------------------------------------------\n");
 #define P(F) \
 	SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F)
+#define PN(F) \
+	SEQ_printf(m, "%-25s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
 
-	P(se.exec_start);
-	P(se.vruntime);
-	P(se.sum_exec_runtime);
+	PN(se.exec_start);
+	PN(se.vruntime);
+	PN(se.sum_exec_runtime);
 
 #ifdef CONFIG_SCHEDSTATS
-	P(se.wait_start);
-	P(se.sleep_start);
-	P(se.block_start);
-	P(se.sleep_max);
-	P(se.block_max);
-	P(se.exec_max);
-	P(se.slice_max);
-	P(se.wait_max);
+	PN(se.wait_start);
+	PN(se.sleep_start);
+	PN(se.block_start);
+	PN(se.sleep_max);
+	PN(se.block_max);
+	PN(se.exec_max);
+	PN(se.slice_max);
+	PN(se.wait_max);
 #endif
 	SEQ_printf(m, "%-25s:%20Ld\n",
 		   "nr_switches", (long long)(p->nvcsw + p->nivcsw));
@@ -249,6 +276,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(policy);
 	P(prio);
 #undef P
+#undef PN
 
 	{
 		u64 t0, t1;

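The nsec_high()/nsec_low() pair splits a nanosecond count into a millisecond part and a six-digit remainder (do_div() divides a 64-bit value in place and returns the remainder). A standalone userspace sketch of the same arithmetic, with plain 64-bit division standing in for the kernel's do_div(), reproduces the changelog's example:

    /* split_ns.c - userspace sketch of the SPLIT_NS() helpers above */
    #include <stdio.h>

    static long long nsec_high(long long nsec)
    {
    	if (nsec < 0)
    		return -(-nsec / 1000000);
    	return nsec / 1000000;
    }

    static unsigned long nsec_low(long long nsec)
    {
    	if (nsec < 0)
    		nsec = -nsec;
    	return (unsigned long)(nsec % 1000000);
    }

    #define SPLIT_NS(x) nsec_high(x), nsec_low(x)

    int main(void)
    {
    	/* 12345678 nsecs prints as "12.345678", as the changelog says */
    	printf("%lld.%06lu\n", SPLIT_NS(12345678LL));
    	printf("%lld.%06lu\n", SPLIT_NS(-12345678LL));
    	return 0;
    }
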
commit 1a75b94f7bda591f4c53af86baa50e1eaee35927
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:08 2007 +0200

    sched: prettify /proc/sched_debug output
    
    print the correct amount of dashes in /proc/sched_debug.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index bb34b8188f61..22cf74c1dc03 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -60,10 +60,8 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 	"\nrunnable tasks:\n"
 	"            task   PID        tree-key  switches  prio"
 	"    exec-runtime        sum-exec       sum-sleep\n"
-	"------------------------------------------------------------------"
-	"--------------------------------"
-	"------------------------------------------------"
-	"--------------------------------\n");
+	"------------------------------------------------------"
+	"------------------------------------------------\n");
 
 	read_lock_irq(&tasklist_lock);
 

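The separator has to match the combined width of the header strings above it. A quick userspace sanity check (header strings copied from the hunk above) prints the width the dash line needs to be:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
    	const char header[] =
    	"            task   PID        tree-key  switches  prio"
    	"    exec-runtime        sum-exec       sum-sleep";

    	/* the dash separator should be exactly this many characters wide */
    	printf("header width: %zu\n", strlen(header));
    	return 0;
    }
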
commit 75d4ef16a6aa84f708188bada182315f80aab6fa
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:08 2007 +0200

    sched: fix delay accounting performance regression
    
    fix delay accounting performance regression - those sched_clock()
    calls are not needed.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index c20a94dda61e..1d9ec98c38de 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -129,7 +129,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 # define schedstat_set(var, val)	do { } while (0)
 #endif
 
-#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+#ifdef CONFIG_SCHEDSTATS
 /*
  * Called when a process is dequeued from the active array and given
  * the cpu.  We should note that with the exception of interactive
@@ -233,5 +233,5 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 #else
 #define sched_info_queued(t)		do { } while (0)
 #define sched_info_switch(t, next)	do { } while (0)
-#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
+#endif /* CONFIG_SCHEDSTATS */
 

commit 35a6ff5417bf94c9e19b6b55a9eb6eea14cc7be7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:07 2007 +0200

    sched: x86: allow single-depth wchan output
    
    sched.o gets smaller and faster if we compile it with -fomit-frame-pointer,
    so make this a config option. The cost is the loss of multi-depth wchan
    lookups - but SysRq-T is a sufficient replacement for them anyway, so their
    utility is much lower these days.
    
    the size difference is significant:
    
       text    data     bss     dec     hex filename
      34005    3462      24   37491    9273 sched.o.before
      33470    3462      24   36956    905c sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index f1486f8a3e6d..bf9aafad4978 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -214,6 +214,17 @@ config X86_ES7000
 
 endchoice
 
+config SCHED_NO_NO_OMIT_FRAME_POINTER
+	bool "Single-depth WCHAN output"
+	default y
+	help
+	  Calculate simpler /proc/<PID>/wchan values. If this option
+	  is disabled then wchan values will recurse back to the
+	  caller function. This provides more accurate wchan values,
+	  at the expense of slightly more scheduling overhead.
+
+	  If in doubt, say "Y".
+
 config PARAVIRT
 	bool "Paravirtualization support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL

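As a rough illustration of the single-depth limitation (userspace code, not the kernel's get_wchan()): GCC's __builtin_return_address(0) works even under -fomit-frame-pointer, while walking to deeper callers generally requires frame pointers:

    #include <stdio.h>

    static void __attribute__((noinline)) sleeper(void)
    {
    	/* depth-0 lookup: reliable even with -fomit-frame-pointer */
    	printf("caller: %p\n", __builtin_return_address(0));
    }

    int main(void)
    {
    	sleeper();
    	return 0;
    }
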
commit bbdba7c0e1161934ae881ad00e4db49830f5ef59
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:06 2007 +0200

    sched: remove wait_runtime fields and features
    
    remove the wait_runtime-based fields and features, now that the CFS
    math has been changed over to the vruntime metric.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 353630d6ae4b..572df1bbaeec 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -888,13 +888,9 @@ struct load_weight {
  *     4 se->block_start
  *     4 se->run_node
  *     4 se->sleep_start
- *     4 se->sleep_start_fair
  *     6 se->load.weight
- *     7 se->delta_fair
- *    15 se->wait_runtime
  */
 struct sched_entity {
-	long			wait_runtime;
 	s64			fair_key;
 	struct load_weight	load;		/* for load-balancing */
 	struct rb_node		run_node;
@@ -904,12 +900,10 @@ struct sched_entity {
 	u64			sum_exec_runtime;
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
-	u64			wait_start_fair;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
 	u64			wait_max;
-	s64			sum_wait_runtime;
 
 	u64			sleep_start;
 	u64			sleep_max;
@@ -919,9 +913,6 @@ struct sched_entity {
 	u64			block_max;
 	u64			exec_max;
 	u64			slice_max;
-
-	unsigned long		wait_runtime_overruns;
-	unsigned long		wait_runtime_underruns;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched.c b/kernel/sched.c
index 21cc3b2be023..0f0cf374c775 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -176,11 +176,8 @@ struct cfs_rq {
 	struct load_weight load;
 	unsigned long nr_running;
 
-	s64 fair_clock;
 	u64 exec_clock;
 	u64 min_vruntime;
-	s64 wait_runtime;
-	unsigned long wait_runtime_overruns, wait_runtime_underruns;
 
 	struct rb_root tasks_timeline;
 	struct rb_node *rb_leftmost;
@@ -389,20 +386,14 @@ static void update_rq_clock(struct rq *rq)
  * Debugging: various feature bits
  */
 enum {
-	SCHED_FEAT_FAIR_SLEEPERS	= 1,
-	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 2,
-	SCHED_FEAT_SLEEPER_AVG		= 4,
-	SCHED_FEAT_SLEEPER_LOAD_AVG	= 8,
-	SCHED_FEAT_START_DEBIT		= 16,
-	SCHED_FEAT_USE_TREE_AVG         = 32,
-	SCHED_FEAT_APPROX_AVG           = 64,
+	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 1,
+	SCHED_FEAT_START_DEBIT		= 2,
+	SCHED_FEAT_USE_TREE_AVG         = 4,
+	SCHED_FEAT_APPROX_AVG           = 8,
 };
 
 const_debug unsigned int sysctl_sched_features =
-		SCHED_FEAT_FAIR_SLEEPERS	*0 |
 		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
-		SCHED_FEAT_SLEEPER_AVG		*0 |
-		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
 		SCHED_FEAT_START_DEBIT		*1 |
 		SCHED_FEAT_USE_TREE_AVG		*0 |
 		SCHED_FEAT_APPROX_AVG		*0;
@@ -716,15 +707,11 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
 	lw->weight += inc;
-	if (sched_feat(FAIR_SLEEPERS))
-		lw->inv_weight = WMULT_CONST / lw->weight;
 }
 
 static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
 {
 	lw->weight -= dec;
-	if (sched_feat(FAIR_SLEEPERS) && likely(lw->weight))
-		lw->inv_weight = WMULT_CONST / lw->weight;
 }
 
 /*
@@ -848,8 +835,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
-	p->se.wait_runtime = 0;
-
 	if (task_has_rt_policy(p)) {
 		p->se.load.weight = prio_to_weight[0] * 2;
 		p->se.load.inv_weight = prio_to_wmult[0] >> 1;
@@ -995,13 +980,9 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 	int old_cpu = task_cpu(p);
 	struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
-	u64 clock_offset, fair_clock_offset;
+	u64 clock_offset;
 
 	clock_offset = old_rq->clock - new_rq->clock;
-	fair_clock_offset = old_rq->cfs.fair_clock - new_rq->cfs.fair_clock;
-
-	if (p->se.wait_start_fair)
-		p->se.wait_start_fair -= fair_clock_offset;
 
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
@@ -1571,15 +1552,12 @@ int fastcall wake_up_state(struct task_struct *p, unsigned int state)
  */
 static void __sched_fork(struct task_struct *p)
 {
-	p->se.wait_start_fair		= 0;
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
-	p->se.wait_runtime		= 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
-	p->se.sum_wait_runtime		= 0;
 	p->se.sum_sleep_runtime		= 0;
 	p->se.sleep_start		= 0;
 	p->se.block_start		= 0;
@@ -1588,8 +1566,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_max			= 0;
 	p->se.slice_max			= 0;
 	p->se.wait_max			= 0;
-	p->se.wait_runtime_overruns	= 0;
-	p->se.wait_runtime_underruns	= 0;
 #endif
 
 	INIT_LIST_HEAD(&p->run_list);
@@ -6436,7 +6412,6 @@ int in_sched_functions(unsigned long addr)
 static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT;
-	cfs_rq->fair_clock = 1;
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	cfs_rq->rq = rq;
 #endif
@@ -6562,15 +6537,12 @@ void normalize_rt_tasks(void)
 	read_lock_irq(&tasklist_lock);
 	do_each_thread(g, p) {
 		p->se.fair_key			= 0;
-		p->se.wait_runtime		= 0;
 		p->se.exec_start		= 0;
-		p->se.wait_start_fair		= 0;
 #ifdef CONFIG_SCHEDSTATS
 		p->se.wait_start		= 0;
 		p->se.sleep_start		= 0;
 		p->se.block_start		= 0;
 #endif
-		task_rq(p)->cfs.fair_clock	= 0;
 		task_rq(p)->clock		= 0;
 
 		if (!rt_task(p)) {
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 3350169a7d2a..e3b62324ac31 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -36,21 +36,16 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	else
 		SEQ_printf(m, " ");
 
-	SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d ",
+	SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ",
 		p->comm, p->pid,
 		(long long)p->se.fair_key,
-		(long long)(p->se.fair_key - rq->cfs.fair_clock),
-		(long long)p->se.wait_runtime,
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 #ifdef CONFIG_SCHEDSTATS
-	SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld %15Ld\n",
+	SEQ_printf(m, "%15Ld %15Ld %15Ld\n",
 		(long long)p->se.vruntime,
 		(long long)p->se.sum_exec_runtime,
-		(long long)p->se.sum_wait_runtime,
-		(long long)p->se.sum_sleep_runtime,
-		(long long)p->se.wait_runtime_overruns,
-		(long long)p->se.wait_runtime_underruns);
+		(long long)p->se.sum_sleep_runtime);
 #else
 	SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n",
 		0LL, 0LL, 0LL, 0LL, 0LL);
@@ -63,10 +58,8 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 
 	SEQ_printf(m,
 	"\nrunnable tasks:\n"
-	"            task   PID        tree-key         delta       waiting"
-	"  switches  prio"
-	"    exec-runtime        sum-exec        sum-wait       sum-sleep"
-	"    wait-overrun   wait-underrun\n"
+	"            task   PID        tree-key  switches  prio"
+	"    exec-runtime        sum-exec       sum-sleep\n"
 	"------------------------------------------------------------------"
 	"--------------------------------"
 	"------------------------------------------------"
@@ -84,29 +77,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 	read_unlock_irq(&tasklist_lock);
 }
 
-static void
-print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
-{
-	s64 wait_runtime_rq_sum = 0;
-	struct task_struct *p;
-	struct rb_node *curr;
-	unsigned long flags;
-	struct rq *rq = &per_cpu(runqueues, cpu);
-
-	spin_lock_irqsave(&rq->lock, flags);
-	curr = first_fair(cfs_rq);
-	while (curr) {
-		p = rb_entry(curr, struct task_struct, se.run_node);
-		wait_runtime_rq_sum += p->se.wait_runtime;
-
-		curr = rb_next(curr);
-	}
-	spin_unlock_irqrestore(&rq->lock, flags);
-
-	SEQ_printf(m, "  .%-30s: %Ld\n", "wait_runtime_rq_sum",
-		(long long)wait_runtime_rq_sum);
-}
-
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
 	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -120,7 +90,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))
 
-	P(fair_clock);
 	P(exec_clock);
 
 	spin_lock_irqsave(&rq->lock, flags);
@@ -144,13 +113,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	spread0 = min_vruntime - rq0_min_vruntime;
 	SEQ_printf(m, "  .%-30s: %Ld\n", "spread0",
 			(long long)spread0);
-
-	P(wait_runtime);
-	P(wait_runtime_overruns);
-	P(wait_runtime_underruns);
 #undef P
-
-	print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
 }
 
 static void print_cpu(struct seq_file *m, int cpu)
@@ -268,8 +231,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 #define P(F) \
 	SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F)
 
-	P(se.wait_runtime);
-	P(se.wait_start_fair);
 	P(se.exec_start);
 	P(se.vruntime);
 	P(se.sum_exec_runtime);
@@ -283,9 +244,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.exec_max);
 	P(se.slice_max);
 	P(se.wait_max);
-	P(se.wait_runtime_overruns);
-	P(se.wait_runtime_underruns);
-	P(se.sum_wait_runtime);
 #endif
 	SEQ_printf(m, "%-25s:%20Ld\n",
 		   "nr_switches", (long long)(p->nvcsw + p->nivcsw));
@@ -312,8 +270,6 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.exec_max			= 0;
 	p->se.slice_max			= 0;
 	p->se.wait_max			= 0;
-	p->se.wait_runtime_overruns	= 0;
-	p->se.wait_runtime_underruns	= 0;
 #endif
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a94189c42d1a..2df5a6467812 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -178,8 +178,6 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_add(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
-
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static void
@@ -192,8 +190,6 @@ __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
-
-	schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
 }
 
 static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
@@ -249,13 +245,6 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return period;
 }
 
-static void
-add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
-{
-	se->wait_runtime += delta;
-	schedstat_add(cfs_rq, wait_runtime, delta);
-}
-
 /*
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
@@ -264,9 +253,7 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
-	unsigned long delta_fair, delta_mine, delta_exec_weighted;
-	struct load_weight *lw = &cfs_rq->load;
-	unsigned long load = lw->weight;
+	unsigned long delta_exec_weighted;
 
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
@@ -278,25 +265,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 							&curr->load);
 	}
 	curr->vruntime += delta_exec_weighted;
-
-	if (!sched_feat(FAIR_SLEEPERS))
-		return;
-
-	if (unlikely(!load))
-		return;
-
-	delta_fair = calc_delta_fair(delta_exec, lw);
-	delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
-
-	cfs_rq->fair_clock += delta_fair;
-	/*
-	 * We executed delta_exec amount of time on the CPU,
-	 * but we were only entitled to delta_mine amount of
-	 * time during that period (if nr_running == 1 then
-	 * the two values are equal)
-	 * [Note: delta_mine - delta_exec is negative]:
-	 */
-	add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -322,7 +290,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	se->wait_start_fair = cfs_rq->fair_clock;
 	schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
 }
 
@@ -354,35 +321,11 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->fair_key = se->vruntime;
 }
 
-/*
- * Note: must be called with a freshly updated rq->fair_clock.
- */
-static inline void
-__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			unsigned long delta_fair)
-{
-	schedstat_set(se->wait_max, max(se->wait_max,
-			rq_of(cfs_rq)->clock - se->wait_start));
-
-	delta_fair = calc_weighted(delta_fair, se);
-
-	add_wait_runtime(cfs_rq, se, delta_fair);
-}
-
 static void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned long delta_fair;
-
-	if (unlikely(!se->wait_start_fair))
-		return;
-
-	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
-			(u64)(cfs_rq->fair_clock - se->wait_start_fair));
-
-	__update_stats_wait_end(cfs_rq, se, delta_fair);
-
-	se->wait_start_fair = 0;
+	schedstat_set(se->wait_max, max(se->wait_max,
+			rq_of(cfs_rq)->clock - se->wait_start));
 	schedstat_set(se->wait_start, 0);
 }
 
@@ -552,9 +495,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * Any task has to be enqueued before it get to execute on
 	 * a CPU. So account for the time it spent waiting on the
-	 * runqueue. (note, here we rely on pick_next_task() having
-	 * done a put_prev_task_fair() shortly before this, which
-	 * updated rq->fair_clock - used by update_stats_wait_end())
+	 * runqueue.
 	 */
 	update_stats_wait_end(cfs_rq, se);
 	update_stats_curr_start(cfs_rq, se);
@@ -989,13 +930,6 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	update_curr(cfs_rq);
 	place_entity(cfs_rq, se, 1);
 
-	/*
-	 * The statistical average of wait_runtime is about
-	 * -granularity/2, so initialize the task with that:
-	 */
-	if (sched_feat(START_DEBIT))
-		se->wait_runtime = -(__sched_period(cfs_rq->nr_running+1) / 2);
-
 	if (sysctl_sched_child_runs_first &&
 			curr->vruntime < se->vruntime) {
 

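All of the wait_runtime bookkeeping gives way to a single monotonic metric: vruntime advances by execution time scaled inversely by the task's load weight, as __update_curr() above does via calc_delta_fair(). A simplified userspace sketch (NICE_0_LOAD is the kernel's nice-0 weight; the helper is reduced to plain integer math, not the kernel's fixed-point version):

    #include <stdio.h>

    #define NICE_0_LOAD	1024ULL

    static unsigned long long calc_delta_fair(unsigned long long delta_exec,
    					  unsigned long long weight)
    {
    	return delta_exec * NICE_0_LOAD / weight;
    }

    int main(void)
    {
    	unsigned long long vruntime = 0;

    	/* a nice-0 task: vruntime advances at wall-clock speed */
    	vruntime += calc_delta_fair(1000000, NICE_0_LOAD);
    	printf("nice-0 task:      %llu\n", vruntime);

    	/* a heavier task (weight 2048) advances half as fast */
    	vruntime = 0;
    	vruntime += calc_delta_fair(1000000, 2048);
    	printf("weight-2048 task: %llu\n", vruntime);
    	return 0;
    }
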
commit e22f5bbf86d8cce710d5c8ba5bf57832e73aab8c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:06 2007 +0200

    sched: remove wait_runtime limit
    
    remove the wait_runtime-limit fields and the code depending on it, now
    that the math has been changed over to rely on the vruntime metric.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5e5c457fba86..353630d6ae4b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -905,7 +905,6 @@ struct sched_entity {
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
 	u64			wait_start_fair;
-	u64			sleep_start_fair;
 
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a4ac0b75f2d..21cc3b2be023 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -180,7 +180,6 @@ struct cfs_rq {
 	u64 exec_clock;
 	u64 min_vruntime;
 	s64 wait_runtime;
-	u64 sleeper_bonus;
 	unsigned long wait_runtime_overruns, wait_runtime_underruns;
 
 	struct rb_root tasks_timeline;
@@ -673,19 +672,6 @@ static inline void resched_task(struct task_struct *p)
 }
 #endif
 
-static u64 div64_likely32(u64 divident, unsigned long divisor)
-{
-#if BITS_PER_LONG == 32
-	if (likely(divident <= 0xffffffffULL))
-		return (u32)divident / divisor;
-	do_div(divident, divisor);
-
-	return divident;
-#else
-	return divident / divisor;
-#endif
-}
-
 #if BITS_PER_LONG == 32
 # define WMULT_CONST	(~0UL)
 #else
@@ -1016,8 +1002,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
 	if (p->se.wait_start_fair)
 		p->se.wait_start_fair -= fair_clock_offset;
-	if (p->se.sleep_start_fair)
-		p->se.sleep_start_fair -= fair_clock_offset;
 
 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
@@ -1592,7 +1576,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.wait_runtime		= 0;
-	p->se.sleep_start_fair		= 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
@@ -6582,7 +6565,6 @@ void normalize_rt_tasks(void)
 		p->se.wait_runtime		= 0;
 		p->se.exec_start		= 0;
 		p->se.wait_start_fair		= 0;
-		p->se.sleep_start_fair		= 0;
 #ifdef CONFIG_SCHEDSTATS
 		p->se.wait_start		= 0;
 		p->se.sleep_start		= 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 62965f0ae37c..3350169a7d2a 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -148,7 +148,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	P(wait_runtime);
 	P(wait_runtime_overruns);
 	P(wait_runtime_underruns);
-	P(sleeper_bonus);
 #undef P
 
 	print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
@@ -272,7 +271,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.wait_runtime);
 	P(se.wait_start_fair);
 	P(se.exec_start);
-	P(se.sleep_start_fair);
 	P(se.vruntime);
 	P(se.sum_exec_runtime);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 72f202a8be96..a94189c42d1a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -249,41 +249,11 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return period;
 }
 
-static inline void
-limit_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	long limit = sysctl_sched_runtime_limit;
-
-	/*
-	 * Niced tasks have the same history dynamic range as
-	 * non-niced tasks:
-	 */
-	if (unlikely(se->wait_runtime > limit)) {
-		se->wait_runtime = limit;
-		schedstat_inc(se, wait_runtime_overruns);
-		schedstat_inc(cfs_rq, wait_runtime_overruns);
-	}
-	if (unlikely(se->wait_runtime < -limit)) {
-		se->wait_runtime = -limit;
-		schedstat_inc(se, wait_runtime_underruns);
-		schedstat_inc(cfs_rq, wait_runtime_underruns);
-	}
-}
-
-static inline void
-__add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
-{
-	se->wait_runtime += delta;
-	schedstat_add(se, sum_wait_runtime, delta);
-	limit_wait_runtime(cfs_rq, se);
-}
-
 static void
 add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
 {
-	schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime);
-	__add_wait_runtime(cfs_rq, se, delta);
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
+	se->wait_runtime += delta;
+	schedstat_add(cfs_rq, wait_runtime, delta);
 }
 
 /*
@@ -294,7 +264,7 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
-	unsigned long delta, delta_fair, delta_mine, delta_exec_weighted;
+	unsigned long delta_fair, delta_mine, delta_exec_weighted;
 	struct load_weight *lw = &cfs_rq->load;
 	unsigned long load = lw->weight;
 
@@ -318,14 +288,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	delta_fair = calc_delta_fair(delta_exec, lw);
 	delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
 
-	if (cfs_rq->sleeper_bonus > sysctl_sched_min_granularity) {
-		delta = min((u64)delta_mine, cfs_rq->sleeper_bonus);
-		delta = min(delta, (unsigned long)(
-			(long)sysctl_sched_runtime_limit - curr->wait_runtime));
-		cfs_rq->sleeper_bonus -= delta;
-		delta_mine -= delta;
-	}
-
 	cfs_rq->fair_clock += delta_fair;
 	/*
 	 * We executed delta_exec amount of time on the CPU,
@@ -461,58 +423,8 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			      unsigned long delta_fair)
-{
-	unsigned long load = cfs_rq->load.weight;
-	long prev_runtime;
-
-	/*
-	 * Do not boost sleepers if there's too much bonus 'in flight'
-	 * already:
-	 */
-	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
-		return;
-
-	if (sched_feat(SLEEPER_LOAD_AVG))
-		load = rq_of(cfs_rq)->cpu_load[2];
-
-	/*
-	 * Fix up delta_fair with the effect of us running
-	 * during the whole sleep period:
-	 */
-	if (sched_feat(SLEEPER_AVG))
-		delta_fair = div64_likely32((u64)delta_fair * load,
-						load + se->load.weight);
-
-	delta_fair = calc_weighted(delta_fair, se);
-
-	prev_runtime = se->wait_runtime;
-	__add_wait_runtime(cfs_rq, se, delta_fair);
-	delta_fair = se->wait_runtime - prev_runtime;
-
-	/*
-	 * Track the amount of bonus we've given to sleepers:
-	 */
-	cfs_rq->sleeper_bonus += delta_fair;
-}
-
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	struct task_struct *tsk = task_of(se);
-	unsigned long delta_fair;
-
-	if ((entity_is_task(se) && tsk->policy == SCHED_BATCH) ||
-			 !sched_feat(FAIR_SLEEPERS))
-		return;
-
-	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
-		(u64)(cfs_rq->fair_clock - se->sleep_start_fair));
-
-	__enqueue_sleeper(cfs_rq, se, delta_fair);
-
-	se->sleep_start_fair = 0;
-
 #ifdef CONFIG_SCHEDSTATS
 	if (se->sleep_start) {
 		u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
@@ -544,6 +456,8 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		 * time that the task spent sleeping:
 		 */
 		if (unlikely(prof_on == SLEEP_PROFILING)) {
+			struct task_struct *tsk = task_of(se);
+
 			profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
 				     delta >> 20);
 		}
@@ -604,7 +518,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
-		se->sleep_start_fair = cfs_rq->fair_clock;
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9b1b0d4ff966..97b15c27407f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -264,17 +264,6 @@ static ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_runtime_limit_ns",
-		.data		= &sysctl_sched_runtime_limit,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &min_sched_granularity_ns,
-		.extra2		= &max_sched_granularity_ns,
-	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",

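An aside on the helper removed above: div64_likely32() was a fast path for 64-bit division on 32-bit machines, where dividends that fit in 32 bits can use cheap native division instead of a full 64/32 software divide. A standalone sketch of the idea (spelling corrected, do_div() replaced by plain division):

    #include <limits.h>
    #include <stdio.h>

    static unsigned long long div64_likely32(unsigned long long dividend,
    					 unsigned long divisor)
    {
    #if ULONG_MAX == 0xffffffffUL	/* 32-bit long: a cheap path exists */
    	if (dividend <= 0xffffffffULL)
    		return (unsigned long)dividend / divisor;
    #endif
    	return dividend / divisor;
    }

    int main(void)
    {
    	printf("%llu\n", div64_likely32(5000000000ULL, 4));	/* 1250000000 */
    	printf("%llu\n", div64_likely32(400ULL, 4));		/* 100 */
    	return 0;
    }
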
commit 7a62eabc4d60980eb39fff659f168d903b55c6d7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:06 2007 +0200

    sched: debug: update exec_clock only when SCHED_DEBUG
    
    micro-optimization: update cfs_rq->exec_clock only if
    CONFIG_SCHED_DEBUG=y.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 895fef74d99a..ce79eb0f0660 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -301,7 +301,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
-	cfs_rq->exec_clock += delta_exec;
+	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = delta_exec;
 	if (unlikely(curr->load.weight != NICE_0_LOAD)) {
 		delta_exec_weighted = calc_delta_fair(delta_exec_weighted,

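The change relies on schedstat_add() expanding to nothing when the stats config is unset, so the exec_clock update costs zero cycles in non-debug builds. A compilable rendition of the compile-out pattern (modeled on kernel/sched_stats.h of this era, reconstructed rather than quoted; the struct is a stand-in):

    #include <stdio.h>

    /* #define CONFIG_SCHEDSTATS 1 */	/* flip to compile the stats in */

    struct cfs_rq_stub { unsigned long long exec_clock; };

    #ifdef CONFIG_SCHEDSTATS
    # define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
    #else
    # define schedstat_add(rq, field, amt)	do { } while (0)
    #endif

    int main(void)
    {
    	struct cfs_rq_stub cfs_rq = { 0 };

    	schedstat_add(&cfs_rq, exec_clock, 1000);
    	printf("exec_clock: %llu\n", cfs_rq.exec_clock);	/* 0 when off */
    	return 0;
    }
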
commit 86d9560cb6bd85986e98b4c63705daec94406bd4
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:06 2007 +0200

    sched: add more vruntime statistics
    
    add more vruntime statistics.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 75ccf7aa98f3..7a61706637c7 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -109,7 +109,8 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
-	s64 MIN_vruntime = -1, max_vruntime = -1, spread;
+	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
+		spread, rq0_min_vruntime, spread0;
 	struct rq *rq = &per_cpu(runqueues, cpu);
 	struct sched_entity *last;
 	unsigned long flags;
@@ -121,7 +122,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 	P(fair_clock);
 	P(exec_clock);
-	P(min_vruntime);
 
 	spin_lock_irqsave(&rq->lock, flags);
 	if (cfs_rq->rb_leftmost)
@@ -129,14 +129,21 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	last = __pick_last_entity(cfs_rq);
 	if (last)
 		max_vruntime = last->vruntime;
+	min_vruntime = rq->cfs.min_vruntime;
+	rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
 	spin_unlock_irqrestore(&rq->lock, flags);
 	SEQ_printf(m, "  .%-30s: %Ld\n", "MIN_vruntime",
 			(long long)MIN_vruntime);
+	SEQ_printf(m, "  .%-30s: %Ld\n", "min_vruntime",
+			(long long)min_vruntime);
 	SEQ_printf(m, "  .%-30s: %Ld\n", "max_vruntime",
 			(long long)max_vruntime);
 	spread = max_vruntime - MIN_vruntime;
 	SEQ_printf(m, "  .%-30s: %Ld\n", "spread",
 			(long long)spread);
+	spread0 = min_vruntime - rq0_min_vruntime;
+	SEQ_printf(m, "  .%-30s: %Ld\n", "spread0",
+			(long long)spread0);
 
 	P(wait_runtime);
 	P(wait_runtime_overruns);

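The two derived numbers serve different purposes: spread is the vruntime range within one runqueue (a per-queue fairness check), while spread0 measures this queue's drift relative to CPU 0's (a cross-CPU check). A toy calculation with made-up values makes the semantics concrete:

    #include <stdio.h>

    int main(void)
    {
    	long long MIN_vruntime = 1000, max_vruntime = 1450;
    	long long min_vruntime = 990, rq0_min_vruntime = 970;

    	/* range of runnable tasks on this queue */
    	printf("spread:  %lld\n", max_vruntime - MIN_vruntime);	/* 450 */
    	/* drift of this queue relative to CPU 0 */
    	printf("spread0: %lld\n", min_vruntime - rq0_min_vruntime);	/* 20 */
    	return 0;
    }
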
commit 28a1f6fa2f7ecec7e5da28b03a24abbecbd2e864
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:05 2007 +0200

    sched: remove SCHED_FEAT_SKIP_INITIAL
    
    remove SCHED_FEAT_SKIP_INITIAL - it was off by default and even
    when enabled it never made any real difference.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched.c b/kernel/sched.c
index 5594e65166fc..bf85b4b281c5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -399,7 +399,6 @@ enum {
 	SCHED_FEAT_SLEEPER_AVG		= 4,
 	SCHED_FEAT_SLEEPER_LOAD_AVG	= 8,
 	SCHED_FEAT_START_DEBIT		= 16,
-	SCHED_FEAT_SKIP_INITIAL		= 32,
 };
 
 const_debug unsigned int sysctl_sched_features =
@@ -407,8 +406,7 @@ const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
-		SCHED_FEAT_START_DEBIT		*1 |
-		SCHED_FEAT_SKIP_INITIAL		*0;
+		SCHED_FEAT_START_DEBIT		*1;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cc447fbff51c..c8c6b0561391 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1063,13 +1063,6 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	update_curr(cfs_rq);
 	place_entity(cfs_rq, se, 1);
 
-	/*
-	 * The first wait is dominated by the child-runs-first logic,
-	 * so do not credit it with that waiting time yet:
-	 */
-	if (sched_feat(SKIP_INITIAL))
-		se->wait_start_fair = 0;
-
 	/*
 	 * The statistical average of wait_runtime is about
 	 * -granularity/2, so initialize the task with that:

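The feature mask gives each feature a power-of-two bit, builds the default by multiplying each bit by 0 or 1, and tests bits with a token-pasting macro, so removing a feature means renumbering the survivors, as the hunk above does. A compilable mirror of the resulting scheme:

    #include <stdio.h>

    enum {
    	SCHED_FEAT_FAIR_SLEEPERS	= 1,
    	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 2,
    	SCHED_FEAT_SLEEPER_AVG		= 4,
    	SCHED_FEAT_SLEEPER_LOAD_AVG	= 8,
    	SCHED_FEAT_START_DEBIT		= 16,
    };

    static const unsigned int sysctl_sched_features =
    		SCHED_FEAT_FAIR_SLEEPERS	*0 |
    		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
    		SCHED_FEAT_SLEEPER_AVG		*0 |
    		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
    		SCHED_FEAT_START_DEBIT		*1;

    #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)

    int main(void)
    {
    	printf("START_DEBIT: %d\n", !!sched_feat(START_DEBIT));
    	printf("SLEEPER_AVG: %d\n", !!sched_feat(SLEEPER_AVG));
    	return 0;
    }
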
commit 67e12eac328b276dca7e61640632ed996ff1a93a
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:05 2007 +0200

    sched: add se->vruntime debugging
    
    debug se->vruntime fields.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6b789dae7fdf..75ccf7aa98f3 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -44,7 +44,8 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 #ifdef CONFIG_SCHEDSTATS
-	SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n",
+	SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld %15Ld\n",
+		(long long)p->se.vruntime,
 		(long long)p->se.sum_exec_runtime,
 		(long long)p->se.sum_wait_runtime,
 		(long long)p->se.sum_sleep_runtime,
@@ -64,10 +65,10 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 	"\nrunnable tasks:\n"
 	"            task   PID        tree-key         delta       waiting"
 	"  switches  prio"
-	"        sum-exec        sum-wait       sum-sleep"
+	"    exec-runtime        sum-exec        sum-wait       sum-sleep"
 	"    wait-overrun   wait-underrun\n"
 	"------------------------------------------------------------------"
-	"----------------"
+	"--------------------------------"
 	"------------------------------------------------"
 	"--------------------------------\n");
 
@@ -108,6 +109,11 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
+	s64 MIN_vruntime = -1, max_vruntime = -1, spread;
+	struct rq *rq = &per_cpu(runqueues, cpu);
+	struct sched_entity *last;
+	unsigned long flags;
+
 	SEQ_printf(m, "\ncfs_rq\n");
 
 #define P(x) \
@@ -115,6 +121,23 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 	P(fair_clock);
 	P(exec_clock);
+	P(min_vruntime);
+
+	spin_lock_irqsave(&rq->lock, flags);
+	if (cfs_rq->rb_leftmost)
+		MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime;
+	last = __pick_last_entity(cfs_rq);
+	if (last)
+		max_vruntime = last->vruntime;
+	spin_unlock_irqrestore(&rq->lock, flags);
+	SEQ_printf(m, "  .%-30s: %Ld\n", "MIN_vruntime",
+			(long long)MIN_vruntime);
+	SEQ_printf(m, "  .%-30s: %Ld\n", "max_vruntime",
+			(long long)max_vruntime);
+	spread = max_vruntime - MIN_vruntime;
+	SEQ_printf(m, "  .%-30s: %Ld\n", "spread",
+			(long long)spread);
+
 	P(wait_runtime);
 	P(wait_runtime_overruns);
 	P(wait_runtime_underruns);
@@ -243,6 +266,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.wait_start_fair);
 	P(se.exec_start);
 	P(se.sleep_start_fair);
+	P(se.vruntime);
 	P(se.sum_exec_runtime);
 
 #ifdef CONFIG_SCHEDSTATS