Patches contributed by Eötvös Loránd University


commit 53df556e06d85245cf6aacedaba8e4da684859c3
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:03 2007 +0200

    sched: remove precise CPU load calculations #2
    
    continued removal of precise CPU load calculations.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
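
With the update_curr_load() calls gone, inc_load()/dec_load() reduce to
adding or subtracting the task's weight from the runqueue load.  A minimal
standalone sketch of that end state (plain C with local types, not the
kernel's; 1024 is the usual nice-0 weight):

    #include <stdio.h>

    struct load_weight {
        unsigned long weight;
    };

    struct rq_sketch {
        struct load_weight load;
    };

    /* what inc_load()/dec_load() boil down to after this patch */
    static void inc_load(struct rq_sketch *rq, unsigned long task_weight)
    {
        rq->load.weight += task_weight;     /* update_load_add() equivalent */
    }

    static void dec_load(struct rq_sketch *rq, unsigned long task_weight)
    {
        rq->load.weight -= task_weight;     /* update_load_sub() equivalent */
    }

    int main(void)
    {
        struct rq_sketch rq = { { 0 } };

        inc_load(&rq, 1024);    /* nice-0 task enqueued */
        inc_load(&rq, 1024);    /* a second one */
        dec_load(&rq, 1024);    /* one dequeued again */
        printf("rq load weight = %lu\n", rq.load.weight);   /* 1024 */
        return 0;
    }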

diff --git a/kernel/sched.c b/kernel/sched.c
index 25cc9b2a8c15..f6a81061fd50 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -173,8 +173,6 @@ struct rt_prio_array {
 
 struct load_stat {
 	struct load_weight load;
-	u64 load_update_start, load_update_last;
-	unsigned long delta_fair, delta_exec, delta_stat;
 };
 
 /* CFS-related fields in a runqueue */
@@ -793,15 +791,6 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
 #define sched_class_highest (&rt_sched_class)
 
-static void __update_curr_load(struct rq *rq, struct load_stat *ls)
-{
-	if (rq->curr != rq->idle && ls->load.weight) {
-		ls->delta_exec += ls->delta_stat;
-		ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load);
-		ls->delta_stat = 0;
-	}
-}
-
 /*
  * Update delta_exec, delta_fair fields for rq.
  *
@@ -817,31 +806,13 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
  * This function is called /before/ updating rq->ls.load
  * and when switching tasks.
  */
-static void update_curr_load(struct rq *rq)
-{
-	struct load_stat *ls = &rq->ls;
-	u64 start;
-
-	start = ls->load_update_start;
-	ls->load_update_start = rq->clock;
-	ls->delta_stat += rq->clock - start;
-	/*
-	 * Stagger updates to ls->delta_fair. Very frequent updates
-	 * can be expensive.
-	 */
-	if (ls->delta_stat)
-		__update_curr_load(rq, ls);
-}
-
 static inline void inc_load(struct rq *rq, const struct task_struct *p)
 {
-	update_curr_load(rq);
 	update_load_add(&rq->ls.load, p->se.load.weight);
 }
 
 static inline void dec_load(struct rq *rq, const struct task_struct *p)
 {
-	update_curr_load(rq);
 	update_load_sub(&rq->ls.load, p->se.load.weight);
 }
 
@@ -1972,8 +1943,7 @@ unsigned long nr_active(void)
  */
 static void update_cpu_load(struct rq *this_rq)
 {
-	unsigned long total_load = this_rq->ls.load.weight;
-	unsigned long this_load =  total_load;
+	unsigned long this_load = this_rq->ls.load.weight;
 	int i, scale;
 
 	this_rq->nr_load_updates++;

commit a25707f3aef9cf68c341eba5960d580f364e4e6f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:03 2007 +0200

    sched: remove precise CPU load
    
    CPU load calculations are statistical anyway, and there's little benefit
    from having it calculated on every scheduling event. So remove this code,
    it gets rid of a divide from the scheduler wakeup and context-switch
    fastpath.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
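
Only the decaying cpu_load[] averages remain after this change, plus a
round-up when the load is rising.  A standalone sketch (not kernel code) of
that update rule, showing why the plain truncating average would otherwise
get stuck at 9 when the load is 10:

    #include <stdio.h>

    #define CPU_LOAD_IDX_MAX 5

    static void update_load_sketch(unsigned long cpu_load[],
                                   unsigned long this_load, int round_up)
    {
        unsigned long scale;
        int i;

        for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
            unsigned long old_load = cpu_load[i];
            unsigned long new_load = this_load;

            if (round_up && new_load > old_load)
                new_load += scale - 1;      /* the round-up added above */
            cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
        }
    }

    int main(void)
    {
        unsigned long a[CPU_LOAD_IDX_MAX] = { 9, 9, 9, 9, 9 };
        unsigned long b[CPU_LOAD_IDX_MAX] = { 9, 9, 9, 9, 9 };
        int tick;

        for (tick = 0; tick < 100; tick++) {
            update_load_sketch(a, 10, 0);   /* truncating average only */
            update_load_sketch(b, 10, 1);   /* with the round-up */
        }
        printf("without round-up: cpu_load[1] = %lu\n", a[1]);  /* 9 */
        printf("with round-up:    cpu_load[1] = %lu\n", b[1]);  /* 10 */
        return 0;
    }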

diff --git a/kernel/sched.c b/kernel/sched.c
index d4dabfcc776c..25cc9b2a8c15 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1972,42 +1972,11 @@ unsigned long nr_active(void)
  */
 static void update_cpu_load(struct rq *this_rq)
 {
-	u64 fair_delta64, exec_delta64, idle_delta64, sample_interval64, tmp64;
 	unsigned long total_load = this_rq->ls.load.weight;
 	unsigned long this_load =  total_load;
-	struct load_stat *ls = &this_rq->ls;
 	int i, scale;
 
 	this_rq->nr_load_updates++;
-	if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD)))
-		goto do_avg;
-
-	/* Update delta_fair/delta_exec fields first */
-	update_curr_load(this_rq);
-
-	fair_delta64 = ls->delta_fair + 1;
-	ls->delta_fair = 0;
-
-	exec_delta64 = ls->delta_exec + 1;
-	ls->delta_exec = 0;
-
-	sample_interval64 = this_rq->clock - ls->load_update_last;
-	ls->load_update_last = this_rq->clock;
-
-	if ((s64)sample_interval64 < (s64)TICK_NSEC)
-		sample_interval64 = TICK_NSEC;
-
-	if (exec_delta64 > sample_interval64)
-		exec_delta64 = sample_interval64;
-
-	idle_delta64 = sample_interval64 - exec_delta64;
-
-	tmp64 = div64_64(SCHED_LOAD_SCALE * exec_delta64, fair_delta64);
-	tmp64 = div64_64(tmp64 * exec_delta64, sample_interval64);
-
-	this_load = (unsigned long)tmp64;
-
-do_avg:
 
 	/* Update our load: */
 	for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
@@ -2017,7 +1986,13 @@ static void update_cpu_load(struct rq *this_rq)
 
 		old_load = this_rq->cpu_load[i];
 		new_load = this_load;
-
+		/*
+		 * Round up the averaging division if load is increasing. This
+		 * prevents us from getting stuck on 9 if the load is 10, for
+		 * example.
+		 */
+		if (new_load > old_load)
+			new_load += scale-1;
 		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
 	}
 }
@@ -6484,7 +6459,6 @@ static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
 
 void __init sched_init(void)
 {
-	u64 now = sched_clock();
 	int highest_cpu = 0;
 	int i, j;
 
@@ -6509,8 +6483,6 @@ void __init sched_init(void)
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
 		list_add(&rq->cfs.leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 #endif
-		rq->ls.load_update_last = now;
-		rq->ls.load_update_start = now;
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
 			rq->cpu_load[j] = 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index fd080f686f18..6b789dae7fdf 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -145,8 +145,6 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(nr_running);
 	SEQ_printf(m, "  .%-30s: %lu\n", "load",
 		   rq->ls.load.weight);
-	P(ls.delta_fair);
-	P(ls.delta_exec);
 	P(nr_switches);
 	P(nr_load_updates);
 	P(nr_uninterruptible);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 2138c40f4836..105d57b41aa2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -94,16 +94,14 @@ enum {
 	SCHED_FEAT_FAIR_SLEEPERS	= 1,
 	SCHED_FEAT_SLEEPER_AVG		= 2,
 	SCHED_FEAT_SLEEPER_LOAD_AVG	= 4,
-	SCHED_FEAT_PRECISE_CPU_LOAD	= 8,
-	SCHED_FEAT_START_DEBIT		= 16,
-	SCHED_FEAT_SKIP_INITIAL		= 32,
+	SCHED_FEAT_START_DEBIT		= 8,
+	SCHED_FEAT_SKIP_INITIAL		= 16,
 };
 
 const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_FAIR_SLEEPERS	*1 |
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
-		SCHED_FEAT_PRECISE_CPU_LOAD	*1 |
 		SCHED_FEAT_START_DEBIT		*1 |
 		SCHED_FEAT_SKIP_INITIAL		*0;
 

commit 8ebc91d93669af39dbed50914d7daf457eeb43be
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:03 2007 +0200

    sched: remove stat_gran
    
    remove the stat_gran code - it was disabled by default and it causes
    unnecessary overhead.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
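
With the stat_gran batching gone, update_curr() derives the execution delta
from the clock and passes it straight to the helper; there is no per-entity
accumulator or granularity threshold in between.  A minimal sketch of that
shape, using simplified local types rather than the kernel's:

    #include <stdio.h>

    struct entity {
        unsigned long long exec_start;          /* ns stamp of last update */
        unsigned long long sum_exec_runtime;    /* total ns executed */
    };

    static void __update_curr_sketch(struct entity *curr,
                                     unsigned long delta_exec)
    {
        curr->sum_exec_runtime += delta_exec;   /* account immediately */
    }

    static void update_curr_sketch(struct entity *curr, unsigned long long now)
    {
        unsigned long delta_exec = (unsigned long)(now - curr->exec_start);

        __update_curr_sketch(curr, delta_exec); /* no threshold any more */
        curr->exec_start = now;
    }

    int main(void)
    {
        struct entity e = { .exec_start = 1000 };

        update_curr_sketch(&e, 1500);
        update_curr_sketch(&e, 2600);
        printf("sum_exec_runtime = %llu ns\n", e.sum_exec_runtime);  /* 1600 */
        return 0;
    }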

diff --git a/include/linux/sched.h b/include/linux/sched.h
index befca3f9364a..3c38a5040e8f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -895,9 +895,6 @@ struct load_weight {
  */
 struct sched_entity {
 	long			wait_runtime;
-	unsigned long		delta_fair_run;
-	unsigned long		delta_fair_sleep;
-	unsigned long		delta_exec;
 	s64			fair_key;
 	struct load_weight	load;		/* for load-balancing */
 	struct rb_node		run_node;
diff --git a/kernel/sched.c b/kernel/sched.c
index ae1544f0a20d..d4dabfcc776c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -829,7 +829,7 @@ static void update_curr_load(struct rq *rq)
 	 * Stagger updates to ls->delta_fair. Very frequent updates
 	 * can be expensive.
 	 */
-	if (ls->delta_stat >= sysctl_sched_stat_granularity)
+	if (ls->delta_stat)
 		__update_curr_load(rq, ls);
 }
 
@@ -1588,9 +1588,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
-	p->se.delta_exec		= 0;
-	p->se.delta_fair_run		= 0;
-	p->se.delta_fair_sleep		= 0;
 	p->se.wait_runtime		= 0;
 	p->se.sleep_start_fair		= 0;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 2e84aaffe425..2138c40f4836 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -85,8 +85,6 @@ const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 25000000UL;
  */
 const_debug unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
 
-const_debug unsigned int sysctl_sched_stat_granularity;
-
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 
 /*
@@ -360,13 +358,13 @@ add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
  * are not in our scheduling class.
  */
 static inline void
-__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
+	      unsigned long delta_exec)
 {
-	unsigned long delta, delta_exec, delta_fair, delta_mine;
+	unsigned long delta, delta_fair, delta_mine;
 	struct load_weight *lw = &cfs_rq->load;
 	unsigned long load = lw->weight;
 
-	delta_exec = curr->delta_exec;
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
@@ -400,6 +398,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 static void update_curr(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq_curr(cfs_rq);
+	u64 now = rq_of(cfs_rq)->clock;
 	unsigned long delta_exec;
 
 	if (unlikely(!curr))
@@ -410,15 +409,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	 * since the last time we changed load (this cannot
 	 * overflow on 32 bits):
 	 */
-	delta_exec = (unsigned long)(rq_of(cfs_rq)->clock - curr->exec_start);
-
-	curr->delta_exec += delta_exec;
+	delta_exec = (unsigned long)(now - curr->exec_start);
 
-	if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) {
-		__update_curr(cfs_rq, curr);
-		curr->delta_exec = 0;
-	}
-	curr->exec_start = rq_of(cfs_rq)->clock;
+	__update_curr(cfs_rq, curr, delta_exec);
+	curr->exec_start = now;
 }
 
 static inline void
@@ -494,10 +488,9 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Note: must be called with a freshly updated rq->fair_clock.
  */
 static inline void
-__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se,
+			unsigned long delta_fair)
 {
-	unsigned long delta_fair = se->delta_fair_run;
-
 	schedstat_set(se->wait_max, max(se->wait_max,
 			rq_of(cfs_rq)->clock - se->wait_start));
 
@@ -519,12 +512,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
 			(u64)(cfs_rq->fair_clock - se->wait_start_fair));
 
-	se->delta_fair_run += delta_fair;
-	if (unlikely(abs(se->delta_fair_run) >=
-				sysctl_sched_stat_granularity)) {
-		__update_stats_wait_end(cfs_rq, se);
-		se->delta_fair_run = 0;
-	}
+	__update_stats_wait_end(cfs_rq, se, delta_fair);
 
 	se->wait_start_fair = 0;
 	schedstat_set(se->wait_start, 0);
@@ -567,9 +555,10 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se,
+			      unsigned long delta_fair)
 {
-	unsigned long load = cfs_rq->load.weight, delta_fair;
+	unsigned long load = cfs_rq->load.weight;
 	long prev_runtime;
 
 	/*
@@ -582,8 +571,6 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG)
 		load = rq_of(cfs_rq)->cpu_load[2];
 
-	delta_fair = se->delta_fair_sleep;
-
 	/*
 	 * Fix up delta_fair with the effect of us running
 	 * during the whole sleep period:
@@ -618,12 +605,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
 		(u64)(cfs_rq->fair_clock - se->sleep_start_fair));
 
-	se->delta_fair_sleep += delta_fair;
-	if (unlikely(abs(se->delta_fair_sleep) >=
-				sysctl_sched_stat_granularity)) {
-		__enqueue_sleeper(cfs_rq, se);
-		se->delta_fair_sleep = 0;
-	}
+	__enqueue_sleeper(cfs_rq, se, delta_fair);
 
 	se->sleep_start_fair = 0;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6c97259e863e..9b1b0d4ff966 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -264,17 +264,6 @@ static ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_stat_granularity_ns",
-		.data		= &sysctl_sched_stat_granularity,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &min_wakeup_granularity_ns,
-		.extra2		= &max_wakeup_granularity_ns,
-	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_runtime_limit_ns",

commit 2bd8e6d422a4f44c0994f909317eba80b0fe08a1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: use constants if !CONFIG_SCHED_DEBUG
    
    use constants if !CONFIG_SCHED_DEBUG.
    
    this speeds up the code and reduces code-size:
    
        text    data     bss     dec     hex filename
       27464    3014      16   30494    771e sched.o.before
       26929    3010      20   29959    7507 sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
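
A standalone sketch of the const_debug idea behind this patch, under a
hypothetical MY_DEBUG switch: with debugging off the tunable becomes a
static const that the compiler can fold into its users (the kernel's debug
case uses __read_mostly placement rather than the plain global shown here):

    #include <stdio.h>

    /* flip to 1 to emulate CONFIG_SCHED_DEBUG=y (tunable stays writable) */
    #define MY_DEBUG 0

    #if MY_DEBUG
    # define const_debug                   /* plain global, runtime-adjustable */
    #else
    # define const_debug static const     /* becomes a compile-time constant */
    #endif

    const_debug unsigned int sysctl_sched_latency = 20000000UL;  /* 20 ms */

    int main(void)
    {
        /* with MY_DEBUG=0 the compiler may fold this into an immediate */
        printf("targeted latency: %u ns\n", sysctl_sched_latency);
        return 0;
    }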

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9761b165d563..befca3f9364a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1402,15 +1402,18 @@ static inline void idle_task_exit(void) {}
 
 extern void sched_idle_next(void);
 
+#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_stat_granularity;
 extern unsigned int sysctl_sched_runtime_limit;
-extern unsigned int sysctl_sched_compat_yield;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
+#endif
+
+extern unsigned int sysctl_sched_compat_yield;
 
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c
index 2520923a0c3b..ae1544f0a20d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1658,12 +1658,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	put_cpu();
 }
 
-/*
- * After fork, child runs first. (default) If set to 0 then
- * parent will (try to) run first.
- */
-unsigned int __read_mostly sysctl_sched_child_runs_first = 1;
-
 /*
  * wake_up_new_task - wake up a newly created task for the first time.
  *
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c15d8ae92cb..2e84aaffe425 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -20,6 +20,15 @@
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  */
 
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# define const_debug __read_mostly
+#else
+# define const_debug static const
+#endif
+
 /*
  * Targeted preemption latency for CPU-bound tasks:
  * (default: 20ms, units: nanoseconds)
@@ -34,7 +43,13 @@
  * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
  * Targeted preemption latency for CPU-bound tasks:
  */
-unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
+const_debug unsigned int sysctl_sched_latency = 20000000ULL;
+
+/*
+ * After fork, child runs first. (default) If set to 0 then
+ * parent will (try to) run first.
+ */
+const_debug unsigned int sysctl_sched_child_runs_first = 1;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
@@ -58,7 +73,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
+const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 25000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
@@ -68,13 +83,10 @@ unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL;
+const_debug unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
 
-unsigned int sysctl_sched_stat_granularity __read_mostly;
+const_debug unsigned int sysctl_sched_stat_granularity;
 
-/*
- * Initialized in sched_init_granularity() [to 5 times the base granularity]:
- */
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 
 /*
@@ -89,7 +101,7 @@ enum {
 	SCHED_FEAT_SKIP_INITIAL		= 32,
 };
 
-unsigned int sysctl_sched_features __read_mostly =
+const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_FAIR_SLEEPERS	*1 |
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |

commit 38ad464d410dadceda1563f36bdb0be7fe4c8938
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: uniform tunings
    
    use the same defaults on both UP and SMP.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
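
For reference, a standalone illustration of the scaling the removed
sched_init_granularity() used to apply: tunables were multiplied by
1 + log2(online CPUs) and capped at 100 ms, so an 8-way box ran with 4x the
UP latency.  The helper below is local to the sketch; 20 ms is the stock
latency default:

    #include <stdio.h>

    static unsigned int ilog2_sketch(unsigned int n)
    {
        unsigned int r = 0;

        while (n >>= 1)
            r++;
        return r;
    }

    int main(void)
    {
        const unsigned long limit = 100000000UL;        /* 100 ms cap, ns */
        const unsigned long base_latency = 20000000UL;  /* 20 ms default */
        unsigned int cpus;

        for (cpus = 1; cpus <= 64; cpus *= 2) {
            unsigned int factor = 1 + ilog2_sketch(cpus);
            unsigned long latency = base_latency * factor;

            if (latency > limit)
                latency = limit;
            printf("%2u CPUs -> factor %u, latency %lu ns\n",
                   cpus, factor, latency);
        }
        return 0;
    }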

diff --git a/kernel/sched.c b/kernel/sched.c
index 282d037c7300..2520923a0c3b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4898,32 +4898,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
  */
 cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 
-/*
- * Increase the granularity value when there are more CPUs,
- * because with more CPUs the 'effective latency' as visible
- * to users decreases. But the relationship is not linear,
- * so pick a second-best guess by going with the log2 of the
- * number of CPUs.
- *
- * This idea comes from the SD scheduler of Con Kolivas:
- */
-static inline void sched_init_granularity(void)
-{
-	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long limit = 100000000;
-
-	sysctl_sched_min_granularity *= factor;
-	if (sysctl_sched_min_granularity > limit)
-		sysctl_sched_min_granularity = limit;
-
-	sysctl_sched_latency *= factor;
-	if (sysctl_sched_latency > limit)
-		sysctl_sched_latency = limit;
-
-	sysctl_sched_runtime_limit = sysctl_sched_latency;
-	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
-}
-
 #ifdef CONFIG_SMP
 /*
  * This is how migration works:
@@ -6491,12 +6465,10 @@ void __init sched_init_smp(void)
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
 		BUG();
-	sched_init_granularity();
 }
 #else
 void __init sched_init_smp(void)
 {
-	sched_init_granularity();
 }
 #endif /* CONFIG_SMP */
 

commit eba1ed4b7e52720e3099325874811c38a5ec1562
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: debug: track maximum 'slice'
    
    track the maximum amount of time a task has executed while
    the CPU load was at least 2x. (i.e. at least two nice-0
    tasks were runnable)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
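
A sketch (plain C, not the kernel structures) of the bookkeeping this adds:
a 'slice' is the runtime accumulated since the entity was last picked, and
its maximum is recorded only when the runqueue weight is at least twice the
entity's own, i.e. when there was real competition for the CPU:

    #include <stdio.h>

    struct entity {
        unsigned long long sum_exec_runtime;
        unsigned long long prev_sum_exec_runtime;
        unsigned long long slice_max;
        unsigned long load_weight;
    };

    static void set_next_entity_sketch(struct entity *se,
                                       unsigned long rq_weight)
    {
        /* track only when at least our own weight's worth of others runs */
        if (rq_weight >= 2 * se->load_weight) {
            unsigned long long slice =
                se->sum_exec_runtime - se->prev_sum_exec_runtime;

            if (slice > se->slice_max)
                se->slice_max = slice;
        }
        se->prev_sum_exec_runtime = se->sum_exec_runtime;
    }

    int main(void)
    {
        struct entity se = { .load_weight = 1024 };     /* nice-0 weight */

        set_next_entity_sketch(&se, 2048);      /* first pick, slice is 0 */
        se.sum_exec_runtime += 3000000;         /* the entity ran for 3 ms */
        set_next_entity_sketch(&se, 2048);      /* repicked: slice is 3 ms */
        printf("slice_max = %llu ns\n", se.slice_max);  /* 3000000 */
        return 0;
    }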

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 833f7dc2b8de..9761b165d563 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -921,6 +921,7 @@ struct sched_entity {
 	u64			block_start;
 	u64			block_max;
 	u64			exec_max;
+	u64			slice_max;
 
 	unsigned long		wait_runtime_overruns;
 	unsigned long		wait_runtime_underruns;
diff --git a/kernel/sched.c b/kernel/sched.c
index e92b185e371b..282d037c7300 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1603,6 +1603,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.sleep_max			= 0;
 	p->se.block_max			= 0;
 	p->se.exec_max			= 0;
+	p->se.slice_max			= 0;
 	p->se.wait_max			= 0;
 	p->se.wait_runtime_overruns	= 0;
 	p->se.wait_runtime_underruns	= 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 94915f1fd9de..fd080f686f18 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -254,6 +254,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.sleep_max);
 	P(se.block_max);
 	P(se.exec_max);
+	P(se.slice_max);
 	P(se.wait_max);
 	P(se.wait_runtime_overruns);
 	P(se.wait_runtime_underruns);
@@ -282,6 +283,7 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.sleep_max			= 0;
 	p->se.block_max			= 0;
 	p->se.exec_max			= 0;
+	p->se.slice_max			= 0;
 	p->se.wait_max			= 0;
 	p->se.wait_runtime_overruns	= 0;
 	p->se.wait_runtime_underruns	= 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0990b20fdcf5..5c15d8ae92cb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -739,6 +739,17 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_stats_wait_end(cfs_rq, se);
 	update_stats_curr_start(cfs_rq, se);
 	set_cfs_rq_curr(cfs_rq, se);
+#ifdef CONFIG_SCHEDSTATS
+	/*
+	 * Track our maximum slice length, if the CPU's load is at
+	 * least twice that of our own weight (i.e. dont track it
+	 * when there are only lesser-weight tasks around):
+	 */
+	if (rq_of(cfs_rq)->ls.load.weight >= 2*se->load.weight) {
+		se->slice_max = max(se->slice_max,
+			se->sum_exec_runtime - se->prev_sum_exec_runtime);
+	}
+#endif
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 

commit a4b29ba2f72673aaa60ba11ced74d579771dd578
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: small sched_debug cleanup
    
    small kernel/sched_debug.c cleanup - break up
    multi-variable assignment.
    
    no code changed:
    
       text    data     bss     dec     hex filename
       38869    3550      24   42443    a5cb sched.o.before
       38869    3550      24   42443    a5cb sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index c3ee38bd3426..94915f1fd9de 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -279,9 +279,13 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 void proc_sched_set_task(struct task_struct *p)
 {
 #ifdef CONFIG_SCHEDSTATS
-	p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0;
-	p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0;
+	p->se.sleep_max			= 0;
+	p->se.block_max			= 0;
+	p->se.exec_max			= 0;
+	p->se.wait_max			= 0;
+	p->se.wait_runtime_overruns	= 0;
+	p->se.wait_runtime_underruns	= 0;
 #endif
-	p->se.sum_exec_runtime = 0;
+	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
 }

commit bb61c210835db95b0e9fb612a316422e7cc675e3
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: resched task in task_new_fair()
    
    to get full child-runs-first semantics make sure the parent is
    rescheduled.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
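
A tiny sketch of why the added resched_task() call matters: enqueueing the
child ahead of the parent changes nothing by itself; the currently running
parent also has to be flagged for rescheduling so the switch actually
happens at the next scheduling point.  The flag below is a stand-in for the
kernel's need-resched handling:

    #include <stdbool.h>
    #include <stdio.h>

    struct task_sketch {
        const char *name;
        bool need_resched;          /* stand-in for the need-resched flag */
    };

    static void resched_task_sketch(struct task_sketch *t)
    {
        t->need_resched = true;     /* schedule() runs at the next chance */
    }

    /* the child has already been enqueued ahead of the parent here */
    static void task_new_sketch(struct task_sketch *parent, bool resched_parent)
    {
        if (resched_parent)
            resched_task_sketch(parent);
    }

    int main(void)
    {
        struct task_sketch parent = { "parent", false };

        task_new_sketch(&parent, true);
        printf("%s needs resched: %s\n", parent.name,
               parent.need_resched ? "yes" : "no");
        return 0;
    }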

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 67c67a87146e..0990b20fdcf5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1191,6 +1191,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
 
 	__enqueue_entity(cfs_rq, se);
+	resched_task(rq->curr);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED

commit 44142fac3446d08c08c5d717ec11d50a737e8640
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:01 2007 +0200

    sched: fix sysctl_sched_child_runs_first flag
    
    fix the sched_child_runs_first flag: always call into ->task_new()
    if we are on the same CPU, as SCHED_OTHER tasks depend on it for
    correct initial setup.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
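
The fix can be read as a change to the 'skip ->task_new()' predicate in
wake_up_new_task().  A standalone sketch with plain booleans (not kernel
types): on the same CPU with the parent queued, the old condition skipped
->task_new() whenever sched_child_runs_first was 0 or CLONE_VM was set; the
new one no longer does:

    #include <stdbool.h>
    #include <stdio.h>

    /* before the fix: five reasons to skip the ->task_new() path */
    static bool skip_task_new_before(bool has_task_new, bool child_runs_first,
            bool clone_vm, bool same_cpu, bool parent_on_rq)
    {
        return !has_task_new || !child_runs_first ||
               clone_vm || !same_cpu || !parent_on_rq;
    }

    /* after the fix: only CPU placement and class support matter */
    static bool skip_task_new_after(bool has_task_new, bool same_cpu,
            bool parent_on_rq)
    {
        return !same_cpu || !has_task_new || !parent_on_rq;
    }

    int main(void)
    {
        /* same CPU, parent queued, CLONE_VM clear, child_runs_first = 0 */
        bool before = skip_task_new_before(true, false, false, true, true);
        bool after  = skip_task_new_after(true, true, true);

        printf("before the fix, skip ->task_new(): %s\n", before ? "yes" : "no");
        printf("after the fix,  skip ->task_new(): %s\n", after ? "yes" : "no");
        return 0;
    }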

diff --git a/kernel/sched.c b/kernel/sched.c
index 6c10fa796ca0..2054e557d0d2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1688,10 +1688,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	else
 		p->sched_class = &fair_sched_class;
 
-	if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
-			(clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
-			!current->se.on_rq) {
-
+	if (task_cpu(p) != this_cpu || !p->sched_class->task_new ||
+							!current->se.on_rq) {
 		activate_task(rq, p, 0);
 	} else {
 		/*

commit 092e9d93b3728d484a4e73df9852dc4002cf9923
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Oct 10 21:19:28 2007 -0700

    [9P]: build fix with !CONFIG_SYSCTL
    
    found via make randconfig build testing:
    
     net/built-in.o: In function `init_p9':
     mod.c:(.init.text+0x3b39): undefined reference to `p9_sysctl_register'
     net/built-in.o: In function `exit_p9':
     mod.c:(.exit.text+0x36b): undefined reference to `p9_sysctl_unregister'
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: David S. Miller <davem@davemloft.net>
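
A standalone sketch of the stub pattern this fix uses: when the optional
facility is configured out, static inline no-ops keep every call site
compiling and linking without per-caller #ifdefs.  The names below are
local to the sketch, not the 9P API:

    #include <stdio.h>

    /* flip to 1 to emulate CONFIG_SYSCTL=y (a real implementation must
     * then be linked in) */
    #define HAVE_SYSCTL 0

    #if HAVE_SYSCTL
    int my_sysctl_register(void);           /* provided by the sysctl code */
    void my_sysctl_unregister(void);
    #else
    static inline int my_sysctl_register(void)
    {
        return 0;                           /* nothing to register */
    }
    static inline void my_sysctl_unregister(void)
    {
    }
    #endif

    int main(void)
    {
        /* the caller is written once and links in either configuration */
        if (my_sysctl_register() != 0)
            printf("sysctl registration failed\n");
        my_sysctl_unregister();
        return 0;
    }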

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 88884d39f28f..7726ff41c3e6 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -412,6 +412,18 @@ int p9_idpool_check(int id, struct p9_idpool *p);
 
 int p9_error_init(void);
 int p9_errstr2errno(char *, int);
+
+#ifdef CONFIG_SYSCTL
 int __init p9_sysctl_register(void);
 void __exit p9_sysctl_unregister(void);
+#else
+static inline int p9_sysctl_register(void)
+{
+	return 0;
+}
+static inline void p9_sysctl_unregister(void)
+{
+}
+#endif
+
 #endif /* NET_9P_H */