Patches contributed by Eötvös Loránd University


commit 2e09bf556fbe1a4cd8d837a3e6607de55f7cf4fd
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:05 2007 +0200

    sched: wakeup granularity increase
    
    increase wakeup granularity - we were overscheduling a bit.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 45c7493d8ca8..a60b1dac598a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -74,7 +74,7 @@ const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 25000000UL;
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-const_debug unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
+const_debug unsigned int sysctl_sched_wakeup_granularity = 2000000UL;
 
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 
@@ -582,7 +582,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
  * Preempt the current task with a newly woken task if needed:
  */
 static void
-__check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
 	unsigned long ideal_runtime, delta_exec;
 
@@ -646,8 +646,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 
 static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-	struct sched_entity *next;
-
 	/*
 	 * Dequeue and enqueue the task to update its
 	 * position within the tree:
@@ -655,14 +653,8 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	dequeue_entity(cfs_rq, curr, 0);
 	enqueue_entity(cfs_rq, curr, 0);
 
-	/*
-	 * Reschedule if another task tops the current one.
-	 */
-	next = __pick_next_entity(cfs_rq);
-	if (next == curr)
-		return;
-
-	__check_preempt_curr_fair(cfs_rq, curr);
+	if (cfs_rq->nr_running > 1)
+		check_preempt_tick(cfs_rq, curr);
 }
 
 /**************************************************
@@ -852,7 +844,7 @@ static void yield_task_fair(struct rq *rq, struct task_struct *p)
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
+static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 {
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
@@ -863,9 +855,12 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
 		resched_task(curr);
 		return;
 	}
+	if (is_same_group(curr, p)) {
+		s64 delta = curr->se.vruntime - p->se.vruntime;
 
-	if (is_same_group(curr, p))
-		__check_preempt_curr_fair(cfs_rq, &curr->se);
+		if (delta > (s64)sysctl_sched_wakeup_granularity)
+			resched_task(curr);
+	}
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1095,7 +1090,7 @@ struct sched_class fair_sched_class __read_mostly = {
 	.dequeue_task		= dequeue_task_fair,
 	.yield_task		= yield_task_fair,
 
-	.check_preempt_curr	= check_preempt_curr_fair,
+	.check_preempt_curr	= check_preempt_wakeup,
 
 	.pick_next_task		= pick_next_task_fair,
 	.put_prev_task		= put_prev_task_fair,
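
The effect of this first patch is that a wakeup now preempts the running task only when the current task's vruntime has run ahead of the woken task's by more than sysctl_sched_wakeup_granularity (raised here from 1 ms to 2 ms). A minimal standalone sketch of that decision, with plain integer types and an invented helper name standing in for the kernel's structures:

    /* Standalone sketch of the wakeup-preemption rule above; the types and
     * the helper name are illustrative, not the kernel's. */
    #include <stdbool.h>
    #include <stdint.h>

    static const int64_t wakeup_granularity_ns = 2000000; /* 2 ms, the new default */

    static bool should_preempt_on_wakeup(int64_t curr_vruntime, int64_t woken_vruntime)
    {
        /* Preempt only if the running task is ahead by more than the granularity. */
        int64_t delta = curr_vruntime - woken_vruntime;

        return delta > wakeup_granularity_ns;
    }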

commit 5c6b5964a0629bd39fbf4e5648a8aca32de5bcaf
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:05 2007 +0200

    sched: simplify check_preempt() methods
    
    simplify the check_preempt() methods.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3179d1129a80..45c7493d8ca8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -582,8 +582,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
  * Preempt the current task with a newly woken task if needed:
  */
 static void
-__check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			  struct sched_entity *curr)
+__check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
 	unsigned long ideal_runtime, delta_exec;
 
@@ -663,7 +662,7 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	if (next == curr)
 		return;
 
-	__check_preempt_curr_fair(cfs_rq, next, curr);
+	__check_preempt_curr_fair(cfs_rq, curr);
 }
 
 /**************************************************
@@ -866,7 +865,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
 	}
 
 	if (is_same_group(curr, p))
-		__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se);
+		__check_preempt_curr_fair(cfs_rq, &curr->se);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)

commit 6cb58195143b55d4c427d92f8425bec2b0d9c56c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:04 2007 +0200

    sched: optimize vruntime based scheduling
    
    optimize vruntime based scheduling.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched.c b/kernel/sched.c
index a5dd03522e32..5594e65166fc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -732,13 +732,14 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
 	lw->weight += inc;
-	lw->inv_weight = WMULT_CONST / lw->weight;
+	if (sched_feat(FAIR_SLEEPERS))
+		lw->inv_weight = WMULT_CONST / lw->weight;
 }
 
 static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
 {
 	lw->weight -= dec;
-	if (likely(lw->weight))
+	if (sched_feat(FAIR_SLEEPERS) && likely(lw->weight))
 		lw->inv_weight = WMULT_CONST / lw->weight;
 }
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a566a4558167..7041dc697855 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -336,6 +336,9 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	}
 	curr->vruntime += delta_exec_weighted;
 
+	if (!sched_feat(FAIR_SLEEPERS))
+		return;
+
 	if (unlikely(!load))
 		return;
 

commit bf5c91ba8c629b84413c761f529627195fd0a935
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:04 2007 +0200

    sched: move sched_feat() definitions
    
    move sched_feat() definitions so that it can be used sooner by generic
    code too.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched.c b/kernel/sched.c
index 8f80ebafacc1..a5dd03522e32 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -381,6 +381,37 @@ static void update_rq_clock(struct rq *rq)
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
 
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# define const_debug __read_mostly
+#else
+# define const_debug static const
+#endif
+
+/*
+ * Debugging: various feature bits
+ */
+enum {
+	SCHED_FEAT_FAIR_SLEEPERS	= 1,
+	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 2,
+	SCHED_FEAT_SLEEPER_AVG		= 4,
+	SCHED_FEAT_SLEEPER_LOAD_AVG	= 8,
+	SCHED_FEAT_START_DEBIT		= 16,
+	SCHED_FEAT_SKIP_INITIAL		= 32,
+};
+
+const_debug unsigned int sysctl_sched_features =
+		SCHED_FEAT_FAIR_SLEEPERS	*0 |
+		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
+		SCHED_FEAT_SLEEPER_AVG		*0 |
+		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
+		SCHED_FEAT_START_DEBIT		*1 |
+		SCHED_FEAT_SKIP_INITIAL		*0;
+
+#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
+
 /*
  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
  * clock constructed from sched_clock():
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a2af09cb6a70..a566a4558167 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -20,15 +20,6 @@
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  */
 
-/*
- * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
- */
-#ifdef CONFIG_SCHED_DEBUG
-# define const_debug __read_mostly
-#else
-# define const_debug static const
-#endif
-
 /*
  * Targeted preemption latency for CPU-bound tasks:
  * (default: 20ms, units: nanoseconds)
@@ -87,28 +78,6 @@ const_debug unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
 
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 
-/*
- * Debugging: various feature bits
- */
-enum {
-	SCHED_FEAT_FAIR_SLEEPERS	= 1,
-	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 2,
-	SCHED_FEAT_SLEEPER_AVG		= 4,
-	SCHED_FEAT_SLEEPER_LOAD_AVG	= 8,
-	SCHED_FEAT_START_DEBIT		= 16,
-	SCHED_FEAT_SKIP_INITIAL		= 32,
-};
-
-const_debug unsigned int sysctl_sched_features =
-		SCHED_FEAT_FAIR_SLEEPERS	*0 |
-		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
-		SCHED_FEAT_SLEEPER_AVG		*0 |
-		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
-		SCHED_FEAT_START_DEBIT		*1 |
-		SCHED_FEAT_SKIP_INITIAL		*0;
-
-#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
-
 extern struct sched_class fair_sched_class;
 
 /**************************************************************
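
The sched_feat() pattern moved here composes a default bitmask by multiplying each feature bit by 0 or 1 and OR-ing the results, and the macro then tests a single bit at each use site. A self-contained sketch of the same idiom, with made-up feature names:

    /* Illustrative reimplementation of the feature-bit idiom; the enum
     * names here are invented for the example. */
    #include <stdio.h>

    enum {
        FEAT_ALPHA = 1,
        FEAT_BETA  = 2,
        FEAT_GAMMA = 4,
    };

    /* Multiplying by 0/1 keeps the default table readable while still
     * folding to a single constant at compile time. */
    static const unsigned int features =
            FEAT_ALPHA * 0 |
            FEAT_BETA  * 1 |
            FEAT_GAMMA * 1;

    #define feat(x) (features & FEAT_##x)

    int main(void)
    {
        printf("ALPHA: %d  BETA: %d\n", !!feat(ALPHA), !!feat(BETA)); /* 0  1 */
        return 0;
    }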

commit e9acbff6484df51fd880e0f5fe0224e8be34c17b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:04 2007 +0200

    sched: introduce se->vruntime
    
    introduce se->vruntime as a sum of weighted delta-exec's, and use that
    as the key into the tree.
    
    the idea to use absolute virtual time as the basic metric of scheduling
    has been first raised by William Lee Irwin, advanced by Tong Li and first
    prototyped by Roman Zippel in the "Really Fair Scheduler" (RFS) patchset.
    
    also see:
    
       http://lkml.org/lkml/2007/9/2/76
    
    for a simpler variant of this patch.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3c38a5040e8f..5e5c457fba86 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -902,6 +902,7 @@ struct sched_entity {
 
 	u64			exec_start;
 	u64			sum_exec_runtime;
+	u64			vruntime;
 	u64			prev_sum_exec_runtime;
 	u64			wait_start_fair;
 	u64			sleep_start_fair;
diff --git a/kernel/sched.c b/kernel/sched.c
index 992a1fae72a7..8f80ebafacc1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -182,6 +182,7 @@ struct cfs_rq {
 
 	s64 fair_clock;
 	u64 exec_clock;
+	u64 min_vruntime;
 	s64 wait_runtime;
 	u64 sleeper_bonus;
 	unsigned long wait_runtime_overruns, wait_runtime_underruns;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b46f8078e78f..a2af09cb6a70 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -92,14 +92,16 @@ unsigned int sysctl_sched_runtime_limit __read_mostly;
  */
 enum {
 	SCHED_FEAT_FAIR_SLEEPERS	= 1,
-	SCHED_FEAT_SLEEPER_AVG		= 2,
-	SCHED_FEAT_SLEEPER_LOAD_AVG	= 4,
-	SCHED_FEAT_START_DEBIT		= 8,
-	SCHED_FEAT_SKIP_INITIAL		= 16,
+	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 2,
+	SCHED_FEAT_SLEEPER_AVG		= 4,
+	SCHED_FEAT_SLEEPER_LOAD_AVG	= 8,
+	SCHED_FEAT_START_DEBIT		= 16,
+	SCHED_FEAT_SKIP_INITIAL		= 32,
 };
 
 const_debug unsigned int sysctl_sched_features =
-		SCHED_FEAT_FAIR_SLEEPERS	*1 |
+		SCHED_FEAT_FAIR_SLEEPERS	*0 |
+		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
 		SCHED_FEAT_START_DEBIT		*1 |
@@ -145,6 +147,19 @@ static inline struct task_struct *task_of(struct sched_entity *se)
  * Scheduling class tree data structure manipulation methods:
  */
 
+static inline void
+set_leftmost(struct cfs_rq *cfs_rq, struct rb_node *leftmost)
+{
+	struct sched_entity *se;
+
+	cfs_rq->rb_leftmost = leftmost;
+	if (leftmost) {
+		se = rb_entry(leftmost, struct sched_entity, run_node);
+		cfs_rq->min_vruntime = max(se->vruntime,
+						cfs_rq->min_vruntime);
+	}
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
@@ -180,7 +195,7 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * used):
 	 */
 	if (leftmost)
-		cfs_rq->rb_leftmost = &se->run_node;
+		set_leftmost(cfs_rq, &se->run_node);
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -195,7 +210,8 @@ static void
 __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node)
-		cfs_rq->rb_leftmost = rb_next(&se->run_node);
+		set_leftmost(cfs_rq, rb_next(&se->run_node));
+
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 	update_load_sub(&cfs_rq->load, se->load.weight);
 	cfs_rq->nr_running--;
@@ -336,7 +352,7 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
-	unsigned long delta, delta_fair, delta_mine;
+	unsigned long delta, delta_fair, delta_mine, delta_exec_weighted;
 	struct load_weight *lw = &cfs_rq->load;
 	unsigned long load = lw->weight;
 
@@ -344,6 +360,12 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 
 	curr->sum_exec_runtime += delta_exec;
 	cfs_rq->exec_clock += delta_exec;
+	delta_exec_weighted = delta_exec;
+	if (unlikely(curr->load.weight != NICE_0_LOAD)) {
+		delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
+							&curr->load);
+	}
+	curr->vruntime += delta_exec_weighted;
 
 	if (unlikely(!load))
 		return;
@@ -413,8 +435,6 @@ calc_weighted(unsigned long delta, struct sched_entity *se)
  */
 static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	s64 key;
-
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
 	 * a dequeue/enqueue event is a NOP)
@@ -424,28 +444,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	/*
 	 * Update the key:
 	 */
-	key = cfs_rq->fair_clock;
-
-	/*
-	 * Optimize the common nice 0 case:
-	 */
-	if (likely(se->load.weight == NICE_0_LOAD)) {
-		key -= se->wait_runtime;
-	} else {
-		u64 tmp;
-
-		if (se->wait_runtime < 0) {
-			tmp = -se->wait_runtime;
-			key += (tmp * se->load.inv_weight) >>
-					(WMULT_SHIFT - NICE_0_SHIFT);
-		} else {
-			tmp = se->wait_runtime;
-			key -= (tmp * se->load.inv_weight) >>
-					(WMULT_SHIFT - NICE_0_SHIFT);
-		}
-	}
-
-	se->fair_key = key;
+	se->fair_key = se->vruntime;
 }
 
 /*
@@ -615,8 +614,22 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	 */
 	update_curr(cfs_rq);
 
-	if (wakeup)
+	if (wakeup) {
+		u64 min_runtime, latency;
+
+		min_runtime = cfs_rq->min_vruntime;
+		min_runtime += sysctl_sched_latency/2;
+
+		if (sched_feat(NEW_FAIR_SLEEPERS)) {
+			latency = calc_weighted(sysctl_sched_latency, se);
+			if (min_runtime > latency)
+				min_runtime -= latency;
+		}
+
+		se->vruntime = max(se->vruntime, min_runtime);
+
 		enqueue_sleeper(cfs_rq, se);
+	}
 
 	update_stats_enqueue(cfs_rq, se);
 	__enqueue_entity(cfs_rq, se);
@@ -1155,6 +1168,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	if (sched_feat(START_DEBIT))
 		se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
 
+	se->vruntime = cfs_rq->min_vruntime;
+	update_stats_enqueue(cfs_rq, se);
 	__enqueue_entity(cfs_rq, se);
 	resched_task(rq->curr);
 }
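
The central idea of this patch is that each entity's vruntime advances by its executed time scaled inversely with its load weight, so higher-weight tasks accumulate virtual time more slowly and the leftmost (smallest-vruntime) entity in the tree is always the one that has received the least weighted service. A rough sketch of that scaling, with the usual 2^10 nice-0 scale assumed for illustration:

    /* Sketch of weighted vruntime accounting; constants assumed, not
     * copied from the kernel. */
    #include <stdint.h>

    #define NICE_0_LOAD 1024UL

    static uint64_t vruntime_delta(uint64_t delta_exec_ns, unsigned long weight)
    {
        if (weight == NICE_0_LOAD)
            return delta_exec_ns;                    /* nice-0 fast path */
        return delta_exec_ns * NICE_0_LOAD / weight; /* heavier => slower */
    }

    /* e.g. a task at twice nice-0 weight: vruntime_delta(1000000, 2048)
     * == 500000, i.e. 1 ms of real time counts as 0.5 ms of virtual time. */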

commit 08e2388aa1e40cb06f7d04ac621e2ae94e1d8fdc
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:04 2007 +0200

    sched: clean up calc_weighted()
    
    clean up calc_weighted() - we always use the normalized shift so
    it's not needed to pass that in. Also, push the non-nice0 branch
    into the function.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 91a227b436ee..b46f8078e78f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -397,27 +397,16 @@ update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
 }
 
-/*
- * We calculate fair deltas here, so protect against the random effects
- * of a multiplication overflow by capping it to the runtime limit:
- */
-#if BITS_PER_LONG == 32
 static inline unsigned long
-calc_weighted(unsigned long delta, unsigned long weight, int shift)
+calc_weighted(unsigned long delta, struct sched_entity *se)
 {
-	u64 tmp = (u64)delta * weight >> shift;
+	unsigned long weight = se->load.weight;
 
-	if (unlikely(tmp > sysctl_sched_runtime_limit*2))
-		return sysctl_sched_runtime_limit*2;
-	return tmp;
+	if (unlikely(weight != NICE_0_LOAD))
+		return (u64)delta * se->load.weight >> NICE_0_SHIFT;
+	else
+		return delta;
 }
-#else
-static inline unsigned long
-calc_weighted(unsigned long delta, unsigned long weight, int shift)
-{
-	return delta * weight >> shift;
-}
-#endif
 
 /*
  * Task is being enqueued - update stats:
@@ -469,9 +458,7 @@ __update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	schedstat_set(se->wait_max, max(se->wait_max,
 			rq_of(cfs_rq)->clock - se->wait_start));
 
-	if (unlikely(se->load.weight != NICE_0_LOAD))
-		delta_fair = calc_weighted(delta_fair, se->load.weight,
-							NICE_0_SHIFT);
+	delta_fair = calc_weighted(delta_fair, se);
 
 	add_wait_runtime(cfs_rq, se, delta_fair);
 }
@@ -554,9 +541,7 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		delta_fair = div64_likely32((u64)delta_fair * load,
 						load + se->load.weight);
 
-	if (unlikely(se->load.weight != NICE_0_LOAD))
-		delta_fair = calc_weighted(delta_fair, se->load.weight,
-							NICE_0_SHIFT);
+	delta_fair = calc_weighted(delta_fair, se);
 
 	prev_runtime = se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta_fair);
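
After this cleanup, calc_weighted() simply scales a delta by the entity's weight relative to nice-0 and keeps a fast path for the common nice-0 case. A standalone sketch of the same computation, with the conventional 2^10 scale assumed:

    /* Illustrative version of the simplified calc_weighted(); the constants
     * and the function name are assumptions for the example. */
    #include <stdint.h>

    #define NICE_0_SHIFT 10
    #define NICE_0_LOAD  (1UL << NICE_0_SHIFT)

    static unsigned long calc_weighted_sketch(unsigned long delta, unsigned long weight)
    {
        if (weight == NICE_0_LOAD)
            return delta;                                /* common case: no scaling */
        return (uint64_t)delta * weight >> NICE_0_SHIFT; /* delta * weight / 1024 */
    }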

commit 1091985b482fdd577a5c511059b9d7b4467bd15d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:04 2007 +0200

    sched: speed up update_load_add/_sub()
    
    speed up update_load_add/_sub() by not delaying the division - this
    reduces CPU pipeline dependencies.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched.c b/kernel/sched.c
index 3209e2cc2c2e..992a1fae72a7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -697,16 +697,17 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
 	return calc_delta_mine(delta_exec, NICE_0_LOAD, lw);
 }
 
-static void update_load_add(struct load_weight *lw, unsigned long inc)
+static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
 	lw->weight += inc;
-	lw->inv_weight = 0;
+	lw->inv_weight = WMULT_CONST / lw->weight;
 }
 
-static void update_load_sub(struct load_weight *lw, unsigned long dec)
+static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
 {
 	lw->weight -= dec;
-	lw->inv_weight = 0;
+	if (likely(lw->weight))
+		lw->inv_weight = WMULT_CONST / lw->weight;
 }
 
 /*
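
The point of this change is to pay the division once, when the load weight changes, so the hot path can later multiply by the cached inverse and shift instead of dividing. A small sketch of that reciprocal trick, with placeholder constants close to (but not copied from) the kernel's:

    /* Sketch of the cached-inverse idea; WMULT values are assumed for
     * illustration, and operands are assumed small enough that the
     * 64-bit product below cannot overflow. */
    #include <stdint.h>

    #define WMULT_SHIFT 32
    #define WMULT_CONST 0xffffffffULL /* ~ 2^32 */

    struct load {
        unsigned long weight;
        uint32_t inv_weight; /* cached ~ 2^32 / weight */
    };

    static void load_set(struct load *lw, unsigned long weight)
    {
        lw->weight = weight;
        lw->inv_weight = (uint32_t)(WMULT_CONST / weight); /* the one division */
    }

    /* Hot path: delta * (2^32 / weight) >> 32  ~=  delta / weight */
    static uint64_t scale_down(uint32_t delta, const struct load *lw)
    {
        return ((uint64_t)delta * lw->inv_weight) >> WMULT_SHIFT;
    }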

commit 19ccd97a03a026c2341b35af3ed2078a83c4a22b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:04 2007 +0200

    sched: uninline __enqueue_entity()/__dequeue_entity()
    
    suggested by Roman Zippel: uninline __enqueue_entity() and
    __dequeue_entity().
    
    this reduces code size:
    
          text    data     bss     dec     hex filename
         25385    2386      16   27787    6c8b sched.o.before
         25257    2386      16   27659    6c0b sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 2488f6f3ffad..91a227b436ee 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -148,7 +148,7 @@ static inline struct task_struct *task_of(struct sched_entity *se)
 /*
  * Enqueue an entity into the rb-tree:
  */
-static inline void
+static void
 __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
@@ -191,7 +191,7 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
-static inline void
+static void
 __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node)

commit 429d43bcc026b92b9dfaccd3577fec290f6a67ce
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:03 2007 +0200

    sched: cleanup: simplify cfs_rq_curr() methods
    
    cleanup: simplify cfs_rq_curr() methods - now that the cfs_rq->curr
    pointer is unconditionally present, remove the wrappers.
    
      kernel/sched.o:
          text    data     bss     dec     hex filename
         11784     224    2012   14020    36c4 sched.o.before
         11784     224    2012   14020    36c4 sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 335faf06a561..74d47e65b9ea 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -111,18 +111,6 @@ extern struct sched_class fair_sched_class;
  * CFS operations on generic schedulable entities:
  */
 
-/* currently running entity (if any) on this cfs_rq */
-static inline struct sched_entity *cfs_rq_curr(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->curr;
-}
-
-static inline void
-set_cfs_rq_curr(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	cfs_rq->curr = se;
-}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
 /* cpu runqueue to which this cfs_rq is attached */
@@ -382,7 +370,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 
 static void update_curr(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *curr = cfs_rq_curr(cfs_rq);
+	struct sched_entity *curr = cfs_rq->curr;
 	u64 now = rq_of(cfs_rq)->clock;
 	unsigned long delta_exec;
 
@@ -440,7 +428,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Are we enqueueing a waiting task? (for current tasks
 	 * a dequeue/enqueue event is a NOP)
 	 */
-	if (se != cfs_rq_curr(cfs_rq))
+	if (se != cfs_rq->curr)
 		update_stats_wait_start(cfs_rq, se);
 	/*
 	 * Update the key:
@@ -511,7 +499,7 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Mark the end of the wait period if dequeueing a
 	 * waiting task:
 	 */
-	if (se != cfs_rq_curr(cfs_rq))
+	if (se != cfs_rq->curr)
 		update_stats_wait_end(cfs_rq, se);
 }
 
@@ -717,7 +705,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 */
 	update_stats_wait_end(cfs_rq, se);
 	update_stats_curr_start(cfs_rq, se);
-	set_cfs_rq_curr(cfs_rq, se);
+	cfs_rq->curr = se;
 #ifdef CONFIG_SCHEDSTATS
 	/*
 	 * Track our maximum slice length, if the CPU's load is at
@@ -754,7 +742,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
 
 	if (prev->on_rq)
 		update_stats_wait_start(cfs_rq, prev);
-	set_cfs_rq_curr(cfs_rq, NULL);
+	cfs_rq->curr = NULL;
 }
 
 static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
@@ -1153,7 +1141,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr)
 static void task_new_fair(struct rq *rq, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
-	struct sched_entity *se = &p->se, *curr = cfs_rq_curr(cfs_rq);
+	struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
 
 	sched_info_queued(p);
 

commit 62160e3f4a06d948ec89665d29f1173e551deedc
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:03 2007 +0200

    sched: track cfs_rq->curr on !group-scheduling too
    
    Noticed by Roman Zippel: use cfs_rq->curr in the !group-scheduling
    case too. Small micro-optimization and cleanup effect:
    
       text    data     bss     dec     hex filename
       36269    3482      24   39775    9b5f sched.o.before
       36177    3486      24   39687    9b07 sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched.c b/kernel/sched.c
index f6a81061fd50..3209e2cc2c2e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -189,11 +189,11 @@ struct cfs_rq {
 	struct rb_root tasks_timeline;
 	struct rb_node *rb_leftmost;
 	struct rb_node *rb_load_balance_curr;
-#ifdef CONFIG_FAIR_GROUP_SCHED
 	/* 'curr' points to currently running entity on this cfs_rq.
 	 * It is set to NULL otherwise (i.e when none are currently running).
 	 */
 	struct sched_entity *curr;
+#ifdef CONFIG_FAIR_GROUP_SCHED
 	struct rq *rq;	/* cpu runqueue to which this cfs_rq is attached */
 
 	/* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 105d57b41aa2..335faf06a561 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -111,51 +111,38 @@ extern struct sched_class fair_sched_class;
  * CFS operations on generic schedulable entities:
  */
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/* cpu runqueue to which this cfs_rq is attached */
-static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->rq;
-}
-
 /* currently running entity (if any) on this cfs_rq */
 static inline struct sched_entity *cfs_rq_curr(struct cfs_rq *cfs_rq)
 {
 	return cfs_rq->curr;
 }
 
-/* An entity is a task if it doesn't "own" a runqueue */
-#define entity_is_task(se)	(!se->my_q)
-
 static inline void
 set_cfs_rq_curr(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	cfs_rq->curr = se;
 }
 
-#else	/* CONFIG_FAIR_GROUP_SCHED */
+#ifdef CONFIG_FAIR_GROUP_SCHED
 
+/* cpu runqueue to which this cfs_rq is attached */
 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
 {
-	return container_of(cfs_rq, struct rq, cfs);
+	return cfs_rq->rq;
 }
 
-static inline struct sched_entity *cfs_rq_curr(struct cfs_rq *cfs_rq)
-{
-	struct rq *rq = rq_of(cfs_rq);
+/* An entity is a task if it doesn't "own" a runqueue */
+#define entity_is_task(se)	(!se->my_q)
 
-	if (unlikely(rq->curr->sched_class != &fair_sched_class))
-		return NULL;
+#else	/* CONFIG_FAIR_GROUP_SCHED */
 
-	return &rq->curr->se;
+static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
+{
+	return container_of(cfs_rq, struct rq, cfs);
 }
 
 #define entity_is_task(se)	1
 
-static inline void
-set_cfs_rq_curr(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
-
 #endif	/* CONFIG_FAIR_GROUP_SCHED */
 
 static inline struct task_struct *task_of(struct sched_entity *se)