Patches contributed by Eötvös Loránd University


commit 53df556e06d85245cf6aacedaba8e4da684859c3
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:03 2007 +0200

    sched: remove precise CPU load calculations #2
    
    continued removal of precise CPU load calculations.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
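
With the update_curr_load() calls gone, inc_load()/dec_load() reduce to
adding or subtracting the task's weight from the runqueue load.  A minimal
standalone sketch of that end state (plain C with local types, not the
kernel's; 1024 is the usual nice-0 weight):

    #include <stdio.h>

    struct load_weight {
        unsigned long weight;
    };

    struct rq_sketch {
        struct load_weight load;
    };

    /* what inc_load()/dec_load() boil down to after this patch */
    static void inc_load(struct rq_sketch *rq, unsigned long task_weight)
    {
        rq->load.weight += task_weight;     /* update_load_add() equivalent */
    }

    static void dec_load(struct rq_sketch *rq, unsigned long task_weight)
    {
        rq->load.weight -= task_weight;     /* update_load_sub() equivalent */
    }

    int main(void)
    {
        struct rq_sketch rq = { { 0 } };

        inc_load(&rq, 1024);    /* nice-0 task enqueued */
        inc_load(&rq, 1024);    /* a second one */
        dec_load(&rq, 1024);    /* one dequeued again */
        printf("rq load weight = %lu\n", rq.load.weight);   /* 1024 */
        return 0;
    }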

diff --git a/kernel/sched.c b/kernel/sched.c
index 25cc9b2a8c15..f6a81061fd50 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -173,8 +173,6 @@ struct rt_prio_array {
 
 struct load_stat {
 	struct load_weight load;
-	u64 load_update_start, load_update_last;
-	unsigned long delta_fair, delta_exec, delta_stat;
 };
 
 /* CFS-related fields in a runqueue */
@@ -793,15 +791,6 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 
 #define sched_class_highest (&rt_sched_class)
 
-static void __update_curr_load(struct rq *rq, struct load_stat *ls)
-{
-	if (rq->curr != rq->idle && ls->load.weight) {
-		ls->delta_exec += ls->delta_stat;
-		ls->delta_fair += calc_delta_fair(ls->delta_stat, &ls->load);
-		ls->delta_stat = 0;
-	}
-}
-
 /*
  * Update delta_exec, delta_fair fields for rq.
  *
@@ -817,31 +806,13 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
  * This function is called /before/ updating rq->ls.load
  * and when switching tasks.
  */
-static void update_curr_load(struct rq *rq)
-{
-	struct load_stat *ls = &rq->ls;
-	u64 start;
-
-	start = ls->load_update_start;
-	ls->load_update_start = rq->clock;
-	ls->delta_stat += rq->clock - start;
-	/*
-	 * Stagger updates to ls->delta_fair. Very frequent updates
-	 * can be expensive.
-	 */
-	if (ls->delta_stat)
-		__update_curr_load(rq, ls);
-}
-
 static inline void inc_load(struct rq *rq, const struct task_struct *p)
 {
-	update_curr_load(rq);
 	update_load_add(&rq->ls.load, p->se.load.weight);
 }
 
 static inline void dec_load(struct rq *rq, const struct task_struct *p)
 {
-	update_curr_load(rq);
 	update_load_sub(&rq->ls.load, p->se.load.weight);
 }
 
@@ -1972,8 +1943,7 @@ unsigned long nr_active(void)
  */
 static void update_cpu_load(struct rq *this_rq)
 {
-	unsigned long total_load = this_rq->ls.load.weight;
-	unsigned long this_load =  total_load;
+	unsigned long this_load = this_rq->ls.load.weight;
 	int i, scale;
 
 	this_rq->nr_load_updates++;

commit a25707f3aef9cf68c341eba5960d580f364e4e6f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:03 2007 +0200

    sched: remove precise CPU load
    
    CPU load calculations are statistical anyway, and there's little benefit
    from having it calculated on every scheduling event. So remove this code,
    it gets rid of a divide from the scheduler wakeup and context-switch
    fastpath.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
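
Only the decaying cpu_load[] averages remain after this change, plus a
round-up when the load is rising.  A standalone sketch (not kernel code) of
that update rule, showing why the plain truncating average would otherwise
get stuck at 9 when the load is 10:

    #include <stdio.h>

    #define CPU_LOAD_IDX_MAX 5

    static void update_load_sketch(unsigned long cpu_load[],
                                   unsigned long this_load, int round_up)
    {
        unsigned long scale;
        int i;

        for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
            unsigned long old_load = cpu_load[i];
            unsigned long new_load = this_load;

            if (round_up && new_load > old_load)
                new_load += scale - 1;      /* the round-up added above */
            cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
        }
    }

    int main(void)
    {
        unsigned long a[CPU_LOAD_IDX_MAX] = { 9, 9, 9, 9, 9 };
        unsigned long b[CPU_LOAD_IDX_MAX] = { 9, 9, 9, 9, 9 };
        int tick;

        for (tick = 0; tick < 100; tick++) {
            update_load_sketch(a, 10, 0);   /* truncating average only */
            update_load_sketch(b, 10, 1);   /* with the round-up */
        }
        printf("without round-up: cpu_load[1] = %lu\n", a[1]);  /* 9 */
        printf("with round-up:    cpu_load[1] = %lu\n", b[1]);  /* 10 */
        return 0;
    }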

diff --git a/kernel/sched.c b/kernel/sched.c
index d4dabfcc776c..25cc9b2a8c15 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1972,42 +1972,11 @@ unsigned long nr_active(void)
  */
 static void update_cpu_load(struct rq *this_rq)
 {
-	u64 fair_delta64, exec_delta64, idle_delta64, sample_interval64, tmp64;
 	unsigned long total_load = this_rq->ls.load.weight;
 	unsigned long this_load =  total_load;
-	struct load_stat *ls = &this_rq->ls;
 	int i, scale;
 
 	this_rq->nr_load_updates++;
-	if (unlikely(!(sysctl_sched_features & SCHED_FEAT_PRECISE_CPU_LOAD)))
-		goto do_avg;
-
-	/* Update delta_fair/delta_exec fields first */
-	update_curr_load(this_rq);
-
-	fair_delta64 = ls->delta_fair + 1;
-	ls->delta_fair = 0;
-
-	exec_delta64 = ls->delta_exec + 1;
-	ls->delta_exec = 0;
-
-	sample_interval64 = this_rq->clock - ls->load_update_last;
-	ls->load_update_last = this_rq->clock;
-
-	if ((s64)sample_interval64 < (s64)TICK_NSEC)
-		sample_interval64 = TICK_NSEC;
-
-	if (exec_delta64 > sample_interval64)
-		exec_delta64 = sample_interval64;
-
-	idle_delta64 = sample_interval64 - exec_delta64;
-
-	tmp64 = div64_64(SCHED_LOAD_SCALE * exec_delta64, fair_delta64);
-	tmp64 = div64_64(tmp64 * exec_delta64, sample_interval64);
-
-	this_load = (unsigned long)tmp64;
-
-do_avg:
 
 	/* Update our load: */
 	for (i = 0, scale = 1; i < CPU_LOAD_IDX_MAX; i++, scale += scale) {
@@ -2017,7 +1986,13 @@ static void update_cpu_load(struct rq *this_rq)
 
 		old_load = this_rq->cpu_load[i];
 		new_load = this_load;
-
+		/*
+		 * Round up the averaging division if load is increasing. This
+		 * prevents us from getting stuck on 9 if the load is 10, for
+		 * example.
+		 */
+		if (new_load > old_load)
+			new_load += scale-1;
 		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
 	}
 }
@@ -6484,7 +6459,6 @@ static inline void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq)
 
 void __init sched_init(void)
 {
-	u64 now = sched_clock();
 	int highest_cpu = 0;
 	int i, j;
 
@@ -6509,8 +6483,6 @@ void __init sched_init(void)
 		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
 		list_add(&rq->cfs.leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 #endif
-		rq->ls.load_update_last = now;
-		rq->ls.load_update_start = now;
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
 			rq->cpu_load[j] = 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index fd080f686f18..6b789dae7fdf 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -145,8 +145,6 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(nr_running);
 	SEQ_printf(m, "  .%-30s: %lu\n", "load",
 		   rq->ls.load.weight);
-	P(ls.delta_fair);
-	P(ls.delta_exec);
 	P(nr_switches);
 	P(nr_load_updates);
 	P(nr_uninterruptible);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 2138c40f4836..105d57b41aa2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -94,16 +94,14 @@ enum {
 	SCHED_FEAT_FAIR_SLEEPERS	= 1,
 	SCHED_FEAT_SLEEPER_AVG		= 2,
 	SCHED_FEAT_SLEEPER_LOAD_AVG	= 4,
-	SCHED_FEAT_PRECISE_CPU_LOAD	= 8,
-	SCHED_FEAT_START_DEBIT		= 16,
-	SCHED_FEAT_SKIP_INITIAL		= 32,
+	SCHED_FEAT_START_DEBIT		= 8,
+	SCHED_FEAT_SKIP_INITIAL		= 16,
 };
 
 const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_FAIR_SLEEPERS	*1 |
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
-		SCHED_FEAT_PRECISE_CPU_LOAD	*1 |
 		SCHED_FEAT_START_DEBIT		*1 |
 		SCHED_FEAT_SKIP_INITIAL		*0;
 

commit 8ebc91d93669af39dbed50914d7daf457eeb43be
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:03 2007 +0200

    sched: remove stat_gran
    
    remove the stat_gran code - it was disabled by default and it causes
    unnecessary overhead.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
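
With the stat_gran batching gone, update_curr() derives the execution delta
from the clock and passes it straight to the helper; there is no per-entity
accumulator or granularity threshold in between.  A minimal sketch of that
shape, using simplified local types rather than the kernel's:

    #include <stdio.h>

    struct entity {
        unsigned long long exec_start;          /* ns stamp of last update */
        unsigned long long sum_exec_runtime;    /* total ns executed */
    };

    static void __update_curr_sketch(struct entity *curr,
                                     unsigned long delta_exec)
    {
        curr->sum_exec_runtime += delta_exec;   /* account immediately */
    }

    static void update_curr_sketch(struct entity *curr, unsigned long long now)
    {
        unsigned long delta_exec = (unsigned long)(now - curr->exec_start);

        __update_curr_sketch(curr, delta_exec); /* no threshold any more */
        curr->exec_start = now;
    }

    int main(void)
    {
        struct entity e = { .exec_start = 1000 };

        update_curr_sketch(&e, 1500);
        update_curr_sketch(&e, 2600);
        printf("sum_exec_runtime = %llu ns\n", e.sum_exec_runtime);  /* 1600 */
        return 0;
    }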

diff --git a/include/linux/sched.h b/include/linux/sched.h
index befca3f9364a..3c38a5040e8f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -895,9 +895,6 @@ struct load_weight {
  */
 struct sched_entity {
 	long			wait_runtime;
-	unsigned long		delta_fair_run;
-	unsigned long		delta_fair_sleep;
-	unsigned long		delta_exec;
 	s64			fair_key;
 	struct load_weight	load;		/* for load-balancing */
 	struct rb_node		run_node;
diff --git a/kernel/sched.c b/kernel/sched.c
index ae1544f0a20d..d4dabfcc776c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -829,7 +829,7 @@ static void update_curr_load(struct rq *rq)
 	 * Stagger updates to ls->delta_fair. Very frequent updates
 	 * can be expensive.
 	 */
-	if (ls->delta_stat >= sysctl_sched_stat_granularity)
+	if (ls->delta_stat)
 		__update_curr_load(rq, ls);
 }
 
@@ -1588,9 +1588,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
-	p->se.delta_exec		= 0;
-	p->se.delta_fair_run		= 0;
-	p->se.delta_fair_sleep		= 0;
 	p->se.wait_runtime		= 0;
 	p->se.sleep_start_fair		= 0;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 2e84aaffe425..2138c40f4836 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -85,8 +85,6 @@ const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 25000000UL;
  */
 const_debug unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
 
-const_debug unsigned int sysctl_sched_stat_granularity;
-
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 
 /*
@@ -360,13 +358,13 @@ add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
  * are not in our scheduling class.
  */
 static inline void
-__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
+	      unsigned long delta_exec)
 {
-	unsigned long delta, delta_exec, delta_fair, delta_mine;
+	unsigned long delta, delta_fair, delta_mine;
 	struct load_weight *lw = &cfs_rq->load;
 	unsigned long load = lw->weight;
 
-	delta_exec = curr->delta_exec;
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
@@ -400,6 +398,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 static void update_curr(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq_curr(cfs_rq);
+	u64 now = rq_of(cfs_rq)->clock;
 	unsigned long delta_exec;
 
 	if (unlikely(!curr))
@@ -410,15 +409,10 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	 * since the last time we changed load (this cannot
 	 * overflow on 32 bits):
 	 */
-	delta_exec = (unsigned long)(rq_of(cfs_rq)->clock - curr->exec_start);
-
-	curr->delta_exec += delta_exec;
+	delta_exec = (unsigned long)(now - curr->exec_start);
 
-	if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) {
-		__update_curr(cfs_rq, curr);
-		curr->delta_exec = 0;
-	}
-	curr->exec_start = rq_of(cfs_rq)->clock;
+	__update_curr(cfs_rq, curr, delta_exec);
+	curr->exec_start = now;
 }
 
 static inline void
@@ -494,10 +488,9 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Note: must be called with a freshly updated rq->fair_clock.
  */
 static inline void
-__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se,
+			unsigned long delta_fair)
 {
-	unsigned long delta_fair = se->delta_fair_run;
-
 	schedstat_set(se->wait_max, max(se->wait_max,
 			rq_of(cfs_rq)->clock - se->wait_start));
 
@@ -519,12 +512,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
 			(u64)(cfs_rq->fair_clock - se->wait_start_fair));
 
-	se->delta_fair_run += delta_fair;
-	if (unlikely(abs(se->delta_fair_run) >=
-				sysctl_sched_stat_granularity)) {
-		__update_stats_wait_end(cfs_rq, se);
-		se->delta_fair_run = 0;
-	}
+	__update_stats_wait_end(cfs_rq, se, delta_fair);
 
 	se->wait_start_fair = 0;
 	schedstat_set(se->wait_start, 0);
@@ -567,9 +555,10 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se,
+			      unsigned long delta_fair)
 {
-	unsigned long load = cfs_rq->load.weight, delta_fair;
+	unsigned long load = cfs_rq->load.weight;
 	long prev_runtime;
 
 	/*
@@ -582,8 +571,6 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG)
 		load = rq_of(cfs_rq)->cpu_load[2];
 
-	delta_fair = se->delta_fair_sleep;
-
 	/*
 	 * Fix up delta_fair with the effect of us running
 	 * during the whole sleep period:
@@ -618,12 +605,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit),
 		(u64)(cfs_rq->fair_clock - se->sleep_start_fair));
 
-	se->delta_fair_sleep += delta_fair;
-	if (unlikely(abs(se->delta_fair_sleep) >=
-				sysctl_sched_stat_granularity)) {
-		__enqueue_sleeper(cfs_rq, se);
-		se->delta_fair_sleep = 0;
-	}
+	__enqueue_sleeper(cfs_rq, se, delta_fair);
 
 	se->sleep_start_fair = 0;
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6c97259e863e..9b1b0d4ff966 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -264,17 +264,6 @@ static ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_stat_granularity_ns",
-		.data		= &sysctl_sched_stat_granularity,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &min_wakeup_granularity_ns,
-		.extra2		= &max_wakeup_granularity_ns,
-	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_runtime_limit_ns",

commit 2bd8e6d422a4f44c0994f909317eba80b0fe08a1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: use constants if !CONFIG_SCHED_DEBUG
    
    use constants if !CONFIG_SCHED_DEBUG.
    
    this speeds up the code and reduces code-size:
    
        text    data     bss     dec     hex filename
       27464    3014      16   30494    771e sched.o.before
       26929    3010      20   29959    7507 sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
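
A standalone sketch of the const_debug idea behind this patch, under a
hypothetical MY_DEBUG switch: with debugging off the tunable becomes a
static const that the compiler can fold into its users (the kernel's debug
case uses __read_mostly placement rather than the plain global shown here):

    #include <stdio.h>

    /* flip to 1 to emulate CONFIG_SCHED_DEBUG=y (tunable stays writable) */
    #define MY_DEBUG 0

    #if MY_DEBUG
    # define const_debug                   /* plain global, runtime-adjustable */
    #else
    # define const_debug static const     /* becomes a compile-time constant */
    #endif

    const_debug unsigned int sysctl_sched_latency = 20000000UL;  /* 20 ms */

    int main(void)
    {
        /* with MY_DEBUG=0 the compiler may fold this into an immediate */
        printf("targeted latency: %u ns\n", sysctl_sched_latency);
        return 0;
    }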

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9761b165d563..befca3f9364a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1402,15 +1402,18 @@ static inline void idle_task_exit(void) {}
 
 extern void sched_idle_next(void);
 
+#ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_stat_granularity;
 extern unsigned int sysctl_sched_runtime_limit;
-extern unsigned int sysctl_sched_compat_yield;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
+#endif
+
+extern unsigned int sysctl_sched_compat_yield;
 
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c
index 2520923a0c3b..ae1544f0a20d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1658,12 +1658,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	put_cpu();
 }
 
-/*
- * After fork, child runs first. (default) If set to 0 then
- * parent will (try to) run first.
- */
-unsigned int __read_mostly sysctl_sched_child_runs_first = 1;
-
 /*
  * wake_up_new_task - wake up a newly created task for the first time.
  *
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c15d8ae92cb..2e84aaffe425 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -20,6 +20,15 @@
  *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
  */
 
+/*
+ * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+ */
+#ifdef CONFIG_SCHED_DEBUG
+# define const_debug __read_mostly
+#else
+# define const_debug static const
+#endif
+
 /*
  * Targeted preemption latency for CPU-bound tasks:
  * (default: 20ms, units: nanoseconds)
@@ -34,7 +43,13 @@
  * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
  * Targeted preemption latency for CPU-bound tasks:
  */
-unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
+const_debug unsigned int sysctl_sched_latency = 20000000ULL;
+
+/*
+ * After fork, child runs first. (default) If set to 0 then
+ * parent will (try to) run first.
+ */
+const_debug unsigned int sysctl_sched_child_runs_first = 1;
 
 /*
  * Minimal preemption granularity for CPU-bound tasks:
@@ -58,7 +73,7 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
+const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 25000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
@@ -68,13 +83,10 @@ unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL;
+const_debug unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
 
-unsigned int sysctl_sched_stat_granularity __read_mostly;
+const_debug unsigned int sysctl_sched_stat_granularity;
 
-/*
- * Initialized in sched_init_granularity() [to 5 times the base granularity]:
- */
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 
 /*
@@ -89,7 +101,7 @@ enum {
 	SCHED_FEAT_SKIP_INITIAL		= 32,
 };
 
-unsigned int sysctl_sched_features __read_mostly =
+const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_FAIR_SLEEPERS	*1 |
 		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |

commit 38ad464d410dadceda1563f36bdb0be7fe4c8938
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: uniform tunings
    
    use the same defaults on both UP and SMP.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
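
For reference, a standalone illustration of the scaling the removed
sched_init_granularity() used to apply: tunables were multiplied by
1 + log2(online CPUs) and capped at 100 ms, so an 8-way box ran with 4x the
UP latency.  The helper below is local to the sketch; 20 ms is the stock
latency default:

    #include <stdio.h>

    static unsigned int ilog2_sketch(unsigned int n)
    {
        unsigned int r = 0;

        while (n >>= 1)
            r++;
        return r;
    }

    int main(void)
    {
        const unsigned long limit = 100000000UL;        /* 100 ms cap, ns */
        const unsigned long base_latency = 20000000UL;  /* 20 ms default */
        unsigned int cpus;

        for (cpus = 1; cpus <= 64; cpus *= 2) {
            unsigned int factor = 1 + ilog2_sketch(cpus);
            unsigned long latency = base_latency * factor;

            if (latency > limit)
                latency = limit;
            printf("%2u CPUs -> factor %u, latency %lu ns\n",
                   cpus, factor, latency);
        }
        return 0;
    }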

diff --git a/kernel/sched.c b/kernel/sched.c
index 282d037c7300..2520923a0c3b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4898,32 +4898,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
  */
 cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 
-/*
- * Increase the granularity value when there are more CPUs,
- * because with more CPUs the 'effective latency' as visible
- * to users decreases. But the relationship is not linear,
- * so pick a second-best guess by going with the log2 of the
- * number of CPUs.
- *
- * This idea comes from the SD scheduler of Con Kolivas:
- */
-static inline void sched_init_granularity(void)
-{
-	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long limit = 100000000;
-
-	sysctl_sched_min_granularity *= factor;
-	if (sysctl_sched_min_granularity > limit)
-		sysctl_sched_min_granularity = limit;
-
-	sysctl_sched_latency *= factor;
-	if (sysctl_sched_latency > limit)
-		sysctl_sched_latency = limit;
-
-	sysctl_sched_runtime_limit = sysctl_sched_latency;
-	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
-}
-
 #ifdef CONFIG_SMP
 /*
  * This is how migration works:
@@ -6491,12 +6465,10 @@ void __init sched_init_smp(void)
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
 		BUG();
-	sched_init_granularity();
 }
 #else
 void __init sched_init_smp(void)
 {
-	sched_init_granularity();
 }
 #endif /* CONFIG_SMP */
 

commit eba1ed4b7e52720e3099325874811c38a5ec1562
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: debug: track maximum 'slice'
    
    track the maximum amount of time a task has executed while
    the CPU load was at least 2x. (i.e. at least two nice-0
    tasks were runnable)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
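
A sketch (plain C, not the kernel structures) of the bookkeeping this adds:
a 'slice' is the runtime accumulated since the entity was last picked, and
its maximum is recorded only when the runqueue weight is at least twice the
entity's own, i.e. when there was real competition for the CPU:

    #include <stdio.h>

    struct entity {
        unsigned long long sum_exec_runtime;
        unsigned long long prev_sum_exec_runtime;
        unsigned long long slice_max;
        unsigned long load_weight;
    };

    static void set_next_entity_sketch(struct entity *se,
                                       unsigned long rq_weight)
    {
        /* track only when at least our own weight's worth of others runs */
        if (rq_weight >= 2 * se->load_weight) {
            unsigned long long slice =
                se->sum_exec_runtime - se->prev_sum_exec_runtime;

            if (slice > se->slice_max)
                se->slice_max = slice;
        }
        se->prev_sum_exec_runtime = se->sum_exec_runtime;
    }

    int main(void)
    {
        struct entity se = { .load_weight = 1024 };     /* nice-0 weight */

        set_next_entity_sketch(&se, 2048);      /* first pick, slice is 0 */
        se.sum_exec_runtime += 3000000;         /* the entity ran for 3 ms */
        set_next_entity_sketch(&se, 2048);      /* repicked: slice is 3 ms */
        printf("slice_max = %llu ns\n", se.slice_max);  /* 3000000 */
        return 0;
    }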

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 833f7dc2b8de..9761b165d563 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -921,6 +921,7 @@ struct sched_entity {
 	u64			block_start;
 	u64			block_max;
 	u64			exec_max;
+	u64			slice_max;
 
 	unsigned long		wait_runtime_overruns;
 	unsigned long		wait_runtime_underruns;
diff --git a/kernel/sched.c b/kernel/sched.c
index e92b185e371b..282d037c7300 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1603,6 +1603,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.sleep_max			= 0;
 	p->se.block_max			= 0;
 	p->se.exec_max			= 0;
+	p->se.slice_max			= 0;
 	p->se.wait_max			= 0;
 	p->se.wait_runtime_overruns	= 0;
 	p->se.wait_runtime_underruns	= 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 94915f1fd9de..fd080f686f18 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -254,6 +254,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.sleep_max);
 	P(se.block_max);
 	P(se.exec_max);
+	P(se.slice_max);
 	P(se.wait_max);
 	P(se.wait_runtime_overruns);
 	P(se.wait_runtime_underruns);
@@ -282,6 +283,7 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.sleep_max			= 0;
 	p->se.block_max			= 0;
 	p->se.exec_max			= 0;
+	p->se.slice_max			= 0;
 	p->se.wait_max			= 0;
 	p->se.wait_runtime_overruns	= 0;
 	p->se.wait_runtime_underruns	= 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0990b20fdcf5..5c15d8ae92cb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -739,6 +739,17 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_stats_wait_end(cfs_rq, se);
 	update_stats_curr_start(cfs_rq, se);
 	set_cfs_rq_curr(cfs_rq, se);
+#ifdef CONFIG_SCHEDSTATS
+	/*
+	 * Track our maximum slice length, if the CPU's load is at
+	 * least twice that of our own weight (i.e. dont track it
+	 * when there are only lesser-weight tasks around):
+	 */
+	if (rq_of(cfs_rq)->ls.load.weight >= 2*se->load.weight) {
+		se->slice_max = max(se->slice_max,
+			se->sum_exec_runtime - se->prev_sum_exec_runtime);
+	}
+#endif
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 

commit a4b29ba2f72673aaa60ba11ced74d579771dd578
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: small sched_debug cleanup
    
    small kernel/sched_debug.c cleanup - break up
    multi-variable assignment.
    
    no code changed:
    
       text    data     bss     dec     hex filename
       38869    3550      24   42443    a5cb sched.o.before
       38869    3550      24   42443    a5cb sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index c3ee38bd3426..94915f1fd9de 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -279,9 +279,13 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 void proc_sched_set_task(struct task_struct *p)
 {
 #ifdef CONFIG_SCHEDSTATS
-	p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0;
-	p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0;
+	p->se.sleep_max			= 0;
+	p->se.block_max			= 0;
+	p->se.exec_max			= 0;
+	p->se.wait_max			= 0;
+	p->se.wait_runtime_overruns	= 0;
+	p->se.wait_runtime_underruns	= 0;
 #endif
-	p->se.sum_exec_runtime = 0;
+	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
 }

commit bb61c210835db95b0e9fb612a316422e7cc675e3
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:02 2007 +0200

    sched: resched task in task_new_fair()
    
    to get full child-runs-first semantics make sure the parent is
    rescheduled.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
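
A tiny sketch of why the added resched_task() call matters: enqueueing the
child ahead of the parent changes nothing by itself; the currently running
parent also has to be flagged for rescheduling so the switch actually
happens at the next scheduling point.  The flag below is a stand-in for the
kernel's need-resched handling:

    #include <stdbool.h>
    #include <stdio.h>

    struct task_sketch {
        const char *name;
        bool need_resched;          /* stand-in for the need-resched flag */
    };

    static void resched_task_sketch(struct task_sketch *t)
    {
        t->need_resched = true;     /* schedule() runs at the next chance */
    }

    /* the child has already been enqueued ahead of the parent here */
    static void task_new_sketch(struct task_sketch *parent, bool resched_parent)
    {
        if (resched_parent)
            resched_task_sketch(parent);
    }

    int main(void)
    {
        struct task_sketch parent = { "parent", false };

        task_new_sketch(&parent, true);
        printf("%s needs resched: %s\n", parent.name,
               parent.need_resched ? "yes" : "no");
        return 0;
    }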

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 67c67a87146e..0990b20fdcf5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1191,6 +1191,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 		se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
 
 	__enqueue_entity(cfs_rq, se);
+	resched_task(rq->curr);
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED

commit 44142fac3446d08c08c5d717ec11d50a737e8640
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:01 2007 +0200

    sched: fix sysctl_sched_child_runs_first flag
    
    fix the sched_child_runs_first flag: always call into ->task_new()
    if we are on the same CPU, as SCHED_OTHER tasks depend on it for
    correct initial setup.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Signed-off-by: Mike Galbraith <efault@gmx.de>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
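
The fix can be read as a change to the 'skip ->task_new()' predicate in
wake_up_new_task().  A standalone sketch with plain booleans (not kernel
types): on the same CPU with the parent queued, the old condition skipped
->task_new() whenever sched_child_runs_first was 0 or CLONE_VM was set; the
new one no longer does:

    #include <stdbool.h>
    #include <stdio.h>

    /* before the fix: five reasons to skip the ->task_new() path */
    static bool skip_task_new_before(bool has_task_new, bool child_runs_first,
            bool clone_vm, bool same_cpu, bool parent_on_rq)
    {
        return !has_task_new || !child_runs_first ||
               clone_vm || !same_cpu || !parent_on_rq;
    }

    /* after the fix: only CPU placement and class support matter */
    static bool skip_task_new_after(bool has_task_new, bool same_cpu,
            bool parent_on_rq)
    {
        return !same_cpu || !has_task_new || !parent_on_rq;
    }

    int main(void)
    {
        /* same CPU, parent queued, CLONE_VM clear, child_runs_first = 0 */
        bool before = skip_task_new_before(true, false, false, true, true);
        bool after  = skip_task_new_after(true, true, true);

        printf("before the fix, skip ->task_new(): %s\n", before ? "yes" : "no");
        printf("after the fix,  skip ->task_new(): %s\n", after ? "yes" : "no");
        return 0;
    }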

diff --git a/kernel/sched.c b/kernel/sched.c
index 6c10fa796ca0..2054e557d0d2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1688,10 +1688,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	else
 		p->sched_class = &fair_sched_class;
 
-	if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
-			(clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
-			!current->se.on_rq) {
-
+	if (task_cpu(p) != this_cpu || !p->sched_class->task_new ||
+							!current->se.on_rq) {
 		activate_task(rq, p, 0);
 	} else {
 		/*

commit 092e9d93b3728d484a4e73df9852dc4002cf9923
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Oct 10 21:19:28 2007 -0700

    [9P]: build fix with !CONFIG_SYSCTL
    
    found via make randconfig build testing:
    
     net/built-in.o: In function `init_p9':
     mod.c:(.init.text+0x3b39): undefined reference to `p9_sysctl_register'
     net/built-in.o: In function `exit_p9':
     mod.c:(.exit.text+0x36b): undefined reference to `p9_sysctl_unregister'
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: David S. Miller <davem@davemloft.net>
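
A standalone sketch of the stub pattern this fix uses: when the optional
facility is configured out, static inline no-ops keep every call site
compiling and linking without per-caller #ifdefs.  The names below are
local to the sketch, not the 9P API:

    #include <stdio.h>

    /* flip to 1 to emulate CONFIG_SYSCTL=y (a real implementation must
     * then be linked in) */
    #define HAVE_SYSCTL 0

    #if HAVE_SYSCTL
    int my_sysctl_register(void);           /* provided by the sysctl code */
    void my_sysctl_unregister(void);
    #else
    static inline int my_sysctl_register(void)
    {
        return 0;                           /* nothing to register */
    }
    static inline void my_sysctl_unregister(void)
    {
    }
    #endif

    int main(void)
    {
        /* the caller is written once and links in either configuration */
        if (my_sysctl_register() != 0)
            printf("sysctl registration failed\n");
        my_sysctl_unregister();
        return 0;
    }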

diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 88884d39f28f..7726ff41c3e6 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -412,6 +412,18 @@ int p9_idpool_check(int id, struct p9_idpool *p);
 
 int p9_error_init(void);
 int p9_errstr2errno(char *, int);
+
+#ifdef CONFIG_SYSCTL
 int __init p9_sysctl_register(void);
 void __exit p9_sysctl_unregister(void);
+#else
+static inline int p9_sysctl_register(void)
+{
+	return 0;
+}
+static inline void p9_sysctl_unregister(void)
+{
+}
+#endif
+
 #endif /* NET_9P_H */