Patches contributed by Eötvös Loránd University


commit 50c46637aa894f904e2fb39086a3d7732f68bd50
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Aug 25 22:17:19 2007 +0200

    sched: s/sched_latency/sched_min_granularity
    
    the runtime limit and wakeup granularity used to be a function of
    granularity; that was incorrectly changed to be a function of
    sched_latency.
    
    Fix this to make wakeup granularity a function of min-granularity,
    and the runtime limit equal to latency.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

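For illustration (editorial note, not part of the patch): using the defaults
visible elsewhere in this series (sysctl_sched_latency = 20 msec,
sysctl_sched_min_granularity = 2 msec) and ignoring the per-CPU-count scaling
done earlier in sched_init_granularity(), the derived tunables change as
follows:

    before:  runtime_limit      = 20 msec * 5 = 100 msec
             wakeup_granularity = 20 msec / 2 =  10 msec
    after:   runtime_limit      = 20 msec
             wakeup_granularity =  2 msec / 2 =   1 msec
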
diff --git a/kernel/sched.c b/kernel/sched.c
index a40ab657ad19..9fe473a190de 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4921,8 +4921,8 @@ static inline void sched_init_granularity(void)
 	if (sysctl_sched_latency > limit)
 		sysctl_sched_latency = limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_latency * 5;
-	sysctl_sched_wakeup_granularity = sysctl_sched_latency / 2;
+	sysctl_sched_runtime_limit = sysctl_sched_latency;
+	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP

commit 172ac3dbb7d3e528ac53d08a34df88d1ac53c534
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Aug 25 18:41:53 2007 +0200

    sched: cleanup, sched_granularity -> sched_min_granularity
    
    due to adaptive granularity scheduling, the role of sched_granularity
    has changed to "minimum granularity", so rename the variable (and the
    tunable) accordingly.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 322764e04052..bd6a0320a770 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,7 +1400,7 @@ static inline void idle_task_exit(void) {}
 extern void sched_idle_next(void);
 
 extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_granularity;
+extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_stat_granularity;
diff --git a/kernel/sched.c b/kernel/sched.c
index da26f46d50d7..a40ab657ad19 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4913,9 +4913,9 @@ static inline void sched_init_granularity(void)
 	unsigned int factor = 1 + ilog2(num_online_cpus());
 	const unsigned long limit = 100000000;
 
-	sysctl_sched_granularity *= factor;
-	if (sysctl_sched_granularity > limit)
-		sysctl_sched_granularity = limit;
+	sysctl_sched_min_granularity *= factor;
+	if (sysctl_sched_min_granularity > limit)
+		sysctl_sched_min_granularity = limit;
 
 	sysctl_sched_latency *= factor;
 	if (sysctl_sched_latency > limit)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0ba1e60f08d0..ee3771850aaf 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -40,7 +40,7 @@ unsigned int sysctl_sched_latency __read_mostly = 20000000ULL;
  * Minimal preemption granularity for CPU-bound tasks:
  * (default: 2 msec, units: nanoseconds)
  */
-unsigned int sysctl_sched_granularity __read_mostly = 2000000ULL;
+unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL;
 
 /*
  * SCHED_BATCH wake-up granularity.
@@ -258,7 +258,7 @@ sched_granularity(struct cfs_rq *cfs_rq)
 
 	if (nr > 1) {
 		gran = gran/nr - gran/nr/nr;
-		gran = max(gran, sysctl_sched_granularity);
+		gran = max(gran, sysctl_sched_min_granularity);
 	}
 
 	return gran;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9e3d2960faf5..6ace893c17c9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -222,8 +222,8 @@ static ctl_table kern_table[] = {
 #ifdef CONFIG_SCHED_DEBUG
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_granularity_ns",
-		.data		= &sysctl_sched_granularity,
+		.procname	= "sched_min_granularity_ns",
+		.data		= &sysctl_sched_min_granularity,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec_minmax,

commit 095e56c7036fe97bc3ebcd80ed6e121be0847656
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Aug 24 20:39:10 2007 +0200

    sched: fix startup penalty calculation
    
    fix the task startup penalty miscalculation: sysctl_sched_granularity
    is an unsigned int and wait_runtime is a long, so we first have to
    convert it to long before negating it.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

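A minimal user-space sketch of the conversion bug being fixed here (editorial
illustration, not kernel code; the 2 msec value is the sched_granularity
default used in this series):

#include <stdio.h>

int main(void)
{
	unsigned int granularity = 2000000;	/* 2 msec, in nanoseconds */
	long wait_runtime;

	/* Negation happens in 'unsigned int' and wraps around: */
	wait_runtime = -(granularity / 2);
	printf("without cast: %ld\n", wait_runtime);	/* 4293967296 if long is 64-bit */

	/* Casting first makes the negation happen in 'long': */
	wait_runtime = -((long)granularity / 2);
	printf("with cast:    %ld\n", wait_runtime);	/* -1000000 */

	return 0;
}
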
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c078f1af721c..4d6b7e2df2aa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1047,7 +1047,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	 * -granularity/2, so initialize the task with that:
 	 */
 	if (sysctl_sched_features & SCHED_FEAT_START_DEBIT)
-		p->se.wait_runtime = -(sysctl_sched_granularity / 2);
+		p->se.wait_runtime = -((long)sysctl_sched_granularity / 2);
 
 	__enqueue_entity(cfs_rq, se);
 }

commit b2133c8b1e270b4a7c36f70e29be8738d09e850b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Aug 24 20:39:10 2007 +0200

    sched: tidy up and simplify the bonus balance
    
    make the bonus balance more consistent: do not hand out a bonus if
    there's too much in flight already, and only deduct as much from a
    runner as it has capacity for. This makes the bonus engine a zero-sum
    game (as intended).
    
    this also simplifies the code:
    
       text    data     bss     dec     hex filename
      34770    2998      24   37792    93a0 sched.o.before
      34749    2998      24   37771    938b sched.o.after
    
    and it also avoids overscheduling in sleep-happy workloads like
    hackbench.c.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

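A condensed sketch of the deduction clamping this patch arrives at (editorial
illustration only; it leaves out the nice-level weighting that
calc_delta_mine() applies in the real code, and clamps negative headroom to
zero for clarity):

/*
 * How much sleeper bonus may be deducted from the current runner in one
 * step: never more than it just executed, never more than is in flight,
 * and never more than its headroom below the runtime limit.
 */
static unsigned long bonus_deduction(unsigned long sleeper_bonus,
				     unsigned long delta_exec,
				     long runtime_limit, long wait_runtime)
{
	unsigned long delta = delta_exec;
	long headroom = runtime_limit - wait_runtime;

	if (delta > sleeper_bonus)
		delta = sleeper_bonus;
	if (headroom < 0)
		headroom = 0;
	if (delta > (unsigned long)headroom)
		delta = (unsigned long)headroom;

	return delta;
}
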
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6b0974c3fb67..c578370cd693 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -306,6 +306,8 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 		delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec);
 		delta = calc_delta_mine(delta, curr->load.weight, lw);
 		delta = min((u64)delta, cfs_rq->sleeper_bonus);
+		delta = min(delta, (unsigned long)(
+			(long)sysctl_sched_runtime_limit - curr->wait_runtime));
 		cfs_rq->sleeper_bonus -= delta;
 		delta_mine -= delta;
 	}
@@ -493,6 +495,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	unsigned long load = cfs_rq->load.weight, delta_fair;
 	long prev_runtime;
 
+	/*
+	 * Do not boost sleepers if there's too much bonus 'in flight'
+	 * already:
+	 */
+	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
+		return;
+
 	if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG)
 		load = rq_of(cfs_rq)->cpu_load[2];
 
@@ -512,16 +521,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 	prev_runtime = se->wait_runtime;
 	__add_wait_runtime(cfs_rq, se, delta_fair);
+	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 	delta_fair = se->wait_runtime - prev_runtime;
 
 	/*
 	 * Track the amount of bonus we've given to sleepers:
 	 */
 	cfs_rq->sleeper_bonus += delta_fair;
-	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
-		cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit;
-
-	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)

commit 71fd37146385c8255bfd370f33ca81fe8c81e5a5
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Aug 24 20:39:10 2007 +0200

    sched: remove HZ dependency from the granularity default
    
    remove HZ dependency from the granularity default. Use 10 msec for
    the base granularity, 1 msec for wakeup granularity and 25 msec for
    batch wakeup granularity. (These defaults are close to the values
    that the previous HZ-dependent formulas produced with HZ=250, which
    is the most common setting.)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

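For illustration (not part of the patch), the old defaults scaled with the
HZ config option; a small user-space calculation shows what they evaluated
to for common HZ settings:

#include <stdio.h>

int main(void)
{
	const unsigned int hz[] = { 100, 250, 1000 };
	unsigned int i;

	/* Old formulas: 2000000000ULL/HZ, 1000000000ULL/HZ, 10000000000ULL/HZ */
	for (i = 0; i < 3; i++)
		printf("HZ=%-4u granularity=%llu ns  wakeup=%llu ns  batch_wakeup=%llu ns\n",
		       hz[i],
		       2000000000ULL / hz[i],
		       1000000000ULL / hz[i],
		       10000000000ULL / hz[i]);

	return 0;
}
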
diff --git a/kernel/sched.c b/kernel/sched.c
index 96e9b82246d2..e95ff22ed174 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4923,7 +4923,7 @@ static inline void sched_init_granularity(void)
 	if (sysctl_sched_granularity > gran_limit)
 		sysctl_sched_granularity = gran_limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 8;
+	sysctl_sched_runtime_limit = sysctl_sched_granularity * 5;
 	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
 }
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b5270dc98bef..6b0974c3fb67 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -19,7 +19,7 @@
 
 /*
  * Preemption granularity:
- * (default: 2 msec, units: nanoseconds)
+ * (default: 10 msec, units: nanoseconds)
  *
  * NOTE: this granularity value is not the same as the concept of
  * 'timeslice length' - timeslices in CFS will typically be somewhat
@@ -31,18 +31,17 @@
  * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way
  * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
  */
-unsigned int sysctl_sched_granularity __read_mostly = 2000000000ULL/HZ;
+unsigned int sysctl_sched_granularity __read_mostly = 10000000UL;
 
 /*
  * SCHED_BATCH wake-up granularity.
- * (default: 10 msec, units: nanoseconds)
+ * (default: 25 msec, units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
-							10000000000ULL/HZ;
+unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
@@ -52,12 +51,12 @@ unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly =
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000000ULL/HZ;
+unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL;
 
 unsigned int sysctl_sched_stat_granularity __read_mostly;
 
 /*
- * Initialized in sched_init_granularity():
+ * Initialized in sched_init_granularity() [to 5 times the base granularity]:
  */
 unsigned int sysctl_sched_runtime_limit __read_mostly;
 

commit 505c0efd58031923ae01deac16d896607cafa70e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Aug 23 15:18:02 2007 +0200

    sched: tweak the sched_runtime_limit tunable
    
    Michael Gerdau reported CPU usage weirdness with reniced tasks.
    Such symptoms can be caused by limit underruns, so double the
    sched_runtime_limit.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index a4b22d93e00d..96e9b82246d2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4923,7 +4923,7 @@ static inline void sched_init_granularity(void)
 	if (sysctl_sched_granularity > gran_limit)
 		sysctl_sched_granularity = gran_limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
+	sysctl_sched_runtime_limit = sysctl_sched_granularity * 8;
 	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
 }
 

commit 2aa44d0567ed21b47b87d68819415d48194cb923
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Aug 23 15:18:02 2007 +0200

    sched: sched_clock_idle_[sleep|wakeup]_event()
    
    construct a more or less wall-clock time out of sched_clock(), by
    using ACPI-idle's existing knowledge about how much time we spent
    idling. This allows the rq clock to work around TSC-stops-in-C2,
    TSC-gets-corrupted-in-C3 type of problems.
    
    ( Besides the scheduler's statistics, this also benefits blktrace
      and printk timestamps. )
    
    Furthermore, the precise before-C2/C3-sleep and after-C2/C3-wakeup
    callbacks allow the scheduler to get the most out of the period where
    the CPU has a reliable TSC. This results in slightly more precise
    task statistics.
    
    the ACPI bits were acked by Len.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Len Brown <len.brown@intel.com>

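A condensed sketch of how an idle driver is expected to use the two new
callbacks (distilled from the ACPI changes below; the timer-read and
C-state-entry helpers are hypothetical placeholders, error handling is
omitted, and interrupts are assumed to be disabled):

#include <linux/sched.h>

static void deep_idle_with_clock_fixup(void)
{
	u64 t1, t2, idle_ns;

	/* Tell the scheduler we are going deep-idle (TSC may stop/skew): */
	sched_clock_idle_sleep_event();

	t1 = read_pm_timer();			/* hypothetical helper */
	enter_deep_idle_state();		/* hypothetical: e.g. C2/C3 */
	t2 = read_pm_timer();			/* hypothetical helper */

	/*
	 * Report the externally measured idle time; the rq clock is then
	 * advanced by this delta instead of by unreliable TSC readings:
	 */
	idle_ns = pm_ticks_to_ns(t2 - t1);	/* hypothetical helper */
	sched_clock_idle_wakeup_event(idle_ns);
}
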
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index debd7dbb4158..a39280b4dd3a 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -292,7 +292,6 @@ static struct clocksource clocksource_tsc = {
 
 void mark_tsc_unstable(char *reason)
 {
-	sched_clock_unstable_event();
 	if (!tsc_unstable) {
 		tsc_unstable = 1;
 		tsc_enabled = 0;
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index a8634a0655fc..d9b8af763e1e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -63,6 +63,7 @@
 ACPI_MODULE_NAME("processor_idle");
 #define ACPI_PROCESSOR_FILE_POWER	"power"
 #define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
+#define PM_TIMER_TICK_NS		(1000000000ULL/PM_TIMER_FREQUENCY)
 #define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
 #define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
 static void (*pm_idle_save) (void) __read_mostly;
@@ -462,6 +463,9 @@ static void acpi_processor_idle(void)
 		 * TBD: Can't get time duration while in C1, as resumes
 		 *      go to an ISR rather than here.  Need to instrument
 		 *      base interrupt handler.
+		 *
+		 * Note: the TSC better not stop in C1, sched_clock() will
+		 *       skew otherwise.
 		 */
 		sleep_ticks = 0xFFFFFFFF;
 		break;
@@ -469,6 +473,8 @@ static void acpi_processor_idle(void)
 	case ACPI_STATE_C2:
 		/* Get start time (ticks) */
 		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
+		/* Tell the scheduler that we are going deep-idle: */
+		sched_clock_idle_sleep_event();
 		/* Invoke C2 */
 		acpi_state_timer_broadcast(pr, cx, 1);
 		acpi_cstate_enter(cx);
@@ -479,17 +485,22 @@ static void acpi_processor_idle(void)
 		/* TSC halts in C2, so notify users */
 		mark_tsc_unstable("possible TSC halt in C2");
 #endif
+		/* Compute time (ticks) that we were actually asleep */
+		sleep_ticks = ticks_elapsed(t1, t2);
+
+		/* Tell the scheduler how much we idled: */
+		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
+
 		/* Re-enable interrupts */
 		local_irq_enable();
+		/* Do not account our idle-switching overhead: */
+		sleep_ticks -= cx->latency_ticks + C2_OVERHEAD;
+
 		current_thread_info()->status |= TS_POLLING;
-		/* Compute time (ticks) that we were actually asleep */
-		sleep_ticks =
-		    ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
 		acpi_state_timer_broadcast(pr, cx, 0);
 		break;
 
 	case ACPI_STATE_C3:
-
 		/*
 		 * disable bus master
 		 * bm_check implies we need ARB_DIS
@@ -518,6 +529,8 @@ static void acpi_processor_idle(void)
 		t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
 		/* Invoke C3 */
 		acpi_state_timer_broadcast(pr, cx, 1);
+		/* Tell the scheduler that we are going deep-idle: */
+		sched_clock_idle_sleep_event();
 		acpi_cstate_enter(cx);
 		/* Get end time (ticks) */
 		t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
@@ -531,12 +544,17 @@ static void acpi_processor_idle(void)
 		/* TSC halts in C3, so notify users */
 		mark_tsc_unstable("TSC halts in C3");
 #endif
+		/* Compute time (ticks) that we were actually asleep */
+		sleep_ticks = ticks_elapsed(t1, t2);
+		/* Tell the scheduler how much we idled: */
+		sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS);
+
 		/* Re-enable interrupts */
 		local_irq_enable();
+		/* Do not account our idle-switching overhead: */
+		sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
+
 		current_thread_info()->status |= TS_POLLING;
-		/* Compute time (ticks) that we were actually asleep */
-		sleep_ticks =
-		    ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
 		acpi_state_timer_broadcast(pr, cx, 0);
 		break;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 682ef87da6eb..1845b2e99a87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1388,7 +1388,8 @@ extern void sched_exec(void);
 #define sched_exec()   {}
 #endif
 
-extern void sched_clock_unstable_event(void);
+extern void sched_clock_idle_sleep_event(void);
+extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
 extern void idle_task_exit(void);
diff --git a/kernel/sched.c b/kernel/sched.c
index 45e17b83b7f1..48e7586168ef 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -262,7 +262,8 @@ struct rq {
 	s64 clock_max_delta;
 
 	unsigned int clock_warps, clock_overflows;
-	unsigned int clock_unstable_events;
+	u64 idle_clock;
+	unsigned int clock_deep_idle_events;
 	u64 tick_timestamp;
 
 	atomic_t nr_iowait;
@@ -556,18 +557,40 @@ static inline struct rq *this_rq_lock(void)
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq = cpu_rq(smp_processor_id());
 
-	rq = task_rq_lock(current, &flags);
-	rq->prev_clock_raw = sched_clock();
-	rq->clock_unstable_events++;
-	task_rq_unlock(rq, &flags);
+	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	spin_unlock(&rq->lock);
+	rq->clock_deep_idle_events++;
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+	struct rq *rq = cpu_rq(smp_processor_id());
+	u64 now = sched_clock();
+
+	rq->idle_clock += delta_ns;
+	/*
+	 * Override the previous timestamp and ignore all
+	 * sched_clock() deltas that occured while we idled,
+	 * and use the PM-provided delta_ns to advance the
+	 * rq clock:
+	 */
+	spin_lock(&rq->lock);
+	rq->prev_clock_raw = now;
+	rq->clock += delta_ns;
+	spin_unlock(&rq->lock);
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 87e524762b85..ab18f45f2ab2 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -154,10 +154,11 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(next_balance);
 	P(curr->pid);
 	P(clock);
+	P(idle_clock);
 	P(prev_clock_raw);
 	P(clock_warps);
 	P(clock_overflows);
-	P(clock_unstable_events);
+	P(clock_deep_idle_events);
 	P(clock_max_delta);
 	P(cpu_load[0]);
 	P(cpu_load[1]);

commit 60ac8f20feb0bba8caee63be3e7ca5801fe16d4c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Jul 24 11:16:37 2007 +0200

    pci/hotplug/cpqphp_ctrl.c: remove stale BKL use
    
    remove stale BKL use from drivers/pci/hotplug/cpqphp_ctrl.c.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c
index 79ff6b4de3a6..37d72f123a80 100644
--- a/drivers/pci/hotplug/cpqphp_ctrl.c
+++ b/drivers/pci/hotplug/cpqphp_ctrl.c
@@ -1746,10 +1746,8 @@ static void pushbutton_helper_thread(unsigned long data)
 static int event_thread(void* data)
 {
 	struct controller *ctrl;
-	lock_kernel();
+
 	daemonize("phpd_event");
-	
-	unlock_kernel();
 
 	while (1) {
 		dbg("!!!!event_thread sleeping\n");

commit 5d2b3d3695a841231b65b5536a70dc29961c5611
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Aug 12 18:08:19 2007 +0200

    sched: fix sleeper bonus
    
    Peter Zijlstra noticed that the sleeper bonus deduction code
    was not properly rate-limited: a task that scheduled more
    frequently would get a disproportionately large deduction.
    So limit the deduction to delta_exec.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c5af38948a1e..fedbb51bba96 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -75,7 +75,7 @@ enum {
 
 unsigned int sysctl_sched_features __read_mostly =
 		SCHED_FEAT_FAIR_SLEEPERS	*1 |
-		SCHED_FEAT_SLEEPER_AVG		*1 |
+		SCHED_FEAT_SLEEPER_AVG		*0 |
 		SCHED_FEAT_SLEEPER_LOAD_AVG	*1 |
 		SCHED_FEAT_PRECISE_CPU_LOAD	*1 |
 		SCHED_FEAT_START_DEBIT		*1 |
@@ -304,11 +304,9 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
 
 	if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) {
-		delta = calc_delta_mine(cfs_rq->sleeper_bonus,
-					curr->load.weight, lw);
-		if (unlikely(delta > cfs_rq->sleeper_bonus))
-			delta = cfs_rq->sleeper_bonus;
-
+		delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec);
+		delta = calc_delta_mine(delta, curr->load.weight, lw);
+		delta = min((u64)delta, cfs_rq->sleeper_bonus);
 		cfs_rq->sleeper_bonus -= delta;
 		delta_mine -= delta;
 	}
@@ -521,6 +519,8 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Track the amount of bonus we've given to sleepers:
 	 */
 	cfs_rq->sleeper_bonus += delta_fair;
+	if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit))
+		cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit;
 
 	schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
 }

commit 5167e75f4d2d10bff6afee1f358313e87b4df246
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Aug 10 23:05:11 2007 +0200

    sched debug: dont print kernel address in /proc/sched_debug
    
    Arjan van de Ven pointed out that we should not print kernel addresses
    in world-readable /proc files - fix that.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 3da32156394e..87e524762b85 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -108,7 +108,7 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
-	SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq);
+	SEQ_printf(m, "\ncfs_rq\n");
 
 #define P(x) \
 	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))