Patches contributed by Eötvös Loránd University


commit c24b7c524421f9ea9d9ebab55f80cfb1f3fb77a3
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Apr 18 10:55:34 2008 +0200

    sched: features fix
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index b59a44e1ea44..57ba7ea9b744 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -792,7 +792,7 @@ sched_feat_read(struct file *filp, char __user *ubuf,
 		if (sysctl_sched_features & (1UL << i))
 			r += sprintf(buf + r, "%s ", sched_feat_names[i]);
 		else
-			r += sprintf(buf + r, "no_%s ", sched_feat_names[i]);
+			r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
 	}
 
 	r += sprintf(buf + r, "\n");
@@ -822,7 +822,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 
 	buf[cnt] = 0;
 
-	if (strncmp(buf, "no_", 3) == 0) {
+	if (strncmp(buf, "NO_", 3) == 0) {
 		neg = 1;
 		cmp += 3;
 	}
@@ -855,17 +855,6 @@ static struct file_operations sched_feat_fops = {
 
 static __init int sched_init_debug(void)
 {
-	int i, j, len;
-
-	for (i = 0; sched_feat_names[i]; i++) {
-		len = strlen(sched_feat_names[i]);
-
-		for (j = 0; j < len; j++) {
-			sched_feat_names[i][j] =
-				tolower(sched_feat_names[i][j]);
-		}
-	}
-
 	debugfs_create_file("sched_features", 0644, NULL, NULL,
 			&sched_feat_fops);
 

commit 06379aba522ebdabca37446ea988a23c43c03c67
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sat Apr 19 09:25:58 2008 +0200

    sched: add SCHED_FEAT_DEADLINE
    
    unused at the moment.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 6d55dfc56cab..8f03817e7dd3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -742,6 +742,7 @@ enum {
 	SCHED_FEAT_HRTICK		= 64,
 	SCHED_FEAT_DOUBLE_TICK		= 128,
 	SCHED_FEAT_NORMALIZED_SLEEPER	= 256,
+	SCHED_FEAT_DEADLINE		= 512,
 };
 
 const_debug unsigned int sysctl_sched_features =
@@ -753,7 +754,8 @@ const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_SYNC_WAKEUPS		* 1 |
 		SCHED_FEAT_HRTICK		* 1 |
 		SCHED_FEAT_DOUBLE_TICK		* 0 |
-		SCHED_FEAT_NORMALIZED_SLEEPER	* 1;
+		SCHED_FEAT_NORMALIZED_SLEEPER	* 1 |
+		SCHED_FEAT_DEADLINE		* 1;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 

commit ea736ed5d353d7a3aa1cf8ce4cf8d947bc353fb2
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Mar 25 13:51:45 2008 +0100

    sched: fix checks
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 070eefdd90f5..62830eaec52f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -163,10 +163,11 @@ struct rt_prio_array {
 };
 
 struct rt_bandwidth {
-	ktime_t rt_period;
-	u64 rt_runtime;
-	spinlock_t rt_runtime_lock;
-	struct hrtimer rt_period_timer;
+	/* nests inside the rq lock: */
+	spinlock_t		rt_runtime_lock;
+	ktime_t			rt_period;
+	u64			rt_runtime;
+	struct hrtimer		rt_period_timer;
 };
 
 static struct rt_bandwidth def_rt_bandwidth;
@@ -403,6 +404,7 @@ struct rt_rq {
 	int rt_throttled;
 	u64 rt_time;
 	u64 rt_runtime;
+	/* Nests inside the rq lock: */
 	spinlock_t rt_runtime_lock;
 
 #ifdef CONFIG_RT_GROUP_SCHED

commit 57d3da2911787a101a384532f4519f9640bae883
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Feb 27 14:05:10 2008 +0100

    time: add ns_to_ktime()
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 2cd7fa73d1af..ce5983225be4 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -327,4 +327,10 @@ extern void ktime_get_ts(struct timespec *ts);
 /* Get the real (wall-) time in timespec format: */
 #define ktime_get_real_ts(ts)	getnstimeofday(ts)
 
+static inline ktime_t ns_to_ktime(u64 ns)
+{
+	static const ktime_t ktime_zero = { .tv64 = 0 };
+	return ktime_add_ns(ktime_zero, ns);
+}
+
 #endif

commit 50df5d6aea6694ca481b8005900401e8c95c2603
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Mar 14 16:09:59 2008 +0100

    sched: remove sysctl_sched_batch_wakeup_granularity
    
    it's unused.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6a1e7afb099b..15f05ff453d8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1551,7 +1551,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
 extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_batch_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
diff --git a/kernel/sched.c b/kernel/sched.c
index 770449bee6da..e813e845d9cf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5396,7 +5396,6 @@ static inline void sched_init_granularity(void)
 		sysctl_sched_latency = limit;
 
 	sysctl_sched_wakeup_granularity *= factor;
-	sysctl_sched_batch_wakeup_granularity *= factor;
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ef358ba07683..3d09106990cb 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -214,7 +214,6 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	PN(sysctl_sched_latency);
 	PN(sysctl_sched_min_granularity);
 	PN(sysctl_sched_wakeup_granularity);
-	PN(sysctl_sched_batch_wakeup_granularity);
 	PN(sysctl_sched_child_runs_first);
 	P(sysctl_sched_features);
 #undef PN
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b01f8e77f2ac..bedda18f37a5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -61,16 +61,6 @@ const_debug unsigned int sysctl_sched_child_runs_first = 1;
  */
 unsigned int __read_mostly sysctl_sched_compat_yield;
 
-/*
- * SCHED_BATCH wake-up granularity.
- * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
- *
- * This option delays the preemption effects of decoupled workloads
- * and reduces their over-scheduling. Synchronous workloads will still
- * have immediate wakeup/sleep latencies.
- */
-unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
-
 /*
  * SCHED_OTHER wake-up granularity.
  * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2a2d6889bab..be332e1a0c29 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -268,17 +268,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &min_wakeup_granularity_ns,
 		.extra2		= &max_wakeup_granularity_ns,
 	},
-	{
-		.ctl_name	= CTL_UNNUMBERED,
-		.procname	= "sched_batch_wakeup_granularity_ns",
-		.data		= &sysctl_sched_batch_wakeup_granularity,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= &proc_dointvec_minmax,
-		.strategy	= &sysctl_intvec,
-		.extra1		= &min_wakeup_granularity_ns,
-		.extra2		= &max_wakeup_granularity_ns,
-	},
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sched_child_runs_first",

commit 02e2b83bd25bb05ac2e69cb31458b7d1b3c70707
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Mar 19 01:37:10 2008 +0100

    sched: reenable sync wakeups
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 42d2f1155d30..770449bee6da 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -627,9 +627,9 @@ enum {
 	SCHED_FEAT_START_DEBIT		= 4,
 	SCHED_FEAT_AFFINE_WAKEUPS	= 8,
 	SCHED_FEAT_CACHE_HOT_BUDDY	= 16,
-	SCHED_FEAT_HRTICK		= 32,
-	SCHED_FEAT_DOUBLE_TICK		= 64,
-	SCHED_FEAT_SYNC_WAKEUPS		= 128,
+	SCHED_FEAT_SYNC_WAKEUPS		= 32,
+	SCHED_FEAT_HRTICK		= 64,
+	SCHED_FEAT_DOUBLE_TICK		= 128,
 };
 
 const_debug unsigned int sysctl_sched_features =
@@ -638,9 +638,9 @@ const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_START_DEBIT		* 1 |
 		SCHED_FEAT_AFFINE_WAKEUPS	* 1 |
 		SCHED_FEAT_CACHE_HOT_BUDDY	* 1 |
+		SCHED_FEAT_SYNC_WAKEUPS		* 1 |
 		SCHED_FEAT_HRTICK		* 1 |
-		SCHED_FEAT_DOUBLE_TICK		* 0 |
-		SCHED_FEAT_SYNC_WAKEUPS		* 0;
+		SCHED_FEAT_DOUBLE_TICK		* 0;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 

commit d25ce4cd499a21aab89ff8755f8c4a2800eae25f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Mar 17 09:36:53 2008 +0100

    sched: cache hot buddy
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 7c5efad78c09..42d2f1155d30 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -625,20 +625,22 @@ enum {
 	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 1,
 	SCHED_FEAT_WAKEUP_PREEMPT	= 2,
 	SCHED_FEAT_START_DEBIT		= 4,
-	SCHED_FEAT_HRTICK		= 8,
-	SCHED_FEAT_DOUBLE_TICK		= 16,
-	SCHED_FEAT_SYNC_WAKEUPS		= 32,
-	SCHED_FEAT_AFFINE_WAKEUPS	= 64,
+	SCHED_FEAT_AFFINE_WAKEUPS	= 8,
+	SCHED_FEAT_CACHE_HOT_BUDDY	= 16,
+	SCHED_FEAT_HRTICK		= 32,
+	SCHED_FEAT_DOUBLE_TICK		= 64,
+	SCHED_FEAT_SYNC_WAKEUPS		= 128,
 };
 
 const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_NEW_FAIR_SLEEPERS	* 1 |
 		SCHED_FEAT_WAKEUP_PREEMPT	* 1 |
 		SCHED_FEAT_START_DEBIT		* 1 |
+		SCHED_FEAT_AFFINE_WAKEUPS	* 1 |
+		SCHED_FEAT_CACHE_HOT_BUDDY	* 1 |
 		SCHED_FEAT_HRTICK		* 1 |
 		SCHED_FEAT_DOUBLE_TICK		* 0 |
-		SCHED_FEAT_SYNC_WAKEUPS		* 0 |
-		SCHED_FEAT_AFFINE_WAKEUPS	* 1;
+		SCHED_FEAT_SYNC_WAKEUPS		* 0;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
@@ -1519,7 +1521,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 	/*
 	 * Buddy candidates are cache hot:
 	 */
-	if (&p->se == cfs_rq_of(&p->se)->next)
+	if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
 		return 1;
 
 	if (p->sched_class != &fair_sched_class)

commit 1fc8afa4c820fcde3658238eab5c010476ede521
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Mar 19 01:39:19 2008 +0100

    sched: feat affine wakeups
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 263e25e10204..7c5efad78c09 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -628,6 +628,7 @@ enum {
 	SCHED_FEAT_HRTICK		= 8,
 	SCHED_FEAT_DOUBLE_TICK		= 16,
 	SCHED_FEAT_SYNC_WAKEUPS		= 32,
+	SCHED_FEAT_AFFINE_WAKEUPS	= 64,
 };
 
 const_debug unsigned int sysctl_sched_features =
@@ -636,7 +637,8 @@ const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_START_DEBIT		* 1 |
 		SCHED_FEAT_HRTICK		* 1 |
 		SCHED_FEAT_DOUBLE_TICK		* 0 |
-		SCHED_FEAT_SYNC_WAKEUPS		* 0;
+		SCHED_FEAT_SYNC_WAKEUPS		* 0 |
+		SCHED_FEAT_AFFINE_WAKEUPS	* 1;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 

commit b85d0667268320072ccdeb07c27c25b300ab3724
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Mar 16 20:03:22 2008 +0100

    sched: introduce SCHED_FEAT_SYNC_WAKEUPS, turn it off
    
    turn off sync wakeups by default. They are not needed anymore - the
    buddy logic should be smart enough to keep the system from
    overscheduling.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index d8456a9ac9af..263e25e10204 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -627,6 +627,7 @@ enum {
 	SCHED_FEAT_START_DEBIT		= 4,
 	SCHED_FEAT_HRTICK		= 8,
 	SCHED_FEAT_DOUBLE_TICK		= 16,
+	SCHED_FEAT_SYNC_WAKEUPS		= 32,
 };
 
 const_debug unsigned int sysctl_sched_features =
@@ -634,7 +635,8 @@ const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_WAKEUP_PREEMPT	* 1 |
 		SCHED_FEAT_START_DEBIT		* 1 |
 		SCHED_FEAT_HRTICK		* 1 |
-		SCHED_FEAT_DOUBLE_TICK		* 0;
+		SCHED_FEAT_DOUBLE_TICK		* 0 |
+		SCHED_FEAT_SYNC_WAKEUPS		* 0;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
@@ -1916,6 +1918,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	long old_state;
 	struct rq *rq;
 
+	if (!sched_feat(SYNC_WAKEUPS))
+		sync = 0;
+
 	smp_wmb();
 	rq = task_rq_lock(p, &flags);
 	old_state = p->state;

commit 27ec4407790d075c325e1f4da0a19c56953cce23
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 28 21:00:21 2008 +0100

    sched: make cpu_clock() globally synchronous
    
    Alexey Zaytsev reported (and bisected) that the introduction of
    cpu_clock() in printk made the timestamps jump back and forth.
    
    Make cpu_clock() more reliable while still keeping it fast when it's
    called frequently.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 8dcdec6fe0fe..7377222ab42f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -632,11 +632,39 @@ int sysctl_sched_rt_runtime = 950000;
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
+static const unsigned long long time_sync_thresh = 100000;
+
+static DEFINE_PER_CPU(unsigned long long, time_offset);
+static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
+
 /*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
+ * Global lock which we take every now and then to synchronize
+ * the CPUs time. This method is not warp-safe, but it's good
+ * enough to synchronize slowly diverging time sources and thus
+ * it's good enough for tracing:
  */
-unsigned long long cpu_clock(int cpu)
+static DEFINE_SPINLOCK(time_sync_lock);
+static unsigned long long prev_global_time;
+
+static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&time_sync_lock, flags);
+
+	if (time < prev_global_time) {
+		per_cpu(time_offset, cpu) += prev_global_time - time;
+		time = prev_global_time;
+	} else {
+		prev_global_time = time;
+	}
+
+	spin_unlock_irqrestore(&time_sync_lock, flags);
+
+	return time;
+}
+
+static unsigned long long __cpu_clock(int cpu)
 {
 	unsigned long long now;
 	unsigned long flags;
@@ -657,6 +685,24 @@ unsigned long long cpu_clock(int cpu)
 
 	return now;
 }
+
+/*
+ * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
+ * clock constructed from sched_clock():
+ */
+unsigned long long cpu_clock(int cpu)
+{
+	unsigned long long prev_cpu_time, time, delta_time;
+
+	prev_cpu_time = per_cpu(prev_cpu_time, cpu);
+	time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
+	delta_time = time-prev_cpu_time;
+
+	if (unlikely(delta_time > time_sync_thresh))
+		time = __sync_cpu_clock(time, cpu);
+
+	return time;
+}
 EXPORT_SYMBOL_GPL(cpu_clock);
 
 #ifndef prepare_arch_switch