Patches contributed by Eötvös Loránd University
commit c24b7c524421f9ea9d9ebab55f80cfb1f3fb77a3
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Apr 18 10:55:34 2008 +0200
sched: features fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index b59a44e1ea44..57ba7ea9b744 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -792,7 +792,7 @@ sched_feat_read(struct file *filp, char __user *ubuf,
if (sysctl_sched_features & (1UL << i))
r += sprintf(buf + r, "%s ", sched_feat_names[i]);
else
- r += sprintf(buf + r, "no_%s ", sched_feat_names[i]);
+ r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
}
r += sprintf(buf + r, "\n");
@@ -822,7 +822,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
buf[cnt] = 0;
- if (strncmp(buf, "no_", 3) == 0) {
+ if (strncmp(buf, "NO_", 3) == 0) {
neg = 1;
cmp += 3;
}
@@ -855,17 +855,6 @@ static struct file_operations sched_feat_fops = {
static __init int sched_init_debug(void)
{
- int i, j, len;
-
- for (i = 0; sched_feat_names[i]; i++) {
- len = strlen(sched_feat_names[i]);
-
- for (j = 0; j < len; j++) {
- sched_feat_names[i][j] =
- tolower(sched_feat_names[i][j]);
- }
- }
-
debugfs_create_file("sched_features", 0644, NULL, NULL,
&sched_feat_fops);
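The fix makes the write-side parser agree with the read side on an uppercase "NO_" prefix, and drops the init-time loop that lowercased the feature names. A minimal user-space sketch (not part of the patch) of how the resulting debugfs interface is driven, assuming debugfs is mounted at /sys/kernel/debug:

#include <stdio.h>

int main(void)
{
	char buf[256];
	FILE *f;

	/* Clear a feature bit; after this fix the prefix is uppercase. */
	f = fopen("/sys/kernel/debug/sched_features", "w");
	if (!f)
		return 1;
	fputs("NO_HRTICK", f);
	fclose(f);

	/* Read the mask back; the disabled feature lists as "NO_HRTICK". */
	f = fopen("/sys/kernel/debug/sched_features", "r");
	if (f) {
		if (fgets(buf, sizeof(buf), f))
			printf("%s", buf);
		fclose(f);
	}
	return 0;
}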
commit 06379aba522ebdabca37446ea988a23c43c03c67
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Apr 19 09:25:58 2008 +0200
sched: add SCHED_FEAT_DEADLINE
unused at the moment.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 6d55dfc56cab..8f03817e7dd3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -742,6 +742,7 @@ enum {
SCHED_FEAT_HRTICK = 64,
SCHED_FEAT_DOUBLE_TICK = 128,
SCHED_FEAT_NORMALIZED_SLEEPER = 256,
+ SCHED_FEAT_DEADLINE = 512,
};
const_debug unsigned int sysctl_sched_features =
@@ -753,7 +754,8 @@ const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_SYNC_WAKEUPS * 1 |
SCHED_FEAT_HRTICK * 1 |
SCHED_FEAT_DOUBLE_TICK * 0 |
- SCHED_FEAT_NORMALIZED_SLEEPER * 1;
+ SCHED_FEAT_NORMALIZED_SLEEPER * 1 |
+ SCHED_FEAT_DEADLINE * 1;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
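The new flag follows the idiom used throughout sched.c: each feature is a distinct power of two, the compile-time default mask multiplies each bit by 0 or 1, and sched_feat() tests a bit via token pasting. A stand-alone sketch of the same pattern (illustrative names, plain user-space C):

#include <stdio.h>

enum {
	FEAT_HRTICK   = 1,
	FEAT_DEADLINE = 2,	/* mirrors SCHED_FEAT_DEADLINE, value aside */
};

/* Default mask: "bit * 1" enables a feature, "bit * 0" disables it. */
static unsigned int features =
	FEAT_HRTICK   * 1 |
	FEAT_DEADLINE * 1;

/* Token pasting turns feat(DEADLINE) into (features & FEAT_DEADLINE). */
#define feat(x) (features & FEAT_##x)

int main(void)
{
	printf("deadline: %d, hrtick: %d\n", !!feat(DEADLINE), !!feat(HRTICK));
	return 0;
}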
commit ea736ed5d353d7a3aa1cf8ce4cf8d947bc353fb2
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Mar 25 13:51:45 2008 +0100
sched: fix checks
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 070eefdd90f5..62830eaec52f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -163,10 +163,11 @@ struct rt_prio_array {
};
struct rt_bandwidth {
- ktime_t rt_period;
- u64 rt_runtime;
- spinlock_t rt_runtime_lock;
- struct hrtimer rt_period_timer;
+ /* nests inside the rq lock: */
+ spinlock_t rt_runtime_lock;
+ ktime_t rt_period;
+ u64 rt_runtime;
+ struct hrtimer rt_period_timer;
};
static struct rt_bandwidth def_rt_bandwidth;
@@ -403,6 +404,7 @@ struct rt_rq {
int rt_throttled;
u64 rt_time;
u64 rt_runtime;
+ /* Nests inside the rq lock: */
spinlock_t rt_runtime_lock;
#ifdef CONFIG_RT_GROUP_SCHED
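The comments added here document a lock-ordering rule rather than change behavior: rt_runtime_lock nests inside the rq lock. A user-space analogue of that discipline (pthread mutexes standing in for spinlocks; names are illustrative):

#include <pthread.h>

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;
/* nests inside rq_lock, mirroring the comment in the patch: */
static pthread_mutex_t rt_runtime_lock = PTHREAD_MUTEX_INITIALIZER;

static unsigned long long rt_runtime;

void set_rt_runtime(unsigned long long runtime)
{
	pthread_mutex_lock(&rq_lock);
	/* The inner lock is only ever taken with the outer one held,
	 * so the acquisition order is fixed and deadlock-free. */
	pthread_mutex_lock(&rt_runtime_lock);
	rt_runtime = runtime;
	pthread_mutex_unlock(&rt_runtime_lock);
	pthread_mutex_unlock(&rq_lock);
}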
commit 57d3da2911787a101a384532f4519f9640bae883
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Feb 27 14:05:10 2008 +0100
time: add ns_to_ktime()
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 2cd7fa73d1af..ce5983225be4 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -327,4 +327,10 @@ extern void ktime_get_ts(struct timespec *ts);
/* Get the real (wall-) time in timespec format: */
#define ktime_get_real_ts(ts) getnstimeofday(ts)
+static inline ktime_t ns_to_ktime(u64 ns)
+{
+ static const ktime_t ktime_zero = { .tv64 = 0 };
+ return ktime_add_ns(ktime_zero, ns);
+}
+
#endif
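ns_to_ktime() simply adds a nanosecond count to a zero ktime_t and lets ktime_add_ns() deal with the representation. On 64-bit configurations, where ktime_t is a raw s64 nanosecond count (tv64), the result is just the input value; a stand-alone model of that case only (assumption: the scalar representation):

#include <stdio.h>
#include <stdint.h>

/* Models the scalar (64-bit) ktime_t representation only. */
typedef union { int64_t tv64; } ktime_t;

static inline ktime_t ktime_add_ns(ktime_t kt, uint64_t ns)
{
	ktime_t res = { .tv64 = kt.tv64 + (int64_t)ns };
	return res;
}

static inline ktime_t ns_to_ktime(uint64_t ns)
{
	static const ktime_t ktime_zero = { .tv64 = 0 };
	return ktime_add_ns(ktime_zero, ns);
}

int main(void)
{
	ktime_t kt = ns_to_ktime(1500000000ULL);	/* 1.5 s */
	printf("%lld ns\n", (long long)kt.tv64);
	return 0;
}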
commit 50df5d6aea6694ca481b8005900401e8c95c2603
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Mar 14 16:09:59 2008 +0100
sched: remove sysctl_sched_batch_wakeup_granularity
it's unused.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6a1e7afb099b..15f05ff453d8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1551,7 +1551,6 @@ static inline void wake_up_idle_cpu(int cpu) { }
extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
-extern unsigned int sysctl_sched_batch_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
diff --git a/kernel/sched.c b/kernel/sched.c
index 770449bee6da..e813e845d9cf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5396,7 +5396,6 @@ static inline void sched_init_granularity(void)
sysctl_sched_latency = limit;
sysctl_sched_wakeup_granularity *= factor;
- sysctl_sched_batch_wakeup_granularity *= factor;
}
#ifdef CONFIG_SMP
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ef358ba07683..3d09106990cb 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -214,7 +214,6 @@ static int sched_debug_show(struct seq_file *m, void *v)
PN(sysctl_sched_latency);
PN(sysctl_sched_min_granularity);
PN(sysctl_sched_wakeup_granularity);
- PN(sysctl_sched_batch_wakeup_granularity);
PN(sysctl_sched_child_runs_first);
P(sysctl_sched_features);
#undef PN
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b01f8e77f2ac..bedda18f37a5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -61,16 +61,6 @@ const_debug unsigned int sysctl_sched_child_runs_first = 1;
*/
unsigned int __read_mostly sysctl_sched_compat_yield;
-/*
- * SCHED_BATCH wake-up granularity.
- * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
- *
- * This option delays the preemption effects of decoupled workloads
- * and reduces their over-scheduling. Synchronous workloads will still
- * have immediate wakeup/sleep latencies.
- */
-unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
-
/*
* SCHED_OTHER wake-up granularity.
* (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b2a2d6889bab..be332e1a0c29 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -268,17 +268,6 @@ static struct ctl_table kern_table[] = {
.extra1 = &min_wakeup_granularity_ns,
.extra2 = &max_wakeup_granularity_ns,
},
- {
- .ctl_name = CTL_UNNUMBERED,
- .procname = "sched_batch_wakeup_granularity_ns",
- .data = &sysctl_sched_batch_wakeup_granularity,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &min_wakeup_granularity_ns,
- .extra2 = &max_wakeup_granularity_ns,
- },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_child_runs_first",
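With the batch knob gone, only the remaining granularity sysctls are scaled by sched_init_granularity(). The deleted comment describes the scaling as "10 msec * (1 + ilog(ncpus))"; a stand-alone sketch of that arithmetic (illustrative, not the kernel function itself):

#include <stdio.h>

static unsigned int ilog2_u(unsigned int n)
{
	unsigned int log = 0;

	while (n >>= 1)
		log++;
	return log;
}

int main(void)
{
	unsigned int ncpus = 8;
	unsigned int factor = 1 + ilog2_u(ncpus);
	unsigned long long gran = 10000000ULL * factor;	/* 10 msec base */

	/* Prints 40000000 ns, i.e. 40 msec, for 8 CPUs. */
	printf("wakeup granularity for %u CPUs: %llu ns\n", ncpus, gran);
	return 0;
}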
commit 02e2b83bd25bb05ac2e69cb31458b7d1b3c70707
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Mar 19 01:37:10 2008 +0100
sched: reenable sync wakeups
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 42d2f1155d30..770449bee6da 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -627,9 +627,9 @@ enum {
SCHED_FEAT_START_DEBIT = 4,
SCHED_FEAT_AFFINE_WAKEUPS = 8,
SCHED_FEAT_CACHE_HOT_BUDDY = 16,
- SCHED_FEAT_HRTICK = 32,
- SCHED_FEAT_DOUBLE_TICK = 64,
- SCHED_FEAT_SYNC_WAKEUPS = 128,
+ SCHED_FEAT_SYNC_WAKEUPS = 32,
+ SCHED_FEAT_HRTICK = 64,
+ SCHED_FEAT_DOUBLE_TICK = 128,
};
const_debug unsigned int sysctl_sched_features =
@@ -638,9 +638,9 @@ const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_START_DEBIT * 1 |
SCHED_FEAT_AFFINE_WAKEUPS * 1 |
SCHED_FEAT_CACHE_HOT_BUDDY * 1 |
+ SCHED_FEAT_SYNC_WAKEUPS * 1 |
SCHED_FEAT_HRTICK * 1 |
- SCHED_FEAT_DOUBLE_TICK * 0 |
- SCHED_FEAT_SYNC_WAKEUPS * 0;
+ SCHED_FEAT_DOUBLE_TICK * 0;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
commit d25ce4cd499a21aab89ff8755f8c4a2800eae25f
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Mar 17 09:36:53 2008 +0100
sched: cache hot buddy
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c5efad78c09..42d2f1155d30 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -625,20 +625,22 @@ enum {
SCHED_FEAT_NEW_FAIR_SLEEPERS = 1,
SCHED_FEAT_WAKEUP_PREEMPT = 2,
SCHED_FEAT_START_DEBIT = 4,
- SCHED_FEAT_HRTICK = 8,
- SCHED_FEAT_DOUBLE_TICK = 16,
- SCHED_FEAT_SYNC_WAKEUPS = 32,
- SCHED_FEAT_AFFINE_WAKEUPS = 64,
+ SCHED_FEAT_AFFINE_WAKEUPS = 8,
+ SCHED_FEAT_CACHE_HOT_BUDDY = 16,
+ SCHED_FEAT_HRTICK = 32,
+ SCHED_FEAT_DOUBLE_TICK = 64,
+ SCHED_FEAT_SYNC_WAKEUPS = 128,
};
const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_NEW_FAIR_SLEEPERS * 1 |
SCHED_FEAT_WAKEUP_PREEMPT * 1 |
SCHED_FEAT_START_DEBIT * 1 |
+ SCHED_FEAT_AFFINE_WAKEUPS * 1 |
+ SCHED_FEAT_CACHE_HOT_BUDDY * 1 |
SCHED_FEAT_HRTICK * 1 |
SCHED_FEAT_DOUBLE_TICK * 0 |
- SCHED_FEAT_SYNC_WAKEUPS * 0 |
- SCHED_FEAT_AFFINE_WAKEUPS * 1;
+ SCHED_FEAT_SYNC_WAKEUPS * 0;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
@@ -1519,7 +1521,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
/*
* Buddy candidates are cache hot:
*/
- if (&p->se == cfs_rq_of(&p->se)->next)
+ if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
return 1;
if (p->sched_class != &fair_sched_class)
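The hunk above puts the "buddy candidates are cache hot" heuristic behind the new feature bit, so it can be switched off at runtime. A stand-alone model of the gated check (names and the surrounding fallback logic are illustrative, not the kernel's exact task_hot()):

#include <stdbool.h>
#include <stdio.h>

#define FEAT_CACHE_HOT_BUDDY 16
static unsigned int features = FEAT_CACHE_HOT_BUDDY;
#define feat(x) (features & FEAT_##x)

struct task {
	bool is_next_buddy;		/* is this cfs_rq's ->next? */
	unsigned long long exec_start;	/* when it last started running */
};

static bool task_hot_model(const struct task *p, unsigned long long now,
			   unsigned long long migration_cost)
{
	/* Buddy candidates count as cache hot only if the feature is on: */
	if (feat(CACHE_HOT_BUDDY) && p->is_next_buddy)
		return true;
	/* Otherwise fall back to a recent-execution test: */
	return now - p->exec_start < migration_cost;
}

int main(void)
{
	struct task t = { .is_next_buddy = true, .exec_start = 0 };

	printf("hot: %d\n", task_hot_model(&t, 1000000, 500000));
	return 0;
}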
commit 1fc8afa4c820fcde3658238eab5c010476ede521
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Mar 19 01:39:19 2008 +0100
sched: feat affine wakeups
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 263e25e10204..7c5efad78c09 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -628,6 +628,7 @@ enum {
SCHED_FEAT_HRTICK = 8,
SCHED_FEAT_DOUBLE_TICK = 16,
SCHED_FEAT_SYNC_WAKEUPS = 32,
+ SCHED_FEAT_AFFINE_WAKEUPS = 64,
};
const_debug unsigned int sysctl_sched_features =
@@ -636,7 +637,8 @@ const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_START_DEBIT * 1 |
SCHED_FEAT_HRTICK * 1 |
SCHED_FEAT_DOUBLE_TICK * 0 |
- SCHED_FEAT_SYNC_WAKEUPS * 0;
+ SCHED_FEAT_SYNC_WAKEUPS * 0 |
+ SCHED_FEAT_AFFINE_WAKEUPS * 1;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
commit b85d0667268320072ccdeb07c27c25b300ab3724
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Mar 16 20:03:22 2008 +0100
sched: introduce SCHED_FEAT_SYNC_WAKEUPS, turn it off
turn off sync wakeups by default. They are not needed anymore - the
buddy logic should be smart enough to keep the system from
overscheduling.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index d8456a9ac9af..263e25e10204 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -627,6 +627,7 @@ enum {
SCHED_FEAT_START_DEBIT = 4,
SCHED_FEAT_HRTICK = 8,
SCHED_FEAT_DOUBLE_TICK = 16,
+ SCHED_FEAT_SYNC_WAKEUPS = 32,
};
const_debug unsigned int sysctl_sched_features =
@@ -634,7 +635,8 @@ const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_WAKEUP_PREEMPT * 1 |
SCHED_FEAT_START_DEBIT * 1 |
SCHED_FEAT_HRTICK * 1 |
- SCHED_FEAT_DOUBLE_TICK * 0;
+ SCHED_FEAT_DOUBLE_TICK * 0 |
+ SCHED_FEAT_SYNC_WAKEUPS * 0;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
@@ -1916,6 +1918,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
long old_state;
struct rq *rq;
+ if (!sched_feat(SYNC_WAKEUPS))
+ sync = 0;
+
smp_wmb();
rq = task_rq_lock(p, &flags);
old_state = p->state;
commit 27ec4407790d075c325e1f4da0a19c56953cce23
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 28 21:00:21 2008 +0100
sched: make cpu_clock() globally synchronous
Alexey Zaytsev reported (and bisected) that the introduction of
cpu_clock() in printk made the timestamps jump back and forth.
Make cpu_clock() more reliable while still keeping it fast when it's
called frequently.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 8dcdec6fe0fe..7377222ab42f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -632,11 +632,39 @@ int sysctl_sched_rt_runtime = 950000;
*/
#define RUNTIME_INF ((u64)~0ULL)
+static const unsigned long long time_sync_thresh = 100000;
+
+static DEFINE_PER_CPU(unsigned long long, time_offset);
+static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
+
/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
+ * Global lock which we take every now and then to synchronize
+ * the CPUs time. This method is not warp-safe, but it's good
+ * enough to synchronize slowly diverging time sources and thus
+ * it's good enough for tracing:
*/
-unsigned long long cpu_clock(int cpu)
+static DEFINE_SPINLOCK(time_sync_lock);
+static unsigned long long prev_global_time;
+
+static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&time_sync_lock, flags);
+
+ if (time < prev_global_time) {
+ per_cpu(time_offset, cpu) += prev_global_time - time;
+ time = prev_global_time;
+ } else {
+ prev_global_time = time;
+ }
+
+ spin_unlock_irqrestore(&time_sync_lock, flags);
+
+ return time;
+}
+
+static unsigned long long __cpu_clock(int cpu)
{
unsigned long long now;
unsigned long flags;
@@ -657,6 +685,24 @@ unsigned long long cpu_clock(int cpu)
return now;
}
+
+/*
+ * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
+ * clock constructed from sched_clock():
+ */
+unsigned long long cpu_clock(int cpu)
+{
+ unsigned long long prev_cpu_time, time, delta_time;
+
+ prev_cpu_time = per_cpu(prev_cpu_time, cpu);
+ time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
+ delta_time = time-prev_cpu_time;
+
+ if (unlikely(delta_time > time_sync_thresh))
+ time = __sync_cpu_clock(time, cpu);
+
+ return time;
+}
EXPORT_SYMBOL_GPL(cpu_clock);
#ifndef prepare_arch_switch
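The scheme above gives each CPU a private offset and reconciles drifting clocks against a global high-water mark, taking the lock only when a reading has advanced past time_sync_thresh. A stand-alone user-space model of the algorithm (arrays stand in for per-CPU variables, a dummy counter for sched_clock(); unlike the kernel hunk shown, the model also records the previous reading so the threshold test has something to compare against):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define NCPUS            4
#define TIME_SYNC_THRESH 100000ULL

static pthread_mutex_t time_sync_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t prev_global_time;

static uint64_t time_offset[NCPUS];	/* per-CPU correction */
static uint64_t prev_cpu_time[NCPUS];	/* last value handed out */

/* Stand-in for the raw per-CPU sched_clock(): a drifting dummy. */
static uint64_t raw_cpu_clock(int cpu)
{
	static uint64_t ticks[NCPUS];

	return ticks[cpu] += 90000 + (uint64_t)cpu * 10000;
}

/* Slow path: reconcile this CPU against the global high-water mark. */
static uint64_t sync_cpu_clock(uint64_t time, int cpu)
{
	pthread_mutex_lock(&time_sync_lock);
	if (time < prev_global_time) {
		/* This CPU lags: raise its offset up to the global mark. */
		time_offset[cpu] += prev_global_time - time;
		time = prev_global_time;
	} else {
		/* This CPU leads: it defines the new global mark. */
		prev_global_time = time;
	}
	pthread_mutex_unlock(&time_sync_lock);
	return time;
}

/* Fast path: lock-free unless the clock jumped past the threshold. */
static uint64_t cpu_clock_model(int cpu)
{
	uint64_t time = raw_cpu_clock(cpu) + time_offset[cpu];

	if (time - prev_cpu_time[cpu] > TIME_SYNC_THRESH)
		time = sync_cpu_clock(time, cpu);
	prev_cpu_time[cpu] = time;
	return time;
}

int main(void)
{
	for (int i = 0; i < 8; i++)
		printf("cpu%d: %llu\n", i % NCPUS,
		       (unsigned long long)cpu_clock_model(i % NCPUS));
	return 0;
}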