Patches contributed by Eötvös Loránd University
commit d274a4cee190c880ec25b60501efe50c4435b3d7
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:14 2007 +0200
sched: update comment
update comment: clarify time-slices and remove obsolete tuning detail.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f819f943fb86..ec1592eb8d08 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -25,14 +25,12 @@
* (default: 20ms, units: nanoseconds)
*
* NOTE: this latency value is not the same as the concept of
- * 'timeslice length' - timeslices in CFS are of variable length.
- * (to see the precise effective timeslice length of your workload,
- * run vmstat and monitor the context-switches field)
+ * 'timeslice length' - timeslices in CFS are of variable length
+ * and have no persistent notion like in traditional, time-slice
+ * based scheduling concepts.
*
- * On SMP systems the value of this is multiplied by the log2 of the
- * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way
- * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
- * Targeted preemption latency for CPU-bound tasks:
+ * (to see the precise effective timeslice length of your workload,
+ * run vmstat and monitor the context-switches (cs) field)
*/
const_debug unsigned int sysctl_sched_latency = 20000000ULL;
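For illustration of the comment's point (an editor's sketch, not part of the patch): because CFS has no fixed quantum, a task's effective slice can be approximated as its weighted share of the latency period.

#include <stdint.h>

/*
 * Simplified sketch (assumed formula, not the kernel's exact sched_slice()):
 * each runnable task effectively gets a share of the latency period in
 * proportion to its load weight, so the observed "timeslice" shrinks as the
 * runqueue fills up instead of being a constant quantum.
 */
static uint64_t approx_effective_slice(uint64_t sched_latency_ns,
                                       unsigned long task_weight,
                                       unsigned long total_rq_weight)
{
        return sched_latency_ns * task_weight / total_rq_weight;
}

With the 20 ms default and two equally weighted CPU-bound tasks this comes out near 10 ms each, which is roughly what the vmstat context-switch (cs) rate would reflect.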
commit 00bf7bfc2eaf775b634774e9ec435d720b6ecee7
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:14 2007 +0200
sched: fix: move the CPU check into ->task_new_fair()
noticed by Peter Zijlstra:
fix: move the CPU check into ->task_new_fair(), this way we
can call place_entity() and get child ->vruntime right at
initial wakeup time.
(without this there can be large latencies)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
diff --git a/kernel/sched.c b/kernel/sched.c
index f2b8db4d6802..b41ef663b993 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1660,17 +1660,14 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
unsigned long flags;
struct rq *rq;
- int this_cpu;
rq = task_rq_lock(p, &flags);
BUG_ON(p->state != TASK_RUNNING);
- this_cpu = smp_processor_id(); /* parent's CPU */
update_rq_clock(rq);
p->prio = effective_prio(p);
- if (task_cpu(p) != this_cpu || !p->sched_class->task_new ||
- !current->se.on_rq) {
+ if (!p->sched_class->task_new || !current->se.on_rq || !rq->cfs.curr) {
activate_task(rq, p, 0);
} else {
/*
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a9dfb7746c5c..f5f491762e35 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1007,13 +1007,14 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
{
struct cfs_rq *cfs_rq = task_cfs_rq(p);
struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+ int this_cpu = smp_processor_id();
sched_info_queued(p);
update_curr(cfs_rq);
place_entity(cfs_rq, se, 1);
- if (sysctl_sched_child_runs_first &&
+ if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
curr->vruntime < se->vruntime) {
/*
* Upon rescheduling, sched_class::put_prev_task() will place
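With the check moved, place_entity() now runs for every forked task, and only the child-runs-first swap stays restricted to the parent's CPU. A hypothetical sketch of what the initial placement amounts to (assumed names and structures, not the kernel's place_entity()):

#include <stdint.h>

/*
 * Hypothetical sketch: a new child starts near the runqueue's minimum
 * vruntime, optionally pushed back by one slice when the START_DEBIT
 * feature is enabled, which keeps its first wakeup latency bounded.
 * Assumed names; not the kernel's exact code.
 */
struct demo_cfs_rq { uint64_t min_vruntime; };
struct demo_entity { uint64_t vruntime; };

static void demo_place_new_entity(struct demo_cfs_rq *cfs_rq,
                                  struct demo_entity *se,
                                  uint64_t slice_ns, int start_debit)
{
        uint64_t vruntime = cfs_rq->min_vruntime;

        if (start_debit)
                vruntime += slice_ns;   /* the child pays one slice up front */

        se->vruntime = vruntime;
}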
commit 0702e3ebc1e42576a04d29f8adacf13be825b800
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:14 2007 +0200
sched: cleanup: function prototype cleanups
noticed by Thomas Gleixner:
cleanup: function prototype cleanups - move into single line
wherever possible.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 14a9b9b997ce..a9dfb7746c5c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -112,8 +112,7 @@ static inline struct task_struct *task_of(struct sched_entity *se)
* Scheduling class tree data structure manipulation methods:
*/
-static inline u64
-max_vruntime(u64 min_vruntime, u64 vruntime)
+static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
{
s64 delta = (s64)(vruntime - min_vruntime);
if (delta > 0)
@@ -122,8 +121,7 @@ max_vruntime(u64 min_vruntime, u64 vruntime)
return min_vruntime;
}
-static inline u64
-min_vruntime(u64 min_vruntime, u64 vruntime)
+static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
{
s64 delta = (s64)(vruntime - min_vruntime);
if (delta < 0)
@@ -132,8 +130,7 @@ min_vruntime(u64 min_vruntime, u64 vruntime)
return min_vruntime;
}
-static inline s64
-entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
return se->vruntime - cfs_rq->min_vruntime;
}
@@ -141,8 +138,7 @@ entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
/*
* Enqueue an entity into the rb-tree:
*/
-static void
-__enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
struct rb_node *parent = NULL;
@@ -179,8 +175,7 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
}
-static void
-__dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (cfs_rq->rb_leftmost == &se->run_node)
cfs_rq->rb_leftmost = rb_next(&se->run_node);
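Beyond the one-line prototypes, the hunks above show the signed-delta comparison used for vruntimes; the following standalone demo (editor's illustration, not part of the patch) shows why the cast matters.

#include <stdint.h>

/*
 * Demo of the idiom in max_vruntime()/min_vruntime(): computing the unsigned
 * difference first and casting it to a signed type keeps the comparison
 * correct even if the monotonically increasing 64-bit counters wrap around,
 * whereas a plain "vruntime > min_vruntime" would not. Illustration only.
 */
static inline uint64_t demo_max_vruntime(uint64_t min_vruntime, uint64_t vruntime)
{
        int64_t delta = (int64_t)(vruntime - min_vruntime);

        if (delta > 0)
                min_vruntime = vruntime;

        return min_vruntime;
}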
commit 4cf86d77f5942336e7cd9de874b38b3c83b54d5e
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:14 2007 +0200
sched: cleanup: rename task_grp to task_group
cleanup: rename task_grp to task_group. No need to save two characters
and 'grp' is annoying to read.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 49c7b374eac8..3cddbfc0c91d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,7 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
struct seq_file;
struct cfs_rq;
-struct task_grp;
+struct task_group;
#ifdef CONFIG_SCHED_DEBUG
extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
extern void proc_sched_set_task(struct task_struct *p);
@@ -598,7 +598,7 @@ struct user_struct {
uid_t uid;
#ifdef CONFIG_FAIR_USER_SCHED
- struct task_grp *tg;
+ struct task_group *tg;
#endif
};
@@ -1842,12 +1842,12 @@ extern void normalize_rt_tasks(void);
#ifdef CONFIG_FAIR_GROUP_SCHED
-extern struct task_grp init_task_grp;
+extern struct task_group init_task_group;
-extern struct task_grp *sched_create_group(void);
-extern void sched_destroy_group(struct task_grp *tg);
+extern struct task_group *sched_create_group(void);
+extern void sched_destroy_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk);
-extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
#endif
diff --git a/kernel/sched.c b/kernel/sched.c
index 5bfe1df73f0f..f2b8db4d6802 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -156,7 +156,7 @@ struct rt_prio_array {
struct cfs_rq;
/* task group related information */
-struct task_grp {
+struct task_group {
/* schedulable entities of this group on each cpu */
struct sched_entity **se;
/* runqueue "owned" by this group on each cpu */
@@ -175,7 +175,7 @@ static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
/* Default task group.
* Every task in system belong to this group at bootup.
*/
-struct task_grp init_task_grp = {
+struct task_group init_task_group = {
.se = init_sched_entity_p,
.cfs_rq = init_cfs_rq_p,
};
@@ -186,17 +186,17 @@ struct task_grp init_task_grp = {
# define INIT_TASK_GRP_LOAD NICE_0_LOAD
#endif
-static int init_task_grp_load = INIT_TASK_GRP_LOAD;
+static int init_task_group_load = INIT_TASK_GRP_LOAD;
/* return group to which a task belongs */
-static inline struct task_grp *task_grp(struct task_struct *p)
+static inline struct task_group *task_group(struct task_struct *p)
{
- struct task_grp *tg;
+ struct task_group *tg;
#ifdef CONFIG_FAIR_USER_SCHED
tg = p->user->tg;
#else
- tg = &init_task_grp;
+ tg = &init_task_group;
#endif
return tg;
@@ -205,8 +205,8 @@ static inline struct task_grp *task_grp(struct task_struct *p)
/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
static inline void set_task_cfs_rq(struct task_struct *p)
{
- p->se.cfs_rq = task_grp(p)->cfs_rq[task_cpu(p)];
- p->se.parent = task_grp(p)->se[task_cpu(p)];
+ p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)];
+ p->se.parent = task_group(p)->se[task_cpu(p)];
}
#else
@@ -244,7 +244,7 @@ struct cfs_rq {
* list is used during load balance.
*/
struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
- struct task_grp *tg; /* group that "owns" this runqueue */
+ struct task_group *tg; /* group that "owns" this runqueue */
struct rcu_head rcu;
#endif
};
@@ -6522,19 +6522,19 @@ void __init sched_init(void)
init_cfs_rq_p[i] = cfs_rq;
init_cfs_rq(cfs_rq, rq);
- cfs_rq->tg = &init_task_grp;
+ cfs_rq->tg = &init_task_group;
list_add(&cfs_rq->leaf_cfs_rq_list,
&rq->leaf_cfs_rq_list);
init_sched_entity_p[i] = se;
se->cfs_rq = &rq->cfs;
se->my_q = cfs_rq;
- se->load.weight = init_task_grp_load;
+ se->load.weight = init_task_group_load;
se->load.inv_weight =
- div64_64(1ULL<<32, init_task_grp_load);
+ div64_64(1ULL<<32, init_task_group_load);
se->parent = NULL;
}
- init_task_grp.shares = init_task_grp_load;
+ init_task_group.shares = init_task_group_load;
#endif
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -6725,9 +6725,9 @@ void set_curr_task(int cpu, struct task_struct *p)
#ifdef CONFIG_FAIR_GROUP_SCHED
/* allocate runqueue etc for a new task group */
-struct task_grp *sched_create_group(void)
+struct task_group *sched_create_group(void)
{
- struct task_grp *tg;
+ struct task_group *tg;
struct cfs_rq *cfs_rq;
struct sched_entity *se;
struct rq *rq;
@@ -6800,7 +6800,7 @@ struct task_grp *sched_create_group(void)
static void free_sched_group(struct rcu_head *rhp)
{
struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu);
- struct task_grp *tg = cfs_rq->tg;
+ struct task_group *tg = cfs_rq->tg;
struct sched_entity *se;
int i;
@@ -6819,7 +6819,7 @@ static void free_sched_group(struct rcu_head *rhp)
}
/* Destroy runqueue etc associated with a task group */
-void sched_destroy_group(struct task_grp *tg)
+void sched_destroy_group(struct task_group *tg)
{
struct cfs_rq *cfs_rq;
int i;
@@ -6895,7 +6895,7 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
spin_unlock_irq(&rq->lock);
}
-int sched_group_set_shares(struct task_grp *tg, unsigned long shares)
+int sched_group_set_shares(struct task_group *tg, unsigned long shares)
{
int i;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 48748d04144d..6f87b31d233c 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -239,7 +239,7 @@ static int
root_user_share_read_proc(char *page, char **start, off_t off, int count,
int *eof, void *data)
{
- return sprintf(page, "%d\n", init_task_grp_load);
+ return sprintf(page, "%d\n", init_task_group_load);
}
static int
@@ -260,8 +260,8 @@ root_user_share_write_proc(struct file *file, const char __user *buffer,
mutex_lock(&root_user_share_mutex);
- init_task_grp_load = shares;
- rc = sched_group_set_shares(&init_task_grp, shares);
+ init_task_group_load = shares;
+ rc = sched_group_set_shares(&init_task_group, shares);
mutex_unlock(&root_user_share_mutex);
diff --git a/kernel/user.c b/kernel/user.c
index c6387fac932d..0c9a7870d08f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,7 +51,7 @@ struct user_struct root_user = {
.session_keyring = &root_session_keyring,
#endif
#ifdef CONFIG_FAIR_USER_SCHED
- .tg = &init_task_grp,
+ .tg = &init_task_group,
#endif
};
commit 06877c33fe9261ccdf143492c28de93c56493079
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:13 2007 +0200
sched: cleanup: rename SCHED_FEAT_USE_TREE_AVG to SCHED_FEAT_TREE_AVG
cleanup: rename SCHED_FEAT_USE_TREE_AVG to SCHED_FEAT_TREE_AVG, to
make SCHED_FEAT_ names more consistent.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 23da93360b22..5bfe1df73f0f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -440,14 +440,14 @@ static void update_rq_clock(struct rq *rq)
enum {
SCHED_FEAT_NEW_FAIR_SLEEPERS = 1,
SCHED_FEAT_START_DEBIT = 2,
- SCHED_FEAT_USE_TREE_AVG = 4,
+ SCHED_FEAT_TREE_AVG = 4,
SCHED_FEAT_APPROX_AVG = 8,
};
const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_NEW_FAIR_SLEEPERS *1 |
SCHED_FEAT_START_DEBIT *1 |
- SCHED_FEAT_USE_TREE_AVG *0 |
+ SCHED_FEAT_TREE_AVG *0 |
SCHED_FEAT_APPROX_AVG *0;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 7826e18151a8..14a9b9b997ce 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -477,7 +477,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
vruntime = cfs_rq->min_vruntime;
- if (sched_feat(USE_TREE_AVG)) {
+ if (sched_feat(TREE_AVG)) {
struct sched_entity *last = __pick_last_entity(cfs_rq);
if (last) {
vruntime += last->vruntime;
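The rename is purely cosmetic; the sched_feat() machinery around it is an ordinary bit-flag pattern. A standalone sketch of that pattern (made-up names, editor's illustration):

#include <stdio.h>

/*
 * Sketch of the feature-flag pattern from the hunk above: each feature is a
 * power-of-two bit, the "* 1" / "* 0" multipliers encode its default state,
 * and a token-pasting macro tests a flag by short name. Names are invented
 * for this example.
 */
enum {
        DEMO_FEAT_NEW_FAIR_SLEEPERS = 1,
        DEMO_FEAT_START_DEBIT       = 2,
        DEMO_FEAT_TREE_AVG          = 4,
};

static const unsigned int demo_features =
        DEMO_FEAT_NEW_FAIR_SLEEPERS * 1 |
        DEMO_FEAT_START_DEBIT       * 1 |
        DEMO_FEAT_TREE_AVG          * 0;

#define demo_feat(x) (demo_features & DEMO_FEAT_##x)

int main(void)
{
        printf("TREE_AVG default: %s\n", demo_feat(TREE_AVG) ? "on" : "off");
        return 0;
}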
commit a65914b3658043da27c159b8a28c5811bb0a88c9
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:13 2007 +0200
sched: kfree(NULL) is valid
kfree(NULL) is valid.
pointed out by checkpatch.pl.
the fix shrinks the code a bit:
text data bss dec hex filename
40024 3842 100 43966 abbe sched.o.before
40002 3842 100 43944 aba8 sched.o.after
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 10b7bedfa35a..23da93360b22 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6784,17 +6784,14 @@ struct task_grp *sched_create_group(void)
err:
for_each_possible_cpu(i) {
- if (tg->cfs_rq && tg->cfs_rq[i])
+ if (tg->cfs_rq)
kfree(tg->cfs_rq[i]);
- if (tg->se && tg->se[i])
+ if (tg->se)
kfree(tg->se[i]);
}
- if (tg->cfs_rq)
- kfree(tg->cfs_rq);
- if (tg->se)
- kfree(tg->se);
- if (tg)
- kfree(tg);
+ kfree(tg->cfs_rq);
+ kfree(tg->se);
+ kfree(tg);
return ERR_PTR(-ENOMEM);
}
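The simplification works because kfree(NULL) is a no-op, so only the checks needed before indexing the arrays survive. The same idiom in plain C with free(), which gives the identical guarantee (hypothetical structure, for illustration only):

#include <stdlib.h>

/*
 * Hypothetical error-path sketch: free(NULL) is defined to do nothing, just
 * like kfree(NULL), so the per-pointer NULL checks can be dropped while the
 * checks on the arrays themselves (needed before indexing) are kept.
 */
struct demo_group {
        int **per_cpu_a;
        int **per_cpu_b;
};

static int demo_alloc(struct demo_group *g, int ncpus)
{
        int i;

        g->per_cpu_a = calloc(ncpus, sizeof(*g->per_cpu_a));
        g->per_cpu_b = calloc(ncpus, sizeof(*g->per_cpu_b));
        if (!g->per_cpu_a || !g->per_cpu_b)
                goto err;

        for (i = 0; i < ncpus; i++) {
                g->per_cpu_a[i] = malloc(sizeof(int));
                g->per_cpu_b[i] = malloc(sizeof(int));
                if (!g->per_cpu_a[i] || !g->per_cpu_b[i])
                        goto err;
        }
        return 0;

err:
        for (i = 0; i < ncpus; i++) {
                if (g->per_cpu_a)
                        free(g->per_cpu_a[i]);
                if (g->per_cpu_b)
                        free(g->per_cpu_b[i]);
        }
        free(g->per_cpu_a);     /* safe even when the allocation failed */
        free(g->per_cpu_b);
        return -1;
}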
commit 8927f49479756c1aff76e8202ad32733c965864f
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:13 2007 +0200
sched: style cleanup
fix up __setup() style bug - noticed via checkpatch.pl.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 7fefd8ab8b58..10b7bedfa35a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5680,7 +5680,7 @@ static int __init isolated_cpu_setup(char *str)
return 1;
}
-__setup ("isolcpus=", isolated_cpu_setup);
+__setup("isolcpus=", isolated_cpu_setup);
/*
* init_sched_build_groups takes the cpumask we wish to span, and a pointer
commit 26797a34a24cfeab9951a6f42f27432c0b2546af
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:13 2007 +0200
sched: break out if printing a warning in sched_domain_debug()
checkpatch.pl and Andy Whitcroft noticed the following bug: we did
not break out after printing an error.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 1a80ac1b6dab..7fefd8ab8b58 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5534,16 +5534,19 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
printk("\n");
printk(KERN_ERR "ERROR: domain->cpu_power not "
"set\n");
+ break;
}
if (!cpus_weight(group->cpumask)) {
printk("\n");
printk(KERN_ERR "ERROR: empty group\n");
+ break;
}
if (cpus_intersects(groupmask, group->cpumask)) {
printk("\n");
printk(KERN_ERR "ERROR: repeated CPUs\n");
+ break;
}
cpus_or(groupmask, groupmask, group->cpumask);
commit 3e9830dcabdeb3656855ec1b678b6bcf3b50261c
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:13 2007 +0200
sched: run sched_domain_debug() if CONFIG_SCHED_DEBUG=y
run sched_domain_debug() if CONFIG_SCHED_DEBUG=y, instead
of relying on the hand-crafted SCHED_DOMAIN_DEBUG switch.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index f370f108ed04..1a80ac1b6dab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5476,8 +5476,7 @@ int __init migration_init(void)
int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);
-#undef SCHED_DOMAIN_DEBUG
-#ifdef SCHED_DOMAIN_DEBUG
+#ifdef CONFIG_SCHED_DEBUG
static void sched_domain_debug(struct sched_domain *sd, int cpu)
{
int level = 0;
commit 155bb293ae8387526e6e07d42b1691104e55d9a2
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 17:00:13 2007 +0200
sched: tweak wakeup granularity
tweak wakeup granularity.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fa78686ec227..0856701db14e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -58,23 +58,23 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
/*
* SCHED_BATCH wake-up granularity.
- * (default: 25 msec, units: nanoseconds)
+ * (default: 10 msec, units: nanoseconds)
*
* This option delays the preemption effects of decoupled workloads
* and reduces their over-scheduling. Synchronous workloads will still
* have immediate wakeup/sleep latencies.
*/
-const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 25000000UL;
+const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
/*
* SCHED_OTHER wake-up granularity.
- * (default: 1 msec, units: nanoseconds)
+ * (default: 10 msec, units: nanoseconds)
*
* This option delays the preemption effects of decoupled workloads
* and reduces their over-scheduling. Synchronous workloads will still
* have immediate wakeup/sleep latencies.
*/
-const_debug unsigned int sysctl_sched_wakeup_granularity = 2000000UL;
+const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
/**************************************************************
* CFS operations on generic schedulable entities:
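For context on what the new 10 msec values change in behaviour: the wakeup granularity acts as hysteresis on wakeup preemption. A hypothetical, simplified version of that check (assumed names; the kernel's real test in the wakeup-preemption path is more involved):

#include <stdint.h>

/*
 * Hypothetical, simplified sketch: a waking task preempts the currently
 * running one only when its vruntime lead exceeds the wakeup granularity.
 * Raising the granularity (e.g. to 10 msec) tolerates a larger imbalance
 * before preempting, which damps the over-scheduling of loosely coupled
 * workloads. Not the kernel's exact code.
 */
static int demo_wakeup_should_preempt(uint64_t curr_vruntime,
                                      uint64_t waking_vruntime,
                                      uint64_t wakeup_granularity_ns)
{
        int64_t lead = (int64_t)(curr_vruntime - waking_vruntime);

        return lead > (int64_t)wakeup_granularity_ns;
}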