Patches contributed by Eötvös Loránd University


commit d274a4cee190c880ec25b60501efe50c4435b3d7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:14 2007 +0200

    sched: update comment
    
    update comment: clarify time-slices and remove obsolete tuning detail.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f819f943fb86..ec1592eb8d08 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -25,14 +25,12 @@
  * (default: 20ms, units: nanoseconds)
  *
  * NOTE: this latency value is not the same as the concept of
- * 'timeslice length' - timeslices in CFS are of variable length.
- * (to see the precise effective timeslice length of your workload,
- *  run vmstat and monitor the context-switches field)
+ * 'timeslice length' - timeslices in CFS are of variable length
+ * and have no persistent notion like in traditional, time-slice
+ * based scheduling concepts.
  *
- * On SMP systems the value of this is multiplied by the log2 of the
- * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way
- * systems, 4x on 8-way systems, 5x on 16-way systems, etc.)
- * Targeted preemption latency for CPU-bound tasks:
+ * (to see the precise effective timeslice length of your workload,
+ *  run vmstat and monitor the context-switches (cs) field)
  */
 const_debug unsigned int sysctl_sched_latency = 20000000ULL;
 
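The comment's point about variable timeslices can be made concrete with a rough sketch (illustrative helper, not the kernel's sched_slice()): with N runnable tasks of roughly equal weight, CFS aims to cycle through all of them within sysctl_sched_latency, so each task's effective slice is about the latency target divided by N and shrinks as the runqueue fills.

    /*
     * Illustration only -- simplified, assumes equal task weights and
     * ignores the minimum-granularity clamp the real scheduler applies.
     */
    static unsigned long long approx_timeslice_ns(unsigned long long latency_ns,
                                                  unsigned int nr_running)
    {
        return nr_running ? latency_ns / nr_running : latency_ns;
    }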

commit 00bf7bfc2eaf775b634774e9ec435d720b6ecee7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:14 2007 +0200

    sched: fix: move the CPU check into ->task_new_fair()
    
    noticed by Peter Zijlstra:
    
    fix: move the CPU check into ->task_new_fair(), this way we
    can call place_entity() and get child ->vruntime right at
    initial wakeup time.
    
    (without this there can be large latencies)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/kernel/sched.c b/kernel/sched.c
index f2b8db4d6802..b41ef663b993 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1660,17 +1660,14 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
 	unsigned long flags;
 	struct rq *rq;
-	int this_cpu;
 
 	rq = task_rq_lock(p, &flags);
 	BUG_ON(p->state != TASK_RUNNING);
-	this_cpu = smp_processor_id(); /* parent's CPU */
 	update_rq_clock(rq);
 
 	p->prio = effective_prio(p);
 
-	if (task_cpu(p) != this_cpu || !p->sched_class->task_new ||
-							!current->se.on_rq) {
+	if (!p->sched_class->task_new || !current->se.on_rq || !rq->cfs.curr) {
 		activate_task(rq, p, 0);
 	} else {
 		/*
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a9dfb7746c5c..f5f491762e35 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1007,13 +1007,14 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 	struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+	int this_cpu = smp_processor_id();
 
 	sched_info_queued(p);
 
 	update_curr(cfs_rq);
 	place_entity(cfs_rq, se, 1);
 
-	if (sysctl_sched_child_runs_first &&
+	if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
 			curr->vruntime < se->vruntime) {
 		/*
 		 * Upon rescheduling, sched_class::put_prev_task() will place

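For context, the win here is conceptual: placing the child at initial wakeup anchors its ->vruntime to the destination runqueue instead of leaving it wherever fork happened to copy it. A minimal sketch of the idea (illustrative names and simplified logic, not the kernel's place_entity()):

    /*
     * Conceptual sketch only: a newly forked entity is positioned relative
     * to the runqueue's current min_vruntime (optionally pushed back by a
     * start debit) and is never moved backwards in virtual time, so it
     * neither starves existing tasks nor waits unboundedly itself.
     */
    static void place_new_entity(unsigned long long *se_vruntime,
                                 unsigned long long min_vruntime,
                                 unsigned long long start_debit)
    {
        unsigned long long vruntime = min_vruntime + start_debit;

        if (vruntime > *se_vruntime)
            *se_vruntime = vruntime;
    }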
commit 0702e3ebc1e42576a04d29f8adacf13be825b800
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:14 2007 +0200

    sched: cleanup: function prototype cleanups
    
    noticed by Thomas Gleixner:
    
    cleanup: function prototype cleanups - move into single line
    wherever possible.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 14a9b9b997ce..a9dfb7746c5c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -112,8 +112,7 @@ static inline struct task_struct *task_of(struct sched_entity *se)
  * Scheduling class tree data structure manipulation methods:
  */
 
-static inline u64
-max_vruntime(u64 min_vruntime, u64 vruntime)
+static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
 {
 	s64 delta = (s64)(vruntime - min_vruntime);
 	if (delta > 0)
@@ -122,8 +121,7 @@ max_vruntime(u64 min_vruntime, u64 vruntime)
 	return min_vruntime;
 }
 
-static inline u64
-min_vruntime(u64 min_vruntime, u64 vruntime)
+static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
 {
 	s64 delta = (s64)(vruntime - min_vruntime);
 	if (delta < 0)
@@ -132,8 +130,7 @@ min_vruntime(u64 min_vruntime, u64 vruntime)
 	return min_vruntime;
 }
 
-static inline s64
-entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	return se->vruntime - cfs_rq->min_vruntime;
 }
@@ -141,8 +138,7 @@ entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 /*
  * Enqueue an entity into the rb-tree:
  */
-static void
-__enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
 	struct rb_node *parent = NULL;
@@ -179,8 +175,7 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
-static void
-__dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node)
 		cfs_rq->rb_leftmost = rb_next(&se->run_node);

commit 4cf86d77f5942336e7cd9de874b38b3c83b54d5e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:14 2007 +0200

    sched: cleanup: rename task_grp to task_group
    
    cleanup: rename task_grp to task_group. No need to save two characters
    and 'grp' is annoying to read.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 49c7b374eac8..3cddbfc0c91d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,7 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
-struct task_grp;
+struct task_group;
 #ifdef CONFIG_SCHED_DEBUG
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
@@ -598,7 +598,7 @@ struct user_struct {
 	uid_t uid;
 
 #ifdef CONFIG_FAIR_USER_SCHED
-	struct task_grp *tg;
+	struct task_group *tg;
 #endif
 };
 
@@ -1842,12 +1842,12 @@ extern void normalize_rt_tasks(void);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-extern struct task_grp init_task_grp;
+extern struct task_group init_task_group;
 
-extern struct task_grp *sched_create_group(void);
-extern void sched_destroy_group(struct task_grp *tg);
+extern struct task_group *sched_create_group(void);
+extern void sched_destroy_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
-extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
 
 #endif
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 5bfe1df73f0f..f2b8db4d6802 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -156,7 +156,7 @@ struct rt_prio_array {
 struct cfs_rq;
 
 /* task group related information */
-struct task_grp {
+struct task_group {
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
@@ -175,7 +175,7 @@ static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
 /* Default task group.
  *	Every task in system belong to this group at bootup.
  */
-struct task_grp init_task_grp = {
+struct task_group init_task_group = {
 	.se     = init_sched_entity_p,
 	.cfs_rq = init_cfs_rq_p,
 };
@@ -186,17 +186,17 @@ struct task_grp init_task_grp = {
 # define INIT_TASK_GRP_LOAD	NICE_0_LOAD
 #endif
 
-static int init_task_grp_load = INIT_TASK_GRP_LOAD;
+static int init_task_group_load = INIT_TASK_GRP_LOAD;
 
 /* return group to which a task belongs */
-static inline struct task_grp *task_grp(struct task_struct *p)
+static inline struct task_group *task_group(struct task_struct *p)
 {
-	struct task_grp *tg;
+	struct task_group *tg;
 
 #ifdef CONFIG_FAIR_USER_SCHED
 	tg = p->user->tg;
 #else
-	tg  = &init_task_grp;
+	tg  = &init_task_group;
 #endif
 
 	return tg;
@@ -205,8 +205,8 @@ static inline struct task_grp *task_grp(struct task_struct *p)
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 static inline void set_task_cfs_rq(struct task_struct *p)
 {
-	p->se.cfs_rq = task_grp(p)->cfs_rq[task_cpu(p)];
-	p->se.parent = task_grp(p)->se[task_cpu(p)];
+	p->se.cfs_rq = task_group(p)->cfs_rq[task_cpu(p)];
+	p->se.parent = task_group(p)->se[task_cpu(p)];
 }
 
 #else
@@ -244,7 +244,7 @@ struct cfs_rq {
 	 * list is used during load balance.
 	 */
 	struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
-	struct task_grp *tg;    /* group that "owns" this runqueue */
+	struct task_group *tg;    /* group that "owns" this runqueue */
 	struct rcu_head rcu;
 #endif
 };
@@ -6522,19 +6522,19 @@ void __init sched_init(void)
 
 			init_cfs_rq_p[i] = cfs_rq;
 			init_cfs_rq(cfs_rq, rq);
-			cfs_rq->tg = &init_task_grp;
+			cfs_rq->tg = &init_task_group;
 			list_add(&cfs_rq->leaf_cfs_rq_list,
 							 &rq->leaf_cfs_rq_list);
 
 			init_sched_entity_p[i] = se;
 			se->cfs_rq = &rq->cfs;
 			se->my_q = cfs_rq;
-			se->load.weight = init_task_grp_load;
+			se->load.weight = init_task_group_load;
 			se->load.inv_weight =
-				 div64_64(1ULL<<32, init_task_grp_load);
+				 div64_64(1ULL<<32, init_task_group_load);
 			se->parent = NULL;
 		}
-		init_task_grp.shares = init_task_grp_load;
+		init_task_group.shares = init_task_group_load;
 #endif
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -6725,9 +6725,9 @@ void set_curr_task(int cpu, struct task_struct *p)
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
 /* allocate runqueue etc for a new task group */
-struct task_grp *sched_create_group(void)
+struct task_group *sched_create_group(void)
 {
-	struct task_grp *tg;
+	struct task_group *tg;
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
 	struct rq *rq;
@@ -6800,7 +6800,7 @@ struct task_grp *sched_create_group(void)
 static void free_sched_group(struct rcu_head *rhp)
 {
 	struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu);
-	struct task_grp *tg = cfs_rq->tg;
+	struct task_group *tg = cfs_rq->tg;
 	struct sched_entity *se;
 	int i;
 
@@ -6819,7 +6819,7 @@ static void free_sched_group(struct rcu_head *rhp)
 }
 
 /* Destroy runqueue etc associated with a task group */
-void sched_destroy_group(struct task_grp *tg)
+void sched_destroy_group(struct task_group *tg)
 {
 	struct cfs_rq *cfs_rq;
 	int i;
@@ -6895,7 +6895,7 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
 	spin_unlock_irq(&rq->lock);
 }
 
-int sched_group_set_shares(struct task_grp *tg, unsigned long shares)
+int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
 	int i;
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 48748d04144d..6f87b31d233c 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -239,7 +239,7 @@ static int
 root_user_share_read_proc(char *page, char **start, off_t off, int count,
 				 int *eof, void *data)
 {
-	return sprintf(page, "%d\n", init_task_grp_load);
+	return sprintf(page, "%d\n", init_task_group_load);
 }
 
 static int
@@ -260,8 +260,8 @@ root_user_share_write_proc(struct file *file, const char __user *buffer,
 
 	mutex_lock(&root_user_share_mutex);
 
-	init_task_grp_load = shares;
-	rc = sched_group_set_shares(&init_task_grp, shares);
+	init_task_group_load = shares;
+	rc = sched_group_set_shares(&init_task_group, shares);
 
 	mutex_unlock(&root_user_share_mutex);
 
diff --git a/kernel/user.c b/kernel/user.c
index c6387fac932d..0c9a7870d08f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,7 +51,7 @@ struct user_struct root_user = {
 	.session_keyring = &root_session_keyring,
 #endif
 #ifdef CONFIG_FAIR_USER_SCHED
-	.tg		= &init_task_grp,
+	.tg		= &init_task_group,
 #endif
 };
 

commit 06877c33fe9261ccdf143492c28de93c56493079
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:13 2007 +0200

    sched: cleanup: rename SCHED_FEAT_USE_TREE_AVG to SCHED_FEAT_TREE_AVG
    
    cleanup: rename SCHED_FEAT_USE_TREE_AVG to SCHED_FEAT_TREE_AVG, to
    make SCHED_FEAT_ names more consistent.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 23da93360b22..5bfe1df73f0f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -440,14 +440,14 @@ static void update_rq_clock(struct rq *rq)
 enum {
 	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 1,
 	SCHED_FEAT_START_DEBIT		= 2,
-	SCHED_FEAT_USE_TREE_AVG         = 4,
+	SCHED_FEAT_TREE_AVG             = 4,
 	SCHED_FEAT_APPROX_AVG           = 8,
 };
 
 const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_NEW_FAIR_SLEEPERS	*1 |
 		SCHED_FEAT_START_DEBIT		*1 |
-		SCHED_FEAT_USE_TREE_AVG		*0 |
+		SCHED_FEAT_TREE_AVG		*0 |
 		SCHED_FEAT_APPROX_AVG		*0;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 7826e18151a8..14a9b9b997ce 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -477,7 +477,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	vruntime = cfs_rq->min_vruntime;
 
-	if (sched_feat(USE_TREE_AVG)) {
+	if (sched_feat(TREE_AVG)) {
 		struct sched_entity *last = __pick_last_entity(cfs_rq);
 		if (last) {
 			vruntime += last->vruntime;

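As an aside, the sched_feat() mechanism this hunk touches is just a bit-flag table plus a token-pasting macro. A minimal user-space imitation of the idiom (not the kernel's code):

    #include <stdio.h>

    enum {
        FEAT_NEW_FAIR_SLEEPERS = 1,
        FEAT_START_DEBIT       = 2,
        FEAT_TREE_AVG          = 4,
        FEAT_APPROX_AVG        = 8,
    };

    /* "*1" enables a feature, "*0" disables it, keeping the table readable. */
    static unsigned int features =
        FEAT_NEW_FAIR_SLEEPERS *1 |
        FEAT_START_DEBIT       *1 |
        FEAT_TREE_AVG          *0 |
        FEAT_APPROX_AVG        *0;

    /* feat(TREE_AVG) expands to a test of the FEAT_TREE_AVG bit. */
    #define feat(x) (features & FEAT_##x)

    int main(void)
    {
        printf("TREE_AVG enabled: %d\n", feat(TREE_AVG) ? 1 : 0);
        return 0;
    }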
commit a65914b3658043da27c159b8a28c5811bb0a88c9
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:13 2007 +0200

    sched: kfree(NULL) is valid
    
    kfree(NULL) is valid.
    
    pointed out by checkpatch.pl.
    
    the fix shrinks the code a bit:
    
       text    data     bss     dec     hex filename
      40024    3842     100   43966    abbe sched.o.before
      40002    3842     100   43944    aba8 sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 10b7bedfa35a..23da93360b22 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6784,17 +6784,14 @@ struct task_grp *sched_create_group(void)
 
 err:
 	for_each_possible_cpu(i) {
-		if (tg->cfs_rq && tg->cfs_rq[i])
+		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
-		if (tg->se && tg->se[i])
+		if (tg->se)
 			kfree(tg->se[i]);
 	}
-	if (tg->cfs_rq)
-		kfree(tg->cfs_rq);
-	if (tg->se)
-		kfree(tg->se);
-	if (tg)
-		kfree(tg);
+	kfree(tg->cfs_rq);
+	kfree(tg->se);
+	kfree(tg);
 
 	return ERR_PTR(-ENOMEM);
 }

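The guarantee being relied on here also holds for free() in standard C, which is why the simplified error path needs no NULL guards. A tiny standalone illustration (not kernel code):

    #include <stdlib.h>

    int main(void)
    {
        char *p = NULL;

        /*
         * free(NULL) -- like kfree(NULL) in the kernel -- is defined to be
         * a no-op, so cleanup paths can free unconditionally instead of
         * wrapping every pointer in an "if (p)" check.
         */
        free(p);
        return 0;
    }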
commit 8927f49479756c1aff76e8202ad32733c965864f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:13 2007 +0200

    sched: style cleanup
    
    fix up __setup() style bug - noticed via checkpatch.pl.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 7fefd8ab8b58..10b7bedfa35a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5680,7 +5680,7 @@ static int __init isolated_cpu_setup(char *str)
 	return 1;
 }
 
-__setup ("isolcpus=", isolated_cpu_setup);
+__setup("isolcpus=", isolated_cpu_setup);
 
 /*
  * init_sched_build_groups takes the cpumask we wish to span, and a pointer

commit 26797a34a24cfeab9951a6f42f27432c0b2546af
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:13 2007 +0200

    sched: break out if printing a warning in sched_domain_debug()
    
    checkpatch.pl and Andy Whitcroft noticed the following bug: we did
    not break out after printing an error.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 1a80ac1b6dab..7fefd8ab8b58 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5534,16 +5534,19 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
 				printk("\n");
 				printk(KERN_ERR "ERROR: domain->cpu_power not "
 						"set\n");
+				break;
 			}
 
 			if (!cpus_weight(group->cpumask)) {
 				printk("\n");
 				printk(KERN_ERR "ERROR: empty group\n");
+				break;
 			}
 
 			if (cpus_intersects(groupmask, group->cpumask)) {
 				printk("\n");
 				printk(KERN_ERR "ERROR: repeated CPUs\n");
+				break;
 			}
 
 			cpus_or(groupmask, groupmask, group->cpumask);

commit 3e9830dcabdeb3656855ec1b678b6bcf3b50261c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:13 2007 +0200

    sched: run sched_domain_debug() if CONFIG_SCHED_DEBUG=y
    
    run sched_domain_debug() if CONFIG_SCHED_DEBUG=y, instead
    of relying on the hand-crafted SCHED_DOMAIN_DEBUG switch.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index f370f108ed04..1a80ac1b6dab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5476,8 +5476,7 @@ int __init migration_init(void)
 int nr_cpu_ids __read_mostly = NR_CPUS;
 EXPORT_SYMBOL(nr_cpu_ids);
 
-#undef SCHED_DOMAIN_DEBUG
-#ifdef SCHED_DOMAIN_DEBUG
+#ifdef CONFIG_SCHED_DEBUG
 static void sched_domain_debug(struct sched_domain *sd, int cpu)
 {
 	int level = 0;

commit 155bb293ae8387526e6e07d42b1691104e55d9a2
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:13 2007 +0200

    sched: tweak wakeup granularity
    
    tweak wakeup granularity.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fa78686ec227..0856701db14e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -58,23 +58,23 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
 
 /*
  * SCHED_BATCH wake-up granularity.
- * (default: 25 msec, units: nanoseconds)
+ * (default: 10 msec, units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 25000000UL;
+const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
 
 /*
  * SCHED_OTHER wake-up granularity.
- * (default: 1 msec, units: nanoseconds)
+ * (default: 10 msec, units: nanoseconds)
  *
  * This option delays the preemption effects of decoupled workloads
  * and reduces their over-scheduling. Synchronous workloads will still
  * have immediate wakeup/sleep latencies.
  */
-const_debug unsigned int sysctl_sched_wakeup_granularity = 2000000UL;
+const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
 
 /**************************************************************
  * CFS operations on generic schedulable entities:
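To make the tunable's role concrete: wakeup granularity is the amount of virtual-time slack a waking task must have over the currently running task before it is allowed to preempt it, so raising it damps over-scheduling between loosely coupled tasks. A rough sketch of the idea (illustrative helper, not the kernel's preemption check):

    /*
     * Illustration of the concept only: the woken task preempts only when
     * the running task is ahead of it in virtual time by more than the
     * granularity, so small imbalances do not force a context switch.
     */
    static int wakeup_preempts(unsigned long long curr_vruntime,
                               unsigned long long woken_vruntime,
                               unsigned long long gran)
    {
        return curr_vruntime > woken_vruntime + gran;
    }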