Patches contributed by Eötvös Loránd University


commit dc1f31c90cfa067af6f7000db7a5383c7667ccba
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:11 2007 +0200

    sched: remove last_min_vruntime effect
    
    remove last_min_vruntime use - prepare to remove it.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 926491f7f803..0228de186503 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -481,7 +481,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		if (sched_feat(NEW_FAIR_SLEEPERS)) {
-			s64 latency = cfs_rq->min_vruntime - se->last_min_vruntime;
+			s64 latency = cfs_rq->min_vruntime - se->vruntime;
 			if (latency < 0 || !cfs_rq->nr_running)
 				latency = 0;
 			else

commit 785c29ef9573d98b31493c9a68c3589449082108
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:11 2007 +0200

    sched: remove condition from set_task_cpu()
    
    remove condition from set_task_cpu(). Now that ->vruntime
    is not global anymore, it should (and does) work fine without
    it too.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/kernel/sched.c b/kernel/sched.c
index 213294fdcd0f..c779bf9d3552 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1052,9 +1052,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	if (p->se.block_start)
 		p->se.block_start -= clock_offset;
 #endif
-	if (likely(new_rq->cfs.min_vruntime))
-		p->se.vruntime -= old_rq->cfs.min_vruntime -
-						new_rq->cfs.min_vruntime;
+	p->se.vruntime -= old_rq->cfs.min_vruntime - new_rq->cfs.min_vruntime;
 
 	__set_task_cpu(p, new_cpu);
 }

commit 8465e792e82c567b80358e38732164b770ed4b7f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:11 2007 +0200

    sched: entity_key() fix
    
    entity_key() fix - we'd occasionally end up with a 0 vruntime
    in the !initial case.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 8ea4c9b3e411..926491f7f803 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -479,13 +479,16 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	if (initial && sched_feat(START_DEBIT))
 		vruntime += __sched_vslice(cfs_rq->nr_running + 1);
 
-	if (!initial && sched_feat(NEW_FAIR_SLEEPERS)) {
-		s64 latency = cfs_rq->min_vruntime - se->last_min_vruntime;
-		if (latency < 0 || !cfs_rq->nr_running)
-			latency = 0;
-		else
-			latency = min_t(s64, latency, sysctl_sched_latency);
-		vruntime -= latency;
+	if (!initial) {
+		if (sched_feat(NEW_FAIR_SLEEPERS)) {
+			s64 latency = cfs_rq->min_vruntime - se->last_min_vruntime;
+			if (latency < 0 || !cfs_rq->nr_running)
+				latency = 0;
+			else
+				latency = min_t(s64, latency, sysctl_sched_latency);
+			vruntime -= latency;
+		}
+		vruntime = max(vruntime, se->vruntime);
 	}
 
 	se->vruntime = vruntime;

commit d822cecedad88b69a7d68aa8d49e1f238aa320c7
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:10 2007 +0200

    sched debug: more width for parameter printouts
    
    more width for parameter printouts in /proc/sched_debug.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index d79e1ec5b06a..b24f17de19e3 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -204,9 +204,9 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now));
 
 #define P(x) \
-	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(x))
+	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 #define PN(x) \
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(x))
+	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
 	PN(sysctl_sched_latency);
 	PN(sysctl_sched_min_granularity);
 	PN(sysctl_sched_wakeup_granularity);

commit 1aa4731eff7dab7bd01747b46f654f449f1cfc2c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:10 2007 +0200

    sched debug: print settings
    
    print the current value of all tunables in /proc/sched_debug output.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index b6d0a94d4120..d79e1ec5b06a 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -203,6 +203,19 @@ static int sched_debug_show(struct seq_file *m, void *v)
 
 	SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now));
 
+#define P(x) \
+	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(x))
+#define PN(x) \
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(x))
+	PN(sysctl_sched_latency);
+	PN(sysctl_sched_min_granularity);
+	PN(sysctl_sched_wakeup_granularity);
+	PN(sysctl_sched_batch_wakeup_granularity);
+	PN(sysctl_sched_child_runs_first);
+	P(sysctl_sched_features);
+#undef PN
+#undef P
+
 	for_each_online_cpu(cpu)
 		print_cpu(m, cpu);
 

commit c18b8a7cbcbac46497ee1ce656b0e68197c7581d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:10 2007 +0200

    sched: remove unneeded tunables
    
    remove unneeded tunables.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 920eb7354d0a..2c33227b0f82 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1403,8 +1403,6 @@ extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_batch_wakeup_granularity;
-extern unsigned int sysctl_sched_stat_granularity;
-extern unsigned int sysctl_sched_runtime_limit;
 extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 #endif
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index abd65ed9f2a5..5db7bd18e818 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -76,8 +76,6 @@ const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 25000000UL;
  */
 const_debug unsigned int sysctl_sched_wakeup_granularity = 2000000UL;
 
-unsigned int sysctl_sched_runtime_limit __read_mostly;
-
 extern struct sched_class fair_sched_class;
 
 /**************************************************************

commit b8efb56172bc55082b8490778b07ef73eea0b551
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:10 2007 +0200

    sched debug: BKL usage statistics
    
    add per task and per rq BKL usage statistics.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d0cc58311b13..920eb7354d0a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -621,6 +621,10 @@ struct sched_info {
 	/* timestamps */
 	unsigned long long last_arrival,/* when we last ran on a cpu */
 			   last_queued;	/* when we were last queued to run */
+#ifdef CONFIG_SCHEDSTATS
+	/* BKL stats */
+	unsigned long bkl_cnt;
+#endif
 };
 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
 
diff --git a/kernel/sched.c b/kernel/sched.c
index f33608e9e1a2..5004dff91850 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -356,6 +356,9 @@ struct rq {
 	/* try_to_wake_up() stats */
 	unsigned long ttwu_cnt;
 	unsigned long ttwu_local;
+
+	/* BKL stats */
+	unsigned long bkl_cnt;
 #endif
 	struct lock_class_key rq_lock_key;
 };
@@ -3414,6 +3417,12 @@ static inline void schedule_debug(struct task_struct *prev)
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
 	schedstat_inc(this_rq(), sched_cnt);
+#ifdef CONFIG_SCHEDSTATS
+	if (unlikely(prev->lock_depth >= 0)) {
+		schedstat_inc(this_rq(), bkl_cnt);
+		schedstat_inc(prev, sched_info.bkl_cnt);
+	}
+#endif
 }
 
 /*
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 57ee9d5630a8..823b63a3a3e1 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -136,6 +136,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			SPLIT_NS(spread0));
 	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
 	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
+	SEQ_printf(m, "  .%-30s: %ld\n", "bkl_cnt",
+			rq->bkl_cnt);
 }
 
 static void print_cpu(struct seq_file *m, int cpu)
@@ -323,6 +325,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.exec_max);
 	PN(se.slice_max);
 	PN(se.wait_max);
+	P(sched_info.bkl_cnt);
 #endif
 	SEQ_printf(m, "%-25s:%20Ld\n",
 		   "nr_switches", (long long)(p->nvcsw + p->nivcsw));
@@ -350,6 +353,7 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.exec_max			= 0;
 	p->se.slice_max			= 0;
 	p->se.wait_max			= 0;
+	p->sched_info.bkl_cnt		= 0;
 #endif
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;

commit de8d585a12aef40676f12ddc63e97daaf7752ba1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:09 2007 +0200

    sched: enable CONFIG_FAIR_GROUP_SCHED=y by default
    
    enable CONFIG_FAIR_GROUP_SCHED=y by default.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/init/Kconfig b/init/Kconfig
index b680733270e4..faed9a0b6f24 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -283,7 +283,7 @@ config CPUSETS
 
 config FAIR_GROUP_SCHED
 	bool "Fair group cpu scheduler"
-	default n
+	default y
 	depends on EXPERIMENTAL
 	help
 	  This feature lets cpu scheduler recognize task groups and control cpu

commit 7ed2be459b61c66fcc4926ffb073a25fc077d51f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:09 2007 +0200

    sched: fair-group sched, cleanups
    
    fair-group sched, cleanups.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/init/Kconfig b/init/Kconfig
index 37711fe3c01c..b680733270e4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -294,10 +294,10 @@ choice
 	prompt "Basis for grouping tasks"
 	default FAIR_USER_SCHED
 
- 	config FAIR_USER_SCHED
- 		bool "user id"
- 		help
- 		  This option will choose userid as the basis for grouping
+	config FAIR_USER_SCHED
+		bool "user id"
+		help
+		  This option will choose userid as the basis for grouping
 		  tasks, thus providing equal cpu bandwidth to each user.
 
 endchoice

commit edcb60a309769a5f6e7c9e76d7c98b34d1757448
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Oct 15 17:00:08 2007 +0200

    sched: kernel/sched_fair.c whitespace cleanups
    
    some trivial whitespace cleanups.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
    Reviewed-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 568e922255c6..9f93a5c127e8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -476,8 +476,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		int wakeup, int set_curr)
 {
 	/*
- 	 * In case of the 'current'.
- 	 */
+	 * In case of the 'current'.
+	 */
 	if (unlikely(set_curr)) {
 		update_stats_curr_start(cfs_rq, se);
 		cfs_rq->curr = se;
@@ -992,9 +992,9 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
 	if (sysctl_sched_child_runs_first &&
 			curr->vruntime < se->vruntime) {
 		/*
- 		 * Upon rescheduling, sched_class::put_prev_task() will place
- 		 * 'current' within the tree based on its new key value.
- 		 */
+		 * Upon rescheduling, sched_class::put_prev_task() will place
+		 * 'current' within the tree based on its new key value.
+		 */
 		swap(curr->vruntime, se->vruntime);
 	}