Patches contributed by Eötvös Lorand University
commit e56f31aad9d8c0102bc074cdab4e3ee76b38600d
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Aug 10 23:05:11 2007 +0200
sched: fix typo in the FAIR_GROUP_SCHED branch
while there's no in-tree way to turn group scheduling at the moment,
fix a typo in it nevertheless.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e91db32cadfd..c5af38948a1e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -959,13 +959,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
#ifdef CONFIG_FAIR_GROUP_SCHED
struct cfs_rq *this_cfs_rq;
- long imbalances;
+ long imbalance;
unsigned long maxload;
this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
- imbalance = busy_cfs_rq->load.weight -
- this_cfs_rq->load.weight;
+ imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
if (imbalance <= 0)
continue;
@@ -976,7 +975,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
#else
-#define maxload rem_load_move
+# define maxload rem_load_move
#endif
/* pass busy_cfs_rq argument into
* load_balance_[start|next]_fair iterators
commit 529c77261bccd9d37f110f58b0753d95beaa9fa2
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Aug 10 23:05:11 2007 +0200
sched: improve rq-clock overflow logic
improve the rq-clock overflow logic: limit the absolute rq->clock
delta since the last scheduler tick, instead of limiting the delta
itself.
tested by Arjan van de Ven - whole laptop was misbehaving due to
an incorrectly calibrated cpu_khz confusing sched_clock().
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
diff --git a/kernel/sched.c b/kernel/sched.c
index b0afd8db1396..6247e4a8350f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -263,6 +263,7 @@ struct rq {
unsigned int clock_warps, clock_overflows;
unsigned int clock_unstable_events;
+ u64 tick_timestamp;
atomic_t nr_iowait;
@@ -341,8 +342,11 @@ static void __update_rq_clock(struct rq *rq)
/*
* Catch too large forward jumps too:
*/
- if (unlikely(delta > 2*TICK_NSEC)) {
- clock++;
+ if (unlikely(clock + delta > rq->tick_timestamp + TICK_NSEC)) {
+ if (clock < rq->tick_timestamp + TICK_NSEC)
+ clock = rq->tick_timestamp + TICK_NSEC;
+ else
+ clock++;
rq->clock_overflows++;
} else {
if (unlikely(delta > rq->clock_max_delta))
@@ -3308,9 +3312,16 @@ void scheduler_tick(void)
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
struct task_struct *curr = rq->curr;
+ u64 next_tick = rq->tick_timestamp + TICK_NSEC;
spin_lock(&rq->lock);
__update_rq_clock(rq);
+ /*
+ * Let rq->clock advance by at least TICK_NSEC:
+ */
+ if (unlikely(rq->clock < next_tick))
+ rq->clock = next_tick;
+ rq->tick_timestamp = rq->clock;
update_cpu_load(rq);
if (curr != rq->idle) /* FIXME: needed? */
curr->sched_class->task_tick(rq, curr);
commit 7cff8cf61cac15fa29a1ca802826d2bcbca66152
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Aug 9 11:16:52 2007 +0200
sched: refine negative nice level granularity
refine the granularity of negative nice level tasks: let them
reschedule more often to offset the effect of them consuming
their wait_runtime proportionately slower. (This makes nice-0
task scheduling smoother in the presence of negatively
reniced tasks.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 7a632c534ce5..e91db32cadfd 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -222,21 +222,25 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity)
{
u64 tmp;
+ if (likely(curr->load.weight == NICE_0_LOAD))
+ return granularity;
/*
- * Negative nice levels get the same granularity as nice-0:
+ * Positive nice levels get the same granularity as nice-0:
*/
- if (likely(curr->load.weight >= NICE_0_LOAD))
- return granularity;
+ if (likely(curr->load.weight < NICE_0_LOAD)) {
+ tmp = curr->load.weight * (u64)granularity;
+ return (long) (tmp >> NICE_0_SHIFT);
+ }
/*
- * Positive nice level tasks get linearly finer
+ * Negative nice level tasks get linearly finer
* granularity:
*/
- tmp = curr->load.weight * (u64)granularity;
+ tmp = curr->load.inv_weight * (u64)granularity;
/*
* It will always fit into 'long':
*/
- return (long) (tmp >> NICE_0_SHIFT);
+ return (long) (tmp >> WMULT_SHIFT);
}
static inline void
commit a69edb55605117cc0f20aa36c49c20b96590774d
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Aug 9 11:16:52 2007 +0200
sched: fix update_stats_enqueue() reniced codepath
the key has to be rescaled to /weight even if it has a positive value.
(this change only affects the scheduling of reniced tasks)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b885b3c85bba..7a632c534ce5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -405,7 +405,8 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
(WMULT_SHIFT - NICE_0_SHIFT);
} else {
tmp = se->wait_runtime;
- key -= (tmp * se->load.weight) >> NICE_0_SHIFT;
+ key -= (tmp * se->load.inv_weight) >>
+ (WMULT_SHIFT - NICE_0_SHIFT);
}
}
commit 194081ebfaa8c7d16133e08dd79254910c20c6ff
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Aug 9 11:16:51 2007 +0200
sched: round a bit better
round a tiny bit better in high-frequency rescheduling scenarios,
by rounding around zero instead of rounding down.
(this is pretty theoretical though)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 5470ab0258a8..b0afd8db1396 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -638,6 +638,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
#define WMULT_SHIFT 32
+/*
+ * Shift right and round:
+ */
+#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+
static unsigned long
calc_delta_mine(unsigned long delta_exec, unsigned long weight,
struct load_weight *lw)
@@ -645,18 +650,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
u64 tmp;
if (unlikely(!lw->inv_weight))
- lw->inv_weight = WMULT_CONST / lw->weight;
+ lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
tmp = (u64)delta_exec * weight;
/*
* Check whether we'd overflow the 64-bit multiplication:
*/
- if (unlikely(tmp > WMULT_CONST)) {
- tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight)
- >> (WMULT_SHIFT/2);
- } else {
- tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT;
- }
+ if (unlikely(tmp > WMULT_CONST))
+ tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+ WMULT_SHIFT/2);
+ else
+ tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
}
commit 254753dc321ea2b753ca9bc58ac329557a20efac
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Aug 9 11:16:51 2007 +0200
sched: make the multiplication table more accurate
do small deltas in the weight and multiplication constant table so
that the worst-case numeric error is better than 1:100000000. (8 digits)
the current error table is:
nice mult * inv_mult error
------------------------------------------
-20: 88761 * 48388 -0.0000000065
-19: 71755 * 59856 -0.0000000037
-18: 56483 * 76040 0.0000000056
-17: 46273 * 92818 0.0000000042
-16: 36291 * 118348 -0.0000000065
-15: 29154 * 147320 -0.0000000037
-14: 23254 * 184698 -0.0000000009
-13: 18705 * 229616 -0.0000000037
-12: 14949 * 287308 -0.0000000009
-11: 11916 * 360437 -0.0000000009
-10: 9548 * 449829 -0.0000000009
-9: 7620 * 563644 -0.0000000037
-8: 6100 * 704093 0.0000000009
-7: 4904 * 875809 0.0000000093
-6: 3906 * 1099582 -0.0000000009
-5: 3121 * 1376151 -0.0000000058
-4: 2501 * 1717300 0.0000000009
-3: 1991 * 2157191 -0.0000000035
-2: 1586 * 2708050 0.0000000009
-1: 1277 * 3363326 0.0000000014
0: 1024 * 4194304 0.0000000000
1: 820 * 5237765 0.0000000009
2: 655 * 6557202 0.0000000033
3: 526 * 8165337 -0.0000000079
4: 423 * 10153587 0.0000000012
5: 335 * 12820798 0.0000000079
6: 272 * 15790321 0.0000000037
7: 215 * 19976592 -0.0000000037
8: 172 * 24970740 -0.0000000037
9: 137 * 31350126 -0.0000000079
10: 110 * 39045157 -0.0000000061
11: 87 * 49367440 -0.0000000037
12: 70 * 61356676 0.0000000056
13: 56 * 76695844 -0.0000000075
14: 45 * 95443717 -0.0000000072
15: 36 * 119304647 -0.0000000009
16: 29 * 148102320 -0.0000000037
17: 23 * 186737708 -0.0000000028
18: 18 * 238609294 -0.0000000009
19: 15 * 286331153 -0.0000000002
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index afc59f274e58..5470ab0258a8 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -704,11 +704,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
* the relative distance between them is ~25%.)
*/
static const int prio_to_weight[40] = {
-/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921,
-/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280,
-/* 0 */ NICE_0_LOAD /* 1024 */,
-/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137,
-/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15,
+ /* -20 */ 88761, 71755, 56483, 46273, 36291,
+ /* -15 */ 29154, 23254, 18705, 14949, 11916,
+ /* -10 */ 9548, 7620, 6100, 4904, 3906,
+ /* -5 */ 3121, 2501, 1991, 1586, 1277,
+ /* 0 */ 1024, 820, 655, 526, 423,
+ /* 5 */ 335, 272, 215, 172, 137,
+ /* 10 */ 110, 87, 70, 56, 45,
+ /* 15 */ 36, 29, 23, 18, 15,
};
/*
@@ -719,14 +722,14 @@ static const int prio_to_weight[40] = {
* into multiplications:
*/
static const u32 prio_to_wmult[40] = {
-/* -20 */ 48356, 60446, 75558, 94446, 118058,
-/* -15 */ 147573, 184467, 230589, 288233, 360285,
-/* -10 */ 450347, 562979, 703746, 879575, 1099582,
-/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443,
-/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518,
-/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126,
-/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717,
-/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+ /* -20 */ 48388, 59856, 76040, 92818, 118348,
+ /* -15 */ 147320, 184698, 229616, 287308, 360437,
+ /* -10 */ 449829, 563644, 704093, 875809, 1099582,
+ /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
+ /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
+ /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
+ /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
+ /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
commit 6e82a3befe91423e501c2124312bd805be0048eb
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Aug 9 11:16:51 2007 +0200
sched: optimize update_rq_clock() calls in the load-balancer
optimize update_rq_clock() calls in the load-balancer: update them
right after locking the runqueue(s) so that the pull functions do
not have to call it.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 9ccd91e5b65b..afc59f274e58 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2017,6 +2017,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
spin_lock(&rq1->lock);
}
}
+ update_rq_clock(rq1);
+ update_rq_clock(rq2);
}
/*
@@ -2113,10 +2115,8 @@ void sched_exec(void)
static void pull_task(struct rq *src_rq, struct task_struct *p,
struct rq *this_rq, int this_cpu)
{
- update_rq_clock(src_rq);
deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
- __update_rq_clock(this_rq);
activate_task(this_rq, p, 0);
/*
* Note that idle threads have a prio of MAX_PRIO, for this test
@@ -2798,6 +2798,8 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
if (busiest->nr_running > 1) {
/* Attempt to move tasks */
double_lock_balance(this_rq, busiest);
+ /* this_rq->clock is already updated */
+ update_rq_clock(busiest);
ld_moved = move_tasks(this_rq, this_cpu, busiest,
imbalance, sd, CPU_NEWLY_IDLE,
&all_pinned);
@@ -2895,6 +2897,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
/* move a task from busiest_rq to target_rq */
double_lock_balance(busiest_rq, target_rq);
+ update_rq_clock(busiest_rq);
+ update_rq_clock(target_rq);
/* Search for an sd spanning us and the target CPU. */
for_each_domain(target_cpu, sd) {
@@ -4962,13 +4966,11 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
goto out;
on_rq = p->se.on_rq;
- if (on_rq) {
- update_rq_clock(rq_src);
+ if (on_rq)
deactivate_task(rq_src, p, 0);
- }
+
set_task_cpu(p, dest_cpu);
if (on_rq) {
- update_rq_clock(rq_dest);
activate_task(rq_dest, p, 0);
check_preempt_curr(rq_dest, p);
}
commit 2daa357705bfe68788132cf9079930ca948a90af
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Aug 9 11:16:51 2007 +0200
sched: optimize activate_task()
optimize activate_task() by removing update_rq_clock() from it.
(and add update_rq_clock() to all callsites of activate_task() that
did not have it before.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 3f5d52949990..9ccd91e5b65b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -910,8 +910,6 @@ static int effective_prio(struct task_struct *p)
*/
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
- update_rq_clock(rq);
-
if (p->state == TASK_UNINTERRUPTIBLE)
rq->nr_uninterruptible--;
@@ -1510,6 +1508,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
out_activate:
#endif /* CONFIG_SMP */
+ update_rq_clock(rq);
activate_task(rq, p, 1);
/*
* Sync wakeups (i.e. those types of wakeups where the waker
@@ -2117,6 +2116,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
update_rq_clock(src_rq);
deactivate_task(src_rq, p, 0);
set_task_cpu(p, this_cpu);
+ __update_rq_clock(this_rq);
activate_task(this_rq, p, 0);
/*
* Note that idle threads have a prio of MAX_PRIO, for this test
@@ -4207,11 +4207,10 @@ int sched_setscheduler(struct task_struct *p, int policy,
spin_unlock_irqrestore(&p->pi_lock, flags);
goto recheck;
}
+ update_rq_clock(rq);
on_rq = p->se.on_rq;
- if (on_rq) {
- update_rq_clock(rq);
+ if (on_rq)
deactivate_task(rq, p, 0);
- }
oldprio = p->prio;
__setscheduler(rq, p, policy, param->sched_priority);
if (on_rq) {
@@ -4969,6 +4968,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
}
set_task_cpu(p, dest_cpu);
if (on_rq) {
+ update_rq_clock(rq_dest);
activate_task(rq_dest, p, 0);
check_preempt_curr(rq_dest, p);
}
@@ -6623,14 +6623,13 @@ void normalize_rt_tasks(void)
goto out_unlock;
#endif
+ update_rq_clock(rq);
on_rq = p->se.on_rq;
- if (on_rq) {
- update_rq_clock(task_rq(p));
- deactivate_task(task_rq(p), p, 0);
- }
+ if (on_rq)
+ deactivate_task(rq, p, 0);
__setscheduler(rq, p, SCHED_NORMAL, 0);
if (on_rq) {
- activate_task(task_rq(p), p, 0);
+ activate_task(rq, p, 0);
resched_task(rq->curr);
}
#ifdef CONFIG_SMP
commit c3b64f1e4f772418a649bb8e3b39fcea6c358330
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Aug 9 11:16:51 2007 +0200
sched: clean up set_curr_task_fair()
clean up set_curr_task_fair().
( identity transformation that causes no change in functionality. )
text data bss dec hex filename
39170 3750 36 42956 a7cc sched.o.before
39170 3750 36 42956 a7cc sched.o.after
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e62d5b9b1582..b885b3c85bba 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1053,16 +1053,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
*/
static void set_curr_task_fair(struct rq *rq)
{
- struct task_struct *curr = rq->curr;
- struct sched_entity *se = &curr->se;
- struct cfs_rq *cfs_rq;
-
- update_rq_clock(rq);
+ struct sched_entity *se = &rq->curr.se;
- for_each_sched_entity(se) {
- cfs_rq = cfs_rq_of(se);
- set_next_entity(cfs_rq, se);
- }
+ for_each_sched_entity(se)
+ set_next_entity(cfs_rq_of(se), se);
}
#else
static void set_curr_task_fair(struct rq *rq)
@@ -1093,10 +1087,9 @@ struct sched_class fair_sched_class __read_mostly = {
#ifdef CONFIG_SCHED_DEBUG
static void print_cfs_stats(struct seq_file *m, int cpu)
{
- struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq;
- for_each_leaf_cfs_rq(rq, cfs_rq)
+ for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
print_cfs_rq(m, cpu, cfs_rq);
}
#endif
commit d9e0e6aa6d72df21ff190962c842e027fca0e009
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Aug 9 11:16:51 2007 +0200
sched: remove __update_rq_clock() call from entity_tick()
remove __update_rq_clock() call from entity_tick().
no change in functionality because scheduler_tick() already calls
__update_rq_clock().
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index eb7ca49c3260..e62d5b9b1582 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -665,11 +665,8 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
{
- struct rq *rq = rq_of(cfs_rq);
struct sched_entity *next;
- __update_rq_clock(rq);
-
/*
* Dequeue and enqueue the task to update its
* position within the tree: