Patches contributed by Eötvös Loránd University
commit 8bc6767acb3236e0345e99cf198168e60e7ae456
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 9 22:39:39 2007 +0100
sched: wakeup preemption fix
wakeup preemption fix: do not make wakeup preemption dependent on
p->prio; the decision is based purely on ->vruntime.
This improves preemption in mixed-nice-level workloads.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
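For illustration, the decision this patch leaves behind, as a minimal
self-contained sketch (simplified types; 'gran' stands for the scaled
wakeup granularity; not the literal kernel code):

/* pure vruntime-based wakeup preemption, after this patch: the woken
 * task's nice level no longer enters the decision */
struct cfs_ent { unsigned long long vruntime; };

int should_preempt(struct cfs_ent *curr, struct cfs_ent *woken,
		   unsigned long long gran)
{
	/* how far the woken task's vruntime lags behind the current one */
	long long delta = (long long)(curr->vruntime - woken->vruntime);

	/* preempt exactly when the lag exceeds the wakeup granularity */
	return delta > (long long)gran;
}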
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index fbcb426029d0..a3badf52bba2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -863,10 +863,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
if (unlikely(se->load.weight != NICE_0_LOAD))
gran = calc_delta_fair(gran, &se->load);
- if (delta > gran) {
- if (p->prio < curr->prio)
- resched_task(curr);
- }
+ if (delta > gran)
+ resched_task(curr);
}
}
commit 3e3e13f399ac8060a20d14d210a28dc02dda372e
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 9 22:39:39 2007 +0100
sched: remove PREEMPT_RESTRICT
remove PREEMPT_RESTRICT. (this is a separate commit so that any
regression related to the removal itself is bisectable)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 951759e30c09..93fd30d6dac4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -863,7 +863,6 @@ struct sched_entity {
struct load_weight load; /* for load-balancing */
struct rb_node run_node;
unsigned int on_rq;
- int peer_preempt;
u64 exec_start;
u64 sum_exec_runtime;
diff --git a/kernel/sched.c b/kernel/sched.c
index 4b23dfb4c80f..2a107e4ad5ed 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -460,7 +460,6 @@ enum {
SCHED_FEAT_TREE_AVG = 4,
SCHED_FEAT_APPROX_AVG = 8,
SCHED_FEAT_WAKEUP_PREEMPT = 16,
- SCHED_FEAT_PREEMPT_RESTRICT = 32,
};
const_debug unsigned int sysctl_sched_features =
@@ -468,8 +467,7 @@ const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_START_DEBIT * 1 |
SCHED_FEAT_TREE_AVG * 0 |
SCHED_FEAT_APPROX_AVG * 0 |
- SCHED_FEAT_WAKEUP_PREEMPT * 1 |
- SCHED_FEAT_PREEMPT_RESTRICT * 0;
+ SCHED_FEAT_WAKEUP_PREEMPT * 1;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 7264814ba62a..fbcb426029d0 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -546,7 +546,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
update_stats_dequeue(cfs_rq, se);
if (sleep) {
- se->peer_preempt = 0;
#ifdef CONFIG_SCHEDSTATS
if (entity_is_task(se)) {
struct task_struct *tsk = task_of(se);
@@ -574,10 +573,8 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
ideal_runtime = sched_slice(cfs_rq, curr);
delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
- if (delta_exec > ideal_runtime ||
- (sched_feat(PREEMPT_RESTRICT) && curr->peer_preempt))
+ if (delta_exec > ideal_runtime)
resched_task(rq_of(cfs_rq)->curr);
- curr->peer_preempt = 0;
}
static void
@@ -867,9 +864,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
gran = calc_delta_fair(gran, &se->load);
if (delta > gran) {
- int now = !sched_feat(PREEMPT_RESTRICT);
-
- if (now || p->prio < curr->prio || !se->peer_preempt++)
+ if (p->prio < curr->prio)
resched_task(curr);
}
}
@@ -1083,7 +1078,6 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
swap(curr->vruntime, se->vruntime);
}
- se->peer_preempt = 0;
enqueue_task_fair(rq, p, 0);
resched_task(rq->curr);
}
commit 52d3da1ad4f442cec877fbeb83902707b56da0cf
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 9 22:39:39 2007 +0100
sched: turn off PREEMPT_RESTRICT
PREEMPT_RESTRICT was a method aimed at reducing the amount of
wakeup-related preemption. It has a disadvantage though: it can prevent
legitimate wakeup preemption if a task is 'unlucky' enough to be hit too
early by a tick that clears peer_preempt.
Now that wakeup preemption has been cleaned up we don't seem to have
excessive preemptions anymore, so this feature can be turned off (and
removed in the next patch).
Signed-off-by: Ingo Molnar <mingo@elte.hu>
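The shape of the feature being disabled is visible in the removal diff
of commit 3e3e13f3 above; compressed into a sketch (hypothetical types,
not the literal kernel code), the two halves that interact through tick
timing are:

/* wakeup path: a higher-priority wakee always preempts; an equal- or
 * lower-priority peer only preempts while curr's peer_preempt count is
 * still zero (the post-increment records the attempt) */
struct pp_ent { int peer_preempt; };

int peer_wakeup_preempts(struct pp_ent *curr_se,
			 int wakee_prio, int curr_prio)
{
	return wakee_prio < curr_prio || !curr_se->peer_preempt++;
}

/* tick path: a pending peer_preempt forces a resched at the next tick
 * and the count is cleared, so the wakeup-path outcome depends on
 * where a wakeup lands relative to the tick; that is the 'unlucky'
 * timing dependence described above */
int tick_handles_peer_preempt(struct pp_ent *curr_se)
{
	int resched = curr_se->peer_preempt != 0;

	curr_se->peer_preempt = 0;
	return resched;
}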
diff --git a/kernel/sched.c b/kernel/sched.c
index 387258cdd0b9..4b23dfb4c80f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -469,7 +469,7 @@ const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_TREE_AVG * 0 |
SCHED_FEAT_APPROX_AVG * 0 |
SCHED_FEAT_WAKEUP_PREEMPT * 1 |
- SCHED_FEAT_PREEMPT_RESTRICT * 1;
+ SCHED_FEAT_PREEMPT_RESTRICT * 0;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
commit a5fbb6d1064be885d2a6b82f625186753cf74848
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 9 22:39:38 2007 +0100
KVM: fix !SMP build error
fix a !SMP build error:
drivers/kvm/kvm_main.c: In function 'kvm_flush_remote_tlbs':
drivers/kvm/kvm_main.c:220: error: implicit declaration of function 'smp_call_function_mask'
(and also avoid an unused-function warning related to
up_smp_call_function() not making use of the 'func' parameter.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
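On !SMP there are no other CPUs to signal, so the new macro can collapse
to a helper that merely references its arguments and returns 0. A
standalone sketch of the pattern (hypothetical handler and caller names,
not the KVM code):

#include <stddef.h>

static inline int up_smp_call_function(void (*func)(void *), void *info)
{
	(void)func;
	(void)info;
	return 0;	/* zero other CPUs were signalled */
}

#define smp_call_function_mask(mask, func, info, wait) \
	(up_smp_call_function(func, info))

void flush_handler(void *data)
{
	(void)data;	/* per-CPU flush work would go here */
}

int flush_example(unsigned long mask)
{
	(void)mask;	/* the UP fallback ignores the mask */
	/* compiles and links on UP; func is referenced (so no unused-
	 * function warning for static handlers) and nothing runs */
	return smp_call_function_mask(mask, flush_handler, NULL, 1);
}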
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 259a13c3bd98..c25e66bcecf3 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -84,11 +84,12 @@ void smp_prepare_boot_cpu(void);
* These macros fold the SMP functionality into a single CPU system
*/
#define raw_smp_processor_id() 0
-static inline int up_smp_call_function(void)
+static inline int up_smp_call_function(void (*func)(void *), void *info)
{
return 0;
}
-#define smp_call_function(func,info,retry,wait) (up_smp_call_function())
+#define smp_call_function(func, info, retry, wait) \
+ (up_smp_call_function(func, info))
#define on_each_cpu(func,info,retry,wait) \
({ \
local_irq_disable(); \
@@ -107,6 +108,8 @@ static inline void smp_send_reschedule(int cpu) { }
local_irq_enable(); \
0; \
})
+#define smp_call_function_mask(mask, func, info, wait) \
+ (up_smp_call_function(func, info))
#endif /* !SMP */
commit 0492007ed9b53f6a2a2f983910d0fe7c97b09822
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 9 22:39:38 2007 +0100
x86: make nmi_cpu_busy() always defined
nmi_cpu_busy() must be available on !SMP too.
this is in preparation for an smp_call_function_mask() fix.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
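The pattern is to keep the definition unconditional and move the #ifdef
inside the body, so the symbol exists in every configuration while the
SMP-only work compiles away. Schematically (hypothetical names, not the
literal watchdog code):

void busy_handler(void *data)
{
	(void)data;	/* keeps !SMP builds warning-free */
#ifdef CONFIG_SMP
	/* SMP-only body: spin until the test ends, so the NMI watchdog
	 * sees a genuinely busy CPU during the check */
	volatile int *endflag = data;

	while (*endflag == 0)
		;
#endif
	/* on !SMP the function is empty but still defined, so the
	 * reference created by smp_call_function() still resolves */
}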
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index f803ed0ed1c4..600fd404e440 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -51,13 +51,13 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
static int endflag __initdata = 0;
-#ifdef CONFIG_SMP
/* The performance counters used by NMI_LOCAL_APIC don't trigger when
* the CPU is idle. To make sure the NMI watchdog really ticks on all
* CPUs during the test make them busy.
*/
static __init void nmi_cpu_busy(void *data)
{
+#ifdef CONFIG_SMP
local_irq_enable_in_hardirq();
/* Intentionally don't use cpu_relax here. This is
to make sure that the performance counter really ticks,
@@ -67,8 +67,8 @@ static __init void nmi_cpu_busy(void *data)
care if they get somewhat less cycles. */
while (endflag == 0)
mb();
-}
#endif
+}
static int __init check_nmi_watchdog(void)
{
commit 4e2947f12516d13446d6ffa1d9e4fbd33b1636fa
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 9 22:39:38 2007 +0100
x86: make ipi_handler() always defined
prepare for up_smp_call_function() making use of the 'func' pointer:
ipi_handler() must now be defined on !SMP builds too. (this is related
to a KVM build fix)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 9abbdf7562c5..3b20613325dc 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -139,13 +139,12 @@ struct set_mtrr_data {
mtrr_type smp_type;
};
-#ifdef CONFIG_SMP
-
static void ipi_handler(void *info)
/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
[RETURNS] Nothing.
*/
{
+#ifdef CONFIG_SMP
struct set_mtrr_data *data = info;
unsigned long flags;
@@ -168,9 +167,8 @@ static void ipi_handler(void *info)
atomic_dec(&data->count);
local_irq_restore(flags);
-}
-
#endif
+}
static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
return type1 == MTRR_TYPE_UNCACHABLE ||
commit 19978ca610946ed57c071bad63f8f6642ca1298b
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 9 22:39:38 2007 +0100
sched: reintroduce SMP tunings again
Yanmin Zhang reported an aim7 regression and bisected it down to:
| commit 38ad464d410dadceda1563f36bdb0be7fe4c8938
| Author: Ingo Molnar <mingo@elte.hu>
| Date: Mon Oct 15 17:00:02 2007 +0200
|
| sched: uniform tunings
|
| use the same defaults on both UP and SMP.
fix this by reintroducing similar SMP tunings. This resolves the
regression.
(also update the comments to match the ilog2(nr_cpus) tuning effect)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
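The factor's effect is easiest to see with concrete numbers; a small
compile-and-run sketch using the defaults and the 200 ms cap from the
diff below:

#include <stdio.h>

/* worked example of the 'factor = 1 + ilog2(ncpus)' scaling */
unsigned int ilog2_u(unsigned int n)
{
	unsigned int l = 0;

	while (n >>= 1)
		l++;
	return l;
}

int main(void)
{
	const unsigned long limit = 200000000UL;	/* 200 ms cap, in ns */
	unsigned int ncpus[] = { 1, 2, 4, 16 };

	for (int i = 0; i < 4; i++) {
		unsigned int factor = 1 + ilog2_u(ncpus[i]);
		unsigned long latency = 20000000UL * factor;	/* 20 ms base */

		if (latency > limit)
			latency = limit;
		/* 1 CPU -> 20 ms, 2 -> 40 ms, 4 -> 60 ms, 16 -> 100 ms */
		printf("%2u CPUs: factor %u, sched_latency %lu ns\n",
		       ncpus[i], factor, latency);
	}
	return 0;
}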
diff --git a/kernel/sched.c b/kernel/sched.c
index 3f6bd1112900..69cae271c63b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4992,6 +4992,32 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
*/
cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
+/*
+ * Increase the granularity value when there are more CPUs,
+ * because with more CPUs the 'effective latency' as visible
+ * to users decreases. But the relationship is not linear,
+ * so pick a second-best guess by going with the log2 of the
+ * number of CPUs.
+ *
+ * This idea comes from the SD scheduler of Con Kolivas:
+ */
+static inline void sched_init_granularity(void)
+{
+ unsigned int factor = 1 + ilog2(num_online_cpus());
+ const unsigned long limit = 200000000;
+
+ sysctl_sched_min_granularity *= factor;
+ if (sysctl_sched_min_granularity > limit)
+ sysctl_sched_min_granularity = limit;
+
+ sysctl_sched_latency *= factor;
+ if (sysctl_sched_latency > limit)
+ sysctl_sched_latency = limit;
+
+ sysctl_sched_wakeup_granularity *= factor;
+ sysctl_sched_batch_wakeup_granularity *= factor;
+}
+
#ifdef CONFIG_SMP
/*
* This is how migration works:
@@ -6688,10 +6714,12 @@ void __init sched_init_smp(void)
/* Move init over to a non-isolated CPU */
if (set_cpus_allowed(current, non_isolated_cpus) < 0)
BUG();
+ sched_init_granularity();
}
#else
void __init sched_init_smp(void)
{
+ sched_init_granularity();
}
#endif /* CONFIG_SMP */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c495dcf7031b..7264814ba62a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -22,7 +22,7 @@
/*
* Targeted preemption latency for CPU-bound tasks:
- * (default: 20ms, units: nanoseconds)
+ * (default: 20ms * ilog(ncpus), units: nanoseconds)
*
* NOTE: this latency value is not the same as the concept of
* 'timeslice length' - timeslices in CFS are of variable length
@@ -32,18 +32,18 @@
* (to see the precise effective timeslice length of your workload,
* run vmstat and monitor the context-switches (cs) field)
*/
-const_debug unsigned int sysctl_sched_latency = 20000000ULL;
+unsigned int sysctl_sched_latency = 20000000ULL;
/*
* Minimal preemption granularity for CPU-bound tasks:
- * (default: 1 msec, units: nanoseconds)
+ * (default: 1 msec * ilog(ncpus), units: nanoseconds)
*/
-const_debug unsigned int sysctl_sched_min_granularity = 1000000ULL;
+unsigned int sysctl_sched_min_granularity = 1000000ULL;
/*
* is kept at sysctl_sched_latency / sysctl_sched_min_granularity
*/
-const_debug unsigned int sched_nr_latency = 20;
+unsigned int sched_nr_latency = 20;
/*
* After fork, child runs first. (default) If set to 0 then
@@ -61,23 +61,23 @@ unsigned int __read_mostly sysctl_sched_compat_yield;
/*
* SCHED_BATCH wake-up granularity.
- * (default: 10 msec, units: nanoseconds)
+ * (default: 10 msec * ilog(ncpus), units: nanoseconds)
*
* This option delays the preemption effects of decoupled workloads
* and reduces their over-scheduling. Synchronous workloads will still
* have immediate wakeup/sleep latencies.
*/
-const_debug unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
+unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL;
/*
* SCHED_OTHER wake-up granularity.
- * (default: 10 msec, units: nanoseconds)
+ * (default: 10 msec * ilog(ncpus), units: nanoseconds)
*
* This option delays the preemption effects of decoupled workloads
* and reduces their over-scheduling. Synchronous workloads will still
* have immediate wakeup/sleep latencies.
*/
-const_debug unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
+unsigned int sysctl_sched_wakeup_granularity = 10000000UL;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
commit 38605cae99d386332df6822a22dba7bfdc8fae1c
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 29 21:18:11 2007 +0100
sched: fix style in kernel/sched.c
fallout of recent commits: small coding style fixes.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 6c4abfbd68e6..3f6bd1112900 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5365,7 +5365,7 @@ static struct ctl_table sd_ctl_dir[] = {
.procname = "sched_domain",
.mode = 0555,
},
- {0,},
+ {0, },
};
static struct ctl_table sd_ctl_root[] = {
@@ -5375,7 +5375,7 @@ static struct ctl_table sd_ctl_root[] = {
.mode = 0555,
.child = sd_ctl_dir,
},
- {0,},
+ {0, },
};
static struct ctl_table *sd_alloc_ctl_entry(int n)
@@ -7251,13 +7251,13 @@ static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
}
struct cgroup_subsys cpu_cgroup_subsys = {
- .name = "cpu",
- .create = cpu_cgroup_create,
- .destroy = cpu_cgroup_destroy,
- .can_attach = cpu_cgroup_can_attach,
- .attach = cpu_cgroup_attach,
- .populate = cpu_cgroup_populate,
- .subsys_id = cpu_cgroup_subsys_id,
+ .name = "cpu",
+ .create = cpu_cgroup_create,
+ .destroy = cpu_cgroup_destroy,
+ .can_attach = cpu_cgroup_can_attach,
+ .attach = cpu_cgroup_attach,
+ .populate = cpu_cgroup_populate,
+ .subsys_id = cpu_cgroup_subsys_id,
.early_init = 1,
};
commit 8eb172d9418c9387234a2c9a344131c46b5eea5b
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 29 21:18:11 2007 +0100
sched: fix style of swap() macro in kernel/sched_fair.c
fix style of swap() macro in kernel/sched_fair.c.
(this macro should eventually move to a general header, as ext3 uses
a similar construct too.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
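For context, the do { } while (0) wrapper is what lets this
multi-statement macro behave as a single statement, for instance in an
unbraced if/else. A usage sketch (typeof is the GCC extension the
kernel relies on):

#include <stdio.h>

#define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)

int main(void)
{
	int x = 1, y = 2;

	/* without the do/while wrapper the expansion would end in a
	 * spare ';' and break the if/else pairing below */
	if (x < y)
		swap(x, y);
	else
		printf("already ordered\n");

	printf("x=%d y=%d\n", x, y);	/* prints: x=2 y=1 */
	return 0;
}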
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9971831b560e..01859f662ab7 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1025,7 +1025,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr)
}
}
-#define swap(a,b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)
+#define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)
/*
* Share the fairness runtime between parent and child, thus the
commit b22817b3c81cdb18ffe3d2debfee968731a8b5f4
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Oct 15 19:43:21 2007 +0200
USB: fix ssb_ohci_probe() build bug
fix ssb_ohci_probe() build bug:
drivers/built-in.o: In function `ssb_ohci_probe':
ohci-hcd.c:(.text+0xbff39): undefined reference to `ssb_device_enable'
ohci-hcd.c:(.text+0xbff6f): undefined reference to `ssb_admatch_base'
ohci-hcd.c:(.text+0xbff8b): undefined reference to `ssb_admatch_size'
ohci-hcd.c:(.text+0xbffe5): undefined reference to `ssb_device_disable'
[...]
the reason was that this Kconfig combination was allowed:
CONFIG_SSB=m
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_SSB=y
the fix is to require a modular USB_OHCI_HCD build when SSB is modular:
USB_OHCI_HCD_SSB is a bool, so the SSB glue is built into whatever
USB_OHCI_HCD builds to, and a built-in OHCI driver cannot reference
symbols exported by a modular ssb driver. The new dependency
(SSB = y || SSB = USB_OHCI_HCD) forbids exactly the SSB=m with
USB_OHCI_HCD=y combination.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Michael Buesch <mb@bu3sch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index c978d622fa8a..177e78ed241b 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -156,7 +156,7 @@ config USB_OHCI_HCD_PCI
config USB_OHCI_HCD_SSB
bool "OHCI support for Broadcom SSB OHCI core"
- depends on USB_OHCI_HCD && SSB && EXPERIMENTAL
+ depends on USB_OHCI_HCD && (SSB = y || SSB = USB_OHCI_HCD) && EXPERIMENTAL
default n
---help---
Support for the Sonics Silicon Backplane (SSB) attached