Patches contributed by Eötvös Loránd University
commit 3f2aa307c4d26b4ed6509d0a79e8254c9e07e921
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Sep 10 20:34:48 2009 +0200
sched: Disable NEW_FAIR_SLEEPERS for now
Nikos Chantziaras and Jens Axboe reported that turning off
NEW_FAIR_SLEEPERS visibly improves desktop interactivity.
Nikos described his experience as follows:
" With this setting, I can do "nice -n 19 make -j20" and
still have a very smooth desktop and watch a movie at
the same time. Various other annoyances (like the
"logout/shutdown/restart" dialog of KDE not appearing
at all until the background fade-out effect has finished)
are also gone. So this seems to be the single most
important setting that vastly improves desktop behavior,
at least here. "
Jens described it as follows, referring to a 10-second
xmodmap scheduling delay he was trying to debug:
" Then I tried switching NO_NEW_FAIR_SLEEPERS on, and then
I get:
Performance counter stats for 'xmodmap .xmodmap-carl':
9.009137 task-clock-msecs # 0.447 CPUs
18 context-switches # 0.002 M/sec
1 CPU-migrations # 0.000 M/sec
315 page-faults # 0.035 M/sec
0.020167093 seconds time elapsed
Woot! "
So disable it for now. In perf trace output I can see weird
delta timestamps:
cc1-9943 [001] 2802.059479616: sched_stat_wait: task: as:9944 wait: 2801938766276 [ns]
That nsec field is not supposed to be that large: it works out to
roughly 2802 seconds, about the same magnitude as the absolute trace
timestamp on that line, which hints that an absolute time is being
used where a delta was intended. More digging is needed - but let's
turn the feature off while the real bug is found.
Reported-by: Nikos Chantziaras <realnc@arcor.de>
Tested-by: Nikos Chantziaras <realnc@arcor.de>
Reported-by: Jens Axboe <jens.axboe@oracle.com>
Tested-by: Jens Axboe <jens.axboe@oracle.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <4AA93D34.8040500@arcor.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 4569bfa7df9b..e2dc63a5815d 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -1,4 +1,4 @@
-SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
+SCHED_FEAT(NEW_FAIR_SLEEPERS, 0)
SCHED_FEAT(NORMALIZED_SLEEPER, 0)
SCHED_FEAT(ADAPTIVE_GRAN, 1)
SCHED_FEAT(WAKEUP_PREEMPT, 1)
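For context, flipping that second argument only changes the boot-time
default: kernel/sched.c of that era re-included sched_features.h with
different SCHED_FEAT() definitions to build both an enum of feature bits
and the default feature mask, and with CONFIG_SCHED_DEBUG the feature
could still be toggled at run time by writing NO_NEW_FAIR_SLEEPERS (or
NEW_FAIR_SLEEPERS) to /sys/kernel/debug/sched_features - the switch Jens
refers to above. Below is a stand-alone sketch of the same idiom; it uses
a hypothetical X-macro list instead of the kernel's re-include trick so
that it compiles on its own:

/* Hypothetical illustration of the SCHED_FEAT() default-mask idiom. */
#include <stdio.h>

#define SCHED_FEAT_LIST(F)              \
        F(NEW_FAIR_SLEEPERS, 0)         \
        F(NORMALIZED_SLEEPER, 0)        \
        F(ADAPTIVE_GRAN, 1)             \
        F(WAKEUP_PREEMPT, 1)

/* First expansion: one enum constant (bit index) per feature. */
#define SCHED_FEAT(name, enabled) __SCHED_FEAT_##name,
enum { SCHED_FEAT_LIST(SCHED_FEAT) __SCHED_FEAT_NR };
#undef SCHED_FEAT

/* Second expansion: the mask of features enabled by default. */
#define SCHED_FEAT(name, enabled) ((enabled) << __SCHED_FEAT_##name) |
static const unsigned int sched_feat_defaults = SCHED_FEAT_LIST(SCHED_FEAT) 0;
#undef SCHED_FEAT

int main(void)
{
        /* With NEW_FAIR_SLEEPERS set to 0 above, bit 0 stays clear. */
        printf("default feature mask: %#x\n", sched_feat_defaults);
        return 0;
}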
commit a1922ed661ab2c1637d0b10cde933bd9cd33d965
Merge: 75e33751ca8b d28daf923ac5
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Sep 7 08:19:51 2009 +0200
Merge branch 'tracing/core' into tracing/hw-breakpoints
Conflicts:
arch/Kconfig
kernel/trace/trace.h
Merge reason: resolve the conflicts, plus adapt to the new
ring-buffer APIs.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --cc arch/Kconfig
index 1adf2d0e6356,99193b160232..c72f18fde319
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@@ -113,6 -113,4 +113,8 @@@ config HAVE_DMA_API_DEBU
config HAVE_DEFAULT_NO_SPIN_MUTEXES
bool
+config HAVE_HW_BREAKPOINT
+ bool
+
++
+ source "kernel/gcov/Kconfig"
diff --cc arch/x86/kernel/ptrace.c
index cabdabce3cb2,8d7d5c9c1be3..113b8927c822
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@@ -34,12 -34,12 +34,13 @@@
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
- #include <trace/syscall.h>
-
#include "tls.h"
+ #define CREATE_TRACE_POINTS
+ #include <trace/events/syscalls.h>
+
enum x86_regset {
REGSET_GENERAL,
REGSET_FP,
diff --cc kernel/Makefile
index f88decb1b445,2093a691f1c2..52508612a08f
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@@ -95,9 -96,9 +96,10 @@@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
+ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
diff --cc kernel/trace/trace.h
index ff1ef411a176,fa1dccb579d5..ea7e0bcbd539
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@@ -334,11 -315,6 +330,7 @@@ extern void __ftrace_bad_type(void)
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
- IF_ASSIGN(var, ent, struct syscall_trace_enter, \
- TRACE_SYSCALL_ENTER); \
- IF_ASSIGN(var, ent, struct syscall_trace_exit, \
- TRACE_SYSCALL_EXIT); \
+ IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
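The arch/x86/kernel/ptrace.c hunk above follows the standard TRACE_EVENT
convention: exactly one compilation unit defines CREATE_TRACE_POINTS
immediately before including the trace/events/*.h header, so the
tracepoint definitions are instantiated in that file, while every other
includer of the header only sees declarations. The real machinery
(TRACE_EVENT, define_trace.h) is considerably more involved; the
hypothetical sketch below - two files shown back to back, split at the
marked boundary - only shows the structural shape of the idiom:

/* --- my_events.h: hypothetical stand-in for trace/events/syscalls.h --- */
#ifndef MY_EVENTS_H
#define MY_EVENTS_H
void trace_my_sys_enter(long nr);       /* declaration, visible everywhere */
#endif /* MY_EVENTS_H */

#ifdef CREATE_TRACE_POINTS
/* Instantiated only in the one .c file that defined CREATE_TRACE_POINTS
 * before including this header (ptrace.c plays that role above). */
#include <stdio.h>
void trace_my_sys_enter(long nr)
{
        printf("sys_enter: %ld\n", nr);
}
#endif /* CREATE_TRACE_POINTS */

/* --- my_ptrace.c: the one file that owns the instantiation --- */
#define CREATE_TRACE_POINTS
#include "my_events.h"

int main(void)
{
        trace_my_sys_enter(1);          /* emits the "event" */
        return 0;
}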
commit d28daf923ac5e4a0d7cecebae56f3e339189366b
Merge: ed011b22ce56 4a88d44ab17d
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Sep 6 06:27:40 2009 +0200
Merge branch 'tracing/core' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/random-tracing into tracing/core
commit ed011b22ce567eabefa9ea571d3721c10ecd0553
Merge: 85bac32c4a52 e07cccf40469
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Sep 6 06:11:38 2009 +0200
Merge commit 'v2.6.31-rc9' into tracing/core
Merge reason: move from -rc5 to -rc9.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
commit 695a461296e5df148c99ac087b9e1cb380f4db15
Merge: c7084b35eb1a 2b681fafcc50
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Sep 4 14:44:16 2009 +0200
Merge branch 'amd-iommu/2.6.32' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu into core/iommu
commit 840a0653100dbde599ae8ddf83fa214dfa5fd1aa
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Sep 4 11:32:54 2009 +0200
sched: Turn on SD_BALANCE_NEWIDLE
Start re-tuning the balancer by turning on newidle balancing.
It improves hackbench performance and parallelism on a 4x4 box.
The "perf stat --repeat 10" measurements give us:
domain0 domain1
.......................................
-SD_BALANCE_NEWIDLE -SD_BALANCE_NEWIDLE:
2041.273208 task-clock-msecs # 9.354 CPUs ( +- 0.363% )
+SD_BALANCE_NEWIDLE -SD_BALANCE_NEWIDLE:
2086.326925 task-clock-msecs # 11.934 CPUs ( +- 0.301% )
+SD_BALANCE_NEWIDLE +SD_BALANCE_NEWIDLE:
2115.289791 task-clock-msecs # 12.158 CPUs ( +- 0.263% )
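(For scale: elapsed time is roughly task-clock divided by the CPUs
figure, so these runs correspond to about 2041/9.354 ~ 218 ms,
2086/11.934 ~ 175 ms and 2115/12.158 ~ 174 ms of wall-clock each,
i.e. roughly 3.6% more total CPU time buys about 20% shorter runs.)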
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Gautham R Shenoy <ego@in.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index be29eb81fb06..ef7bc7fc2528 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -142,7 +142,7 @@ extern unsigned long node_remap_size[];
.forkexec_idx = SD_FORKEXEC_IDX, \
\
.flags = 1*SD_LOAD_BALANCE \
- | 0*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_WAKE_IDLE \
diff --git a/include/linux/topology.h b/include/linux/topology.h
index fe2c0329f82f..66774fddec9b 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -126,7 +126,7 @@ int arch_update_cpu_topology(void);
.forkexec_idx = 1, \
\
.flags = 1*SD_LOAD_BALANCE \
- | 0*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_WAKE_IDLE \
@@ -160,7 +160,7 @@ int arch_update_cpu_topology(void);
.forkexec_idx = 1, \
\
.flags = 1*SD_LOAD_BALANCE \
- | 0*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_NEWIDLE \
| 1*SD_BALANCE_EXEC \
| 1*SD_BALANCE_FORK \
| 0*SD_WAKE_IDLE \
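For orientation: SD_BALANCE_NEWIDLE allows a CPU whose runqueue has just
gone empty to immediately try to pull work from a busier CPU in that
sched domain, instead of waiting for the next periodic balance. A loose,
hypothetical sketch of the control flow the flag gates (simplified well
beyond the real idle_balance()/load_balance() path):

/* Hypothetical sketch: what a per-domain flag like SD_BALANCE_NEWIDLE gates. */
#define SDF_LOAD_BALANCE        0x0001
#define SDF_BALANCE_NEWIDLE     0x0002

struct sd_sketch {
        unsigned int flags;
        struct sd_sketch *parent;   /* next-larger domain (core, package, node) */
};

/* Stand-in for the real load balancer; returns number of tasks pulled. */
static int pull_from_busiest(int this_cpu, struct sd_sketch *sd)
{
        (void)this_cpu; (void)sd;
        return 0;
}

/* Called when this_cpu is about to go idle. */
static int newidle_balance_sketch(int this_cpu, struct sd_sketch *sd)
{
        int pulled = 0;

        for (; sd && !pulled; sd = sd->parent) {
                if (!(sd->flags & SDF_LOAD_BALANCE))
                        continue;
                if (sd->flags & SDF_BALANCE_NEWIDLE)
                        pulled = pull_from_busiest(this_cpu, sd);
        }
        return pulled;
}

int main(void)
{
        struct sd_sketch core = { SDF_LOAD_BALANCE | SDF_BALANCE_NEWIDLE, 0 };

        /* With the flag set, the idle path is allowed to pull; here the
         * stub finds nothing, so 0 tasks are pulled. */
        return newidle_balance_sketch(0, &core);
}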
commit 47734f89be0614b5acbd6a532390f9c72f019648
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Sep 4 11:21:24 2009 +0200
sched: Clean up topology.h
Re-organize the flag settings so that it is visible at a glance
which sched-domains flags are set and which are not.
With the new balancer code we will need to re-tune these details
anyway, so clean them up now to make fewer mistakes down the
road ;-)
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Gautham R Shenoy <ego@in.ibm.com>
Cc: Balbir Singh <balbir@in.ibm.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 066ef590d7e0..be29eb81fb06 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -129,25 +129,34 @@ extern unsigned long node_remap_size[];
#endif
/* sched_domains SD_NODE_INIT for NUMA machines */
-#define SD_NODE_INIT (struct sched_domain) { \
- .min_interval = 8, \
- .max_interval = 32, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_nice_tries = SD_CACHE_NICE_TRIES, \
- .busy_idx = 3, \
- .idle_idx = SD_IDLE_IDX, \
- .newidle_idx = SD_NEWIDLE_IDX, \
- .wake_idx = 1, \
- .forkexec_idx = SD_FORKEXEC_IDX, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_EXEC \
- | SD_BALANCE_FORK \
- | SD_WAKE_AFFINE \
- | SD_WAKE_BALANCE \
- | SD_SERIALIZE, \
- .last_balance = jiffies, \
- .balance_interval = 1, \
+#define SD_NODE_INIT (struct sched_domain) { \
+ .min_interval = 8, \
+ .max_interval = 32, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = SD_CACHE_NICE_TRIES, \
+ .busy_idx = 3, \
+ .idle_idx = SD_IDLE_IDX, \
+ .newidle_idx = SD_NEWIDLE_IDX, \
+ .wake_idx = 1, \
+ .forkexec_idx = SD_FORKEXEC_IDX, \
+ \
+ .flags = 1*SD_LOAD_BALANCE \
+ | 0*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_WAKE_IDLE \
+ | 1*SD_WAKE_AFFINE \
+ | 1*SD_WAKE_BALANCE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_POWERSAVINGS_BALANCE \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 1*SD_SERIALIZE \
+ | 0*SD_WAKE_IDLE_FAR \
+ | 0*SD_PREFER_SIBLING \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
}
#ifdef CONFIG_X86_64_ACPI_NUMA
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 6203ae5067ce..fe2c0329f82f 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -85,21 +85,29 @@ int arch_update_cpu_topology(void);
#define ARCH_HAS_SCHED_WAKE_IDLE
/* Common values for SMT siblings */
#ifndef SD_SIBLING_INIT
-#define SD_SIBLING_INIT (struct sched_domain) { \
- .min_interval = 1, \
- .max_interval = 2, \
- .busy_factor = 64, \
- .imbalance_pct = 110, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_NEWIDLE \
- | SD_BALANCE_FORK \
- | SD_BALANCE_EXEC \
- | SD_WAKE_AFFINE \
- | SD_WAKE_BALANCE \
- | SD_SHARE_CPUPOWER, \
- .last_balance = jiffies, \
- .balance_interval = 1, \
- .smt_gain = 1178, /* 15% */ \
+#define SD_SIBLING_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 2, \
+ .busy_factor = 64, \
+ .imbalance_pct = 110, \
+ \
+ .flags = 1*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_WAKE_IDLE \
+ | 1*SD_WAKE_AFFINE \
+ | 1*SD_WAKE_BALANCE \
+ | 1*SD_SHARE_CPUPOWER \
+ | 0*SD_POWERSAVINGS_BALANCE \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ | 0*SD_WAKE_IDLE_FAR \
+ | 0*SD_PREFER_SIBLING \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .smt_gain = 1178, /* 15% */ \
}
#endif
#endif /* CONFIG_SCHED_SMT */
@@ -107,69 +115,94 @@ int arch_update_cpu_topology(void);
#ifdef CONFIG_SCHED_MC
/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
#ifndef SD_MC_INIT
-#define SD_MC_INIT (struct sched_domain) { \
- .min_interval = 1, \
- .max_interval = 4, \
- .busy_factor = 64, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 2, \
- .wake_idx = 1, \
- .forkexec_idx = 1, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_FORK \
- | SD_BALANCE_EXEC \
- | SD_WAKE_AFFINE \
- | SD_WAKE_BALANCE \
- | SD_SHARE_PKG_RESOURCES\
- | sd_balance_for_mc_power()\
- | sd_power_saving_flags(),\
- .last_balance = jiffies, \
- .balance_interval = 1, \
+#define SD_MC_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .wake_idx = 1, \
+ .forkexec_idx = 1, \
+ \
+ .flags = 1*SD_LOAD_BALANCE \
+ | 0*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_WAKE_IDLE \
+ | 1*SD_WAKE_AFFINE \
+ | 1*SD_WAKE_BALANCE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 1*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ | 0*SD_WAKE_IDLE_FAR \
+ | sd_balance_for_mc_power() \
+ | sd_power_saving_flags() \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
}
#endif
#endif /* CONFIG_SCHED_MC */
/* Common values for CPUs */
#ifndef SD_CPU_INIT
-#define SD_CPU_INIT (struct sched_domain) { \
- .min_interval = 1, \
- .max_interval = 4, \
- .busy_factor = 64, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 2, \
- .idle_idx = 1, \
- .newidle_idx = 2, \
- .wake_idx = 1, \
- .forkexec_idx = 1, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_EXEC \
- | SD_BALANCE_FORK \
- | SD_WAKE_AFFINE \
- | SD_WAKE_BALANCE \
- | sd_balance_for_package_power()\
- | sd_power_saving_flags(),\
- .last_balance = jiffies, \
- .balance_interval = 1, \
+#define SD_CPU_INIT (struct sched_domain) { \
+ .min_interval = 1, \
+ .max_interval = 4, \
+ .busy_factor = 64, \
+ .imbalance_pct = 125, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 2, \
+ .idle_idx = 1, \
+ .newidle_idx = 2, \
+ .wake_idx = 1, \
+ .forkexec_idx = 1, \
+ \
+ .flags = 1*SD_LOAD_BALANCE \
+ | 0*SD_BALANCE_NEWIDLE \
+ | 1*SD_BALANCE_EXEC \
+ | 1*SD_BALANCE_FORK \
+ | 0*SD_WAKE_IDLE \
+ | 0*SD_WAKE_AFFINE \
+ | 1*SD_WAKE_BALANCE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ | 0*SD_WAKE_IDLE_FAR \
+ | sd_balance_for_package_power() \
+ | sd_power_saving_flags() \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
}
#endif
/* sched_domains SD_ALLNODES_INIT for NUMA machines */
-#define SD_ALLNODES_INIT (struct sched_domain) { \
- .min_interval = 64, \
- .max_interval = 64*num_online_cpus(), \
- .busy_factor = 128, \
- .imbalance_pct = 133, \
- .cache_nice_tries = 1, \
- .busy_idx = 3, \
- .idle_idx = 3, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_NEWIDLE \
- | SD_WAKE_AFFINE \
- | SD_SERIALIZE, \
- .last_balance = jiffies, \
- .balance_interval = 64, \
+#define SD_ALLNODES_INIT (struct sched_domain) { \
+ .min_interval = 64, \
+ .max_interval = 64*num_online_cpus(), \
+ .busy_factor = 128, \
+ .imbalance_pct = 133, \
+ .cache_nice_tries = 1, \
+ .busy_idx = 3, \
+ .idle_idx = 3, \
+ .flags = 1*SD_LOAD_BALANCE \
+ | 1*SD_BALANCE_NEWIDLE \
+ | 0*SD_BALANCE_EXEC \
+ | 0*SD_BALANCE_FORK \
+ | 0*SD_WAKE_IDLE \
+ | 1*SD_WAKE_AFFINE \
+ | 0*SD_WAKE_BALANCE \
+ | 0*SD_SHARE_CPUPOWER \
+ | 0*SD_POWERSAVINGS_BALANCE \
+ | 0*SD_SHARE_PKG_RESOURCES \
+ | 1*SD_SERIALIZE \
+ | 0*SD_WAKE_IDLE_FAR \
+ | 0*SD_PREFER_SIBLING \
+ , \
+ .last_balance = jiffies, \
+ .balance_interval = 64, \
}
#ifdef CONFIG_NUMA
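The cleanup hinges on a small readability device: every known flag
appears in the initializer, multiplied by 1 if it is set and by 0 if it
is not, so the value is arithmetically identical to listing only the set
flags while the complete flag inventory stays visible (and greppable).
A minimal stand-alone illustration with hypothetical flag values:

#include <assert.h>

#define SD_LOAD_BALANCE     0x01
#define SD_BALANCE_NEWIDLE  0x02
#define SD_BALANCE_EXEC     0x04
#define SD_SERIALIZE        0x08

int main(void)
{
        /* Old style: only the enabled flags are written out. */
        unsigned int terse = SD_LOAD_BALANCE | SD_BALANCE_EXEC;

        /* New style: every flag appears, multiplied by 1 or 0,
         * so disabled flags stay visible in the source. */
        unsigned int explicit = 1*SD_LOAD_BALANCE
                              | 0*SD_BALANCE_NEWIDLE
                              | 1*SD_BALANCE_EXEC
                              | 0*SD_SERIALIZE;

        assert(terse == explicit);      /* same value, clearer intent */
        return 0;
}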
commit d7ea17a76916e456fcc78e45142c66f7fb875e3d
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Sep 4 11:49:25 2009 +0200
sched: Fix dynamic power-balancing crash
This crash:
[ 1774.088275] divide error: 0000 [#1] SMP
[ 1774.100355] CPU 13
[ 1774.102498] Modules linked in:
[ 1774.105631] Pid: 30881, comm: hackbench Not tainted 2.6.31-rc8-tip-01308-g484d664-dirty #1629 X8DTN
[ 1774.114807] RIP: 0010:[<ffffffff81041c38>] [<ffffffff81041c38>]
sched_balance_self+0x19b/0x2d4
It triggers because update_group_power() modifies the sd tree and does
its temporary calculations there in place, without considering that
other CPUs could observe intermediate values, such as the zero initial
value. Calculate the result in a temporary variable and publish it with
a single store instead. (We need no memory barrier, as these are all
statistical values anyway.)
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <20090904092742.GA11014@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index b53785346850..796baf731976 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3765,19 +3765,22 @@ static void update_group_power(struct sched_domain *sd, int cpu)
{
struct sched_domain *child = sd->child;
struct sched_group *group, *sdg = sd->groups;
+ unsigned long power;
if (!child) {
update_cpu_power(sd, cpu);
return;
}
- sdg->cpu_power = 0;
+ power = 0;
group = child->groups;
do {
- sdg->cpu_power += group->cpu_power;
+ power += group->cpu_power;
group = group->next;
} while (group != child->groups);
+
+ sdg->cpu_power = power;
}
/**
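The fix above is an instance of a simple publish-once pattern: never let
concurrent readers observe a half-built value; accumulate into a local
variable and write the final result with one plain store (sufficient
here because cpu_power is only a statistical input, so no memory barrier
is needed). A hedged before/after sketch with hypothetical types:

struct group_sketch {
        unsigned long cpu_power;
        struct group_sketch *next;
};

/* Buggy shape: readers on other CPUs can observe cpu_power == 0 or a
 * partial sum while the loop below is still running. */
static void update_power_racy(struct group_sketch *sdg, struct group_sketch *child)
{
        struct group_sketch *g = child;

        sdg->cpu_power = 0;
        do {
                sdg->cpu_power += g->cpu_power;
                g = g->next;
        } while (g != child);
}

/* Fixed shape: compute locally, publish once. */
static void update_power_fixed(struct group_sketch *sdg, struct group_sketch *child)
{
        struct group_sketch *g = child;
        unsigned long power = 0;

        do {
                power += g->cpu_power;
                g = g->next;
        } while (g != child);

        sdg->cpu_power = power;   /* single store; no intermediate values leak */
}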
commit 9aa55fbd01779a0b476d87cd9b5170fd5bebab1d
Merge: 768d0c27226e 294b0c9619a0 fa84e9eecfff
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Sep 4 10:08:43 2009 +0200
Merge branches 'sched/domains' and 'sched/clock' into sched/core
Merge reason: both topics are ready now, and we want to merge dependent
changes.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --cc kernel/sched.c
index 38d05a89e0f2,cf4c953d6486,1b59e265273b..da1edc8277d0
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@@@ -8478,240 -8476,249 -8378,240 +8576,249 @@@@ static void set_domain_attribute(struc
}
}
- -/*
- - * Build sched domains for a given set of cpus and attach the sched domains
- - * to the individual cpus
- - */
- -static int __build_sched_domains(const struct cpumask *cpu_map,
- - struct sched_domain_attr *attr)
- -{
- - int i, err = -ENOMEM;
- - struct root_domain *rd;
- - cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
- - tmpmask;
+ +static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
+ + const struct cpumask *cpu_map)
+ +{
+ + switch (what) {
+ + case sa_sched_groups:
+ + free_sched_groups(cpu_map, d->tmpmask); /* fall through */
+ + d->sched_group_nodes = NULL;
+ + case sa_rootdomain:
+ + free_rootdomain(d->rd); /* fall through */
+ + case sa_tmpmask:
+ + free_cpumask_var(d->tmpmask); /* fall through */
+ + case sa_send_covered:
+ + free_cpumask_var(d->send_covered); /* fall through */
+ + case sa_this_core_map:
+ + free_cpumask_var(d->this_core_map); /* fall through */
+ + case sa_this_sibling_map:
+ + free_cpumask_var(d->this_sibling_map); /* fall through */
+ + case sa_nodemask:
+ + free_cpumask_var(d->nodemask); /* fall through */
+ + case sa_sched_group_nodes:
#ifdef CONFIG_NUMA
- - cpumask_var_t domainspan, covered, notcovered;
- - struct sched_group **sched_group_nodes = NULL;
- - int sd_allnodes = 0;
- -
- - if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
- - goto out;
- - if (!alloc_cpumask_var(&covered, GFP_KERNEL))
- - goto free_domainspan;
- - if (!alloc_cpumask_var(¬covered, GFP_KERNEL))
- - goto free_covered;
-#endif
-
- if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
- goto free_notcovered;
- if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
- goto free_nodemask;
- if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
- goto free_this_sibling_map;
- if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
- goto free_this_core_map;
- if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
- goto free_send_covered;
+ + kfree(d->sched_group_nodes); /* fall through */
+ + case sa_notcovered:
+ + free_cpumask_var(d->notcovered); /* fall through */
+ + case sa_covered:
+ + free_cpumask_var(d->covered); /* fall through */
+ + case sa_domainspan:
+ + free_cpumask_var(d->domainspan); /* fall through */
+#endif
+ + case sa_none:
+ + break;
+ + }
+ +}
- if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
- goto free_notcovered;
- if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
- goto free_nodemask;
- if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
- goto free_this_sibling_map;
- if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
- goto free_this_core_map;
- if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
- goto free_send_covered;
-
+ +static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
+ + const struct cpumask *cpu_map)
+ +{
#ifdef CONFIG_NUMA
- - /*
- - * Allocate the per-node list of sched groups
- - */
- - sched_group_nodes = kcalloc(nr_node_ids, sizeof(struct sched_group *),
- - GFP_KERNEL);
- - if (!sched_group_nodes) {
+ + if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL))
+ + return sa_none;
+ + if (!alloc_cpumask_var(&d->covered, GFP_KERNEL))
+ + return sa_domainspan;
+ + if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL))
+ + return sa_covered;
+ + /* Allocate the per-node list of sched groups */
+ + d->sched_group_nodes = kcalloc(nr_node_ids,
+ + sizeof(struct sched_group *), GFP_KERNEL);
+ + if (!d->sched_group_nodes) {
printk(KERN_WARNING "Can not alloc sched group node list\n");
- - goto free_tmpmask;
- }
-#endif
-
- rd = alloc_rootdomain();
- if (!rd) {
+ + return sa_notcovered;
+ }
+ + sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
+#endif
-
- rd = alloc_rootdomain();
- if (!rd) {
+ + if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
+ + return sa_sched_group_nodes;
+ + if (!alloc_cpumask_var(&d->this_sibling_map, GFP_KERNEL))
+ + return sa_nodemask;
+ + if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
+ + return sa_this_sibling_map;
+ + if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
+ + return sa_this_core_map;
+ + if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
+ + return sa_send_covered;
+ + d->rd = alloc_rootdomain();
+ + if (!d->rd) {
printk(KERN_WARNING "Cannot alloc root domain\n");
- - goto free_sched_groups;
+ + return sa_tmpmask;
}
+ + return sa_rootdomain;
+ +}
+ +static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
+ + const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i)
+ +{
+ + struct sched_domain *sd = NULL;
#ifdef CONFIG_NUMA
- - sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
- -#endif
- -
- - /*
- - * Set up domains for cpus specified by the cpu_map.
- - */
- - for_each_cpu(i, cpu_map) {
- - struct sched_domain *sd = NULL, *p;
- -
- - cpumask_and(nodemask, cpumask_of_node(cpu_to_node(i)), cpu_map);
- -
- -#ifdef CONFIG_NUMA
- - if (cpumask_weight(cpu_map) >
- - SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
- - sd = &per_cpu(allnodes_domains, i).sd;
- - SD_INIT(sd, ALLNODES);
- - set_domain_attribute(sd, attr);
- - cpumask_copy(sched_domain_span(sd), cpu_map);
- - cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
- - p = sd;
- - sd_allnodes = 1;
- - } else
- - p = NULL;
+ + struct sched_domain *parent;
- - sd = &per_cpu(node_domains, i).sd;
- - SD_INIT(sd, NODE);
+ + d->sd_allnodes = 0;
+ + if (cpumask_weight(cpu_map) >
+ + SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) {
+ + sd = &per_cpu(allnodes_domains, i).sd;
+ + SD_INIT(sd, ALLNODES);
set_domain_attribute(sd, attr);
- - sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
- - sd->parent = p;
- - if (p)
- - p->child = sd;
- - cpumask_and(sched_domain_span(sd),
- - sched_domain_span(sd), cpu_map);
+ + cpumask_copy(sched_domain_span(sd), cpu_map);
+ + cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask);
+ + d->sd_allnodes = 1;
+ + }
+ + parent = sd;
+ +
+ + sd = &per_cpu(node_domains, i).sd;
+ + SD_INIT(sd, NODE);
+ + set_domain_attribute(sd, attr);
+ + sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
+ + sd->parent = parent;
+ + if (parent)
+ + parent->child = sd;
+ + cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
#endif
+ + return sd;
+ +}
- - p = sd;
- - sd = &per_cpu(phys_domains, i).sd;
- - SD_INIT(sd, CPU);
- - set_domain_attribute(sd, attr);
- - cpumask_copy(sched_domain_span(sd), nodemask);
- - sd->parent = p;
- - if (p)
- - p->child = sd;
- - cpu_to_phys_group(i, cpu_map, &sd->groups, tmpmask);
+ +static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
+ + const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+ + struct sched_domain *parent, int i)
+ +{
+ + struct sched_domain *sd;
+ + sd = &per_cpu(phys_domains, i).sd;
+ + SD_INIT(sd, CPU);
+ + set_domain_attribute(sd, attr);
+ + cpumask_copy(sched_domain_span(sd), d->nodemask);
+ + sd->parent = parent;
+ + if (parent)
+ + parent->child = sd;
+ + cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
+ + return sd;
+ +}
+ +static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
+ + const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+ + struct sched_domain *parent, int i)
+ +{
+ + struct sched_domain *sd = parent;
#ifdef CONFIG_SCHED_MC
- - p = sd;
- - sd = &per_cpu(core_domains, i).sd;
- - SD_INIT(sd, MC);
- - set_domain_attribute(sd, attr);
- - cpumask_and(sched_domain_span(sd), cpu_map,
- - cpu_coregroup_mask(i));
- - sd->parent = p;
- - p->child = sd;
- - cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
+ + sd = &per_cpu(core_domains, i).sd;
+ + SD_INIT(sd, MC);
+ + set_domain_attribute(sd, attr);
+ + cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i));
+ + sd->parent = parent;
+ + parent->child = sd;
+ + cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask);
#endif
+ + return sd;
+ +}
+ +static struct sched_domain *__build_smt_sched_domain(struct s_data *d,
+ + const struct cpumask *cpu_map, struct sched_domain_attr *attr,
+ + struct sched_domain *parent, int i)
+ +{
+ + struct sched_domain *sd = parent;
#ifdef CONFIG_SCHED_SMT
- - p = sd;
- - sd = &per_cpu(cpu_domains, i).sd;
- - SD_INIT(sd, SIBLING);
- - set_domain_attribute(sd, attr);
- - cpumask_and(sched_domain_span(sd),
- - topology_thread_cpumask(i), cpu_map);
- - sd->parent = p;
- - p->child = sd;
- - cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
+ + sd = &per_cpu(cpu_domains, i).sd;
+ + SD_INIT(sd, SIBLING);
+ + set_domain_attribute(sd, attr);
+ + cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i));
+ + sd->parent = parent;
+ + parent->child = sd;
+ + cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask);
#endif
- - }
+ + return sd;
+ +}
+ +static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
+ + const struct cpumask *cpu_map, int cpu)
+ +{
+ + switch (l) {
#ifdef CONFIG_SCHED_SMT
- - /* Set up CPU (sibling) groups */
- - for_each_cpu(i, cpu_map) {
- - cpumask_and(this_sibling_map,
- - topology_thread_cpumask(i), cpu_map);
- - if (i != cpumask_first(this_sibling_map))
- - continue;
- -
- - init_sched_build_groups(this_sibling_map, cpu_map,
- - &cpu_to_cpu_group,
- - send_covered, tmpmask);
- - }
+ + case SD_LV_SIBLING: /* set up CPU (sibling) groups */
+ + cpumask_and(d->this_sibling_map, cpu_map,
+ + topology_thread_cpumask(cpu));
+ + if (cpu == cpumask_first(d->this_sibling_map))
+ + init_sched_build_groups(d->this_sibling_map, cpu_map,
+ + &cpu_to_cpu_group,
+ + d->send_covered, d->tmpmask);
+ + break;
#endif
- -
#ifdef CONFIG_SCHED_MC
- - /* Set up multi-core groups */
- - for_each_cpu(i, cpu_map) {
- - cpumask_and(this_core_map, cpu_coregroup_mask(i), cpu_map);
- - if (i != cpumask_first(this_core_map))
- - continue;
- -
- - init_sched_build_groups(this_core_map, cpu_map,
- - &cpu_to_core_group,
- - send_covered, tmpmask);
- - }
+ + case SD_LV_MC: /* set up multi-core groups */
+ + cpumask_and(d->this_core_map, cpu_map, cpu_coregroup_mask(cpu));
+ + if (cpu == cpumask_first(d->this_core_map))
+ + init_sched_build_groups(d->this_core_map, cpu_map,
+ + &cpu_to_core_group,
+ + d->send_covered, d->tmpmask);
+ + break;
#endif
- -
- - /* Set up physical groups */
- - for (i = 0; i < nr_node_ids; i++) {
- - cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
- - if (cpumask_empty(nodemask))
- - continue;
- -
- - init_sched_build_groups(nodemask, cpu_map,
- - &cpu_to_phys_group,
- - send_covered, tmpmask);
- - }
- -
+ + case SD_LV_CPU: /* set up physical groups */
+ + cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
+ + if (!cpumask_empty(d->nodemask))
+ + init_sched_build_groups(d->nodemask, cpu_map,
+ + &cpu_to_phys_group,
+ + d->send_covered, d->tmpmask);
+ + break;
#ifdef CONFIG_NUMA
- - /* Set up node groups */
- - if (sd_allnodes) {
- - init_sched_build_groups(cpu_map, cpu_map,
- - &cpu_to_allnodes_group,
- - send_covered, tmpmask);
+ + case SD_LV_ALLNODES:
+ + init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group,
+ + d->send_covered, d->tmpmask);
+ + break;
+ +#endif
+ + default:
+ + break;
}
+ +}
- - for (i = 0; i < nr_node_ids; i++) {
- - /* Set up node groups */
- - struct sched_group *sg, *prev;
- - int j;
- -
- - cpumask_clear(covered);
- - cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
- - if (cpumask_empty(nodemask)) {
- - sched_group_nodes[i] = NULL;
- - continue;
- - }
+ +/*
+ + * Build sched domains for a given set of cpus and attach the sched domains
+ + * to the individual cpus
+ + */
+ +static int __build_sched_domains(const struct cpumask *cpu_map,
+ + struct sched_domain_attr *attr)
+ +{
+ + enum s_alloc alloc_state = sa_none;
+ + struct s_data d;
+ + struct sched_domain *sd;
+ + int i;
+ +#ifdef CONFIG_NUMA
+ + d.sd_allnodes = 0;
+ +#endif
- - sched_domain_node_span(i, domainspan);
- - cpumask_and(domainspan, domainspan, cpu_map);
+ + alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
+ + if (alloc_state != sa_rootdomain)
+ + goto error;
+ + alloc_state = sa_sched_groups;
- - sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
- - GFP_KERNEL, i);
- - if (!sg) {
- - printk(KERN_WARNING "Can not alloc domain group for "
- - "node %d\n", i);
- - goto error;
- - }
- - sched_group_nodes[i] = sg;
- - for_each_cpu(j, nodemask) {
- - struct sched_domain *sd;
+ + /*
+ + * Set up domains for cpus specified by the cpu_map.
+ + */
+ + for_each_cpu(i, cpu_map) {
+ + cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)),
+ + cpu_map);
- - sd = &per_cpu(node_domains, j).sd;
- - sd->groups = sg;
- - }
- - sg->__cpu_power = 0;
- - cpumask_copy(sched_group_cpus(sg), nodemask);
- - sg->next = sg;
- - cpumask_or(covered, covered, nodemask);
- - prev = sg;
+ + sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
+ + sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
+ + sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
+ + sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
+ + }
- - for (j = 0; j < nr_node_ids; j++) {
- - int n = (i + j) % nr_node_ids;
+ + for_each_cpu(i, cpu_map) {
+ + build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
+ + build_sched_groups(&d, SD_LV_MC, cpu_map, i);
+ + }
- - cpumask_complement(notcovered, covered);
- - cpumask_and(tmpmask, notcovered, cpu_map);
- - cpumask_and(tmpmask, tmpmask, domainspan);
- - if (cpumask_empty(tmpmask))
- - break;
+ + /* Set up physical groups */
+ + for (i = 0; i < nr_node_ids; i++)
+ + build_sched_groups(&d, SD_LV_CPU, cpu_map, i);
- - cpumask_and(tmpmask, tmpmask, cpumask_of_node(n));
- - if (cpumask_empty(tmpmask))
- - continue;
+ +#ifdef CONFIG_NUMA
+ + /* Set up node groups */
+ + if (d.sd_allnodes)
+ + build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0);
- - sg = kmalloc_node(sizeof(struct sched_group) +
- - cpumask_size(),
- - GFP_KERNEL, i);
- - if (!sg) {
- - printk(KERN_WARNING
- - "Can not alloc domain group for node %d\n", j);
- - goto error;
- - }
- - sg->__cpu_power = 0;
- - cpumask_copy(sched_group_cpus(sg), tmpmask);
- - sg->next = prev->next;
- - cpumask_or(covered, covered, tmpmask);
- - prev->next = sg;
- - prev = sg;
- - }
- - }
+ + for (i = 0; i < nr_node_ids; i++)
+ + if (build_numa_sched_groups(&d, cpu_map, i))
+ + goto error;
#endif
/* Calculate CPU power for physical packages and nodes */
@@@@ -8759,44 -8762,16 -8659,44 +8862,16 @@@@
#else
sd = &per_cpu(phys_domains, i).sd;
#endif
- - cpu_attach_domain(sd, rd, i);
- }
-
- err = 0;
-
- free_tmpmask:
- free_cpumask_var(tmpmask);
- free_send_covered:
- free_cpumask_var(send_covered);
- free_this_core_map:
- free_cpumask_var(this_core_map);
- free_this_sibling_map:
- free_cpumask_var(this_sibling_map);
- free_nodemask:
- free_cpumask_var(nodemask);
- free_notcovered:
- #ifdef CONFIG_NUMA
- free_cpumask_var(notcovered);
- free_covered:
- free_cpumask_var(covered);
- free_domainspan:
- free_cpumask_var(domainspan);
- out:
- #endif
- return err;
+ + cpu_attach_domain(sd, d.rd, i);
+ }
- err = 0;
-
-free_tmpmask:
- free_cpumask_var(tmpmask);
-free_send_covered:
- free_cpumask_var(send_covered);
-free_this_core_map:
- free_cpumask_var(this_core_map);
-free_this_sibling_map:
- free_cpumask_var(this_sibling_map);
-free_nodemask:
- free_cpumask_var(nodemask);
-free_notcovered:
-#ifdef CONFIG_NUMA
- free_cpumask_var(notcovered);
-free_covered:
- free_cpumask_var(covered);
-free_domainspan:
- free_cpumask_var(domainspan);
-out:
-#endif
- return err;
-
- -free_sched_groups:
- -#ifdef CONFIG_NUMA
- - kfree(sched_group_nodes);
- -#endif
- - goto free_tmpmask;
+ + d.sched_group_nodes = NULL; /* don't free this we still need it */
+ + __free_domain_allocs(&d, sa_tmpmask, cpu_map);
+ + return 0;
- -#ifdef CONFIG_NUMA
error:
- - free_sched_groups(cpu_map, tmpmask);
- - free_rootdomain(rd);
- - goto free_tmpmask;
- -#endif
+ + __free_domain_allocs(&d, alloc_state, cpu_map);
+ + return -ENOMEM;
}
static int build_sched_domains(const struct cpumask *cpu_map)
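The large kernel/sched.c hunk above (from the merged sched/domains work)
replaces a ladder of goto error labels with an enum (s_alloc) that
records how far allocation got, plus one teardown helper whose switch
cases deliberately fall through, so cleanup always starts at the point
of failure and unwinds everything allocated before it. A compact,
hypothetical sketch of that idiom outside the scheduler:

#include <stdlib.h>

struct ctx { void *a, *b, *c; };

enum alloc_state { sa_none, sa_a, sa_b, sa_all };

/* Teardown: start at the deepest successful stage and fall through,
 * freeing everything allocated before it. */
static void free_ctx(struct ctx *d, enum alloc_state upto)
{
        switch (upto) {
        case sa_all:
                free(d->c);     /* fall through */
        case sa_b:
                free(d->b);     /* fall through */
        case sa_a:
                free(d->a);     /* fall through */
        case sa_none:
                break;
        }
}

/* Allocation: return the name of the last stage that succeeded. */
static enum alloc_state alloc_ctx(struct ctx *d)
{
        if (!(d->a = malloc(16)))
                return sa_none;
        if (!(d->b = malloc(16)))
                return sa_a;
        if (!(d->c = malloc(16)))
                return sa_b;
        return sa_all;
}

int main(void)
{
        struct ctx d = { 0 };
        enum alloc_state state = alloc_ctx(&d);

        if (state != sa_all) {
                free_ctx(&d, state);    /* unwind exactly what was allocated */
                return 1;
        }
        /* ... use d ... */
        free_ctx(&d, sa_all);
        return 0;
}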
commit 29e2035bddecce3eb584a8304528b50da8370a24
Merge: 868489660dab 37d0892c5a94
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Sep 4 09:28:52 2009 +0200
Merge branch 'linus' into core/rcu
Merge reason: Avoid fuzz in init/main.c and update from rc6 to rc8.
Signed-off-by: Ingo Molnar <mingo@elte.hu>