Patches contributed by Eötvös Loránd University
commit 93093d099e5dd0c258fd530c12668e828c20df41
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Nov 30 10:20:20 2008 +0100
x86: provide readq()/writeq() on 32-bit too, complete
if HAVE_READQ/HAVE_WRITEQ are defined, the full range of readq/writeq
APIs has to be provided to drivers:
drivers/infiniband/hw/amso1100/c2.c: In function 'c2_tx_ring_alloc':
drivers/infiniband/hw/amso1100/c2.c:133: error: implicit declaration of function '__raw_writeq'
So provide them on 32-bit as well. Also, map all the APIs to the
strongest ordering variant. It's way too easy to mess such details
up in drivers, and the difference between "memory"- and ""-constrained
asm() constructs is in the noise range.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
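For illustration, here is a minimal sketch of what the surviving, "memory"-clobbered
64-bit read variant expands to (the name sketch_readq is made up; the real function
is generated by the build_mmio_read() macro). The "memory" clobber is what makes it
the strongest ordering variant: it forbids the compiler from caching or reordering
memory accesses around the MMIO read:

static inline unsigned long sketch_readq(const volatile void __iomem *addr)
{
        unsigned long ret;

        /* the "memory" clobber acts as a full compiler barrier */
        asm volatile("movq %1, %0"
                     : "=r" (ret)
                     : "m" (*(volatile unsigned long __force *)addr)
                     : "memory");
        return ret;
}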
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 3ccfaf610c89..33513b9a67f3 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -46,16 +46,11 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#define mmiowb() barrier()
#ifdef CONFIG_X86_64
+
build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
-build_mmio_read(__readq, "q", unsigned long, "=r", )
build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
-build_mmio_write(__writeq, "q", unsigned long, "r", )
-
-#define readq_relaxed(a) __readq(a)
-#define __raw_readq __readq
-#define __raw_writeq writeq
-#else /* CONFIG_X86_32 from here */
+#else
static inline __u64 readq(const volatile void __iomem *addr)
{
@@ -76,9 +71,14 @@ static inline void writeq(__u64 val, volatile void __iomem *addr)
#endif
+#define readq_relaxed(a) readq(a)
+
+#define __raw_readq(a) readq(a)
+#define __raw_writeq(val, addr) writeq(val, addr)
+
/* Let people know that we have them */
-#define readq readq
-#define writeq writeq
+#define readq readq
+#define writeq writeq
extern int iommu_bio_merge;
commit a0b1131e479e5af32eefac8bc54c9742e23d638e
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Nov 30 09:33:55 2008 +0100
x86: provide readq()/writeq() on 32-bit too, cleanup
Impact: cleanup
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 25946449df4f..3ccfaf610c89 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -55,21 +55,17 @@ build_mmio_write(__writeq, "q", unsigned long, "r", )
#define __raw_readq __readq
#define __raw_writeq writeq
-/* Let people know we have them */
-#define readq readq
-#define writeq writeq
-
#else /* CONFIG_X86_32 from here */
static inline __u64 readq(const volatile void __iomem *addr)
{
const volatile u32 __iomem *p = addr;
- u32 l, h;
+ u32 low, high;
- l = readl(p);
- h = readl(p + 1);
+ low = readl(p);
+ high = readl(p + 1);
- return l + ((u64)h << 32);
+ return low + ((u64)high << 32);
}
static inline void writeq(__u64 val, volatile void __iomem *addr)
@@ -78,11 +74,12 @@ static inline void writeq(__u64 val, volatile void __iomem *addr)
writel(val >> 32, addr+4);
}
+#endif
+
+/* Let people know that we have them */
#define readq readq
#define writeq writeq
-#endif
-
extern int iommu_bio_merge;
#ifdef CONFIG_X86_32
commit af6d596fd603219b054c1c90fb16672a9fd441bd
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Nov 29 20:45:15 2008 +0100
sched: prevent divide by zero error in cpu_avg_load_per_task, update
Regarding the bug addressed in:
4cd4262: sched: prevent divide by zero error in cpu_avg_load_per_task
Linus points out that the fix is not complete:
> There's nothing that keeps gcc from deciding not to reload
> rq->nr_running.
>
> Of course, in _practice_, I don't think gcc ever will (if it decides
> that it will spill, gcc is likely going to decide that it will
> literally spill the local variable to the stack rather than decide to
> reload off the pointer), but it's a valid compiler optimization, and
> it even has a name (rematerialization).
>
> So I suspect that your patch does fix the bug, but it still leaves the
> fairly unlikely _potential_ for it to re-appear at some point.
>
> We have ACCESS_ONCE() as a macro to guarantee that the compiler
> doesn't rematerialize a pointer access. That also would clarify
> the fact that we access something unsafe outside a lock.
So make sure our nr_running value is immutable and cannot change
after we check it for nonzero.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
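For context, the ACCESS_ONCE() macro of that era was (roughly) just a
volatile-qualified cast, which forces the compiler to perform the load exactly
once and never rematerialize the value from the original pointer:

#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/*
 * Usage sketch: both the nonzero check and the division below are
 * guaranteed to operate on the same, single load of rq->nr_running.
 */
unsigned long nr_running = ACCESS_ONCE(rq->nr_running);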
diff --git a/kernel/sched.c b/kernel/sched.c
index 700aa9a1413f..b7480fb5c3dc 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1453,7 +1453,7 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
static unsigned long cpu_avg_load_per_task(int cpu)
{
struct rq *rq = cpu_rq(cpu);
- unsigned long nr_running = rq->nr_running;
+ unsigned long nr_running = ACCESS_ONCE(rq->nr_running);
if (nr_running)
rq->avg_load_per_task = rq->load.weight / nr_running;
commit 1583715ddb61f822041807a0f18b3b4845e88c76
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Nov 25 10:27:49 2008 +0100
sched, cpusets: fix warning in kernel/cpuset.c
this warning:
kernel/cpuset.c: In function ‘generate_sched_domains’:
kernel/cpuset.c:588: warning: ‘ndoms’ may be used uninitialized in this function
triggers because GCC does not recognize that ndoms stays uninitialized
only if doms is NULL - but that flow is covered at the end of
generate_sched_domains().
Help out GCC by initializing this variable to 0 (that's prudent anyway).
Also, this function needs a split-up and code-flow simplification:
at 160 lines it's clearly too long.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
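A minimal, hypothetical illustration of the control flow GCC cannot prove safe
here (all names made up):

int f(int *doms)
{
        int ndoms;              /* assigned only on the doms != NULL path */

        if (doms)
                ndoms = 1;

        if (!doms)
                return 0;       /* ndoms is never read on this path */

        return ndoms;           /* safe in practice, but GCC warns */
}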
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index da7ff6137f37..96c0ba13b8cd 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -585,7 +585,7 @@ static int generate_sched_domains(cpumask_t **domains,
int i, j, k; /* indices for partition finding loops */
cpumask_t *doms; /* resulting partition; i.e. sched domains */
struct sched_domain_attr *dattr; /* attributes for custom domains */
- int ndoms; /* number of sched domains in result */
+ int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] cpumask_t slot */
doms = NULL;
commit f1860c34b3ed829ac774647f266abf1074cd58cd
Merge: 64b7482de253 4cd426203484
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 28 20:11:05 2008 +0100
Merge branch 'sched/urgent' into sched/core
commit 604094f4615180f71da799e7e5b191f5c2a42a28
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 28 18:03:22 2008 +0100
vfs, seqfile: export mangle_path() generally
mangle_path() is trivial enough to make export restrictions on it
pointless - so change the export from EXPORT_SYMBOL_GPL to EXPORT_SYMBOL.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
diff --git a/fs/seq_file.c b/fs/seq_file.c
index f03220d7891b..16c211558c22 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -387,7 +387,7 @@ char *mangle_path(char *s, char *p, char *esc)
}
return NULL;
}
-EXPORT_SYMBOL_GPL(mangle_path);
+EXPORT_SYMBOL(mangle_path);
/*
* return the absolute path of 'dentry' residing in mount 'mnt'.
commit ec5679e513305f1411753e5f5489935bd638af23
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 28 17:56:14 2008 +0100
debug warnings: eliminate warn_on_slowpath()
Impact: cleanup, eliminate code
now that warn_on_slowpath() uses warn_slowpath(...,NULL), we can
eliminate warn_on_slowpath() altogether and use warn_slowpath().
Signed-off-by: Ingo Molnar <mingo@elte.hu>
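For reference, a sketch of how the generic WARN_ON() now reaches warn_slowpath()
directly (simplified from the asm-generic/bug.h definition of that era):

#define WARN_ON(condition) ({                                           \
        int __ret_warn_on = !!(condition);                              \
        if (unlikely(__ret_warn_on))                                    \
                __WARN();  /* warn_slowpath(__FILE__, __LINE__, NULL) */ \
        unlikely(__ret_warn_on);                                        \
})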
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 12c07c1866b2..b8ba6941f587 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -33,15 +33,14 @@ struct bug_entry {
#ifndef __WARN
#ifndef __ASSEMBLY__
-extern void warn_on_slowpath(const char *file, const int line);
extern void warn_slowpath(const char *file, const int line,
const char *fmt, ...) __attribute__((format(printf, 3, 4)));
#define WANT_WARN_ON_SLOWPATH
#endif
-#define __WARN() warn_on_slowpath(__FILE__, __LINE__)
-#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
+#define __WARN() warn_slowpath(__FILE__, __LINE__, NULL)
+#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg)
#else
-#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
+#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0)
#endif
#ifndef WARN_ON
diff --git a/kernel/panic.c b/kernel/panic.c
index 73d365199c3f..50349a41fba7 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -349,12 +349,6 @@ void warn_slowpath(const char *file, int line, const char *fmt, ...)
add_taint(TAINT_WARN);
}
EXPORT_SYMBOL(warn_slowpath);
-
-void warn_on_slowpath(const char *file, int line)
-{
- warn_slowpath(file, line, NULL);
-}
-EXPORT_SYMBOL(warn_on_slowpath);
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
commit 5b3eec0c80038c8739ccd465b897a35c0dff1cc4
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Nov 27 14:41:21 2008 +0100
x86: ret_from_fork - get rid of jump back
Impact: remove dead code
If we take a closer look at the rff_trace/rff_action ret_from_fork code,
we have to realize that it does all the wrong things: for example it
checks the TIF flag - while later jumping back to the ret-from-syscall
path - duplicating the check needlessly.
But checking for _TIF_SYSCALL_TRACE is completely unnecessary here because
we clear that flag for every freshly forked task. So the whole "tracing"
code here, for which there is an out-of-line jump optimization that makes
it even harder to read, is in reality completely dead code ...
Reported-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Cyrill Gorcunov <gorcunov@gmail.com>
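For reference, the reason the _TIF_SYSCALL_TRACE check is dead: the flag is
cleared for the child during fork, along these lines (a sketch of the
copy_process() path in kernel/fork.c; exact context varies by version):

/* a freshly forked task never carries the syscall-trace flag: */
clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);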
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e41734a537bd..3194636a4293 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -361,34 +361,35 @@ ENTRY(save_paranoid)
END(save_paranoid)
/*
- * A newly forked process directly context switches into this.
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
*/
-/* rdi: prev */
ENTRY(ret_from_fork)
DEFAULT_FRAME
+
push kernel_eflags(%rip)
CFI_ADJUST_CFA_OFFSET 8
- popf # reset kernel eflags
+ popf # reset kernel eflags
CFI_ADJUST_CFA_OFFSET -8
- call schedule_tail
+
+ call schedule_tail # rdi: 'prev' task parameter
+
GET_THREAD_INFO(%rcx)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+
CFI_REMEMBER_STATE
- jnz rff_trace
-rff_action:
RESTORE_REST
- testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
+
+ testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
- testl $_TIF_IA32,TI_flags(%rcx)
+
+ testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
+
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
- jmp ret_from_sys_call
+ jmp ret_from_sys_call # go to the SYSRET fastpath
+
CFI_RESTORE_STATE
-rff_trace:
- movq %rsp,%rdi
- call syscall_trace_leave
- GET_THREAD_INFO(%rcx)
- jmp rff_action
CFI_ENDPROC
END(ret_from_fork)
commit 3bdae4f46445ea7cc9ee031d7ff106fdc6228669
Merge: 9f1e87ea3ecb 5f5db5913267
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 28 15:00:37 2008 +0100
Merge branch 'x86/debug' into x86/irq
We merge this branch because x86/debug touches code that we started
cleaning up in x86/irq. The two branches started out independent,
but as an unexpected amount of activity went into x86/irq, they became
dependent. Resolve that by this cross-merge.
diff --cc arch/x86/kernel/entry_64.S
index 08c0c9777a09,4a16bf31c783..e41734a537bd
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@@ -373,15 -255,17 +373,17 @@@ ENTRY(ret_from_fork
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
+ CFI_REMEMBER_STATE
jnz rff_trace
-rff_action:
+rff_action:
RESTORE_REST
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
je int_ret_from_sys_call
testl $_TIF_IA32,TI_flags(%rcx)
jnz int_ret_from_sys_call
- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
+ RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call
+ CFI_RESTORE_STATE
rff_trace:
movq %rsp,%rdi
call syscall_trace_leave
@@@ -1173,8 -1172,9 +1175,9 @@@ ENTRY(child_rip
# exit
mov %eax, %edi
call do_exit
+ ud2 # padding for call trace
CFI_ENDPROC
-ENDPROC(child_rip)
+END(child_rip)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
commit d51090b34602a20984ab0312ef04e47069c0aec6
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Nov 28 09:55:16 2008 +0100
tracing/function-graph-tracer: more output tweaks
Impact: prettify the output some more
Before:
0) | sys_read() {
0) 0.796 us | fget_light();
0) | vfs_read() {
0) | rw_verify_area() {
0) | security_file_permission() {
------------8<---------- thread sshd-1755 ------------8<----------
After:
0) | sys_read() {
0) 0.796 us | fget_light();
0) | vfs_read() {
0) | rw_verify_area() {
0) | security_file_permission() {
------------------------------------------
| 1) migration/0--1 => sshd-1755
------------------------------------------
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 596a3ee43053..894b50bca313 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -79,6 +79,19 @@ print_graph_cpu(struct trace_seq *s, int cpu)
int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+ /*
+ * Start with a space character - to make it stand out
+ * to the right a bit when trace output is pasted into
+ * email:
+ */
+ ret = trace_seq_printf(s, " ");
+
+ /*
+ * Tricky - we space the CPU field according to the max
+ * number of online CPUs. On a 2-cpu system it would take
+ * a maximum of 1 digit - on a 128 cpu system it would
+ * take up to 3 digits:
+ */
for (i = 0; i < log10_all - log10_this; i++) {
ret = trace_seq_printf(s, " ");
if (!ret)
@@ -86,7 +99,8 @@ print_graph_cpu(struct trace_seq *s, int cpu)
}
ret = trace_seq_printf(s, "%d) ", cpu);
if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_PARTIAL_LINE;
+
return TRACE_TYPE_HANDLED;
}
@@ -94,17 +108,34 @@ print_graph_cpu(struct trace_seq *s, int cpu)
/* If the pid changed since the last trace, output this event */
static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
{
- char *comm;
+ char *comm, *prev_comm;
+ pid_t prev_pid;
+ int ret;
if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
return 1;
+ prev_pid = last_pid[cpu];
last_pid[cpu] = pid;
+
comm = trace_find_cmdline(pid);
+ prev_comm = trace_find_cmdline(prev_pid);
- return trace_seq_printf(s, "\n------------8<---------- thread %s-%d"
- " ------------8<----------\n\n",
- cpu, comm, pid);
+/*
+ * Context-switch trace line:
+
+ ------------------------------------------
+ | 1) migration/0--1 => sshd-1755
+ ------------------------------------------
+
+ */
+ ret = trace_seq_printf(s,
+ " ------------------------------------------\n");
+ ret += trace_seq_printf(s, " | %d) %s-%d => %s-%d\n",
+ cpu, prev_comm, prev_pid, comm, pid);
+ ret += trace_seq_printf(s,
+ " ------------------------------------------\n\n");
+ return ret;
}
static bool
@@ -142,7 +173,7 @@ static inline int
print_graph_duration(unsigned long long duration, struct trace_seq *s)
{
unsigned long nsecs_rem = do_div(duration, 1000);
- return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
+ return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
}
/* Signal a overhead of time execution to the output */
@@ -229,7 +260,7 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
}
/* No time */
- ret = trace_seq_printf(s, " | ");
+ ret = trace_seq_printf(s, " | ");
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {