Patches contributed by Eötvös Loránd University
commit fd59e9e9c8e35cd2a1834c0d1f67aedf0c5c68c2
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Feb 17 20:20:24 2008 +0100
x86: change IO delay back to 0x80
Change the IO delay back to 0x80.
Alan says that 0xed is known to break some older boxes, and given that
the get-rid-of-outb-APIs efforts are well underway we should just let
them be finished.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 864affc9a7b0..702eb39901ca 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -156,7 +156,7 @@ config IO_DELAY_TYPE_NONE
choice
prompt "IO delay type"
- default IO_DELAY_0XED
+ default IO_DELAY_0X80
config IO_DELAY_0X80
bool "port 0x80 based port-IO delay [recommended]"
commit d76c1ae4d1f4f322d47e7c6e47a277384ba9d9cb
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Feb 17 16:48:25 2008 +0100
x86: clean up csum-wrappers_64.c some more
no code changed:
arch/x86/lib/csum-wrappers_64.o:
text data bss dec hex filename
839 0 0 839 347 csum-wrappers_64.o.before
839 0 0 839 347 csum-wrappers_64.o.after
md5:
b31994226c33e0b52bef5a0e110b84b0 csum-wrappers_64.o.before.asm
b31994226c33e0b52bef5a0e110b84b0 csum-wrappers_64.o.after.asm
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 95e45dcc5a29..459b58a8a15c 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -1,9 +1,9 @@
-/* Copyright 2002,2003 Andi Kleen, SuSE Labs.
+/*
+ * Copyright 2002, 2003 Andi Kleen, SuSE Labs.
* Subject to the GNU Public License v.2
*
* Wrappers of assembly checksum functions for x86-64.
*/
-
#include <asm/checksum.h>
#include <linux/module.h>
@@ -24,37 +24,47 @@ csum_partial_copy_from_user(const void __user *src, void *dst,
{
might_sleep();
*errp = 0;
- if (likely(access_ok(VERIFY_READ, src, len))) {
- /* Why 6, not 7? To handle odd addresses aligned we
- would need to do considerable complications to fix the
- checksum which is defined as an 16bit accumulator. The
- fix alignment code is primarily for performance
- compatibility with 32bit and that will handle odd
- addresses slowly too. */
- if (unlikely((unsigned long)src & 6)) {
- while (((unsigned long)src & 6) && len >= 2) {
- __u16 val16;
- *errp = __get_user(val16, (const __u16 __user *)src);
- if (*errp)
- return isum;
- *(__u16 *)dst = val16;
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- src += 2;
- dst += 2;
- len -= 2;
- }
+
+ if (!likely(access_ok(VERIFY_READ, src, len)))
+ goto out_err;
+
+ /*
+ * Why 6, not 7? To handle odd-aligned addresses we
+ * would need to do considerable complications to fix the
+ * checksum, which is defined as a 16-bit accumulator. The
+ * alignment-fixup code is primarily for performance
+ * compatibility with 32-bit, and that will handle odd
+ * addresses slowly too.
+ */
+ if (unlikely((unsigned long)src & 6)) {
+ while (((unsigned long)src & 6) && len >= 2) {
+ __u16 val16;
+
+ *errp = __get_user(val16, (const __u16 __user *)src);
+ if (*errp)
+ return isum;
+
+ *(__u16 *)dst = val16;
+ isum = (__force __wsum)add32_with_carry(
+ (__force unsigned)isum, val16);
+ src += 2;
+ dst += 2;
+ len -= 2;
}
- isum = csum_partial_copy_generic((__force const void *)src,
- dst, len, isum, errp, NULL);
- if (likely(*errp == 0))
- return isum;
}
+ isum = csum_partial_copy_generic((__force const void *)src,
+ dst, len, isum, errp, NULL);
+ if (unlikely(*errp))
+ goto out_err;
+
+ return isum;
+
+out_err:
*errp = -EFAULT;
memset(dst, 0, len);
+
return isum;
}
-
EXPORT_SYMBOL(csum_partial_copy_from_user);
/**
@@ -73,6 +83,7 @@ csum_partial_copy_to_user(const void *src, void __user *dst,
int len, __wsum isum, int *errp)
{
might_sleep();
+
if (unlikely(!access_ok(VERIFY_WRITE, dst, len))) {
*errp = -EFAULT;
return 0;
@@ -81,6 +92,7 @@ csum_partial_copy_to_user(const void *src, void __user *dst,
if (unlikely((unsigned long)dst & 6)) {
while (((unsigned long)dst & 6) && len >= 2) {
__u16 val16 = *(__u16 *)src;
+
isum = (__force __wsum)add32_with_carry(
(__force unsigned)isum, val16);
*errp = __put_user(val16, (__u16 __user *)dst);
@@ -93,9 +105,9 @@ csum_partial_copy_to_user(const void *src, void __user *dst,
}
*errp = 0;
- return csum_partial_copy_generic(src, (void __force *)dst, len, isum, NULL, errp);
+ return csum_partial_copy_generic(src, (void __force *)dst,
+ len, isum, NULL, errp);
}
-
EXPORT_SYMBOL(csum_partial_copy_to_user);
/**
@@ -122,14 +134,17 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
rest = (__force __u64)htonl(len) + (__force __u64)htons(proto) +
(__force __u64)sum;
- asm(" addq (%[saddr]),%[sum]\n"
- " adcq 8(%[saddr]),%[sum]\n"
- " adcq (%[daddr]),%[sum]\n"
- " adcq 8(%[daddr]),%[sum]\n"
- " adcq $0,%[sum]\n"
+
+ asm(" addq (%[saddr]),%[sum]\n"
+ " adcq 8(%[saddr]),%[sum]\n"
+ " adcq (%[daddr]),%[sum]\n"
+ " adcq 8(%[daddr]),%[sum]\n"
+ " adcq $0,%[sum]\n"
+
: [sum] "=r" (sum64)
: "[sum]" (rest), [saddr] "r" (saddr), [daddr] "r" (daddr));
- return csum_fold((__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
-}
+ return csum_fold(
+ (__force __wsum)add32_with_carry(sum64 & 0xffffffff, sum64>>32));
+}
EXPORT_SYMBOL(csum_ipv6_magic);
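The wrappers above fold 16-bit loads into a running 32-bit one's-complement
sum via add32_with_carry(). For reference, a sketch of that helper, assuming
the asm/checksum_64.h definition of the period:

    /*
     * Sketch of add32_with_carry() (assumed to match the era's
     * asm/checksum_64.h): a 32-bit add whose carry-out is folded back
     * into the low bits, as one's-complement arithmetic requires.
     */
    static inline unsigned int add32_with_carry(unsigned int a, unsigned int b)
    {
            asm("addl %2,%0\n\t"
                "adcl $0,%0"            /* fold the carry back in */
                : "=r" (a)
                : "0" (a), "r" (b));
            return a;
    }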
commit e43eb7bab6e82e1aa93ce4d39546c54347a68077
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 14 08:38:49 2008 +0100
x86: exclude vsyscall files from stackprotect
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 76ec0f8f138a..4eb5ce841106 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,7 +6,15 @@ extra-y := head_$(BITS).o init_task.o vmlinux.lds
extra-$(CONFIG_X86_64) += head64.o
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
-CFLAGS_vsyscall_64.o := $(PROFILING) -g0
+
+#
+# vsyscalls (which work on the user stack) should have
+# no stack-protector checks:
+#
+nostackp := $(call cc-option, -fno-stack-protector)
+CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
+CFLAGS_hpet.o := $(nostackp)
+CFLAGS_tsc_64.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
obj-y += traps_$(BITS).o irq_$(BITS).o
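The reason for the exclusion: with -fstack-protector, GCC plants a canary in
each instrumented function and verifies it before returning, but vsyscall code
executes on the user stack, where that machinery can only misfire. A
conceptual C sketch of what the instrumentation amounts to (illustration only,
not kernel code; real compilers place the canary between the locals and the
return address):

    /*
     * What -fstack-protector conceptually inserts: a guard value stored
     * in a stack slot on entry, verified on exit.  An overflow of buf
     * clobbers the slot and trips the check.  vsyscalls run on the user
     * stack, so this must not be emitted for them -- hence $(nostackp).
     */
    extern unsigned long __stack_chk_guard;
    extern void __stack_chk_fail(void);

    void instrumented(void)
    {
            volatile unsigned long canary = __stack_chk_guard;  /* entry */
            char buf[64];

            buf[0] = 0;     /* ... body that might overflow buf ... */

            if (canary != __stack_chk_guard)    /* exit: slot clobbered? */
                    __stack_chk_fail();
    }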
commit f8d8406bcb58ff70e97b71c35ff5be90c54fc3d0
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Feb 13 14:09:53 2008 +0100
x86: cpa, fix out of date comment
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 5d2259468d3c..4119379f80ff 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -870,8 +870,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
return;
/*
- * The return value is ignored - the calls cannot fail,
- * large pages are disabled at boot time:
+ * The return value is ignored as the calls cannot fail.
+ * Large pages are kept enabled at boot time, and are
+ * split up quickly with DEBUG_PAGEALLOC. If a splitup
+ * fails here (due to temporary memory shortage) no damage
+ * is done because we just keep the largepage intact up
+ * to the next attempt when it will likely be split up:
*/
if (enable)
__set_pages_p(page, numpages);
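For context, kernel_map_pages() is the CONFIG_DEBUG_PAGEALLOC hook: the page
allocator unmaps pages as they are freed and maps them again on allocation, so
any use-after-free faults immediately. A sketch of the call pattern (on_free()
and on_alloc() are hypothetical names, not the allocator's literal code):

    /*
     * Simplified sketch of how DEBUG_PAGEALLOC drives this hook --
     * freed pages go Not-Present, so stray accesses fault right away:
     */
    static void on_free(struct page *page, int numpages)
    {
            kernel_map_pages(page, numpages, 0);    /* unmap on free */
    }

    static void on_alloc(struct page *page, int numpages)
    {
            kernel_map_pages(page, numpages, 1);    /* remap on alloc */
    }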
commit 184652eb6f68050af48a2ce3d5cf0537c208bee2
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 14 23:30:20 2008 +0100
x86: fix gart_iommu_init()
When the GART table is unmapped from the kernel direct mappings
during early bootup, make sure we have no leftover cachelines in it.
Note: the clflush done by set_memory_np() was not enough, because
clflush does not work on unmapped pages.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 65f6acb025c8..faf3229f8fb3 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -749,6 +749,15 @@ void __init gart_iommu_init(void)
*/
set_memory_np((unsigned long)__va(iommu_bus_base),
iommu_size >> PAGE_SHIFT);
+ /*
+ * Tricky. The GART table remaps the physical memory range,
+ * so the CPU won't notice potential aliases and if the memory
+ * is remapped to UC later on, we might surprise the PCI devices
+ * with a stray writeout of a cacheline. So play it safe and
+ * do an explicit, full-scale wbinvd() _after_ having marked all
+ * the pages as Not-Present:
+ */
+ wbinvd();
/*
* Try to workaround a bug (thanks to BenH)
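The note about clflush deserves emphasis: clflush flushes by linear address
and page-faults once the translation is gone, while wbinvd() writes back and
invalidates the entire cache without referencing any address. A sketch of the
resulting ordering rule (flush_gart_table() is a hypothetical name; kernel
context assumed):

    /*
     * Hypothetical helper illustrating the ordering rule: once
     * set_memory_np() has removed the mapping, clflush on that range
     * would fault, so only a full wbinvd() can evict leftover lines.
     */
    static void flush_gart_table(unsigned long vaddr, unsigned long size)
    {
            set_memory_np(vaddr, size >> PAGE_SHIFT);   /* mapping gone */

            /* clflush_cache_range((void *)vaddr, size) would #PF here */
            wbinvd();                           /* address-free flush */
    }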
commit 3223f59f9cd9d69a4344eeac8b16a262c5f373f1
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 14 14:21:32 2008 +0100
x86: EFI set_memory_x()/set_memory_uc() fixes
The EFI-runtime mapping code changed a larger memory area than it
should have, due to a pages/bytes parameter mixup.
Noticed by Andi Kleen.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index cbdf9bacc575..0c0eeb163d90 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -391,7 +391,7 @@ static void __init runtime_code_page_mkexec(void)
if (md->type != EFI_RUNTIME_SERVICES_CODE)
continue;
- set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
+ set_memory_x(md->virt_addr, md->num_pages);
}
}
@@ -434,7 +434,7 @@ void __init efi_enter_virtual_mode(void)
}
if (!(md->attribute & EFI_MEMORY_WB))
- set_memory_uc(md->virt_addr, size);
+ set_memory_uc(md->virt_addr, md->num_pages);
systab = (u64) (unsigned long) efi_phys.systab;
if (md->phys_addr <= systab && systab < end) {
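The bug class is worth spelling out: the set_memory_*() helpers take a count
of pages, not bytes. Restated with the names from the patch (EFI_PAGE_SHIFT
is 12, so the shift turned a page count into a byte count):

    /*
     * set_memory_x()/set_memory_uc() expect a page count.
     * md->num_pages << EFI_PAGE_SHIFT is a byte count, so the old
     * calls changed an area 4096 times larger than intended:
     */
    set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);  /* WRONG: bytes */
    set_memory_x(md->virt_addr, md->num_pages);                    /* right: pages */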
commit e8bff74afbdb4ad72bf6135c84289c47cf557892
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Feb 13 20:21:06 2008 +0100
x86: fix "BUG: sleeping function called from invalid context" in print_vma_addr()
Jiri Kosina reported the following deadlock scenario with
show_unhandled_signals enabled:
[ 68.379022] gnome-settings-[2941] trap int3 ip:3d2c840f34
sp:7fff36f5d100 error:0<3>BUG: sleeping function called from invalid
context at kernel/rwsem.c:21
[ 68.379039] in_atomic():1, irqs_disabled():0
[ 68.379044] no locks held by gnome-settings-/2941.
[ 68.379050] Pid: 2941, comm: gnome-settings- Not tainted 2.6.25-rc1 #30
[ 68.379054]
[ 68.379056] Call Trace:
[ 68.379061] <#DB> [<ffffffff81064883>] ? __debug_show_held_locks+0x13/0x30
[ 68.379109] [<ffffffff81036765>] __might_sleep+0xe5/0x110
[ 68.379123] [<ffffffff812f2240>] down_read+0x20/0x70
[ 68.379137] [<ffffffff8109cdca>] print_vma_addr+0x3a/0x110
[ 68.379152] [<ffffffff8100f435>] do_trap+0xf5/0x170
[ 68.379168] [<ffffffff8100f52b>] do_int3+0x7b/0xe0
[ 68.379180] [<ffffffff812f4a6f>] int3+0x9f/0xd0
[ 68.379203] <<EOE>>
[ 68.379229] in libglib-2.0.so.0.1505.0[3d2c800000+dc000]
and tracked it down to:
commit 03252919b79891063cf99145612360efbdf9500b
Author: Andi Kleen <ak@suse.de>
Date: Wed Jan 30 13:33:18 2008 +0100
x86: print which shared library/executable faulted in segfault etc. messages
the problem is that we call down_read() from an atomic context.
Solve this by returning from print_vma_addr() if the preempt count is
elevated. Update preempt_conditional_sti / preempt_conditional_cli to
unconditionally lift the preempt count even on !CONFIG_PREEMPT.
Reported-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index efc66df728b6..045466681911 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -84,7 +84,7 @@ static inline void conditional_sti(struct pt_regs *regs)
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
- preempt_disable();
+ inc_preempt_count();
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
@@ -95,7 +95,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
local_irq_disable();
/* Make sure to not schedule here because we could be running
on an exception stack. */
- preempt_enable_no_resched();
+ dec_preempt_count();
}
int kstack_depth_to_print = 12;
diff --git a/mm/memory.c b/mm/memory.c
index 717aa0e3be2d..55b97ef6de11 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2711,6 +2711,13 @@ void print_vma_addr(char *prefix, unsigned long ip)
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
+ /*
+ * Do not print if we are in atomic
+ * contexts (in exception stacks, etc.):
+ */
+ if (preempt_count())
+ return;
+
down_read(&mm->mmap_sem);
vma = find_vma(mm, ip);
if (vma && vma->vm_file) {
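The subtle point: on !CONFIG_PREEMPT kernels, preempt_disable() compiles to
nothing and does not raise the preempt count, so the new preempt_count() check
in print_vma_addr() would never see the trap context. A sketch, assuming
simplified definitions from that era's linux/preempt.h:

    /*
     * Assumed, simplified definitions -- the reason the trap path must
     * use inc_preempt_count() rather than preempt_disable():
     */
    #ifdef CONFIG_PREEMPT
    # define preempt_disable()  do { inc_preempt_count(); barrier(); } while (0)
    #else
    # define preempt_disable()  do { } while (0)    /* count NOT raised */
    #endif

    /*
     * print_vma_addr() bails out when preempt_count() != 0, which only
     * works if the count was raised unconditionally on trap entry.
     */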
commit 1cdde19109901e8f1194e227d0bcd48caf713323
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Feb 13 16:20:35 2008 +0100
x86: fix sigcontext.h user export
Jakub Jelinek reported that some user-space code that relies on
kernel headers has a build dependency on the sigcontext->eip/rip
register names - which have been unified in commit:
commit 742fa54a62be6a263df14a553bf832724471dfbe
Author: H. Peter Anvin <hpa@zytor.com>
Date: Wed Jan 30 13:30:56 2008 +0100
x86: use generic register names in struct sigcontext
so give the old layout to user-space. This is not particularly
pretty, but it's an ABI so there's no danger of the two definitions
getting out of sync.
Reported-by: Jakub Jelinek <jakub@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index 681deade5f00..d743947f4c77 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h
@@ -58,6 +58,7 @@ struct _fpstate {
#define X86_FXSR_MAGIC 0x0000
+#ifdef __KERNEL__
struct sigcontext {
unsigned short gs, __gsh;
unsigned short fs, __fsh;
@@ -82,6 +83,35 @@ struct sigcontext {
unsigned long oldmask;
unsigned long cr2;
};
+#else /* __KERNEL__ */
+/*
+ * User-space might still rely on the old definition:
+ */
+struct sigcontext {
+ unsigned short gs, __gsh;
+ unsigned short fs, __fsh;
+ unsigned short es, __esh;
+ unsigned short ds, __dsh;
+ unsigned long edi;
+ unsigned long esi;
+ unsigned long ebp;
+ unsigned long esp;
+ unsigned long ebx;
+ unsigned long edx;
+ unsigned long ecx;
+ unsigned long eax;
+ unsigned long trapno;
+ unsigned long err;
+ unsigned long eip;
+ unsigned short cs, __csh;
+ unsigned long eflags;
+ unsigned long esp_at_signal;
+ unsigned short ss, __ssh;
+ struct _fpstate __user * fpstate;
+ unsigned long oldmask;
+ unsigned long cr2;
+};
+#endif /* !__KERNEL__ */
#else /* __i386__ */
@@ -102,6 +132,7 @@ struct _fpstate {
__u32 reserved2[24];
};
+#ifdef __KERNEL__
struct sigcontext {
unsigned long r8;
unsigned long r9;
@@ -132,6 +163,41 @@ struct sigcontext {
struct _fpstate __user *fpstate; /* zero when no FPU context */
unsigned long reserved1[8];
};
+#else /* __KERNEL__ */
+/*
+ * User-space might still rely on the old definition:
+ */
+struct sigcontext {
+ unsigned long r8;
+ unsigned long r9;
+ unsigned long r10;
+ unsigned long r11;
+ unsigned long r12;
+ unsigned long r13;
+ unsigned long r14;
+ unsigned long r15;
+ unsigned long rdi;
+ unsigned long rsi;
+ unsigned long rbp;
+ unsigned long rbx;
+ unsigned long rdx;
+ unsigned long rax;
+ unsigned long rcx;
+ unsigned long rsp;
+ unsigned long rip;
+ unsigned long eflags; /* RFLAGS */
+ unsigned short cs;
+ unsigned short gs;
+ unsigned short fs;
+ unsigned short __pad0;
+ unsigned long err;
+ unsigned long trapno;
+ unsigned long oldmask;
+ unsigned long cr2;
+ struct _fpstate __user *fpstate; /* zero when no FPU context */
+ unsigned long reserved1[8];
+};
+#endif /* !__KERNEL__ */
#endif /* !__i386__ */
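A hypothetical example of the breakage Jakub reported (illustration only, not
from the report): user code that includes the kernel header directly and
addresses registers by their pre-unification names, which is exactly what the
#else branches above keep compiling:

    /* Hypothetical user-space consumer of the old field names: */
    #include <asm/sigcontext.h>

    unsigned long fault_ip(const struct sigcontext *sc)
    {
    #ifdef __i386__
            return sc->eip;     /* old i386 name ('ip' after unification) */
    #else
            return sc->rip;     /* old x86-64 name ('ip' after unification) */
    #endif
    }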
commit 166124fde978b5a6c4412fb295c7f39711beb1b0
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Feb 9 23:24:09 2008 +0100
brk: help text typo fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/init/Kconfig b/init/Kconfig
index 455170e1c1e3..824d48cb67bf 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -587,7 +587,7 @@ config COMPAT_BRK
disabled, and can be overridden at runtime by setting
/proc/sys/kernel/randomize_va_space to 2.
- On non-ancient distros (post-2000 ones) Y is usually a safe choice.
+ On non-ancient distros (post-2000 ones) N is usually a safe choice.
config BASE_FULL
default y
commit 3701d863b43d05ffeb223d269583398f914fb5d3
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Feb 9 23:24:08 2008 +0100
x86: fixup more paravirt fallout
Use a common irq_return entry point for all the iret places, which
need the paravirt INTERRUPT_RETURN wrapper.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index be5c31d04884..824e21b80aad 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,8 @@ restore_nocheck_notrace:
RESTORE_REGS
addl $4, %esp # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
-1: INTERRUPT_RETURN
+ENTRY(irq_return)
+ INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
pushl $0 # no error code
@@ -418,7 +419,7 @@ iret_exc:
.previous
.section __ex_table,"a"
.align 4
- .long 1b,iret_exc
+ .long irq_return,iret_exc
.previous
CFI_RESTORE_STATE
@@ -865,20 +866,16 @@ nmi_espfix_stack:
RESTORE_REGS
lss 12+4(%esp), %esp # back to espfix stack
CFI_ADJUST_CFA_OFFSET -24
-1: INTERRUPT_RETURN
+ jmp irq_return
CFI_ENDPROC
-.section __ex_table,"a"
- .align 4
- .long 1b,iret_exc
-.previous
KPROBE_END(nmi)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
-1: iret
+ iret
.section __ex_table,"a"
.align 4
- .long 1b,iret_exc
+ .long native_iret, iret_exc
.previous
END(native_iret)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c7341e81941c..6be39a387c5a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -581,16 +581,24 @@ retint_restore_args: /* return to kernel space */
*/
TRACE_IRQS_IRETQ
restore_args:
- RESTORE_ARGS 0,8,0
-#ifdef CONFIG_PARAVIRT
+ RESTORE_ARGS 0,8,0
+
+ENTRY(irq_return)
INTERRUPT_RETURN
-#endif
+
+ .section __ex_table, "a"
+ .quad irq_return, bad_iret
+ .previous
+
+#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iretq
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
+#endif
+
.section .fixup,"ax"
bad_iret:
/*
@@ -804,7 +812,7 @@ paranoid_swapgs\trace:
SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
- INTERRUPT_RETURN
+ jmp irq_return
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
@@ -919,7 +927,7 @@ error_kernelspace:
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report a truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
- leaq native_iret(%rip),%rbp
+ leaq irq_return(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
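For reference, the .quad irq_return, bad_iret directives above emit
exception-table entries: pairs of (possibly-faulting instruction address,
fixup address). When the iret at irq_return faults, the trap handler finds the
entry and resumes at the fixup instead of oopsing. A sketch assuming the
simplified x86 layout of the time (fixup_one() is a hypothetical name; the
real kernel binary-searches a sorted table):

    /*
     * The era's x86 exception-table entry, plus a conceptual fixup
     * step as applied in the fault path:
     */
    struct exception_table_entry {
            unsigned long insn;     /* address that may fault (irq_return) */
            unsigned long fixup;    /* where to resume (bad_iret/iret_exc) */
    };

    static int fixup_one(struct pt_regs *regs,
                         const struct exception_table_entry *e)
    {
            if (regs->ip != e->insn)
                    return 0;
            regs->ip = e->fixup;    /* redirect the faulting instruction */
            return 1;
    }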