Patches contributed by Eötvös Lorand University
commit 7c178a26d3e753d2a4346d3e4b8aa549d387f698
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 23:27:29 2009 +0100
x86, mm: fault.c, remove #ifdef from fault_in_kernel_space()
Impact: cleanup
Removal of an #ifdef in fault_in_kernel_space(), by making
use of the new TASK_SIZE_MAX symbol which is now available
on 32-bit too.
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6fa9f175cba3..f195691ec26e 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -960,11 +960,7 @@ access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
static int fault_in_kernel_space(unsigned long address)
{
-#ifdef CONFIG_X86_32
- return address >= TASK_SIZE;
-#else
return address >= TASK_SIZE_MAX;
-#endif
}
/*
commit d951734654f76a370a89b4e531af9b765bd13541
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 23:32:28 2009 +0100
x86, mm: rename TASK_SIZE64 => TASK_SIZE_MAX
Impact: cleanup
Rename TASK_SIZE64 to TASK_SIZE_MAX, and provide the
define on 32-bit too. (mapped to TASK_SIZE)
This allows 32-bit code to make use of the (former-) TASK_SIZE64
symbol as well, in a clean way.
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 72914d0315e9..c7a98f738210 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -861,6 +861,7 @@ static inline void spin_lock_prefetch(const void *x)
* User space process size: 3GB (default).
*/
#define TASK_SIZE PAGE_OFFSET
+#define TASK_SIZE_MAX TASK_SIZE
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
@@ -920,7 +921,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
/*
* User space process size. 47bits minus one guard page.
*/
-#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE)
+#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
@@ -929,12 +930,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
0xc0000000 : 0xFFFFe000)
#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
- IA32_PAGE_OFFSET : TASK_SIZE64)
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
- IA32_PAGE_OFFSET : TASK_SIZE64)
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX TASK_SIZE64
+#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d2f7cd5b2c83..fb2159a5c817 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -268,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task)
if (test_tsk_thread_flag(task, TIF_IA32))
return IA32_PAGE_OFFSET - 3;
#endif
- return TASK_SIZE64 - 7;
+ return TASK_SIZE_MAX - 7;
}
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9c2dc5d79531..6fa9f175cba3 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -963,7 +963,7 @@ static int fault_in_kernel_space(unsigned long address)
#ifdef CONFIG_X86_32
return address >= TASK_SIZE;
#else
- return address >= TASK_SIZE64;
+ return address >= TASK_SIZE_MAX;
#endif
}
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 9c98cc6ba978..7133cdf9098b 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -85,8 +85,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
unsigned long addr, end;
unsigned offset;
end = (start + PMD_SIZE - 1) & PMD_MASK;
- if (end >= TASK_SIZE64)
- end = TASK_SIZE64;
+ if (end >= TASK_SIZE_MAX)
+ end = TASK_SIZE_MAX;
end -= len;
/* This loses some more bits than a modulo, but is cheaper */
offset = get_random_int() & (PTRS_PER_PTE - 1);
commit c3731c68668325abddee8665018c74c7156a57be
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 23:22:34 2009 +0100
x86, mm: fault.c, remove #ifdef from do_page_fault()
Impact: cleanup
do_page_fault() has this ugly #ifdef in its prototype:
#ifdef CONFIG_X86_64
asmlinkage
#endif
void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
Replace it with 'dotraplinkage' which maps to exactly the above
construct: nothing on 32-bit and asmlinkage on 64-bit.
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8fe2dd254df0..9c2dc5d79531 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -972,10 +972,8 @@ static int fault_in_kernel_space(unsigned long address)
* and the problem, and then passes it off to one of the appropriate
* routines.
*/
-#ifdef CONFIG_X86_64
-asmlinkage
-#endif
-void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
struct vm_area_struct *vma;
struct task_struct *tsk;
commit 1cc99544dde9e48602979f16b9309fade6e93051
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 23:07:48 2009 +0100
x86, mm: fault.c, unify oops handling
Impact: add oops-recursion check to 32-bit
Unify the oops state-machine, to the 64-bit version. It is
slightly more careful in that it does a recursion check
in oops_begin(), and is thus more likely to show the relevant
oops.
It also means that 32-bit will print one more line at the
end of pagefault triggered oopses:
printk(KERN_EMERG "CR2: %016lx\n", address);
Which is generally good information to be seen in partial-dump
digital-camera jpegs ;-)
The downside is the somewhat more complex critical path. Both
variants have been tested well meanwhile by kernel developers
crashing their boxes so i dont think this is a practical worry.
This removes 3 ugly #ifdefs from no_context() and makes the
function a lot nicer read.
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ebfaca3bbb12..8fe2dd254df0 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -659,11 +659,8 @@ no_context(struct pt_regs *regs, unsigned long error_code,
{
struct task_struct *tsk = current;
unsigned long *stackend;
-
-#ifdef CONFIG_X86_64
unsigned long flags;
int sig;
-#endif
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
@@ -690,11 +687,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice:
*/
-#ifdef CONFIG_X86_32
- bust_spinlocks(1);
-#else
flags = oops_begin();
-#endif
show_fault_oops(regs, error_code, address);
@@ -702,15 +695,10 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (*stackend != STACK_END_MAGIC)
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
- tsk->thread.cr2 = address;
- tsk->thread.trap_no = 14;
- tsk->thread.error_code = error_code;
+ tsk->thread.cr2 = address;
+ tsk->thread.trap_no = 14;
+ tsk->thread.error_code = error_code;
-#ifdef CONFIG_X86_32
- die("Oops", regs, error_code);
- bust_spinlocks(0);
- do_exit(SIGKILL);
-#else
sig = SIGKILL;
if (__die("Oops", regs, error_code))
sig = 0;
@@ -719,7 +707,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
printk(KERN_EMERG "CR2: %016lx\n", address);
oops_end(flags, regs, sig);
-#endif
}
/*
commit 8f7661496cece8320137d5e26808825498fd2b26
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 23:00:29 2009 +0100
x86, mm: fault.c, unify oops printing
Impact: refine/extend page fault related oops printing on 64-bit
- honor the pause_on_oops logic on 64-bit too
- print out NX fault warnings on 64-bit as well
- factor out the NX fault message to make it git-greppable and readable
Note that this means that we do the PF_INSTR check on 32-bit non-PAE
as well where it should not occur ... normally. Cannot hurt.
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4ce62fb80da7..ebfaca3bbb12 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -595,28 +595,24 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
return 0;
}
+static const char nx_warning[] = KERN_CRIT
+"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
+
static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
{
-#ifdef CONFIG_X86_32
if (!oops_may_print())
return;
-#endif
-#ifdef CONFIG_X86_PAE
if (error_code & PF_INSTR) {
unsigned int level;
pte_t *pte = lookup_address(address, &level);
- if (pte && pte_present(*pte) && !pte_exec(*pte)) {
- printk(KERN_CRIT "kernel tried to execute "
- "NX-protected page - exploit attempt? "
- "(uid: %d)\n", current_uid());
- }
+ if (pte && pte_present(*pte) && !pte_exec(*pte))
+ printk(nx_warning, current_uid());
}
-#endif
printk(KERN_ALERT "BUG: unable to handle kernel ");
if (address < PAGE_SIZE)
commit f2f13a8535174dbb813a0607a9d4737cfba98f6c
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 22:50:24 2009 +0100
x86, mm: fault.c, reorder functions
Impact: cleanup
Avoid a couple more #ifdefs by moving fundamentally non-unifiable
functions into a single #ifdef 32-bit / #else / #endif block in
fault.c: vmalloc*(), dump_pagetable(), check_vm8086_mode().
No code changed:
text data bss dec hex filename
4618 32 24 4674 1242 fault.o.before
4618 32 24 4674 1242 fault.o.after
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 379beaec6caa..4ce62fb80da7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -191,18 +191,124 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
force_sig_info(si_signo, &info, tsk);
}
-#ifdef CONFIG_X86_64
-static int bad_address(void *p)
+DEFINE_SPINLOCK(pgd_lock);
+LIST_HEAD(pgd_list);
+
+#ifdef CONFIG_X86_32
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
- unsigned long dummy;
+ unsigned index = pgd_index(address);
+ pgd_t *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
- return probe_kernel_address((unsigned long *)p, dummy);
+ pgd += index;
+ pgd_k = init_mm.pgd + index;
+
+ if (!pgd_present(*pgd_k))
+ return NULL;
+
+ /*
+ * set_pgd(pgd, *pgd_k); here would be useless on PAE
+ * and redundant with the set_pmd() on non-PAE. As would
+ * set_pud.
+ */
+ pud = pud_offset(pgd, address);
+ pud_k = pud_offset(pgd_k, address);
+ if (!pud_present(*pud_k))
+ return NULL;
+
+ pmd = pmd_offset(pud, address);
+ pmd_k = pmd_offset(pud_k, address);
+ if (!pmd_present(*pmd_k))
+ return NULL;
+
+ if (!pmd_present(*pmd)) {
+ set_pmd(pmd, *pmd_k);
+ arch_flush_lazy_mmu_mode();
+ } else {
+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ }
+
+ return pmd_k;
+}
+
+void vmalloc_sync_all(void)
+{
+ unsigned long address;
+
+ if (SHARED_KERNEL_PMD)
+ return;
+
+ for (address = VMALLOC_START & PMD_MASK;
+ address >= TASK_SIZE && address < FIXADDR_TOP;
+ address += PMD_SIZE) {
+
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ if (!vmalloc_sync_one(page_address(page), address))
+ break;
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+
+/*
+ * 32-bit:
+ *
+ * Handle a fault on the vmalloc or module mapping area
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ unsigned long pgd_paddr;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "current" here. We might be inside
+ * an interrupt in the middle of a task switch..
+ */
+ pgd_paddr = read_cr3();
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
+ if (!pmd_k)
+ return -1;
+
+ pte_k = pte_offset_kernel(pmd_k, address);
+ if (!pte_present(*pte_k))
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Did it hit the DOS screen memory VA from vm86 mode?
+ */
+static inline void
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
+ struct task_struct *tsk)
+{
+ unsigned long bit;
+
+ if (!v8086_mode(regs))
+ return;
+
+ bit = (address - 0xA0000) >> PAGE_SHIFT;
+ if (bit < 32)
+ tsk->thread.screen_bitmap |= 1 << bit;
}
-#endif
static void dump_pagetable(unsigned long address)
{
-#ifdef CONFIG_X86_32
__typeof__(pte_val(__pte(0))) page;
page = read_cr3();
@@ -239,7 +345,132 @@ static void dump_pagetable(unsigned long address)
}
printk("\n");
-#else /* CONFIG_X86_64 */
+}
+
+#else /* CONFIG_X86_64: */
+
+void vmalloc_sync_all(void)
+{
+ unsigned long address;
+
+ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
+ address += PGDIR_SIZE) {
+
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+
+/*
+ * 64-bit:
+ *
+ * Handle a fault on the vmalloc area
+ *
+ * This assumes no large pages in there.
+ */
+static noinline int vmalloc_fault(unsigned long address)
+{
+ pgd_t *pgd, *pgd_ref;
+ pud_t *pud, *pud_ref;
+ pmd_t *pmd, *pmd_ref;
+ pte_t *pte, *pte_ref;
+
+ /* Make sure we are in vmalloc area: */
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
+ return -1;
+
+ /*
+ * Copy kernel mappings over when needed. This can also
+ * happen within a race in page table update. In the later
+ * case just flush:
+ */
+ pgd = pgd_offset(current->active_mm, address);
+ pgd_ref = pgd_offset_k(address);
+ if (pgd_none(*pgd_ref))
+ return -1;
+
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+ /*
+ * Below here mismatches are bugs because these lower tables
+ * are shared:
+ */
+
+ pud = pud_offset(pgd, address);
+ pud_ref = pud_offset(pgd_ref, address);
+ if (pud_none(*pud_ref))
+ return -1;
+
+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ BUG();
+
+ pmd = pmd_offset(pud, address);
+ pmd_ref = pmd_offset(pud_ref, address);
+ if (pmd_none(*pmd_ref))
+ return -1;
+
+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ BUG();
+
+ pte_ref = pte_offset_kernel(pmd_ref, address);
+ if (!pte_present(*pte_ref))
+ return -1;
+
+ pte = pte_offset_kernel(pmd, address);
+
+ /*
+ * Don't use pte_page here, because the mappings can point
+ * outside mem_map, and the NUMA hash lookup cannot handle
+ * that:
+ */
+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
+ BUG();
+
+ return 0;
+}
+
+static const char errata93_warning[] =
+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
+KERN_ERR "******* Please consider a BIOS update.\n"
+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+
+/*
+ * No vm86 mode in 64-bit mode:
+ */
+static inline void
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
+ struct task_struct *tsk)
+{
+}
+
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+static void dump_pagetable(unsigned long address)
+{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
@@ -284,83 +515,9 @@ static void dump_pagetable(unsigned long address)
return;
bad:
printk("BAD\n");
-#endif
-}
-
-#ifdef CONFIG_X86_32
-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
-{
- unsigned index = pgd_index(address);
- pgd_t *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
-
- pgd += index;
- pgd_k = init_mm.pgd + index;
-
- if (!pgd_present(*pgd_k))
- return NULL;
-
- /*
- * set_pgd(pgd, *pgd_k); here would be useless on PAE
- * and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
- */
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
- if (!pud_present(*pud_k))
- return NULL;
-
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- return NULL;
-
- if (!pmd_present(*pmd)) {
- set_pmd(pmd, *pmd_k);
- arch_flush_lazy_mmu_mode();
- } else {
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
- }
-
- return pmd_k;
-}
-
-/*
- * Did it hit the DOS screen memory VA from vm86 mode?
- */
-static inline void
-check_v8086_mode(struct pt_regs *regs, unsigned long address,
- struct task_struct *tsk)
-{
- unsigned long bit;
-
- if (!v8086_mode(regs))
- return;
-
- bit = (address - 0xA0000) >> PAGE_SHIFT;
- if (bit < 32)
- tsk->thread.screen_bitmap |= 1 << bit;
-}
-
-#else /* CONFIG_X86_64: */
-
-static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
-
-/*
- * No vm86 mode in 64-bit mode:
- */
-static inline void
-check_v8086_mode(struct pt_regs *regs, unsigned long address,
- struct task_struct *tsk)
-{
}
-#endif
+#endif /* CONFIG_X86_64 */
/*
* Workaround for K8 erratum #93 & buggy BIOS.
@@ -795,109 +952,6 @@ spurious_fault(unsigned long error_code, unsigned long address)
return ret;
}
-/*
- * 32-bit:
- *
- * Handle a fault on the vmalloc or module mapping area
- *
- * 64-bit:
- *
- * Handle a fault on the vmalloc area
- *
- * This assumes no large pages in there.
- */
-static noinline int vmalloc_fault(unsigned long address)
-{
-#ifdef CONFIG_X86_32
- unsigned long pgd_paddr;
- pmd_t *pmd_k;
- pte_t *pte_k;
-
- /* Make sure we are in vmalloc area: */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "current" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- pgd_paddr = read_cr3();
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
- if (!pmd_k)
- return -1;
-
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- return -1;
-
- return 0;
-#else
- pgd_t *pgd, *pgd_ref;
- pud_t *pud, *pud_ref;
- pmd_t *pmd, *pmd_ref;
- pte_t *pte, *pte_ref;
-
- /* Make sure we are in vmalloc area: */
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
- return -1;
-
- /*
- * Copy kernel mappings over when needed. This can also
- * happen within a race in page table update. In the later
- * case just flush:
- */
- pgd = pgd_offset(current->active_mm, address);
- pgd_ref = pgd_offset_k(address);
- if (pgd_none(*pgd_ref))
- return -1;
-
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-
- /*
- * Below here mismatches are bugs because these lower tables
- * are shared:
- */
-
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
- if (pud_none(*pud_ref))
- return -1;
-
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
- BUG();
-
- pmd = pmd_offset(pud, address);
- pmd_ref = pmd_offset(pud_ref, address);
- if (pmd_none(*pmd_ref))
- return -1;
-
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
- BUG();
-
- pte_ref = pte_offset_kernel(pmd_ref, address);
- if (!pte_present(*pte_ref))
- return -1;
-
- pte = pte_offset_kernel(pmd, address);
-
- /*
- * Don't use pte_page here, because the mappings can point
- * outside mem_map, and the NUMA hash lookup cannot handle
- * that:
- */
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
- BUG();
-
- return 0;
-#endif
-}
-
int show_unhandled_signals = 1;
static inline int
@@ -1115,53 +1169,3 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
up_read(&mm->mmap_sem);
}
-
-DEFINE_SPINLOCK(pgd_lock);
-LIST_HEAD(pgd_list);
-
-void vmalloc_sync_all(void)
-{
- unsigned long address;
-
-#ifdef CONFIG_X86_32
- if (SHARED_KERNEL_PMD)
- return;
-
- for (address = VMALLOC_START & PMD_MASK;
- address >= TASK_SIZE && address < FIXADDR_TOP;
- address += PMD_SIZE) {
-
- unsigned long flags;
- struct page *page;
-
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- if (!vmalloc_sync_one(page_address(page), address))
- break;
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-#else /* CONFIG_X86_64 */
- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
- address += PGDIR_SIZE) {
-
- const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
-
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
-#endif
-}
commit b18018126f422f5b706fd750373425e10e84b486
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 22:42:57 2009 +0100
x86, mm, kprobes: fault.c, simplify notify_page_fault()
Impact: cleanup
Remove an #ifdef from notify_page_fault(). The function still
compiles to nothing in the !CONFIG_KPROBES case.
Introduce kprobes_built_in() and kprobe_fault_handler() helpers
to allow this - they returns 0 if !CONFIG_KPROBES.
No code changed:
text data bss dec hex filename
4618 32 24 4674 1242 fault.o.before
4618 32 24 4674 1242 fault.o.after
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fe99af4b86d9..379beaec6caa 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -68,11 +68,10 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
static inline int notify_page_fault(struct pt_regs *regs)
{
-#ifdef CONFIG_KPROBES
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
- if (!user_mode_vm(regs)) {
+ if (kprobes_built_in() && !user_mode_vm(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -80,9 +79,6 @@ static inline int notify_page_fault(struct pt_regs *regs)
}
return ret;
-#else
- return 0;
-#endif
}
/*
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 32851eef48f0..2ec6cc14a114 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -182,6 +182,14 @@ struct kprobe_blackpoint {
DECLARE_PER_CPU(struct kprobe *, current_kprobe);
DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+/*
+ * For #ifdef avoidance:
+ */
+static inline int kprobes_built_in(void)
+{
+ return 1;
+}
+
#ifdef CONFIG_KRETPROBES
extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs);
@@ -271,8 +279,16 @@ void unregister_kretprobes(struct kretprobe **rps, int num);
void kprobe_flush_task(struct task_struct *tk);
void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
-#else /* CONFIG_KPROBES */
+#else /* !CONFIG_KPROBES: */
+static inline int kprobes_built_in(void)
+{
+ return 0;
+}
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ return 0;
+}
static inline struct kprobe *get_kprobe(void *addr)
{
return NULL;
@@ -329,5 +345,5 @@ static inline void unregister_kretprobes(struct kretprobe **rps, int num)
static inline void kprobe_flush_task(struct task_struct *tk)
{
}
-#endif /* CONFIG_KPROBES */
-#endif /* _LINUX_KPROBES_H */
+#endif /* CONFIG_KPROBES */
+#endif /* _LINUX_KPROBES_H */
commit b814d41f0987c7648d7ed07471258101c95c026b
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 22:32:10 2009 +0100
x86, mm: fault.c, simplify kmmio_fault()
Impact: cleanup
Remove an #ifdef from kmmio_fault() - we can do this by
providing default implementations for is_kmmio_active()
and kmmio_handler(). The compiler optimizes it all away
in the !CONFIG_MMIOTRACE case.
Also, while at it, clean up mmiotrace.h a bit:
- standard header guards
- standard vertical spaces for structure definitions
No code changed (both with mmiotrace on and off in the config):
text data bss dec hex filename
2947 12 12 2971 b9b fault.o.before
2947 12 12 2971 b9b fault.o.after
Cc: Pekka Paalanen <pq@iki.fi>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 3e3661462739..fe99af4b86d9 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -55,13 +55,14 @@ enum x86_pf_error_code {
PF_INSTR = 1 << 4,
};
+/*
+ * (returns 0 if mmiotrace is disabled)
+ */
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
-#ifdef CONFIG_MMIOTRACE
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
-#endif
return 0;
}
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h
index 139d7c88d9c9..3d1b7bde1283 100644
--- a/include/linux/mmiotrace.h
+++ b/include/linux/mmiotrace.h
@@ -1,5 +1,5 @@
-#ifndef MMIOTRACE_H
-#define MMIOTRACE_H
+#ifndef _LINUX_MMIOTRACE_H
+#define _LINUX_MMIOTRACE_H
#include <linux/types.h>
#include <linux/list.h>
@@ -13,28 +13,34 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
unsigned long condition, struct pt_regs *);
struct kmmio_probe {
- struct list_head list; /* kmmio internal list */
- unsigned long addr; /* start location of the probe point */
- unsigned long len; /* length of the probe region */
- kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */
- kmmio_post_handler_t post_handler; /* Called after addr is executed */
- void *private;
+ /* kmmio internal list: */
+ struct list_head list;
+ /* start location of the probe point: */
+ unsigned long addr;
+ /* length of the probe region: */
+ unsigned long len;
+ /* Called before addr is executed: */
+ kmmio_pre_handler_t pre_handler;
+ /* Called after addr is executed: */
+ kmmio_post_handler_t post_handler;
+ void *private;
};
+extern unsigned int kmmio_count;
+
+extern int register_kmmio_probe(struct kmmio_probe *p);
+extern void unregister_kmmio_probe(struct kmmio_probe *p);
+
+#ifdef CONFIG_MMIOTRACE
/* kmmio is active by some kmmio_probes? */
static inline int is_kmmio_active(void)
{
- extern unsigned int kmmio_count;
return kmmio_count;
}
-extern int register_kmmio_probe(struct kmmio_probe *p);
-extern void unregister_kmmio_probe(struct kmmio_probe *p);
-
/* Called from page fault handler. */
extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
-#ifdef CONFIG_MMIOTRACE
/* Called from ioremap.c */
extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size,
void __iomem *addr);
@@ -43,7 +49,17 @@ extern void mmiotrace_iounmap(volatile void __iomem *addr);
/* For anyone to insert markers. Remember trailing newline. */
extern int mmiotrace_printk(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
-#else
+#else /* !CONFIG_MMIOTRACE: */
+static inline int is_kmmio_active(void)
+{
+ return 0;
+}
+
+static inline int kmmio_handler(struct pt_regs *regs, unsigned long addr)
+{
+ return 0;
+}
+
static inline void mmiotrace_ioremap(resource_size_t offset,
unsigned long size, void __iomem *addr)
{
@@ -63,28 +79,28 @@ static inline int mmiotrace_printk(const char *fmt, ...)
#endif /* CONFIG_MMIOTRACE */
enum mm_io_opcode {
- MMIO_READ = 0x1, /* struct mmiotrace_rw */
- MMIO_WRITE = 0x2, /* struct mmiotrace_rw */
- MMIO_PROBE = 0x3, /* struct mmiotrace_map */
- MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */
- MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
+ MMIO_READ = 0x1, /* struct mmiotrace_rw */
+ MMIO_WRITE = 0x2, /* struct mmiotrace_rw */
+ MMIO_PROBE = 0x3, /* struct mmiotrace_map */
+ MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */
+ MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */
};
struct mmiotrace_rw {
- resource_size_t phys; /* PCI address of register */
- unsigned long value;
- unsigned long pc; /* optional program counter */
- int map_id;
- unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */
- unsigned char width; /* size of register access in bytes */
+ resource_size_t phys; /* PCI address of register */
+ unsigned long value;
+ unsigned long pc; /* optional program counter */
+ int map_id;
+ unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */
+ unsigned char width; /* size of register access in bytes */
};
struct mmiotrace_map {
- resource_size_t phys; /* base address in PCI space */
- unsigned long virt; /* base virtual address */
- unsigned long len; /* mapping size */
- int map_id;
- unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */
+ resource_size_t phys; /* base address in PCI space */
+ unsigned long virt; /* base virtual address */
+ unsigned long len; /* mapping size */
+ int map_id;
+ unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */
};
/* in kernel/trace/trace_mmiotrace.c */
@@ -94,4 +110,4 @@ extern void mmio_trace_rw(struct mmiotrace_rw *rw);
extern void mmio_trace_mapping(struct mmiotrace_map *map);
extern int mmio_trace_printk(const char *fmt, va_list args);
-#endif /* MMIOTRACE_H */
+#endif /* _LINUX_MMIOTRACE_H */
commit 121d5d0a7e5808fbcfda484efd7ba840ac93450f
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 22:18:08 2009 +0100
x86, mm: fault.c, enable PF_RSVD checks on 32-bit too
Impact: improve page fault handling robustness
The 'PF_RSVD' flag (bit 3) of the page-fault error_code is a
relatively recent addition to x86 CPUs, so the 32-bit do_fault()
implementation never had it. This flag gets set when the CPU
detects nonzero values in any reserved bits of the page directory
entries.
Extend the existing 64-bit check for PF_RSVD in do_page_fault()
to 32-bit too. If we detect such a fault then we print a more
informative oops and the pagetables.
This unifies the code some more, removes an ugly #ifdef and improves
the 32-bit page fault code robustness a bit. It slightly increases
the 32-bit kernel text size.
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7dc0615c3cfe..3e3661462739 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -477,7 +477,6 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
dump_pagetable(address);
}
-#ifdef CONFIG_X86_64
static noinline void
pgtable_bad(struct pt_regs *regs, unsigned long error_code,
unsigned long address)
@@ -503,7 +502,6 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code,
oops_end(flags, regs, sig);
}
-#endif
static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
@@ -1015,10 +1013,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
local_irq_enable();
}
-#ifdef CONFIG_X86_64
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
-#endif
/*
* If we're in an interrupt, have no user context or are running
commit 8c938f9fae887f6e180bf802aa1c33cf74712aff
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 20 22:12:18 2009 +0100
x86, mm: fault.c, factor out the vm86 fault check
Impact: cleanup
Instead of an ugly, open-coded, #ifdef-ed vm86 related legacy check
in do_page_fault(), put it into the check_v8086_mode() helper
function and merge it with an existing #ifdef.
Also, simplify the code flow a tiny bit in the helper.
No code changed:
arch/x86/mm/fault.o:
text data bss dec hex filename
2711 12 12 2735 aaf fault.o.before
2711 12 12 2735 aaf fault.o.after
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 72973c7682ba..7dc0615c3cfe 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -328,14 +328,41 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
return pmd_k;
}
-#endif
-#ifdef CONFIG_X86_64
+/*
+ * Did it hit the DOS screen memory VA from vm86 mode?
+ */
+static inline void
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
+ struct task_struct *tsk)
+{
+ unsigned long bit;
+
+ if (!v8086_mode(regs))
+ return;
+
+ bit = (address - 0xA0000) >> PAGE_SHIFT;
+ if (bit < 32)
+ tsk->thread.screen_bitmap |= 1 << bit;
+}
+
+#else /* CONFIG_X86_64: */
+
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+
+/*
+ * No vm86 mode in 64-bit mode:
+ */
+static inline void
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
+ struct task_struct *tsk)
+{
+}
+
#endif
/*
@@ -1091,16 +1118,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
else
tsk->min_flt++;
-#ifdef CONFIG_X86_32
- /*
- * Did it hit the DOS screen memory VA from vm86 mode?
- */
- if (v8086_mode(regs)) {
- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
- if (bit < 32)
- tsk->thread.screen_bitmap |= 1 << bit;
- }
-#endif
+ check_v8086_mode(regs, address, tsk);
+
up_read(&mm->mmap_sem);
}