Patches contributed by Eötvös Lorand University
commit 55ce29ba16f82a31424a98988cf37c3babe1b7c8
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:58 2008 +0100
x86: cpa self-test, WARN_ON()
add a WARN_ON() to the cpa-self-test failure branch.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index a12dabbd5c33..0dce0e248a42 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -224,10 +224,12 @@ static __init int exercise_pageattr(void)
failed += print_split(&sc);
- if (failed)
+ if (failed) {
printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
- else
+ WARN_ON(1);
+ } else {
printk(KERN_INFO "CPA selftests PASSED\n");
+ }
return 0;
}
commit 12d6f21eacc21d84a809829543f2fe45c7e37319
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:58 2008 +0100
x86: do not PSE on CONFIG_DEBUG_PAGEALLOC=y
get more testing of the c_p_a() code done by not turning off
PSE on DEBUG_PAGEALLOC.
this simplifies the early pagetable setup code, and tests
the largepage-splitup code quite heavily.
In the end, all the largepages will be split up pretty quickly,
so there's no difference to how DEBUG_PAGEALLOC worked before.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bba850b05d0e..db28aa9e2f69 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -641,13 +641,6 @@ void __init early_cpu_init(void)
nexgen_init_cpu();
umc_init_cpu();
early_cpu_detect();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- /* pse is not compatible with on-the-fly unmapping,
- * disable it even if the cpus claim to support it.
- */
- setup_clear_cpu_cap(X86_FEATURE_PSE);
-#endif
}
/* Make sure %fs is initialized properly in idle threads */
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 9cf2fea54eb5..dd49b16b3a0e 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -61,13 +61,17 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
static int split_large_page(pte_t *kpte, unsigned long address)
{
pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ gfp_t gfp_flags = GFP_KERNEL;
unsigned long flags;
unsigned long addr;
pte_t *pbase, *tmp;
struct page *base;
int i, level;
- base = alloc_pages(GFP_KERNEL, 0);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ gfp_flags = GFP_ATOMIC;
+#endif
+ base = alloc_pages(gfp_flags, 0);
if (!base)
return -ENOMEM;
@@ -218,6 +222,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
numpages * PAGE_SIZE);
}
+ /*
+ * If page allocator is not up yet then do not call c_p_a():
+ */
+ if (!debug_pagealloc_enabled)
+ return;
+
/*
* the return value is ignored - the calls cannot fail,
* large pages are disabled at boot time.
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 9411a2d3f19c..fccb563e2305 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -29,11 +29,6 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot);
int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
void clflush_cache_range(void *addr, int size);
-#ifdef CONFIG_DEBUG_PAGEALLOC
-/* internal debugging function */
-void kernel_map_pages(struct page *page, int numpages, int enable);
-#endif
-
#ifdef CONFIG_DEBUG_RODATA
void mark_rodata_ro(void);
#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3c22d971afa7..1bba6789a50a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1118,9 +1118,21 @@ static inline void vm_stat_account(struct mm_struct *mm,
}
#endif /* CONFIG_PROC_FS */
-#ifndef CONFIG_DEBUG_PAGEALLOC
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern int debug_pagealloc_enabled;
+
+extern void kernel_map_pages(struct page *page, int numpages, int enable);
+
+static inline void enable_debug_pagealloc(void)
+{
+ debug_pagealloc_enabled = 1;
+}
+#else
static inline void
kernel_map_pages(struct page *page, int numpages, int enable) {}
+static inline void enable_debug_pagealloc(void)
+{
+}
#endif
extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
diff --git a/init/main.c b/init/main.c
index 3316dffe3e57..cb81ed116f62 100644
--- a/init/main.c
+++ b/init/main.c
@@ -318,6 +318,10 @@ static int __init unknown_bootoption(char *param, char *val)
return 0;
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
+int __read_mostly debug_pagealloc_enabled = 0;
+#endif
+
static int __init init_setup(char *str)
{
unsigned int i;
@@ -552,6 +556,7 @@ asmlinkage void __init start_kernel(void)
preempt_disable();
build_all_zonelists();
page_alloc_init();
+ enable_debug_pagealloc();
printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
parse_early_param();
parse_args("Booting kernel", static_command_line, __start___param,
commit 9a3dc7804e9856668caef41efc54179e61ffccc0
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:57 2008 +0100
x86: cpa: simplify locking
further simplify cpa locking: since the largepage-split is a
slowpath, use the pgd_lock for the whole operation, instead
of the mmap_sem.
This also makes it suitable for DEBUG_PAGEALLOC purposes again.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 0966023dfd70..9cf2fea54eb5 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -37,9 +37,8 @@ pte_t *lookup_address(unsigned long address, int *level)
return pte_offset_kernel(pmd, address);
}
-static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
- unsigned long flags;
struct page *page;
/* change init_mm */
@@ -47,7 +46,6 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
if (SHARED_KERNEL_PMD)
return;
- spin_lock_irqsave(&pgd_lock, flags);
for (page = pgd_list; page; page = (struct page *)page->index) {
pgd_t *pgd;
pud_t *pud;
@@ -58,12 +56,12 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
pmd = pmd_offset(pud, address);
set_pte_atomic((pte_t *)pmd, pte);
}
- spin_unlock_irqrestore(&pgd_lock, flags);
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ unsigned long flags;
unsigned long addr;
pte_t *pbase, *tmp;
struct page *base;
@@ -73,7 +71,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
if (!base)
return -ENOMEM;
- down_write(&init_mm.mmap_sem);
+ spin_lock_irqsave(&pgd_lock, flags);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -95,11 +93,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
/*
* Install the new, split up pagetable:
*/
- set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+ __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
base = NULL;
out_unlock:
- up_write(&init_mm.mmap_sem);
+ spin_unlock_irqrestore(&pgd_lock, flags);
if (base)
__free_pages(base, 0);
commit 7afe15b9d888050435cd154906828df88d4e667d
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:57 2008 +0100
x86: simplify cpa largepage split, #3
simplify cpa largepage split: push the reference protection bits
into the largepage-splitting function.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index ad0868bfa374..0966023dfd70 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -61,13 +61,13 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
spin_unlock_irqrestore(&pgd_lock, flags);
}
-static int
-split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
+static int split_large_page(pte_t *kpte, unsigned long address)
{
- int i, level;
+ pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
unsigned long addr;
pte_t *pbase, *tmp;
struct page *base;
+ int i, level;
base = alloc_pages(GFP_KERNEL, 0);
if (!base)
@@ -109,11 +109,9 @@ split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
static int __change_page_attr(struct page *page, pgprot_t prot)
{
- pgprot_t ref_prot = PAGE_KERNEL;
struct page *kpte_page;
unsigned long address;
int level, err = 0;
- pgprot_t oldprot;
pte_t *kpte;
BUG_ON(PageHighMem(page));
@@ -124,7 +122,6 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
if (!kpte)
return -EINVAL;
- oldprot = pte_pgprot(*kpte);
kpte_page = virt_to_page(kpte);
BUG_ON(PageLRU(kpte_page));
BUG_ON(PageCompound(kpte_page));
@@ -137,16 +134,10 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
address < (unsigned long)&_etext &&
(pgprot_val(prot) & _PAGE_NX));
- if ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
- ref_prot = PAGE_KERNEL_EXEC;
-
- ref_prot = canon_pgprot(ref_prot);
- prot = canon_pgprot(prot);
-
if (level == 3) {
- set_pte_atomic(kpte, mk_pte(page, prot));
+ set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
} else {
- err = split_large_page(kpte, address, ref_prot);
+ err = split_large_page(kpte, address);
if (!err)
goto repeat;
}
commit 5508a7489659f1eed108d3ae7c2d36c8794ee330
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:56 2008 +0100
x86: cpa self-test fixes
cpa self-test fixes. change_page_attr_addr() was buggy, it
passed in a virtual address as a physical one.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 91e05a26004d..a12dabbd5c33 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -106,12 +106,6 @@ static __init int print_split(struct split_state *s)
return err;
}
-static __init int state_same(struct split_state *a, struct split_state *b)
-{
- return a->lpg == b->lpg && a->gpg == b->gpg && a->spg == b->spg &&
- a->exec == b->exec;
-}
-
static unsigned long __initdata addr[NTEST];
static unsigned int __initdata len[NTEST];
@@ -229,8 +223,6 @@ static __init int exercise_pageattr(void)
global_flush_tlb();
failed += print_split(&sc);
- if (!state_same(&sa, &sc))
- failed++;
if (failed)
printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 14c923b3b07f..ad0868bfa374 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -79,8 +79,10 @@ split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
* up for us already:
*/
tmp = lookup_address(address, &level);
- if (tmp != kpte)
+ if (tmp != kpte) {
+ WARN_ON_ONCE(1);
goto out_unlock;
+ }
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
@@ -181,17 +183,19 @@ EXPORT_SYMBOL(change_page_attr);
int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot)
{
int i;
- unsigned long pfn = (addr >> PAGE_SHIFT);
+ unsigned long pfn = (__pa(addr) >> PAGE_SHIFT);
for (i = 0; i < numpages; i++) {
if (!pfn_valid(pfn + i)) {
+ WARN_ON_ONCE(1);
break;
} else {
int level;
pte_t *pte = lookup_address(addr + i*PAGE_SIZE, &level);
- BUG_ON(pte && !pte_none(*pte));
+ BUG_ON(pte && pte_none(*pte));
}
}
+
return change_page_attr(virt_to_page(addr), i, prot);
}
commit bb5c2dbd57d93a36b0386dd783dd95e0cbaaa23f
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:56 2008 +0100
x86: further cpa largepage-split cleanups
further cpa largepage-split cleanups: make the splitup isolated
functionality, without leaking details back into __change_page_attr().
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 1011b21f8db0..14c923b3b07f 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -37,33 +37,6 @@ pte_t *lookup_address(unsigned long address, int *level)
return pte_offset_kernel(pmd, address);
}
-static struct page *
-split_large_page(unsigned long address, pgprot_t ref_prot)
-{
- unsigned long addr;
- struct page *base;
- pte_t *pbase;
- int i;
-
- base = alloc_pages(GFP_KERNEL, 0);
- if (!base)
- return NULL;
-
- /*
- * page_private is used to track the number of entries in
- * the page table page that have non standard attributes.
- */
- address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
- pbase = (pte_t *)page_address(base);
- paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
- set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
-
- return base;
-}
-
static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
unsigned long flags;
@@ -88,14 +61,58 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
spin_unlock_irqrestore(&pgd_lock, flags);
}
+static int
+split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
+{
+ int i, level;
+ unsigned long addr;
+ pte_t *pbase, *tmp;
+ struct page *base;
+
+ base = alloc_pages(GFP_KERNEL, 0);
+ if (!base)
+ return -ENOMEM;
+
+ down_write(&init_mm.mmap_sem);
+ /*
+ * Check for races, another CPU might have split this page
+ * up for us already:
+ */
+ tmp = lookup_address(address, &level);
+ if (tmp != kpte)
+ goto out_unlock;
+
+ address = __pa(address);
+ addr = address & LARGE_PAGE_MASK;
+ pbase = (pte_t *)page_address(base);
+ paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+
+ for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+ set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+
+ /*
+ * Install the new, split up pagetable:
+ */
+ set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+ base = NULL;
+
+out_unlock:
+ up_write(&init_mm.mmap_sem);
+
+ if (base)
+ __free_pages(base, 0);
+
+ return 0;
+}
+
static int __change_page_attr(struct page *page, pgprot_t prot)
{
pgprot_t ref_prot = PAGE_KERNEL;
struct page *kpte_page;
unsigned long address;
+ int level, err = 0;
pgprot_t oldprot;
pte_t *kpte;
- int level;
BUG_ON(PageHighMem(page));
address = (unsigned long)page_address(page);
@@ -127,19 +144,11 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
if (level == 3) {
set_pte_atomic(kpte, mk_pte(page, prot));
} else {
- struct page *split;
-
- split = split_large_page(address, ref_prot);
- if (!split)
- return -ENOMEM;
-
- /*
- * There's a small window here to waste a bit of RAM:
- */
- set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
- goto repeat;
+ err = split_large_page(kpte, address, ref_prot);
+ if (!err)
+ goto repeat;
}
- return 0;
+ return err;
}
/*
commit 97f99fedf27f337e2d3d95ca01e321beb26edc3d
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:55 2008 +0100
x86: simplify 32-bit cpa largepage splitting
simplify 32-bit cpa largepage splitting: do a pure split and repeat
the pte lookup to get the new pte modified.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 570a37bf1401..1011b21f8db0 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -38,7 +38,7 @@ pte_t *lookup_address(unsigned long address, int *level)
}
static struct page *
-split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
+split_large_page(unsigned long address, pgprot_t ref_prot)
{
unsigned long addr;
struct page *base;
@@ -58,10 +58,9 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
pbase = (pte_t *)page_address(base);
paravirt_alloc_pt(&init_mm, page_to_pfn(base));
- for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : ref_prot));
- }
+ for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+ set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+
return base;
}
@@ -101,6 +100,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
BUG_ON(PageHighMem(page));
address = (unsigned long)page_address(page);
+repeat:
kpte = lookup_address(address, &level);
if (!kpte)
return -EINVAL;
@@ -128,7 +128,8 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
set_pte_atomic(kpte, mk_pte(page, prot));
} else {
struct page *split;
- split = split_large_page(address, prot, ref_prot);
+
+ split = split_large_page(address, ref_prot);
if (!split)
return -ENOMEM;
@@ -136,6 +137,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
* There's a small window here to waste a bit of RAM:
*/
set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
+ goto repeat;
}
return 0;
}
commit 78c94abaea55df7003f3ad0e5b6c78ee1cc860bb
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:55 2008 +0100
x86: simplify the 32-bit cpa code
simplify the 32-bit cpa code.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 66688a630839..570a37bf1401 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -15,9 +15,6 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
-static DEFINE_SPINLOCK(cpa_lock);
-static struct list_head df_list = LIST_HEAD_INIT(df_list);
-
pte_t *lookup_address(unsigned long address, int *level)
{
pgd_t *pgd = pgd_offset_k(address);
@@ -48,9 +45,7 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
pte_t *pbase;
int i;
- spin_unlock_irq(&cpa_lock);
base = alloc_pages(GFP_KERNEL, 0);
- spin_lock_irq(&cpa_lock);
if (!base)
return NULL;
@@ -58,9 +53,6 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
* page_private is used to track the number of entries in
* the page table page that have non standard attributes.
*/
- SetPagePrivate(base);
- page_private(base) = 0;
-
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
@@ -73,36 +65,6 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
return base;
}
-static void cache_flush_page(struct page *p)
-{
- void *addr = page_address(p);
- int i;
-
- for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- clflush(addr + i);
-}
-
-static void flush_kernel_map(void *arg)
-{
- struct list_head *lh = (struct list_head *)arg;
- struct page *p;
-
- /*
- * Flush all to work around Errata in early athlons regarding
- * large page flushing.
- */
- __flush_tlb_all();
-
- /* High level code is not ready for clflush yet */
- if (0 && cpu_has_clflush) {
- list_for_each_entry(p, lh, lru)
- cache_flush_page(p);
- } else {
- if (boot_cpu_data.x86_model >= 4)
- wbinvd();
- }
-}
-
static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
unsigned long flags;
@@ -127,36 +89,12 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
spin_unlock_irqrestore(&pgd_lock, flags);
}
-/*
- * No more special protections in this 2/4MB area - revert to a large
- * page again.
- */
-static inline void revert_page(struct page *kpte_page, unsigned long address)
-{
- pgprot_t ref_prot;
- pte_t *linear;
-
- ref_prot =
- ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
- ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;
-
- linear = (pte_t *)
- pmd_offset(pud_offset(pgd_offset_k(address), address), address);
- set_pmd_pte(linear, address,
- pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
- ref_prot));
-}
-
-static inline void save_page(struct page *kpte_page)
-{
- if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
- list_add(&kpte_page->lru, &df_list);
-}
-
static int __change_page_attr(struct page *page, pgprot_t prot)
{
+ pgprot_t ref_prot = PAGE_KERNEL;
struct page *kpte_page;
unsigned long address;
+ pgprot_t oldprot;
pte_t *kpte;
int level;
@@ -167,58 +105,41 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
if (!kpte)
return -EINVAL;
+ oldprot = pte_pgprot(*kpte);
kpte_page = virt_to_page(kpte);
BUG_ON(PageLRU(kpte_page));
BUG_ON(PageCompound(kpte_page));
- if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
- if (level == 3) {
- set_pte_atomic(kpte, mk_pte(page, prot));
- } else {
- struct page *split;
- pgprot_t ref_prot;
-
- ref_prot =
- ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
- ? PAGE_KERNEL_EXEC : PAGE_KERNEL;
- split = split_large_page(address, prot, ref_prot);
- if (!split)
- return -ENOMEM;
-
- set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
- kpte_page = split;
- }
- page_private(kpte_page)++;
- } else {
- if (level == 3) {
- set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
- BUG_ON(page_private(kpte_page) == 0);
- page_private(kpte_page)--;
- } else
- BUG();
- }
-
/*
- * If the pte was reserved, it means it was created at boot
- * time (not via split_large_page) and in turn we must not
- * replace it with a largepage.
+ * Better fail early if someone sets the kernel text to NX.
+ * Does not cover __inittext
*/
+ BUG_ON(address >= (unsigned long)&_text &&
+ address < (unsigned long)&_etext &&
+ (pgprot_val(prot) & _PAGE_NX));
- save_page(kpte_page);
- if (!PageReserved(kpte_page)) {
- if (cpu_has_pse && (page_private(kpte_page) == 0)) {
- paravirt_release_pt(page_to_pfn(kpte_page));
- revert_page(kpte_page, address);
- }
+ if ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
+ ref_prot = PAGE_KERNEL_EXEC;
+
+ ref_prot = canon_pgprot(ref_prot);
+ prot = canon_pgprot(prot);
+
+ if (level == 3) {
+ set_pte_atomic(kpte, mk_pte(page, prot));
+ } else {
+ struct page *split;
+ split = split_large_page(address, prot, ref_prot);
+ if (!split)
+ return -ENOMEM;
+
+ /*
+ * There's a small window here to waste a bit of RAM:
+ */
+ set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
}
return 0;
}
-static inline void flush_map(struct list_head *l)
-{
- on_each_cpu(flush_kernel_map, l, 1, 1);
-}
-
/*
* Change the page attributes of an page in the linear mapping.
*
@@ -234,40 +155,52 @@ static inline void flush_map(struct list_head *l)
*/
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
- unsigned long flags;
int err = 0, i;
- spin_lock_irqsave(&cpa_lock, flags);
for (i = 0; i < numpages; i++, page++) {
err = __change_page_attr(page, prot);
if (err)
break;
}
- spin_unlock_irqrestore(&cpa_lock, flags);
return err;
}
EXPORT_SYMBOL(change_page_attr);
-void global_flush_tlb(void)
+int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot)
{
- struct page *pg, *next;
- struct list_head l;
+ int i;
+ unsigned long pfn = (addr >> PAGE_SHIFT);
+ for (i = 0; i < numpages; i++) {
+ if (!pfn_valid(pfn + i)) {
+ break;
+ } else {
+ int level;
+ pte_t *pte = lookup_address(addr + i*PAGE_SIZE, &level);
+ BUG_ON(pte && !pte_none(*pte));
+ }
+ }
+ return change_page_attr(virt_to_page(addr), i, prot);
+}
+
+static void flush_kernel_map(void *arg)
+{
+ /*
+ * Flush all to work around Errata in early athlons regarding
+ * large page flushing.
+ */
+ __flush_tlb_all();
+
+ if (boot_cpu_data.x86_model >= 4)
+ wbinvd();
+}
+
+void global_flush_tlb(void)
+{
BUG_ON(irqs_disabled());
- spin_lock_irq(&cpa_lock);
- list_replace_init(&df_list, &l);
- spin_unlock_irq(&cpa_lock);
- flush_map(&l);
- list_for_each_entry_safe(pg, next, &l, lru) {
- list_del(&pg->lru);
- clear_bit(PG_arch_1, &pg->flags);
- if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
- continue;
- ClearPagePrivate(pg);
- __free_page(pg);
- }
+ on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);
commit cd58289667293593b04fd315ec7f2f37589134cb
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:54 2008 +0100
x86: fix more non-global TLB flushes
fix more __flush_tlb() instances, out of caution.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 58438bafedca..a317336cdeaa 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@ static void __init zap_identity_mappings(void)
{
pgd_t *pgd = pgd_offset_k(0UL);
pgd_clear(pgd);
- __flush_tlb();
+ __flush_tlb_all();
}
/* Don't add a printk in there. printk relies on the PDA which is not initialized
commit 0e3a95492989e452a33e5df9b51365da574b854d
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:49 2008 +0100
x86: early_ioremap_init(), enhance warnings
enhance the debug warning in early_ioremap_init().
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index b743de841f68..f8e6c4709cc2 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -243,7 +243,22 @@ void __init early_ioremap_init(void)
pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
*pgd = __pa(bm_pte) | _PAGE_TABLE;
memset(bm_pte, 0, sizeof(bm_pte));
- BUG_ON(pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+ /*
+ * The boot-ioremap range spans multiple pgds, for which
+ * we are not prepared:
+ */
+ if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+ WARN_ON(1);
+ printk("pgd %p != %p\n",
+ pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+ printk("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ printk("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ printk("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ printk("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
+ }
}
void __init early_ioremap_clear(void)