Patches contributed by Eötvös Loránd University


commit 55ce29ba16f82a31424a98988cf37c3babe1b7c8
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:58 2008 +0100

    x86: cpa self-test, WARN_ON()
    
    add a WARN_ON() to the cpa-self-test failure branch.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index a12dabbd5c33..0dce0e248a42 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -224,10 +224,12 @@ static __init int exercise_pageattr(void)
 
 	failed += print_split(&sc);
 
-	if (failed)
+	if (failed) {
 		printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
-	else
+		WARN_ON(1);
+	} else {
 		printk(KERN_INFO "CPA selftests PASSED\n");
+	}
 
 	return 0;
 }

commit 12d6f21eacc21d84a809829543f2fe45c7e37319
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:58 2008 +0100

    x86: do not PSE on CONFIG_DEBUG_PAGEALLOC=y
    
    get more testing of the c_p_a() code done by not turning off
    PSE on DEBUG_PAGEALLOC.
    
    this simplifies the early pagetable setup code, and tests
    the largepage-splitup code quite heavily.
    
    In the end, all the largepages will be split up pretty quickly,
    so there's no difference to how DEBUG_PAGEALLOC worked before.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bba850b05d0e..db28aa9e2f69 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -641,13 +641,6 @@ void __init early_cpu_init(void)
 	nexgen_init_cpu();
 	umc_init_cpu();
 	early_cpu_detect();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	/* pse is not compatible with on-the-fly unmapping,
-	 * disable it even if the cpus claim to support it.
-	 */
-	setup_clear_cpu_cap(X86_FEATURE_PSE);
-#endif
 }
 
 /* Make sure %fs is initialized properly in idle threads */
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 9cf2fea54eb5..dd49b16b3a0e 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -61,13 +61,17 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+	gfp_t gfp_flags = GFP_KERNEL;
 	unsigned long flags;
 	unsigned long addr;
 	pte_t *pbase, *tmp;
 	struct page *base;
 	int i, level;
 
-	base = alloc_pages(GFP_KERNEL, 0);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	gfp_flags = GFP_ATOMIC;
+#endif
+	base = alloc_pages(gfp_flags, 0);
 	if (!base)
 		return -ENOMEM;
 
@@ -218,6 +222,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 					   numpages * PAGE_SIZE);
 	}
 
+	/*
+	 * If page allocator is not up yet then do not call c_p_a():
+	 */
+	if (!debug_pagealloc_enabled)
+		return;
+
 	/*
 	 * the return value is ignored - the calls cannot fail,
 	 * large pages are disabled at boot time.
diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h
index 9411a2d3f19c..fccb563e2305 100644
--- a/include/asm-x86/cacheflush.h
+++ b/include/asm-x86/cacheflush.h
@@ -29,11 +29,6 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot);
 int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot);
 void clflush_cache_range(void *addr, int size);
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-/* internal debugging function */
-void kernel_map_pages(struct page *page, int numpages, int enable);
-#endif
-
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 #endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3c22d971afa7..1bba6789a50a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1118,9 +1118,21 @@ static inline void vm_stat_account(struct mm_struct *mm,
 }
 #endif /* CONFIG_PROC_FS */
 
-#ifndef CONFIG_DEBUG_PAGEALLOC
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern int debug_pagealloc_enabled;
+
+extern void kernel_map_pages(struct page *page, int numpages, int enable);
+
+static inline void enable_debug_pagealloc(void)
+{
+	debug_pagealloc_enabled = 1;
+}
+#else
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable) {}
+static inline void enable_debug_pagealloc(void)
+{
+}
 #endif
 
 extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
diff --git a/init/main.c b/init/main.c
index 3316dffe3e57..cb81ed116f62 100644
--- a/init/main.c
+++ b/init/main.c
@@ -318,6 +318,10 @@ static int __init unknown_bootoption(char *param, char *val)
 	return 0;
 }
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+int __read_mostly debug_pagealloc_enabled = 0;
+#endif
+
 static int __init init_setup(char *str)
 {
 	unsigned int i;
@@ -552,6 +556,7 @@ asmlinkage void __init start_kernel(void)
 	preempt_disable();
 	build_all_zonelists();
 	page_alloc_init();
+	enable_debug_pagealloc();
 	printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line);
 	parse_early_param();
 	parse_args("Booting kernel", static_command_line, __start___param,

commit 9a3dc7804e9856668caef41efc54179e61ffccc0
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:57 2008 +0100

    x86: cpa: simplify locking
    
    further simplify cpa locking: since the largepage-split is a
    slowpath, use the pgd_lock for the whole operation, instead
    of the mmap_sem.
    
    This also makes it suitable for DEBUG_PAGEALLOC purposes again.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 0966023dfd70..9cf2fea54eb5 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -37,9 +37,8 @@ pte_t *lookup_address(unsigned long address, int *level)
 	return pte_offset_kernel(pmd, address);
 }
 
-static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
-	unsigned long flags;
 	struct page *page;
 
 	/* change init_mm */
@@ -47,7 +46,6 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	spin_lock_irqsave(&pgd_lock, flags);
 	for (page = pgd_list; page; page = (struct page *)page->index) {
 		pgd_t *pgd;
 		pud_t *pud;
@@ -58,12 +56,12 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 		pmd = pmd_offset(pud, address);
 		set_pte_atomic((pte_t *)pmd, pte);
 	}
-	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+	unsigned long flags;
 	unsigned long addr;
 	pte_t *pbase, *tmp;
 	struct page *base;
@@ -73,7 +71,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (!base)
 		return -ENOMEM;
 
-	down_write(&init_mm.mmap_sem);
+	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -95,11 +93,11 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	/*
 	 * Install the new, split up pagetable:
 	 */
-	set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
 	base = NULL;
 
 out_unlock:
-	up_write(&init_mm.mmap_sem);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 
 	if (base)
 		__free_pages(base, 0);
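
The locking change leans on the fact that the split is a slowpath: the pte
page is still allocated before the lock is taken, and the lookup_address()
recheck under pgd_lock handles the case where another CPU split the same
large page in the meantime. A userspace analog of that allocate-then-recheck
shape (a sketch, not kernel code):

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static void *current_entry;             /* stands in for the live pmd entry */

    static int split_entry(void *expected)
    {
            void *base = malloc(4096);      /* stands in for alloc_pages(GFP_KERNEL, 0) */
            if (!base)
                    return -1;              /* -ENOMEM in the kernel version */

            pthread_mutex_lock(&lock);
            if (current_entry != expected) {        /* raced: already split for us */
                    pthread_mutex_unlock(&lock);
                    free(base);                     /* matches the __free_pages(base, 0) tail */
                    return 0;
            }
            current_entry = base;                   /* stands in for __set_pmd_pte() */
            pthread_mutex_unlock(&lock);
            return 0;
    }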

commit 7afe15b9d888050435cd154906828df88d4e667d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:57 2008 +0100

    x86: simplify cpa largepage split, #3
    
    simplify cpa largepage split: push the reference protection bits
    into the largepage-splitting function.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index ad0868bfa374..0966023dfd70 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -61,13 +61,13 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-static int
-split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
+static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	int i, level;
+	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 	unsigned long addr;
 	pte_t *pbase, *tmp;
 	struct page *base;
+	int i, level;
 
 	base = alloc_pages(GFP_KERNEL, 0);
 	if (!base)
@@ -109,11 +109,9 @@ split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
 
 static int __change_page_attr(struct page *page, pgprot_t prot)
 {
-	pgprot_t ref_prot = PAGE_KERNEL;
 	struct page *kpte_page;
 	unsigned long address;
 	int level, err = 0;
-	pgprot_t oldprot;
 	pte_t *kpte;
 
 	BUG_ON(PageHighMem(page));
@@ -124,7 +122,6 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	if (!kpte)
 		return -EINVAL;
 
-	oldprot = pte_pgprot(*kpte);
 	kpte_page = virt_to_page(kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
@@ -137,16 +134,10 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 		address < (unsigned long)&_etext &&
 	       (pgprot_val(prot) & _PAGE_NX));
 
-	if ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-		ref_prot = PAGE_KERNEL_EXEC;
-
-	ref_prot = canon_pgprot(ref_prot);
-	prot = canon_pgprot(prot);
-
 	if (level == 3) {
-		set_pte_atomic(kpte, mk_pte(page, prot));
+		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
 	} else {
-		err = split_large_page(kpte, address, ref_prot);
+		err = split_large_page(kpte, address);
 		if (!err)
 			goto repeat;
 	}
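
The simplification is in the first hunk: rather than __change_page_attr()
recomputing a reference protection (PAGE_KERNEL vs. PAGE_KERNEL_EXEC
depending on whether the address falls inside kernel text) and passing it
down, the split now derives it from the large pte it is about to replace, so
the new 4K entries reproduce exactly what the large mapping granted:

    /* inherit the old large mapping's protections, minus the huge/PSE bit */
    pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));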

commit 5508a7489659f1eed108d3ae7c2d36c8794ee330
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:56 2008 +0100

    x86: cpa self-test fixes
    
    cpa self-test fixes. change_page_attr_addr() was buggy: it
    passed in a virtual address as a physical one.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 91e05a26004d..a12dabbd5c33 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -106,12 +106,6 @@ static __init int print_split(struct split_state *s)
 	return err;
 }
 
-static __init int state_same(struct split_state *a, struct split_state *b)
-{
-	return a->lpg == b->lpg && a->gpg == b->gpg && a->spg == b->spg &&
-		a->exec == b->exec;
-}
-
 static unsigned long __initdata addr[NTEST];
 static unsigned int __initdata len[NTEST];
 
@@ -229,8 +223,6 @@ static __init int exercise_pageattr(void)
 	global_flush_tlb();
 
 	failed += print_split(&sc);
-	if (!state_same(&sa, &sc))
-		failed++;
 
 	if (failed)
 		printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 14c923b3b07f..ad0868bfa374 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -79,8 +79,10 @@ split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
 	 * up for us already:
 	 */
 	tmp = lookup_address(address, &level);
-	if (tmp != kpte)
+	if (tmp != kpte) {
+		WARN_ON_ONCE(1);
 		goto out_unlock;
+	}
 
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
@@ -181,17 +183,19 @@ EXPORT_SYMBOL(change_page_attr);
 int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot)
 {
 	int i;
-	unsigned long pfn = (addr >> PAGE_SHIFT);
+	unsigned long pfn = (__pa(addr) >> PAGE_SHIFT);
 
 	for (i = 0; i < numpages; i++) {
 		if (!pfn_valid(pfn + i)) {
+			WARN_ON_ONCE(1);
 			break;
 		} else {
 			int level;
 			pte_t *pte = lookup_address(addr + i*PAGE_SIZE, &level);
-			BUG_ON(pte && !pte_none(*pte));
+			BUG_ON(pte && pte_none(*pte));
 		}
 	}
+
 	return change_page_attr(virt_to_page(addr), i, prot);
 }
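
Two distinct fixes are folded in here (a reading of the hunks, not extra
patch content): change_page_attr_addr() derived a pfn directly from a kernel
virtual address, and the BUG_ON() tested the inverted condition, firing when
a looked-up pte was present rather than when it was unexpectedly empty. The
pfn part, spelled out:

    /*
     * addr is a linear-mapping kernel virtual address; the pfn comes from
     * the physical address, i.e. __pa(addr), not from addr itself:
     */
    unsigned long pfn = __pa(addr) >> PAGE_SHIFT;   /* was: addr >> PAGE_SHIFT */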
 

commit bb5c2dbd57d93a36b0386dd783dd95e0cbaaa23f
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:56 2008 +0100

    x86: further cpa largepage-split cleanups
    
    further cpa largepage-split cleanups: make the split-up an isolated piece of
    functionality, without leaking details back into __change_page_attr().
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 1011b21f8db0..14c923b3b07f 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -37,33 +37,6 @@ pte_t *lookup_address(unsigned long address, int *level)
 	return pte_offset_kernel(pmd, address);
 }
 
-static struct page *
-split_large_page(unsigned long address, pgprot_t ref_prot)
-{
-	unsigned long addr;
-	struct page *base;
-	pte_t *pbase;
-	int i;
-
-	base = alloc_pages(GFP_KERNEL, 0);
-	if (!base)
-		return NULL;
-
-	/*
-	 * page_private is used to track the number of entries in
-	 * the page table page that have non standard attributes.
-	 */
-	address = __pa(address);
-	addr = address & LARGE_PAGE_MASK;
-	pbase = (pte_t *)page_address(base);
-	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
-		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
-
-	return base;
-}
-
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
 	unsigned long flags;
@@ -88,14 +61,58 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+static int
+split_large_page(pte_t *kpte, unsigned long address, pgprot_t ref_prot)
+{
+	int i, level;
+	unsigned long addr;
+	pte_t *pbase, *tmp;
+	struct page *base;
+
+	base = alloc_pages(GFP_KERNEL, 0);
+	if (!base)
+		return -ENOMEM;
+
+	down_write(&init_mm.mmap_sem);
+	/*
+	 * Check for races, another CPU might have split this page
+	 * up for us already:
+	 */
+	tmp = lookup_address(address, &level);
+	if (tmp != kpte)
+		goto out_unlock;
+
+	address = __pa(address);
+	addr = address & LARGE_PAGE_MASK;
+	pbase = (pte_t *)page_address(base);
+	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+
+	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+
+	/*
+	 * Install the new, split up pagetable:
+	 */
+	set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+	base = NULL;
+
+out_unlock:
+	up_write(&init_mm.mmap_sem);
+
+	if (base)
+		__free_pages(base, 0);
+
+	return 0;
+}
+
 static int __change_page_attr(struct page *page, pgprot_t prot)
 {
 	pgprot_t ref_prot = PAGE_KERNEL;
 	struct page *kpte_page;
 	unsigned long address;
+	int level, err = 0;
 	pgprot_t oldprot;
 	pte_t *kpte;
-	int level;
 
 	BUG_ON(PageHighMem(page));
 	address = (unsigned long)page_address(page);
@@ -127,19 +144,11 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	if (level == 3) {
 		set_pte_atomic(kpte, mk_pte(page, prot));
 	} else {
-		struct page *split;
-
-		split = split_large_page(address, ref_prot);
-		if (!split)
-			return -ENOMEM;
-
-		/*
-		 * There's a small window here to waste a bit of RAM:
-		 */
-		set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
-		goto repeat;
+		err = split_large_page(kpte, address, ref_prot);
+		if (!err)
+			goto repeat;
 	}
-	return 0;
+	return err;
 }
 
 /*

commit 97f99fedf27f337e2d3d95ca01e321beb26edc3d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:55 2008 +0100

    x86: simplify 32-bit cpa largepage splitting
    
    simplify 32-bit cpa largepage splitting: do a pure split and repeat
    the pte lookup to get the new pte modified.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 570a37bf1401..1011b21f8db0 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -38,7 +38,7 @@ pte_t *lookup_address(unsigned long address, int *level)
 }
 
 static struct page *
-split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
+split_large_page(unsigned long address, pgprot_t ref_prot)
 {
 	unsigned long addr;
 	struct page *base;
@@ -58,10 +58,9 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	pbase = (pte_t *)page_address(base);
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
 
-	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
-		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
-					   addr == address ? prot : ref_prot));
-	}
+	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
+		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));
+
 	return base;
 }
 
@@ -101,6 +100,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	BUG_ON(PageHighMem(page));
 	address = (unsigned long)page_address(page);
 
+repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
 		return -EINVAL;
@@ -128,7 +128,8 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 		set_pte_atomic(kpte, mk_pte(page, prot));
 	} else {
 		struct page *split;
-		split = split_large_page(address, prot, ref_prot);
+
+		split = split_large_page(address, ref_prot);
 		if (!split)
 			return -ENOMEM;
 
@@ -136,6 +137,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 		 * There's a small window here to waste a bit of RAM:
 		 */
 		set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
+		goto repeat;
 	}
 	return 0;
 }
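
The resulting control flow is the point of the patch: the split no longer
writes the caller's prot into one of the new ptes; it is a pure split into
identical 4K mappings, and the repeat label sends __change_page_attr() back
through lookup_address(), which now finds a 4K pte and modifies it through
the ordinary path. Condensed (declarations and error handling elided, names
as in the file):

    repeat:
            kpte = lookup_address(address, &level);
            if (level == 3) {
                    set_pte_atomic(kpte, mk_pte(page, prot));       /* 4K pte: set directly */
            } else {
                    split = split_large_page(address, ref_prot);    /* pure split, uniform prot */
                    set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
                    goto repeat;            /* re-lookup now finds the new 4K pte */
            }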

commit 78c94abaea55df7003f3ad0e5b6c78ee1cc860bb
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:55 2008 +0100

    x86: simplify the 32-bit cpa code
    
    simplify the 32-bit cpa code.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 66688a630839..570a37bf1401 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -15,9 +15,6 @@
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 
-static DEFINE_SPINLOCK(cpa_lock);
-static struct list_head df_list = LIST_HEAD_INIT(df_list);
-
 pte_t *lookup_address(unsigned long address, int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
@@ -48,9 +45,7 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	pte_t *pbase;
 	int i;
 
-	spin_unlock_irq(&cpa_lock);
 	base = alloc_pages(GFP_KERNEL, 0);
-	spin_lock_irq(&cpa_lock);
 	if (!base)
 		return NULL;
 
@@ -58,9 +53,6 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	 * page_private is used to track the number of entries in
 	 * the page table page that have non standard attributes.
 	 */
-	SetPagePrivate(base);
-	page_private(base) = 0;
-
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
@@ -73,36 +65,6 @@ split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
 	return base;
 }
 
-static void cache_flush_page(struct page *p)
-{
-	void *addr = page_address(p);
-	int i;
-
-	for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
-		clflush(addr + i);
-}
-
-static void flush_kernel_map(void *arg)
-{
-	struct list_head *lh = (struct list_head *)arg;
-	struct page *p;
-
-	/*
-	 * Flush all to work around Errata in early athlons regarding
-	 * large page flushing.
-	 */
-	__flush_tlb_all();
-
-	/* High level code is not ready for clflush yet */
-	if (0 && cpu_has_clflush) {
-		list_for_each_entry(p, lh, lru)
-			cache_flush_page(p);
-	} else {
-		if (boot_cpu_data.x86_model >= 4)
-			wbinvd();
-	}
-}
-
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 {
 	unsigned long flags;
@@ -127,36 +89,12 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
-/*
- * No more special protections in this 2/4MB area - revert to a large
- * page again.
- */
-static inline void revert_page(struct page *kpte_page, unsigned long address)
-{
-	pgprot_t ref_prot;
-	pte_t *linear;
-
-	ref_prot =
-	((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-		? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE;
-
-	linear = (pte_t *)
-		pmd_offset(pud_offset(pgd_offset_k(address), address), address);
-	set_pmd_pte(linear,  address,
-		    pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT,
-			    ref_prot));
-}
-
-static inline void save_page(struct page *kpte_page)
-{
-	if (!test_and_set_bit(PG_arch_1, &kpte_page->flags))
-		list_add(&kpte_page->lru, &df_list);
-}
-
 static int __change_page_attr(struct page *page, pgprot_t prot)
 {
+	pgprot_t ref_prot = PAGE_KERNEL;
 	struct page *kpte_page;
 	unsigned long address;
+	pgprot_t oldprot;
 	pte_t *kpte;
 	int level;
 
@@ -167,58 +105,41 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 	if (!kpte)
 		return -EINVAL;
 
+	oldprot = pte_pgprot(*kpte);
 	kpte_page = virt_to_page(kpte);
 	BUG_ON(PageLRU(kpte_page));
 	BUG_ON(PageCompound(kpte_page));
 
-	if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
-		if (level == 3) {
-			set_pte_atomic(kpte, mk_pte(page, prot));
-		} else {
-			struct page *split;
-			pgprot_t ref_prot;
-
-			ref_prot =
-			((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
-				? PAGE_KERNEL_EXEC : PAGE_KERNEL;
-			split = split_large_page(address, prot, ref_prot);
-			if (!split)
-				return -ENOMEM;
-
-			set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
-			kpte_page = split;
-		}
-		page_private(kpte_page)++;
-	} else {
-		if (level == 3) {
-			set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
-			BUG_ON(page_private(kpte_page) == 0);
-			page_private(kpte_page)--;
-		} else
-			BUG();
-	}
-
 	/*
-	 * If the pte was reserved, it means it was created at boot
-	 * time (not via split_large_page) and in turn we must not
-	 * replace it with a largepage.
+	 * Better fail early if someone sets the kernel text to NX.
+	 * Does not cover __inittext
 	 */
+	BUG_ON(address >= (unsigned long)&_text &&
+		address < (unsigned long)&_etext &&
+	       (pgprot_val(prot) & _PAGE_NX));
 
-	save_page(kpte_page);
-	if (!PageReserved(kpte_page)) {
-		if (cpu_has_pse && (page_private(kpte_page) == 0)) {
-			paravirt_release_pt(page_to_pfn(kpte_page));
-			revert_page(kpte_page, address);
-		}
+	if ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
+		ref_prot = PAGE_KERNEL_EXEC;
+
+	ref_prot = canon_pgprot(ref_prot);
+	prot = canon_pgprot(prot);
+
+	if (level == 3) {
+		set_pte_atomic(kpte, mk_pte(page, prot));
+	} else {
+		struct page *split;
+		split = split_large_page(address, prot, ref_prot);
+		if (!split)
+			return -ENOMEM;
+
+		/*
+		 * There's a small window here to waste a bit of RAM:
+		 */
+		set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
 	}
 	return 0;
 }
 
-static inline void flush_map(struct list_head *l)
-{
-	on_each_cpu(flush_kernel_map, l, 1, 1);
-}
-
 /*
  * Change the page attributes of an page in the linear mapping.
  *
@@ -234,40 +155,52 @@ static inline void flush_map(struct list_head *l)
  */
 int change_page_attr(struct page *page, int numpages, pgprot_t prot)
 {
-	unsigned long flags;
 	int err = 0, i;
 
-	spin_lock_irqsave(&cpa_lock, flags);
 	for (i = 0; i < numpages; i++, page++) {
 		err = __change_page_attr(page, prot);
 		if (err)
 			break;
 	}
-	spin_unlock_irqrestore(&cpa_lock, flags);
 
 	return err;
 }
 EXPORT_SYMBOL(change_page_attr);
 
-void global_flush_tlb(void)
+int change_page_attr_addr(unsigned long addr, int numpages, pgprot_t prot)
 {
-	struct page *pg, *next;
-	struct list_head l;
+	int i;
+	unsigned long pfn = (addr >> PAGE_SHIFT);
 
+	for (i = 0; i < numpages; i++) {
+		if (!pfn_valid(pfn + i)) {
+			break;
+		} else {
+			int level;
+			pte_t *pte = lookup_address(addr + i*PAGE_SIZE, &level);
+			BUG_ON(pte && !pte_none(*pte));
+		}
+	}
+	return change_page_attr(virt_to_page(addr), i, prot);
+}
+
+static void flush_kernel_map(void *arg)
+{
+	/*
+	 * Flush all to work around Errata in early athlons regarding
+	 * large page flushing.
+	 */
+	__flush_tlb_all();
+
+	if (boot_cpu_data.x86_model >= 4)
+		wbinvd();
+}
+
+void global_flush_tlb(void)
+{
 	BUG_ON(irqs_disabled());
 
-	spin_lock_irq(&cpa_lock);
-	list_replace_init(&df_list, &l);
-	spin_unlock_irq(&cpa_lock);
-	flush_map(&l);
-	list_for_each_entry_safe(pg, next, &l, lru) {
-		list_del(&pg->lru);
-		clear_bit(PG_arch_1, &pg->flags);
-		if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
-			continue;
-		ClearPagePrivate(pg);
-		__free_page(pg);
-	}
+	on_each_cpu(flush_kernel_map, NULL, 1, 1);
 }
 EXPORT_SYMBOL(global_flush_tlb);
 

commit cd58289667293593b04fd315ec7f2f37589134cb
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:54 2008 +0100

    x86: fix more non-global TLB flushes
    
    fix more __flush_tlb() instances, out of caution.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 58438bafedca..a317336cdeaa 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@ static void __init zap_identity_mappings(void)
 {
 	pgd_t *pgd = pgd_offset_k(0UL);
 	pgd_clear(pgd);
-	__flush_tlb();
+	__flush_tlb_all();
 }
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized 
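
Background on why the distinction matters (not part of the patch): kernel
mappings are typically created with _PAGE_GLOBAL, and a plain CR3 reload,
which is essentially what __flush_tlb() does, leaves global TLB entries in
place. __flush_tlb_all() additionally toggles CR4.PGE, which drops the
global entries as well, so it is the safe choice when tearing down early
identity mappings. A simplified model of the two primitives of that era
(not the exact kernel source):

    static inline void my_flush_tlb(void)
    {
            write_cr3(read_cr3());          /* flushes non-global entries only */
    }

    static inline void my_flush_tlb_all(void)
    {
            unsigned long cr4 = read_cr4();

            write_cr4(cr4 & ~X86_CR4_PGE);  /* clearing PGE flushes global entries too */
            write_cr4(cr4);
    }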

commit 0e3a95492989e452a33e5df9b51365da574b854d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 30 13:33:49 2008 +0100

    x86: early_ioremap_init(), enhance warnings
    
    enhance the debug warning in early_ioremap_init().
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/mm/ioremap_32.c b/arch/x86/mm/ioremap_32.c
index b743de841f68..f8e6c4709cc2 100644
--- a/arch/x86/mm/ioremap_32.c
+++ b/arch/x86/mm/ioremap_32.c
@@ -243,7 +243,22 @@ void __init early_ioremap_init(void)
 	pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
 	*pgd = __pa(bm_pte) | _PAGE_TABLE;
 	memset(bm_pte, 0, sizeof(bm_pte));
-	BUG_ON(pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+	/*
+	 * The boot-ioremap range spans multiple pgds, for which
+	 * we are not prepared:
+	 */
+	if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+		WARN_ON(1);
+		printk("pgd %p != %p\n",
+			pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+		printk("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+			fix_to_virt(FIX_BTMAP_BEGIN));
+		printk("fix_to_virt(FIX_BTMAP_END):   %08lx\n",
+			fix_to_virt(FIX_BTMAP_END));
+
+		printk("FIX_BTMAP_END:       %d\n", FIX_BTMAP_END);
+		printk("FIX_BTMAP_BEGIN:     %d\n", FIX_BTMAP_BEGIN);
+	}
 }
 
 void __init early_ioremap_clear(void)