Patches contributed by Eötvös Loránd University
commit 9f4c815ce7ab53150f17c97a382f136a8fb68044
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:41 2008 +0100
x86: clean up arch/x86/mm/pageattr_32.c
clean up arch/x86/mm/pageattr_32.c.
no code changed:
   text    data     bss     dec     hex filename
   1255      40       0    1295     50f pageattr_32.o.before
   1255      40       0    1295     50f pageattr_32.o.after
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr_32.c b/arch/x86/mm/pageattr_32.c
index 260073c07600..be4656403d77 100644
--- a/arch/x86/mm/pageattr_32.c
+++ b/arch/x86/mm/pageattr_32.c
@@ -1,28 +1,29 @@
-/*
- * Copyright 2002 Andi Kleen, SuSE Labs.
+/*
+ * Copyright 2002 Andi Kleen, SuSE Labs.
* Thanks to Ben LaHaise for precious feedback.
- */
+ */
-#include <linux/mm.h>
-#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/module.h>
+#include <linux/sched.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/mm.h>
+
#include <asm/processor.h>
#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
#include <asm/sections.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
static DEFINE_SPINLOCK(cpa_lock);
static struct list_head df_list = LIST_HEAD_INIT(df_list);
-
-pte_t *lookup_address(unsigned long address)
-{
+pte_t *lookup_address(unsigned long address)
+{
pgd_t *pgd = pgd_offset_k(address);
pud_t *pud;
pmd_t *pmd;
+
if (pgd_none(*pgd))
return NULL;
pud = pud_offset(pgd, address);
@@ -33,21 +34,22 @@ pte_t *lookup_address(unsigned long address)
return NULL;
if (pmd_large(*pmd))
return (pte_t *)pmd;
- return pte_offset_kernel(pmd, address);
-}
-static struct page *split_large_page(unsigned long address, pgprot_t prot,
- pgprot_t ref_prot)
-{
- int i;
+ return pte_offset_kernel(pmd, address);
+}
+
+static struct page *
+split_large_page(unsigned long address, pgprot_t prot, pgprot_t ref_prot)
+{
unsigned long addr;
struct page *base;
pte_t *pbase;
+ int i;
spin_unlock_irq(&cpa_lock);
base = alloc_pages(GFP_KERNEL, 0);
spin_lock_irq(&cpa_lock);
- if (!base)
+ if (!base)
return NULL;
/*
@@ -58,22 +60,24 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
page_private(base) = 0;
address = __pa(address);
- addr = address & LARGE_PAGE_MASK;
+ addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+
for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
- set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
- addr == address ? prot : ref_prot));
+ set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT,
+ addr == address ? prot : ref_prot));
}
return base;
-}
+}
static void cache_flush_page(struct page *p)
-{
- void *adr = page_address(p);
+{
+ void *addr = page_address(p);
int i;
+
for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
- clflush(adr+i);
+ clflush(addr + i);
}
static void flush_kernel_map(void *arg)
@@ -83,23 +87,27 @@ static void flush_kernel_map(void *arg)
/* High level code is not ready for clflush yet */
if (0 && cpu_has_clflush) {
- list_for_each_entry (p, lh, lru)
+ list_for_each_entry(p, lh, lru)
cache_flush_page(p);
- } else if (boot_cpu_data.x86_model >= 4)
- wbinvd();
+ } else {
+ if (boot_cpu_data.x86_model >= 4)
+ wbinvd();
+ }
- /* Flush all to work around Errata in early athlons regarding
- * large page flushing.
+ /*
+ * Flush all to work around Errata in early athlons regarding
+ * large page flushing.
*/
- __flush_tlb_all();
+ __flush_tlb_all();
}
-static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
-{
- struct page *page;
+static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
+{
unsigned long flags;
+ struct page *page;
- set_pte_atomic(kpte, pte); /* change init_mm */
+ /* change init_mm */
+ set_pte_atomic(kpte, pte);
if (SHARED_KERNEL_PMD)
return;
@@ -108,6 +116,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
+
pgd = (pgd_t *)page_address(page) + pgd_index(address);
pud = pud_offset(pgd, address);
pmd = pmd_offset(pud, address);
@@ -116,9 +125,9 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
spin_unlock_irqrestore(&pgd_lock, flags);
}
-/*
- * No more special protections in this 2/4MB area - revert to a
- * large page again.
+/*
+ * No more special protections in this 2/4MB area - revert to a large
+ * page again.
*/
static inline void revert_page(struct page *kpte_page, unsigned long address)
{
@@ -142,12 +151,11 @@ static inline void save_page(struct page *kpte_page)
list_add(&kpte_page->lru, &df_list);
}
-static int
-__change_page_attr(struct page *page, pgprot_t prot)
-{
- pte_t *kpte;
- unsigned long address;
+static int __change_page_attr(struct page *page, pgprot_t prot)
+{
struct page *kpte_page;
+ unsigned long address;
+ pte_t *kpte;
BUG_ON(PageHighMem(page));
address = (unsigned long)page_address(page);
@@ -155,16 +163,17 @@ __change_page_attr(struct page *page, pgprot_t prot)
kpte = lookup_address(address);
if (!kpte)
return -EINVAL;
+
kpte_page = virt_to_page(kpte);
BUG_ON(PageLRU(kpte_page));
BUG_ON(PageCompound(kpte_page));
- if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
+ if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
if (!pte_huge(*kpte)) {
- set_pte_atomic(kpte, mk_pte(page, prot));
+ set_pte_atomic(kpte, mk_pte(page, prot));
} else {
- pgprot_t ref_prot;
struct page *split;
+ pgprot_t ref_prot;
ref_prot =
((address & LARGE_PAGE_MASK) < (unsigned long)&_etext)
@@ -172,16 +181,19 @@ __change_page_attr(struct page *page, pgprot_t prot)
split = split_large_page(address, prot, ref_prot);
if (!split)
return -ENOMEM;
- set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
+
+ set_pmd_pte(kpte, address, mk_pte(split, ref_prot));
kpte_page = split;
}
page_private(kpte_page)++;
- } else if (!pte_huge(*kpte)) {
- set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
- BUG_ON(page_private(kpte_page) == 0);
- page_private(kpte_page)--;
- } else
- BUG();
+ } else {
+ if (!pte_huge(*kpte)) {
+ set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
+ BUG_ON(page_private(kpte_page) == 0);
+ page_private(kpte_page)--;
+ } else
+ BUG();
+ }
/*
* If the pte was reserved, it means it was created at boot
@@ -197,7 +209,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
}
}
return 0;
-}
+}
static inline void flush_map(struct list_head *l)
{
@@ -211,32 +223,33 @@ static inline void flush_map(struct list_head *l)
* than write-back somewhere - some CPUs do not like it when mappings with
* different caching policies exist. This changes the page attributes of the
* in kernel linear mapping too.
- *
+ *
* The caller needs to ensure that there are no conflicting mappings elsewhere.
* This function only deals with the kernel linear map.
- *
+ *
* Caller must call global_flush_tlb() after this.
*/
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
- int err = 0;
- int i;
unsigned long flags;
+ int err = 0, i;
spin_lock_irqsave(&cpa_lock, flags);
- for (i = 0; i < numpages; i++, page++) {
+ for (i = 0; i < numpages; i++, page++) {
err = __change_page_attr(page, prot);
- if (err)
- break;
- }
+ if (err)
+ break;
+ }
spin_unlock_irqrestore(&cpa_lock, flags);
+
return err;
}
+EXPORT_SYMBOL(change_page_attr);
void global_flush_tlb(void)
{
- struct list_head l;
struct page *pg, *next;
+ struct list_head l;
BUG_ON(irqs_disabled());
@@ -253,26 +266,28 @@ void global_flush_tlb(void)
__free_page(pg);
}
}
+EXPORT_SYMBOL(global_flush_tlb);
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
if (PageHighMem(page))
return;
- if (!enable)
+ if (!enable) {
debug_check_no_locks_freed(page_address(page),
numpages * PAGE_SIZE);
+ }
- /* the return value is ignored - the calls cannot fail,
+ /*
+ * the return value is ignored - the calls cannot fail,
* large pages are disabled at boot time.
*/
change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
- /* we should perform an IPI and flush all tlbs,
+
+ /*
+ * we should perform an IPI and flush all tlbs,
* but that can deadlock->flush only current cpu.
*/
__flush_tlb_all();
}
#endif
-
-EXPORT_SYMBOL(change_page_attr);
-EXPORT_SYMBOL(global_flush_tlb);
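For context on the interface this file exports, a minimal caller sketch follows; it assumes only the usage rule stated in the comment block above ("Caller must call global_flush_tlb() after this"). The helper name and the PAGE_KERNEL_RO choice are illustrative, not part of the patch.

	#include <linux/mm.h>
	#include <asm/cacheflush.h>
	#include <asm/pgtable.h>

	/* Hypothetical caller: write-protect a range of kernel pages. */
	static int make_pages_ro(struct page *page, int numpages)
	{
		int err;

		/* Change the attributes in the kernel linear map... */
		err = change_page_attr(page, numpages, PAGE_KERNEL_RO);
		if (err)
			return err;

		/* ...then flush the TLBs, as the comment above requires. */
		global_flush_tlb();

		return 0;
	}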
commit 6371b495991debfd1417b17c2bc4f7d7bae05739
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:40 2008 +0100
x86: change ioremap() to default to uncached
Prepare ioremap() to default to uncached. This will be the
safest default - but first we have to fix CPA.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h
index 2a04bd17eac5..db3978846379 100644
--- a/include/asm-x86/io_32.h
+++ b/include/asm-x86/io_32.h
@@ -111,18 +111,27 @@ extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsign
* make bus memory CPU accessible via the readb/readw/readl/writeb/
* writew/writel functions and the other mmio helpers. The returned
* address is not guaranteed to be usable directly as a virtual
- * address.
+ * address.
*
* If the area you are trying to map is a PCI BAR you should have a
* look at pci_iomap().
*/
+extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
-static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+static inline void __iomem *
+ioremap_cache(unsigned long offset, unsigned long size)
{
return __ioremap(offset, size, 0);
}
-extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
+/*
+ * The default ioremap() behavior is non-cached:
+ */
+static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+{
+ return ioremap_nocache(offset, size);
+}
+
extern void iounmap(volatile void __iomem *addr);
/*
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h
index fef0ce2ced81..7dee3c6e9c39 100644
--- a/include/asm-x86/io_64.h
+++ b/include/asm-x86/io_64.h
@@ -152,11 +152,6 @@ static inline void * phys_to_virt(unsigned long address)
extern void __iomem *__ioremap(unsigned long offset, unsigned long size, unsigned long flags);
-static inline void __iomem * ioremap (unsigned long offset, unsigned long size)
-{
- return __ioremap(offset, size, 0);
-}
-
extern void *early_ioremap(unsigned long addr, unsigned long size);
extern void early_iounmap(void *addr, unsigned long size);
@@ -165,8 +160,24 @@ extern void early_iounmap(void *addr, unsigned long size);
* it's useful if some control registers are in such an area and write combining
* or read caching is not desirable:
*/
-extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
+extern void __iomem * ioremap_nocache(unsigned long offset, unsigned long size);
+
+static inline void __iomem *
+ioremap_cache(unsigned long offset, unsigned long size)
+{
+ return __ioremap(offset, size, 0);
+}
+
+/*
+ * The default ioremap() behavior is non-cached:
+ */
+static inline void __iomem * ioremap(unsigned long offset, unsigned long size)
+{
+ return ioremap_nocache(offset, size);
+}
+
extern void iounmap(volatile void __iomem *addr);
+
extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
/*
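The driver-visible effect, as a sketch (the device name, base address and size are invented for illustration): plain ioremap() now yields an uncached mapping, and a caller that genuinely wants a cached view must opt in explicitly via ioremap_cache().

	#include <linux/errno.h>
	#include <asm/io.h>

	#define FOO_MMIO_BASE	0xfeb00000UL	/* hypothetical register block */
	#define FOO_MMIO_SIZE	0x1000UL

	static void __iomem *foo_regs;

	static int foo_map_regs(void)
	{
		/* After this patch, uncached by default - the safe
		 * choice for device registers: */
		foo_regs = ioremap(FOO_MMIO_BASE, FOO_MMIO_SIZE);
		if (!foo_regs)
			return -ENOMEM;

		/* A cached mapping now has to be requested explicitly:
		 *	ioremap_cache(base, size);
		 */
		return 0;
	}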
commit cd7d72bb27a8c7502a602bdc299f1bb0a9357975
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:35 2008 +0100
x86: improve MTRR trimming messages
improve the MTRR trimming messages and also trigger a WARN_ON()
so that kerneloops.org can pick it up and categorize it.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index ac4b6338f3f4..715919582657 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -706,20 +706,17 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
if (!highest_addr) {
- printk(KERN_WARNING "***************\n");
- printk(KERN_WARNING "**** WARNING: likely strange cpu\n");
- printk(KERN_WARNING "**** MTRRs all blank, cpu in qemu?\n");
- printk(KERN_WARNING "***************\n");
+ printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
+ WARN_ON(1);
return 0;
}
if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
- printk(KERN_WARNING "***************\n");
- printk(KERN_WARNING "**** WARNING: likely BIOS bug\n");
- printk(KERN_WARNING "**** MTRRs don't cover all of "
- "memory, trimmed %ld pages\n", end_pfn -
- (highest_addr >> PAGE_SHIFT));
- printk(KERN_WARNING "***************\n");
+ printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
+ " all of memory, losing %LdMB of RAM.\n",
+ (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
+
+ WARN_ON(1);
printk(KERN_INFO "update e820 for mtrr\n");
trim_start = highest_addr;
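To make the arithmetic in the new message concrete, a worked example with invented numbers (e820 reports 5 GiB of RAM, the MTRRs only cover 4 GiB):

	#include <linux/types.h>
	#include <asm/page.h>

	/* Worked example of the new message's size arithmetic. */
	static unsigned long example_lost_mb(void)
	{
		u64 end_pfn      = 5ULL << (30 - PAGE_SHIFT);	/* 5 GiB, in pages  */
		u64 highest_addr = 4ULL << 30;			/* MTRR-covered end */

		/* Same expression as the printk above: bytes past the
		 * MTRR limit, shifted down to MiB - 1024 here, so the
		 * message would read "... losing 1024MB of RAM." */
		return (unsigned long)((((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
	}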
commit 11201e603d28a1cb7a4bb1d65f39e61629c97a28
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:32 2008 +0100
x86: fix DEBUG_RODATA kconfig text
fix kconfig text and make DEBUG_RODATA default.
this helps debugging quite a bit.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 0a82b889d76e..9bb61e1aed69 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -61,13 +61,13 @@ config DEBUG_PER_CPU_MAPS
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
+ default y
depends on DEBUG_KERNEL
help
Mark the kernel read-only data as write-protected in the pagetables,
in order to catch accidental (and incorrect) writes to such const
- data. This option may have a slight performance impact because a
- portion of the kernel code won't be covered by a 2MB TLB anymore.
- If in doubt, say "N".
+ data. This is recommended so that we can catch kernel bugs sooner.
+ If in doubt, say "Y".
config 4KSTACKS
bool "Use 4Kb for kernel stacks instead of 8Kb"
commit 9c5ba48958acf6d584f57e9169ad7ecc80ccc390
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:24 2008 +0100
x86: clean up paging_init()
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 1268d33e7b6e..70b463abf6f4 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -561,7 +561,6 @@ unsigned long __init numa_free_all_bootmem(void)
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
- int i;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
commit 75f2ce033168ff435e72bf5bb615176d9930e77f
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:24 2008 +0100
x86: get_cycles() fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h
index a6e8d35c3f86..7d3e27f7d484 100644
--- a/include/asm-x86/tsc.h
+++ b/include/asm-x86/tsc.h
@@ -27,10 +27,8 @@ static inline cycles_t get_cycles(void)
if (!cpu_has_tsc)
return 0;
#endif
-
-#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
rdtscll(ret);
-#endif
+
return ret;
}
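With the #ifdef pair gone, get_cycles() reads the TSC on every TSC-capable configuration rather than only under CONFIG_X86_GENERIC/CONFIG_X86_TSC, so an ad-hoc measurement like the sketch below behaves consistently (the measured function is a hypothetical stand-in):

	#include <linux/kernel.h>
	#include <asm/tsc.h>

	extern void foo_do_work(void);	/* hypothetical code under test */

	static void foo_time_it(void)
	{
		cycles_t t0, t1;

		t0 = get_cycles();	/* now always ends up in rdtscll() */
		foo_do_work();
		t1 = get_cycles();

		printk(KERN_DEBUG "foo_do_work: %Ld cycles\n",
		       (long long)(t1 - t0));
	}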
commit 70edcd77a0d6d0f8731c826764f5eb6732f521e9
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:24 2008 +0100
genirq: stackdump after the "Trying to free already-free IRQ" message
these bugs are harder to find than they seem; a stackdump helps.
make it dependent on CONFIG_DEBUG_SHIRQ so that people can turn it off
if it annoys them.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1f314221d534..438a01464287 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -479,6 +479,9 @@ void free_irq(unsigned int irq, void *dev_id)
return;
}
printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
+#ifdef CONFIG_DEBUG_SHIRQ
+ dump_stack();
+#endif
spin_unlock_irqrestore(&desc->lock, flags);
return;
}
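The kind of driver bug the new dump_stack() pinpoints, with invented names: the second free_irq() on the same (irq, dev_id) pair triggers the message, and with CONFIG_DEBUG_SHIRQ=y now also a backtrace identifying the broken teardown path.

	#include <linux/interrupt.h>

	static void foo_teardown(unsigned int irq, void *dev)	/* hypothetical */
	{
		free_irq(irq, dev);	/* fine: releases the handler */
		free_irq(irq, dev);	/* bug: "Trying to free already-free
					 * IRQ n", plus a stack dump when
					 * CONFIG_DEBUG_SHIRQ is enabled */
	}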
commit 17abecfe651c862cd31b1f9e8ef6cfc29083f00d
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:24 2008 +0100
x86: fix up alternatives with lockdep enabled
An older binutils bug caused us to not fix up alternatives.
This problem involved mutex.c, but we don't do lockdep section tricks
there anymore, so this workaround is moot. Keep the printk nevertheless,
just in case ... We can remove that later on.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 318a4f9b7ece..45d79ea890ae 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -342,12 +342,13 @@ void alternatives_smp_switch(int smp)
#ifdef CONFIG_LOCKDEP
/*
- * A not yet fixed binutils section handling bug prevents
- * alternatives-replacement from working reliably, so turn
- * it off:
+ * Older binutils section handling bug prevented
+ * alternatives-replacement from working reliably.
+ *
+ * If this still occurs then you should see a hang
+ * or crash shortly after this line:
*/
- printk("lockdep: not fixing up alternatives.\n");
- return;
+ printk("lockdep: fixing up alternatives.\n");
#endif
if (noreplace_smp || smp_alt_once)
commit ad8ca495bd3e03e6751fc0c6a6af44018ebb4036
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:24 2008 +0100
x86: add warning to check_tsc_warp()
add warning to check_tsc_warp() - if get_cycles() does not progress.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 7110078f242c..0577825cf89b 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -87,7 +87,11 @@ static __cpuinit void check_tsc_warp(void)
nr_warps++;
__raw_spin_unlock(&sync_lock);
}
-
+ }
+ if (!(now-start)) {
+ printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
+ now-start, end-start);
+ WARN_ON(1);
}
}
commit df43510b18b8439465b4b58556f0495b5f5d771e
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jan 30 13:33:23 2008 +0100
x86: check_tsc_warp() slowness fix
100 million max # of loops is a bit too much - reduce it to 10 million.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index ace340524c01..7110078f242c 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -67,12 +67,12 @@ static __cpuinit void check_tsc_warp(void)
/*
* Be nice every now and then (and also check whether
- * measurement is done [we also insert a 100 million
+ * measurement is done [we also insert a 10 million
* loops safety exit, so we dont lock up in case the
* TSC readout is totally broken]):
*/
if (unlikely(!(i & 7))) {
- if (now > end || i > 100000000)
+ if (now > end || i > 10000000)
break;
cpu_relax();
touch_nmi_watchdog();