Patches contributed by Eötvös Lorand University
commit 902955fc13259dcec1321d45251a477977fcba39
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Mar 3 13:53:58 2008 +0100
x86: revert "x86: fix pmd_bad and pud_bad to support huge pages"
revert commit cded932b75ab0a5f9181ee3da34a0a488d1a14fd,
"x86: fix pmd_bad and pud_bad to support huge pages", it causes
a bootup hang, as reported and bisected by Arjan van de Ven.
Bisected-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index b478efa971e0..a842c7222b1e 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -91,9 +91,7 @@ extern unsigned long pg0[];
/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
#define pmd_none(x) (!(unsigned long)pmd_val(x))
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
-#define pmd_bad(x) ((pmd_val(x) \
- & ~(PAGE_MASK | _PAGE_USER | _PAGE_PSE | _PAGE_NX)) \
- != _KERNPG_TABLE)
+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 0a9258333cbd..0a0b77bc736a 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -153,14 +153,12 @@ static inline unsigned long pgd_bad(pgd_t pgd)
static inline unsigned long pud_bad(pud_t pud)
{
- return pud_val(pud) &
- ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
+ return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
}
static inline unsigned long pmd_bad(pmd_t pmd)
{
- return pmd_val(pmd) &
- ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
+ return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
}
#define pte_none(x) (!pte_val(x))
commit b4ef95de00be4c2c30feccf607a45093c8c118b7
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Feb 26 09:40:27 2008 +0100
x86: disable BTS ptrace extensions for now
revert the BTS ptrace extension for now.
based on general objections from Roland McGrath:
http://lkml.org/lkml/2008/2/21/323
we'll let the BTS functionality cook some more and re-enable
it in v2.6.26. We'll leave the dead code around to help the
development of this code.
(X86_BTS is not defined at the moment)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a7d50a547dc2..be3c7a299f02 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -603,11 +603,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
}
#endif
+#ifdef X86_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
+#endif
if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 43f287744f9f..3baf9b9f4c87 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -604,11 +604,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+#ifdef X86_BTS
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
+#endif
}
/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index d862e396b099..f41fdc98efb1 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -544,6 +544,8 @@ static int ptrace_set_debugreg(struct task_struct *child,
return 0;
}
+#ifdef X86_BTS
+
static int ptrace_bts_get_size(struct task_struct *child)
{
if (!child->thread.ds_area_msr)
@@ -826,6 +828,7 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk,
ptrace_bts_write_record(tsk, &rec);
}
+#endif /* X86_BTS */
/*
* Called by kernel/ptrace.c when detaching..
@@ -839,7 +842,9 @@ void ptrace_disable(struct task_struct *child)
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif
if (child->thread.ds_area_msr) {
+#ifdef X86_BTS
ptrace_bts_realloc(child, 0, 0);
+#endif
child->thread.debugctlmsr &= ~ds_debugctl_mask();
if (!child->thread.debugctlmsr)
clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
@@ -961,6 +966,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
break;
#endif
+ /*
+ * These bits need more cooking - not enabled yet:
+ */
+#ifdef X86_BTS
case PTRACE_BTS_CONFIG:
ret = ptrace_bts_config
(child, data, (struct ptrace_bts_config __user *)addr);
@@ -988,6 +997,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
ret = ptrace_bts_drain
(child, data, (struct bts_struct __user *) addr);
break;
+#endif
default:
ret = ptrace_request(child, request, addr, data);
@@ -1226,12 +1236,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
case PTRACE_SETOPTIONS:
case PTRACE_SET_THREAD_AREA:
case PTRACE_GET_THREAD_AREA:
+#ifdef X86_BTS
case PTRACE_BTS_CONFIG:
case PTRACE_BTS_STATUS:
case PTRACE_BTS_SIZE:
case PTRACE_BTS_GET:
case PTRACE_BTS_CLEAR:
case PTRACE_BTS_DRAIN:
+#endif
return sys_ptrace(request, pid, addr, data);
default:
commit 757265b8c57bb8fd91785d3d1a87fb483c86c9c2
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 28 20:19:06 2008 +0100
x86: delay the export removal of init_mm
delay the removal of this symbol export by one more kernel release,
giving external modules such as VirtualBox a chance to stop using it.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ba899ff2a8f9..c1d1fd0c299b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -316,3 +316,15 @@ Why: Largely unmaintained and almost entirely unused. File system
is largely pointless as without a lot of work only the most
trivial of Solaris binaries can work with the emulation code.
Who: David S. Miller <davem@davemloft.net>
+
+---------------------------
+
+What: init_mm export
+When: 2.6.26
+Why: Not used in-tree. The current out-of-tree users used it to
+ work around problems in the CPA code which should be resolved
+ by now. One usecase was described to provide verification code
+ of the CPA operation. That's a good idea in general, but such
+ code / infrastructure should be in the kernel and not in some
+ out-of-tree driver.
+Who: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 5b3ce7934363..3d01e47777db 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -15,6 +15,7 @@ static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
+EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
/*
* Initial thread structure.
commit b16bf712f491808a8c926dd481c696fe7d73ee5a
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 28 14:02:08 2008 +0100
x86: fix leak un ioremap_page_range() failure
Jan Beulich noticed it during code review that if a driver's ioremap()
fails (say due to -ENOMEM) then we might leak the struct vm_area.
Free it properly.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 882328efc3db..ac3c959e271d 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -162,7 +162,7 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
- remove_vm_area((void *)(vaddr & PAGE_MASK));
+ free_vm_area(area);
return NULL;
}
commit 5d119b2c9a490e2d647eae134211b32a18a04c7d
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Feb 26 12:55:57 2008 +0100
x86: fix execve with -fstack-protect
pointed out by pageexec@freemail.hu:
> what happens here is that gcc treats the argument area as owned by the
> callee, not the caller and is allowed to do certain tricks. for ssp it
> will make a copy of the struct passed by value into the local variable
> area and pass *its* address down, and it won't copy it back into the
> original instance stored in the argument area.
>
> so once sys_execve returns, the pt_regs passed by value hasn't at all
> changed and its default content will cause a nice double fault (FWIW,
> this part took me the longest to debug, being down with cold didn't
> help it either ;).
To fix this we pass in pt_regs by pointer.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2ad9a1bc6a73..c20c9e7e08dd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -453,6 +453,7 @@ ENTRY(stub_execve)
CFI_REGISTER rip, r11
SAVE_REST
FIXUP_TOP_OF_STACK %r11
+ movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
@@ -1036,15 +1037,16 @@ ENDPROC(child_rip)
* rdi: name, rsi: argv, rdx: envp
*
* We want to fallback into:
- * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
+ * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
*
* do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
*/
ENTRY(kernel_execve)
CFI_STARTPROC
FAKE_STACK_FRAME $0
SAVE_ALL
+ movq %rsp,%rcx
call sys_execve
movq %rax, RAX(%rsp)
RESTORE_REST
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b0cc8f0136d8..43f287744f9f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -730,16 +730,16 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs regs)
+ char __user * __user *envp, struct pt_regs *regs)
{
long error;
char * filename;
filename = getname(name);
error = PTR_ERR(filename);
- if (IS_ERR(filename))
+ if (IS_ERR(filename))
return error;
- error = do_execve(filename, argv, envp, ®s);
+ error = do_execve(filename, argv, envp, regs);
putname(filename);
return error;
}
commit d4afe414189b098d56bcd24280c018aa2ac9a990
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 21 13:39:30 2008 +0100
x86: rename KERNEL_TEXT_SIZE => KERNEL_IMAGE_SIZE
The KERNEL_TEXT_SIZE constant was mis-named, as we not only map the kernel
text but data, bss and init sections as well.
That name led me on the wrong path with the KERNEL_TEXT_SIZE regression,
because i knew how big of _text_ my images have and i knew about the 40 MB
"text" limit so i wrongly thought to be on the safe side of the 40 MB limit
with my 29 MB of text, while the total image size was slightly above 40 MB.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b037b15be7f8..a007454133a3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -393,7 +393,7 @@ NEXT_PAGE(level2_kernel_pgt)
* too.)
*/
PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
- KERNEL_TEXT_SIZE/PMD_SIZE)
+ KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_spare_pgt)
.fill 512, 8, 0
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 3e2e3ca63048..143546073b95 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -51,8 +51,8 @@
* Kernel image size is limited to 128 MB (see level2_kernel_pgt in
* arch/x86/kernel/head_64.S), and it is mapped here:
*/
-#define KERNEL_TEXT_SIZE (128*1024*1024)
-#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
+#define KERNEL_IMAGE_SIZE (128*1024*1024)
+#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL)
#ifndef __ASSEMBLY__
void clear_page(void *page);
commit 88f3aec7afd9ae3e6f6d221801996b69aad1e3a4
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Feb 21 11:04:11 2008 +0100
x86: fix spontaneous reboot with allyesconfig bzImage
recently the 64-bit allyesconfig bzImage kernel started spontaneously
rebooting during early bootup.
after a few fun hours spent with early init debugging, it turns out
that we've got this rather annoying limit on the size of the kernel
image:
#define KERNEL_TEXT_SIZE (40*1024*1024)
which limit my vmlinux just happened to pass:
text data bss dec hex filename
29703744 4222751 8646224 42572719 2899baf vmlinux
40 MB is 42572719 bytes, so my vmlinux was just 1.5% above this limit :-/
So it happily crashed right in head_64.S, which - as we all know - is
the most debuggable code in the whole architecture ;-)
So increase the limit to allow an up to 128MB kernel image to be mapped.
(should anyone be that crazy or lazy)
We have a full 4K of pagetable (level2_kernel_pgt) allocated for these
mappings already, so there's no RAM overhead and the limit was rather
pointless and arbitrary.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index eb415043a929..b037b15be7f8 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -379,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt)
/* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
*/
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
NEXT_PAGE(level2_kernel_pgt)
- /* 40MB kernel mapping. The kernel code cannot be bigger than that.
- When you change this change KERNEL_TEXT_SIZE in page.h too. */
- /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
- /* Module mapping starts here */
- .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
+ /*
+ * 128 MB kernel mapping. We spend a full page on this pagetable
+ * anyway.
+ *
+ * The kernel code+data+bss must not be bigger than that.
+ *
+ * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+ * If you want to increase this then increase MODULES_VADDR
+ * too.)
+ */
+ PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+ KERNEL_TEXT_SIZE/PMD_SIZE)
NEXT_PAGE(level2_spare_pgt)
- .fill 512,8,0
+ .fill 512, 8, 0
#undef PMDS
#undef NEXT_PAGE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6dbb0035c57a..a02a14f0f324 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -172,8 +172,9 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
}
/*
- * The head.S code sets up the kernel high mapping from:
- * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE
+ * The head.S code sets up the kernel high mapping:
+ *
+ * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
*
* phys_addr holds the negative offset to the kernel, which is added
* to the compile time generated pmds. This results in invalid pmds up
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index f7393bc516ef..3e2e3ca63048 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -47,8 +47,12 @@
#define __PHYSICAL_MASK_SHIFT 46
#define __VIRTUAL_MASK_SHIFT 48
-#define KERNEL_TEXT_SIZE (40*1024*1024)
-#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
+/*
+ * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
+ * arch/x86/kernel/head_64.S), and it is mapped here:
+ */
+#define KERNEL_TEXT_SIZE (128*1024*1024)
+#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
#ifndef __ASSEMBLY__
void clear_page(void *page);
commit 92cb54a37a42a41cfb2ef7f1478bfa4395198258
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Feb 13 14:37:52 2008 +0100
x86: make DEBUG_PAGEALLOC and CPA more robust
Use PF_MEMALLOC to prevent recursive calls in the DBEUG_PAGEALLOC
case. This makes the code simpler and more robust against allocation
failures.
This fixes the following fallback to non-mmconfig:
http://lkml.org/lkml/2008/2/20/551
http://bugzilla.kernel.org/show_bug.cgi?id=10083
Also, for DEBUG_PAGEALLOC=n reduce the pool size to one page.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 464d8fc21ce6..14e48b5a94ba 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void)
#endif
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
@@ -355,45 +361,48 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
static LIST_HEAD(page_pool);
static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
{
- struct page *p;
gfp_t gfp = GFP_KERNEL;
+ unsigned long flags;
+ struct page *p;
- /* Do not allocate from interrupt context */
- if (in_irq() || irqs_disabled())
- return;
/*
- * Check unlocked. I does not matter when we have one more
- * page in the pool. The bit lock avoids recursive pool
- * allocations:
+ * Avoid recursion (on debug-pagealloc) and also signal
+ * our priority to get to these pagetables:
*/
- if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+ if (current->flags & PF_MEMALLOC)
return;
+ current->flags |= PF_MEMALLOC;
-#ifdef CONFIG_DEBUG_PAGEALLOC
/*
- * We could do:
- * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
- * but this fails on !PREEMPT kernels
+ * Allocate atomically from atomic contexts:
*/
- gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+ if (in_atomic() || irqs_disabled() || debug_pagealloc)
+ gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
- while (pool_pages < pool_size) {
+ while (pool_pages < pool_size || (ret && !*ret)) {
p = alloc_pages(gfp, 0);
if (!p) {
pool_failed++;
break;
}
- spin_lock_irq(&pgd_lock);
+ /*
+ * If the call site needs a page right now, provide it:
+ */
+ if (ret && !*ret) {
+ *ret = p;
+ continue;
+ }
+ spin_lock_irqsave(&pgd_lock, flags);
list_add(&p->lru, &page_pool);
pool_pages++;
- spin_unlock_irq(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
- clear_bit_unlock(0, &pool_refill);
+
+ current->flags &= ~PF_MEMALLOC;
}
#define SHIFT_MB (20 - PAGE_SHIFT)
@@ -414,11 +423,15 @@ void __init cpa_init(void)
* GiB. Shift MiB to Gib and multiply the result by
* POOL_PAGES_PER_GB:
*/
- gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
- pool_size = POOL_PAGES_PER_GB * gb;
+ if (debug_pagealloc) {
+ gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+ pool_size = POOL_PAGES_PER_GB * gb;
+ } else {
+ pool_size = 1;
+ }
pool_low = pool_size;
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
printk(KERN_DEBUG
"CPA: page pool initialized %lu of %lu pages preallocated\n",
pool_pages, pool_size);
@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
spin_lock_irqsave(&pgd_lock, flags);
if (list_empty(&page_pool)) {
spin_unlock_irqrestore(&pgd_lock, flags);
- return -ENOMEM;
+ base = NULL;
+ cpa_fill_pool(&base);
+ if (!base)
+ return -ENOMEM;
+ spin_lock_irqsave(&pgd_lock, flags);
+ } else {
+ base = list_first_entry(&page_pool, struct page, lru);
+ list_del(&base->lru);
+ pool_pages--;
+
+ if (pool_pages < pool_low)
+ pool_low = pool_pages;
}
- base = list_first_entry(&page_pool, struct page, lru);
- list_del(&base->lru);
- pool_pages--;
-
- if (pool_pages < pool_low)
- pool_low = pool_pages;
-
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
cpa_flush_all(cache);
out:
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
+
return ret;
}
@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* Try to refill the page pool here. We can do this only after
* the tlb flush.
*/
- cpa_fill_pool();
+ cpa_fill_pool(NULL);
}
#ifdef CONFIG_HIBERNATION
commit 7eee3e677d6e2e9007afcd7d79b0715525aa552e
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 22 10:32:21 2008 +0100
sched: clean up __pick_last_entity() a bit
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 7abad50d935f..c8e6492c5925 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -202,14 +202,12 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
- struct rb_node *last;
- struct sched_entity *se;
+ struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
- last = rb_last(&cfs_rq->tasks_timeline);
if (!last)
return NULL;
- se = rb_entry(last, struct sched_entity, run_node);
- return se;
+
+ return rb_entry(last, struct sched_entity, run_node);
}
/**************************************************************
commit 6892b75e60557a48c01d57ba320419a9e2ce9846
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Feb 13 14:02:36 2008 +0100
sched: make early bootup sched_clock() use safer
do not call sched_clock() too early. Not only might rq->idle
not be set up - but pure per-cpu data might not be accessible
either.
this solves an ia64 early bootup hang with CONFIG_PRINTK_TIME=y.
Tested-by: Tony Luck <tony.luck@gmail.com>
Acked-by: Tony Luck <tony.luck@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index b387a8de26a5..7286ccb01082 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -668,6 +668,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
*/
unsigned int sysctl_sched_rt_period = 1000000;
+static __read_mostly int scheduler_running;
+
/*
* part of the period that we allow rt tasks to run in us.
* default: 0.95s
@@ -689,14 +691,16 @@ unsigned long long cpu_clock(int cpu)
unsigned long flags;
struct rq *rq;
- local_irq_save(flags);
- rq = cpu_rq(cpu);
/*
* Only call sched_clock() if the scheduler has already been
* initialized (some code might call cpu_clock() very early):
*/
- if (rq->idle)
- update_rq_clock(rq);
+ if (unlikely(!scheduler_running))
+ return 0;
+
+ local_irq_save(flags);
+ rq = cpu_rq(cpu);
+ update_rq_clock(rq);
now = rq->clock;
local_irq_restore(flags);
@@ -7284,6 +7288,8 @@ void __init sched_init(void)
* During early bootup we pretend to be a normal task:
*/
current->sched_class = &fair_sched_class;
+
+ scheduler_running = 1;
}
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP