Patches contributed by Eötvös Lorand University
commit 018d6db4cb5bbdcd65424a16f2dcca692ed32ae4
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Apr 14 08:53:32 2008 +0200
sched: re-do "sched: fix fair sleepers"
re-apply:
| commit e22ecef1d2658ba54ed7d3fdb5d60829fb434c23
| Author: Ingo Molnar <mingo@elte.hu>
| Date: Fri Mar 14 22:16:08 2008 +0100
|
| sched: fix fair sleepers
|
| Fair sleepers need to scale their latency target down by runqueue
| weight. Otherwise busy systems will gain ever larger sleep bonus.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0080968d3e4a..86a93376282c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,8 +510,10 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
if (!initial) {
/* sleeps upto a single latency don't count. */
- if (sched_feat(NEW_FAIR_SLEEPERS))
- vruntime -= sysctl_sched_latency;
+ if (sched_feat(NEW_FAIR_SLEEPERS)) {
+ vruntime -= calc_delta_fair(sysctl_sched_latency,
+ &cfs_rq->load);
+ }
/* ensure we never gain time by being placed backwards. */
vruntime = max_vruntime(se->vruntime, vruntime);
commit 2be621498d461b63ca6124f86e3b9582e1a8e722
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Apr 19 19:19:56 2008 +0200
x86: dma-ops on highmem fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/pci-base_32.c b/arch/x86/kernel/pci-base_32.c
index 033d94ec5000..cf4bb28dfc6a 100644
--- a/arch/x86/kernel/pci-base_32.c
+++ b/arch/x86/kernel/pci-base_32.c
@@ -4,12 +4,12 @@
#include <linux/dma-mapping.h>
#include <asm/dma-mapping.h>
-static dma_addr_t pci32_map_single(struct device *dev, void *ptr,
+static dma_addr_t pci32_map_single(struct device *dev, phys_addr_t ptr,
size_t size, int direction)
{
WARN_ON(size == 0);
flush_write_buffers();
- return virt_to_phys(ptr);
+ return ptr;
}
static int pci32_dma_map_sg(struct device *dev, struct scatterlist *sglist,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 1b5464c2434f..adb91e4b62da 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -470,10 +470,11 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
return 0;
}
-static dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
+static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr,
size_t size, int direction)
{
dma_addr_t dma_handle = bad_dma_address;
+ void *vaddr = phys_to_virt(paddr);
unsigned long uaddr;
unsigned int npages;
struct iommu_table *tbl = find_iommu_table(dev);
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index e4fffaabe53b..f97a08d0a8f9 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -141,7 +141,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
}
if (dma_ops->map_simple) {
- *dma_handle = dma_ops->map_simple(dev, memory,
+ *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory),
size,
PCI_DMA_BIDIRECTIONAL);
if (*dma_handle != bad_dma_address)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 700e4647dd30..c07455d1695f 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -264,9 +264,9 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
}
static dma_addr_t
-gart_map_simple(struct device *dev, char *buf, size_t size, int dir)
+gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir)
{
- dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
+ dma_addr_t map = dma_map_area(dev, paddr, size, dir);
flush_gart();
@@ -275,18 +275,17 @@ gart_map_simple(struct device *dev, char *buf, size_t size, int dir)
/* Map a single area into the IOMMU */
static dma_addr_t
-gart_map_single(struct device *dev, void *addr, size_t size, int dir)
+gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir)
{
- unsigned long phys_mem, bus;
+ unsigned long bus;
if (!dev)
dev = &fallback_dev;
- phys_mem = virt_to_phys(addr);
- if (!need_iommu(dev, phys_mem, size))
- return phys_mem;
+ if (!need_iommu(dev, paddr, size))
+ return paddr;
- bus = gart_map_simple(dev, addr, size, dir);
+ bus = gart_map_simple(dev, paddr, size, dir);
return bus;
}
diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu_64.c
index ab08e1832228..6e330769d017 100644
--- a/arch/x86/kernel/pci-nommu_64.c
+++ b/arch/x86/kernel/pci-nommu_64.c
@@ -26,10 +26,10 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
}
static dma_addr_t
-nommu_map_single(struct device *hwdev, void *ptr, size_t size,
+nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
int direction)
{
- dma_addr_t bus = virt_to_bus(ptr);
+ dma_addr_t bus = paddr;
if (!check_addr("map_single", hwdev, bus, size))
return bad_dma_address;
return bus;
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c
index 82a0a674a003..490da7f4b8d0 100644
--- a/arch/x86/kernel/pci-swiotlb_64.c
+++ b/arch/x86/kernel/pci-swiotlb_64.c
@@ -11,11 +11,18 @@
int swiotlb __read_mostly;
+static dma_addr_t
+swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size,
+ int direction)
+{
+ return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction);
+}
+
const struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
- .map_single = swiotlb_map_single,
+ .map_single = swiotlb_map_single_phys,
.unmap_single = swiotlb_unmap_single,
.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
.sync_single_for_device = swiotlb_sync_single_for_device,
diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index b5a413acac96..b331a8d3a7cf 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -16,10 +16,10 @@ struct dma_mapping_ops {
dma_addr_t *dma_handle, gfp_t gfp);
void (*free_coherent)(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle);
- dma_addr_t (*map_single)(struct device *hwdev, void *ptr,
+ dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr,
size_t size, int direction);
/* like map_single, but doesn't check the device mask */
- dma_addr_t (*map_simple)(struct device *hwdev, char *ptr,
+ dma_addr_t (*map_simple)(struct device *hwdev, phys_addr_t ptr,
size_t size, int direction);
void (*unmap_single)(struct device *dev, dma_addr_t addr,
size_t size, int direction);
@@ -73,7 +73,7 @@ dma_map_single(struct device *hwdev, void *ptr, size_t size,
int direction)
{
BUG_ON(!valid_dma_direction(direction));
- return dma_ops->map_single(hwdev, ptr, size, direction);
+ return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction);
}
static inline void
@@ -174,7 +174,9 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page,
size_t offset, size_t size,
int direction)
{
- return dma_map_single(dev, page_address(page)+offset, size, direction);
+ BUG_ON(!valid_dma_direction(direction));
+ return dma_ops->map_single(dev, page_to_phys(page)+offset,
+ size, direction);
}
static inline void dma_unmap_page(struct device *dev, dma_addr_t addr,
commit fa5c4639419668cbb18ca3d20c1253559a3b43ae
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Apr 16 02:29:42 2008 +0200
x86: rename find_max_pfn() to propagate_e820_map()
this function doesnt just 'find' the max_pfn - it also has
other side-effects such as registering sparse memory maps.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 0240cd778365..ed733e7cf4e6 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -475,7 +475,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
/*
* Find the highest page frame number we have available
*/
-void __init find_max_pfn(void)
+void __init propagate_e820_map(void)
{
int i;
@@ -704,7 +704,7 @@ static int __init parse_memmap(char *arg)
* size before original memory map is
* reset.
*/
- find_max_pfn();
+ propagate_e820_map();
saved_max_pfn = max_pfn;
#endif
e820.nr_map = 0;
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 5b0bffb7fcc9..1c4799e68718 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -812,10 +812,10 @@ void __init setup_arch(char **cmdline_p)
efi_init();
/* update e820 for memory not covered by WB MTRRs */
- find_max_pfn();
+ propagate_e820_map();
mtrr_bp_init();
if (mtrr_trim_uncached_memory(max_pfn))
- find_max_pfn();
+ propagate_e820_map();
max_low_pfn = setup_memory();
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index eba0bbede7a6..18378850e25a 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void)
printk("NUMA - single node, flat memory mode\n");
/* Run the memory configuration and find the top of memory. */
- find_max_pfn();
+ propagate_e820_map();
node_start_pfn[0] = 0;
node_end_pfn[0] = max_pfn;
memory_present(0, 0, max_pfn);
@@ -134,7 +134,7 @@ int __init get_memcfg_numa_flat(void)
/*
* Find the highest page frame number we have available for the node
*/
-static void __init find_max_pfn_node(int nid)
+static void __init propagate_e820_map_node(int nid)
{
if (node_end_pfn[nid] > max_pfn)
node_end_pfn[nid] = max_pfn;
@@ -379,7 +379,7 @@ unsigned long __init setup_memory(void)
printk("High memory starts at vaddr %08lx\n",
(ulong) pfn_to_kaddr(highstart_pfn));
for_each_online_node(nid)
- find_max_pfn_node(nid);
+ propagate_e820_map_node(nid);
memset(NODE_DATA(0), 0, sizeof(struct pglist_data));
NODE_DATA(0)->bdata = &node0_bdata;
diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h
index 43b1a8bd4b34..a9f7c6ec32bf 100644
--- a/include/asm-x86/e820_32.h
+++ b/include/asm-x86/e820_32.h
@@ -24,7 +24,7 @@ extern void update_e820(void);
extern int e820_all_mapped(unsigned long start, unsigned long end,
unsigned type);
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
-extern void find_max_pfn(void);
+extern void propagate_e820_map(void);
extern void register_bootmem_low_pages(unsigned long max_low_pfn);
extern void add_memory_region(unsigned long long start,
unsigned long long size, int type);
commit 8ce116e5993cf64729a4d2b3dc2c0f072852654b
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Feb 26 08:52:16 2008 +0100
x86: clean up cpu capabilities accesses, p4-clockmod.c
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 14791ec55cfd..199e4e05e5dc 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void)
if (c->x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
- if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) ||
- !test_bit(X86_FEATURE_ACC, c->x86_capability))
+ if (!test_cpu_cap(c, X86_FEATURE_ACPI) ||
+ !test_cpu_cap(c, X86_FEATURE_ACC))
return -ENODEV;
ret = cpufreq_register_driver(&p4clockmod_driver);
commit f8dfd5ed149ae340451f25847b434297c20d4645
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Apr 19 19:19:54 2008 +0200
x86: KGDB build fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 24362ecf5f9a..f47f0eb886b8 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -46,11 +46,7 @@
#include <asm/apicdef.h>
#include <asm/system.h>
-#ifdef CONFIG_X86_32
-# include <mach_ipi.h>
-#else
-# include <asm/mach_apic.h>
-#endif
+#include <mach_ipi.h>
/*
* Put the error code here just in case the user cares:
commit d1a4be630fb068f251d64b62919f143c49ca8057
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Apr 18 21:32:22 2008 +0200
x86 PAT: fix mmap() of holes
do not return a -EINVAL when mmap()-ing PCI holes.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7d9517abc9af..f7823a172868 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -542,7 +542,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
- return primary ? -EINVAL : 0;
+ return 0;
old_pte = *kpte;
if (!pte_val(old_pte)) {
commit 82da3ff89dc2a1842cff9b0d4cbc345cb90b59e1
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Apr 17 20:05:37 2008 +0200
x86: kgdb support
simplified and streamlined kgdb support on x86, both 32-bit and 64-bit,
based on patch from:
Subject: kgdb: core-lite
From: Jason Wessel <jason.wessel@windriver.com>
[ and countless other authors - see the patch for details. ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Jan Kiszka <jan.kiszka@web.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6c70fed0f9a0..5c4c8d7a46cc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,7 @@ config X86
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
+ select HAVE_ARCH_KGDB
config GENERIC_LOCKBREAK
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4eb5ce841106..4a4260c7f672 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -66,6 +66,7 @@ obj-$(CONFIG_MODULES) += module_$(BITS).o
obj-$(CONFIG_ACPI_SRAT) += srat_32.o
obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
+obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
new file mode 100644
index 000000000000..37194d6374d8
--- /dev/null
+++ b/arch/x86/kernel/kgdb.c
@@ -0,0 +1,417 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+/*
+ * Copyright (C) 2004 Amit S. Kale <amitkale@linsyssoft.com>
+ * Copyright (C) 2000-2001 VERITAS Software Corporation.
+ * Copyright (C) 2002 Andi Kleen, SuSE Labs
+ * Copyright (C) 2004 LinSysSoft Technologies Pvt. Ltd.
+ * Copyright (C) 2007 MontaVista Software, Inc.
+ * Copyright (C) 2007-2008 Jason Wessel, Wind River Systems, Inc.
+ */
+/****************************************************************************
+ * Contributor: Lake Stevens Instrument Division$
+ * Written by: Glenn Engel $
+ * Updated by: Amit Kale<akale@veritas.com>
+ * Updated by: Tom Rini <trini@kernel.crashing.org>
+ * Updated by: Jason Wessel <jason.wessel@windriver.com>
+ * Modified for 386 by Jim Kingdon, Cygnus Support.
+ * Origianl kgdb, compatibility with 2.1.xx kernel by
+ * David Grothe <dave@gcom.com>
+ * Integrated into 2.2.5 kernel by Tigran Aivazian <tigran@sco.com>
+ * X86_64 changes from Andi Kleen's patch merged by Jim Houston
+ */
+#include <linux/spinlock.h>
+#include <linux/kdebug.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/kgdb.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_32
+# include <mach_ipi.h>
+#else
+# include <asm/mach_apic.h>
+#endif
+
+/*
+ * Put the error code here just in case the user cares:
+ */
+static int gdb_x86errcode;
+
+/*
+ * Likewise, the vector number here (since GDB only gets the signal
+ * number through the usual means, and that's not very specific):
+ */
+static int gdb_x86vector = -1;
+
+/**
+ * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs
+ * @gdb_regs: A pointer to hold the registers in the order GDB wants.
+ * @regs: The &struct pt_regs of the current process.
+ *
+ * Convert the pt_regs in @regs into the format for registers that
+ * GDB expects, stored in @gdb_regs.
+ */
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ gdb_regs[GDB_AX] = regs->ax;
+ gdb_regs[GDB_BX] = regs->bx;
+ gdb_regs[GDB_CX] = regs->cx;
+ gdb_regs[GDB_DX] = regs->dx;
+ gdb_regs[GDB_SI] = regs->si;
+ gdb_regs[GDB_DI] = regs->di;
+ gdb_regs[GDB_BP] = regs->bp;
+ gdb_regs[GDB_PS] = regs->flags;
+ gdb_regs[GDB_PC] = regs->ip;
+#ifdef CONFIG_X86_32
+ gdb_regs[GDB_DS] = regs->ds;
+ gdb_regs[GDB_ES] = regs->es;
+ gdb_regs[GDB_CS] = regs->cs;
+ gdb_regs[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_FS] = 0xFFFF;
+ gdb_regs[GDB_GS] = 0xFFFF;
+#else
+ gdb_regs[GDB_R8] = regs->r8;
+ gdb_regs[GDB_R9] = regs->r9;
+ gdb_regs[GDB_R10] = regs->r10;
+ gdb_regs[GDB_R11] = regs->r11;
+ gdb_regs[GDB_R12] = regs->r12;
+ gdb_regs[GDB_R13] = regs->r13;
+ gdb_regs[GDB_R14] = regs->r14;
+ gdb_regs[GDB_R15] = regs->r15;
+#endif
+ gdb_regs[GDB_SP] = regs->sp;
+}
+
+/**
+ * sleeping_thread_to_gdb_regs - Convert ptrace regs to GDB regs
+ * @gdb_regs: A pointer to hold the registers in the order GDB wants.
+ * @p: The &struct task_struct of the desired process.
+ *
+ * Convert the register values of the sleeping process in @p to
+ * the format that GDB expects.
+ * This function is called when kgdb does not have access to the
+ * &struct pt_regs and therefore it should fill the gdb registers
+ * @gdb_regs with what has been saved in &struct thread_struct
+ * thread field during switch_to.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+ gdb_regs[GDB_AX] = 0;
+ gdb_regs[GDB_BX] = 0;
+ gdb_regs[GDB_CX] = 0;
+ gdb_regs[GDB_DX] = 0;
+ gdb_regs[GDB_SI] = 0;
+ gdb_regs[GDB_DI] = 0;
+ gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp;
+#ifdef CONFIG_X86_32
+ gdb_regs[GDB_DS] = __KERNEL_DS;
+ gdb_regs[GDB_ES] = __KERNEL_DS;
+ gdb_regs[GDB_PS] = 0;
+ gdb_regs[GDB_CS] = __KERNEL_CS;
+ gdb_regs[GDB_PC] = p->thread.ip;
+ gdb_regs[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_FS] = 0xFFFF;
+ gdb_regs[GDB_GS] = 0xFFFF;
+#else
+ gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
+ gdb_regs[GDB_PC] = 0;
+ gdb_regs[GDB_R8] = 0;
+ gdb_regs[GDB_R9] = 0;
+ gdb_regs[GDB_R10] = 0;
+ gdb_regs[GDB_R11] = 0;
+ gdb_regs[GDB_R12] = 0;
+ gdb_regs[GDB_R13] = 0;
+ gdb_regs[GDB_R14] = 0;
+ gdb_regs[GDB_R15] = 0;
+#endif
+ gdb_regs[GDB_SP] = p->thread.sp;
+}
+
+/**
+ * gdb_regs_to_pt_regs - Convert GDB regs to ptrace regs.
+ * @gdb_regs: A pointer to hold the registers we've received from GDB.
+ * @regs: A pointer to a &struct pt_regs to hold these values in.
+ *
+ * Convert the GDB regs in @gdb_regs into the pt_regs, and store them
+ * in @regs.
+ */
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ regs->ax = gdb_regs[GDB_AX];
+ regs->bx = gdb_regs[GDB_BX];
+ regs->cx = gdb_regs[GDB_CX];
+ regs->dx = gdb_regs[GDB_DX];
+ regs->si = gdb_regs[GDB_SI];
+ regs->di = gdb_regs[GDB_DI];
+ regs->bp = gdb_regs[GDB_BP];
+ regs->flags = gdb_regs[GDB_PS];
+ regs->ip = gdb_regs[GDB_PC];
+#ifdef CONFIG_X86_32
+ regs->ds = gdb_regs[GDB_DS];
+ regs->es = gdb_regs[GDB_ES];
+ regs->cs = gdb_regs[GDB_CS];
+#else
+ regs->r8 = gdb_regs[GDB_R8];
+ regs->r9 = gdb_regs[GDB_R9];
+ regs->r10 = gdb_regs[GDB_R10];
+ regs->r11 = gdb_regs[GDB_R11];
+ regs->r12 = gdb_regs[GDB_R12];
+ regs->r13 = gdb_regs[GDB_R13];
+ regs->r14 = gdb_regs[GDB_R14];
+ regs->r15 = gdb_regs[GDB_R15];
+#endif
+}
+
+/**
+ * kgdb_post_primary_code - Save error vector/code numbers.
+ * @regs: Original pt_regs.
+ * @e_vector: Original error vector.
+ * @err_code: Original error code.
+ *
+ * This is needed on architectures which support SMP and KGDB.
+ * This function is called after all the slave cpus have been put
+ * to a know spin state and the primary CPU has control over KGDB.
+ */
+void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
+{
+ /* primary processor is completely in the debugger */
+ gdb_x86vector = e_vector;
+ gdb_x86errcode = err_code;
+}
+
+#ifdef CONFIG_SMP
+/**
+ * kgdb_roundup_cpus - Get other CPUs into a holding pattern
+ * @flags: Current IRQ state
+ *
+ * On SMP systems, we need to get the attention of the other CPUs
+ * and get them be in a known state. This should do what is needed
+ * to get the other CPUs to call kgdb_wait(). Note that on some arches,
+ * the NMI approach is not used for rounding up all the CPUs. For example,
+ * in case of MIPS, smp_call_function() is used to roundup CPUs. In
+ * this case, we have to make sure that interrupts are enabled before
+ * calling smp_call_function(). The argument to this function is
+ * the flags that will be used when restoring the interrupts. There is
+ * local_irq_save() call before kgdb_roundup_cpus().
+ *
+ * On non-SMP systems, this is not called.
+ */
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ send_IPI_allbutself(APIC_DM_NMI);
+}
+#endif
+
+/**
+ * kgdb_arch_handle_exception - Handle architecture specific GDB packets.
+ * @vector: The error vector of the exception that happened.
+ * @signo: The signal number of the exception that happened.
+ * @err_code: The error code of the exception that happened.
+ * @remcom_in_buffer: The buffer of the packet we have read.
+ * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into.
+ * @regs: The &struct pt_regs of the current process.
+ *
+ * This function MUST handle the 'c' and 's' command packets,
+ * as well packets to set / remove a hardware breakpoint, if used.
+ * If there are additional packets which the hardware needs to handle,
+ * they are handled here. The code should return -1 if it wants to
+ * process more packets, and a %0 or %1 if it wants to exit from the
+ * kgdb callback.
+ */
+int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
+ char *remcomInBuffer, char *remcomOutBuffer,
+ struct pt_regs *linux_regs)
+{
+ unsigned long addr;
+ char *ptr;
+ int newPC;
+
+ switch (remcomInBuffer[0]) {
+ case 'c':
+ case 's':
+ /* try to read optional parameter, pc unchanged if no parm */
+ ptr = &remcomInBuffer[1];
+ if (kgdb_hex2long(&ptr, &addr))
+ linux_regs->ip = addr;
+ newPC = linux_regs->ip;
+
+ /* clear the trace bit */
+ linux_regs->flags &= ~TF_MASK;
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+
+ /* set the trace bit if we're stepping */
+ if (remcomInBuffer[0] == 's') {
+ linux_regs->flags |= TF_MASK;
+ kgdb_single_step = 1;
+ if (kgdb_contthread) {
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
+ }
+ }
+
+ return 0;
+ }
+
+ /* this means that we do not want to exit from the handler: */
+ return -1;
+}
+
+static inline int
+single_step_cont(struct pt_regs *regs, struct die_args *args)
+{
+ /*
+ * Single step exception from kernel space to user space so
+ * eat the exception and continue the process:
+ */
+ printk(KERN_ERR "KGDB: trap/step from kernel to user space, "
+ "resuming...\n");
+ kgdb_arch_handle_exception(args->trapnr, args->signr,
+ args->err, "c", "", regs);
+
+ return NOTIFY_STOP;
+}
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+ struct pt_regs *regs = args->regs;
+
+ switch (cmd) {
+ case DIE_NMI:
+ if (atomic_read(&kgdb_active) != -1) {
+ /* KGDB CPU roundup */
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ return NOTIFY_STOP;
+ }
+ return NOTIFY_DONE;
+
+ case DIE_NMI_IPI:
+ if (atomic_read(&kgdb_active) != -1) {
+ /* KGDB CPU roundup: */
+ if (kgdb_nmicallback(raw_smp_processor_id(), regs))
+ return NOTIFY_DONE;
+ return NOTIFY_STOP;
+ }
+ return NOTIFY_DONE;
+
+ case DIE_NMIWATCHDOG:
+ if (atomic_read(&kgdb_active) != -1) {
+ /* KGDB CPU roundup: */
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ return NOTIFY_STOP;
+ }
+ /* Enter debugger: */
+ break;
+
+ case DIE_DEBUG:
+ if (atomic_read(&kgdb_cpu_doing_single_step) ==
+ raw_smp_processor_id() &&
+ user_mode(regs))
+ return single_step_cont(regs, args);
+ /* fall through */
+ default:
+ if (user_mode(regs))
+ return NOTIFY_DONE;
+ }
+
+ if (kgdb_handle_exception(args->trapnr, args->signr, args->err, regs))
+ return NOTIFY_DONE;
+
+ return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __kgdb_notify(ptr, cmd);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_notify,
+
+ /*
+ * Lowest-prio notifier priority, we want to be notified last:
+ */
+ .priority = -INT_MAX,
+};
+
+/**
+ * kgdb_arch_init - Perform any architecture specific initalization.
+ *
+ * This function will handle the initalization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+ return register_die_notifier(&kgdb_notifier);
+}
+
+/**
+ * kgdb_arch_exit - Perform any architecture specific uninitalization.
+ *
+ * This function will handle the uninitalization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+ unregister_die_notifier(&kgdb_notifier);
+}
+
+/**
+ *
+ * kgdb_skipexception - Bail out of KGDB when we've been triggered.
+ * @exception: Exception vector number
+ * @regs: Current &struct pt_regs.
+ *
+ * On some architectures we need to skip a breakpoint exception when
+ * it occurs after a breakpoint has been removed.
+ *
+ * Skip an int3 exception when it occurs after a breakpoint has been
+ * removed. Backtrack eip by 1 since the int3 would have caused it to
+ * increment by 1.
+ */
+int kgdb_skipexception(int exception, struct pt_regs *regs)
+{
+ if (exception == 3 && kgdb_isremovedbreak(regs->ip - 1)) {
+ regs->ip -= 1;
+ return 1;
+ }
+ return 0;
+}
+
+unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
+{
+ if (exception == 3)
+ return instruction_pointer(regs) - 1;
+ return instruction_pointer(regs);
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+ /* Breakpoint instruction: */
+ .gdb_bpt_instr = { 0xcc },
+};
diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h
new file mode 100644
index 000000000000..484c47554f3b
--- /dev/null
+++ b/include/asm-x86/kgdb.h
@@ -0,0 +1,81 @@
+#ifndef _ASM_KGDB_H_
+#define _ASM_KGDB_H_
+
+/*
+ * Copyright (C) 2001-2004 Amit S. Kale
+ * Copyright (C) 2008 Wind River Systems, Inc.
+ */
+
+/*
+ * BUFMAX defines the maximum number of characters in inbound/outbound
+ * buffers at least NUMREGBYTES*2 are needed for register packets
+ * Longer buffer is needed to list all threads
+ */
+#define BUFMAX 1024
+
+/*
+ * Note that this register image is in a different order than
+ * the register image that Linux produces at interrupt time.
+ *
+ * Linux's register image is defined by struct pt_regs in ptrace.h.
+ * Just why GDB uses a different order is a historical mystery.
+ */
+#ifdef CONFIG_X86_32
+enum regnames {
+ GDB_AX, /* 0 */
+ GDB_CX, /* 1 */
+ GDB_DX, /* 2 */
+ GDB_BX, /* 3 */
+ GDB_SP, /* 4 */
+ GDB_BP, /* 5 */
+ GDB_SI, /* 6 */
+ GDB_DI, /* 7 */
+ GDB_PC, /* 8 also known as eip */
+ GDB_PS, /* 9 also known as eflags */
+ GDB_CS, /* 10 */
+ GDB_SS, /* 11 */
+ GDB_DS, /* 12 */
+ GDB_ES, /* 13 */
+ GDB_FS, /* 14 */
+ GDB_GS, /* 15 */
+};
+#else /* ! CONFIG_X86_32 */
+enum regnames {
+ GDB_AX, /* 0 */
+ GDB_DX, /* 1 */
+ GDB_CX, /* 2 */
+ GDB_BX, /* 3 */
+ GDB_SI, /* 4 */
+ GDB_DI, /* 5 */
+ GDB_BP, /* 6 */
+ GDB_SP, /* 7 */
+ GDB_R8, /* 8 */
+ GDB_R9, /* 9 */
+ GDB_R10, /* 10 */
+ GDB_R11, /* 11 */
+ GDB_R12, /* 12 */
+ GDB_R13, /* 13 */
+ GDB_R14, /* 14 */
+ GDB_R15, /* 15 */
+ GDB_PC, /* 16 */
+ GDB_PS, /* 17 */
+};
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Number of bytes of registers:
+ */
+#ifdef CONFIG_X86_32
+# define NUMREGBYTES 64
+#else
+# define NUMREGBYTES ((GDB_PS+1)*8)
+#endif
+
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm(" int $3");
+}
+#define BREAK_INSTR_SIZE 1
+#define CACHE_FLUSH_IS_SAFE 1
+
+#endif /* _ASM_KGDB_H_ */
commit c33fa9f5609e918824446ef9a75319d4a802f1f4
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Apr 17 20:05:36 2008 +0200
uaccess: add probe_kernel_write()
add probe_kernel_read() and probe_kernel_write().
Uninlined and restricted to kernel range memory only, as suggested
by Linus.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 975c963e5789..fec6decfb983 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -84,4 +84,26 @@ static inline unsigned long __copy_from_user_nocache(void *to,
ret; \
})
+/*
+ * probe_kernel_read(): safely attempt to read from a location
+ * @dst: pointer to the buffer that shall take the data
+ * @src: address to read from
+ * @size: size of the data chunk
+ *
+ * Safely read from address @src to the buffer at @dst. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long probe_kernel_read(void *dst, void *src, size_t size);
+
+/*
+ * probe_kernel_write(): safely attempt to write to a location
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long probe_kernel_write(void *dst, void *src, size_t size);
+
#endif /* __LINUX_UACCESS_H__ */
diff --git a/mm/Makefile b/mm/Makefile
index a5b0dd93427a..18c143b3c46c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -8,7 +8,7 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
vmalloc.o
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
- page_alloc.o page-writeback.o pdflush.o \
+ maccess.o page_alloc.o page-writeback.o pdflush.o \
readahead.o swap.o truncate.o vmscan.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
page_isolation.o $(mmu-y)
diff --git a/mm/maccess.c b/mm/maccess.c
new file mode 100644
index 000000000000..24f81b971403
--- /dev/null
+++ b/mm/maccess.c
@@ -0,0 +1,49 @@
+/*
+ * Access kernel memory without faulting.
+ */
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+
+/**
+ * probe_kernel_read(): safely attempt to read from a location
+ * @dst: pointer to the buffer that shall take the data
+ * @src: address to read from
+ * @size: size of the data chunk
+ *
+ * Safely read from address @src to the buffer at @dst. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+long probe_kernel_read(void *dst, void *src, size_t size)
+{
+ long ret;
+
+ pagefault_disable();
+ ret = __copy_from_user_inatomic(dst,
+ (__force const void __user *)src, size);
+ pagefault_enable();
+
+ return ret ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(probe_kernel_read);
+
+/**
+ * probe_kernel_write(): safely attempt to write to a location
+ * @dst: address to write to
+ * @src: pointer to the data that shall be written
+ * @size: size of the data chunk
+ *
+ * Safely write to address @dst from the buffer at @src. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+long probe_kernel_write(void *dst, void *src, size_t size)
+{
+ long ret;
+
+ pagefault_disable();
+ ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
+ pagefault_enable();
+
+ return ret ? -EFAULT : 0;
+}
+EXPORT_SYMBOL_GPL(probe_kernel_write);
commit 77ad386e596c6b0930cc2e09e3cce485e3ee7f72
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Mar 21 15:23:19 2008 +0100
x86: standalone trampoline code
move the trampoline setup code out of smpboot.c - UP kernels can have
suspend support too.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fdd8395e0ed3..530ed6a4a031 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,6 +28,7 @@ obj-y += alternative.o i8253.o
obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o
obj-y += tsc_$(BITS).o io_delay.o rtc.o
+obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += i387.o
obj-y += ptrace.o
obj-y += ds.o
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 424600e671bd..e6abe8a49b1f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -140,9 +140,6 @@ static atomic_t init_deasserted;
static int boot_cpu_logical_apicid;
-/* ready for x86_64, no harm for x86, since it will overwrite after alloc */
-unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
-
/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;
@@ -550,18 +547,6 @@ cpumask_t cpu_coregroup_map(int cpu)
return c->llc_shared_map;
}
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-unsigned long setup_trampoline(void)
-{
- memcpy(trampoline_base, trampoline_data,
- trampoline_end - trampoline_data);
- return virt_to_phys(trampoline_base);
-}
-
#ifdef CONFIG_X86_32
/*
* We are called very early to get the low memory for the
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
new file mode 100644
index 000000000000..abbf199adebb
--- /dev/null
+++ b/arch/x86/kernel/trampoline.c
@@ -0,0 +1,18 @@
+#include <linux/io.h>
+
+#include <asm/trampoline.h>
+
+/* ready for x86_64, no harm for x86, since it will overwrite after alloc */
+unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+unsigned long setup_trampoline(void)
+{
+ memcpy(trampoline_base, trampoline_data,
+ trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
+}
commit ba1ce61ff226bddebd2101a29fe56b4664ef7cec
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Apr 7 13:11:09 2008 +0200
x86: don't set io apic features if io-apic is not enabled, fix
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 09cb77813680..44f52f623fd6 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -429,9 +429,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
struct mpc_config_ioapic *m=
(struct mpc_config_ioapic *)mpt;
MP_ioapic_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
#endif
+ mpt+=sizeof(struct mpc_config_ioapic);
+ count+=sizeof(struct mpc_config_ioapic);
break;
}
case MP_INTSRC:
@@ -441,9 +441,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
(struct mpc_config_intsrc *)mpt;
MP_intsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
#endif
+ mpt+=sizeof(struct mpc_config_intsrc);
+ count+=sizeof(struct mpc_config_intsrc);
break;
}
case MP_LINTSRC: