Patches contributed by Eötvös Loránd University


commit 00d1c5e05736f947687be27706bda01cec104e57
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Apr 17 17:40:45 2008 +0200

    x86: add gbpages switches
    
    These new controls toggle experimental support for a new CPU feature:
    the straightforward extension of large pages from the pmd level to the
    pud level, which allows 1 GB (kernel) TLB entries instead of 2 MB ones.
    
    Turn it off by default, as this code has not been tested well enough yet.
    
    Either the CONFIG_DIRECT_GBPAGES=y .config option or the gbpages
    boot option can be used to enable it. If it is enabled in the .config,
    the nogbpages boot option disables it.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index 34abae4e9442..b0c7b6c4abda 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -307,3 +307,8 @@ Debugging
 			stuck (default)
 
 Miscellaneous
+
+	nogbpages
+		Do not use GB pages for kernel direct mappings.
+	gbpages
+		Use GB pages for kernel direct mappings.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 7ce8e7025661..f4413c04e687 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -76,6 +76,18 @@ config DEBUG_RODATA
 	  data. This is recommended so that we can catch kernel bugs sooner.
 	  If in doubt, say "Y".
 
+config DIRECT_GBPAGES
+	bool "Enable gbpages-mapped kernel pagetables"
+	depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64
+	help
+	  Enable gigabyte pages support (if the CPU supports it). This can
+	  improve the kernel's performance a tiny bit by reducing TLB
+	  pressure.
+
+	  This is experimental code.
+
+	  If in doubt, say "N".
+
 config DEBUG_RODATA_TEST
 	bool "Testcase for the DEBUG_RODATA feature"
 	depends on DEBUG_RODATA
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a02a14f0f324..6e7d5a42a09a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,26 @@ static unsigned long dma_reserve __initdata;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
+int direct_gbpages __meminitdata
+#ifdef CONFIG_DIRECT_GBPAGES
+				= 1
+#endif
+;
+
+static int __init parse_direct_gbpages_off(char *arg)
+{
+	direct_gbpages = 0;
+	return 0;
+}
+early_param("nogbpages", parse_direct_gbpages_off);
+
+static int __init parse_direct_gbpages_on(char *arg)
+{
+	direct_gbpages = 1;
+	return 0;
+}
+early_param("gbpages", parse_direct_gbpages_on);
+
 /*
  * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
  * physical space so we can cache the place of the first one and move
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 01d2359e7a34..6ef09914acbe 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -239,6 +239,8 @@ static inline int pud_large(pud_t pte)
 
 #define update_mmu_cache(vma,address,pte) do { } while (0)
 
+extern int direct_gbpages;
+
 /* Encode and de-code a swap entry */
 #define __swp_type(x)			(((x).val >> 1) & 0x3f)
 #define __swp_offset(x)			((x).val >> 8)
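
The toggle above only matters on CPUs that actually support 1 GB pages. As a
stand-alone illustration (not part of the patch), the capability can be
detected from user space via CPUID leaf 0x80000001, EDX bit 26 ("pdpe1gb");
this sketch assumes GCC's <cpuid.h>:

	#include <cpuid.h>
	#include <stdio.h>

	/* CPUID.80000001H:EDX bit 26 advertises 1 GB ("pdpe1gb") pages. */
	#define X86_1GB_PAGES_BIT	(1u << 26)

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		if (!__get_cpuid(0x80000001, &eax, &ebx, &ecx, &edx)) {
			printf("extended CPUID leaf not available\n");
			return 1;
		}
		printf("1 GB pages %ssupported by this CPU\n",
		       (edx & X86_1GB_PAGES_BIT) ? "" : "not ");
		return 0;
	}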

commit 85eb69a16aab5a394ce043c2131319eae35e6493
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 21 12:50:51 2008 +0100

    x86: increase the kernel text limit to 512 MB
    
    People sometimes do crazy stuff like building really large static
    arrays into their kernels or building allyesconfig kernels. Give
    more space to the kernel and push modules up a bit: the kernel now
    has 512 MB and modules have 1.5 GB.
    
    Should be enough for a few years ;-)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a007454133a3..017216916dff 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -383,12 +383,12 @@ NEXT_PAGE(level2_ident_pgt)
 
 NEXT_PAGE(level2_kernel_pgt)
 	/*
-	 * 128 MB kernel mapping. We spend a full page on this pagetable
+	 * 512 MB kernel mapping. We spend a full page on this pagetable
 	 * anyway.
 	 *
 	 * The kernel code+data+bss must not be bigger than that.
 	 *
-	 * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+	 * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
 	 *  If you want to increase this then increase MODULES_VADDR
 	 *  too.)
 	 */
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 143546073b95..aee05c616e05 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -48,10 +48,10 @@
 #define __VIRTUAL_MASK_SHIFT	48
 
 /*
- * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
+ * Kernel image size is limited to 512 MB (see level2_kernel_pgt in
  * arch/x86/kernel/head_64.S), and it is mapped here:
  */
-#define KERNEL_IMAGE_SIZE	(128*1024*1024)
+#define KERNEL_IMAGE_SIZE	(512*1024*1024)
 #define KERNEL_IMAGE_START	_AC(0xffffffff80000000, UL)
 
 #ifndef __ASSEMBLY__
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 0a0b77bc736a..01d2359e7a34 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -140,7 +140,7 @@ static inline void native_pgd_clear(pgd_t * pgd)
 #define VMALLOC_START    _AC(0xffffc20000000000, UL)
 #define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
 #define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
-#define MODULES_VADDR    _AC(0xffffffff88000000, UL)
+#define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
 #define MODULES_END      _AC(0xfffffffffff00000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 

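For reference, the arithmetic behind the new constants, as a stand-alone
user-space sketch (values copied from the hunks above): the kernel image
window grows to 512 MB, and the module area that follows it is roughly 1.5 GB.

	#include <stdio.h>

	int main(void)
	{
		unsigned long long kernel_image_start = 0xffffffff80000000ULL;
		unsigned long long modules_vaddr      = 0xffffffffa0000000ULL;
		unsigned long long modules_end        = 0xfffffffffff00000ULL;

		/* 0x20000000 bytes == 512 MB for the kernel image */
		printf("kernel image: %llu MB\n",
		       (modules_vaddr - kernel_image_start) >> 20);
		/* 0x5ff00000 bytes == 1535 MB, i.e. roughly 1.5 GB for modules */
		printf("module area:  %llu MB\n",
		       (modules_end - modules_vaddr) >> 20);
		return 0;
	}
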
commit b4e0409a36f4533770a12095bde2a574a08a319e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Feb 21 13:45:16 2008 +0100

    x86: check vmlinux limits, 64-bit
    
    These build-time and link-time checks would have prevented the
    vmlinux size regression.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index ad2440832de0..38f32e798a99 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -82,6 +82,19 @@ void __init x86_64_start_kernel(char * real_mode_data)
 {
 	int i;
 
+	/*
+	 * Build-time sanity checks on the kernel image and module
+	 * area mappings. (these are purely build-time and produce no code)
+	 */
+	BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START);
+	BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
+	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
+	BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0);
+	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
+	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+				(__START_KERNEL & PGDIR_MASK)));
+
 	/* clear bss before set_intr_gate with early_idt_handler */
 	clear_bss();
 
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fab132299735..4c369451007b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -247,3 +247,9 @@ SECTIONS
 
   DWARF_DEBUG
 }
+
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
+	"kernel image bigger than KERNEL_IMAGE_SIZE")
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ec08d8389850..81fcbeec3892 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -976,9 +976,5 @@ void vmalloc_sync_all(void)
 		if (address == start)
 			start = address + PGDIR_SIZE;
 	}
-	/* Check that there is no need to do the same for the modules area. */
-	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
-	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
-				(__START_KERNEL & PGDIR_MASK)));
 #endif
 }
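
BUILD_BUG_ON() and the linker ASSERT() cost nothing at runtime. A minimal
stand-alone sketch of the idiom BUILD_BUG_ON() is built on (the macro is
renamed here; the kernel's exact definition may differ): a true condition
produces a negative array size and the compile fails.

	/* char[1 - 2*!!(cond)] is char[1] when cond is false and the
	 * invalid char[-1] when cond is true, so a failed check breaks
	 * the build without emitting any code. */
	#define MY_BUILD_BUG_ON(cond)	((void)sizeof(char[1 - 2 * !!(cond)]))

	int main(void)
	{
		MY_BUILD_BUG_ON(sizeof(long) < sizeof(int));	/* false: compiles */
		/* MY_BUILD_BUG_ON(1); -- would fail the build */
		return 0;
	}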

commit bead9a3abd15710b0bdfd418daef606722d86282
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Apr 16 01:40:00 2008 +0200

    mm: sparsemem memory_present() fix
    
    Fix memory corruption and crash on 32-bit x86 systems.
    
    If a !PAE x86 kernel is booted on a 32-bit system with more than 4GB of
    RAM, then we call memory_present() with a start/end that goes outside
    the scope of MAX_PHYSMEM_BITS.
    
    That causes this loop to happily walk over the limit of the sparse
    memory section map:
    
        for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
                    unsigned long section = pfn_to_section_nr(pfn);
                    struct mem_section *ms;
    
                    sparse_index_init(section, nid);
                    set_section_nid(section, nid);
    
                    ms = __nr_to_section(section);
                    if (!ms->section_mem_map)
                            ms->section_mem_map = sparse_encode_early_nid(nid) |
                                                            SECTION_MARKED_PRESENT;
    
    'ms' will be out of bounds and we'll corrupt a small amount of memory by
    encoding the node ID and writing SECTION_MARKED_PRESENT (==0x1) over it.
    
    The corruption might happen when encoding a non-zero node ID, or due to
    SECTION_MARKED_PRESENT, which is 0x1:
    
            mmzone.h:#define        SECTION_MARKED_PRESENT  (1UL<<0)
    
    The fix is to sanity check anything the architecture passes to
    sparsemem.
    
    This bug seems to be rather old (as old as sparsemem support itself),
    but the exact incarnation depended on random details like configs, which
    made this bug more prominent in v2.6.25-to-be.
    
    An additional enhancement might be to print a warning about ignored or
    trimmed memory ranges.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Tested-by: Christoph Lameter <clameter@sgi.com>
    Cc: Pekka Enberg <penberg@cs.helsinki.fi>
    Cc: Mel Gorman <mel@csn.ul.ie>
    Cc: Nick Piggin <npiggin@suse.de>
    Cc: Andrew Morton <akpm@linux-foundation.org>
    Cc: Rafael J. Wysocki <rjw@sisk.pl>
    Cc: Yinghai Lu <Yinghai.Lu@sun.com>
    Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/mm/sparse.c b/mm/sparse.c
index f6a43c09c322..98d6b39c3472 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -149,8 +149,18 @@ static inline int sparse_early_nid(struct mem_section *section)
 /* Record a memory area against a node. */
 void __init memory_present(int nid, unsigned long start, unsigned long end)
 {
+	unsigned long max_arch_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
 	unsigned long pfn;
 
+	/*
+	 * Sanity checks - do not allow an architecture to pass
+	 * in larger pfns than the maximum scope of sparsemem:
+	 */
+	if (start >= max_arch_pfn)
+		return;
+	if (end >= max_arch_pfn)
+		end = max_arch_pfn;
+
 	start &= PAGE_SECTION_MASK;
 	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
 		unsigned long section = pfn_to_section_nr(pfn);
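
A stand-alone sketch of the clamping with the numbers from the description,
assuming the 32-bit !PAE case where MAX_PHYSMEM_BITS is 32 and PAGE_SHIFT is
12, so the sparsemem limit is the 4 GB boundary:

	#include <stdio.h>

	#define PAGE_SHIFT		12
	#define MAX_PHYSMEM_BITS	32	/* assumed !PAE x86 value */

	int main(void)
	{
		unsigned long max_arch_pfn = 1UL << (MAX_PHYSMEM_BITS - PAGE_SHIFT);
		unsigned long start = 0xc0000;	/* pfn of 3 GB */
		unsigned long end   = 0x140000;	/* pfn of 5 GB */

		/* Same sanity checks as the hunk above: */
		if (start >= max_arch_pfn) {
			printf("range ignored entirely\n");
			return 0;
		}
		if (end >= max_arch_pfn)
			end = max_arch_pfn;

		printf("range trimmed: end pfn is now %#lx (the 4 GB boundary)\n", end);
		return 0;
	}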

commit e2df9e0905136eebeca66eb9a994ca48d0fa7990
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Apr 14 08:50:02 2008 +0200

    revert "sched: fix fair sleepers"
    
    revert "sched: fix fair sleepers" (e22ecef1d2658ba54ed7d3fdb5d60829fb434c23),
    because it is causing audio skipping, see:
    
       http://bugzilla.kernel.org/show_bug.cgi?id=10428
    
    the patch is correct and the real cause of the skipping is not
    understood (tracing makes it go away), but time has run out so we'll
    revert it and re-try in 2.6.26.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 86a93376282c..0080968d3e4a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,10 +510,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS)) {
-			vruntime -= calc_delta_fair(sysctl_sched_latency,
-						    &cfs_rq->load);
-		}
+		if (sched_feat(NEW_FAIR_SLEEPERS))
+			vruntime -= sysctl_sched_latency;
 
 		/* ensure we never gain time by being placed backwards. */
 		vruntime = max_vruntime(se->vruntime, vruntime);

commit 5b13d863573e746739ccfc24ac1a9473cfee8df1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Apr 7 20:58:08 2008 +0200

    revert "x86: tsc prevent time going backwards"
    
    revert:
    
    | commit 47001d603375f857a7fab0e9c095d964a1ea0039
    | Author: Thomas Gleixner <tglx@linutronix.de>
    | Date:   Tue Apr 1 19:45:18 2008 +0200
    |
    |     x86: tsc prevent time going backwards
    
    It has been identified as causing a suspend regression - and since the
    commit fixes a longstanding bug that existed before 2.6.25 was
    opened, it can wait some more until the effects are better
    understood.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index d7498b34c8e9..f14cfd9d1f94 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -287,27 +287,14 @@ core_initcall(cpufreq_tsc);
 /* clock source code */
 
 static unsigned long current_tsc_khz = 0;
-static struct clocksource clocksource_tsc;
 
-/*
- * We compare the TSC to the cycle_last value in the clocksource
- * structure to avoid a nasty time-warp issue. This can be observed in
- * a very small window right after one CPU updated cycle_last under
- * xtime lock and the other CPU reads a TSC value which is smaller
- * than the cycle_last reference value due to a TSC which is slighty
- * behind. This delta is nowhere else observable, but in that case it
- * results in a forward time jump in the range of hours due to the
- * unsigned delta calculation of the time keeping core code, which is
- * necessary to support wrapping clocksources like pm timer.
- */
 static cycle_t read_tsc(void)
 {
 	cycle_t ret;
 
 	rdtscll(ret);
 
-	return ret >= clocksource_tsc.cycle_last ?
-		ret : clocksource_tsc.cycle_last;
+	return ret;
 }
 
 static struct clocksource clocksource_tsc = {
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index 01fc9f0c39e2..947554ddabb6 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -11,7 +11,6 @@
 #include <asm/hpet.h>
 #include <asm/timex.h>
 #include <asm/timer.h>
-#include <asm/vgtod.h>
 
 static int notsc __initdata = 0;
 
@@ -291,34 +290,18 @@ int __init notsc_setup(char *s)
 
 __setup("notsc", notsc_setup);
 
-static struct clocksource clocksource_tsc;
 
-/*
- * We compare the TSC to the cycle_last value in the clocksource
- * structure to avoid a nasty time-warp. This can be observed in a
- * very small window right after one CPU updated cycle_last under
- * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
- * is smaller than the cycle_last reference value due to a TSC which
- * is slighty behind. This delta is nowhere else observable, but in
- * that case it results in a forward time jump in the range of hours
- * due to the unsigned delta calculation of the time keeping core
- * code, which is necessary to support wrapping clocksources like pm
- * timer.
- */
+/* clock source code: */
 static cycle_t read_tsc(void)
 {
 	cycle_t ret = (cycle_t)get_cycles();
-
-	return ret >= clocksource_tsc.cycle_last ?
-		ret : clocksource_tsc.cycle_last;
+	return ret;
 }
 
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
 	cycle_t ret = (cycle_t)vget_cycles();
-
-	return ret >= __vsyscall_gtod_data.clock.cycle_last ?
-		ret : __vsyscall_gtod_data.clock.cycle_last;
+	return ret;
 }
 
 static struct clocksource clocksource_tsc = {
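
The comments deleted above explain what the clamping protected against. A
stand-alone sketch of that unsigned-delta wrap, with made-up numbers: a TSC
value only slightly behind cycle_last turns into an enormous forward delta
unless it is clamped.

	#include <stdio.h>

	typedef unsigned long long cycle_t;

	int main(void)
	{
		cycle_t cycle_last = 1000000;
		cycle_t now        = 999990;	/* 10 cycles "behind" */

		/* Unsigned subtraction wraps to nearly 2^64: */
		cycle_t delta = now - cycle_last;
		printf("raw delta:     %llu\n", delta);

		/* The reverted code avoided this by clamping to cycle_last: */
		cycle_t clamped = now >= cycle_last ? now : cycle_last;
		printf("clamped delta: %llu\n", clamped - cycle_last);
		return 0;
	}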

commit 9c9b81f77330ddc003a2de2f35fa6a20410c1a62
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Mar 27 23:39:42 2008 +0100

    x86: print message if nmi_watchdog=2 cannot be enabled
    
    Right now, if there's no CPU support for nmi_watchdog=2, we just
    refuse it silently.
    
    Print a useful warning instead.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 19a359472ae1..b943e10ad814 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -667,8 +667,10 @@ int lapic_watchdog_init(unsigned nmi_hz)
 {
 	if (!wd_ops) {
 		probe_nmi_watchdog();
-		if (!wd_ops)
+		if (!wd_ops) {
+			printk(KERN_INFO "NMI watchdog: CPU not supported\n");
 			return -1;
+		}
 
 		if (!wd_ops->reserve()) {
 			printk(KERN_ERR

commit 4f14bdef41e599e218d71e3d0abf339d65e9b480
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Mar 27 23:37:58 2008 +0100

    x86: fix nmi_watchdog=2 on Pentium-D CPUs
    
    Implement nmi_watchdog=2 on this class of CPUs:
    
      cpu family      : 15
      model           : 6
      model name      : Intel(R) Pentium(R) D CPU 3.00GHz
    
    The watchdog's ->setup() method is safe anyway, so if the CPU
    cannot support it, we'll bail out safely.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9b838324b818..19a359472ae1 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -652,9 +652,6 @@ static void probe_nmi_watchdog(void)
 			wd_ops = &p6_wd_ops;
 			break;
 		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				return;
-
 			wd_ops = &p4_wd_ops;
 			break;
 		default:

commit bd6ca6375b9f18f40e814f391d9d1abaa916bc72
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Mar 28 14:41:30 2008 -0700

    forcedeth: fix locking bug with netconsole
    
    While using netconsole on forcedeth, lockdep noticed the following locking
    bug:
    
    =================================
    [ INFO: inconsistent lock state ]
    2.6.24-rc6 #6
    ---------------------------------
    inconsistent {softirq-on-W} -> {in-softirq-W} usage.
    udevd/719 [HC0[0]:SC1[1]:HE1:SE0] takes:
     (_xmit_ETHER){-+..}, at: [<c043062e>] dev_watchdog+0x1c/0xb9
    {softirq-on-W} state was registered at:
      [<c0147f67>] mark_held_locks+0x4e/0x66
      [<c014810e>] trace_hardirqs_on+0xfe/0x136
      [<c048ae63>] _spin_unlock_irq+0x22/0x42
      [<c02ec617>] nv_start_xmit_optimized+0x347/0x37a
      [<c042c80d>] netpoll_send_skb+0xa4/0x147
      [<c042d4a6>] netpoll_send_udp+0x238/0x242
      [<c02f44f6>] write_msg+0x6d/0x9b
      [<c012c129>] __call_console_drivers+0x4e/0x5a
      [<c012c18c>] _call_console_drivers+0x57/0x5b
      [<c012c2dd>] release_console_sem+0x11c/0x1b9
      [<c012caeb>] register_console+0x1eb/0x1f3
      [<c06ae673>] init_netconsole+0x119/0x15f
      [<c069149b>] kernel_init+0x147/0x294
      [<c01058cb>] kernel_thread_helper+0x7/0x10
      [<ffffffff>] 0xffffffff
    irq event stamp: 950
    hardirqs last  enabled at (950): [<c048ae63>] _spin_unlock_irq+0x22/0x42
    hardirqs last disabled at (949): [<c048aaf7>] _spin_lock_irq+0xc/0x38
    softirqs last  enabled at (0): [<c012a29c>] copy_process+0x375/0x126d
    softirqs last disabled at (947): [<c0106d43>] do_softirq+0x61/0xc6
    
    other info that might help us debug this:
    no locks held by udevd/719.
    
    stack backtrace:
    Pid: 719, comm: udevd Not tainted 2.6.24-rc6 #6
     [<c0105c46>] show_trace_log_lvl+0x12/0x25
     [<c01063ec>] show_trace+0xd/0x10
     [<c010670c>] dump_stack+0x57/0x5f
     [<c0147505>] print_usage_bug+0x10a/0x117
     [<c0147c38>] mark_lock+0x121/0x402
     [<c01488b6>] __lock_acquire+0x3d1/0xb64
     [<c0149405>] lock_acquire+0x4e/0x6a
     [<c048a99b>] _spin_lock+0x23/0x32
     [<c043062e>] dev_watchdog+0x1c/0xb9
     [<c0133e4a>] run_timer_softirq+0x133/0x193
     [<c0130907>] __do_softirq+0x78/0xed
     [<c0106d43>] do_softirq+0x61/0xc6
     =======================
    eth1: link down
    
    The fix is to disable/restore irqs instead of disable/enable.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Cc: Ayaz Abdulla <aabdulla@nvidia.com>
    Cc: Jeff Garzik <jeff@garzik.org>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Jeff Garzik <jeff@garzik.org>

diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 6f7e3fde9e7c..980c2c229a71 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -1854,6 +1854,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ring_desc* start_tx;
 	struct ring_desc* prev_tx;
 	struct nv_skb_map* prev_tx_ctx;
+	unsigned long flags;
 
 	/* add fragments to entries count */
 	for (i = 0; i < fragments; i++) {
@@ -1863,10 +1864,10 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	empty_slots = nv_get_empty_tx_slots(np);
 	if (unlikely(empty_slots <= entries)) {
-		spin_lock_irq(&np->lock);
+		spin_lock_irqsave(&np->lock, flags);
 		netif_stop_queue(dev);
 		np->tx_stop = 1;
-		spin_unlock_irq(&np->lock);
+		spin_unlock_irqrestore(&np->lock, flags);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -1929,13 +1930,13 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
 			 NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
 
-	spin_lock_irq(&np->lock);
+	spin_lock_irqsave(&np->lock, flags);
 
 	/* set tx flags */
 	start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
 	np->put_tx.orig = put_tx;
 
-	spin_unlock_irq(&np->lock);
+	spin_unlock_irqrestore(&np->lock, flags);
 
 	dprintk(KERN_DEBUG "%s: nv_start_xmit: entries %d queued for transmission. tx_flags_extra: %x\n",
 		dev->name, entries, tx_flags_extra);
@@ -1971,6 +1972,7 @@ static int nv_start_xmit_optimized(struct sk_buff *skb, struct net_device *dev)
 	struct ring_desc_ex* prev_tx;
 	struct nv_skb_map* prev_tx_ctx;
 	struct nv_skb_map* start_tx_ctx;
+	unsigned long flags;
 
 	/* add fragments to entries count */
 	for (i = 0; i < fragments; i++) {
@@ -1980,10 +1982,10 @@ static int nv_start_xmit_optimized(struct sk_buff *skb, struct net_device *dev)
 
 	empty_slots = nv_get_empty_tx_slots(np);
 	if (unlikely(empty_slots <= entries)) {
-		spin_lock_irq(&np->lock);
+		spin_lock_irqsave(&np->lock, flags);
 		netif_stop_queue(dev);
 		np->tx_stop = 1;
-		spin_unlock_irq(&np->lock);
+		spin_unlock_irqrestore(&np->lock, flags);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -2059,7 +2061,7 @@ static int nv_start_xmit_optimized(struct sk_buff *skb, struct net_device *dev)
 			start_tx->txvlan = 0;
 	}
 
-	spin_lock_irq(&np->lock);
+	spin_lock_irqsave(&np->lock, flags);
 
 	if (np->tx_limit) {
 		/* Limit the number of outstanding tx. Setup all fragments, but
@@ -2085,7 +2087,7 @@ static int nv_start_xmit_optimized(struct sk_buff *skb, struct net_device *dev)
 	start_tx->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
 	np->put_tx.ex = put_tx;
 
-	spin_unlock_irq(&np->lock);
+	spin_unlock_irqrestore(&np->lock, flags);
 
 	dprintk(KERN_DEBUG "%s: nv_start_xmit_optimized: entries %d queued for transmission. tx_flags_extra: %x\n",
 		dev->name, entries, tx_flags_extra);
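
The underlying issue: spin_unlock_irq() unconditionally re-enables interrupts,
which is wrong when the transmit path is entered with interrupts already
disabled, as it can be through the netpoll/netconsole path shown in the trace.
A minimal kernel-style sketch of the save/restore pattern the patch switches
to (demo_lock and demo_critical_section are illustrative names, not from the
driver):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(demo_lock);

	static void demo_critical_section(void)
	{
		unsigned long flags;

		/* Remember the caller's interrupt state... */
		spin_lock_irqsave(&demo_lock, flags);
		/* ... touch shared tx state ... */
		/* ...and restore exactly that state, instead of blindly
		 * re-enabling interrupts as spin_unlock_irq() would. */
		spin_unlock_irqrestore(&demo_lock, flags);
	}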

commit 48d3d8263c491822d50e64547bae5f6b4a54ec59
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Mar 28 14:28:03 2008 +0100

    revert "ACPI: drivers/acpi: elide a non-zero test on a result that is never 0"
    
    Revert commit 1192aeb957402b45f311895f124e4ca41206843c ("ACPI:
    drivers/acpi: elide a non-zero test on a result that is never 0")
    because it turns out that thermal_cooling_device_register() does
    actually return NULL if CONFIG_THERMAL is turned off (then the routine
    turns into a dummy inline routine in the header files that returns NULL
    unconditionally).
    
    This was found with randconfig testing, where it caused a crash during bootup:
    
      initcall 0x78878534 ran for 13 msecs: acpi_button_init+0x0/0x51()
      Calling initcall 0x78878585: acpi_fan_init+0x0/0x2c()
      BUG: unable to handle kernel NULL pointer dereference at 00000000
      IP: [<782b8ad0>] acpi_fan_add+0x7d/0xfd
      *pde = 00000000
      Oops: 0000 [#1]
      Modules linked in:
    
      Pid: 1, comm: swapper Not tainted (2.6.25-rc7-sched-devel.git-x86-latest.git #14)
      EIP: 0060:[<782b8ad0>] EFLAGS: 00010246 CPU: 0
      EIP is at acpi_fan_add+0x7d/0xfd
      EAX: b787c718 EBX: b787c400 ECX: b782ceb4 EDX: 00000007
      ESI: 00000000 EDI: b787c6f4 EBP: b782cee0 ESP: b782cecc
       DS: 007b ES: 007b FS: 0000 GS: 0000 SS: 0068
      Process swapper (pid: 1, ti=b782c000 task=b7846000 task.ti=b782c000)
      Stack: b787c459 00000000 b787c400 78790888 b787c60c b782cef8 782b6fb8 ffffffda
             b787c60c 00000000 78790958 b782cf0c 783005d7 b787c60c 78790958 78790584
             b782cf1c 783007f6 b782cf28 00000000 b782cf40 782ffc4a 78790958 b794d558
      Call Trace:
       [<782b6fb8>] ? acpi_device_probe+0x3e/0xdb
       [<783005d7>] ? driver_probe_device+0x82/0xfc
       [<783007f6>] ? __driver_attach+0x3a/0x70
       [<782ffc4a>] ? bus_for_each_dev+0x3e/0x60
       [<7830048c>] ? driver_attach+0x14/0x16
       [<783007bc>] ? __driver_attach+0x0/0x70
       [<7830006a>] ? bus_add_driver+0x9d/0x1b0
       [<783008c3>] ? driver_register+0x47/0xa3
       [<7813db00>] ? timespec_to_ktime+0x9/0xc
       [<782b7331>] ? acpi_bus_register_driver+0x3a/0x3c
       [<78878592>] ? acpi_fan_init+0xd/0x2c
       [<78863656>] ? kernel_init+0xac/0x1f9
       [<788635aa>] ? kernel_init+0x0/0x1f9
       [<78114563>] ? kernel_thread_helper+0x7/0x10
       =======================
      Code: 6e 78 e8 57 44 e7 ff 58 e9 93 00 00 00 8b 55 f0 8d bb f4 02 00 00 80 4b 2d 10 8b 03 e8 87 cb ff ff 8d 83 18 03 00 00 80 63 2d ef <ff> 35 00 00 00 00 50 68 e8 9c 6e 78 e8 22 44 e7 ff b9 b6 9c 6e
      EIP: [<782b8ad0>] acpi_fan_add+0x7d/0xfd SS:ESP 0068:b782cecc
      ---[ end trace 778e504de7e3b1e3 ]---
      Kernel panic - not syncing: Attempted to kill init!
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Julia Lawall <julia@diku.dk>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index 4d535c50d821..c8e3cba423ef 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c
@@ -260,22 +260,24 @@ static int acpi_fan_add(struct acpi_device *device)
 		result = PTR_ERR(cdev);
 		goto end;
 	}
-	printk(KERN_INFO PREFIX
-		"%s is registered as cooling_device%d\n",
-		device->dev.bus_id, cdev->id);
-
-	acpi_driver_data(device) = cdev;
-	result = sysfs_create_link(&device->dev.kobj,
-				   &cdev->device.kobj,
-				   "thermal_cooling");
-	if (result)
-		return result;
-
-	result = sysfs_create_link(&cdev->device.kobj,
-				   &device->dev.kobj,
-				   "device");
-	if (result)
-		return result;
+	if (cdev) {
+		printk(KERN_INFO PREFIX
+			"%s is registered as cooling_device%d\n",
+			device->dev.bus_id, cdev->id);
+
+		acpi_driver_data(device) = cdev;
+		result = sysfs_create_link(&device->dev.kobj,
+					   &cdev->device.kobj,
+					   "thermal_cooling");
+		if (result)
+			return result;
+
+		result = sysfs_create_link(&cdev->device.kobj,
+					   &device->dev.kobj,
+					   "device");
+		if (result)
+			return result;
+	}
 
 	result = acpi_fan_add_fs(device);
 	if (result)
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 3a136f6c66a3..36a68fa114e3 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -674,20 +674,22 @@ static int __cpuinit acpi_processor_start(struct acpi_device *device)
 		result = PTR_ERR(pr->cdev);
 		goto end;
 	}
-	printk(KERN_INFO PREFIX
-		"%s is registered as cooling_device%d\n",
-		device->dev.bus_id, pr->cdev->id);
-
-	result = sysfs_create_link(&device->dev.kobj,
-				   &pr->cdev->device.kobj,
-				   "thermal_cooling");
-	if (result)
-		return result;
-	result = sysfs_create_link(&pr->cdev->device.kobj,
-				   &device->dev.kobj,
-				   "device");
-	if (result)
-		return result;
+	if (pr->cdev) {
+		printk(KERN_INFO PREFIX
+			"%s is registered as cooling_device%d\n",
+			device->dev.bus_id, pr->cdev->id);
+
+		result = sysfs_create_link(&device->dev.kobj,
+					   &pr->cdev->device.kobj,
+					   "thermal_cooling");
+		if (result)
+			return result;
+		result = sysfs_create_link(&pr->cdev->device.kobj,
+					   &device->dev.kobj,
+					   "device");
+		if (result)
+			return result;
+	}
 
 	if (pr->flags.throttling) {
 		printk(KERN_INFO PREFIX "%s [%s] (supports",
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index fe09b57de617..12fb44f16766 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -734,19 +734,21 @@ static void acpi_video_device_find_cap(struct acpi_video_device *device)
 		if (IS_ERR(device->cdev))
 			return;
 
-		printk(KERN_INFO PREFIX
-			"%s is registered as cooling_device%d\n",
-			device->dev->dev.bus_id, device->cdev->id);
-		result = sysfs_create_link(&device->dev->dev.kobj,
-				  &device->cdev->device.kobj,
-				  "thermal_cooling");
-		if (result)
-			printk(KERN_ERR PREFIX "Create sysfs link\n");
-		result = sysfs_create_link(&device->cdev->device.kobj,
-				  &device->dev->dev.kobj,
-				  "device");
-		if (result)
-			printk(KERN_ERR PREFIX "Create sysfs link\n");
+		if (device->cdev) {
+			printk(KERN_INFO PREFIX
+				"%s is registered as cooling_device%d\n",
+				device->dev->dev.bus_id, device->cdev->id);
+			result = sysfs_create_link(&device->dev->dev.kobj,
+					  &device->cdev->device.kobj,
+					  "thermal_cooling");
+			if (result)
+				printk(KERN_ERR PREFIX "Create sysfs link\n");
+			result = sysfs_create_link(&device->cdev->device.kobj,
+					  &device->dev->dev.kobj,
+					  "device");
+                        if (result)
+				printk(KERN_ERR PREFIX "Create sysfs link\n");
+		}
 	}
 	if (device->cap._DCS && device->cap._DSS){
 		static int count = 0;