Patches contributed by Eötvös Loránd University


commit cae2ed9aa573415c6e5de9a09b7ff0d74af793bc
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:48 2006 -0700

    [PATCH] lockdep: locking API self tests
    
    Introduce DEBUG_LOCKING_API_SELFTESTS, which uses the generic lock debugging
    code's silent-failure feature to run a matrix of testcases.  There are 210
    testcases currently:
    
      +-----------------------
      | Locking API testsuite:
      +------------------------------+------+------+------+------+------+------+
                                     | spin |wlock |rlock |mutex | wsem | rsem |
      -------------------------------+------+------+------+------+------+------+
                         A-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                     A-B-B-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                 A-B-B-C-C-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                 A-B-C-A-B-C deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-B-C-C-D-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-C-D-B-D-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
             A-B-C-D-B-C-D-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                        double unlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
                     bad unlock order:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
      --------------------------------------+------+------+------+------+------+
                  recursive read-lock:             |  ok  |             |  ok  |
      --------------------------------------+------+------+------+------+------+
                    non-nested unlock:  ok  |  ok  |  ok  |  ok  |
      --------------------------------------+------+------+------+
         hard-irqs-on + irq-safe-A/12:  ok  |  ok  |  ok  |
         soft-irqs-on + irq-safe-A/12:  ok  |  ok  |  ok  |
         hard-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
         soft-irqs-on + irq-safe-A/21:  ok  |  ok  |  ok  |
           sirq-safe-A => hirqs-on/12:  ok  |  ok  |  ok  |
           sirq-safe-A => hirqs-on/21:  ok  |  ok  |  ok  |
             hard-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
             soft-safe-A + irqs-on/12:  ok  |  ok  |  ok  |
             hard-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
             soft-safe-A + irqs-on/21:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #1/123:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #1/132:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #1/132:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #1/213:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #1/213:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #1/231:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #1/231:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #1/312:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #1/312:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #1/321:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #1/321:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #2/123:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #2/123:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #2/132:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #2/132:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #2/213:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #2/213:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #2/231:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #2/231:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #2/312:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #2/312:  ok  |  ok  |  ok  |
        hard-safe-A + unsafe-B #2/321:  ok  |  ok  |  ok  |
        soft-safe-A + unsafe-B #2/321:  ok  |  ok  |  ok  |
          hard-irq lock-inversion/123:  ok  |  ok  |  ok  |
          soft-irq lock-inversion/123:  ok  |  ok  |  ok  |
          hard-irq lock-inversion/132:  ok  |  ok  |  ok  |
          soft-irq lock-inversion/132:  ok  |  ok  |  ok  |
          hard-irq lock-inversion/213:  ok  |  ok  |  ok  |
          soft-irq lock-inversion/213:  ok  |  ok  |  ok  |
          hard-irq lock-inversion/231:  ok  |  ok  |  ok  |
          soft-irq lock-inversion/231:  ok  |  ok  |  ok  |
          hard-irq lock-inversion/312:  ok  |  ok  |  ok  |
          soft-irq lock-inversion/312:  ok  |  ok  |  ok  |
          hard-irq lock-inversion/321:  ok  |  ok  |  ok  |
          soft-irq lock-inversion/321:  ok  |  ok  |  ok  |
          hard-irq read-recursion/123:  ok  |
          soft-irq read-recursion/123:  ok  |
          hard-irq read-recursion/132:  ok  |
          soft-irq read-recursion/132:  ok  |
          hard-irq read-recursion/213:  ok  |
          soft-irq read-recursion/213:  ok  |
          hard-irq read-recursion/231:  ok  |
          soft-irq read-recursion/231:  ok  |
          hard-irq read-recursion/312:  ok  |
          soft-irq read-recursion/312:  ok  |
          hard-irq read-recursion/321:  ok  |
          soft-irq read-recursion/321:  ok  |
      --------------------------------+-----+----------------
      Good, all 210 testcases passed! |
      --------------------------------+
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>
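
The heart of the harness is the dotest() helper in lib/locking-selftest.c below: it re-arms the (silenced) lock debugging code, runs one testcase, and compares the resulting debug_locks value against the expected outcome. A toy user-space model of that control flow, with hypothetical stand-in testcases (the real dotest() also restores the preempt count and reinitializes all locks):

    #include <stdio.h>

    static int debug_locks = 1;    /* cleared by the first bug the checks catch */

    static void buggy_testcase(void) { debug_locks = 0; } /* detector fires    */
    static void clean_testcase(void) { }                  /* nothing to detect */

    static void dotest(void (*testcase)(void), int expected)
    {
        debug_locks = 1;                        /* re-arm the debugging code */
        testcase();
        printf(debug_locks == expected ? "  ok  |" : "FAILED|");
    }

    int main(void)
    {
        dotest(buggy_testcase, 0);  /* a deadlock testcase must trip the checks */
        dotest(clean_testcase, 1);  /* a safe pattern must leave debug_locks=1  */
        printf("\n");
        return 0;
    }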

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 86e9282d1c20..149f62ba14a5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -435,6 +435,15 @@ running once the system is up.
 
 	debug		[KNL] Enable kernel debugging (events log level).
 
+	debug_locks_verbose=
+			[KNL] verbose self-tests
+			Format: <0|1>
+			Print debugging info while doing the locking API
+			self-tests.
+			We default to 0 (no extra messages); setting it to
+			1 will print _a lot_ more information - normally
+			only useful to kernel developers.
+
 	decnet=		[HW,NET]
 			Format: <area>[,<node>]
 			See also Documentation/networking/decnet.txt.
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 91e338a3d069..16021b09c184 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -149,6 +149,17 @@ config DEBUG_SPINLOCK_SLEEP
 	  If you say Y here, various routines which may sleep will become very
 	  noisy if they are called with a spinlock held.
 
+config DEBUG_LOCKING_API_SELFTESTS
+	bool "Locking API boot-time self-tests"
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here if you want the kernel to run a short self-test during
+	  bootup. The self-test checks whether common types of locking bugs
+	  are detected by debugging mechanisms or not. (If you disable
+	  lock debugging, those bugs won't be detected, of course.)
+	  The following locking APIs are covered: spinlocks, rwlocks,
+	  mutexes and rwsems.
+
 config STACKTRACE
 	bool
 	depends on STACKTRACE_SUPPORT
diff --git a/lib/Makefile b/lib/Makefile
index 4f5d01922f82..be9719ae82d0 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,6 +18,7 @@ CFLAGS_kobject.o += -DDEBUG
 CFLAGS_kobject_uevent.o += -DDEBUG
 endif
 
+obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff --git a/lib/locking-selftest-hardirq.h b/lib/locking-selftest-hardirq.h
new file mode 100644
index 000000000000..10d4a150b259
--- /dev/null
+++ b/lib/locking-selftest-hardirq.h
@@ -0,0 +1,9 @@
+#undef IRQ_DISABLE
+#undef IRQ_ENABLE
+#undef IRQ_ENTER
+#undef IRQ_EXIT
+
+#define IRQ_ENABLE		HARDIRQ_ENABLE
+#define IRQ_DISABLE		HARDIRQ_DISABLE
+#define IRQ_ENTER		HARDIRQ_ENTER
+#define IRQ_EXIT		HARDIRQ_EXIT
diff --git a/lib/locking-selftest-mutex.h b/lib/locking-selftest-mutex.h
new file mode 100644
index 000000000000..68601b6f584b
--- /dev/null
+++ b/lib/locking-selftest-mutex.h
@@ -0,0 +1,11 @@
+#undef LOCK
+#define LOCK		ML
+
+#undef UNLOCK
+#define UNLOCK		MU
+
+#undef RLOCK
+#undef WLOCK
+
+#undef INIT
+#define INIT		MI
diff --git a/lib/locking-selftest-rlock-hardirq.h b/lib/locking-selftest-rlock-hardirq.h
new file mode 100644
index 000000000000..9f517ebcb786
--- /dev/null
+++ b/lib/locking-selftest-rlock-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-rlock.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-rlock-softirq.h b/lib/locking-selftest-rlock-softirq.h
new file mode 100644
index 000000000000..981455db7ff0
--- /dev/null
+++ b/lib/locking-selftest-rlock-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-rlock.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-rlock.h b/lib/locking-selftest-rlock.h
new file mode 100644
index 000000000000..6789044f4d0e
--- /dev/null
+++ b/lib/locking-selftest-rlock.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK		RL
+
+#undef UNLOCK
+#define UNLOCK		RU
+
+#undef RLOCK
+#define RLOCK		RL
+
+#undef WLOCK
+#define WLOCK		WL
+
+#undef INIT
+#define INIT		RWI
diff --git a/lib/locking-selftest-rsem.h b/lib/locking-selftest-rsem.h
new file mode 100644
index 000000000000..62da886680c7
--- /dev/null
+++ b/lib/locking-selftest-rsem.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK		RSL
+
+#undef UNLOCK
+#define UNLOCK		RSU
+
+#undef RLOCK
+#define RLOCK		RSL
+
+#undef WLOCK
+#define WLOCK		WSL
+
+#undef INIT
+#define INIT		RWSI
diff --git a/lib/locking-selftest-softirq.h b/lib/locking-selftest-softirq.h
new file mode 100644
index 000000000000..a83de2a04ace
--- /dev/null
+++ b/lib/locking-selftest-softirq.h
@@ -0,0 +1,9 @@
+#undef IRQ_DISABLE
+#undef IRQ_ENABLE
+#undef IRQ_ENTER
+#undef IRQ_EXIT
+
+#define IRQ_DISABLE		SOFTIRQ_DISABLE
+#define IRQ_ENABLE		SOFTIRQ_ENABLE
+#define IRQ_ENTER		SOFTIRQ_ENTER
+#define IRQ_EXIT		SOFTIRQ_EXIT
diff --git a/lib/locking-selftest-spin-hardirq.h b/lib/locking-selftest-spin-hardirq.h
new file mode 100644
index 000000000000..693198dce30a
--- /dev/null
+++ b/lib/locking-selftest-spin-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-spin.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-spin-softirq.h b/lib/locking-selftest-spin-softirq.h
new file mode 100644
index 000000000000..c472e2a87ffc
--- /dev/null
+++ b/lib/locking-selftest-spin-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-spin.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-spin.h b/lib/locking-selftest-spin.h
new file mode 100644
index 000000000000..ccd1b4b09757
--- /dev/null
+++ b/lib/locking-selftest-spin.h
@@ -0,0 +1,11 @@
+#undef LOCK
+#define LOCK		L
+
+#undef UNLOCK
+#define UNLOCK		U
+
+#undef RLOCK
+#undef WLOCK
+
+#undef INIT
+#define INIT		SI
diff --git a/lib/locking-selftest-wlock-hardirq.h b/lib/locking-selftest-wlock-hardirq.h
new file mode 100644
index 000000000000..2dd2e5122caa
--- /dev/null
+++ b/lib/locking-selftest-wlock-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-wlock.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-wlock-softirq.h b/lib/locking-selftest-wlock-softirq.h
new file mode 100644
index 000000000000..cb80d1cb944e
--- /dev/null
+++ b/lib/locking-selftest-wlock-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-wlock.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-wlock.h b/lib/locking-selftest-wlock.h
new file mode 100644
index 000000000000..0815322d99ed
--- /dev/null
+++ b/lib/locking-selftest-wlock.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK		WL
+
+#undef UNLOCK
+#define UNLOCK		WU
+
+#undef RLOCK
+#define RLOCK		RL
+
+#undef WLOCK
+#define WLOCK		WL
+
+#undef INIT
+#define INIT		RWI
diff --git a/lib/locking-selftest-wsem.h b/lib/locking-selftest-wsem.h
new file mode 100644
index 000000000000..b88c5f2dc5f0
--- /dev/null
+++ b/lib/locking-selftest-wsem.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK		WSL
+
+#undef UNLOCK
+#define UNLOCK		WSU
+
+#undef RLOCK
+#define RLOCK		RSL
+
+#undef WLOCK
+#define WLOCK		WSL
+
+#undef INIT
+#define INIT		RWSI
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
new file mode 100644
index 000000000000..5cd05f20bdec
--- /dev/null
+++ b/lib/locking-selftest.c
@@ -0,0 +1,1218 @@
+/*
+ * lib/locking-selftest.c
+ *
+ * Testsuite for various locking APIs: spinlocks, rwlocks,
+ * mutexes and rw-semaphores.
+ *
+ * It checks for both false positives and false negatives.
+ *
+ * Started by Ingo Molnar:
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/debug_locks.h>
+#include <linux/irqflags.h>
+
+/*
+ * Change this to 1 if you want to see the failure printouts:
+ */
+static unsigned int debug_locks_verbose;
+
+static int __init setup_debug_locks_verbose(char *str)
+{
+	get_option(&str, &debug_locks_verbose);
+
+	return 1;
+}
+
+__setup("debug_locks_verbose=", setup_debug_locks_verbose);
+
+#define FAILURE		0
+#define SUCCESS		1
+
+#define LOCKTYPE_SPIN	0x1
+#define LOCKTYPE_RWLOCK	0x2
+#define LOCKTYPE_MUTEX	0x4
+#define LOCKTYPE_RWSEM	0x8
+
+/*
+ * Normal standalone locks, for the circular and irq-context
+ * dependency tests:
+ */
+static DEFINE_SPINLOCK(lock_A);
+static DEFINE_SPINLOCK(lock_B);
+static DEFINE_SPINLOCK(lock_C);
+static DEFINE_SPINLOCK(lock_D);
+
+static DEFINE_RWLOCK(rwlock_A);
+static DEFINE_RWLOCK(rwlock_B);
+static DEFINE_RWLOCK(rwlock_C);
+static DEFINE_RWLOCK(rwlock_D);
+
+static DEFINE_MUTEX(mutex_A);
+static DEFINE_MUTEX(mutex_B);
+static DEFINE_MUTEX(mutex_C);
+static DEFINE_MUTEX(mutex_D);
+
+static DECLARE_RWSEM(rwsem_A);
+static DECLARE_RWSEM(rwsem_B);
+static DECLARE_RWSEM(rwsem_C);
+static DECLARE_RWSEM(rwsem_D);
+
+/*
+ * Locks that we initialize dynamically as well so that
+ * e.g. X1 and X2 become two instances of the same class,
+ * but X* and Y* are different classes. We do this so that
+ * we do not trigger a real lockup:
+ */
+static DEFINE_SPINLOCK(lock_X1);
+static DEFINE_SPINLOCK(lock_X2);
+static DEFINE_SPINLOCK(lock_Y1);
+static DEFINE_SPINLOCK(lock_Y2);
+static DEFINE_SPINLOCK(lock_Z1);
+static DEFINE_SPINLOCK(lock_Z2);
+
+static DEFINE_RWLOCK(rwlock_X1);
+static DEFINE_RWLOCK(rwlock_X2);
+static DEFINE_RWLOCK(rwlock_Y1);
+static DEFINE_RWLOCK(rwlock_Y2);
+static DEFINE_RWLOCK(rwlock_Z1);
+static DEFINE_RWLOCK(rwlock_Z2);
+
+static DEFINE_MUTEX(mutex_X1);
+static DEFINE_MUTEX(mutex_X2);
+static DEFINE_MUTEX(mutex_Y1);
+static DEFINE_MUTEX(mutex_Y2);
+static DEFINE_MUTEX(mutex_Z1);
+static DEFINE_MUTEX(mutex_Z2);
+
+static DECLARE_RWSEM(rwsem_X1);
+static DECLARE_RWSEM(rwsem_X2);
+static DECLARE_RWSEM(rwsem_Y1);
+static DECLARE_RWSEM(rwsem_Y2);
+static DECLARE_RWSEM(rwsem_Z1);
+static DECLARE_RWSEM(rwsem_Z2);
+
+/*
+ * non-inlined runtime initializers, to let separate locks share
+ * the same lock-class:
+ */
+#define INIT_CLASS_FUNC(class) 				\
+static noinline void					\
+init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
+		 struct rw_semaphore *rwsem)		\
+{							\
+	spin_lock_init(lock);				\
+	rwlock_init(rwlock);				\
+	mutex_init(mutex);				\
+	init_rwsem(rwsem);				\
+}
+
+INIT_CLASS_FUNC(X)
+INIT_CLASS_FUNC(Y)
+INIT_CLASS_FUNC(Z)
+
+static void init_shared_classes(void)
+{
+	init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1);
+	init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2);
+
+	init_class_Y(&lock_Y1, &rwlock_Y1, &mutex_Y1, &rwsem_Y1);
+	init_class_Y(&lock_Y2, &rwlock_Y2, &mutex_Y2, &rwsem_Y2);
+
+	init_class_Z(&lock_Z1, &rwlock_Z1, &mutex_Z1, &rwsem_Z1);
+	init_class_Z(&lock_Z2, &rwlock_Z2, &mutex_Z2, &rwsem_Z2);
+}
+
+/*
+ * For spinlocks and rwlocks we also do hardirq-safe / softirq-safe tests.
+ * The following functions use a lock from a simulated hardirq/softirq
+ * context, causing the locks to be marked as hardirq-safe/softirq-safe:
+ */
+
+#define HARDIRQ_DISABLE		local_irq_disable
+#define HARDIRQ_ENABLE		local_irq_enable
+
+#define HARDIRQ_ENTER()				\
+	local_irq_disable();			\
+	irq_enter();				\
+	WARN_ON(!in_irq());
+
+#define HARDIRQ_EXIT()				\
+	__irq_exit();				\
+	local_irq_enable();
+
+#define SOFTIRQ_DISABLE		local_bh_disable
+#define SOFTIRQ_ENABLE		local_bh_enable
+
+#define SOFTIRQ_ENTER()				\
+		local_bh_disable();		\
+		local_irq_disable();		\
+		trace_softirq_enter();		\
+		WARN_ON(!in_softirq());
+
+#define SOFTIRQ_EXIT()				\
+		trace_softirq_exit();		\
+		local_irq_enable();		\
+		local_bh_enable();
+
+/*
+ * Shortcuts for lock/unlock API variants, to keep
+ * the testcases compact:
+ */
+#define L(x)			spin_lock(&lock_##x)
+#define U(x)			spin_unlock(&lock_##x)
+#define LU(x)			L(x); U(x)
+#define SI(x)			spin_lock_init(&lock_##x)
+
+#define WL(x)			write_lock(&rwlock_##x)
+#define WU(x)			write_unlock(&rwlock_##x)
+#define WLU(x)			WL(x); WU(x)
+
+#define RL(x)			read_lock(&rwlock_##x)
+#define RU(x)			read_unlock(&rwlock_##x)
+#define RLU(x)			RL(x); RU(x)
+#define RWI(x)			rwlock_init(&rwlock_##x)
+
+#define ML(x)			mutex_lock(&mutex_##x)
+#define MU(x)			mutex_unlock(&mutex_##x)
+#define MI(x)			mutex_init(&mutex_##x)
+
+#define WSL(x)			down_write(&rwsem_##x)
+#define WSU(x)			up_write(&rwsem_##x)
+
+#define RSL(x)			down_read(&rwsem_##x)
+#define RSU(x)			up_read(&rwsem_##x)
+#define RWSI(x)			init_rwsem(&rwsem_##x)
+
+#define LOCK_UNLOCK_2(x,y)	LOCK(x); LOCK(y); UNLOCK(y); UNLOCK(x)
+
+/*
+ * Generate different permutations of the same testcase, using
+ * the same basic lock-dependency/state events:
+ */
+
+#define GENERATE_TESTCASE(name)			\
+						\
+static void name(void) { E(); }
+
+#define GENERATE_PERMUTATIONS_2_EVENTS(name)	\
+						\
+static void name##_12(void) { E1(); E2(); }	\
+static void name##_21(void) { E2(); E1(); }
+
+#define GENERATE_PERMUTATIONS_3_EVENTS(name)		\
+							\
+static void name##_123(void) { E1(); E2(); E3(); }	\
+static void name##_132(void) { E1(); E3(); E2(); }	\
+static void name##_213(void) { E2(); E1(); E3(); }	\
+static void name##_231(void) { E2(); E3(); E1(); }	\
+static void name##_312(void) { E3(); E1(); E2(); }	\
+static void name##_321(void) { E3(); E2(); E1(); }
+
+/*
+ * AA deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK(X1);				\
+	LOCK(X2); /* this one should fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(AA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(AA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(AA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(AA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(AA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(AA_rsem)
+
+#undef E
+
+/*
+ * Special case for read-locking: read-locks are
+ * allowed to recurse on the same lock instance:
+ */
+static void rlock_AA1(void)
+{
+	RL(X1);
+	RL(X1); // this one should NOT fail
+}
+
+static void rlock_AA1B(void)
+{
+	RL(X1);
+	RL(X2); // this one should fail
+}
+
+static void rsem_AA1(void)
+{
+	RSL(X1);
+	RSL(X1); // this one should fail
+}
+
+static void rsem_AA1B(void)
+{
+	RSL(X1);
+	RSL(X2); // this one should fail
+}
+/*
+ * The mixing of read and write locks is not allowed:
+ */
+static void rlock_AA2(void)
+{
+	RL(X1);
+	WL(X2); // this one should fail
+}
+
+static void rsem_AA2(void)
+{
+	RSL(X1);
+	WSL(X2); // this one should fail
+}
+
+static void rlock_AA3(void)
+{
+	WL(X1);
+	RL(X2); // this one should fail
+}
+
+static void rsem_AA3(void)
+{
+	WSL(X1);
+	RSL(X2); // this one should fail
+}
+
+/*
+ * ABBA deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(B, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBA_rsem)
+
+#undef E
+
+/*
+ * AB BC CA deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(B, C);			\
+	LOCK_UNLOCK_2(C, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBCCA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBCCA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBCCA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBCCA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBCCA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBCCA_rsem)
+
+#undef E
+
+/*
+ * AB CA BC deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(C, A);			\
+	LOCK_UNLOCK_2(B, C); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCABC_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCABC_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCABC_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCABC_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCABC_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCABC_rsem)
+
+#undef E
+
+/*
+ * AB BC CD DA deadlock:
+ */
+
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(B, C);			\
+	LOCK_UNLOCK_2(C, D);			\
+	LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBCCDDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBCCDDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBCCDDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBCCDDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBCCDDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBCCDDA_rsem)
+
+#undef E
+
+/*
+ * AB CD BD DA deadlock:
+ */
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(C, D);			\
+	LOCK_UNLOCK_2(B, D);			\
+	LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCDBDDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCDBDDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCDBDDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCDBDDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCDBDDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCDBDDA_rsem)
+
+#undef E
+
+/*
+ * AB CD BC DA deadlock:
+ */
+#define E()					\
+						\
+	LOCK_UNLOCK_2(A, B);			\
+	LOCK_UNLOCK_2(C, D);			\
+	LOCK_UNLOCK_2(B, C);			\
+	LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCDBCDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCDBCDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCDBCDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCDBCDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCDBCDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCDBCDA_rsem)
+
+#undef E
+
+/*
+ * Double unlock:
+ */
+#define E()					\
+						\
+	LOCK(A);				\
+	UNLOCK(A);				\
+	UNLOCK(A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(double_unlock_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(double_unlock_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(double_unlock_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(double_unlock_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(double_unlock_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(double_unlock_rsem)
+
+#undef E
+
+/*
+ * Bad unlock ordering:
+ */
+#define E()					\
+						\
+	LOCK(A);				\
+	LOCK(B);				\
+	UNLOCK(A); /* fail */			\
+	UNLOCK(B);
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(bad_unlock_order_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(bad_unlock_order_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(bad_unlock_order_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(bad_unlock_order_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(bad_unlock_order_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(bad_unlock_order_rsem)
+
+#undef E
+
+/*
+ * initializing a held lock:
+ */
+#define E()					\
+						\
+	LOCK(A);				\
+	INIT(A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(init_held_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(init_held_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(init_held_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(init_held_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(init_held_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(init_held_rsem)
+
+#undef E
+
+/*
+ * locking an irq-safe lock with irqs enabled:
+ */
+#define E1()				\
+					\
+	IRQ_ENTER();			\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	IRQ_EXIT();
+
+#define E2()				\
+					\
+	LOCK(A);			\
+	UNLOCK(A);
+
+/*
+ * Generate 24 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Enabling hardirqs with a softirq-safe lock held:
+ */
+#define E1()				\
+					\
+	SOFTIRQ_ENTER();		\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	SOFTIRQ_EXIT();
+
+#define E2()				\
+					\
+	HARDIRQ_DISABLE();		\
+	LOCK(A);			\
+	HARDIRQ_ENABLE();		\
+	UNLOCK(A);
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_spin)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_wlock)
+
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Enabling irqs with an irq-safe lock held:
+ */
+#define E1()				\
+					\
+	IRQ_ENTER();			\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	IRQ_EXIT();
+
+#define E2()				\
+					\
+	IRQ_DISABLE();			\
+	LOCK(A);			\
+	IRQ_ENABLE();			\
+	UNLOCK(A);
+
+/*
+ * Generate 24 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Acquiring an irq-unsafe lock while holding an irq-safe lock:
+ */
+#define E1()				\
+					\
+	LOCK(A);			\
+	LOCK(B);			\
+	UNLOCK(B);			\
+	UNLOCK(A);			\
+
+#define E2()				\
+					\
+	LOCK(B);			\
+	UNLOCK(B);
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * If a lock becomes softirq-safe, but earlier took
+ * a softirq-unsafe lock:
+ */
+
+#define E1()				\
+	IRQ_DISABLE();			\
+	LOCK(A);			\
+	LOCK(B);			\
+	UNLOCK(B);			\
+	UNLOCK(A);			\
+	IRQ_ENABLE();
+
+#define E2()				\
+	LOCK(B);			\
+	UNLOCK(B);
+
+#define E3()				\
+	IRQ_ENTER();			\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock irq inversion.
+ *
+ * Deadlock scenario:
+ *
+ * CPU#1 is at #1, i.e. it has write-locked A, but has not
+ * taken B yet.
+ *
+ * CPU#2 is at #2, i.e. it has locked B.
+ *
+ * Hardirq hits CPU#2 at point #2 and is trying to read-lock A.
+ *
+ * The deadlock occurs because CPU#1 will spin on B, and CPU#2
+ * will spin on A.
+ */
+
+#define E1()				\
+					\
+	IRQ_DISABLE();			\
+	WL(A);				\
+	LOCK(B);			\
+	UNLOCK(B);			\
+	WU(A);				\
+	IRQ_ENABLE();
+
+#define E2()				\
+					\
+	LOCK(B);			\
+	UNLOCK(B);
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	RL(A);				\
+	RU(A);				\
+	IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock recursion that is actually safe.
+ */
+
+#define E1()				\
+					\
+	IRQ_DISABLE();			\
+	WL(A);				\
+	WU(A);				\
+	IRQ_ENABLE();
+
+#define E2()				\
+					\
+	RL(A);				\
+	RU(A);				\
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	RL(A);				\
+	L(B);				\
+	U(B);				\
+	RU(A);				\
+	IRQ_EXIT();
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard)
+
+#include "locking-selftest-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock recursion that is unsafe.
+ */
+
+#define E1()				\
+					\
+	IRQ_DISABLE();			\
+	L(B);				\
+	WL(A);				\
+	WU(A);				\
+	U(B);				\
+	IRQ_ENABLE();
+
+#define E2()				\
+					\
+	RL(A);				\
+	RU(A);				\
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	L(B);				\
+	U(B);				\
+	IRQ_EXIT();
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-hardirq.h"
+// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard)
+
+#include "locking-selftest-softirq.h"
+// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
+
+#define lockdep_reset()
+#define lockdep_reset_lock(x)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
+# define I_RWLOCK(x)	lockdep_reset_lock(&rwlock_##x.dep_map)
+# define I_MUTEX(x)	lockdep_reset_lock(&mutex_##x.dep_map)
+# define I_RWSEM(x)	lockdep_reset_lock(&rwsem_##x.dep_map)
+#else
+# define I_SPINLOCK(x)
+# define I_RWLOCK(x)
+# define I_MUTEX(x)
+# define I_RWSEM(x)
+#endif
+
+#define I1(x)					\
+	do {					\
+		I_SPINLOCK(x);			\
+		I_RWLOCK(x);			\
+		I_MUTEX(x);			\
+		I_RWSEM(x);			\
+	} while (0)
+
+#define I2(x)					\
+	do {					\
+		spin_lock_init(&lock_##x);	\
+		rwlock_init(&rwlock_##x);	\
+		mutex_init(&mutex_##x);		\
+		init_rwsem(&rwsem_##x);		\
+	} while (0)
+
+static void reset_locks(void)
+{
+	local_irq_disable();
+	I1(A); I1(B); I1(C); I1(D);
+	I1(X1); I1(X2); I1(Y1); I1(Y2); I1(Z1); I1(Z2);
+	lockdep_reset();
+	I2(A); I2(B); I2(C); I2(D);
+	init_shared_classes();
+	local_irq_enable();
+}
+
+#undef I
+
+static int testcase_total;
+static int testcase_successes;
+static int expected_testcase_failures;
+static int unexpected_testcase_failures;
+
+static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
+{
+	unsigned long saved_preempt_count = preempt_count();
+	int expected_failure = 0;
+
+	WARN_ON(irqs_disabled());
+
+	testcase_fn();
+	/*
+	 * Filter out expected failures:
+	 */
+#ifndef CONFIG_PROVE_LOCKING
+	if ((lockclass_mask & LOCKTYPE_SPIN) && debug_locks != expected)
+		expected_failure = 1;
+	if ((lockclass_mask & LOCKTYPE_RWLOCK) && debug_locks != expected)
+		expected_failure = 1;
+	if ((lockclass_mask & LOCKTYPE_MUTEX) && debug_locks != expected)
+		expected_failure = 1;
+	if ((lockclass_mask & LOCKTYPE_RWSEM) && debug_locks != expected)
+		expected_failure = 1;
+#endif
+	if (debug_locks != expected) {
+		if (expected_failure) {
+			expected_testcase_failures++;
+			printk("failed|");
+		} else {
+			unexpected_testcase_failures++;
+			printk("FAILED|");
+		}
+	} else {
+		testcase_successes++;
+		printk("  ok  |");
+	}
+	testcase_total++;
+
+	if (debug_locks_verbose)
+		printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
+			lockclass_mask, debug_locks, expected);
+	/*
+	 * Some tests (e.g. double-unlock) might corrupt the preemption
+	 * count, so restore it:
+	 */
+	preempt_count() = saved_preempt_count;
+#ifdef CONFIG_TRACE_IRQFLAGS
+	if (softirq_count())
+		current->softirqs_enabled = 0;
+	else
+		current->softirqs_enabled = 1;
+#endif
+
+	reset_locks();
+}
+
+static inline void print_testname(const char *testname)
+{
+	printk("%33s:", testname);
+}
+
+#define DO_TESTCASE_1(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);		\
+	printk("\n");
+
+#define DO_TESTCASE_1B(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);		\
+	printk("\n");
+
+#define DO_TESTCASE_3(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);	\
+	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
+	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
+	printk("\n");
+
+#define DO_TESTCASE_3RW(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
+	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
+	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
+	printk("\n");
+
+#define DO_TESTCASE_6(desc, name)				\
+	print_testname(desc);					\
+	dotest(name##_spin, FAILURE, LOCKTYPE_SPIN);		\
+	dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK);		\
+	dotest(name##_rlock, FAILURE, LOCKTYPE_RWLOCK);		\
+	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
+	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
+	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
+	printk("\n");
+
+#define DO_TESTCASE_6_SUCCESS(desc, name)			\
+	print_testname(desc);					\
+	dotest(name##_spin, SUCCESS, LOCKTYPE_SPIN);		\
+	dotest(name##_wlock, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX);		\
+	dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM);		\
+	dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM);		\
+	printk("\n");
+
+/*
+ * 'read' variant: rlocks must not trigger.
+ */
+#define DO_TESTCASE_6R(desc, name)				\
+	print_testname(desc);					\
+	dotest(name##_spin, FAILURE, LOCKTYPE_SPIN);		\
+	dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK);		\
+	dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);		\
+	dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);		\
+	dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);		\
+	printk("\n");
+
+#define DO_TESTCASE_2I(desc, name, nr)				\
+	DO_TESTCASE_1("hard-"desc, name##_hard, nr);		\
+	DO_TESTCASE_1("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_2IB(desc, name, nr)				\
+	DO_TESTCASE_1B("hard-"desc, name##_hard, nr);		\
+	DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_6I(desc, name, nr)				\
+	DO_TESTCASE_3("hard-"desc, name##_hard, nr);		\
+	DO_TESTCASE_3("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_6IRW(desc, name, nr)			\
+	DO_TESTCASE_3RW("hard-"desc, name##_hard, nr);		\
+	DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_2x3(desc, name)				\
+	DO_TESTCASE_3(desc, name, 12);				\
+	DO_TESTCASE_3(desc, name, 21);
+
+#define DO_TESTCASE_2x6(desc, name)				\
+	DO_TESTCASE_6I(desc, name, 12);				\
+	DO_TESTCASE_6I(desc, name, 21);
+
+#define DO_TESTCASE_6x2(desc, name)				\
+	DO_TESTCASE_2I(desc, name, 123);			\
+	DO_TESTCASE_2I(desc, name, 132);			\
+	DO_TESTCASE_2I(desc, name, 213);			\
+	DO_TESTCASE_2I(desc, name, 231);			\
+	DO_TESTCASE_2I(desc, name, 312);			\
+	DO_TESTCASE_2I(desc, name, 321);
+
+#define DO_TESTCASE_6x2B(desc, name)				\
+	DO_TESTCASE_2IB(desc, name, 123);			\
+	DO_TESTCASE_2IB(desc, name, 132);			\
+	DO_TESTCASE_2IB(desc, name, 213);			\
+	DO_TESTCASE_2IB(desc, name, 231);			\
+	DO_TESTCASE_2IB(desc, name, 312);			\
+	DO_TESTCASE_2IB(desc, name, 321);
+
+#define DO_TESTCASE_6x6(desc, name)				\
+	DO_TESTCASE_6I(desc, name, 123);			\
+	DO_TESTCASE_6I(desc, name, 132);			\
+	DO_TESTCASE_6I(desc, name, 213);			\
+	DO_TESTCASE_6I(desc, name, 231);			\
+	DO_TESTCASE_6I(desc, name, 312);			\
+	DO_TESTCASE_6I(desc, name, 321);
+
+#define DO_TESTCASE_6x6RW(desc, name)				\
+	DO_TESTCASE_6IRW(desc, name, 123);			\
+	DO_TESTCASE_6IRW(desc, name, 132);			\
+	DO_TESTCASE_6IRW(desc, name, 213);			\
+	DO_TESTCASE_6IRW(desc, name, 231);			\
+	DO_TESTCASE_6IRW(desc, name, 312);			\
+	DO_TESTCASE_6IRW(desc, name, 321);
+
+
+void locking_selftest(void)
+{
+	/*
+	 * Got a locking failure before the selftest ran?
+	 */
+	if (!debug_locks) {
+		printk("----------------------------------\n");
+		printk("| Locking API testsuite disabled |\n");
+		printk("----------------------------------\n");
+		return;
+	}
+
+	/*
+	 * Run the testsuite:
+	 */
+	printk("------------------------\n");
+	printk("| Locking API testsuite:\n");
+	printk("----------------------------------------------------------------------------\n");
+	printk("                                 | spin |wlock |rlock |mutex | wsem | rsem |\n");
+	printk("  --------------------------------------------------------------------------\n");
+
+	init_shared_classes();
+	debug_locks_silent = !debug_locks_verbose;
+
+	DO_TESTCASE_6("A-A deadlock", AA);
+	DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
+	DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
+	DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
+	DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
+	DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
+	DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
+	DO_TESTCASE_6("double unlock", double_unlock);
+	DO_TESTCASE_6("initialize held", init_held);
+	DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
+
+	printk("  --------------------------------------------------------------------------\n");
+	print_testname("recursive read-lock");
+	printk("             |");
+	dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
+	printk("             |");
+	dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
+	printk("\n");
+
+	print_testname("recursive read-lock #2");
+	printk("             |");
+	dotest(rlock_AA1B, FAILURE, LOCKTYPE_RWLOCK);
+	printk("             |");
+	dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
+	printk("\n");
+
+	print_testname("mixed read-write-lock");
+	printk("             |");
+	dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
+	printk("             |");
+	dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
+	printk("\n");
+
+	print_testname("mixed write-read-lock");
+	printk("             |");
+	dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
+	printk("             |");
+	dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
+	printk("\n");
+
+	printk("  --------------------------------------------------------------------------\n");
+
+	/*
+	 * irq-context testcases:
+	 */
+	DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
+	DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
+	DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
+	DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
+	DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
+	DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);
+
+	DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
+//	DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
+
+	if (unexpected_testcase_failures) {
+		printk("-----------------------------------------------------------------\n");
+		debug_locks = 0;
+		printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
+			unexpected_testcase_failures, testcase_total);
+		printk("-----------------------------------------------------------------\n");
+	} else if (expected_testcase_failures && testcase_successes) {
+		printk("--------------------------------------------------------\n");
+		printk("%3d out of %3d testcases failed, as expected. |\n",
+			expected_testcase_failures, testcase_total);
+		printk("----------------------------------------------------\n");
+		debug_locks = 1;
+	} else if (expected_testcase_failures && !testcase_successes) {
+		printk("--------------------------------------------------------\n");
+		printk("All %3d testcases failed, as expected. |\n",
+			expected_testcase_failures);
+		printk("----------------------------------------\n");
+		debug_locks = 1;
+	} else {
+		printk("-------------------------------------------------------\n");
+		printk("Good, all %3d testcases passed! |\n",
+			testcase_successes);
+		printk("---------------------------------\n");
+		debug_locks = 1;
+	}
+	debug_locks_silent = 0;
+}
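
The lib/locking-selftest-*.h headers above are templates: each one re-points the generic LOCK()/UNLOCK() (and INIT, RLOCK, WLOCK) names at one lock type, so re-including a header before each GENERATE_TESTCASE() stamps the same event sequence out once per API. A self-contained sketch of the same preprocessor trick, with illustrative names only:

    #include <stdio.h>

    /* the testcase body, written once against the generic LOCK/UNLOCK names: */
    #define E() LOCK(A); LOCK(B); UNLOCK(B); UNLOCK(A)

    #define GENERATE_TESTCASE(name) static void name(void) { E(); }

    /* stand-in for #include "locking-selftest-spin.h": */
    #undef LOCK
    #undef UNLOCK
    #define LOCK(x)     printf("spin_lock(" #x ")\n")
    #define UNLOCK(x)   printf("spin_unlock(" #x ")\n")
    GENERATE_TESTCASE(AB_spin)

    /* stand-in for #include "locking-selftest-mutex.h": */
    #undef LOCK
    #undef UNLOCK
    #define LOCK(x)     printf("mutex_lock(" #x ")\n")
    #define UNLOCK(x)   printf("mutex_unlock(" #x ")\n")
    GENERATE_TESTCASE(AB_mutex)

    int main(void)
    {
        AB_spin();      /* macros expand at the point of use, so the   */
        AB_mutex();     /* same body hits two different lock APIs      */
        return 0;
    }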

commit 6375e2b74c620794e1a27a26e4338aec2e41346a
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:45 2006 -0700

    [PATCH] lockdep: irqtrace cleanup of include/asm-x86_64/irqflags.h
    
    Clean up the x86-64 irqflags.h file:
    
     - macro => inline function transformation
     - simplifications
     - style fixes
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
    Cc: Andi Kleen <ak@muc.de>
    Cc: Jan Beulich <jbeulich@novell.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>
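
Two concrete wins from the macro => inline function transformation: the argument gets a real type, and it is evaluated exactly once, whereas a function-like macro may evaluate it several times. A small user-space illustration of the multiple-evaluation hazard, reusing the flag bits from the VSMP raw_irqs_disabled_flags() in the diff below (names are hypothetical):

    #include <stdio.h>

    /* macro version: "flags" may be evaluated twice */
    #define DISABLED_MACRO(flags) \
        (!((flags) & (1 << 9)) || ((flags) & (1 << 18)))

    /* inline version: the argument is evaluated exactly once */
    static inline int disabled_fn(unsigned long flags)
    {
        return !(flags & (1 << 9)) || (flags & (1 << 18));
    }

    int main(void)
    {
        unsigned long v[2] = { 1UL << 9, 0 };
        unsigned long *p;

        p = v;
        (void)DISABLED_MACRO(*p++);  /* bit 9 set, so the || needs its second
                                        operand: *p++ runs twice */
        printf("macro advanced p by %td\n", p - v);

        p = v;
        (void)disabled_fn(*p++);     /* argument evaluated exactly once */
        printf("inline advanced p by %td\n", p - v);
        return 0;
    }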

diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S
index e49af0032e94..332ea5dff916 100644
--- a/arch/x86_64/lib/thunk.S
+++ b/arch/x86_64/lib/thunk.S
@@ -47,6 +47,11 @@
 	thunk_retrax __down_failed_interruptible,__down_interruptible
 	thunk_retrax __down_failed_trylock,__down_trylock
 	thunk __up_wakeup,__up
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	thunk trace_hardirqs_on_thunk,trace_hardirqs_on
+	thunk trace_hardirqs_off_thunk,trace_hardirqs_off
+#endif
 	
 	/* SAVE_ARGS below is used only for the .cfi directives it contains. */
 	CFI_STARTPROC
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
index 22f3c06b247e..cce6937e87c0 100644
--- a/include/asm-x86_64/irqflags.h
+++ b/include/asm-x86_64/irqflags.h
@@ -5,57 +5,137 @@
  *
  * This file gets included from lowlevel asm headers too, to provide
  * wrapped versions of the local_irq_*() APIs, based on the
- * raw_local_irq_*() macros from the lowlevel headers.
+ * raw_local_irq_*() functions from the lowlevel headers.
  */
 #ifndef _ASM_IRQFLAGS_H
 #define _ASM_IRQFLAGS_H
 
 #ifndef __ASSEMBLY__
+/*
+ * Interrupt control:
+ */
+
+static inline unsigned long __raw_local_save_flags(void)
+{
+	unsigned long flags;
+
+	__asm__ __volatile__(
+		"# __raw_save_flags\n\t"
+		"pushfq ; popq %q0"
+		: "=g" (flags)
+		: /* no input */
+		: "memory"
+	);
 
-/* interrupt control.. */
-#define raw_local_save_flags(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define raw_local_irq_restore(x) 	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
+	return flags;
+}
+
+#define raw_local_save_flags(flags) \
+		do { (flags) = __raw_local_save_flags(); } while (0)
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+	__asm__ __volatile__(
+		"pushq %0 ; popfq"
+		: /* no output */
+		:"g" (flags)
+		:"memory", "cc"
+	);
+}
 
 #ifdef CONFIG_X86_VSMP
-/* Interrupt control for VSMP  architecture */
-#define raw_local_irq_disable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
-#define raw_local_irq_enable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
-
-#define raw_irqs_disabled_flags(flags)	\
-({						\
-	(flags & (1<<18)) || !(flags & (1<<9));	\
-})
-
-/* For spinlocks etc */
-#define raw_local_irq_save(x)	do { raw_local_save_flags(x); raw_local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
-#else  /* CONFIG_X86_VSMP */
-#define raw_local_irq_disable() 	__asm__ __volatile__("cli": : :"memory")
-#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-
-#define raw_irqs_disabled_flags(flags)	\
-({						\
-	!(flags & (1<<9));			\
-})
-
-/* For spinlocks etc */
-#define raw_local_irq_save(x) 	do { warn_if_not_ulong(x); __asm__ __volatile__("# raw_local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
+
+/*
+ * Interrupt control for the VSMP architecture:
+ */
+
+static inline void raw_local_irq_disable(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
+}
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & (1<<9)) || (flags & (1 << 18));
+}
+
+#else /* CONFIG_X86_VSMP */
+
+static inline void raw_local_irq_disable(void)
+{
+	__asm__ __volatile__("cli" : : : "memory");
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	__asm__ __volatile__("sti" : : : "memory");
+}
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & (1 << 9));
+}
+
 #endif
 
-#define raw_irqs_disabled()			\
-({						\
-	unsigned long flags;			\
-	raw_local_save_flags(flags);		\
-	raw_irqs_disabled_flags(flags);		\
-})
+/*
+ * For spinlocks, etc.:
+ */
+
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_disable();
+
+	return flags;
+}
 
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define raw_safe_halt()	__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
+#define raw_local_irq_save(flags) \
+		do { (flags) = __raw_local_irq_save(); } while (0)
+
+static inline int raw_irqs_disabled(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	return raw_irqs_disabled_flags(flags);
+}
+
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static inline void raw_safe_halt(void)
+{
+	__asm__ __volatile__("sti; hlt" : : : "memory");
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static inline void halt(void)
+{
+	__asm__ __volatile__("hlt": : :"memory");
+}
 
 #else /* __ASSEMBLY__: */
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
+# ifdef CONFIG_TRACE_IRQFLAGS
+#  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk
+#  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk
+# else
+#  define TRACE_IRQS_ON
+#  define TRACE_IRQS_OFF
+# endif
 #endif
 
 #endif
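
For reference, the bit tested above is IF (bit 9 of RFLAGS); the VSMP variant additionally encodes the interrupt state in bit 18 (the AC flag). The pushfq/popq idiom can be tried from user space, where IF always reads as set - a minimal, x86-64-only sketch:

    #include <stdio.h>

    static inline unsigned long raw_local_save_flags(void)
    {
        unsigned long flags;

        __asm__ __volatile__("pushfq ; popq %0" : "=g" (flags) : : "memory");
        return flags;
    }

    static inline int raw_irqs_disabled_flags(unsigned long flags)
    {
        return !(flags & (1UL << 9));   /* IF clear => interrupts disabled */
    }

    int main(void)
    {
        unsigned long flags = raw_local_save_flags();

        /* user space always runs with IF set, so this prints 0: */
        printf("irqs disabled: %d\n", raw_irqs_disabled_flags(flags));
        return 0;
    }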

commit 2601e64d262ee5ed4d4a5737345803800d9c4db3
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:45 2006 -0700

    [PATCH] lockdep: irqtrace subsystem, x86_64 support
    
    Add irqflags-tracing support to x86_64.
    
    [akpm@osdl.org: build fix]
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Cc: Andi Kleen <ak@muc.de>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>
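
The convention the annotations follow throughout the entry code below: TRACE_IRQS_OFF is placed right after each cli and TRACE_IRQS_ON right before each sti, so lockdep's software view of the hardirq flag stays consistent with the hardware state. A toy user-space model of that invariant, with hypothetical stand-ins for the real trace_hardirqs_on()/trace_hardirqs_off():

    #include <assert.h>
    #include <stdio.h>

    static int hw_irqs_on = 1;       /* what the CPU's IF flag would say */
    static int traced_irqs_on = 1;   /* what lockdep believes            */

    static void trace_hardirqs_off(void) { traced_irqs_on = 0; }
    static void trace_hardirqs_on(void)  { traced_irqs_on = 1; }

    /* entry-code idiom: cli ; TRACE_IRQS_OFF */
    static void cli(void)
    {
        hw_irqs_on = 0;
        trace_hardirqs_off();
    }

    /* entry-code idiom: TRACE_IRQS_ON ; sti */
    static void sti(void)
    {
        trace_hardirqs_on();
        hw_irqs_on = 1;
    }

    int main(void)
    {
        cli();
        assert(hw_irqs_on == traced_irqs_on);
        sti();
        assert(hw_irqs_on == traced_irqs_on);
        printf("trace state consistent with hardware state\n");
        return 0;
    }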

diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 1d92ab56c0f9..775d211a5cf9 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -1,5 +1,9 @@
 menu "Kernel hacking"
 
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
 source "lib/Kconfig.debug"
 
 config DEBUG_RODATA
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c536fa98ea37..9b5bb413a6e9 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -13,6 +13,7 @@
 #include <asm/thread_info.h>	
 #include <asm/segment.h>
 #include <asm/vsyscall32.h>
+#include <asm/irqflags.h>
 #include <linux/linkage.h>
 
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -75,6 +76,10 @@ ENTRY(ia32_sysenter_target)
 	swapgs
 	movq	%gs:pda_kernelstack, %rsp
 	addq	$(PDA_STACKOFFSET),%rsp	
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs; here we enable them straight after entry:
+	 */
 	sti	
  	movl	%ebp,%ebp		/* zero extension */
 	pushq	$__USER32_DS
@@ -118,6 +123,7 @@ sysenter_do_call:
 	movq	%rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
+	TRACE_IRQS_OFF
 	testl	$_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
 	jnz	int_ret_from_sys_call
 	andl    $~TS_COMPAT,threadinfo_status(%r10)
@@ -132,6 +138,7 @@ sysenter_do_call:
 	CFI_REGISTER rsp,rcx
 	movl	$VSYSCALL32_SYSEXIT,%edx	/* User %eip */
 	CFI_REGISTER rip,rdx
+	TRACE_IRQS_ON
 	swapgs
 	sti		/* sti only takes effect after the next instruction */
 	/* sysexit */
@@ -186,6 +193,10 @@ ENTRY(ia32_cstar_target)
 	movl	%esp,%r8d
 	CFI_REGISTER	rsp,r8
 	movq	%gs:pda_kernelstack,%rsp
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable them straight after entry:
+	 */
 	sti
 	SAVE_ARGS 8,1,1
 	movl 	%eax,%eax	/* zero extension */
@@ -220,6 +231,7 @@ cstar_do_call:
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	cli
+	TRACE_IRQS_OFF
 	testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
 	jnz  int_ret_from_sys_call
 	andl $~TS_COMPAT,threadinfo_status(%r10)
@@ -228,6 +240,7 @@ cstar_do_call:
 	CFI_REGISTER rip,rcx
 	movl EFLAGS-ARGOFFSET(%rsp),%r11d	
 	/*CFI_REGISTER rflags,r11*/
+	TRACE_IRQS_ON
 	movl RSP-ARGOFFSET(%rsp),%esp
 	CFI_RESTORE rsp
 	swapgs
@@ -286,7 +299,11 @@ ENTRY(ia32_syscall)
 	/*CFI_REL_OFFSET	rflags,EFLAGS-RIP*/
 	/*CFI_REL_OFFSET	cs,CS-RIP*/
 	CFI_REL_OFFSET	rip,RIP-RIP
-	swapgs	
+	swapgs
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable them straight after entry:
+	 */
 	sti
 	movl %eax,%eax
 	pushq %rax
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index ed92c2983606..d464dded68c0 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,13 +41,24 @@
 #include <asm/thread_info.h>
 #include <asm/hw_irq.h>
 #include <asm/page.h>
+#include <asm/irqflags.h>
 
 	.code64
 
 #ifndef CONFIG_PREEMPT
 #define retint_kernel retint_restore_args
 #endif	
-	
+
+
+.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+#ifdef CONFIG_TRACE_IRQFLAGS
+	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	jnc  1f
+	TRACE_IRQS_ON
+1:
+#endif
+.endm
+
 /*
  * C code is not supposed to know about undefined top of stack. Every time 
  * a C function with an pt_regs argument is called from the SYSCALL based 
@@ -194,6 +205,10 @@ ENTRY(system_call)
 	swapgs
 	movq	%rsp,%gs:pda_oldrsp 
 	movq	%gs:pda_kernelstack,%rsp
+	/*
+	 * No need to follow this irqs off/on section - it's straight
+	 * and short:
+	 */
 	sti					
 	SAVE_ARGS 8,1
 	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
@@ -219,10 +234,15 @@ ret_from_sys_call:
 sysret_check:		
 	GET_THREAD_INFO(%rcx)
 	cli
+	TRACE_IRQS_OFF
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
 	CFI_REMEMBER_STATE
 	jnz  sysret_careful 
+	/*
+	 * sysretq will re-enable interrupts:
+	 */
+	TRACE_IRQS_ON
 	movq RIP-ARGOFFSET(%rsp),%rcx
 	CFI_REGISTER	rip,rcx
 	RESTORE_ARGS 0,-ARG_SKIP,1
@@ -237,6 +257,7 @@ sysret_careful:
 	CFI_RESTORE_STATE
 	bt $TIF_NEED_RESCHED,%edx
 	jnc sysret_signal
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
@@ -247,6 +268,7 @@ sysret_careful:
 
 	/* Handle a signal */ 
 sysret_signal:
+	TRACE_IRQS_ON
 	sti
 	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    1f
@@ -261,6 +283,7 @@ sysret_signal:
 	/* Use IRET because user could have changed frame. This
 	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 	
 badsys:
@@ -309,6 +332,7 @@ ENTRY(int_ret_from_sys_call)
 	CFI_REL_OFFSET	r10,R10-ARGOFFSET
 	CFI_REL_OFFSET	r11,R11-ARGOFFSET
 	cli
+	TRACE_IRQS_OFF
 	testl $3,CS-ARGOFFSET(%rsp)
 	je retint_restore_args
 	movl $_TIF_ALLWORK_MASK,%edi
@@ -327,6 +351,7 @@ int_with_check:
 int_careful:
 	bt $TIF_NEED_RESCHED,%edx
 	jnc  int_very_careful
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET 8
@@ -334,10 +359,12 @@ int_careful:
 	popq %rdi
 	CFI_ADJUST_CFA_OFFSET -8
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 
 	/* handle signals and tracing -- both require a full stack frame */
 int_very_careful:
+	TRACE_IRQS_ON
 	sti
 	SAVE_REST
 	/* Check for syscall exit trace */	
@@ -351,6 +378,7 @@ int_very_careful:
 	CFI_ADJUST_CFA_OFFSET -8
 	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
 	cli
+	TRACE_IRQS_OFF
 	jmp int_restore_rest
 	
 int_signal:
@@ -363,6 +391,7 @@ int_signal:
 int_restore_rest:
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	jmp int_with_check
 	CFI_ENDPROC
 END(int_ret_from_sys_call)
@@ -484,6 +513,10 @@ END(stub_rt_sigreturn)
 	swapgs	
 1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
 	cmoveq %gs:pda_irqstackptr,%rsp
+	/*
+	 * We entered an interrupt context - irqs are off:
+	 */
+	TRACE_IRQS_OFF
 	call \func
 	.endm
 
@@ -493,6 +526,7 @@ ENTRY(common_interrupt)
 	/* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
 	cli	
+	TRACE_IRQS_OFF
 	decl %gs:pda_irqcount
 	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
@@ -515,9 +549,21 @@ retint_check:
 	CFI_REMEMBER_STATE
 	jnz  retint_careful
 retint_swapgs:	 	
+	/*
+	 * The iretq could re-enable interrupts:
+	 */
+	cli
+	TRACE_IRQS_IRETQ
 	swapgs 
+	jmp restore_args
+
 retint_restore_args:				
 	cli
+	/*
+	 * The iretq could re-enable interrupts:
+	 */
+	TRACE_IRQS_IRETQ
+restore_args:
 	RESTORE_ARGS 0,8,0						
 iret_label:	
 	iretq
@@ -530,6 +576,7 @@ iret_label:
 	/* running with kernel gs */
 bad_iret:
 	movq $11,%rdi	/* SIGSEGV */
+	TRACE_IRQS_ON
 	sti
 	jmp do_exit			
 	.previous	
@@ -539,6 +586,7 @@ retint_careful:
 	CFI_RESTORE_STATE
 	bt    $TIF_NEED_RESCHED,%edx
 	jnc   retint_signal
+	TRACE_IRQS_ON
 	sti
 	pushq %rdi
 	CFI_ADJUST_CFA_OFFSET	8
@@ -547,11 +595,13 @@ retint_careful:
 	CFI_ADJUST_CFA_OFFSET	-8
 	GET_THREAD_INFO(%rcx)
 	cli
+	TRACE_IRQS_OFF
 	jmp retint_check
 	
 retint_signal:
 	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
 	jz    retint_swapgs
+	TRACE_IRQS_ON
 	sti
 	SAVE_REST
 	movq $-1,ORIG_RAX(%rsp) 			
@@ -560,6 +610,7 @@ retint_signal:
 	call do_notify_resume
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	movl $_TIF_NEED_RESCHED,%edi
 	GET_THREAD_INFO(%rcx)
 	jmp retint_check
@@ -666,7 +717,7 @@ END(spurious_interrupt)
 
 	/* error code is on the stack already */
 	/* handle NMI like exceptions that can happen everywhere */
-	.macro paranoidentry sym, ist=0
+	.macro paranoidentry sym, ist=0, irqtrace=1
 	SAVE_ALL
 	cld
 	movl $1,%ebx
@@ -691,8 +742,73 @@ END(spurious_interrupt)
 	addq	$EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
 	.endif
 	cli
+	.if \irqtrace
+	TRACE_IRQS_OFF
+	.endif
 	.endm
-	
+
+	/*
+ 	 * "Paranoid" exit path from exception stack.
+  	 * Paranoid because this is used by NMIs and cannot take
+	 * any kernel state for granted.
+	 * We don't do kernel preemption checks here, because only
+	 * NMI should be common and it does not enable IRQs and
+	 * cannot get reschedule ticks.
+	 *
+	 * "trace" is 0 for the NMI handler only, because irq-tracing
+	 * is fundamentally NMI-unsafe. (we cannot change the soft and
+	 * hard flags at once, atomically)
+	 */
+	.macro paranoidexit trace=1
+	/* ebx:	no swapgs flag */
+paranoid_exit\trace:
+	testl %ebx,%ebx				/* swapgs needed? */
+	jnz paranoid_restore\trace
+	testl $3,CS(%rsp)
+	jnz   paranoid_userspace\trace
+paranoid_swapgs\trace:
+	TRACE_IRQS_IRETQ 0
+	swapgs
+paranoid_restore\trace:
+	RESTORE_ALL 8
+	iretq
+paranoid_userspace\trace:
+	GET_THREAD_INFO(%rcx)
+	movl threadinfo_flags(%rcx),%ebx
+	andl $_TIF_WORK_MASK,%ebx
+	jz paranoid_swapgs\trace
+	movq %rsp,%rdi			/* &pt_regs */
+	call sync_regs
+	movq %rax,%rsp			/* switch stack for scheduling */
+	testl $_TIF_NEED_RESCHED,%ebx
+	jnz paranoid_schedule\trace
+	movl %ebx,%edx			/* arg3: thread flags */
+	.if \trace
+	TRACE_IRQS_ON
+	.endif
+	sti
+	xorl %esi,%esi 			/* arg2: oldset */
+	movq %rsp,%rdi 			/* arg1: &pt_regs */
+	call do_notify_resume
+	cli
+	.if \trace
+	TRACE_IRQS_OFF
+	.endif
+	jmp paranoid_userspace\trace
+paranoid_schedule\trace:
+	.if \trace
+	TRACE_IRQS_ON
+	.endif
+	sti
+	call schedule
+	cli
+	.if \trace
+	TRACE_IRQS_OFF
+	.endif
+	jmp paranoid_userspace\trace
+	CFI_ENDPROC
+	.endm
+
 /*
  * Exception entry point. This expects an error code/orig_rax on the stack
  * and the exception handler in %rax.	
@@ -748,6 +864,7 @@ error_exit:
 	movl %ebx,%eax		
 	RESTORE_REST
 	cli
+	TRACE_IRQS_OFF
 	GET_THREAD_INFO(%rcx)	
 	testl %eax,%eax
 	jne  retint_kernel
@@ -755,6 +872,10 @@ error_exit:
 	movl  $_TIF_WORK_MASK,%edi
 	andl  %edi,%edx
 	jnz  retint_careful
+	/*
+	 * The iret might restore flags:
+	 */
+	TRACE_IRQS_IRETQ
 	swapgs 
 	RESTORE_ARGS 0,8,0						
 	jmp iret_label
@@ -916,8 +1037,7 @@ KPROBE_ENTRY(debug)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8		
 	paranoidentry do_debug, DEBUG_STACK
-	jmp paranoid_exit
-	CFI_ENDPROC
+	paranoidexit
 END(debug)
 	.previous .text
 
@@ -926,49 +1046,13 @@ KPROBE_ENTRY(nmi)
 	INTR_FRAME
 	pushq $-1
 	CFI_ADJUST_CFA_OFFSET 8
-	paranoidentry do_nmi
-	/*
- 	 * "Paranoid" exit path from exception stack.
-  	 * Paranoid because this is used by NMIs and cannot take
-	 * any kernel state for granted.
-	 * We don't do kernel preemption checks here, because only
-	 * NMI should be common and it does not enable IRQs and
-	 * cannot get reschedule ticks.
-	 */
-	/* ebx:	no swapgs flag */
-paranoid_exit:
-	testl %ebx,%ebx				/* swapgs needed? */
-	jnz paranoid_restore
-	testl $3,CS(%rsp)
-	jnz   paranoid_userspace
-paranoid_swapgs:	
-	swapgs
-paranoid_restore:	
-	RESTORE_ALL 8
-	iretq
-paranoid_userspace:	
-	GET_THREAD_INFO(%rcx)
-	movl threadinfo_flags(%rcx),%ebx
-	andl $_TIF_WORK_MASK,%ebx
-	jz paranoid_swapgs
-	movq %rsp,%rdi			/* &pt_regs */
-	call sync_regs
-	movq %rax,%rsp			/* switch stack for scheduling */
-	testl $_TIF_NEED_RESCHED,%ebx
-	jnz paranoid_schedule
-	movl %ebx,%edx			/* arg3: thread flags */
-	sti
-	xorl %esi,%esi 			/* arg2: oldset */
-	movq %rsp,%rdi 			/* arg1: &pt_regs */
-	call do_notify_resume
-	cli
-	jmp paranoid_userspace
-paranoid_schedule:
-	sti
-	call schedule
-	cli
-	jmp paranoid_userspace
-	CFI_ENDPROC
+	paranoidentry do_nmi, 0, 0
+#ifdef CONFIG_TRACE_IRQFLAGS
+	paranoidexit 0
+#else
+	jmp paranoid_exit1
+ 	CFI_ENDPROC
+#endif
 END(nmi)
 	.previous .text
 
@@ -977,7 +1061,7 @@ KPROBE_ENTRY(int3)
  	pushq $0
  	CFI_ADJUST_CFA_OFFSET 8
  	paranoidentry do_int3, DEBUG_STACK
- 	jmp paranoid_exit
+ 	jmp paranoid_exit1
  	CFI_ENDPROC
 END(int3)
 	.previous .text
@@ -1006,7 +1090,7 @@ END(reserved)
 ENTRY(double_fault)
 	XCPT_FRAME
 	paranoidentry do_double_fault
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(double_fault)
 
@@ -1022,7 +1106,7 @@ END(segment_not_present)
 ENTRY(stack_segment)
 	XCPT_FRAME
 	paranoidentry do_stack_segment
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(stack_segment)
 
@@ -1050,7 +1134,7 @@ ENTRY(machine_check)
 	pushq $0
 	CFI_ADJUST_CFA_OFFSET 8	
 	paranoidentry do_machine_check
-	jmp paranoid_exit
+	jmp paranoid_exit1
 	CFI_ENDPROC
 END(machine_check)
 #endif
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index a1f1df5f7bfc..5221a53e90c1 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -177,8 +177,10 @@ asmlinkage void do_softirq(void)
  	local_irq_save(flags);
  	pending = local_softirq_pending();
  	/* Switch to interrupt stack */
- 	if (pending)
+ 	if (pending) {
 		call_softirq();
+		WARN_ON_ONCE(softirq_count());
+	}
  	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(do_softirq);
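
For reference, softirq_count() masks the softirq bits out of the preempt
counter, so the new WARN_ON_ONCE() fires whenever call_softirq() comes back
with an unbalanced softirq-disable depth:

	/* from include/linux/hardirq.h, quoted for reference: */
	#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
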
diff --git a/include/asm-x86_64/irqflags.h b/include/asm-x86_64/irqflags.h
new file mode 100644
index 000000000000..22f3c06b247e
--- /dev/null
+++ b/include/asm-x86_64/irqflags.h
@@ -0,0 +1,61 @@
+/*
+ * include/asm-x86_64/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+/* interrupt control.. */
+#define raw_local_save_flags(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
+#define raw_local_irq_restore(x) 	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
+
+#ifdef CONFIG_X86_VSMP
+/* Interrupt control for VSMP  architecture */
+#define raw_local_irq_disable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
+#define raw_local_irq_enable()	do { unsigned long flags; raw_local_save_flags(flags); raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
+
+#define raw_irqs_disabled_flags(flags)	\
+({						\
+	(flags & (1<<18)) || !(flags & (1<<9));	\
+})
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)	do { raw_local_save_flags(x); raw_local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
+#else  /* CONFIG_X86_VSMP */
+#define raw_local_irq_disable() 	__asm__ __volatile__("cli": : :"memory")
+#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
+
+#define raw_irqs_disabled_flags(flags)	\
+({						\
+	!(flags & (1<<9));			\
+})
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x) 	do { warn_if_not_ulong(x); __asm__ __volatile__("# raw_local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
+#endif
+
+#define raw_irqs_disabled()			\
+({						\
+	unsigned long flags;			\
+	raw_local_save_flags(flags);		\
+	raw_irqs_disabled_flags(flags);		\
+})
+
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define raw_safe_halt()	__asm__ __volatile__("sti; hlt": : :"memory")
+/* used when interrupts are already enabled or to shutdown the processor */
+#define halt()			__asm__ __volatile__("hlt": : :"memory")
+
+#else /* __ASSEMBLY__: */
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index 68e559f3631c..f67f2873a922 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -244,43 +244,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
 
 #define warn_if_not_ulong(x) do { unsigned long foo; (void) (&(x) == &foo); } while (0)
 
-/* interrupt control.. */
-#define local_save_flags(x)	do { warn_if_not_ulong(x); __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory"); } while (0)
-#define local_irq_restore(x) 	__asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
-
-#ifdef CONFIG_X86_VSMP
-/* Interrupt control for VSMP  architecture */
-#define local_irq_disable()	do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags & ~(1 << 9)) | (1 << 18)); } while (0)
-#define local_irq_enable()	do { unsigned long flags; local_save_flags(flags); local_irq_restore((flags | (1 << 9)) & ~(1 << 18)); } while (0)
-
-#define irqs_disabled()					\
-({							\
-	unsigned long flags;				\
-	local_save_flags(flags);			\
-	(flags & (1<<18)) || !(flags & (1<<9));		\
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x)	do { local_save_flags(x); local_irq_restore((x & ~(1 << 9)) | (1 << 18)); } while (0)
-#else  /* CONFIG_X86_VSMP */
-#define local_irq_disable() 	__asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-
-#define irqs_disabled()			\
-({					\
-	unsigned long flags;		\
-	local_save_flags(flags);	\
-	!(flags & (1<<9));		\
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x) 	do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
-#endif
-
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
+#include <linux/irqflags.h>
 
 void cpu_idle_wait(void);
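
With the raw_*() primitives split out above, the generic wrappers (see the
linux/irqflags.h hunk in the irqtrace core commit further down) make every
hard-irq transition visible to lockdep. A minimal usage sketch, not part of
the patch:

	unsigned long flags;

	local_irq_save(flags);		/* raw pushfq/cli, then trace_hardirqs_off() */
	/* ... per-CPU critical section ... */
	local_irq_restore(flags);	/* trace_hardirqs_on() first if IF is about
					   to come back on, then the raw popfq */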
 

commit c8558fcdecb1f920df8050be4f2d5f499060030e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:44 2006 -0700

    [PATCH] lockdep: irqtrace cleanup of include/asm-i386/irqflags.h
    
    Clean up the x86 irqflags.h file:
    
     - macro => inline function transformation
     - simplifications
     - style fixes
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h
index ca777e894c92..e1bdb97c07fa 100644
--- a/include/asm-i386/irqflags.h
+++ b/include/asm-i386/irqflags.h
@@ -5,24 +5,95 @@
  *
  * This file gets included from lowlevel asm headers too, to provide
  * wrapped versions of the local_irq_*() APIs, based on the
- * raw_local_irq_*() macros from the lowlevel headers.
+ * raw_local_irq_*() functions from the lowlevel headers.
  */
 #ifndef _ASM_IRQFLAGS_H
 #define _ASM_IRQFLAGS_H
 
-#define raw_local_save_flags(x)	do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
-#define raw_local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
-#define raw_local_irq_disable()	__asm__ __volatile__("cli": : :"memory")
-#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define raw_safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
+#ifndef __ASSEMBLY__
 
-#define raw_irqs_disabled_flags(flags)	(!((flags) & (1<<9)))
+static inline unsigned long __raw_local_save_flags(void)
+{
+	unsigned long flags;
 
-/* For spinlocks etc */
-#define raw_local_irq_save(x)	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+	__asm__ __volatile__(
+		"pushfl ; popl %0"
+		: "=g" (flags)
+		: /* no input */
+	);
+
+	return flags;
+}
+
+#define raw_local_save_flags(flags) \
+		do { (flags) = __raw_local_save_flags(); } while (0)
+
+static inline void raw_local_irq_restore(unsigned long flags)
+{
+	__asm__ __volatile__(
+		"pushl %0 ; popfl"
+		: /* no output */
+		:"g" (flags)
+		:"memory", "cc"
+	);
+}
+
+static inline void raw_local_irq_disable(void)
+{
+	__asm__ __volatile__("cli" : : : "memory");
+}
+
+static inline void raw_local_irq_enable(void)
+{
+	__asm__ __volatile__("sti" : : : "memory");
+}
+
+/*
+ * Used in the idle loop; sti takes one instruction cycle
+ * to complete:
+ */
+static inline void raw_safe_halt(void)
+{
+	__asm__ __volatile__("sti; hlt" : : : "memory");
+}
+
+/*
+ * Used when interrupts are already enabled or to
+ * shutdown the processor:
+ */
+static inline void halt(void)
+{
+	__asm__ __volatile__("hlt": : :"memory");
+}
+
+static inline int raw_irqs_disabled_flags(unsigned long flags)
+{
+	return !(flags & (1 << 9));
+}
+
+static inline int raw_irqs_disabled(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	return raw_irqs_disabled_flags(flags);
+}
+
+/*
+ * For spinlocks, etc:
+ */
+static inline unsigned long __raw_local_irq_save(void)
+{
+	unsigned long flags = __raw_local_save_flags();
+
+	raw_local_irq_disable();
+
+	return flags;
+}
+
+#define raw_local_irq_save(flags) \
+		do { (flags) = __raw_local_irq_save(); } while (0)
+
+#endif /* __ASSEMBLY__ */
 
 /*
  * Do the CPU's IRQ-state tracing from assembly code. We call a
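
The macro-to-inline conversion keeps every call site source-compatible while
adding real type checking; because __raw_local_save_flags() returns the flags
word, predicates like raw_irqs_disabled() compose as plain C expressions
instead of statement macros. A hedged usage sketch:

	unsigned long flags;

	raw_local_irq_save(flags);	/* flags = __raw_local_irq_save() */
	/* ... section that must not be interrupted ... */
	raw_local_irq_restore(flags);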

commit 55f327fa9e876758491a82af7491104f1cc3fc4d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:43 2006 -0700

    [PATCH] lockdep: irqtrace subsystem, i386 support
    
    Add irqflags-tracing support to i386.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug
index b20ddd04f3d8..b31c0802e1cc 100644
--- a/arch/i386/Kconfig.debug
+++ b/arch/i386/Kconfig.debug
@@ -1,5 +1,9 @@
 menu "Kernel hacking"
 
+config TRACE_IRQFLAGS_SUPPORT
+	bool
+	default y
+
 source "lib/Kconfig.debug"
 
 config EARLY_PRINTK
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 787190c45fdb..d9a260f2efb4 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -42,6 +42,7 @@
 
 #include <linux/linkage.h>
 #include <asm/thread_info.h>
+#include <asm/irqflags.h>
 #include <asm/errno.h>
 #include <asm/segment.h>
 #include <asm/smp.h>
@@ -76,12 +77,21 @@ NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 
 #ifdef CONFIG_PREEMPT
-#define preempt_stop		cli
+#define preempt_stop		cli; TRACE_IRQS_OFF
 #else
 #define preempt_stop
 #define resume_kernel		restore_nocheck
 #endif
 
+.macro TRACE_IRQS_IRET
+#ifdef CONFIG_TRACE_IRQFLAGS
+	testl $IF_MASK,EFLAGS(%esp)     # interrupts off?
+	jz 1f
+	TRACE_IRQS_ON
+1:
+#endif
+.endm
+
 #ifdef CONFIG_VM86
 #define resume_userspace_sig	check_userspace
 #else
@@ -257,6 +267,10 @@ ENTRY(sysenter_entry)
 	CFI_REGISTER esp, ebp
 	movl TSS_sysenter_esp0(%esp),%esp
 sysenter_past_esp:
+	/*
+	 * No need to follow this irqs on/off section: the syscall
+	 * disabled irqs and here we enable them straight after entry:
+	 */
 	sti
 	pushl $(__USER_DS)
 	CFI_ADJUST_CFA_OFFSET 4
@@ -303,6 +317,7 @@ sysenter_past_esp:
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)
 	cli
+	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	jne syscall_exit_work
@@ -310,6 +325,7 @@ sysenter_past_esp:
 	movl EIP(%esp), %edx
 	movl OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
+	TRACE_IRQS_ON
 	sti
 	sysexit
 	CFI_ENDPROC
@@ -339,6 +355,7 @@ syscall_exit:
 	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
+	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx	# current->work
 	jne syscall_exit_work
@@ -355,12 +372,15 @@ restore_all:
 	CFI_REMEMBER_STATE
 	je ldt_ss			# returning to user-space with LDT SS
 restore_nocheck:
+	TRACE_IRQS_IRET
+restore_nocheck_notrace:
 	RESTORE_REGS
 	addl $4, %esp
 	CFI_ADJUST_CFA_OFFSET -4
 1:	iret
 .section .fixup,"ax"
 iret_exc:
+	TRACE_IRQS_ON
 	sti
 	pushl $0			# no error code
 	pushl $do_iret_error
@@ -386,11 +406,13 @@ ldt_ss:
 	subl $8, %esp		# reserve space for switch16 pointer
 	CFI_ADJUST_CFA_OFFSET 8
 	cli
+	TRACE_IRQS_OFF
 	movl %esp, %eax
 	/* Set up the 16bit stack frame with switch32 pointer on top,
 	 * and a switch16 pointer on top of the current frame. */
 	call setup_x86_bogus_stack
 	CFI_ADJUST_CFA_OFFSET -8	# frame has moved
+	TRACE_IRQS_IRET
 	RESTORE_REGS
 	lss 20+4(%esp), %esp	# switch to 16bit stack
 1:	iret
@@ -411,6 +433,7 @@ work_resched:
 	cli				# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
+	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	andl $_TIF_WORK_MASK, %ecx	# is there any work to be done other
 					# than syscall tracing?
@@ -462,6 +485,7 @@ syscall_trace_entry:
 syscall_exit_work:
 	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
 	jz work_pending
+	TRACE_IRQS_ON
 	sti				# could let do_syscall_trace() call
 					# schedule() instead
 	movl %esp, %eax
@@ -535,9 +559,14 @@ ENTRY(irq_entries_start)
 vector=vector+1
 .endr
 
+/*
+ * the CPU automatically disables interrupts when executing an IRQ vector,
+ * so IRQ-flags tracing has to follow that:
+ */
 	ALIGN
 common_interrupt:
 	SAVE_ALL
+	TRACE_IRQS_OFF
 	movl %esp,%eax
 	call do_IRQ
 	jmp ret_from_intr
@@ -549,9 +578,10 @@ ENTRY(name)				\
 	pushl $~(nr);			\
 	CFI_ADJUST_CFA_OFFSET 4;	\
 	SAVE_ALL;			\
+	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
 	call smp_/**/name;		\
-	jmp ret_from_intr;	\
+	jmp ret_from_intr;		\
 	CFI_ENDPROC
 
 /* The include is where all of the SMP etc. interrupts come from */
@@ -726,7 +756,7 @@ nmi_stack_correct:
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_nmi
-	jmp restore_all
+	jmp restore_nocheck_notrace
 	CFI_ENDPROC
 
 nmi_stack_fixup:
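
Two details above are easy to miss: TRACE_IRQS_IRET reports irqs-on only when
the EFLAGS image that iret is about to restore has IF set, and the NMI path
now returns through restore_nocheck_notrace precisely to skip that tracing,
since irq-tracing is not NMI-safe. A hedged C rendering of the macro's test:

	/* hedged C rendering of TRACE_IRQS_IRET; IF_MASK is bit 9 of EFLAGS */
	if (regs->eflags & IF_MASK)	/* iret will re-enable interrupts */
		trace_hardirqs_on();
	/* otherwise irqs stay off and the traced state is already correct */
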
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 16b491703967..6cb529f60dcc 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -166,7 +166,7 @@ void irq_ctx_init(int cpu)
 	irqctx->tinfo.task              = NULL;
 	irqctx->tinfo.exec_domain       = NULL;
 	irqctx->tinfo.cpu               = cpu;
-	irqctx->tinfo.preempt_count     = SOFTIRQ_OFFSET;
+	irqctx->tinfo.preempt_count     = 0;
 	irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
 
 	softirq_ctx[cpu] = irqctx;
@@ -211,6 +211,10 @@ asmlinkage void do_softirq(void)
 			: "0"(isp)
 			: "memory", "cc", "edx", "ecx", "eax"
 		);
+		/*
+		 * Shouldn't happen, we returned above if in_interrupt():
+	 	 */
+		WARN_ON_ONCE(softirq_count());
 	}
 
 	local_irq_restore(flags);
diff --git a/include/asm-i386/irqflags.h b/include/asm-i386/irqflags.h
new file mode 100644
index 000000000000..ca777e894c92
--- /dev/null
+++ b/include/asm-i386/irqflags.h
@@ -0,0 +1,56 @@
+/*
+ * include/asm-i386/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#define raw_local_save_flags(x)	do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
+#define raw_local_irq_restore(x) do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
+#define raw_local_irq_disable()	__asm__ __volatile__("cli": : :"memory")
+#define raw_local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
+/* used in the idle loop; sti takes one instruction cycle to complete */
+#define raw_safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
+/* used when interrupts are already enabled or to shutdown the processor */
+#define halt()			__asm__ __volatile__("hlt": : :"memory")
+
+#define raw_irqs_disabled_flags(flags)	(!((flags) & (1<<9)))
+
+/* For spinlocks etc */
+#define raw_local_irq_save(x)	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+
+/*
+ * Do the CPU's IRQ-state tracing from assembly code. We call a
+ * C function, so save all the C-clobbered registers:
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+# define TRACE_IRQS_ON				\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_on;			\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+# define TRACE_IRQS_OFF				\
+	pushl %eax;				\
+	pushl %ecx;				\
+	pushl %edx;				\
+	call trace_hardirqs_off;		\
+	popl %edx;				\
+	popl %ecx;				\
+	popl %eax;
+
+#else
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif
diff --git a/include/asm-i386/spinlock.h b/include/asm-i386/spinlock.h
index 04ba30234c48..7e29b51bcaa0 100644
--- a/include/asm-i386/spinlock.h
+++ b/include/asm-i386/spinlock.h
@@ -31,6 +31,11 @@
 	"jmp 1b\n" \
 	"3:\n\t"
 
+/*
+ * NOTE: there's an irqs-on section here, which normally would have to be
+ * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use
+ * __raw_spin_lock_string_flags().
+ */
 #define __raw_spin_lock_string_flags \
 	"\n1:\t" \
 	"lock ; decb %0\n\t" \
diff --git a/include/asm-i386/system.h b/include/asm-i386/system.h
index cab0180567f9..db398d88b1d9 100644
--- a/include/asm-i386/system.h
+++ b/include/asm-i386/system.h
@@ -456,25 +456,7 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long l
 
 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 
-/* interrupt control.. */
-#define local_save_flags(x)	do { typecheck(unsigned long,x); __asm__ __volatile__("pushfl ; popl %0":"=g" (x): /* no input */); } while (0)
-#define local_irq_restore(x) 	do { typecheck(unsigned long,x); __asm__ __volatile__("pushl %0 ; popfl": /* no output */ :"g" (x):"memory", "cc"); } while (0)
-#define local_irq_disable() 	__asm__ __volatile__("cli": : :"memory")
-#define local_irq_enable()	__asm__ __volatile__("sti": : :"memory")
-/* used in the idle loop; sti takes one instruction cycle to complete */
-#define safe_halt()		__asm__ __volatile__("sti; hlt": : :"memory")
-/* used when interrupts are already enabled or to shutdown the processor */
-#define halt()			__asm__ __volatile__("hlt": : :"memory")
-
-#define irqs_disabled()			\
-({					\
-	unsigned long flags;		\
-	local_save_flags(flags);	\
-	!(flags & (1<<9));		\
-})
-
-/* For spinlocks etc */
-#define local_irq_save(x)	__asm__ __volatile__("pushfl ; popl %0 ; cli":"=g" (x): /* no input */ :"memory")
+#include <linux/irqflags.h>
 
 /*
  * disable hlt during certain critical i/o operations

commit 55df314fbdb44c20fa7a5112d16546ee970c1d76
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:43 2006 -0700

    [PATCH] lockdep: irqtrace subsystem, docs
    
    Add Documentation/irqflags-tracing.txt.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
new file mode 100644
index 000000000000..6a444877ee0b
--- /dev/null
+++ b/Documentation/irqflags-tracing.txt
@@ -0,0 +1,57 @@
+IRQ-flags state tracing
+
+started by Ingo Molnar <mingo@redhat.com>
+
+the "irq-flags tracing" feature "traces" hardirq and softirq state, in
+that it gives interested subsystems an opportunity to be notified of
+every hardirqs-off/hardirqs-on, softirqs-off/softirqs-on event that
+happens in the kernel.
+
+CONFIG_TRACE_IRQFLAGS_SUPPORT is needed for CONFIG_PROVE_SPIN_LOCKING
+and CONFIG_PROVE_RW_LOCKING to be offered by the generic lock debugging
+code. Otherwise only CONFIG_PROVE_MUTEX_LOCKING and
+CONFIG_PROVE_RWSEM_LOCKING will be offered on an architecture - these
+are locking APIs that are not used in IRQ context. (the one exception
+for rwsems is worked around)
+
+architecture support for this is certainly not in the "trivial"
+category, because lots of lowlevel assembly code deal with irq-flags
+state changes. But an architecture can be irq-flags-tracing enabled in a
+rather straightforward and risk-free manner.
+
+Architectures that want to support this need to do a couple of
+code-organizational changes first:
+
+- move their irq-flags manipulation code from their asm/system.h header
+  to asm/irqflags.h
+
+- rename local_irq_disable()/etc to raw_local_irq_disable()/etc. so that
+  the linux/irqflags.h code can inject callbacks and can construct the
+  real local_irq_disable()/etc APIs.
+
+- add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file
+
+and then a couple of functional changes are needed as well to implement
+irq-flags-tracing support:
+
+- in lowlevel entry code add (build-conditional) calls to the
+  trace_hardirqs_off()/trace_hardirqs_on() functions. The lock validator
+  closely guards whether the 'real' irq-flags matches the 'virtual'
+  irq-flags state, and complains loudly (and turns itself off) if the
+  two do not match. Most of the time spent on arch support for
+  irq-flags-tracing goes into this state: look at the lockdep
+  complaint, try to figure out the assembly code we did not cover yet,
+  fix and repeat. Once the system has booted up and works without a
+  lockdep complaint in the irq-flags-tracing functions, arch support
+  is complete.
+- if the architecture has non-maskable interrupts then those need to be
+  excluded from the irq-tracing [and lock validation] mechanism via
+  lockdep_off()/lockdep_on().
+
+in general there is no risk from having an incomplete irq-flags-tracing
+implementation in an architecture: lockdep will detect that and will
+turn itself off. I.e. the lock validator will still be reliable. There
+should be no crashes due to irq-tracing bugs. (except if the assembly
+changes break other code by modifying conditions or registers that
+shouldn't be)
+
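
The renaming step above is what lets the generic layer wrap the primitives;
the construction, shown in full in the next commit's linux/irqflags.h, boils
down to:

	/* sketch of the generic wrappers built on the raw_*() ops: */
	#define local_irq_enable() \
		do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
	#define local_irq_disable() \
		do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)

Note the ordering: both callbacks run while interrupts are still hard-disabled,
so no interrupt can slip in between the tracing update and the real transition.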

commit de30a2b355ea85350ca2f58f3b9bf4e5bc007986
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:42 2006 -0700

    [PATCH] lockdep: irqtrace subsystem, core
    
    Accurate hard-IRQ-flags and softirq-flags state tracing.
    
    This allows us to attach extra functionality to IRQ flags on/off
    events (such as trace-on/off).
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 525baab45d2d..027728b95429 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -429,7 +429,7 @@ void do_softirq(void)
 		local_bh_disable();
 		do_softirq_onstack();
 		account_system_vtime(current);
-		__local_bh_enable();
+		_local_bh_enable();
 	}
 
 	local_irq_restore(flags);
diff --git a/include/asm-powerpc/irqflags.h b/include/asm-powerpc/irqflags.h
new file mode 100644
index 000000000000..7970cbaeaa54
--- /dev/null
+++ b/include/asm-powerpc/irqflags.h
@@ -0,0 +1,31 @@
+/*
+ * include/asm-powerpc/irqflags.h
+ *
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+/*
+ * Get definitions for raw_local_save_flags(x), etc.
+ */
+#include <asm-powerpc/hw_irq.h>
+
+/*
+ * Do the CPU's IRQ-state tracing from assembly code. We call a
+ * C function, so save all the C-clobbered registers:
+ */
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+#error No support on PowerPC yet for CONFIG_TRACE_IRQFLAGS
+
+#else
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+
+#endif
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 114ae583cca9..b1d4332b5cf0 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -86,9 +86,6 @@ extern void synchronize_irq(unsigned int irq);
 # define synchronize_irq(irq)	barrier()
 #endif
 
-#define nmi_enter()		irq_enter()
-#define nmi_exit()		sub_preempt_count(HARDIRQ_OFFSET)
-
 struct task_struct;
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
@@ -97,12 +94,35 @@ static inline void account_system_vtime(struct task_struct *tsk)
 }
 #endif
 
+/*
+ * It is safe to do non-atomic ops on ->hardirq_context,
+ * because NMI handlers may not preempt and the ops are
+ * always balanced, so the interrupted value of ->hardirq_context
+ * will always be restored.
+ */
 #define irq_enter()					\
 	do {						\
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
+		trace_hardirq_enter();			\
+	} while (0)
+
+/*
+ * Exit irq context without processing softirqs:
+ */
+#define __irq_exit()					\
+	do {						\
+		trace_hardirq_exit();			\
+		account_system_vtime(current);		\
+		sub_preempt_count(HARDIRQ_OFFSET);	\
 	} while (0)
 
+/*
+ * Exit irq context and process softirqs if needed:
+ */
 extern void irq_exit(void);
 
+#define nmi_enter()		irq_enter()
+#define nmi_exit()		__irq_exit()
+
 #endif /* LINUX_HARDIRQ_H */
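
The nmi_exit() change above is deliberate: __irq_exit() leaves irq context
without invoke_softirq(), because running softirqs from NMI context would be
unsafe. A hedged sketch of the pairing an arch NMI handler relies on:

	nmi_enter();	/* irq_enter(): vtime accounting, HARDIRQ_OFFSET,
			   trace_hardirq_enter() */
	/* ... handle the NMI ... */
	nmi_exit();	/* __irq_exit(): undoes all of the above and
			   never runs softirqs */
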
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 1b7bb37624bb..444a3ae0de2a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -3,6 +3,7 @@
 
 #include <linux/file.h>
 #include <linux/rcupdate.h>
+#include <linux/irqflags.h>
 
 #define INIT_FDTABLE \
 {							\
@@ -124,6 +125,7 @@ extern struct group_info init_groups;
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 	.fs_excl	= ATOMIC_INIT(0),				\
 	.pi_lock	= SPIN_LOCK_UNLOCKED,				\
+	INIT_TRACE_IRQFLAGS						\
 }
 
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 73463fbb38e4..d5afee95fd43 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -10,6 +10,7 @@
 #include <linux/irqreturn.h>
 #include <linux/hardirq.h>
 #include <linux/sched.h>
+#include <linux/irqflags.h>
 #include <asm/atomic.h>
 #include <asm/ptrace.h>
 #include <asm/system.h>
@@ -199,13 +200,11 @@ static inline void __deprecated save_and_cli(unsigned long *x)
 #define save_and_cli(x)	save_and_cli(&x)
 #endif /* CONFIG_SMP */
 
-/* SoftIRQ primitives.  */
-#define local_bh_disable() \
-		do { add_preempt_count(SOFTIRQ_OFFSET); barrier(); } while (0)
-#define __local_bh_enable() \
-		do { barrier(); sub_preempt_count(SOFTIRQ_OFFSET); } while (0)
-
+extern void local_bh_disable(void);
+extern void __local_bh_enable(void);
+extern void _local_bh_enable(void);
 extern void local_bh_enable(void);
+extern void local_bh_enable_ip(unsigned long ip);
 
 /* PLEASE, avoid to allocate new softirqs, if you need not _really_ high
    frequency threaded job scheduling. For almost all the purposes
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
new file mode 100644
index 000000000000..412e025bc5c7
--- /dev/null
+++ b/include/linux/irqflags.h
@@ -0,0 +1,96 @@
+/*
+ * include/linux/irqflags.h
+ *
+ * IRQ flags tracing: follow the state of the hardirq and softirq flags and
+ * provide callbacks for transitions between ON and OFF states.
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() macros from the lowlevel headers.
+ */
+#ifndef _LINUX_TRACE_IRQFLAGS_H
+#define _LINUX_TRACE_IRQFLAGS_H
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+  extern void trace_hardirqs_on(void);
+  extern void trace_hardirqs_off(void);
+  extern void trace_softirqs_on(unsigned long ip);
+  extern void trace_softirqs_off(unsigned long ip);
+# define trace_hardirq_context(p)	((p)->hardirq_context)
+# define trace_softirq_context(p)	((p)->softirq_context)
+# define trace_hardirqs_enabled(p)	((p)->hardirqs_enabled)
+# define trace_softirqs_enabled(p)	((p)->softirqs_enabled)
+# define trace_hardirq_enter()	do { current->hardirq_context++; } while (0)
+# define trace_hardirq_exit()	do { current->hardirq_context--; } while (0)
+# define trace_softirq_enter()	do { current->softirq_context++; } while (0)
+# define trace_softirq_exit()	do { current->softirq_context--; } while (0)
+# define INIT_TRACE_IRQFLAGS	.softirqs_enabled = 1,
+#else
+# define trace_hardirqs_on()		do { } while (0)
+# define trace_hardirqs_off()		do { } while (0)
+# define trace_softirqs_on(ip)		do { } while (0)
+# define trace_softirqs_off(ip)		do { } while (0)
+# define trace_hardirq_context(p)	0
+# define trace_softirq_context(p)	0
+# define trace_hardirqs_enabled(p)	0
+# define trace_softirqs_enabled(p)	0
+# define trace_hardirq_enter()		do { } while (0)
+# define trace_hardirq_exit()		do { } while (0)
+# define trace_softirq_enter()		do { } while (0)
+# define trace_softirq_exit()		do { } while (0)
+# define INIT_TRACE_IRQFLAGS
+#endif
+
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+
+#include <asm/irqflags.h>
+
+#define local_irq_enable() \
+	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
+#define local_irq_disable() \
+	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
+#define local_irq_save(flags) \
+	do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)
+
+#define local_irq_restore(flags)				\
+	do {							\
+		if (raw_irqs_disabled_flags(flags)) {		\
+			raw_local_irq_restore(flags);		\
+			trace_hardirqs_off();			\
+		} else {					\
+			trace_hardirqs_on();			\
+			raw_local_irq_restore(flags);		\
+		}						\
+	} while (0)
+#else /* !CONFIG_TRACE_IRQFLAGS_SUPPORT */
+/*
+ * The local_irq_*() APIs are equal to the raw_local_irq*()
+ * if !TRACE_IRQFLAGS.
+ */
+# define raw_local_irq_disable()	local_irq_disable()
+# define raw_local_irq_enable()		local_irq_enable()
+# define raw_local_irq_save(flags)	local_irq_save(flags)
+# define raw_local_irq_restore(flags)	local_irq_restore(flags)
+#endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */
+
+#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
+#define safe_halt()						\
+	do {							\
+		trace_hardirqs_on();				\
+		raw_safe_halt();				\
+	} while (0)
+
+#define local_save_flags(flags)		raw_local_save_flags(flags)
+
+#define irqs_disabled()						\
+({								\
+	unsigned long flags;					\
+								\
+	raw_local_save_flags(flags);				\
+	raw_irqs_disabled_flags(flags);				\
+})
+
+#define irqs_disabled_flags(flags)	raw_irqs_disabled_flags(flags)
+#endif		/* CONFIG_TRACE_IRQFLAGS_SUPPORT */
+
+#endif
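
The raw_*() aliases in the !CONFIG_TRACE_IRQFLAGS_SUPPORT branch matter for
code that must never recurse into the tracing hooks, such as lockdep itself or
the stacktrace code elsewhere in this series, which uses raw_local_irq_save()
directly. A hedged sketch:

	unsigned long flags;

	raw_local_irq_save(flags);	/* never enters trace_hardirqs_off() */
	/* ... tracer-internal or NMI-context work ... */
	raw_local_irq_restore(flags);
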
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bdabeee10a78..ad7a89014d29 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -871,6 +871,21 @@ struct task_struct {
 	/* mutex deadlock detection */
 	struct mutex_waiter *blocked_on;
 #endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+	unsigned int irq_events;
+	int hardirqs_enabled;
+	unsigned long hardirq_enable_ip;
+	unsigned int hardirq_enable_event;
+	unsigned long hardirq_disable_ip;
+	unsigned int hardirq_disable_event;
+	int softirqs_enabled;
+	unsigned long softirq_disable_ip;
+	unsigned int softirq_disable_event;
+	unsigned long softirq_enable_ip;
+	unsigned int softirq_enable_event;
+	int hardirq_context;
+	int softirq_context;
+#endif
 
 /* journalling filesystem info */
 	void *journal_info;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1cd46a4fb0d3..b7db7fb74f53 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -968,6 +968,10 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (!p)
 		goto fork_out;
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+	DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
+	DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
+#endif
 	retval = -EAGAIN;
 	if (atomic_read(&p->user->processes) >=
 			p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
@@ -1042,6 +1046,21 @@ static task_t *copy_process(unsigned long clone_flags,
  	}
 	mpol_fix_fork_child_flag(p);
 #endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+	p->irq_events = 0;
+	p->hardirqs_enabled = 0;
+	p->hardirq_enable_ip = 0;
+	p->hardirq_enable_event = 0;
+	p->hardirq_disable_ip = _THIS_IP_;
+	p->hardirq_disable_event = 0;
+	p->softirqs_enabled = 1;
+	p->softirq_enable_ip = _THIS_IP_;
+	p->softirq_enable_event = 0;
+	p->softirq_disable_ip = 0;
+	p->softirq_disable_event = 0;
+	p->hardirq_context = 0;
+	p->softirq_context = 0;
+#endif
 
 	rt_mutex_init_task(p);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 48c1faa60a67..911829966534 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4462,7 +4462,9 @@ int __sched cond_resched_softirq(void)
 	BUG_ON(!in_softirq());
 
 	if (need_resched() && __resched_legal()) {
-		__local_bh_enable();
+		raw_local_irq_disable();
+		_local_bh_enable();
+		raw_local_irq_enable();
 		__cond_resched();
 		local_bh_disable();
 		return 1;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8f03e3b89b55..584609b6a66e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -61,6 +61,119 @@ static inline void wakeup_softirqd(void)
 		wake_up_process(tsk);
 }
 
+/*
+ * This one is for softirq.c-internal use,
+ * where hardirqs are disabled legitimately:
+ */
+static void __local_bh_disable(unsigned long ip)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(in_irq());
+
+	raw_local_irq_save(flags);
+	add_preempt_count(SOFTIRQ_OFFSET);
+	/*
+	 * Were softirqs turned off above:
+	 */
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_off(ip);
+	raw_local_irq_restore(flags);
+}
+
+void local_bh_disable(void)
+{
+	__local_bh_disable((unsigned long)__builtin_return_address(0));
+}
+
+EXPORT_SYMBOL(local_bh_disable);
+
+void __local_bh_enable(void)
+{
+	WARN_ON_ONCE(in_irq());
+
+	/*
+	 * softirqs should never be enabled by __local_bh_enable(),
+	 * it always nests inside local_bh_enable() sections:
+	 */
+	WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
+
+	sub_preempt_count(SOFTIRQ_OFFSET);
+}
+EXPORT_SYMBOL_GPL(__local_bh_enable);
+
+/*
+ * Special-case - softirqs can safely be enabled in
+ * cond_resched_softirq(), or by __do_softirq(),
+ * without processing still-pending softirqs:
+ */
+void _local_bh_enable(void)
+{
+	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_on((unsigned long)__builtin_return_address(0));
+	sub_preempt_count(SOFTIRQ_OFFSET);
+}
+
+EXPORT_SYMBOL(_local_bh_enable);
+
+void local_bh_enable(void)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(in_irq());
+	WARN_ON_ONCE(irqs_disabled());
+
+	local_irq_save(flags);
+	/*
+	 * Are softirqs going to be turned on now:
+	 */
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_on((unsigned long)__builtin_return_address(0));
+	/*
+	 * Keep preemption disabled until we are done with
+	 * softirq processing:
+ 	 */
+ 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
+
+	if (unlikely(!in_interrupt() && local_softirq_pending()))
+		do_softirq();
+
+	dec_preempt_count();
+	local_irq_restore(flags);
+	preempt_check_resched();
+}
+EXPORT_SYMBOL(local_bh_enable);
+
+void local_bh_enable_ip(unsigned long ip)
+{
+	unsigned long flags;
+
+	WARN_ON_ONCE(in_irq());
+
+	local_irq_save(flags);
+	/*
+	 * Are softirqs going to be turned on now:
+	 */
+	if (softirq_count() == SOFTIRQ_OFFSET)
+		trace_softirqs_on(ip);
+	/*
+	 * Keep preemption disabled until we are done with
+	 * softirq processing:
+ 	 */
+ 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
+
+	if (unlikely(!in_interrupt() && local_softirq_pending()))
+		do_softirq();
+
+	dec_preempt_count();
+	local_irq_restore(flags);
+	preempt_check_resched();
+}
+EXPORT_SYMBOL(local_bh_enable_ip);
+
 /*
  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
  * and we fall back to softirqd after that.
@@ -80,8 +193,9 @@ asmlinkage void __do_softirq(void)
 	int cpu;
 
 	pending = local_softirq_pending();
+	__local_bh_disable((unsigned long)__builtin_return_address(0));
+	trace_softirq_enter();
 
-	local_bh_disable();
 	cpu = smp_processor_id();
 restart:
 	/* Reset the pending bitmask before enabling irqs */
@@ -109,7 +223,8 @@ asmlinkage void __do_softirq(void)
 	if (pending)
 		wakeup_softirqd();
 
-	__local_bh_enable();
+	trace_softirq_exit();
+	_local_bh_enable();
 }
 
 #ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -136,23 +251,6 @@ EXPORT_SYMBOL(do_softirq);
 
 #endif
 
-void local_bh_enable(void)
-{
-	WARN_ON(irqs_disabled());
-	/*
-	 * Keep preemption disabled until we are done with
-	 * softirq processing:
- 	 */
- 	sub_preempt_count(SOFTIRQ_OFFSET - 1);
-
-	if (unlikely(!in_interrupt() && local_softirq_pending()))
-		do_softirq();
-
-	dec_preempt_count();
-	preempt_check_resched();
-}
-EXPORT_SYMBOL(local_bh_enable);
-
 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
 # define invoke_softirq()	__do_softirq()
 #else
@@ -165,6 +263,7 @@ EXPORT_SYMBOL(local_bh_enable);
 void irq_exit(void)
 {
 	account_system_vtime(current);
+	trace_hardirq_exit();
 	sub_preempt_count(IRQ_EXIT_OFFSET);
 	if (!in_interrupt() && local_softirq_pending())
 		invoke_softirq();
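
One trick above is worth unpacking: the sub_preempt_count(SOFTIRQ_OFFSET - 1)
in local_bh_enable() re-enables softirqs while keeping one unit of
preempt-disable, so pending softirqs can be run before preemption is allowed
again. A hedged equivalent spelling:

	sub_preempt_count(SOFTIRQ_OFFSET);	/* softirqs enabled again... */
	add_preempt_count(1);			/* ...but stay non-preemptible */

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();			/* preemption allowed again */
	preempt_check_resched();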

commit 21b32bbff950771f196da91011249fa05fa83b32
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:40 2006 -0700

    [PATCH] lockdep: stacktrace subsystem, x86_64 support
    
    Framework to generate and save stacktraces quickly, without printing anything
    to the console.  x86_64 support.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
    Cc: Andi Kleen <ak@muc.de>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index e856804c447f..445f436688f3 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,10 @@ config X86
 	bool
 	default y
 
+config STACKTRACE_SUPPORT
+	bool
+	default y
+
 config SEMAPHORE_SLEEPERS
 	bool
 	default y
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 819e84ec5b64..b5aaeafc1cd3 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y	:= process.o signal.o entry.o traps.o irq.o \
 		setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
 		pci-dma.o pci-nommu.o alternative.o
 
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c
new file mode 100644
index 000000000000..32cf55eb9af8
--- /dev/null
+++ b/arch/x86_64/kernel/stacktrace.c
@@ -0,0 +1,221 @@
+/*
+ * arch/x86_64/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/smp.h>
+
+static inline int
+in_range(unsigned long start, unsigned long addr, unsigned long end)
+{
+	return addr >= start && addr <= end;
+}
+
+static unsigned long
+get_stack_end(struct task_struct *task, unsigned long stack)
+{
+	unsigned long stack_start, stack_end, flags;
+	int i, cpu;
+
+	/*
+	 * The most common case is that we are in the task stack:
+	 */
+	stack_start = (unsigned long)task->thread_info;
+	stack_end = stack_start + THREAD_SIZE;
+
+	if (in_range(stack_start, stack, stack_end))
+		return stack_end;
+
+	/*
+	 * We are in an interrupt if irqstackptr is set:
+	 */
+	raw_local_irq_save(flags);
+	cpu = safe_smp_processor_id();
+	stack_end = (unsigned long)cpu_pda(cpu)->irqstackptr;
+
+	if (stack_end) {
+		stack_start = stack_end & ~(IRQSTACKSIZE-1);
+		if (in_range(stack_start, stack, stack_end))
+			goto out_restore;
+		/*
+		 * We get here if we are in an IRQ context but we
+		 * are also in an exception stack.
+		 */
+	}
+
+	/*
+	 * Iterate over all exception stacks, and figure out whether
+	 * 'stack' is in one of them:
+	 */
+	for (i = 0; i < N_EXCEPTION_STACKS; i++) {
+		/*
+		 * set 'end' to the end of the exception stack.
+		 */
+		stack_end = per_cpu(init_tss, cpu).ist[i];
+		stack_start = stack_end - EXCEPTION_STKSZ;
+
+		/*
+		 * Is 'stack' above this exception frame's end?
+		 * If yes then skip to the next frame.
+		 */
+		if (stack >= stack_end)
+			continue;
+		/*
+		 * Is 'stack' above this exception frame's start address?
+		 * If yes then we found the right frame.
+		 */
+		if (stack >= stack_start)
+			goto out_restore;
+
+		/*
+		 * If this is a debug stack, and if it has a larger size than
+		 * the usual exception stacks, then 'stack' might still
+		 * be within the lower portion of the debug stack:
+		 */
+#if DEBUG_STKSZ > EXCEPTION_STKSZ
+		if (i == DEBUG_STACK - 1 && stack >= stack_end - DEBUG_STKSZ) {
+			/*
+			 * Black magic. A large debug stack is composed of
+			 * multiple exception stack entries, which we
+			 * iterate through now. Don't look:
+			 */
+			do {
+				stack_end -= EXCEPTION_STKSZ;
+				stack_start -= EXCEPTION_STKSZ;
+			} while (stack < stack_start);
+
+			goto out_restore;
+		}
+#endif
+	}
+	/*
+	 * Ok, 'stack' is not pointing to any of the system stacks.
+	 */
+	stack_end = 0;
+
+out_restore:
+	raw_local_irq_restore(flags);
+
+	return stack_end;
+}
+
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer:
+ */
+static inline unsigned long
+save_context_stack(struct stack_trace *trace, unsigned int skip,
+		   unsigned long stack, unsigned long stack_end)
+{
+	unsigned long addr;
+
+#ifdef CONFIG_FRAME_POINTER
+	unsigned long prev_stack = 0;
+
+	while (in_range(prev_stack, stack, stack_end)) {
+		pr_debug("stack:          %p\n", (void *)stack);
+		addr = (unsigned long)(((unsigned long *)stack)[1]);
+		pr_debug("addr:           %p\n", (void *)addr);
+		if (!skip)
+			trace->entries[trace->nr_entries++] = addr-1;
+		else
+			skip--;
+		if (trace->nr_entries >= trace->max_entries)
+			break;
+		if (!addr)
+			return 0;
+		/*
+		 * Stack frames must go forwards (otherwise a loop could
+		 * happen if the stackframe is corrupted), so we move
+		 * prev_stack forwards:
+		 */
+		prev_stack = stack;
+		stack = (unsigned long)(((unsigned long *)stack)[0]);
+	}
+	pr_debug("invalid:        %p\n", (void *)stack);
+#else
+	while (stack < stack_end) {
+		addr = ((unsigned long *)stack)[0];
+		stack += sizeof(long);
+		if (__kernel_text_address(addr)) {
+			if (!skip)
+				trace->entries[trace->nr_entries++] = addr-1;
+			else
+				skip--;
+			if (trace->nr_entries >= trace->max_entries)
+				break;
+		}
+	}
+#endif
+	return stack;
+}
+
+#define MAX_STACKS 10
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ * If all_contexts is set, all contexts (hardirq, softirq and process)
+ * are saved. If not set then only the current context is saved.
+ */
+void save_stack_trace(struct stack_trace *trace,
+		      struct task_struct *task, int all_contexts,
+		      unsigned int skip)
+{
+	unsigned long stack = (unsigned long)&stack;
+	int i, nr_stacks = 0;
+	unsigned long stacks_done[MAX_STACKS];
+
+	WARN_ON(trace->nr_entries || !trace->max_entries);
+
+	if (!task)
+		task = current;
+
+	pr_debug("task: %p, ti: %p\n", task, task->thread_info);
+
+	if (!task || task == current) {
+		/* Grab rbp right from our regs: */
+		asm ("mov %%rbp, %0" : "=r" (stack));
+		pr_debug("rbp:            %p\n", (void *)stack);
+	} else {
+		/* rbp is the last reg pushed by switch_to(): */
+		stack = task->thread.rsp;
+		pr_debug("other task rsp: %p\n", (void *)stack);
+		stack = (unsigned long)(((unsigned long *)stack)[0]);
+		pr_debug("other task rbp: %p\n", (void *)stack);
+	}
+
+	while (1) {
+		unsigned long stack_end = get_stack_end(task, stack);
+
+		pr_debug("stack:          %p\n", (void *)stack);
+		pr_debug("stack end:      %p\n", (void *)stack_end);
+
+		/*
+		 * Invalid stack address?
+		 */
+		if (!stack_end)
+			return;
+		/*
+		 * Were we in this stack already? (recursion)
+		 */
+		for (i = 0; i < nr_stacks; i++)
+			if (stacks_done[i] == stack_end)
+				return;
+		stacks_done[nr_stacks] = stack_end;
+
+		stack = save_context_stack(trace, skip, stack, stack_end);
+		if (!all_contexts || !stack ||
+				trace->nr_entries >= trace->max_entries)
+			return;
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+		if (trace->nr_entries >= trace->max_entries)
+			return;
+		if (++nr_stacks >= MAX_STACKS)
+			return;
+	}
+}
+
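
A hedged usage sketch of the new interface (struct stack_trace and the
prototypes appear in the stacktrace core commit further down); the caller owns
the entry buffer, and with all_contexts set the contexts are separated by
ULONG_MAX markers, as the loop above shows:

	unsigned long entries[16];
	struct stack_trace trace = {
		.nr_entries	= 0,
		.max_entries	= 16,
		.entries	= entries,
	};

	/* current context only, skip one entry (the immediate caller): */
	save_stack_trace(&trace, current, 0, 1);
	print_stack_trace(&trace, 0);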

commit 4a7c7197817e7180f56110334d961f4aa6ac69cb
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:39 2006 -0700

    [PATCH] lockdep: stacktrace subsystem, i386 support
    
    Framework to generate and save stacktraces quickly, without printing anything
    to the console.  i386 support.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 27d8dddbaa47..076a72c7a41f 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -18,6 +18,10 @@ config GENERIC_TIME
 	bool
 	default y
 
+config STACKTRACE_SUPPORT
+	bool
+	default y
+
 config SEMAPHORE_SLEEPERS
 	bool
 	default y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index cbc1184e9473..1b452a1665c4 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -9,6 +9,7 @@ obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o \
 		pci-dma.o i386_ksyms.o i387.o bootflag.o \
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
 
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
 obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c
new file mode 100644
index 000000000000..e62a037ab399
--- /dev/null
+++ b/arch/i386/kernel/stacktrace.c
@@ -0,0 +1,98 @@
+/*
+ * arch/i386/kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+	return	p > (void *)tinfo &&
+		p < (void *)tinfo + THREAD_SIZE - 3;
+}
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer:
+ */
+static inline unsigned long
+save_context_stack(struct stack_trace *trace, unsigned int skip,
+		   struct thread_info *tinfo, unsigned long *stack,
+		   unsigned long ebp)
+{
+	unsigned long addr;
+
+#ifdef CONFIG_FRAME_POINTER
+	while (valid_stack_ptr(tinfo, (void *)ebp)) {
+		addr = *(unsigned long *)(ebp + 4);
+		if (!skip)
+			trace->entries[trace->nr_entries++] = addr;
+		else
+			skip--;
+		if (trace->nr_entries >= trace->max_entries)
+			break;
+		/*
+		 * break out of recursive entries (such as
+		 * end_of_stack_stop_unwind_function):
+	 	 */
+		if (ebp == *(unsigned long *)ebp)
+			break;
+
+		ebp = *(unsigned long *)ebp;
+	}
+#else
+	while (valid_stack_ptr(tinfo, stack)) {
+		addr = *stack++;
+		if (__kernel_text_address(addr)) {
+			if (!skip)
+				trace->entries[trace->nr_entries++] = addr;
+			else
+				skip--;
+			if (trace->nr_entries >= trace->max_entries)
+				break;
+		}
+	}
+#endif
+
+	return ebp;
+}
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ * If all_contexts is set, all contexts (hardirq, softirq and process)
+ * are saved. If not set then only the current context is saved.
+ */
+void save_stack_trace(struct stack_trace *trace,
+		      struct task_struct *task, int all_contexts,
+		      unsigned int skip)
+{
+	unsigned long ebp;
+	unsigned long *stack = &ebp;
+
+	WARN_ON(trace->nr_entries || !trace->max_entries);
+
+	if (!task || task == current) {
+		/* Grab ebp right from our regs: */
+		asm ("movl %%ebp, %0" : "=r" (ebp));
+	} else {
+		/* ebp is the last reg pushed by switch_to(): */
+		ebp = *(unsigned long *) task->thread.esp;
+	}
+
+	while (1) {
+		struct thread_info *context = (struct thread_info *)
+				((unsigned long)stack & (~(THREAD_SIZE - 1)));
+
+		ebp = save_context_stack(trace, skip, context, stack, ebp);
+		stack = (unsigned long *)context->previous_esp;
+		if (!all_contexts || !stack ||
+				trace->nr_entries >= trace->max_entries)
+			break;
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+		if (trace->nr_entries >= trace->max_entries)
+			break;
+	}
+}
+
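
The CONFIG_FRAME_POINTER walk above relies on gcc's standard i386 frame
layout; a hedged picture of one link in the chain (the struct name is
illustrative only):

	/*
	 *   [ebp + 4]  return address  ->  recorded into trace->entries[]
	 *   [ebp + 0]  caller's ebp    ->  next link, until it stops moving
	 */
	struct frame {
		struct frame	*prev_ebp;	/* == *(unsigned long *)ebp */
		unsigned long	ret_addr;	/* == *(unsigned long *)(ebp + 4) */
	};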

commit 8637c09901049f061b94f684915d4f18ecf91d79
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jul 3 00:24:38 2006 -0700

    [PATCH] lockdep: stacktrace subsystem, core
    
    Framework to generate and save stacktraces quickly, without printing anything
    to the console.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
new file mode 100644
index 000000000000..9cc81e572224
--- /dev/null
+++ b/include/linux/stacktrace.h
@@ -0,0 +1,20 @@
+#ifndef __LINUX_STACKTRACE_H
+#define __LINUX_STACKTRACE_H
+
+#ifdef CONFIG_STACKTRACE
+struct stack_trace {
+	unsigned int nr_entries, max_entries;
+	unsigned long *entries;
+};
+
+extern void save_stack_trace(struct stack_trace *trace,
+			     struct task_struct *task, int all_contexts,
+			     unsigned int skip);
+
+extern void print_stack_trace(struct stack_trace *trace, int spaces);
+#else
+# define save_stack_trace(trace, task, all, skip)	do { } while (0)
+# define print_stack_trace(trace, spaces)	do { } while (0)
+#endif
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 82fb182f6f61..e7fd20e70f1b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,6 +10,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o
 
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
 obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_FUTEX) += futex.o
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
new file mode 100644
index 000000000000..b71816e47a30
--- /dev/null
+++ b/kernel/stacktrace.c
@@ -0,0 +1,24 @@
+/*
+ * kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <linux/stacktrace.h>
+
+void print_stack_trace(struct stack_trace *trace, int spaces)
+{
+	int i, j;
+
+	for (i = 0; i < trace->nr_entries; i++) {
+		unsigned long ip = trace->entries[i];
+
+		for (j = 0; j < spaces + 1; j++)
+			printk(" ");
+		print_ip_sym(ip);
+	}
+}
+
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7b3863d4f5e8..04e374c6fd46 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -107,7 +107,7 @@ config DEBUG_SLAB_LEAK
 
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
-	depends on DEBUG_KERNEL && PREEMPT
+	depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
 	default y
 	help
 	  If you say Y here then the kernel will use a debug variant of the
@@ -149,6 +149,10 @@ config DEBUG_SPINLOCK_SLEEP
 	  If you say Y here, various routines which may sleep will become very
 	  noisy if they are called with a spinlock held.
 
+config STACKTRACE
+	bool
+	depends on STACKTRACE_SUPPORT
+
 config DEBUG_KOBJECT
 	bool "kobject debugging"
 	depends on DEBUG_KERNEL