Patches contributed by Eötvös Loránd University


commit f4818900fa3ee1c56e96f6dede7cc4c05ed383d1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 20:52:23 2006 -0800

    [PATCH] hrtimer: clean up mktime and make arguments const
    
    add 'const' to mktime arguments, and clean it up a bit
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
    Signed-off-by: Andrew Morton <akpm@osdl.org>
    Signed-off-by: Linus Torvalds <torvalds@osdl.org>

diff --git a/include/linux/time.h b/include/linux/time.h
index 9c444d9c4aa0..773b83ddd8ef 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -38,9 +38,11 @@ static __inline__ int timespec_equal(struct timespec *a, struct timespec *b)
 	return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
 } 
 
-extern unsigned long mktime (unsigned int year, unsigned int mon,
-			     unsigned int day, unsigned int hour,
-			     unsigned int min, unsigned int sec);
+extern unsigned long mktime(const unsigned int year, const unsigned int mon,
+			    const unsigned int day, const unsigned int hour,
+			    const unsigned int min, const unsigned int sec);
+
+extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
 
 extern struct timespec xtime;
 extern struct timespec wall_to_monotonic;
@@ -51,8 +53,6 @@ static inline unsigned long get_seconds(void)
 	return xtime.tv_sec;
 }
 
-extern void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec);
-
 struct timespec current_kernel_time(void);
 
 #define CURRENT_TIME (current_kernel_time())
diff --git a/kernel/time.c b/kernel/time.c
index fa569885e22b..a0502aef43ce 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -599,12 +599,15 @@ EXPORT_SYMBOL_GPL(getnstimestamp);
  * will already get problems at other places on 2038-01-19 03:14:08)
  */
 unsigned long
-mktime (unsigned int year, unsigned int mon,
-	unsigned int day, unsigned int hour,
-	unsigned int min, unsigned int sec)
+mktime(const unsigned int year0, const unsigned int mon0,
+       const unsigned int day, const unsigned int hour,
+       const unsigned int min, const unsigned int sec)
 {
-	if (0 >= (int) (mon -= 2)) {	/* 1..12 -> 11,12,1..10 */
-		mon += 12;		/* Puts Feb last since it has leap day */
+	unsigned int mon = mon0, year = year0;
+
+	/* 1..12 -> 11,12,1..10 */
+	if (0 >= (int) (mon -= 2)) {
+		mon += 12;	/* Puts Feb last since it has leap day */
 		year -= 1;
 	}
 
@@ -630,7 +633,7 @@ mktime (unsigned int year, unsigned int mon,
  * 	0 <= tv_nsec < NSEC_PER_SEC
  * For negative values only the tv_sec field is negative !
  */
-void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec)
+void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
 {
 	while (nsec >= NSEC_PER_SEC) {
 		nsec -= NSEC_PER_SEC;
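
The month shift at the top of mktime() is the classic trick of treating March
as the start of the year, so the leap day lands at the end and never perturbs
the day-of-year offsets of the months after it. A minimal user-space sketch of
just that transformation (illustrative only, not the kernel code):

    #include <stdio.h>

    /* 1..12 -> 11,12,1..10: February becomes the last month, so a leap
     * day never disturbs the offsets of the months that follow it. */
    static void shift_month(unsigned int *year, unsigned int *mon)
    {
            if (0 >= (int)(*mon -= 2)) {
                    *mon += 12;
                    *year -= 1;
            }
    }

    int main(void)
    {
            unsigned int y = 2006, m = 1;   /* January 2006 */

            shift_month(&y, &m);
            printf("-> month %u of year %u\n", m, y);  /* month 11 of 2005 */
            return 0;
    }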

commit 11b751ae8c8ca3fa24c85bd5a3e51dd9f95cda17
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:27 2006 -0800

    [PATCH] mutex subsystem, semaphore to completion: drivers/block/loop.c
    
    convert the block loop device from semaphores to completions.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bed9ad76c04c..864729046e22 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -527,12 +527,12 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio)
 	lo->lo_pending++;
 	loop_add_bio(lo, old_bio);
 	spin_unlock_irq(&lo->lo_lock);
-	up(&lo->lo_bh_mutex);
+	complete(&lo->lo_bh_done);
 	return 0;
 
 out:
 	if (lo->lo_pending == 0)
-		up(&lo->lo_bh_mutex);
+		complete(&lo->lo_bh_done);
 	spin_unlock_irq(&lo->lo_lock);
 	bio_io_error(old_bio, old_bio->bi_size);
 	return 0;
@@ -593,23 +593,20 @@ static int loop_thread(void *data)
 	lo->lo_pending = 1;
 
 	/*
-	 * up sem, we are running
+	 * complete it, we are running
 	 */
-	up(&lo->lo_sem);
+	complete(&lo->lo_done);
 
 	for (;;) {
 		int pending;
 
-		/*
-		 * interruptible just to not contribute to load avg
-		 */
-		if (down_interruptible(&lo->lo_bh_mutex))
+		if (wait_for_completion_interruptible(&lo->lo_bh_done))
 			continue;
 
 		spin_lock_irq(&lo->lo_lock);
 
 		/*
-		 * could be upped because of tear-down, not pending work
+		 * could be completed because of tear-down, not pending work
 		 */
 		if (unlikely(!lo->lo_pending)) {
 			spin_unlock_irq(&lo->lo_lock);
@@ -632,7 +629,7 @@ static int loop_thread(void *data)
 			break;
 	}
 
-	up(&lo->lo_sem);
+	complete(&lo->lo_done);
 	return 0;
 }
 
@@ -843,7 +840,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
 	set_blocksize(bdev, lo_blocksize);
 
 	kernel_thread(loop_thread, lo, CLONE_KERNEL);
-	down(&lo->lo_sem);
+	wait_for_completion(&lo->lo_done);
 	return 0;
 
  out_putf:
@@ -909,10 +906,10 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 	lo->lo_state = Lo_rundown;
 	lo->lo_pending--;
 	if (!lo->lo_pending)
-		up(&lo->lo_bh_mutex);
+		complete(&lo->lo_bh_done);
 	spin_unlock_irq(&lo->lo_lock);
 
-	down(&lo->lo_sem);
+	wait_for_completion(&lo->lo_done);
 
 	lo->lo_backing_file = NULL;
 
@@ -1289,8 +1286,8 @@ static int __init loop_init(void)
 		if (!lo->lo_queue)
 			goto out_mem4;
 		init_MUTEX(&lo->lo_ctl_mutex);
-		init_MUTEX_LOCKED(&lo->lo_sem);
-		init_MUTEX_LOCKED(&lo->lo_bh_mutex);
+		init_completion(&lo->lo_done);
+		init_completion(&lo->lo_bh_done);
 		lo->lo_number = i;
 		spin_lock_init(&lo->lo_lock);
 		disk->major = LOOP_MAJOR;
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 40f63c9879d2..f96506782ebe 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -58,9 +58,9 @@ struct loop_device {
 	struct bio 		*lo_bio;
 	struct bio		*lo_biotail;
 	int			lo_state;
-	struct semaphore	lo_sem;
+	struct completion	lo_done;
+	struct completion	lo_bh_done;
 	struct semaphore	lo_ctl_mutex;
-	struct semaphore	lo_bh_mutex;
 	int			lo_pending;
 
 	request_queue_t		*lo_queue;
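
Completions fit this code better than semaphores because lo_sem and
lo_bh_mutex were used to signal "this event has happened" across threads, not
for mutual exclusion. A minimal sketch of the same start-up handshake
(assuming the modern kthread_run() helper rather than the kernel_thread()
call used in the patch):

    #include <linux/completion.h>
    #include <linux/kthread.h>
    #include <linux/err.h>

    static DECLARE_COMPLETION(thread_ready);

    static int worker(void *data)
    {
            complete(&thread_ready);   /* tell the starter we are running */
            /* ... service requests until told to stop ... */
            return 0;
    }

    static int start_worker(void)
    {
            struct task_struct *t = kthread_run(worker, NULL, "worker");

            if (IS_ERR(t))
                    return PTR_ERR(t);
            wait_for_completion(&thread_ready); /* block until worker runs */
            return 0;
    }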

commit 7892f2f48d165a34b0b8130c8a195dfd807b8cb6
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:25 2006 -0800

    [PATCH] mutex subsystem, semaphore to mutex: VFS, sb->s_lock
    
    This patch converts the superblock-lock semaphore to a mutex, affecting
    lock_super()/unlock_super(). Tested on ext3 and XFS.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index c3dbebdb9897..56bf76586019 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2150,7 +2150,7 @@ int ext3_force_commit(struct super_block *sb)
 
 static void ext3_write_super (struct super_block * sb)
 {
-	if (down_trylock(&sb->s_lock) == 0)
+	if (mutex_trylock(&sb->s_lock) != 0)
 		BUG();
 	sb->s_dirt = 0;
 }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 48bf7f0ce544..364d64bd5f10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -169,7 +169,7 @@ static match_table_t tokens = {
  */
 static void ocfs2_write_super(struct super_block *sb)
 {
-	if (down_trylock(&sb->s_lock) == 0)
+	if (mutex_trylock(&sb->s_lock) != 0)
 		BUG();
 	sb->s_dirt = 0;
 }
diff --git a/fs/super.c b/fs/super.c
index 0a30e51692cf..c177b92419c5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -72,7 +72,7 @@ static struct super_block *alloc_super(void)
 		INIT_HLIST_HEAD(&s->s_anon);
 		INIT_LIST_HEAD(&s->s_inodes);
 		init_rwsem(&s->s_umount);
-		sema_init(&s->s_lock, 1);
+		mutex_init(&s->s_lock);
 		down_write(&s->s_umount);
 		s->s_count = S_BIAS;
 		atomic_set(&s->s_active, 1);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 01654b218e42..92ae3e2067b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -821,7 +821,7 @@ struct super_block {
 	unsigned long		s_magic;
 	struct dentry		*s_root;
 	struct rw_semaphore	s_umount;
-	struct semaphore	s_lock;
+	struct mutex		s_lock;
 	int			s_count;
 	int			s_syncing;
 	int			s_need_sync_fs;
@@ -893,13 +893,13 @@ static inline int has_fs_excl(void)
 static inline void lock_super(struct super_block * sb)
 {
 	get_fs_excl();
-	down(&sb->s_lock);
+	mutex_lock(&sb->s_lock);
 }
 
 static inline void unlock_super(struct super_block * sb)
 {
 	put_fs_excl();
-	up(&sb->s_lock);
+	mutex_unlock(&sb->s_lock);
 }
 
 /*
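
Note the inverted tests in the two write_super() hunks: down_trylock()
returns 0 on success, while mutex_trylock() follows the spin_trylock()
convention and returns 1 on success, so the "this lock must already be held
here" assertion flips from == 0 to != 0. A side-by-side sketch of the two
conventions (illustrative, using the 2006-era semaphore API):

    #include <linux/mutex.h>
    #include <asm/semaphore.h>      /* 2006-era home of struct semaphore */

    static DECLARE_MUTEX(old_lock); /* a semaphore initialized to 1 */
    static DEFINE_MUTEX(new_lock);

    static void trylock_conventions(void)
    {
            if (down_trylock(&old_lock) == 0)   /* 0 means acquired */
                    up(&old_lock);

            if (mutex_trylock(&new_lock))       /* 1 means acquired */
                    mutex_unlock(&new_lock);
    }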

commit de5097c2e73f826302cd8957c225b3725e0c7553
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:21 2006 -0800

    [PATCH] mutex subsystem, more debugging code
    
    more mutex debugging: check for held locks during memory freeing,
    task exit, enable sysrq printouts, etc.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@infradead.org>

diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index c30a16df6440..e8a53552b13d 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -222,6 +222,10 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 {
 	if (PageHighMem(page))
 		return;
+	if (!enable)
+		mutex_debug_check_no_locks_freed(page_address(page),
+						 page_address(page+numpages));
+
 	/* the return value is ignored - the calls cannot fail,
 	 * large pages are disabled at boot time.
 	 */
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 145275ebdd7e..5765f672e853 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -153,6 +153,21 @@ static struct sysrq_key_op sysrq_mountro_op = {
 
 /* END SYNC SYSRQ HANDLERS BLOCK */
 
+#ifdef CONFIG_DEBUG_MUTEXES
+
+static void
+sysrq_handle_showlocks(int key, struct pt_regs *pt_regs, struct tty_struct *tty)
+{
+	mutex_debug_show_all_locks();
+}
+
+static struct sysrq_key_op sysrq_showlocks_op = {
+	.handler	= sysrq_handle_showlocks,
+	.help_msg	= "show-all-locks(D)",
+	.action_msg	= "Show Locks Held",
+};
+
+#endif
 
 /* SHOW SYSRQ HANDLERS BLOCK */
 
@@ -294,7 +309,11 @@ static struct sysrq_key_op *sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
 #else
 /* c */	NULL,
 #endif
+#ifdef CONFIG_DEBUG_MUTEXES
+/* d */ &sysrq_showlocks_op,
+#else
 /* d */ NULL,
+#endif
 /* e */	&sysrq_term_op,
 /* f */	&sysrq_moom_op,
 /* g */	NULL,
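
With this handler registered, SysRq-D (for example via
echo d > /proc/sysrq-trigger, assuming CONFIG_MAGIC_SYSRQ is enabled
alongside CONFIG_DEBUG_MUTEXES) dumps every mutex currently held in the
system through mutex_debug_show_all_locks().
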
diff --git a/include/linux/mm.h b/include/linux/mm.h
index df80e63903b5..3f1fafc0245e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -13,6 +13,7 @@
 #include <linux/rbtree.h>
 #include <linux/prio_tree.h>
 #include <linux/fs.h>
+#include <linux/mutex.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -1024,6 +1025,9 @@ static inline void vm_stat_account(struct mm_struct *mm,
 static inline void
 kernel_map_pages(struct page *page, int numpages, int enable)
 {
+	if (!PageHighMem(page) && !enable)
+		mutex_debug_check_no_locks_freed(page_address(page),
+						 page_address(page + numpages));
 }
 #endif
 
diff --git a/kernel/exit.c b/kernel/exit.c
index caceabf3f230..309a46fa16f8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -29,6 +29,7 @@
 #include <linux/syscalls.h>
 #include <linux/signal.h>
 #include <linux/cn_proc.h>
+#include <linux/mutex.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -869,6 +870,10 @@ fastcall NORET_TYPE void do_exit(long code)
 	mpol_free(tsk->mempolicy);
 	tsk->mempolicy = NULL;
 #endif
+	/*
+	 * If DEBUG_MUTEXES is on, make sure we are holding no locks:
+	 */
+	mutex_debug_check_no_locks_held(tsk);
 
 	/* PF_DEAD causes final put_task_struct after we schedule. */
 	preempt_disable();
diff --git a/kernel/sched.c b/kernel/sched.c
index 92733091154c..34a945bcc022 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4386,6 +4386,7 @@ void show_state(void)
 	} while_each_thread(g, p);
 
 	read_unlock(&tasklist_lock);
+	mutex_debug_show_all_locks();
 }
 
 /**
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e0e84924171b..a5e6891f7bb6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -415,6 +415,9 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	int reserved = 0;
 
 	arch_free_page(page, order);
+	if (!PageHighMem(page))
+		mutex_debug_check_no_locks_freed(page_address(page),
+			page_address(page+(1<<order)));
 
 #ifndef CONFIG_MMU
 	for (i = 1 ; i < (1 << order) ; ++i)
diff --git a/mm/slab.c b/mm/slab.c
index 1c46c6383552..33aab345cd4a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3071,6 +3071,7 @@ void kfree(const void *objp)
 	local_irq_save(flags);
 	kfree_debugcheck(objp);
 	c = page_get_cache(virt_to_page(objp));
+	mutex_debug_check_no_locks_freed(objp, objp+obj_reallen(c));
 	__cache_free(c, (void *)objp);
 	local_irq_restore(flags);
 }
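
What these hooks catch is freeing or unmapping memory that still contains a
held lock. With CONFIG_DEBUG_MUTEXES enabled, a teardown bug like the
following hypothetical sketch trips the "active lock ... freed!" report via
the kfree() hook above:

    #include <linux/mutex.h>
    #include <linux/slab.h>

    struct obj {
            struct mutex lock;
            int data;
    };

    static void buggy_teardown(struct obj *o)
    {
            mutex_lock(&o->lock);
            /* BUG: o->lock is held and still on debug_mutex_held_locks,
             * so kfree() -> mutex_debug_check_no_locks_freed() fires. */
            kfree(o);
    }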

commit 408894ee4dd4debfdedd472eb4d8414892fc90f6
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:20 2006 -0800

    [PATCH] mutex subsystem, debugging code
    
    mutex implementation - add debugging code.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@infradead.org>

diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
new file mode 100644
index 000000000000..0ccd8f983b50
--- /dev/null
+++ b/include/linux/mutex-debug.h
@@ -0,0 +1,21 @@
+#ifndef __LINUX_MUTEX_DEBUG_H
+#define __LINUX_MUTEX_DEBUG_H
+
+/*
+ * Mutexes - debugging helpers:
+ */
+
+#define __DEBUG_MUTEX_INITIALIZER(lockname) \
+	, .held_list = LIST_HEAD_INIT(lockname.held_list), \
+	  .name = #lockname , .magic = &lockname
+
+#define mutex_init(sem)		__mutex_init(sem, __FUNCTION__)
+
+extern void FASTCALL(mutex_destroy(struct mutex *lock));
+
+extern void mutex_debug_show_all_locks(void);
+extern void mutex_debug_show_held_locks(struct task_struct *filter);
+extern void mutex_debug_check_no_locks_held(struct task_struct *task);
+extern void mutex_debug_check_no_locks_freed(const void *from, const void *to);
+
+#endif
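
The leading comma in __DEBUG_MUTEX_INITIALIZER is deliberate: it expands to
nothing in the !DEBUG case, and in the debug case it splices extra designated
initializers onto the end of __MUTEX_INITIALIZER (defined in the core patch
below). As a sketch, DEFINE_MUTEX(m) with debugging on therefore expands to:

    struct mutex m = { .count = ATOMIC_INIT(1)
                     , .wait_lock = SPIN_LOCK_UNLOCKED
                     , .wait_list = LIST_HEAD_INIT(m.wait_list)
                     , .held_list = LIST_HEAD_INIT(m.held_list)
                     , .name = "m", .magic = &m };
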
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78eb92ae4d94..85b53f87c703 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -817,6 +817,11 @@ struct task_struct {
 /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
 	spinlock_t proc_lock;
 
+#ifdef CONFIG_DEBUG_MUTEXES
+	/* mutex deadlock detection */
+	struct mutex_waiter *blocked_on;
+#endif
+
 /* journalling filesystem info */
 	void *journal_info;
 
diff --git a/kernel/Makefile b/kernel/Makefile
index de580b4d54a4..a940bac02837 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,6 +9,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o
 
+obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o spinlock.o
diff --git a/kernel/fork.c b/kernel/fork.c
index 72e3252c6763..b18d64554feb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -979,6 +979,10 @@ static task_t *copy_process(unsigned long clone_flags,
  	}
 #endif
 
+#ifdef CONFIG_DEBUG_MUTEXES
+	p->blocked_on = NULL; /* not blocked yet */
+#endif
+
 	p->tgid = p->pid;
 	if (clone_flags & CLONE_THREAD)
 		p->tgid = current->tgid;
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
new file mode 100644
index 000000000000..4fcb051a8b9e
--- /dev/null
+++ b/kernel/mutex-debug.c
@@ -0,0 +1,464 @@
+/*
+ * kernel/mutex-debug.c
+ *
+ * Debugging code for mutexes
+ *
+ * Started by Ingo Molnar:
+ *
+ *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * lock debugging, locking tree, deadlock detection started by:
+ *
+ *  Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
+ *  Released under the General Public License (GPL).
+ */
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+
+#include <asm/mutex.h>
+
+#include "mutex-debug.h"
+
+/*
+ * We need a global lock when we walk through the multi-process
+ * lock tree. Only used in the deadlock-debugging case.
+ */
+DEFINE_SPINLOCK(debug_mutex_lock);
+
+/*
+ * All locks held by all tasks, in a single global list:
+ */
+LIST_HEAD(debug_mutex_held_locks);
+
+/*
+ * In the debug case we carry the caller's instruction pointer into
+ * other functions, but we dont want the function argument overhead
+ * in the nondebug case - hence these macros:
+ */
+#define __IP_DECL__		, unsigned long ip
+#define __IP__			, ip
+#define __RET_IP__		, (unsigned long)__builtin_return_address(0)
+
+/*
+ * "mutex debugging enabled" flag. We turn it off when we detect
+ * the first problem because we dont want to recurse back
+ * into the tracing code when doing error printk or
+ * executing a BUG():
+ */
+int debug_mutex_on = 1;
+
+static void printk_task(struct task_struct *p)
+{
+	if (p)
+		printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
+	else
+		printk("<none>");
+}
+
+static void printk_ti(struct thread_info *ti)
+{
+	if (ti)
+		printk_task(ti->task);
+	else
+		printk("<none>");
+}
+
+static void printk_task_short(struct task_struct *p)
+{
+	if (p)
+		printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
+	else
+		printk("<none>");
+}
+
+static void printk_lock(struct mutex *lock, int print_owner)
+{
+	printk(" [%p] {%s}\n", lock, lock->name);
+
+	if (print_owner && lock->owner) {
+		printk(".. held by:  ");
+		printk_ti(lock->owner);
+		printk("\n");
+	}
+	if (lock->owner) {
+		printk("... acquired at:               ");
+		print_symbol("%s\n", lock->acquire_ip);
+	}
+}
+
+/*
+ * printk locks held by a task:
+ */
+static void show_task_locks(struct task_struct *p)
+{
+	switch (p->state) {
+	case TASK_RUNNING:		printk("R"); break;
+	case TASK_INTERRUPTIBLE:	printk("S"); break;
+	case TASK_UNINTERRUPTIBLE:	printk("D"); break;
+	case TASK_STOPPED:		printk("T"); break;
+	case EXIT_ZOMBIE:		printk("Z"); break;
+	case EXIT_DEAD:			printk("X"); break;
+	default:			printk("?"); break;
+	}
+	printk_task(p);
+	if (p->blocked_on) {
+		struct mutex *lock = p->blocked_on->lock;
+
+		printk(" blocked on mutex:");
+		printk_lock(lock, 1);
+	} else
+		printk(" (not blocked on mutex)\n");
+}
+
+/*
+ * printk all locks held in the system (if filter == NULL),
+ * or all locks belonging to a single task (if filter != NULL):
+ */
+void show_held_locks(struct task_struct *filter)
+{
+	struct list_head *curr, *cursor = NULL;
+	struct mutex *lock;
+	struct thread_info *t;
+	unsigned long flags;
+	int count = 0;
+
+	if (filter) {
+		printk("------------------------------\n");
+		printk("| showing all locks held by: |  (");
+		printk_task_short(filter);
+		printk("):\n");
+		printk("------------------------------\n");
+	} else {
+		printk("---------------------------\n");
+		printk("| showing all locks held: |\n");
+		printk("---------------------------\n");
+	}
+
+	/*
+	 * Play safe and acquire the global trace lock. We
+	 * cannot printk with that lock held so we iterate
+	 * very carefully:
+	 */
+next:
+	debug_spin_lock_save(&debug_mutex_lock, flags);
+	list_for_each(curr, &debug_mutex_held_locks) {
+		if (cursor && curr != cursor)
+			continue;
+		lock = list_entry(curr, struct mutex, held_list);
+		t = lock->owner;
+		if (filter && (t != filter->thread_info))
+			continue;
+		count++;
+		cursor = curr->next;
+		debug_spin_lock_restore(&debug_mutex_lock, flags);
+
+		printk("\n#%03d:            ", count);
+		printk_lock(lock, filter ? 0 : 1);
+		goto next;
+	}
+	debug_spin_lock_restore(&debug_mutex_lock, flags);
+	printk("\n");
+}
+
+void mutex_debug_show_all_locks(void)
+{
+	struct task_struct *g, *p;
+	int count = 10;
+	int unlock = 1;
+
+	printk("\nShowing all blocking locks in the system:\n");
+
+	/*
+	 * Here we try to get the tasklist_lock as hard as possible,
+	 * if not successful after 2 seconds we ignore it (but keep
+	 * trying). This is to enable a debug printout even if a
+	 * tasklist_lock-holding task deadlocks or crashes.
+	 */
+retry:
+	if (!read_trylock(&tasklist_lock)) {
+		if (count == 10)
+			printk("hm, tasklist_lock locked, retrying... ");
+		if (count) {
+			count--;
+			printk(" #%d", 10-count);
+			mdelay(200);
+			goto retry;
+		}
+		printk(" ignoring it.\n");
+		unlock = 0;
+	}
+	if (count != 10)
+		printk(" locked it.\n");
+
+	do_each_thread(g, p) {
+		show_task_locks(p);
+		if (!unlock)
+			if (read_trylock(&tasklist_lock))
+				unlock = 1;
+	} while_each_thread(g, p);
+
+	printk("\n");
+	show_held_locks(NULL);
+	printk("=============================================\n\n");
+
+	if (unlock)
+		read_unlock(&tasklist_lock);
+}
+
+static void report_deadlock(struct task_struct *task, struct mutex *lock,
+			    struct mutex *lockblk, unsigned long ip)
+{
+	printk("\n%s/%d is trying to acquire this lock:\n",
+		current->comm, current->pid);
+	printk_lock(lock, 1);
+	printk("... trying at:                 ");
+	print_symbol("%s\n", ip);
+	show_held_locks(current);
+
+	if (lockblk) {
+		printk("but %s/%d is deadlocking current task %s/%d!\n\n",
+			task->comm, task->pid, current->comm, current->pid);
+		printk("\n%s/%d is blocked on this lock:\n",
+			task->comm, task->pid);
+		printk_lock(lockblk, 1);
+
+		show_held_locks(task);
+
+		printk("\n%s/%d's [blocked] stackdump:\n\n",
+			task->comm, task->pid);
+		show_stack(task, NULL);
+	}
+
+	printk("\n%s/%d's [current] stackdump:\n\n",
+		current->comm, current->pid);
+	dump_stack();
+	mutex_debug_show_all_locks();
+	printk("[ turning off deadlock detection. Please report this. ]\n\n");
+	local_irq_disable();
+}
+
+/*
+ * Recursively check for mutex deadlocks:
+ */
+static int check_deadlock(struct mutex *lock, int depth,
+			  struct thread_info *ti, unsigned long ip)
+{
+	struct mutex *lockblk;
+	struct task_struct *task;
+
+	if (!debug_mutex_on)
+		return 0;
+
+	ti = lock->owner;
+	if (!ti)
+		return 0;
+
+	task = ti->task;
+	lockblk = NULL;
+	if (task->blocked_on)
+		lockblk = task->blocked_on->lock;
+
+	/* Self-deadlock: */
+	if (current == task) {
+		DEBUG_OFF();
+		if (depth)
+			return 1;
+		printk("\n==========================================\n");
+		printk(  "[ BUG: lock recursion deadlock detected! |\n");
+		printk(  "------------------------------------------\n");
+		report_deadlock(task, lock, NULL, ip);
+		return 0;
+	}
+
+	/* Ugh, something corrupted the lock data structure? */
+	if (depth > 20) {
+		DEBUG_OFF();
+		printk("\n===========================================\n");
+		printk(  "[ BUG: infinite lock dependency detected!? |\n");
+		printk(  "-------------------------------------------\n");
+		report_deadlock(task, lock, lockblk, ip);
+		return 0;
+	}
+
+	/* Recursively check for dependencies: */
+	if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) {
+		printk("\n============================================\n");
+		printk(  "[ BUG: circular locking deadlock detected! ]\n");
+		printk(  "--------------------------------------------\n");
+		report_deadlock(task, lock, lockblk, ip);
+		return 0;
+	}
+	return 0;
+}
+
+/*
+ * Called when a task exits, this function checks whether the
+ * task is holding any locks, and reports the first one if so:
+ */
+void mutex_debug_check_no_locks_held(struct task_struct *task)
+{
+	struct list_head *curr, *next;
+	struct thread_info *t;
+	unsigned long flags;
+	struct mutex *lock;
+
+	if (!debug_mutex_on)
+		return;
+
+	debug_spin_lock_save(&debug_mutex_lock, flags);
+	list_for_each_safe(curr, next, &debug_mutex_held_locks) {
+		lock = list_entry(curr, struct mutex, held_list);
+		t = lock->owner;
+		if (t != task->thread_info)
+			continue;
+		list_del_init(curr);
+		DEBUG_OFF();
+		debug_spin_lock_restore(&debug_mutex_lock, flags);
+
+		printk("BUG: %s/%d, lock held at task exit time!\n",
+			task->comm, task->pid);
+		printk_lock(lock, 1);
+		if (lock->owner != task->thread_info)
+			printk("exiting task is not even the owner??\n");
+		return;
+	}
+	debug_spin_lock_restore(&debug_mutex_lock, flags);
+}
+
+/*
+ * Called when kernel memory is freed (or unmapped), or if a mutex
+ * is destroyed or reinitialized - this code checks whether there is
+ * any held lock in the memory range of <from> to <to>:
+ */
+void mutex_debug_check_no_locks_freed(const void *from, const void *to)
+{
+	struct list_head *curr, *next;
+	unsigned long flags;
+	struct mutex *lock;
+	void *lock_addr;
+
+	if (!debug_mutex_on)
+		return;
+
+	debug_spin_lock_save(&debug_mutex_lock, flags);
+	list_for_each_safe(curr, next, &debug_mutex_held_locks) {
+		lock = list_entry(curr, struct mutex, held_list);
+		lock_addr = lock;
+		if (lock_addr < from || lock_addr >= to)
+			continue;
+		list_del_init(curr);
+		DEBUG_OFF();
+		debug_spin_lock_restore(&debug_mutex_lock, flags);
+
+		printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
+			current->comm, current->pid, lock, from, to);
+		dump_stack();
+		printk_lock(lock, 1);
+		if (lock->owner != current_thread_info())
+			printk("freeing task is not even the owner??\n");
+		return;
+	}
+	debug_spin_lock_restore(&debug_mutex_lock, flags);
+}
+
+/*
+ * Must be called with lock->wait_lock held.
+ */
+void debug_mutex_set_owner(struct mutex *lock,
+			   struct thread_info *new_owner __IP_DECL__)
+{
+	lock->owner = new_owner;
+	DEBUG_WARN_ON(!list_empty(&lock->held_list));
+	if (debug_mutex_on) {
+		list_add_tail(&lock->held_list, &debug_mutex_held_locks);
+		lock->acquire_ip = ip;
+	}
+}
+
+void debug_mutex_init_waiter(struct mutex_waiter *waiter)
+{
+	memset(waiter, 0x11, sizeof(*waiter));
+	waiter->magic = waiter;
+	INIT_LIST_HEAD(&waiter->list);
+}
+
+void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
+{
+	SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock));
+	DEBUG_WARN_ON(list_empty(&lock->wait_list));
+	DEBUG_WARN_ON(waiter->magic != waiter);
+	DEBUG_WARN_ON(list_empty(&waiter->list));
+}
+
+void debug_mutex_free_waiter(struct mutex_waiter *waiter)
+{
+	DEBUG_WARN_ON(!list_empty(&waiter->list));
+	memset(waiter, 0x22, sizeof(*waiter));
+}
+
+void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+			    struct thread_info *ti __IP_DECL__)
+{
+	SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock));
+	check_deadlock(lock, 0, ti, ip);
+	/* Mark the current thread as blocked on the lock: */
+	ti->task->blocked_on = waiter;
+	waiter->lock = lock;
+}
+
+void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+			 struct thread_info *ti)
+{
+	DEBUG_WARN_ON(list_empty(&waiter->list));
+	DEBUG_WARN_ON(waiter->task != ti->task);
+	DEBUG_WARN_ON(ti->task->blocked_on != waiter);
+	ti->task->blocked_on = NULL;
+
+	list_del_init(&waiter->list);
+	waiter->task = NULL;
+}
+
+void debug_mutex_unlock(struct mutex *lock)
+{
+	DEBUG_WARN_ON(lock->magic != lock);
+	DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
+	DEBUG_WARN_ON(lock->owner != current_thread_info());
+	if (debug_mutex_on) {
+		DEBUG_WARN_ON(list_empty(&lock->held_list));
+		list_del_init(&lock->held_list);
+	}
+}
+
+void debug_mutex_init(struct mutex *lock, const char *name)
+{
+	/*
+	 * Make sure we are not reinitializing a held lock:
+	 */
+	mutex_debug_check_no_locks_freed((void *)lock, (void *)(lock + 1));
+	lock->owner = NULL;
+	INIT_LIST_HEAD(&lock->held_list);
+	lock->name = name;
+	lock->magic = lock;
+}
+
+/***
+ * mutex_destroy - mark a mutex unusable
+ * @lock: the mutex to be destroyed
+ *
+ * This function marks the mutex uninitialized, and any subsequent
+ * use of the mutex is forbidden. The mutex must not be locked when
+ * this function is called.
+ */
+void fastcall mutex_destroy(struct mutex *lock)
+{
+	DEBUG_WARN_ON(mutex_is_locked(lock));
+	lock->magic = NULL;
+}
+
+EXPORT_SYMBOL_GPL(mutex_destroy);
+
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
new file mode 100644
index 000000000000..fd384050acb1
--- /dev/null
+++ b/kernel/mutex-debug.h
@@ -0,0 +1,134 @@
+/*
+ * Mutexes: blocking mutual exclusion locks
+ *
+ * started by Ingo Molnar:
+ *
+ *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * This file contains mutex debugging related internal declarations,
+ * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case.
+ * More details are in kernel/mutex-debug.c.
+ */
+
+extern spinlock_t debug_mutex_lock;
+extern struct list_head debug_mutex_held_locks;
+extern int debug_mutex_on;
+
+/*
+ * In the debug case we carry the caller's instruction pointer into
+ * other functions, but we dont want the function argument overhead
+ * in the nondebug case - hence these macros:
+ */
+#define __IP_DECL__		, unsigned long ip
+#define __IP__			, ip
+#define __RET_IP__		, (unsigned long)__builtin_return_address(0)
+
+/*
+ * This must be called with lock->wait_lock held.
+ */
+extern void debug_mutex_set_owner(struct mutex *lock,
+				  struct thread_info *new_owner __IP_DECL__);
+
+static inline void debug_mutex_clear_owner(struct mutex *lock)
+{
+	lock->owner = NULL;
+}
+
+extern void debug_mutex_init_waiter(struct mutex_waiter *waiter);
+extern void debug_mutex_wake_waiter(struct mutex *lock,
+				    struct mutex_waiter *waiter);
+extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
+extern void debug_mutex_add_waiter(struct mutex *lock,
+				   struct mutex_waiter *waiter,
+				   struct thread_info *ti __IP_DECL__);
+extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+				struct thread_info *ti);
+extern void debug_mutex_unlock(struct mutex *lock);
+extern void debug_mutex_init(struct mutex *lock, const char *name);
+
+#define debug_spin_lock(lock)				\
+	do {						\
+		local_irq_disable();			\
+		if (debug_mutex_on)			\
+			spin_lock(lock);		\
+	} while (0)
+
+#define debug_spin_unlock(lock)				\
+	do {						\
+		if (debug_mutex_on)			\
+			spin_unlock(lock);		\
+		local_irq_enable();			\
+		preempt_check_resched();		\
+	} while (0)
+
+#define debug_spin_lock_save(lock, flags)		\
+	do {						\
+		local_irq_save(flags);			\
+		if (debug_mutex_on)			\
+			spin_lock(lock);		\
+	} while (0)
+
+#define debug_spin_lock_restore(lock, flags)		\
+	do {						\
+		if (debug_mutex_on)			\
+			spin_unlock(lock);		\
+		local_irq_restore(flags);		\
+		preempt_check_resched();		\
+	} while (0)
+
+#define spin_lock_mutex(lock)				\
+	do {						\
+		struct mutex *l = container_of(lock, struct mutex, wait_lock); \
+							\
+		DEBUG_WARN_ON(in_interrupt());		\
+		debug_spin_lock(&debug_mutex_lock);	\
+		spin_lock(lock);			\
+		DEBUG_WARN_ON(l->magic != l);		\
+	} while (0)
+
+#define spin_unlock_mutex(lock)				\
+	do {						\
+		spin_unlock(lock);			\
+		debug_spin_unlock(&debug_mutex_lock);	\
+	} while (0)
+
+#define DEBUG_OFF()					\
+do {							\
+	if (debug_mutex_on) {				\
+		debug_mutex_on = 0;			\
+		console_verbose();			\
+		if (spin_is_locked(&debug_mutex_lock))	\
+			spin_unlock(&debug_mutex_lock);	\
+	}						\
+} while (0)
+
+#define DEBUG_BUG()					\
+do {							\
+	if (debug_mutex_on) {				\
+		DEBUG_OFF();				\
+		BUG();					\
+	}						\
+} while (0)
+
+#define DEBUG_WARN_ON(c)				\
+do {							\
+	if (unlikely(c && debug_mutex_on)) {		\
+		DEBUG_OFF();				\
+		WARN_ON(1);				\
+	}						\
+} while (0)
+
+# define DEBUG_BUG_ON(c)				\
+do {							\
+	if (unlikely(c))				\
+		DEBUG_BUG();				\
+} while (0)
+
+#ifdef CONFIG_SMP
+# define SMP_DEBUG_WARN_ON(c)			DEBUG_WARN_ON(c)
+# define SMP_DEBUG_BUG_ON(c)			DEBUG_BUG_ON(c)
+#else
+# define SMP_DEBUG_WARN_ON(c)			do { } while (0)
+# define SMP_DEBUG_BUG_ON(c)			do { } while (0)
+#endif
+
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c48260fb8fd9..1fcd856edec1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -95,6 +95,14 @@ config DEBUG_PREEMPT
 	  if kernel code uses it in a preemption-unsafe way. Also, the kernel
 	  will detect preemption count underflows.
 
+config DEBUG_MUTEXES
+	bool "Mutex debugging, deadlock detection"
+	default y
+	depends on DEBUG_KERNEL
+	help
+	 This allows mutex semantics violations and mutex related deadlocks
+	 (lockups) to be detected and reported automatically.
+
 config DEBUG_SPINLOCK
 	bool "Spinlock debugging"
 	depends on DEBUG_KERNEL
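
The simplest violation this option reports is lock recursion: check_deadlock()
in kernel/mutex-debug.c above sees that the owner of the contended lock is
current and prints the "lock recursion deadlock detected" banner. A
hypothetical two-call sketch that trips it:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(m);

    static void recursive_bug(void)
    {
            mutex_lock(&m);
            mutex_lock(&m); /* current already owns m: check_deadlock() fires */
    }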

commit f3f54ffa703c6298240ffd69616451d645bae4d5
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:20 2006 -0800

    [PATCH] mutex subsystem, documentation
    
    Add mutex design related documentation.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@infradead.org>

diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 90dc2de8e0af..158ffe9bfade 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -222,7 +222,7 @@
    <title>Two Main Types of Kernel Locks: Spinlocks and Semaphores</title>
 
    <para>
-     There are two main types of kernel locks.  The fundamental type
+     There are three main types of kernel locks.  The fundamental type
      is the spinlock 
      (<filename class="headerfile">include/asm/spinlock.h</filename>),
      which is a very simple single-holder lock: if you can't get the 
@@ -230,16 +230,22 @@
      very small and fast, and can be used anywhere.
    </para>
    <para>
-     The second type is a semaphore
+     The second type is a mutex
+     (<filename class="headerfile">include/linux/mutex.h</filename>): it
+     is like a spinlock, but you may block holding a mutex.
+     If you can't lock a mutex, your task will suspend itself, and be woken
+     up when the mutex is released.  This means the CPU can do something
+     else while you are waiting.  There are many cases when you simply
+     can't sleep (see <xref linkend="sleeping-things"/>), and so have to
+     use a spinlock instead.
+   </para>
+   <para>
+     The third type is a semaphore
      (<filename class="headerfile">include/asm/semaphore.h</filename>): it
      can have more than one holder at any time (the number decided at
      initialization time), although it is most commonly used as a
-     single-holder lock (a mutex).  If you can't get a semaphore,
-     your task will put itself on the queue, and be woken up when the
-     semaphore is released.  This means the CPU will do something
-     else while you are waiting, but there are many cases when you
-     simply can't sleep (see <xref linkend="sleeping-things"/>), and so
-     have to use a spinlock instead.
+     single-holder lock (a mutex).  If you can't get a semaphore, your
+     task will be suspended and later on woken up - just like for mutexes.
    </para>
    <para>
      Neither type of lock is recursive: see
diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt
new file mode 100644
index 000000000000..cbf79881a41c
--- /dev/null
+++ b/Documentation/mutex-design.txt
@@ -0,0 +1,135 @@
+Generic Mutex Subsystem
+
+started by Ingo Molnar <mingo@redhat.com>
+
+  "Why on earth do we need a new mutex subsystem, and what's wrong
+   with semaphores?"
+
+firstly, there's nothing wrong with semaphores. But if the simpler
+mutex semantics are sufficient for your code, then there are a couple
+of advantages of mutexes:
+
+ - 'struct mutex' is smaller on most architectures: .e.g on x86,
+   'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes.
+   A smaller structure size means less RAM footprint, and better
+   CPU-cache utilization.
+
+ - tighter code. On x86 i get the following .text sizes when
+   switching all mutex-alike semaphores in the kernel to the mutex
+   subsystem:
+
+        text    data     bss     dec     hex filename
+     3280380  868188  396860 4545428  455b94 vmlinux-semaphore
+     3255329  865296  396732 4517357  44eded vmlinux-mutex
+
+   that's 25051 bytes of code saved, or a 0.76% win - off the hottest
+   codepaths of the kernel. (The .data savings are 2892 bytes, or 0.33%)
+   Smaller code means better icache footprint, which is one of the
+   major optimization goals in the Linux kernel currently.
+
+ - the mutex subsystem is slightly faster and has better scalability for
+   contended workloads. On an 8-way x86 system, running a mutex-based
+   kernel and testing creat+unlink+close (of separate, per-task files)
+   in /tmp with 16 parallel tasks, the average number of ops/sec is:
+
+    Semaphores:                        Mutexes:
+
+    $ ./test-mutex V 16 10             $ ./test-mutex V 16 10
+    8 CPUs, running 16 tasks.          8 CPUs, running 16 tasks.
+    checking VFS performance.          checking VFS performance.
+    avg loops/sec:      34713          avg loops/sec:      84153
+    CPU utilization:    63%            CPU utilization:    22%
+
+   i.e. in this workload, the mutex based kernel was 2.4 times faster
+   than the semaphore based kernel, _and_ it also had 2.8 times less CPU
+   utilization. (In terms of 'ops per CPU cycle', the semaphore kernel
+   performed 551 ops/sec per 1% of CPU time used, while the mutex kernel
+   performed 3825 ops/sec per 1% of CPU time used - it was 6.9 times
+   more efficient.)
+
+   the scalability difference is visible even on a 2-way P4 HT box:
+
+    Semaphores:                        Mutexes:
+
+    $ ./test-mutex V 16 10             $ ./test-mutex V 16 10
+    4 CPUs, running 16 tasks.          8 CPUs, running 16 tasks.
+    checking VFS performance.          checking VFS performance.
+    avg loops/sec:      127659         avg loops/sec:      181082
+    CPU utilization:    100%           CPU utilization:    34%
+
+   (the straight performance advantage of mutexes is 41%, the per-cycle
+    efficiency of mutexes is 4.1 times better.)
+
+ - there are no fastpath tradeoffs, the mutex fastpath is just as tight
+   as the semaphore fastpath. On x86, the locking fastpath is 2
+   instructions:
+
+    c0377ccb <mutex_lock>:
+    c0377ccb:       f0 ff 08                lock decl (%eax)
+    c0377cce:       78 0e                   js     c0377cde <.text.lock.mutex>
+    c0377cd0:       c3                      ret
+
+   the unlocking fastpath is equally tight:
+
+    c0377cd1 <mutex_unlock>:
+    c0377cd1:       f0 ff 00                lock incl (%eax)
+    c0377cd4:       7e 0f                   jle    c0377ce5 <.text.lock.mutex+0x7>
+    c0377cd6:       c3                      ret
+
+ - 'struct mutex' semantics are well-defined and are enforced if
+   CONFIG_DEBUG_MUTEXES is turned on. Semaphores on the other hand have
+   virtually no debugging code or instrumentation. The mutex subsystem
+   checks and enforces the following rules:
+
+   * - only one task can hold the mutex at a time
+   * - only the owner can unlock the mutex
+   * - multiple unlocks are not permitted
+   * - recursive locking is not permitted
+   * - a mutex object must be initialized via the API
+   * - a mutex object must not be initialized via memset or copying
+   * - task may not exit with mutex held
+   * - memory areas where held locks reside must not be freed
+   * - held mutexes must not be reinitialized
+   * - mutexes may not be used in irq contexts
+
+   furthermore, there are also convenience features in the debugging
+   code:
+
+   * - uses symbolic names of mutexes, whenever they are printed in debug output
+   * - point-of-acquire tracking, symbolic lookup of function names
+   * - list of all locks held in the system, printout of them
+   * - owner tracking
+   * - detects self-recursing locks and prints out all relevant info
+   * - detects multi-task circular deadlocks and prints out all affected
+   *   locks and tasks (and only those tasks)
+
+Disadvantages
+-------------
+
+The stricter mutex API means you cannot use mutexes the same way you
+can use semaphores: e.g. they cannot be used from an interrupt context,
+nor can they be unlocked from a different context that which acquired
+it. [ I'm not aware of any other (e.g. performance) disadvantages from
+using mutexes at the moment, please let me know if you find any. ]
+
+Implementation of mutexes
+-------------------------
+
+'struct mutex' is the new mutex type, defined in include/linux/mutex.h
+and implemented in kernel/mutex.c. It is a counter-based mutex with a
+spinlock and a wait-list. The counter has 3 states: 1 for "unlocked",
+0 for "locked" and negative numbers (usually -1) for "locked, potential
+waiters queued".
+
+the APIs of 'struct mutex' have been streamlined:
+
+ DEFINE_MUTEX(name);
+
+ mutex_init(mutex);
+
+ void mutex_lock(struct mutex *lock);
+ int  mutex_lock_interruptible(struct mutex *lock);
+ int  mutex_trylock(struct mutex *lock);
+ void mutex_unlock(struct mutex *lock);
+ int  mutex_is_locked(struct mutex *lock);
+
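
A minimal usage sketch of the streamlined API (hypothetical names):

    #include <linux/mutex.h>
    #include <linux/errno.h>

    static DEFINE_MUTEX(cache_lock);  /* statically initialized, unlocked */
    static int cache_value;

    static int cache_set(int v)
    {
            if (mutex_lock_interruptible(&cache_lock))
                    return -EINTR;  /* a signal arrived while sleeping */
            cache_value = v;
            mutex_unlock(&cache_lock);
            return 0;
    }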

commit 6053ee3b32e3437e8c1e72687850f436e779bd49
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:19 2006 -0800

    [PATCH] mutex subsystem, core
    
    mutex implementation, core files: just the basic subsystem, no users of it.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@infradead.org>

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
new file mode 100644
index 000000000000..9bce0fee68d4
--- /dev/null
+++ b/include/linux/mutex.h
@@ -0,0 +1,119 @@
+/*
+ * Mutexes: blocking mutual exclusion locks
+ *
+ * started by Ingo Molnar:
+ *
+ *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * This file contains the main data structure and API definitions.
+ */
+#ifndef __LINUX_MUTEX_H
+#define __LINUX_MUTEX_H
+
+#include <linux/list.h>
+#include <linux/spinlock_types.h>
+
+#include <asm/atomic.h>
+
+/*
+ * Simple, straightforward mutexes with strict semantics:
+ *
+ * - only one task can hold the mutex at a time
+ * - only the owner can unlock the mutex
+ * - multiple unlocks are not permitted
+ * - recursive locking is not permitted
+ * - a mutex object must be initialized via the API
+ * - a mutex object must not be initialized via memset or copying
+ * - task may not exit with mutex held
+ * - memory areas where held locks reside must not be freed
+ * - held mutexes must not be reinitialized
+ * - mutexes may not be used in irq contexts
+ *
+ * These semantics are fully enforced when DEBUG_MUTEXES is
+ * enabled. Furthermore, besides enforcing the above rules, the mutex
+ * debugging code also implements a number of additional features
+ * that make lock debugging easier and faster:
+ *
+ * - uses symbolic names of mutexes, whenever they are printed in debug output
+ * - point-of-acquire tracking, symbolic lookup of function names
+ * - list of all locks held in the system, printout of them
+ * - owner tracking
+ * - detects self-recursing locks and prints out all relevant info
+ * - detects multi-task circular deadlocks and prints out all affected
+ *   locks and tasks (and only those tasks)
+ */
+struct mutex {
+	/* 1: unlocked, 0: locked, negative: locked, possible waiters */
+	atomic_t		count;
+	spinlock_t		wait_lock;
+	struct list_head	wait_list;
+#ifdef CONFIG_DEBUG_MUTEXES
+	struct thread_info	*owner;
+	struct list_head	held_list;
+	unsigned long		acquire_ip;
+	const char 		*name;
+	void			*magic;
+#endif
+};
+
+/*
+ * This is the control structure for tasks blocked on mutex,
+ * which resides on the blocked task's kernel stack:
+ */
+struct mutex_waiter {
+	struct list_head	list;
+	struct task_struct	*task;
+#ifdef CONFIG_DEBUG_MUTEXES
+	struct mutex		*lock;
+	void			*magic;
+#endif
+};
+
+#ifdef CONFIG_DEBUG_MUTEXES
+# include <linux/mutex-debug.h>
+#else
+# define __DEBUG_MUTEX_INITIALIZER(lockname)
+# define mutex_init(mutex)			__mutex_init(mutex, NULL)
+# define mutex_destroy(mutex)				do { } while (0)
+# define mutex_debug_show_all_locks()			do { } while (0)
+# define mutex_debug_show_held_locks(p)			do { } while (0)
+# define mutex_debug_check_no_locks_held(task)		do { } while (0)
+# define mutex_debug_check_no_locks_freed(from, to)	do { } while (0)
+#endif
+
+#define __MUTEX_INITIALIZER(lockname) \
+		{ .count = ATOMIC_INIT(1) \
+		, .wait_lock = SPIN_LOCK_UNLOCKED \
+		, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
+		__DEBUG_MUTEX_INITIALIZER(lockname) }
+
+#define DEFINE_MUTEX(mutexname) \
+	struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
+
+extern void fastcall __mutex_init(struct mutex *lock, const char *name);
+
+/***
+ * mutex_is_locked - is the mutex locked
+ * @lock: the mutex to be queried
+ *
+ * Returns 1 if the mutex is locked, 0 if unlocked.
+ */
+static inline int fastcall mutex_is_locked(struct mutex *lock)
+{
+	return atomic_read(&lock->count) != 1;
+}
+
+/*
+ * See kernel/mutex.c for detailed documentation of these APIs.
+ * Also see Documentation/mutex-design.txt.
+ */
+extern void fastcall mutex_lock(struct mutex *lock);
+extern int fastcall mutex_lock_interruptible(struct mutex *lock);
+/*
+ * NOTE: mutex_trylock() follows the spin_trylock() convention,
+ *       not the down_trylock() convention!
+ */
+extern int fastcall mutex_trylock(struct mutex *lock);
+extern void fastcall mutex_unlock(struct mutex *lock);
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 4f5a1453093a..de580b4d54a4 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,7 +7,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	    sysctl.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o \
 	    rcupdate.o intermodule.o extable.o params.o posix-timers.o \
-	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o
+	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o
 
 obj-$(CONFIG_FUTEX) += futex.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff --git a/kernel/mutex.c b/kernel/mutex.c
new file mode 100644
index 000000000000..7eb960661441
--- /dev/null
+++ b/kernel/mutex.c
@@ -0,0 +1,325 @@
+/*
+ * kernel/mutex.c
+ *
+ * Mutexes: blocking mutual exclusion locks
+ *
+ * Started by Ingo Molnar:
+ *
+ *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
+ * David Howells for suggestions and improvements.
+ *
+ * Also see Documentation/mutex-design.txt.
+ */
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+/*
+ * In the DEBUG case we are using the "NULL fastpath" for mutexes,
+ * which forces all calls into the slowpath:
+ */
+#ifdef CONFIG_DEBUG_MUTEXES
+# include "mutex-debug.h"
+# include <asm-generic/mutex-null.h>
+#else
+# include "mutex.h"
+# include <asm/mutex.h>
+#endif
+
+/***
+ * mutex_init - initialize the mutex
+ * @lock: the mutex to be initialized
+ *
+ * Initialize the mutex to unlocked state.
+ *
+ * It is not allowed to initialize an already locked mutex.
+ */
+void fastcall __mutex_init(struct mutex *lock, const char *name)
+{
+	atomic_set(&lock->count, 1);
+	spin_lock_init(&lock->wait_lock);
+	INIT_LIST_HEAD(&lock->wait_list);
+
+	debug_mutex_init(lock, name);
+}
+
+EXPORT_SYMBOL(__mutex_init);
+
+/*
+ * We split the mutex lock/unlock logic into separate fastpath and
+ * slowpath functions, to reduce the register pressure on the fastpath.
+ * We also put the fastpath first in the kernel image, to make sure the
+ * branch is predicted by the CPU as default-untaken.
+ */
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
+
+/***
+ * mutex_lock - acquire the mutex
+ * @lock: the mutex to be acquired
+ *
+ * Lock the mutex exclusively for this task. If the mutex is not
+ * available right now, it will sleep until it can get it.
+ *
+ * The mutex must later on be released by the same task that
+ * acquired it. Recursive locking is not allowed. The task
+ * may not exit without first unlocking the mutex. Also, kernel
+ * memory where the mutex resides mutex must not be freed with
+ * the mutex still locked. The mutex must first be initialized
+ * (or statically defined) before it can be locked. memset()-ing
+ * the mutex to 0 is not allowed.
+ *
+ * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
+ *   checks that will enforce the restrictions and will also do
+ *   deadlock debugging. )
+ *
+ * This function is similar to (but not equivalent to) down().
+ */
+void fastcall __sched mutex_lock(struct mutex *lock)
+{
+	/*
+	 * The locking fastpath is the 1->0 transition from
+	 * 'unlocked' into 'locked' state.
+	 *
+	 * NOTE: if asm/mutex.h is included, then some architectures
+	 * rely on mutex_lock() having _no other code_ here but this
+	 * fastpath. That allows the assembly fastpath to do
+	 * tail-merging optimizations. (If you want to put testcode
+	 * here, do it under #ifndef CONFIG_MUTEX_DEBUG.)
+	 */
+	__mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+}
+
+EXPORT_SYMBOL(mutex_lock);
+
+static void fastcall noinline __sched
+__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__);
+
+/***
+ * mutex_unlock - release the mutex
+ * @lock: the mutex to be released
+ *
+ * Unlock a mutex that has been locked by this task previously.
+ *
+ * This function must not be used in interrupt context. Unlocking
+ * of a not locked mutex is not allowed.
+ *
+ * This function is similar to (but not equivalent to) up().
+ */
+void fastcall __sched mutex_unlock(struct mutex *lock)
+{
+	/*
+	 * The unlocking fastpath is the 0->1 transition from 'locked'
+	 * into 'unlocked' state:
+	 *
+	 * NOTE: no other code must be here - see mutex_lock() .
+	 */
+	__mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
+}
+
+EXPORT_SYMBOL(mutex_unlock);
+
+/*
+ * Lock a mutex (possibly interruptible), slowpath:
+ */
+static inline int __sched
+__mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
+{
+	struct task_struct *task = current;
+	struct mutex_waiter waiter;
+	unsigned int old_val;
+
+	debug_mutex_init_waiter(&waiter);
+
+	spin_lock_mutex(&lock->wait_lock);
+
+	debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip);
+
+	/* add waiting tasks to the end of the waitqueue (FIFO): */
+	list_add_tail(&waiter.list, &lock->wait_list);
+	waiter.task = task;
+
+	for (;;) {
+		/*
+		 * Lets try to take the lock again - this is needed even if
+		 * we get here for the first time (shortly after failing to
+		 * acquire the lock), to make sure that we get a wakeup once
+		 * it's unlocked. Later on, if we sleep, this is the
+		 * operation that gives us the lock. We xchg it to -1, so
+		 * that when we release the lock, we properly wake up the
+		 * other waiters:
+		 */
+		old_val = atomic_xchg(&lock->count, -1);
+		if (old_val == 1)
+			break;
+
+		/*
+		 * got a signal? (This code gets eliminated in the
+		 * TASK_UNINTERRUPTIBLE case.)
+		 */
+		if (unlikely(state == TASK_INTERRUPTIBLE &&
+						signal_pending(task))) {
+			mutex_remove_waiter(lock, &waiter, task->thread_info);
+			spin_unlock_mutex(&lock->wait_lock);
+
+			debug_mutex_free_waiter(&waiter);
+			return -EINTR;
+		}
+		__set_task_state(task, state);
+
+		/* didnt get the lock, go to sleep: */
+		spin_unlock_mutex(&lock->wait_lock);
+		schedule();
+		spin_lock_mutex(&lock->wait_lock);
+	}
+
+	/* got the lock - rejoice! */
+	mutex_remove_waiter(lock, &waiter, task->thread_info);
+	debug_mutex_set_owner(lock, task->thread_info __IP__);
+
+	/* set it to 0 if there are no waiters left: */
+	if (likely(list_empty(&lock->wait_list)))
+		atomic_set(&lock->count, 0);
+
+	spin_unlock_mutex(&lock->wait_lock);
+
+	debug_mutex_free_waiter(&waiter);
+
+	DEBUG_WARN_ON(list_empty(&lock->held_list));
+	DEBUG_WARN_ON(lock->owner != task->thread_info);
+
+	return 0;
+}
+
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__)
+{
+	struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+	__mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__);
+}
+
+/*
+ * Release the lock, slowpath:
+ */
+static fastcall noinline void
+__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
+{
+        struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+	DEBUG_WARN_ON(lock->owner != current_thread_info());
+
+	spin_lock_mutex(&lock->wait_lock);
+
+	/*
+	 * some architectures leave the lock unlocked in the fastpath failure
+	 * case, others need to leave it locked. In the later case we have to
+	 * unlock it here
+	 */
+	if (__mutex_slowpath_needs_to_unlock())
+		atomic_set(&lock->count, 1);
+
+	debug_mutex_unlock(lock);
+
+	if (!list_empty(&lock->wait_list)) {
+		/* get the first entry from the wait-list: */
+		struct mutex_waiter *waiter =
+				list_entry(lock->wait_list.next,
+					   struct mutex_waiter, list);
+
+		debug_mutex_wake_waiter(lock, waiter);
+
+		wake_up_process(waiter->task);
+	}
+
+	debug_mutex_clear_owner(lock);
+
+	spin_unlock_mutex(&lock->wait_lock);
+}
+
+/*
+ * Here come the less common (and hence less performance-critical) APIs:
+ * mutex_lock_interruptible() and mutex_trylock().
+ */
+static int fastcall noinline __sched
+__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__);
+
+/***
+ * mutex_lock_interruptible - acquire the mutex, interruptable
+ * @lock: the mutex to be acquired
+ *
+ * Lock the mutex like mutex_lock(), and return 0 if the mutex has
+ * been acquired or sleep until the mutex becomes available. If a
+ * signal arrives while waiting for the lock then this function
+ * returns -EINTR.
+ *
+ * This function is similar to (but not equivalent to) down_interruptible().
+ */
+int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
+{
+	/* NOTE: no other code must be here - see mutex_lock() */
+	return __mutex_fastpath_lock_retval
+			(&lock->count, __mutex_lock_interruptible_slowpath);
+}
+
+EXPORT_SYMBOL(mutex_lock_interruptible);
+
+static int fastcall noinline __sched
+__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__)
+{
+	struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+	return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__);
+}
+
+/*
+ * Spinlock based trylock, we take the spinlock and check whether we
+ * can get the lock:
+ */
+static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
+{
+	struct mutex *lock = container_of(lock_count, struct mutex, count);
+	int prev;
+
+	spin_lock_mutex(&lock->wait_lock);
+
+	prev = atomic_xchg(&lock->count, -1);
+	if (likely(prev == 1))
+		debug_mutex_set_owner(lock, current_thread_info() __RET_IP__);
+	/* Set it back to 0 if there are no waiters: */
+	if (likely(list_empty(&lock->wait_list)))
+		atomic_set(&lock->count, 0);
+
+	spin_unlock_mutex(&lock->wait_lock);
+
+	return prev == 1;
+}
+
+/***
+ * mutex_trylock - try acquire the mutex, without waiting
+ * @lock: the mutex to be acquired
+ *
+ * Try to acquire the mutex atomically. Returns 1 if the mutex
+ * has been acquired successfully, and 0 on contention.
+ *
+ * NOTE: this function follows the spin_trylock() convention, so
+ * it is negated to the down_trylock() return values! Be careful
+ * about this when converting semaphore users to mutexes.
+ *
+ * This function must not be used in interrupt context. The
+ * mutex must be released by the same task that acquired it.
+ */
+int fastcall mutex_trylock(struct mutex *lock)
+{
+	return __mutex_fastpath_trylock(&lock->count,
+					__mutex_trylock_slowpath);
+}
+
+EXPORT_SYMBOL(mutex_trylock);
+
+
+
diff --git a/kernel/mutex.h b/kernel/mutex.h
new file mode 100644
index 000000000000..00fe84e7b672
--- /dev/null
+++ b/kernel/mutex.h
@@ -0,0 +1,35 @@
+/*
+ * Mutexes: blocking mutual exclusion locks
+ *
+ * started by Ingo Molnar:
+ *
+ *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * This file contains mutex debugging related internal prototypes, for the
+ * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
+ */
+
+#define spin_lock_mutex(lock)			spin_lock(lock)
+#define spin_unlock_mutex(lock)			spin_unlock(lock)
+#define mutex_remove_waiter(lock, waiter, ti) \
+		__list_del((waiter)->list.prev, (waiter)->list.next)
+
+#define DEBUG_WARN_ON(c)				do { } while (0)
+#define debug_mutex_set_owner(lock, new_owner)		do { } while (0)
+#define debug_mutex_clear_owner(lock)			do { } while (0)
+#define debug_mutex_init_waiter(waiter)			do { } while (0)
+#define debug_mutex_wake_waiter(lock, waiter)		do { } while (0)
+#define debug_mutex_free_waiter(waiter)			do { } while (0)
+#define debug_mutex_add_waiter(lock, waiter, ti, ip)	do { } while (0)
+#define debug_mutex_unlock(lock)			do { } while (0)
+#define debug_mutex_init(lock, name)			do { } while (0)
+
+/*
+ * Return-address parameters/declarations. They are very useful for
+ * debugging, but add overhead in the !DEBUG case - so we go the
+ * trouble of using this not too elegant but zero-cost solution:
+ */
+#define __IP_DECL__
+#define __IP__
+#define __RET_IP__
+
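
The heart of the slowpath in kernel/mutex.c above is the counter protocol: a
waiter unconditionally swaps the count to -1 before sleeping, which forces the
unlocker's 0->1 fastpath to fail and fall into __mutex_unlock_slowpath(),
where a waiter is woken. A compilable user-space model of the three counter
transitions (C11 atomics; a sketch of the protocol, not kernel code):

    #include <stdatomic.h>

    /* 1 = unlocked, 0 = locked, -1 = locked with possible waiters. */
    static atomic_int count = 1;

    static int fastpath_lock(void)
    {
            /* 1 -> 0 is the uncontended acquire */
            return atomic_fetch_sub(&count, 1) == 1;
    }

    static int slowpath_attempt(void)
    {
            /* force -1 so the unlocker must take its slowpath; the old
             * value tells us whether we actually acquired the lock */
            return atomic_exchange(&count, -1) == 1;
    }

    static int fastpath_unlock(void)
    {
            /* 0 -> 1; an old value below 0 means waiters may be queued */
            return atomic_fetch_add(&count, 1) == 0;
    }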

commit b8aa0361e434f8d9cbe9bb34525af1e8721396d8
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:18 2006 -0800

    [PATCH] mutex subsystem, add include/asm-x86_64/mutex.h
    
    add the x86_64 version of mutex.h, optimized in assembly.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@infradead.org>

diff --git a/include/asm-x86_64/mutex.h b/include/asm-x86_64/mutex.h
new file mode 100644
index 000000000000..818abfd262d1
--- /dev/null
+++ b/include/asm-x86_64/mutex.h
@@ -0,0 +1,113 @@
+/*
+ * Assembly implementation of the mutex fastpath, based on atomic
+ * decrement/increment.
+ *
+ * started by Ingo Molnar:
+ *
+ *  Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+/**
+ * __mutex_fastpath_lock - decrement and call function if negative
+ * @v: pointer of type atomic_t
+ * @fail_fn: function to call if the result is negative
+ *
+ * Atomically decrements @v and calls <fail_fn> if the result is negative.
+ */
+#define __mutex_fastpath_lock(v, fail_fn)				\
+do {									\
+	unsigned long dummy;						\
+									\
+	typecheck(atomic_t *, v);					\
+	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
+									\
+	__asm__ __volatile__(						\
+		LOCK	"   decl (%%rdi)	\n"			\
+			"   js 2f		\n"			\
+			"1:			\n"			\
+									\
+		LOCK_SECTION_START("")					\
+			"2: call "#fail_fn"	\n"			\
+			"   jmp 1b		\n"			\
+		LOCK_SECTION_END					\
+									\
+		:"=D" (dummy)						\
+		: "D" (v)						\
+		: "rax", "rsi", "rdx", "rcx",				\
+		  "r8", "r9", "r10", "r11", "memory");			\
+} while (0)
+
+/**
+ *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ *                                 from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns.
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count,
+			     int fastcall (*fail_fn)(atomic_t *))
+{
+	if (unlikely(atomic_dec_return(count) < 0))
+		return fail_fn(count);
+	else
+		return 0;
+}
+
+/**
+ * __mutex_fastpath_unlock - increment and call function if nonpositive
+ * @v: pointer of type atomic_t
+ * @fail_fn: function to call if the result is nonpositive
+ *
+ * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
+ */
+#define __mutex_fastpath_unlock(v, fail_fn)				\
+do {									\
+	unsigned long dummy;						\
+									\
+	typecheck(atomic_t *, v);					\
+	typecheck_fn(fastcall void (*)(atomic_t *), fail_fn);		\
+									\
+	__asm__ __volatile__(						\
+		LOCK	"   incl (%%rdi)	\n"			\
+			"   jle 2f		\n"			\
+			"1:			\n"			\
+									\
+		LOCK_SECTION_START("")					\
+			"2: call "#fail_fn"	\n"			\
+			"   jmp 1b		\n"			\
+		LOCK_SECTION_END					\
+									\
+		:"=D" (dummy)						\
+		: "D" (v)						\
+		: "rax", "rsi", "rdx", "rcx",				\
+		  "r8", "r9", "r10", "r11", "memory");			\
+} while (0)
+
+#define __mutex_slowpath_needs_to_unlock()	1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: fallback function
+ *
+ * Change the count from 1 to 0 and return 1 (success), or return 0 (failure)
+ * if it wasn't 1 originally. [the fallback function is never used on
+ * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.]
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+		return 1;
+	else
+		return 0;
+}
+
+#endif
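Stripped of the section trickery, the two asm fastpaths above behave like the C sketch below (illustrative only - the real versions keep the fail_fn call on a cold out-of-line path via LOCK_SECTION_START/END, pass @v in %rdi rather than through the regular ABI, and therefore clobber nearly every caller-saved register; the fastcall annotation and typechecks are omitted here):

/* Hypothetical C equivalents of the asm fastpaths (not drop-ins): */
static inline void fastpath_lock_sketch(atomic_t *v,
					void (*fail_fn)(atomic_t *))
{
	if (atomic_dec_return(v) < 0)	/* LOCK decl + js 2f */
		fail_fn(v);
}

static inline void fastpath_unlock_sketch(atomic_t *v,
					  void (*fail_fn)(atomic_t *))
{
	if (atomic_inc_return(v) <= 0)	/* LOCK incl + jle 2f */
		fail_fn(v);
}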

commit 620a6fd185c084aa617c411f711533f01ea673c9
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:17 2006 -0800

    [PATCH] mutex subsystem, add asm-generic/mutex-[dec|xchg|null].h implementations
    
    Add three (generic) mutex fastpath implementations.
    
    The mutex-xchg.h implementation is atomic_xchg() based, and should
    work fine on every architecture.
    
    The mutex-dec.h implementation is atomic_dec_return() based - this
    one too should work on every architecture, but it might not perform
    optimally on architectures that have no atomic dec/inc instructions.
    
    The mutex-null.h implementation forces all calls into the slowpath. This
    is used for mutex debugging, but it can also be used on platforms that do
    not want (or need) a fastpath at all.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@infradead.org>

diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
new file mode 100644
index 000000000000..74b18cda169f
--- /dev/null
+++ b/include/asm-generic/mutex-dec.h
@@ -0,0 +1,110 @@
+/*
+ * asm-generic/mutex-dec.h
+ *
+ * Generic implementation of the mutex fastpath, based on atomic
+ * decrement/increment.
+ */
+#ifndef _ASM_GENERIC_MUTEX_DEC_H
+#define _ASM_GENERIC_MUTEX_DEC_H
+
+/**
+ *  __mutex_fastpath_lock - try to take the lock by moving the count
+ *                          from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function MUST leave the value lower than
+ * 1 even when the "1" assertion wasn't true.
+ */
+#define __mutex_fastpath_lock(count, fail_fn)				\
+do {									\
+	if (unlikely(atomic_dec_return(count) < 0))			\
+		fail_fn(count);						\
+	else								\
+		smp_mb();						\
+} while (0)
+
+/**
+ *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ *                                 from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns.
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	if (unlikely(atomic_dec_return(count) < 0))
+		return fail_fn(count);
+	else {
+		smp_mb();
+		return 0;
+	}
+}
+
+/**
+ *  __mutex_fastpath_unlock - try to promote the count from 0 to 1
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 0
+ *
+ * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
+ * In the failure case, this function is allowed to either set the value to
+ * 1, or to set it to a value lower than 1.
+ *
+ * If the implementation sets it to a value lower than 1, then the
+ * __mutex_slowpath_needs_to_unlock() macro needs to return 1; otherwise
+ * it needs to return 0.
+ */
+#define __mutex_fastpath_unlock(count, fail_fn)				\
+do {									\
+	smp_mb();							\
+	if (unlikely(atomic_inc_return(count) <= 0))			\
+		fail_fn(count);						\
+} while (0)
+
+#define __mutex_slowpath_needs_to_unlock()		1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: fallback function
+ *
+ * Change the count from 1 to a value lower than 1, and return 0 (failure)
+ * if it wasn't 1 originally, or return 1 (success) otherwise. This function
+ * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
+ * Additionally, if the value was < 0 originally, this function must not
+ * leave it at 0 on failure.
+ *
+ * If the architecture has no effective trylock variant, it should call the
+ * <fail_fn> spinlock-based trylock variant unconditionally.
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	/*
+	 * We have two variants here. The cmpxchg based one is the best one
+	 * because it never induces a false contention state.  It is included
+	 * here because architectures using the inc/dec algorithms over the
+	 * xchg ones are much more likely to support cmpxchg natively.
+	 *
+	 * If not, we fall back to the spinlock based variant - that is
+	 * just as efficient as (and simpler than) a 'destructive' probing
+	 * of the mutex state would be.
+	 */
+#ifdef __HAVE_ARCH_CMPXCHG
+	if (likely(atomic_cmpxchg(count, 1, 0) == 1)) {
+		smp_mb();
+		return 1;
+	}
+	return 0;
+#else
+	return fail_fn(count);
+#endif
+}
+
+#endif
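All of these fastpaths rely on one counter protocol: 1 means unlocked, 0 means locked with no waiters, and any negative value means locked with possible waiters - which is why a failed lock or trylock must leave the count below 1, so the holder's unlock falls into the slowpath and wakes someone. A userspace C11 mock of the dec/inc variant (illustrative only; mock_lock()/mock_unlock() and the no-op slowpath stubs are made up):

#include <assert.h>
#include <stdatomic.h>

static void slow_lock(atomic_int *count)   { (void)count; /* would sleep */ }
static void slow_unlock(atomic_int *count) { (void)count; /* would wake  */ }

static void mock_lock(atomic_int *count)
{
	/* atomic_dec_return() equivalent: fetch_sub() returns the old value */
	if (atomic_fetch_sub(count, 1) - 1 < 0)
		slow_lock(count);
}

static void mock_unlock(atomic_int *count)
{
	if (atomic_fetch_add(count, 1) + 1 <= 0)
		slow_unlock(count);
}

int main(void)
{
	atomic_int count;

	atomic_init(&count, 1);			/* unlocked */
	mock_lock(&count);
	assert(atomic_load(&count) == 0);	/* locked, no waiters */
	mock_lock(&count);			/* second locker: contended */
	assert(atomic_load(&count) == -1);	/* unlock must take slowpath */
	return 0;
}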
diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h
new file mode 100644
index 000000000000..5cf8b7ce0c45
--- /dev/null
+++ b/include/asm-generic/mutex-null.h
@@ -0,0 +1,24 @@
+/*
+ * asm-generic/mutex-null.h
+ *
+ * Generic implementation of the mutex fastpath, based on NOP :-)
+ *
+ * This is used by the mutex-debugging infrastructure, but it can also
+ * be used by architectures that (for whatever reason) want to use the
+ * spinlock based slowpath.
+ */
+#ifndef _ASM_GENERIC_MUTEX_NULL_H
+#define _ASM_GENERIC_MUTEX_NULL_H
+
+/* extra parameter only needed for mutex debugging: */
+#ifndef __IP__
+# define __IP__
+#endif
+
+#define __mutex_fastpath_lock(count, fail_fn)	      fail_fn(count __RET_IP__)
+#define __mutex_fastpath_lock_retval(count, fail_fn)  fail_fn(count __RET_IP__)
+#define __mutex_fastpath_unlock(count, fail_fn)       fail_fn(count __RET_IP__)
+#define __mutex_fastpath_trylock(count, fail_fn)      fail_fn(count)
+#define __mutex_slowpath_needs_to_unlock()	      1
+
+#endif
diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
new file mode 100644
index 000000000000..1d24f47e6c48
--- /dev/null
+++ b/include/asm-generic/mutex-xchg.h
@@ -0,0 +1,117 @@
+/*
+ * asm-generic/mutex-xchg.h
+ *
+ * Generic implementation of the mutex fastpath, based on xchg().
+ *
+ * NOTE: An xchg based implementation is less optimal than an atomic
+ *       decrement/increment based implementation. If your architecture
+ *       has a reasonable atomic dec/inc then you should probably use
+ *       asm-generic/mutex-dec.h instead, or you could open-code an
+ *       optimized version in asm/mutex.h.
+ */
+#ifndef _ASM_GENERIC_MUTEX_XCHG_H
+#define _ASM_GENERIC_MUTEX_XCHG_H
+
+/**
+ *  __mutex_fastpath_lock - try to take the lock by moving the count
+ *                          from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
+ * wasn't 1 originally. This function MUST leave the value lower than 1
+ * even when the "1" assertion wasn't true.
+ */
+#define __mutex_fastpath_lock(count, fail_fn)				\
+do {									\
+	if (unlikely(atomic_xchg(count, 0) != 1))			\
+		fail_fn(count);						\
+	else								\
+		smp_mb();						\
+} while (0)
+
+
+/**
+ *  __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ *                                 from 1 to a 0 value
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
+ * wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns.
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	if (unlikely(atomic_xchg(count, 0) != 1))
+		return fail_fn(count);
+	else {
+		smp_mb();
+		return 0;
+	}
+}
+
+/**
+ *  __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: function to call if the original value was not 0
+ *
+ * Try to promote the mutex from 0 to 1. If it wasn't 0, call <fail_fn>.
+ * In the failure case, this function is allowed to either set the value to
+ * 1, or to set it to a value lower than 1.
+ * If the implementation sets it to a value lower than 1, the
+ * __mutex_slowpath_needs_to_unlock() macro needs to return 1; otherwise
+ * it needs to return 0.
+ */
+#define __mutex_fastpath_unlock(count, fail_fn)				\
+do {									\
+	smp_mb();							\
+	if (unlikely(atomic_xchg(count, 1) != 0))			\
+		fail_fn(count);						\
+} while (0)
+
+#define __mutex_slowpath_needs_to_unlock()		0
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ *  @count: pointer of type atomic_t
+ *  @fail_fn: spinlock based trylock implementation
+ *
+ * Change the count from 1 to a value lower than 1, and return 0 (failure)
+ * if it wasn't 1 originally, or return 1 (success) otherwise. This function
+ * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
+ * Additionally, if the value was < 0 originally, this function must not
+ * leave it at 0 on failure.
+ *
+ * If the architecture has no effective trylock variant, it should call the
+ * <fail_fn> spinlock-based trylock variant unconditionally.
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+	int prev = atomic_xchg(count, 0);
+
+	if (unlikely(prev < 0)) {
+		/*
+		 * The lock was marked contended so we must restore that
+		 * state. If while doing so we get back a prev value of 1
+		 * then we just own it.
+		 *
+		 * [ In the rare case of the mutex going to 1, to 0, to -1
+		 *   and then back to 0 in this few-instructions window,
+		 *   this has the potential to trigger the slowpath for the
+		 *   owner's unlock path needlessly, but that's not a problem
+		 *   in practice. ]
+		 */
+		prev = atomic_xchg(count, prev);
+		if (prev < 0)
+			prev = 0;
+	}
+	smp_mb();
+
+	return prev;
+}
+
+#endif
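The trylock above is the subtle one: the first xchg() destructively probes the count, so a contended (<0) state has to be put back. A userspace C11 mock (illustrative only; mock_trylock() is made up) that can be stepped through for each initial state:

#include <assert.h>
#include <stdatomic.h>

static int mock_trylock(atomic_int *count)
{
	int prev = atomic_exchange(count, 0);

	if (prev < 0) {
		/* We clobbered a contended state: restore it. If the owner
		 * unlocked in the window, this returns 1 and we own the
		 * mutex after all; count stays <0, so our eventual unlock
		 * merely takes the slowpath needlessly.
		 */
		prev = atomic_exchange(count, prev);
		if (prev < 0)
			prev = 0;
	}
	return prev;
}

int main(void)
{
	atomic_int count;

	atomic_init(&count, 1);			/* unlocked */
	assert(mock_trylock(&count) == 1);	/* acquired, count -> 0 */

	atomic_init(&count, 0);			/* locked, no waiters */
	assert(mock_trylock(&count) == 0);	/* fails, count stays 0 */

	atomic_init(&count, -1);		/* locked, waiters queued */
	assert(mock_trylock(&count) == 0);	/* fails... */
	assert(atomic_load(&count) == -1);	/* ...and state is restored */
	return 0;
}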

commit ffbf670f5cd50501a34a5187981460da2216071e
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Jan 9 15:59:17 2006 -0800

    [PATCH] mutex subsystem, add atomic_xchg() to all arches
    
    add atomic_xchg() to all the architectures. Needed by the new mutex code.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Arjan van de Ven <arjan@infradead.org>
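Each hunk below adds the same one-liner: atomic_xchg() atomically stores a new value in the counter and hands back the old one, built on the xchg() primitive every architecture already provides. The equivalent in C11 terms (illustrative only):

#include <assert.h>
#include <stdatomic.h>

int main(void)
{
	atomic_int v;

	atomic_init(&v, 1);
	assert(atomic_exchange(&v, 0) == 1);	/* returns the old value... */
	assert(atomic_load(&v) == 0);		/* ...and stores the new one */
	return 0;
}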

diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h
index cb03bbe92cdf..fc77f7413083 100644
--- a/include/asm-alpha/atomic.h
+++ b/include/asm-alpha/atomic.h
@@ -176,6 +176,7 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
 }
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 #define atomic_add_unless(v, a, u)				\
 ({								\
diff --git a/include/asm-arm/atomic.h b/include/asm-arm/atomic.h
index f72b63309bc5..3d7283d84405 100644
--- a/include/asm-arm/atomic.h
+++ b/include/asm-arm/atomic.h
@@ -175,6 +175,8 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 
 #endif /* __LINUX_ARM_ARCH__ */
 
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
diff --git a/include/asm-arm26/atomic.h b/include/asm-arm26/atomic.h
index 3074b0e76343..1552c8653990 100644
--- a/include/asm-arm26/atomic.h
+++ b/include/asm-arm26/atomic.h
@@ -76,6 +76,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
diff --git a/include/asm-cris/atomic.h b/include/asm-cris/atomic.h
index 2df2c7aa19b7..0b51a87e5532 100644
--- a/include/asm-cris/atomic.h
+++ b/include/asm-cris/atomic.h
@@ -136,6 +136,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index 9c9e9499cfd8..a59f684b4f33 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -328,6 +328,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
 #endif
 
 #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 #define atomic_add_unless(v, a, u)				\
 ({								\
diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h
index d891541e89c3..21f54428c86b 100644
--- a/include/asm-h8300/atomic.h
+++ b/include/asm-h8300/atomic.h
@@ -95,6 +95,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
index 7a5472d77091..de649d3aa2d4 100644
--- a/include/asm-i386/atomic.h
+++ b/include/asm-i386/atomic.h
@@ -216,6 +216,7 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
 }
 
 #define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
  * atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h
index 15cf7984c48e..d3e0dfa99e1f 100644
--- a/include/asm-ia64/atomic.h
+++ b/include/asm-ia64/atomic.h
@@ -89,6 +89,7 @@ ia64_atomic64_sub (__s64 i, atomic64_t *v)
 }
 
 #define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 #define atomic_add_unless(v, a, u)				\
 ({								\
diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h
index 70761278b6cb..3122fe106f05 100644
--- a/include/asm-m32r/atomic.h
+++ b/include/asm-m32r/atomic.h
@@ -243,6 +243,7 @@ static __inline__ int atomic_dec_return(atomic_t *v)
 #define atomic_add_negative(i,v) (atomic_add_return((i), (v)) < 0)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
  * atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h
index b8a4e75d679d..a4a84d5c65d5 100644
--- a/include/asm-m68k/atomic.h
+++ b/include/asm-m68k/atomic.h
@@ -140,6 +140,7 @@ static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
 }
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 #define atomic_add_unless(v, a, u)				\
 ({								\
diff --git a/include/asm-m68knommu/atomic.h b/include/asm-m68knommu/atomic.h
index 1702dbe9318c..6c4e4b63e454 100644
--- a/include/asm-m68knommu/atomic.h
+++ b/include/asm-m68knommu/atomic.h
@@ -129,6 +129,7 @@ static inline int atomic_sub_return(int i, atomic_t * v)
 }
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 #define atomic_add_unless(v, a, u)				\
 ({								\
diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index 92256e43a938..94a95872d727 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -289,6 +289,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
 }
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
  * atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-parisc/atomic.h b/include/asm-parisc/atomic.h
index 64ebd086c40d..2ca56d34aaad 100644
--- a/include/asm-parisc/atomic.h
+++ b/include/asm-parisc/atomic.h
@@ -165,6 +165,7 @@ static __inline__ int atomic_read(const atomic_t *v)
 
 /* exported interface */
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
  * atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-powerpc/atomic.h b/include/asm-powerpc/atomic.h
index ae395a0632a6..248f9aec959c 100644
--- a/include/asm-powerpc/atomic.h
+++ b/include/asm-powerpc/atomic.h
@@ -165,6 +165,7 @@ static __inline__ int atomic_dec_return(atomic_t *v)
 }
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
  * atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index d82aedf616fe..be6fefe223d6 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -75,6 +75,8 @@ static __inline__ void atomic_set_mask(unsigned long mask, atomic_t * v)
 	       __CS_LOOP(v, mask, "or");
 }
 
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	__asm__ __volatile__("  cs   %0,%3,0(%2)\n"
diff --git a/include/asm-sh/atomic.h b/include/asm-sh/atomic.h
index 618d8e0de348..fb627de217f2 100644
--- a/include/asm-sh/atomic.h
+++ b/include/asm-sh/atomic.h
@@ -101,6 +101,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
diff --git a/include/asm-sh64/atomic.h b/include/asm-sh64/atomic.h
index f3ce5c0df13a..28f2ea9b567b 100644
--- a/include/asm-sh64/atomic.h
+++ b/include/asm-sh64/atomic.h
@@ -113,6 +113,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
diff --git a/include/asm-sparc/atomic.h b/include/asm-sparc/atomic.h
index accb4967e9d2..e1033170bd3a 100644
--- a/include/asm-sparc/atomic.h
+++ b/include/asm-sparc/atomic.h
@@ -20,6 +20,7 @@ typedef struct { volatile int counter; } atomic_t;
 
 extern int __atomic_add_return(int, atomic_t *);
 extern int atomic_cmpxchg(atomic_t *, int, int);
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 extern int atomic_add_unless(atomic_t *, int, int);
 extern void atomic_set(atomic_t *, int);
 
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h
index 11f5aa5d108c..25256bdc8aae 100644
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -72,6 +72,7 @@ extern int atomic64_sub_ret(int, atomic64_t *);
 #define atomic64_add_negative(i, v) (atomic64_add_ret(i, v) < 0)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 #define atomic_add_unless(v, a, u)				\
 ({								\
diff --git a/include/asm-v850/atomic.h b/include/asm-v850/atomic.h
index f5b9ab6f4e70..166df00457ea 100644
--- a/include/asm-v850/atomic.h
+++ b/include/asm-v850/atomic.h
@@ -104,6 +104,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return ret;
 }
 
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
 static inline int atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int ret;
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
index 72eb071488c7..6b540237a2f8 100644
--- a/include/asm-x86_64/atomic.h
+++ b/include/asm-x86_64/atomic.h
@@ -389,6 +389,7 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t *v)
 #define atomic64_dec_return(v)  (atomic64_sub_return(1,v))
 
 #define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
  * atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index e2ce06b101ad..fe105a123924 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -224,6 +224,7 @@ static inline int atomic_sub_return(int i, atomic_t * v)
 #define atomic_add_negative(i,v) (atomic_add_return((i),(v)) < 0)
 
 #define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 
 /**
  * atomic_add_unless - add unless the number is a given value