Patches contributed by Eötvös Lorand University
commit f4818900fa3ee1c56e96f6dede7cc4c05ed383d1
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 20:52:23 2006 -0800
[PATCH] hrtimer: clean up mktime and make arguments const
add 'const' to mktime arguments, and clean it up a bit
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/linux/time.h b/include/linux/time.h
index 9c444d9c4aa0..773b83ddd8ef 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -38,9 +38,11 @@ static __inline__ int timespec_equal(struct timespec *a, struct timespec *b)
return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
}
-extern unsigned long mktime (unsigned int year, unsigned int mon,
- unsigned int day, unsigned int hour,
- unsigned int min, unsigned int sec);
+extern unsigned long mktime(const unsigned int year, const unsigned int mon,
+ const unsigned int day, const unsigned int hour,
+ const unsigned int min, const unsigned int sec);
+
+extern void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec);
extern struct timespec xtime;
extern struct timespec wall_to_monotonic;
@@ -51,8 +53,6 @@ static inline unsigned long get_seconds(void)
return xtime.tv_sec;
}
-extern void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec);
-
struct timespec current_kernel_time(void);
#define CURRENT_TIME (current_kernel_time())
diff --git a/kernel/time.c b/kernel/time.c
index fa569885e22b..a0502aef43ce 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -599,12 +599,15 @@ EXPORT_SYMBOL_GPL(getnstimestamp);
* will already get problems at other places on 2038-01-19 03:14:08)
*/
unsigned long
-mktime (unsigned int year, unsigned int mon,
- unsigned int day, unsigned int hour,
- unsigned int min, unsigned int sec)
+mktime(const unsigned int year0, const unsigned int mon0,
+ const unsigned int day, const unsigned int hour,
+ const unsigned int min, const unsigned int sec)
{
- if (0 >= (int) (mon -= 2)) { /* 1..12 -> 11,12,1..10 */
- mon += 12; /* Puts Feb last since it has leap day */
+ unsigned int mon = mon0, year = year0;
+
+ /* 1..12 -> 11,12,1..10 */
+ if (0 >= (int) (mon -= 2)) {
+ mon += 12; /* Puts Feb last since it has leap day */
year -= 1;
}
@@ -630,7 +633,7 @@ mktime (unsigned int year, unsigned int mon,
* 0 <= tv_nsec < NSEC_PER_SEC
* For negative values only the tv_sec field is negative !
*/
-void set_normalized_timespec (struct timespec *ts, time_t sec, long nsec)
+void set_normalized_timespec(struct timespec *ts, time_t sec, long nsec)
{
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
commit 11b751ae8c8ca3fa24c85bd5a3e51dd9f95cda17
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:27 2006 -0800
[PATCH] mutex subsystem, semaphore to completion: drivers/block/loop.c
convert the block loop device from semaphores to completions.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index bed9ad76c04c..864729046e22 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -527,12 +527,12 @@ static int loop_make_request(request_queue_t *q, struct bio *old_bio)
lo->lo_pending++;
loop_add_bio(lo, old_bio);
spin_unlock_irq(&lo->lo_lock);
- up(&lo->lo_bh_mutex);
+ complete(&lo->lo_bh_done);
return 0;
out:
if (lo->lo_pending == 0)
- up(&lo->lo_bh_mutex);
+ complete(&lo->lo_bh_done);
spin_unlock_irq(&lo->lo_lock);
bio_io_error(old_bio, old_bio->bi_size);
return 0;
@@ -593,23 +593,20 @@ static int loop_thread(void *data)
lo->lo_pending = 1;
/*
- * up sem, we are running
+ * complete it, we are running
*/
- up(&lo->lo_sem);
+ complete(&lo->lo_done);
for (;;) {
int pending;
- /*
- * interruptible just to not contribute to load avg
- */
- if (down_interruptible(&lo->lo_bh_mutex))
+ if (wait_for_completion_interruptible(&lo->lo_bh_done))
continue;
spin_lock_irq(&lo->lo_lock);
/*
- * could be upped because of tear-down, not pending work
+ * could be completed because of tear-down, not pending work
*/
if (unlikely(!lo->lo_pending)) {
spin_unlock_irq(&lo->lo_lock);
@@ -632,7 +629,7 @@ static int loop_thread(void *data)
break;
}
- up(&lo->lo_sem);
+ complete(&lo->lo_done);
return 0;
}
@@ -843,7 +840,7 @@ static int loop_set_fd(struct loop_device *lo, struct file *lo_file,
set_blocksize(bdev, lo_blocksize);
kernel_thread(loop_thread, lo, CLONE_KERNEL);
- down(&lo->lo_sem);
+ wait_for_completion(&lo->lo_done);
return 0;
out_putf:
@@ -909,10 +906,10 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
lo->lo_state = Lo_rundown;
lo->lo_pending--;
if (!lo->lo_pending)
- up(&lo->lo_bh_mutex);
+ complete(&lo->lo_bh_done);
spin_unlock_irq(&lo->lo_lock);
- down(&lo->lo_sem);
+ wait_for_completion(&lo->lo_done);
lo->lo_backing_file = NULL;
@@ -1289,8 +1286,8 @@ static int __init loop_init(void)
if (!lo->lo_queue)
goto out_mem4;
init_MUTEX(&lo->lo_ctl_mutex);
- init_MUTEX_LOCKED(&lo->lo_sem);
- init_MUTEX_LOCKED(&lo->lo_bh_mutex);
+ init_completion(&lo->lo_done);
+ init_completion(&lo->lo_bh_done);
lo->lo_number = i;
spin_lock_init(&lo->lo_lock);
disk->major = LOOP_MAJOR;
diff --git a/include/linux/loop.h b/include/linux/loop.h
index 40f63c9879d2..f96506782ebe 100644
--- a/include/linux/loop.h
+++ b/include/linux/loop.h
@@ -58,9 +58,9 @@ struct loop_device {
struct bio *lo_bio;
struct bio *lo_biotail;
int lo_state;
- struct semaphore lo_sem;
+ struct completion lo_done;
+ struct completion lo_bh_done;
struct semaphore lo_ctl_mutex;
- struct semaphore lo_bh_mutex;
int lo_pending;
request_queue_t *lo_queue;
commit 7892f2f48d165a34b0b8130c8a195dfd807b8cb6
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:25 2006 -0800
[PATCH] mutex subsystem, semaphore to mutex: VFS, sb->s_lock
This patch converts the superblock-lock semaphore to a mutex, affecting
lock_super()/unlock_super(). Tested on ext3 and XFS.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index c3dbebdb9897..56bf76586019 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2150,7 +2150,7 @@ int ext3_force_commit(struct super_block *sb)
static void ext3_write_super (struct super_block * sb)
{
- if (down_trylock(&sb->s_lock) == 0)
+ if (mutex_trylock(&sb->s_lock) != 0)
BUG();
sb->s_dirt = 0;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 48bf7f0ce544..364d64bd5f10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -169,7 +169,7 @@ static match_table_t tokens = {
*/
static void ocfs2_write_super(struct super_block *sb)
{
- if (down_trylock(&sb->s_lock) == 0)
+ if (mutex_trylock(&sb->s_lock) != 0)
BUG();
sb->s_dirt = 0;
}
diff --git a/fs/super.c b/fs/super.c
index 0a30e51692cf..c177b92419c5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -72,7 +72,7 @@ static struct super_block *alloc_super(void)
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
init_rwsem(&s->s_umount);
- sema_init(&s->s_lock, 1);
+ mutex_init(&s->s_lock);
down_write(&s->s_umount);
s->s_count = S_BIAS;
atomic_set(&s->s_active, 1);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 01654b218e42..92ae3e2067b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -821,7 +821,7 @@ struct super_block {
unsigned long s_magic;
struct dentry *s_root;
struct rw_semaphore s_umount;
- struct semaphore s_lock;
+ struct mutex s_lock;
int s_count;
int s_syncing;
int s_need_sync_fs;
@@ -893,13 +893,13 @@ static inline int has_fs_excl(void)
static inline void lock_super(struct super_block * sb)
{
get_fs_excl();
- down(&sb->s_lock);
+ mutex_lock(&sb->s_lock);
}
static inline void unlock_super(struct super_block * sb)
{
put_fs_excl();
- up(&sb->s_lock);
+ mutex_unlock(&sb->s_lock);
}
/*
commit de5097c2e73f826302cd8957c225b3725e0c7553
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:21 2006 -0800
[PATCH] mutex subsystem, more debugging code
more mutex debugging: check for held locks during memory freeing,
task exit, enable sysrq printouts, etc.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index c30a16df6440..e8a53552b13d 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -222,6 +222,10 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
{
if (PageHighMem(page))
return;
+ if (!enable)
+ mutex_debug_check_no_locks_freed(page_address(page),
+ page_address(page+numpages));
+
/* the return value is ignored - the calls cannot fail,
* large pages are disabled at boot time.
*/
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 145275ebdd7e..5765f672e853 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -153,6 +153,21 @@ static struct sysrq_key_op sysrq_mountro_op = {
/* END SYNC SYSRQ HANDLERS BLOCK */
+#ifdef CONFIG_DEBUG_MUTEXES
+
+static void
+sysrq_handle_showlocks(int key, struct pt_regs *pt_regs, struct tty_struct *tty)
+{
+ mutex_debug_show_all_locks();
+}
+
+static struct sysrq_key_op sysrq_showlocks_op = {
+ .handler = sysrq_handle_showlocks,
+ .help_msg = "show-all-locks(D)",
+ .action_msg = "Show Locks Held",
+};
+
+#endif
/* SHOW SYSRQ HANDLERS BLOCK */
@@ -294,7 +309,11 @@ static struct sysrq_key_op *sysrq_key_table[SYSRQ_KEY_TABLE_LENGTH] = {
#else
/* c */ NULL,
#endif
+#ifdef CONFIG_DEBUG_MUTEXES
+/* d */ &sysrq_showlocks_op,
+#else
/* d */ NULL,
+#endif
/* e */ &sysrq_term_op,
/* f */ &sysrq_moom_op,
/* g */ NULL,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index df80e63903b5..3f1fafc0245e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -13,6 +13,7 @@
#include <linux/rbtree.h>
#include <linux/prio_tree.h>
#include <linux/fs.h>
+#include <linux/mutex.h>
struct mempolicy;
struct anon_vma;
@@ -1024,6 +1025,9 @@ static inline void vm_stat_account(struct mm_struct *mm,
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
{
+ if (!PageHighMem(page) && !enable)
+ mutex_debug_check_no_locks_freed(page_address(page),
+ page_address(page + numpages));
}
#endif
diff --git a/kernel/exit.c b/kernel/exit.c
index caceabf3f230..309a46fa16f8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -29,6 +29,7 @@
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <linux/cn_proc.h>
+#include <linux/mutex.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -869,6 +870,10 @@ fastcall NORET_TYPE void do_exit(long code)
mpol_free(tsk->mempolicy);
tsk->mempolicy = NULL;
#endif
+ /*
+ * If DEBUG_MUTEXES is on, make sure we are holding no locks:
+ */
+ mutex_debug_check_no_locks_held(tsk);
/* PF_DEAD causes final put_task_struct after we schedule. */
preempt_disable();
diff --git a/kernel/sched.c b/kernel/sched.c
index 92733091154c..34a945bcc022 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4386,6 +4386,7 @@ void show_state(void)
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
+ mutex_debug_show_all_locks();
}
/**
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e0e84924171b..a5e6891f7bb6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -415,6 +415,9 @@ static void __free_pages_ok(struct page *page, unsigned int order)
int reserved = 0;
arch_free_page(page, order);
+ if (!PageHighMem(page))
+ mutex_debug_check_no_locks_freed(page_address(page),
+ page_address(page+(1<<order)));
#ifndef CONFIG_MMU
for (i = 1 ; i < (1 << order) ; ++i)
diff --git a/mm/slab.c b/mm/slab.c
index 1c46c6383552..33aab345cd4a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3071,6 +3071,7 @@ void kfree(const void *objp)
local_irq_save(flags);
kfree_debugcheck(objp);
c = page_get_cache(virt_to_page(objp));
+ mutex_debug_check_no_locks_freed(objp, objp+obj_reallen(c));
__cache_free(c, (void *)objp);
local_irq_restore(flags);
}
commit 408894ee4dd4debfdedd472eb4d8414892fc90f6
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:20 2006 -0800
[PATCH] mutex subsystem, debugging code
mutex implementation - add debugging code.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h
new file mode 100644
index 000000000000..0ccd8f983b50
--- /dev/null
+++ b/include/linux/mutex-debug.h
@@ -0,0 +1,21 @@
+#ifndef __LINUX_MUTEX_DEBUG_H
+#define __LINUX_MUTEX_DEBUG_H
+
+/*
+ * Mutexes - debugging helpers:
+ */
+
+#define __DEBUG_MUTEX_INITIALIZER(lockname) \
+ , .held_list = LIST_HEAD_INIT(lockname.held_list), \
+ .name = #lockname , .magic = &lockname
+
+#define mutex_init(sem) __mutex_init(sem, __FUNCTION__)
+
+extern void FASTCALL(mutex_destroy(struct mutex *lock));
+
+extern void mutex_debug_show_all_locks(void);
+extern void mutex_debug_show_held_locks(struct task_struct *filter);
+extern void mutex_debug_check_no_locks_held(struct task_struct *task);
+extern void mutex_debug_check_no_locks_freed(const void *from, const void *to);
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78eb92ae4d94..85b53f87c703 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -817,6 +817,11 @@ struct task_struct {
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
spinlock_t proc_lock;
+#ifdef CONFIG_DEBUG_MUTEXES
+ /* mutex deadlock detection */
+ struct mutex_waiter *blocked_on;
+#endif
+
/* journalling filesystem info */
void *journal_info;
diff --git a/kernel/Makefile b/kernel/Makefile
index de580b4d54a4..a940bac02837 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -9,6 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o
+obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o spinlock.o
diff --git a/kernel/fork.c b/kernel/fork.c
index 72e3252c6763..b18d64554feb 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -979,6 +979,10 @@ static task_t *copy_process(unsigned long clone_flags,
}
#endif
+#ifdef CONFIG_DEBUG_MUTEXES
+ p->blocked_on = NULL; /* not blocked yet */
+#endif
+
p->tgid = p->pid;
if (clone_flags & CLONE_THREAD)
p->tgid = current->tgid;
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
new file mode 100644
index 000000000000..4fcb051a8b9e
--- /dev/null
+++ b/kernel/mutex-debug.c
@@ -0,0 +1,464 @@
+/*
+ * kernel/mutex-debug.c
+ *
+ * Debugging code for mutexes
+ *
+ * Started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * lock debugging, locking tree, deadlock detection started by:
+ *
+ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey
+ * Released under the General Public License (GPL).
+ */
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+
+#include <asm/mutex.h>
+
+#include "mutex-debug.h"
+
+/*
+ * We need a global lock when we walk through the multi-process
+ * lock tree. Only used in the deadlock-debugging case.
+ */
+DEFINE_SPINLOCK(debug_mutex_lock);
+
+/*
+ * All locks held by all tasks, in a single global list:
+ */
+LIST_HEAD(debug_mutex_held_locks);
+
+/*
+ * In the debug case we carry the caller's instruction pointer into
+ * other functions, but we dont want the function argument overhead
+ * in the nondebug case - hence these macros:
+ */
+#define __IP_DECL__ , unsigned long ip
+#define __IP__ , ip
+#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
+
+/*
+ * "mutex debugging enabled" flag. We turn it off when we detect
+ * the first problem because we dont want to recurse back
+ * into the tracing code when doing error printk or
+ * executing a BUG():
+ */
+int debug_mutex_on = 1;
+
+static void printk_task(struct task_struct *p)
+{
+ if (p)
+ printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
+ else
+ printk("<none>");
+}
+
+static void printk_ti(struct thread_info *ti)
+{
+ if (ti)
+ printk_task(ti->task);
+ else
+ printk("<none>");
+}
+
+static void printk_task_short(struct task_struct *p)
+{
+ if (p)
+ printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
+ else
+ printk("<none>");
+}
+
+static void printk_lock(struct mutex *lock, int print_owner)
+{
+ printk(" [%p] {%s}\n", lock, lock->name);
+
+ if (print_owner && lock->owner) {
+ printk(".. held by: ");
+ printk_ti(lock->owner);
+ printk("\n");
+ }
+ if (lock->owner) {
+ printk("... acquired at: ");
+ print_symbol("%s\n", lock->acquire_ip);
+ }
+}
+
+/*
+ * printk locks held by a task:
+ */
+static void show_task_locks(struct task_struct *p)
+{
+ switch (p->state) {
+ case TASK_RUNNING: printk("R"); break;
+ case TASK_INTERRUPTIBLE: printk("S"); break;
+ case TASK_UNINTERRUPTIBLE: printk("D"); break;
+ case TASK_STOPPED: printk("T"); break;
+ case EXIT_ZOMBIE: printk("Z"); break;
+ case EXIT_DEAD: printk("X"); break;
+ default: printk("?"); break;
+ }
+ printk_task(p);
+ if (p->blocked_on) {
+ struct mutex *lock = p->blocked_on->lock;
+
+ printk(" blocked on mutex:");
+ printk_lock(lock, 1);
+ } else
+ printk(" (not blocked on mutex)\n");
+}
+
+/*
+ * printk all locks held in the system (if filter == NULL),
+ * or all locks belonging to a single task (if filter != NULL):
+ */
+void show_held_locks(struct task_struct *filter)
+{
+ struct list_head *curr, *cursor = NULL;
+ struct mutex *lock;
+ struct thread_info *t;
+ unsigned long flags;
+ int count = 0;
+
+ if (filter) {
+ printk("------------------------------\n");
+ printk("| showing all locks held by: | (");
+ printk_task_short(filter);
+ printk("):\n");
+ printk("------------------------------\n");
+ } else {
+ printk("---------------------------\n");
+ printk("| showing all locks held: |\n");
+ printk("---------------------------\n");
+ }
+
+ /*
+ * Play safe and acquire the global trace lock. We
+ * cannot printk with that lock held so we iterate
+ * very carefully:
+ */
+next:
+ debug_spin_lock_save(&debug_mutex_lock, flags);
+ list_for_each(curr, &debug_mutex_held_locks) {
+ if (cursor && curr != cursor)
+ continue;
+ lock = list_entry(curr, struct mutex, held_list);
+ t = lock->owner;
+ if (filter && (t != filter->thread_info))
+ continue;
+ count++;
+ cursor = curr->next;
+ debug_spin_lock_restore(&debug_mutex_lock, flags);
+
+ printk("\n#%03d: ", count);
+ printk_lock(lock, filter ? 0 : 1);
+ goto next;
+ }
+ debug_spin_lock_restore(&debug_mutex_lock, flags);
+ printk("\n");
+}
+
+void mutex_debug_show_all_locks(void)
+{
+ struct task_struct *g, *p;
+ int count = 10;
+ int unlock = 1;
+
+ printk("\nShowing all blocking locks in the system:\n");
+
+ /*
+ * Here we try to get the tasklist_lock as hard as possible,
+ * if not successful after 2 seconds we ignore it (but keep
+ * trying). This is to enable a debug printout even if a
+ * tasklist_lock-holding task deadlocks or crashes.
+ */
+retry:
+ if (!read_trylock(&tasklist_lock)) {
+ if (count == 10)
+ printk("hm, tasklist_lock locked, retrying... ");
+ if (count) {
+ count--;
+ printk(" #%d", 10-count);
+ mdelay(200);
+ goto retry;
+ }
+ printk(" ignoring it.\n");
+ unlock = 0;
+ }
+ if (count != 10)
+ printk(" locked it.\n");
+
+ do_each_thread(g, p) {
+ show_task_locks(p);
+ if (!unlock)
+ if (read_trylock(&tasklist_lock))
+ unlock = 1;
+ } while_each_thread(g, p);
+
+ printk("\n");
+ show_held_locks(NULL);
+ printk("=============================================\n\n");
+
+ if (unlock)
+ read_unlock(&tasklist_lock);
+}
+
+static void report_deadlock(struct task_struct *task, struct mutex *lock,
+ struct mutex *lockblk, unsigned long ip)
+{
+ printk("\n%s/%d is trying to acquire this lock:\n",
+ current->comm, current->pid);
+ printk_lock(lock, 1);
+ printk("... trying at: ");
+ print_symbol("%s\n", ip);
+ show_held_locks(current);
+
+ if (lockblk) {
+ printk("but %s/%d is deadlocking current task %s/%d!\n\n",
+ task->comm, task->pid, current->comm, current->pid);
+ printk("\n%s/%d is blocked on this lock:\n",
+ task->comm, task->pid);
+ printk_lock(lockblk, 1);
+
+ show_held_locks(task);
+
+ printk("\n%s/%d's [blocked] stackdump:\n\n",
+ task->comm, task->pid);
+ show_stack(task, NULL);
+ }
+
+ printk("\n%s/%d's [current] stackdump:\n\n",
+ current->comm, current->pid);
+ dump_stack();
+ mutex_debug_show_all_locks();
+ printk("[ turning off deadlock detection. Please report this. ]\n\n");
+ local_irq_disable();
+}
+
+/*
+ * Recursively check for mutex deadlocks:
+ */
+static int check_deadlock(struct mutex *lock, int depth,
+ struct thread_info *ti, unsigned long ip)
+{
+ struct mutex *lockblk;
+ struct task_struct *task;
+
+ if (!debug_mutex_on)
+ return 0;
+
+ ti = lock->owner;
+ if (!ti)
+ return 0;
+
+ task = ti->task;
+ lockblk = NULL;
+ if (task->blocked_on)
+ lockblk = task->blocked_on->lock;
+
+ /* Self-deadlock: */
+ if (current == task) {
+ DEBUG_OFF();
+ if (depth)
+ return 1;
+ printk("\n==========================================\n");
+ printk( "[ BUG: lock recursion deadlock detected! |\n");
+ printk( "------------------------------------------\n");
+ report_deadlock(task, lock, NULL, ip);
+ return 0;
+ }
+
+ /* Ugh, something corrupted the lock data structure? */
+ if (depth > 20) {
+ DEBUG_OFF();
+ printk("\n===========================================\n");
+ printk( "[ BUG: infinite lock dependency detected!? |\n");
+ printk( "-------------------------------------------\n");
+ report_deadlock(task, lock, lockblk, ip);
+ return 0;
+ }
+
+ /* Recursively check for dependencies: */
+ if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) {
+ printk("\n============================================\n");
+ printk( "[ BUG: circular locking deadlock detected! ]\n");
+ printk( "--------------------------------------------\n");
+ report_deadlock(task, lock, lockblk, ip);
+ return 0;
+ }
+ return 0;
+}
+
+/*
+ * Called when a task exits, this function checks whether the
+ * task is holding any locks, and reports the first one if so:
+ */
+void mutex_debug_check_no_locks_held(struct task_struct *task)
+{
+ struct list_head *curr, *next;
+ struct thread_info *t;
+ unsigned long flags;
+ struct mutex *lock;
+
+ if (!debug_mutex_on)
+ return;
+
+ debug_spin_lock_save(&debug_mutex_lock, flags);
+ list_for_each_safe(curr, next, &debug_mutex_held_locks) {
+ lock = list_entry(curr, struct mutex, held_list);
+ t = lock->owner;
+ if (t != task->thread_info)
+ continue;
+ list_del_init(curr);
+ DEBUG_OFF();
+ debug_spin_lock_restore(&debug_mutex_lock, flags);
+
+ printk("BUG: %s/%d, lock held at task exit time!\n",
+ task->comm, task->pid);
+ printk_lock(lock, 1);
+ if (lock->owner != task->thread_info)
+ printk("exiting task is not even the owner??\n");
+ return;
+ }
+ debug_spin_lock_restore(&debug_mutex_lock, flags);
+}
+
+/*
+ * Called when kernel memory is freed (or unmapped), or if a mutex
+ * is destroyed or reinitialized - this code checks whether there is
+ * any held lock in the memory range of <from> to <to>:
+ */
+void mutex_debug_check_no_locks_freed(const void *from, const void *to)
+{
+ struct list_head *curr, *next;
+ unsigned long flags;
+ struct mutex *lock;
+ void *lock_addr;
+
+ if (!debug_mutex_on)
+ return;
+
+ debug_spin_lock_save(&debug_mutex_lock, flags);
+ list_for_each_safe(curr, next, &debug_mutex_held_locks) {
+ lock = list_entry(curr, struct mutex, held_list);
+ lock_addr = lock;
+ if (lock_addr < from || lock_addr >= to)
+ continue;
+ list_del_init(curr);
+ DEBUG_OFF();
+ debug_spin_lock_restore(&debug_mutex_lock, flags);
+
+ printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
+ current->comm, current->pid, lock, from, to);
+ dump_stack();
+ printk_lock(lock, 1);
+ if (lock->owner != current_thread_info())
+ printk("freeing task is not even the owner??\n");
+ return;
+ }
+ debug_spin_lock_restore(&debug_mutex_lock, flags);
+}
+
+/*
+ * Must be called with lock->wait_lock held.
+ */
+void debug_mutex_set_owner(struct mutex *lock,
+ struct thread_info *new_owner __IP_DECL__)
+{
+ lock->owner = new_owner;
+ DEBUG_WARN_ON(!list_empty(&lock->held_list));
+ if (debug_mutex_on) {
+ list_add_tail(&lock->held_list, &debug_mutex_held_locks);
+ lock->acquire_ip = ip;
+ }
+}
+
+void debug_mutex_init_waiter(struct mutex_waiter *waiter)
+{
+ memset(waiter, 0x11, sizeof(*waiter));
+ waiter->magic = waiter;
+ INIT_LIST_HEAD(&waiter->list);
+}
+
+void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
+{
+ SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock));
+ DEBUG_WARN_ON(list_empty(&lock->wait_list));
+ DEBUG_WARN_ON(waiter->magic != waiter);
+ DEBUG_WARN_ON(list_empty(&waiter->list));
+}
+
+void debug_mutex_free_waiter(struct mutex_waiter *waiter)
+{
+ DEBUG_WARN_ON(!list_empty(&waiter->list));
+ memset(waiter, 0x22, sizeof(*waiter));
+}
+
+void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+ struct thread_info *ti __IP_DECL__)
+{
+ SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock));
+ check_deadlock(lock, 0, ti, ip);
+ /* Mark the current thread as blocked on the lock: */
+ ti->task->blocked_on = waiter;
+ waiter->lock = lock;
+}
+
+void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+ struct thread_info *ti)
+{
+ DEBUG_WARN_ON(list_empty(&waiter->list));
+ DEBUG_WARN_ON(waiter->task != ti->task);
+ DEBUG_WARN_ON(ti->task->blocked_on != waiter);
+ ti->task->blocked_on = NULL;
+
+ list_del_init(&waiter->list);
+ waiter->task = NULL;
+}
+
+void debug_mutex_unlock(struct mutex *lock)
+{
+ DEBUG_WARN_ON(lock->magic != lock);
+ DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
+ DEBUG_WARN_ON(lock->owner != current_thread_info());
+ if (debug_mutex_on) {
+ DEBUG_WARN_ON(list_empty(&lock->held_list));
+ list_del_init(&lock->held_list);
+ }
+}
+
+void debug_mutex_init(struct mutex *lock, const char *name)
+{
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ mutex_debug_check_no_locks_freed((void *)lock, (void *)(lock + 1));
+ lock->owner = NULL;
+ INIT_LIST_HEAD(&lock->held_list);
+ lock->name = name;
+ lock->magic = lock;
+}
+
+/***
+ * mutex_destroy - mark a mutex unusable
+ * @lock: the mutex to be destroyed
+ *
+ * This function marks the mutex uninitialized, and any subsequent
+ * use of the mutex is forbidden. The mutex must not be locked when
+ * this function is called.
+ */
+void fastcall mutex_destroy(struct mutex *lock)
+{
+ DEBUG_WARN_ON(mutex_is_locked(lock));
+ lock->magic = NULL;
+}
+
+EXPORT_SYMBOL_GPL(mutex_destroy);
+
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
new file mode 100644
index 000000000000..fd384050acb1
--- /dev/null
+++ b/kernel/mutex-debug.h
@@ -0,0 +1,134 @@
+/*
+ * Mutexes: blocking mutual exclusion locks
+ *
+ * started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * This file contains mutex debugging related internal declarations,
+ * prototypes and inline functions, for the CONFIG_DEBUG_MUTEXES case.
+ * More details are in kernel/mutex-debug.c.
+ */
+
+extern spinlock_t debug_mutex_lock;
+extern struct list_head debug_mutex_held_locks;
+extern int debug_mutex_on;
+
+/*
+ * In the debug case we carry the caller's instruction pointer into
+ * other functions, but we dont want the function argument overhead
+ * in the nondebug case - hence these macros:
+ */
+#define __IP_DECL__ , unsigned long ip
+#define __IP__ , ip
+#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
+
+/*
+ * This must be called with lock->wait_lock held.
+ */
+extern void debug_mutex_set_owner(struct mutex *lock,
+ struct thread_info *new_owner __IP_DECL__);
+
+static inline void debug_mutex_clear_owner(struct mutex *lock)
+{
+ lock->owner = NULL;
+}
+
+extern void debug_mutex_init_waiter(struct mutex_waiter *waiter);
+extern void debug_mutex_wake_waiter(struct mutex *lock,
+ struct mutex_waiter *waiter);
+extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
+extern void debug_mutex_add_waiter(struct mutex *lock,
+ struct mutex_waiter *waiter,
+ struct thread_info *ti __IP_DECL__);
+extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
+ struct thread_info *ti);
+extern void debug_mutex_unlock(struct mutex *lock);
+extern void debug_mutex_init(struct mutex *lock, const char *name);
+
+#define debug_spin_lock(lock) \
+ do { \
+ local_irq_disable(); \
+ if (debug_mutex_on) \
+ spin_lock(lock); \
+ } while (0)
+
+#define debug_spin_unlock(lock) \
+ do { \
+ if (debug_mutex_on) \
+ spin_unlock(lock); \
+ local_irq_enable(); \
+ preempt_check_resched(); \
+ } while (0)
+
+#define debug_spin_lock_save(lock, flags) \
+ do { \
+ local_irq_save(flags); \
+ if (debug_mutex_on) \
+ spin_lock(lock); \
+ } while (0)
+
+#define debug_spin_lock_restore(lock, flags) \
+ do { \
+ if (debug_mutex_on) \
+ spin_unlock(lock); \
+ local_irq_restore(flags); \
+ preempt_check_resched(); \
+ } while (0)
+
+#define spin_lock_mutex(lock) \
+ do { \
+ struct mutex *l = container_of(lock, struct mutex, wait_lock); \
+ \
+ DEBUG_WARN_ON(in_interrupt()); \
+ debug_spin_lock(&debug_mutex_lock); \
+ spin_lock(lock); \
+ DEBUG_WARN_ON(l->magic != l); \
+ } while (0)
+
+#define spin_unlock_mutex(lock) \
+ do { \
+ spin_unlock(lock); \
+ debug_spin_unlock(&debug_mutex_lock); \
+ } while (0)
+
+#define DEBUG_OFF() \
+do { \
+ if (debug_mutex_on) { \
+ debug_mutex_on = 0; \
+ console_verbose(); \
+ if (spin_is_locked(&debug_mutex_lock)) \
+ spin_unlock(&debug_mutex_lock); \
+ } \
+} while (0)
+
+#define DEBUG_BUG() \
+do { \
+ if (debug_mutex_on) { \
+ DEBUG_OFF(); \
+ BUG(); \
+ } \
+} while (0)
+
+#define DEBUG_WARN_ON(c) \
+do { \
+ if (unlikely(c && debug_mutex_on)) { \
+ DEBUG_OFF(); \
+ WARN_ON(1); \
+ } \
+} while (0)
+
+# define DEBUG_BUG_ON(c) \
+do { \
+ if (unlikely(c)) \
+ DEBUG_BUG(); \
+} while (0)
+
+#ifdef CONFIG_SMP
+# define SMP_DEBUG_WARN_ON(c) DEBUG_WARN_ON(c)
+# define SMP_DEBUG_BUG_ON(c) DEBUG_BUG_ON(c)
+#else
+# define SMP_DEBUG_WARN_ON(c) do { } while (0)
+# define SMP_DEBUG_BUG_ON(c) do { } while (0)
+#endif
+
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c48260fb8fd9..1fcd856edec1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -95,6 +95,14 @@ config DEBUG_PREEMPT
if kernel code uses it in a preemption-unsafe way. Also, the kernel
will detect preemption count underflows.
+config DEBUG_MUTEXES
+ bool "Mutex debugging, deadlock detection"
+ default y
+ depends on DEBUG_KERNEL
+ help
+ This allows mutex semantics violations and mutex related deadlocks
+ (lockups) to be detected and reported automatically.
+
config DEBUG_SPINLOCK
bool "Spinlock debugging"
depends on DEBUG_KERNEL
commit f3f54ffa703c6298240ffd69616451d645bae4d5
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:20 2006 -0800
[PATCH] mutex subsystem, documentation
Add mutex design related documentation.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 90dc2de8e0af..158ffe9bfade 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -222,7 +222,7 @@
<title>Two Main Types of Kernel Locks: Spinlocks and Semaphores</title>
<para>
- There are two main types of kernel locks. The fundamental type
+ There are three main types of kernel locks. The fundamental type
is the spinlock
(<filename class="headerfile">include/asm/spinlock.h</filename>),
which is a very simple single-holder lock: if you can't get the
@@ -230,16 +230,22 @@
very small and fast, and can be used anywhere.
</para>
<para>
- The second type is a semaphore
+ The second type is a mutex
+ (<filename class="headerfile">include/linux/mutex.h</filename>): it
+ is like a spinlock, but you may block holding a mutex.
+ If you can't lock a mutex, your task will suspend itself, and be woken
+ up when the mutex is released. This means the CPU can do something
+ else while you are waiting. There are many cases when you simply
+ can't sleep (see <xref linkend="sleeping-things"/>), and so have to
+ use a spinlock instead.
+ </para>
+ <para>
+ The third type is a semaphore
(<filename class="headerfile">include/asm/semaphore.h</filename>): it
can have more than one holder at any time (the number decided at
initialization time), although it is most commonly used as a
- single-holder lock (a mutex). If you can't get a semaphore,
- your task will put itself on the queue, and be woken up when the
- semaphore is released. This means the CPU will do something
- else while you are waiting, but there are many cases when you
- simply can't sleep (see <xref linkend="sleeping-things"/>), and so
- have to use a spinlock instead.
+ single-holder lock (a mutex). If you can't get a semaphore, your
+ task will be suspended and later on woken up - just like for mutexes.
</para>
<para>
Neither type of lock is recursive: see
diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt
new file mode 100644
index 000000000000..cbf79881a41c
--- /dev/null
+++ b/Documentation/mutex-design.txt
@@ -0,0 +1,135 @@
+Generic Mutex Subsystem
+
+started by Ingo Molnar <mingo@redhat.com>
+
+ "Why on earth do we need a new mutex subsystem, and what's wrong
+ with semaphores?"
+
+firstly, there's nothing wrong with semaphores. But if the simpler
+mutex semantics are sufficient for your code, then there are a couple
+of advantages of mutexes:
+
+ - 'struct mutex' is smaller on most architectures: .e.g on x86,
+ 'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes.
+ A smaller structure size means less RAM footprint, and better
+ CPU-cache utilization.
+
+ - tighter code. On x86 i get the following .text sizes when
+ switching all mutex-alike semaphores in the kernel to the mutex
+ subsystem:
+
+ text data bss dec hex filename
+ 3280380 868188 396860 4545428 455b94 vmlinux-semaphore
+ 3255329 865296 396732 4517357 44eded vmlinux-mutex
+
+ that's 25051 bytes of code saved, or a 0.76% win - off the hottest
+ codepaths of the kernel. (The .data savings are 2892 bytes, or 0.33%)
+ Smaller code means better icache footprint, which is one of the
+ major optimization goals in the Linux kernel currently.
+
+ - the mutex subsystem is slightly faster and has better scalability for
+ contended workloads. On an 8-way x86 system, running a mutex-based
+ kernel and testing creat+unlink+close (of separate, per-task files)
+ in /tmp with 16 parallel tasks, the average number of ops/sec is:
+
+ Semaphores: Mutexes:
+
+ $ ./test-mutex V 16 10 $ ./test-mutex V 16 10
+ 8 CPUs, running 16 tasks. 8 CPUs, running 16 tasks.
+ checking VFS performance. checking VFS performance.
+ avg loops/sec: 34713 avg loops/sec: 84153
+ CPU utilization: 63% CPU utilization: 22%
+
+ i.e. in this workload, the mutex based kernel was 2.4 times faster
+ than the semaphore based kernel, _and_ it also had 2.8 times less CPU
+ utilization. (In terms of 'ops per CPU cycle', the semaphore kernel
+ performed 551 ops/sec per 1% of CPU time used, while the mutex kernel
+ performed 3825 ops/sec per 1% of CPU time used - it was 6.9 times
+ more efficient.)
+
+ the scalability difference is visible even on a 2-way P4 HT box:
+
+ Semaphores: Mutexes:
+
+ $ ./test-mutex V 16 10 $ ./test-mutex V 16 10
+ 4 CPUs, running 16 tasks. 8 CPUs, running 16 tasks.
+ checking VFS performance. checking VFS performance.
+ avg loops/sec: 127659 avg loops/sec: 181082
+ CPU utilization: 100% CPU utilization: 34%
+
+ (the straight performance advantage of mutexes is 41%, the per-cycle
+ efficiency of mutexes is 4.1 times better.)
+
+ - there are no fastpath tradeoffs, the mutex fastpath is just as tight
+ as the semaphore fastpath. On x86, the locking fastpath is 2
+ instructions:
+
+ c0377ccb <mutex_lock>:
+ c0377ccb: f0 ff 08 lock decl (%eax)
+ c0377cce: 78 0e js c0377cde <.text.lock.mutex>
+ c0377cd0: c3 ret
+
+ the unlocking fastpath is equally tight:
+
+ c0377cd1 <mutex_unlock>:
+ c0377cd1: f0 ff 00 lock incl (%eax)
+ c0377cd4: 7e 0f jle c0377ce5 <.text.lock.mutex+0x7>
+ c0377cd6: c3 ret
+
+ - 'struct mutex' semantics are well-defined and are enforced if
+ CONFIG_DEBUG_MUTEXES is turned on. Semaphores on the other hand have
+ virtually no debugging code or instrumentation. The mutex subsystem
+ checks and enforces the following rules:
+
+ * - only one task can hold the mutex at a time
+ * - only the owner can unlock the mutex
+ * - multiple unlocks are not permitted
+ * - recursive locking is not permitted
+ * - a mutex object must be initialized via the API
+ * - a mutex object must not be initialized via memset or copying
+ * - task may not exit with mutex held
+ * - memory areas where held locks reside must not be freed
+ * - held mutexes must not be reinitialized
+ * - mutexes may not be used in irq contexts
+
+ furthermore, there are also convenience features in the debugging
+ code:
+
+ * - uses symbolic names of mutexes, whenever they are printed in debug output
+ * - point-of-acquire tracking, symbolic lookup of function names
+ * - list of all locks held in the system, printout of them
+ * - owner tracking
+ * - detects self-recursing locks and prints out all relevant info
+ * - detects multi-task circular deadlocks and prints out all affected
+ * locks and tasks (and only those tasks)
+
+Disadvantages
+-------------
+
+The stricter mutex API means you cannot use mutexes the same way you
+can use semaphores: e.g. they cannot be used from an interrupt context,
+nor can they be unlocked from a different context that which acquired
+it. [ I'm not aware of any other (e.g. performance) disadvantages from
+using mutexes at the moment, please let me know if you find any. ]
+
+Implementation of mutexes
+-------------------------
+
+'struct mutex' is the new mutex type, defined in include/linux/mutex.h
+and implemented in kernel/mutex.c. It is a counter-based mutex with a
+spinlock and a wait-list. The counter has 3 states: 1 for "unlocked",
+0 for "locked" and negative numbers (usually -1) for "locked, potential
+waiters queued".
+
+the APIs of 'struct mutex' have been streamlined:
+
+ DEFINE_MUTEX(name);
+
+ mutex_init(mutex);
+
+ void mutex_lock(struct mutex *lock);
+ int mutex_lock_interruptible(struct mutex *lock);
+ int mutex_trylock(struct mutex *lock);
+ void mutex_unlock(struct mutex *lock);
+ int mutex_is_locked(struct mutex *lock);
+
commit 6053ee3b32e3437e8c1e72687850f436e779bd49
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:19 2006 -0800
[PATCH] mutex subsystem, core
mutex implementation, core files: just the basic subsystem, no users of it.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
new file mode 100644
index 000000000000..9bce0fee68d4
--- /dev/null
+++ b/include/linux/mutex.h
@@ -0,0 +1,119 @@
+/*
+ * Mutexes: blocking mutual exclusion locks
+ *
+ * started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * This file contains the main data structure and API definitions.
+ */
+#ifndef __LINUX_MUTEX_H
+#define __LINUX_MUTEX_H
+
+#include <linux/list.h>
+#include <linux/spinlock_types.h>
+
+#include <asm/atomic.h>
+
+/*
+ * Simple, straightforward mutexes with strict semantics:
+ *
+ * - only one task can hold the mutex at a time
+ * - only the owner can unlock the mutex
+ * - multiple unlocks are not permitted
+ * - recursive locking is not permitted
+ * - a mutex object must be initialized via the API
+ * - a mutex object must not be initialized via memset or copying
+ * - task may not exit with mutex held
+ * - memory areas where held locks reside must not be freed
+ * - held mutexes must not be reinitialized
+ * - mutexes may not be used in irq contexts
+ *
+ * These semantics are fully enforced when DEBUG_MUTEXES is
+ * enabled. Furthermore, besides enforcing the above rules, the mutex
+ * debugging code also implements a number of additional features
+ * that make lock debugging easier and faster:
+ *
+ * - uses symbolic names of mutexes, whenever they are printed in debug output
+ * - point-of-acquire tracking, symbolic lookup of function names
+ * - list of all locks held in the system, printout of them
+ * - owner tracking
+ * - detects self-recursing locks and prints out all relevant info
+ * - detects multi-task circular deadlocks and prints out all affected
+ * locks and tasks (and only those tasks)
+ */
+struct mutex {
+ /* 1: unlocked, 0: locked, negative: locked, possible waiters */
+ atomic_t count;
+ spinlock_t wait_lock;
+ struct list_head wait_list;
+#ifdef CONFIG_DEBUG_MUTEXES
+ struct thread_info *owner;
+ struct list_head held_list;
+ unsigned long acquire_ip;
+ const char *name;
+ void *magic;
+#endif
+};
+
+/*
+ * This is the control structure for tasks blocked on mutex,
+ * which resides on the blocked task's kernel stack:
+ */
+struct mutex_waiter {
+ struct list_head list;
+ struct task_struct *task;
+#ifdef CONFIG_DEBUG_MUTEXES
+ struct mutex *lock;
+ void *magic;
+#endif
+};
+
+#ifdef CONFIG_DEBUG_MUTEXES
+# include <linux/mutex-debug.h>
+#else
+# define __DEBUG_MUTEX_INITIALIZER(lockname)
+# define mutex_init(mutex) __mutex_init(mutex, NULL)
+# define mutex_destroy(mutex) do { } while (0)
+# define mutex_debug_show_all_locks() do { } while (0)
+# define mutex_debug_show_held_locks(p) do { } while (0)
+# define mutex_debug_check_no_locks_held(task) do { } while (0)
+# define mutex_debug_check_no_locks_freed(from, to) do { } while (0)
+#endif
+
+#define __MUTEX_INITIALIZER(lockname) \
+ { .count = ATOMIC_INIT(1) \
+ , .wait_lock = SPIN_LOCK_UNLOCKED \
+ , .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
+ __DEBUG_MUTEX_INITIALIZER(lockname) }
+
+#define DEFINE_MUTEX(mutexname) \
+ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
+
+extern void fastcall __mutex_init(struct mutex *lock, const char *name);
+
+/***
+ * mutex_is_locked - is the mutex locked
+ * @lock: the mutex to be queried
+ *
+ * Returns 1 if the mutex is locked, 0 if unlocked.
+ */
+static inline int fastcall mutex_is_locked(struct mutex *lock)
+{
+ return atomic_read(&lock->count) != 1;
+}
+
+/*
+ * See kernel/mutex.c for detailed documentation of these APIs.
+ * Also see Documentation/mutex-design.txt.
+ */
+extern void fastcall mutex_lock(struct mutex *lock);
+extern int fastcall mutex_lock_interruptible(struct mutex *lock);
+/*
+ * NOTE: mutex_trylock() follows the spin_trylock() convention,
+ * not the down_trylock() convention!
+ */
+extern int fastcall mutex_trylock(struct mutex *lock);
+extern void fastcall mutex_unlock(struct mutex *lock);
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 4f5a1453093a..de580b4d54a4 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
- kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o
+ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
diff --git a/kernel/mutex.c b/kernel/mutex.c
new file mode 100644
index 000000000000..7eb960661441
--- /dev/null
+++ b/kernel/mutex.c
@@ -0,0 +1,325 @@
+/*
+ * kernel/mutex.c
+ *
+ * Mutexes: blocking mutual exclusion locks
+ *
+ * Started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and
+ * David Howells for suggestions and improvements.
+ *
+ * Also see Documentation/mutex-design.txt.
+ */
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+
+/*
+ * In the DEBUG case we are using the "NULL fastpath" for mutexes,
+ * which forces all calls into the slowpath:
+ */
+#ifdef CONFIG_DEBUG_MUTEXES
+# include "mutex-debug.h"
+# include <asm-generic/mutex-null.h>
+#else
+# include "mutex.h"
+# include <asm/mutex.h>
+#endif
+
+/***
+ * mutex_init - initialize the mutex
+ * @lock: the mutex to be initialized
+ *
+ * Initialize the mutex to unlocked state.
+ *
+ * It is not allowed to initialize an already locked mutex.
+ */
+void fastcall __mutex_init(struct mutex *lock, const char *name)
+{
+ atomic_set(&lock->count, 1);
+ spin_lock_init(&lock->wait_lock);
+ INIT_LIST_HEAD(&lock->wait_list);
+
+ debug_mutex_init(lock, name);
+}
+
+EXPORT_SYMBOL(__mutex_init);
+
+/*
+ * We split the mutex lock/unlock logic into separate fastpath and
+ * slowpath functions, to reduce the register pressure on the fastpath.
+ * We also put the fastpath first in the kernel image, to make sure the
+ * branch is predicted by the CPU as default-untaken.
+ */
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
+
+/***
+ * mutex_lock - acquire the mutex
+ * @lock: the mutex to be acquired
+ *
+ * Lock the mutex exclusively for this task. If the mutex is not
+ * available right now, it will sleep until it can get it.
+ *
+ * The mutex must later on be released by the same task that
+ * acquired it. Recursive locking is not allowed. The task
+ * may not exit without first unlocking the mutex. Also, kernel
+ * memory where the mutex resides mutex must not be freed with
+ * the mutex still locked. The mutex must first be initialized
+ * (or statically defined) before it can be locked. memset()-ing
+ * the mutex to 0 is not allowed.
+ *
+ * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
+ * checks that will enforce the restrictions and will also do
+ * deadlock debugging. )
+ *
+ * This function is similar to (but not equivalent to) down().
+ */
+void fastcall __sched mutex_lock(struct mutex *lock)
+{
+ /*
+ * The locking fastpath is the 1->0 transition from
+ * 'unlocked' into 'locked' state.
+ *
+ * NOTE: if asm/mutex.h is included, then some architectures
+ * rely on mutex_lock() having _no other code_ here but this
+ * fastpath. That allows the assembly fastpath to do
+ * tail-merging optimizations. (If you want to put testcode
+ * here, do it under #ifndef CONFIG_MUTEX_DEBUG.)
+ */
+ __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
+}
+
+EXPORT_SYMBOL(mutex_lock);
+
+static void fastcall noinline __sched
+__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__);
+
+/***
+ * mutex_unlock - release the mutex
+ * @lock: the mutex to be released
+ *
+ * Unlock a mutex that has been locked by this task previously.
+ *
+ * This function must not be used in interrupt context. Unlocking
+ * of a not locked mutex is not allowed.
+ *
+ * This function is similar to (but not equivalent to) up().
+ */
+void fastcall __sched mutex_unlock(struct mutex *lock)
+{
+ /*
+ * The unlocking fastpath is the 0->1 transition from 'locked'
+ * into 'unlocked' state:
+ *
+ * NOTE: no other code must be here - see mutex_lock() .
+ */
+ __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
+}
+
+EXPORT_SYMBOL(mutex_unlock);
+
+/*
+ * Lock a mutex (possibly interruptible), slowpath:
+ */
+static inline int __sched
+__mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
+{
+ struct task_struct *task = current;
+ struct mutex_waiter waiter;
+ unsigned int old_val;
+
+ debug_mutex_init_waiter(&waiter);
+
+ spin_lock_mutex(&lock->wait_lock);
+
+ debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip);
+
+ /* add waiting tasks to the end of the waitqueue (FIFO): */
+ list_add_tail(&waiter.list, &lock->wait_list);
+ waiter.task = task;
+
+ for (;;) {
+ /*
+ * Lets try to take the lock again - this is needed even if
+ * we get here for the first time (shortly after failing to
+ * acquire the lock), to make sure that we get a wakeup once
+ * it's unlocked. Later on, if we sleep, this is the
+ * operation that gives us the lock. We xchg it to -1, so
+ * that when we release the lock, we properly wake up the
+ * other waiters:
+ */
+ old_val = atomic_xchg(&lock->count, -1);
+ if (old_val == 1)
+ break;
+
+ /*
+ * got a signal? (This code gets eliminated in the
+ * TASK_UNINTERRUPTIBLE case.)
+ */
+ if (unlikely(state == TASK_INTERRUPTIBLE &&
+ signal_pending(task))) {
+ mutex_remove_waiter(lock, &waiter, task->thread_info);
+ spin_unlock_mutex(&lock->wait_lock);
+
+ debug_mutex_free_waiter(&waiter);
+ return -EINTR;
+ }
+ __set_task_state(task, state);
+
+ /* didnt get the lock, go to sleep: */
+ spin_unlock_mutex(&lock->wait_lock);
+ schedule();
+ spin_lock_mutex(&lock->wait_lock);
+ }
+
+ /* got the lock - rejoice! */
+ mutex_remove_waiter(lock, &waiter, task->thread_info);
+ debug_mutex_set_owner(lock, task->thread_info __IP__);
+
+ /* set it to 0 if there are no waiters left: */
+ if (likely(list_empty(&lock->wait_list)))
+ atomic_set(&lock->count, 0);
+
+ spin_unlock_mutex(&lock->wait_lock);
+
+ debug_mutex_free_waiter(&waiter);
+
+ DEBUG_WARN_ON(list_empty(&lock->held_list));
+ DEBUG_WARN_ON(lock->owner != task->thread_info);
+
+ return 0;
+}
+
+static void fastcall noinline __sched
+__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__)
+{
+ struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__);
+}
+
+/*
+ * Release the lock, slowpath:
+ */
+static fastcall noinline void
+__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
+{
+ struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+ DEBUG_WARN_ON(lock->owner != current_thread_info());
+
+ spin_lock_mutex(&lock->wait_lock);
+
+ /*
+ * some architectures leave the lock unlocked in the fastpath failure
+ * case, others need to leave it locked. In the later case we have to
+ * unlock it here
+ */
+ if (__mutex_slowpath_needs_to_unlock())
+ atomic_set(&lock->count, 1);
+
+ debug_mutex_unlock(lock);
+
+ if (!list_empty(&lock->wait_list)) {
+ /* get the first entry from the wait-list: */
+ struct mutex_waiter *waiter =
+ list_entry(lock->wait_list.next,
+ struct mutex_waiter, list);
+
+ debug_mutex_wake_waiter(lock, waiter);
+
+ wake_up_process(waiter->task);
+ }
+
+ debug_mutex_clear_owner(lock);
+
+ spin_unlock_mutex(&lock->wait_lock);
+}
+
+/*
+ * Here come the less common (and hence less performance-critical) APIs:
+ * mutex_lock_interruptible() and mutex_trylock().
+ */
+static int fastcall noinline __sched
+__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__);
+
+/***
+ * mutex_lock_interruptible - acquire the mutex, interruptable
+ * @lock: the mutex to be acquired
+ *
+ * Lock the mutex like mutex_lock(), and return 0 if the mutex has
+ * been acquired or sleep until the mutex becomes available. If a
+ * signal arrives while waiting for the lock then this function
+ * returns -EINTR.
+ *
+ * This function is similar to (but not equivalent to) down_interruptible().
+ */
+int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
+{
+ /* NOTE: no other code must be here - see mutex_lock() */
+ return __mutex_fastpath_lock_retval
+ (&lock->count, __mutex_lock_interruptible_slowpath);
+}
+
+EXPORT_SYMBOL(mutex_lock_interruptible);
+
+static int fastcall noinline __sched
+__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__)
+{
+ struct mutex *lock = container_of(lock_count, struct mutex, count);
+
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__);
+}
+
+/*
+ * Spinlock based trylock, we take the spinlock and check whether we
+ * can get the lock:
+ */
+static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
+{
+ struct mutex *lock = container_of(lock_count, struct mutex, count);
+ int prev;
+
+ spin_lock_mutex(&lock->wait_lock);
+
+ prev = atomic_xchg(&lock->count, -1);
+ if (likely(prev == 1))
+ debug_mutex_set_owner(lock, current_thread_info() __RET_IP__);
+ /* Set it back to 0 if there are no waiters: */
+ if (likely(list_empty(&lock->wait_list)))
+ atomic_set(&lock->count, 0);
+
+ spin_unlock_mutex(&lock->wait_lock);
+
+ return prev == 1;
+}
+
+/***
+ * mutex_trylock - try acquire the mutex, without waiting
+ * @lock: the mutex to be acquired
+ *
+ * Try to acquire the mutex atomically. Returns 1 if the mutex
+ * has been acquired successfully, and 0 on contention.
+ *
+ * NOTE: this function follows the spin_trylock() convention, so
+ * it is negated to the down_trylock() return values! Be careful
+ * about this when converting semaphore users to mutexes.
+ *
+ * This function must not be used in interrupt context. The
+ * mutex must be released by the same task that acquired it.
+ */
+int fastcall mutex_trylock(struct mutex *lock)
+{
+ return __mutex_fastpath_trylock(&lock->count,
+ __mutex_trylock_slowpath);
+}
+
+EXPORT_SYMBOL(mutex_trylock);
+
+
+
diff --git a/kernel/mutex.h b/kernel/mutex.h
new file mode 100644
index 000000000000..00fe84e7b672
--- /dev/null
+++ b/kernel/mutex.h
@@ -0,0 +1,35 @@
+/*
+ * Mutexes: blocking mutual exclusion locks
+ *
+ * started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * This file contains mutex debugging related internal prototypes, for the
+ * !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
+ */
+
+#define spin_lock_mutex(lock) spin_lock(lock)
+#define spin_unlock_mutex(lock) spin_unlock(lock)
+#define mutex_remove_waiter(lock, waiter, ti) \
+ __list_del((waiter)->list.prev, (waiter)->list.next)
+
+#define DEBUG_WARN_ON(c) do { } while (0)
+#define debug_mutex_set_owner(lock, new_owner) do { } while (0)
+#define debug_mutex_clear_owner(lock) do { } while (0)
+#define debug_mutex_init_waiter(waiter) do { } while (0)
+#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
+#define debug_mutex_free_waiter(waiter) do { } while (0)
+#define debug_mutex_add_waiter(lock, waiter, ti, ip) do { } while (0)
+#define debug_mutex_unlock(lock) do { } while (0)
+#define debug_mutex_init(lock, name) do { } while (0)
+
+/*
+ * Return-address parameters/declarations. They are very useful for
+ * debugging, but add overhead in the !DEBUG case - so we go the
+ * trouble of using this not too elegant but zero-cost solution:
+ */
+#define __IP_DECL__
+#define __IP__
+#define __RET_IP__
+
commit b8aa0361e434f8d9cbe9bb34525af1e8721396d8
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:18 2006 -0800
[PATCH] mutex subsystem, add include/asm-x86_64/mutex.h
add the x86_64 version of mutex.h, optimized in assembly.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
diff --git a/include/asm-x86_64/mutex.h b/include/asm-x86_64/mutex.h
new file mode 100644
index 000000000000..818abfd262d1
--- /dev/null
+++ b/include/asm-x86_64/mutex.h
@@ -0,0 +1,113 @@
+/*
+ * Assembly implementation of the mutex fastpath, based on atomic
+ * decrement/increment.
+ *
+ * started by Ingo Molnar:
+ *
+ * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+/**
+ * __mutex_fastpath_lock - decrement and call function if negative
+ * @v: pointer of type atomic_t
+ * @fail_fn: function to call if the result is negative
+ *
+ * Atomically decrements @v and calls <fail_fn> if the result is negative.
+ */
+#define __mutex_fastpath_lock(v, fail_fn) \
+do { \
+ unsigned long dummy; \
+ \
+ typecheck(atomic_t *, v); \
+ typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
+ \
+ __asm__ __volatile__( \
+ LOCK " decl (%%rdi) \n" \
+ " js 2f \n" \
+ "1: \n" \
+ \
+ LOCK_SECTION_START("") \
+ "2: call "#fail_fn" \n" \
+ " jmp 1b \n" \
+ LOCK_SECTION_END \
+ \
+ :"=D" (dummy) \
+ : "D" (v) \
+ : "rax", "rsi", "rdx", "rcx", \
+ "r8", "r9", "r10", "r11", "memory"); \
+} while (0)
+
+/**
+ * __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ * from 1 to a 0 value
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count,
+ int fastcall (*fail_fn)(atomic_t *))
+{
+ if (unlikely(atomic_dec_return(count) < 0))
+ return fail_fn(count);
+ else
+ return 0;
+}
+
+/**
+ * __mutex_fastpath_unlock - increment and call function if nonpositive
+ * @v: pointer of type atomic_t
+ * @fail_fn: function to call if the result is nonpositive
+ *
+ * Atomically increments @v and calls <fail_fn> if the result is nonpositive.
+ */
+#define __mutex_fastpath_unlock(v, fail_fn) \
+do { \
+ unsigned long dummy; \
+ \
+ typecheck(atomic_t *, v); \
+ typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \
+ \
+ __asm__ __volatile__( \
+ LOCK " incl (%%rdi) \n" \
+ " jle 2f \n" \
+ "1: \n" \
+ \
+ LOCK_SECTION_START("") \
+ "2: call "#fail_fn" \n" \
+ " jmp 1b \n" \
+ LOCK_SECTION_END \
+ \
+ :"=D" (dummy) \
+ : "D" (v) \
+ : "rax", "rsi", "rdx", "rcx", \
+ "r8", "r9", "r10", "r11", "memory"); \
+} while (0)
+
+#define __mutex_slowpath_needs_to_unlock() 1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ * @count: pointer of type atomic_t
+ * @fail_fn: fallback function
+ *
+ * Change the count from 1 to 0 and return 1 (success), or return 0 (failure)
+ * if it wasn't 1 originally. [the fallback function is never used on
+ * x86_64, because all x86_64 CPUs have a CMPXCHG instruction.]
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+ if (likely(atomic_cmpxchg(count, 1, 0)) == 1)
+ return 1;
+ else
+ return 0;
+}
+
+#endif
commit 620a6fd185c084aa617c411f711533f01ea673c9
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:17 2006 -0800
[PATCH] mutex subsystem, add asm-generic/mutex-[dec|xchg|null].h implementations
Add three (generic) mutex fastpath implementations.
The mutex-xchg.h implementation is atomic_xchg() based, and should
work fine on every architecture.
The mutex-dec.h implementation is atomic_dec_return() based - this
one too should work on every architecture, but might not perform the
most optimally on architectures that have no atomic-dec/inc instructions.
The mutex-null.h implementation forces all calls into the slowpath. This
is used for mutex debugging, but it can also be used on platforms that do
not want (or need) a fastpath at all.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
new file mode 100644
index 000000000000..74b18cda169f
--- /dev/null
+++ b/include/asm-generic/mutex-dec.h
@@ -0,0 +1,110 @@
+/*
+ * asm-generic/mutex-dec.h
+ *
+ * Generic implementation of the mutex fastpath, based on atomic
+ * decrement/increment.
+ */
+#ifndef _ASM_GENERIC_MUTEX_DEC_H
+#define _ASM_GENERIC_MUTEX_DEC_H
+
+/**
+ * __mutex_fastpath_lock - try to take the lock by moving the count
+ * from 1 to a 0 value
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function MUST leave the value lower than
+ * 1 even when the "1" assertion wasn't true.
+ */
+#define __mutex_fastpath_lock(count, fail_fn) \
+do { \
+ if (unlikely(atomic_dec_return(count) < 0)) \
+ fail_fn(count); \
+ else \
+ smp_mb(); \
+} while (0)
+
+/**
+ * __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ * from 1 to a 0 value
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns.
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+ if (unlikely(atomic_dec_return(count) < 0))
+ return fail_fn(count);
+ else {
+ smp_mb();
+ return 0;
+ }
+}
+
+/**
+ * __mutex_fastpath_unlock - try to promote the count from 0 to 1
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 0
+ *
+ * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
+ * In the failure case, this function is allowed to either set the value to
+ * 1, or to set it to a value lower than 1.
+ *
+ * If the implementation sets it to a value of lower than 1, then the
+ * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
+ * to return 0 otherwise.
+ */
+#define __mutex_fastpath_unlock(count, fail_fn) \
+do { \
+ smp_mb(); \
+ if (unlikely(atomic_inc_return(count) <= 0)) \
+ fail_fn(count); \
+} while (0)
+
+#define __mutex_slowpath_needs_to_unlock() 1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ * @count: pointer of type atomic_t
+ * @fail_fn: fallback function
+ *
+ * Change the count from 1 to a value lower than 1, and return 0 (failure)
+ * if it wasn't 1 originally, or return 1 (success) otherwise. This function
+ * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
+ * Additionally, if the value was < 0 originally, this function must not leave
+ * it to 0 on failure.
+ *
+ * If the architecture has no effective trylock variant, it should call the
+ * <fail_fn> spinlock-based trylock variant unconditionally.
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+ /*
+ * We have two variants here. The cmpxchg based one is the best one
+ * because it never induce a false contention state. It is included
+ * here because architectures using the inc/dec algorithms over the
+ * xchg ones are much more likely to support cmpxchg natively.
+ *
+ * If not we fall back to the spinlock based variant - that is
+ * just as efficient (and simpler) as a 'destructive' probing of
+ * the mutex state would be.
+ */
+#ifdef __HAVE_ARCH_CMPXCHG
+ if (likely(atomic_cmpxchg(count, 1, 0)) == 1) {
+ smp_mb();
+ return 1;
+ }
+ return 0;
+#else
+ return fail_fn(count);
+#endif
+}
+
+#endif
diff --git a/include/asm-generic/mutex-null.h b/include/asm-generic/mutex-null.h
new file mode 100644
index 000000000000..5cf8b7ce0c45
--- /dev/null
+++ b/include/asm-generic/mutex-null.h
@@ -0,0 +1,24 @@
+/*
+ * asm-generic/mutex-null.h
+ *
+ * Generic implementation of the mutex fastpath, based on NOP :-)
+ *
+ * This is used by the mutex-debugging infrastructure, but it can also
+ * be used by architectures that (for whatever reason) want to use the
+ * spinlock based slowpath.
+ */
+#ifndef _ASM_GENERIC_MUTEX_NULL_H
+#define _ASM_GENERIC_MUTEX_NULL_H
+
+/* extra parameter only needed for mutex debugging: */
+#ifndef __IP__
+# define __IP__
+#endif
+
+#define __mutex_fastpath_lock(count, fail_fn) fail_fn(count __RET_IP__)
+#define __mutex_fastpath_lock_retval(count, fail_fn) fail_fn(count __RET_IP__)
+#define __mutex_fastpath_unlock(count, fail_fn) fail_fn(count __RET_IP__)
+#define __mutex_fastpath_trylock(count, fail_fn) fail_fn(count)
+#define __mutex_slowpath_needs_to_unlock() 1
+
+#endif
diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
new file mode 100644
index 000000000000..1d24f47e6c48
--- /dev/null
+++ b/include/asm-generic/mutex-xchg.h
@@ -0,0 +1,117 @@
+/*
+ * asm-generic/mutex-xchg.h
+ *
+ * Generic implementation of the mutex fastpath, based on xchg().
+ *
+ * NOTE: An xchg based implementation is less optimal than an atomic
+ * decrement/increment based implementation. If your architecture
+ * has a reasonable atomic dec/inc then you should probably use
+ * asm-generic/mutex-dec.h instead, or you could open-code an
+ * optimized version in asm/mutex.h.
+ */
+#ifndef _ASM_GENERIC_MUTEX_XCHG_H
+#define _ASM_GENERIC_MUTEX_XCHG_H
+
+/**
+ * __mutex_fastpath_lock - try to take the lock by moving the count
+ * from 1 to a 0 value
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
+ * wasn't 1 originally. This function MUST leave the value lower than 1
+ * even when the "1" assertion wasn't true.
+ */
+#define __mutex_fastpath_lock(count, fail_fn) \
+do { \
+ if (unlikely(atomic_xchg(count, 0) != 1)) \
+ fail_fn(count); \
+ else \
+ smp_mb(); \
+} while (0)
+
+
+/**
+ * __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ * from 1 to a 0 value
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if it
+ * wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+ if (unlikely(atomic_xchg(count, 0) != 1))
+ return fail_fn(count);
+ else {
+ smp_mb();
+ return 0;
+ }
+}
+
+/**
+ * __mutex_fastpath_unlock - try to promote the mutex from 0 to 1
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 0
+ *
+ * try to promote the mutex from 0 to 1. if it wasn't 0, call <function>
+ * In the failure case, this function is allowed to either set the value to
+ * 1, or to set it to a value lower than one.
+ * If the implementation sets it to a value of lower than one, the
+ * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
+ * to return 0 otherwise.
+ */
+#define __mutex_fastpath_unlock(count, fail_fn) \
+do { \
+ smp_mb(); \
+ if (unlikely(atomic_xchg(count, 1) != 0)) \
+ fail_fn(count); \
+} while (0)
+
+#define __mutex_slowpath_needs_to_unlock() 0
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ * @count: pointer of type atomic_t
+ * @fail_fn: spinlock based trylock implementation
+ *
+ * Change the count from 1 to a value lower than 1, and return 0 (failure)
+ * if it wasn't 1 originally, or return 1 (success) otherwise. This function
+ * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
+ * Additionally, if the value was < 0 originally, this function must not leave
+ * it to 0 on failure.
+ *
+ * If the architecture has no effective trylock variant, it should call the
+ * <fail_fn> spinlock-based trylock variant unconditionally.
+ */
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+ int prev = atomic_xchg(count, 0);
+
+ if (unlikely(prev < 0)) {
+ /*
+ * The lock was marked contended so we must restore that
+ * state. If while doing so we get back a prev value of 1
+ * then we just own it.
+ *
+ * [ In the rare case of the mutex going to 1, to 0, to -1
+ * and then back to 0 in this few-instructions window,
+ * this has the potential to trigger the slowpath for the
+ * owner's unlock path needlessly, but that's not a problem
+ * in practice. ]
+ */
+ prev = atomic_xchg(count, prev);
+ if (prev < 0)
+ prev = 0;
+ }
+ smp_mb();
+
+ return prev;
+}
+
+#endif
commit ffbf670f5cd50501a34a5187981460da2216071e
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jan 9 15:59:17 2006 -0800
[PATCH] mutex subsystem, add atomic_xchg() to all arches
add atomic_xchg() to all the architectures. Needed by the new mutex code.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h
index cb03bbe92cdf..fc77f7413083 100644
--- a/include/asm-alpha/atomic.h
+++ b/include/asm-alpha/atomic.h
@@ -176,6 +176,7 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
}
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
#define atomic_add_unless(v, a, u) \
({ \
diff --git a/include/asm-arm/atomic.h b/include/asm-arm/atomic.h
index f72b63309bc5..3d7283d84405 100644
--- a/include/asm-arm/atomic.h
+++ b/include/asm-arm/atomic.h
@@ -175,6 +175,8 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
#endif /* __LINUX_ARM_ARCH__ */
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
int c, old;
diff --git a/include/asm-arm26/atomic.h b/include/asm-arm26/atomic.h
index 3074b0e76343..1552c8653990 100644
--- a/include/asm-arm26/atomic.h
+++ b/include/asm-arm26/atomic.h
@@ -76,6 +76,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return ret;
}
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
int ret;
diff --git a/include/asm-cris/atomic.h b/include/asm-cris/atomic.h
index 2df2c7aa19b7..0b51a87e5532 100644
--- a/include/asm-cris/atomic.h
+++ b/include/asm-cris/atomic.h
@@ -136,6 +136,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return ret;
}
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
int ret;
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index 9c9e9499cfd8..a59f684b4f33 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -328,6 +328,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
#endif
#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
#define atomic_add_unless(v, a, u) \
({ \
diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h
index d891541e89c3..21f54428c86b 100644
--- a/include/asm-h8300/atomic.h
+++ b/include/asm-h8300/atomic.h
@@ -95,6 +95,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return ret;
}
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
int ret;
diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
index 7a5472d77091..de649d3aa2d4 100644
--- a/include/asm-i386/atomic.h
+++ b/include/asm-i386/atomic.h
@@ -216,6 +216,7 @@ static __inline__ int atomic_sub_return(int i, atomic_t *v)
}
#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
/**
* atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h
index 15cf7984c48e..d3e0dfa99e1f 100644
--- a/include/asm-ia64/atomic.h
+++ b/include/asm-ia64/atomic.h
@@ -89,6 +89,7 @@ ia64_atomic64_sub (__s64 i, atomic64_t *v)
}
#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
#define atomic_add_unless(v, a, u) \
({ \
diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h
index 70761278b6cb..3122fe106f05 100644
--- a/include/asm-m32r/atomic.h
+++ b/include/asm-m32r/atomic.h
@@ -243,6 +243,7 @@ static __inline__ int atomic_dec_return(atomic_t *v)
#define atomic_add_negative(i,v) (atomic_add_return((i), (v)) < 0)
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
/**
* atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h
index b8a4e75d679d..a4a84d5c65d5 100644
--- a/include/asm-m68k/atomic.h
+++ b/include/asm-m68k/atomic.h
@@ -140,6 +140,7 @@ static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
}
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
#define atomic_add_unless(v, a, u) \
({ \
diff --git a/include/asm-m68knommu/atomic.h b/include/asm-m68knommu/atomic.h
index 1702dbe9318c..6c4e4b63e454 100644
--- a/include/asm-m68knommu/atomic.h
+++ b/include/asm-m68knommu/atomic.h
@@ -129,6 +129,7 @@ static inline int atomic_sub_return(int i, atomic_t * v)
}
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
#define atomic_add_unless(v, a, u) \
({ \
diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index 92256e43a938..94a95872d727 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -289,6 +289,7 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v)
}
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
/**
* atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-parisc/atomic.h b/include/asm-parisc/atomic.h
index 64ebd086c40d..2ca56d34aaad 100644
--- a/include/asm-parisc/atomic.h
+++ b/include/asm-parisc/atomic.h
@@ -165,6 +165,7 @@ static __inline__ int atomic_read(const atomic_t *v)
/* exported interface */
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
/**
* atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-powerpc/atomic.h b/include/asm-powerpc/atomic.h
index ae395a0632a6..248f9aec959c 100644
--- a/include/asm-powerpc/atomic.h
+++ b/include/asm-powerpc/atomic.h
@@ -165,6 +165,7 @@ static __inline__ int atomic_dec_return(atomic_t *v)
}
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
/**
* atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index d82aedf616fe..be6fefe223d6 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -75,6 +75,8 @@ static __inline__ void atomic_set_mask(unsigned long mask, atomic_t * v)
__CS_LOOP(v, mask, "or");
}
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new)
{
__asm__ __volatile__(" cs %0,%3,0(%2)\n"
diff --git a/include/asm-sh/atomic.h b/include/asm-sh/atomic.h
index 618d8e0de348..fb627de217f2 100644
--- a/include/asm-sh/atomic.h
+++ b/include/asm-sh/atomic.h
@@ -101,6 +101,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return ret;
}
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
int ret;
diff --git a/include/asm-sh64/atomic.h b/include/asm-sh64/atomic.h
index f3ce5c0df13a..28f2ea9b567b 100644
--- a/include/asm-sh64/atomic.h
+++ b/include/asm-sh64/atomic.h
@@ -113,6 +113,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return ret;
}
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
int ret;
diff --git a/include/asm-sparc/atomic.h b/include/asm-sparc/atomic.h
index accb4967e9d2..e1033170bd3a 100644
--- a/include/asm-sparc/atomic.h
+++ b/include/asm-sparc/atomic.h
@@ -20,6 +20,7 @@ typedef struct { volatile int counter; } atomic_t;
extern int __atomic_add_return(int, atomic_t *);
extern int atomic_cmpxchg(atomic_t *, int, int);
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
extern int atomic_add_unless(atomic_t *, int, int);
extern void atomic_set(atomic_t *, int);
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h
index 11f5aa5d108c..25256bdc8aae 100644
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -72,6 +72,7 @@ extern int atomic64_sub_ret(int, atomic64_t *);
#define atomic64_add_negative(i, v) (atomic64_add_ret(i, v) < 0)
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
#define atomic_add_unless(v, a, u) \
({ \
diff --git a/include/asm-v850/atomic.h b/include/asm-v850/atomic.h
index f5b9ab6f4e70..166df00457ea 100644
--- a/include/asm-v850/atomic.h
+++ b/include/asm-v850/atomic.h
@@ -104,6 +104,8 @@ static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
return ret;
}
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
static inline int atomic_add_unless(atomic_t *v, int a, int u)
{
int ret;
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
index 72eb071488c7..6b540237a2f8 100644
--- a/include/asm-x86_64/atomic.h
+++ b/include/asm-x86_64/atomic.h
@@ -389,6 +389,7 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t *v)
#define atomic64_dec_return(v) (atomic64_sub_return(1,v))
#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
/**
* atomic_add_unless - add unless the number is a given value
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index e2ce06b101ad..fe105a123924 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -224,6 +224,7 @@ static inline int atomic_sub_return(int i, atomic_t * v)
#define atomic_add_negative(i,v) (atomic_add_return((i),(v)) < 0)
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
/**
* atomic_add_unless - add unless the number is a given value