Patches contributed by Eötvös Loránd University
commit 5a06a363ef48444186f18095ae1b932dddbbfa89
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Jul 30 03:04:11 2006 -0700
[PATCH] ipc/msg.c: clean up coding style
Clean up ipc/msg.c to conform to Documentation/CodingStyle. (before it was
an inconsistent hodgepodge of various coding styles)
Verified that the before/after .o's are identical.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
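For readers less familiar with Documentation/CodingStyle, a minimal before/after sketch of the conventions this patch applies, copied from the ss_add() hunk further down (only the style changes, not the behaviour):

/* Before the cleanup: '*' attached to the type, no spaces around '='
 * or after commas: */
static inline void ss_add(struct msg_queue* msq, struct msg_sender* mss)
{
        mss->tsk=current;
        current->state=TASK_INTERRUPTIBLE;
        list_add_tail(&mss->list,&msq->q_senders);
}

/* After the cleanup, per Documentation/CodingStyle: '*' binds to the
 * name, binary operators and commas get a space: */
static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
{
        mss->tsk = current;
        current->state = TASK_INTERRUPTIBLE;
        list_add_tail(&mss->list, &msq->q_senders);
}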
diff --git a/ipc/msg.c b/ipc/msg.c
index cd92d342953e..2b4fccf8ea55 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -1,6 +1,6 @@
/*
* linux/ipc/msg.c
- * Copyright (C) 1992 Krishna Balasubramanian
+ * Copyright (C) 1992 Krishna Balasubramanian
*
* Removed all the remaining kerneld mess
* Catch the -EFAULT stuff properly
@@ -41,22 +41,24 @@ int msg_ctlmax = MSGMAX;
int msg_ctlmnb = MSGMNB;
int msg_ctlmni = MSGMNI;
-/* one msg_receiver structure for each sleeping receiver */
+/*
+ * one msg_receiver structure for each sleeping receiver:
+ */
struct msg_receiver {
- struct list_head r_list;
- struct task_struct* r_tsk;
+ struct list_head r_list;
+ struct task_struct *r_tsk;
- int r_mode;
- long r_msgtype;
- long r_maxsize;
+ int r_mode;
+ long r_msgtype;
+ long r_maxsize;
- struct msg_msg* volatile r_msg;
+ volatile struct msg_msg *r_msg;
};
/* one msg_sender for each sleeping sender */
struct msg_sender {
- struct list_head list;
- struct task_struct* tsk;
+ struct list_head list;
+ struct task_struct *tsk;
};
#define SEARCH_ANY 1
@@ -64,45 +66,42 @@ struct msg_sender {
#define SEARCH_NOTEQUAL 3
#define SEARCH_LESSEQUAL 4
-static atomic_t msg_bytes = ATOMIC_INIT(0);
-static atomic_t msg_hdrs = ATOMIC_INIT(0);
+static atomic_t msg_bytes = ATOMIC_INIT(0);
+static atomic_t msg_hdrs = ATOMIC_INIT(0);
static struct ipc_ids msg_ids;
-#define msg_lock(id) ((struct msg_queue*)ipc_lock(&msg_ids,id))
-#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
-#define msg_rmid(id) ((struct msg_queue*)ipc_rmid(&msg_ids,id))
-#define msg_checkid(msq, msgid) \
- ipc_checkid(&msg_ids,&msq->q_perm,msgid)
-#define msg_buildid(id, seq) \
- ipc_buildid(&msg_ids, id, seq)
+#define msg_lock(id) ((struct msg_queue *)ipc_lock(&msg_ids, id))
+#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
+#define msg_rmid(id) ((struct msg_queue *)ipc_rmid(&msg_ids, id))
+#define msg_checkid(msq, msgid) ipc_checkid(&msg_ids, &msq->q_perm, msgid)
+#define msg_buildid(id, seq) ipc_buildid(&msg_ids, id, seq)
-static void freeque (struct msg_queue *msq, int id);
-static int newque (key_t key, int msgflg);
+static void freeque(struct msg_queue *msq, int id);
+static int newque(key_t key, int msgflg);
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
#endif
-void __init msg_init (void)
+void __init msg_init(void)
{
- ipc_init_ids(&msg_ids,msg_ctlmni);
+ ipc_init_ids(&msg_ids, msg_ctlmni);
ipc_init_proc_interface("sysvipc/msg",
" key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
&msg_ids,
sysvipc_msg_proc_show);
}
-static int newque (key_t key, int msgflg)
+static int newque(key_t key, int msgflg)
{
- int id;
- int retval;
struct msg_queue *msq;
+ int id, retval;
- msq = ipc_rcu_alloc(sizeof(*msq));
- if (!msq)
+ msq = ipc_rcu_alloc(sizeof(*msq));
+ if (!msq)
return -ENOMEM;
- msq->q_perm.mode = (msgflg & S_IRWXUGO);
+ msq->q_perm.mode = msgflg & S_IRWXUGO;
msq->q_perm.key = key;
msq->q_perm.security = NULL;
@@ -113,13 +112,13 @@ static int newque (key_t key, int msgflg)
}
id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
- if(id == -1) {
+ if (id == -1) {
security_msg_queue_free(msq);
ipc_rcu_putref(msq);
return -ENOSPC;
}
- msq->q_id = msg_buildid(id,msq->q_perm.seq);
+ msq->q_id = msg_buildid(id, msq->q_perm.seq);
msq->q_stime = msq->q_rtime = 0;
msq->q_ctime = get_seconds();
msq->q_cbytes = msq->q_qnum = 0;
@@ -133,44 +132,44 @@ static int newque (key_t key, int msgflg)
return msq->q_id;
}
-static inline void ss_add(struct msg_queue* msq, struct msg_sender* mss)
+static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
{
- mss->tsk=current;
- current->state=TASK_INTERRUPTIBLE;
- list_add_tail(&mss->list,&msq->q_senders);
+ mss->tsk = current;
+ current->state = TASK_INTERRUPTIBLE;
+ list_add_tail(&mss->list, &msq->q_senders);
}
-static inline void ss_del(struct msg_sender* mss)
+static inline void ss_del(struct msg_sender *mss)
{
- if(mss->list.next != NULL)
+ if (mss->list.next != NULL)
list_del(&mss->list);
}
-static void ss_wakeup(struct list_head* h, int kill)
+static void ss_wakeup(struct list_head *h, int kill)
{
struct list_head *tmp;
tmp = h->next;
while (tmp != h) {
- struct msg_sender* mss;
-
- mss = list_entry(tmp,struct msg_sender,list);
+ struct msg_sender *mss;
+
+ mss = list_entry(tmp, struct msg_sender, list);
tmp = tmp->next;
- if(kill)
- mss->list.next=NULL;
+ if (kill)
+ mss->list.next = NULL;
wake_up_process(mss->tsk);
}
}
-static void expunge_all(struct msg_queue* msq, int res)
+static void expunge_all(struct msg_queue *msq, int res)
{
struct list_head *tmp;
tmp = msq->q_receivers.next;
while (tmp != &msq->q_receivers) {
- struct msg_receiver* msr;
-
- msr = list_entry(tmp,struct msg_receiver,r_list);
+ struct msg_receiver *msr;
+
+ msr = list_entry(tmp, struct msg_receiver, r_list);
tmp = tmp->next;
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
@@ -178,26 +177,28 @@ static void expunge_all(struct msg_queue* msq, int res)
msr->r_msg = ERR_PTR(res);
}
}
-/*
- * freeque() wakes up waiters on the sender and receiver waiting queue,
- * removes the message queue from message queue ID
+
+/*
+ * freeque() wakes up waiters on the sender and receiver waiting queue,
+ * removes the message queue from message queue ID
* array, and cleans up all the messages associated with this queue.
*
* msg_ids.mutex and the spinlock for this message queue is hold
* before freeque() is called. msg_ids.mutex remains locked on exit.
*/
-static void freeque (struct msg_queue *msq, int id)
+static void freeque(struct msg_queue *msq, int id)
{
struct list_head *tmp;
- expunge_all(msq,-EIDRM);
- ss_wakeup(&msq->q_senders,1);
+ expunge_all(msq, -EIDRM);
+ ss_wakeup(&msq->q_senders, 1);
msq = msg_rmid(id);
msg_unlock(msq);
-
+
tmp = msq->q_messages.next;
- while(tmp != &msq->q_messages) {
- struct msg_msg* msg = list_entry(tmp,struct msg_msg,m_list);
+ while (tmp != &msq->q_messages) {
+ struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
+
tmp = tmp->next;
atomic_dec(&msg_hdrs);
free_msg(msg);
@@ -207,10 +208,10 @@ static void freeque (struct msg_queue *msq, int id)
ipc_rcu_putref(msq);
}
-asmlinkage long sys_msgget (key_t key, int msgflg)
+asmlinkage long sys_msgget(key_t key, int msgflg)
{
- int id, ret = -EPERM;
struct msg_queue *msq;
+ int id, ret = -EPERM;
mutex_lock(&msg_ids.mutex);
if (key == IPC_PRIVATE)
@@ -224,31 +225,34 @@ asmlinkage long sys_msgget (key_t key, int msgflg)
ret = -EEXIST;
} else {
msq = msg_lock(id);
- BUG_ON(msq==NULL);
+ BUG_ON(msq == NULL);
if (ipcperms(&msq->q_perm, msgflg))
ret = -EACCES;
else {
int qid = msg_buildid(id, msq->q_perm.seq);
- ret = security_msg_queue_associate(msq, msgflg);
+
+ ret = security_msg_queue_associate(msq, msgflg);
if (!ret)
ret = qid;
}
msg_unlock(msq);
}
mutex_unlock(&msg_ids.mutex);
+
return ret;
}
-static inline unsigned long copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
+static inline unsigned long
+copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
{
switch(version) {
case IPC_64:
- return copy_to_user (buf, in, sizeof(*in));
+ return copy_to_user(buf, in, sizeof(*in));
case IPC_OLD:
- {
+ {
struct msqid_ds out;
- memset(&out,0,sizeof(out));
+ memset(&out, 0, sizeof(out));
ipc64_perm_to_ipc_perm(&in->msg_perm, &out.msg_perm);
@@ -256,18 +260,18 @@ static inline unsigned long copy_msqid_to_user(void __user *buf, struct msqid64_
out.msg_rtime = in->msg_rtime;
out.msg_ctime = in->msg_ctime;
- if(in->msg_cbytes > USHRT_MAX)
+ if (in->msg_cbytes > USHRT_MAX)
out.msg_cbytes = USHRT_MAX;
else
out.msg_cbytes = in->msg_cbytes;
out.msg_lcbytes = in->msg_cbytes;
- if(in->msg_qnum > USHRT_MAX)
+ if (in->msg_qnum > USHRT_MAX)
out.msg_qnum = USHRT_MAX;
else
out.msg_qnum = in->msg_qnum;
- if(in->msg_qbytes > USHRT_MAX)
+ if (in->msg_qbytes > USHRT_MAX)
out.msg_qbytes = USHRT_MAX;
else
out.msg_qbytes = in->msg_qbytes;
@@ -276,8 +280,8 @@ static inline unsigned long copy_msqid_to_user(void __user *buf, struct msqid64_
out.msg_lspid = in->msg_lspid;
out.msg_lrpid = in->msg_lrpid;
- return copy_to_user (buf, &out, sizeof(out));
- }
+ return copy_to_user(buf, &out, sizeof(out));
+ }
default:
return -EINVAL;
}
@@ -290,14 +294,15 @@ struct msq_setbuf {
mode_t mode;
};
-static inline unsigned long copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
+static inline unsigned long
+copy_msqid_from_user(struct msq_setbuf *out, void __user *buf, int version)
{
switch(version) {
case IPC_64:
- {
+ {
struct msqid64_ds tbuf;
- if (copy_from_user (&tbuf, buf, sizeof (tbuf)))
+ if (copy_from_user(&tbuf, buf, sizeof(tbuf)))
return -EFAULT;
out->qbytes = tbuf.msg_qbytes;
@@ -306,60 +311,61 @@ static inline unsigned long copy_msqid_from_user(struct msq_setbuf *out, void __
out->mode = tbuf.msg_perm.mode;
return 0;
- }
+ }
case IPC_OLD:
- {
+ {
struct msqid_ds tbuf_old;
- if (copy_from_user (&tbuf_old, buf, sizeof (tbuf_old)))
+ if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
return -EFAULT;
out->uid = tbuf_old.msg_perm.uid;
out->gid = tbuf_old.msg_perm.gid;
out->mode = tbuf_old.msg_perm.mode;
- if(tbuf_old.msg_qbytes == 0)
+ if (tbuf_old.msg_qbytes == 0)
out->qbytes = tbuf_old.msg_lqbytes;
else
out->qbytes = tbuf_old.msg_qbytes;
return 0;
- }
+ }
default:
return -EINVAL;
}
}
-asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
+asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
{
- int err, version;
- struct msg_queue *msq;
- struct msq_setbuf setbuf;
struct kern_ipc_perm *ipcp;
-
+ struct msq_setbuf setbuf;
+ struct msg_queue *msq;
+ int err, version;
+
if (msqid < 0 || cmd < 0)
return -EINVAL;
version = ipc_parse_version(&cmd);
switch (cmd) {
- case IPC_INFO:
- case MSG_INFO:
- {
+ case IPC_INFO:
+ case MSG_INFO:
+ {
struct msginfo msginfo;
int max_id;
+
if (!buf)
return -EFAULT;
- /* We must not return kernel stack data.
+ /*
+ * We must not return kernel stack data.
* due to padding, it's not enough
* to set all member fields.
*/
-
err = security_msg_queue_msgctl(NULL, cmd);
if (err)
return err;
- memset(&msginfo,0,sizeof(msginfo));
+ memset(&msginfo, 0, sizeof(msginfo));
msginfo.msgmni = msg_ctlmni;
msginfo.msgmax = msg_ctlmax;
msginfo.msgmnb = msg_ctlmnb;
@@ -377,36 +383,37 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
}
max_id = msg_ids.max_id;
mutex_unlock(&msg_ids.mutex);
- if (copy_to_user (buf, &msginfo, sizeof(struct msginfo)))
+ if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
return -EFAULT;
- return (max_id < 0) ? 0: max_id;
+ return (max_id < 0) ? 0 : max_id;
}
case MSG_STAT:
case IPC_STAT:
{
struct msqid64_ds tbuf;
int success_return;
+
if (!buf)
return -EFAULT;
- if(cmd == MSG_STAT && msqid >= msg_ids.entries->size)
+ if (cmd == MSG_STAT && msqid >= msg_ids.entries->size)
return -EINVAL;
- memset(&tbuf,0,sizeof(tbuf));
+ memset(&tbuf, 0, sizeof(tbuf));
msq = msg_lock(msqid);
if (msq == NULL)
return -EINVAL;
- if(cmd == MSG_STAT) {
+ if (cmd == MSG_STAT) {
success_return = msg_buildid(msqid, msq->q_perm.seq);
} else {
err = -EIDRM;
- if (msg_checkid(msq,msqid))
+ if (msg_checkid(msq, msqid))
goto out_unlock;
success_return = 0;
}
err = -EACCES;
- if (ipcperms (&msq->q_perm, S_IRUGO))
+ if (ipcperms(&msq->q_perm, S_IRUGO))
goto out_unlock;
err = security_msg_queue_msgctl(msq, cmd);
@@ -430,7 +437,7 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
case IPC_SET:
if (!buf)
return -EFAULT;
- if (copy_msqid_from_user (&setbuf, buf, version))
+ if (copy_msqid_from_user(&setbuf, buf, version))
return -EFAULT;
break;
case IPC_RMID:
@@ -441,12 +448,12 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
mutex_lock(&msg_ids.mutex);
msq = msg_lock(msqid);
- err=-EINVAL;
+ err = -EINVAL;
if (msq == NULL)
goto out_up;
err = -EIDRM;
- if (msg_checkid(msq,msqid))
+ if (msg_checkid(msq, msqid))
goto out_unlock_up;
ipcp = &msq->q_perm;
@@ -454,15 +461,16 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
if (err)
goto out_unlock_up;
if (cmd==IPC_SET) {
- err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid, setbuf.mode);
+ err = audit_ipc_set_perm(setbuf.qbytes, setbuf.uid, setbuf.gid,
+ setbuf.mode);
if (err)
goto out_unlock_up;
}
err = -EPERM;
- if (current->euid != ipcp->cuid &&
+ if (current->euid != ipcp->cuid &&
current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
- /* We _could_ check for CAP_CHOWN above, but we don't */
+ /* We _could_ check for CAP_CHOWN above, but we don't */
goto out_unlock_up;
err = security_msg_queue_msgctl(msq, cmd);
@@ -480,22 +488,22 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
ipcp->uid = setbuf.uid;
ipcp->gid = setbuf.gid;
- ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
- (S_IRWXUGO & setbuf.mode);
+ ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
+ (S_IRWXUGO & setbuf.mode);
msq->q_ctime = get_seconds();
/* sleeping receivers might be excluded by
* stricter permissions.
*/
- expunge_all(msq,-EAGAIN);
+ expunge_all(msq, -EAGAIN);
/* sleeping senders might be able to send
* due to a larger queue size.
*/
- ss_wakeup(&msq->q_senders,0);
+ ss_wakeup(&msq->q_senders, 0);
msg_unlock(msq);
break;
}
case IPC_RMID:
- freeque (msq, msqid);
+ freeque(msq, msqid);
break;
}
err = 0;
@@ -510,41 +518,44 @@ asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds __user *buf)
return err;
}
-static int testmsg(struct msg_msg* msg,long type,int mode)
+static int testmsg(struct msg_msg *msg, long type, int mode)
{
switch(mode)
{
case SEARCH_ANY:
return 1;
case SEARCH_LESSEQUAL:
- if(msg->m_type <=type)
+ if (msg->m_type <=type)
return 1;
break;
case SEARCH_EQUAL:
- if(msg->m_type == type)
+ if (msg->m_type == type)
return 1;
break;
case SEARCH_NOTEQUAL:
- if(msg->m_type != type)
+ if (msg->m_type != type)
return 1;
break;
}
return 0;
}
-static inline int pipelined_send(struct msg_queue* msq, struct msg_msg* msg)
+static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
{
- struct list_head* tmp;
+ struct list_head *tmp;
tmp = msq->q_receivers.next;
while (tmp != &msq->q_receivers) {
- struct msg_receiver* msr;
- msr = list_entry(tmp,struct msg_receiver,r_list);
+ struct msg_receiver *msr;
+
+ msr = list_entry(tmp, struct msg_receiver, r_list);
tmp = tmp->next;
- if(testmsg(msg,msr->r_msgtype,msr->r_mode) &&
- !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, msr->r_msgtype, msr->r_mode)) {
+ if (testmsg(msg, msr->r_msgtype, msr->r_mode) &&
+ !security_msg_queue_msgrcv(msq, msg, msr->r_tsk,
+ msr->r_msgtype, msr->r_mode)) {
+
list_del(&msr->r_list);
- if(msr->r_maxsize < msg->m_ts) {
+ if (msr->r_maxsize < msg->m_ts) {
msr->r_msg = NULL;
wake_up_process(msr->r_tsk);
smp_mb();
@@ -556,6 +567,7 @@ static inline int pipelined_send(struct msg_queue* msq, struct msg_msg* msg)
wake_up_process(msr->r_tsk);
smp_mb();
msr->r_msg = msg;
+
return 1;
}
}
@@ -563,40 +575,41 @@ static inline int pipelined_send(struct msg_queue* msq, struct msg_msg* msg)
return 0;
}
-asmlinkage long sys_msgsnd (int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
+asmlinkage long
+sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
{
struct msg_queue *msq;
struct msg_msg *msg;
long mtype;
int err;
-
+
if (msgsz > msg_ctlmax || (long) msgsz < 0 || msqid < 0)
return -EINVAL;
if (get_user(mtype, &msgp->mtype))
- return -EFAULT;
+ return -EFAULT;
if (mtype < 1)
return -EINVAL;
msg = load_msg(msgp->mtext, msgsz);
- if(IS_ERR(msg))
+ if (IS_ERR(msg))
return PTR_ERR(msg);
msg->m_type = mtype;
msg->m_ts = msgsz;
msq = msg_lock(msqid);
- err=-EINVAL;
- if(msq==NULL)
+ err = -EINVAL;
+ if (msq == NULL)
goto out_free;
err= -EIDRM;
- if (msg_checkid(msq,msqid))
+ if (msg_checkid(msq, msqid))
goto out_unlock_free;
for (;;) {
struct msg_sender s;
- err=-EACCES;
+ err = -EACCES;
if (ipcperms(&msq->q_perm, S_IWUGO))
goto out_unlock_free;
@@ -604,14 +617,14 @@ asmlinkage long sys_msgsnd (int msqid, struct msgbuf __user *msgp, size_t msgsz,
if (err)
goto out_unlock_free;
- if(msgsz + msq->q_cbytes <= msq->q_qbytes &&
+ if (msgsz + msq->q_cbytes <= msq->q_qbytes &&
1 + msq->q_qnum <= msq->q_qbytes) {
break;
}
/* queue full, wait: */
- if(msgflg&IPC_NOWAIT) {
- err=-EAGAIN;
+ if (msgflg & IPC_NOWAIT) {
+ err = -EAGAIN;
goto out_unlock_free;
}
ss_add(msq, &s);
@@ -626,9 +639,9 @@ asmlinkage long sys_msgsnd (int msqid, struct msgbuf __user *msgp, size_t msgsz,
goto out_unlock_free;
}
ss_del(&s);
-
+
if (signal_pending(current)) {
- err=-ERESTARTNOHAND;
+ err = -ERESTARTNOHAND;
goto out_unlock_free;
}
}
@@ -636,47 +649,47 @@ asmlinkage long sys_msgsnd (int msqid, struct msgbuf __user *msgp, size_t msgsz,
msq->q_lspid = current->tgid;
msq->q_stime = get_seconds();
- if(!pipelined_send(msq,msg)) {
+ if (!pipelined_send(msq, msg)) {
/* noone is waiting for this message, enqueue it */
- list_add_tail(&msg->m_list,&msq->q_messages);
+ list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz,&msg_bytes);
+ atomic_add(msgsz, &msg_bytes);
atomic_inc(&msg_hdrs);
}
-
+
err = 0;
msg = NULL;
out_unlock_free:
msg_unlock(msq);
out_free:
- if(msg!=NULL)
+ if (msg != NULL)
free_msg(msg);
return err;
}
-static inline int convert_mode(long* msgtyp, int msgflg)
+static inline int convert_mode(long *msgtyp, int msgflg)
{
- /*
+ /*
* find message of correct type.
* msgtyp = 0 => get first.
* msgtyp > 0 => get first message of matching type.
- * msgtyp < 0 => get message with least type must be < abs(msgtype).
+ * msgtyp < 0 => get message with least type must be < abs(msgtype).
*/
- if(*msgtyp==0)
+ if (*msgtyp == 0)
return SEARCH_ANY;
- if(*msgtyp<0) {
- *msgtyp=-(*msgtyp);
+ if (*msgtyp < 0) {
+ *msgtyp = -*msgtyp;
return SEARCH_LESSEQUAL;
}
- if(msgflg & MSG_EXCEPT)
+ if (msgflg & MSG_EXCEPT)
return SEARCH_NOTEQUAL;
return SEARCH_EQUAL;
}
-asmlinkage long sys_msgrcv (int msqid, struct msgbuf __user *msgp, size_t msgsz,
- long msgtyp, int msgflg)
+asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
+ long msgtyp, int msgflg)
{
struct msg_queue *msq;
struct msg_msg *msg;
@@ -684,44 +697,51 @@ asmlinkage long sys_msgrcv (int msqid, struct msgbuf __user *msgp, size_t msgsz,
if (msqid < 0 || (long) msgsz < 0)
return -EINVAL;
- mode = convert_mode(&msgtyp,msgflg);
+ mode = convert_mode(&msgtyp, msgflg);
msq = msg_lock(msqid);
- if(msq==NULL)
+ if (msq == NULL)
return -EINVAL;
msg = ERR_PTR(-EIDRM);
- if (msg_checkid(msq,msqid))
+ if (msg_checkid(msq, msqid))
goto out_unlock;
for (;;) {
struct msg_receiver msr_d;
- struct list_head* tmp;
+ struct list_head *tmp;
msg = ERR_PTR(-EACCES);
- if (ipcperms (&msq->q_perm, S_IRUGO))
+ if (ipcperms(&msq->q_perm, S_IRUGO))
goto out_unlock;
msg = ERR_PTR(-EAGAIN);
tmp = msq->q_messages.next;
while (tmp != &msq->q_messages) {
struct msg_msg *walk_msg;
- walk_msg = list_entry(tmp,struct msg_msg,m_list);
- if(testmsg(walk_msg,msgtyp,mode) &&
- !security_msg_queue_msgrcv(msq, walk_msg, current, msgtyp, mode)) {
+
+ walk_msg = list_entry(tmp, struct msg_msg, m_list);
+ if (testmsg(walk_msg, msgtyp, mode) &&
+ !security_msg_queue_msgrcv(msq, walk_msg, current,
+ msgtyp, mode)) {
+
msg = walk_msg;
- if(mode == SEARCH_LESSEQUAL && walk_msg->m_type != 1) {
- msg=walk_msg;
- msgtyp=walk_msg->m_type-1;
+ if (mode == SEARCH_LESSEQUAL &&
+ walk_msg->m_type != 1) {
+ msg = walk_msg;
+ msgtyp = walk_msg->m_type - 1;
} else {
- msg=walk_msg;
+ msg = walk_msg;
break;
}
}
tmp = tmp->next;
}
- if(!IS_ERR(msg)) {
- /* Found a suitable message. Unlink it from the queue. */
+ if (!IS_ERR(msg)) {
+ /*
+ * Found a suitable message.
+ * Unlink it from the queue.
+ */
if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
msg = ERR_PTR(-E2BIG);
goto out_unlock;
@@ -731,9 +751,9 @@ asmlinkage long sys_msgrcv (int msqid, struct msgbuf __user *msgp, size_t msgsz,
msq->q_rtime = get_seconds();
msq->q_lrpid = current->tgid;
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts,&msg_bytes);
+ atomic_sub(msg->m_ts, &msg_bytes);
atomic_dec(&msg_hdrs);
- ss_wakeup(&msq->q_senders,0);
+ ss_wakeup(&msq->q_senders, 0);
msg_unlock(msq);
break;
}
@@ -742,13 +762,13 @@ asmlinkage long sys_msgrcv (int msqid, struct msgbuf __user *msgp, size_t msgsz,
msg = ERR_PTR(-ENOMSG);
goto out_unlock;
}
- list_add_tail(&msr_d.r_list,&msq->q_receivers);
+ list_add_tail(&msr_d.r_list, &msq->q_receivers);
msr_d.r_tsk = current;
msr_d.r_msgtype = msgtyp;
msr_d.r_mode = mode;
- if(msgflg & MSG_NOERROR)
+ if (msgflg & MSG_NOERROR)
msr_d.r_maxsize = INT_MAX;
- else
+ else
msr_d.r_maxsize = msgsz;
msr_d.r_msg = ERR_PTR(-EAGAIN);
current->state = TASK_INTERRUPTIBLE;
@@ -773,17 +793,17 @@ asmlinkage long sys_msgrcv (int msqid, struct msgbuf __user *msgp, size_t msgsz,
* wake_up_process(). There is a race with exit(), see
* ipc/mqueue.c for the details.
*/
- msg = (struct msg_msg*) msr_d.r_msg;
+ msg = (struct msg_msg*)msr_d.r_msg;
while (msg == NULL) {
cpu_relax();
- msg = (struct msg_msg*) msr_d.r_msg;
+ msg = (struct msg_msg *)msr_d.r_msg;
}
/* Lockless receive, part 3:
* If there is a message or an error then accept it without
* locking.
*/
- if(msg != ERR_PTR(-EAGAIN)) {
+ if (msg != ERR_PTR(-EAGAIN)) {
rcu_read_unlock();
break;
}
@@ -798,7 +818,7 @@ asmlinkage long sys_msgrcv (int msqid, struct msgbuf __user *msgp, size_t msgsz,
* Repeat test after acquiring the spinlock.
*/
msg = (struct msg_msg*)msr_d.r_msg;
- if(msg != ERR_PTR(-EAGAIN))
+ if (msg != ERR_PTR(-EAGAIN))
goto out_unlock;
list_del(&msr_d.r_list);
@@ -810,14 +830,15 @@ asmlinkage long sys_msgrcv (int msqid, struct msgbuf __user *msgp, size_t msgsz,
}
}
if (IS_ERR(msg))
- return PTR_ERR(msg);
+ return PTR_ERR(msg);
msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
if (put_user (msg->m_type, &msgp->mtype) ||
store_msg(msgp->mtext, msg, msgsz)) {
- msgsz = -EFAULT;
+ msgsz = -EFAULT;
}
free_msg(msg);
+
return msgsz;
}
@@ -827,20 +848,20 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
struct msg_queue *msq = it;
return seq_printf(s,
- "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
- msq->q_perm.key,
- msq->q_id,
- msq->q_perm.mode,
- msq->q_cbytes,
- msq->q_qnum,
- msq->q_lspid,
- msq->q_lrpid,
- msq->q_perm.uid,
- msq->q_perm.gid,
- msq->q_perm.cuid,
- msq->q_perm.cgid,
- msq->q_stime,
- msq->q_rtime,
- msq->q_ctime);
+ "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
+ msq->q_perm.key,
+ msq->q_id,
+ msq->q_perm.mode,
+ msq->q_cbytes,
+ msq->q_qnum,
+ msq->q_lspid,
+ msq->q_lrpid,
+ msq->q_perm.uid,
+ msq->q_perm.gid,
+ msq->q_perm.cuid,
+ msq->q_perm.cgid,
+ msq->q_stime,
+ msq->q_rtime,
+ msq->q_ctime);
}
#endif
commit e3f2ddeac718c768fdac4b7fe69d465172f788a8
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Jul 29 05:17:57 2006 +0200
[PATCH] pi-futex: robust-futex exit
Fix robust PI-futexes to be properly unlocked on unexpected exit.
For this to work the kernel has to know whether a futex is a PI or a
non-PI one, because the semantics are different. Since the space in
relevant glibc data structures is extremely scarce, the best solution is
to encode the 'PI' information in bit 0 of the robust list pointer.
Existing (non-PI) glibc robust futexes have this bit always zero, so the
ABI is kept. New glibc with PI-robust-futexes will set this bit.
Further fixes from Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
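The bit-0 encoding described above can be sketched in a few self-contained lines (an illustration of the scheme only; the kernel's real decoder is the fetch_robust_entry() helper added in the kernel/futex.c hunk below). Because robust-list entries are word aligned, the low bit of each list pointer is otherwise always zero, so it can carry the PI flag and is masked off before the pointer is dereferenced:

#include <stdint.h>

struct robust_list { struct robust_list *next; };

/* Encode: new (PI-aware) glibc sets bit 0 for a PI robust futex;
 * existing glibc leaves it zero, so the old ABI is unchanged. */
static uintptr_t encode_robust_entry(struct robust_list *entry, int pi)
{
        return (uintptr_t)entry | (pi ? 1UL : 0UL);
}

/* Decode: mask bit 0 off to recover the pointer and keep it as the
 * PI flag, mirroring fetch_robust_entry() in the diff below. */
static void decode_robust_entry(uintptr_t uentry,
                                struct robust_list **entry, int *pi)
{
        *entry = (struct robust_list *)(uentry & ~(uintptr_t)1);
        *pi = (int)(uentry & 1);
}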
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 34c3a215f2cd..d097b5b72bc6 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -96,7 +96,8 @@ struct robust_list_head {
long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
-extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr);
+extern int
+handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
#ifdef CONFIG_FUTEX
extern void exit_robust_list(struct task_struct *curr);
diff --git a/kernel/futex.c b/kernel/futex.c
index f59003b1d8f9..dda2049692a2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -495,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
}
/*
- * We are the first waiter - try to look up the real owner and
- * attach the new pi_state to it:
+ * We are the first waiter - try to look up the real owner and attach
+ * the new pi_state to it, but bail out when the owner died bit is set
+ * and TID = 0:
*/
pid = uval & FUTEX_TID_MASK;
+ if (!pid && (uval & FUTEX_OWNER_DIED))
+ return -ESRCH;
p = futex_find_get_task(pid);
if (!p)
return -ESRCH;
@@ -579,16 +582,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
* kept enabled while there is PI state around. We must also
* preserve the owner died bit.)
*/
- newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
-
- inc_preempt_count();
- curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
- dec_preempt_count();
+ if (!(uval & FUTEX_OWNER_DIED)) {
+ newval = FUTEX_WAITERS | new_owner->pid;
- if (curval == -EFAULT)
- return -EFAULT;
- if (curval != uval)
- return -EINVAL;
+ inc_preempt_count();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ dec_preempt_count();
+ if (curval == -EFAULT)
+ return -EFAULT;
+ if (curval != uval)
+ return -EINVAL;
+ }
spin_lock_irq(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
@@ -1443,9 +1447,11 @@ static int futex_unlock_pi(u32 __user *uaddr)
* again. If it succeeds then we can return without waking
* anyone else up:
*/
- inc_preempt_count();
- uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
- dec_preempt_count();
+ if (!(uval & FUTEX_OWNER_DIED)) {
+ inc_preempt_count();
+ uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+ dec_preempt_count();
+ }
if (unlikely(uval == -EFAULT))
goto pi_faulted;
@@ -1478,9 +1484,11 @@ static int futex_unlock_pi(u32 __user *uaddr)
/*
* No waiters - kernel unlocks the futex:
*/
- ret = unlock_futex_pi(uaddr, uval);
- if (ret == -EFAULT)
- goto pi_faulted;
+ if (!(uval & FUTEX_OWNER_DIED)) {
+ ret = unlock_futex_pi(uaddr, uval);
+ if (ret == -EFAULT)
+ goto pi_faulted;
+ }
out_unlock:
spin_unlock(&hb->lock);
@@ -1699,9 +1707,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
* Process a futex-list entry, check whether it's owned by the
* dying task, and do notification if so:
*/
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
{
- u32 uval, nval;
+ u32 uval, nval, mval;
retry:
if (get_user(uval, uaddr))
@@ -1718,20 +1726,44 @@ int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
* thread-death.) The rest of the cleanup is done in
* userspace.
*/
- nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
- uval | FUTEX_OWNER_DIED);
+ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+ nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
if (nval == -EFAULT)
return -1;
if (nval != uval)
goto retry;
- if (uval & FUTEX_WAITERS)
- futex_wake(uaddr, 1);
+ /*
+ * Wake robust non-PI futexes here. The wakeup of
+ * PI futexes happens in exit_pi_state():
+ */
+ if (!pi) {
+ if (uval & FUTEX_WAITERS)
+ futex_wake(uaddr, 1);
+ }
}
return 0;
}
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+ struct robust_list __user **head, int *pi)
+{
+ unsigned long uentry;
+
+ if (get_user(uentry, (unsigned long *)head))
+ return -EFAULT;
+
+ *entry = (void *)(uentry & ~1UL);
+ *pi = uentry & 1;
+
+ return 0;
+}
+
/*
* Walk curr->robust_list (very carefully, it's a userspace list!)
* and mark any locks found there dead, and notify any waiters.
@@ -1742,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr)
{
struct robust_list_head __user *head = curr->robust_list;
struct robust_list __user *entry, *pending;
- unsigned int limit = ROBUST_LIST_LIMIT;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
unsigned long futex_offset;
/*
* Fetch the list head (which was registered earlier, via
* sys_set_robust_list()):
*/
- if (get_user(entry, &head->list.next))
+ if (fetch_robust_entry(&entry, &head->list.next, &pi))
return;
/*
* Fetch the relative futex offset:
@@ -1760,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr)
* Fetch any possibly pending lock-add first, and handle it
* if it exists:
*/
- if (get_user(pending, &head->list_op_pending))
+ if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
return;
+
if (pending)
- handle_futex_death((void *)pending + futex_offset, curr);
+ handle_futex_death((void *)pending + futex_offset, curr, pip);
while (entry != &head->list) {
/*
@@ -1772,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr)
*/
if (entry != pending)
if (handle_futex_death((void *)entry + futex_offset,
- curr))
+ curr, pi))
return;
/*
* Fetch the next entry in the list:
*/
- if (get_user(entry, &entry->next))
+ if (fetch_robust_entry(&entry, &entry->next, &pi))
return;
/*
* Avoid excessively long or circular lists:
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d1d92b441fb7..d1aab1a452cc 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -12,6 +12,23 @@
#include <asm/uaccess.h>
+
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+ compat_uptr_t *head, int *pi)
+{
+ if (get_user(*uentry, head))
+ return -EFAULT;
+
+ *entry = compat_ptr((*uentry) & ~1);
+ *pi = (unsigned int)(*uentry) & 1;
+
+ return 0;
+}
+
/*
* Walk curr->robust_list (very carefully, it's a userspace list!)
* and mark any locks found there dead, and notify any waiters.
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr)
{
struct compat_robust_list_head __user *head = curr->compat_robust_list;
struct robust_list __user *entry, *pending;
+ unsigned int limit = ROBUST_LIST_LIMIT, pi;
compat_uptr_t uentry, upending;
- unsigned int limit = ROBUST_LIST_LIMIT;
compat_long_t futex_offset;
/*
* Fetch the list head (which was registered earlier, via
* sys_set_robust_list()):
*/
- if (get_user(uentry, &head->list.next))
+ if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
return;
- entry = compat_ptr(uentry);
/*
* Fetch the relative futex offset:
*/
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr)
* Fetch any possibly pending lock-add first, and handle it
* if it exists:
*/
- if (get_user(upending, &head->list_op_pending))
+ if (fetch_robust_entry(&upending, &pending,
+ &head->list_op_pending, &pi))
return;
- pending = compat_ptr(upending);
if (upending)
- handle_futex_death((void *)pending + futex_offset, curr);
+ handle_futex_death((void *)pending + futex_offset, curr, pi);
while (compat_ptr(uentry) != &head->list) {
/*
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr)
*/
if (entry != pending)
if (handle_futex_death((void *)entry + futex_offset,
- curr))
+ curr, pi))
return;
/*
* Fetch the next entry in the list:
*/
- if (get_user(uentry, (compat_uptr_t *)&entry->next))
+ if (fetch_robust_entry(&uentry, &entry,
+ (compat_uptr_t *)&entry->next, &pi))
return;
- entry = compat_ptr(uentry);
/*
* Avoid excessively long or circular lists:
*/
commit 627371d73cdd04ed23fe098755b4f855138ad9e0
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Jul 29 05:16:20 2006 +0200
[PATCH] pi-futex: robust-futex exit crash fix
Fix pi_state->list handling bugs: list handling mishap, locking error.
Plus add more debug checks and fix a few style issues I noticed while
debugging this.
(Reported by Ulrich Drepper and Jakub Jelinek.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/kernel/futex.c b/kernel/futex.c
index cf0c8e21d1ab..f59003b1d8f9 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -415,15 +415,15 @@ static struct task_struct * futex_find_get_task(pid_t pid)
*/
void exit_pi_state_list(struct task_struct *curr)
{
- struct futex_hash_bucket *hb;
struct list_head *next, *head = &curr->pi_state_list;
struct futex_pi_state *pi_state;
+ struct futex_hash_bucket *hb;
union futex_key key;
/*
* We are a ZOMBIE and nobody can enqueue itself on
* pi_state_list anymore, but we have to be careful
- * versus waiters unqueueing themselfs
+ * versus waiters unqueueing themselves:
*/
spin_lock_irq(&curr->pi_lock);
while (!list_empty(head)) {
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr)
next = head->next;
pi_state = list_entry(next, struct futex_pi_state, list);
key = pi_state->key;
+ hb = hash_futex(&key);
spin_unlock_irq(&curr->pi_lock);
- hb = hash_futex(&key);
spin_lock(&hb->lock);
spin_lock_irq(&curr->pi_lock);
+ /*
+ * We dropped the pi-lock, so re-check whether this
+ * task still owns the PI-state:
+ */
if (head->next != next) {
spin_unlock(&hb->lock);
continue;
}
- list_del_init(&pi_state->list);
-
WARN_ON(pi_state->owner != curr);
-
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
pi_state->owner = NULL;
spin_unlock_irq(&curr->pi_lock);
@@ -470,7 +473,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
head = &hb->chain;
list_for_each_entry_safe(this, next, head, list) {
- if (match_futex (&this->key, &me->key)) {
+ if (match_futex(&this->key, &me->key)) {
/*
* Another waiter already exists - bump up
* the refcount and return its pi_state:
@@ -482,6 +485,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
if (unlikely(!pi_state))
return -EINVAL;
+ WARN_ON(!atomic_read(&pi_state->refcount));
+
atomic_inc(&pi_state->refcount);
me->pi_state = pi_state;
@@ -510,6 +515,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
pi_state->key = me->key;
spin_lock_irq(&p->pi_lock);
+ WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &p->pi_state_list);
pi_state->owner = p;
spin_unlock_irq(&p->pi_lock);
@@ -584,9 +590,17 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
if (curval != uval)
return -EINVAL;
- list_del_init(&pi_state->owner->pi_state_list);
+ spin_lock_irq(&pi_state->owner->pi_lock);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+ spin_unlock_irq(&pi_state->owner->pi_lock);
+
+ spin_lock_irq(&new_owner->pi_lock);
+ WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &new_owner->pi_state_list);
pi_state->owner = new_owner;
+ spin_unlock_irq(&new_owner->pi_lock);
+
rt_mutex_unlock(&pi_state->pi_mutex);
return 0;
@@ -1236,6 +1250,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
/* Owner died? */
if (q.pi_state->owner != NULL) {
spin_lock_irq(&q.pi_state->owner->pi_lock);
+ WARN_ON(list_empty(&q.pi_state->list));
list_del_init(&q.pi_state->list);
spin_unlock_irq(&q.pi_state->owner->pi_lock);
} else
@@ -1244,6 +1259,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
q.pi_state->owner = current;
spin_lock_irq(¤t->pi_lock);
+ WARN_ON(!list_empty(&q.pi_state->list));
list_add(&q.pi_state->list, ¤t->pi_state_list);
spin_unlock_irq(¤t->pi_lock);
commit 3a5f5e488ceee9e08df3dff3f01b12fafc9e7e68
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Jul 14 00:24:27 2006 -0700
[PATCH] lockdep: core, fix rq-lock handling on __ARCH_WANT_UNLOCKED_CTXSW
On platforms that have __ARCH_WANT_UNLOCKED_CTXSW set and want to implement
lock validator support, there's a bug in rq->lock handling: in this case we
don't 'carry over' the runqueue lock into another task - but still we did a
spinlock_release() of it. Fix this by making the spinlock_release() in
context_switch() dependent on !__ARCH_WANT_UNLOCKED_CTXSW.
(Reported by Ralf Baechle on MIPS, which has __ARCH_WANT_UNLOCKED_CTXSW.
This fixes a lockdep-internal BUG message on such platforms.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/kernel/sched.c b/kernel/sched.c
index d714611f1691..e9a0b61f12ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1788,7 +1788,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
WARN_ON(rq->prev_mm);
rq->prev_mm = oldmm;
}
+ /*
+ * Since the runqueue lock will be released by the next
+ * task (which is an invalid locking op but in the case
+ * of the scheduler it's an obvious special-case), so we
+ * do an early lockdep release here:
+ */
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
+#endif
/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);
commit fc818301a8a39fedd7f0a71f878f29130c72193d
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Jul 13 09:12:21 2006 +0200
[PATCH] revert slab.c locking change
Chandra Seetharaman reported SLAB crashes caused by the slab.c lock
annotation patch. There is only one chunk of that patch that has a
material effect on the slab logic - this patch undoes that chunk.
Chandra confirmed that this fixes the slab problem.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Chandra Seetharaman <sekharan@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/mm/slab.c b/mm/slab.c
index 5a57cda7490d..0f20843beffd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3119,16 +3119,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
if (slabp->inuse == 0) {
if (l3->free_objects > l3->free_limit) {
l3->free_objects -= cachep->num;
- /*
- * It is safe to drop the lock. The slab is
- * no longer linked to the cache. cachep
- * cannot disappear - we are using it and
- * all destruction of caches must be
- * serialized properly by the user.
- */
- spin_unlock(&l3->list_lock);
slab_destroy(cachep, slabp);
- spin_lock(&l3->list_lock);
} else {
list_add(&slabp->list, &l3->slabs_free);
}
commit 873623dfabaa6ebbdc1ce16c1766a3c0ec5d9923
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Jul 13 14:44:38 2006 +0200
[PATCH] lockdep: undo mm/slab.c annotation
Undo existing mm/slab.c lock-validator annotations, in preparation
for a new, less intrusive annotation patch.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/mm/slab.c b/mm/slab.c
index 85c2e03098a7..fd1e4c4c1397 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1021,8 +1021,7 @@ static void drain_alien_cache(struct kmem_cache *cachep,
}
}
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
- int nesting)
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
struct slab *slabp = virt_to_slab(objp);
int nodeid = slabp->nodeid;
@@ -1040,7 +1039,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
STATS_INC_NODEFREES(cachep);
if (l3->alien && l3->alien[nodeid]) {
alien = l3->alien[nodeid];
- spin_lock_nested(&alien->lock, nesting);
+ spin_lock(&alien->lock);
if (unlikely(alien->avail == alien->limit)) {
STATS_INC_ACOVERFLOW(cachep);
__drain_alien_cache(cachep, alien, nodeid);
@@ -1069,8 +1068,7 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
{
}
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp,
- int nesting)
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
return 0;
}
@@ -1760,8 +1758,6 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
}
#endif
-static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting);
-
/**
* slab_destroy - destroy and release all objects in a slab
* @cachep: cache pointer being destroyed
@@ -1785,17 +1781,8 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
call_rcu(&slab_rcu->head, kmem_rcu_free);
} else {
kmem_freepages(cachep, addr);
- if (OFF_SLAB(cachep)) {
- unsigned long flags;
-
- /*
- * lockdep: we may nest inside an already held
- * ac->lock, so pass in a nesting flag:
- */
- local_irq_save(flags);
- __cache_free(cachep->slabp_cache, slabp, 1);
- local_irq_restore(flags);
- }
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->slabp_cache, slabp);
}
}
@@ -3135,7 +3122,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
#endif
check_irq_off();
l3 = cachep->nodelists[node];
- spin_lock_nested(&l3->list_lock, SINGLE_DEPTH_NESTING);
+ spin_lock(&l3->list_lock);
if (l3->shared) {
struct array_cache *shared_array = l3->shared;
int max = shared_array->limit - shared_array->avail;
@@ -3178,14 +3165,14 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
* Release an obj back to its cache. If the obj has a constructed state, it must
* be in this state _before_ it is released. Called with disabled ints.
*/
-static void __cache_free(struct kmem_cache *cachep, void *objp, int nesting)
+static inline void __cache_free(struct kmem_cache *cachep, void *objp)
{
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
- if (cache_free_alien(cachep, objp, nesting))
+ if (cache_free_alien(cachep, objp))
return;
if (likely(ac->avail < ac->limit)) {
@@ -3424,7 +3411,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
BUG_ON(virt_to_cache(objp) != cachep);
local_irq_save(flags);
- __cache_free(cachep, objp, 0);
+ __cache_free(cachep, objp);
local_irq_restore(flags);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3449,7 +3436,7 @@ void kfree(const void *objp)
kfree_debugcheck(objp);
c = virt_to_cache(objp);
debug_check_no_locks_freed(objp, obj_size(c));
- __cache_free(c, (void *)objp, 0);
+ __cache_free(c, (void *)objp);
local_irq_restore(flags);
}
EXPORT_SYMBOL(kfree);
commit 0f74964627e0ece4ac8da0e2cd01906ec322b4fe
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Jul 12 09:03:10 2006 -0700
[PATCH] lockdep: HPET/RTC fix
Joseph Fannin reported that hpet_rtc_interrupt() enables hardirqs
in irq context:
[ 25.628000] [<c014af4e>] trace_hardirqs_on+0xce/0x200
[ 25.628000] [<c036cf21>] _spin_unlock_irq+0x31/0x70
[ 25.628000] [<c0296584>] rtc_get_rtc_time+0x44/0x1a0
[ 25.628000] [<c01198bb>] hpet_rtc_interrupt+0x21b/0x280
[ 25.628000] [<c0161141>] handle_IRQ_event+0x31/0x70
[ 25.628000] [<c0162d37>] handle_edge_irq+0xe7/0x210
[ 25.628000] [<c0106192>] do_IRQ+0x92/0x120
[ 25.628000] [<c0104121>] common_interrupt+0x25/0x2c
The call to rtc_get_rtc_time() is highly suspect. At a minimum we
need the patch below to save/restore hardirq state.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Joseph Fannin <jfannin@gmail.com>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
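The rule the fix follows: spin_unlock_irq() unconditionally re-enables interrupts, so a function that can be reached from hard-irq context (as rtc_get_rtc_time() is here, via hpet_rtc_interrupt()) must use the save/restore variants instead. A minimal sketch of the two patterns, with hypothetical names rather than the driver's own:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

/* Only safe when the caller is known to run with interrupts enabled:
 * the unlock unconditionally turns interrupts back on. */
static void process_context_only(void)
{
        spin_lock_irq(&example_lock);
        /* ... touch the shared state ... */
        spin_unlock_irq(&example_lock);
}

/* Safe from any context: saves the caller's interrupt state and
 * restores exactly that state afterwards. */
static void any_context(void)
{
        unsigned long flags;

        spin_lock_irqsave(&example_lock, flags);
        /* ... touch the shared state ... */
        spin_unlock_irqrestore(&example_lock, flags);
}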
diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c
index 6ccc364c08df..6e6a7c7a7eff 100644
--- a/drivers/char/rtc.c
+++ b/drivers/char/rtc.c
@@ -1245,7 +1245,7 @@ static int rtc_proc_open(struct inode *inode, struct file *file)
void rtc_get_rtc_time(struct rtc_time *rtc_tm)
{
- unsigned long uip_watchdog = jiffies;
+ unsigned long uip_watchdog = jiffies, flags;
unsigned char ctrl;
#ifdef CONFIG_MACH_DECSTATION
unsigned int real_year;
@@ -1272,7 +1272,7 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
* RTC has RTC_DAY_OF_WEEK, we should usually ignore it, as it is
* only updated by the RTC when initially set to a non-zero value.
*/
- spin_lock_irq(&rtc_lock);
+ spin_lock_irqsave(&rtc_lock, flags);
rtc_tm->tm_sec = CMOS_READ(RTC_SECONDS);
rtc_tm->tm_min = CMOS_READ(RTC_MINUTES);
rtc_tm->tm_hour = CMOS_READ(RTC_HOURS);
@@ -1286,7 +1286,7 @@ void rtc_get_rtc_time(struct rtc_time *rtc_tm)
real_year = CMOS_READ(RTC_DEC_YEAR);
#endif
ctrl = CMOS_READ(RTC_CONTROL);
- spin_unlock_irq(&rtc_lock);
+ spin_unlock_irqrestore(&rtc_lock, flags);
if (!(ctrl & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
{
commit 21d71f513b6221f482ed6ad45e05f073ae67f319
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jul 10 04:45:32 2006 -0700
[PATCH] uninline init_waitqueue_head()
allyesconfig vmlinux size delta:
    text      data      bss       dec   filename
20736884   6073834  3075176  29885894   vmlinux.before
20721009   6073966  3075176  29870151   vmlinux.after
~18 bytes per callsite, 15K of text size (~0.1%) saved.
(as an added bonus this also removes a lockdep annotation.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
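As a quick check on the numbers above: 20736884 - 20721009 = 15875 bytes, roughly 15.5K of text and about 0.08% of the .text size, which matches the quoted "15K" and "~0.1%". At ~18 bytes per callsite this corresponds to on the order of 880 init_waitqueue_head() callers in the allyesconfig build (that caller count is inferred from the figures, not stated in the log).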
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 794be7af58ae..b3b9048421d8 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -77,17 +77,7 @@ struct task_struct;
#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
{ .flags = word, .bit_nr = bit, }
-/*
- * lockdep: we want one lock-class for all waitqueue locks.
- */
-extern struct lock_class_key waitqueue_lock_key;
-
-static inline void init_waitqueue_head(wait_queue_head_t *q)
-{
- spin_lock_init(&q->lock);
- lockdep_set_class(&q->lock, &waitqueue_lock_key);
- INIT_LIST_HEAD(&q->task_list);
-}
+extern void init_waitqueue_head(wait_queue_head_t *q);
static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
{
diff --git a/kernel/wait.c b/kernel/wait.c
index a1d57aeb7f75..59a82f63275d 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,9 +10,13 @@
#include <linux/wait.h>
#include <linux/hash.h>
-struct lock_class_key waitqueue_lock_key;
+void init_waitqueue_head(wait_queue_head_t *q)
+{
+ spin_lock_init(&q->lock);
+ INIT_LIST_HEAD(&q->task_list);
+}
-EXPORT_SYMBOL(waitqueue_lock_key);
+EXPORT_SYMBOL(init_waitqueue_head);
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
commit f86bf9b7bcc5d325687a8b80da8ee3eb56e02da7
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jul 10 04:44:05 2006 -0700
[PATCH] lockdep: clean up completion initializer in smpboot.c
Clean up lockdep on-stack-completion initializer. (This also removes the
dependency on waitqueue_lock_key.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
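COMPLETION_INITIALIZER_ONSTACK(), added in the include/linux/completion.h hunk below, is a GCC statement expression: it runs init_completion() on the on-stack completion at the point of the declaration and then yields the initialized value, which is what lets lockdep key on-stack completions correctly. A usage sketch with the surrounding structure reduced to a hypothetical minimum (the real user is do_boot_cpu()'s c_idle.done in the smpboot.c hunk):

#include <linux/completion.h>

/* Reduced stand-in for the create_idle structure used by do_boot_cpu(). */
struct create_idle_example {
        int cpu;
        struct completion done;
};

static void boot_helper_example(int cpu)
{
        struct create_idle_example c_idle = {
                .cpu  = cpu,
                /* init_completion(&c_idle.done) runs right here, at the
                 * point of declaration, via the statement expression. */
                .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
        };

        /* ... hand c_idle to a worker that will call complete(&c_idle.done),
         * then wait for it: */
        wait_for_completion(&c_idle.done);
}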
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index b7c705969791..975380207b46 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -771,12 +771,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
unsigned long start_rip;
struct create_idle c_idle = {
.cpu = cpu,
- .done = COMPLETION_INITIALIZER(c_idle.done),
+ .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
};
DECLARE_WORK(work, do_fork_idle, &c_idle);
- lockdep_set_class(&c_idle.done.wait.lock, &waitqueue_lock_key);
-
/* allocate memory for gdts of secondary cpus. Hotplug is considered */
if (!cpu_gdt_descr[cpu].address &&
!(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 251c41e3ddd5..268c5a4a2bd4 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -18,6 +18,9 @@ struct completion {
#define COMPLETION_INITIALIZER(work) \
{ 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+#define COMPLETION_INITIALIZER_ONSTACK(work) \
+ ({ init_completion(&work); work; })
+
#define DECLARE_COMPLETION(work) \
struct completion work = COMPLETION_INITIALIZER(work)
@@ -28,7 +31,7 @@ struct completion {
*/
#ifdef CONFIG_LOCKDEP
# define DECLARE_COMPLETION_ONSTACK(work) \
- struct completion work = ({ init_completion(&work); work; })
+ struct completion work = COMPLETION_INITIALIZER_ONSTACK(work)
#else
# define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work)
#endif
commit d6d897cec29252b8d0785198cfa6ca16d30c739d
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Jul 10 04:44:04 2006 -0700
[PATCH] lockdep: core, reduce per-lock class-cache size
lockdep_map is embedded into every lock, which blows up data structure
sizes all around the kernel. Reduce the class-cache to be for the default
class only - that is used in 99.9% of the cases and even if we don't
class cached, the lookup in the class-hash is lockless.
This change reduces the per-lock dep_map overhead by 56 bytes on 64-bit
platforms and by 28 bytes on 32-bit platforms.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
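The per-lock figures quoted above follow directly from the lockdep_map change in the include/linux/lockdep.h hunk below, assuming MAX_LOCKDEP_SUBCLASSES is 8 (its value at the time): the 8-entry class[] pointer array becomes a single class_cache pointer, so every lockdep_map sheds 7 pointers, i.e. 7 × 8 = 56 bytes on 64-bit and 7 × 4 = 28 bytes on 32-bit.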
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 316e0fb8d7b1..c040a8c969aa 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -120,7 +120,7 @@ struct lock_class {
*/
struct lockdep_map {
struct lock_class_key *key;
- struct lock_class *class[MAX_LOCKDEP_SUBCLASSES];
+ struct lock_class *class_cache;
const char *name;
};
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c1f34addd003..9bad17884513 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1104,7 +1104,7 @@ extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
* itself, so actual lookup of the hash should be once per lock object.
*/
static inline struct lock_class *
-register_lock_class(struct lockdep_map *lock, unsigned int subclass)
+look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
{
struct lockdep_subclass_key *key;
struct list_head *hash_head;
@@ -1148,7 +1148,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass)
*/
list_for_each_entry(class, hash_head, hash_entry)
if (class->key == key)
- goto out_set;
+ return class;
+
+ return NULL;
+}
+
+/*
+ * Register a lock's class in the hash-table, if the class is not present
+ * yet. Otherwise we look it up. We cache the result in the lock object
+ * itself, so actual lookup of the hash should be once per lock object.
+ */
+static inline struct lock_class *
+register_lock_class(struct lockdep_map *lock, unsigned int subclass)
+{
+ struct lockdep_subclass_key *key;
+ struct list_head *hash_head;
+ struct lock_class *class;
+
+ class = look_up_lock_class(lock, subclass);
+ if (likely(class))
+ return class;
/*
* Debug-check: all keys must be persistent!
@@ -1163,6 +1182,9 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass)
return NULL;
}
+ key = lock->key->subkeys + subclass;
+ hash_head = classhashentry(key);
+
__raw_spin_lock(&hash_lock);
/*
* We have to do the hash-walk again, to avoid races
@@ -1209,8 +1231,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass)
out_unlock_set:
__raw_spin_unlock(&hash_lock);
-out_set:
- lock->class[subclass] = class;
+ if (!subclass)
+ lock->class_cache = class;
DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
@@ -1914,7 +1936,7 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
}
lock->name = name;
lock->key = key;
- memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES);
+ lock->class_cache = NULL;
}
EXPORT_SYMBOL_GPL(lockdep_init_map);
@@ -1928,8 +1950,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
unsigned long ip)
{
struct task_struct *curr = current;
+ struct lock_class *class = NULL;
struct held_lock *hlock;
- struct lock_class *class;
unsigned int depth, id;
int chain_head = 0;
u64 chain_key;
@@ -1947,8 +1969,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
return 0;
}
- class = lock->class[subclass];
- /* not cached yet? */
+ if (!subclass)
+ class = lock->class_cache;
+ /*
+ * Not cached yet or subclass?
+ */
if (unlikely(!class)) {
class = register_lock_class(lock, subclass);
if (!class)
@@ -2449,48 +2474,44 @@ void lockdep_free_key_range(void *start, unsigned long size)
void lockdep_reset_lock(struct lockdep_map *lock)
{
- struct lock_class *class, *next, *entry;
+ struct lock_class *class, *next;
struct list_head *head;
unsigned long flags;
int i, j;
raw_local_irq_save(flags);
- __raw_spin_lock(&hash_lock);
/*
- * Remove all classes this lock has:
+ * Remove all classes this lock might have:
+ */
+ for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
+ /*
+ * If the class exists we look it up and zap it:
+ */
+ class = look_up_lock_class(lock, j);
+ if (class)
+ zap_class(class);
+ }
+ /*
+ * Debug check: in the end all mapped classes should
+ * be gone.
*/
+ __raw_spin_lock(&hash_lock);
for (i = 0; i < CLASSHASH_SIZE; i++) {
head = classhash_table + i;
if (list_empty(head))
continue;
list_for_each_entry_safe(class, next, head, hash_entry) {
- for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
- entry = lock->class[j];
- if (class == entry) {
- zap_class(class);
- lock->class[j] = NULL;
- break;
- }
+ if (unlikely(class == lock->class_cache)) {
+ __raw_spin_unlock(&hash_lock);
+ DEBUG_LOCKS_WARN_ON(1);
+ goto out_restore;
}
}
}
-
- /*
- * Debug check: in the end all mapped classes should
- * be gone.
- */
- for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
- entry = lock->class[j];
- if (!entry)
- continue;
- __raw_spin_unlock(&hash_lock);
- DEBUG_LOCKS_WARN_ON(1);
- raw_local_irq_restore(flags);
- return;
- }
-
__raw_spin_unlock(&hash_lock);
+
+out_restore:
raw_local_irq_restore(flags);
}