Patches contributed by Eötvös Lorand University

<<Prev 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283[284]285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 Next>>

commit 57a04513cb35086d54bcb2cb92e6627fc8fa0fae
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Jan 6 21:09:53 2008 +0100

    hda_intel suspend latency: shorten codec read
    
    not sleeping for every codec read/write but doing a short udelay and
    a conditional reschedule has cut suspend+resume latency by about 1
    second on my T60.
    
    The patch also fixes the unexpected codec-connection errors that
    happen more often in the new power-save mode:
        http://lkml.org/lkml/2007/11/8/255
        http://bugzilla.kernel.org/show_bug.cgi?id=9332
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Takashi Iwai <tiwai@suse.de>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 3fa0f9704909..62b9fb386903 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -555,7 +555,8 @@ static unsigned int azx_rirb_get_response(struct hda_codec *codec)
 		}
 		if (!chip->rirb.cmds)
 			return chip->rirb.res; /* the last value */
-		schedule_timeout_uninterruptible(1);
+		udelay(10);
+		cond_resched();
 	} while (time_after_eq(timeout, jiffies));
 
 	if (chip->msi) {

commit ac40532ef0b8649e6f7f83859ea0de1c4ed08a19
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Jan 2 17:25:34 2008 +0100

    scsi: revert "[SCSI] Get rid of scsi_cmnd->done"
    
    This reverts commit 6f5391c283d7fdcf24bf40786ea79061919d1e1d ("[SCSI]
    Get rid of scsi_cmnd->done") that was supposed to be a cleanup commit,
    but apparently it causes regressions:
    
      Bug 9370 - v2.6.24-rc2-409-g9418d5d: attempt to access beyond end of device
      http://bugzilla.kernel.org/show_bug.cgi?id=9370
    
    this patch should be reintroduced in a more split-up form to make
    testing of it easier.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Acked-by: Matthew Wilcox <matthew@wil.cx>
    Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 0fb1709ce5e3..7ceb8209e5df 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -59,7 +59,6 @@
 #include <scsi/scsi_cmnd.h>
 #include <scsi/scsi_dbg.h>
 #include <scsi/scsi_device.h>
-#include <scsi/scsi_driver.h>
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_tcq.h>
@@ -368,8 +367,9 @@ void scsi_log_send(struct scsi_cmnd *cmd)
 			scsi_print_command(cmd);
 			if (level > 3) {
 				printk(KERN_INFO "buffer = 0x%p, bufflen = %d,"
-				       " queuecommand 0x%p\n",
+				       " done = 0x%p, queuecommand 0x%p\n",
 					scsi_sglist(cmd), scsi_bufflen(cmd),
+					cmd->done,
 					cmd->device->host->hostt->queuecommand);
 
 			}
@@ -654,12 +654,6 @@ void __scsi_done(struct scsi_cmnd *cmd)
 	blk_complete_request(rq);
 }
 
-/* Move this to a header if it becomes more generally useful */
-static struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd)
-{
-	return *(struct scsi_driver **)cmd->request->rq_disk->private_data;
-}
-
 /*
  * Function:    scsi_finish_command
  *
@@ -671,8 +665,6 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
 {
 	struct scsi_device *sdev = cmd->device;
 	struct Scsi_Host *shost = sdev->host;
-	struct scsi_driver *drv;
-	unsigned int good_bytes;
 
 	scsi_device_unbusy(sdev);
 
@@ -698,13 +690,7 @@ void scsi_finish_command(struct scsi_cmnd *cmd)
 				"Notifying upper driver of completion "
 				"(result %x)\n", cmd->result));
 
-	good_bytes = cmd->request_bufflen;
-        if (cmd->request->cmd_type != REQ_TYPE_BLOCK_PC) {
-		drv = scsi_cmd_to_driver(cmd);
-		if (drv->done)
-			good_bytes = drv->done(cmd);
-	}
-	scsi_io_completion(cmd, good_bytes);
+	cmd->done(cmd);
 }
 EXPORT_SYMBOL(scsi_finish_command);
 
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ebaca4ca4a13..70700b97c915 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1699,6 +1699,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
 	memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));
     
 	scmd->scsi_done		= scsi_reset_provider_done_command;
+	scmd->done			= NULL;
 	scmd->request_buffer		= NULL;
 	scmd->request_bufflen		= 0;
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0e81e4cf8876..8df8267ce316 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1092,6 +1092,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	}
 	scsi_end_request(cmd, 0, this_count, !result);
 }
+EXPORT_SYMBOL(scsi_io_completion);
 
 /*
  * Function:    scsi_init_io()
@@ -1170,6 +1171,18 @@ static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev,
 	return cmd;
 }
 
+static void scsi_blk_pc_done(struct scsi_cmnd *cmd)
+{
+	BUG_ON(!blk_pc_request(cmd->request));
+	/*
+	 * This will complete the whole command with uptodate=1 so
+	 * as far as the block layer is concerned the command completed
+	 * successfully. Since this is a REQ_BLOCK_PC command the
+	 * caller should check the request's errors value
+	 */
+	scsi_io_completion(cmd, cmd->request_bufflen);
+}
+
 int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 {
 	struct scsi_cmnd *cmd;
@@ -1219,6 +1232,7 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 	cmd->transfersize = req->data_len;
 	cmd->allowed = req->retries;
 	cmd->timeout_per_command = req->timeout;
+	cmd->done = scsi_blk_pc_done;
 	return BLKPREP_OK;
 }
 EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index eff005951895..ee8efe849bf4 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -68,7 +68,6 @@ extern int scsi_maybe_unblock_host(struct scsi_device *sdev);
 extern void scsi_device_unbusy(struct scsi_device *sdev);
 extern int scsi_queue_insert(struct scsi_cmnd *cmd, int reason);
 extern void scsi_next_command(struct scsi_cmnd *cmd);
-extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
 extern void scsi_run_host_queues(struct Scsi_Host *shost);
 extern struct request_queue *scsi_alloc_queue(struct scsi_device *sdev);
 extern void scsi_free_queue(struct request_queue *q);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a69b155f39a2..cb85296d5384 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -86,19 +86,6 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK);
 MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
 MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
 
-static int  sd_revalidate_disk(struct gendisk *);
-static int  sd_probe(struct device *);
-static int  sd_remove(struct device *);
-static void sd_shutdown(struct device *);
-static int sd_suspend(struct device *, pm_message_t state);
-static int sd_resume(struct device *);
-static void sd_rescan(struct device *);
-static int sd_done(struct scsi_cmnd *);
-static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
-static void scsi_disk_release(struct class_device *cdev);
-static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *);
-static void sd_print_result(struct scsi_disk *, int);
-
 static DEFINE_IDR(sd_index_idr);
 static DEFINE_SPINLOCK(sd_index_lock);
 
@@ -253,7 +240,6 @@ static struct scsi_driver sd_template = {
 		.shutdown	= sd_shutdown,
 	},
 	.rescan			= sd_rescan,
-	.done			= sd_done,
 };
 
 /*
@@ -522,6 +508,12 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq)
 	SCpnt->allowed = SD_MAX_RETRIES;
 	SCpnt->timeout_per_command = timeout;
 
+	/*
+	 * This is the completion routine we use.  This is matched in terms
+	 * of capability to this function.
+	 */
+	SCpnt->done = sd_rw_intr;
+
 	/*
 	 * This indicates that the command is ready from our end to be
 	 * queued.
@@ -895,13 +887,13 @@ static struct block_device_operations sd_fops = {
 };
 
 /**
- *	sd_done - bottom half handler: called when the lower level
+ *	sd_rw_intr - bottom half handler: called when the lower level
  *	driver has completed (successfully or otherwise) a scsi command.
  *	@SCpnt: mid-level's per command structure.
  *
  *	Note: potentially run from within an ISR. Must not block.
  **/
-static int sd_done(struct scsi_cmnd *SCpnt)
+static void sd_rw_intr(struct scsi_cmnd * SCpnt)
 {
 	int result = SCpnt->result;
  	unsigned int xfer_size = SCpnt->request_bufflen;
@@ -922,7 +914,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 	SCSI_LOG_HLCOMPLETE(1, scsi_print_result(SCpnt));
 	if (sense_valid) {
 		SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, SCpnt,
-						   "sd_done: sb[respc,sk,asc,"
+						   "sd_rw_intr: sb[respc,sk,asc,"
 						   "ascq]=%x,%x,%x,%x\n",
 						   sshdr.response_code,
 						   sshdr.sense_key, sshdr.asc,
@@ -994,7 +986,7 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 		break;
 	}
  out:
-	return good_bytes;
+	scsi_io_completion(SCpnt, good_bytes);
 }
 
 static int media_not_present(struct scsi_disk *sdkp,
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index c61999031141..a0c4e13d4dab 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -78,7 +78,6 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_WORM);
 
 static int sr_probe(struct device *);
 static int sr_remove(struct device *);
-static int sr_done(struct scsi_cmnd *);
 
 static struct scsi_driver sr_template = {
 	.owner			= THIS_MODULE,
@@ -87,7 +86,6 @@ static struct scsi_driver sr_template = {
 		.probe		= sr_probe,
 		.remove		= sr_remove,
 	},
-	.done			= sr_done,
 };
 
 static unsigned long sr_index_bits[SR_DISKS / BITS_PER_LONG];
@@ -210,12 +208,12 @@ static int sr_media_change(struct cdrom_device_info *cdi, int slot)
 }
  
 /*
- * sr_done is the interrupt routine for the device driver.
+ * rw_intr is the interrupt routine for the device driver.
  *
- * It will be notified on the end of a SCSI read / write, and will take one
+ * It will be notified on the end of a SCSI read / write, and will take on
  * of several actions based on success or failure.
  */
-static int sr_done(struct scsi_cmnd *SCpnt)
+static void rw_intr(struct scsi_cmnd * SCpnt)
 {
 	int result = SCpnt->result;
 	int this_count = SCpnt->request_bufflen;
@@ -288,7 +286,12 @@ static int sr_done(struct scsi_cmnd *SCpnt)
 		}
 	}
 
-	return good_bytes;
+	/*
+	 * This calls the generic completion function, now that we know
+	 * how many actual sectors finished, and how many sectors we need
+	 * to say have failed.
+	 */
+	scsi_io_completion(SCpnt, good_bytes);
 }
 
 static int sr_prep_fn(struct request_queue *q, struct request *rq)
@@ -424,6 +427,12 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq)
 	SCpnt->allowed = MAX_RETRIES;
 	SCpnt->timeout_per_command = timeout;
 
+	/*
+	 * This is the completion routine we use.  This is matched in terms
+	 * of capability to this function.
+	 */
+	SCpnt->done = rw_intr;
+
 	/*
 	 * This indicates that the command is ready from our end to be
 	 * queued.
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 3f47e522a1ec..7613c2989370 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -34,6 +34,7 @@ struct scsi_cmnd {
 	struct list_head list;  /* scsi_cmnd participates in queue lists */
 	struct list_head eh_entry; /* entry for the host eh_cmd_q */
 	int eh_eflags;		/* Used by error handlr */
+	void (*done) (struct scsi_cmnd *);	/* Mid-level done function */
 
 	/*
 	 * A SCSI Command is assigned a nonzero serial_number before passed
@@ -121,6 +122,7 @@ extern struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *, gfp_t);
 extern void scsi_put_command(struct scsi_cmnd *);
 extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *,
 			       struct device *);
+extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
 extern void scsi_finish_command(struct scsi_cmnd *cmd);
 extern void scsi_req_abort_cmd(struct scsi_cmnd *cmd);
 
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 1f5ca7f62116..56a304709fde 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -15,7 +15,6 @@ struct scsi_driver {
 	struct device_driver	gendrv;
 
 	void (*rescan)(struct device *);
-	int (*done)(struct scsi_cmnd *);
 };
 #define to_scsi_driver(drv) \
 	container_of((drv), struct scsi_driver, gendrv)
diff --git a/include/scsi/sd.h b/include/scsi/sd.h
index f7513313ef0d..aa1e71613010 100644
--- a/include/scsi/sd.h
+++ b/include/scsi/sd.h
@@ -47,6 +47,19 @@ struct scsi_disk {
 };
 #define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev)
 
+static int  sd_revalidate_disk(struct gendisk *disk);
+static void sd_rw_intr(struct scsi_cmnd * SCpnt);
+static int  sd_probe(struct device *);
+static int  sd_remove(struct device *);
+static void sd_shutdown(struct device *dev);
+static int sd_suspend(struct device *dev, pm_message_t state);
+static int sd_resume(struct device *dev);
+static void sd_rescan(struct device *);
+static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer);
+static void scsi_disk_release(struct class_device *cdev);
+static void sd_print_sense_hdr(struct scsi_disk *, struct scsi_sense_hdr *);
+static void sd_print_result(struct scsi_disk *, int);
+
 #define sd_printk(prefix, sdsk, fmt, a...)				\
         (sdsk)->disk ?							\
 	sdev_printk(prefix, (sdsk)->device, "[%s] " fmt,		\

commit 90b2628f1fe94a667330d425a7fb76ec8d2a49ec
Author: Ingo Molnar <mingo@elte.hu>
Date:   Sun Dec 30 17:24:35 2007 +0100

    sched: fix gcc warnings
    
    Meelis Roos reported these warnings on sparc64:
    
      CC      kernel/sched.o
      In file included from kernel/sched.c:879:
      kernel/sched_debug.c: In function 'nsec_high':
      kernel/sched_debug.c:38: warning: comparison of distinct pointer types lacks a cast
    
    the debug check in do_div() is over-eager here, because the long long
    is always positive in these places. Mark this by casting them to
    unsigned long long.
    
    no change in code output:
    
       text    data     bss     dec     hex filename
      51471    6582     376   58429    e43d sched.o.before
      51471    6582     376   58429    e43d sched.o.after
    
      md5:
       7f7729c111f185bf3ccea4d542abc049  sched.o.before.asm
       7f7729c111f185bf3ccea4d542abc049  sched.o.after.asm
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index d30467b47ddd..80fbbfc04290 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -31,9 +31,9 @@
 /*
  * Ease the printing of nsec fields:
  */
-static long long nsec_high(long long nsec)
+static long long nsec_high(unsigned long long nsec)
 {
-	if (nsec < 0) {
+	if ((long long)nsec < 0) {
 		nsec = -nsec;
 		do_div(nsec, 1000000);
 		return -nsec;
@@ -43,9 +43,9 @@ static long long nsec_high(long long nsec)
 	return nsec;
 }
 
-static unsigned long nsec_low(long long nsec)
+static unsigned long nsec_low(unsigned long long nsec)
 {
-	if (nsec < 0)
+	if ((long long)nsec < 0)
 		nsec = -nsec;
 
 	return do_div(nsec, 1000000);

commit c0a698b7443a9fce76b0a849f06c45ac78f3b0a0
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 21 01:27:19 2007 +0100

    x86: fix die() to not be preemptible
    
    Andrew "Eagle Eye" Morton noticed that we use raw_local_save_flags()
    instead of raw_local_irq_save(flags) in die(). This allows the
    preemption of oopsing contexts - which is highly undesirable. It also
    causes CONFIG_DEBUG_PREEMPT to complain, as reported by Miles Lane.
    
    this bug was introduced via:
    
      commit 39743c9ef717fd4f2b5583f010115c5f2482b8ae
      Author: Andi Kleen <ak@suse.de>
      Date:   Fri Oct 19 20:35:03 2007 +0200
    
          x86: use raw locks during oopses
    
    -               spin_lock_irqsave(&die.lock, flags);
    +               __raw_spin_lock(&die.lock);
    +               raw_local_save_flags(flags);
    
    that is not a correct open-coding of spin_lock_irqsave(): both the
    ordering is wrong (irqs should be disabled _first_), and the wrong
    flags-saving API was used.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index ef6010262597..c88bbffcaa03 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -373,14 +373,13 @@ void die(const char * str, struct pt_regs * regs, long err)
 
 	if (die.lock_owner != raw_smp_processor_id()) {
 		console_verbose();
+		raw_local_irq_save(flags);
 		__raw_spin_lock(&die.lock);
-		raw_local_save_flags(flags);
 		die.lock_owner = smp_processor_id();
 		die.lock_owner_depth = 0;
 		bust_spinlocks(1);
-	}
-	else
-		raw_local_save_flags(flags);
+	} else
+		raw_local_irq_save(flags);
 
 	if (++die.lock_owner_depth < 3) {
 		unsigned long esp;

commit 4aae07025265151e3f7041dfbf0f529e122de1d8
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Dec 18 18:05:58 2007 +0100

    x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!"
    
    this is the tale of a full day spent debugging an ancient but elusive bug.
    
    after booting up thousands of random .config kernels, i finally happened
    to generate a .config that produced the following rare bootup failure
    on 32-bit x86:
    
    | ..TIMER: vector=0x31 apic1=0 pin1=2 apic2=-1 pin2=-1
    | ..MP-BIOS bug: 8254 timer not connected to IO-APIC
    | ...trying to set up timer (IRQ0) through the 8259A ...  failed.
    | ...trying to set up timer as Virtual Wire IRQ... failed.
    | ...trying to set up timer as ExtINT IRQ... failed :(.
    | Kernel panic - not syncing: IO-APIC + timer doesn't work!  Boot with apic=debug
    | and send a report.  Then try booting with the 'noapic' option
    
    this bug has been reported many times during the years, but it was never
    reproduced nor fixed.
    
    the bug that i hit was extremely sensitive to .config details.
    
    First i did a .config-bisection - suspecting some .config detail.
    That led to CONFIG_X86_MCE: enabling X86_MCE magically made the bug disappear
    and the system would boot up just fine.
    
    Debugging my way through the MCE code ended up identifying two unlikely
    candidates: the thing that made a real difference to the hang was that
    X86_MCE did two printks:
    
     Intel machine check architecture supported.
     Intel machine check reporting enabled on CPU#1.
    
    Adding the same printks to a !CONFIG_X86_MCE kernel made the bug go away!
    
    this left timing as the main suspect: i experimented with adding various
    udelay()s to the arch/x86/kernel/io_apic_32.c:check_timer() function, and
    the race window turned out to be narrower than 30 microseconds (!).
    
    That made debugging especially funny, debugging without having printk
    ability before the bug hits is ... interesting ;-)
    
    eventually i started suspecting IRQ activities - those are pretty much the
    only thing that happen this early during bootup and have the timescale of
    a few dozen microseconds. Also, check_timer() changes the IRQ hardware
    in various creative ways, so the main candidate became IRQ0 interaction.
    
    i've added a counter to track timer irqs (on which core they arrived, at
    what exact time, etc.) and found that no timer IRQ would arrive after the
    bug condition hits - even if we re-enable IRQ0 and re-initialize the i8259A,
    but that we'd get a small number of timer irqs right around the time when we
    call the check_timer() function.
    
    Eventually i got the following backtrace triggered from debug code in the
    timer interrupt:
    
    ...trying to set up timer as Virtual Wire IRQ... failed.
    ...trying to set up timer as ExtINT IRQ...
    Pid: 1, comm: swapper Not tainted (2.6.24-rc5 #57)
    EIP: 0060:[<c044d57e>] EFLAGS: 00000246 CPU: 0
    EIP is at _spin_unlock_irqrestore+0x5/0x1c
    EAX: c0634178 EBX: 00000000 ECX: c4947d63 EDX: 00000246
    ESI: 00000002 EDI: 00010031 EBP: c04e0f2e ESP: f7c41df4
     DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
     CR0: 8005003b CR2: ffe04000 CR3: 00630000 CR4: 000006d0
     DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
     DR6: ffff0ff0 DR7: 00000400
      [<c05f5784>] setup_IO_APIC+0x9c3/0xc5c
    
    the spin_unlock() was called from init_8259A(). Wait ... we have an IRQ0
    entry while we are in the middle of setting up the local APIC, the i8259A
    and the PIT??
    
    That is certainly not how it's supposed to work! check_timer() was supposed
    to be called with irqs turned off - but this eroded away sometime in the
    past. This code would still work most of the time because this code runs
    very quickly, but just the right timing conditions are present and IRQ0
    hits in this small, ~30 usecs window, timer irqs stop and the system does
    not boot up. Also, given how early this is during bootup, the hang is
    very deterministic - but it would only occur on certain machines (and
    certain configs).
    
    The fix was quite simple: disable/restore interrupts properly in this
    function. With that in place the test-system now boots up just fine.
    
    (64-bit x86 io_apic_64.c had the same bug.)
    
    Phew! One down, only 1500 other kernel bugs are left ;-)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 6cf27319a91c..c3a565bba106 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1882,13 +1882,16 @@ __setup("no_timer_check", notimercheck);
 static int __init timer_irq_works(void)
 {
 	unsigned long t1 = jiffies;
+	unsigned long flags;
 
 	if (no_timer_check)
 		return 1;
 
+	local_save_flags(flags);
 	local_irq_enable();
 	/* Let ten ticks pass... */
 	mdelay((10 * 1000) / HZ);
+	local_irq_restore(flags);
 
 	/*
 	 * Expect a few ticks at least, to be sure some possible
@@ -2167,6 +2170,9 @@ static inline void __init check_timer(void)
 	int apic1, pin1, apic2, pin2;
 	int vector;
 	unsigned int ver;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	ver = apic_read(APIC_LVR);
 	ver = GET_APIC_VERSION(ver);
@@ -2219,7 +2225,7 @@ static inline void __init check_timer(void)
 			}
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
-			return;
+			goto out;
 		}
 		clear_IO_APIC_pin(apic1, pin1);
 		printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
@@ -2242,7 +2248,7 @@ static inline void __init check_timer(void)
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
 			}
-			return;
+			goto out;
 		}
 		/*
 		 * Cleanup, just in case ...
@@ -2266,7 +2272,7 @@ static inline void __init check_timer(void)
 
 	if (timer_irq_works()) {
 		printk(" works.\n");
-		return;
+		goto out;
 	}
 	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
 	printk(" failed.\n");
@@ -2282,11 +2288,13 @@ static inline void __init check_timer(void)
 
 	if (timer_irq_works()) {
 		printk(" works.\n");
-		return;
+		goto out;
 	}
 	printk(" failed :(.\n");
 	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
 		"report.  Then try booting with the 'noapic' option");
+out:
+	local_irq_restore(flags);
 }
 
 /*
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 435a8c9b55f8..cbac1670c7c3 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1281,10 +1281,13 @@ void disable_IO_APIC(void)
 static int __init timer_irq_works(void)
 {
 	unsigned long t1 = jiffies;
+	unsigned long flags;
 
+	local_save_flags(flags);
 	local_irq_enable();
 	/* Let ten ticks pass... */
 	mdelay((10 * 1000) / HZ);
+	local_irq_restore(flags);
 
 	/*
 	 * Expect a few ticks at least, to be sure some possible
@@ -1655,6 +1658,9 @@ static inline void check_timer(void)
 {
 	struct irq_cfg *cfg = irq_cfg + 0;
 	int apic1, pin1, apic2, pin2;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	/*
 	 * get/set the timer IRQ vector:
@@ -1696,7 +1702,7 @@ static inline void check_timer(void)
 			}
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
-			return;
+			goto out;
 		}
 		clear_IO_APIC_pin(apic1, pin1);
 		apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
@@ -1718,7 +1724,7 @@ static inline void check_timer(void)
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
 			}
-			return;
+			goto out;
 		}
 		/*
 		 * Cleanup, just in case ...
@@ -1741,7 +1747,7 @@ static inline void check_timer(void)
 
 	if (timer_irq_works()) {
 		apic_printk(APIC_VERBOSE," works.\n");
-		return;
+		goto out;
 	}
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
 	apic_printk(APIC_VERBOSE," failed.\n");
@@ -1756,10 +1762,12 @@ static inline void check_timer(void)
 
 	if (timer_irq_works()) {
 		apic_printk(APIC_VERBOSE," works.\n");
-		return;
+		goto out;
 	}
 	apic_printk(APIC_VERBOSE," failed :(.\n");
 	panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
+out:
+	local_irq_restore(flags);
 }
 
 static int __init notimercheck(char *s)

commit 6cbf1c126cf6a727287d61b122fde00a8b827bfe
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Dec 18 15:21:13 2007 +0100

    sched: do not hurt SCHED_BATCH on wakeup
    
    measurements by Yanmin Zhang have shown that SCHED_BATCH tasks benefit
    if they run the same place_entity() logic as SCHED_OTHER tasks - so
    uniformize behavior in this area.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c33f0ceb3de9..da7c061e7206 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -511,8 +511,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS) && entity_is_task(se) &&
-				task_of(se)->policy != SCHED_BATCH)
+		if (sched_feat(NEW_FAIR_SLEEPERS) && entity_is_task(se))
 			vruntime -= sysctl_sched_latency;
 
 		/* ensure we never gain time by being placed backwards. */

commit 2bacec8c318ca0418c0ee9ac662ee44207765dd4
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Dec 18 15:21:13 2007 +0100

    sched: touch softlockup watchdog after idling
    
    touch softlockup watchdog after idling.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 5ae0d4296e7c..3df84ea6aba9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -668,6 +668,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 	struct rq *rq = cpu_rq(smp_processor_id());
 	u64 now = sched_clock();
 
+	touch_softlockup_watchdog();
 	rq->idle_clock += delta_ns;
 	/*
 	 * Override the previous timestamp and ignore all

commit 8ced5f69e4bc09adcc6442e090e2e64c197246cf
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 7 19:02:47 2007 +0100

    sched: enable early use of sched_clock()
    
    some platforms have sched_clock() implementations that cannot be called
    very early during wakeup. If it's called it might hang or crash in hard
    to debug ways. So only call update_rq_clock() [which calls sched_clock()]
    if sched_init() has already been called. (rq->idle is NULL before the
    scheduler is initialized.)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index 67d9d1799d86..c6e551de795b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -488,7 +488,12 @@ unsigned long long cpu_clock(int cpu)
 
 	local_irq_save(flags);
 	rq = cpu_rq(cpu);
-	update_rq_clock(rq);
+	/*
+	 * Only call sched_clock() if the scheduler has already been
+	 * initialized (some code might call cpu_clock() very early):
+	 */
+	if (rq->idle)
+		update_rq_clock(rq);
 	now = rq->clock;
 	local_irq_restore(flags);

commit 5f9fa8a62d6a98f5cb2ee2e00b85bfe95e45888d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Dec 7 19:02:47 2007 +0100

    lockdep: make cli/sti annotation warnings clearer
    
    make cli/sti annotation warnings easier to interpret.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0f389621bb6b..723bd9f92556 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2654,10 +2654,15 @@ static void check_flags(unsigned long flags)
 	if (!debug_locks)
 		return;
 
-	if (irqs_disabled_flags(flags))
-		DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled);
-	else
-		DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled);
+	if (irqs_disabled_flags(flags)) {
+		if (DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled)) {
+			printk("possible reason: unannotated irqs-off.\n");
+		}
+	} else {
+		if (DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)) {
+			printk("possible reason: unannotated irqs-on.\n");
+		}
+	}
 
 	/*
 	 * We dont accurately track softirq state in e.g.

commit 856848737bd944c1db3ce0a66bbf67e56bd6f77d
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Dec 5 15:46:09 2007 +0100

    lockdep: fix debug_show_all_locks()
    
    fix the oops that can be seen in:
    
       http://bugzilla.kernel.org/attachment.cgi?id=13828&action=view
    
    it is not safe to print the locks of running tasks.
    
    (even with this fix we have a small race - but this is a debug
     function after all.)
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index ed38bbfc48a3..7e2ca7c9d99c 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3173,6 +3173,13 @@ void debug_show_all_locks(void)
 		printk(" locked it.\n");
 
 	do_each_thread(g, p) {
+		/*
+		 * It's not reliable to print a task's held locks
+		 * if it's not sleeping (or if it's not the current
+		 * task):
+		 */
+		if (p->state == TASK_RUNNING && p != current)
+			continue;
 		if (p->lockdep_depth)
 			lockdep_print_held_locks(p);
 		if (!unlock)