Patches contributed by Eötvös Loránd University
commit 098fb9db2c74cfd6ffdbf61eb026a0c21abc5f75
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Mar 16 20:36:10 2008 +0100
sched: clean up wakeup balancing, move wake_affine()
split out the affine-wakeup bits.
No code changed:
kernel/sched.o:
text data bss dec hex filename
42521 2858 232 45611 b22b sched.o.before
42521 2858 232 45611 b22b sched.o.after
md5:
9d76738f1272aa82f0b7affd2f51df6b sched.o.before.asm
09b31c44e9aff8666f72773dc433e2df sched.o.after.asm
(the md5s changed because stack slots changed and gcc scheduled some
registers in a different order - but otherwise the before and after
assembly is instruction-for-instruction equivalent.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2cc59080efa..70679b266693 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -980,12 +980,59 @@ static inline int wake_idle(int cpu, struct task_struct *p)
#endif
#ifdef CONFIG_SMP
+
+static int
+wake_affine(struct rq *rq, struct sched_domain *this_sd, struct task_struct *p,
+ int cpu, int this_cpu, int sync, int idx,
+ unsigned long load, unsigned long this_load,
+ unsigned int imbalance)
+{
+ unsigned long tl = this_load;
+ unsigned long tl_per_task;
+
+ if (!(this_sd->flags & SD_WAKE_AFFINE))
+ return 0;
+
+ /*
+ * Attract cache-cold tasks on sync wakeups:
+ */
+ if (sync && !task_hot(p, rq->clock, this_sd))
+ return 1;
+
+ schedstat_inc(p, se.nr_wakeups_affine_attempts);
+ tl_per_task = cpu_avg_load_per_task(this_cpu);
+
+ /*
+ * If sync wakeup then subtract the (maximum possible)
+ * effect of the currently running task from the load
+ * of the current CPU:
+ */
+ if (sync)
+ tl -= current->se.load.weight;
+
+ if ((tl <= load && tl + target_load(cpu, idx) <= tl_per_task) ||
+ 100*(tl + p->se.load.weight) <= imbalance*load) {
+ /*
+ * This domain has SD_WAKE_AFFINE and
+ * p is cache cold in this domain, and
+ * there is no bad imbalance.
+ */
+ schedstat_inc(this_sd, ttwu_move_affine);
+ schedstat_inc(p, se.nr_wakeups_affine);
+
+ return 1;
+ }
+ return 0;
+}
+
static int select_task_rq_fair(struct task_struct *p, int sync)
{
- int cpu, this_cpu;
- struct rq *rq;
struct sched_domain *sd, *this_sd = NULL;
- int new_cpu;
+ unsigned long load, this_load;
+ int cpu, this_cpu, new_cpu;
+ unsigned int imbalance;
+ struct rq *rq;
+ int idx;
cpu = task_cpu(p);
rq = task_rq(p);
@@ -1008,66 +1055,35 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
/*
* Check for affine wakeup and passive balancing possibilities.
*/
- if (this_sd) {
- int idx = this_sd->wake_idx;
- unsigned int imbalance;
- unsigned long load, this_load;
-
- imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
- load = source_load(cpu, idx);
- this_load = target_load(this_cpu, idx);
-
- new_cpu = this_cpu; /* Wake to this CPU if we can */
-
- if (this_sd->flags & SD_WAKE_AFFINE) {
- unsigned long tl = this_load;
- unsigned long tl_per_task;
-
- /*
- * Attract cache-cold tasks on sync wakeups:
- */
- if (sync && !task_hot(p, rq->clock, this_sd))
- goto out_set_cpu;
-
- schedstat_inc(p, se.nr_wakeups_affine_attempts);
- tl_per_task = cpu_avg_load_per_task(this_cpu);
-
- /*
- * If sync wakeup then subtract the (maximum possible)
- * effect of the currently running task from the load
- * of the current CPU:
- */
- if (sync)
- tl -= current->se.load.weight;
-
- if ((tl <= load &&
- tl + target_load(cpu, idx) <= tl_per_task) ||
- 100*(tl + p->se.load.weight) <= imbalance*load) {
- /*
- * This domain has SD_WAKE_AFFINE and
- * p is cache cold in this domain, and
- * there is no bad imbalance.
- */
- schedstat_inc(this_sd, ttwu_move_affine);
- schedstat_inc(p, se.nr_wakeups_affine);
- goto out_set_cpu;
- }
- }
+ if (!this_sd)
+ goto out_keep_cpu;
- /*
- * Start passive balancing when half the imbalance_pct
- * limit is reached.
- */
- if (this_sd->flags & SD_WAKE_BALANCE) {
- if (imbalance*this_load <= 100*load) {
- schedstat_inc(this_sd, ttwu_move_balance);
- schedstat_inc(p, se.nr_wakeups_passive);
- goto out_set_cpu;
- }
+ idx = this_sd->wake_idx;
+
+ imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
+
+ load = source_load(cpu, idx);
+ this_load = target_load(this_cpu, idx);
+
+ new_cpu = this_cpu; /* Wake to this CPU if we can */
+
+ if (wake_affine(rq, this_sd, p, cpu, this_cpu, sync, idx,
+ load, this_load, imbalance))
+ goto out_set_cpu;
+
+ /*
+ * Start passive balancing when half the imbalance_pct
+ * limit is reached.
+ */
+ if (this_sd->flags & SD_WAKE_BALANCE) {
+ if (imbalance*this_load <= 100*load) {
+ schedstat_inc(this_sd, ttwu_move_balance);
+ schedstat_inc(p, se.nr_wakeups_passive);
+ goto out_set_cpu;
}
}
+out_keep_cpu:
new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
out_set_cpu:
return wake_idle(new_cpu, p);
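For readers following the logic, here is a minimal user-space sketch of the two tests the new wake_affine() helper encapsulates. The function name affine_ok and all load figures are hypothetical; in the kernel the inputs come from source_load()/target_load(), cpu_avg_load_per_task() and p->se.load.weight:

#include <stdio.h>

static int affine_ok(unsigned long tl, unsigned long load,
		     unsigned long target, unsigned long tl_per_task,
		     unsigned long p_weight, unsigned int imbalance)
{
	/* the waking CPU is no more loaded than the task's old CPU,
	 * and adding the task keeps it within the per-task average... */
	if (tl <= load && tl + target <= tl_per_task)
		return 1;
	/* ...or pulling the task stays within the domain's imbalance
	 * tolerance (imbalance is a percentage, e.g. 112 for 12%) */
	if (100 * (tl + p_weight) <= (unsigned long)imbalance * load)
		return 1;
	return 0;
}

int main(void)
{
	/* hypothetical loads in nice-0 weight units */
	printf("%d\n", affine_ok(1024, 2048, 1024, 3072, 1024, 112));
	return 0;
}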
commit 6a6029b8cefe0ca7e82f27f3904dbedba3de4e06
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Mar 14 22:17:08 2008 +0100
sched: simplify sched_slice()
Use the existing calc_delta_mine() calculation for sched_slice(). This
saves a divide and simplifies the code because we share it with the
other users that divide by cfs_rq->load.
It also improves code size:
text data bss dec hex filename
42659 2740 144 45543 b1e7 sched.o.before
42093 2740 144 44977 afb1 sched.o.after
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 31aa1b9fa762..f2cc59080efa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -283,12 +283,8 @@ static u64 __sched_period(unsigned long nr_running)
*/
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- u64 slice = __sched_period(cfs_rq->nr_running);
-
- slice *= se->load.weight;
- do_div(slice, cfs_rq->load.weight);
-
- return slice;
+ return calc_delta_mine(__sched_period(cfs_rq->nr_running),
+ se->load.weight, &cfs_rq->load);
}
/*
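Why this is an equivalent transformation: sched_slice() computes period * se->load.weight / cfs_rq->load.weight, and calc_delta_mine() produces the same ratio by multiplying with a cached fixed-point inverse. A hedged user-space sketch follows; the constant, helper names and the single-shift form are illustrative (the kernel splits the shift to avoid 64-bit overflow):

#include <stdint.h>
#include <stdio.h>

#define WMULT_SHIFT 32	/* illustrative fixed-point precision */

/* Old form: an explicit 64-bit divide on every call. */
static uint64_t slice_by_div(uint64_t period, uint64_t weight, uint64_t total)
{
	return period * weight / total;
}

/* New form: multiply by a cached inverse; the divide happens only
 * when the inverse is (re)computed, not once per call. */
static uint64_t slice_by_inv(uint64_t period, uint64_t weight, uint64_t total)
{
	uint64_t inv = (1ULL << WMULT_SHIFT) / total;	/* cached per load change */
	return (uint64_t)(((__uint128_t)(period * weight) * inv) >> WMULT_SHIFT);
}

int main(void)
{
	/* hypothetical: 20ms period (in ns), task weight 1024, queue weight 3072 */
	printf("%llu %llu\n",
	       (unsigned long long)slice_by_div(20000000ULL, 1024, 3072),
	       (unsigned long long)slice_by_inv(20000000ULL, 1024, 3072));
	/* the two agree up to fixed-point rounding */
	return 0;
}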
commit e22ecef1d2658ba54ed7d3fdb5d60829fb434c23
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Mar 14 22:16:08 2008 +0100
sched: fix fair sleepers
Fair sleepers need to scale their latency target down by runqueue
weight. Otherwise busy systems will gain an ever-larger sleep bonus.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 31c4a2988b64..31aa1b9fa762 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -528,8 +528,10 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
if (!initial) {
/* sleeps upto a single latency don't count. */
- if (sched_feat(NEW_FAIR_SLEEPERS))
- vruntime -= sysctl_sched_latency;
+ if (sched_feat(NEW_FAIR_SLEEPERS)) {
+ vruntime -= calc_delta_fair(sysctl_sched_latency,
+ &cfs_rq->load);
+ }
/* ensure we never gain time by being placed backwards. */
vruntime = max_vruntime(se->vruntime, vruntime);
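The effect of the change in numbers, as a sketch: for this call site calc_delta_fair() boils down to latency * NICE_0_LOAD / cfs_rq->load.weight, so heavily loaded queues hand out proportionally smaller sleeper credit. Helper name and the NICE_0_LOAD value of 1024 are assumptions here:

#include <stdio.h>

static unsigned long long sleeper_bonus(unsigned long long latency_ns,
					unsigned long long rq_weight)
{
	const unsigned long long NICE_0_LOAD = 1024;	/* assumed nice-0 weight */
	return latency_ns * NICE_0_LOAD / rq_weight;	/* what calc_delta_fair() computes */
}

int main(void)
{
	/* one nice-0 task: the full 20ms bonus; four of them: 5ms each */
	printf("%llu\n", sleeper_bonus(20000000ULL, 1024));
	printf("%llu\n", sleeper_bonus(20000000ULL, 4096));
	return 0;
}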
commit 27d117266097101dcf79c4576903cdcdd0eabffc
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Mar 14 22:20:01 2008 +0100
sched: fix calc_delta_mine()
lw->weight can be 0 for a short time during bootup.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
diff --git a/kernel/sched.c b/kernel/sched.c
index 3a4ba3dc0f49..6b06f23261c0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1084,7 +1084,7 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
u64 tmp;
if (unlikely(!lw->inv_weight))
- lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
+ lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1);
tmp = (u64)delta_exec * weight;
/*
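A standalone sketch of the failure and the fix, with an illustrative fixed-point constant: the old expression divided by lw->weight itself, so a transiently zero weight during bootup faulted, while dividing by (weight + 1) is always safe and differs negligibly for real weights:

#include <stdio.h>

#define WMULT_CONST (1ULL << 32)	/* illustrative fixed-point scale */

/* Before: (WMULT_CONST - weight/2) / weight + 1 -- division by zero
 * when weight == 0. After: the +1 moves inside the divisor. */
static unsigned long long inv_weight(unsigned long long weight)
{
	return (WMULT_CONST - weight / 2) / (weight + 1);
}

int main(void)
{
	printf("%llu\n", inv_weight(0));	/* safe: no division by zero */
	printf("%llu\n", inv_weight(1024));	/* roughly 2^32 / 1024 */
	return 0;
}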
commit e89996ae3f9e88d4fd75751a15c10b19d197e702
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Mar 14 23:48:28 2008 +0100
sched: fix update_load_add()/sub()
Clear the cached inverse value when updating load. This is needed for
calc_delta_mine() to work correctly when using the rq load.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
diff --git a/kernel/sched.c b/kernel/sched.c
index 9df9ba73cb7a..3a4ba3dc0f49 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1108,11 +1108,13 @@ calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
lw->weight += inc;
+ lw->inv_weight = 0;
}
static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
{
lw->weight -= dec;
+ lw->inv_weight = 0;
}
/*
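A minimal sketch of the staleness bug being fixed, with simplified types and a simplified calc() standing in for calc_delta_mine(): the inverse is cached lazily, so without the reset a weight change keeps scaling by the old weight's inverse:

#include <stdio.h>

struct load_weight {
	unsigned long long weight;
	unsigned long long inv_weight;	/* cached ~2^32/weight; 0 means "recompute" */
};

static unsigned long long calc(unsigned long long delta, struct load_weight *lw)
{
	if (!lw->inv_weight)			/* lazily cache the inverse */
		lw->inv_weight = (1ULL << 32) / (lw->weight + 1);
	return (delta * lw->inv_weight) >> 32;
}

int main(void)
{
	struct load_weight lw = { .weight = 1024 };

	calc(1 << 20, &lw);		/* caches the inverse for weight 1024 */
	lw.weight += 1024;		/* without zeroing inv_weight ... */
	printf("%llu\n", calc(1 << 20, &lw)); /* ~1022: still scaled as if weight were 1024 */
	lw.inv_weight = 0;		/* the fix: invalidate on every update */
	printf("%llu\n", calc(1 << 20, &lw)); /* ~511: correct for weight 2048 */
	return 0;
}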
commit 9a46d7e5b63903a70cd96c2c1391a7a26a8dbec9
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Feb 26 09:30:32 2008 +0100
x86: ioremap, remove WARN_ON()
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ac3c959e271d..8fe576baa148 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -134,8 +134,6 @@ static void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
return NULL;
}
- WARN_ON_ONCE(page_is_ram(pfn));
-
switch (mode) {
case IOR_MODE_UNCACHED:
default:
commit f5dbb55b995b77d396fe2204495a0af3e24d28c2
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Mar 10 18:04:34 2008 +0100
fix BIOS PCI config cycle buglet causing ACPI boot regression
I figured out another ACPI-related regression today.
randconfig testing triggered an early boot-time hang on a laptop of mine
(32-bit x86, config attached) - the screen was scrolling ACPI AML
exceptions [with no serial port and no early debugging available].
v2.6.24 works fine on that laptop with the same .config, so after a few
hours of bisection (had to restart it 3 times - other regressions
interacted), it homed in on this commit:
| 10270d4838bdc493781f5a1cf2e90e9c34c9142f is first bad commit
|
| Author: Linus Torvalds <torvalds@woody.linux-foundation.org>
| Date: Wed Feb 13 09:56:14 2008 -0800
|
| acpi: fix acpi_os_read_pci_configuration() misuse of raw_pci_read()
Reverting this commit on top of -rc5 gave a correctly booting kernel.
But this commit fixes a real bug, so the real question is: why did it
break the bootup?
After quite some head-scratching, the following change stood out:
- pci_id->bus = tu8;
+ pci_id->bus = val;
pci_id->bus is defined as u16:
struct acpi_pci_id {
u16 segment;
u16 bus;
...
and 'tu8' changed from u8 to u32. So previously we'd unconditionally
mask the return value of acpi_os_read_pci_configuration()
(raw_pci_read()) to 8 bits, but now we just trust whatever comes back
from the PCI access routines and only crop it to 16 bits.
But if the high 8 bits of that result contain any noise then we'll
write that into ACPI's PCI ID descriptor and confuse the heck out of the
rest of ACPI.
So let's check the PCI-BIOS code on that theory. We have this codepath
for 8-bit accesses (arch/x86/pci/pcbios.c:pci_bios_read()):
switch (len) {
case 1:
__asm__("lcall *(%%esi); cld\n\t"
"jc 1f\n\t"
"xor %%ah, %%ah\n"
"1:"
: "=c" (*value),
"=a" (result)
: "1" (PCIBIOS_READ_CONFIG_BYTE),
"b" (bx),
"D" ((long)reg),
"S" (&pci_indirect));
Aha! The "=a" output constraint puts the full 32 bits of EAX into
*value. But if the BIOS's routines set any of the high bits to nonzero,
we'll return a value with more bits set than intended.
The other, more common PCI access methods (v1 and v2 PCI reads) clear
out the high bits already, for example pci_conf1_read() does:
switch (len) {
case 1:
*value = inb(0xCFC + (reg & 3));
which explicitly converts the return byte up to 32 bits and zero-extends
it.
So zero-extending the result in the PCI-BIOS read routine fixes the
regression on my laptop. (It might fix some other long-standing issues
we had with PCI-BIOS during the past decade...) Both 8-bit and 16-bit
accesses were buggy.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 10ac8c316c46..2f7109ac4c15 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -198,6 +198,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
"b" (bx),
"D" ((long)reg),
"S" (&pci_indirect));
+ /*
+ * Zero-extend the result beyond 8 bits, do not trust the
+ * BIOS having done it:
+ */
+ *value &= 0xff;
break;
case 2:
__asm__("lcall *(%%esi); cld\n\t"
@@ -210,6 +215,11 @@ static int pci_bios_read(unsigned int seg, unsigned int bus,
"b" (bx),
"D" ((long)reg),
"S" (&pci_indirect));
+ /*
+ * Zero-extend the result beyond 16 bits, do not trust the
+ * BIOS having done it:
+ */
+ *value &= 0xffff;
break;
case 4:
__asm__("lcall *(%%esi); cld\n\t"
commit 80d38f9a7871d9bafc3f244dabe48b41a58de705
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Mar 7 10:47:43 2008 +0100
drivers/char/esp.c: fix bootup lockup
randconfig testing found a bootup lockup in drivers/char/esp.c because
of a spinlock that wasn't correctly initialized.
I'm not sure why it became more prominent in 2.6.25-rc4; the bug seems
rather old, and I've been doing allyesconfig bootups for ages with
CONFIG_ESP enabled.
This fixes this bootup lockup:
PM: Adding info for No Bus:ttyP63
ttyP32 at 0x0240 (irq = 0) is an ESP primary port
BUG: spinlock lockup on CPU#0, swapper/1, f56dd004
Pid: 1, comm: swapper Not tainted 2.6.25-rc4-sched-devel.git-x86-latest.git #402
[<c03ac6f4>] _raw_spin_lock+0x134/0x140
[<c08649be>] _spin_lock_irqsave+0x5e/0x80
[<c0b9fbfe>] ? espserial_init+0x2be/0x6e0
[<c0b9fbfe>] espserial_init+0x2be/0x6e0
[<c0b877a3>] kernel_init+0x83/0x260
[<c0b9f940>] ? espserial_init+0x0/0x6e0
[<c010416a>] ? restore_nocheck_notrace+0x0/0xe
[<c0b87720>] ? kernel_init+0x0/0x260
[<c0b87720>] ? kernel_init+0x0/0x260
[<c0104507>] kernel_thread_helper+0x7/0x10
=======================
kzalloc() is not the way to initialize spinlocks anymore.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/char/esp.c b/drivers/char/esp.c
index c01e26d9ee5e..f3fe62067344 100644
--- a/drivers/char/esp.c
+++ b/drivers/char/esp.c
@@ -2484,6 +2484,7 @@ static int __init espserial_init(void)
return 0;
}
+ spin_lock_init(&info->lock);
/* rx_trigger, tx_trigger are needed by autoconfig */
info->config.rx_trigger = rx_trigger;
info->config.tx_trigger = tx_trigger;
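The pattern the one-liner restores, as a hedged sketch (struct my_port and the allocation helper are hypothetical, not the driver's real names): kzalloc() returns all-zero memory, but an all-zero pattern is not a validly initialized spinlock once debug or lockdep variants carry magic and owner fields, so every embedded lock needs an explicit spin_lock_init():

#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_port {			/* hypothetical driver state */
	spinlock_t lock;
	int rx_trigger;
	int tx_trigger;
};

static struct my_port *my_port_alloc(void)
{
	struct my_port *info = kzalloc(sizeof(*info), GFP_KERNEL);

	if (!info)
		return NULL;
	/* kzalloc() zeroes the struct, but that does not initialize
	 * the spinlock: debug builds expect state set up by
	 * spin_lock_init(), and taking an uninitialized lock shows up
	 * exactly as the "spinlock lockup" splat above. */
	spin_lock_init(&info->lock);
	return info;
}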
commit 7432d149fda8ce9ead9df91e577b83ce52ad5f65
Author: Ingo Molnar <mingo@elte.hu>
Date: Thu Mar 6 18:29:43 2008 +0100
x86: re-add reboot fixups
Jan Beulich noticed that the reboot fixups went missing during
reboot.c unification.
(commit 4d022e35fd7e07c522c7863fee6f07e53cf3fc14)
Geode and a few other rare boards with special reboot quirks are
affected.
Reported-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 7fd6ac43e4a1..55ceb8cdef75 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -326,6 +326,10 @@ static inline void kb_wait(void)
}
}
+void __attribute__((weak)) mach_reboot_fixups(void)
+{
+}
+
static void native_machine_emergency_restart(void)
{
int i;
@@ -337,6 +341,8 @@ static void native_machine_emergency_restart(void)
/* Could also try the reset bit in the Hammer NB */
switch (reboot_type) {
case BOOT_KBD:
+ mach_reboot_fixups(); /* for board specific fixups */
+
for (i = 0; i < 10; i++) {
kb_wait();
udelay(50);
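The mechanism relied on here: __attribute__((weak)) provides a default no-op, and any object file that defines a strong symbol of the same name silently replaces it at link time, so boards without quirks pay nothing. A minimal sketch, with the overriding file shown in a comment since it lives in a separate translation unit:

/* reboot.c -- the weak default; links in when nothing overrides it */
void __attribute__((weak)) mach_reboot_fixups(void)
{
	/* no board-specific quirks by default */
}

void emergency_restart_demo(void)
{
	mach_reboot_fixups();	/* board hook runs before the generic
				   keyboard-controller reset attempts */
}

/* geode.c (separate object file, hypothetical) -- a strong definition
 * with the same name wins at link time:
 *
 *	void mach_reboot_fixups(void)
 *	{
 *		// write the board's reset register here
 *	}
 */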
commit d47846c5866b7d98a1173c86a39d810a06647329
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Mar 4 14:54:47 2008 +0100
sysfs: CONFIG_SYSFS_DEPRECATED fix
CONFIG_SYSFS_DEPRECATED=y changed its meaning recently and causes
regressions in working setups that had SYSFS_DEPRECATED disabled.
So rename it to SYSFS_DEPRECATED_V2, so that testers pick up the new
default via 'make oldconfig' even if their old .configs disabled
CONFIG_SYSFS_DEPRECATED.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
diff --git a/init/Kconfig b/init/Kconfig
index e6606e6e99e4..98ebf3725412 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -367,9 +367,13 @@ config RESOURCE_COUNTERS
depends on CGROUPS
config SYSFS_DEPRECATED
+ bool
+
+config SYSFS_DEPRECATED_V2
bool "Create deprecated sysfs files"
depends on SYSFS
default y
+ select SYSFS_DEPRECATED
help
This option creates deprecated symlinks such as the
"device"-link, the <subsystem>:<name>-link, and the