Patches contributed by Eötvös Loránd University


commit f7b9329e556a8bdb9e07292cddbbe484c7a2b8c5
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Nov 26 21:21:49 2007 +0100

    sched: bump version of kernel/sched_debug.c
    
    bump version of kernel/sched_debug.c and remove CFS version
    information from it.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ca198a797bfa..5d0d623a5465 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -199,7 +199,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Sched Debug Version: v0.06-v22, %s %.*s\n",
+	SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);

commit 58e1010da3c15e7bdf426b0a3d4b13dba1b7d055
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Nov 26 21:21:49 2007 +0100

    sched: fix RLIMIT_CPU comment
    
    Devan Lippman noticed that the RLIMIT_CPU comment in resource.h is
    incorrect: the field is in seconds, not msecs. We used msecs in
    earlier versions of the patch but that got changed.
    
    Found-by: Devan Lippman <devan.lippman@gmail.com>
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h
index cfe3692b23e5..a4a22cc35898 100644
--- a/include/asm-generic/resource.h
+++ b/include/asm-generic/resource.h
@@ -12,7 +12,7 @@
  *   then it defines them prior including asm-generic/resource.h. )
  */
 
-#define RLIMIT_CPU		0	/* CPU time in ms */
+#define RLIMIT_CPU		0	/* CPU time in sec */
 #define RLIMIT_FSIZE		1	/* Maximum filesize */
 #define RLIMIT_DATA		2	/* max data size */
 #define RLIMIT_STACK		3	/* max stack size */
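
For context, a minimal userspace sketch of the corrected semantics (not part
of the patch): RLIMIT_CPU is specified in CPU seconds, and a process that
exceeds its soft limit receives SIGXCPU.

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
	struct rlimit rl;

	/* limit this process to 10 CPU seconds (soft), 20 (hard) -
	 * RLIMIT_CPU counts seconds, not milliseconds */
	rl.rlim_cur = 10;
	rl.rlim_max = 20;
	if (setrlimit(RLIMIT_CPU, &rl) != 0) {
		perror("setrlimit");
		return 1;
	}

	/* read the limit back to confirm the unit */
	getrlimit(RLIMIT_CPU, &rl);
	printf("soft: %lu sec, hard: %lu sec\n",
	       (unsigned long)rl.rlim_cur, (unsigned long)rl.rlim_max);
	return 0;
}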

commit bcbe4a076609e15ea84cbebd9cd8f317ed70ce92
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Nov 26 21:21:49 2007 +0100

    sched: fix kernel/acct.c comment
    
    fix kernel/acct.c comment.
    
    Noticed by Lin Tan. Comment suggested by Olaf Kirch.
    
    also see:
    
      http://bugzilla.kernel.org/show_bug.cgi?id=8220
    
    Reported-by: tammy000@gmail.com
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/acct.c b/kernel/acct.c
index fce53d8df8a7..cf19547cc9e4 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -413,7 +413,7 @@ static u32 encode_float(u64 value)
  *  The acct_process() call is the workhorse of the process
  *  accounting system. The struct acct is built here and then written
  *  into the accounting file. This function should only be called from
- *  do_exit().
+ *  do_exit() or when switching to a different output file.
  */
 
 /*

commit 08e4570a4a393bcc241f78dfc444cb0b07995fc0
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Nov 26 21:21:49 2007 +0100

    sched: fix prev_stime calculation
    
    Srivatsa Vaddagiri noticed occasionally incorrect CPU usage
    values in top and tracked it down to stime going below 0 in
    task_stime(). Negative values are possible there due to the
    sampled nature of stime/utime.
    
    Fix suggested by Balbir Singh.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Tested-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
    Reviewed-by: Balbir Singh <balbir@linux.vnet.ibm.com>

diff --git a/fs/proc/array.c b/fs/proc/array.c
index eba339ecba27..65c62e1bfd6f 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -374,7 +374,9 @@ static cputime_t task_stime(struct task_struct *p)
 	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
 			cputime_to_clock_t(task_utime(p));
 
-	p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+	if (stime >= 0)
+		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+
 	return p->prev_stime;
 }
 #endif
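
The guard keeps the reported value monotonic while simply ignoring
transiently negative samples. A standalone sketch of the same idea
(illustrative only; task_stime_sample() and its state are hypothetical
stand-ins for the proc code above):

#include <stdio.h>

/* last value we reported; must never go backwards */
static long prev_stime;

/* report a monotonic stime from a noisy, sampled input: negative
 * samples (an artifact of sampling utime and the total runtime at
 * slightly different times) are ignored rather than propagated */
static long task_stime_sample(long stime)
{
	if (stime >= 0 && stime > prev_stime)
		prev_stime = stime;
	return prev_stime;
}

int main(void)
{
	long samples[] = { 5, 12, -3, 14 };
	int i;

	for (i = 0; i < 4; i++)
		printf("sample %ld -> reported %ld\n",
		       samples[i], task_stime_sample(samples[i]));
	return 0;
}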

commit f44d9efd3510776216938fef84adc99cc0e12412
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Nov 26 20:42:20 2007 +0100

    x86: fix APIC related bootup crash on Athlon XP CPUs
    
    warmbloodedcreature@gmail.com reported that an APIC-enabled
    Asus a7v8x-x with an Athlon XP reboots early in the bootup:
    
       http://bugzilla.kernel.org/show_bug.cgi?id=8723
    
    after a long marathon of spontaneous-reboot debugging, it turned
    out to be caused by sync_Arb_IDs(). AMD CPUs never really needed
    this sequence anyway, so just return early if we meet an AMD CPU.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 08b07c176962..96986b46bc85 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -789,7 +789,7 @@ void __init sync_Arb_IDs(void)
 	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
 	 * needed on AMD.
 	 */
-	if (modern_apic())
+	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
 		return;
 	/*
 	 * Wait for idle.

commit bc84cf17b50ca5b49bec0a5fef63c58c1526d46b
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Nov 26 20:42:19 2007 +0100

    x86: turn off iommu merge by default
    
    revert this commit for now:
    
        commit 948062683004d13ca21c8c05ac052d387978a449
        Author: Andi Kleen <ak@suse.de>
        Date:   Fri Oct 19 20:35:03 2007 +0200
    
            x86: enable iommu_merge by default
    
    it's causing regressions:
    
        http://bugzilla.kernel.org/show_bug.cgi?id=9412
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index aa805b11b24f..5552d23d23c2 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -12,7 +12,7 @@
 #include <asm/gart.h>
 #include <asm/calgary.h>
 
-int iommu_merge __read_mostly = 1;
+int iommu_merge __read_mostly = 0;
 EXPORT_SYMBOL(iommu_merge);
 
 dma_addr_t bad_dma_address __read_mostly;
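
Note that flipping the default does not remove the feature: assuming the
iommu= boot-option parsing of this era (handled in this same file), which
accepted "merge" and "nomerge", users who depended on scatter-gather merging
can still boot with iommu=merge; the conservative default is simply off
until the regression is understood.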

commit 4307d1e5ada595c87f9a4d16db16ba5edb70dcb1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Nov 7 18:37:48 2007 +0100

    x86: ignore the sys_getcpu() tcache parameter
    
    don't use the vgetcpu tcache - it's causing problems for
    migrating tasks: they'll see the old cached values for up to a
    jiffy after the migration, further increasing the cost of the
    migration.
    
    In the worst case they see completely bogus information from
    the tcache, when a sys_getcpu() call has "invalidated" the cache
    info by incrementing both the jiffies and the cpuid values in
    the cache and the following vdso_getcpu() call happens after
    vdso_jiffies has been incremented.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>
    Signed-off-by: Ulrich Drepper <drepper@redhat.com>
    Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 91f6e85d0fc2..3b1ae1abfba9 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -13,32 +13,17 @@
 #include <asm/vgtod.h>
 #include "vextern.h"
 
-long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
 {
 	unsigned int dummy, p;
-	unsigned long j = 0;
 
-	/* Fast cache - only recompute value once per jiffies and avoid
-	   relatively costly rdtscp/cpuid otherwise.
-	   This works because the scheduler usually keeps the process
-	   on the same CPU and this syscall doesn't guarantee its
-	   results anyways.
-	   We do this here because otherwise user space would do it on
-	   its own in a likely inferior way (no access to jiffies).
-	   If you don't like it pass NULL. */
-	if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) {
-		p = tcache->blob[1];
-	} else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
+	if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
 		/* Load per CPU data from RDTSCP */
 		rdtscp(dummy, dummy, p);
 	} else {
 		/* Load per CPU data from GDT */
 		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
 	}
-	if (tcache) {
-		tcache->blob[0] = j;
-		tcache->blob[1] = p;
-	}
 	if (cpu)
 		*cpu = p & 0xfff;
 	if (node)
diff --git a/kernel/sys.c b/kernel/sys.c
index 304b5410d746..d1fe71eb4546 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1750,7 +1750,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 }
 
 asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
-	   		   struct getcpu_cache __user *cache)
+			   struct getcpu_cache __user *unused)
 {
 	int err = 0;
 	int cpu = raw_smp_processor_id();
@@ -1758,24 +1758,6 @@ asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep,
 		err |= put_user(cpu, cpup);
 	if (nodep)
 		err |= put_user(cpu_to_node(cpu), nodep);
-	if (cache) {
-		/*
-		 * The cache is not needed for this implementation,
-		 * but make sure user programs pass something
-		 * valid. vsyscall implementations can instead make
-		 * good use of the cache. Only use t0 and t1 because
-		 * these are available in both 32bit and 64bit ABI (no
-		 * need for a compat_getcpu). 32bit has enough
-		 * padding
-		 */
-		unsigned long t0, t1;
-		get_user(t0, &cache->blob[0]);
-		get_user(t1, &cache->blob[1]);
-		t0++;
-		t1++;
-		put_user(t0, &cache->blob[0]);
-		put_user(t1, &cache->blob[1]);
-	}
 	return err ? -EFAULT : 0;
 }
 
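With the cache argument now ignored on both the vdso and the syscall side,
callers simply pass NULL. A minimal userspace sketch of calling the syscall
directly (not part of the patch):

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	unsigned cpu, node;

	/* the third argument is the (now ignored) getcpu_cache
	 * pointer - passing NULL was always allowed and is now the
	 * only sensible choice */
	if (syscall(SYS_getcpu, &cpu, &node, NULL) != 0) {
		perror("getcpu");
		return 1;
	}
	printf("running on cpu %u, node %u\n", cpu, node);
	return 0;
}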

commit 9612633a21ae8424531caf977f0560f64285bf36
Author: Ingo Molnar <mingo@elte.hu>
Date:   Thu Nov 15 20:57:40 2007 +0100

    sched: reorder SCHED_FEAT_ bits
    
    reorder SCHED_FEAT_ bits so that the used ones come first. Makes
    tuning instructions easier.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched.c b/kernel/sched.c
index cc8cb6f7d82e..38933cafea8a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -455,18 +455,18 @@ static void update_rq_clock(struct rq *rq)
  */
 enum {
 	SCHED_FEAT_NEW_FAIR_SLEEPERS	= 1,
-	SCHED_FEAT_START_DEBIT		= 2,
-	SCHED_FEAT_TREE_AVG             = 4,
-	SCHED_FEAT_APPROX_AVG           = 8,
-	SCHED_FEAT_WAKEUP_PREEMPT	= 16,
+	SCHED_FEAT_WAKEUP_PREEMPT	= 2,
+	SCHED_FEAT_START_DEBIT		= 4,
+	SCHED_FEAT_TREE_AVG             = 8,
+	SCHED_FEAT_APPROX_AVG           = 16,
 };
 
 const_debug unsigned int sysctl_sched_features =
 		SCHED_FEAT_NEW_FAIR_SLEEPERS	* 1 |
+		SCHED_FEAT_WAKEUP_PREEMPT	* 1 |
 		SCHED_FEAT_START_DEBIT		* 1 |
 		SCHED_FEAT_TREE_AVG		* 0 |
-		SCHED_FEAT_APPROX_AVG		* 0 |
-		SCHED_FEAT_WAKEUP_PREEMPT	* 1;
+		SCHED_FEAT_APPROX_AVG		* 0;
 
 #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
 
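Worked out from the diff: with the new ordering, the default mask is
NEW_FAIR_SLEEPERS (1) + WAKEUP_PREEMPT (2) + START_DEBIT (4) = 7, whereas
the same set of enabled features under the old bit assignment was
1 + 2 + 16 = 19. Keeping the enabled features in the low bits is what makes
tuning values shorter to write down.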

commit 502d26b524d8980f3ed80d9aec398e85671a8160
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Nov 9 22:39:39 2007 +0100

    sched: clean up the wakeup preempt check, #2
    
    clean up the preemption check to not use unnecessary 64-bit
    variables. This improves code size:
    
       text    data     bss     dec     hex filename
      44227    3326      36   47589    b9e5 sched.o.before
      44201    3326      36   47563    b9cb sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index d558716a9add..6c361472cc74 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -837,7 +837,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	s64 delta, gran;
+	unsigned long gran;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
@@ -860,12 +860,11 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		pse = parent_entity(pse);
 	}
 
-	delta = se->vruntime - pse->vruntime;
 	gran = sysctl_sched_wakeup_granularity;
 	if (unlikely(se->load.weight != NICE_0_LOAD))
 		gran = calc_delta_fair(gran, &se->load);
 
-	if (delta > gran)
+	if (pse->vruntime + gran < se->vruntime)
 		resched_task(curr);
 }
 
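The new condition is the old one rearranged: delta > gran, i.e.
se->vruntime - pse->vruntime > gran, becomes
pse->vruntime + gran < se->vruntime, so no s64 temporary is needed. A
standalone sketch of the equivalence (illustrative only, with made-up
values):

#include <stdio.h>

int main(void)
{
	unsigned long long se_vruntime = 1000000, pse_vruntime = 900000;
	unsigned long gran = 50000;

	/* old form: compute a signed 64-bit delta, then compare */
	long long delta = (long long)(se_vruntime - pse_vruntime);
	int old_form = delta > (long long)gran;

	/* new form: pure unsigned comparison, no s64 temporary */
	int new_form = pse_vruntime + gran < se_vruntime;

	/* both print 1: 100000 > 50000 and 950000 < 1000000 */
	printf("old: %d, new: %d\n", old_form, new_form);
	return 0;
}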

commit 77d9cc44b543fa831169e54c495ad06ef3a0c726
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Nov 9 22:39:39 2007 +0100

    sched: clean up the wakeup preempt check
    
    clean up the wakeup preemption check. No code changed:
    
       text    data     bss     dec     hex filename
      44227    3326      36   47589    b9e5 sched.o.before
      44227    3326      36   47589    b9e5 sched.o.after
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a3badf52bba2..d558716a9add 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -852,20 +852,21 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 	if (unlikely(p->policy == SCHED_BATCH))
 		return;
 
-	if (sched_feat(WAKEUP_PREEMPT)) {
-		while (!is_same_group(se, pse)) {
-			se = parent_entity(se);
-			pse = parent_entity(pse);
-		}
-
-		delta = se->vruntime - pse->vruntime;
-		gran = sysctl_sched_wakeup_granularity;
-		if (unlikely(se->load.weight != NICE_0_LOAD))
-			gran = calc_delta_fair(gran, &se->load);
+	if (!sched_feat(WAKEUP_PREEMPT))
+		return;
 
-		if (delta > gran)
-			resched_task(curr);
+	while (!is_same_group(se, pse)) {
+		se = parent_entity(se);
+		pse = parent_entity(pse);
 	}
+
+	delta = se->vruntime - pse->vruntime;
+	gran = sysctl_sched_wakeup_granularity;
+	if (unlikely(se->load.weight != NICE_0_LOAD))
+		gran = calc_delta_fair(gran, &se->load);
+
+	if (delta > gran)
+		resched_task(curr);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)