Patches contributed by Eötvös Loránd University

commit e6aa0f07cb5e81a7cbeaf3be6e2101234c2f0d30
Merge: d4738792fb86 72d31053f62c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 24 10:31:34 2008 +0200

    Merge commit 'v2.6.27-rc7' into x86/microcode

commit ebdd90a8cb2e3963f55499850f02ce6003558b55
Merge: 3c9339049df5 72d31053f62c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Wed Sep 24 09:56:20 2008 +0200

    Merge commit 'v2.6.27-rc7' into x86/pebs

commit 07bbc16a8676b06950a21f35b59f69b2fe763bbd
Merge: 6a9e91846bf5 f8e256c687eb
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Sep 23 23:26:42 2008 +0200

    Merge branch 'timers/urgent' into x86/xen
    
    Conflicts:
            arch/x86/kernel/process_32.c
            arch/x86/kernel/process_64.c
    
    Manual merge:
    
            arch/x86/kernel/smpboot.c
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --cc arch/x86/kernel/smpboot.c
index 66b04e598817,7985c5b3f916..06f1407d5542
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@@ -52,6 -52,6 +52,7 @@@
  #include <asm/desc.h>
  #include <asm/nmi.h>
  #include <asm/irq.h>
++#include <asm/idle.h>
  #include <asm/smp.h>
  #include <asm/trampoline.h>
  #include <asm/cpu.h>
@@@ -1408,31 -1402,8 +1409,32 @@@ void native_cpu_die(unsigned int cpu
  	}
  	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
  }
 +
 +void play_dead_common(void)
 +{
 +	idle_task_exit();
 +	reset_lazy_tlbstate();
 +	irq_ctx_exit(raw_smp_processor_id());
++	c1e_remove_cpu(raw_smp_processor_id());
 +
 +	mb();
 +	/* Ack it */
 +	__get_cpu_var(cpu_state) = CPU_DEAD;
 +
 +	/*
 +	 * With physical CPU hotplug, we should halt the cpu
 +	 */
 +	local_irq_disable();
 +}
 +
 +void native_play_dead(void)
 +{
 +	play_dead_common();
 +	wbinvd_halt();
 +}
 +
  #else /* ... !CONFIG_HOTPLUG_CPU */
 -int __cpu_disable(void)
 +int native_cpu_disable(void)
  {
  	return -ENOSYS;
  }
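
The hunk above adds the shared CPU-offline path: the dying CPU tears down its state, publishes CPU_DEAD behind a memory barrier, and the surviving CPU in native_cpu_die() polls for that acknowledgement, complaining "CPU %u didn't die..." on timeout. A minimal user-space sketch of the same handshake, using C11 atomics in place of the kernel's per-cpu cpu_state (the names and the polling loop here are illustrative, not the kernel's):

    #include <stdatomic.h>
    #include <stdbool.h>

    enum { CPU_ONLINE, CPU_DEAD };
    static _Atomic int cpu_state = CPU_ONLINE;

    static void dying_cpu(void)
    {
            /* teardown mirrors play_dead_common(): exit idle, drop irq state */
            atomic_thread_fence(memory_order_seq_cst);  /* the mb() above */
            atomic_store(&cpu_state, CPU_DEAD);         /* "Ack it" */
            /* the kernel then does local_irq_disable() and wbinvd_halt() */
    }

    static bool cpu_seen_dead(int tries)
    {
            while (tries--)
                    if (atomic_load(&cpu_state) == CPU_DEAD)
                            return true;
            return false;   /* native_cpu_die(): "CPU %u didn't die..." */
    }

    int main(void)
    {
            dying_cpu();
            return cpu_seen_dead(10) ? 0 : 1;
    }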

commit 63e5c39859a41591662466028c4d1281c033c05a
Merge: 695698500912 fa748203175d c8bfff6dd4d4
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Sep 23 16:23:05 2008 +0200

    Merge branches 'sched/urgent' and 'sched/rt' into sched/devel

diff --cc kernel/sched.c
index 927c9307cd00,13dd2db9fb2d,4de2bfb28c58..669c49aa57f0
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@@@ -1425,9 -1425,9 -1418,35 +1418,35 @@@@ up
   	parent = parent->parent;
   	if (parent)
   		goto up;
++ out_unlock:
   	rcu_read_unlock();
++ 
++ 	return ret;
 + }
 + 
++ static int tg_nop(struct task_group *tg, void *data)
++ {
++ 	return 0;
+  }
++ #endif
++ 
++ #ifdef CONFIG_SMP
++ static unsigned long source_load(int cpu, int type);
++ static unsigned long target_load(int cpu, int type);
++ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
++ 
++ static unsigned long cpu_avg_load_per_task(int cpu)
++ {
++ 	struct rq *rq = cpu_rq(cpu);
++ 
++ 	if (rq->nr_running)
++ 		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
++ 
++ 	return rq->avg_load_per_task;
++ }
++ 
++ #ifdef CONFIG_FAIR_GROUP_SCHED
+  
   static void __set_se_shares(struct sched_entity *se, unsigned long shares);
   
   /*
@@@@ -8808,73 -8753,73 -8706,77 +8827,77 @@@@ static DEFINE_MUTEX(rt_constraints_mute
   static unsigned long to_ratio(u64 period, u64 runtime)
   {
   	if (runtime == RUNTIME_INF)
-- 		return 1ULL << 16;
++ 		return 1ULL << 20;
   
-- 	return div64_u64(runtime << 16, period);
++ 	return div64_u64(runtime << 20, period);
   }
   
-- #ifdef CONFIG_CGROUP_SCHED
-- static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
++ /* Must be called with tasklist_lock held */
++ static inline int tg_has_rt_tasks(struct task_group *tg)
   {
-- 	struct task_group *tgi, *parent = tg->parent;
-- 	unsigned long total = 0;
++ 	struct task_struct *g, *p;
   
-- 	if (!parent) {
-- 		if (global_rt_period() < period)
-- 			return 0;
++ 	do_each_thread(g, p) {
++ 		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
++ 			return 1;
++ 	} while_each_thread(g, p);
   
-- 		return to_ratio(period, runtime) <
-- 			to_ratio(global_rt_period(), global_rt_runtime());
-- 	}
++ 	return 0;
++ }
   
-- 	if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-- 		return 0;
++ struct rt_schedulable_data {
++ 	struct task_group *tg;
++ 	u64 rt_period;
++ 	u64 rt_runtime;
++ };
   
-- 	rcu_read_lock();
-- 	list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-- 		if (tgi == tg)
-- 			continue;
++ static int tg_schedulable(struct task_group *tg, void *data)
++ {
++ 	struct rt_schedulable_data *d = data;
++ 	struct task_group *child;
++ 	unsigned long total, sum = 0;
++ 	u64 period, runtime;
++ 
++ 	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
++ 	runtime = tg->rt_bandwidth.rt_runtime;
   
-- 		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-- 				tgi->rt_bandwidth.rt_runtime);
++ 	if (tg == d->tg) {
++ 		period = d->rt_period;
++ 		runtime = d->rt_runtime;
   	}
-- 	rcu_read_unlock();
   
-- 	return total + to_ratio(period, runtime) <=
-- 		to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-- 				parent->rt_bandwidth.rt_runtime);
-- }
-- #elif defined CONFIG_USER_SCHED
-- static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-- {
-- 	struct task_group *tgi;
-- 	unsigned long total = 0;
-- 	unsigned long global_ratio =
-- 		to_ratio(global_rt_period(), global_rt_runtime());
++ 	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
++ 		return -EBUSY;
   
-- 	rcu_read_lock();
-- 	list_for_each_entry_rcu(tgi, &task_groups, list) {
-- 		if (tgi == tg)
-- 			continue;
++ 	total = to_ratio(period, runtime);
 + 
-  		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-  				tgi->rt_bandwidth.rt_runtime);
++ 	list_for_each_entry_rcu(child, &tg->children, siblings) {
++ 		period = ktime_to_ns(child->rt_bandwidth.rt_period);
++ 		runtime = child->rt_bandwidth.rt_runtime;
++ 
++ 		if (child == d->tg) {
++ 			period = d->rt_period;
++ 			runtime = d->rt_runtime;
++ 		}
+  
 - 		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
 - 				tgi->rt_bandwidth.rt_runtime);
++ 		sum += to_ratio(period, runtime);
   	}
-- 	rcu_read_unlock();
   
-- 	return total + to_ratio(period, runtime) < global_ratio;
++ 	if (sum > total)
++ 		return -EINVAL;
++ 
++ 	return 0;
   }
-- #endif
   
-- /* Must be called with tasklist_lock held */
-- static inline int tg_has_rt_tasks(struct task_group *tg)
++ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
   {
-- 	struct task_struct *g, *p;
-- 	do_each_thread(g, p) {
-- 		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
-- 			return 1;
-- 	} while_each_thread(g, p);
-- 	return 0;
++ 	struct rt_schedulable_data data = {
++ 		.tg = tg,
++ 		.rt_period = period,
++ 		.rt_runtime = runtime,
++ 	};
++ 
++ 	return walk_tg_tree(tg_schedulable, tg_nop, &data);
   }
   
   static int tg_set_bandwidth(struct task_group *tg,
@@@@ -8884,14 -8829,14 -8786,9 +8907,9 @@@@
   
   	mutex_lock(&rt_constraints_mutex);
   	read_lock(&tasklist_lock);
-- 	if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
-- 		err = -EBUSY;
 - 		goto unlock;
 - 	}
 - 	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
 - 		err = -EINVAL;
++ 	err = __rt_schedulable(tg, rt_period, rt_runtime);
++ 	if (err)
   		goto unlock;
-  	}
-  	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
-  		err = -EINVAL;
-  		goto unlock;
-- 	}
   
   	spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
   	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
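
Two things change in the sched.c hunks above: the rt-bandwidth admission test becomes a walk_tg_tree() traversal, with tg_schedulable() checking each group's children against the group itself, and to_ratio() widens its fixed-point fraction from 16 to 20 bits so the summed per-group ratios lose less precision. A worked example of the ratio arithmetic (a stand-alone sketch, with plain 64-bit division standing in for div64_u64()):

    #include <stdint.h>
    #include <stdio.h>

    /* runtime/period as a 20-bit fixed-point fraction, as in to_ratio() */
    static uint64_t to_ratio(uint64_t period, uint64_t runtime)
    {
            return (runtime << 20) / period;
    }

    int main(void)
    {
            /* the default rt bandwidth: 950000us runtime per 1000000us period */
            uint64_t r = to_ratio(1000000, 950000);
            printf("%llu / %llu\n", (unsigned long long)r, 1ULL << 20);
            /* prints "996147 / 1048576", i.e. 0.95; the old 16-bit shift
             * would yield 62259 / 65536, four fewer bits of precision to
             * share among all the child groups being summed */
            return 0;
    }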

commit 9b9b181ce53ef387dfe3df9316bbc641fca13d51
Merge: fb71e4533845 72d31053f62c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Sep 23 16:19:26 2008 +0200

    Merge commit 'v2.6.27-rc7' into core/locking

commit 101d5b713700b902b1c200cdd1925c3cb7d34567
Merge: cec5eb7be3a1 e6babb6b7fed
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Sep 23 13:26:27 2008 +0200

    Merge branch 'x86/signal' into core/signal
    
    Conflicts:
            arch/x86/kernel/cpu/feature_names.c
            arch/x86/kernel/setup.c
            drivers/pci/intel-iommu.c
            include/asm-x86/cpufeature.h
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --cc arch/x86/kernel/cpu/common.c
index 4e456bd955bb,c63ec65f484c..8260d930eabc
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@@ -340,21 -338,39 +340,50 @@@ static void __init early_cpu_detect(voi
  	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
  	    cpu_devs[c->x86_vendor]->c_early_init)
  		cpu_devs[c->x86_vendor]->c_early_init(c);
 +}
  
 -	early_get_cap(c);
 +/*
 + * The NOPL instruction is supposed to exist on all CPUs with
 + * family >= 6; unfortunately, that's not true in practice because
 + * of early VIA chips and (more importantly) broken virtualizers that
 + * are not easy to detect.  In the latter case it doesn't even *fail*
 + * reliably, so probing for it doesn't even work.  Disable it completely
 + * unless we can find a reliable way to detect all the broken cases.
 + */
 +static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
 +{
 +	clear_cpu_cap(c, X86_FEATURE_NOPL);
  }
  
+ /*
+  * The NOPL instruction is supposed to exist on all CPUs with
+  * family >= 6, unfortunately, that's not true in practice because
+  * of early VIA chips and (more importantly) broken virtualizers that
+  * are not easy to detect.  Hence, probe for it based on first
+  * principles.
+  */
+ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+ {
+ 	const u32 nopl_signature = 0x888c53b1; /* Random number */
+ 	u32 has_nopl = nopl_signature;
+ 
+ 	clear_cpu_cap(c, X86_FEATURE_NOPL);
+ 	if (c->x86 >= 6) {
+ 		asm volatile("\n"
+ 			     "1:      .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+ 			     "2:\n"
+ 			     "        .section .fixup,\"ax\"\n"
+ 			     "3:      xor %0,%0\n"
+ 			     "        jmp 2b\n"
+ 			     "        .previous\n"
+ 			     _ASM_EXTABLE(1b,3b)
+ 			     : "+a" (has_nopl));
+ 
+ 		if (has_nopl == nopl_signature)
+ 			set_cpu_cap(c, X86_FEATURE_NOPL);
+ 	}
+ }
+ 
  static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
  {
  	u32 tfms, xlvl;
diff --cc arch/x86/kernel/setup.c
index 9838f2539dfc,673f12cf6eb0..c6b9330c1bff
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@@ -742,6 -738,9 +742,8 @@@ void __init setup_arch(char **cmdline_p
  #else
  	num_physpages = max_pfn;
  
 -	check_efer();
+  	if (cpu_has_x2apic)
+  		check_x2apic();
  
  	/* How many end-of-memory variables you have, grandma! */
  	/* need this before calling reserve_initrd */
diff --cc include/asm-x86/cpufeature.h
index 9489283a4bcf,6dfa2b3f18c7..f1b8a53c3e67
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@@ -64,22 -72,22 +72,23 @@@
  #define X86_FEATURE_CYRIX_ARR	(3*32+ 2) /* Cyrix ARRs (= MTRRs) */
  #define X86_FEATURE_CENTAUR_MCR	(3*32+ 3) /* Centaur MCRs (= MTRRs) */
  /* cpu types for specific tunings: */
- #define X86_FEATURE_K8		(3*32+ 4) /* Opteron, Athlon64 */
- #define X86_FEATURE_K7		(3*32+ 5) /* Athlon */
- #define X86_FEATURE_P3		(3*32+ 6) /* P3 */
- #define X86_FEATURE_P4		(3*32+ 7) /* P4 */
+ #define X86_FEATURE_K8		(3*32+ 4) /* "" Opteron, Athlon64 */
+ #define X86_FEATURE_K7		(3*32+ 5) /* "" Athlon */
+ #define X86_FEATURE_P3		(3*32+ 6) /* "" P3 */
+ #define X86_FEATURE_P4		(3*32+ 7) /* "" P4 */
  #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
  #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
- #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
+ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
  #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
++#define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
  #define X86_FEATURE_PEBS	(3*32+12) /* Precise-Event Based Sampling */
  #define X86_FEATURE_BTS		(3*32+13) /* Branch Trace Store */
- #define X86_FEATURE_SYSCALL32	(3*32+14) /* syscall in ia32 userspace */
- #define X86_FEATURE_SYSENTER32	(3*32+15) /* sysenter in ia32 userspace */
- #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well on this CPU */
- #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */
- #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */
- #define X86_FEATURE_11AP	(3*32+19) /* Bad local APIC aka 11AP */
+ #define X86_FEATURE_SYSCALL32	(3*32+14) /* "" syscall in ia32 userspace */
+ #define X86_FEATURE_SYSENTER32	(3*32+15) /* "" sysenter in ia32 userspace */
+ #define X86_FEATURE_REP_GOOD	(3*32+16) /* rep microcode works well */
+ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */
+ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
+ #define X86_FEATURE_11AP	(3*32+19) /* "" Bad local APIC aka 11AP */
  #define X86_FEATURE_NOPL	(3*32+20) /* The NOPL (0F 1F) instructions */
  
  /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
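
The conflict above carries two variants of detect_nopl(): one probes for the instruction and relies on an _ASM_EXTABLE fixup to recover if it faults, the other unconditionally clears X86_FEATURE_NOPL because broken virtualizers do not even fault reliably, defeating the probe. A user-space analogue of the probe itself, catching SIGILL instead of using the kernel's exception table (a sketch, x86-only):

    #include <setjmp.h>
    #include <signal.h>
    #include <stdio.h>

    static sigjmp_buf probe_env;

    static void sigill_handler(int sig)
    {
            (void)sig;
            siglongjmp(probe_env, 1);   /* unwind out of the faulting insn */
    }

    int main(void)
    {
            signal(SIGILL, sigill_handler);

            if (sigsetjmp(probe_env, 1) == 0) {
                    /* 0f 1f c0 is "nopl %eax", the byte sequence probed above */
                    __asm__ volatile(".byte 0x0f, 0x1f, 0xc0");
                    puts("NOPL executes");
            } else {
                    puts("no NOPL: SIGILL, as on early VIA family-6 chips");
            }
            return 0;
    }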

commit f8e256c687eb53850685747757c8d75e58756e15
Author: Ingo Molnar <mingo@elte.hu>
Date:   Tue Sep 23 13:00:57 2008 +0200

    timers: fix build error in !oneshot case
    
     kernel/time/tick-common.c: In function ‘tick_setup_periodic’:
     kernel/time/tick-common.c:113: error: implicit declaration of function ‘tick_broadcast_oneshot_active’
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 55c3f4be6077..469248782c23 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -74,6 +74,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 {
 	return 0;
 }
+static inline int tick_broadcast_oneshot_active(void) { return 0; }
 #endif /* !TICK_ONESHOT */
 
 /*
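
The fix is the standard kernel idiom for config-dependent helpers: when the oneshot code is compiled out, a static inline stub with the same signature keeps callers such as tick_setup_periodic() building unchanged. Reduced to a skeleton (CONFIG_TICK_ONESHOT stands in for the header's actual guard; the rest of tick-internal.h is elided):

    #ifdef CONFIG_TICK_ONESHOT
    extern int tick_broadcast_oneshot_active(void);  /* real implementation */
    #else
    static inline int tick_broadcast_oneshot_active(void) { return 0; }
    #endif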

commit f681bbd656b01439be904250a1581ca9c27505a1
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Sep 22 16:29:00 2008 +0200

    sched: turn off WAKEUP_OVERLAP
    
    WAKEUP_OVERLAP is not a winner on a 16way box, running psql+sysbench:
    
           .27-rc7-NO_WAKEUP_OVERLAP  .27-rc7-WAKEUP_OVERLAP
    -------------------------------------------------
        1:             694              811    +14.39%
        2:            1454             1427    -1.86%
        4:            3017             3070    +1.70%
        8:            5694             5808    +1.96%
       16:           10592            10612    +0.19%
       32:            9693             9647    -0.48%
       64:            8507             8262    -2.97%
      128:            8402             7087    -18.55%
      256:            8419             5124    -64.30%
      512:            7990             3671    -117.62%
    -------------------------------------------------
      SUM:           64466            55524    -16.11%
    
    ... so turn it off by default.
    
    Signed-off-by: Ingo Molnar <mingo@elte.hu>

diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index bf027a7accf8..7c9e8f4a049f 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -11,4 +11,4 @@ SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(LB_BIAS, 1)
 SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
 SCHED_FEAT(ASYM_EFF_LOAD, 1)
-SCHED_FEAT(WAKEUP_OVERLAP, 1)
+SCHED_FEAT(WAKEUP_OVERLAP, 0)
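
For context on why flipping one character changes behavior: sched_features.h is included twice with different definitions of SCHED_FEAT(), once to generate an enum of bit positions and once to OR together the default bitmask, so the second macro argument is exactly the feature's boot-time default. A condensed sketch of the pattern (simplified from the kernel/sched.c of this era):

    #define SCHED_FEAT(name, enabled) __SCHED_FEAT_##name,
    enum {
    #include "sched_features.h"
    };
    #undef SCHED_FEAT

    #define SCHED_FEAT(name, enabled) (1UL << __SCHED_FEAT_##name) * enabled |
    const unsigned int sysctl_sched_features =
    #include "sched_features.h"
            0;
    #undef SCHED_FEAT

With CONFIG_SCHED_DEBUG the same bits stay writable at runtime through /sys/kernel/debug/sched_features, so the patch changes only the default, not the tunable.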

commit 0b88641f1bafdbd087d5e63987a30cc0eadd63b9
Merge: fbdbf709938d 72d31053f62c
Author: Ingo Molnar <mingo@elte.hu>
Date:   Mon Sep 22 13:08:57 2008 +0200

    Merge commit 'v2.6.27-rc7' into x86/debug

commit 5e51900be6c15488b80343d3c3e62d4d605ba9a9
Merge: 998564789137 adee14b2e155
Author: Ingo Molnar <mingo@elte.hu>
Date:   Fri Sep 19 09:15:50 2008 +0200

    Merge commit 'v2.6.27-rc6' into x86/cleanups