Patches contributed by Eötvös Loránd University
commit 41cf54455da5e5dc847a9733d49ca23b5e7dd59e
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 16 01:27:28 2007 -0800
[PATCH] Fix multiple conversion bugs in msecs_to_jiffies
Fix multiple conversion bugs in msecs_to_jiffies().
The main problem is that this condition:
if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
overflows if HZ is smaller than 1000!
This change is user-visible: for HZ=250, the SUS-compliant poll()
timeout value of -20 is mistakenly converted to an 'immediate timeout'.
(The new dyntick code also triggered this; that's how we noticed.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/kernel/time.c b/kernel/time.c
index 4a8657171584..c6c80ea5d0ea 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -500,15 +500,56 @@ unsigned int jiffies_to_usecs(const unsigned long j)
}
EXPORT_SYMBOL(jiffies_to_usecs);
+/*
+ * When we convert to jiffies then we interpret incoming values
+ * the following way:
+ *
+ * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET)
+ *
+ * - 'too large' values [that would result in larger than
+ * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too.
+ *
+ * - all other values are converted to jiffies by either multiplying
+ * the input value by a factor or dividing it with a factor
+ *
+ * We must also be careful about 32-bit overflows.
+ */
unsigned long msecs_to_jiffies(const unsigned int m)
{
- if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ /*
+ * Negative value means infinite timeout:
+ */
+ if ((int)m < 0)
return MAX_JIFFY_OFFSET;
+
#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ /*
+ * HZ is equal to or smaller than 1000, and 1000 is a nice
+ * round multiple of HZ, divide with the factor between them,
+ * but round upwards:
+ */
return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+ /*
+ * HZ is larger than 1000, and HZ is a nice round multiple of
+ * 1000 - simply multiply with the factor between them.
+ *
+ * But first make sure the multiplication result cannot
+ * overflow:
+ */
+ if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+
return m * (HZ / MSEC_PER_SEC);
#else
+ /*
+ * Generic case - multiply, round and divide. But first
+ * check that, if we are doing a net multiplication,
+ * we wouldn't overflow:
+ */
+ if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+
return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
#endif
}
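A minimal userspace sketch of the broken bound check, assuming HZ=250 and
32-bit unsigned int (the names mirror the kernel's, but this is standalone
editorial demo code, not part of the patch). The old comparison tests m
against a product truncated to 32 bits, which lands just below UINT_MAX,
so a negative poll() timeout cast to unsigned int slips past it; the new
(int)m < 0 test clamps it to MAX_JIFFY_OFFSET:

#include <stdio.h>
#include <limits.h>

#define HZ               250
#define MSEC_PER_SEC     1000UL
#define MAX_JIFFY_OFFSET ((LONG_MAX >> 1) - 1)

/* the return type truncates the product to 32 bits, as in the kernel */
static unsigned int demo_jiffies_to_msecs(unsigned long j)
{
	return (MSEC_PER_SEC / HZ) * j;
}

static unsigned long old_msecs_to_jiffies(unsigned int m)
{
	/* for HZ=250 this bound is UINT_MAX - 7: almost nothing exceeds it */
	if (m > demo_jiffies_to_msecs(MAX_JIFFY_OFFSET))
		return MAX_JIFFY_OFFSET;
	return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
}

static unsigned long new_msecs_to_jiffies(unsigned int m)
{
	if ((int)m < 0)		/* negative means 'infinite timeout' */
		return MAX_JIFFY_OFFSET;
	return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
}

int main(void)
{
	unsigned int m = (unsigned int)-20;	/* poll() timeout of -20 */

	printf("old: %lu (large, finite)\n", old_msecs_to_jiffies(m));
	printf("new: %lu (MAX_JIFFY_OFFSET)\n", new_msecs_to_jiffies(m));
	return 0;
}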
commit 8b9365d753d9870bb6451504c13570b81923228f
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Feb 16 01:27:27 2007 -0800
[PATCH] Uninline jiffies.h functions
There are loads of fat functions hidden in jiffies.h. Uninline them. No code
changes.
[jeremy@goop.org: export fix]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 0ec6e28bccd2..9243cefa4512 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -259,207 +259,23 @@ static inline u64 get_jiffies_64(void)
#endif
/*
- * Convert jiffies to milliseconds and back.
- *
- * Avoid unnecessary multiplications/divisions in the
- * two most common HZ cases:
- */
-static inline unsigned int jiffies_to_msecs(const unsigned long j)
-{
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- return (MSEC_PER_SEC / HZ) * j;
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
- return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
-#else
- return (j * MSEC_PER_SEC) / HZ;
-#endif
-}
-
-static inline unsigned int jiffies_to_usecs(const unsigned long j)
-{
-#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
- return (USEC_PER_SEC / HZ) * j;
-#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
- return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC);
-#else
- return (j * USEC_PER_SEC) / HZ;
-#endif
-}
-
-static inline unsigned long msecs_to_jiffies(const unsigned int m)
-{
- if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
- return MAX_JIFFY_OFFSET;
-#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
- return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
-#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
- return m * (HZ / MSEC_PER_SEC);
-#else
- return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
-#endif
-}
-
-static inline unsigned long usecs_to_jiffies(const unsigned int u)
-{
- if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
- return MAX_JIFFY_OFFSET;
-#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
- return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
-#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
- return u * (HZ / USEC_PER_SEC);
-#else
- return (u * HZ + USEC_PER_SEC - 1) / USEC_PER_SEC;
-#endif
-}
-
-/*
- * The TICK_NSEC - 1 rounds up the value to the next resolution. Note
- * that a remainder subtract here would not do the right thing as the
- * resolution values don't fall on second boundries. I.e. the line:
- * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
- *
- * Rather, we just shift the bits off the right.
- *
- * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
- * value to a scaled second value.
- */
-static __inline__ unsigned long
-timespec_to_jiffies(const struct timespec *value)
-{
- unsigned long sec = value->tv_sec;
- long nsec = value->tv_nsec + TICK_NSEC - 1;
-
- if (sec >= MAX_SEC_IN_JIFFIES){
- sec = MAX_SEC_IN_JIFFIES;
- nsec = 0;
- }
- return (((u64)sec * SEC_CONVERSION) +
- (((u64)nsec * NSEC_CONVERSION) >>
- (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
-
-}
-
-static __inline__ void
-jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
-{
- /*
- * Convert jiffies to nanoseconds and separate with
- * one divide.
- */
- u64 nsec = (u64)jiffies * TICK_NSEC;
- value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec);
-}
-
-/* Same for "timeval"
- *
- * Well, almost. The problem here is that the real system resolution is
- * in nanoseconds and the value being converted is in micro seconds.
- * Also for some machines (those that use HZ = 1024, in-particular),
- * there is a LARGE error in the tick size in microseconds.
-
- * The solution we use is to do the rounding AFTER we convert the
- * microsecond part. Thus the USEC_ROUND, the bits to be shifted off.
- * Instruction wise, this should cost only an additional add with carry
- * instruction above the way it was done above.
- */
-static __inline__ unsigned long
-timeval_to_jiffies(const struct timeval *value)
-{
- unsigned long sec = value->tv_sec;
- long usec = value->tv_usec;
-
- if (sec >= MAX_SEC_IN_JIFFIES){
- sec = MAX_SEC_IN_JIFFIES;
- usec = 0;
- }
- return (((u64)sec * SEC_CONVERSION) +
- (((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
- (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
-}
-
-static __inline__ void
-jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
-{
- /*
- * Convert jiffies to nanoseconds and separate with
- * one divide.
- */
- u64 nsec = (u64)jiffies * TICK_NSEC;
- long tv_usec;
-
- value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec);
- tv_usec /= NSEC_PER_USEC;
- value->tv_usec = tv_usec;
-}
-
-/*
- * Convert jiffies/jiffies_64 to clock_t and back.
+ * Convert various time units to each other:
*/
-static inline clock_t jiffies_to_clock_t(long x)
-{
-#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
- return x / (HZ / USER_HZ);
-#else
- u64 tmp = (u64)x * TICK_NSEC;
- do_div(tmp, (NSEC_PER_SEC / USER_HZ));
- return (long)tmp;
-#endif
-}
-
-static inline unsigned long clock_t_to_jiffies(unsigned long x)
-{
-#if (HZ % USER_HZ)==0
- if (x >= ~0UL / (HZ / USER_HZ))
- return ~0UL;
- return x * (HZ / USER_HZ);
-#else
- u64 jif;
-
- /* Don't worry about loss of precision here .. */
- if (x >= ~0UL / HZ * USER_HZ)
- return ~0UL;
-
- /* .. but do try to contain it here */
- jif = x * (u64) HZ;
- do_div(jif, USER_HZ);
- return jif;
-#endif
-}
-
-static inline u64 jiffies_64_to_clock_t(u64 x)
-{
-#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
- do_div(x, HZ / USER_HZ);
-#else
- /*
- * There are better ways that don't overflow early,
- * but even this doesn't overflow in hundreds of years
- * in 64 bits, so..
- */
- x *= TICK_NSEC;
- do_div(x, (NSEC_PER_SEC / USER_HZ));
-#endif
- return x;
-}
-
-static inline u64 nsec_to_clock_t(u64 x)
-{
-#if (NSEC_PER_SEC % USER_HZ) == 0
- do_div(x, (NSEC_PER_SEC / USER_HZ));
-#elif (USER_HZ % 512) == 0
- x *= USER_HZ/512;
- do_div(x, (NSEC_PER_SEC / 512));
-#else
- /*
- * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
- * overflow after 64.99 years.
- * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
- */
- x *= 9;
- do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2))
- / USER_HZ));
-#endif
- return x;
-}
+extern unsigned int jiffies_to_msecs(const unsigned long j);
+extern unsigned int jiffies_to_usecs(const unsigned long j);
+extern unsigned long msecs_to_jiffies(const unsigned int m);
+extern unsigned long usecs_to_jiffies(const unsigned int u);
+extern unsigned long timespec_to_jiffies(const struct timespec *value);
+extern void jiffies_to_timespec(const unsigned long jiffies,
+ struct timespec *value);
+extern unsigned long timeval_to_jiffies(const struct timeval *value);
+extern void jiffies_to_timeval(const unsigned long jiffies,
+ struct timeval *value);
+extern clock_t jiffies_to_clock_t(long x);
+extern unsigned long clock_t_to_jiffies(unsigned long x);
+extern u64 jiffies_64_to_clock_t(u64 x);
+extern u64 nsec_to_clock_t(u64 x);
+
+#define TIMESTAMP_SIZE 30
#endif
diff --git a/kernel/time.c b/kernel/time.c
index 0e017bff4c19..4a8657171584 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -470,6 +470,219 @@ struct timeval ns_to_timeval(const s64 nsec)
return tv;
}
+/*
+ * Convert jiffies to milliseconds and back.
+ *
+ * Avoid unnecessary multiplications/divisions in the
+ * two most common HZ cases:
+ */
+unsigned int jiffies_to_msecs(const unsigned long j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ return (MSEC_PER_SEC / HZ) * j;
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+ return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
+#else
+ return (j * MSEC_PER_SEC) / HZ;
+#endif
+}
+EXPORT_SYMBOL(jiffies_to_msecs);
+
+unsigned int jiffies_to_usecs(const unsigned long j)
+{
+#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
+ return (USEC_PER_SEC / HZ) * j;
+#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
+ return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC);
+#else
+ return (j * USEC_PER_SEC) / HZ;
+#endif
+}
+EXPORT_SYMBOL(jiffies_to_usecs);
+
+unsigned long msecs_to_jiffies(const unsigned int m)
+{
+ if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ);
+#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC)
+ return m * (HZ / MSEC_PER_SEC);
+#else
+ return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC;
+#endif
+}
+EXPORT_SYMBOL(msecs_to_jiffies);
+
+unsigned long usecs_to_jiffies(const unsigned int u)
+{
+ if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ)
+ return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ);
+#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC)
+ return u * (HZ / USEC_PER_SEC);
+#else
+ return (u * HZ + USEC_PER_SEC - 1) / USEC_PER_SEC;
+#endif
+}
+EXPORT_SYMBOL(usecs_to_jiffies);
+
+/*
+ * The TICK_NSEC - 1 rounds up the value to the next resolution. Note
+ * that a remainder subtract here would not do the right thing as the
+ * resolution values don't fall on second boundries. I.e. the line:
+ * nsec -= nsec % TICK_NSEC; is NOT a correct resolution rounding.
+ *
+ * Rather, we just shift the bits off the right.
+ *
+ * The >> (NSEC_JIFFIE_SC - SEC_JIFFIE_SC) converts the scaled nsec
+ * value to a scaled second value.
+ */
+unsigned long
+timespec_to_jiffies(const struct timespec *value)
+{
+ unsigned long sec = value->tv_sec;
+ long nsec = value->tv_nsec + TICK_NSEC - 1;
+
+ if (sec >= MAX_SEC_IN_JIFFIES){
+ sec = MAX_SEC_IN_JIFFIES;
+ nsec = 0;
+ }
+ return (((u64)sec * SEC_CONVERSION) +
+ (((u64)nsec * NSEC_CONVERSION) >>
+ (NSEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
+
+}
+EXPORT_SYMBOL(timespec_to_jiffies);
+
+void
+jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
+{
+ /*
+ * Convert jiffies to nanoseconds and separate with
+ * one divide.
+ */
+ u64 nsec = (u64)jiffies * TICK_NSEC;
+ value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec);
+}
+EXPORT_SYMBOL(jiffies_to_timespec);
+
+/* Same for "timeval"
+ *
+ * Well, almost. The problem here is that the real system resolution is
+ * in nanoseconds and the value being converted is in micro seconds.
+ * Also for some machines (those that use HZ = 1024, in-particular),
+ * there is a LARGE error in the tick size in microseconds.
+
+ * The solution we use is to do the rounding AFTER we convert the
+ * microsecond part. Thus the USEC_ROUND, the bits to be shifted off.
+ * Instruction wise, this should cost only an additional add with carry
+ * instruction above the way it was done above.
+ */
+unsigned long
+timeval_to_jiffies(const struct timeval *value)
+{
+ unsigned long sec = value->tv_sec;
+ long usec = value->tv_usec;
+
+ if (sec >= MAX_SEC_IN_JIFFIES){
+ sec = MAX_SEC_IN_JIFFIES;
+ usec = 0;
+ }
+ return (((u64)sec * SEC_CONVERSION) +
+ (((u64)usec * USEC_CONVERSION + USEC_ROUND) >>
+ (USEC_JIFFIE_SC - SEC_JIFFIE_SC))) >> SEC_JIFFIE_SC;
+}
+
+void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
+{
+ /*
+ * Convert jiffies to nanoseconds and separate with
+ * one divide.
+ */
+ u64 nsec = (u64)jiffies * TICK_NSEC;
+ long tv_usec;
+
+ value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &tv_usec);
+ tv_usec /= NSEC_PER_USEC;
+ value->tv_usec = tv_usec;
+}
+
+/*
+ * Convert jiffies/jiffies_64 to clock_t and back.
+ */
+clock_t jiffies_to_clock_t(long x)
+{
+#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
+ return x / (HZ / USER_HZ);
+#else
+ u64 tmp = (u64)x * TICK_NSEC;
+ do_div(tmp, (NSEC_PER_SEC / USER_HZ));
+ return (long)tmp;
+#endif
+}
+EXPORT_SYMBOL(jiffies_to_clock_t);
+
+unsigned long clock_t_to_jiffies(unsigned long x)
+{
+#if (HZ % USER_HZ)==0
+ if (x >= ~0UL / (HZ / USER_HZ))
+ return ~0UL;
+ return x * (HZ / USER_HZ);
+#else
+ u64 jif;
+
+ /* Don't worry about loss of precision here .. */
+ if (x >= ~0UL / HZ * USER_HZ)
+ return ~0UL;
+
+ /* .. but do try to contain it here */
+ jif = x * (u64) HZ;
+ do_div(jif, USER_HZ);
+ return jif;
+#endif
+}
+EXPORT_SYMBOL(clock_t_to_jiffies);
+
+u64 jiffies_64_to_clock_t(u64 x)
+{
+#if (TICK_NSEC % (NSEC_PER_SEC / USER_HZ)) == 0
+ do_div(x, HZ / USER_HZ);
+#else
+ /*
+ * There are better ways that don't overflow early,
+ * but even this doesn't overflow in hundreds of years
+ * in 64 bits, so..
+ */
+ x *= TICK_NSEC;
+ do_div(x, (NSEC_PER_SEC / USER_HZ));
+#endif
+ return x;
+}
+
+EXPORT_SYMBOL(jiffies_64_to_clock_t);
+
+u64 nsec_to_clock_t(u64 x)
+{
+#if (NSEC_PER_SEC % USER_HZ) == 0
+ do_div(x, (NSEC_PER_SEC / USER_HZ));
+#elif (USER_HZ % 512) == 0
+ x *= USER_HZ/512;
+ do_div(x, (NSEC_PER_SEC / 512));
+#else
+ /*
+ * max relative error 5.7e-8 (1.8s per year) for USER_HZ <= 1024,
+ * overflow after 64.99 years.
+ * exact for HZ=60, 72, 90, 120, 144, 180, 300, 600, 900, ...
+ */
+ x *= 9;
+ do_div(x, (unsigned long)((9ull * NSEC_PER_SEC + (USER_HZ/2)) /
+ USER_HZ));
+#endif
+ return x;
+}
+
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
{
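The mechanical pattern behind the patch, in miniature (an editorial
sketch with a made-up fat_convert() helper, not a function from the
patch): the header keeps only a declaration, the single out-of-line
definition moves into a .c file, and EXPORT_SYMBOL keeps modular users
linking - the "export fix" credited above.

/* before, in a header - the body is expanded into every caller: */
static inline unsigned int fat_convert(unsigned long x)
{
	return (x * 10 + 6) / 7;	/* imagine many more instructions */
}

/* after - the header only declares: */
extern unsigned int fat_convert(unsigned long x);

/* ...and exactly one .c file defines and exports it: */
unsigned int fat_convert(unsigned long x)
{
	return (x * 10 + 6) / 7;
}
EXPORT_SYMBOL(fat_convert);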
commit 5d0e600d903caa09e790824cc5812f0d97113b23
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Feb 13 13:26:24 2007 +0100
[PATCH] x86: fix laptop bootup hang in init_acpi()
During kernel bootup, a new T60 laptop (CoreDuo, 32-bit) hangs about
10%-20% of the time in acpi_init():
Calling initcall 0xc055ce1a: topology_init+0x0/0x2f()
Calling initcall 0xc055d75e: mtrr_init_finialize+0x0/0x2c()
Calling initcall 0xc05664f3: param_sysfs_init+0x0/0x175()
Calling initcall 0xc014cb65: pm_sysrq_init+0x0/0x17()
Calling initcall 0xc0569f99: init_bio+0x0/0xf4()
Calling initcall 0xc056b865: genhd_device_init+0x0/0x50()
Calling initcall 0xc056c4bd: fbmem_init+0x0/0x87()
Calling initcall 0xc056dd74: acpi_init+0x0/0x1ee()
It's a hard hang that not even an NMI could punch through! Frustratingly,
adding printks or function tracing to the ACPI code made the hangs go away
...
After some time an additional detail emerged: disabling the NMI watchdog
made these occasional hangs go away.
So I spent the better part of today trying to debug this and trying out
various theories until I finally found the likely reason for the hang: if
acpi_ns_initialize_devices() executes an _INI AML method and an NMI
happens to hit that AML execution at the wrong moment, the machine
hangs. (My theory is that this must be some sort of chipset setup method
doing stores to chipset MMIO registers.)
Unfortunately, given the characteristics of the hang, it was all but
impossible to figure out which of the numerous AML methods was impacted
by this problem.
As a workaround I wrote an interface to disable chipset-based NMIs while
executing _INI sections - and indeed this fixed the hang. I did a
boot-loop of 100 separate reboots and none hung - while without the patch
it would hang every 5-10 attempts. Out of caution I did not touch the
nmi_watchdog=2 case (it's not related to the chipset anyway and didn't
hang).
I implemented this for both x86_64 and i686, tested the i686 laptop both
with nmi_watchdog=1 [which triggered the hangs] and nmi_watchdog=2, and
tested an Athlon64 box with the 64-bit kernel as well. Everything builds
and works with the patch applied.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@suse.de>
Cc: Len Brown <lenb@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 7d3f4e22d6fb..b11abacc5cfd 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -383,6 +383,34 @@ void enable_timer_nmi_watchdog(void)
}
}
+static void __acpi_nmi_disable(void *__unused)
+{
+ apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
+/*
+ * Disable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_disable(void)
+{
+ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+ on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+}
+
+static void __acpi_nmi_enable(void *__unused)
+{
+ apic_write_around(APIC_LVT0, APIC_DM_NMI);
+}
+
+/*
+ * Enable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_enable(void)
+{
+ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+ on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+}
+
#ifdef CONFIG_PM
static int nmi_pm_active; /* nmi_active before suspend */
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index e59cda134166..269fe585e71e 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -368,6 +368,33 @@ void enable_timer_nmi_watchdog(void)
}
}
+static void __acpi_nmi_disable(void *__unused)
+{
+ apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
+}
+
+/*
+ * Disable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_disable(void)
+{
+ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+ on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
+}
+
+static void __acpi_nmi_enable(void *__unused)
+{
+ apic_write(APIC_LVT0, APIC_DM_NMI);
+}
+
+/*
+ * Enable timer based NMIs on all CPUs:
+ */
+void acpi_nmi_enable(void)
+{
+ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
+ on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
+}
#ifdef CONFIG_PM
static int nmi_pm_active; /* nmi_active before suspend */
diff --git a/drivers/acpi/namespace/nsinit.c b/drivers/acpi/namespace/nsinit.c
index 326af8fc0ce7..33db2241044e 100644
--- a/drivers/acpi/namespace/nsinit.c
+++ b/drivers/acpi/namespace/nsinit.c
@@ -45,6 +45,7 @@
#include <acpi/acnamesp.h>
#include <acpi/acdispat.h>
#include <acpi/acinterp.h>
+#include <linux/nmi.h>
#define _COMPONENT ACPI_NAMESPACE
ACPI_MODULE_NAME("nsinit")
@@ -534,7 +535,15 @@ acpi_ns_init_one_device(acpi_handle obj_handle,
info->parameter_type = ACPI_PARAM_ARGS;
info->flags = ACPI_IGNORE_RETURN_VALUE;
+ /*
+ * Some hardware relies on this being executed as atomically
+ * as possible (without an NMI being received in the middle of
+ * this) - so disable NMIs and initialize the device:
+ */
+ acpi_nmi_disable();
status = acpi_ns_evaluate(info);
+ acpi_nmi_enable();
+
if (ACPI_SUCCESS(status)) {
walk_info->num_INI++;
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index acb4ed130247..29af2d5df097 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -17,8 +17,15 @@
#ifdef ARCH_HAS_NMI_WATCHDOG
#include <asm/nmi.h>
extern void touch_nmi_watchdog(void);
+extern void acpi_nmi_disable(void);
+extern void acpi_nmi_enable(void);
#else
-# define touch_nmi_watchdog() touch_softlockup_watchdog()
+static inline void touch_nmi_watchdog(void)
+{
+ touch_softlockup_watchdog();
+}
+static inline void acpi_nmi_disable(void) { }
+static inline void acpi_nmi_enable(void) { }
#endif
#ifndef trigger_all_cpu_backtrace
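For reference, what the per-CPU callbacks above do to the local APIC
(an editorial note; the constants are the usual apicdef.h values): LVT0
is the entry behind the LINT0 pin, which carries the watchdog NMI ticks
in the nmi_watchdog=1 (NMI_IO_APIC) mode that the commit targets.

/*
 * Bit 16 of an LVT entry is the mask bit, bits 8-10 the delivery
 * mode:
 *
 *	APIC_DM_NMI	0x00400		deliver as NMI
 *	APIC_LVT_MASKED	0x10000		entry masked, nothing delivered
 *
 * So writing APIC_DM_NMI | APIC_LVT_MASKED keeps the NMI delivery
 * mode programmed but suppresses delivery for the _INI window, and
 * writing plain APIC_DM_NMI re-arms the watchdog afterwards.
 */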
commit f9690982b8c2f9a2c65acdc113e758ec356676a3
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Feb 13 13:26:22 2007 +0100
[PATCH] i386: improve sched_clock() on i686
Clean up sched_clock() on i686: it will use the TSC if available, falling
back to jiffies only if the user asked for it to be disabled via notsc or
the CPU calibration code didn't figure out the right cpu_khz.
This generally makes the scheduler timestamps more fine-grained, on all
hardware. (The current scheduler is pretty resistant against asynchronous
sched_clock() values on different CPUs; it will allow up to a jiffy of
jitter.)
Also simplify sched_clock()'s check for TSC availability: propagate the
desire and ability to use the TSC into the tsc_disable flag; previously,
this flag only indicated whether the notsc option was passed. This makes
the rare low-res sched_clock() codepath a single branch off a read-mostly
flag.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 12fef14995a5..46f752a8bbf3 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -112,13 +112,10 @@ unsigned long long sched_clock(void)
return (*custom_sched_clock)();
/*
- * in the NUMA case we dont use the TSC as they are not
- * synchronized across all CPUs.
+ * Fall back to jiffies if there's no TSC available:
*/
-#ifndef CONFIG_NUMA
- if (!cpu_khz || check_tsc_unstable())
-#endif
- /* no locking but a rare wrong value is not a big deal */
+ if (unlikely(tsc_disable))
+ /* No locking but a rare wrong value is not a big deal: */
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
/* read the Time Stamp Counter: */
@@ -198,13 +195,13 @@ EXPORT_SYMBOL(recalibrate_cpu_khz);
void __init tsc_init(void)
{
if (!cpu_has_tsc || tsc_disable)
- return;
+ goto out_no_tsc;
cpu_khz = calculate_cpu_khz();
tsc_khz = cpu_khz;
if (!cpu_khz)
- return;
+ goto out_no_tsc;
printk("Detected %lu.%03lu MHz processor.\n",
(unsigned long)cpu_khz / 1000,
@@ -212,6 +209,15 @@ void __init tsc_init(void)
set_cyc2ns_scale(cpu_khz);
use_tsc_delay();
+ return;
+
+out_no_tsc:
+ /*
+ * Set the tsc_disable flag if there's no TSC support; this
+ * makes it a fast flag for the kernel to see whether it
+ * should be using the TSC.
+ */
+ tsc_disable = 1;
}
#ifdef CONFIG_CPU_FREQ
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h
index 38f1aebbbdb5..c90c7c499302 100644
--- a/include/asm-i386/bugs.h
+++ b/include/asm-i386/bugs.h
@@ -160,7 +160,7 @@ static void __init check_config(void)
* If we configured ourselves for a TSC, we'd better have one!
*/
#ifdef CONFIG_X86_TSC
- if (!cpu_has_tsc)
+ if (!cpu_has_tsc && !tsc_disable)
panic("Kernel compiled for Pentium+, requires TSC feature!");
#endif
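The resolution gap is easy to quantify (editorial arithmetic, assuming a
HZ=250 configuration): the jiffies fallback advances only in whole-tick
steps, while the TSC path counts individual CPU cycles.

#include <stdio.h>

#define HZ 250	/* assumed kernel configuration for the example */

int main(void)
{
	/* step size of the fallback formula used in sched_clock() above */
	unsigned long long ns_per_tick = 1000000000ULL / HZ;

	printf("fallback granularity: %llu ns (= %llu ms per step)\n",
	       ns_per_tick, ns_per_tick / 1000000);
	return 0;
}

At HZ=250 that is 4 ms per step; a 2 GHz TSC, by contrast, ticks every
0.5 ns.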
commit 1e8ba6fba5050ec11bba90c8622aa2ed95ff711f
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Feb 12 00:54:42 2007 -0800
[PATCH] kvm: fix vcpu freeing bug
vcpu_load() can return NULL and it sometimes does in failure paths (for
example when the userspace ABI version is too old) - causing a preemption
count underflow in ->vcpu_free() later on. So check for NULL.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Avi Kivity <avi@qumranet.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index eb3931ca680a..9b79d3451f6f 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -272,7 +272,9 @@ static void kvm_free_physmem(struct kvm *kvm)
static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
{
- vcpu_load(vcpu->kvm, vcpu_slot(vcpu));
+ if (!vcpu_load(vcpu->kvm, vcpu_slot(vcpu)))
+ return;
+
kvm_mmu_destroy(vcpu);
vcpu_put(vcpu);
kvm_arch_ops->vcpu_free(vcpu);
commit 96958231cea5985e32db2ae1125ec20483e3556b
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Feb 12 00:54:33 2007 -0800
[PATCH] kvm: optimize inline assembly
Forms like "0(%rsp)" generate an instruction with an unnecessary one-byte
displacement under certain circumstances. Replace them with the equivalent
"(%rsp)".
Signed-off-by: Avi Kivity <avi@qumranet.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 27e05a77e21a..5cc1a6ec0174 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1786,10 +1786,10 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
"kvm_vmx_return: "
/* Save guest registers, load host registers, keep flags */
#ifdef CONFIG_X86_64
- "xchg %3, 0(%%rsp) \n\t"
+ "xchg %3, (%%rsp) \n\t"
"mov %%rax, %c[rax](%3) \n\t"
"mov %%rbx, %c[rbx](%3) \n\t"
- "pushq 0(%%rsp); popq %c[rcx](%3) \n\t"
+ "pushq (%%rsp); popq %c[rcx](%3) \n\t"
"mov %%rdx, %c[rdx](%3) \n\t"
"mov %%rsi, %c[rsi](%3) \n\t"
"mov %%rdi, %c[rdi](%3) \n\t"
@@ -1804,24 +1804,24 @@ static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
"mov %%r15, %c[r15](%3) \n\t"
"mov %%cr2, %%rax \n\t"
"mov %%rax, %c[cr2](%3) \n\t"
- "mov 0(%%rsp), %3 \n\t"
+ "mov (%%rsp), %3 \n\t"
"pop %%rcx; pop %%r15; pop %%r14; pop %%r13; pop %%r12;"
"pop %%r11; pop %%r10; pop %%r9; pop %%r8;"
"pop %%rbp; pop %%rdi; pop %%rsi;"
"pop %%rdx; pop %%rbx; pop %%rax \n\t"
#else
- "xchg %3, 0(%%esp) \n\t"
+ "xchg %3, (%%esp) \n\t"
"mov %%eax, %c[rax](%3) \n\t"
"mov %%ebx, %c[rbx](%3) \n\t"
- "pushl 0(%%esp); popl %c[rcx](%3) \n\t"
+ "pushl (%%esp); popl %c[rcx](%3) \n\t"
"mov %%edx, %c[rdx](%3) \n\t"
"mov %%esi, %c[rsi](%3) \n\t"
"mov %%edi, %c[rdi](%3) \n\t"
"mov %%ebp, %c[rbp](%3) \n\t"
"mov %%cr2, %%eax \n\t"
"mov %%eax, %c[cr2](%3) \n\t"
- "mov 0(%%esp), %3 \n\t"
+ "mov (%%esp), %3 \n\t"
"pop %%ecx; popa \n\t"
#endif
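Concretely (an editorial sketch of the x86-64 encodings; the 32-bit
%esp forms differ only in the missing REX prefix): with %rsp as the
base register a SIB byte is always required, and an explicit zero
displacement may be kept as a disp8 instead of the displacement-free
form.

/* compile with gcc -c and disassemble to see the byte difference */
void encoding_demo(void)
{
	/* 48 8b 04 24    - ModRM mod=00, SIB, no displacement */
	__asm__ volatile("mov (%%rsp), %%rax" ::: "rax");

	/* 48 8b 44 24 00 - ModRM mod=01, SIB, disp8 = 0; one byte
	 * longer, when the assembler keeps the explicit zero */
	__asm__ volatile("mov 0(%%rsp), %%rax" ::: "rax");
}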
commit 944be0b224724fcbf63c3a3fe3a5478c325a6547
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Feb 12 00:52:26 2007 -0800
[PATCH] close_files(): add scheduling point
close_files() can sometimes take long enough to trigger the soft lockup
detector.
Cc: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/kernel/exit.c b/kernel/exit.c
index bc71fdfcd8a7..14f17033f563 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -430,8 +430,10 @@ static void close_files(struct files_struct * files)
while (set) {
if (set & 1) {
struct file * file = xchg(&fdt->fd[i], NULL);
- if (file)
+ if (file) {
filp_close(file, files);
+ cond_resched();
+ }
}
i++;
set >>= 1;
commit 656dad312fb41ed95ef08325e9df9bece3aacbbb
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Feb 10 01:46:36 2007 -0800
[PATCH] highmem: catch illegal nesting
Catch illegally nested kmap_atomic()s even if the page that is mapped by
the 'inner' instance is from lowmem.
This avoids spuriously zapped kmap-atomic ptes and turns hard-to-find
crashes into clear asserts at the bug site.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index e0fa6cb655a8..bb2de1089add 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -33,13 +33,14 @@ void *kmap_atomic(struct page *page, enum km_type type)
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
pagefault_disable();
+
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ BUG_ON(!pte_none(*(kmap_pte-idx)));
+
if (!PageHighMem(page))
return page_address(page);
- idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- if (!pte_none(*(kmap_pte-idx)))
- BUG();
set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
return (void*) vaddr;
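The call sequence this now catches looks as follows (editorial sketch;
highmem_page and lowmem_page are hypothetical struct page pointers, both
mappings using the same KM_USER0 slot). Before the patch the inner call
returned page_address() before any check, so the illegal reuse went
unnoticed whenever the inner page happened to be lowmem - even though
the same sequence with a highmem inner page would corrupt the outer
mapping:

void *outer, *inner;

outer = kmap_atomic(highmem_page, KM_USER0);	/* installs a kmap pte */
inner = kmap_atomic(lowmem_page, KM_USER0);	/* same slot: BUG_ON()
						   now fires right here */
kunmap_atomic(inner, KM_USER0);
kunmap_atomic(outer, KM_USER0);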
commit 11f57cedcf382574a1e41d6cec2349f287fcea67
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Feb 10 01:46:09 2007 -0800
[PATCH] audit: fix audit_filter_user_rules() initialization bug
gcc emits this warning:
kernel/auditfilter.c: In function 'audit_filter_user':
kernel/auditfilter.c:1611: warning: 'state' is used uninitialized in this function
I tend to agree with gcc - there are a couple of plausible exit paths from
audit_filter_user_rules() where it does not set 'state', leaving the
variable uninitialized - for example, if a filter rule has an AUDIT_POSSIBLE
action. Initialize it to 'won't audit'. Fix whitespace damage too.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 9c8c23227c7f..87865f8b4ce3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1601,8 +1601,8 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
int audit_filter_user(struct netlink_skb_parms *cb, int type)
{
+ enum audit_state state = AUDIT_DISABLED;
struct audit_entry *e;
- enum audit_state state;
int ret = 1;
rcu_read_lock();
commit 898552c9d807fe59f3ecaf9c300c109358375c12
Author: Ingo Molnar <mingo@elte.hu>
Date: Sat Feb 10 01:44:57 2007 -0800
[PATCH] lockdep: also check for freed locks in kmem_cache_free()
kmem_cache_free() was missing the check for freeing held locks.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/slab.c b/mm/slab.c
index 348396d691a1..196df70eb8cb 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3751,6 +3751,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
BUG_ON(virt_to_cache(objp) != cachep);
local_irq_save(flags);
+ debug_check_no_locks_freed(objp, obj_size(cachep));
__cache_free(cachep, objp);
local_irq_restore(flags);
}
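The bug class the added line catches, sketched (editorial; my_cache,
struct foo and the call sequence are hypothetical):

#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	spinlock_t lock;
	int data;
};

static void buggy_free(struct kmem_cache *my_cache, struct foo *p)
{
	spin_lock(&p->lock);
	/*
	 * The object being freed still contains a held lock; with the
	 * patch, debug_check_no_locks_freed() makes lockdep complain
	 * here, at the free, instead of leaving a corrupted lock for
	 * some later victim:
	 */
	kmem_cache_free(my_cache, p);
}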