Patches contributed by Eötvös Lorand University
commit 7c329288d72e025db4feac65f0fed95fb3e3ef1c
Author: Ingo Molnar <mingo@elte.hu>
Date: Wed Sep 23 09:52:18 2009 +1000
vgaarb: make client interface config invariant.
Fixes build when VGA_ARB is off.
Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index e81c64af80c1..923f9040ea20 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -41,7 +41,7 @@
* interrupts at any time.
*/
extern void vga_set_legacy_decoding(struct pci_dev *pdev,
- unsigned int decodes);
+ unsigned int decodes);
/**
* vga_get - acquire & locks VGA resources
@@ -193,8 +193,17 @@ static inline int vga_conflicts(struct pci_dev *p1, struct pci_dev *p2)
* They driver will get a callback when VGA arbitration is first used
* by userspace since we some older X servers have issues.
*/
+#if defined(CONFIG_VGA_ARB)
int vga_client_register(struct pci_dev *pdev, void *cookie,
void (*irq_set_state)(void *cookie, bool state),
unsigned int (*set_vga_decode)(void *cookie, bool state));
+#else
+static inline int vga_client_register(struct pci_dev *pdev, void *cookie,
+ void (*irq_set_state)(void *cookie, bool state),
+ unsigned int (*set_vga_decode)(void *cookie, bool state))
+{
+ return 0;
+}
+#endif
#endif /* LINUX_VGA_H */
commit b1912a85b54c27738afe1c4fa069df02d3316f0c
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Sep 21 15:23:45 2009 -0300
media: video: Fix build in saa7164
-tip testing found that the x86 build (64-bit allyesconfig) fails due to:
LD vmlinux.o
drivers/built-in.o:(.bss+0x4b648): multiple definition of `debug'
arch/x86/built-in.o:(.kprobes.text+0x88): first defined here
ld: Warning: size of symbol `debug' changed from 90 in
arch/x86/built-in.o to 4 in drivers/built-in.o
make: *** [vmlinux.o] Error 1
This is because recent saa7164 changes introduced a global symbol
named 'debug'. The x86 platform code already defines a 'debug'
symbol. (which is named in a too generic way as well - but it
can be used nicely to weed out too generic symbols in drivers ;-)
Rename it to saa_debug.
[mchehab@redhat.com: use module_param_named to preserve old name]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
diff --git a/drivers/media/video/saa7164/saa7164-api.c b/drivers/media/video/saa7164/saa7164-api.c
index bb6df1b276be..6f094a96ac81 100644
--- a/drivers/media/video/saa7164/saa7164-api.c
+++ b/drivers/media/video/saa7164/saa7164-api.c
@@ -415,7 +415,7 @@ int saa7164_api_enum_subdevs(struct saa7164_dev *dev)
goto out;
}
- if (debug & DBGLVL_API)
+ if (saa_debug & DBGLVL_API)
saa7164_dumphex16(dev, buf, (buflen/16)*16);
saa7164_api_dump_subdevs(dev, buf, buflen);
@@ -480,7 +480,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
dprintk(DBGLVL_API, "%s() len = %d bytes\n", __func__, len);
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, 2 * 16);
ret = saa7164_cmd_send(bus->dev, unitid, GET_CUR,
@@ -488,7 +488,7 @@ int saa7164_api_i2c_read(struct saa7164_i2c *bus, u8 addr, u32 reglen, u8 *reg,
if (ret != SAA_OK)
printk(KERN_ERR "%s() error, ret(2) = 0x%x\n", __func__, ret);
else {
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, sizeof(buf));
memcpy(data, (buf + 2 * sizeof(u32) + reglen), datalen);
}
@@ -548,7 +548,7 @@ int saa7164_api_i2c_write(struct saa7164_i2c *bus, u8 addr, u32 datalen,
*((u32 *)(buf + 1 * sizeof(u32))) = datalen - reglen;
memcpy((buf + 2 * sizeof(u32)), data, datalen);
- if (debug & DBGLVL_I2C)
+ if (saa_debug & DBGLVL_I2C)
saa7164_dumphex16(dev, buf, sizeof(buf));
ret = saa7164_cmd_send(bus->dev, unitid, SET_CUR,
diff --git a/drivers/media/video/saa7164/saa7164-cmd.c b/drivers/media/video/saa7164/saa7164-cmd.c
index e097f1a0969a..c45966edc0cf 100644
--- a/drivers/media/video/saa7164/saa7164-cmd.c
+++ b/drivers/media/video/saa7164/saa7164-cmd.c
@@ -250,7 +250,7 @@ int saa7164_cmd_wait(struct saa7164_dev *dev, u8 seqno)
unsigned long stamp;
int r;
- if (debug >= 4)
+ if (saa_debug >= 4)
saa7164_bus_dump(dev);
dprintk(DBGLVL_CMD, "%s(seqno=%d)\n", __func__, seqno);
diff --git a/drivers/media/video/saa7164/saa7164-core.c b/drivers/media/video/saa7164/saa7164-core.c
index f0dbead188c8..709affc31042 100644
--- a/drivers/media/video/saa7164/saa7164-core.c
+++ b/drivers/media/video/saa7164/saa7164-core.c
@@ -45,8 +45,8 @@ MODULE_LICENSE("GPL");
32 bus
*/
-unsigned int debug;
-module_param(debug, int, 0644);
+unsigned int saa_debug;
+module_param_named(debug, saa_debug, int, 0644);
MODULE_PARM_DESC(debug, "enable debug messages");
unsigned int waitsecs = 10;
@@ -653,7 +653,7 @@ static int __devinit saa7164_initdev(struct pci_dev *pci_dev,
printk(KERN_ERR "%s() Unsupported board detected, "
"registering without firmware\n", __func__);
- dprintk(1, "%s() parameter debug = %d\n", __func__, debug);
+ dprintk(1, "%s() parameter debug = %d\n", __func__, saa_debug);
dprintk(1, "%s() parameter waitsecs = %d\n", __func__, waitsecs);
fail_fw:
diff --git a/drivers/media/video/saa7164/saa7164.h b/drivers/media/video/saa7164/saa7164.h
index 6753008a9c9b..42660b546f0e 100644
--- a/drivers/media/video/saa7164/saa7164.h
+++ b/drivers/media/video/saa7164/saa7164.h
@@ -375,9 +375,9 @@ extern int saa7164_buffer_dealloc(struct saa7164_tsport *port,
/* ----------------------------------------------------------- */
-extern unsigned int debug;
+extern unsigned int saa_debug;
#define dprintk(level, fmt, arg...)\
- do { if (debug & level)\
+ do { if (saa_debug & level)\
printk(KERN_DEBUG "%s: " fmt, dev->name, ## arg);\
} while (0)
commit b417c9fd8690637f0c91479435ab3e2bf450c038
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Sep 22 15:50:24 2009 +0200
x86: mce: Fix thermal throttling message storm
If a system switches back and forth between hot and cold mode,
the MCE code will print a stream of critical kernel messages.
Extend the throttling code to properly notice this, by
only printing the first hot + cold transition and omitting
the rest up to CHECK_INTERVAL (5 minutes).
This way we'll only get a single incident of:
[ 102.356584] CPU0: Temperature above threshold, cpu clock throttled (total events = 1)
[ 102.357000] Disabling lock debugging due to kernel taint
[ 102.369223] CPU0: Temperature/speed normal
Every 5 minutes. The 'total events' count tells the number of cold/hot
transitions detected, should overheating occur after 5 minutes again:
[ 402.357580] CPU0: Temperature above threshold, cpu clock throttled (total events = 24891)
[ 402.358001] CPU0: Temperature/speed normal
[ 450.704142] Machine check events logged
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index db80b577f601..b3a1dba75330 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -42,6 +42,7 @@ struct thermal_state {
u64 next_check;
unsigned long throttle_count;
+ unsigned long last_throttle_count;
};
static DEFINE_PER_CPU(struct thermal_state, thermal_state);
@@ -120,11 +121,12 @@ static int therm_throt_process(bool is_throttled)
if (is_throttled)
state->throttle_count++;
- if (!(was_throttled ^ is_throttled) &&
- time_before64(now, state->next_check))
+ if (time_before64(now, state->next_check) &&
+ state->throttle_count != state->last_throttle_count)
return 0;
state->next_check = now + CHECK_INTERVAL;
+ state->last_throttle_count = state->throttle_count;
/* if we just entered the thermal event */
if (is_throttled) {
commit 3967684006f30c253bc6d4a6604d1bad4a7fc672
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Sep 22 15:50:24 2009 +0200
x86: mce: Clean up thermal throttling state tracking code
Instead of a mess of three separate percpu variables, consolidate
the state into a single structure.
Also clean up therm_throt_process(), use cleaner and more
understandable variable names and a clearer logic.
This, without changing the logic, makes the code more
streamlined, more readable and smaller as well:
text data bss dec hex filename
1487 169 4 1660 67c therm_throt.o.before
1432 176 4 1612 64c therm_throt.o.after
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 63a56d147e4a..db80b577f601 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -34,20 +34,30 @@
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
-static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
-static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-static DEFINE_PER_CPU(bool, thermal_throttle_active);
+/*
+ * Current thermal throttling state:
+ */
+struct thermal_state {
+ bool is_throttled;
+
+ u64 next_check;
+ unsigned long throttle_count;
+};
+
+static DEFINE_PER_CPU(struct thermal_state, thermal_state);
-static atomic_t therm_throt_en = ATOMIC_INIT(0);
+static atomic_t therm_throt_en = ATOMIC_INIT(0);
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
#define define_therm_throt_sysdev_show_func(name) \
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
- char *buf) \
+ \
+static ssize_t therm_throt_sysdev_show_##name( \
+ struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
+ char *buf) \
{ \
unsigned int cpu = dev->id; \
ssize_t ret; \
@@ -55,7 +65,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
preempt_disable(); /* CPU hotplug */ \
if (cpu_online(cpu)) \
ret = sprintf(buf, "%lu\n", \
- per_cpu(thermal_throttle_##name, cpu)); \
+ per_cpu(thermal_state, cpu).name); \
else \
ret = 0; \
preempt_enable(); \
@@ -63,11 +73,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
return ret; \
}
-define_therm_throt_sysdev_show_func(count);
-define_therm_throt_sysdev_one_ro(count);
+define_therm_throt_sysdev_show_func(throttle_count);
+define_therm_throt_sysdev_one_ro(throttle_count);
static struct attribute *thermal_throttle_attrs[] = {
- &attr_count.attr,
+ &attr_throttle_count.attr,
NULL
};
@@ -93,33 +103,38 @@ static struct attribute_group thermal_throttle_attr_group = {
* 1 : Event should be logged further, and a message has been
* printed to the syslog.
*/
-static int therm_throt_process(int curr)
+static int therm_throt_process(bool is_throttled)
{
- unsigned int cpu = smp_processor_id();
- __u64 tmp_jiffs = get_jiffies_64();
- bool was_throttled = __get_cpu_var(thermal_throttle_active);
- bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
+ struct thermal_state *state;
+ unsigned int this_cpu;
+ bool was_throttled;
+ u64 now;
+
+ this_cpu = smp_processor_id();
+ now = get_jiffies_64();
+ state = &per_cpu(thermal_state, this_cpu);
+
+ was_throttled = state->is_throttled;
+ state->is_throttled = is_throttled;
if (is_throttled)
- __get_cpu_var(thermal_throttle_count)++;
+ state->throttle_count++;
if (!(was_throttled ^ is_throttled) &&
- time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+ time_before64(now, state->next_check))
return 0;
- __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
+ state->next_check = now + CHECK_INTERVAL;
/* if we just entered the thermal event */
if (is_throttled) {
- printk(KERN_CRIT "CPU%d: Temperature above threshold, "
- "cpu clock throttled (total events = %lu)\n",
- cpu, __get_cpu_var(thermal_throttle_count));
+ printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count);
add_taint(TAINT_MACHINE_CHECK);
return 1;
}
if (was_throttled) {
- printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+ printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu);
return 1;
}
@@ -213,7 +228,7 @@ static void intel_thermal_interrupt(void)
__u64 msr_val;
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
- if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+ if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0))
mce_log_therm_throt_event(msr_val);
}
commit 3fff4c42bd0a89869a0eb1e7874cc06ffa4aa0f5
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Sep 22 16:18:09 2009 +0200
printk: Remove ratelimit.h from kernel.h
Decouple kernel.h from ratelimit.h: the global declaration of
printk's ratelimit_state is not needed, and it leads to messy
circular dependencies due to the spinlock_types.h include that was
newly added to ratelimit.h.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David S. Miller <davem@davemloft.net>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b5b1e0899a8..3305f33201be 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -15,7 +15,6 @@
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/typecheck.h>
-#include <linux/ratelimit.h>
#include <linux/dynamic_debug.h>
#include <asm/byteorder.h>
#include <asm/bug.h>
@@ -241,7 +240,6 @@ asmlinkage int vprintk(const char *fmt, va_list args)
asmlinkage int printk(const char * fmt, ...)
__attribute__ ((format (printf, 1, 2))) __cold;
-extern struct ratelimit_state printk_ratelimit_state;
extern int printk_ratelimit(void);
extern bool printk_timed_ratelimit(unsigned long *caller_jiffies,
unsigned int interval_msec);
diff --git a/include/linux/net.h b/include/linux/net.h
index 9040a10584f7..df20f680f455 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -358,6 +358,7 @@ static const struct proto_ops name##_ops = { \
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
+#include <linux/ratelimit.h>
extern struct ratelimit_state net_ratelimit_state;
#endif
diff --git a/kernel/printk.c b/kernel/printk.c
index 602033acd6c7..b997c893cdcf 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -33,6 +33,7 @@
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/kexec.h>
+#include <linux/ratelimit.h>
#include <asm/uaccess.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a631ba684a4..6c37048b9db9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -37,6 +37,7 @@
#include <linux/sysrq.h>
#include <linux/highuid.h>
#include <linux/writeback.h>
+#include <linux/ratelimit.h>
#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/key.h>
@@ -155,6 +156,8 @@ extern int no_unaligned_warning;
extern int unaligned_dump_stack;
#endif
+extern struct ratelimit_state printk_ratelimit_state;
+
#ifdef CONFIG_RT_MUTEXES
extern int max_lock_depth;
#endif
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 69bfcacda16d..5551731ae1d4 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -9,7 +9,7 @@
* This file is released under the GPLv2.
*/
-#include <linux/kernel.h>
+#include <linux/ratelimit.h>
#include <linux/jiffies.h>
#include <linux/module.h>
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7db1de0497c6..887c03c4e3c6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -10,7 +10,9 @@
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
+#include <linux/ratelimit.h>
#include <linux/init.h>
+
#include <net/ip.h>
#include <net/sock.h>
diff --git a/net/core/utils.c b/net/core/utils.c
index 83221aee7084..838250241d26 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -24,6 +24,8 @@
#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/init.h>
+#include <linux/ratelimit.h>
+
#include <net/sock.h>
#include <asm/byteorder.h>
commit c7f7fea30b7e52c9d4b9cef271110a98d59adcbc
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Sep 22 14:53:51 2009 +0200
perf stat: Fix zero total printouts
Before:
0 sched:sched_switch # nan M/sec
After:
0 sched:sched_switch # 0.000 M/sec
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 16af2d82e858..e5f6ece65a13 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -338,14 +338,24 @@ static void nsec_printout(int counter, double avg)
static void abs_printout(int counter, double avg)
{
+ double total, ratio = 0.0;
+
fprintf(stderr, " %14.0f %-24s", avg, event_name(counter));
if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
- fprintf(stderr, " # %10.3f IPC ",
- avg / avg_stats(&runtime_cycles_stats));
+ total = avg_stats(&runtime_cycles_stats);
+
+ if (total)
+ ratio = avg / total;
+
+ fprintf(stderr, " # %10.3f IPC ", ratio);
} else {
- fprintf(stderr, " # %10.3f M/sec",
- 1000.0 * avg / avg_stats(&runtime_nsecs_stats));
+ total = avg_stats(&runtime_nsecs_stats);
+
+ if (total)
+ ratio = 1000.0 * avg / total;
+
+ fprintf(stderr, " # %10.3f M/sec", ratio);
}
}
commit edaac8e3167501cda336231d00611bf59c164346
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Sep 22 14:44:11 2009 +0200
ratelimit: Fix/allow use in atomic contexts
I'd like to use printk_ratelimit() in NMI context, but it's not
robust right now due to spinlock usage in lib/ratelimit.c. If an
NMI is unlucky enough to hit just that spot we might lock up trying
to take the spinlock again.
Fix that by using a trylock variant. If we contend on that lock we
can genuinely skip the message because the state is just being
accessed by another CPU (or by this CPU).
( We could use atomics for the suppressed messages field, but
I doubt it matters in practice and it makes the code heavier. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David S. Miller <davem@davemloft.net>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 0e2c28e8a0ca..69bfcacda16d 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -28,7 +28,15 @@ int __ratelimit(struct ratelimit_state *rs)
if (!rs->interval)
return 1;
- spin_lock_irqsave(&rs->lock, flags);
+ /*
+ * If we contend on this state's lock then almost
+ * by definition we are too busy to print a message,
+ * in addition to the one that will be printed by
+ * the entity that is holding the lock already:
+ */
+ if (!spin_trylock_irqsave(&rs->lock, flags))
+ return 1;
+
if (!rs->begin)
rs->begin = jiffies;
commit 979f693def9084a452846365dfde5dcb28366333
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Sep 22 14:44:11 2009 +0200
ratelimit: Use per ratelimit context locking
I'd like to use printk_ratelimit() in atomic context, but that's
not possible right now due to the spinlock usage this commit
introduced more than a year ago:
717115e: printk ratelimiting rewrite
As a first step push the lock into the ratelimit state structure.
This allows locking failures to be treated as an
event related to that state being too busy.
Also clean up the code a bit (without changing functionality):
- tidy up the definitions
- clean up the code flow
This also shrinks the code a tiny bit:
text data bss dec hex filename
264 0 4 268 10c ratelimit.o.before
255 0 0 255 ff ratelimit.o.after
( Whole-kernel data size got a bit larger, because we have
two ratelimit-state data structures right now. )
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: David S. Miller <davem@davemloft.net>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 00044b856453..187bc16c1f15 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -1,20 +1,30 @@
#ifndef _LINUX_RATELIMIT_H
#define _LINUX_RATELIMIT_H
+
#include <linux/param.h>
+#include <linux/spinlock_types.h>
-#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
-#define DEFAULT_RATELIMIT_BURST 10
+#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ)
+#define DEFAULT_RATELIMIT_BURST 10
struct ratelimit_state {
- int interval;
- int burst;
- int printed;
- int missed;
- unsigned long begin;
+ spinlock_t lock; /* protect the state */
+
+ int interval;
+ int burst;
+ int printed;
+ int missed;
+ unsigned long begin;
};
-#define DEFINE_RATELIMIT_STATE(name, interval, burst) \
- struct ratelimit_state name = {interval, burst,}
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \
+ \
+ struct ratelimit_state name = { \
+ .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
+ .interval = interval_init, \
+ .burst = burst_init, \
+ }
extern int __ratelimit(struct ratelimit_state *rs);
-#endif
+
+#endif /* _LINUX_RATELIMIT_H */
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index 26187edcc7ea..0e2c28e8a0ca 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -7,15 +7,12 @@
* parameter. Now every user can use their own standalone ratelimit_state.
*
* This file is released under the GPLv2.
- *
*/
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/module.h>
-static DEFINE_SPINLOCK(ratelimit_lock);
-
/*
* __ratelimit - rate limiting
* @rs: ratelimit_state data
@@ -26,11 +23,12 @@ static DEFINE_SPINLOCK(ratelimit_lock);
int __ratelimit(struct ratelimit_state *rs)
{
unsigned long flags;
+ int ret;
if (!rs->interval)
return 1;
- spin_lock_irqsave(&ratelimit_lock, flags);
+ spin_lock_irqsave(&rs->lock, flags);
if (!rs->begin)
rs->begin = jiffies;
@@ -38,20 +36,19 @@ int __ratelimit(struct ratelimit_state *rs)
if (rs->missed)
printk(KERN_WARNING "%s: %d callbacks suppressed\n",
__func__, rs->missed);
- rs->begin = 0;
+ rs->begin = 0;
rs->printed = 0;
- rs->missed = 0;
+ rs->missed = 0;
}
- if (rs->burst && rs->burst > rs->printed)
- goto print;
-
- rs->missed++;
- spin_unlock_irqrestore(&ratelimit_lock, flags);
- return 0;
+ if (rs->burst && rs->burst > rs->printed) {
+ rs->printed++;
+ ret = 1;
+ } else {
+ rs->missed++;
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&rs->lock, flags);
-print:
- rs->printed++;
- spin_unlock_irqrestore(&ratelimit_lock, flags);
- return 1;
+ return ret;
}
EXPORT_SYMBOL(__ratelimit);
commit 388dba30471c236a290c4082bce5f2b5cd1a7a06
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Sep 21 08:56:58 2009 +0200
Driver-Core: fix devnode callbacks for dabusb and industrialio
The build of the dabusb driver broke:
drivers/media/video/dabusb.c:758: error: unknown field 'nodename' specified in initializer
drivers/media/video/dabusb.c:758: warning: initialization from incompatible pointer type
make[3]: *** wait: No child processes. Stop.
Due to this commit:
e454cea: Driver-Core: extend devnode callbacks to provide permissions
That commit missed converting the dabusb driver's dabusb_nodename() callback.
Similar issues with the iio/industrialio driver in staging, pointed out
and patched by Jean Delvare.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Industrialio-parts-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/media/video/dabusb.c b/drivers/media/video/dabusb.c
index 0664d111085f..ee43876adb06 100644
--- a/drivers/media/video/dabusb.c
+++ b/drivers/media/video/dabusb.c
@@ -748,14 +748,14 @@ static const struct file_operations dabusb_fops =
.release = dabusb_release,
};
-static char *dabusb_nodename(struct device *dev)
+static char *dabusb_devnode(struct device *dev, mode_t *mode)
{
return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
}
static struct usb_class_driver dabusb_class = {
.name = "dabusb%d",
- .nodename = dabusb_nodename,
+ .devnode = dabusb_devnode,
.fops = &dabusb_fops,
.minor_base = DABUSB_MINOR,
};
diff --git a/drivers/staging/iio/industrialio-core.c b/drivers/staging/iio/industrialio-core.c
index 660a9c1a1f3f..1fa18f255814 100644
--- a/drivers/staging/iio/industrialio-core.c
+++ b/drivers/staging/iio/industrialio-core.c
@@ -39,14 +39,14 @@ dev_t iio_devt;
EXPORT_SYMBOL(iio_devt);
#define IIO_DEV_MAX 256
-static char *iio_nodename(struct device *dev)
+static char *iio_devnode(struct device *dev, mode_t *mode)
{
return kasprintf(GFP_KERNEL, "iio/%s", dev_name(dev));
}
struct class iio_class = {
.name = "iio",
- .nodename = iio_nodename,
+ .devnode = iio_devnode,
};
EXPORT_SYMBOL(iio_class);
commit 57c0c15b5244320065374ad2c54f4fbec77a6428
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Sep 21 12:20:38 2009 +0200
perf: Tidy up after the big rename
- provide compatibility Kconfig entry for existing PERF_COUNTERS .config's
- provide courtesy copy of old perf_counter.h, for user-space projects
- small indentation fixups
- fix up MAINTAINERS
- fix small x86 printout fallout
- fix up small PowerPC comment fallout (use 'counter' as in register)
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/MAINTAINERS b/MAINTAINERS
index 43761a00e3f1..751a307dc44e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4000,7 +4000,7 @@ S: Maintained
F: include/linux/delayacct.h
F: kernel/delayacct.c
-PERFORMANCE COUNTER SUBSYSTEM
+PERFORMANCE EVENTS SUBSYSTEM
M: Peter Zijlstra <a.p.zijlstra@chello.nl>
M: Paul Mackerras <paulus@samba.org>
M: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 154f405b642f..7d8514ceceae 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -122,7 +122,7 @@ struct paca_struct {
u8 soft_enabled; /* irq soft-enable flag */
u8 hard_enabled; /* set if irqs are enabled in MSR */
u8 io_sync; /* writel() needs spin_unlock sync */
- u8 perf_event_pending; /* PM interrupt while soft-disabled */
+ u8 perf_event_pending; /* PM interrupt while soft-disabled */
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index c98321fcb459..197b7d958796 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -41,7 +41,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
struct power_pmu *ppmu;
/*
- * Normally, to ignore kernel events we set the FCS (freeze events
+ * Normally, to ignore kernel events we set the FCS (freeze counters
* in supervisor mode) bit in MMCR0, but if the kernel runs with the
* hypervisor bit set in the MSR, or if we are running on a processor
* where the hypervisor bit is forced to 1 (as on Apple G5 processors),
@@ -159,7 +159,7 @@ void perf_event_print_debug(void)
}
/*
- * Read one performance monitor event (PMC).
+ * Read one performance monitor counter (PMC).
*/
static unsigned long read_pmc(int idx)
{
@@ -409,7 +409,7 @@ static void power_pmu_read(struct perf_event *event)
val = read_pmc(event->hw.idx);
} while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
- /* The events are only 32 bits wide */
+ /* The counters are only 32 bits wide */
delta = (val - prev) & 0xfffffffful;
atomic64_add(delta, &event->count);
atomic64_sub(delta, &event->hw.period_left);
@@ -543,7 +543,7 @@ void hw_perf_disable(void)
}
/*
- * Set the 'freeze events' bit.
+ * Set the 'freeze counters' bit.
* The barrier is to make sure the mtspr has been
* executed and the PMU has frozen the events
* before we return.
@@ -1124,7 +1124,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
}
/*
- * A event has overflowed; update its count and record
+ * A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
* here so there is no possibility of being interrupted.
*/
@@ -1271,7 +1271,7 @@ static void perf_event_interrupt(struct pt_regs *regs)
/*
* Reset MMCR0 to its normal value. This will set PMXE and
- * clear FC (freeze events) and PMAO (perf mon alert occurred)
+ * clear FC (freeze counters) and PMAO (perf mon alert occurred)
* and thus allow interrupts to occur again.
* XXX might want to use MSR.PM to keep the events frozen until
* we get back out of this interrupt.
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0d03629fb1a5..a3c7adb06b78 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2081,13 +2081,13 @@ void __init init_hw_perf_events(void)
perf_events_lapic_init();
register_die_notifier(&perf_event_nmi_notifier);
- pr_info("... version: %d\n", x86_pmu.version);
- pr_info("... bit width: %d\n", x86_pmu.event_bits);
- pr_info("... generic events: %d\n", x86_pmu.num_events);
- pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
- pr_info("... max period: %016Lx\n", x86_pmu.max_period);
- pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
- pr_info("... event mask: %016Lx\n", perf_event_mask);
+ pr_info("... version: %d\n", x86_pmu.version);
+ pr_info("... bit width: %d\n", x86_pmu.event_bits);
+ pr_info("... generic registers: %d\n", x86_pmu.num_events);
+ pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
+ pr_info("... max period: %016Lx\n", x86_pmu.max_period);
+ pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
+ pr_info("... event mask: %016Lx\n", perf_event_mask);
}
static inline void x86_pmu_read(struct perf_event *event)
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
new file mode 100644
index 000000000000..368bd70f1d2d
--- /dev/null
+++ b/include/linux/perf_counter.h
@@ -0,0 +1,441 @@
+/*
+ * NOTE: this file will be removed in a future kernel release; it is
+ * provided as a courtesy copy for user-space code that relies on the
+ * old (pre-rename) symbols and constants.
+ *
+ * Performance events:
+ *
+ * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
+ *
+ * Data type definitions, declarations, prototypes.
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_PERF_COUNTER_H
+#define _LINUX_PERF_COUNTER_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+ PERF_TYPE_HW_CACHE = 3,
+ PERF_TYPE_RAW = 4,
+
+ PERF_TYPE_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized performance counter event types, used by the
+ * attr.event_id parameter of the sys_perf_counter_open()
+ * syscall:
+ */
+enum perf_hw_id {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_HW_CPU_CYCLES = 0,
+ PERF_COUNT_HW_INSTRUCTIONS = 1,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_MISSES = 3,
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BUS_CYCLES = 6,
+
+ PERF_COUNT_HW_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized hardware cache counters:
+ *
+ * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
+ * { read, write, prefetch } x
+ * { accesses, misses }
+ */
+enum perf_hw_cache_id {
+ PERF_COUNT_HW_CACHE_L1D = 0,
+ PERF_COUNT_HW_CACHE_L1I = 1,
+ PERF_COUNT_HW_CACHE_LL = 2,
+ PERF_COUNT_HW_CACHE_DTLB = 3,
+ PERF_COUNT_HW_CACHE_ITLB = 4,
+ PERF_COUNT_HW_CACHE_BPU = 5,
+
+ PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+ PERF_COUNT_HW_CACHE_OP_READ = 0,
+ PERF_COUNT_HW_CACHE_OP_WRITE = 1,
+ PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
+
+ PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
+ PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
+
+ PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
+};
+
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+ PERF_COUNT_SW_CPU_CLOCK = 0,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+
+ PERF_COUNT_SW_MAX, /* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_counter_sample_format {
+ PERF_SAMPLE_IP = 1U << 0,
+ PERF_SAMPLE_TID = 1U << 1,
+ PERF_SAMPLE_TIME = 1U << 2,
+ PERF_SAMPLE_ADDR = 1U << 3,
+ PERF_SAMPLE_READ = 1U << 4,
+ PERF_SAMPLE_CALLCHAIN = 1U << 5,
+ PERF_SAMPLE_ID = 1U << 6,
+ PERF_SAMPLE_CPU = 1U << 7,
+ PERF_SAMPLE_PERIOD = 1U << 8,
+ PERF_SAMPLE_STREAM_ID = 1U << 9,
+ PERF_SAMPLE_RAW = 1U << 10,
+
+ PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
+};
+
+/*
+ * The format of the data returned by read() on a perf counter fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
+ *
+ * { u64 nr;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * } && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_counter_read_format {
+ PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
+ PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
+ PERF_FORMAT_ID = 1U << 2,
+ PERF_FORMAT_GROUP = 1U << 3,
+
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
+};
+
+#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
+
+/*
+ * Hardware event to monitor via a performance monitoring counter:
+ */
+struct perf_counter_attr {
+
+ /*
+ * Major type: hardware/software/tracepoint/etc.
+ */
+ __u32 type;
+
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ __u32 size;
+
+ /*
+ * Type specific configuration information.
+ */
+ __u64 config;
+
+ union {
+ __u64 sample_period;
+ __u64 sample_freq;
+ };
+
+ __u64 sample_type;
+ __u64 read_format;
+
+ __u64 disabled : 1, /* off by default */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+ exclude_idle : 1, /* don't count when idle */
+ mmap : 1, /* include mmap data */
+ comm : 1, /* include comm data */
+ freq : 1, /* use freq, not period */
+ inherit_stat : 1, /* per task counts */
+ enable_on_exec : 1, /* next exec enables */
+ task : 1, /* trace fork/exit */
+ watermark : 1, /* wakeup_watermark */
+
+ __reserved_1 : 49;
+
+ union {
+ __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_watermark; /* bytes before wakeup */
+ };
+ __u32 __reserved_2;
+
+ __u64 __reserved_3;
+};
+
+/*
+ * Ioctls that can be done on a perf counter fd:
+ */
+#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0)
+#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1)
+#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2)
+#define PERF_COUNTER_IOC_RESET _IO ('$', 3)
+#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64)
+#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5)
+
+enum perf_counter_ioc_flags {
+ PERF_IOC_FLAG_GROUP = 1U << 0,
+};
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_counter_mmap_page {
+ __u32 version; /* version number of this structure */
+ __u32 compat_version; /* lowest version this is compat with */
+
+ /*
+ * Bits needed to read the hw counters in user-space.
+ *
+ * u32 seq;
+ * s64 count;
+ *
+ * do {
+ * seq = pc->lock;
+ *
+ * barrier()
+ * if (pc->index) {
+ * count = pmc_read(pc->index - 1);
+ * count += pc->offset;
+ * } else
+ * goto regular_read;
+ *
+ * barrier();
+ * } while (pc->lock != seq);
+ *
+ * NOTE: for obvious reason this only works on self-monitoring
+ * processes.
+ */
+ __u32 lock; /* seqlock for synchronization */
+ __u32 index; /* hardware counter identifier */
+ __s64 offset; /* add to hardware counter value */
+ __u64 time_enabled; /* time counter active */
+ __u64 time_running; /* time counter on cpu */
+
+ /*
+ * Hole for extension of the self monitor capabilities
+ */
+
+ __u64 __reserved[123]; /* align to 1k */
+
+ /*
+ * Control data for the mmap() data buffer.
+ *
+ * User-space reading the @data_head value should issue an rmb(), on
+ * SMP capable platforms, after reading this value -- see
+ * perf_counter_wakeup().
+ *
+ * When the mapping is PROT_WRITE the @data_tail value should be
+ * written by userspace to reflect the last read data. In this case
+ * the kernel will not over-write unread data.
+ */
+ __u64 data_head; /* head in the data section */
+ __u64 data_tail; /* user-space written tail */
+};
+
+#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0)
+#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0)
+#define PERF_EVENT_MISC_KERNEL (1 << 0)
+#define PERF_EVENT_MISC_USER (2 << 0)
+#define PERF_EVENT_MISC_HYPERVISOR (3 << 0)
+
+struct perf_event_header {
+ __u32 type;
+ __u16 misc;
+ __u16 size;
+};
+
+enum perf_event_type {
+
+ /*
+ * The MMAP events record the PROT_EXEC mappings so that we can
+ * correlate userspace IPs to code. They have the following structure:
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * char filename[];
+ * };
+ */
+ PERF_EVENT_MMAP = 1,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 id;
+ * u64 lost;
+ * };
+ */
+ PERF_EVENT_LOST = 2,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * char comm[];
+ * };
+ */
+ PERF_EVENT_COMM = 3,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * };
+ */
+ PERF_EVENT_EXIT = 4,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 time;
+ * u64 id;
+ * u64 stream_id;
+ * };
+ */
+ PERF_EVENT_THROTTLE = 5,
+ PERF_EVENT_UNTHROTTLE = 6,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * };
+ */
+ PERF_EVENT_FORK = 7,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, tid;
+ *
+ * struct read_format values;
+ * };
+ */
+ PERF_EVENT_READ = 8,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * { u64 ip; } && PERF_SAMPLE_IP
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 addr; } && PERF_SAMPLE_ADDR
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 period; } && PERF_SAMPLE_PERIOD
+ *
+ * { struct read_format values; } && PERF_SAMPLE_READ
+ *
+ * { u64 nr,
+ * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
+ *
+ * #
+ * # The RAW record below is opaque data wrt the ABI
+ * #
+ * # That is, the ABI doesn't make any promises wrt
+ * # the stability of its content, it may vary depending
+ * # on event, hardware, kernel version and phase of
+ * # the moon.
+ * #
+ * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ * #
+ *
+ * { u32 size;
+ * char data[size];}&& PERF_SAMPLE_RAW
+ * };
+ */
+ PERF_EVENT_SAMPLE = 9,
+
+ PERF_EVENT_MAX, /* non-ABI */
+};
+
+enum perf_callchain_context {
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
+
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+
+ PERF_CONTEXT_MAX = (__u64)-4095,
+};
+
+#define PERF_FLAG_FD_NO_GROUP (1U << 0)
+#define PERF_FLAG_FD_OUTPUT (1U << 1)
+
+/*
+ * In case some app still references the old symbols:
+ */
+
+#define __NR_perf_counter_open __NR_perf_event_open
+
+#define PR_TASK_PERF_COUNTERS_DISABLE PR_TASK_PERF_EVENTS_DISABLE
+#define PR_TASK_PERF_COUNTERS_ENABLE PR_TASK_PERF_EVENTS_ENABLE
+
+#endif /* _LINUX_PERF_COUNTER_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ae9d9ed6df2a..acefaf71e6dd 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1,15 +1,15 @@
/*
- * Performance events:
+ * Performance events:
*
* Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
*
- * Data type definitions, declarations, prototypes.
+ * Data type definitions, declarations, prototypes.
*
* Started by: Thomas Gleixner and Ingo Molnar
*
- * For licencing details see kernel-base/COPYING
+ * For licencing details see kernel-base/COPYING
*/
#ifndef _LINUX_PERF_EVENT_H
#define _LINUX_PERF_EVENT_H
@@ -131,19 +131,19 @@ enum perf_event_sample_format {
* as specified by attr.read_format:
*
* struct read_format {
- * { u64 value;
- * { u64 time_enabled; } && PERF_FORMAT_ENABLED
- * { u64 time_running; } && PERF_FORMAT_RUNNING
- * { u64 id; } && PERF_FORMAT_ID
- * } && !PERF_FORMAT_GROUP
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
*
- * { u64 nr;
- * { u64 time_enabled; } && PERF_FORMAT_ENABLED
- * { u64 time_running; } && PERF_FORMAT_RUNNING
- * { u64 value;
- * { u64 id; } && PERF_FORMAT_ID
- * } cntr[nr];
- * } && PERF_FORMAT_GROUP
+ * { u64 nr;
+ * { u64 time_enabled; } && PERF_FORMAT_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * } && PERF_FORMAT_GROUP
* };
*/
enum perf_event_read_format {
@@ -152,7 +152,7 @@ enum perf_event_read_format {
PERF_FORMAT_ID = 1U << 2,
PERF_FORMAT_GROUP = 1U << 3,
- PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
};
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -216,8 +216,8 @@ struct perf_event_attr {
* Ioctls that can be done on a perf event fd:
*/
#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
-#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
-#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
#define PERF_EVENT_IOC_RESET _IO ('$', 3)
#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, u64)
#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
@@ -314,9 +314,9 @@ enum perf_event_type {
/*
* struct {
- * struct perf_event_header header;
- * u64 id;
- * u64 lost;
+ * struct perf_event_header header;
+ * u64 id;
+ * u64 lost;
* };
*/
PERF_RECORD_LOST = 2,
@@ -383,23 +383,23 @@ enum perf_event_type {
* { u64 id; } && PERF_SAMPLE_ID
* { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
* { u32 cpu, res; } && PERF_SAMPLE_CPU
- * { u64 period; } && PERF_SAMPLE_PERIOD
+ * { u64 period; } && PERF_SAMPLE_PERIOD
*
* { struct read_format values; } && PERF_SAMPLE_READ
*
* { u64 nr,
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
*
- * #
- * # The RAW record below is opaque data wrt the ABI
- * #
- * # That is, the ABI doesn't make any promises wrt to
- * # the stability of its content, it may vary depending
- * # on event_id, hardware, kernel version and phase of
- * # the moon.
- * #
- * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
- * #
+ * #
+ * # The RAW record below is opaque data wrt the ABI
+ * #
+ * # That is, the ABI doesn't make any promises wrt
+ * # the stability of its content, it may vary depending
+ * # on event, hardware, kernel version and phase of
+ * # the moon.
+ * #
+ * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ * #
*
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
@@ -503,10 +503,10 @@ struct pmu {
* enum perf_event_active_state - the states of a event
*/
enum perf_event_active_state {
- PERF_EVENT_STATE_ERROR = -2,
+ PERF_EVENT_STATE_ERROR = -2,
PERF_EVENT_STATE_OFF = -1,
PERF_EVENT_STATE_INACTIVE = 0,
- PERF_EVENT_STATE_ACTIVE = 1,
+ PERF_EVENT_STATE_ACTIVE = 1,
};
struct file;
@@ -529,7 +529,7 @@ struct perf_mmap_data {
long watermark; /* wakeup watermark */
- struct perf_event_mmap_page *user_page;
+ struct perf_event_mmap_page *user_page;
void *data_pages[0];
};
@@ -694,14 +694,14 @@ struct perf_cpu_context {
};
struct perf_output_handle {
- struct perf_event *event;
- struct perf_mmap_data *data;
- unsigned long head;
- unsigned long offset;
- int nmi;
- int sample;
- int locked;
- unsigned long flags;
+ struct perf_event *event;
+ struct perf_mmap_data *data;
+ unsigned long head;
+ unsigned long offset;
+ int nmi;
+ int sample;
+ int locked;
+ unsigned long flags;
};
#ifdef CONFIG_PERF_EVENTS
@@ -829,22 +829,22 @@ static inline void
perf_event_task_sched_out(struct task_struct *task,
struct task_struct *next, int cpu) { }
static inline void
-perf_event_task_tick(struct task_struct *task, int cpu) { }
+perf_event_task_tick(struct task_struct *task, int cpu) { }
static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
-static inline void perf_event_do_pending(void) { }
-static inline void perf_event_print_debug(void) { }
+static inline void perf_event_do_pending(void) { }
+static inline void perf_event_print_debug(void) { }
static inline void perf_disable(void) { }
static inline void perf_enable(void) { }
-static inline int perf_event_task_disable(void) { return -EINVAL; }
-static inline int perf_event_task_enable(void) { return -EINVAL; }
+static inline int perf_event_task_disable(void) { return -EINVAL; }
+static inline int perf_event_task_enable(void) { return -EINVAL; }
static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { }
-static inline void perf_event_mmap(struct vm_area_struct *vma) { }
+static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_comm(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
diff --git a/init/Kconfig b/init/Kconfig
index cfdf5c322806..706728be312f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -920,26 +920,31 @@ config HAVE_PERF_EVENTS
help
See tools/perf/design.txt for details.
-menu "Performance Counters"
+menu "Kernel Performance Events And Counters"
config PERF_EVENTS
- bool "Kernel Performance Counters"
- default y if PROFILING
+ bool "Kernel performance events and counters"
+ default y if (PROFILING || PERF_COUNTERS)
depends on HAVE_PERF_EVENTS
select ANON_INODES
help
- Enable kernel support for performance counter hardware.
+ Enable kernel support for various performance events provided
+ by software and hardware.
- Performance counters are special hardware registers available
- on most modern CPUs. These registers count the number of certain
+ Software events are supported either built-in or via the
+ use of generic tracepoints.
+
+ Most modern CPUs support performance events via performance
+ counter registers. These registers count the number of certain
types of hw events: such as instructions executed, cachemisses
suffered, or branches mis-predicted - without slowing down the
kernel or applications. These registers can also trigger interrupts
when a threshold number of events have passed - and can thus be
used to profile the code that runs on that CPU.
- The Linux Performance Counter subsystem provides an abstraction of
- these hardware capabilities, available via a system call. It
+ The Linux Performance Event subsystem provides an abstraction of
+ these software and hardware event capabilities, available via a
+ system call and used by the "perf" utility in tools/perf/. It
provides per task and per CPU counters, and it provides event
capabilities on top of those.
@@ -950,14 +955,26 @@ config EVENT_PROFILE
depends on PERF_EVENTS && EVENT_TRACING
default y
help
- Allow the use of tracepoints as software performance counters.
+ Allow the use of tracepoints as software performance events.
- When this is enabled, you can create perf counters based on
+ When this is enabled, you can create perf events based on
tracepoints using PERF_TYPE_TRACEPOINT and the tracepoint ID
found in debugfs://tracing/events/*/*/id. (The -e/--events
option to the perf tool can parse and interpret symbolic
tracepoints, in the subsystem:tracepoint_name format.)
+config PERF_COUNTERS
+ bool "Kernel performance counters (old config option)"
+ depends on HAVE_PERF_EVENTS
+ help
+ This config has been obsoleted by the PERF_EVENTS
+ config option - please see that one for details.
+
+ It has no effect on the kernel whether you enable
+ it or not, it is a compatibility placeholder.
+
+ Say N if unsure.
+
endmenu
config VM_EVENT_COUNTERS
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 6e8b99a04e1e..76ac4db405e9 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1,12 +1,12 @@
/*
- * Performance event core code
+ * Performance events core code:
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
* Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
* Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
- * For licensing details see kernel-base/COPYING
+ * For licensing details see kernel-base/COPYING
*/
#include <linux/fs.h>