Patches contributed by Eötvös Loránd University
commit 983f2163e7fdf11a15e05816de243f93f07eafca
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Sep 15 12:29:20 2009 +0200
MAINTAINERS: Update tracing tree details
Acked-by: Steven Rostedt <srostedt@redhat.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/MAINTAINERS b/MAINTAINERS
index 8dca9d89c6c1..1505129ec5a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2118,13 +2118,16 @@ F: Documentation/filesystems/caching/
F: fs/fscache/
F: include/linux/fscache*.h
-FTRACE
+TRACING
M: Steven Rostedt <rostedt@goodmis.org>
+M: Frederic Weisbecker <fweisbec@gmail.com>
+M: Ingo Molnar <mingo@redhat.com>
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core
S: Maintained
F: Documentation/trace/ftrace.txt
F: arch/*/*/*/ftrace.h
F: arch/*/kernel/ftrace.c
-F: include/*/ftrace.h
+F: include/*/ftrace.h include/trace/ include/linux/trace*.h
F: kernel/trace/
FUJITSU FR-V (FRV) PORT
commit dca2d6ac09d9ef59ff46820d4f0c94b08a671202
Merge: d6a65dffb30d 18240904960a
Author: Ingo Molnar <mingo@elte.hu>
Date: Tue Sep 15 12:18:15 2009 +0200
Merge branch 'linus' into tracing/hw-breakpoints
Conflicts:
arch/x86/kernel/process_64.c
Semantic conflict fixed in:
arch/x86/kvm/x86.c
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --cc arch/x86/kernel/process_64.c
index 89c46f1259d3,ad535b683170..72edac026a78
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@@ -493,33 -489,12 +502,30 @@@ __switch_to(struct task_struct *prev_p
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
- /* If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- *
- * tsk_used_math() checks prevent calling math_state_restore(),
- * which can sleep in the case of !tsk_used_math()
+ /*
+ * Preload the FPU context, now that we've determined that the
+ * task is likely to be using it.
*/
- if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
- math_state_restore();
+ if (preload_fpu)
+ __math_state_restore();
+ /*
+ * There's a problem with moving the arch_install_thread_hw_breakpoint()
+ * call before current is updated. Suppose a kernel breakpoint is
+ * triggered in between the two, the hw-breakpoint handler will see that
+ * the 'current' task does not have TIF_DEBUG flag set and will think it
+ * is leftover from an old task (lazy switching) and will erase it. Then
+ * until the next context switch, no user-breakpoints will be installed.
+ *
+ * The real problem is that it's impossible to update both current and
+ * physical debug registers at the same instant, so there will always be
+ * a window in which they disagree and a breakpoint might get triggered.
+ * Since we use lazy switching, we are forced to assume that a
+ * disagreement means that current is correct and the exception is due
+ * to lazy debug register switching.
+ */
+ if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
+ arch_install_thread_hw_breakpoint(next_p);
+
return prev_p;
}
diff --cc arch/x86/kvm/x86.c
index 3d4529011828,be451ee44249..74029f50b26a
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -3312,18 -3638,17 +3638,17 @@@ static int vcpu_enter_guest(struct kvm_
set_debugreg(vcpu->arch.eff_db[3], 3);
}
- KVMTRACE_0D(VMENTRY, vcpu, entryexit);
+ trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu, kvm_run);
- if (unlikely(vcpu->arch.switch_db_regs)) {
- set_debugreg(0, 7);
- set_debugreg(vcpu->arch.host_db[0], 0);
- set_debugreg(vcpu->arch.host_db[1], 1);
- set_debugreg(vcpu->arch.host_db[2], 2);
- set_debugreg(vcpu->arch.host_db[3], 3);
+ if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
- set_debugreg(current->thread.debugreg0, 0);
- set_debugreg(current->thread.debugreg1, 1);
- set_debugreg(current->thread.debugreg2, 2);
- set_debugreg(current->thread.debugreg3, 3);
++ set_debugreg(current->thread.debugreg[0], 0);
++ set_debugreg(current->thread.debugreg[1], 1);
++ set_debugreg(current->thread.debugreg[2], 2);
++ set_debugreg(current->thread.debugreg[3], 3);
+ set_debugreg(current->thread.debugreg6, 6);
+ set_debugreg(current->thread.debugreg7, 7);
}
- set_debugreg(vcpu->arch.host_dr6, 6);
- set_debugreg(vcpu->arch.host_dr7, 7);
set_bit(KVM_REQ_KICK, &vcpu->requests);
local_irq_enable();
commit d11533893b31ab7806ff04bfa69ae646068610ce
Author: Ingo Molnar <mingo@elte.hu>
Date: Mon Sep 14 18:22:53 2009 +0200
perf sched: Fix 'perf sched latency' output on 32-bit systems
Before:
-----------------------------------------------------------------------------------
Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |
-----------------------------------------------------------------------------------
perf |4853313.251 ms | 10 | avg: 0.046 ms | max: 0.337 ms |
flush-8:0 |2426659.202 ms | 5 | avg: 0.015 ms | max: 0.016 ms |
sleep |485331.966 ms | 1 | avg: 0.012 ms | max: 0.012 ms |
ksoftirqd/1 |485331.320 ms | 1 | avg: 0.005 ms | max: 0.005 ms |
-----------------------------------------------------------------------------------
TOTAL: |8250635.739 ms | 17 |
---------------------------------------------
After:
-----------------------------------------------------------------------------------
Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |
-----------------------------------------------------------------------------------
perf | 0.206 ms | 10 | avg: 0.046 ms | max: 0.337 ms |
flush-8:0 | 2.680 ms | 5 | avg: 0.015 ms | max: 0.016 ms |
sleep | 0.662 ms | 1 | avg: 0.012 ms | max: 0.012 ms |
ksoftirqd/1 | 0.015 ms | 1 | avg: 0.005 ms | max: 0.005 ms |
-----------------------------------------------------------------------------------
TOTAL: | 3.563 ms | 17 |
---------------------------------------------
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 2ce87ef5a3e6..f856a02cd4fc 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -805,7 +805,7 @@ replay_wakeup_event(struct trace_wakeup_event *wakeup_event,
add_sched_event_wakeup(waker, timestamp, wakee);
}
-static unsigned long cpu_last_switched[MAX_CPUS];
+static u64 cpu_last_switched[MAX_CPUS];
static void
replay_switch_event(struct trace_switch_event *switch_event,
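The underlying bug, for illustration: on 32-bit systems 'unsigned long' is 32 bits wide, so the nanosecond timestamps stored in cpu_last_switched wrapped around roughly every 4.3 seconds, inflating the computed runtimes by orders of magnitude. A standalone sketch of the truncation (not part of the patch):
#include <stdio.h>
#include <stdint.h>
int main(void)
{
	uint64_t timestamp = 10000000000ULL;	/* 10 s, in nanoseconds */
	unsigned long truncated = (unsigned long)timestamp;
	/*
	 * On a 32-bit target, unsigned long is 32 bits, so the value
	 * wraps: 10000000000 mod 2^32 == 1410065408. Deltas computed
	 * against the wrapped value are garbage; storing timestamps
	 * as u64 (as the patch does) avoids the truncation.
	 */
	printf("u64: %llu, unsigned long: %lu\n",
	       (unsigned long long)timestamp, truncated);
	return 0;
}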
commit ea57c4f5203d82c7844c54cdef54e972cf4e9d1f
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Sep 13 18:15:54 2009 +0200
perf tools: Implement counter output multiplexing
Finish the -M/--multiplex option implementation:
- separate it out from group_fd
- correctly set it via the ioctl and don't mmap counters that
are multiplexed
- modify the perf record event loop to deal with buffer-less
counters.
- remove the -g option from perf sched record
- account for unordered events in perf sched latency
- (add -f to perf sched record to ease measurements)
- skip idle threads (pid==0) in latency output
The result is better latency output by 'perf sched latency':
-----------------------------------------------------------------------------------
Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |
-----------------------------------------------------------------------------------
ksoftirqd/8 | 0.071 ms | 2 | avg: 0.458 ms | max: 0.913 ms |
at-spi-registry | 0.609 ms | 19 | avg: 0.013 ms | max: 0.023 ms |
perf | 3.316 ms | 16 | avg: 0.013 ms | max: 0.054 ms |
Xorg | 0.392 ms | 19 | avg: 0.011 ms | max: 0.018 ms |
sleep | 0.537 ms | 2 | avg: 0.009 ms | max: 0.009 ms |
-----------------------------------------------------------------------------------
TOTAL: | 4.925 ms | 58 |
---------------------------------------------
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
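The essence of the scheme in create_counter() below, as a simplified sketch (hypothetical variables n, attrs, pid, cpu, mmap_pages, MAX_COUNTERS; sys_perf_counter_open() is the perf tool's syscall wrapper of this era, and PERF_COUNTER_IOC_SET_OUTPUT is the pre-rename spelling of today's PERF_EVENT_IOC_SET_OUTPUT): every counter after the first redirects its samples into the first counter's ring buffer, so only one mmap and one poll entry are needed.
/* Sketch: one shared ring buffer for all multiplexed counters. */
int fds[MAX_COUNTERS];
void *base;
fds[0] = sys_perf_counter_open(&attrs[0], pid, cpu, -1, 0);
base = mmap(NULL, (mmap_pages + 1) * page_size,
            PROT_READ | PROT_WRITE, MAP_SHARED, fds[0], 0);
for (i = 1; i < n; i++) {
	fds[i] = sys_perf_counter_open(&attrs[i], pid, cpu, -1, 0);
	/* no mmap for these: route their output into fds[0]'s buffer */
	ioctl(fds[i], PERF_COUNTER_IOC_SET_OUTPUT, fds[0]);
}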
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 79f99dba5be0..5f3127e7a615 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,6 +49,7 @@ static int inherit_stat = 0;
static int no_samples = 0;
static int sample_address = 0;
static int multiplex = 0;
+static int multiplex_fd = -1;
static long samples;
static struct timeval last_read;
@@ -471,23 +472,29 @@ static void create_counter(int counter, int cpu, pid_t pid)
*/
if (group && group_fd == -1)
group_fd = fd[nr_cpu][counter];
+ if (multiplex && multiplex_fd == -1)
+ multiplex_fd = fd[nr_cpu][counter];
- event_array[nr_poll].fd = fd[nr_cpu][counter];
- event_array[nr_poll].events = POLLIN;
- nr_poll++;
-
- mmap_array[nr_cpu][counter].counter = counter;
- mmap_array[nr_cpu][counter].prev = 0;
- mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
- mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
- PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0);
- if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
- error("failed to mmap with %d (%s)\n", errno, strerror(errno));
- exit(-1);
- }
+ if (multiplex && fd[nr_cpu][counter] != multiplex_fd) {
+ int ret;
- if (multiplex && fd[nr_cpu][counter] != group_fd)
- ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, group_fd);
+ ret = ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_SET_OUTPUT, multiplex_fd);
+ assert(ret != -1);
+ } else {
+ event_array[nr_poll].fd = fd[nr_cpu][counter];
+ event_array[nr_poll].events = POLLIN;
+ nr_poll++;
+
+ mmap_array[nr_cpu][counter].counter = counter;
+ mmap_array[nr_cpu][counter].prev = 0;
+ mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1;
+ mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
+ PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0);
+ if (mmap_array[nr_cpu][counter].base == MAP_FAILED) {
+ error("failed to mmap with %d (%s)\n", errno, strerror(errno));
+ exit(-1);
+ }
+ }
ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE);
}
@@ -618,8 +625,10 @@ static int __cmd_record(int argc, const char **argv)
int hits = samples;
for (i = 0; i < nr_cpu; i++) {
- for (counter = 0; counter < nr_counters; counter++)
- mmap_read(&mmap_array[i][counter]);
+ for (counter = 0; counter < nr_counters; counter++) {
+ if (mmap_array[i][counter].base)
+ mmap_read(&mmap_array[i][counter]);
+ }
}
if (hits == samples) {
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 3e003237c42f..2ce87ef5a3e6 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -116,6 +116,8 @@ static u64 sum_fluct;
static u64 run_avg;
static unsigned long replay_repeat = 10;
+static unsigned long nr_timestamps;
+static unsigned long unordered_timestamps;
#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
@@ -1109,8 +1111,11 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
if (atom->state != THREAD_SLEEPING)
return;
- if (atom->sched_out_time > timestamp)
+ nr_timestamps++;
+ if (atom->sched_out_time > timestamp) {
+ unordered_timestamps++;
return;
+ }
atom->state = THREAD_WAIT_CPU;
atom->wake_up_time = timestamp;
@@ -1130,6 +1135,11 @@ static void output_lat_thread(struct task_atoms *atom_list)
if (!atom_list->nb_atoms)
return;
+ /*
+ * Ignore idle threads:
+ */
+ if (!atom_list->thread->pid)
+ return;
all_runtime += atom_list->total_runtime;
all_count += atom_list->nb_atoms;
@@ -1301,8 +1311,16 @@ static void __cmd_lat(void)
}
printf("-----------------------------------------------------------------------------------\n");
- printf(" TOTAL: |%9.3f ms |%9Ld |\n",
+ printf(" TOTAL: |%9.3f ms |%9Ld |",
(double)all_runtime/1e6, all_count);
+
+ if (unordered_timestamps && nr_timestamps) {
+ printf(" INFO: %.2f%% unordered events.\n",
+ (double)unordered_timestamps/(double)nr_timestamps*100.0);
+ } else {
+ printf("\n");
+ }
+
printf("---------------------------------------------\n");
}
@@ -1667,12 +1685,13 @@ static const char *record_args[] = {
"-a",
"-R",
"-M",
- "-g",
+ "-f",
"-c", "1",
"-e", "sched:sched_switch:r",
"-e", "sched:sched_stat_wait:r",
"-e", "sched:sched_stat_sleep:r",
"-e", "sched:sched_stat_iowait:r",
+ "-e", "sched:sched_stat_runtime:r",
"-e", "sched:sched_process_exit:r",
"-e", "sched:sched_process_fork:r",
"-e", "sched:sched_wakeup:r",
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 64d6e302751a..f6a8437141c8 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -2722,8 +2722,10 @@ void print_event(int cpu, void *data, int size, unsigned long long nsecs,
type = trace_parse_common_type(data);
event = trace_find_event(type);
- if (!event)
- die("ug! no event found for type %d", type);
+ if (!event) {
+ printf("ug! no event found for type %d\n", type);
+ return;
+ }
pid = parse_common_pid(data);
commit f977bb4937857994312fff4f9c2cad336a36a932
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Sep 13 18:15:54 2009 +0200
perf_counter, sched: Add sched_stat_runtime tracepoint
This allows more precise tracking of how the scheduler accounts
(and acts upon) a task having spent N nanoseconds of CPU time.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
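Beyond perf itself, any kernel code can hook the new tracepoint through the registration stubs that TRACE_EVENT() generates; a minimal hypothetical sketch (the probe signature must match TP_PROTO below; this is the pre-2.6.35 API, before probes gained a 'void *data' argument):
#include <linux/module.h>
#include <trace/events/sched.h>
/* Hypothetical probe: called each time update_curr() accounts runtime. */
static void probe_stat_runtime(struct task_struct *tsk, u64 runtime,
			       u64 vruntime)
{
	/* e.g. accumulate per-task CPU time here */
}
static int __init stat_runtime_init(void)
{
	return register_trace_sched_stat_runtime(probe_stat_runtime);
}
static void __exit stat_runtime_exit(void)
{
	unregister_trace_sched_stat_runtime(probe_stat_runtime);
}
module_init(stat_runtime_init);
module_exit(stat_runtime_exit);
MODULE_LICENSE("GPL");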
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b48f1ad7c946..4069c43f4187 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -379,6 +379,39 @@ TRACE_EVENT(sched_stat_wait,
(unsigned long long)__entry->delay)
);
+/*
+ * Tracepoint for accounting runtime (time the task is executing
+ * on a CPU).
+ */
+TRACE_EVENT(sched_stat_runtime,
+
+ TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime),
+
+ TP_ARGS(tsk, runtime, vruntime),
+
+ TP_STRUCT__entry(
+ __array( char, comm, TASK_COMM_LEN )
+ __field( pid_t, pid )
+ __field( u64, runtime )
+ __field( u64, vruntime )
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+ __entry->pid = tsk->pid;
+ __entry->runtime = runtime;
+ __entry->vruntime = vruntime;
+ )
+ TP_perf_assign(
+ __perf_count(runtime);
+ ),
+
+ TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]",
+ __entry->comm, __entry->pid,
+ (unsigned long long)__entry->runtime,
+ (unsigned long long)__entry->vruntime)
+);
+
/*
* Tracepoint for accounting sleep time (time the task is not runnable,
* including iowait, see below).
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index aa7f84121016..a097e909e80f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -513,6 +513,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
if (entity_is_task(curr)) {
struct task_struct *curtask = task_of(curr);
+ trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
cpuacct_charge(curtask, delta_exec);
account_group_exec_runtime(curtask, delta_exec);
}
commit c13f0d3c8165e9592102687fa999da0a0d9c3724
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Sep 13 16:51:04 2009 +0200
perf sched: Add 'perf sched trace', improve documentation
Alias 'perf sched trace' to 'perf trace', for workflow completeness.
Add a bit of documentation for perf sched.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
index 056320eecb3a..1ce79198997b 100644
--- a/tools/perf/Documentation/perf-sched.txt
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -3,16 +3,32 @@ perf-sched(1)
NAME
----
-perf-sched - Read perf.data (created by perf record) and display sched output
+perf-sched - Tool to trace/measure scheduler properties (latencies)
SYNOPSIS
--------
[verse]
-'perf sched' [-i <file> | --input=file] symbol_name
+'perf sched' {record|latency|replay|trace}
DESCRIPTION
-----------
-This command reads the input file and displays the latencies recorded.
+There are four variants of perf sched:
+
+ 'perf sched record <command>' to record the scheduling events
+ of an arbitrary workload.
+
+ 'perf sched latency' to report the per task scheduling latencies
+ and other scheduling properties of the workload.
+
+ 'perf sched trace' to see a detailed trace of the workload that
+ was recorded.
+
+ 'perf sched replay' to simulate the workload that was recorded
+ via perf sched record. (this is done by starting up mockup threads
+ that mimic the workload based on the events in the trace. These
+ threads can then replay the timings (CPU runtime and sleep patterns)
+ of the workload as it occurred when it was recorded - and can repeat
+ it a number of times, measuring its performance.)
OPTIONS
-------
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index ede40c1429a8..8db0fd222f80 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1597,7 +1597,7 @@ static int read_events(void)
}
static const char * const sched_usage[] = {
- "perf sched [<options>] {record|latency|replay}",
+ "perf sched [<options>] {record|latency|replay|trace}",
NULL
};
@@ -1719,6 +1719,11 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
usage_with_options(replay_usage, replay_options);
}
__cmd_replay();
+ } else if (!strcmp(argv[0], "trace")) {
+ /*
+ * Aliased to 'perf trace' for now:
+ */
+ return cmd_trace(argc, argv, prefix);
} else {
usage_with_options(sched_usage, sched_options);
}
commit 459ec28ab404d7afcd512ce9b855959ad301605a
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Sep 13 17:33:44 2009 +0200
perf_counter: Allow mmap if paranoid checks are turned off
Before:
$ perf sched record -f sleep 1
Error: failed to mmap with 1 (Operation not permitted)
After:
$ perf sched record -f sleep 1
[ perf record: Captured and wrote 0.095 MB perf.data (~4161 samples) ]
Note, this is only allowed if the perf_counter_paranoid sysctl is set
to the most permissive (non-default) value of -1.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
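For context, perf_paranoid_tracepoint_raw() keys off the same sysctl the changelog mentions; at the time it was defined in include/linux/perf_counter.h roughly as:
static inline bool perf_paranoid_tracepoint_raw(void)
{
	return sysctl_perf_counter_paranoid > -1;
}
so with the sysctl at -1 the helper returns false and the mlock-limit check below is skipped entirely.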
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e0d91fdf0c3c..667ab25ad3d5 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2315,7 +2315,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
lock_limit >>= PAGE_SHIFT;
locked = vma->vm_mm->locked_vm + extra;
- if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
+ !capable(CAP_IPC_LOCK)) {
ret = -EPERM;
goto unlock;
}
commit 1fc35b29b4098aa3bf9fc9acb4c1615d0b5dd95d
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun Sep 13 09:44:29 2009 +0200
perf sched: Implement the 'perf sched record' subcommand
Implement the 'perf sched record' subcommand that adds a
default list of events, turns on raw sampling and system-wide
tracing and passes off the rest of the command to perf record.
This is more convenient than having to specify the events all
the time.
Before:
$ perf record -a -R -e sched:sched_switch:r -e sched:sched_stat_wait:r -e sched:sched_stat_sleep:r -e sched:sched_stat_iowait:r -e sched:sched_process_exit:r -e sched:sched_process_fork:r -e sched:sched_wakeup:r -e sched:sched_migrate_task:r -c 1 sleep 1
After:
$ perf sched record -f sleep 1
Also fix an assumption in the event string parser: it assumed
that strings passed in can be modified. (In this case they won't
be, as they come from a read-only constant section.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
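The parser fix is about C string literals being read-only; a small standalone illustration (simplified relative to parse_tracepoint_event() below, hypothetical names):
#define _GNU_SOURCE		/* for strndup() on older glibc */
#include <stdlib.h>
#include <string.h>
static void split_event_spec(void)
{
	const char *spec = "sched:sched_switch:r";	/* lives in .rodata */
	const char *flags = strrchr(spec, ':');
	char *evt_name;
	/* ((char *)flags)[0] = '\0';  -- would fault: literal is read-only */
	evt_name = strndup(spec, flags - spec);		/* copy the prefix */
	/* ... use evt_name and flags + 1, then ... */
	free(evt_name);
}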
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index b72544f2b964..ede40c1429a8 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1656,6 +1656,40 @@ static void setup_sorting(void)
sort_dimension__add((char *)"pid", &cmp_pid);
}
+static const char *record_args[] = {
+ "record",
+ "-a",
+ "-R",
+ "-c", "1",
+ "-e", "sched:sched_switch:r",
+ "-e", "sched:sched_stat_wait:r",
+ "-e", "sched:sched_stat_sleep:r",
+ "-e", "sched:sched_stat_iowait:r",
+ "-e", "sched:sched_process_exit:r",
+ "-e", "sched:sched_process_fork:r",
+ "-e", "sched:sched_wakeup:r",
+ "-e", "sched:sched_migrate_task:r",
+};
+
+static int __cmd_record(int argc, const char **argv)
+{
+ unsigned int rec_argc, i, j;
+ const char **rec_argv;
+
+ rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+ for (i = 0; i < ARRAY_SIZE(record_args); i++)
+ rec_argv[i] = strdup(record_args[i]);
+
+ for (j = 1; j < (unsigned int)argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ BUG_ON(i != rec_argc);
+
+ return cmd_record(i, rec_argv, NULL);
+}
+
int cmd_sched(int argc, const char **argv, const char *prefix __used)
{
symbol__init();
@@ -1666,7 +1700,9 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
if (!argc)
usage_with_options(sched_usage, sched_options);
- if (!strncmp(argv[0], "lat", 3)) {
+ if (!strncmp(argv[0], "rec", 3)) {
+ return __cmd_record(argc, argv);
+ } else if (!strncmp(argv[0], "lat", 3)) {
trace_handler = &lat_ops;
if (argc > 1) {
argc = parse_options(argc, argv, latency_options, latency_usage, 0);
@@ -1687,6 +1723,5 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
usage_with_options(sched_usage, sched_options);
}
-
return 0;
}
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d06c66cd358b..034245e46817 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -525,7 +525,8 @@ static enum event_result parse_tracepoint_event(const char **strp,
flags = strchr(evt_name, ':');
if (flags) {
- *flags = '\0';
+ /* split it out: */
+ evt_name = strndup(evt_name, flags - evt_name);
flags++;
}
commit b5fae128e41021889777f8ead810cbd2a8b249fc
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Sep 11 12:12:54 2009 +0200
perf sched: Clean up PID sorting logic
Use a sort list for thread atoms insertion as well, instead of
hardcoding it to sort by PID.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index cc2dbd5b50eb..b72544f2b964 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -144,7 +144,7 @@ struct task_atoms {
u64 total_runtime;
};
-typedef int (*sort_thread_lat)(struct task_atoms *, struct task_atoms *);
+typedef int (*sort_fn_t)(struct task_atoms *, struct task_atoms *);
static struct rb_root atom_root, sorted_atom_root;
@@ -869,41 +869,22 @@ static struct trace_sched_handler replay_ops = {
.fork_event = replay_fork_event,
};
-static struct task_atoms *
-thread_atoms_search(struct rb_root *root, struct thread *thread)
-{
- struct rb_node *node = root->rb_node;
-
- while (node) {
- struct task_atoms *atoms;
-
- atoms = container_of(node, struct task_atoms, node);
- if (thread->pid > atoms->thread->pid)
- node = node->rb_left;
- else if (thread->pid < atoms->thread->pid)
- node = node->rb_right;
- else {
- return atoms;
- }
- }
- return NULL;
-}
-
struct sort_dimension {
const char *name;
- sort_thread_lat cmp;
+ sort_fn_t cmp;
struct list_head list;
};
static LIST_HEAD(cmp_pid);
static int
-thread_lat_cmp(struct list_head *list, struct task_atoms *l,
- struct task_atoms *r)
+thread_lat_cmp(struct list_head *list, struct task_atoms *l, struct task_atoms *r)
{
struct sort_dimension *sort;
int ret = 0;
+ BUG_ON(list_empty(list));
+
list_for_each_entry(sort, list, list) {
ret = sort->cmp(l, r);
if (ret)
@@ -913,6 +894,32 @@ thread_lat_cmp(struct list_head *list, struct task_atoms *l,
return ret;
}
+static struct task_atoms *
+thread_atoms_search(struct rb_root *root, struct thread *thread,
+ struct list_head *sort_list)
+{
+ struct rb_node *node = root->rb_node;
+ struct task_atoms key = { .thread = thread };
+
+ while (node) {
+ struct task_atoms *atoms;
+ int cmp;
+
+ atoms = container_of(node, struct task_atoms, node);
+
+ cmp = thread_lat_cmp(sort_list, &key, atoms);
+ if (cmp > 0)
+ node = node->rb_left;
+ else if (cmp < 0)
+ node = node->rb_right;
+ else {
+ BUG_ON(thread != atoms->thread);
+ return atoms;
+ }
+ }
+ return NULL;
+}
+
static void
__thread_latency_insert(struct rb_root *root, struct task_atoms *data,
struct list_head *sort_list)
@@ -1049,18 +1056,18 @@ latency_switch_event(struct trace_switch_event *switch_event,
sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
- in_atoms = thread_atoms_search(&atom_root, sched_in);
+ in_atoms = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
if (!in_atoms) {
thread_atoms_insert(sched_in);
- in_atoms = thread_atoms_search(&atom_root, sched_in);
+ in_atoms = thread_atoms_search(&atom_root, sched_in, &cmp_pid);
if (!in_atoms)
die("in-atom: Internal tree error");
}
- out_atoms = thread_atoms_search(&atom_root, sched_out);
+ out_atoms = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
if (!out_atoms) {
thread_atoms_insert(sched_out);
- out_atoms = thread_atoms_search(&atom_root, sched_out);
+ out_atoms = thread_atoms_search(&atom_root, sched_out, &cmp_pid);
if (!out_atoms)
die("out-atom: Internal tree error");
}
@@ -1085,7 +1092,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
return;
wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
- atoms = thread_atoms_search(&atom_root, wakee);
+ atoms = thread_atoms_search(&atom_root, wakee, &cmp_pid);
if (!atoms) {
thread_atoms_insert(wakee);
return;
@@ -1136,7 +1143,6 @@ static void output_lat_thread(struct task_atoms *atom_list)
static int pid_cmp(struct task_atoms *l, struct task_atoms *r)
{
-
if (l->thread->pid < r->thread->pid)
return -1;
if (l->thread->pid > r->thread->pid)
@@ -1146,8 +1152,8 @@ static int pid_cmp(struct task_atoms *l, struct task_atoms *r)
}
static struct sort_dimension pid_sort_dimension = {
- .name = "pid",
- .cmp = pid_cmp,
+ .name = "pid",
+ .cmp = pid_cmp,
};
static int avg_cmp(struct task_atoms *l, struct task_atoms *r)
@@ -1172,8 +1178,8 @@ static int avg_cmp(struct task_atoms *l, struct task_atoms *r)
}
static struct sort_dimension avg_sort_dimension = {
- .name = "avg",
- .cmp = avg_cmp,
+ .name = "avg",
+ .cmp = avg_cmp,
};
static int max_cmp(struct task_atoms *l, struct task_atoms *r)
@@ -1187,8 +1193,8 @@ static int max_cmp(struct task_atoms *l, struct task_atoms *r)
}
static struct sort_dimension max_sort_dimension = {
- .name = "max",
- .cmp = max_cmp,
+ .name = "max",
+ .cmp = max_cmp,
};
static int switch_cmp(struct task_atoms *l, struct task_atoms *r)
@@ -1202,8 +1208,8 @@ static int switch_cmp(struct task_atoms *l, struct task_atoms *r)
}
static struct sort_dimension switch_sort_dimension = {
- .name = "switch",
- .cmp = switch_cmp,
+ .name = "switch",
+ .cmp = switch_cmp,
};
static int runtime_cmp(struct task_atoms *l, struct task_atoms *r)
@@ -1217,8 +1223,8 @@ static int runtime_cmp(struct task_atoms *l, struct task_atoms *r)
}
static struct sort_dimension runtime_sort_dimension = {
- .name = "runtime",
- .cmp = runtime_cmp,
+ .name = "runtime",
+ .cmp = runtime_cmp,
};
static struct sort_dimension *available_sorts[] = {
@@ -1666,8 +1672,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __used)
argc = parse_options(argc, argv, latency_options, latency_usage, 0);
if (argc)
usage_with_options(latency_usage, latency_options);
- setup_sorting();
}
+ setup_sorting();
__cmd_lat();
} else if (!strncmp(argv[0], "rep", 3)) {
trace_handler = &replay_ops;
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 634f2809a342..665d1f3dc977 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -4,10 +4,10 @@
#include "symbol.h"
struct thread {
- struct rb_node rb_node;
- struct list_head maps;
- pid_t pid;
- char *comm;
+ struct rb_node rb_node;
+ struct list_head maps;
+ pid_t pid;
+ char *comm;
};
int thread__set_comm(struct thread *self, const char *comm);
commit b1ffe8f3e0c96f5527a89e24410d6b0e59b3554a
Author: Ingo Molnar <mingo@elte.hu>
Date: Fri Sep 11 12:12:54 2009 +0200
perf sched: Finish latency => atom rename and misc cleanups
- Rename 'latency' field/variable names to the better 'atom' ones
- Reduce the number of #include lines and consolidate them
- Gather file scope variables at the top of the file
- Remove unused bits
No change in functionality.
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index e01cc63b98cc..cc2dbd5b50eb 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1,4 +1,5 @@
#include "builtin.h"
+#include "perf.h"
#include "util/util.h"
#include "util/cache.h"
@@ -7,15 +8,16 @@
#include "util/header.h"
#include "util/parse-options.h"
+#include "util/trace-event.h"
-#include "perf.h"
#include "util/debug.h"
-#include "util/trace-event.h"
#include <sys/types.h>
+#include <sys/prctl.h>
-
-#define MAX_CPUS 4096
+#include <semaphore.h>
+#include <pthread.h>
+#include <math.h>
static char const *input_name = "perf.data";
static int input;
@@ -33,44 +35,126 @@ static u64 sample_type;
static char default_sort_order[] = "avg, max, switch, runtime";
static char *sort_order = default_sort_order;
+#define PR_SET_NAME 15 /* Set process name */
+#define MAX_CPUS 4096
-/*
- * Scheduler benchmarks
- */
-#include <sys/resource.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/prctl.h>
+#define BUG_ON(x) assert(!(x))
-#include <linux/unistd.h>
+static u64 run_measurement_overhead;
+static u64 sleep_measurement_overhead;
-#include <semaphore.h>
-#include <pthread.h>
-#include <signal.h>
-#include <values.h>
-#include <string.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <fcntl.h>
-#include <time.h>
-#include <math.h>
+#define COMM_LEN 20
+#define SYM_LEN 129
-#include <stdio.h>
+#define MAX_PID 65536
-#define PR_SET_NAME 15 /* Set process name */
+static unsigned long nr_tasks;
-#define BUG_ON(x) assert(!(x))
+struct sched_event;
-#define DEBUG 0
+struct task_desc {
+ unsigned long nr;
+ unsigned long pid;
+ char comm[COMM_LEN];
-typedef unsigned long long nsec_t;
+ unsigned long nr_events;
+ unsigned long curr_event;
+ struct sched_event **events;
+
+ pthread_t thread;
+ sem_t sleep_sem;
-static nsec_t run_measurement_overhead;
-static nsec_t sleep_measurement_overhead;
+ sem_t ready_for_work;
+ sem_t work_done_sem;
+
+ u64 cpu_usage;
+};
+
+enum sched_event_type {
+ SCHED_EVENT_RUN,
+ SCHED_EVENT_SLEEP,
+ SCHED_EVENT_WAKEUP,
+};
+
+struct sched_event {
+ enum sched_event_type type;
+ u64 timestamp;
+ u64 duration;
+ unsigned long nr;
+ int specific_wait;
+ sem_t *wait_sem;
+ struct task_desc *wakee;
+};
+
+static struct task_desc *pid_to_task[MAX_PID];
+
+static struct task_desc **tasks;
+
+static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
+static u64 start_time;
+
+static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;
-static nsec_t get_nsecs(void)
+static unsigned long nr_run_events;
+static unsigned long nr_sleep_events;
+static unsigned long nr_wakeup_events;
+
+static unsigned long nr_sleep_corrections;
+static unsigned long nr_run_events_optimized;
+
+static unsigned long targetless_wakeups;
+static unsigned long multitarget_wakeups;
+
+static u64 cpu_usage;
+static u64 runavg_cpu_usage;
+static u64 parent_cpu_usage;
+static u64 runavg_parent_cpu_usage;
+
+static unsigned long nr_runs;
+static u64 sum_runtime;
+static u64 sum_fluct;
+static u64 run_avg;
+
+static unsigned long replay_repeat = 10;
+
+#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+
+enum thread_state {
+ THREAD_SLEEPING = 0,
+ THREAD_WAIT_CPU,
+ THREAD_SCHED_IN,
+ THREAD_IGNORE
+};
+
+struct work_atom {
+ struct list_head list;
+ enum thread_state state;
+ u64 wake_up_time;
+ u64 sched_in_time;
+ u64 runtime;
+};
+
+struct task_atoms {
+ struct list_head atom_list;
+ struct thread *thread;
+ struct rb_node node;
+ u64 max_lat;
+ u64 total_lat;
+ u64 nb_atoms;
+ u64 total_runtime;
+};
+
+typedef int (*sort_thread_lat)(struct task_atoms *, struct task_atoms *);
+
+static struct rb_root atom_root, sorted_atom_root;
+
+static u64 all_runtime;
+static u64 all_count;
+
+static int read_events(void);
+
+
+static u64 get_nsecs(void)
{
struct timespec ts;
@@ -79,16 +163,16 @@ static nsec_t get_nsecs(void)
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
-static void burn_nsecs(nsec_t nsecs)
+static void burn_nsecs(u64 nsecs)
{
- nsec_t T0 = get_nsecs(), T1;
+ u64 T0 = get_nsecs(), T1;
do {
T1 = get_nsecs();
} while (T1 + run_measurement_overhead < T0 + nsecs);
}
-static void sleep_nsecs(nsec_t nsecs)
+static void sleep_nsecs(u64 nsecs)
{
struct timespec ts;
@@ -100,7 +184,7 @@ static void sleep_nsecs(nsec_t nsecs)
static void calibrate_run_measurement_overhead(void)
{
- nsec_t T0, T1, delta, min_delta = 1000000000ULL;
+ u64 T0, T1, delta, min_delta = 1000000000ULL;
int i;
for (i = 0; i < 10; i++) {
@@ -117,7 +201,7 @@ static void calibrate_run_measurement_overhead(void)
static void calibrate_sleep_measurement_overhead(void)
{
- nsec_t T0, T1, delta, min_delta = 1000000000ULL;
+ u64 T0, T1, delta, min_delta = 1000000000ULL;
int i;
for (i = 0; i < 10; i++) {
@@ -133,67 +217,8 @@ static void calibrate_sleep_measurement_overhead(void)
printf("sleep measurement overhead: %Ld nsecs\n", min_delta);
}
-#define COMM_LEN 20
-#define SYM_LEN 129
-
-#define MAX_PID 65536
-
-static unsigned long nr_tasks;
-
-struct sched_event;
-
-struct task_desc {
- unsigned long nr;
- unsigned long pid;
- char comm[COMM_LEN];
-
- unsigned long nr_events;
- unsigned long curr_event;
- struct sched_event **events;
-
- pthread_t thread;
- sem_t sleep_sem;
-
- sem_t ready_for_work;
- sem_t work_done_sem;
-
- nsec_t cpu_usage;
-};
-
-enum sched_event_type {
- SCHED_EVENT_RUN,
- SCHED_EVENT_SLEEP,
- SCHED_EVENT_WAKEUP,
-};
-
-struct sched_event {
- enum sched_event_type type;
- nsec_t timestamp;
- nsec_t duration;
- unsigned long nr;
- int specific_wait;
- sem_t *wait_sem;
- struct task_desc *wakee;
-};
-
-static struct task_desc *pid_to_task[MAX_PID];
-
-static struct task_desc **tasks;
-
-static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER;
-static nsec_t start_time;
-
-static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static unsigned long nr_run_events;
-static unsigned long nr_sleep_events;
-static unsigned long nr_wakeup_events;
-
-static unsigned long nr_sleep_corrections;
-static unsigned long nr_run_events_optimized;
-
static struct sched_event *
-get_new_event(struct task_desc *task, nsec_t timestamp)
+get_new_event(struct task_desc *task, u64 timestamp)
{
struct sched_event *event = calloc(1, sizeof(*event));
unsigned long idx = task->nr_events;
@@ -221,7 +246,7 @@ static struct sched_event *last_event(struct task_desc *task)
}
static void
-add_sched_event_run(struct task_desc *task, nsec_t timestamp, u64 duration)
+add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration)
{
struct sched_event *event, *curr_event = last_event(task);
@@ -243,11 +268,8 @@ add_sched_event_run(struct task_desc *task, nsec_t timestamp, u64 duration)
nr_run_events++;
}
-static unsigned long targetless_wakeups;
-static unsigned long multitarget_wakeups;
-
static void
-add_sched_event_wakeup(struct task_desc *task, nsec_t timestamp,
+add_sched_event_wakeup(struct task_desc *task, u64 timestamp,
struct task_desc *wakee)
{
struct sched_event *event, *wakee_event;
@@ -275,7 +297,7 @@ add_sched_event_wakeup(struct task_desc *task, nsec_t timestamp,
}
static void
-add_sched_event_sleep(struct task_desc *task, nsec_t timestamp,
+add_sched_event_sleep(struct task_desc *task, u64 timestamp,
u64 task_state __used)
{
struct sched_event *event = get_new_event(task, timestamp);
@@ -350,7 +372,7 @@ static void
process_sched_event(struct task_desc *this_task __used, struct sched_event *event)
{
int ret = 0;
- nsec_t now;
+ u64 now;
long long delta;
now = get_nsecs();
@@ -375,10 +397,10 @@ process_sched_event(struct task_desc *this_task __used, struct sched_event *even
}
}
-static nsec_t get_cpu_usage_nsec_parent(void)
+static u64 get_cpu_usage_nsec_parent(void)
{
struct rusage ru;
- nsec_t sum;
+ u64 sum;
int err;
err = getrusage(RUSAGE_SELF, &ru);
@@ -390,12 +412,12 @@ static nsec_t get_cpu_usage_nsec_parent(void)
return sum;
}
-static nsec_t get_cpu_usage_nsec_self(void)
+static u64 get_cpu_usage_nsec_self(void)
{
char filename [] = "/proc/1234567890/sched";
unsigned long msecs, nsecs;
char *line = NULL;
- nsec_t total = 0;
+ u64 total = 0;
size_t len = 0;
ssize_t chars;
FILE *file;
@@ -423,7 +445,7 @@ static nsec_t get_cpu_usage_nsec_self(void)
static void *thread_func(void *ctx)
{
struct task_desc *this_task = ctx;
- nsec_t cpu_usage_0, cpu_usage_1;
+ u64 cpu_usage_0, cpu_usage_1;
unsigned long i, ret;
char comm2[22];
@@ -485,14 +507,9 @@ static void create_tasks(void)
}
}
-static nsec_t cpu_usage;
-static nsec_t runavg_cpu_usage;
-static nsec_t parent_cpu_usage;
-static nsec_t runavg_parent_cpu_usage;
-
static void wait_for_tasks(void)
{
- nsec_t cpu_usage_0, cpu_usage_1;
+ u64 cpu_usage_0, cpu_usage_1;
struct task_desc *task;
unsigned long i, ret;
@@ -543,16 +560,9 @@ static void wait_for_tasks(void)
}
}
-static int read_events(void);
-
-static unsigned long nr_runs;
-static nsec_t sum_runtime;
-static nsec_t sum_fluct;
-static nsec_t run_avg;
-
static void run_one_test(void)
{
- nsec_t T0, T1, delta, avg_delta, fluct, std_dev;
+ u64 T0, T1, delta, avg_delta, fluct, std_dev;
T0 = get_nsecs();
wait_for_tasks();
@@ -576,10 +586,6 @@ static void run_one_test(void)
printf("#%-3ld: %0.3f, ",
nr_runs, (double)delta/1000000.0);
-#if 0
- printf("%0.2f +- %0.2f, ",
- (double)avg_delta/1e6, (double)std_dev/1e6);
-#endif
printf("ravg: %0.2f, ",
(double)run_avg/1e6);
@@ -605,7 +611,7 @@ static void run_one_test(void)
static void test_calibrations(void)
{
- nsec_t T0, T1;
+ u64 T0, T1;
T0 = get_nsecs();
burn_nsecs(1e6);
@@ -620,8 +626,6 @@ static void test_calibrations(void)
printf("the sleep test took %Ld nsecs\n", T1-T0);
}
-static unsigned long replay_repeat = 10;
-
static void __cmd_replay(void)
{
unsigned long i;
@@ -865,47 +869,8 @@ static struct trace_sched_handler replay_ops = {
.fork_event = replay_fork_event,
};
-#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
-
-enum thread_state {
- THREAD_SLEEPING = 0,
- THREAD_WAIT_CPU,
- THREAD_SCHED_IN,
- THREAD_IGNORE
-};
-
-struct work_atom {
- struct list_head list;
- enum thread_state state;
- u64 wake_up_time;
- u64 sched_in_time;
- u64 runtime;
-};
-
-struct task_atoms {
- struct list_head snapshot_list;
- struct thread *thread;
- struct rb_node node;
- u64 max_lat;
- u64 total_lat;
- u64 nb_atoms;
- u64 total_runtime;
-};
-
-typedef int (*sort_thread_lat)(struct task_atoms *, struct task_atoms *);
-
-struct sort_dimension {
- const char *name;
- sort_thread_lat cmp;
- struct list_head list;
-};
-
-static LIST_HEAD(cmp_pid);
-
-static struct rb_root lat_snapshot_root, sorted_lat_snapshot_root;
-
static struct task_atoms *
-thread_atom_list_search(struct rb_root *root, struct thread *thread)
+thread_atoms_search(struct rb_root *root, struct thread *thread)
{
struct rb_node *node = root->rb_node;
@@ -924,6 +889,14 @@ thread_atom_list_search(struct rb_root *root, struct thread *thread)
return NULL;
}
+struct sort_dimension {
+ const char *name;
+ sort_thread_lat cmp;
+ struct list_head list;
+};
+
+static LIST_HEAD(cmp_pid);
+
static int
thread_lat_cmp(struct list_head *list, struct task_atoms *l,
struct task_atoms *r)
@@ -965,16 +938,17 @@ __thread_latency_insert(struct rb_root *root, struct task_atoms *data,
rb_insert_color(&data->node, root);
}
-static void thread_atom_list_insert(struct thread *thread)
+static void thread_atoms_insert(struct thread *thread)
{
struct task_atoms *atoms;
+
atoms = calloc(sizeof(*atoms), 1);
if (!atoms)
die("No memory");
atoms->thread = thread;
- INIT_LIST_HEAD(&atoms->snapshot_list);
- __thread_latency_insert(&lat_snapshot_root, atoms, &cmp_pid);
+ INIT_LIST_HEAD(&atoms->atom_list);
+ __thread_latency_insert(&atom_root, atoms, &cmp_pid);
}
static void
@@ -1001,50 +975,49 @@ lat_sched_out(struct task_atoms *atoms,
u64 delta,
u64 timestamp)
{
- struct work_atom *snapshot;
+ struct work_atom *atom;
- snapshot = calloc(sizeof(*snapshot), 1);
- if (!snapshot)
+ atom = calloc(sizeof(*atom), 1);
+ if (!atom)
die("Non memory");
if (sched_out_state(switch_event) == 'R') {
- snapshot->state = THREAD_WAIT_CPU;
- snapshot->wake_up_time = timestamp;
+ atom->state = THREAD_WAIT_CPU;
+ atom->wake_up_time = timestamp;
}
- snapshot->runtime = delta;
- list_add_tail(&snapshot->list, &atoms->snapshot_list);
+ atom->runtime = delta;
+ list_add_tail(&atom->list, &atoms->atom_list);
}
static void
lat_sched_in(struct task_atoms *atoms, u64 timestamp)
{
- struct work_atom *snapshot;
+ struct work_atom *atom;
u64 delta;
- if (list_empty(&atoms->snapshot_list))
+ if (list_empty(&atoms->atom_list))
return;
- snapshot = list_entry(atoms->snapshot_list.prev, struct work_atom,
- list);
+ atom = list_entry(atoms->atom_list.prev, struct work_atom, list);
- if (snapshot->state != THREAD_WAIT_CPU)
+ if (atom->state != THREAD_WAIT_CPU)
return;
- if (timestamp < snapshot->wake_up_time) {
- snapshot->state = THREAD_IGNORE;
+ if (timestamp < atom->wake_up_time) {
+ atom->state = THREAD_IGNORE;
return;
}
- snapshot->state = THREAD_SCHED_IN;
- snapshot->sched_in_time = timestamp;
+ atom->state = THREAD_SCHED_IN;
+ atom->sched_in_time = timestamp;
- delta = snapshot->sched_in_time - snapshot->wake_up_time;
+ delta = atom->sched_in_time - atom->wake_up_time;
atoms->total_lat += delta;
if (delta > atoms->max_lat)
atoms->max_lat = delta;
atoms->nb_atoms++;
- atoms->total_runtime += snapshot->runtime;
+ atoms->total_runtime += atom->runtime;
}
static void
@@ -1076,20 +1049,20 @@ latency_switch_event(struct trace_switch_event *switch_event,
sched_out = threads__findnew(switch_event->prev_pid, &threads, &last_match);
sched_in = threads__findnew(switch_event->next_pid, &threads, &last_match);
- in_atoms = thread_atom_list_search(&lat_snapshot_root, sched_in);
+ in_atoms = thread_atoms_search(&atom_root, sched_in);
if (!in_atoms) {
- thread_atom_list_insert(sched_in);
- in_atoms = thread_atom_list_search(&lat_snapshot_root, sched_in);
+ thread_atoms_insert(sched_in);
+ in_atoms = thread_atoms_search(&atom_root, sched_in);
if (!in_atoms)
- die("Internal latency tree error");
+ die("in-atom: Internal tree error");
}
- out_atoms = thread_atom_list_search(&lat_snapshot_root, sched_out);
+ out_atoms = thread_atoms_search(&atom_root, sched_out);
if (!out_atoms) {
- thread_atom_list_insert(sched_out);
- out_atoms = thread_atom_list_search(&lat_snapshot_root, sched_out);
+ thread_atoms_insert(sched_out);
+ out_atoms = thread_atoms_search(&atom_root, sched_out);
if (!out_atoms)
- die("Internal latency tree error");
+ die("out-atom: Internal tree error");
}
lat_sched_in(in_atoms, timestamp);
@@ -1104,7 +1077,7 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
struct thread *thread __used)
{
struct task_atoms *atoms;
- struct work_atom *snapshot;
+ struct work_atom *atom;
struct thread *wakee;
/* Note for later, it may be interesting to observe the failing cases */
@@ -1112,23 +1085,22 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event,
return;
wakee = threads__findnew(wakeup_event->pid, &threads, &last_match);
- atoms = thread_atom_list_search(&lat_snapshot_root, wakee);
+ atoms = thread_atoms_search(&atom_root, wakee);
if (!atoms) {
- thread_atom_list_insert(wakee);
+ thread_atoms_insert(wakee);
return;
}
- if (list_empty(&atoms->snapshot_list))
+ if (list_empty(&atoms->atom_list))
return;
- snapshot = list_entry(atoms->snapshot_list.prev, struct work_atom,
- list);
+ atom = list_entry(atoms->atom_list.prev, struct work_atom, list);
- if (snapshot->state != THREAD_SLEEPING)
+ if (atom->state != THREAD_SLEEPING)
return;
- snapshot->state = THREAD_WAIT_CPU;
- snapshot->wake_up_time = timestamp;
+ atom->state = THREAD_WAIT_CPU;
+ atom->wake_up_time = timestamp;
}
static struct trace_sched_handler lat_ops = {
@@ -1137,9 +1109,6 @@ static struct trace_sched_handler lat_ops = {
.fork_event = latency_fork_event,
};
-static u64 all_runtime;
-static u64 all_count;
-
static void output_lat_thread(struct task_atoms *atom_list)
{
int i;
@@ -1287,13 +1256,13 @@ static void sort_lat(void)
for (;;) {
struct task_atoms *data;
- node = rb_first(&lat_snapshot_root);
+ node = rb_first(&atom_root);
if (!node)
break;
- rb_erase(node, &lat_snapshot_root);
+ rb_erase(node, &atom_root);
data = rb_entry(node, struct task_atoms, node);
- __thread_latency_insert(&sorted_lat_snapshot_root, data, &sort_list);
+ __thread_latency_insert(&sorted_atom_root, data, &sort_list);
}
}
@@ -1309,7 +1278,7 @@ static void __cmd_lat(void)
printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms |\n");
printf("-----------------------------------------------------------------------------------\n");
- next = rb_first(&sorted_lat_snapshot_root);
+ next = rb_first(&sorted_atom_root);
while (next) {
struct task_atoms *atom_list;