bool use_thread_stack;
bool callstack;
unsigned int br_stack_sz;
+ unsigned int br_stack_sz_plus;
int have_sched_switch;
u32 pmu_type;
u64 kernel_start;
unsigned int range_cnt;
struct ip_callchain *chain;
+ struct branch_stack *br_stack;
};
enum switch_state {
sample->callchain = pt->chain;
}
+static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt)
+{
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += pt->br_stack_sz * sizeof(struct branch_entry);
+ return zalloc(sz);
+}
+
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+ evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ }
+
+ pt->br_stack = intel_pt_alloc_br_stack(pt);
+ if (!pt->br_stack)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+ pt->br_stack_sz, sample->ip,
+ pt->kernel_start);
+
+ sample->branch_stack = pt->br_stack;
+}
+
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
unsigned int queue_nr)
{
}
if (pt->synth_opts.last_branch) {
- size_t sz = sizeof(struct branch_stack);
-
- sz += pt->br_stack_sz * sizeof(struct branch_entry);
- ptq->last_branch = zalloc(sz);
+ ptq->last_branch = intel_pt_alloc_br_stack(pt);
if (!ptq->last_branch)
goto out_free;
}
thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
state->from_ip, state->to_ip, ptq->insn_len,
state->trace_nr, pt->callstack,
- pt->br_stack_sz,
+ pt->br_stack_sz_plus,
pt->mispred_all);
} else {
thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
if (event->header.type == PERF_RECORD_SAMPLE) {
if (pt->synth_opts.add_callchain && !sample->callchain)
intel_pt_add_callchain(pt, sample);
+ if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+ intel_pt_add_br_stack(pt, sample);
}
if (event->header.type == PERF_RECORD_AUX &&
goto err_delete_thread;
}
- if (pt->synth_opts.last_branch)
+ if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+ pt->br_stack_sz_plus = pt->br_stack_sz;
+ }
+
+ if (pt->synth_opts.add_last_branch) {
+ err = intel_pt_br_stack_init(pt);
+ if (err)
+ goto err_delete_thread;
+ /*
+ * Additional branch stack size to cater for tracing from the
+ * actual sample ip to where the sample time is recorded.
+ * Measured at about 200 branches, but generously set to 1024.
+ * If kernel space is not being traced, then add just 1 for the
+ * branch to kernel space.
+ */
+ if (intel_pt_tracing_kernel(pt))
+ pt->br_stack_sz_plus += 1024;
+ else
+ pt->br_stack_sz_plus += 1;
+ }
pt->use_thread_stack = pt->synth_opts.callchain ||
pt->synth_opts.add_callchain ||
pt->synth_opts.thread_stack ||
- pt->synth_opts.last_branch;
+ pt->synth_opts.last_branch ||
+ pt->synth_opts.add_last_branch;
pt->callstack = pt->synth_opts.callchain ||
pt->synth_opts.add_callchain ||