perf cs_etm: Set the time field in the synthetic samples

author German Gomez <german.gomez@arm.com>

Fri, 20 Jan 2023 14:37:00 +0000 (14:37 +0000)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Sun, 22 Jan 2023 21:17:45 +0000 (18:17 -0300)
author German Gomez <german.gomez@arm.com>
Fri, 20 Jan 2023 14:37:00 +0000 (14:37 +0000)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Sun, 22 Jan 2023 21:17:45 +0000 (18:17 -0300)
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c

index 31fa3b45134a25fb047594e64b907e2648e94702..440fe844ed17a5ccb0f6a361690969952ce4c081 100644 (file)
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -30,6 +30,15 @@
  #endif
  #endif
  
+/*
+ * Assume 0.1ns elapses per instruction, i.e. 10 instructions per ns. This
+ * would be the case with a theoretical 10GHz core executing 1 instruction
+ * per cycle. Used to estimate the sample time for synthesized instructions
+ * because Coresight only emits a timestamp for a range of instructions
+ * rather than per instruction. Only used in this file, hence static.
+ */
+static const u32 INSTR_PER_NS = 10;
+
  struct cs_etm_decoder {
         void *data;
         void (*packet_printer)(const char *msg);
@@ -112,6 +121,20 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
         return 1;
  }
  
+/*
+ * Convert *instr_count to whole nanoseconds at INSTR_PER_NS instructions
+ * per nanosecond and return that value.
+ *
+ * *instr_count is left holding the instructions below one full nanosecond.
+ */
+static u32 cs_etm_decoder__dec_instr_count_to_ns(u32 *instr_count)
+{
+       const u32 elapsed_ns = *instr_count / INSTR_PER_NS;
+
+       *instr_count -= elapsed_ns * INSTR_PER_NS;
+       return elapsed_ns;
+}
+
  static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
                                             ocsd_etmv3_cfg *config)
  {
@@ -267,8 +290,8 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
         packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
  
         /* Estimate the timestamp for the next range packet */
-       packet_queue->next_cs_timestamp += packet_queue->instr_count;
-       packet_queue->instr_count = 0;
+       packet_queue->next_cs_timestamp +=
+               cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count);
  
         /* Tell the front end which traceid_queue needs attention */
         cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
@@ -283,24 +306,31 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
                                   const ocsd_trc_index_t indx)
  {
         struct cs_etm_packet_queue *packet_queue;
+       u64 converted_timestamp;
  
         /* First get the packet queue for this traceID */
         packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
         if (!packet_queue)
                 return OCSD_RESP_FATAL_SYS_ERR;
  
+       /*
+        * Coresight timestamps are raw timer values which need to be scaled to ns. Assume
+        * 0 is a bad value so don't try to convert it.
+        */
+       converted_timestamp = elem->timestamp ?
+                               cs_etm__convert_sample_time(etmq, elem->timestamp) : 0;
+
         /*
          * We've seen a timestamp packet before - simply record the new value.
          * Function do_soft_timestamp() will report the value to the front end,
          * hence asking the decoder to keep decoding rather than stopping.
          */
         if (packet_queue->cs_timestamp) {
-               packet_queue->next_cs_timestamp = elem->timestamp;
+               packet_queue->next_cs_timestamp = converted_timestamp;
                 return OCSD_RESP_CONT;
         }
  
-
-       if (!elem->timestamp) {
+       if (!converted_timestamp) {
                 /*
                  * Zero timestamps can be seen due to misconfiguration or hardware bugs.
                  * Warn once, and don't try to subtract instr_count as it would result in an
@@ -312,7 +342,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
                                         ". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n",
                                         indx);
  
-       } else if (packet_queue->instr_count > elem->timestamp) {
+       } else if (packet_queue->instr_count / INSTR_PER_NS > converted_timestamp) {
                 /*
                  * Sanity check that the elem->timestamp - packet_queue->instr_count would not
                  * result in an underflow. Warn and clamp at 0 if it would.
@@ -327,9 +357,10 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
                  * which instructions started by subtracting the number of instructions
                  * executed to the timestamp.
                  */
-               packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
+               packet_queue->cs_timestamp = converted_timestamp -
+                                               (packet_queue->instr_count / INSTR_PER_NS);
         }
-       packet_queue->next_cs_timestamp = elem->timestamp;
+       packet_queue->next_cs_timestamp = converted_timestamp;
         packet_queue->instr_count = 0;
  
         /* Tell the front end which traceid_queue needs attention */
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c

index 879576d5f89975ca35a6f8a9a222ddcc6e902a88..f65bac5ddbdb6bb7b700d3aaeb4d6707510b15cc 100644 (file)
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -35,6 +35,7 @@
  #include "tool.h"
  #include "thread.h"
  #include "thread-stack.h"
+#include "tsc.h"
  #include <tools/libc_compat.h>
  #include "util/synthetic-events.h"
  
@@ -46,10 +47,12 @@ struct cs_etm_auxtrace {
         struct perf_session *session;
         struct machine *machine;
         struct thread *unknown_thread;
+       struct perf_tsc_conversion tc;
  
         u8 timeless_decoding;
         u8 snapshot_mode;
         u8 data_queued;
+       u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
  
         int num_cpu;
         u64 latest_kernel_timestamp;
@@ -1161,6 +1164,30 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
                            sample->insn_len, (void *)sample->insn);
  }
  
+u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
+{
+       struct cs_etm_auxtrace *aux = etmq->etm;
+
+       /* Without virtual timestamps the value is passed through unchanged. */
+       if (!aux->has_virtual_ts)
+               return cs_timestamp;
+       return tsc_to_perf_time(cs_timestamp, &aux->tc);
+}
+
+static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
+                                              struct cs_etm_traceid_queue *tidq)
+{
+       struct cs_etm_auxtrace *aux = etmq->etm;
+
+       /* Timeless decoding: samples get a zero time field. */
+       if (aux->timeless_decoding)
+               return 0;
+       /* Use the queue's trace timestamp, else latest_kernel_timestamp. */
+       if (aux->has_virtual_ts)
+               return tidq->packet_queue.cs_timestamp;
+       return aux->latest_kernel_timestamp;
+}
+
  static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
                                             struct cs_etm_traceid_queue *tidq,
                                             u64 addr, u64 period)
@@ -1174,8 +1201,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
         event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
         event->sample.header.size = sizeof(struct perf_event_header);
  
-       if (!etm->timeless_decoding)
-               sample.time = etm->latest_kernel_timestamp;
+       /* Set time field based on etm auxtrace config. */
+       sample.time = cs_etm__resolve_sample_time(etmq, tidq);
+
         sample.ip = addr;
         sample.pid = tidq->pid;
         sample.tid = tidq->tid;
@@ -1232,8 +1260,9 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
         event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
         event->sample.header.size = sizeof(struct perf_event_header);
  
-       if (!etm->timeless_decoding)
-               sample.time = etm->latest_kernel_timestamp;
+       /* Set time field based on etm auxtrace config. */
+       sample.time = cs_etm__resolve_sample_time(etmq, tidq);
+
         sample.ip = ip;
         sample.pid = tidq->pid;
         sample.tid = tidq->tid;
@@ -2746,12 +2775,42 @@ static int cs_etm__queue_aux_records(struct perf_session *session)
         return 0;
  }
  
+#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] > \
+                                 (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
+
+/*
+ * Return true only if every ETM has a TS_SOURCE param (see HAS_PARAM) equal to 1, i.e.
+ * virtual timestamps. Any other value, a missing param or an unknown magic gives false.
+ */
+static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
+{
+       int j;
+
+       for (j = 0; j < num_cpu; j++) {
+               switch (metadata[j][CS_ETM_MAGIC]) {
+               case __perf_cs_etmv4_magic:
+                       if (!HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
+                               return false;
+                       break;
+               case __perf_cs_ete_magic:
+                       if (!HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
+                               return false;
+                       break;
+               default:
+                       /* Unknown / unsupported magic number. */
+                       return false;
+               }
+       }
+       return true;
+}
+
  int cs_etm__process_auxtrace_info_full(union perf_event *event,
                                        struct perf_session *session)
  {
         struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
         struct cs_etm_auxtrace *etm = NULL;
         struct int_node *inode;
+       struct perf_record_time_conv *tc = &session->time_conv;
         int event_header_size = sizeof(struct perf_event_header);
         int total_size = auxtrace_info->header.size;
         int priv_size = 0;
@@ -2886,6 +2945,13 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
         etm->auxtrace_type = auxtrace_info->type;
         etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);
  
+       /* Use virtual timestamps if all ETMs report ts_source = 1 */
+       etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
+
+       if (!etm->has_virtual_ts)
+               ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
+                           "The time field of the samples will not be set accurately.\n\n");
+
         etm->auxtrace.process_event = cs_etm__process_event;
         etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
         etm->auxtrace.flush_events = cs_etm__flush_events;
@@ -2915,6 +2981,15 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
                 goto err_delete_thread;
         }
  
+       etm->tc.time_shift = tc->time_shift;
+       etm->tc.time_mult = tc->time_mult;
+       etm->tc.time_zero = tc->time_zero;
+       if (event_contains(*tc, time_cycles)) {
+               etm->tc.time_cycles = tc->time_cycles;
+               etm->tc.time_mask = tc->time_mask;
+               etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
+               etm->tc.cap_user_time_short = tc->cap_user_time_short;
+       }
         err = cs_etm__synth_events(etm, session);
         if (err)
                 goto err_delete_thread;
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h

index ad790930bcbc04f76f8ad517cbaa5c4964761942..98a4f7113d2f55810e761102657b7dcf0a467777 100644 (file)
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -192,7 +192,7 @@ struct cs_etm_packet_queue {
         u32 head;
         u32 tail;
         u32 instr_count;
-       u64 cs_timestamp;
+       u64 cs_timestamp; /* Timestamp from trace data, converted to ns if possible */
         u64 next_cs_timestamp;
         struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
  };
@@ -231,6 +231,7 @@ struct cs_etm_packet_queue
  *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
  int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
                                        struct perf_session *session __maybe_unused);
+u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp);
  #else
  static inline int
  cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
author	German Gomez <german.gomez@arm.com>
	Fri, 20 Jan 2023 14:37:00 +0000 (14:37 +0000)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Sun, 22 Jan 2023 21:17:45 +0000 (18:17 -0300)
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c		patch \| blob \| history
tools/perf/util/cs-etm.c		patch \| blob \| history
tools/perf/util/cs-etm.h		patch \| blob \| history