struct perf_cpu_map *cpus;
struct perf_thread_map *threads;
enum aggr_mode aggr_mode;
+ u32 aggr_level;
};
static struct perf_stat perf_stat;
static struct perf_stat_config stat_config = {
.aggr_mode = AGGR_GLOBAL,
+ .aggr_level = MAX_CACHE_LVL + 1,
.scale = true,
.unit_width = 4, /* strlen("unit") */
.run_count = 1,
OPT_END()
};
+/**
+ * Calculate the cache instance ID from the map in
+ * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
+ * Cache instance ID is the first CPU reported in the shared_cpu_list file.
+ */
+static int cpu__get_cache_id_from_map(struct perf_cpu cpu, char *map)
+{
+ int id;
+ struct perf_cpu_map *cpu_map = perf_cpu_map__new(map);
+
+ /*
+ * If the map contains no CPU, consider the current CPU to
+ * be the first online CPU in the cache domain else use the
+ * first online CPU of the cache domain as the ID.
+ */
+ if (perf_cpu_map__empty(cpu_map))
+ id = cpu.cpu;
+ else
+ id = perf_cpu_map__cpu(cpu_map, 0).cpu;
+
+ /* Free the perf_cpu_map used to find the cache ID */
+ perf_cpu_map__put(cpu_map);
+
+ return id;
+}
+
+/**
+ * cpu__get_cache_id - Returns 0 if successful in populating the
+ * cache level and cache id. Cache level is read from
+ * /sys/devices/system/cpu/cpuX/cache/indexY/level where as cache instance ID
+ * is the first CPU reported by
+ * /sys/devices/system/cpu/cpuX/cache/indexY/shared_cpu_list
+ */
+static int cpu__get_cache_details(struct perf_cpu cpu, struct perf_cache *cache)
+{
+ int ret = 0;
+ u32 cache_level = stat_config.aggr_level;
+ struct cpu_cache_level caches[MAX_CACHE_LVL];
+ u32 i = 0, caches_cnt = 0;
+
+ cache->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
+ cache->cache = -1;
+
+ ret = build_caches_for_cpu(cpu.cpu, caches, &caches_cnt);
+ if (ret) {
+ /*
+ * If caches_cnt is not 0, cpu_cache_level data
+ * was allocated when building the topology.
+ * Free the allocated data before returning.
+ */
+ if (caches_cnt)
+ goto free_caches;
+
+ return ret;
+ }
+
+ if (!caches_cnt)
+ return -1;
+
+ /*
+ * Save the data for the highest level if no
+ * level was specified by the user.
+ */
+ if (cache_level > MAX_CACHE_LVL) {
+ int max_level_index = 0;
+
+ for (i = 1; i < caches_cnt; ++i) {
+ if (caches[i].level > caches[max_level_index].level)
+ max_level_index = i;
+ }
+
+ cache->cache_lvl = caches[max_level_index].level;
+ cache->cache = cpu__get_cache_id_from_map(cpu, caches[max_level_index].map);
+
+ /* Reset i to 0 to free entire caches[] */
+ i = 0;
+ goto free_caches;
+ }
+
+ for (i = 0; i < caches_cnt; ++i) {
+ if (caches[i].level == cache_level) {
+ cache->cache_lvl = cache_level;
+ cache->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
+ }
+
+ cpu_cache_level__free(&caches[i]);
+ }
+
+free_caches:
+ /*
+ * Free all the allocated cpu_cache_level data.
+ */
+ while (i < caches_cnt)
+ cpu_cache_level__free(&caches[i++]);
+
+ return ret;
+}
+
+/**
+ * aggr_cpu_id__cache - Create an aggr_cpu_id with cache instache ID, cache
+ * level, die and socket populated with the cache instache ID, cache level,
+ * die and socket for cpu. The function signature is compatible with
+ * aggr_cpu_id_get_t.
+ */
+static struct aggr_cpu_id aggr_cpu_id__cache(struct perf_cpu cpu, void *data)
+{
+ int ret;
+ struct aggr_cpu_id id;
+ struct perf_cache cache;
+
+ id = aggr_cpu_id__die(cpu, data);
+ if (aggr_cpu_id__is_empty(&id))
+ return id;
+
+ ret = cpu__get_cache_details(cpu, &cache);
+ if (ret)
+ return id;
+
+ id.cache_lvl = cache.cache_lvl;
+ id.cache = cache.cache;
+ return id;
+}
+
static const char *const aggr_mode__string[] = {
[AGGR_CORE] = "core",
+ [AGGR_CACHE] = "cache",
[AGGR_DIE] = "die",
[AGGR_GLOBAL] = "global",
[AGGR_NODE] = "node",
return aggr_cpu_id__die(cpu, /*data=*/NULL);
}
+static struct aggr_cpu_id perf_stat__get_cache_id(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__cache(cpu, /*data=*/NULL);
+}
+
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
}
+static struct aggr_cpu_id perf_stat__get_cache_id_cached(struct perf_stat_config *config,
+ struct perf_cpu cpu)
+{
+ return perf_stat__get_aggr(config, perf_stat__get_cache_id, cpu);
+}
+
static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
struct perf_cpu cpu)
{
return aggr_cpu_id__socket;
case AGGR_DIE:
return aggr_cpu_id__die;
+ case AGGR_CACHE:
+ return aggr_cpu_id__cache;
case AGGR_CORE:
return aggr_cpu_id__core;
case AGGR_NODE:
return perf_stat__get_socket_cached;
case AGGR_DIE:
return perf_stat__get_die_cached;
+ case AGGR_CACHE:
+ return perf_stat__get_cache_id_cached;
case AGGR_CORE:
return perf_stat__get_core_cached;
case AGGR_NODE:
return id;
}
+static void perf_env__get_cache_id_for_cpu(struct perf_cpu cpu, struct perf_env *env,
+ u32 cache_level, struct aggr_cpu_id *id)
+{
+ int i;
+ int caches_cnt = env->caches_cnt;
+ struct cpu_cache_level *caches = env->caches;
+
+ id->cache_lvl = (cache_level > MAX_CACHE_LVL) ? 0 : cache_level;
+ id->cache = -1;
+
+ if (!caches_cnt)
+ return;
+
+ for (i = caches_cnt - 1; i > -1; --i) {
+ struct perf_cpu_map *cpu_map;
+ int map_contains_cpu;
+
+ /*
+ * If user has not specified a level, find the fist level with
+ * the cpu in the map. Since building the map is expensive, do
+ * this only if levels match.
+ */
+ if (cache_level <= MAX_CACHE_LVL && caches[i].level != cache_level)
+ continue;
+
+ cpu_map = perf_cpu_map__new(caches[i].map);
+ map_contains_cpu = perf_cpu_map__idx(cpu_map, cpu);
+ perf_cpu_map__put(cpu_map);
+
+ if (map_contains_cpu != -1) {
+ id->cache_lvl = caches[i].level;
+ id->cache = cpu__get_cache_id_from_map(cpu, caches[i].map);
+ return;
+ }
+ }
+}
+
+static struct aggr_cpu_id perf_env__get_cache_aggr_by_cpu(struct perf_cpu cpu,
+ void *data)
+{
+ struct perf_env *env = data;
+ struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+ if (cpu.cpu != -1) {
+ u32 cache_level = (perf_stat.aggr_level) ?: stat_config.aggr_level;
+
+ id.socket = env->cpu[cpu.cpu].socket_id;
+ id.die = env->cpu[cpu.cpu].die_id;
+ perf_env__get_cache_id_for_cpu(cpu, env, cache_level, &id);
+ }
+
+ return id;
+}
+
static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
+static struct aggr_cpu_id perf_stat__get_cache_file(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return perf_env__get_cache_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu cpu)
{
return perf_env__get_socket_aggr_by_cpu;
case AGGR_DIE:
return perf_env__get_die_aggr_by_cpu;
+ case AGGR_CACHE:
+ return perf_env__get_cache_aggr_by_cpu;
case AGGR_CORE:
return perf_env__get_core_aggr_by_cpu;
case AGGR_NODE:
return perf_stat__get_socket_file;
case AGGR_DIE:
return perf_stat__get_die_file;
+ case AGGR_CACHE:
+ return perf_stat__get_cache_file;
case AGGR_CORE:
return perf_stat__get_core_file;
case AGGR_NODE:
.stat = perf_event__process_stat_event,
.stat_round = process_stat_round_event,
},
- .aggr_mode = AGGR_UNSET,
+ .aggr_mode = AGGR_UNSET,
+ .aggr_level = 0,
};
static int __cmd_report(int argc, const char **argv)
static int aggr_header_lens[] = {
[AGGR_CORE] = 18,
+ [AGGR_CACHE] = 22,
[AGGR_DIE] = 12,
[AGGR_SOCKET] = 6,
[AGGR_NODE] = 6,
static const char *aggr_header_csv[] = {
[AGGR_CORE] = "core,cpus,",
+ [AGGR_CACHE] = "cache,cpus,",
[AGGR_DIE] = "die,cpus,",
[AGGR_SOCKET] = "socket,cpus,",
[AGGR_NONE] = "cpu,",
static const char *aggr_header_std[] = {
[AGGR_CORE] = "core",
+ [AGGR_CACHE] = "cache",
[AGGR_DIE] = "die",
[AGGR_SOCKET] = "socket",
[AGGR_NONE] = "cpu",
case AGGR_CORE:
snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core);
break;
+ case AGGR_CACHE:
+ snprintf(buf, sizeof(buf), "S%d-D%d-L%d-ID%d",
+ id.socket, id.die, id.cache_lvl, id.cache);
+ break;
case AGGR_DIE:
snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die);
break;
fprintf(output, "S%d-D%d-C%d%s%d%s",
id.socket, id.die, id.core, sep, aggr_nr, sep);
break;
+ case AGGR_CACHE:
+ fprintf(config->output, "S%d-D%d-L%d-ID%d%s%d%s",
+ id.socket, id.die, id.cache_lvl, id.cache, sep, aggr_nr, sep);
+ break;
case AGGR_DIE:
fprintf(output, "S%d-D%d%s%d%s",
id.socket, id.die, sep, aggr_nr, sep);
fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, id.core, aggr_nr);
break;
+ case AGGR_CACHE:
+ fprintf(output, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d, ",
+ id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
+ break;
case AGGR_DIE:
fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
id.socket, id.die, aggr_nr);
case AGGR_NODE:
case AGGR_SOCKET:
case AGGR_DIE:
+ case AGGR_CACHE:
case AGGR_CORE:
fprintf(output, "#%*s %-*s cpus",
INTERVAL_LEN - 1, "time",
switch (config->aggr_mode) {
case AGGR_CORE:
+ case AGGR_CACHE:
case AGGR_DIE:
case AGGR_SOCKET:
case AGGR_NODE: