perf callchain: Make brtype_stat in callchain_list optional
author Ian Rogers <irogers@google.com>
Tue, 24 Oct 2023 22:23:12 +0000 (15:23 -0700)
committer Namhyung Kim <namhyung@kernel.org>
Wed, 25 Oct 2023 20:39:08 +0000 (13:39 -0700)
struct callchain_list is 352 bytes in size, 232 of which are
brtype_stat. brtype_stat is only used for certain callchain_list
items, so make it optional and allocate it only when necessary. So
that printing doesn't need to deal with an optional brtype_stat,
pass an empty/zeroed version when it has not been allocated.

Before:
```
struct callchain_list {
        u64                        ip;                   /*     0     8 */
        struct map_symbol          ms;                   /*     8    24 */
        struct {
                _Bool              unfolded;             /*    32     1 */
                _Bool              has_children;         /*    33     1 */
        };                                               /*    32     2 */

        /* XXX 6 bytes hole, try to pack */

        u64                        branch_count;         /*    40     8 */
        u64                        from_count;           /*    48     8 */
        u64                        predicted_count;      /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        u64                        abort_count;          /*    64     8 */
        u64                        cycles_count;         /*    72     8 */
        u64                        iter_count;           /*    80     8 */
        u64                        iter_cycles;          /*    88     8 */
        struct branch_type_stat    brtype_stat;          /*    96   232 */
        /* --- cacheline 5 boundary (320 bytes) was 8 bytes ago --- */
        const char  *              srcline;              /*   328     8 */
        struct list_head           list;                 /*   336    16 */

        /* size: 352, cachelines: 6, members: 13 */
        /* sum members: 346, holes: 1, sum holes: 6 */
        /* last cacheline: 32 bytes */
};
```

After:
```
struct callchain_list {
        u64                        ip;                   /*     0     8 */
        struct map_symbol          ms;                   /*     8    24 */
        struct {
                _Bool              unfolded;             /*    32     1 */
                _Bool              has_children;         /*    33     1 */
        };                                               /*    32     2 */

        /* XXX 6 bytes hole, try to pack */

        u64                        branch_count;         /*    40     8 */
        u64                        from_count;           /*    48     8 */
        u64                        predicted_count;      /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        u64                        abort_count;          /*    64     8 */
        u64                        cycles_count;         /*    72     8 */
        u64                        iter_count;           /*    80     8 */
        u64                        iter_cycles;          /*    88     8 */
        struct branch_type_stat *  brtype_stat;          /*    96     8 */
        const char  *              srcline;              /*   104     8 */
        struct list_head           list;                 /*   112    16 */

        /* size: 128, cachelines: 2, members: 13 */
        /* sum members: 122, holes: 1, sum holes: 6 */
};
```

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: James Clark <james.clark@arm.com>
Cc: Nick Terrell <terrelln@fb.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Changbin Du <changbin.du@huawei.com>
Cc: liuwenyu <liuwenyu7@huawei.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Song Liu <song@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Yanteng Si <siyanteng@loongson.cn>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20231024222353.3024098-10-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/perf/util/callchain.c
tools/perf/util/callchain.h

index cde4860e6f2817a0294ef8095e9e82356d19e40a..5349c6a218491df61804ce4b6c9a0af12be34453 100644 (file)
@@ -586,7 +586,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
                call = zalloc(sizeof(*call));
                if (!call) {
                        perror("not enough memory for the code path tree");
-                       return -1;
+                       return -ENOMEM;
                }
                call->ip = cursor_node->ip;
                call->ms = cursor_node->ms;
@@ -602,7 +602,15 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
                                 * branch_from is set with value somewhere else
                                 * to imply it's "to" of a branch.
                                 */
-                               call->brtype_stat.branch_to = true;
+                               if (!call->brtype_stat) {
+                                       call->brtype_stat = zalloc(sizeof(*call->brtype_stat));
+                                       if (!call->brtype_stat) {
+                                               perror("not enough memory for the code path branch statisitcs");
+                                               free(call->brtype_stat);
+                                               return -ENOMEM;
+                                       }
+                               }
+                               call->brtype_stat->branch_to = true;
 
                                if (cursor_node->branch_flags.predicted)
                                        call->predicted_count = 1;
@@ -610,7 +618,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
                                if (cursor_node->branch_flags.abort)
                                        call->abort_count = 1;
 
-                               branch_type_count(&call->brtype_stat,
+                               branch_type_count(call->brtype_stat,
                                                  &cursor_node->branch_flags,
                                                  cursor_node->branch_from,
                                                  cursor_node->ip);
@@ -618,7 +626,8 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
                                /*
                                 * It's "from" of a branch
                                 */
-                               call->brtype_stat.branch_to = false;
+                               if (call->brtype_stat && call->brtype_stat->branch_to)
+                                       call->brtype_stat->branch_to = false;
                                call->cycles_count =
                                        cursor_node->branch_flags.cycles;
                                call->iter_count = cursor_node->nr_loop_iter;
@@ -652,6 +661,7 @@ add_child(struct callchain_node *parent,
                        list_del_init(&call->list);
                        map__zput(call->ms.map);
                        maps__zput(call->ms.maps);
+                       zfree(&call->brtype_stat);
                        free(call);
                }
                free(new);
@@ -762,7 +772,14 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
                        /*
                         * It's "to" of a branch
                         */
-                       cnode->brtype_stat.branch_to = true;
+                       if (!cnode->brtype_stat) {
+                               cnode->brtype_stat = zalloc(sizeof(*cnode->brtype_stat));
+                               if (!cnode->brtype_stat) {
+                                       perror("not enough memory for the code path branch statisitcs");
+                                       return MATCH_ERROR;
+                               }
+                       }
+                       cnode->brtype_stat->branch_to = true;
 
                        if (node->branch_flags.predicted)
                                cnode->predicted_count++;
@@ -770,7 +787,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
                        if (node->branch_flags.abort)
                                cnode->abort_count++;
 
-                       branch_type_count(&cnode->brtype_stat,
+                       branch_type_count(cnode->brtype_stat,
                                          &node->branch_flags,
                                          node->branch_from,
                                          node->ip);
@@ -778,7 +795,8 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
                        /*
                         * It's "from" of a branch
                         */
-                       cnode->brtype_stat.branch_to = false;
+                       if (cnode->brtype_stat && cnode->brtype_stat->branch_to)
+                               cnode->brtype_stat->branch_to = false;
                        cnode->cycles_count += node->branch_flags.cycles;
                        cnode->iter_count += node->nr_loop_iter;
                        cnode->iter_cycles += node->iter_cycles;
@@ -1026,6 +1044,7 @@ merge_chain_branch(struct callchain_cursor *cursor,
                maps__zput(ms.maps);
                map__zput(list->ms.map);
                maps__zput(list->ms.maps);
+               zfree(&list->brtype_stat);
                free(list);
        }
 
@@ -1447,11 +1466,14 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 int callchain_list_counts__printf_value(struct callchain_list *clist,
                                        FILE *fp, char *bf, int bfsize)
 {
+       static const struct branch_type_stat empty_brtype_stat = {};
+       const struct branch_type_stat *brtype_stat;
        u64 branch_count, predicted_count;
        u64 abort_count, cycles_count;
        u64 iter_count, iter_cycles;
        u64 from_count;
 
+       brtype_stat = clist->brtype_stat ?: &empty_brtype_stat;
        branch_count = clist->branch_count;
        predicted_count = clist->predicted_count;
        abort_count = clist->abort_count;
@@ -1463,7 +1485,7 @@ int callchain_list_counts__printf_value(struct callchain_list *clist,
        return callchain_counts_printf(fp, bf, bfsize, branch_count,
                                       predicted_count, abort_count,
                                       cycles_count, iter_count, iter_cycles,
-                                      from_count, &clist->brtype_stat);
+                                      from_count, brtype_stat);
 }
 
 static void free_callchain_node(struct callchain_node *node)
@@ -1476,6 +1498,7 @@ static void free_callchain_node(struct callchain_node *node)
                list_del_init(&list->list);
                map__zput(list->ms.map);
                maps__zput(list->ms.maps);
+               zfree(&list->brtype_stat);
                free(list);
        }
 
@@ -1483,6 +1506,7 @@ static void free_callchain_node(struct callchain_node *node)
                list_del_init(&list->list);
                map__zput(list->ms.map);
                maps__zput(list->ms.maps);
+               zfree(&list->brtype_stat);
                free(list);
        }
 
@@ -1569,6 +1593,7 @@ out:
                list_del_init(&chain->list);
                map__zput(chain->ms.map);
                maps__zput(chain->ms.maps);
+               zfree(&chain->brtype_stat);
                free(chain);
        }
        return -ENOMEM;
index d2618a47deca8f27a5d94350443cc53f69c864d8..86e8a9e8145698767e091dce22b5b51dbec84fb8 100644 (file)
@@ -129,7 +129,7 @@ struct callchain_list {
        u64                     cycles_count;
        u64                     iter_count;
        u64                     iter_cycles;
-       struct branch_type_stat brtype_stat;
+       struct branch_type_stat *brtype_stat;
        const char              *srcline;
        struct list_head        list;
 };