bpf: Support __kptr to local kptrs
authorDave Marchevsky <davemarchevsky@fb.com>
Fri, 10 Mar 2023 23:07:41 +0000 (15:07 -0800)
committerAlexei Starovoitov <ast@kernel.org>
Sat, 11 Mar 2023 00:38:05 +0000 (16:38 -0800)
If a PTR_TO_BTF_ID type comes from program BTF - not vmlinux or module
BTF - it must have been allocated by bpf_obj_new and therefore must be
freed with bpf_obj_drop. Such a PTR_TO_BTF_ID is considered a "local
kptr" and is tagged with MEM_ALLOC type tag by bpf_obj_new.

This patch adds support for treating __kptr-tagged pointers to "local
kptrs" as having an implicit bpf_obj_drop destructor for referenced kptr
acquire / release semantics. Consider the following example:

  struct node_data {
          long key;
          long data;
          struct bpf_rb_node node;
  };

  struct map_value {
          struct node_data __kptr *node;
  };

  struct {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __type(key, int);
          __type(value, struct map_value);
          __uint(max_entries, 1);
  } some_nodes SEC(".maps");

If struct node_data had a matching definition in kernel BTF, the verifier would
expect a destructor for the type to be registered. Since struct node_data does
not match any type in kernel BTF, the verifier knows that there is no kfunc
that provides a PTR_TO_BTF_ID to this type, and that such a PTR_TO_BTF_ID can
only come from bpf_obj_new. So instead of searching for a registered dtor,
a bpf_obj_drop dtor can be assumed.

This allows the runtime to properly destruct such kptrs in
bpf_obj_free_fields, which enables maps to clean up map_vals w/ such
kptrs when going away.

Implementation notes:
  * "kernel_btf" variable is renamed to "kptr_btf" in btf_parse_kptr.
    Before this patch, the variable would only ever point to vmlinux or
    module BTFs, but now it can point to some program BTF for local kptr
    type. It's later used to populate the (btf, btf_id) pair in kptr btf
    field.
  * It's necessary to btf_get the program BTF when populating btf_field
    for local kptr. btf_record_free later does a btf_put.
  * Behavior for non-local referenced kptrs is not modified, as
    bpf_find_btf_id helper only searches vmlinux and module BTFs for
    matching BTF type. If such a type is found, btf_field_kptr's btf will
    pass btf_is_kernel check, and the associated release function is
    some one-argument dtor. If btf_is_kernel check fails, associated
release function is two-arg bpf_obj_drop_impl. Before this patch
only btf_field_kptrs w/ kernel or module BTFs were created.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230310230743.2320707-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/bpf.h
include/linux/btf.h
kernel/bpf/btf.c
kernel/bpf/helpers.c
kernel/bpf/syscall.c

index 3a38db315f7f6c9657bfac423e9b18abfa596155..756b85f0d0d38c56624ff1543fadc76ac1ee3310 100644 (file)
@@ -189,10 +189,19 @@ enum btf_field_type {
                                 BPF_RB_NODE | BPF_RB_ROOT,
 };
 
+typedef void (*btf_dtor_kfunc_t)(void *);
+typedef void (*btf_dtor_obj_drop)(void *, const struct btf_record *);
+
 struct btf_field_kptr {
        struct btf *btf;
        struct module *module;
-       btf_dtor_kfunc_t dtor;
+       union {
+               /* dtor used if btf_is_kernel(btf), otherwise the type
+                * is program-allocated and obj_drop is used
+                */
+               btf_dtor_kfunc_t dtor;
+               btf_dtor_obj_drop obj_drop;
+       };
        u32 btf_id;
 };
 
index 1bba0827e8c46a6eaa4b26daca1511c35204d39e..d53b10cc55f2c2e3c94ebf012a78fbd10312f898 100644 (file)
@@ -121,8 +121,6 @@ struct btf_struct_metas {
        struct btf_struct_meta types[];
 };
 
-typedef void (*btf_dtor_kfunc_t)(void *);
-
 extern const struct file_operations btf_fops;
 
 void btf_get(struct btf *btf);
index 37779ceefd09f21aee18f0dfa8ab3f96e18a4013..66fad7a16b6cf69bda212d3b58688f5004c08136 100644 (file)
@@ -3551,12 +3551,17 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
        return -EINVAL;
 }
 
+extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
+
 static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
                          struct btf_field_info *info)
 {
        struct module *mod = NULL;
        const struct btf_type *t;
-       struct btf *kernel_btf;
+       /* If a matching btf type is found in kernel or module BTFs, kptr_btf
+        * is that BTF, otherwise it's program BTF
+        */
+       struct btf *kptr_btf;
        int ret;
        s32 id;
 
@@ -3565,7 +3570,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
         */
        t = btf_type_by_id(btf, info->kptr.type_id);
        id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
-                            &kernel_btf);
+                            &kptr_btf);
+       if (id == -ENOENT) {
+               /* btf_parse_kptr should only be called w/ btf = program BTF */
+               WARN_ON_ONCE(btf_is_kernel(btf));
+
+               /* Type exists only in program BTF. Assume that it's a MEM_ALLOC
+                * kptr allocated via bpf_obj_new
+                */
+               field->kptr.dtor = (void *)&__bpf_obj_drop_impl;
+               id = info->kptr.type_id;
+               kptr_btf = (struct btf *)btf;
+               btf_get(kptr_btf);
+               goto found_dtor;
+       }
        if (id < 0)
                return id;
 
@@ -3582,20 +3600,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
                 * can be used as a referenced pointer and be stored in a map at
                 * the same time.
                 */
-               dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id);
+               dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id);
                if (dtor_btf_id < 0) {
                        ret = dtor_btf_id;
                        goto end_btf;
                }
 
-               dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id);
+               dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id);
                if (!dtor_func) {
                        ret = -ENOENT;
                        goto end_btf;
                }
 
-               if (btf_is_module(kernel_btf)) {
-                       mod = btf_try_get_module(kernel_btf);
+               if (btf_is_module(kptr_btf)) {
+                       mod = btf_try_get_module(kptr_btf);
                        if (!mod) {
                                ret = -ENXIO;
                                goto end_btf;
@@ -3605,7 +3623,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
                /* We already verified dtor_func to be btf_type_is_func
                 * in register_btf_id_dtor_kfuncs.
                 */
-               dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off);
+               dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off);
                addr = kallsyms_lookup_name(dtor_func_name);
                if (!addr) {
                        ret = -EINVAL;
@@ -3614,14 +3632,15 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
                field->kptr.dtor = (void *)addr;
        }
 
+found_dtor:
        field->kptr.btf_id = id;
-       field->kptr.btf = kernel_btf;
+       field->kptr.btf = kptr_btf;
        field->kptr.module = mod;
        return 0;
 end_mod:
        module_put(mod);
 end_btf:
-       btf_put(kernel_btf);
+       btf_put(kptr_btf);
        return ret;
 }
 
index f9b7eeedce08bde1b1989f9eebc1a4db084628f8..77d64b6951b9c9cc94ece9d249abf1d4e2457adc 100644 (file)
@@ -1896,14 +1896,19 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
        return p;
 }
 
+void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
+{
+       if (rec)
+               bpf_obj_free_fields(rec, p);
+       bpf_mem_free(&bpf_global_ma, p);
+}
+
 __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
 {
        struct btf_struct_meta *meta = meta__ign;
        void *p = p__alloc;
 
-       if (meta)
-               bpf_obj_free_fields(meta->record, p);
-       bpf_mem_free(&bpf_global_ma, p);
+       __bpf_obj_drop_impl(p, meta ? meta->record : NULL);
 }
 
 static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail)
index cc4b7684910cb3d2ee529aad83c0fc55311048ce..0684febc447a8ad6d4dce706f84dab2bc520caf6 100644 (file)
@@ -659,8 +659,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
                return;
        fields = rec->fields;
        for (i = 0; i < rec->cnt; i++) {
+               struct btf_struct_meta *pointee_struct_meta;
                const struct btf_field *field = &fields[i];
                void *field_ptr = obj + field->offset;
+               void *xchgd_field;
 
                switch (fields[i].type) {
                case BPF_SPIN_LOCK:
@@ -672,7 +674,17 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
                        WRITE_ONCE(*(u64 *)field_ptr, 0);
                        break;
                case BPF_KPTR_REF:
-                       field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0));
+                       xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
+                       if (!btf_is_kernel(field->kptr.btf)) {
+                               pointee_struct_meta = btf_find_struct_meta(field->kptr.btf,
+                                                                          field->kptr.btf_id);
+                               WARN_ON_ONCE(!pointee_struct_meta);
+                               field->kptr.obj_drop(xchgd_field, pointee_struct_meta ?
+                                                                 pointee_struct_meta->record :
+                                                                 NULL);
+                       } else {
+                               field->kptr.dtor(xchgd_field);
+                       }
                        break;
                case BPF_LIST_HEAD:
                        if (WARN_ON_ONCE(rec->spin_lock_off < 0))