bpf: Add alloc/xchg/direct_access support for local percpu kptr
authorYonghong Song <yonghong.song@linux.dev>
Sun, 27 Aug 2023 15:27:44 +0000 (08:27 -0700)
committerAlexei Starovoitov <ast@kernel.org>
Fri, 8 Sep 2023 15:42:17 +0000 (08:42 -0700)
Add two new kfunc's, bpf_percpu_obj_new_impl() and
bpf_percpu_obj_drop_impl(), to allocate a percpu obj.
The two functions are very similar to bpf_obj_new_impl()
and bpf_obj_drop_impl(). The major difference is related
to percpu handling.

    bpf_rcu_read_lock()
    struct val_t __percpu_kptr *v = map_val->percpu_data;
    ...
    bpf_rcu_read_unlock()

For percpu data loaded from a map value, like 'v' above,
the reg->type is set as
PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU
if inside rcu critical section.

MEM_RCU marking here is similar to NON_OWN_REF as 'v'
is not an owning reference. But NON_OWN_REF is
trusted and typically inside the spinlock while
MEM_RCU is under rcu read lock. RCU is preferred here
since percpu data structures mean potential concurrent
access into its contents.

Also, bpf_percpu_obj_new_impl() is restricted such that
no pointers or special fields are allowed. Therefore,
the bpf_list_head and bpf_rb_root will not be supported
in this patch set to avoid potential memory leak issue
due to racing between bpf_obj_free_fields() and another
bpf_kptr_xchg() moving an allocated object to
bpf_list_head and bpf_rb_root.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
Link: https://lore.kernel.org/r/20230827152744.1996739-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
kernel/bpf/helpers.c
kernel/bpf/verifier.c

index 8bd3812fb8df44970256449e100d07079263988d..b0a9834f1051d16f08ab0abd6b576f7442ba4d63 100644 (file)
@@ -1902,6 +1902,14 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
        return p;
 }
 
+__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+       u64 size = local_type_id__k;
+
+       /* The verifier has ensured that meta__ign must be NULL */
+       return bpf_mem_alloc(&bpf_global_percpu_ma, size);
+}
+
 /* Must be called under migrate_disable(), as required by bpf_mem_free */
 void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
 {
@@ -1930,6 +1938,12 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
        __bpf_obj_drop_impl(p, meta ? meta->record : NULL);
 }
 
+__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
+{
+       /* The verifier has ensured that meta__ign must be NULL */
+       bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
+}
+
 __bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
 {
        struct btf_struct_meta *meta = meta__ign;
@@ -2442,7 +2456,9 @@ BTF_SET8_START(generic_btf_ids)
 BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
 #endif
 BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_list_push_front_impl)
 BTF_ID_FLAGS(func, bpf_list_push_back_impl)
index bb78212fa5b27305e6672931ae815def8e931fe9..6c886ead18f6be8aec56db7bdd2b205bd5e04cea 100644 (file)
@@ -304,7 +304,7 @@ struct bpf_kfunc_call_arg_meta {
        /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
         * generally to pass info about user-defined local kptr types to later
         * verification logic
-        *   bpf_obj_drop
+        *   bpf_obj_drop/bpf_percpu_obj_drop
         *     Record the local kptr type to be drop'd
         *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
         *     Record the local kptr type to be refcount_incr'd and use
@@ -5001,6 +5001,8 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
                        perm_flags |= PTR_UNTRUSTED;
        } else {
                perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
+               if (kptr_field->type == BPF_KPTR_PERCPU)
+                       perm_flags |= MEM_PERCPU;
        }
 
        if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
@@ -5044,7 +5046,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
         */
        if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
                                  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
-                                 kptr_field->type == BPF_KPTR_REF))
+                                 kptr_field->type != BPF_KPTR_UNREF))
                goto bad_type;
        return 0;
 bad_type:
@@ -5088,7 +5090,18 @@ static bool rcu_safe_kptr(const struct btf_field *field)
 {
        const struct btf_field_kptr *kptr = &field->kptr;
 
-       return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
+       return field->type == BPF_KPTR_PERCPU ||
+              (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
+}
+
+static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
+{
+       if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
+               if (kptr_field->type != BPF_KPTR_PERCPU)
+                       return PTR_MAYBE_NULL | MEM_RCU;
+               return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
+       }
+       return PTR_MAYBE_NULL | PTR_UNTRUSTED;
 }
 
 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
@@ -5114,7 +5127,8 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
        /* We only allow loading referenced kptr, since it will be marked as
         * untrusted, similar to unreferenced kptr.
         */
-       if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
+       if (class != BPF_LDX &&
+           (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
                verbose(env, "store to referenced kptr disallowed\n");
                return -EACCES;
        }
@@ -5125,10 +5139,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
                 * value from map as PTR_TO_BTF_ID, with the correct type.
                 */
                mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
-                               kptr_field->kptr.btf_id,
-                               rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
-                               PTR_MAYBE_NULL | MEM_RCU :
-                               PTR_MAYBE_NULL | PTR_UNTRUSTED);
+                               kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
                /* For mark_ptr_or_null_reg */
                val_reg->id = ++env->id_gen;
        } else if (class == BPF_STX) {
@@ -5182,6 +5193,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
                        switch (field->type) {
                        case BPF_KPTR_UNREF:
                        case BPF_KPTR_REF:
+                       case BPF_KPTR_PERCPU:
                                if (src != ACCESS_DIRECT) {
                                        verbose(env, "kptr cannot be accessed indirectly by helper\n");
                                        return -EACCES;
@@ -7320,7 +7332,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
                verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
                return -EACCES;
        }
-       if (kptr_field->type != BPF_KPTR_REF) {
+       if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
                verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
                return -EACCES;
        }
@@ -7831,8 +7843,10 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
        if (base_type(arg_type) == ARG_PTR_TO_MEM)
                type &= ~DYNPTR_TYPE_FLAG_MASK;
 
-       if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
+       if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
                type &= ~MEM_ALLOC;
+               type &= ~MEM_PERCPU;
+       }
 
        for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
                expected = compatible->types[i];
@@ -7915,6 +7929,7 @@ found:
                break;
        }
        case PTR_TO_BTF_ID | MEM_ALLOC:
+       case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
                if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
                    meta->func_id != BPF_FUNC_kptr_xchg) {
                        verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
@@ -9882,8 +9897,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
                if (func_id == BPF_FUNC_kptr_xchg) {
                        ret_btf = meta.kptr_field->kptr.btf;
                        ret_btf_id = meta.kptr_field->kptr.btf_id;
-                       if (!btf_is_kernel(ret_btf))
+                       if (!btf_is_kernel(ret_btf)) {
                                regs[BPF_REG_0].type |= MEM_ALLOC;
+                               if (meta.kptr_field->type == BPF_KPTR_PERCPU)
+                                       regs[BPF_REG_0].type |= MEM_PERCPU;
+                       }
                } else {
                        if (fn->ret_btf_id == BPF_PTR_POISON) {
                                verbose(env, "verifier internal error:");
@@ -10268,6 +10286,8 @@ enum special_kfunc_type {
        KF_bpf_dynptr_slice,
        KF_bpf_dynptr_slice_rdwr,
        KF_bpf_dynptr_clone,
+       KF_bpf_percpu_obj_new_impl,
+       KF_bpf_percpu_obj_drop_impl,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -10288,6 +10308,8 @@ BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
 BTF_ID(func, bpf_dynptr_clone)
+BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_drop_impl)
 BTF_SET_END(special_kfunc_set)
 
 BTF_ID_LIST(special_kfunc_list)
@@ -10310,6 +10332,8 @@ BTF_ID(func, bpf_dynptr_from_xdp)
 BTF_ID(func, bpf_dynptr_slice)
 BTF_ID(func, bpf_dynptr_slice_rdwr)
 BTF_ID(func, bpf_dynptr_clone)
+BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_drop_impl)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -11004,7 +11028,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
                        }
                        break;
                case KF_ARG_PTR_TO_ALLOC_BTF_ID:
-                       if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+                       if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
+                               if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
+                                       verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
+                                       return -EINVAL;
+                               }
+                       } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
+                               if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
+                                       verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
+                                       return -EINVAL;
+                               }
+                       } else {
                                verbose(env, "arg#%d expected pointer to allocated object\n", i);
                                return -EINVAL;
                        }
@@ -11012,8 +11046,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
                                verbose(env, "allocated object must be referenced\n");
                                return -EINVAL;
                        }
-                       if (meta->btf == btf_vmlinux &&
-                           meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+                       if (meta->btf == btf_vmlinux) {
                                meta->arg_btf = reg->btf;
                                meta->arg_btf_id = reg->btf_id;
                        }
@@ -11413,6 +11446,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                /* Only exception is bpf_obj_new_impl */
                if (meta.btf != btf_vmlinux ||
                    (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
+                    meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
                     meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
                        verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
                        return -EINVAL;
@@ -11426,11 +11460,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
 
                if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
-                       if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+                       if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+                           meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+                               struct btf_struct_meta *struct_meta;
                                struct btf *ret_btf;
                                u32 ret_btf_id;
 
-                               if (unlikely(!bpf_global_ma_set))
+                               if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
+                                       return -ENOMEM;
+
+                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
                                        return -ENOMEM;
 
                                if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
@@ -11443,24 +11482,38 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 
                                /* This may be NULL due to user not supplying a BTF */
                                if (!ret_btf) {
-                                       verbose(env, "bpf_obj_new requires prog BTF\n");
+                                       verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
                                        return -EINVAL;
                                }
 
                                ret_t = btf_type_by_id(ret_btf, ret_btf_id);
                                if (!ret_t || !__btf_type_is_struct(ret_t)) {
-                                       verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
+                                       verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
                                        return -EINVAL;
                                }
 
+                               struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
+                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+                                       if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
+                                               verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
+                                               return -EINVAL;
+                                       }
+
+                                       if (struct_meta) {
+                                               verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
+                                               return -EINVAL;
+                                       }
+                               }
+
                                mark_reg_known_zero(env, regs, BPF_REG_0);
                                regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
                                regs[BPF_REG_0].btf = ret_btf;
                                regs[BPF_REG_0].btf_id = ret_btf_id;
+                               if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
+                                       regs[BPF_REG_0].type |= MEM_PERCPU;
 
                                insn_aux->obj_new_size = ret_t->size;
-                               insn_aux->kptr_struct_meta =
-                                       btf_find_struct_meta(ret_btf, ret_btf_id);
+                               insn_aux->kptr_struct_meta = struct_meta;
                        } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
                                mark_reg_known_zero(env, regs, BPF_REG_0);
                                regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
@@ -11597,7 +11650,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                        regs[BPF_REG_0].id = ++env->id_gen;
        } else if (btf_type_is_void(t)) {
                if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
-                       if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+                       if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+                           meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
                                insn_aux->kptr_struct_meta =
                                        btf_find_struct_meta(meta.arg_btf,
                                                             meta.arg_btf_id);
@@ -18266,21 +18320,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                insn->imm = BPF_CALL_IMM(desc->addr);
        if (insn->off)
                return 0;
-       if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+       if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+           desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
                struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
                struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
                u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
 
+               if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
+                       verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
+                               insn_idx);
+                       return -EFAULT;
+               }
+
                insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
                insn_buf[1] = addr[0];
                insn_buf[2] = addr[1];
                insn_buf[3] = *insn;
                *cnt = 4;
        } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+                  desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
                   desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
                struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
                struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
 
+               if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
+                       verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
+                               insn_idx);
+                       return -EFAULT;
+               }
+
                if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
                    !kptr_struct_meta) {
                        verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",