bpf: Move getsockopt retval to struct bpf_cg_run_ctx
authorYiFei Zhu <zhuyifei@google.com>
Thu, 16 Dec 2021 02:04:26 +0000 (02:04 +0000)
committerAlexei Starovoitov <ast@kernel.org>
Wed, 19 Jan 2022 20:51:30 +0000 (12:51 -0800)
The retval value is moved to struct bpf_cg_run_ctx for ease of access
in different prog types with different context structs layouts. The
helper implementation (to be added in a later patch in the series) can
simply perform a container_of from current->bpf_ctx to retrieve
bpf_cg_run_ctx.

Unfortunately, there is no easy way to access the current task_struct
via the verifier BPF bytecode rewrite, aside from possibly calling a
helper, so a pointer to current task is added to struct bpf_sockopt_kern
so that the rewritten BPF bytecode can access struct bpf_cg_run_ctx with
an indirection.

For backward compatibility, if a getsockopt program rejects a syscall
by returning 0, an -EPERM will be generated, by having the
BPF_PROG_RUN_ARRAY_CG family macros automatically set the retval to
-EPERM. Unlike prior to this patch, this -EPERM will be visible to
ctx->retval for any other hooks down the line in the prog array.

Additionally, the restriction that getsockopt filters can only set
the retval to 0 is removed, considering that certain getsockopt
implementations may return optlen. Filters are now able to set the
value arbitrarily.

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/73b0325f5c29912ccea7ea57ec1ed4d388fc1d37.1639619851.git.zhuyifei@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
include/linux/bpf.h
include/linux/filter.h
kernel/bpf/cgroup.c

index 83da1764fcfe5c48ef33a765a8d84db997c2a983..7b0c11f414d0ff18bf8f2c0271fdf7f54f9e11b7 100644 (file)
@@ -1245,6 +1245,7 @@ struct bpf_run_ctx {};
 struct bpf_cg_run_ctx {
        struct bpf_run_ctx run_ctx;
        const struct bpf_prog_array_item *prog_item;
+       int retval;
 };
 
 struct bpf_trace_run_ctx {
@@ -1280,16 +1281,16 @@ typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
 static __always_inline int
 BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
                            const void *ctx, bpf_prog_run_fn run_prog,
-                           u32 *ret_flags)
+                           int retval, u32 *ret_flags)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       int ret = 0;
        u32 func_ret;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1299,27 +1300,28 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
                run_ctx.prog_item = item;
                func_ret = run_prog(prog, ctx);
                if (!(func_ret & 1))
-                       ret = -EPERM;
+                       run_ctx.retval = -EPERM;
                *(ret_flags) |= (func_ret >> 1);
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
 static __always_inline int
 BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-                     const void *ctx, bpf_prog_run_fn run_prog)
+                     const void *ctx, bpf_prog_run_fn run_prog,
+                     int retval)
 {
        const struct bpf_prog_array_item *item;
        const struct bpf_prog *prog;
        const struct bpf_prog_array *array;
        struct bpf_run_ctx *old_run_ctx;
        struct bpf_cg_run_ctx run_ctx;
-       int ret = 0;
 
+       run_ctx.retval = retval;
        migrate_disable();
        rcu_read_lock();
        array = rcu_dereference(array_rcu);
@@ -1328,13 +1330,13 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
        while ((prog = READ_ONCE(item->prog))) {
                run_ctx.prog_item = item;
                if (!run_prog(prog, ctx))
-                       ret = -EPERM;
+                       run_ctx.retval = -EPERM;
                item++;
        }
        bpf_reset_run_ctx(old_run_ctx);
        rcu_read_unlock();
        migrate_enable();
-       return ret;
+       return run_ctx.retval;
 }
 
 static __always_inline u32
@@ -1394,7 +1396,7 @@ out:
                u32 _flags = 0;                         \
                bool _cn;                               \
                u32 _ret;                               \
-               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
+               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
                _cn = _flags & BPF_RET_SET_CN;          \
                if (!_ret)                              \
                        _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
index 71fa57b88bfc0d42db2d812858e599c93185a785..d23e999dc0324b72dae804049fde2092b250a5d0 100644 (file)
@@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
        s32             level;
        s32             optname;
        s32             optlen;
-       s32             retval;
+       /* for retval in struct bpf_cg_run_ctx */
+       struct task_struct *current_task;
+       /* Temporary "register" for indirect stores to ppos. */
+       u64             tmp_reg;
 };
 
 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
index 386155d279b3ee588b640c2a7c3fd58cd5a9ae26..b6fad0bbf5a7ced164b5a55e1736265e380ef272 100644 (file)
@@ -1079,7 +1079,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
                        cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
        } else {
                ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-                                           __bpf_prog_run_save_cb);
+                                           __bpf_prog_run_save_cb, 0);
        }
        bpf_restore_data_end(skb, saved_data_end);
        __skb_pull(skb, offset);
@@ -1108,7 +1108,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 
        return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
-                                    bpf_prog_run);
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
 
@@ -1154,7 +1154,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
 
        cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
        return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-                                          bpf_prog_run, flags);
+                                          bpf_prog_run, 0, flags);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
 
@@ -1181,7 +1181,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
        struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 
        return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-                                    bpf_prog_run);
+                                    bpf_prog_run, 0);
 }
 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
 
@@ -1199,7 +1199,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-                                   bpf_prog_run);
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
        return ret;
@@ -1330,7 +1330,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
        rcu_read_lock();
        cgrp = task_dfl_cgroup(current);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
+       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
+                                   bpf_prog_run, 0);
        rcu_read_unlock();
 
        kfree(ctx.cur_val);
@@ -1445,7 +1446,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, 0);
        release_sock(sk);
 
        if (ret)
@@ -1509,7 +1510,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
+               .current_task = current,
        };
        int ret;
 
@@ -1553,10 +1554,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 
        lock_sock(sk);
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
+                                   &ctx, bpf_prog_run, retval);
        release_sock(sk);
 
-       if (ret)
+       if (ret < 0)
                goto out;
 
        if (ctx.optlen > max_optlen || ctx.optlen < 0) {
@@ -1564,14 +1565,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                goto out;
        }
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval) {
-               ret = -EFAULT;
-               goto out;
-       }
-
        if (ctx.optlen != 0) {
                if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
                    put_user(ctx.optlen, optlen)) {
@@ -1580,8 +1573,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
                }
        }
 
-       ret = ctx.retval;
-
 out:
        sockopt_free_buf(&ctx, &buf);
        return ret;
@@ -1596,10 +1587,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
                .sk = sk,
                .level = level,
                .optname = optname,
-               .retval = retval,
                .optlen = *optlen,
                .optval = optval,
                .optval_end = optval + *optlen,
+               .current_task = current,
        };
        int ret;
 
@@ -1612,25 +1603,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
         */
 
        ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
-                                   &ctx, bpf_prog_run);
-       if (ret)
+                                   &ctx, bpf_prog_run, retval);
+       if (ret < 0)
                return ret;
 
        if (ctx.optlen > *optlen)
                return -EFAULT;
 
-       /* BPF programs only allowed to set retval to 0, not some
-        * arbitrary value.
-        */
-       if (ctx.retval != 0 && ctx.retval != retval)
-               return -EFAULT;
-
        /* BPF programs can shrink the buffer, export the modifications.
         */
        if (ctx.optlen != 0)
                *optlen = ctx.optlen;
 
-       return ctx.retval;
+       return ret;
 }
 #endif
 
@@ -2046,10 +2031,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
                        *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
                break;
        case offsetof(struct bpf_sockopt, retval):
-               if (type == BPF_WRITE)
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
-               else
-                       *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
+               BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
+
+               if (type == BPF_WRITE) {
+                       int treg = BPF_REG_9;
+
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       if (si->src_reg == treg || si->dst_reg == treg)
+                               --treg;
+                       *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             treg, treg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             treg, si->src_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+                       *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
+                                             offsetof(struct bpf_sockopt_kern, tmp_reg));
+               } else {
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
+                                             si->dst_reg, si->src_reg,
+                                             offsetof(struct bpf_sockopt_kern, current_task));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct task_struct, bpf_ctx));
+                       *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
+                                             si->dst_reg, si->dst_reg,
+                                             offsetof(struct bpf_cg_run_ctx, retval));
+               }
                break;
        case offsetof(struct bpf_sockopt, optval):
                *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);