bpf: add special internal-only MOV instruction to resolve per-CPU addrs
author Andrii Nakryiko <andrii@kernel.org>
Tue, 2 Apr 2024 02:13:02 +0000 (19:13 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Wed, 3 Apr 2024 17:29:55 +0000 (10:29 -0700)
Add a new BPF instruction for resolving absolute addresses of per-CPU
data from their per-CPU offsets. This instruction is internal-only and
users are not allowed to use it directly. It will only be used for
internal inlining optimizations for now between BPF verifier and BPF JITs.

We use a special BPF_MOV | BPF_ALU64 | BPF_X form with insn->off field
set to BPF_ADDR_PERCPU = -1. I used a negative offset value to distinguish
it from the positive ones used by user-exposed instructions.

Such an instruction resolves a per-CPU offset stored in a register to
a valid kernel address which can be dereferenced. It is useful in any
use case where the absolute address of per-CPU data has to be resolved
(e.g., in inlining bpf_map_lookup_elem()).

BPF disassembler is also taught to recognize them to support dumping
final BPF assembly code (non-JIT'ed version).

Add an arch-specific way for BPF JITs to mark support for these instructions.

This patch also adds support for these instructions in x86-64 BPF JIT.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20240402021307.1012571-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
arch/x86/net/bpf_jit_comp.c
include/linux/filter.h
kernel/bpf/core.c
kernel/bpf/disasm.c

index 3b639d6f2f54d2b3ccb8c868d5ca8033a150bca9..af89dd117dce466dccd22e5d6653f7abeff5b059 100644 (file)
@@ -1382,6 +1382,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
                                maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
                                EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
                                break;
+                       } else if (insn_is_mov_percpu_addr(insn)) {
+                               u32 off = (u32)(unsigned long)&this_cpu_off;
+
+                               /* mov <dst>, <src> (if necessary) */
+                               EMIT_mov(dst_reg, src_reg);
+
+                               /* add <dst>, gs:[<off>] */
+                               EMIT2(0x65, add_1mod(0x48, dst_reg));
+                               EMIT3(0x03, add_1reg(0x04, dst_reg), 0x25);
+                               EMIT(off, 4);
+                               break;
                        }
                        fallthrough;
                case BPF_ALU | BPF_MOV | BPF_X:
@@ -3365,6 +3376,11 @@ bool bpf_jit_supports_subprog_tailcalls(void)
        return true;
 }
 
+bool bpf_jit_supports_percpu_insn(void)
+{
+       return true; /* this JIT emits the internal per-CPU MOV form in do_jit() */
+}
+
 void bpf_jit_free(struct bpf_prog *prog)
 {
        if (prog->jited) {
index 531b360901223e35b6f20a150d837e6a8580e4d9..161d5f7b64ed1d7a73537699ab361567c5d904d7 100644 (file)
@@ -178,6 +178,25 @@ struct ctl_table_header;
                .off   = 0,                                     \
                .imm   = 0 })
 
+/* Special (internal-only) form of 64-bit register mov (BPF_ALU64 | BPF_MOV |
+ * BPF_X), used to resolve per-CPU addrs: dst_reg = src_reg + <percpu_base_off>
+ * BPF_ADDR_PERCPU in insn->off marks this form; insn->imm is left at 0.
+ */
+#define BPF_ADDR_PERCPU        (-1)
+
+#define BPF_MOV64_PERCPU_REG(DST, SRC)                         \
+       ((struct bpf_insn) {                                    \
+               .code  = BPF_ALU64 | BPF_MOV | BPF_X,           \
+               .dst_reg = DST,                                 \
+               .src_reg = SRC,                                 \
+               .off   = BPF_ADDR_PERCPU,                       \
+               .imm   = 0 })
+
+static inline bool insn_is_mov_percpu_addr(const struct bpf_insn *insn)
+{
+       return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_PERCPU;
+}
+
 /* Short form of mov, dst_reg = imm32 */
 
 #define BPF_MOV64_IMM(DST, IMM)                                        \
@@ -972,6 +991,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
 bool bpf_jit_supports_subprog_tailcalls(void);
+bool bpf_jit_supports_percpu_insn(void);
 bool bpf_jit_supports_kfunc_call(void);
 bool bpf_jit_supports_far_kfunc_call(void);
 bool bpf_jit_supports_exceptions(void);
index ae406a2814db05a1b8df1400f5585c71ed409e54..7a33a3a7e63cb7d52244d09ad23eb98cd57e2f74 100644 (file)
@@ -2945,6 +2945,11 @@ bool __weak bpf_jit_supports_subprog_tailcalls(void)
        return false;
 }
 
+bool __weak bpf_jit_supports_percpu_insn(void)
+{
+       return false; /* default; an arch JIT overrides this __weak stub to opt in */
+}
+
 bool __weak bpf_jit_supports_kfunc_call(void)
 {
        return false;
index bd2e2dd04740c1810ba559b67ac9ba69092dc11a..309c4aa1b026abcd30cec50211d5dd1c50bfa52f 100644 (file)
@@ -172,6 +172,17 @@ static bool is_addr_space_cast(const struct bpf_insn *insn)
                insn->off == BPF_ADDR_SPACE_CAST;
 }
 
+/* Special (internal-only) form of mov, used to resolve per-CPU addrs:
+ * dst_reg = src_reg + <percpu_base_off>
+ * BPF_ADDR_PERCPU (same value as the filter.h define) marks it via insn->off.
+ */
+#define BPF_ADDR_PERCPU        (-1)
+
+static inline bool is_mov_percpu_addr(const struct bpf_insn *insn)
+{
+       return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->off == BPF_ADDR_PERCPU;
+}
+
 void print_bpf_insn(const struct bpf_insn_cbs *cbs,
                    const struct bpf_insn *insn,
                    bool allow_ptr_leaks)
@@ -194,6 +205,9 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
                        verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n",
                                insn->code, insn->dst_reg,
                                insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm);
+               } else if (is_mov_percpu_addr(insn)) {
+                       verbose(cbs->private_data, "(%02x) r%d = &(void __percpu *)(r%d)\n",
+                               insn->code, insn->dst_reg, insn->src_reg);
                } else if (BPF_SRC(insn->code) == BPF_X) {
                        verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n",
                                insn->code, class == BPF_ALU ? 'w' : 'r',