bpf: Support inlining bpf_kptr_xchg() helper
authorHou Tao <houtao1@huawei.com>
Fri, 5 Jan 2024 10:48:17 +0000 (18:48 +0800)
committerAlexei Starovoitov <ast@kernel.org>
Tue, 23 Jan 2024 22:40:21 +0000 (14:40 -0800)
The motivation for inlining bpf_kptr_xchg() comes from performance
profiling of the bpf memory allocator benchmark. The benchmark uses
bpf_kptr_xchg() to stash the allocated objects and to pop the stashed
objects back out for freeing. After inlining bpf_kptr_xchg(), the
performance of object freeing on an 8-CPU VM increases by about
2%~10%. Inlining also has a downside: both the KASAN and KCSAN checks
on the pointer become unavailable.
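
A rough sketch of the stash/pop pattern follows (not the actual
benchmark code; the map, type and section names are made up, and the
bpf_obj_new()/bpf_obj_drop() declarations are assumed to come from the
selftests' bpf_experimental.h):

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>
  #include "bpf_experimental.h"

  struct node {
          int payload;
  };

  struct map_value {
          struct node __kptr *stashed;    /* kptr field owning a node */
  };

  struct {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __uint(max_entries, 1);
          __type(key, int);
          __type(value, struct map_value);
  } stash SEC(".maps");

  SEC("tc")
  int stash_and_pop(void *ctx)
  {
          struct map_value *v;
          struct node *n, *old;
          int key = 0;

          v = bpf_map_lookup_elem(&stash, &key);
          if (!v)
                  return 0;

          n = bpf_obj_new(typeof(*n));
          if (!n)
                  return 0;

          /* stash: ownership of n moves into the map slot; the
           * previous occupant (if any) comes back out.
           */
          old = bpf_kptr_xchg(&v->stashed, n);
          if (old)
                  bpf_obj_drop(old);

          /* pop: swap NULL in to take the stashed object back out
           * for freeing.
           */
          old = bpf_kptr_xchg(&v->stashed, NULL);
          if (old)
                  bpf_obj_drop(old);
          return 0;
  }

  char _license[] SEC("license") = "GPL";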

bpf_kptr_xchg() can be inlined by converting the call to
bpf_kptr_xchg() into a pointer-sized atomic exchange (BPF_XCHG)
instruction. But the conversion depends on two conditions:
1) The JIT backend supports atomic_xchg() on pointer-sized words.
2) On the specific arch, the implementation of xchg() is the same as
   atomic_xchg() on pointer-sized words (sketched below).
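
As a hypothetical illustration of condition 2 (not part of the patch):
on an arch where the condition holds, the two forms below perform the
same pointer-sized exchange, so replacing the helper's internal xchg()
with the JITed atomic exchange preserves its semantics:

  #include <linux/atomic.h>

  static unsigned long slot;

  /* what bpf_kptr_xchg() does internally */
  static unsigned long helper_form(unsigned long v)
  {
          return xchg(&slot, v);
  }

  /* what the inlined, JITed BPF_XCHG effectively performs */
  static unsigned long inlined_form(unsigned long v)
  {
          return atomic_long_xchg((atomic_long_t *)&slot, v);
  }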

It seems most 64-bit JIT backends satisfy these two conditions. But
as a precaution, define a weak function bpf_jit_supports_ptr_xchg()
to state whether such a conversion is safe, and only support the
inlining on 64-bit hosts.

x86-64 supports the BPF_XCHG atomic operation, and both xchg() and
atomic_xchg() use arch_xchg() to implement the exchange, so enable
inlining of bpf_kptr_xchg() on x86-64 first.
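
For reference, the substituted two-instruction sequence (mirroring the
kernel/bpf/verifier.c hunk below; the comments are explanatory, not
part of the patch) works as follows. Per the BPF calling convention,
R1 holds the map_value pointer and R2 the new pointer on entry, and
R0 carries the return value:

  struct bpf_insn insn_buf[] = {
          /* r0 = r2: the new pointer becomes the value to swap in */
          BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
          /* r0 = xchg((u64 *)(r1 + 0), r0): 8-byte atomic exchange
           * with the kptr slot in the map value; the old kptr lands
           * in r0, which is exactly the helper's return value.
           */
          BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0),
  };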

Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20240105104819.3916743-2-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
arch/x86/net/bpf_jit_comp.c
include/linux/filter.h
kernel/bpf/core.c
kernel/bpf/helpers.c
kernel/bpf/verifier.c

index 919f647c740fb54f0fca4f4a16c5f614f9cf8521..e1390d1e331b589270a5d614b357d726e717679e 100644 (file)
@@ -3242,3 +3242,8 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
                BUG_ON(ret < 0);
        }
 }
+
+bool bpf_jit_supports_ptr_xchg(void)
+{
+       return true;
+}
index 68fb6c8142fec0d48141a6a376653229f7f3cb7b..35f067fd3840a6d0f3a0644999f21a0b111f0b31 100644 (file)
@@ -955,6 +955,7 @@ bool bpf_jit_supports_subprog_tailcalls(void);
 bool bpf_jit_supports_kfunc_call(void);
 bool bpf_jit_supports_far_kfunc_call(void);
 bool bpf_jit_supports_exceptions(void);
+bool bpf_jit_supports_ptr_xchg(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);
 
index ea6843be2616c968b91c58751b8f975a5869c2be..fbb1d95a9b446d13f1f57bc8298c4dbc6fc494e0 100644 (file)
@@ -2925,6 +2925,16 @@ bool __weak bpf_jit_supports_far_kfunc_call(void)
        return false;
 }
 
+/* Return TRUE if the JIT backend satisfies the following two conditions:
+ * 1) JIT backend supports atomic_xchg() on pointer-sized words.
+ * 2) Under the specific arch, the implementation of xchg() is the same
+ *    as atomic_xchg() on pointer-sized words.
+ */
+bool __weak bpf_jit_supports_ptr_xchg(void)
+{
+       return false;
+}
+
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
  */
index be72824f32b2cc5e3dfcb8d2bd613b86116a498c..e04ca1af892723581354b06ba24f5637dab200d8 100644 (file)
@@ -1414,6 +1414,7 @@ BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
 {
        unsigned long *kptr = map_value;
 
+       /* This helper may be inlined by verifier. */
        return xchg(kptr, (unsigned long)ptr);
 }
 
index 65f598694d550359f2b926ef26ae30d0c80c6f69..5b33d65eef7bef3aabadb9aee1ea116499d24dfc 100644 (file)
@@ -19809,6 +19809,23 @@ patch_map_ops_generic:
                        continue;
                }
 
+               /* Implement bpf_kptr_xchg inline */
+               if (prog->jit_requested && BITS_PER_LONG == 64 &&
+                   insn->imm == BPF_FUNC_kptr_xchg &&
+                   bpf_jit_supports_ptr_xchg()) {
+                       insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
+                       insn_buf[1] = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
+                       cnt = 2;
+
+                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       delta    += cnt - 1;
+                       env->prog = prog = new_prog;
+                       insn      = new_prog->insnsi + i + delta;
+                       continue;
+               }
 patch_call_imm:
                fn = env->ops->get_func_proto(insn->imm, env->prog);
                /* all functions that have prototype and verifier allowed