bpf: Introduce "volatile compare" macros
author Alexei Starovoitov <ast@kernel.org>
Tue, 26 Dec 2023 19:11:44 +0000 (11:11 -0800)
committer Andrii Nakryiko <andrii@kernel.org>
Wed, 3 Jan 2024 18:58:42 +0000 (10:58 -0800)
Compilers optimize conditional operators at will, but BPF programmers often
want to force the compiler to keep the same operator in the generated assembly
as it is written in C. Introduce bpf_cmp_likely/unlikely(var1, conditional_op, var2)
macros that can be used as:

-               if (seen >= 1000)
+               if (bpf_cmp_unlikely(seen, >=, 1000))

The macros take advantage of BPF assembly syntax, which is C-like.

The macros check the signedness of the LHS variable ('seen' above) and emit
either a signed or an unsigned compare.

For example:
int a;
bpf_cmp_unlikely(a, >, 0) will be translated to 'if rX s> 0 goto' in BPF assembly.

unsigned int a;
bpf_cmp_unlikely(a, >, 0) will be translated to 'if rX > 0 goto' in BPF assembly.

C type conversions coupled with comparison operators are tricky:
  int i = -1;
  unsigned int j = 1;
  if (i < j) // this is false: 'i' is converted to unsigned and becomes UINT_MAX.

  long i = -1;
  unsigned int j = 1;
  if (i < j) // this is true: 'j' is converted to (64-bit) long, so both values are preserved.

Make sure the BPF program is compiled with -Wsign-compare; then the macros
will catch such mistakes.
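
For example, the macros still surface the mismatch because _bpf_cmp()
compares '__lhs OP __rhs' and discards the result (see the diff below)
purely so that -Wsign-compare can fire. A minimal sketch:

  int i = -1;
  unsigned int j = 1;

  if (bpf_cmp_unlikely(i, <, j)) /* -Wsign-compare warns: signed/unsigned mismatch */
          return 0;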

The macros check only the LHS (left hand side) to figure out the signedness of the compare.

'if 0 < rX goto' is not allowed in the assembly, so users have to put a
variable on the LHS anyway.
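
As a hedged sketch (reusing the 'seen' counter from the selftest below):

  /* ok: variable on the LHS */
  if (bpf_cmp_unlikely(seen, >=, 1000))
          break;

  /* fails to build: the _Static_assert in _bpf_cmp() requires an lvalue LHS */
  if (bpf_cmp_unlikely(1000, <=, seen))
          break;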

The patch updates a few tests to demonstrate the use of the macros.

The macro also allows using BPF_JSET in C code, since LLVM doesn't generate it
at present. For example:

if (i & j) compiles into r0 &= r1; if r0 == 0 goto

while

if (bpf_cmp_unlikely(i, &, j)) compiles into if r0 & r1 goto
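
So a flag test can stay a single conditional jump; a hedged usage sketch
('flags', FLAG_BIT and handle_flag() are hypothetical names):

  /* a single BPF_JSET instruction ('if rX & imm goto') instead of '&=' plus '== 0' */
  if (bpf_cmp_unlikely(flags, &, FLAG_BIT))
          handle_flag();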

Note that the macros have to be careful with the RHS assembly constraint,
since for:
u64 __rhs = 1ull << 42;
asm goto("if r0 < %[rhs] goto +1" :: [rhs] "ri" (__rhs));
LLVM will silently truncate the 64-bit constant into an s32 immediate.
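
That is why _bpf_cmp() checks sizeof(__rhs) and, for 64-bit values, uses a
register constraint instead; roughly:

u64 __rhs = 1ull << 42;
/* "r" keeps the full 64-bit value in a register instead of an s32 imm */
asm goto("if r0 < %[rhs] goto +1" :: [rhs] "r" (__rhs));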

Note that in [lhs] "r"((short)LHS) the type cast is a workaround for an LLVM issue:
when LHS is exactly 32-bit, LLVM emits redundant <<=32, >>=32 shifts to zero the
upper 32 bits. When LHS is a 64-, 16- or 8-bit variable there are no shifts.
When LHS is 32-bit the (u64) cast doesn't help, hence the (short) cast is used.
It does _not_ truncate the variable before it's assigned to a register.
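
A rough illustration of the difference, assuming the codegen behavior
described above (exact output may vary by LLVM version):

u32 a;
/* "r"(a) or "r"((u64)a): LLVM zero-extends with 'rX <<= 32; rX >>= 32'
 * before the jump; "r"((short)a): no shifts, and 'a' is not actually
 * truncated before it reaches the register.
 */
asm goto("if %[lhs] > 5 goto +1" :: [lhs] "r" ((short)a));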

Traditional likely()/unlikely() macros that use __builtin_expect(!!(x), 1 or 0)
have no effect on these macros, hence the macros implement the logic manually:
the bpf_cmp_unlikely() macro preserves the compare operator as-is, while
the bpf_cmp_likely() macro flips the compare.

Consider two cases:
A.
  for() {
    if (foo >= 10) {
      bar += foo;
    }
    other code;
  }

B.
  for() {
    if (foo >= 10)
       break;
    other code;
  }

It's ok to use either the bpf_cmp_likely or the bpf_cmp_unlikely macro in both
cases, but consider that 'break' is effectively 'goto out_of_the_loop'.
Hence it's better to use bpf_cmp_unlikely in case B, while in case A
'bar += foo' is better kept as the fallthrough, i.e. likely, code path.

When it's written as:
A.
  for() {
    if (bpf_cmp_likely(foo, >=, 10)) {
      bar += foo;
    }
    other code;
  }

B.
  for() {
    if (bpf_cmp_unlikely(foo, >=, 10))
       break;
    other code;
  }

The assembly will look like:
A.
  for() {
    if r1 < 10 goto L1;
      bar += foo;
  L1:
    other code;
  }

B.
  for() {
    if r1 >= 10 goto L2;
    other code;
  }
  L2:

The choice of bpf_cmp_likely vs bpf_cmp_unlikely changes the basic block
layout and hence will greatly influence the verification process. The number
of processed instructions will be different, since the verifier walks the
fallthrough path first.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/bpf/20231226191148.48536-3-alexei.starovoitov@gmail.com
tools/testing/selftests/bpf/bpf_experimental.h
tools/testing/selftests/bpf/progs/exceptions.c
tools/testing/selftests/bpf/progs/iters_task_vma.c

index 1386baf9ae4adf4334c6b5c99a3c7e241cd4cfc7..19ed6c941c1c4b067c24880826f77dcee5203850 100644 (file)
@@ -254,6 +254,75 @@ extern void bpf_throw(u64 cookie) __ksym;
                }                                                                       \
         })
 
+#define __cmp_cannot_be_signed(x) \
+       __builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \
+       __builtin_strcmp(#x, "&") == 0
+
+#define __is_signed_type(type) (((type)(-1)) < (type)1)
+
+#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT)                                           \
+       ({                                                                                      \
+               __label__ l_true;                                                               \
+               bool ret = DEFAULT;                                                             \
+               asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]"               \
+                                 :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true);        \
+               ret = !DEFAULT;                                                                 \
+l_true:                                                                                                \
+               ret;                                                                            \
+       })
+
+/* C type conversions coupled with comparison operator are tricky.
+ * Make sure BPF program is compiled with -Wsign-compare then
+ * __lhs OP __rhs below will catch the mistake.
+ * Be aware that we check only __lhs to figure out the sign of compare.
+ */
+#define _bpf_cmp(LHS, OP, RHS, NOFLIP)                                                         \
+       ({                                                                                      \
+               typeof(LHS) __lhs = (LHS);                                                      \
+               typeof(RHS) __rhs = (RHS);                                                      \
+               bool ret;                                                                       \
+               _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression");    \
+               (void)(__lhs OP __rhs);                                                         \
+               if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) {           \
+                       if (sizeof(__rhs) == 8)                                                 \
+                               ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP);             \
+                       else                                                                    \
+                               ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP);             \
+               } else {                                                                        \
+                       if (sizeof(__rhs) == 8)                                                 \
+                               ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP);            \
+                       else                                                                    \
+                               ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP);            \
+               }                                                                               \
+               ret;                                                                            \
+       })
+
+#ifndef bpf_cmp_unlikely
+#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
+#endif
+
+#ifndef bpf_cmp_likely
+#define bpf_cmp_likely(LHS, OP, RHS)                                                           \
+       ({                                                                                      \
+               bool ret;                                                                       \
+               if (__builtin_strcmp(#OP, "==") == 0)                                           \
+                       ret = _bpf_cmp(LHS, !=, RHS, false);                                    \
+               else if (__builtin_strcmp(#OP, "!=") == 0)                                      \
+                       ret = _bpf_cmp(LHS, ==, RHS, false);                                    \
+               else if (__builtin_strcmp(#OP, "<=") == 0)                                      \
+                       ret = _bpf_cmp(LHS, >, RHS, false);                                     \
+               else if (__builtin_strcmp(#OP, "<") == 0)                                       \
+                       ret = _bpf_cmp(LHS, >=, RHS, false);                                    \
+               else if (__builtin_strcmp(#OP, ">") == 0)                                       \
+                       ret = _bpf_cmp(LHS, <=, RHS, false);                                    \
+               else if (__builtin_strcmp(#OP, ">=") == 0)                                      \
+                       ret = _bpf_cmp(LHS, <, RHS, false);                                     \
+               else                                                                            \
+                       (void) "bug";                                                           \
+               ret;                                                                            \
+       })
+#endif
+
 /* Description
  *     Assert that a conditional expression is true.
  * Returns
index 2811ee842b01ef945f342c3d79551391ff570032..f09cd14d8e040eaa1f7450a62236542af51fcfc9 100644 (file)
@@ -210,7 +210,7 @@ __noinline int assert_zero_gfunc(u64 c)
 {
        volatile u64 cookie = c;
 
-       bpf_assert_eq(cookie, 0);
+       bpf_assert(bpf_cmp_unlikely(cookie, ==, 0));
        return 0;
 }
 
@@ -218,7 +218,7 @@ __noinline int assert_neg_gfunc(s64 c)
 {
        volatile s64 cookie = c;
 
-       bpf_assert_lt(cookie, 0);
+       bpf_assert(bpf_cmp_unlikely(cookie, <, 0));
        return 0;
 }
 
@@ -226,7 +226,7 @@ __noinline int assert_pos_gfunc(s64 c)
 {
        volatile s64 cookie = c;
 
-       bpf_assert_gt(cookie, 0);
+       bpf_assert(bpf_cmp_unlikely(cookie, >, 0));
        return 0;
 }
 
@@ -234,7 +234,7 @@ __noinline int assert_negeq_gfunc(s64 c)
 {
        volatile s64 cookie = c;
 
-       bpf_assert_le(cookie, -1);
+       bpf_assert(bpf_cmp_unlikely(cookie, <=, -1));
        return 0;
 }
 
@@ -242,7 +242,7 @@ __noinline int assert_poseq_gfunc(s64 c)
 {
        volatile s64 cookie = c;
 
-       bpf_assert_ge(cookie, 1);
+       bpf_assert(bpf_cmp_unlikely(cookie, >=, 1));
        return 0;
 }
 
@@ -258,7 +258,7 @@ __noinline int assert_zero_gfunc_with(u64 c)
 {
        volatile u64 cookie = c;
 
-       bpf_assert_eq_with(cookie, 0, cookie + 100);
+       bpf_assert_with(bpf_cmp_unlikely(cookie, ==, 0), cookie + 100);
        return 0;
 }
 
@@ -266,7 +266,7 @@ __noinline int assert_neg_gfunc_with(s64 c)
 {
        volatile s64 cookie = c;
 
-       bpf_assert_lt_with(cookie, 0, cookie + 100);
+       bpf_assert_with(bpf_cmp_unlikely(cookie, <, 0), cookie + 100);
        return 0;
 }
 
@@ -274,7 +274,7 @@ __noinline int assert_pos_gfunc_with(s64 c)
 {
        volatile s64 cookie = c;
 
-       bpf_assert_gt_with(cookie, 0, cookie + 100);
+       bpf_assert_with(bpf_cmp_unlikely(cookie, >, 0), cookie + 100);
        return 0;
 }
 
@@ -282,7 +282,7 @@ __noinline int assert_negeq_gfunc_with(s64 c)
 {
        volatile s64 cookie = c;
 
-       bpf_assert_le_with(cookie, -1, cookie + 100);
+       bpf_assert_with(bpf_cmp_unlikely(cookie, <=, -1), cookie + 100);
        return 0;
 }
 
@@ -290,7 +290,7 @@ __noinline int assert_poseq_gfunc_with(s64 c)
 {
        volatile s64 cookie = c;
 
-       bpf_assert_ge_with(cookie, 1, cookie + 100);
+       bpf_assert_with(bpf_cmp_unlikely(cookie, >=, 1), cookie + 100);
        return 0;
 }
 
index e085a51d153ee8e19789935d41522b75e1c284b9..dc0c3691dcc2b21b3d6b082302d08b7b32bdab5c 100644 (file)
@@ -28,9 +28,8 @@ int iter_task_vma_for_each(const void *ctx)
                return 0;
 
        bpf_for_each(task_vma, vma, task, 0) {
-               if (seen >= 1000)
+               if (bpf_cmp_unlikely(seen, >=, 1000))
                        break;
-               barrier_var(seen);
 
                vm_ranges[seen].vm_start = vma->vm_start;
                vm_ranges[seen].vm_end = vma->vm_end;