bpf: Avoid kfree_rcu() under lock in bpf_lpm_trie.
author Alexei Starovoitov <ast@kernel.org>
Fri, 29 Mar 2024 17:14:39 +0000 (10:14 -0700)
committer Andrii Nakryiko <andrii@kernel.org>
Fri, 29 Mar 2024 18:10:41 +0000 (11:10 -0700)
syzbot reported the following lock sequence:
cpu 2:
  grabs timer_base lock
    spins on bpf_lpm lock

cpu 1:
  grabs rcu krcp lock
    spins on timer_base lock

cpu 0:
  grabs bpf_lpm lock
    spins on rcu krcp lock

The bpf_lpm lock can be the same.
The timer_base lock can also be the same due to timer migration,
but the rcu krcp lock is always per-cpu, so it cannot be the same lock.
Hence it's a false positive.
To keep lockdep from complaining, move kfree_rcu() after spin_unlock.

Reported-by: syzbot+1fa663a2100308ab6eab@syzkaller.appspotmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20240329171439.37813-1-alexei.starovoitov@gmail.com
kernel/bpf/lpm_trie.c

index 939620b91c0eaee1885afa75de6e19ca52c61130..0218a5132ab562088e1821fffb5cb80294cef150 100644 (file)
@@ -316,6 +316,7 @@ static long trie_update_elem(struct bpf_map *map,
 {
        struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
        struct lpm_trie_node *node, *im_node = NULL, *new_node = NULL;
+       struct lpm_trie_node *free_node = NULL;
        struct lpm_trie_node __rcu **slot;
        struct bpf_lpm_trie_key_u8 *key = _key;
        unsigned long irq_flags;
@@ -390,7 +391,7 @@ static long trie_update_elem(struct bpf_map *map,
                        trie->n_entries--;
 
                rcu_assign_pointer(*slot, new_node);
-               kfree_rcu(node, rcu);
+               free_node = node;
 
                goto out;
        }
@@ -437,6 +438,7 @@ out:
        }
 
        spin_unlock_irqrestore(&trie->lock, irq_flags);
+       kfree_rcu(free_node, rcu);
 
        return ret;
 }
@@ -445,6 +447,7 @@ out:
 static long trie_delete_elem(struct bpf_map *map, void *_key)
 {
        struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+       struct lpm_trie_node *free_node = NULL, *free_parent = NULL;
        struct bpf_lpm_trie_key_u8 *key = _key;
        struct lpm_trie_node __rcu **trim, **trim2;
        struct lpm_trie_node *node, *parent;
@@ -514,8 +517,8 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
                else
                        rcu_assign_pointer(
                                *trim2, rcu_access_pointer(parent->child[0]));
-               kfree_rcu(parent, rcu);
-               kfree_rcu(node, rcu);
+               free_parent = parent;
+               free_node = node;
                goto out;
        }
 
@@ -529,10 +532,12 @@ static long trie_delete_elem(struct bpf_map *map, void *_key)
                rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1]));
        else
                RCU_INIT_POINTER(*trim, NULL);
-       kfree_rcu(node, rcu);
+       free_node = node;
 
 out:
        spin_unlock_irqrestore(&trie->lock, irq_flags);
+       kfree_rcu(free_parent, rcu);
+       kfree_rcu(free_node, rcu);
 
        return ret;
 }