enum bpf_link_type type;
        const struct bpf_link_ops *ops;
        struct bpf_prog *prog;
-       struct work_struct work;
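+       /* rcu is used to defer freeing of the link until after an RCU grace
+        * period; work may be used before that to schedule this RCU-based
+        * freeing. The two are never in use at the same time, so they can
+        * share storage.
+        */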
+       union {
+               struct rcu_head rcu;
+               struct work_struct work;
+       };
 };
 
 struct bpf_link_ops {
        void (*release)(struct bpf_link *link);
+       /* deallocate link resources callback, called without RCU grace period
+        * waiting
+        */
        void (*dealloc)(struct bpf_link *link);
+       /* deallocate link resources callback, called after RCU grace period;
+        * if underlying BPF program is sleepable we go through tasks trace
+        * RCU GP and then "classic" RCU GP
+        */
+       void (*dealloc_deferred)(struct bpf_link *link);
        int (*detach)(struct bpf_link *link);
        int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
                           struct bpf_prog *old_prog);
 
        atomic64_inc(&link->refcnt);
 }
 
+/* RCU callback used by bpf_link_free() (queued via call_rcu()) and also
+ * called directly by bpf_link_defer_dealloc_mult_rcu_gp(). @rcu is the
+ * rcu_head embedded in a struct bpf_link; the enclosing link is recovered
+ * and handed to its ops->dealloc_deferred callback.
+ */
+static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
+{
+       struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
+
+       /* free bpf_link and its containing memory */
+       link->ops->dealloc_deferred(link);
+}
+
+/* Callback that bpf_link_free() passes to call_rcu_tasks_trace() when the
+ * link's program is sleepable. It either deallocates right away or chains
+ * one more call_rcu() before deallocating.
+ */
+static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+{
+       /* presumably true when a tasks trace RCU grace period already
+        * covers a "classic" RCU grace period, making the extra
+        * call_rcu() hop unnecessary -- confirm against the RCU API
+        */
+       if (rcu_trace_implies_rcu_gp())
+               bpf_link_defer_dealloc_rcu_gp(rcu);
+       else
+               call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
+}
+
 /* bpf_link_free is guaranteed to be called from process context */
 static void bpf_link_free(struct bpf_link *link)
 {
+       bool sleepable = false;
+
        bpf_link_free_id(link->id);
        if (link->prog) {
+               sleepable = link->prog->sleepable;
                /* detach BPF program, clean up used resources */
                link->ops->release(link);
                bpf_prog_put(link->prog);
        }
-       /* free bpf_link and its containing memory */
-       link->ops->dealloc(link);
+       if (link->ops->dealloc_deferred) {
+               /* schedule BPF link deallocation; if underlying BPF program
+                * is sleepable, we need to first wait for RCU tasks trace
+                * sync, then go through "classic" RCU grace period
+                */
+               if (sleepable)
+                       call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+               else
+                       call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+       }
+       if (link->ops->dealloc)
+               link->ops->dealloc(link);
 }
 
 static void bpf_link_put_deferred(struct work_struct *work)
 
 static const struct bpf_link_ops bpf_raw_tp_link_lops = {
        .release = bpf_raw_tp_link_release,
-       .dealloc = bpf_raw_tp_link_dealloc,
+       /* deallocation is deferred: bpf_link_free() runs it from an RCU callback */
+       .dealloc_deferred = bpf_raw_tp_link_dealloc,
        .show_fdinfo = bpf_raw_tp_link_show_fdinfo,
        .fill_link_info = bpf_raw_tp_link_fill_link_info,
 };
 
 
 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
        .release = bpf_kprobe_multi_link_release,
-       .dealloc = bpf_kprobe_multi_link_dealloc,
+       /* deallocation is deferred: bpf_link_free() runs it from an RCU callback */
+       .dealloc_deferred = bpf_kprobe_multi_link_dealloc,
        .fill_link_info = bpf_kprobe_multi_link_fill_link_info,
 };
 
 
 static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
        .release = bpf_uprobe_multi_link_release,
-       .dealloc = bpf_uprobe_multi_link_dealloc,
+       /* deallocation is deferred: bpf_link_free() runs it from an RCU callback */
+       .dealloc_deferred = bpf_uprobe_multi_link_dealloc,
        .fill_link_info = bpf_uprobe_multi_link_fill_link_info,
 };