int perf_trace_add(struct perf_event *p_event, int flags)
 {
        struct trace_event_call *tp_event = p_event->tp_event;
-       struct hlist_head __percpu *pcpu_list;
-       struct hlist_head *list;
-
-       pcpu_list = tp_event->perf_events;
-       if (WARN_ON_ONCE(!pcpu_list))
-               return -EINVAL;
 
        if (!(flags & PERF_EF_START))
                p_event->hw.state = PERF_HES_STOPPED;
 
+       /*
+        * Give the event class a chance to handle TRACE_REG_PERF_ADD itself.
+        * A non-zero return from ->reg() means a custom action was performed
+        * (perf_ftrace_event_register() returns 1 for this request) and the
+        * per-cpu hlist is left alone.
+        *
+        * If TRACE_REG_PERF_ADD returns false, no custom action was performed
+        * and we need to take the default action of enqueueing our event on
+        * the right per-cpu hlist.
+        *
+        * Note that ->reg() has already been called by the time the
+        * !pcpu_list check below can bail out with -EINVAL; every other
+        * path returns 0.
+        */
+       if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) {
+               struct hlist_head __percpu *pcpu_list;
+               struct hlist_head *list;
+
+               pcpu_list = tp_event->perf_events;
+               if (WARN_ON_ONCE(!pcpu_list))
+                       return -EINVAL;
+
+               list = this_cpu_ptr(pcpu_list);
+               hlist_add_head_rcu(&p_event->hlist_entry, list);
+       }
 
-       return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
+       return 0;
 }
 
 void perf_trace_del(struct perf_event *p_event, int flags)
 {
        struct trace_event_call *tp_event = p_event->tp_event;
-       hlist_del_rcu(&p_event->hlist_entry);
-       tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
+
+       /*
+        * Give the event class a chance to handle TRACE_REG_PERF_DEL itself.
+        * A non-zero return from ->reg() means a custom action was performed
+        * (perf_ftrace_event_register() returns 1 for this request), in
+        * which case the hlist entry is not touched here.
+        *
+        * If TRACE_REG_PERF_DEL returns false, no custom action was performed
+        * and we need to take the default action of dequeueing our event from
+        * the right per-cpu hlist.
+        */
+       if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event))
+               hlist_del_rcu(&p_event->hlist_entry);
 }
 
 void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp)
                          struct ftrace_ops *ops, struct pt_regs *pt_regs)
 {
        struct ftrace_entry *entry;
-       struct hlist_head *head;
+       struct perf_event *event;
+       struct hlist_head head;
        struct pt_regs regs;
        int rctx;
 
-       head = this_cpu_ptr(event_function.perf_events);
-       if (hlist_empty(head))
+       if ((unsigned long)ops->private != smp_processor_id())
                return;
 
+       event = container_of(ops, struct perf_event, ftrace_ops);
+
+       /*
+        * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all
+        * the perf code does is hlist_for_each_entry_rcu(), so we can
+        * get away with simply setting the @head.first pointer in order
+        * to create a singular list.
+        */
+       head.first = &event->hlist_entry;
+
 #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
                    sizeof(u64)) - sizeof(u32))
 
        entry->ip = ip;
        entry->parent_ip = parent_ip;
        perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN,
-                             1, ®s, head, NULL);
+                             1, ®s, &head, NULL);
 
 #undef ENTRY_SIZE
 }
 {
        struct ftrace_ops *ops = &event->ftrace_ops;
 
-       ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU;
-       ops->func = perf_ftrace_function_call;
+       ops->flags   |= FTRACE_OPS_FL_RCU;
+       ops->func    = perf_ftrace_function_call;
+       ops->private = (void *)(unsigned long)nr_cpu_ids;
+
        return register_ftrace_function(ops);
 }
 
        return ret;
 }
 
-static void perf_ftrace_function_enable(struct perf_event *event)
-{
-       ftrace_function_local_enable(&event->ftrace_ops);
-}
-
-static void perf_ftrace_function_disable(struct perf_event *event)
-{
-       ftrace_function_local_disable(&event->ftrace_ops);
-}
-
 int perf_ftrace_event_register(struct trace_event_call *call,
                               enum trace_reg type, void *data)
 {
+       struct perf_event *event = data;
+
        switch (type) {
        case TRACE_REG_REGISTER:
        case TRACE_REG_UNREGISTER:
        case TRACE_REG_PERF_CLOSE:
                return perf_ftrace_function_unregister(data);
        case TRACE_REG_PERF_ADD:
-               perf_ftrace_function_enable(data);
-               return 0;
+               event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id();
+               return 1;
        case TRACE_REG_PERF_DEL:
-               perf_ftrace_function_disable(data);
-               return 0;
+               event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids;
+               return 1;
        }
 
        return -EINVAL;