#include <linux/ftrace.h>
 
 #include <asm/cacheflush.h>
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
        return 0;
 }
 #endif                         /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+#define S_RA_SP        (0xafbf << 16)  /* s{d,w} ra, offset(sp): upper 16 bits
+                                        * of the store that saves ra; used to
+                                        * recognise where ra was spilled */
+#define S_R_SP (0xafb0 << 16)  /* s{d,w} R, offset(sp): bit pattern tested to
+                                * decide whether an instruction still counts
+                                * as a register store to the stack */
+#define OFFSET_MASK    0xffff  /* low 16 bits of the store instruction, i.e.
+                                * its stack offset (range: 0 ~ PT_SIZE) */
+
+/*
+ * Locate the stack slot that holds the return address of the traced function.
+ *
+ * Starting just before the _mcount call sequence, walk backwards through the
+ * preceding instructions looking for "s{d,w} ra, offset(sp)".
+ *
+ * Returns:
+ *   - parent_addr, if a non-store instruction is met before the ra store
+ *     (leaf function: ra was never saved on the stack);
+ *   - fp + offset, if the ra store is found and that slot contains "parent";
+ *   - 0, if the slot found does not contain "parent".
+ */
+unsigned long ftrace_get_parent_addr(unsigned long self_addr,
+                                    unsigned long parent,
+                                    unsigned long parent_addr,
+                                    unsigned long fp)
+{
+       unsigned long insn_addr, slot;
+       unsigned int insn;
+
+       /*
+        * In module or kernel?
+        *   module: start from "lui v1, HI_16BIT_OF_MCOUNT" (self_addr - 20)
+        *   kernel: start from "move ra, at"                (self_addr - 12)
+        */
+       insn_addr = self_addr - ((self_addr & 0x40000000) ? 20 : 12);
+
+       for (;;) {
+               /* step back one instruction and fetch it */
+               insn_addr -= 4;
+               insn = *(unsigned int *)insn_addr;
+
+               /* A non-store instruction seen before the ra store means
+                * this is a leaf function which keeps ra off the stack. */
+               if ((insn & S_R_SP) != S_R_SP)
+                       return parent_addr;
+
+               /* found "s{d,w} ra, offset(sp)" */
+               if ((insn & S_RA_SP) == S_RA_SP)
+                       break;
+       }
+
+       /* the store's immediate field is the offset of the saved ra */
+       slot = fp + (insn & OFFSET_MASK);
+
+       /* only accept the slot if it really holds the expected address */
+       return (*(unsigned long *)slot == parent) ? slot : 0;
+}
+
+/*
+ * Hook the return address of the traced function and push the original one
+ * onto the stack of return addresses kept in the current thread info.
+ *
+ * @parent:    location holding the return address as seen by _mcount
+ * @self_addr: address in the traced function; recorded as trace.func
+ * @fp:        frame pointer value; used as the base when looking up the
+ *             stack slot in which a non-leaf function saved its ra
+ *
+ * Does nothing while graph tracing is paused for the current task.
+ */
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+                          unsigned long fp)
+{
+       unsigned long old;
+       struct ftrace_graph_ent trace;
+       unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+       if (unlikely(atomic_read(&current->tracing_graph_pause)))
+               return;
+
+       /* "parent" is the stack address that holds the return address of the
+        * caller of _mcount. A leaf function does not save its return address
+        * on the stack, so one is "emulated" in _mcount's stack space and can
+        * be hijacked directly. A non-leaf function saves the return address
+        * in its own stack space, so "parent" cannot be hijacked directly and
+        * the real stack address has to be found first;
+        * ftrace_get_parent_addr() does that.
+        */
+
+       old = *parent;
+
+       parent = (unsigned long *)ftrace_get_parent_addr(self_addr, old,
+                                                        (unsigned long)parent,
+                                                        fp);
+
+       /* If the stack address of the non-leaf function's ra could not be
+        * determined, stop the function graph tracer and return. */
+       if (!parent) {
+               ftrace_graph_stop();
+               WARN_ON(1);
+               return;
+       }
+
+       /* redirect the traced function's return to return_to_handler */
+       *parent = return_hooker;
+
+       /* remember the real return address; undo the hook if that fails */
+       if (ftrace_push_return_trace(old, self_addr, &trace.depth, fp) ==
+           -EBUSY) {
+               *parent = old;
+               return;
+       }
+
+       trace.func = self_addr;
+
+       /* Only trace if the calling function expects to; otherwise drop the
+        * entry just pushed and restore the original return address. */
+       if (!ftrace_graph_entry(&trace)) {
+               current->curr_ret_stack--;
+               *parent = old;
+       }
+}
+#endif                         /* CONFIG_FUNCTION_GRAPH_TRACER */
 
        PTR_L   t1, ftrace_trace_function /* Prepare t1 for (1) */
        bne     t0, t1, static_trace
         nop
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       PTR_L   t2, ftrace_graph_return
+       bne     t0, t2, ftrace_graph_caller
+        nop
+       PTR_LA  t0, ftrace_graph_entry_stub
+       PTR_L   t2, ftrace_graph_entry
+       bne     t0, t2, ftrace_graph_caller
+        nop
+#endif
        b       ftrace_stub
         nop
 
 
 #endif /* ! CONFIG_DYNAMIC_FTRACE */
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * ftrace_graph_caller: entry stub for the function graph tracer.
+ * Calls prepare_ftrace_return(&AT slot, ra, fp) between MCOUNT_SAVE_REGS
+ * and MCOUNT_RESTORE_REGS, then leaves through RETURN_BACK.
+ * NOTE(review): the save/restore/return macros are defined outside this
+ * view; presumably they set up and tear down a PT_SIZE frame -- confirm.
+ */
+NESTED(ftrace_graph_caller, PT_SIZE, ra)
+       MCOUNT_SAVE_REGS
+
+       PTR_LA  a0, PT_R1(sp)   /* arg1: &AT -> a0 (address of the PT_R1 slot) */
+       move    a1, ra          /* arg2: next ip, selfaddr */
+       jal     prepare_ftrace_return
+        move   a2, fp          /* arg3: frame pointer (jal delay slot) */
+
+       MCOUNT_RESTORE_REGS
+       RETURN_BACK
+       END(ftrace_graph_caller)
+/*
+ * return_to_handler: the address that prepare_ftrace_return() writes over
+ * the traced function's saved return address, so that function "returns"
+ * here. v0/v1 are preserved around the call to ftrace_return_to_handler,
+ * whose result (in v0) is the real parent address to jump back to.
+ */
+       .align  2
+       .globl  return_to_handler
+return_to_handler:
+       PTR_SUBU        sp, PT_SIZE     /* make room for a PT_SIZE frame */
+       PTR_S   v0, PT_R2(sp)           /* save v0 across the call */
+
+       jal     ftrace_return_to_handler
+        PTR_S  v1, PT_R3(sp)           /* save v1 (jal delay slot) */
+
+       /* restore the real parent address: v0 -> ra */
+       move    ra, v0
+
+       PTR_L   v0, PT_R2(sp)           /* reload the saved v0 */
+       PTR_L   v1, PT_R3(sp)           /* reload the saved v1 */
+       jr      ra
+        PTR_ADDIU      sp, PT_SIZE     /* release the frame (jr delay slot) */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
        .set at
        .set reorder