#ifdef CONFIG_STACKTRACE
 
-#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
+#define FTRACE_KSTACK_NESTING  4
+
+#define FTRACE_KSTACK_ENTRIES  (PAGE_SIZE / FTRACE_KSTACK_NESTING)
+
 struct ftrace_stack {
-       unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
+       unsigned long           calls[FTRACE_KSTACK_ENTRIES];
+};
+
+struct ftrace_stacks {
+       struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
 };
 
-static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
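+/* Nesting counter: how many stack-trace captures are in flight on this CPU */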
 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
 
 static void __ftrace_trace_stack(struct ring_buffer *buffer,
                                  unsigned long flags,
                                  int skip, int pc, struct pt_regs *regs)
 {
        struct trace_event_call *call = &event_kernel_stack;
        struct ring_buffer_event *event;
+       struct ftrace_stack *fstack;
        struct stack_entry *entry;
        struct stack_trace trace;
-       int use_stack;
-       int size = FTRACE_STACK_ENTRIES;
+       int size = FTRACE_KSTACK_ENTRIES;
+       int stackidx;
 
        trace.nr_entries        = 0;
        trace.skip              = skip;
 
        /*
         * Since events can happen in NMIs there's no safe way to
         * use the per cpu ftrace_stacks. We reserve it and if an interrupt
         * or NMI comes in, it will just have to use the default
         * FTRACE_STACK_SIZE.
         */
        preempt_disable_notrace();
 
-       use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
+       stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
+
+       /* This should never happen. If it does, yell once and skip */
+       if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
+               goto out;
+
        /*
-        * We don't need any atomic variables, just a barrier.
-        * If an interrupt comes in, we don't care, because it would
-        * have exited and put the counter back to what we want.
-        * We just need a barrier to keep gcc from moving things
-        * around.
+        * The above __this_cpu_inc_return() is 'atomic' cpu local. An
+        * interrupt will either see the value pre increment or post
+        * increment. If the interrupt happens pre increment it will have
+        * restored the counter when it returns.  We just need a barrier to
+        * keep gcc from moving things around.
         */
        barrier();
-       if (use_stack == 1) {
-               trace.entries           = this_cpu_ptr(ftrace_stack.calls);
-               trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
 
-               if (regs)
-                       save_stack_trace_regs(regs, &trace);
-               else
-                       save_stack_trace(&trace);
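+       /* Each nesting level owns a private slot, so a nested trace cannot clobber it */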
+       fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
+       trace.entries           = fstack->calls;
+       trace.max_entries       = FTRACE_KSTACK_ENTRIES;
 
-               if (trace.nr_entries > size)
-                       size = trace.nr_entries;
-       } else
-               /* From now on, use_stack is a boolean */
-               use_stack = 0;
+       if (regs)
+               save_stack_trace_regs(regs, &trace);
+       else
+               save_stack_trace(&trace);
+
+       /* Only reserve ring buffer space for the entries actually captured */
+       if (trace.nr_entries < size)
+               size = trace.nr_entries;
 
        size *= sizeof(unsigned long);
 
        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
                                            sizeof(*entry) + size, flags, pc);
        if (!event)
                goto out;
        entry = ring_buffer_event_data(event);
 
-       memset(&entry->caller, 0, size);
-
-       if (use_stack)
-               memcpy(&entry->caller, trace.entries,
-                      trace.nr_entries * sizeof(unsigned long));
-       else {
-               trace.max_entries       = FTRACE_STACK_ENTRIES;
-               trace.entries           = entry->caller;
-               if (regs)
-                       save_stack_trace_regs(regs, &trace);
-               else
-                       save_stack_trace(&trace);
-       }
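+       /* The stack was already saved into the per-cpu slot above; just copy it out */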
+       memcpy(&entry->caller, trace.entries, size);
 
        entry->size = trace.nr_entries;