This option will slow down process creation somewhat.
 
+config HCALL_STATS
+       bool "Hypervisor call instrumentation"
+       depends on PPC_PSERIES && DEBUG_FS
+       help
+         Adds code to keep track of the number of hypervisor calls made and
+         the amount of time spent in hypervisor calls.  Wall time spent in
+         each call is always calculated, and if available CPU cycles spent
+         are also calculated.  A directory named hcall_inst is added at the
+         root of the debugfs filesystem.  Within the hcall_inst directory
+         are files that contain CPU specific call statistics.
+
+         This option will add a small amount of overhead to all hypervisor
+         calls.
+
 config DEBUGGER
        bool "Enable debugger hooks"
        depends on DEBUG_KERNEL
 
        DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
        DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
        DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
+       DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset));
 
        DEFINE(SLBSHADOW_STACKVSID,
               offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
        /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
        DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
        DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
+
+       /* hcall statistics */
+       DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
+       DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
+       DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
+       DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
 #endif /* CONFIG_PPC64 */
        DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
        DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
 
 
 obj-$(CONFIG_HVC_CONSOLE)      += hvconsole.o
 obj-$(CONFIG_HVCS)             += hvcserver.o
+obj-$(CONFIG_HCALL_STATS)      += hvCall_inst.o
 
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
        
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
+#ifdef CONFIG_HCALL_STATS
+/*
+ * HCALL_INST_PRECALL - snapshot state just before entering the hypervisor.
+ *
+ * Must preserve every register that carries an hcall argument.  Only r0 is
+ * used as scratch; the call sites have already stored the CR copy it held.
+ * The opcode (r3), the timebase and, when the CPU has one, the PURR are
+ * parked in otherwise unused STK_PARM() slots of the caller's frame so that
+ * HCALL_INST_POSTCALL can pick them up again.
+ *
+ * The PURR instructions sit in a feature section that is kept only when
+ * CPU_FTR_PURR is set, so CPUs without a PURR never execute the mfspr.
+ */
+#define HCALL_INST_PRECALL                                     \
+       std     r3,STK_PARM(r3)(r1);    /* save opcode */       \
+       mftb    r0;                     /* get timebase and */  \
+       std     r0,STK_PARM(r5)(r1);    /* save for later */    \
+BEGIN_FTR_SECTION;                                             \
+       mfspr   r0,SPRN_PURR;           /* get PURR and */      \
+       std     r0,STK_PARM(r6)(r1);    /* save for later */    \
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);
+
+/*
+ * HCALL_INST_POSTCALL - account one completed hcall to this CPU's stats.
+ *
+ * Performed immediately before function return, which allows liberal use
+ * of volatile registers: r4-r8 and cr7 are clobbered, r3 (the hcall status)
+ * is left untouched.
+ *
+ * Steps:
+ *   - reload the opcode saved by HCALL_INST_PRECALL and skip all accounting
+ *     (branch to 1:) when it is above MAX_HCALL_OPCODE;
+ *   - compute the timebase delta and, if CPU_FTR_PURR is set, the PURR delta;
+ *   - locate the slot: per_cpu__hcall_stats + (opcode >> 2) * HCALL_STAT_SIZE
+ *     + this CPU's per-cpu offset taken from the paca (r13);
+ *   - bump the call count and add the deltas to the running totals.
+ *
+ * All PURR handling is inside feature sections kept only when CPU_FTR_PURR
+ * is set, matching HCALL_INST_PRECALL.
+ */
+#define HCALL_INST_POSTCALL                                    \
+       ld      r4,STK_PARM(r3)(r1);    /* validate opcode */   \
+       cmpldi  cr7,r4,MAX_HCALL_OPCODE;                        \
+       bgt-    cr7,1f;                                         \
+                                                               \
+       /* get time and PURR snapshots after hcall */           \
+       mftb    r7;                     /* timebase after */    \
+BEGIN_FTR_SECTION;                                             \
+       mfspr   r8,SPRN_PURR;           /* PURR after */        \
+       ld      r6,STK_PARM(r6)(r1);    /* PURR before */       \
+       subf    r6,r6,r8;               /* delta */             \
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);                           \
+       ld      r5,STK_PARM(r5)(r1);    /* timebase before */   \
+       subf    r5,r5,r7;               /* time delta */        \
+                                                               \
+       /* calculate address of stat structure r4 = opcode */   \
+       srdi    r4,r4,2;                /* index into array */  \
+       mulli   r4,r4,HCALL_STAT_SIZE;                          \
+       LOAD_REG_ADDR(r7, per_cpu__hcall_stats);                \
+       add     r4,r4,r7;                                       \
+       ld      r7,PACA_DATA_OFFSET(r13); /* per cpu offset */  \
+       add     r4,r4,r7;                                       \
+                                                               \
+       /* update stats */                                      \
+       ld      r7,HCALL_STAT_CALLS(r4); /* count */            \
+       addi    r7,r7,1;                                        \
+       std     r7,HCALL_STAT_CALLS(r4);                        \
+       ld      r7,HCALL_STAT_TB(r4);   /* timebase */          \
+       add     r7,r7,r5;                                       \
+       std     r7,HCALL_STAT_TB(r4);                           \
+BEGIN_FTR_SECTION;                                             \
+       ld      r7,HCALL_STAT_PURR(r4); /* PURR */              \
+       add     r7,r7,r6;                                       \
+       std     r7,HCALL_STAT_PURR(r4);                         \
+END_FTR_SECTION_IFSET(CPU_FTR_PURR);                           \
+1:
+#else
+/* Instrumentation compiled out: both hooks expand to nothing. */
+#define HCALL_INST_PRECALL
+#define HCALL_INST_POSTCALL
+#endif
+
        .text
 
 _GLOBAL(plpar_hcall_norets)
        mfcr    r0
        stw     r0,8(r1)
 
+       HCALL_INST_PRECALL              /* stash opcode + pre-call snapshots; empty unless CONFIG_HCALL_STATS */
+
        HVSC                            /* invoke the hypervisor */
 
+       HCALL_INST_POSTCALL             /* fold deltas into per-cpu stats; uses r4-r8/cr7, leaves r3 alone */
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
        blr                             /* return r3 = status */
        mfcr    r0
        stw     r0,8(r1)
 
+       HCALL_INST_PRECALL
+
        std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
        mr      r4,r5
        std     r6, 16(r12)
        std     r7, 24(r12)
 
+       HCALL_INST_POSTCALL
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
 
        mfcr    r0
        stw     r0,8(r1)
 
+       HCALL_INST_PRECALL
+
        std     r4,STK_PARM(r4)(r1)     /* Save ret buffer */
 
        mr      r4,r5
        std     r11,56(r12)
        std     r12,64(r12)
 
+       HCALL_INST_POSTCALL
+
        lwz     r0,8(r1)
        mtcrf   0xff,r0
 
 
--- /dev/null
+/*
+ * Copyright (C) 2006 Mike Kravetz IBM Corporation
+ *
+ * Hypervisor Call Instrumentation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+
+/*
+ * Per-CPU table of hypervisor call statistics.  The slot for a given hcall
+ * is indexed by (opcode >> 2); the table is updated from assembly by
+ * HCALL_INST_POSTCALL and only read here.
+ */
+DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
+
+/*
+ * Routines for displaying the statistics in debugfs
+ */
+
+/*
+ * hc_start - seq_file ->start callback.
+ * @m:   seq_file being read (unused here)
+ * @pos: zero-based iterator position
+ *
+ * Returns the table index to show, encoded as a pointer, or NULL once the
+ * end of the table has been reached.  The index handed out is *pos + 1:
+ * this keeps the token for position 0 distinct from NULL and skips slot 0.
+ * Because of that +1, the last valid position is HCALL_STAT_ARRAY_SIZE - 2;
+ * allowing anything larger would make hc_show() read one element past the
+ * end of the per-CPU array.
+ */
+static void *hc_start(struct seq_file *m, loff_t *pos)
+{
+       if ((int)*pos < (HCALL_STAT_ARRAY_SIZE - 1))
+               return (void *)(unsigned long)(*pos + 1);
+
+       return NULL;
+}
+
+/*
+ * hc_next - seq_file ->next callback.
+ *
+ * Advances the iterator position by one and lets hc_start() translate the
+ * new position into the next token (or NULL at the end of the table).
+ */
+static void *hc_next(struct seq_file *m, void *p, loff_t *pos)
+{
+       *pos += 1;
+       return hc_start(m, pos);
+}
+
+static void hc_stop(struct seq_file *m, void *p)
+{      /* seq_file ->stop callback: hc_start() acquires nothing, so nothing to undo */
+}
+
+/*
+ * hc_show - seq_file ->show callback: print one hcall's statistics.
+ * @m: seq_file; m->private points at one CPU's hcall_stats table
+ * @p: table index produced by hc_start()/hc_next(), encoded as a pointer
+ *
+ * Emits nothing for hcalls that were never made on this CPU.  Otherwise
+ * prints "<opcode> <num_calls> <tb_total>", followed by "<purr_total>" only
+ * when the CPU actually has a PURR (CPU_FTR_PURR set); without a PURR that
+ * column would carry no information.  The opcode is recovered from the
+ * index as h_num << 2.
+ *
+ * Always returns 0.
+ */
+static int hc_show(struct seq_file *m, void *p)
+{
+       unsigned long h_num = (unsigned long)p;
+       struct hcall_stats *hs = (struct hcall_stats *)m->private;
+
+       if (!hs[h_num].num_calls)
+               return 0;
+
+       if (cpu_has_feature(CPU_FTR_PURR))
+               seq_printf(m, "%lu %lu %lu %lu\n", h_num<<2,
+                          hs[h_num].num_calls,
+                          hs[h_num].tb_total,
+                          hs[h_num].purr_total);
+       else
+               seq_printf(m, "%lu %lu %lu\n", h_num<<2,
+                          hs[h_num].num_calls,
+                          hs[h_num].tb_total);
+
+       return 0;
+}
+
+/* Iterator callbacks used by seq_file to walk one CPU's hcall_stats table. */
+static struct seq_operations hcall_inst_seq_ops = {
+       .show   = hc_show,
+       .start  = hc_start,
+       .next   = hc_next,
+       .stop   = hc_stop,
+};
+
+/*
+ * hcall_inst_seq_open - open handler for a per-CPU statistics file.
+ * @inode: inode of the debugfs file (unused; reached through @file)
+ * @file:  file being opened
+ *
+ * Sets up a seq_file over hcall_inst_seq_ops and hands it the per-CPU
+ * table pointer that was stored in the inode when the file was created.
+ *
+ * Returns 0 on success or the error from seq_open().  The seq_file is only
+ * touched after seq_open() has succeeded: on failure file->private_data
+ * does not refer to a usable seq_file.
+ */
+static int hcall_inst_seq_open(struct inode *inode, struct file *file)
+{
+       int rc;
+       struct seq_file *seq;
+
+       rc = seq_open(file, &hcall_inst_seq_ops);
+       if (rc)
+               return rc;
+
+       seq = file->private_data;
+       seq->private = file->f_dentry->d_inode->u.generic_ip;
+
+       return 0;
+}
+
+/* File operations for the per-CPU files: a read-only seq_file. */
+static struct file_operations hcall_inst_seq_fops = {
+       .open           = hcall_inst_seq_open,
+       .release        = seq_release,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+};
+
+#define        HCALL_ROOT_DIR          "hcall_inst"
+#define CPU_NAME_BUF_SIZE      32
+
+/*
+ * hcall_inst_init - publish the hcall statistics in debugfs.
+ *
+ * Does nothing (and reports success) when the firmware lacks
+ * FW_FEATURE_LPAR.  Otherwise creates the HCALL_ROOT_DIR directory at the
+ * top of debugfs and, inside it, one read-only "cpu<N>" file per possible
+ * CPU, each bound to that CPU's hcall_stats table.
+ *
+ * Returns 0 on success, -ENOMEM if the directory or any file could not be
+ * created.  Entries created before a failure are left in place.
+ */
+static int __init hcall_inst_init(void)
+{
+       char name[CPU_NAME_BUF_SIZE];
+       struct dentry *dir;
+       int cpu;
+
+       if (!firmware_has_feature(FW_FEATURE_LPAR))
+               return 0;
+
+       dir = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
+       if (dir == NULL)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu) {
+               struct dentry *entry;
+
+               snprintf(name, sizeof(name), "cpu%d", cpu);
+               entry = debugfs_create_file(name, S_IRUGO, dir,
+                                           per_cpu(hcall_stats, cpu),
+                                           &hcall_inst_seq_fops);
+               if (entry == NULL)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+__initcall(hcall_inst_init);
 
 #define H_JOIN                 0x298
 #define H_VASI_STATE            0x2A4
 #define H_ENABLE_CRQ           0x2B0
-#define MAX_HCALL_OPCODES      (H_ENABLE_CRQ >> 2)
+#define MAX_HCALL_OPCODE       H_ENABLE_CRQ
 
 #ifndef __ASSEMBLY__
 
 #define PLPAR_HCALL9_BUFSIZE 9
 long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
 
+/*
+ * For hcall instrumentation.  One structure per-hcall, per-CPU.
+ *
+ * All three fields are running totals.  The layout is exported to
+ * assembly through the HCALL_STAT_* asm-offsets, so the structure is
+ * also read and written outside of C code.
+ */
+struct hcall_stats {
+       unsigned long   num_calls;      /* number of calls (on this CPU) */
+       unsigned long   tb_total;       /* total wall time (mftb) of calls. */
+       unsigned long   purr_total;     /* total cpu time (PURR) of calls. */
+};
+void update_hcall_stats(unsigned long opcode, unsigned long tb_delta,
+                       unsigned long purr_delta);      /* NOTE(review): no definition visible in this patch - confirm still needed */
+#define HCALL_STAT_ARRAY_SIZE  ((MAX_HCALL_OPCODE >> 2) + 1)   /* one slot per (opcode >> 2), 0..MAX_HCALL_OPCODE >> 2 */
+
 #endif /* __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_HVCALL_H */