uint64_t ktime_seg10_min;
        uint64_t ktime_seg10_max;
 #endif
+
+       struct hlist_node cpuhp;        /* used for cpuhp per hba callback */
+       struct timer_list cpuhp_poll_timer;
+       struct list_head poll_list;     /* slowpath eq polling list */
+#define LPFC_POLL_HB   1               /* slowpath heartbeat interval, msecs */
+#define LPFC_POLL_FASTPATH     0       /* called from fastpath */
+#define LPFC_POLL_SLOWPATH     1       /* called from slowpath */
 };
 
 static inline struct Scsi_Host *
 
 irqreturn_t lpfc_sli4_intr_handler(int, void *);
 irqreturn_t lpfc_sli4_hba_intr_handler(int, void *);
 
+void lpfc_sli4_cleanup_poll_list(struct lpfc_hba *phba);
+int lpfc_sli4_poll_eq(struct lpfc_queue *q, uint8_t path);
+void lpfc_sli4_poll_hbtimer(struct timer_list *t);
+void lpfc_sli4_start_polling(struct lpfc_queue *q);
+void lpfc_sli4_stop_polling(struct lpfc_queue *q);
+
 void lpfc_read_rev(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_sli4_swap_str(struct lpfc_hba *, LPFC_MBOXQ_t *);
 void lpfc_config_ring(struct lpfc_hba *, int, LPFC_MBOXQ_t *);
 
 #include <linux/irq.h>
 #include <linux/bitops.h>
 #include <linux/crash_dump.h>
+#include <linux/cpuhotplug.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
 #include "lpfc_version.h"
 #include "lpfc_ids.h"
 
+static enum cpuhp_state lpfc_cpuhp_state;
 /* Used when mapping IRQ vectors in a driver centric manner */
 static uint32_t lpfc_present_cpu;
 
+static void __lpfc_cpuhp_remove(struct lpfc_hba *phba);
+static void lpfc_cpuhp_remove(struct lpfc_hba *phba);
+static void lpfc_cpuhp_add(struct lpfc_hba *phba);
 static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
 static int lpfc_post_rcv_buf(struct lpfc_hba *);
 static int lpfc_sli4_queue_verify(struct lpfc_hba *);
        if (phba->cfg_xri_rebalancing)
                lpfc_create_multixri_pools(phba);
 
+       lpfc_cpuhp_add(phba);
+
        lpfc_unblock_mgmt_io(phba);
        return 0;
 }
                        spin_unlock_irq(shost->host_lock);
                }
        lpfc_destroy_vport_work_array(phba, vports);
+       __lpfc_cpuhp_remove(phba);
 
        if (phba->cfg_xri_rebalancing)
                lpfc_destroy_multixri_pools(phba);
        }
        spin_unlock_irq(&phba->hbalock);
 
+       lpfc_sli4_cleanup_poll_list(phba);
+
        /* Release HBA eqs */
        if (phba->sli4_hba.hdwq)
                lpfc_sli4_release_hdwq(phba);
        return;
 }
 
+/**
+ * lpfc_cpuhp_get_eq - collect the eqs serviced only by the offlining cpu
+ * @phba:   pointer to lpfc hba data structure.
+ * @cpu:    cpu going offline
+ * @eqlist: list head on which the eqs that must switch to polling
+ *          are collected
+ */
+static void
+lpfc_cpuhp_get_eq(struct lpfc_hba *phba, unsigned int cpu,
+                 struct list_head *eqlist)
+{
+       struct lpfc_vector_map_info *map;
+       const struct cpumask *maskp;
+       struct lpfc_queue *eq;
+       unsigned int i;
+       cpumask_t tmp;
+       u16 idx;
+
+       for (idx = 0; idx < phba->cfg_irq_chann; idx++) {
+               maskp = pci_irq_get_affinity(phba->pcidev, idx);
+               if (!maskp)
+                       continue;
+               /*
+                * If the irq is not affinitized to the cpu going
+                * offline, then we don't need to poll the eq attached
+                * to it.
+                */
+               if (!cpumask_and(&tmp, maskp, cpumask_of(cpu)))
+                       continue;
+               /* Get the cpus that are online and are affinitized
+                * to this irq vector.  If the count is more than 1
+                * then cpuhp is not going to shut down this vector.
+                * Since this cpu has not gone offline yet, we need
+                * a count > 1.
+                */
+               cpumask_and(&tmp, maskp, cpu_online_mask);
+               if (cpumask_weight(&tmp) > 1)
+                       continue;
+
+               /* Now that we have an irq to shut down, get the eq
+                * mapped to this irq.  Note: multiple hdwq's in
+                * the software can share an eq, but eventually
+                * only one eq will be mapped to this vector.
+                */
+               for_each_possible_cpu(i) {
+                       map = &phba->sli4_hba.cpu_map[i];
+                       if (map->irq != pci_irq_vector(phba->pcidev, idx))
+                               continue;
+                       eq = phba->sli4_hba.hdwq[map->hdwq].hba_eq;
+                       list_add(&eq->_poll_list, eqlist);
+                       /* One is enough; the other entries map to the same eq */
+                       break;
+               }
+       }
+}
+
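+/**
+ * __lpfc_cpuhp_remove - detach the hba from cpu hotplug unconditionally
+ * @phba:   pointer to lpfc hba data structure.
+ *
+ * Removes the hba instance from the cpuhp state machine without
+ * invoking the teardown callback, then waits an RCU grace period so
+ * the heartbeat poll timer can be deleted safely.
+ */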
+static void __lpfc_cpuhp_remove(struct lpfc_hba *phba)
+{
+       if (phba->sli_rev != LPFC_SLI_REV4)
+               return;
+
+       cpuhp_state_remove_instance_nocalls(lpfc_cpuhp_state,
+                                           &phba->cpuhp);
+       /*
+        * unregistering the instance doesn't stop the polling
+        * timer. Wait for the poll timer to retire.
+        */
+       synchronize_rcu();
+       del_timer_sync(&phba->cpuhp_poll_timer);
+}
+
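+/**
+ * lpfc_cpuhp_remove - detach the hba from cpu hotplug unless offline
+ * @phba:   pointer to lpfc hba data structure.
+ *
+ * Skips the removal when the port is in FC_OFFLINE_MODE, in which
+ * case the offline path has already called __lpfc_cpuhp_remove().
+ */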
+static void lpfc_cpuhp_remove(struct lpfc_hba *phba)
+{
+       if (phba->pport->fc_flag & FC_OFFLINE_MODE)
+               return;
+
+       __lpfc_cpuhp_remove(phba);
+}
+
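+/**
+ * lpfc_cpuhp_add - register the hba with the cpu hotplug framework
+ * @phba:   pointer to lpfc hba data structure.
+ *
+ * Re-arms the heartbeat poll timer if any eq is still on the poll
+ * list, then adds the hba instance to the cpuhp state machine without
+ * invoking the online callback for the already-online cpus.
+ */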
+static void lpfc_cpuhp_add(struct lpfc_hba *phba)
+{
+       if (phba->sli_rev != LPFC_SLI_REV4)
+               return;
+
+       rcu_read_lock();
+
+       if (!list_empty(&phba->poll_list)) {
+               timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0);
+               mod_timer(&phba->cpuhp_poll_timer,
+                         jiffies + msecs_to_jiffies(LPFC_POLL_HB));
+       }
+
+       rcu_read_unlock();
+
+       cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state,
+                                        &phba->cpuhp);
+}
+
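+/**
+ * __lpfc_cpuhp_checks - guard checks shared by the hotplug callbacks
+ * @phba:   pointer to lpfc hba data structure.
+ * @retval: value for the caller to return when a check fires
+ *
+ * Return: true if the caller should bail out early with *@retval
+ * (-EAGAIN while unloading, 0 for non-SLI4 hbas), false to proceed.
+ */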
+static bool __lpfc_cpuhp_checks(struct lpfc_hba *phba, int *retval)
+{
+       if (phba->pport->load_flag & FC_UNLOADING) {
+               *retval = -EAGAIN;
+               return true;
+       }
+
+       if (phba->sli_rev != LPFC_SLI_REV4) {
+               *retval = 0;
+               return true;
+       }
+
+       /* proceed with the hotplug */
+       return false;
+}
+
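+/**
+ * lpfc_cpu_offline - cpuhp callback for a cpu going offline
+ * @cpu:    the cpu going offline
+ * @node:   hlist node linking back to the owning lpfc hba
+ *
+ * Collects the eqs whose irq vector is shut down along with @cpu and
+ * switches each of them to polled mode so completions keep flowing.
+ *
+ * Return: 0 on success, -EAGAIN if the driver is unloading.
+ */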
+static int lpfc_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+       struct lpfc_hba *phba = hlist_entry_safe(node, struct lpfc_hba, cpuhp);
+       struct lpfc_queue *eq, *next;
+       LIST_HEAD(eqlist);
+       int retval;
+
+       if (!phba) {
+               WARN_ONCE(!phba, "cpu: %u. phba:NULL", raw_smp_processor_id());
+               return 0;
+       }
+
+       if (__lpfc_cpuhp_checks(phba, &retval))
+               return retval;
+
+       lpfc_cpuhp_get_eq(phba, cpu, &eqlist);
+
+       /* start polling on these eq's */
+       list_for_each_entry_safe(eq, next, &eqlist, _poll_list) {
+               list_del_init(&eq->_poll_list);
+               lpfc_sli4_start_polling(eq);
+       }
+
+       return 0;
+}
+
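+/**
+ * lpfc_cpu_online - cpuhp callback for a cpu coming online
+ * @cpu:    the cpu coming online
+ * @node:   hlist node linking back to the owning lpfc hba
+ *
+ * Walks the poll list and switches every eq whose hdwq is handled by
+ * @cpu back to interrupt mode now that its vector is serviced again.
+ *
+ * Return: 0 on success, -EAGAIN if the driver is unloading.
+ */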
+static int lpfc_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+       struct lpfc_hba *phba = hlist_entry_safe(node, struct lpfc_hba, cpuhp);
+       struct lpfc_queue *eq, *next;
+       unsigned int n;
+       int retval;
+
+       if (!phba) {
+               WARN_ONCE(!phba, "cpu: %u. phba:NULL", raw_smp_processor_id());
+               return 0;
+       }
+
+       if (__lpfc_cpuhp_checks(phba, &retval))
+               return retval;
+
+       list_for_each_entry_safe(eq, next, &phba->poll_list, _poll_list) {
+               n = lpfc_find_cpu_handle(phba, eq->hdwq, LPFC_FIND_BY_HDWQ);
+               if (n == cpu)
+                       lpfc_sli4_stop_polling(eq);
+       }
+
+       return 0;
+}
+
 /**
  * lpfc_sli4_enable_msix - Enable MSI-X interrupt mode to SLI-4 device
  * @phba: pointer to lpfc hba data structure.
        /* Wait for completion of device XRI exchange busy */
        lpfc_sli4_xri_exchange_busy_wait(phba);
 
+       /* per-phba callback de-registration for hotplug event */
+       lpfc_cpuhp_remove(phba);
+
        /* Disable PCI subsystem interrupt */
        lpfc_sli4_disable_intr(phba);
 
        /* Enable RAS FW log support */
        lpfc_sli4_ras_setup(phba);
 
+       INIT_LIST_HEAD(&phba->poll_list);
+       cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, &phba->cpuhp);
+
        return 0;
 
 out_free_sysfs_attr:
        /* Initialize in case vector mapping is needed */
        lpfc_present_cpu = num_present_cpus();
 
+       error = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+                                       "lpfc/sli4:online",
+                                       lpfc_cpu_online, lpfc_cpu_offline);
+       if (error < 0)
+               goto cpuhp_failure;
+       lpfc_cpuhp_state = error;
+
        error = pci_register_driver(&lpfc_driver);
-       if (error) {
-               fc_release_transport(lpfc_transport_template);
-               fc_release_transport(lpfc_vport_transport_template);
-       }
+       if (error)
+               goto unwind;
+
+       return error;
+
+unwind:
+       cpuhp_remove_multi_state(lpfc_cpuhp_state);
+cpuhp_failure:
+       fc_release_transport(lpfc_transport_template);
+       fc_release_transport(lpfc_vport_transport_template);
 
        return error;
 }
 {
        misc_deregister(&lpfc_mgmt_dev);
        pci_unregister_driver(&lpfc_driver);
+       cpuhp_remove_multi_state(lpfc_cpuhp_state);
        fc_release_transport(lpfc_transport_template);
        fc_release_transport(lpfc_vport_transport_template);
        idr_destroy(&lpfc_hba_index);
 
 }
 
 static int
-lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq)
+lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq,
+                    uint8_t rearm)
 {
        struct lpfc_eqe *eqe;
        int count = 0, consumed = 0;
        eq->queue_claimed = 0;
 
 rearm_and_exit:
-       /* Always clear and re-arm the EQ */
-       phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, LPFC_QUEUE_REARM);
+       /* Always clear the EQ; re-arm only when the caller asks for it. */
+       phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, rearm);
 
        return count;
 }
 
        if (mbox_pending)
                /* process and rearm the EQ */
-               lpfc_sli4_process_eq(phba, fpeq);
+               lpfc_sli4_process_eq(phba, fpeq, LPFC_QUEUE_REARM);
        else
                /* Always clear and re-arm the EQ */
                sli4_hba->sli4_write_eq_db(phba, fpeq, 0, LPFC_QUEUE_REARM);
                    struct lpfc_iocbq *piocb, uint32_t flag)
 {
        struct lpfc_sli_ring *pring;
+       struct lpfc_queue *eq;
        unsigned long iflags;
        int rc;
 
        if (phba->sli_rev == LPFC_SLI_REV4) {
+               eq = phba->sli4_hba.hdwq[piocb->hba_wqidx].hba_eq;
+
                pring = lpfc_sli4_calc_ring(phba, piocb);
                if (unlikely(pring == NULL))
                        return IOCB_ERROR;
                spin_lock_irqsave(&pring->ring_lock, iflags);
                rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
                spin_unlock_irqrestore(&pring->ring_lock, iflags);
+
+               lpfc_sli4_poll_eq(eq, LPFC_POLL_FASTPATH);
        } else {
                /* For now, SLI2/3 will still use hbalock */
                spin_lock_irqsave(&phba->hbalock, iflags);
                lpfc_sli4_mod_hba_eq_delay(phba, fpeq, LPFC_MAX_AUTO_EQ_DELAY);
 
        /* process and rearm the EQ */
-       ecount = lpfc_sli4_process_eq(phba, fpeq);
+       ecount = lpfc_sli4_process_eq(phba, fpeq, LPFC_QUEUE_REARM);
 
        if (unlikely(ecount == 0)) {
                fpeq->EQ_no_entry++;
        return (hba_handled == true) ? IRQ_HANDLED : IRQ_NONE;
 } /* lpfc_sli4_intr_handler */
 
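+/**
+ * lpfc_sli4_poll_hbtimer - heartbeat timer for the slowpath eq poll list
+ * @t:      the timer_list embedded in the hba (cpuhp_poll_timer)
+ *
+ * Polls every eq on the hba's poll list under RCU protection and
+ * re-arms itself for another LPFC_POLL_HB msec tick as long as the
+ * list stays non-empty.
+ */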
+void lpfc_sli4_poll_hbtimer(struct timer_list *t)
+{
+       struct lpfc_hba *phba = from_timer(phba, t, cpuhp_poll_timer);
+       struct lpfc_queue *eq;
+       int i = 0;
+
+       rcu_read_lock();
+
+       list_for_each_entry_rcu(eq, &phba->poll_list, _poll_list)
+               i += lpfc_sli4_poll_eq(eq, LPFC_POLL_SLOWPATH);
+       if (!list_empty(&phba->poll_list))
+               mod_timer(&phba->cpuhp_poll_timer,
+                         jiffies + msecs_to_jiffies(LPFC_POLL_HB));
+
+       rcu_read_unlock();
+}
+
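+/**
+ * lpfc_sli4_poll_eq - process an eq that has been switched to polling
+ * @eq:     the event queue to poll
+ * @path:   LPFC_POLL_FASTPATH when invoked from the io submission
+ *          path, LPFC_POLL_SLOWPATH when invoked from the heartbeat
+ *          timer
+ *
+ * Return: the number of entries processed without re-arming the eq,
+ * or 0 when the eq is still in interrupt mode.
+ */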
+int lpfc_sli4_poll_eq(struct lpfc_queue *eq, uint8_t path)
+{
+       struct lpfc_hba *phba = eq->phba;
+       int i = 0;
+
+       /*
+        * Unlocking an irq is one of the entry points to check
+        * for re-schedule, but we are good on the io submission
+        * path as the midlayer does a get_cpu to glue us in.
+        * Flush out the invalidation queue so we can see the
+        * updated value of the mode flag.
+        */
+       smp_rmb();
+
+       if (READ_ONCE(eq->mode) == LPFC_EQ_POLL)
+               /* We will not likely get the completion for the caller
+                * during this iteration, but that's fine: future io's
+                * coming on this eq should be able to pick it up.  As
+                * for single io's, they will be handled through a sched
+                * from the polling timer function, which currently
+                * fires every 1 msec.
+                */
+               i = lpfc_sli4_process_eq(phba, eq, LPFC_QUEUE_NOARM);
+
+       return i;
+}
+
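+/**
+ * lpfc_sli4_add_to_poll_list - queue an eq for slowpath polling
+ * @eq:     the event queue to add
+ *
+ * Arms the heartbeat timer when the poll list transitions from empty
+ * to non-empty; the list is traversed under RCU, hence the grace
+ * period before returning.
+ */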
+static inline void lpfc_sli4_add_to_poll_list(struct lpfc_queue *eq)
+{
+       struct lpfc_hba *phba = eq->phba;
+
+       if (list_empty(&phba->poll_list)) {
+               timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0);
+               /* kickstart slowpath processing for this eq */
+               mod_timer(&phba->cpuhp_poll_timer,
+                         jiffies + msecs_to_jiffies(LPFC_POLL_HB));
+       }
+
+       list_add_rcu(&eq->_poll_list, &phba->poll_list);
+       synchronize_rcu();
+}
+
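+/**
+ * lpfc_sli4_remove_from_poll_list - take an eq off the slowpath poll list
+ * @eq:     the event queue to remove
+ *
+ * Deletes the heartbeat timer once the poll list becomes empty.
+ */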
+static inline void lpfc_sli4_remove_from_poll_list(struct lpfc_queue *eq)
+{
+       struct lpfc_hba *phba = eq->phba;
+
+       /* Disable slowpath processing for this eq.  The eq is
+        * kick-started by re-arming it ASAP (see lpfc_sli4_stop_polling()).
+        */
+       list_del_rcu(&eq->_poll_list);
+       synchronize_rcu();
+
+       if (list_empty(&phba->poll_list))
+               del_timer_sync(&phba->cpuhp_poll_timer);
+}
+
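+/**
+ * lpfc_sli4_cleanup_poll_list - drop every eq from the slowpath poll list
+ * @phba:   pointer to lpfc hba data structure.
+ *
+ * Called while the hba queues are being torn down; empties the poll
+ * list and waits for concurrent RCU readers before the eqs go away.
+ */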
+void lpfc_sli4_cleanup_poll_list(struct lpfc_hba *phba)
+{
+       struct lpfc_queue *eq, *next;
+
+       list_for_each_entry_safe(eq, next, &phba->poll_list, _poll_list)
+               list_del(&eq->_poll_list);
+
+       INIT_LIST_HEAD(&phba->poll_list);
+       synchronize_rcu();
+}
+
+static inline void
+__lpfc_sli4_switch_eqmode(struct lpfc_queue *eq, uint8_t mode)
+{
+       if (mode == eq->mode)
+               return;
+       /*
+        * Currently this function is only called during a hotplug
+        * event and the cpu on which this function is executing
+        * is going offline.  By now the hotplug has instructed
+        * the scheduler to remove this cpu from the cpu active mask,
+        * so we don't need to worry about being preempted in favor
+        * of a high priority process.  Yes, interrupts could still
+        * come in, but they are known to retire ASAP.
+        */
+
+       /* Publish the new mode so the fastpath can see it */
+       WRITE_ONCE(eq->mode, mode);
+       /* flush out the store buffer */
+       smp_wmb();
+
+       /*
+        * Add this eq to (or remove it from) the polling list and
+        * start (or stop) polling.  For a grace period both the
+        * interrupt handler and the poller will try to process the
+        * eq _but_ that's fine.  We have a synchronization mechanism
+        * in place (queue_claimed) to deal with it.  This is just a
+        * draining phase for the interrupt handler (not the eq's) as
+        * we have guaranteed through the barrier that all the CPUs
+        * have seen the new LPFC_EQ_POLL state, which effectively
+        * disables the REARMING of the EQ.  The whole idea is that
+        * the eq's die off eventually as we are not rearming them
+        * anymore.
+        */
+       mode ? lpfc_sli4_add_to_poll_list(eq) :
+              lpfc_sli4_remove_from_poll_list(eq);
+}
+
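+/**
+ * lpfc_sli4_start_polling - switch an eq from interrupt to polled mode
+ * @eq:     the event queue to switch
+ */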
+void lpfc_sli4_start_polling(struct lpfc_queue *eq)
+{
+       __lpfc_sli4_switch_eqmode(eq, LPFC_EQ_POLL);
+}
+
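+/**
+ * lpfc_sli4_stop_polling - switch an eq back to interrupt mode
+ * @eq:     the event queue to switch
+ *
+ * Re-arms the eq after the switch so pending completions can raise
+ * an interrupt again.
+ */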
+void lpfc_sli4_stop_polling(struct lpfc_queue *eq)
+{
+       struct lpfc_hba *phba = eq->phba;
+
+       __lpfc_sli4_switch_eqmode(eq, LPFC_EQ_INTERRUPT);
+
+       /* Kick start the pending io's in h/w.
+        * Once we switch back to interrupt processing on an eq,
+        * the io path completion will only arm the eq when it
+        * receives a completion.  But since the eq is in the
+        * disarmed state it doesn't receive completions, which
+        * would create a deadlock scenario.
+        */
+       phba->sli4_hba.sli4_write_eq_db(phba, eq, 0, LPFC_QUEUE_REARM);
+}
+
 /**
  * lpfc_sli4_queue_free - free a queue structure and associated memory
  * @queue: The queue structure to free.
                return NULL;
 
        INIT_LIST_HEAD(&queue->list);
+       INIT_LIST_HEAD(&queue->_poll_list);
        INIT_LIST_HEAD(&queue->wq_list);
        INIT_LIST_HEAD(&queue->wqfull_list);
        INIT_LIST_HEAD(&queue->page_list);
 
                lpfc_sli_ringtxcmpl_put(phba, pring, pwqe);
                spin_unlock_irqrestore(&pring->ring_lock, iflags);
+
+               lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH);
                return 0;
        }
 
                }
                lpfc_sli_ringtxcmpl_put(phba, pring, pwqe);
                spin_unlock_irqrestore(&pring->ring_lock, iflags);
+
+               lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH);
                return 0;
        }
 
                }
                lpfc_sli_ringtxcmpl_put(phba, pring, pwqe);
                spin_unlock_irqrestore(&pring->ring_lock, iflags);
+
+               lpfc_sli4_poll_eq(qp->hba_eq, LPFC_POLL_FASTPATH);
                return 0;
        }
        return WQE_ERROR;
 
 struct lpfc_queue {
        struct list_head list;
        struct list_head wq_list;
+
+       /*
+        * If interrupts are in effect on _all_ the eq's, the footprint
+        * of the polling code is zero (except for mode).  This memory
+        * is checked on every io to see if the io needs to be polled
+        * and, on completion, to check if the eq needs to be rearmed.
+        * Keep it in the same cacheline as the queue ptr to avoid cpu
+        * fetch stalls.  Using 1 byte here would leave a 7 byte hole,
+        * so fill it with other frequently used members.
+        */
+       uint16_t last_cpu;      /* most recent cpu */
+       uint16_t hdwq;
+       uint8_t  qe_valid;
+       uint8_t  mode;  /* interrupt or polling */
+#define LPFC_EQ_INTERRUPT      0
+#define LPFC_EQ_POLL           1
+
        struct list_head wqfull_list;
        enum lpfc_sli4_queue_type type;
        enum lpfc_sli4_queue_subtype subtype;
        struct delayed_work     sched_spwork;
 
        uint64_t isr_timestamp;
-       uint16_t hdwq;
-       uint16_t last_cpu;      /* most recent cpu */
-       uint8_t qe_valid;
        struct lpfc_queue *assoc_qp;
+       struct list_head _poll_list;
        void **q_pgs;   /* array to index entries per page */
 };