enum ras_state state; /* RAS logging running state */
};
+enum lpfc_irq_chann_mode {
+ /* Assign IRQs to all possible cpus that have hardware queues */
+ NORMAL_MODE,
+
+ /* Assign IRQs only to cpus on the same numa node as HBA */
+ NUMA_MODE,
+
+ /* Assign IRQs only on non-hyperthreaded CPUs. This is the
+ * same as normal_mode, but assign IRQS only on physical CPUs.
+ */
+ NHT_MODE,
+};
+
struct lpfc_hba {
/* SCSI interface function jump table entries */
struct lpfc_io_buf * (*lpfc_get_scsi_buf)
uint32_t cfg_fcp_mq_threshold;
uint32_t cfg_hdw_queue;
uint32_t cfg_irq_chann;
- uint32_t cfg_irq_numa;
uint32_t cfg_suppress_rsp;
uint32_t cfg_nvme_oas;
uint32_t cfg_nvme_embed_cmd;
mempool_t *active_rrq_pool;
struct fc_host_statistics link_stats;
+ enum lpfc_irq_chann_mode irq_chann_mode;
enum intr_type_t intr_type;
uint32_t intr_mode;
#define LPFC_INTR_ERROR 0xFFFFFFFF
}
/**
- * lpfc_next_online_numa_cpu - Finds next online CPU on NUMA node
- * @numa_mask: Pointer to phba's numa_mask member.
+ * lpfc_next_online_cpu - Finds next online CPU on cpumask
+ * @mask: Pointer to phba's cpumask member.
* @start: starting cpu index
*
* Note: If no valid cpu found, then nr_cpu_ids is returned.
*
**/
static inline unsigned int
-lpfc_next_online_numa_cpu(const struct cpumask *numa_mask, unsigned int start)
+lpfc_next_online_cpu(const struct cpumask *mask, unsigned int start)
{
unsigned int cpu_it;
- for_each_cpu_wrap(cpu_it, numa_mask, start) {
+ for_each_cpu_wrap(cpu_it, mask, start) {
if (cpu_online(cpu_it))
break;
}
LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX,
"Set the number of I/O Hardware Queues");
-static inline void
-lpfc_assign_default_irq_numa(struct lpfc_hba *phba)
+#if IS_ENABLED(CONFIG_X86)
+/**
+ * lpfc_cpumask_irq_mode_init - initalizes cpumask of phba based on
+ * irq_chann_mode
+ * @phba: Pointer to HBA context object.
+ **/
+static void
+lpfc_cpumask_irq_mode_init(struct lpfc_hba *phba)
+{
+ unsigned int cpu, first_cpu, numa_node = NUMA_NO_NODE;
+ const struct cpumask *sibling_mask;
+ struct cpumask *aff_mask = &phba->sli4_hba.irq_aff_mask;
+
+ cpumask_clear(aff_mask);
+
+ if (phba->irq_chann_mode == NUMA_MODE) {
+ /* Check if we're a NUMA architecture */
+ numa_node = dev_to_node(&phba->pcidev->dev);
+ if (numa_node == NUMA_NO_NODE) {
+ phba->irq_chann_mode = NORMAL_MODE;
+ return;
+ }
+ }
+
+ for_each_possible_cpu(cpu) {
+ switch (phba->irq_chann_mode) {
+ case NUMA_MODE:
+ if (cpu_to_node(cpu) == numa_node)
+ cpumask_set_cpu(cpu, aff_mask);
+ break;
+ case NHT_MODE:
+ sibling_mask = topology_sibling_cpumask(cpu);
+ first_cpu = cpumask_first(sibling_mask);
+ if (first_cpu < nr_cpu_ids)
+ cpumask_set_cpu(first_cpu, aff_mask);
+ break;
+ default:
+ break;
+ }
+ }
+}
+#endif
+
+static void
+lpfc_assign_default_irq_chann(struct lpfc_hba *phba)
{
#if IS_ENABLED(CONFIG_X86)
- /* If AMD architecture, then default is LPFC_IRQ_CHANN_NUMA */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- phba->cfg_irq_numa = 1;
- else
- phba->cfg_irq_numa = 0;
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ /* If AMD architecture, then default is NUMA_MODE */
+ phba->irq_chann_mode = NUMA_MODE;
+ break;
+ case X86_VENDOR_INTEL:
+ /* If Intel architecture, then default is no hyperthread mode */
+ phba->irq_chann_mode = NHT_MODE;
+ break;
+ default:
+ phba->irq_chann_mode = NORMAL_MODE;
+ break;
+ }
+ lpfc_cpumask_irq_mode_init(phba);
#else
- phba->cfg_irq_numa = 0;
+ phba->irq_chann_mode = NORMAL_MODE;
#endif
}
*
* 0 = Configure number of IRQ Channels to:
* if AMD architecture, number of CPUs on HBA's NUMA node
+ * if Intel architecture, number of physical CPUs.
* otherwise, number of active CPUs.
* [1,256] = Manually specify how many IRQ Channels to use.
*
static int
lpfc_irq_chann_init(struct lpfc_hba *phba, uint32_t val)
{
- const struct cpumask *numa_mask;
+ const struct cpumask *aff_mask;
if (phba->cfg_use_msi != 2) {
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"8532 use_msi = %u ignoring cfg_irq_numa\n",
phba->cfg_use_msi);
- phba->cfg_irq_numa = 0;
- phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN;
+ phba->irq_chann_mode = NORMAL_MODE;
+ phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF;
return 0;
}
/* Check if default setting was passed */
if (val == LPFC_IRQ_CHANN_DEF)
- lpfc_assign_default_irq_numa(phba);
+ lpfc_assign_default_irq_chann(phba);
- if (phba->cfg_irq_numa) {
- numa_mask = &phba->sli4_hba.numa_mask;
+ if (phba->irq_chann_mode != NORMAL_MODE) {
+ aff_mask = &phba->sli4_hba.irq_aff_mask;
- if (cpumask_empty(numa_mask)) {
+ if (cpumask_empty(aff_mask)) {
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
- "8533 Could not identify NUMA node, "
- "ignoring cfg_irq_numa\n");
- phba->cfg_irq_numa = 0;
- phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN;
+ "8533 Could not identify CPUS for "
+ "mode %d, ignoring\n",
+ phba->irq_chann_mode);
+ phba->irq_chann_mode = NORMAL_MODE;
+ phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF;
} else {
- phba->cfg_irq_chann = cpumask_weight(numa_mask);
+ phba->cfg_irq_chann = cpumask_weight(aff_mask);
+
+ /* If no hyperthread mode, then set hdwq count to
+ * aff_mask weight as well
+ */
+ if (phba->irq_chann_mode == NHT_MODE)
+ phba->cfg_hdw_queue = phba->cfg_irq_chann;
+
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"8543 lpfc_irq_chann set to %u "
- "(numa)\n", phba->cfg_irq_chann);
+ "(mode: %d)\n", phba->cfg_irq_chann,
+ phba->irq_chann_mode);
}
} else {
if (val > LPFC_IRQ_CHANN_MAX) {
val,
LPFC_IRQ_CHANN_MIN,
LPFC_IRQ_CHANN_MAX);
- phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN;
+ phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF;
return -EINVAL;
}
phba->cfg_irq_chann = val;
return;
}
-/**
- * lpfc_cpumask_of_node_init - initalizes cpumask of phba's NUMA node
- * @phba: Pointer to HBA context object.
- *
- **/
-static void
-lpfc_cpumask_of_node_init(struct lpfc_hba *phba)
-{
- unsigned int cpu, numa_node;
- struct cpumask *numa_mask = &phba->sli4_hba.numa_mask;
-
- cpumask_clear(numa_mask);
-
- /* Check if we're a NUMA architecture */
- numa_node = dev_to_node(&phba->pcidev->dev);
- if (numa_node == NUMA_NO_NODE)
- return;
-
- for_each_possible_cpu(cpu)
- if (cpu_to_node(cpu) == numa_node)
- cpumask_set_cpu(cpu, numa_mask);
-}
-
/**
* lpfc_enable_pci_dev - Enable a generic PCI device.
* @phba: pointer to lpfc hba data structure.
phba->sli4_hba.num_present_cpu = lpfc_present_cpu;
phba->sli4_hba.num_possible_cpu = cpumask_last(cpu_possible_mask) + 1;
phba->sli4_hba.curr_disp_cpu = 0;
- lpfc_cpumask_of_node_init(phba);
/* Get all the module params for configuring this host */
lpfc_get_cfgparam(phba);
#endif
/* Not supported for NVMET */
phba->cfg_xri_rebalancing = 0;
+ if (phba->irq_chann_mode == NHT_MODE) {
+ phba->cfg_irq_chann =
+ phba->sli4_hba.num_present_cpu;
+ phba->cfg_hdw_queue =
+ phba->sli4_hba.num_present_cpu;
+ phba->irq_chann_mode = NORMAL_MODE;
+ }
break;
}
}
phba->sli4_hba.num_possible_cpu = 0;
phba->sli4_hba.num_present_cpu = 0;
phba->sli4_hba.curr_disp_cpu = 0;
- cpumask_clear(&phba->sli4_hba.numa_mask);
+ cpumask_clear(&phba->sli4_hba.irq_aff_mask);
/* Free memory allocated for fast-path work queue handles */
kfree(phba->sli4_hba.hba_eq_hdl);
* @offline: true, cpu is going offline. false, cpu is coming online.
*
* If cpu is going offline, we'll try our best effort to find the next
- * online cpu on the phba's NUMA node and migrate all offlining IRQ affinities.
+ * online cpu on the phba's original_mask and migrate all offlining IRQ
+ * affinities.
*
- * If cpu is coming online, reaffinitize the IRQ back to the onlineng cpu.
+ * If cpu is coming online, reaffinitize the IRQ back to the onlining cpu.
*
- * Note: Call only if cfg_irq_numa is enabled, otherwise rely on
+ * Note: Call only if NUMA or NHT mode is enabled, otherwise rely on
* PCI_IRQ_AFFINITY to auto-manage IRQ affinity.
*
**/
struct lpfc_vector_map_info *cpup;
struct cpumask *aff_mask;
unsigned int cpu_select, cpu_next, idx;
- const struct cpumask *numa_mask;
+ const struct cpumask *orig_mask;
- if (!phba->cfg_irq_numa)
+ if (phba->irq_chann_mode == NORMAL_MODE)
return;
- numa_mask = &phba->sli4_hba.numa_mask;
+ orig_mask = &phba->sli4_hba.irq_aff_mask;
- if (!cpumask_test_cpu(cpu, numa_mask))
+ if (!cpumask_test_cpu(cpu, orig_mask))
return;
cpup = &phba->sli4_hba.cpu_map[cpu];
return;
if (offline) {
- /* Find next online CPU on NUMA node */
- cpu_next = cpumask_next_wrap(cpu, numa_mask, cpu, true);
- cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu_next);
+ /* Find next online CPU on original mask */
+ cpu_next = cpumask_next_wrap(cpu, orig_mask, cpu, true);
+ cpu_select = lpfc_next_online_cpu(orig_mask, cpu_next);
/* Found a valid CPU */
if ((cpu_select < nr_cpu_ids) && (cpu_select != cpu)) {
{
int vectors, rc, index;
char *name;
- const struct cpumask *numa_mask = NULL;
+ const struct cpumask *aff_mask = NULL;
unsigned int cpu = 0, cpu_cnt = 0, cpu_select = nr_cpu_ids;
struct lpfc_hba_eq_hdl *eqhdl;
const struct cpumask *maskp;
/* Set up MSI-X multi-message vectors */
vectors = phba->cfg_irq_chann;
- if (phba->cfg_irq_numa) {
- numa_mask = &phba->sli4_hba.numa_mask;
- cpu_cnt = cpumask_weight(numa_mask);
+ if (phba->irq_chann_mode != NORMAL_MODE)
+ aff_mask = &phba->sli4_hba.irq_aff_mask;
+
+ if (aff_mask) {
+ cpu_cnt = cpumask_weight(aff_mask);
vectors = min(phba->cfg_irq_chann, cpu_cnt);
- /* cpu: iterates over numa_mask including offline or online
- * cpu_select: iterates over online numa_mask to set affinity
+ /* cpu: iterates over aff_mask including offline or online
+ * cpu_select: iterates over online aff_mask to set affinity
*/
- cpu = cpumask_first(numa_mask);
- cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu);
+ cpu = cpumask_first(aff_mask);
+ cpu_select = lpfc_next_online_cpu(aff_mask, cpu);
} else {
flags |= PCI_IRQ_AFFINITY;
}
eqhdl->irq = pci_irq_vector(phba->pcidev, index);
- if (phba->cfg_irq_numa) {
+ if (aff_mask) {
/* If found a neighboring online cpu, set affinity */
if (cpu_select < nr_cpu_ids)
lpfc_irq_set_aff(eqhdl, cpu_select);
LPFC_CPU_FIRST_IRQ,
cpu);
- /* Iterate to next offline or online cpu in numa_mask */
- cpu = cpumask_next(cpu, numa_mask);
+ /* Iterate to next offline or online cpu in aff_mask */
+ cpu = cpumask_next(cpu, aff_mask);
- /* Find next online cpu in numa_mask to set affinity */
- cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu);
+ /* Find next online cpu in aff_mask to set affinity */
+ cpu_select = lpfc_next_online_cpu(aff_mask, cpu);
} else if (vectors == 1) {
cpu = cpumask_first(cpu_present_mask);
lpfc_assign_eq_map_info(phba, index, LPFC_CPU_FIRST_IRQ,
struct lpfc_vector_map_info *cpu_map;
uint16_t num_possible_cpu;
uint16_t num_present_cpu;
- struct cpumask numa_mask;
+ struct cpumask irq_aff_mask;
uint16_t curr_disp_cpu;
struct lpfc_eq_intr_info __percpu *eq_info;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS