habanalabs: use variable poll interval for fw loading
author	Ohad Sharabi <osharabi@habana.ai>
Tue, 26 Oct 2021 12:33:23 +0000 (15:33 +0300)
committer	Oded Gabbay <ogabbay@kernel.org>
Sun, 26 Dec 2021 06:59:04 +0000 (08:59 +0200)
Using a variable poll interval for FW loading allows us to support
much slower environments (e.g. emulation/PLDM) by changing only a
single line of code, instead of choosing a different interval in
each function that polls.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
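
The whole change reduces to one per-device knob: the interval is picked once
when the device is created (300 seconds on PLDM, 10 ms otherwise) and every
polling site reads hdev->fw_poll_interval_usec instead of a compile-time
constant. Below is a minimal, self-contained C sketch of that pattern; the
trimmed-down struct hl_device and the hl_set_fw_poll_interval() helper are
illustrative stand-ins rather than the driver's actual code (in the patch the
assignment is open-coded in create_hdev()).

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Same values the patch adds to habanalabs.h. */
	#define HL_FW_STATUS_POLL_INTERVAL_USEC		10000		/* 10ms */
	#define HL_FW_STATUS_PLDM_POLL_INTERVAL_USEC	300000000	/* 300s */

	/* Trimmed-down stand-in for the driver's struct hl_device. */
	struct hl_device {
		bool pldm;			/* running on a PLDM emulation platform */
		uint64_t fw_poll_interval_usec;	/* single knob used by all pollers */
	};

	/* Hypothetical helper; the patch open-codes this in create_hdev(). */
	static void hl_set_fw_poll_interval(struct hl_device *hdev)
	{
		hdev->fw_poll_interval_usec = hdev->pldm ?
				HL_FW_STATUS_PLDM_POLL_INTERVAL_USEC :
				HL_FW_STATUS_POLL_INTERVAL_USEC;
	}

	int main(void)
	{
		struct hl_device dev = { .pldm = true };

		hl_set_fw_poll_interval(&dev);
		printf("fw poll interval: %llu usec\n",
		       (unsigned long long)dev.fw_poll_interval_usec);
		return 0;
	}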

index 8cbec10cddb1a24d9de5159e61fc749c8dbd96f5..c68ad4d7b1bbbfb1a61d393bfd18616309acea0c 100644 (file)
@@ -15,8 +15,6 @@
 
 #define FW_FILE_MAX_SIZE               0x1400000 /* maximum size of 20MB */
 
-#define FW_CPU_STATUS_POLL_INTERVAL_USEC       10000
-
 static char *extract_fw_ver_from_str(const char *fw_str)
 {
        char *str, *fw_ver, *whitespace;
@@ -1102,7 +1100,7 @@ static int hl_fw_read_preboot_caps(struct hl_device *hdev,
                (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
                (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
                (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
-               FW_CPU_STATUS_POLL_INTERVAL_USEC,
+               hdev->fw_poll_interval_usec,
                timeout);
 
        if (rc) {
@@ -1286,11 +1284,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
 {
        int rc;
 
-       /* pldm was added for cases in which we use preboot on pldm and want
-        * to load boot fit, but we can't wait for preboot because it runs
-        * very slowly
-        */
-       if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) || hdev->pldm)
+       if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
                return 0;
 
        /*
@@ -1436,7 +1430,7 @@ static int hl_fw_dynamic_wait_for_status(struct hl_device *hdev,
                le32_to_cpu(dyn_regs->cpu_cmd_status_to_host),
                status,
                FIELD_GET(COMMS_STATUS_STATUS_MASK, status) == expected_status,
-               FW_CPU_STATUS_POLL_INTERVAL_USEC,
+               hdev->fw_poll_interval_usec,
                timeout);
 
        if (rc) {
@@ -2070,7 +2064,7 @@ static int hl_fw_dynamic_wait_for_boot_fit_active(struct hl_device *hdev,
                status,
                (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
                (status == CPU_BOOT_STATUS_SRAM_AVAIL),
-               FW_CPU_STATUS_POLL_INTERVAL_USEC,
+               hdev->fw_poll_interval_usec,
                dyn_loader->wait_for_bl_timeout);
        if (rc) {
                dev_err(hdev->dev, "failed to wait for boot\n");
@@ -2097,7 +2091,7 @@ static int hl_fw_dynamic_wait_for_linux_active(struct hl_device *hdev,
                le32_to_cpu(dyn_loader->comm_desc.cpu_dyn_regs.cpu_boot_status),
                status,
                (status == CPU_BOOT_STATUS_SRAM_AVAIL),
-               FW_CPU_STATUS_POLL_INTERVAL_USEC,
+               hdev->fw_poll_interval_usec,
                fw_loader->cpu_timeout);
        if (rc) {
                dev_err(hdev->dev, "failed to wait for Linux\n");
@@ -2296,6 +2290,15 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
                goto protocol_err;
        }
 
+       /*
+        * when testing FW load (without Linux) on PLDM we don't want to
+        * wait until boot fit is active as it may take several hours.
+        * instead, we load the bootfit and let it do all initializations in
+        * the background.
+        */
+       if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
+               return 0;
+
        rc = hl_fw_dynamic_wait_for_boot_fit_active(hdev, fw_loader);
        if (rc)
                goto protocol_err;
@@ -2388,7 +2391,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
                cpu_boot_status_reg,
                status,
                status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
-               FW_CPU_STATUS_POLL_INTERVAL_USEC,
+               hdev->fw_poll_interval_usec,
                fw_loader->boot_fit_timeout);
 
        if (rc) {
@@ -2411,7 +2414,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
                        cpu_msg_status_reg,
                        status,
                        status == CPU_MSG_OK,
-                       FW_CPU_STATUS_POLL_INTERVAL_USEC,
+                       hdev->fw_poll_interval_usec,
                        fw_loader->boot_fit_timeout);
 
                if (rc) {
@@ -2440,7 +2443,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
                (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
                (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
                (status == CPU_BOOT_STATUS_SRAM_AVAIL),
-               FW_CPU_STATUS_POLL_INTERVAL_USEC,
+               hdev->fw_poll_interval_usec,
                cpu_timeout);
 
        dev_dbg(hdev->dev, "uboot status = %d\n", status);
@@ -2489,7 +2492,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
                        cpu_boot_status_reg,
                        status,
                        (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
-                       FW_CPU_STATUS_POLL_INTERVAL_USEC,
+                       hdev->fw_poll_interval_usec,
                        cpu_timeout);
 
                if (rc) {
@@ -2509,7 +2512,7 @@ static int hl_fw_static_init_cpu(struct hl_device *hdev,
                cpu_boot_status_reg,
                status,
                (status == CPU_BOOT_STATUS_SRAM_AVAIL),
-               FW_CPU_STATUS_POLL_INTERVAL_USEC,
+               hdev->fw_poll_interval_usec,
                cpu_timeout);
 
        /* Clear message */
index b3c6b660c7aac7be58e0692cdb68cf64a47bcc27..5fc9cfd892e8d8851fa46bd3d87e3850fe40a30d 100644 (file)
@@ -61,6 +61,9 @@
 #define HL_CPUCP_INFO_TIMEOUT_USEC     10000000 /* 10s */
 #define HL_CPUCP_EEPROM_TIMEOUT_USEC   10000000 /* 10s */
 
+#define HL_FW_STATUS_POLL_INTERVAL_USEC                10000 /* 10ms */
+#define HL_FW_STATUS_PLDM_POLL_INTERVAL_USEC   300000000 /* 300s */
+
 #define HL_PCI_ELBI_TIMEOUT_MSEC       10 /* 10ms */
 
 #define HL_SIM_MAX_TIMEOUT_US          10000000 /* 10s */
@@ -2459,6 +2462,7 @@ struct multi_cs_data {
  * @last_open_session_duration_jif: duration (jiffies) of the last device open
  *                                  session.
  * @open_counter: number of successful device open operations.
+ * @fw_poll_interval_usec: FW status poll interval in usec.
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @card_type: Various ASICs have several card types. This indicates the card
@@ -2607,6 +2611,7 @@ struct hl_device {
        u64                             last_successful_open_jif;
        u64                             last_open_session_duration_jif;
        u64                             open_counter;
+       u64                             fw_poll_interval_usec;
        atomic_t                        in_reset;
        enum hl_pll_frequency           curr_pll_profile;
        enum cpucp_card_types           card_type;
index 949d1b5c5c41cc86c01c7b0ca623fa4e67ec3e89..5989826701bce4ddfe99c83005d00d3a94b52ca4 100644 (file)
@@ -345,6 +345,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 
        set_driver_behavior_per_device(hdev);
 
+       hdev->fw_poll_interval_usec = hdev->pldm ? HL_FW_STATUS_PLDM_POLL_INTERVAL_USEC :
+                                                       HL_FW_STATUS_POLL_INTERVAL_USEC;
+
        hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
        hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
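
For completeness, here is a sketch of how a polling site consumes the
per-device interval. In the driver the wait is performed by the polling
macros whose argument lists appear in the hunks above; the sleep loop,
read_boot_status() and wait_for_boot_status() below are hypothetical
userspace stand-ins that only illustrate the shape of the wait, with slower
platforms simply supplying a larger fw_poll_interval_usec.

	#define _POSIX_C_SOURCE 199309L
	#include <stdint.h>
	#include <time.h>

	#define CPU_BOOT_STATUS_SRAM_AVAIL 17	/* illustrative value, not the real encoding */

	struct hl_device {
		uint64_t fw_poll_interval_usec;
	};

	/* Stand-in for reading the CPU boot status register. */
	static uint32_t read_boot_status(struct hl_device *hdev)
	{
		(void)hdev;
		return CPU_BOOT_STATUS_SRAM_AVAIL;	/* pretend FW came up immediately */
	}

	static void sleep_usec(uint64_t usec)
	{
		struct timespec ts = {
			.tv_sec  = usec / 1000000,
			.tv_nsec = (usec % 1000000) * 1000,
		};
		nanosleep(&ts, NULL);
	}

	/* Poll until the expected status is seen or timeout_usec expires. */
	static int wait_for_boot_status(struct hl_device *hdev, uint32_t expected,
					uint64_t timeout_usec)
	{
		uint64_t waited = 0;

		while (read_boot_status(hdev) != expected) {
			if (waited >= timeout_usec)
				return -1;	/* timed out */
			sleep_usec(hdev->fw_poll_interval_usec);
			waited += hdev->fw_poll_interval_usec;
		}
		return 0;
	}

	int main(void)
	{
		struct hl_device dev = { .fw_poll_interval_usec = 10000 /* 10ms */ };

		return wait_for_boot_status(&dev, CPU_BOOT_STATUS_SRAM_AVAIL,
					    10000000 /* 10s */);
	}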