set_bit(PDSC_S_FW_DEAD, &pdsc->state);
 }
+
+/*
+ * Called by the health check when the firmware is found unhealthy.
+ * Atomically sets PDSC_S_FW_DEAD; if the flag was already set this only
+ * logs an error, otherwise the device is torn down with
+ * PDSC_TEARDOWN_RECOVERY.
+ */
+static void pdsc_fw_down(struct pdsc *pdsc)
+{
+       if (!test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+               /* This call flipped the flag, so it owns the teardown */
+               pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+               return;
+       }
+
+       dev_err(pdsc->dev, "%s: already happening\n", __func__);
+}
+
+/*
+ * Called by the health check when firmware that is flagged dead looks
+ * healthy again.  Re-runs pdsc_setup() with PDSC_SETUP_RECOVERY; if that
+ * fails the device is torn back down with PDSC_TEARDOWN_RECOVERY.
+ * Logs an error and does nothing if PDSC_S_FW_DEAD is not set.
+ */
+static void pdsc_fw_up(struct pdsc *pdsc)
+{
+       bool fw_dead = test_bit(PDSC_S_FW_DEAD, &pdsc->state);
+
+       if (!fw_dead) {
+               dev_err(pdsc->dev, "%s: fw not dead\n", __func__);
+               return;
+       }
+
+       /* A non-zero return means recovery setup failed: undo it */
+       if (pdsc_setup(pdsc, PDSC_SETUP_RECOVERY))
+               pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
+}
+
+/*
+ * pdsc_health_thread - work handler that checks firmware health
+ * @work: the health_work member embedded in a struct pdsc
+ *
+ * Runs with config_lock held.  The check is skipped while either
+ * PDSC_S_INITING_DRIVER or PDSC_S_STOPPING_DRIVER is set.  Otherwise the
+ * result of pdsc_is_fw_good() is compared against PDSC_S_FW_DEAD:
+ * flagged dead but healthy calls pdsc_fw_up(), not flagged dead but
+ * unhealthy calls pdsc_fw_down().  Afterwards the generation bits of
+ * fw_status are cached in fw_generation.
+ */
+void pdsc_health_thread(struct work_struct *work)
+{
+       struct pdsc *pdsc = container_of(work, struct pdsc, health_work);
+       unsigned long mask;
+       bool healthy;
+
+       mutex_lock(&pdsc->config_lock);
+
+       /* Don't do a check when in a transition state */
+       mask = BIT_ULL(PDSC_S_INITING_DRIVER) |
+              BIT_ULL(PDSC_S_STOPPING_DRIVER);
+       if (pdsc->state & mask)
+               goto out_unlock;
+
+       healthy = pdsc_is_fw_good(pdsc);
+       /* last_hb is a u32, so it is printed unsigned to avoid negatives */
+       dev_dbg(pdsc->dev, "%s: health %d fw_status %#02x fw_heartbeat %u\n",
+               __func__, healthy, pdsc->fw_status, pdsc->last_hb);
+
+       if (test_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
+               /* Marked dead: only act if the firmware has come back */
+               if (healthy)
+                       pdsc_fw_up(pdsc);
+       } else {
+               /* Marked alive: only act if the firmware has gone bad */
+               if (!healthy)
+                       pdsc_fw_down(pdsc);
+       }
+
+       pdsc->fw_generation = pdsc->fw_status & PDS_CORE_FW_STS_F_GENERATION;
+
+out_unlock:
+       mutex_unlock(&pdsc->config_lock);
+}
 
 #include <linux/pds/pds_intr.h>
 
 #define PDSC_DRV_DESCRIPTION   "AMD/Pensando Core Driver"
+
+#define PDSC_WATCHDOG_SECS     5
 #define PDSC_TEARDOWN_RECOVERY false
 #define PDSC_TEARDOWN_REMOVING true
 #define PDSC_SETUP_RECOVERY    false
        u8 fw_generation;
        unsigned long last_fw_time;
        u32 last_hb;
+       struct timer_list wdtimer;
+       unsigned int wdtimer_period;
+       struct work_struct health_work;
 
        struct pdsc_devinfo dev_info;
        struct pds_core_dev_identity dev_ident;
        unsigned int nintrs;
        struct pdsc_intr_info *intr_info;       /* array of nintrs elements */
 
+       struct workqueue_struct *wq;
+
        unsigned int devcmd_timeout;
        struct mutex devcmd_lock;       /* lock for dev_cmd operations */
        struct mutex config_lock;       /* lock for configuration operations */
 
 int pdsc_setup(struct pdsc *pdsc, bool init);
 void pdsc_teardown(struct pdsc *pdsc, bool removing);
+void pdsc_health_thread(struct work_struct *work);
 
 #endif /* _PDSC_H_ */
 
        err = pdsc_devcmd_wait(pdsc, max_seconds);
        memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp));
 
+       if (err == -ENXIO || err == -ETIMEDOUT)
+               queue_work(pdsc->wq, &pdsc->health_work);
+
        return err;
 }
 
 
 };
 MODULE_DEVICE_TABLE(pci, pdsc_id_table);
 
+/*
+ * Watchdog timer callback: re-arm the timer one wdtimer_period from now
+ * (rounded by round_jiffies()) and queue the health check work on the
+ * driver's workqueue.
+ */
+static void pdsc_wdtimer_cb(struct timer_list *t)
+{
+       struct pdsc *pdsc = from_timer(pdsc, t, wdtimer);
+
+       /* jiffies is an unsigned long, so it is printed with %lu */
+       dev_dbg(pdsc->dev, "%s: jiffies %lu\n", __func__, jiffies);
+       mod_timer(&pdsc->wdtimer,
+                 round_jiffies(jiffies + pdsc->wdtimer_period));
+
+       queue_work(pdsc->wq, &pdsc->health_work);
+}
+
 static void pdsc_unmap_bars(struct pdsc *pdsc)
 {
        struct pdsc_dev_bar *bars = pdsc->bars;
        return -1;
 }
 
+#define PDSC_WQ_NAME_LEN 24
+
 static int pdsc_init_pf(struct pdsc *pdsc)
 {
+       char wq_name[PDSC_WQ_NAME_LEN];
        struct devlink *dl;
        int err;
 
        if (err)
                goto err_out_release_regions;
 
+       /* General workqueue and timer, but don't start timer yet */
+       snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
+       pdsc->wq = create_singlethread_workqueue(wq_name);
+       INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+       timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
+       pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
+
        mutex_init(&pdsc->devcmd_lock);
        mutex_init(&pdsc->config_lock);
 
        devl_register(dl);
        devl_unlock(dl);
 
+       /* Lastly, start the health check timer */
+       mod_timer(&pdsc->wdtimer, round_jiffies(jiffies + pdsc->wdtimer_period));
+
        return 0;
 
 err_out_unmap_bars:
        mutex_unlock(&pdsc->config_lock);
+       del_timer_sync(&pdsc->wdtimer);
+       if (pdsc->wq)
+               destroy_workqueue(pdsc->wq);
        mutex_destroy(&pdsc->config_lock);
        mutex_destroy(&pdsc->devcmd_lock);
        pci_free_irq_vectors(pdsc->pdev);
        devl_unlock(dl);
 
        if (!pdev->is_virtfn) {
+               del_timer_sync(&pdsc->wdtimer);
+               if (pdsc->wq)
+                       destroy_workqueue(pdsc->wq);
+
                mutex_lock(&pdsc->config_lock);
                set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);