enum hl_cs_wait_status *status, s64 *timestamp);
 static void cs_do_release(struct kref *ref);
 
+/*
+ * hl_push_cs_outcome() - remember the outcome of a completed CS.
+ * @hdev: device, used only for the debug print.
+ * @outcome_store: the store to record the outcome in.
+ * @seq: sequence number of the completed CS (the lookup key).
+ * @ts: completion timestamp to remember.
+ * @error: error code to remember.
+ *
+ * The store is a fixed pool of nodes split between a free list and a used
+ * list, plus a hash map that indexes the used nodes by sequence number.
+ * Pushing takes a node from the free list when one is available. Otherwise
+ * the node at the tail of the used list - the oldest one, because new nodes
+ * are always added at the head - is unhashed and recycled, so its outcome
+ * is lost. The node is then filled in, placed at the head of the used list
+ * and hashed under @seq.
+ *
+ * The whole operation runs under the store's db_lock with interrupts saved.
+ */
+static void hl_push_cs_outcome(struct hl_device *hdev,
+                              struct hl_cs_outcome_store *outcome_store,
+                              u64 seq, ktime_t ts, int error)
+{
+       struct hl_cs_outcome *slot;
+       struct list_head *source;
+       unsigned long irq_flags;
+       bool recycle;
+
+       spin_lock_irqsave(&outcome_store->db_lock, irq_flags);
+
+       /* No free node left: fall back to recycling the oldest used one */
+       recycle = list_empty(&outcome_store->free_list);
+       source = recycle ? &outcome_store->used_list :
+                          &outcome_store->free_list;
+
+       slot = list_last_entry(source, struct hl_cs_outcome, list_link);
+
+       if (recycle) {
+               /* The evicted outcome must no longer be found by its seq */
+               hash_del(&slot->map_link);
+               dev_dbg(hdev->dev, "CS %llu outcome was lost\n", slot->seq);
+       }
+
+       /* Detach the node from whichever list it currently sits on */
+       list_del_init(&slot->list_link);
+
+       slot->seq = seq;
+       slot->ts = ts;
+       slot->error = error;
+
+       /* Newest outcome goes to the head of the used list */
+       list_add(&slot->list_link, &outcome_store->used_list);
+       hash_add(outcome_store->outcome_map, &slot->map_link, slot->seq);
+
+       spin_unlock_irqrestore(&outcome_store->db_lock, irq_flags);
+}
+
+/*
+ * hl_pop_cs_outcome() - retrieve and forget the outcome of a specific CS.
+ * @outcome_store: the store to search.
+ * @seq: sequence number of the CS whose outcome is requested.
+ * @ts: filled with the stored completion timestamp on success.
+ * @error: filled with the stored error code on success.
+ *
+ * Looks @seq up in the store's hash map. On a hit the stored data is copied
+ * out, the node is unhashed and moved from the used list to the free list.
+ * @ts and @error are left untouched when nothing is found.
+ *
+ * Return: true if an outcome for @seq was found, false otherwise.
+ */
+static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store,
+                              u64 seq, ktime_t *ts, int *error)
+{
+       struct hl_cs_outcome *entry;
+       unsigned long irq_flags;
+       bool found = false;
+
+       spin_lock_irqsave(&outcome_store->db_lock, irq_flags);
+
+       hash_for_each_possible(outcome_store->outcome_map, entry, map_link, seq) {
+               /* Different sequences may share a bucket */
+               if (entry->seq != seq)
+                       continue;
+
+               *ts = entry->ts;
+               *error = entry->error;
+
+               /* Give the node back to the free pool */
+               hash_del(&entry->map_link);
+               list_del_init(&entry->list_link);
+               list_add(&entry->list_link, &outcome_store->free_list);
+
+               found = true;
+               break;
+       }
+
+       spin_unlock_irqrestore(&outcome_store->db_lock, irq_flags);
+
+       return found;
+}
+
 static void hl_sob_reset(struct kref *ref)
 {
        struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
         */
        hl_debugfs_remove_cs(cs);
 
-       hl_ctx_put(cs->ctx);
 
        /* We need to mark an error for not submitted because in that case
         * the hl fence release flow is different. Mainly, we don't need
                        div_u64(jiffies - cs->submission_time_jiffies, HZ));
        }
 
-       if (cs->timestamp)
+       if (cs->timestamp) {
                cs->fence->timestamp = ktime_get();
+               hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
+                                  cs->fence->timestamp, cs->fence->error);
+       }
+
+       hl_ctx_put(cs->ctx);
+
        complete_all(&cs->fence->completion);
        complete_multi_cs(hdev, cs);
 
                                s64 *timestamp)
 {
        struct hl_device *hdev = ctx->hdev;
+       ktime_t timestamp_kt;
        long completion_rc;
-       int rc = 0;
+       int rc = 0, error;
 
        if (IS_ERR(fence)) {
                rc = PTR_ERR(fence);
        }
 
        if (!fence) {
-               dev_dbg(hdev->dev,
+               if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, &timestamp_kt, &error)) {
+                       dev_dbg(hdev->dev,
                        "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
                                seq, ctx->cs_sequence);
 
-               *status = CS_WAIT_STATUS_GONE;
-               return 0;
+                       *status = CS_WAIT_STATUS_GONE;
+                       return 0;
+               }
+
+               completion_rc = 1;
+               goto report_results;
        }
 
        if (!timeout_us) {
                                &fence->completion, timeout);
        }
 
+       error = fence->error;
+       timestamp_kt = fence->timestamp;
+
+report_results:
        if (completion_rc > 0) {
                *status = CS_WAIT_STATUS_COMPLETED;
                if (timestamp)
-                       *timestamp = ktime_to_ns(fence->timestamp);
+                       *timestamp = ktime_to_ns(timestamp_kt);
        } else {
                *status = CS_WAIT_STATUS_BUSY;
        }
 
-       if (fence->error == -ETIMEDOUT)
-               rc = -ETIMEDOUT;
-       else if (fence->error == -EIO)
-               rc = -EIO;
+       if (error == -ETIMEDOUT || error == -EIO)
+               rc = error;
 
        return rc;
 }
 
        uint64_t                        device_address;
 };
 
+#define HL_CS_OUTCOME_HISTORY_LEN 256
+
+/**
+ * struct hl_cs_outcome - represents a single completed CS outcome
+ * @list_link: link to either container's used list or free list
+ * @map_link: link to the container hash map; the node is hashed only while
+ *            it sits on the used list
+ * @ts: completion timestamp of the CS
+ * @seq: the original cs sequence; also the key the node is hashed under
+ * @error: error code cs completed with, if any
+ */
+struct hl_cs_outcome {
+       struct list_head list_link;
+       struct hlist_node map_link;
+       ktime_t ts;
+       u64 seq;
+       int error;
+};
+
+/**
+ * struct hl_cs_outcome_store - represents a limited store of completed CS outcomes
+ * @outcome_map: index of completed CS searchable by sequence number
+ * @used_list: list of outcome objects currently in use; new outcomes are
+ *             added at the head, and the tail entry is recycled when the
+ *             free list is empty
+ * @free_list: list of outcome objects currently not in use
+ * @nodes_pool: a static pool of preallocated outcome objects, holding
+ *              HL_CS_OUTCOME_HISTORY_LEN entries
+ * @db_lock: any operation on the store must take this lock
+ */
+struct hl_cs_outcome_store {
+       DECLARE_HASHTABLE(outcome_map, 8);
+       struct list_head used_list;
+       struct list_head free_list;
+       struct hl_cs_outcome nodes_pool[HL_CS_OUTCOME_HISTORY_LEN];
+       spinlock_t db_lock;
+};
+
 /**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
  * @refcount: reference counter for the context. Context is released only when
  *             this hits 0l. It is incremented on CS and CS_WAIT.
  * @cs_pending: array of hl fence objects representing pending CS.
+ * @outcome_store: storage data structure used to remember outcomes of completed
+ *                 command submissions for a long time after CS id wraparound.
  * @va_range: holds available virtual addresses for host and dram mappings.
  * @mem_hash_lock: protects the mem_hash.
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
        struct hl_device                *hdev;
        struct kref                     refcount;
        struct hl_fence                 **cs_pending;
+       struct hl_cs_outcome_store      outcome_store;
        struct hl_va_range              *va_range[HL_VA_RANGE_TYPE_MAX];
        struct mutex                    mem_hash_lock;
        struct mutex                    mmu_lock;