enum hl_cs_wait_status *status, s64 *timestamp);
static void cs_do_release(struct kref *ref);
+static void hl_push_cs_outcome(struct hl_device *hdev,
+ struct hl_cs_outcome_store *outcome_store,
+ u64 seq, ktime_t ts, int error)
+{
+ struct hl_cs_outcome *node;
+ unsigned long flags;
+
+ /*
+ * CS outcome store supports the following operations:
+ * push outcome - store a recent CS outcome in the store
+ * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store
+ * It uses 2 lists: used list and free list.
+ * It has a pre-allocated amount of nodes, each node stores
+ * a single CS outcome.
+ * Initially, all the nodes are in the free list.
+ * On push outcome, a node (any) is taken from the free list, its
+ * information is filled in, and the node is moved to the used list.
+ * It is possible, that there are no nodes left in the free list.
+ * In this case, we will lose some information about old outcomes. We
+ * will pop the OLDEST node from the used list, and make it free.
+ * On pop, the node is searched for in the used list (using a search
+ * index).
+ * If found, the node is then removed from the used list, and moved
+ * back to the free list. The outcome data that the node contained is
+ * returned back to the user.
+ */
+
+ spin_lock_irqsave(&outcome_store->db_lock, flags);
+
+ if (list_empty(&outcome_store->free_list)) {
+ node = list_last_entry(&outcome_store->used_list,
+ struct hl_cs_outcome, list_link);
+ hash_del(&node->map_link);
+ dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq);
+ } else {
+ node = list_last_entry(&outcome_store->free_list,
+ struct hl_cs_outcome, list_link);
+ }
+
+ list_del_init(&node->list_link);
+
+ node->seq = seq;
+ node->ts = ts;
+ node->error = error;
+
+ list_add(&node->list_link, &outcome_store->used_list);
+ hash_add(outcome_store->outcome_map, &node->map_link, node->seq);
+
+ spin_unlock_irqrestore(&outcome_store->db_lock, flags);
+}
+
+static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store,
+ u64 seq, ktime_t *ts, int *error)
+{
+ struct hl_cs_outcome *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&outcome_store->db_lock, flags);
+
+ hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq)
+ if (node->seq == seq) {
+ *ts = node->ts;
+ *error = node->error;
+
+ hash_del(&node->map_link);
+ list_del_init(&node->list_link);
+ list_add(&node->list_link, &outcome_store->free_list);
+
+ spin_unlock_irqrestore(&outcome_store->db_lock, flags);
+
+ return true;
+ }
+
+ spin_unlock_irqrestore(&outcome_store->db_lock, flags);
+
+ return false;
+}
+
static void hl_sob_reset(struct kref *ref)
{
struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
*/
hl_debugfs_remove_cs(cs);
- hl_ctx_put(cs->ctx);
/* We need to mark an error for not submitted because in that case
* the hl fence release flow is different. Mainly, we don't need
div_u64(jiffies - cs->submission_time_jiffies, HZ));
}
- if (cs->timestamp)
+ if (cs->timestamp) {
cs->fence->timestamp = ktime_get();
+ hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence,
+ cs->fence->timestamp, cs->fence->error);
+ }
+
+ hl_ctx_put(cs->ctx);
+
complete_all(&cs->fence->completion);
complete_multi_cs(hdev, cs);
s64 *timestamp)
{
struct hl_device *hdev = ctx->hdev;
+ ktime_t timestamp_kt;
long completion_rc;
- int rc = 0;
+ int rc = 0, error;
if (IS_ERR(fence)) {
rc = PTR_ERR(fence);
}
if (!fence) {
- dev_dbg(hdev->dev,
+ if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, ×tamp_kt, &error)) {
+ dev_dbg(hdev->dev,
"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
seq, ctx->cs_sequence);
- *status = CS_WAIT_STATUS_GONE;
- return 0;
+ *status = CS_WAIT_STATUS_GONE;
+ return 0;
+ }
+
+ completion_rc = 1;
+ goto report_results;
}
if (!timeout_us) {
&fence->completion, timeout);
}
+ error = fence->error;
+ timestamp_kt = fence->timestamp;
+
+report_results:
if (completion_rc > 0) {
*status = CS_WAIT_STATUS_COMPLETED;
if (timestamp)
- *timestamp = ktime_to_ns(fence->timestamp);
+ *timestamp = ktime_to_ns(timestamp_kt);
} else {
*status = CS_WAIT_STATUS_BUSY;
}
- if (fence->error == -ETIMEDOUT)
- rc = -ETIMEDOUT;
- else if (fence->error == -EIO)
- rc = -EIO;
+ if (error == -ETIMEDOUT || error == -EIO)
+ rc = error;
return rc;
}
uint64_t device_address;
};
+#define HL_CS_OUTCOME_HISTORY_LEN 256
+
+/**
+ * struct hl_cs_outcome - represents a single completed CS outcome
+ * @list_link: link to either container's used list or free list
+ * @map_link: list to the container hash map
+ * @ts: completion ts
+ * @seq: the original cs sequence
+ * @error: error code cs completed with, if any
+ */
+struct hl_cs_outcome {
+ struct list_head list_link;
+ struct hlist_node map_link;
+ ktime_t ts;
+ u64 seq;
+ int error;
+};
+
+/**
+ * struct hl_cs_outcome_store - represents a limited store of completed CS outcomes
+ * @outcome_map: index of completed CS searcheable by sequence number
+ * @used_list: list of outcome objects currently in use
+ * @free_list: list of outcome objects currently not in use
+ * @nodes_pool: a static pool of preallocated outcome objects
+ * @db_lock: any operation on the store must take this lock
+ */
+struct hl_cs_outcome_store {
+ DECLARE_HASHTABLE(outcome_map, 8);
+ struct list_head used_list;
+ struct list_head free_list;
+ struct hl_cs_outcome nodes_pool[HL_CS_OUTCOME_HISTORY_LEN];
+ spinlock_t db_lock;
+};
+
/**
* struct hl_ctx - user/kernel context.
* @mem_hash: holds mapping from virtual address to virtual memory area
* @refcount: reference counter for the context. Context is released only when
* this hits 0l. It is incremented on CS and CS_WAIT.
* @cs_pending: array of hl fence objects representing pending CS.
+ * @outcome_store: storage data structure used to remember ouitcomes of completed
+ * command submissions for a long time after CS id wraparound.
* @va_range: holds available virtual addresses for host and dram mappings.
* @mem_hash_lock: protects the mem_hash.
* @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
struct hl_device *hdev;
struct kref refcount;
struct hl_fence **cs_pending;
+ struct hl_cs_outcome_store outcome_store;
struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
struct mutex mem_hash_lock;
struct mutex mmu_lock;