drm/xe/devcoredump: Print errno if VM snapshot was not captured
author: José Roberto de Souza <jose.souza@intel.com>
Thu, 7 Mar 2024 13:52:28 +0000 (05:52 -0800)
committer: José Roberto de Souza <jose.souza@intel.com>
Fri, 22 Mar 2024 15:08:47 +0000 (08:08 -0700)
My testing machine has only 8GB of RAM, and while running piglit tests
I can sometimes hit the OOM case in the xe_vm_snapshot_capture() snap
allocation.

So, to differentiate the OOM case from a race between the capture and UMDs
unbinding VMs, I'm adding a '[0].error: -12' (-ENOMEM) line to the devcoredump.

v2:
- fix returned errno values

Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240307135229.41973-2-jose.souza@intel.com
drivers/gpu/drm/xe/xe_devcoredump.c
drivers/gpu/drm/xe/xe_vm.c

index 7d3aa6bd3524259386409f084165afbeaa2cc1e6..3a6263ecff01fe2013db98668e4fe836f480849f 100644 (file)
@@ -120,10 +120,8 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
                if (coredump->snapshot.hwe[i])
                        xe_hw_engine_snapshot_print(coredump->snapshot.hwe[i],
                                                    &p);
-       if (coredump->snapshot.vm) {
-               drm_printf(&p, "\n**** VM state ****\n");
-               xe_vm_snapshot_print(coredump->snapshot.vm, &p);
-       }
+       drm_printf(&p, "\n**** VM state ****\n");
+       xe_vm_snapshot_print(coredump->snapshot.vm, &p);
 
        return count - iter.remain;
 }
index 900fd868a16672b4259911e2af430f4f0b5e59d5..d82d7cd27123ec75e15d3ef9dc779f45ae97e5d8 100644 (file)
@@ -3359,8 +3359,10 @@ struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
 
        if (num_snaps)
                snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
-       if (!snap)
+       if (!snap) {
+               snap = num_snaps ? ERR_PTR(-ENOMEM) : ERR_PTR(-ENODEV);
                goto out_unlock;
+       }
 
        snap->num_snaps = num_snaps;
        i = 0;
@@ -3400,7 +3402,7 @@ out_unlock:
 
 void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
 {
-       if (!snap)
+       if (IS_ERR(snap))
                return;
 
        for (int i = 0; i < snap->num_snaps; i++) {
@@ -3457,6 +3459,11 @@ void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
 {
        unsigned long i, j;
 
+       if (IS_ERR(snap)) {
+               drm_printf(p, "[0].error: %li\n", PTR_ERR(snap));
+               return;
+       }
+
        for (i = 0; i < snap->num_snaps; i++) {
                drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
 
@@ -3483,7 +3490,7 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
 {
        unsigned long i;
 
-       if (!snap)
+       if (IS_ERR(snap))
                return;
 
        for (i = 0; i < snap->num_snaps; i++) {