From 4f027e304a6c7ae77150965d10b8a1edee0398a2 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 27 Jul 2023 04:56:49 +0530 Subject: [PATCH] drm/xe: Notify Userspace when gt reset fails Send a uevent in case of GT reset failure. This notification can be used by a userspace monitoring tool to perform a device-level reset/reboot when a GT reset fails. udevadm can be used to monitor the uevents. v2: - Support only gt failure notification (Rodrigo) v3: - Rectify the comments in the header file. v4: - Use pci kobj instead of drm kobj for notification. (Rodrigo) - Cleanup (Badal) v5: - Add tile id and gt id as additional info provided by the uevent. - Provide code documentation for the uevent. (Rodrigo) Cc: Aravind Iddamsetty Cc: Tejas Upadhyay Cc: Rodrigo Vivi Reviewed-by: Badal Nilawar Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_gt.c | 19 +++++++++++++++++++ include/uapi/drm/xe_drm.h | 10 ++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index bb7794cf2c1a4..82b9874040706 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -8,6 +8,7 @@ #include #include +#include <drm/xe_drm.h> #include "regs/xe_gt_regs.h" #include "xe_bb.h" @@ -499,6 +500,20 @@ static int do_gt_restart(struct xe_gt *gt) return 0; } +static void xe_uevent_gt_reset_failure(struct pci_dev *pdev, u8 tile_id, u8 gt_id) +{ + char *reset_event[4]; + + reset_event[0] = XE_RESET_FAILED_UEVENT "=NEEDS_RESET"; + reset_event[1] = kasprintf(GFP_KERNEL, "TILE_ID=%d", tile_id); + reset_event[2] = kasprintf(GFP_KERNEL, "GT_ID=%d", gt_id); + reset_event[3] = NULL; + kobject_uevent_env(&pdev->dev.kobj, KOBJ_CHANGE, reset_event); + + kfree(reset_event[1]); + kfree(reset_event[2]); +} + static int gt_reset(struct xe_gt *gt) { int err; @@ -549,6 +564,10 @@ err_msg: xe_device_mem_access_put(gt_to_xe(gt)); xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); + /* Notify userspace about gt reset failure */ + 
xe_uevent_gt_reset_failure(to_pci_dev(gt_to_xe(gt)->drm.dev), + gt_to_tile(gt)->id, gt->info.id); + return err; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 259de80376b4a..3d09e9e9267b5 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -16,6 +16,16 @@ extern "C" { * subject to backwards-compatibility constraints. */ +/** + * DOC: uevent generated by xe on its pci node. + * + * XE_RESET_FAILED_UEVENT - Event is generated when an attempt to reset gt + * fails. The value supplied with the event is always "NEEDS_RESET". + * Additional information supplied is tile id and gt id of the gt unit for + * which reset has failed. + */ +#define XE_RESET_FAILED_UEVENT "DEVICE_STATUS" + /** * struct xe_user_extension - Base class for defining a chain of extensions * -- 2.30.2