From 8f3013e0b22206b27f37dcf1b96ce68df3393040 Mon Sep 17 00:00:00 2001 From: Himal Prasad Ghimiray Date: Thu, 27 Jul 2023 04:56:50 +0530 Subject: [PATCH] drm/xe: Introduce fault injection for gt reset To trigger gt reset failure: echo 100 > /sys/kernel/debug/dri/<N>/fail_gt_reset/probability echo 2 > /sys/kernel/debug/dri/<N>/fail_gt_reset/times Cc: Rodrigo Vivi Cc: Lucas De Marchi Reviewed-by: Rodrigo Vivi Signed-off-by: Himal Prasad Ghimiray Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/xe/xe_debugfs.c | 10 ++++++++++ drivers/gpu/drm/xe/xe_gt.c | 8 +++++++- drivers/gpu/drm/xe/xe_gt.h | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c index 491506a1e12e5..2de8a0b9da183 100644 --- a/drivers/gpu/drm/xe/xe_debugfs.c +++ b/drivers/gpu/drm/xe/xe_debugfs.c @@ -5,6 +5,7 @@ #include "xe_debugfs.h" +#include <linux/fault-inject.h> #include #include @@ -20,6 +21,10 @@ #include "xe_vm.h" #endif +#ifdef CONFIG_FAULT_INJECTION +DECLARE_FAULT_ATTR(gt_reset_failure); +#endif + static struct xe_device *node_to_xe(struct drm_info_node *node) { return to_xe_device(node->minor->dev); @@ -135,4 +140,9 @@ void xe_debugfs_register(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_debugfs_register(gt); + +#ifdef CONFIG_FAULT_INJECTION + fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure); +#endif + } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 82b9874040706..28bf577c7bf2e 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -524,6 +524,11 @@ static int gt_reset(struct xe_gt *gt) xe_gt_info(gt, "reset started\n"); + if (xe_fault_inject_gt_reset()) { + err = -ECANCELED; + goto err_fail; + } + xe_gt_sanitize(gt); xe_device_mem_access_get(gt_to_xe(gt)); @@ -562,6 +567,7 @@ err_out: err_msg: XE_WARN_ON(xe_uc_start(&gt->uc)); xe_device_mem_access_put(gt_to_xe(gt)); +err_fail: xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); /* Notify userspace about gt 
reset failure */ @@ -583,7 +589,7 @@ void xe_gt_reset_async(struct xe_gt *gt) xe_gt_info(gt, "trying reset\n"); /* Don't do a reset while one is already in flight */ - if (xe_uc_reset_prepare(&gt->uc)) + if (!xe_fault_inject_gt_reset() && xe_uc_reset_prepare(&gt->uc)) return; xe_gt_info(gt, "reset queued\n"); diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h index 7298653a73de3..caded203a8a03 100644 --- a/drivers/gpu/drm/xe/xe_gt.h +++ b/drivers/gpu/drm/xe/xe_gt.h @@ -7,6 +7,7 @@ #define _XE_GT_H_ #include +#include <linux/fault-inject.h> #include "xe_device_types.h" #include "xe_hw_engine.h" @@ -16,6 +17,19 @@ for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \ xe_hw_engine_is_valid((hwe__))) +#ifdef CONFIG_FAULT_INJECTION +extern struct fault_attr gt_reset_failure; +static inline bool xe_fault_inject_gt_reset(void) +{ + return should_fail(&gt_reset_failure, 1); +} +#else +static inline bool xe_fault_inject_gt_reset(void) +{ + return false; +} +#endif + struct xe_gt *xe_gt_alloc(struct xe_tile *tile); int xe_gt_init_early(struct xe_gt *gt); int xe_gt_init(struct xe_gt *gt); -- 2.30.2