drm/i915/guc: Use context hints for GT frequency
author Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Wed, 6 Mar 2024 01:27:59 +0000 (17:27 -0800)
committer John Harrison <John.C.Harrison@Intel.com>
Thu, 7 Mar 2024 18:25:06 +0000 (10:25 -0800)
Allow the user to provide a low latency context hint. When set, KMD
sends a hint to GuC which results in special handling for this
context. SLPC will ramp the GT frequency aggressively every time
it switches to this context. The down freq threshold will also be
lower, so GuC will ramp down the GT freq for this context more slowly.
We also disable waitboost for this context, as that would interfere with
the strategy.

We need to enable the use of the SLPC Compute strategy during init, but
it will apply only to contexts that set this bit during context
creation.

Userland can check whether this feature is supported using a new param,
I915_PARAM_HAS_CONTEXT_FREQ_HINT. This param reads as 1 on all GuC submission
enabled platforms, as they use SLPC for frequency management.

The Mesa usage model for this flag is here:
https://gitlab.freedesktop.org/sushmave/mesa/-/commits/compute_hint

v2: Rename flags as per review suggestions (Rodrigo, Tvrtko).
Also, use flag bits in intel_context as it allows finer control for
toggling per engine if needed (Tvrtko).

v3: Minor review comments (Tvrtko)

v4: Update comment (Sushma)

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: Ivan Briano <ivan.briano@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240306012759.204938-1-vinay.belgaumkar@intel.com
drivers/gpu/drm/i915/gem/i915_gem_context.c
drivers/gpu/drm/i915/gem/i915_gem_context_types.h
drivers/gpu/drm/i915/gt/intel_context_types.h
drivers/gpu/drm/i915/gt/intel_rps.c
drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/i915_getparam.c
include/uapi/drm/i915_drm.h

index dcbfe32fd30c3b02122e5e8811d4c473a7eb6090..81f65cab13308145ebb03cc56fedb5bc84c30208 100644 (file)
@@ -879,6 +879,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
                               struct i915_gem_proto_context *pc,
                               struct drm_i915_gem_context_param *args)
 {
+       struct drm_i915_private *i915 = fpriv->i915;
        int ret = 0;
 
        switch (args->param) {
@@ -904,6 +905,13 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
                        pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
                break;
 
+       case I915_CONTEXT_PARAM_LOW_LATENCY:
+               if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+                       pc->user_flags |= BIT(UCONTEXT_LOW_LATENCY);
+               else
+                       ret = -EINVAL;
+               break;
+
        case I915_CONTEXT_PARAM_RECOVERABLE:
                if (args->size)
                        ret = -EINVAL;
@@ -992,6 +1000,9 @@ static int intel_context_set_gem(struct intel_context *ce,
        if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
                ret = intel_context_reconfigure_sseu(ce, sseu);
 
+       if (test_bit(UCONTEXT_LOW_LATENCY, &ctx->user_flags))
+               __set_bit(CONTEXT_LOW_LATENCY, &ce->flags);
+
        return ret;
 }
 
@@ -1630,6 +1641,9 @@ i915_gem_create_context(struct drm_i915_private *i915,
        if (vm)
                ctx->vm = vm;
 
+       /* Assign early so intel_context_set_gem can access these flags */
+       ctx->user_flags = pc->user_flags;
+
        mutex_init(&ctx->engines_mutex);
        if (pc->num_user_engines >= 0) {
                i915_gem_context_set_user_engines(ctx);
@@ -1652,8 +1666,6 @@ i915_gem_create_context(struct drm_i915_private *i915,
         * is no remap info, it will be a NOP. */
        ctx->remap_slice = ALL_L3_SLICES(i915);
 
-       ctx->user_flags = pc->user_flags;
-
        for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
                ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
 
index 03bc7f9d191b98a4df1201098f9fa5c160a3502c..b6d97da63d1fa9ac869ecdc7d5d9045a1be9ca27 100644 (file)
@@ -338,6 +338,7 @@ struct i915_gem_context {
 #define UCONTEXT_BANNABLE              2
 #define UCONTEXT_RECOVERABLE           3
 #define UCONTEXT_PERSISTENCE           4
+#define UCONTEXT_LOW_LATENCY           5
 
        /**
         * @flags: small set of booleans
index 7eccbd70d89fce0e14f1cd93eec53b135f8f5c7b..ed95a7b57cbba8e527614cf9dcda9fd75ef0bd90 100644 (file)
@@ -130,6 +130,7 @@ struct intel_context {
 #define CONTEXT_PERMA_PIN              11
 #define CONTEXT_IS_PARKING             12
 #define CONTEXT_EXITING                        13
+#define CONTEXT_LOW_LATENCY            14
 
        struct {
                u64 timeout_us;
index 9c6812257ac2ca7e7b66d0cc284d2da5b9550136..a929aa6e3c85a3160297a3961304cf7c5234c4e2 100644 (file)
@@ -1013,6 +1013,10 @@ void intel_rps_boost(struct i915_request *rq)
        if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
                return;
 
+       /* Waitboost is not needed for contexts marked with a Freq hint */
+       if (test_bit(CONTEXT_LOW_LATENCY, &rq->context->flags))
+               return;
+
        /* Serializes with i915_request_retire() */
        if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
                struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
index 811add10c30dc21a357841dccd10e6583468978c..c34674e797c61abe3b6f0ad96c6da855bbff8427 100644 (file)
@@ -207,6 +207,27 @@ struct slpc_shared_data {
        u8 reserved_mode_definition[4096];
 } __packed;
 
+struct slpc_context_frequency_request {
+       u32 frequency_request:16;
+       u32 reserved:12;
+       u32 is_compute:1;
+       u32 ignore_busyness:1;
+       u32 is_minimum:1;
+       u32 is_predefined:1;
+} __packed;
+
+#define SLPC_CTX_FREQ_REQ_IS_COMPUTE           REG_BIT(28)
+
+struct slpc_optimized_strategies {
+       u32 compute:1;
+       u32 async_flip:1;
+       u32 media:1;
+       u32 vsync_flip:1;
+       u32 reserved:28;
+} __packed;
+
+#define SLPC_OPTIMIZED_STRATEGY_COMPUTE                REG_BIT(0)
+
 /**
  * DOC: SLPC H2G MESSAGE FORMAT
  *
index 3e681ab6fbf9fb9892c31b77c5f73e6c3f40aa7f..706fffca698b664ac43c2e5b4931ff39aa899ab5 100644 (file)
@@ -537,6 +537,20 @@ int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val)
        return ret;
 }
 
+int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val)
+{
+       struct drm_i915_private *i915 = slpc_to_i915(slpc);
+       intel_wakeref_t wakeref;
+       int ret = 0;
+
+       with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+               ret = slpc_set_param(slpc,
+                                    SLPC_PARAM_STRATEGIES,
+                                    val);
+
+       return ret;
+}
+
 int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val)
 {
        struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -711,6 +725,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
        /* Set cached media freq ratio mode */
        intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
 
+       /* Enable SLPC Optimized Strategy for compute */
+       intel_guc_slpc_set_strategy(slpc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
+
        return 0;
 }
 
index 6ac6503c39d45d7688179870e4b68e77c81cb8a9..1cb5fd44f05ca43ffc6e43cea682fddb243980fe 100644 (file)
@@ -45,5 +45,6 @@ void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
 void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
 void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
 int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val);
+int intel_guc_slpc_set_strategy(struct intel_guc_slpc *slpc, u32 val);
 
 #endif
index cc076e9302ad7c6a085518dd485ee835cf2da12c..e5c645137cfe721c63b833c32b4233b5eb725b0a 100644 (file)
@@ -2645,6 +2645,7 @@ MAKE_CONTEXT_POLICY_ADD(execution_quantum, EXECUTION_QUANTUM)
 MAKE_CONTEXT_POLICY_ADD(preemption_timeout, PREEMPTION_TIMEOUT)
 MAKE_CONTEXT_POLICY_ADD(priority, SCHEDULING_PRIORITY)
 MAKE_CONTEXT_POLICY_ADD(preempt_to_idle, PREEMPT_TO_IDLE_ON_QUANTUM_EXPIRY)
+MAKE_CONTEXT_POLICY_ADD(slpc_ctx_freq_req, SLPM_GT_FREQUENCY)
 
 #undef MAKE_CONTEXT_POLICY_ADD
 
@@ -2664,6 +2665,7 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
        struct context_policy policy;
        u32 execution_quantum;
        u32 preemption_timeout;
+       u32 slpc_ctx_freq_req = 0;
        unsigned long flags;
        int ret;
 
@@ -2675,11 +2677,15 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
        execution_quantum = engine->props.timeslice_duration_ms * 1000;
        preemption_timeout = engine->props.preempt_timeout_ms * 1000;
 
+       if (ce && (ce->flags & BIT(CONTEXT_LOW_LATENCY)))
+               slpc_ctx_freq_req |= SLPC_CTX_FREQ_REQ_IS_COMPUTE;
+
        __guc_context_policy_start_klv(&policy, ce->guc_id.id);
 
        __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
        __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
        __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+       __guc_context_policy_add_slpc_ctx_freq_req(&policy, slpc_ctx_freq_req);
 
        if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
                __guc_context_policy_add_preempt_to_idle(&policy, 1);
index 5c3fec63cb4c14960719aa9daf9ada8fbc4e7020..95c58805b2a4a7c74627aece27fd1be9fb369848 100644 (file)
@@ -155,6 +155,12 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
                 */
                value = 1;
                break;
+       case I915_PARAM_HAS_CONTEXT_FREQ_HINT:
+               if (intel_uc_uses_guc_submission(&to_gt(i915)->uc))
+                       value = 1;
+               else
+                       value = -EINVAL;
+               break;
        case I915_PARAM_HAS_CONTEXT_ISOLATION:
                value = intel_engines_has_context_isolation(i915);
                break;
index 2ee338860b7e08c80fb9f0a65702dc1b18456b6b..558d95baf8515a20c02f21aed0630cfffb952817 100644 (file)
@@ -806,6 +806,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PXP_STATUS           58
 
+/*
+ * Query if kernel allows marking a context to send a Freq hint to SLPC. This
+ * will enable use of the strategies allowed by the SLPC algorithm.
+ */
+#define I915_PARAM_HAS_CONTEXT_FREQ_HINT       59
+
 /* Must be kept compact -- no holes and well documented */
 
 /**
@@ -2148,6 +2154,15 @@ struct drm_i915_gem_context_param {
  * -EIO: The firmware did not succeed in creating the protected context.
  */
 #define I915_CONTEXT_PARAM_PROTECTED_CONTENT    0xd
+
+/*
+ * I915_CONTEXT_PARAM_LOW_LATENCY:
+ *
+ * Mark this context as a low latency workload which requires aggressive GT
+ * frequency scaling. Use I915_PARAM_HAS_CONTEXT_FREQ_HINT to check if the kernel
+ * supports this per context flag.
+ */
+#define I915_CONTEXT_PARAM_LOW_LATENCY         0xe
 /* Must be kept compact -- no holes and well documented */
 
        /** @value: Context parameter value to be set or queried */