--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ ... @@
 #include <drm/drm_cache.h>
 
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+
 #include "i915_drv.h"
 #include "i915_gem_object.h"
 #include "i915_scatterlist.h"
 #include "i915_gem_lmem.h"
 #include "i915_gem_mman.h"
 
-#include "gt/intel_gt.h"
-
 void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
                                 struct sg_table *pages,
 				 unsigned int sg_page_sizes)
@@ ... @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 
        if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
                struct drm_i915_private *i915 = to_i915(obj->base.dev);
+               struct intel_gt *gt = to_gt(i915);
                intel_wakeref_t wakeref;
 
-               with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
-                       intel_gt_invalidate_tlbs(to_gt(i915));
+               with_intel_gt_pm_if_awake(gt, wakeref)
+                       intel_gt_invalidate_tlbs(gt);
        }
 
        return pages;
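
The hunk above narrows the power reference taken on the unbind path: instead of a device-level with_intel_runtime_pm_if_active(), the TLB flush now runs only when the GT itself already holds a wakeref. The idea is that a suspended GT has no stale TLB entries left to flush (invalidation happens as part of bringing the GT back up), so the explicit flush can be skipped entirely. Roughly, the new construct expands as in the sketch below; invalidate_if_awake() is a hypothetical name, and the with_intel_gt_pm_if_awake() macro itself is added to intel_gt_pm.h at the end of this patch.

	/* Rough expansion of with_intel_gt_pm_if_awake(gt, wakeref). */
	static void invalidate_if_awake(struct intel_gt *gt)
	{
		intel_wakeref_t wakeref;

		wakeref = intel_gt_pm_get_if_awake(gt); /* 0 if the GT is parked */
		if (wakeref) {
			intel_gt_invalidate_tlbs(gt);
			intel_gt_pm_put_async(gt); /* drop without parking in this context */
		}
	}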
 
 
 #include "i915_drv.h"
 #include "intel_context.h"
+#include "intel_engine_pm.h"
 #include "intel_engine_regs.h"
 #include "intel_ggtt_gmch.h"
 #include "intel_gt.h"
@@ ... @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
 	struct drm_i915_private *i915 = gt->i915;
        struct intel_uncore *uncore = gt->uncore;
        struct intel_engine_cs *engine;
+       intel_engine_mask_t awake, tmp;
        enum intel_engine_id id;
        const i915_reg_t *regs;
        unsigned int num = 0;
 
        GEM_TRACE("\n");
 
-       assert_rpm_wakelock_held(&i915->runtime_pm);
-
 	mutex_lock(&gt->tlb_invalidate_lock);
        intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
 
        spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
 
+       awake = 0;
        for_each_engine(engine, gt, id) {
                struct reg_and_bit rb;
 
+               if (!intel_engine_pm_is_awake(engine))
+                       continue;
+
                rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
                if (!i915_mmio_reg_offset(rb.reg))
                        continue;
 
                intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+               awake |= engine->mask;
        }
 
        spin_unlock_irq(&uncore->lock);
 
-       for_each_engine(engine, gt, id) {
+       for_each_engine_masked(engine, gt, awake, tmp) {
+               struct reg_and_bit rb;
+
                /*
                 * HW architecture suggest typical invalidation time at 40us,
                 * with pessimistic cases up to 100us and a recommendation to
 		 * cap at 750us to be safe.
 		 */
                const unsigned int timeout_us = 100;
                const unsigned int timeout_ms = 4;
-               struct reg_and_bit rb;
 
                rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
-               if (!i915_mmio_reg_offset(rb.reg))
-                       continue;
-
 		if (__intel_wait_for_register_fw(uncore,
 						 rb.reg, rb.bit, 0,
 						 timeout_us, timeout_ms,
 						 NULL))
 			drm_err_ratelimited(&gt->i915->drm,
 					    "%s TLB invalidation did not complete in %ums!\n",
 					    engine->name, timeout_ms);
 	}
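
The invalidation routine is restructured into two passes. Pass 1, under the uncore spinlock that serialises against GT reset, posts the invalidation request to every engine that currently holds a PM wakeref and collects those engines into the awake mask; pass 2 then polls only those engines for completion, outside the spinlock. Because every request is posted before the first wait begins, the per-engine invalidations (typically ~40us, up to 100us each per the comment above) proceed in parallel rather than back to back. The device-level assert_rpm_wakelock_held() is dropped because the caller now pins the GT via with_intel_gt_pm_if_awake() instead. A condensed sketch of the shape (tlb_invalidate_sketch() is a hypothetical name), with the register lookup and error reporting elided:

	/* Condensed shape of the two-pass invalidate above. */
	static void tlb_invalidate_sketch(struct intel_gt *gt)
	{
		struct intel_engine_cs *engine;
		intel_engine_mask_t awake = 0, tmp;
		enum intel_engine_id id;

		spin_lock_irq(&gt->uncore->lock); /* serialise with GT reset */
		for_each_engine(engine, gt, id) {
			if (!intel_engine_pm_is_awake(engine))
				continue; /* parked engines have nothing to flush */
			/* pass 1: write the per-engine invalidate request */
			awake |= engine->mask;
		}
		spin_unlock_irq(&gt->uncore->lock);

		for_each_engine_masked(engine, gt, awake, tmp) {
			/* pass 2: wait for this engine's done bit */
		}
	}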
 
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ ... @@
 #define with_intel_gt_pm(gt, tmp) \
 	for (tmp = 1, intel_gt_pm_get(gt); tmp; \
             intel_gt_pm_put(gt), tmp = 0)
 
+#define with_intel_gt_pm_if_awake(gt, wf) \
+       for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+
 static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
 {
 	return intel_wakeref_wait_for_idle(&gt->wakeref);
 }
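
For reference, the new macro only enters its body when intel_gt_pm_get_if_awake() succeeds, i.e. when the GT already holds a wakeref, and it releases with intel_gt_pm_put_async() so that any final park is deferred rather than executed in the caller's context. A minimal usage sketch (run_if_awake() is a hypothetical helper):

	static void run_if_awake(struct intel_gt *gt, void (*fn)(struct intel_gt *))
	{
		intel_wakeref_t wf;

		with_intel_gt_pm_if_awake(gt, wf) /* skipped if the GT is parked */
			fn(gt);
	}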