drm/amdkfd: CRIU Implement KFD resume ioctl
authorRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Mon, 11 Jan 2021 18:27:50 +0000 (13:27 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 7 Feb 2022 22:59:41 +0000 (17:59 -0500)
This adds support to create userptr BOs on restore and introduces a new
ioctl op to restart memory notifiers for the restored userptr BOs.
When doing a CRIU restore, MMU notifications can happen at any time after we
call amdgpu_mn_register. Prevent MMU notifications until we reach stage-4 of
the restore process, i.e. until the criu_resume ioctl op is received and the
process is ready to be resumed. This ioctl is different from the other KFD
CRIU ioctls since it is called by the CRIU master restore process for all
the target processes being resumed by CRIU.

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: David Yat Sin <david.yatsin@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c

index 395ba9566afe93847b483381ed3b6faf25d0dbdf..4cb14c2fe53fbe3fda50094f1c668279c232ec72 100644 (file)
@@ -131,6 +131,7 @@ struct amdkfd_process_info {
        atomic_t evicted_bos;
        struct delayed_work restore_userptr_work;
        struct pid *pid;
+       bool block_mmu_notifications;
 };
 
 int amdgpu_amdkfd_init(void);
@@ -268,7 +269,7 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                struct amdgpu_device *adev, uint64_t va, uint64_t size,
                void *drm_priv, struct kgd_mem **mem,
-               uint64_t *offset, uint32_t flags);
+               uint64_t *offset, uint32_t flags, bool criu_resume);
 int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
                struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
                uint64_t *size);
@@ -298,6 +299,9 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
                                bool reset);
 bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
+void amdgpu_amdkfd_block_mmu_notifications(void *p);
+int amdgpu_amdkfd_criu_resume(void *p);
+
 #if IS_ENABLED(CONFIG_HSA_AMD)
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
index 5cf4bedca1d6e57c259660263f87bd8b015fe7ec..2e00c3fb4bd3579cbf0448ccff2ced6d77e67571 100644 (file)
@@ -842,7 +842,8 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
  *
  * Returns 0 for success, negative errno for errors.
  */
-static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
+                          bool criu_resume)
 {
        struct amdkfd_process_info *process_info = mem->process_info;
        struct amdgpu_bo *bo = mem->bo;
@@ -864,6 +865,18 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
                goto out;
        }
 
+       if (criu_resume) {
+               /*
+                * During a CRIU restore operation, the userptr buffer objects
+                * will be validated in the restore_userptr_work worker at a
+                * later stage when it is scheduled by another ioctl called by
+                * the CRIU master process for the target pid being restored.
+                */
+               atomic_inc(&mem->invalid);
+               mutex_unlock(&process_info->lock);
+               return 0;
+       }
+
        ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
        if (ret) {
                pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
@@ -1452,10 +1465,39 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
        return avm->pd_phys_addr;
 }
 
+void amdgpu_amdkfd_block_mmu_notifications(void *p)
+{
+       struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+       mutex_lock(&pinfo->lock);
+       WRITE_ONCE(pinfo->block_mmu_notifications, true);
+       mutex_unlock(&pinfo->lock);
+}
+
+int amdgpu_amdkfd_criu_resume(void *p)
+{
+       int ret = 0;
+       struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p;
+
+       mutex_lock(&pinfo->lock);
+       pr_debug("scheduling work\n");
+       atomic_inc(&pinfo->evicted_bos);
+       if (!READ_ONCE(pinfo->block_mmu_notifications)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+       WRITE_ONCE(pinfo->block_mmu_notifications, false);
+       schedule_delayed_work(&pinfo->restore_userptr_work, 0);
+
+out_unlock:
+       mutex_unlock(&pinfo->lock);
+       return ret;
+}
+
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                struct amdgpu_device *adev, uint64_t va, uint64_t size,
                void *drm_priv, struct kgd_mem **mem,
-               uint64_t *offset, uint32_t flags)
+               uint64_t *offset, uint32_t flags, bool criu_resume)
 {
        struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
        enum ttm_bo_type bo_type = ttm_bo_type_device;
@@ -1558,7 +1600,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
        add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
 
        if (user_addr) {
-               ret = init_user_pages(*mem, user_addr);
+               pr_debug("creating userptr BO for user_addr = %llu\n", user_addr);
+               ret = init_user_pages(*mem, user_addr, criu_resume);
                if (ret)
                        goto allocate_init_user_pages_failed;
        } else  if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
@@ -2062,6 +2105,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
        int evicted_bos;
        int r = 0;
 
+       /* Do not process MMU notifications until stage-4 IOCTL is received */
+       if (READ_ONCE(process_info->block_mmu_notifications))
+               return 0;
+
        atomic_inc(&mem->invalid);
        evicted_bos = atomic_inc_return(&process_info->evicted_bos);
        if (evicted_bos == 1) {
index 342fc56b194049d1f63632679a42e3d0603327a8..95fc5668195ca11a0b32cf9c3bb5e48e1fdcf28e 100644 (file)
@@ -1324,7 +1324,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                dev->adev, args->va_addr, args->size,
                pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
-               flags);
+               flags, false);
 
        if (err)
                goto err_unlock;
@@ -2116,6 +2116,7 @@ static int criu_restore_bos(struct kfd_process *p,
 {
        struct kfd_criu_bo_bucket *bo_buckets;
        struct kfd_criu_bo_priv_data *bo_privs;
+       const bool criu_resume = true;
        bool flush_tlbs = false;
        int ret = 0, j = 0;
        uint32_t i;
@@ -2123,6 +2124,9 @@ static int criu_restore_bos(struct kfd_process *p,
        if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
                return -EINVAL;
 
+       /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
+       amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
+
        bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
        if (!bo_buckets)
                return -ENOMEM;
@@ -2211,7 +2215,6 @@ static int criu_restore_bos(struct kfd_process *p,
                } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
                        offset = bo_priv->user_addr;
                }
-
                /* Create the BO */
                ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(dev->adev,
                                                bo_bucket->addr,
@@ -2219,7 +2222,8 @@ static int criu_restore_bos(struct kfd_process *p,
                                                pdd->drm_priv,
                                                (struct kgd_mem **) &mem,
                                                &offset,
-                                               bo_bucket->alloc_flags);
+                                               bo_bucket->alloc_flags,
+                                               criu_resume);
                if (ret) {
                        pr_err("Could not create the BO\n");
                        ret = -ENOMEM;
@@ -2239,7 +2243,6 @@ static int criu_restore_bos(struct kfd_process *p,
                        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev,
                                                (struct kgd_mem *)mem,
                                                pdd->drm_priv, NULL);
-
                        ret = -ENOMEM;
                        goto exit;
                }
@@ -2392,7 +2395,35 @@ static int criu_resume(struct file *filep,
                        struct kfd_process *p,
                        struct kfd_ioctl_criu_args *args)
 {
-       return 0;
+       struct kfd_process *target = NULL;
+       struct pid *pid = NULL;
+       int ret = 0;
+
+       pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
+                args->pid);
+
+       pid = find_get_pid(args->pid);
+       if (!pid) {
+               pr_err("Cannot find pid info for %i\n", args->pid);
+               return -ESRCH;
+       }
+
+       pr_debug("calling kfd_lookup_process_by_pid\n");
+       target = kfd_lookup_process_by_pid(pid);
+
+       put_pid(pid);
+
+       if (!target) {
+               pr_debug("Cannot find process info for %i\n", args->pid);
+               return -ESRCH;
+       }
+
+       mutex_lock(&target->mutex);
+       ret =  amdgpu_amdkfd_criu_resume(target->kgd_process_info);
+       mutex_unlock(&target->mutex);
+
+       kfd_unref_process(target);
+       return ret;
 }
 
 static int criu_process_info(struct file *filep,
index a4d08b6b2e6b87ded4643bbc4a72cce09c802aa4..9b347247055c88e311bf0aa5fe0cfc9e509b2bb4 100644 (file)
@@ -951,6 +951,7 @@ void *kfd_process_device_translate_handle(struct kfd_process_device *p,
                                        int handle);
 void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
                                        int handle);
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid);
 
 /* PASIDs */
 int kfd_pasid_init(void);
index 74f162887d3b12a99e426880d6c99a70288e4d77..b3198e18662220fd430c46060734cd52c80c32e6 100644 (file)
@@ -64,7 +64,8 @@ static struct workqueue_struct *kfd_process_wq;
  */
 static struct workqueue_struct *kfd_restore_wq;
 
-static struct kfd_process *find_process(const struct task_struct *thread);
+static struct kfd_process *find_process(const struct task_struct *thread,
+                                       bool ref);
 static void kfd_process_ref_release(struct kref *ref);
 static struct kfd_process *create_process(const struct task_struct *thread);
 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
@@ -715,7 +716,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
        int err;
 
        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,
-                                                pdd->drm_priv, mem, NULL, flags);
+                                                pdd->drm_priv, mem, NULL,
+                                                flags, false);
        if (err)
                goto err_alloc_mem;
 
@@ -816,7 +818,7 @@ struct kfd_process *kfd_create_process(struct file *filep)
        mutex_lock(&kfd_processes_mutex);
 
        /* A prior open of /dev/kfd could have already created the process. */
-       process = find_process(thread);
+       process = find_process(thread, false);
        if (process) {
                pr_debug("Process already found\n");
        } else {
@@ -884,7 +886,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
        if (thread->group_leader->mm != thread->mm)
                return ERR_PTR(-EINVAL);
 
-       process = find_process(thread);
+       process = find_process(thread, false);
        if (!process)
                return ERR_PTR(-EINVAL);
 
@@ -903,13 +905,16 @@ static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
        return NULL;
 }
 
-static struct kfd_process *find_process(const struct task_struct *thread)
+static struct kfd_process *find_process(const struct task_struct *thread,
+                                       bool ref)
 {
        struct kfd_process *p;
        int idx;
 
        idx = srcu_read_lock(&kfd_processes_srcu);
        p = find_process_by_mm(thread->mm);
+       if (p && ref)
+               kref_get(&p->ref);
        srcu_read_unlock(&kfd_processes_srcu, idx);
 
        return p;
@@ -920,6 +925,26 @@ void kfd_unref_process(struct kfd_process *p)
        kref_put(&p->ref, kfd_process_ref_release);
 }
 
+/* This increments the process->ref counter. */
+struct kfd_process *kfd_lookup_process_by_pid(struct pid *pid)
+{
+       struct task_struct *task = NULL;
+       struct kfd_process *p    = NULL;
+
+       if (!pid) {
+               task = current;
+               get_task_struct(task);
+       } else {
+               task = get_pid_task(pid, PIDTYPE_PID);
+       }
+
+       if (task) {
+               p = find_process(task, true);
+               put_task_struct(task);
+       }
+
+       return p;
+}
 
 static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
 {