drm/amdkfd: CRIU Discover svm ranges
author Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Wed, 3 Nov 2021 00:59:17 +0000 (20:59 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 7 Feb 2022 22:59:53 +0000 (17:59 -0500)
A KFD process may contain a number of virtual address ranges for shared
virtual memory management, and each such range can have many SVM
attributes spanning various nodes within the process boundary. This
change reports the total number of such SVM ranges and their total
private data size by extending the PROCESS_INFO op of the CRIU IOCTL
to discover the SVM ranges in the target process; future patches bring
in the required support for checkpoint and restore of SVM ranges.

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
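
For context, a checkpoint tool (such as the CRIU amdgpu plugin) is expected to
call the PROCESS_INFO op first in order to size its buffers before
checkpointing. Below is a minimal userspace sketch of that step; it assumes the
AMDKFD_IOC_CRIU_OP ioctl and the struct kfd_ioctl_criu_args layout introduced
elsewhere in this CRIU series, so the field and constant names are illustrative
rather than authoritative.

	/* Sketch only: query PROCESS_INFO for a target pid via /dev/kfd.
	 * Assumes AMDKFD_IOC_CRIU_OP and struct kfd_ioctl_criu_args from
	 * the rest of this series; names may differ at this point.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/kfd_ioctl.h>

	static int query_process_info(int kfd_fd, pid_t target_pid)
	{
		struct kfd_ioctl_criu_args args = {0};
		int ret;

		args.op = KFD_CRIU_OP_PROCESS_INFO;
		args.pid = target_pid;

		ret = ioctl(kfd_fd, AMDKFD_IOC_CRIU_OP, &args);
		if (!ret)
			/* num_objects now counts SVM ranges as well, and
			 * priv_data_size includes their attribute blobs.
			 */
			printf("objects %u, private data %llu bytes\n",
			       args.num_objects,
			       (unsigned long long)args.priv_data_size);
		return ret;
	}
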
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.h

index 3ec44f71307df298c3363399d13e68dd061211a8..a755ea68a428424fedf42fa9e002538b63de5ec1 100644 (file)
@@ -2099,10 +2099,9 @@ static int criu_get_process_object_info(struct kfd_process *p,
                                        uint32_t *num_objects,
                                        uint64_t *objs_priv_size)
 {
-       int ret;
-       uint64_t priv_size;
+       uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
        uint32_t num_queues, num_events, num_svm_ranges;
-       uint64_t queues_priv_data_size;
+       int ret;
 
        *num_devices = p->n_pdds;
        *num_bos = get_process_num_bos(p);
@@ -2112,7 +2111,10 @@ static int criu_get_process_object_info(struct kfd_process *p,
                return ret;
 
        num_events = kfd_get_num_events(p);
-       num_svm_ranges = 0; /* TODO: Implement SVM-Ranges */
+
+       ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
+       if (ret)
+               return ret;
 
        *num_objects = num_queues + num_events + num_svm_ranges;
 
@@ -2122,7 +2124,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
                priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
                priv_size += queues_priv_data_size;
                priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
-               /* TODO: Add SVM ranges priv size */
+               priv_size += svm_priv_data_size;
                *objs_priv_size = priv_size;
        }
        return 0;
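
With this hunk applied, the private data size reported back through
PROCESS_INFO is accumulated as follows (a schematic summary of the code above,
not a separate implementation; the per-process and per-device terms are not
visible in this hunk):

	objs_priv_size = (per-process and per-device terms, not shown here)
	               + num_bos    * sizeof(struct kfd_criu_bo_priv_data)
	               + queues_priv_data_size
	               + num_events * sizeof(struct kfd_criu_event_priv_data)
	               + svm_priv_data_size   /* added by this patch */
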
index 903ad4a263f09964a359d22b512e950f94beb683..715dd0d4fac5caf1fbb85060d436c3c615c85bf9 100644 (file)
@@ -1082,7 +1082,10 @@ enum kfd_criu_object_type {
 
 struct kfd_criu_svm_range_priv_data {
        uint32_t object_type;
-       uint32_t reserved;
+       uint64_t start_addr;
+       uint64_t size;
+       /* Variable length array of attributes */
+       struct kfd_ioctl_svm_attribute attrs[0];
 };
 
 struct kfd_criu_queue_priv_data {
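
Each checkpointed range is thus serialized as a fixed header followed by a
variable number of attributes. A sketch of how a consumer of this blob might
size and walk one entry follows; the helper names are hypothetical and not part
of the patch, and the attribute fields (type/value) come from
struct kfd_ioctl_svm_attribute in the KFD UAPI header.

	/* Hypothetical helper: size of one serialized SVM range entry
	 * carrying nattrs attributes.
	 */
	static inline uint64_t svm_range_priv_entry_size(uint32_t nattrs)
	{
		return sizeof(struct kfd_criu_svm_range_priv_data) +
		       nattrs * sizeof(struct kfd_ioctl_svm_attribute);
	}

	/* Hypothetical walk over the attributes of one entry. */
	static void dump_svm_range_priv(struct kfd_criu_svm_range_priv_data *e,
					uint32_t nattrs)
	{
		uint32_t i;

		pr_debug("range 0x%llx size 0x%llx\n",
			 (unsigned long long)e->start_addr,
			 (unsigned long long)e->size);
		for (i = 0; i < nattrs; i++)
			pr_debug("  attr type 0x%x value 0x%x\n",
				 e->attrs[i].type, e->attrs[i].value);
	}
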
index d34508f5e88b04b6eb453be278442489e6f8b02a..64cd7712c098527ec5d2c8991eb66b908a06c6e1 100644 (file)
@@ -3481,6 +3481,65 @@ fill_values:
        return 0;
 }
 
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+                      uint64_t *svm_priv_data_size)
+{
+       uint64_t total_size, accessibility_size, common_attr_size;
+       int nattr_common = 4, nattr_accessibility = 1;
+       int num_devices = p->n_pdds;
+       struct svm_range_list *svms;
+       struct svm_range *prange;
+       uint32_t count = 0;
+
+       *svm_priv_data_size = 0;
+
+       svms = &p->svms;
+       if (!svms)
+               return -EINVAL;
+
+       mutex_lock(&svms->lock);
+       list_for_each_entry(prange, &svms->list, list) {
+               pr_debug("prange: 0x%p start: 0x%lx\t npages: 0x%llx\t end: 0x%llx\n",
+                        prange, prange->start, prange->npages,
+                        prange->start + prange->npages - 1);
+               count++;
+       }
+       mutex_unlock(&svms->lock);
+
+       *num_svm_ranges = count;
+       /* Only the accessibility attributes need to be queried for all the
+        * GPUs individually; the remaining ones span the entire process
+        * regardless of the individual GPU nodes. Of the remaining attributes,
+        * KFD_IOCTL_SVM_ATTR_CLR_FLAGS need not be saved.
+        *
+        * KFD_IOCTL_SVM_ATTR_PREFERRED_LOC
+        * KFD_IOCTL_SVM_ATTR_PREFETCH_LOC
+        * KFD_IOCTL_SVM_ATTR_SET_FLAGS
+        * KFD_IOCTL_SVM_ATTR_GRANULARITY
+        *
+        * ** ACCESSIBILITY ATTRIBUTES **
+        * (Considered as one, type is altered during query, value is gpuid)
+        * KFD_IOCTL_SVM_ATTR_ACCESS
+        * KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE
+        * KFD_IOCTL_SVM_ATTR_NO_ACCESS
+        */
+       if (*num_svm_ranges > 0) {
+               common_attr_size = sizeof(struct kfd_ioctl_svm_attribute) *
+                       nattr_common;
+               accessibility_size = sizeof(struct kfd_ioctl_svm_attribute) *
+                       nattr_accessibility * num_devices;
+
+               total_size = sizeof(struct kfd_criu_svm_range_priv_data) +
+                       common_attr_size + accessibility_size;
+
+               *svm_priv_data_size = *num_svm_ranges * total_size;
+       }
+
+       pr_debug("num_svm_ranges %u total_priv_size %llu\n", *num_svm_ranges,
+                *svm_priv_data_size);
+       return 0;
+}
+
 int
 svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
          uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs)
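
To make the sizing in svm_range_get_info() concrete: with the four per-process
attributes and one accessibility attribute per GPU, a process with two GPUs
stores 4 + 1*2 = 6 attributes per range. The snippet below simply replays that
arithmetic with illustrative stand-in structures (the real sizes come from the
KFD headers), so the resulting numbers are an example, not authoritative.

	#include <stdint.h>
	#include <stdio.h>

	/* Illustrative stand-ins for the KFD structures used above. */
	struct attr_example { uint32_t type; uint32_t value; };
	struct range_priv_example {
		uint32_t object_type;
		uint64_t start_addr;
		uint64_t size;
		struct attr_example attrs[];	/* variable length */
	};

	int main(void)
	{
		int nattr_common = 4, nattr_accessibility = 1;
		int num_devices = 2, num_svm_ranges = 3;

		size_t per_range = sizeof(struct range_priv_example) +
			(nattr_common + nattr_accessibility * num_devices) *
			sizeof(struct attr_example);

		/* 3 ranges, each a header plus 6 attribute entries */
		printf("svm_priv_data_size = %zu bytes\n",
		       (size_t)num_svm_ranges * per_range);
		return 0;
	}
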
index 949b477e2f4c971c9c5d9aa004312848473f20e4..f052b6022e76cc0d08adf0f8ee01e624e5975d3b 100644 (file)
@@ -183,6 +183,8 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
 void svm_range_free_dma_mappings(struct svm_range *prange);
 void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
                        void *owner);
+int svm_range_get_info(struct kfd_process *p, uint32_t *num_svm_ranges,
+                      uint64_t *svm_priv_data_size);
 struct kfd_process_device *
 svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
 void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
@@ -220,6 +222,15 @@ static inline int svm_range_schedule_evict_svm_bo(
        return -EINVAL;
 }
 
+static inline int svm_range_get_info(struct kfd_process *p,
+                                    uint32_t *num_svm_ranges,
+                                    uint64_t *svm_priv_data_size)
+{
+       *num_svm_ranges = 0;
+       *svm_priv_data_size = 0;
+       return 0;
+}
+
 #define KFD_IS_SVM_API_SUPPORTED(dev) false
 
 #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */