This prepares for GFXv9 (Vega10), which has 64-bit doorbells.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.max_pasid_bits = 16,
/* max num of queues for CZ.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
.asic_family = CHIP_POLARIS11,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
-#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
/*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
*/
/* # of doorbell bytes allocated for each process. */
-static inline size_t doorbell_process_allocation(void)
+static size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
- return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
+ return roundup(kfd->device_info->doorbell_size *
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
PAGE_SIZE);
}
doorbell_start_offset =
roundup(kfd->shared_resources.doorbell_start_offset,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
doorbell_aperture_size =
rounddown(kfd->shared_resources.doorbell_aperture_size,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
if (doorbell_aperture_size > doorbell_start_offset)
doorbell_process_limit =
(doorbell_aperture_size - doorbell_start_offset) /
- doorbell_process_allocation();
+ kfd_doorbell_process_slice(kfd);
else
return -ENOSPC;
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
if (!kfd->doorbell_kernel_ptr)
return -ENOMEM;
phys_addr_t address;
struct kfd_dev *dev;
+ /* Find kfd device according to gpu id */
+ dev = kfd_device_by_id(vma->vm_pgoff);
+ if (!dev)
+ return -EINVAL;
+
/*
* For simplicitly we only allow mapping of the entire doorbell
* allocation of a single device & process.
*/
- if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
- return -EINVAL;
-
- /* Find kfd device according to gpu id */
- dev = kfd_device_by_id(vma->vm_pgoff);
- if (!dev)
+ if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
return -EINVAL;
/* Calculate physical address of doorbell */
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
(unsigned long long) vma->vm_start, address, vma->vm_flags,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(dev));
return io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
- doorbell_process_allocation(),
+ kfd_doorbell_process_slice(dev),
vma->vm_page_prot);
}
/* get kernel iomem pointer for a doorbell */
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off)
{
u32 inx;
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
+ inx *= kfd->device_info->doorbell_size / sizeof(u32);
+
/*
* Calculating the kernel doorbell offset using the first
* doorbell page.
mutex_unlock(&kfd->doorbell_mutex);
}
-inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
+void write_kernel_doorbell(void __iomem *db, u32 value)
{
if (db) {
writel(value, db);
{
/*
* doorbell_id_offset accounts for doorbells taken by KGD.
- * index * doorbell_process_allocation/sizeof(u32) adjusts to
- * the process's doorbells.
+ * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
+ * the process's doorbells. The offset returned is in dword
+ * units regardless of the ASIC-dependent doorbell size.
*/
return kfd->doorbell_id_offset +
process->doorbell_index
- * doorbell_process_allocation() / sizeof(u32) +
- queue_id;
+ * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+ queue_id * kfd->device_info->doorbell_size / sizeof(u32);
}
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
kfd->shared_resources.doorbell_start_offset) /
- doorbell_process_allocation() + 1;
+ kfd_doorbell_process_slice(kfd) + 1;
return num_of_elems;
struct kfd_process *process)
{
return dev->doorbell_base +
- process->doorbell_index * doorbell_process_allocation();
+ process->doorbell_index * kfd_doorbell_process_slice(dev);
}
int kfd_alloc_process_doorbells(struct kfd_process *process)
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
unsigned int max_no_of_hqd;
+ unsigned int doorbell_size;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
uint32_t queue_percent;
uint32_t *read_ptr;
uint32_t *write_ptr;
- uint32_t __iomem *doorbell_ptr;
+ void __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
bool is_evicted;
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
-void write_kernel_doorbell(u32 __iomem *db, u32 value);
+void write_kernel_doorbell(void __iomem *db, u32 value);
unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int queue_id);