From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 10 Nov 2022 20:07:18 +0000 (-0800)
Subject: Merge branch 'mana-shared-6.2' of https://git.kernel.org/pub/scm/linux/kernel/git...
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=79b0872b1075abd36b3c141f510ff7ec1878c22f;p=linux.git

Merge branch 'mana-shared-6.2' of https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Long Li says:

====================
Introduce Microsoft Azure Network Adapter (MANA) RDMA driver [netdev prep]

The first 11 patches modify the MANA Ethernet driver to support the RDMA
driver.

* 'mana-shared-6.2' of https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  net: mana: Define data structures for protection domain and memory registration
  net: mana: Define data structures for allocating doorbell page from GDMA
  net: mana: Define and process GDMA response code GDMA_STATUS_MORE_ENTRIES
  net: mana: Define max values for SGL entries
  net: mana: Move header files to a common location
  net: mana: Record port number in netdev
  net: mana: Export Work Queue functions for use by RDMA driver
  net: mana: Set the DMA device max segment size
  net: mana: Handle vport sharing between devices
  net: mana: Record the physical address for doorbell page region
  net: mana: Add support for auxiliary device
====================

Link: https://lore.kernel.org/all/1667502990-2559-1-git-send-email-longli@linuxonhyperv.com/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---

79b0872b1075abd36b3c141f510ff7ec1878c22f
diff --cc include/net/mana/gdma.h
index 0000000000000,221adc96340cb..28d0687bf7da4
mode 000000,100644..100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@@ -1,0 -1,833 +1,834 @@@
+ /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+ /* Copyright (c) 2021, Microsoft Corporation. */
+
+ #ifndef _GDMA_H
+ #define _GDMA_H
+
+ #include <linux/dma-mapping.h>
+ #include <linux/netdevice.h>
+
+ #include "shm_channel.h"
+
+ #define GDMA_STATUS_MORE_ENTRIES 0x00000105
+
+ /* Structures labeled with "HW DATA" are exchanged with the hardware. All of
+  * them are naturally aligned and hence don't need __packed.
+  */
+
+ enum gdma_request_type {
+         GDMA_VERIFY_VF_DRIVER_VERSION = 1,
+         GDMA_QUERY_MAX_RESOURCES = 2,
+         GDMA_LIST_DEVICES = 3,
+         GDMA_REGISTER_DEVICE = 4,
+         GDMA_DEREGISTER_DEVICE = 5,
+         GDMA_GENERATE_TEST_EQE = 10,
+         GDMA_CREATE_QUEUE = 12,
+         GDMA_DISABLE_QUEUE = 13,
+         GDMA_ALLOCATE_RESOURCE_RANGE = 22,
+         GDMA_DESTROY_RESOURCE_RANGE = 24,
+         GDMA_CREATE_DMA_REGION = 25,
+         GDMA_DMA_REGION_ADD_PAGES = 26,
+         GDMA_DESTROY_DMA_REGION = 27,
+         GDMA_CREATE_PD = 29,
+         GDMA_DESTROY_PD = 30,
+         GDMA_CREATE_MR = 31,
+         GDMA_DESTROY_MR = 32,
+ };
+
+ #define GDMA_RESOURCE_DOORBELL_PAGE 27
+
+ enum gdma_queue_type {
+         GDMA_INVALID_QUEUE,
+         GDMA_SQ,
+         GDMA_RQ,
+         GDMA_CQ,
+         GDMA_EQ,
+ };
+
+ enum gdma_work_request_flags {
+         GDMA_WR_NONE = 0,
+         GDMA_WR_OOB_IN_SGL = BIT(0),
+         GDMA_WR_PAD_BY_SGE0 = BIT(1),
+ };
+
+ enum gdma_eqe_type {
+         GDMA_EQE_COMPLETION = 3,
+         GDMA_EQE_TEST_EVENT = 64,
+         GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
+         GDMA_EQE_HWC_INIT_DATA = 130,
+         GDMA_EQE_HWC_INIT_DONE = 131,
+ };
+
+ enum {
+         GDMA_DEVICE_NONE = 0,
+         GDMA_DEVICE_HWC = 1,
+         GDMA_DEVICE_MANA = 2,
+ };
+
+ typedef u64 gdma_obj_handle_t;
+
+ struct gdma_resource {
+         /* Protect the bitmap */
+         spinlock_t lock;
+
+         /* The bitmap size in bits. */
+         u32 size;
+
+         /* The bitmap tracks the resources.
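 * Each bit stands for one instance of the resource (such as an MSI-X
 * table slot); a set bit means that instance is currently allocated.
 * mana_gd_alloc_res_map() and mana_gd_free_res_map(), declared later in
 * this header, create and destroy the map.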
*/ + unsigned long *map; + }; + + union gdma_doorbell_entry { + u64 as_uint64; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 31; + u64 arm : 1; + } cq; + + struct { + u64 id : 24; + u64 wqe_cnt : 8; + u64 tail_ptr : 32; + } rq; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 32; + } sq; + + struct { + u64 id : 16; + u64 reserved : 16; + u64 tail_ptr : 31; + u64 arm : 1; + } eq; + }; /* HW DATA */ + + struct gdma_msg_hdr { + u32 hdr_type; + u32 msg_type; + u16 msg_version; + u16 hwc_msg_id; + u32 msg_size; + }; /* HW DATA */ + + struct gdma_dev_id { + union { + struct { + u16 type; + u16 instance; + }; + + u32 as_uint32; + }; + }; /* HW DATA */ + + struct gdma_req_hdr { + struct gdma_msg_hdr req; + struct gdma_msg_hdr resp; /* The expected response */ + struct gdma_dev_id dev_id; + u32 activity_id; + }; /* HW DATA */ + + struct gdma_resp_hdr { + struct gdma_msg_hdr response; + struct gdma_dev_id dev_id; + u32 activity_id; + u32 status; + u32 reserved; + }; /* HW DATA */ + + struct gdma_general_req { + struct gdma_req_hdr hdr; + }; /* HW DATA */ + + #define GDMA_MESSAGE_V1 1 + + struct gdma_general_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + + #define GDMA_STANDARD_HEADER_TYPE 0 + + static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code, + u32 req_size, u32 resp_size) + { + hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->req.msg_type = code; + hdr->req.msg_version = GDMA_MESSAGE_V1; + hdr->req.msg_size = req_size; + + hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->resp.msg_type = code; + hdr->resp.msg_version = GDMA_MESSAGE_V1; + hdr->resp.msg_size = resp_size; + } + + /* The 16-byte struct is part of the GDMA work queue entry (WQE). */ + struct gdma_sge { + u64 address; + u32 mem_key; + u32 size; + }; /* HW DATA */ + + struct gdma_wqe_request { + struct gdma_sge *sgl; + u32 num_sge; + + u32 inline_oob_size; + const void *inline_oob_data; + + u32 flags; + u32 client_data_unit; + }; + + enum gdma_page_type { + GDMA_PAGE_TYPE_4K, + }; + + #define GDMA_INVALID_DMA_REGION 0 + + struct gdma_mem_info { + struct device *dev; + + dma_addr_t dma_handle; + void *virt_addr; + u64 length; + + /* Allocated by the PF driver */ + gdma_obj_handle_t dma_region_handle; + }; + + #define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8 + + struct gdma_dev { + struct gdma_context *gdma_context; + + struct gdma_dev_id dev_id; + + u32 pdid; + u32 doorbell; + u32 gpa_mkey; + + /* GDMA driver specific pointer */ + void *driver_data; + + struct auxiliary_device *adev; + }; + + #define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE + + #define GDMA_CQE_SIZE 64 + #define GDMA_EQE_SIZE 16 + #define GDMA_MAX_SQE_SIZE 512 + #define GDMA_MAX_RQE_SIZE 256 + + #define GDMA_COMP_DATA_SIZE 0x3C + + #define GDMA_EVENT_DATA_SIZE 0xC + + /* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. 
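 * For example (a sketch of the arithmetic only): a WQE built from the
 * 8-byte gdma_wqe header, an 8-byte inline OOB and two 16-byte SGEs
 * totals 48 bytes and is rounded up to 64 bytes, i.e. two Basic Units;
 * the rounded size is what gdma_posted_wqe_info.wqe_size_in_bu reports.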
*/ + #define GDMA_WQE_BU_SIZE 32 + + #define INVALID_PDID UINT_MAX + #define INVALID_DOORBELL UINT_MAX + #define INVALID_MEM_KEY UINT_MAX + #define INVALID_QUEUE_ID UINT_MAX + #define INVALID_PCI_MSIX_INDEX UINT_MAX + + struct gdma_comp { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + u32 wq_num; + bool is_sq; + }; + + struct gdma_event { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u8 type; + }; + + struct gdma_queue; + + struct mana_eq { + struct gdma_queue *eq; + }; + + typedef void gdma_eq_callback(void *context, struct gdma_queue *q, + struct gdma_event *e); + + typedef void gdma_cq_callback(void *context, struct gdma_queue *q); + + /* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE + * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the + * driver increases the 'head' in BUs rather than in bytes, and notifies + * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track + * the HW head, and increases the 'head' by 1 for every processed EQE/CQE. + * + * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is + * processed, the driver increases the 'tail' to indicate that WQEs have + * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ. + * + * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures + * that the EQ/CQ is big enough so they can't overflow, and the driver uses + * the owner bits mechanism to detect if the queue has become empty. + */ + struct gdma_queue { + struct gdma_dev *gdma_dev; + + enum gdma_queue_type type; + u32 id; + + struct gdma_mem_info mem_info; + + void *queue_mem_ptr; + u32 queue_size; + + bool monitor_avl_buf; + + u32 head; + u32 tail; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + bool disable_needed; + + gdma_eq_callback *callback; + void *context; + + unsigned int msix_index; + + u32 log2_throttle_limit; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent; /* For CQ/EQ relationship */ + } cq; + }; + }; + + struct gdma_queue_spec { + enum gdma_queue_type type; + bool monitor_avl_buf; + unsigned int queue_size; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + gdma_eq_callback *callback; + void *context; + + unsigned long log2_throttle_limit; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent_eq; + + } cq; + }; + }; + + struct gdma_irq_context { + void (*handler)(void *arg); + void *arg; + }; + + struct gdma_context { + struct device *dev; + + /* Per-vPort max number of queues */ + unsigned int max_num_queues; + unsigned int max_num_msix; + unsigned int num_msix_usable; + struct gdma_resource msix_resource; + struct gdma_irq_context *irq_contexts; + + /* This maps a CQ index to the queue structure. 
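 * When a GDMA_EQE_COMPLETION event arrives, the CQ id carried in the
 * EQE indexes cq_table[] to find the gdma_queue whose cq.callback is
 * then invoked.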
*/ + unsigned int max_num_cqs; + struct gdma_queue **cq_table; + + /* Protect eq_test_event and test_event_eq_id */ + struct mutex eq_test_event_mutex; + struct completion eq_test_event; + u32 test_event_eq_id; + + bool is_pf; + phys_addr_t bar0_pa; + void __iomem *bar0_va; + void __iomem *shm_base; + void __iomem *db_page_base; + phys_addr_t phys_db_page_base; + u32 db_page_size; ++ int numa_node; + + /* Shared memory chanenl (used to bootstrap HWC) */ + struct shm_channel shm_channel; + + /* Hardware communication channel (HWC) */ + struct gdma_dev hwc; + + /* Azure network adapter */ + struct gdma_dev mana; + }; + + #define MAX_NUM_GDMA_DEVICES 4 + + static inline bool mana_gd_is_mana(struct gdma_dev *gd) + { + return gd->dev_id.type == GDMA_DEVICE_MANA; + } + + static inline bool mana_gd_is_hwc(struct gdma_dev *gd) + { + return gd->dev_id.type == GDMA_DEVICE_HWC; + } + + u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset); + u32 mana_gd_wq_avail_space(struct gdma_queue *wq); + + int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq); + + int mana_gd_create_hwc_queue(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + + int mana_gd_create_mana_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + + int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + + void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue); + + int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe); + + void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit); + + struct gdma_wqe { + u32 reserved :24; + u32 last_vbytes :8; + + union { + u32 flags; + + struct { + u32 num_sge :8; + u32 inline_oob_size_div4:3; + u32 client_oob_in_sgl :1; + u32 reserved1 :4; + u32 client_data_unit :14; + u32 reserved2 :2; + }; + }; + }; /* HW DATA */ + + #define INLINE_OOB_SMALL_SIZE 8 + #define INLINE_OOB_LARGE_SIZE 24 + + #define MAX_TX_WQE_SIZE 512 + #define MAX_RX_WQE_SIZE 256 + + #define MAX_TX_WQE_SGL_ENTRIES ((GDMA_MAX_SQE_SIZE - \ + sizeof(struct gdma_sge) - INLINE_OOB_SMALL_SIZE) / \ + sizeof(struct gdma_sge)) + + #define MAX_RX_WQE_SGL_ENTRIES ((GDMA_MAX_RQE_SIZE - \ + sizeof(struct gdma_sge)) / sizeof(struct gdma_sge)) + + struct gdma_cqe { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + + union { + u32 as_uint32; + + struct { + u32 wq_num : 24; + u32 is_sq : 1; + u32 reserved : 4; + u32 owner_bits : 3; + }; + } cqe_info; + }; /* HW DATA */ + + #define GDMA_CQE_OWNER_BITS 3 + + #define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1) + + #define SET_ARM_BIT 1 + + #define GDMA_EQE_OWNER_BITS 3 + + union gdma_eqe_info { + u32 as_uint32; + + struct { + u32 type : 8; + u32 reserved1 : 8; + u32 client_id : 2; + u32 reserved2 : 11; + u32 owner_bits : 3; + }; + }; /* HW DATA */ + + #define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1) + #define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries)) + + struct gdma_eqe { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u32 eqe_info; + }; /* HW DATA */ + + #define GDMA_REG_DB_PAGE_OFFSET 8 + #define GDMA_REG_DB_PAGE_SIZE 0x10 + #define GDMA_REG_SHM_OFFSET 0x18 + + #define GDMA_PF_REG_DB_PAGE_SIZE 0xD0 + #define GDMA_PF_REG_DB_PAGE_OFF 0xC8 + #define GDMA_PF_REG_SHM_OFF 0x70 + + #define GDMA_SRIOV_REG_CFG_BASE_OFF 0x108 + + #define MANA_PF_DEVICE_ID 0x00B9 + #define MANA_VF_DEVICE_ID 0x00BA + + struct gdma_posted_wqe_info { + u32 wqe_size_in_bu; 
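 /* Number of 32-byte Basic Units the posted WQE occupies; callers keep
  * this value (see mana_recv_buf_oob in mana.h) so the work queue can be
  * progressed once the matching completion has been processed.
  */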
+ }; + + /* GDMA_GENERATE_TEST_EQE */ + struct gdma_generate_test_event_req { + struct gdma_req_hdr hdr; + u32 queue_index; + }; /* HW DATA */ + + /* GDMA_VERIFY_VF_DRIVER_VERSION */ + enum { + GDMA_PROTOCOL_V1 = 1, + GDMA_PROTOCOL_FIRST = GDMA_PROTOCOL_V1, + GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1, + }; + + #define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0) + + #define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT + + #define GDMA_DRV_CAP_FLAGS2 0 + + #define GDMA_DRV_CAP_FLAGS3 0 + + #define GDMA_DRV_CAP_FLAGS4 0 + + struct gdma_verify_ver_req { + struct gdma_req_hdr hdr; + + /* Mandatory fields required for protocol establishment */ + u64 protocol_ver_min; + u64 protocol_ver_max; + + /* Gdma Driver Capability Flags */ + u64 gd_drv_cap_flags1; + u64 gd_drv_cap_flags2; + u64 gd_drv_cap_flags3; + u64 gd_drv_cap_flags4; + + /* Advisory fields */ + u64 drv_ver; + u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */ + u32 reserved; + u32 os_ver_major; + u32 os_ver_minor; + u32 os_ver_build; + u32 os_ver_platform; + u64 reserved_2; + u8 os_ver_str1[128]; + u8 os_ver_str2[128]; + u8 os_ver_str3[128]; + u8 os_ver_str4[128]; + }; /* HW DATA */ + + struct gdma_verify_ver_resp { + struct gdma_resp_hdr hdr; + u64 gdma_protocol_ver; + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; + }; /* HW DATA */ + + /* GDMA_QUERY_MAX_RESOURCES */ + struct gdma_query_max_resources_resp { + struct gdma_resp_hdr hdr; + u32 status; + u32 max_sq; + u32 max_rq; + u32 max_cq; + u32 max_eq; + u32 max_db; + u32 max_mst; + u32 max_cq_mod_ctx; + u32 max_mod_cq; + u32 max_msix; + }; /* HW DATA */ + + /* GDMA_LIST_DEVICES */ + struct gdma_list_devices_resp { + struct gdma_resp_hdr hdr; + u32 num_of_devs; + u32 reserved; + struct gdma_dev_id devs[64]; + }; /* HW DATA */ + + /* GDMA_REGISTER_DEVICE */ + struct gdma_register_device_resp { + struct gdma_resp_hdr hdr; + u32 pdid; + u32 gpa_mkey; + u32 db_id; + }; /* HW DATA */ + + struct gdma_allocate_resource_range_req { + struct gdma_req_hdr hdr; + u32 resource_type; + u32 num_resources; + u32 alignment; + u32 allocated_resources; + }; + + struct gdma_allocate_resource_range_resp { + struct gdma_resp_hdr hdr; + u32 allocated_resources; + }; + + struct gdma_destroy_resource_range_req { + struct gdma_req_hdr hdr; + u32 resource_type; + u32 num_resources; + u32 allocated_resources; + }; + + /* GDMA_CREATE_QUEUE */ + struct gdma_create_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 reserved1; + u32 pdid; + u32 doolbell_id; + gdma_obj_handle_t gdma_region; + u32 reserved2; + u32 queue_size; + u32 log2_throttle_limit; + u32 eq_pci_msix_index; + u32 cq_mod_ctx_id; + u32 cq_parent_eq_id; + u8 rq_drop_on_overrun; + u8 rq_err_on_wqe_overflow; + u8 rq_chain_rec_wqes; + u8 sq_hw_db; + u32 reserved3; + }; /* HW DATA */ + + struct gdma_create_queue_resp { + struct gdma_resp_hdr hdr; + u32 queue_index; + }; /* HW DATA */ + + /* GDMA_DISABLE_QUEUE */ + struct gdma_disable_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 queue_index; + u32 alloc_res_id_on_creation; + }; /* HW DATA */ + + enum atb_page_size { + ATB_PAGE_SIZE_4K, + ATB_PAGE_SIZE_8K, + ATB_PAGE_SIZE_16K, + ATB_PAGE_SIZE_32K, + ATB_PAGE_SIZE_64K, + ATB_PAGE_SIZE_128K, + ATB_PAGE_SIZE_256K, + ATB_PAGE_SIZE_512K, + ATB_PAGE_SIZE_1M, + ATB_PAGE_SIZE_2M, + ATB_PAGE_SIZE_MAX, + }; + + enum gdma_mr_access_flags { + GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0), + GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1), + GDMA_ACCESS_FLAG_REMOTE_READ = BIT_ULL(2), + 
GDMA_ACCESS_FLAG_REMOTE_WRITE = BIT_ULL(3), + GDMA_ACCESS_FLAG_REMOTE_ATOMIC = BIT_ULL(4), + }; + + /* GDMA_CREATE_DMA_REGION */ + struct gdma_create_dma_region_req { + struct gdma_req_hdr hdr; + + /* The total size of the DMA region */ + u64 length; + + /* The offset in the first page */ + u32 offset_in_page; + + /* enum gdma_page_type */ + u32 gdma_page_type; + + /* The total number of pages */ + u32 page_count; + + /* If page_addr_list_len is smaller than page_count, + * the remaining page addresses will be added via the + * message GDMA_DMA_REGION_ADD_PAGES. + */ + u32 page_addr_list_len; + u64 page_addr_list[]; + }; /* HW DATA */ + + struct gdma_create_dma_region_resp { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t dma_region_handle; + }; /* HW DATA */ + + /* GDMA_DMA_REGION_ADD_PAGES */ + struct gdma_dma_region_add_pages_req { + struct gdma_req_hdr hdr; + + gdma_obj_handle_t dma_region_handle; + + u32 page_addr_list_len; + u32 reserved3; + + u64 page_addr_list[]; + }; /* HW DATA */ + + /* GDMA_DESTROY_DMA_REGION */ + struct gdma_destroy_dma_region_req { + struct gdma_req_hdr hdr; + + gdma_obj_handle_t dma_region_handle; + }; /* HW DATA */ + + enum gdma_pd_flags { + GDMA_PD_FLAG_INVALID = 0, + }; + + struct gdma_create_pd_req { + struct gdma_req_hdr hdr; + enum gdma_pd_flags flags; + u32 reserved; + };/* HW DATA */ + + struct gdma_create_pd_resp { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t pd_handle; + u32 pd_id; + u32 reserved; + };/* HW DATA */ + + struct gdma_destroy_pd_req { + struct gdma_req_hdr hdr; + gdma_obj_handle_t pd_handle; + };/* HW DATA */ + + struct gdma_destory_pd_resp { + struct gdma_resp_hdr hdr; + };/* HW DATA */ + + enum gdma_mr_type { + /* Guest Virtual Address - MRs of this type allow access + * to memory mapped by PTEs associated with this MR using a virtual + * address that is set up in the MST + */ + GDMA_MR_TYPE_GVA = 2, + }; + + struct gdma_create_mr_params { + gdma_obj_handle_t pd_handle; + enum gdma_mr_type mr_type; + union { + struct { + gdma_obj_handle_t dma_region_handle; + u64 virtual_address; + enum gdma_mr_access_flags access_flags; + } gva; + }; + }; + + struct gdma_create_mr_request { + struct gdma_req_hdr hdr; + gdma_obj_handle_t pd_handle; + enum gdma_mr_type mr_type; + u32 reserved_1; + + union { + struct { + gdma_obj_handle_t dma_region_handle; + u64 virtual_address; + enum gdma_mr_access_flags access_flags; + } gva; + + }; + u32 reserved_2; + };/* HW DATA */ + + struct gdma_create_mr_response { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t mr_handle; + u32 lkey; + u32 rkey; + };/* HW DATA */ + + struct gdma_destroy_mr_request { + struct gdma_req_hdr hdr; + gdma_obj_handle_t mr_handle; + };/* HW DATA */ + + struct gdma_destroy_mr_response { + struct gdma_resp_hdr hdr; + };/* HW DATA */ + + int mana_gd_verify_vf_version(struct pci_dev *pdev); + + int mana_gd_register_device(struct gdma_dev *gd); + int mana_gd_deregister_device(struct gdma_dev *gd); + + int mana_gd_post_work_request(struct gdma_queue *wq, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info); + + int mana_gd_post_and_ring(struct gdma_queue *queue, + const struct gdma_wqe_request *wqe, + struct gdma_posted_wqe_info *wqe_info); + + int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r); + void mana_gd_free_res_map(struct gdma_resource *r); + + void mana_gd_wq_ring_doorbell(struct gdma_context *gc, + struct gdma_queue *queue); + + int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi); + + 
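/*
 * Illustrative only, not part of this patch: a caller typically wraps an
 * SGL in a gdma_wqe_request and posts it with mana_gd_post_and_ring().
 * dma_addr, len, oob, wq, gd and err below are assumed to be provided by
 * the caller:
 *
 *     struct gdma_sge sge = {
 *             .address = dma_addr,
 *             .mem_key = gd->gpa_mkey,
 *             .size    = len,
 *     };
 *     struct gdma_wqe_request wqe_req = {
 *             .sgl             = &sge,
 *             .num_sge         = 1,
 *             .inline_oob_size = INLINE_OOB_SMALL_SIZE,
 *             .inline_oob_data = &oob,
 *     };
 *     struct gdma_posted_wqe_info wqe_info;
 *
 *     err = mana_gd_post_and_ring(wq, &wqe_req, &wqe_info);
 *
 * On success, wqe_info.wqe_size_in_bu records how many Basic Units the
 * WQE consumed.
 */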
void mana_gd_free_memory(struct gdma_mem_info *gmi); + + int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + u32 resp_len, void *resp); + + int mana_gd_destroy_dma_region(struct gdma_context *gc, + gdma_obj_handle_t dma_region_handle); + + #endif /* _GDMA_H */ diff --cc include/net/mana/mana.h index 0000000000000,713a8f8cca9a7..575ea36ce606b mode 000000,100644..100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@@ -1,0 -1,648 +1,648 @@@ + /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ + /* Copyright (c) 2021, Microsoft Corporation. */ + + #ifndef _MANA_H + #define _MANA_H + + #include "gdma.h" + #include "hw_channel.h" + + /* Microsoft Azure Network Adapter (MANA)'s definitions + * + * Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + + /* MANA protocol version */ + #define MANA_MAJOR_VERSION 0 + #define MANA_MINOR_VERSION 1 + #define MANA_MICRO_VERSION 1 + + typedef u64 mana_handle_t; + #define INVALID_MANA_HANDLE ((mana_handle_t)-1) + + enum TRI_STATE { + TRI_STATE_UNKNOWN = -1, + TRI_STATE_FALSE = 0, + TRI_STATE_TRUE = 1 + }; + + /* Number of entries for hardware indirection table must be in power of 2 */ + #define MANA_INDIRECT_TABLE_SIZE 64 + #define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) + + /* The Toeplitz hash key's length in bytes: should be multiple of 8 */ + #define MANA_HASH_KEY_SIZE 40 + + #define COMP_ENTRY_SIZE 64 + + #define ADAPTER_MTU_SIZE 1500 + #define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14) + + #define RX_BUFFERS_PER_QUEUE 512 + + #define MAX_SEND_BUFFERS_PER_QUEUE 256 + + #define EQ_SIZE (8 * PAGE_SIZE) + #define LOG2_EQ_THROTTLE 3 + + #define MAX_PORTS_IN_MANA_DEV 256 + + struct mana_stats_rx { + u64 packets; + u64 bytes; + u64 xdp_drop; + u64 xdp_tx; + u64 xdp_redirect; + struct u64_stats_sync syncp; + }; + + struct mana_stats_tx { + u64 packets; + u64 bytes; + u64 xdp_xmit; + struct u64_stats_sync syncp; + }; + + struct mana_txq { + struct gdma_queue *gdma_sq; + + union { + u32 gdma_txq_id; + struct { + u32 reserved1 : 10; + u32 vsq_frame : 14; + u32 reserved2 : 8; + }; + }; + + u16 vp_offset; + + struct net_device *ndev; + + /* The SKBs are sent to the HW and we are waiting for the CQEs. 
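 * Each transmitted skb is queued on pending_skbs (and pending_sends is
 * incremented) until its TX CQE arrives; the CQE handler then unmaps and
 * frees the skb and decrements pending_sends.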
*/ + struct sk_buff_head pending_skbs; + struct netdev_queue *net_txq; + + atomic_t pending_sends; + + struct mana_stats_tx stats; + }; + + /* skb data and frags dma mappings */ + struct mana_skb_head { + dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; + + u32 size[MAX_SKB_FRAGS + 1]; + }; + + #define MANA_HEADROOM sizeof(struct mana_skb_head) + + enum mana_tx_pkt_format { + MANA_SHORT_PKT_FMT = 0, + MANA_LONG_PKT_FMT = 1, + }; + + struct mana_tx_short_oob { + u32 pkt_fmt : 2; + u32 is_outer_ipv4 : 1; + u32 is_outer_ipv6 : 1; + u32 comp_iphdr_csum : 1; + u32 comp_tcp_csum : 1; + u32 comp_udp_csum : 1; + u32 supress_txcqe_gen : 1; + u32 vcq_num : 24; + + u32 trans_off : 10; /* Transport header offset */ + u32 vsq_frame : 14; + u32 short_vp_offset : 8; + }; /* HW DATA */ + + struct mana_tx_long_oob { + u32 is_encap : 1; + u32 inner_is_ipv6 : 1; + u32 inner_tcp_opt : 1; + u32 inject_vlan_pri_tag : 1; + u32 reserved1 : 12; + u32 pcp : 3; /* 802.1Q */ + u32 dei : 1; /* 802.1Q */ + u32 vlan_id : 12; /* 802.1Q */ + + u32 inner_frame_offset : 10; + u32 inner_ip_rel_offset : 6; + u32 long_vp_offset : 12; + u32 reserved2 : 4; + + u32 reserved3; + u32 reserved4; + }; /* HW DATA */ + + struct mana_tx_oob { + struct mana_tx_short_oob s_oob; + struct mana_tx_long_oob l_oob; + }; /* HW DATA */ + + enum mana_cq_type { + MANA_CQ_TYPE_RX, + MANA_CQ_TYPE_TX, + }; + + enum mana_cqe_type { + CQE_INVALID = 0, + CQE_RX_OKAY = 1, + CQE_RX_COALESCED_4 = 2, + CQE_RX_OBJECT_FENCE = 3, + CQE_RX_TRUNCATED = 4, + + CQE_TX_OKAY = 32, + CQE_TX_SA_DROP = 33, + CQE_TX_MTU_DROP = 34, + CQE_TX_INVALID_OOB = 35, + CQE_TX_INVALID_ETH_TYPE = 36, + CQE_TX_HDR_PROCESSING_ERROR = 37, + CQE_TX_VF_DISABLED = 38, + CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39, + CQE_TX_VPORT_DISABLED = 40, + CQE_TX_VLAN_TAGGING_VIOLATION = 41, + }; + + #define MANA_CQE_COMPLETION 1 + + struct mana_cqe_header { + u32 cqe_type : 6; + u32 client_type : 2; + u32 vendor_err : 24; + }; /* HW DATA */ + + /* NDIS HASH Types */ + #define NDIS_HASH_IPV4 BIT(0) + #define NDIS_HASH_TCP_IPV4 BIT(1) + #define NDIS_HASH_UDP_IPV4 BIT(2) + #define NDIS_HASH_IPV6 BIT(3) + #define NDIS_HASH_TCP_IPV6 BIT(4) + #define NDIS_HASH_UDP_IPV6 BIT(5) + #define NDIS_HASH_IPV6_EX BIT(6) + #define NDIS_HASH_TCP_IPV6_EX BIT(7) + #define NDIS_HASH_UDP_IPV6_EX BIT(8) + + #define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX) + #define MANA_HASH_L4 \ + (NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \ + NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX) + + struct mana_rxcomp_perpkt_info { + u32 pkt_len : 16; + u32 reserved1 : 16; + u32 reserved2; + u32 pkt_hash; + }; /* HW DATA */ + + #define MANA_RXCOMP_OOB_NUM_PPI 4 + + /* Receive completion OOB */ + struct mana_rxcomp_oob { + struct mana_cqe_header cqe_hdr; + + u32 rx_vlan_id : 12; + u32 rx_vlantag_present : 1; + u32 rx_outer_iphdr_csum_succeed : 1; + u32 rx_outer_iphdr_csum_fail : 1; + u32 reserved1 : 1; + u32 rx_hashtype : 9; + u32 rx_iphdr_csum_succeed : 1; + u32 rx_iphdr_csum_fail : 1; + u32 rx_tcp_csum_succeed : 1; + u32 rx_tcp_csum_fail : 1; + u32 rx_udp_csum_succeed : 1; + u32 rx_udp_csum_fail : 1; + u32 reserved2 : 1; + + struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI]; + + u32 rx_wqe_offset; + }; /* HW DATA */ + + struct mana_tx_comp_oob { + struct mana_cqe_header cqe_hdr; + + u32 tx_data_offset; + + u32 tx_sgl_offset : 5; + u32 tx_wqe_offset : 27; + + u32 reserved[12]; + }; /* HW DATA */ + + struct mana_rxq; + + #define CQE_POLLING_BUFFER 512 + + struct mana_cq { + struct 
gdma_queue *gdma_cq; + + /* Cache the CQ id (used to verify if each CQE comes to the right CQ. */ + u32 gdma_id; + + /* Type of the CQ: TX or RX */ + enum mana_cq_type type; + + /* Pointer to the mana_rxq that is pushing RX CQEs to the queue. + * Only and must be non-NULL if type is MANA_CQ_TYPE_RX. + */ + struct mana_rxq *rxq; + + /* Pointer to the mana_txq that is pushing TX CQEs to the queue. + * Only and must be non-NULL if type is MANA_CQ_TYPE_TX. + */ + struct mana_txq *txq; + + /* Buffer which the CQ handler can copy the CQE's into. */ + struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER]; + + /* NAPI data */ + struct napi_struct napi; + int work_done; + int budget; + }; + + struct mana_recv_buf_oob { + /* A valid GDMA work request representing the data buffer. */ + struct gdma_wqe_request wqe_req; + + void *buf_va; + dma_addr_t buf_dma_addr; + + /* SGL of the buffer going to be sent has part of the work request. */ + u32 num_sge; + struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES]; + + /* Required to store the result of mana_gd_post_work_request. + * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the + * work queue when the WQE is consumed. + */ + struct gdma_posted_wqe_info wqe_inf; + }; + + struct mana_rxq { + struct gdma_queue *gdma_rq; + /* Cache the gdma receive queue id */ + u32 gdma_id; + + /* Index of RQ in the vPort, not gdma receive queue id */ + u32 rxq_idx; + + u32 datasize; + + mana_handle_t rxobj; + + struct mana_cq rx_cq; + + struct completion fence_event; + + struct net_device *ndev; + + /* Total number of receive buffers to be allocated */ + u32 num_rx_buf; + + u32 buf_index; + + struct mana_stats_rx stats; + + struct bpf_prog __rcu *bpf_prog; + struct xdp_rxq_info xdp_rxq; + struct page *xdp_save_page; + bool xdp_flush; + int xdp_rc; /* XDP redirect return code */ + + /* MUST BE THE LAST MEMBER: + * Each receive buffer has an associated mana_recv_buf_oob. + */ + struct mana_recv_buf_oob rx_oobs[]; + }; + + struct mana_tx_qp { + struct mana_txq txq; + + struct mana_cq tx_cq; + + mana_handle_t tx_object; + }; + + struct mana_ethtool_stats { + u64 stop_queue; + u64 wake_queue; + }; + + struct mana_context { + struct gdma_dev *gdma_dev; + + u16 num_ports; + + struct mana_eq *eqs; + + struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; + }; + + struct mana_port_context { + struct mana_context *ac; + struct net_device *ndev; + + u8 mac_addr[ETH_ALEN]; + + enum TRI_STATE rss_state; + + mana_handle_t default_rxobj; + bool tx_shortform_allowed; + u16 tx_vp_offset; + + struct mana_tx_qp *tx_qp; + + /* Indirection Table for RX & TX. The values are queue indexes */ + u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Indirection table containing RxObject Handles */ + mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Hash key used by the NIC */ + u8 hashkey[MANA_HASH_KEY_SIZE]; + + /* This points to an array of num_queues of RQ pointers. */ + struct mana_rxq **rxqs; + + struct bpf_prog *bpf_prog; + + /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. 
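 * num_queues is at most max_queues; queue index i owns the matching
 * tx_qp[i] entry and rxqs[i] pointer.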
*/ + unsigned int max_queues; + unsigned int num_queues; + + mana_handle_t port_handle; + mana_handle_t pf_filter_handle; + + /* Mutex for sharing access to vport_use_count */ + struct mutex vport_mutex; + int vport_use_count; + + u16 port_idx; + + bool port_is_up; + bool port_st_save; /* Saved port state */ + + struct mana_ethtool_stats eth_stats; + }; + -int mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); ++netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); + int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, + bool update_hash, bool update_tab); + + int mana_alloc_queues(struct net_device *ndev); + int mana_attach(struct net_device *ndev); + int mana_detach(struct net_device *ndev, bool from_close); + + int mana_probe(struct gdma_dev *gd, bool resuming); + void mana_remove(struct gdma_dev *gd, bool suspending); + + void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev); + int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, + u32 flags); + u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, + struct xdp_buff *xdp, void *buf_va, uint pkt_len); + struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); + void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); + int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); + + extern const struct ethtool_ops mana_ethtool_ops; + + struct mana_obj_spec { + u32 queue_index; + u64 gdma_region; + u32 queue_size; + u32 attached_eq; + u32 modr_ctx_id; + }; + + enum mana_command_code { + MANA_QUERY_DEV_CONFIG = 0x20001, + MANA_QUERY_GF_STAT = 0x20002, + MANA_CONFIG_VPORT_TX = 0x20003, + MANA_CREATE_WQ_OBJ = 0x20004, + MANA_DESTROY_WQ_OBJ = 0x20005, + MANA_FENCE_RQ = 0x20006, + MANA_CONFIG_VPORT_RX = 0x20007, + MANA_QUERY_VPORT_CONFIG = 0x20008, + + /* Privileged commands for the PF mode */ + MANA_REGISTER_FILTER = 0x28000, + MANA_DEREGISTER_FILTER = 0x28001, + MANA_REGISTER_HW_PORT = 0x28003, + MANA_DEREGISTER_HW_PORT = 0x28004, + }; + + /* Query Device Configuration */ + struct mana_query_device_cfg_req { + struct gdma_req_hdr hdr; + + /* MANA Nic Driver Capability flags */ + u64 mn_drv_cap_flags1; + u64 mn_drv_cap_flags2; + u64 mn_drv_cap_flags3; + u64 mn_drv_cap_flags4; + + u32 proto_major_ver; + u32 proto_minor_ver; + u32 proto_micro_ver; + + u32 reserved; + }; /* HW DATA */ + + struct mana_query_device_cfg_resp { + struct gdma_resp_hdr hdr; + + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; + + u16 max_num_vports; + u16 reserved; + u32 max_num_eqs; + }; /* HW DATA */ + + /* Query vPort Configuration */ + struct mana_query_vport_cfg_req { + struct gdma_req_hdr hdr; + u32 vport_index; + }; /* HW DATA */ + + struct mana_query_vport_cfg_resp { + struct gdma_resp_hdr hdr; + u32 max_num_sq; + u32 max_num_rq; + u32 num_indirection_ent; + u32 reserved1; + u8 mac_addr[6]; + u8 reserved2[2]; + mana_handle_t vport; + }; /* HW DATA */ + + /* Configure vPort */ + struct mana_config_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 pdid; + u32 doorbell_pageid; + }; /* HW DATA */ + + struct mana_config_vport_resp { + struct gdma_resp_hdr hdr; + u16 tx_vport_offset; + u8 short_form_allowed; + u8 reserved; + }; /* HW DATA */ + + /* Create WQ Object */ + struct mana_create_wqobj_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 wq_type; + u32 reserved; + u64 wq_gdma_region; + u64 cq_gdma_region; + u32 wq_size; + u32 cq_size; + u32 cq_moderation_ctx_id; + u32 
cq_parent_qid; + }; /* HW DATA */ + + struct mana_create_wqobj_resp { + struct gdma_resp_hdr hdr; + u32 wq_id; + u32 cq_id; + mana_handle_t wq_obj; + }; /* HW DATA */ + + /* Destroy WQ Object */ + struct mana_destroy_wqobj_req { + struct gdma_req_hdr hdr; + u32 wq_type; + u32 reserved; + mana_handle_t wq_obj_handle; + }; /* HW DATA */ + + struct mana_destroy_wqobj_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + + /* Fence RQ */ + struct mana_fence_rq_req { + struct gdma_req_hdr hdr; + mana_handle_t wq_obj_handle; + }; /* HW DATA */ + + struct mana_fence_rq_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + + /* Configure vPort Rx Steering */ + struct mana_cfg_rx_steer_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u16 num_indir_entries; + u16 indir_tab_offset; + u32 rx_enable; + u32 rss_enable; + u8 update_default_rxobj; + u8 update_hashkey; + u8 update_indir_tab; + u8 reserved; + mana_handle_t default_rxobj; + u8 hashkey[MANA_HASH_KEY_SIZE]; + }; /* HW DATA */ + + struct mana_cfg_rx_steer_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + + /* Register HW vPort */ + struct mana_register_hw_vport_req { + struct gdma_req_hdr hdr; + u16 attached_gfid; + u8 is_pf_default_vport; + u8 reserved1; + u8 allow_all_ether_types; + u8 reserved2; + u8 reserved3; + u8 reserved4; + }; /* HW DATA */ + + struct mana_register_hw_vport_resp { + struct gdma_resp_hdr hdr; + mana_handle_t hw_vport_handle; + }; /* HW DATA */ + + /* Deregister HW vPort */ + struct mana_deregister_hw_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t hw_vport_handle; + }; /* HW DATA */ + + struct mana_deregister_hw_vport_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + + /* Register filter */ + struct mana_register_filter_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u8 mac_addr[6]; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 reserved4; + u16 reserved5; + u32 reserved6; + u32 reserved7; + u32 reserved8; + }; /* HW DATA */ + + struct mana_register_filter_resp { + struct gdma_resp_hdr hdr; + mana_handle_t filter_handle; + }; /* HW DATA */ + + /* Deregister filter */ + struct mana_deregister_filter_req { + struct gdma_req_hdr hdr; + mana_handle_t filter_handle; + }; /* HW DATA */ + + struct mana_deregister_filter_resp { + struct gdma_resp_hdr hdr; + }; /* HW DATA */ + + #define MANA_MAX_NUM_QUEUES 64 + + #define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) + + struct mana_tx_package { + struct gdma_wqe_request wqe_req; + struct gdma_sge sgl_array[5]; + struct gdma_sge *sgl_ptr; + + struct mana_tx_oob tx_oob; + + struct gdma_posted_wqe_info wqe_info; + }; + + int mana_create_wq_obj(struct mana_port_context *apc, + mana_handle_t vport, + u32 wq_type, struct mana_obj_spec *wq_spec, + struct mana_obj_spec *cq_spec, + mana_handle_t *wq_obj); + + void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + mana_handle_t wq_obj); + + int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + u32 doorbell_pg_id); + void mana_uncfg_vport(struct mana_port_context *apc); + #endif /* _MANA_H */
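For reference, the following is a minimal sketch (not part of this series) of
how the helpers above are typically combined to issue a GDMA message over the
HWC. The function name is illustrative, and the locking and wait-for-test-EQE
handling the real driver performs around GDMA_GENERATE_TEST_EQE are omitted:

    static int example_generate_test_eqe(struct gdma_context *gc,
                                         struct gdma_queue *eq)
    {
            struct gdma_generate_test_event_req req = {};
            struct gdma_general_resp resp = {};
            int err;

            /* Fill both the request header and the expected response header. */
            mana_gd_init_req_hdr(&req.hdr, GDMA_GENERATE_TEST_EQE,
                                 sizeof(req), sizeof(resp));
            req.hdr.dev_id = eq->gdma_dev->dev_id;
            req.queue_index = eq->id;

            /* Synchronous request/response exchange over the HW channel. */
            err = mana_gd_send_request(gc, sizeof(req), &req,
                                       sizeof(resp), &resp);
            if (err)
                    return err;

            /* A non-zero status in the response header reports a PF-side error. */
            return resp.hdr.status ? -EPROTO : 0;
    }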