ath11k: allocate dst ring descriptors from cacheable memory
author P Praneesh <ppranees@codeaurora.org>
Fri, 12 Nov 2021 09:01:26 +0000 (11:01 +0200)
committerKalle Valo <kvalo@codeaurora.org>
Mon, 15 Nov 2021 09:21:49 +0000 (11:21 +0200)
tcl_data and reo_dst rings are currently being allocated using
dma_alloc_coherent() which is non-cacheable.

Allocating ring memory from cacheable memory area allows cached descriptor
access and prefetch next descriptors to optimize CPU usage during
descriptor processing on NAPI. Based on the hardware param we can enable
or disable this feature for the corresponding platform.

Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.4.0.1.r2-00012-QCAHKSWPL_SILICONZ-1
Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01695-QCAHKSWPL_SILICONZ-1

Co-developed-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Co-developed-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: P Praneesh <ppranees@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1630560820-21905-3-git-send-email-ppranees@codeaurora.org
drivers/net/wireless/ath/ath11k/core.c
drivers/net/wireless/ath/ath11k/dp.c
drivers/net/wireless/ath/ath11k/dp.h
drivers/net/wireless/ath/ath11k/hal.c
drivers/net/wireless/ath/ath11k/hal.h
drivers/net/wireless/ath/ath11k/hw.h

index bd77aedc7fe63982aaf5b91b25ac509d2ee07c02..2ba3bf8d3fde265daed7c776e4c4f3e152f57ac6 100644 (file)
@@ -84,6 +84,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
                .max_tx_ring = DP_TCL_NUM_RING_MAX,
                .hal_params = &ath11k_hw_hal_params_ipq8074,
                .supports_dynamic_smps_6ghz = false,
+               .alloc_cacheable_memory = true,
        },
        {
                .hw_rev = ATH11K_HW_IPQ6018_HW10,
@@ -135,6 +136,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
                .max_tx_ring = DP_TCL_NUM_RING_MAX,
                .hal_params = &ath11k_hw_hal_params_ipq8074,
                .supports_dynamic_smps_6ghz = false,
+               .alloc_cacheable_memory = true,
        },
        {
                .name = "qca6390 hw2.0",
@@ -185,6 +187,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
                .max_tx_ring = DP_TCL_NUM_RING_MAX_QCA6390,
                .hal_params = &ath11k_hw_hal_params_qca6390,
                .supports_dynamic_smps_6ghz = false,
+               .alloc_cacheable_memory = false,
        },
        {
                .name = "qcn9074 hw1.0",
@@ -235,6 +238,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
                .max_tx_ring = DP_TCL_NUM_RING_MAX,
                .hal_params = &ath11k_hw_hal_params_ipq8074,
                .supports_dynamic_smps_6ghz = true,
+               .alloc_cacheable_memory = true,
        },
        {
                .name = "wcn6855 hw2.0",
@@ -285,6 +289,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = {
                .max_tx_ring = DP_TCL_NUM_RING_MAX_QCA6390,
                .hal_params = &ath11k_hw_hal_params_qca6390,
                .supports_dynamic_smps_6ghz = false,
+               .alloc_cacheable_memory = false,
        },
 };
 
index 8baaeeb8cf821feef7577fa3b3abb98c246d4ec3..8058b56028ded0df749fb0fdef91e803b13b44b3 100644 (file)
@@ -101,8 +101,11 @@ void ath11k_dp_srng_cleanup(struct ath11k_base *ab, struct dp_srng *ring)
        if (!ring->vaddr_unaligned)
                return;
 
-       dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
-                         ring->paddr_unaligned);
+       if (ring->cached)
+               kfree(ring->vaddr_unaligned);
+       else
+               dma_free_coherent(ab->dev, ring->size, ring->vaddr_unaligned,
+                                 ring->paddr_unaligned);
 
        ring->vaddr_unaligned = NULL;
 }
@@ -222,6 +225,7 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
        int entry_sz = ath11k_hal_srng_get_entrysize(ab, type);
        int max_entries = ath11k_hal_srng_get_max_entries(ab, type);
        int ret;
+       bool cached = false;
 
        if (max_entries < 0 || entry_sz < 0)
                return -EINVAL;
@@ -230,9 +234,28 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
                num_entries = max_entries;
 
        ring->size = (num_entries * entry_sz) + HAL_RING_BASE_ALIGN - 1;
-       ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
-                                                  &ring->paddr_unaligned,
-                                                  GFP_KERNEL);
+
+       if (ab->hw_params.alloc_cacheable_memory) {
+               /* Allocate the reo dst and tx completion rings from cacheable memory */
+               switch (type) {
+               case HAL_REO_DST:
+                       cached = true;
+                       break;
+               default:
+                       cached = false;
+               }
+
+               if (cached) {
+                       ring->vaddr_unaligned = kzalloc(ring->size, GFP_KERNEL);
+                       ring->paddr_unaligned = virt_to_phys(ring->vaddr_unaligned);
+               }
+       }
+
+       if (!cached)
+               ring->vaddr_unaligned = dma_alloc_coherent(ab->dev, ring->size,
+                                                          &ring->paddr_unaligned,
+                                                          GFP_KERNEL);
+
        if (!ring->vaddr_unaligned)
                return -ENOMEM;
 
@@ -292,6 +315,11 @@ int ath11k_dp_srng_setup(struct ath11k_base *ab, struct dp_srng *ring,
                return -EINVAL;
        }
 
+       if (cached) {
+               params.flags |= HAL_SRNG_FLAGS_CACHED;
+               ring->cached = 1;
+       }
+
        ret = ath11k_hal_srng_setup(ab, type, ring_num, mac_id, &params);
        if (ret < 0) {
                ath11k_warn(ab, "failed to setup srng: %d ring_id %d\n",
index f524d19aca349096adb2f875616be5acf88fad45..a4c36a9be338a7bbfaaa97c567b19a76a419f40c 100644 (file)
@@ -64,6 +64,7 @@ struct dp_srng {
        dma_addr_t paddr;
        int size;
        u32 ring_id;
+       u8 cached;
 };
 
 struct dp_rxdma_ring {
index eaa0edca557611b6af6c4d84e55a4e5cda2f7fef..f04edafbd0f151632b068e97904ed58d5b518d84 100644 (file)
@@ -627,6 +627,21 @@ u32 *ath11k_hal_srng_dst_peek(struct ath11k_base *ab, struct hal_srng *srng)
        return NULL;
 }
 
+static void ath11k_hal_srng_prefetch_desc(struct ath11k_base *ab,
+                                         struct hal_srng *srng)
+{
+       u32 *desc;
+
+       /* prefetch only if desc is available */
+       desc = ath11k_hal_srng_dst_peek(ab, srng);
+       if (likely(desc)) {
+               dma_sync_single_for_cpu(ab->dev, virt_to_phys(desc),
+                                       (srng->entry_size * sizeof(u32)),
+                                       DMA_FROM_DEVICE);
+               prefetch(desc);
+       }
+}
+
 u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
                                        struct hal_srng *srng)
 {
@@ -642,6 +657,10 @@ u32 *ath11k_hal_srng_dst_get_next_entry(struct ath11k_base *ab,
        srng->u.dst_ring.tp = (srng->u.dst_ring.tp + srng->entry_size) %
                              srng->ring_size;
 
+       /* Try to prefetch the next descriptor in the ring */
+       if (srng->flags & HAL_SRNG_FLAGS_CACHED)
+               ath11k_hal_srng_prefetch_desc(ab, srng);
+
        return desc;
 }
 
@@ -775,11 +794,16 @@ void ath11k_hal_srng_access_begin(struct ath11k_base *ab, struct hal_srng *srng)
 {
        lockdep_assert_held(&srng->lock);
 
-       if (srng->ring_dir == HAL_SRNG_DIR_SRC)
+       if (srng->ring_dir == HAL_SRNG_DIR_SRC) {
                srng->u.src_ring.cached_tp =
                        *(volatile u32 *)srng->u.src_ring.tp_addr;
-       else
+       } else {
                srng->u.dst_ring.cached_hp = *srng->u.dst_ring.hp_addr;
+
+               /* Try to prefetch the next descriptor in the ring */
+               if (srng->flags & HAL_SRNG_FLAGS_CACHED)
+                       ath11k_hal_srng_prefetch_desc(ab, srng);
+       }
 }
 
 /* Update cached ring head/tail pointers to HW. ath11k_hal_srng_access_begin()
index 35ed3a14e200a01e8515eb8a25ca34f0954d8480..0f4f9ce74354bdbad6e55c16fdc4453d90cdb9be 100644 (file)
@@ -513,6 +513,7 @@ enum hal_srng_dir {
 #define HAL_SRNG_FLAGS_DATA_TLV_SWAP           0x00000020
 #define HAL_SRNG_FLAGS_LOW_THRESH_INTR_EN      0x00010000
 #define HAL_SRNG_FLAGS_MSI_INTR                        0x00020000
+#define HAL_SRNG_FLAGS_CACHED                   0x20000000
 #define HAL_SRNG_FLAGS_LMAC_RING               0x80000000
 
 #define HAL_SRNG_TLV_HDR_TAG           GENMASK(9, 1)
index 2c7bd7a36ba5a89d10e569cefb81bda83c3a5674..3e64c9b94db00f467cce21b48f4037b15de0df6f 100644 (file)
@@ -178,6 +178,7 @@ struct ath11k_hw_params {
        u8 max_tx_ring;
        const struct ath11k_hw_hal_params *hal_params;
        bool supports_dynamic_smps_6ghz;
+       bool alloc_cacheable_memory;
 };
 
 struct ath11k_hw_ops {