spi: spi-qcom-qspi: Add DMA mode support
author Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
Mon, 24 Apr 2023 09:32:41 +0000 (15:02 +0530)
committer Mark Brown <broonie@kernel.org>
Sun, 7 May 2023 23:50:40 +0000 (08:50 +0900)
Current driver supports only PIO mode.

HW supports DMA, so add DMA mode support to the driver
for better performance for larger xfers.

Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Link: https://lore.kernel.org/r/1682328761-17517-6-git-send-email-quic_vnivarth@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
drivers/spi/spi-qcom-qspi.c

index fab155389999372ec920ca768c61c894d781b062..a3991e617c907c4bb3b68dbe9f810dc870660047 100644 (file)
@@ -2,6 +2,8 @@
 // Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
 
 #include <linux/clk.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
 #include <linux/interconnect.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -62,6 +64,7 @@
 #define WR_FIFO_FULL           BIT(10)
 #define WR_FIFO_OVERRUN                BIT(11)
 #define TRANSACTION_DONE       BIT(16)
+#define DMA_CHAIN_DONE         BIT(31)
 #define QSPI_ERR_IRQS          (RESP_FIFO_UNDERRUN | HRESP_FROM_NOC_ERR | \
                                 WR_FIFO_OVERRUN)
 #define QSPI_ALL_IRQS          (QSPI_ERR_IRQS | RESP_FIFO_RDY | \
 #define RD_FIFO_RESET          0x0030
 #define RESET_FIFO             BIT(0)
 
+#define NEXT_DMA_DESC_ADDR     0x0040 /* qcom_qspi_dma_xfer() writes the first descriptor's address here to kick off DMA */
+#define CURRENT_DMA_DESC_ADDR  0x0044
+#define CURRENT_MEM_ADDR       0x0048 /* NOTE(review): same offset as the existing CUR_MEM_ADDR just below */
+
 #define CUR_MEM_ADDR           0x0048
 #define HW_VERSION             0x004c
 #define RD_FIFO                        0x0050
 #define SAMPLING_CLK_CFG       0x0090
 #define SAMPLING_CLK_STATUS    0x0094
 
+#define QSPI_ALIGN_REQ 32
 
 enum qspi_dir {
        QSPI_READ, /* DMA descriptor setup takes the scatterlist from xfer->rx_sg */
        QSPI_WRITE, /* DMA descriptor setup takes the scatterlist from xfer->tx_sg */
 };
 
+struct qspi_cmd_desc { /* one DMA command descriptor; NOTE(review): bit-field layout is compiler/endian dependent - confirm it matches the hardware format */
+       u32 data_address; /* DMA address of the data buffer */
+       u32 next_descriptor; /* DMA address of the following descriptor; stays 0 on the last one */
+       u32 direction:1; /* copied from ctrl->xfer.dir (enum qspi_dir) */
+       u32 multi_io_mode:3; /* unshifted value returned by qspi_buswidth_to_iomode() */
+       u32 reserved1:4;
+       u32 fragment:1; /* forced to 1 on non-final descriptors; the final one gets !ctrl->xfer.is_last */
+       u32 reserved2:7;
+       u32 length:16; /* buffer length in bytes; NOTE(review): 16 bits cannot hold max_dma_len (65536) - confirm how a 64KB segment is encoded */
+};
+
 struct qspi_xfer {
        union {
                const void *tx_buf;
@@ -137,11 +156,23 @@ enum qspi_clocks {
        QSPI_NUM_CLKS
 };
 
+/*
+ * Number of entries in the sgt returned from the spi framework that
+ * will be supported. Can be modified as required.
+ * In practice, given max_dma_len is 64KB, the number of
+ * entries is not expected to exceed 1.
+ */
+#define QSPI_MAX_SG 5
+
 struct qcom_qspi {
        void __iomem *base;
        struct device *dev;
        struct clk_bulk_data *clks;
        struct qspi_xfer xfer;
+       struct dma_pool *dma_cmd_pool;
+       dma_addr_t dma_cmd_desc[QSPI_MAX_SG];
+       void *virt_cmd_desc[QSPI_MAX_SG];
+       unsigned int n_cmd_desc;
        struct icc_path *icc_path_cpu_to_qspi;
        unsigned long last_speed;
        /* Lock to protect data accessed by IRQs */
@@ -153,21 +184,22 @@ static u32 qspi_buswidth_to_iomode(struct qcom_qspi *ctrl,
 {
        switch (buswidth) {
        case 1:
-               return SDR_1BIT << MULTI_IO_MODE_SHFT;
+               return SDR_1BIT;
        case 2:
-               return SDR_2BIT << MULTI_IO_MODE_SHFT;
+               return SDR_2BIT;
        case 4:
-               return SDR_4BIT << MULTI_IO_MODE_SHFT;
+               return SDR_4BIT;
        default:
                dev_warn_once(ctrl->dev,
                                "Unexpected bus width: %u\n", buswidth);
-               return SDR_1BIT << MULTI_IO_MODE_SHFT;
+               return SDR_1BIT;
        }
 }
 
 static void qcom_qspi_pio_xfer_cfg(struct qcom_qspi *ctrl)
 {
        u32 pio_xfer_cfg;
+       u32 iomode;
        const struct qspi_xfer *xfer;
 
        xfer = &ctrl->xfer;
@@ -179,7 +211,8 @@ static void qcom_qspi_pio_xfer_cfg(struct qcom_qspi *ctrl)
        else
                pio_xfer_cfg |= TRANSFER_FRAGMENT;
        pio_xfer_cfg &= ~MULTI_IO_MODE_MSK;
-       pio_xfer_cfg |= qspi_buswidth_to_iomode(ctrl, xfer->buswidth);
+       iomode = qspi_buswidth_to_iomode(ctrl, xfer->buswidth);
+       pio_xfer_cfg |= iomode << MULTI_IO_MODE_SHFT;
 
        writel(pio_xfer_cfg, ctrl->base + PIO_XFER_CFG);
 }
@@ -217,12 +250,22 @@ static void qcom_qspi_pio_xfer(struct qcom_qspi *ctrl)
 static void qcom_qspi_handle_err(struct spi_master *master,
                                  struct spi_message *msg)
 {
+       u32 int_status;
        struct qcom_qspi *ctrl = spi_master_get_devdata(master);
        unsigned long flags;
+       int i;
 
        spin_lock_irqsave(&ctrl->lock, flags);
        writel(0, ctrl->base + MSTR_INT_EN);
+       int_status = readl(ctrl->base + MSTR_INT_STATUS);
+       writel(int_status, ctrl->base + MSTR_INT_STATUS); /* same read/write-back as the IRQ handler; presumably clears the pending bits - confirm */
        ctrl->xfer.rem_bytes = 0;
+
+       /*
+        * DMA mode only: hand every descriptor still recorded in
+        * virt_cmd_desc[]/dma_cmd_desc[] back to the pool and reset the
+        * count, so the next qcom_qspi_setup_dma_desc() does not bail out
+        * with its "Remnant dma buffers" error. In PIO mode n_cmd_desc is 0
+        * and the loop does nothing.
+        */
+       for (i = 0; i < ctrl->n_cmd_desc; i++)
+               dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
+                                 ctrl->dma_cmd_desc[i]);
+       ctrl->n_cmd_desc = 0;
        spin_unlock_irqrestore(&ctrl->lock, flags);
 }
 
@@ -242,7 +285,7 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)
        }
 
        /*
-        * Set BW quota for CPU as driver supports FIFO mode only.
+        * Set BW quota for CPU.
         * We don't have explicit peak requirement so keep it equal to avg_bw.
         */
        avg_bw_cpu = Bps_to_icc(speed_hz);
@@ -258,6 +301,102 @@ static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)
        return 0;
 }
 
+/*
+ * Allocate one DMA command descriptor from ctrl->dma_cmd_pool, fill it in
+ * for the current transfer and append it to the descriptor chain.
+ *
+ * @ctrl:    controller state; the new descriptor is recorded in
+ *           virt_cmd_desc[]/dma_cmd_desc[] and n_cmd_desc is incremented
+ * @dma_ptr: DMA address of the data buffer this descriptor covers
+ * @n_bytes: length of that buffer in bytes
+ *
+ * Reached from qcom_qspi_transfer_one() (via qcom_qspi_setup_dma_desc())
+ * with ctrl->lock held and interrupts disabled, so nothing in here may sleep.
+ * No bounds check is made against QSPI_MAX_SG; the caller limits the number
+ * of calls.
+ *
+ * Return: 0 on success, -ENOMEM if no descriptor could be allocated.
+ */
+static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr,
+                       uint32_t n_bytes)
+{
+       struct qspi_cmd_desc *virt_cmd_desc, *prev;
+       dma_addr_t dma_cmd_desc;
+
+       /*
+        * Allocate the dma cmd descriptor. GFP_ATOMIC, not GFP_KERNEL: we are
+        * inside a spin_lock_irqsave() section and must not sleep.
+        */
+       virt_cmd_desc = dma_pool_alloc(ctrl->dma_cmd_pool, GFP_ATOMIC | __GFP_ZERO, &dma_cmd_desc);
+       if (!virt_cmd_desc)
+               return -ENOMEM;
+
+       ctrl->virt_cmd_desc[ctrl->n_cmd_desc] = virt_cmd_desc;
+       ctrl->dma_cmd_desc[ctrl->n_cmd_desc] = dma_cmd_desc;
+       ctrl->n_cmd_desc++;
+
+       /* setup cmd descriptor; next_descriptor and reserved bits stay zero (__GFP_ZERO) */
+       virt_cmd_desc->data_address = dma_ptr;
+       virt_cmd_desc->direction = ctrl->xfer.dir;
+       virt_cmd_desc->multi_io_mode = qspi_buswidth_to_iomode(ctrl, ctrl->xfer.buswidth);
+       virt_cmd_desc->fragment = !ctrl->xfer.is_last;
+       /*
+        * NOTE(review): length is a 16-bit field while max_dma_len is 65536,
+        * so a 64KB segment would be stored as 0 - confirm hardware semantics.
+        */
+       virt_cmd_desc->length = n_bytes;
+
+       /* link the previous descriptor to this one; it is no longer the last in the chain */
+       if (ctrl->n_cmd_desc >= 2) {
+               prev = (ctrl->virt_cmd_desc)[ctrl->n_cmd_desc - 2];
+               prev->next_descriptor = dma_cmd_desc;
+               prev->fragment = 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Translate the scatterlist of @xfer into a chain of DMA command descriptors.
+ *
+ * The rx or tx table is picked from ctrl->xfer.dir. The table may hold at
+ * most QSPI_MAX_SG entries and every entry's DMA address must be aligned to
+ * QSPI_ALIGN_REQ.
+ *
+ * Return: 0 with the chain recorded in @ctrl; -EIO if descriptors from an
+ * earlier transfer are still present; -EAGAIN if the scatterlist cannot be
+ * handled (qcom_qspi_transfer_one() then falls back to PIO); or the error
+ * from qcom_qspi_alloc_desc(), in which case the descriptors allocated so
+ * far have been freed again.
+ */
+static int qcom_qspi_setup_dma_desc(struct qcom_qspi *ctrl,
+                               struct spi_transfer *xfer)
+{
+       struct sg_table *sgt;
+       struct scatterlist *sg;
+       dma_addr_t addr;
+       int idx;
+       int err = 0;
+
+       /* descriptors of a previous transfer must have been released by now */
+       if (ctrl->n_cmd_desc) {
+               dev_err(ctrl->dev, "Remnant dma buffers n_cmd_desc-%d\n", ctrl->n_cmd_desc);
+               return -EIO;
+       }
+
+       if (ctrl->xfer.dir == QSPI_READ)
+               sgt = &xfer->rx_sg;
+       else
+               sgt = &xfer->tx_sg;
+
+       if (sgt->nents == 0 || sgt->nents > QSPI_MAX_SG) {
+               dev_warn_once(ctrl->dev, "Cannot handle %d entries in scatter list\n", sgt->nents);
+               return -EAGAIN;
+       }
+
+       /* first pass: reject the whole list before anything is allocated */
+       for (idx = 0; idx < sgt->nents; idx++) {
+               addr = sg_dma_address(sgt->sgl + idx);
+               if (!IS_ALIGNED(addr, QSPI_ALIGN_REQ)) {
+                       dev_warn_once(ctrl->dev, "dma_address not aligned to %d\n", QSPI_ALIGN_REQ);
+                       return -EAGAIN;
+               }
+       }
+
+       /* second pass: one descriptor per scatterlist entry, stop at the first failure */
+       for (idx = 0; idx < sgt->nents && !err; idx++) {
+               sg = sgt->sgl + idx;
+               err = qcom_qspi_alloc_desc(ctrl, sg_dma_address(sg), sg_dma_len(sg));
+       }
+       if (!err)
+               return 0;
+
+       /* allocation failed part-way: give back what was taken */
+       for (idx = 0; idx < ctrl->n_cmd_desc; idx++)
+               dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[idx],
+                                 ctrl->dma_cmd_desc[idx]);
+       ctrl->n_cmd_desc = 0;
+
+       return err;
+}
+
+/*
+ * Start the DMA transfer described by the chain whose first descriptor is
+ * ctrl->dma_cmd_desc[0].
+ */
+static void qcom_qspi_dma_xfer(struct qcom_qspi *ctrl)
+{
+       u32 first_desc = (u32)ctrl->dma_cmd_desc[0];
+
+       /* DMA_CHAIN_DONE becomes the only bit set in the interrupt enable register */
+       writel(DMA_CHAIN_DONE, ctrl->base + MSTR_INT_EN);
+
+       /* handing over the first descriptor's address kicks off the transfer */
+       writel(first_desc, ctrl->base + NEXT_DMA_DESC_ADDR);
+}
+
+/* Transfers longer than this many bytes are offered to DMA */
+#define QSPI_MAX_BYTES_FIFO 64
+
+/*
+ * can_dma callback (installed in probe): true only for transfers longer
+ * than QSPI_MAX_BYTES_FIFO bytes. @ctlr and @slv are not consulted.
+ */
+static bool qcom_qspi_can_dma(struct spi_controller *ctlr,
+                        struct spi_device *slv, struct spi_transfer *xfer)
+{
+       if (xfer->len <= QSPI_MAX_BYTES_FIFO)
+               return false;
+
+       return true;
+}
+
 static int qcom_qspi_transfer_one(struct spi_master *master,
                                  struct spi_device *slv,
                                  struct spi_transfer *xfer)
@@ -266,6 +405,7 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
        int ret;
        unsigned long speed_hz;
        unsigned long flags;
+       u32 mstr_cfg;
 
        speed_hz = slv->max_speed_hz;
        if (xfer->speed_hz)
@@ -276,6 +416,7 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
                return ret;
 
        spin_lock_irqsave(&ctrl->lock, flags);
+       mstr_cfg = readl(ctrl->base + MSTR_CONFIG);
 
        /* We are half duplex, so either rx or tx will be set */
        if (xfer->rx_buf) {
@@ -290,10 +431,36 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
        ctrl->xfer.is_last = list_is_last(&xfer->transfer_list,
                                          &master->cur_msg->transfers);
        ctrl->xfer.rem_bytes = xfer->len;
+
+       if (xfer->rx_sg.nents || xfer->tx_sg.nents) {
+               /* do DMA transfer */
+               if (!(mstr_cfg & DMA_ENABLE)) {
+                       mstr_cfg |= DMA_ENABLE;
+                       writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
+               }
+
+               ret = qcom_qspi_setup_dma_desc(ctrl, xfer);
+               if (ret != -EAGAIN) {
+                       if (!ret)
+                               qcom_qspi_dma_xfer(ctrl);
+                       goto exit;
+               }
+               dev_warn_once(ctrl->dev, "DMA failure, falling back to PIO");
+               ret = 0; /* We'll retry w/ PIO */
+       }
+
+       if (mstr_cfg & DMA_ENABLE) {
+               mstr_cfg &= ~DMA_ENABLE;
+               writel(mstr_cfg, ctrl->base + MSTR_CONFIG);
+       }
        qcom_qspi_pio_xfer(ctrl);
 
+exit:
        spin_unlock_irqrestore(&ctrl->lock, flags);
 
+       if (ret)
+               return ret;
+
        /* We'll call spi_finalize_current_transfer() when done */
        return 1;
 }
@@ -328,6 +495,16 @@ static int qcom_qspi_prepare_message(struct spi_master *master,
        return 0;
 }
 
+/*
+ * Create the device-managed pool that DMA command descriptors are
+ * allocated from and store it in ctrl->dma_cmd_pool.
+ *
+ * Return: 0 on success, -ENOMEM if the pool could not be created.
+ */
+static int qcom_qspi_alloc_dma(struct qcom_qspi *ctrl)
+{
+       struct dma_pool *pool;
+
+       pool = dmam_pool_create("qspi cmd desc pool", ctrl->dev,
+                               sizeof(struct qspi_cmd_desc), 0, 0);
+       ctrl->dma_cmd_pool = pool;
+
+       return pool ? 0 : -ENOMEM;
+}
+
 static irqreturn_t pio_read(struct qcom_qspi *ctrl)
 {
        u32 rd_fifo_status;
@@ -426,6 +603,7 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
        int_status = readl(ctrl->base + MSTR_INT_STATUS);
        writel(int_status, ctrl->base + MSTR_INT_STATUS);
 
+       /* PIO mode handling */
        if (ctrl->xfer.dir == QSPI_WRITE) {
                if (int_status & WR_FIFO_EMPTY)
                        ret = pio_write(ctrl);
@@ -449,6 +627,22 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
                spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
        }
 
+       /* DMA mode handling */
+       if (int_status & DMA_CHAIN_DONE) {
+               int i;
+
+               writel(0, ctrl->base + MSTR_INT_EN);
+               ctrl->xfer.rem_bytes = 0;
+
+               for (i = 0; i < ctrl->n_cmd_desc; i++)
+                       dma_pool_free(ctrl->dma_cmd_pool, ctrl->virt_cmd_desc[i],
+                                         ctrl->dma_cmd_desc[i]);
+               ctrl->n_cmd_desc = 0;
+
+               ret = IRQ_HANDLED;
+               spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
+       }
+
        spin_unlock(&ctrl->lock);
        return ret;
 }
@@ -517,7 +711,13 @@ static int qcom_qspi_probe(struct platform_device *pdev)
                return ret;
        }
 
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+       if (ret)
+               return dev_err_probe(dev, ret, "could not set DMA mask\n");
+
        master->max_speed_hz = 300000000;
+       master->max_dma_len = 65536; /* as per HPG */
+       master->dma_alignment = QSPI_ALIGN_REQ;
        master->num_chipselect = QSPI_NUM_CS;
        master->bus_num = -1;
        master->dev.of_node = pdev->dev.of_node;
@@ -528,6 +728,8 @@ static int qcom_qspi_probe(struct platform_device *pdev)
        master->prepare_message = qcom_qspi_prepare_message;
        master->transfer_one = qcom_qspi_transfer_one;
        master->handle_err = qcom_qspi_handle_err;
+       if (of_property_read_bool(pdev->dev.of_node, "iommus"))
+               master->can_dma = qcom_qspi_can_dma;
        master->auto_runtime_pm = true;
 
        ret = devm_pm_opp_set_clkname(&pdev->dev, "core");
@@ -540,6 +742,10 @@ static int qcom_qspi_probe(struct platform_device *pdev)
                return ret;
        }
 
+       ret = qcom_qspi_alloc_dma(ctrl);
+       if (ret)
+               return ret;
+
        pm_runtime_use_autosuspend(dev);
        pm_runtime_set_autosuspend_delay(dev, 250);
        pm_runtime_enable(dev);