#define SPI_RSER                       0x30
 #define SPI_RSER_TCFQE                 BIT(31)
 #define SPI_RSER_EOQFE                 BIT(28)
+#define SPI_RSER_CMDTCFE               BIT(23)
 
 #define SPI_PUSHR                      0x34
 #define SPI_PUSHR_CMD_CONT             BIT(15)
 
 enum dspi_trans_mode {
        DSPI_EOQ_MODE = 0,
-       DSPI_TCFQ_MODE,
+       DSPI_XSPI_MODE, /* sets SPI_MCR_XSPI; allows 4..32 bits per word */
        DSPI_DMA_MODE,
 };
 
 struct fsl_dspi_devtype_data {
        enum dspi_trans_mode    trans_mode;
        u8                      max_clock_factor;
-       bool                    xspi_mode;
        int                     fifo_size;
        int                     dma_bufsize;
 };
        },
        [LS1021A] = {
                /* Has A-011218 DMA erratum */
-               .trans_mode             = DSPI_TCFQ_MODE,
+               .trans_mode             = DSPI_XSPI_MODE,
                .max_clock_factor       = 8,
-               .xspi_mode              = true,
                .fifo_size              = 4,
        },
        [LS1012A] = {
                /* Has A-011218 DMA erratum */
-               .trans_mode             = DSPI_TCFQ_MODE,
+               .trans_mode             = DSPI_XSPI_MODE,
                .max_clock_factor       = 8,
-               .xspi_mode              = true,
                .fifo_size              = 16,
        },
        [LS1043A] = {
                /* Has A-011218 DMA erratum */
-               .trans_mode             = DSPI_TCFQ_MODE,
+               .trans_mode             = DSPI_XSPI_MODE,
                .max_clock_factor       = 8,
-               .xspi_mode              = true,
                .fifo_size              = 16,
        },
        [LS1046A] = {
                /* Has A-011218 DMA erratum */
-               .trans_mode             = DSPI_TCFQ_MODE,
+               .trans_mode             = DSPI_XSPI_MODE,
                .max_clock_factor       = 8,
-               .xspi_mode              = true,
                .fifo_size              = 16,
        },
        [LS2080A] = {
                .trans_mode             = DSPI_DMA_MODE,
                .dma_bufsize            = 8,
                .max_clock_factor       = 8,
-               .xspi_mode              = true,
                .fifo_size              = 4,
        },
        [LS2085A] = {
                .trans_mode             = DSPI_DMA_MODE,
                .dma_bufsize            = 8,
                .max_clock_factor       = 8,
-               .xspi_mode              = true,
                .fifo_size              = 4,
        },
        [MCF5441X] = {
        size_t                                  len;
        const void                              *tx;
        void                                    *rx;
-       void                                    *rx_end;
        u16                                     tx_cmd;
        u8                                      bits_per_word;
        u8                                      bytes_per_word;
        u32                                     waitflags;
 
        struct fsl_dspi_dma                     *dma;
+
+       int                                     words_in_flight;
 };
 
 /*
 {
        u16 cmd = dspi->tx_cmd;
 
-       if (dspi->len > 0)
+       /*
+        * The only time when the PCS doesn't need continuation after this word
+        * is when it's last. We need to look ahead, because we actually call
+        * dspi_pop_tx (the function that decrements dspi->len) _after_
+        * dspi_pushr_cmd_write with XSPI mode. As for how much in advance? One
+        * word is enough. If there's more to transmit than that,
+        * dspi_xspi_write will know to split the FIFO writes in 2, and
+        * generate a new PUSHR command with the final word that will have PCS
+        * deasserted (not continued) here.
+        */
+       if (dspi->len > dspi->bytes_per_word)
                cmd |= SPI_PUSHR_CMD_CONT;
        regmap_write(dspi->regmap_pushr, PUSHR_CMD, cmd);
 }
        regmap_write(dspi->regmap_pushr, PUSHR_TX, txdata);
 }
 
-static void dspi_tcfq_write(struct fsl_dspi *dspi)
+static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt)
 {
-       /* Clear transfer count */
-       dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT;
+       regmap_write(dspi->regmap, SPI_CTARE(0),
+                    SPI_FRAME_EBITS(dspi->bits_per_word) |
+                    SPI_CTARE_DTCP(cnt));
 
-       if (dspi->devtype_data->xspi_mode && dspi->bits_per_word > 16) {
-               /* Write the CMD FIFO entry first, and then the two
-                * corresponding TX FIFO entries.
-                */
+       /*
+        * A single CMD FIFO entry is written first (cnt has just been
+        * programmed into CTARE through SPI_CTARE_DTCP), followed by the
+        * data for all cnt words. Each word takes one TX FIFO entry, or
+        * two (low half, then high half) when bits_per_word is above 16.
+        */
+       dspi_pushr_cmd_write(dspi);
+
+       /* Push exactly cnt words; the caller sizes cnt to fit the TX FIFO */
+       while (cnt--) {
                u32 data = dspi_pop_tx(dspi);
 
-               dspi_pushr_cmd_write(dspi);
                dspi_pushr_txdata_write(dspi, data & 0xFFFF);
-               dspi_pushr_txdata_write(dspi, data >> 16);
-       } else {
-               /* Write one entry to both TX FIFO and CMD FIFO
-                * simultaneously.
-                */
-               dspi_pushr_write(dspi);
+               if (dspi->bits_per_word > 16)
+                       dspi_pushr_txdata_write(dspi, data >> 16);
        }
 }
 
-static u32 dspi_popr_read(struct fsl_dspi *dspi)
+static void dspi_xspi_fifo_write(struct fsl_dspi *dspi)
 {
-       u32 rxdata = 0;
+       int num_fifo_entries = dspi->devtype_data->fifo_size;
+       int bytes_in_flight;
 
-       regmap_read(dspi->regmap, SPI_POPR, &rxdata);
-       return rxdata;
-}
+       /*
+        * Words wider than 16 bits are pushed as two 16-bit TX FIFO
+        * entries, so only half as many of them fit in the FIFO.
+        */
+       if (dspi->bits_per_word > 16)
+               num_fifo_entries /= 2;
 
-static void dspi_tcfq_read(struct fsl_dspi *dspi)
-{
-       dspi_push_rx(dspi, dspi_popr_read(dspi));
+       dspi->words_in_flight = dspi->len / dspi->bytes_per_word;
+
+       if (dspi->words_in_flight > num_fifo_entries)
+               dspi->words_in_flight = num_fifo_entries;
+
+       bytes_in_flight = dspi->words_in_flight * dspi->bytes_per_word;
+
+       /*
+        * If the PCS needs to de-assert (i.e. we're at the end of the buffer
+        * and cs_change does not want the PCS to stay on), then we need a new
+        * PUSHR command, since this one (for the body of the buffer)
+        * necessarily has the CONT bit set.
+        * So send one word less during this go, to force a split and a command
+        * with a single word next time, when CONT will be unset.
+        * The words_in_flight > 1 test leaves a lone final word alone, so
+        * the batch is never reduced to zero words.
+        */
+       if (bytes_in_flight == dspi->len && dspi->words_in_flight > 1 &&
+           !(dspi->tx_cmd & SPI_PUSHR_CMD_CONT))
+               dspi->words_in_flight--;
+
+       dspi_xspi_write(dspi, dspi->words_in_flight);
 }
 
-static void dspi_eoq_write(struct fsl_dspi *dspi)
+/*
+ * Queue the next batch of words in EOQ mode: at most one hardware FIFO's
+ * worth, and never more than what is left in the buffer. The number of
+ * words queued is recorded in dspi->words_in_flight, and the last word of
+ * the batch carries SPI_PUSHR_CMD_EOQ.
+ */
+static void dspi_eoq_fifo_write(struct fsl_dspi *dspi)
 {
-       int fifo_size = dspi->devtype_data->fifo_size;
+       int num_fifo_entries = dspi->devtype_data->fifo_size;
        u16 xfer_cmd = dspi->tx_cmd;
 
+       /*
+        * Clamp the batch to the words that remain. dspi_fifo_read pops
+        * exactly words_in_flight entries and dspi_rxtx accounts for that
+        * many bytes, so recording the full FIFO size for a shorter tail
+        * would read more words than were written.
+        */
+       if (num_fifo_entries * dspi->bytes_per_word > dspi->len)
+               num_fifo_entries = dspi->len / dspi->bytes_per_word;
+
+       dspi->words_in_flight = num_fifo_entries;
+
        /* Fill TX FIFO with as many transfers as possible */
-       while (dspi->len && fifo_size--) {
+       while (num_fifo_entries--) {
                dspi->tx_cmd = xfer_cmd;
                /* Request EOQF for last transfer in FIFO */
-               if (dspi->len == dspi->bytes_per_word || fifo_size == 0)
+               if (num_fifo_entries == 0)
                        dspi->tx_cmd |= SPI_PUSHR_CMD_EOQ;
-               /* Clear transfer count for first transfer in FIFO */
-               if (fifo_size == (dspi->devtype_data->fifo_size - 1))
-                       dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT;
                /* Write combined TX FIFO and CMD FIFO entry */
                dspi_pushr_write(dspi);
        }
 }
 
-static void dspi_eoq_read(struct fsl_dspi *dspi)
+static u32 dspi_popr_read(struct fsl_dspi *dspi)
 {
-       int fifo_size = dspi->devtype_data->fifo_size;
+       u32 rxdata = 0;
 
+       regmap_read(dspi->regmap, SPI_POPR, &rxdata);
+       return rxdata;
+}
+
+static void dspi_fifo_read(struct fsl_dspi *dspi)
+{
        /* Read one FIFO entry and push to rx buffer */
-       while ((dspi->rx < dspi->rx_end) && fifo_size--)
+       while (dspi->words_in_flight--) /* one POPR read per word in flight */
                dspi_push_rx(dspi, dspi_popr_read(dspi));
 }
 
+/*
+ * Queue the next batch of words with the FIFO writer that matches the
+ * controller's transfer mode: the EOQ writer in EOQ mode, the XSPI
+ * writer for every other mode.
+ */
+static void dspi_fifo_write(struct fsl_dspi *dspi)
+{
+       if (dspi->devtype_data->trans_mode != DSPI_EOQ_MODE) {
+               dspi_xspi_fifo_write(dspi);
+               return;
+       }
+
+       dspi_eoq_fifo_write(dspi);
+}
+
 static int dspi_rxtx(struct fsl_dspi *dspi)
 {
+       struct spi_transfer *xfer = dspi->cur_transfer;
        struct spi_message *msg = dspi->cur_msg;
-       enum dspi_trans_mode trans_mode;
-       u16 spi_tcnt;
-       u32 spi_tcr;
+       int bytes_sent;
+
+       /* Update total number of bytes that were transferred */
+       bytes_sent = dspi->words_in_flight * dspi->bytes_per_word;
+       msg->actual_length += bytes_sent;
+       dspi->progress += bytes_sent / DIV_ROUND_UP(xfer->bits_per_word, 8);
 
        spi_take_timestamp_post(dspi->ctlr, dspi->cur_transfer,
                                dspi->progress, !dspi->irq);
 
-       /* Get transfer counter (in number of SPI transfers). It was
-        * reset to 0 when transfer(s) were started.
-        */
-       regmap_read(dspi->regmap, SPI_TCR, &spi_tcr);
-       spi_tcnt = SPI_TCR_GET_TCNT(spi_tcr);
-       /* Update total number of bytes that were transferred */
-       msg->actual_length += spi_tcnt * dspi->bytes_per_word;
-       dspi->progress += spi_tcnt;
-
-       trans_mode = dspi->devtype_data->trans_mode;
-       if (trans_mode == DSPI_EOQ_MODE)
-               dspi_eoq_read(dspi);
-       else if (trans_mode == DSPI_TCFQ_MODE)
-               dspi_tcfq_read(dspi);
+       dspi_fifo_read(dspi);
 
        if (!dspi->len)
                /* Success! */
        spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer,
                               dspi->progress, !dspi->irq);
 
-       if (trans_mode == DSPI_EOQ_MODE)
-               dspi_eoq_write(dspi);
-       else if (trans_mode == DSPI_TCFQ_MODE)
-               dspi_tcfq_write(dspi);
+       dspi_fifo_write(dspi);
 
        return -EINPROGRESS;
 }
                regmap_read(dspi->regmap, SPI_SR, &spi_sr);
                regmap_write(dspi->regmap, SPI_SR, spi_sr);
 
-               if (spi_sr & (SPI_SR_EOQF | SPI_SR_TCFQF))
+               if (spi_sr & (SPI_SR_EOQF | SPI_SR_CMDTCF))
                        break;
        } while (--tries);
 
        regmap_read(dspi->regmap, SPI_SR, &spi_sr);
        regmap_write(dspi->regmap, SPI_SR, spi_sr);
 
-       if (!(spi_sr & SPI_SR_EOQF))
+       if (!(spi_sr & (SPI_SR_EOQF | SPI_SR_CMDTCF)))
                return IRQ_NONE;
 
        if (dspi_rxtx(dspi) == 0) {
 
                dspi->tx = transfer->tx_buf;
                dspi->rx = transfer->rx_buf;
-               dspi->rx_end = dspi->rx + transfer->len;
                dspi->len = transfer->len;
                dspi->progress = 0;
                /* Validated transfer specific frame size (defaults applied) */
                regmap_write(dspi->regmap, SPI_CTAR(0),
                             dspi->cur_chip->ctar_val |
                             SPI_FRAME_BITS(transfer->bits_per_word));
-               if (dspi->devtype_data->xspi_mode)
-                       regmap_write(dspi->regmap, SPI_CTARE(0),
-                                    SPI_FRAME_EBITS(transfer->bits_per_word) |
-                                    SPI_CTARE_DTCP(1));
 
                spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer,
                                       dspi->progress, !dspi->irq);
                switch (trans_mode) {
                case DSPI_EOQ_MODE:
                        regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_EOQFE);
-                       dspi_eoq_write(dspi);
+                       dspi_fifo_write(dspi);
                        break;
-               case DSPI_TCFQ_MODE:
-                       regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_TCFQE);
-                       dspi_tcfq_write(dspi);
+               case DSPI_XSPI_MODE:
+                       regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_CMDTCFE);
+                       dspi_fifo_write(dspi);
                        break;
                case DSPI_DMA_MODE:
                        regmap_write(dspi->regmap, SPI_RSER,
 {
        unsigned int mcr = SPI_MCR_PCSIS;
 
-       if (dspi->devtype_data->xspi_mode)
+       if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
                mcr |= SPI_MCR_XSPI;
        if (!spi_controller_is_slave(dspi->ctlr))
                mcr |= SPI_MCR_MASTER;
 
        regmap_write(dspi->regmap, SPI_MCR, mcr);
        regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR);
-       if (dspi->devtype_data->xspi_mode)
-               regmap_write(dspi->regmap, SPI_CTARE(0),
-                            SPI_CTARE_FMSZE(0) | SPI_CTARE_DTCP(1));
 }
 
 static int dspi_slave_abort(struct spi_master *master)
                }
        }
 
-       if (dspi->devtype_data->xspi_mode)
+       if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
                ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
        else
                ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
                goto out_ctlr_put;
        }
 
-       if (dspi->devtype_data->xspi_mode)
+       if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
                regmap_config = &dspi_xspi_regmap_config[0];
        else
                regmap_config = &dspi_regmap_config;
                goto out_ctlr_put;
        }
 
-       if (dspi->devtype_data->xspi_mode) {
+       if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) {
                dspi->regmap_pushr = devm_regmap_init_mmio(
                        &pdev->dev, base + SPI_PUSHR,
                        &dspi_xspi_regmap_config[1]);
 
        dspi_init(dspi);
 
-       if (dspi->devtype_data->trans_mode == DSPI_TCFQ_MODE)
-               goto poll_mode;
-
        dspi->irq = platform_get_irq(pdev, 0);
        if (dspi->irq <= 0) {
                dev_info(&pdev->dev,