}
                else
                        data->bytes_xfered =
-                               (data->blocks * (1 << data->blksz_bits)) -
+                               (data->blocks * data->blksz) -
                                host->pio.len;
        }
 
 au1xmmc_prepare_data(struct au1xmmc_host *host, struct mmc_data *data)
 {
 
-       int datalen = data->blocks * (1 << data->blksz_bits);
+       int datalen = data->blocks * data->blksz;
 
        if (dma != 0)
                host->flags |= HOST_F_DMA;
        if (host->dma.len == 0)
                return MMC_ERR_TIMEOUT;
 
-       au_writel((1 << data->blksz_bits) - 1, HOST_BLKSIZE(host));
+       au_writel(data->blksz - 1, HOST_BLKSIZE(host));
 
        if (host->flags & HOST_F_DMA) {
                int i;
 
        if(!loops)
                return 0;
 
-       dev_info(mmc_dev(host->mmc), "busy wait for %d usec in %s, STATUS = 0x%x (0x%x)\n",
-               loops, where, *pstat, stat_mask);
+       /* A busy-wait is expected here for clocks below 8MHz due to SDHC hardware flaws */
+       if(!(stat_mask & STATUS_END_CMD_RESP) || (host->mmc->ios.clock>=8000000))
+               dev_info(mmc_dev(host->mmc), "busy wait for %d usec in %s, STATUS = 0x%x (0x%x)\n",
+                       loops, where, *pstat, stat_mask);
        return loops;
 }
 
        WARN_ON(host->cmd != NULL);
        host->cmd = cmd;
 
+       /* Ensure that the clock is stopped; otherwise command programming and start fail */
+       imxmci_stop_clock(host);
+
        if (cmd->flags & MMC_RSP_BUSY)
                cmdat |= CMD_DAT_CONT_BUSY;
 
        int trans_done = 0;
        unsigned int stat = *pstat;
 
-       if(host->actual_bus_width == MMC_BUS_WIDTH_4)
+       if(host->actual_bus_width != MMC_BUS_WIDTH_4)
                burst_len = 16;
        else
                burst_len = 64;
                        stat = MMC_STATUS;
 
                        /* Flush extra bytes from FIFO */
-                       while(flush_len >= 2){
-                               flush_len -= 2;
+                       while(flush_len && !(stat & STATUS_DATA_TRANS_DONE)){
                                i = MMC_BUFFER_ACCESS;
                                stat = MMC_STATUS;
                                stat &= ~STATUS_CRC_READ_ERR; /* Stupid but required there */
                        data_dir_mask = STATUS_DATA_TRANS_DONE;
                }
 
-               imxmci_busy_wait_for_status(host, &stat,
-                               data_dir_mask,
-                               50, "imxmci_tasklet_fnc data");
-
                if(stat & data_dir_mask) {
                        clear_bit(IMXMCI_PEND_DMA_END_b, &host->pending_events);
                        imxmci_data_done(host, stat);
 
                imxmci_stop_clock(host);
                MMC_CLK_RATE = (prescaler<<3) | clk;
-               imxmci_start_clock(host);
+               /*
+                * As I understand it, the clock should not be started here, because doing so would
+                * trigger the SDHC sequencer and send the last or a random command to the card
+                */
+               /*imxmci_start_clock(host);*/
 
                dev_dbg(mmc_dev(host->mmc), "MMC_CLK_RATE: 0x%08x\n", MMC_CLK_RATE);
        } else {
 
                data.timeout_ns = card->csd.tacc_ns * 10;
                data.timeout_clks = card->csd.tacc_clks * 10;
                data.blksz_bits = 3;
+               data.blksz = 1 << 3;
                data.blocks = 1;
                data.flags = MMC_DATA_READ;
                data.sg = &sg;
 
                brq.data.timeout_ns = card->csd.tacc_ns * 10;
                brq.data.timeout_clks = card->csd.tacc_clks * 10;
                brq.data.blksz_bits = md->block_bits;
+               brq.data.blksz = 1 << md->block_bits;
                brq.data.blocks = req->nr_sectors >> (md->block_bits - 9);
                brq.stop.opcode = MMC_STOP_TRANSMISSION;
                brq.stop.arg = 0;
 
                nob = 0xffff;
 
        writel(nob, host->base + MMC_NOB);
-       writel(1 << data->blksz_bits, host->base + MMC_BLKLEN);
+       writel(data->blksz, host->base + MMC_BLKLEN);
 
        clks = (unsigned long long)data->timeout_ns * CLOCKRATE;
        do_div(clks, 1000000000UL);
         * data blocks as being in error.
         */
        if (data->error == MMC_ERR_NONE)
-               data->bytes_xfered = data->blocks << data->blksz_bits;
+               data->bytes_xfered = data->blocks * data->blksz;
        else
                data->bytes_xfered = 0;
 
 
        unsigned long dmaflags;
 
        DBGF("blksz %04x blks %04x flags %08x\n",
-               1 << data->blksz_bits, data->blocks, data->flags);
+               data->blksz, data->blocks, data->flags);
        DBGF("tsac %d ms nsac %d clk\n",
                data->timeout_ns / 1000000, data->timeout_clks);
 
        /*
         * Calculate size.
         */
-       host->size = data->blocks << data->blksz_bits;
+       host->size = data->blocks * data->blksz;
 
        /*
         * Check timeout values for overflow.
         * Two bytes are needed for each data line.
         */
        if (host->bus_width == MMC_BUS_WIDTH_1) {
-               blksize = (1 << data->blksz_bits) + 2;
+               blksize = data->blksz + 2;
 
                wbsd_write_index(host, WBSD_IDX_PBSMSB, (blksize >> 4) & 0xF0);
                wbsd_write_index(host, WBSD_IDX_PBSLSB, blksize & 0xFF);
        } else if (host->bus_width == MMC_BUS_WIDTH_4) {
-               blksize = (1 << data->blksz_bits) + 2 * 4;
+               blksize = data->blksz + 2 * 4;
 
                wbsd_write_index(host, WBSD_IDX_PBSMSB,
                        ((blksize >> 4) & 0xF0) | WBSD_DATA_WIDTH);
 
        unsigned int            timeout_ns;     /* data timeout (in ns, max 80ms) */
        unsigned int            timeout_clks;   /* data timeout (in clocks) */
        unsigned int            blksz_bits;     /* data block size */
+       unsigned int            blksz;          /* data block size */
        unsigned int            blocks;         /* number of blocks */
        unsigned int            error;          /* data error */
        unsigned int            flags;