dm_stats_account_io(&md->stats, bio_data_dir(bio),
                                    bio->bi_iter.bi_sector, bio_sectors(bio),
                                    true, duration, stats_aux);
-
-       /* nudge anyone waiting on suspend queue */
-       if (unlikely(wq_has_sleeper(&md->wait)))
-               wake_up(&md->wait);
 }
 
 static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
        io->magic = DM_IO_MAGIC;
        io->status = 0;
        atomic_set(&io->io_count, 1);
+       this_cpu_inc(*md->pending_io);
        io->orig_bio = bio;
        io->md = md;
        spin_lock_init(&io->endio_lock);
                stats_aux = io->stats_aux;
                free_io(md, io);
                end_io_acct(md, bio, start_time, &stats_aux);
+               smp_wmb();
+               this_cpu_dec(*md->pending_io);
+
+               /* nudge anyone waiting on suspend queue */
+               if (unlikely(wq_has_sleeper(&md->wait)))
+                       wake_up(&md->wait);
 
                if (io_error == BLK_STS_DM_REQUEUE)
                        return;
                blk_cleanup_disk(md->disk);
        }
 
+       if (md->pending_io) {
+               free_percpu(md->pending_io);
+               md->pending_io = NULL;
+       }
+
        cleanup_srcu_struct(&md->io_barrier);
 
        mutex_destroy(&md->suspend_lock);
        if (!md->wq)
                goto bad;
 
+       md->pending_io = alloc_percpu(unsigned long);
+       if (!md->pending_io)
+               goto bad;
+
        dm_stats_init(&md->stats);
 
        /* Populate the mapping, nobody knows we exist yet */
 }
 EXPORT_SYMBOL_GPL(dm_put);
 
-static bool md_in_flight_bios(struct mapped_device *md)
+static bool dm_in_flight_bios(struct mapped_device *md) /* true while md's per-CPU pending_io counters sum to non-zero */
 {
        int cpu;
-       struct block_device *part = dm_disk(md)->part0;
-       long sum = 0;
+       unsigned long sum = 0; /* running total over every possible CPU's pending_io slot */
 
-       for_each_possible_cpu(cpu) {
-               sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
-               sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
-       }
+       for_each_possible_cpu(cpu) /* visits all possible CPUs, not only the online ones */
+               sum += *per_cpu_ptr(md->pending_io, cpu); /* NOTE(review): presumably inc and dec can land on different CPUs, so only the total is meaningful - confirm */
 
        return sum != 0;
 }
        while (true) {
                prepare_to_wait(&md->wait, &wait, task_state);
 
-               if (!md_in_flight_bios(md))
+               if (!dm_in_flight_bios(md))
                        break;
 
                if (signal_pending_state(task_state, current)) {
        }
        finish_wait(&md->wait, &wait);
 
+       smp_rmb();
+
        return r;
 }