unsigned                sq_thread_idle;
                unsigned                cached_sq_dropped;
                atomic_t                cached_cq_overflow;
-               struct io_uring_sqe     *sq_sqes;
+               unsigned long           sq_check_overflow;
 
                struct list_head        defer_list;
                struct list_head        timeout_list;
                struct list_head        cq_overflow_list;
 
                wait_queue_head_t       inflight_wait;
+               struct io_uring_sqe     *sq_sqes;
        } ____cacheline_aligned_in_smp;
 
        struct io_rings *rings;
                unsigned                cq_entries;
                unsigned                cq_mask;
                atomic_t                cq_timeouts;
+               unsigned long           cq_check_overflow;
                struct wait_queue_head  cq_wait;
                struct fasync_struct    *cq_fasync;
                struct eventfd_ctx      *cq_ev_fd;
        }
 
        io_commit_cqring(ctx);
+       if (cqe) {
+               clear_bit(0, &ctx->sq_check_overflow);
+               clear_bit(0, &ctx->cq_check_overflow);
+       }
        spin_unlock_irqrestore(&ctx->completion_lock, flags);
        io_cqring_ev_posted(ctx);
 
                WRITE_ONCE(ctx->rings->cq_overflow,
                                atomic_inc_return(&ctx->cached_cq_overflow));
        } else {
+               if (list_empty(&ctx->cq_overflow_list)) {
+                       set_bit(0, &ctx->sq_check_overflow);
+                       set_bit(0, &ctx->cq_check_overflow);
+               }
                refcount_inc(&req->refs);
                req->result = res;
                list_add_tail(&req->list, &ctx->cq_overflow_list);
 {
        struct io_rings *rings = ctx->rings;
 
-       /*
-        * noflush == true is from the waitqueue handler, just ensure we wake
-        * up the task, and the next invocation will flush the entries. We
-        * cannot safely to it from here.
-        */
-       if (noflush && !list_empty(&ctx->cq_overflow_list))
-               return -1U;
+       if (test_bit(0, &ctx->cq_check_overflow)) {
+               /*
+                * noflush == true is from the waitqueue handler, just ensure
+                * we wake up the task, and the next invocation will flush the
+                * entries. We cannot safely to it from here.
+                */
+               if (noflush && !list_empty(&ctx->cq_overflow_list))
+                       return -1U;
 
-       io_cqring_overflow_flush(ctx, false);
+               io_cqring_overflow_flush(ctx, false);
+       }
 
        /* See comment at the top of this file */
        smp_rmb();
-       return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
+       return ctx->cached_cq_tail - READ_ONCE(rings->cq.head);
 }
 
 static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
        bool mm_fault = false;
 
        /* if we have a backlog and couldn't flush it all, return BUSY */
-       if (!list_empty(&ctx->cq_overflow_list) &&
-           !io_cqring_overflow_flush(ctx, false))
-               return -EBUSY;
+       if (test_bit(0, &ctx->sq_check_overflow)) {
+               if (!list_empty(&ctx->cq_overflow_list) &&
+                   !io_cqring_overflow_flush(ctx, false))
+                       return -EBUSY;
+       }
 
        if (nr > IO_PLUG_THRESHOLD) {
                io_submit_state_start(&state, nr);