qemu_co_queue_init(&bs->flush_queue);
+ for (i = 0; i < bdrv_drain_all_count; i++) {
+ bdrv_drained_begin(bs);
+ }
+
QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
return bs;
int open_flags, Error **errp)
{
Error *local_err = NULL;
- int ret;
+ int i, ret;
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
assert(bdrv_min_mem_align(bs) != 0);
assert(is_power_of_2(bs->bl.request_alignment));
+ for (i = 0; i < bs->quiesce_counter; i++) {
+ if (drv->bdrv_co_drain_begin) {
+ drv->bdrv_co_drain_begin(bs);
+ }
+ }
+
return 0;
open_failed:
bs->drv = NULL;
child->role->detach(child);
}
if (old_bs->quiesce_counter && child->role->drained_end) {
- for (i = 0; i < old_bs->quiesce_counter; i++) {
+ int num = old_bs->quiesce_counter;
+ if (child->role->parent_is_bds) {
+ num -= bdrv_drain_all_count;
+ }
+ assert(num >= 0);
+ for (i = 0; i < num; i++) {
child->role->drained_end(child);
}
}
if (new_bs) {
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
if (new_bs->quiesce_counter && child->role->drained_begin) {
- for (i = 0; i < new_bs->quiesce_counter; i++) {
+ int num = new_bs->quiesce_counter;
+ if (child->role->parent_is_bds) {
+ num -= bdrv_drain_all_count;
+ }
+ assert(num >= 0);
+ for (i = 0; i < num; i++) {
child->role->drained_begin(child);
}
}
return QTAILQ_NEXT(bs, node_list);
}
+/*
+ * Step through the nodes linked into all_bdrv_states.
+ *
+ * A NULL argument yields the first entry of the list; any other argument
+ * yields that node's successor on the bs_list link. Callers in this patch
+ * loop until the returned pointer is NULL.
+ */
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs)
+{
+    return bs ? QTAILQ_NEXT(bs, bs_list) : QTAILQ_FIRST(&all_bdrv_states);
+}
+
const char *bdrv_get_node_name(const BlockDriverState *bs)
{
return bs->node_name;
/* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
+static AioWait drain_all_aio_wait; /* kicked by bdrv_wakeup() */
+
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int bytes, BdrvRequestFlags flags);
}
}
+unsigned int bdrv_drain_all_count = 0; /* open drain_all sections */
+
+/*
+ * Poll condition used while waiting for all nodes to become idle.
+ *
+ * Returns true if bdrv_drain_poll() reported true for at least one node
+ * reachable through bdrv_next_all_states(), false otherwise.
+ */
+static bool bdrv_drain_all_poll(void)
+{
+    BlockDriverState *node;
+    bool busy = false;
+
+    /* Pending BHs may modify the graph, so run them to completion before
+     * any node is inspected. */
+    while (aio_poll(qemu_get_aio_context(), false)) {
+        /* nothing to do here; just keep polling */
+    }
+
+    /* bdrv_drain_poll() can't make changes to the graph and we are holding
+     * the main AioContext lock, so walking the node list is safe. Every node
+     * is polled, even after one of them has already reported activity. */
+    for (node = bdrv_next_all_states(NULL); node;
+         node = bdrv_next_all_states(node)) {
+        AioContext *ctx = bdrv_get_aio_context(node);
+
+        aio_context_acquire(ctx);
+        if (bdrv_drain_poll(node, false, NULL, true)) {
+            busy = true;
+        }
+        aio_context_release(ctx);
+    }
+
+    return busy;
+}
+
/*
* Wait for pending requests to complete across all BlockDriverStates
*
*/
void bdrv_drain_all_begin(void)
{
- BlockDriverState *bs;
- BdrvNextIterator it;
+ BlockDriverState *bs = NULL; /* NULL: iteration starts at the list head */
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, false, NULL, false, true);
+ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
return;
}
- /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
- * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
- * nodes in several different AioContexts, so make sure we're in the main
- * context. */
+ /* AIO_WAIT_WHILE() with a NULL context can only be called from the main
+ * loop AioContext, so make sure we're in the main context. */
assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+ assert(bdrv_drain_all_count < INT_MAX); /* keep the count within int range */
+ bdrv_drain_all_count++; /* undone in bdrv_drain_all_end() */
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* Quiesce all nodes, without polling in-flight requests yet. The graph
+ * cannot change during this loop.
+ *
+ * NOTE(review): the trailing 'false' passed to bdrv_do_drained_begin()
+ * presumably is what suppresses polling here - confirm against its
+ * prototype. */
+ while ((bs = bdrv_next_all_states(bs))) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_begin(bs, true, NULL, false, true);
+ bdrv_do_drained_begin(bs, false, NULL, true, false);
aio_context_release(aio_context);
}
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ /* Now poll the in-flight requests. The wait uses drain_all_aio_wait,
+ * which bdrv_wakeup() kicks, and bdrv_drain_all_poll() as its condition. */
+ AIO_WAIT_WHILE(&drain_all_aio_wait, NULL, bdrv_drain_all_poll());
+
+ while ((bs = bdrv_next_all_states(bs))) { /* bs is NULL again: full rescan */
bdrv_drain_assert_idle(bs);
}
}
void bdrv_drain_all_end(void)
{
- BlockDriverState *bs;
- BdrvNextIterator it;
+ BlockDriverState *bs = NULL; /* NULL: iteration starts at the list head */
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ while ((bs = bdrv_next_all_states(bs))) { /* every node in all_bdrv_states */
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, true, NULL, false);
+ bdrv_do_drained_end(bs, false, NULL, true);
aio_context_release(aio_context);
}
+
+ assert(bdrv_drain_all_count > 0); /* decrementing 0 would wrap (unsigned) */
+ bdrv_drain_all_count--; /* pairs with the ++ in bdrv_drain_all_begin() */
}
void bdrv_drain_all(void)
void bdrv_wakeup(BlockDriverState *bs)
{
aio_wait_kick(bdrv_get_aio_wait(bs));
+ aio_wait_kick(&drain_all_aio_wait); /* waited on in bdrv_drain_all_begin() */
}
void bdrv_dec_in_flight(BlockDriverState *bs)