retry = false;
QLIST_FOREACH(req, &job->inflight_reqs, list) {
if (end > req->start && start < req->end) {
- qemu_co_queue_wait(&req->wait_queue);
+ qemu_co_queue_wait(&req->wait_queue, NULL);
retry = true;
break;
}
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
self->waiting_for = req;
- qemu_co_queue_wait(&req->wait_queue);
+ qemu_co_queue_wait(&req->wait_queue, NULL);
self->waiting_for = NULL;
retry = true;
waited = true;
/* Wait until any previous flushes are completed */
while (bs->active_flush_req) {
- qemu_co_queue_wait(&bs->flush_queue);
+ qemu_co_queue_wait(&bs->flush_queue, NULL);
}
bs->active_flush_req = true;
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight == MAX_NBD_REQUESTS) {
- qemu_co_queue_wait(&s->free_sema);
+ qemu_co_queue_wait(&s->free_sema, NULL);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
if (bytes == 0) {
/* Wait for the dependency to complete. We need to recheck
* the free/allocated clusters when we continue. */
- qemu_co_mutex_unlock(&s->lock);
- qemu_co_queue_wait(&old_alloc->dependent_requests);
- qemu_co_mutex_lock(&s->lock);
+ qemu_co_queue_wait(&old_alloc->dependent_requests, &s->lock);
return -EAGAIN;
}
}
retry:
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
if (AIOCBOverlapping(acb, cb)) {
- qemu_co_queue_wait(&s->overlapping_queue);
+ qemu_co_queue_wait(&s->overlapping_queue, NULL);
goto retry;
}
}
if (must_wait || blkp->pending_reqs[is_write]) {
blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
- qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write], NULL);
qemu_mutex_lock(&tg->lock);
blkp->pending_reqs[is_write]--;
}
/*
* Wait for pdu to complete.
*/
- qemu_co_queue_wait(&cancel_pdu->complete);
+ qemu_co_queue_wait(&cancel_pdu->complete, NULL);
cancel_pdu->cancelled = 0;
pdu_free(cancel_pdu);
}
/**
* CoQueues are a mechanism to queue coroutines in order to continue executing
- * them later.
+ * them later. They are similar to condition variables, but they need help
+ * from an external mutex in order to maintain thread-safety.
*/
typedef struct CoQueue {
QSIMPLEQ_HEAD(, Coroutine) entries;
/**
* Adds the current coroutine to the CoQueue and transfers control to the
- * caller of the coroutine.
+ * caller of the coroutine. The mutex is unlocked during the wait and
+ * locked again afterwards.
*/
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex);
/**
* Restarts the next coroutine in the CoQueue and removes it from the queue.
QSIMPLEQ_INIT(&queue->entries);
}
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue, CoMutex *mutex)
{
Coroutine *self = qemu_coroutine_self();
QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+
+ if (mutex) {
+ qemu_co_mutex_unlock(mutex);
+ }
+
+ /* There is no race condition here. Other threads will call
+ * aio_co_schedule on our AioContext, which can reenter this
+ * coroutine but only after this yield and after the main loop
+ * has gone through the next iteration.
+ */
qemu_coroutine_yield();
assert(qemu_in_coroutine());
+
+ /* TODO: OSv implements wait morphing here, where the wakeup
+ * primitive automatically places the woken coroutine on the
+ * mutex's queue. This avoids the thundering herd effect.
+ */
+ if (mutex) {
+ qemu_co_mutex_lock(mutex);
+ }
}
/**
Coroutine *self = qemu_coroutine_self();
while (lock->writer) {
- qemu_co_queue_wait(&lock->queue);
+ qemu_co_queue_wait(&lock->queue, NULL);
}
lock->reader++;
self->locks_held++;
Coroutine *self = qemu_coroutine_self();
while (lock->writer || lock->reader) {
- qemu_co_queue_wait(&lock->queue);
+ qemu_co_queue_wait(&lock->queue, NULL);
}
lock->writer = true;
self->locks_held++;