static int btree_key_cache_flush_pos(struct btree_trans *trans,
                                     struct bkey_cached_key key,
                                     u64 journal_seq,
+                                    unsigned commit_flags,
                                     bool evict)
 {
        struct bch_fs *c = trans->c;
                                  BTREE_INSERT_NOUNLOCK|
                                  BTREE_INSERT_NOCHECK_RW|
                                  BTREE_INSERT_NOFAIL|
-                                 BTREE_INSERT_JOURNAL_RESERVED|
-                                 BTREE_INSERT_JOURNAL_RECLAIM);
+                                 (ck->journal.seq == journal_last_seq(j)
+                                  ? BTREE_INSERT_JOURNAL_RESERVED
+                                  : 0)|
+                                 commit_flags);
 err:
        if (ret == -EINTR)
                goto retry;
 
+       if (ret == -EAGAIN)
+               goto out;
+
        if (ret) {
                bch2_fs_fatal_err_on(!bch2_journal_error(j), c,
                        "error flushing key cache: %i", ret);
        return ret;
 }
 
-static void btree_key_cache_journal_flush(struct journal *j,
-                                         struct journal_entry_pin *pin,
-                                         u64 seq)
+static int btree_key_cache_journal_flush(struct journal *j,
+                                        struct journal_entry_pin *pin,
+                                        u64 seq)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bkey_cached *ck =
                container_of(pin, struct bkey_cached, journal);
        struct bkey_cached_key key;
        struct btree_trans trans;
+       int ret = 0;
 
        int srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
 
        six_unlock_read(&ck->c.lock);
 
        bch2_trans_init(&trans, c, 0, 0);
-       btree_key_cache_flush_pos(&trans, key, seq, false);
+       ret = btree_key_cache_flush_pos(&trans, key, seq,
+                                 BTREE_INSERT_JOURNAL_RECLAIM, false);
        bch2_trans_exit(&trans);
 unlock:
        srcu_read_unlock(&c->btree_trans_barrier, srcu_idx);
+
+       return ret;
 }
 
 /*
        if (!bch2_btree_key_cache_find(c, id, pos))
                return 0;
 
-       return btree_key_cache_flush_pos(trans, key, 0, true);
+       return btree_key_cache_flush_pos(trans, key, 0, 0, true);
 }
 
 bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 
        struct closure cl;
        int disk_res_flags = (flags & BTREE_INSERT_NOFAIL)
                ? BCH_DISK_RESERVATION_NOFAIL : 0;
-       int journal_flags = (flags & BTREE_INSERT_JOURNAL_RESERVED)
-               ? JOURNAL_RES_GET_RECLAIM : 0;
+       int journal_flags = 0;
        int ret = 0;
 
+       if (flags & BTREE_INSERT_JOURNAL_RESERVED)
+               journal_flags |= JOURNAL_RES_GET_RESERVED;
+
        closure_init_stack(&cl);
 retry:
        /*
 
                bch2_trans_unlock(trans);
 
+               if (flags & BTREE_INSERT_JOURNAL_RECLAIM)
+                       goto err;
+
                ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
                                BTREE_UPDATE_JOURNAL_RES,
                                journal_flags);
 
        return true;
 }
 
-static void __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
+static int __btree_node_flush(struct journal *j, struct journal_entry_pin *pin,
                               unsigned i, u64 seq)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        bch2_btree_node_write_cond(c, b,
                (btree_current_write(b) == w && w->journal.seq == seq));
        six_unlock_read(&b->c.lock);
+       return 0;
 }
 
-static void btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+static int btree_node_flush0(struct journal *j, struct journal_entry_pin *pin, u64 seq)
 {
        return __btree_node_flush(j, pin, 0, seq);
 }
 
-static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
+static int btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, u64 seq)
 {
        return __btree_node_flush(j, pin, 1, seq);
 }
        ret = bch2_journal_preres_get(&c->journal,
                        &trans->journal_preres, trans->journal_preres_u64s,
                        JOURNAL_RES_GET_NONBLOCK|
-                       ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)
-                        ? JOURNAL_RES_GET_RECLAIM : 0));
+                       ((trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
+                        ? JOURNAL_RES_GET_RESERVED : 0));
        if (unlikely(ret == -EAGAIN))
                ret = bch2_trans_journal_preres_get_cold(trans,
                                                trans->journal_preres_u64s);
        case BTREE_INSERT_NEED_JOURNAL_RES:
                bch2_trans_unlock(trans);
 
+               if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
+                   !(trans->flags & BTREE_INSERT_JOURNAL_RESERVED))
+                       return -EAGAIN;
+
                ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
                if (ret)
                        return ret;
 
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "buckets.h"
+#include "error.h"
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
        if (!ret)
                goto retry;
 
+       if ((ret == cur_entry_journal_full ||
+            ret == cur_entry_journal_pin_full) &&
+           !can_discard &&
+           j->reservations.idx == j->reservations.unwritten_idx &&
+           (flags & JOURNAL_RES_GET_RESERVED)) {
+               char *journal_debug_buf = kmalloc(4096, GFP_ATOMIC);
+
+               bch_err(c, "Journal stuck!");
+               if (journal_debug_buf) {
+                       bch2_journal_debug_to_text(&_PBUF(journal_debug_buf, 4096), j);
+                       bch_err(c, "%s", journal_debug_buf);
+
+                       bch2_journal_pins_to_text(&_PBUF(journal_debug_buf, 4096), j);
+                       bch_err(c, "Journal pins:\n%s", journal_debug_buf);
+                       kfree(journal_debug_buf);
+               }
+
+               bch2_fatal_error(c);
+               dump_stack();
+       }
+
        /*
         * Journal is full - can't rely on reclaim from work item due to
         * freezing:
               "last_seq_ondisk:\t%llu\n"
               "flushed_seq_ondisk:\t%llu\n"
               "prereserved:\t\t%u/%u\n"
+              "each entry reserved:\t%u\n"
               "nr flush writes:\t%llu\n"
               "nr noflush writes:\t%llu\n"
               "nr direct reclaim:\t%llu\n"
               j->flushed_seq_ondisk,
               j->prereserved.reserved,
               j->prereserved.remaining,
+              j->entry_u64s_reserved,
               j->nr_flush_writes,
               j->nr_noflush_writes,
               j->nr_direct_reclaim,
 
 #define JOURNAL_RES_GET_NONBLOCK       (1 << 0)
 #define JOURNAL_RES_GET_CHECK          (1 << 1)
 #define JOURNAL_RES_GET_RESERVED       (1 << 2)
-#define JOURNAL_RES_GET_RECLAIM                (1 << 3)
 
 static inline int journal_res_get_fast(struct journal *j,
                                       struct journal_res *res,
                 * into the reclaim path and deadlock:
                 */
 
-               if (!(flags & JOURNAL_RES_GET_RECLAIM) &&
+               if (!(flags & JOURNAL_RES_GET_RESERVED) &&
                    new.reserved > new.remaining)
                        return 0;
        } while ((v = atomic64_cmpxchg(&j->prereserved.counter,
 
        u64s_remaining  = (u64) clean << 6;
        u64s_remaining -= (u64) total << 3;
        u64s_remaining = max(0LL, u64s_remaining);
-       u64s_remaining /= 2;
+       u64s_remaining /= 4;
        u64s_remaining = min_t(u64, u64s_remaining, U32_MAX);
 out:
        j->cur_entry_sectors    = !ret ? j->space[journal_space_discarded].next_entry : 0;
        if (!journal_pin_active(pin))
                return;
 
+       if (j->flush_in_progress == pin)
+               j->flush_in_progress_dropped = true;
+
        pin_list = journal_seq_pin(j, pin->seq);
        pin->seq = 0;
        list_del_init(&pin->list);
        struct journal_entry_pin_list *pin_list;
        struct journal_entry_pin *ret = NULL;
 
-       if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
-               return NULL;
-
-       spin_lock(&j->lock);
-
        fifo_for_each_entry_ptr(pin_list, &j->pin, *seq)
                if (*seq > max_seq ||
                    (ret = list_first_entry_or_null(&pin_list->list,
                                struct journal_entry_pin, list)))
                        break;
 
-       if (ret) {
-               list_move(&ret->list, &pin_list->flushed);
-               BUG_ON(j->flush_in_progress);
-               j->flush_in_progress = ret;
-       }
-
-       spin_unlock(&j->lock);
-
        return ret;
 }
 
-/* returns true if we did work */
+/* returns the number of pins that were flushed without error */
-static u64 journal_flush_pins(struct journal *j, u64 seq_to_flush,
-                             unsigned min_nr)
+static size_t journal_flush_pins(struct journal *j, u64 seq_to_flush,
+                                unsigned min_nr)
 {
        struct journal_entry_pin *pin;
-       u64 seq, ret = 0;
+       size_t nr_flushed = 0;
+       journal_pin_flush_fn flush_fn;
+       u64 seq;
+       int err;
+
+       if (!test_bit(JOURNAL_RECLAIM_STARTED, &j->flags))
+               return 0;
 
        lockdep_assert_held(&j->reclaim_lock);
 
 
                j->last_flushed = jiffies;
 
+               spin_lock(&j->lock);
                pin = journal_get_next_pin(j, min_nr
                                ? U64_MAX : seq_to_flush, &seq);
+               if (pin) {
+                       BUG_ON(j->flush_in_progress);
+                       j->flush_in_progress = pin;
+                       j->flush_in_progress_dropped = false;
+                       flush_fn = pin->flush;
+               }
+               spin_unlock(&j->lock);
+
                if (!pin)
                        break;
 
                if (min_nr)
                        min_nr--;
 
-               pin->flush(j, pin, seq);
+               err = flush_fn(j, pin, seq);
 
-               BUG_ON(j->flush_in_progress != pin);
+               spin_lock(&j->lock);
+               /* Pin might have been dropped or rearmed: */
+               if (likely(!err && !j->flush_in_progress_dropped))
+                       list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
                j->flush_in_progress = NULL;
+               j->flush_in_progress_dropped = false;
+               spin_unlock(&j->lock);
+
                wake_up(&j->pin_flush_wait);
-               ret++;
+
+               if (err)
+                       break;
+
+               nr_flushed++;
        }
 
-       return ret;
+       return nr_flushed;
 }
 
 static u64 journal_seq_to_flush(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        bool kthread = (current->flags & PF_KTHREAD) != 0;
-       u64 seq_to_flush, nr_flushed = 0;
-       size_t min_nr;
+       u64 seq_to_flush;
+       size_t min_nr, nr_flushed;
        unsigned flags;
        int ret = 0;
 
 
 
 struct journal;
 struct journal_entry_pin;
-typedef void (*journal_pin_flush_fn)(struct journal *j,
+typedef int (*journal_pin_flush_fn)(struct journal *j,
                                struct journal_entry_pin *, u64);
 
 struct journal_entry_pin {
 
        unsigned long           last_flushed;
        struct journal_entry_pin *flush_in_progress;
+       bool                    flush_in_progress_dropped;
        wait_queue_head_t       pin_flush_wait;
 
        /* protects advancing ja->discard_idx: */