  * reflink:                    gates KEY_TYPE_reflink
  * inline_data:                gates KEY_TYPE_inline_data
  * new_siphash:                gates BCH_STR_HASH_SIPHASH
+ * new_extent_overwrite:       gates BTREE_NODE_NEW_EXTENT_OVERWRITE
  */
 #define BCH_SB_FEATURES()                      \
        x(lz4,                          0)      \
        x(journal_seq_blacklist_v3,     5)      \
        x(reflink,                      6)      \
        x(new_siphash,                  7)      \
-       x(inline_data,                  8)
+       x(inline_data,                  8)      \
+       x(new_extent_overwrite,         9)
 
 enum bch_sb_feature {
 #define x(f, n) BCH_FEATURE_##f,
 
 LE64_BITMASK(BTREE_NODE_ID,    struct btree_node, flags,  0,  4);
 LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags,  4,  8);
-/* 8-32 unused */
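+/*
+ * Set on nodes written with the new extent overwrite scheme: extents in
+ * these nodes never overlap, so overwrites don't have to be resolved when
+ * the node is read:
+ */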
+LE64_BITMASK(BTREE_NODE_NEW_EXTENT_OVERWRITE,
+                               struct btree_node, flags,  8,  9);
+/* 9-32 unused */
 LE64_BITMASK(BTREE_NODE_SEQ,   struct btree_node, flags, 32, 64);
 
 struct btree_node_entry {
 
        return nr;
 }
 
-/*
- * If keys compare equal, compare by pointer order:
- *
- * Necessary for sort_fix_overlapping() - if there are multiple keys that
- * compare equal in different sets, we have to process them newest to oldest.
- */
-static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
-                                                 struct bkey_packed *l,
-                                                 struct bkey_packed *r)
-{
-       struct bkey ul = bkey_unpack_key(b, l);
-       struct bkey ur = bkey_unpack_key(b, r);
-
-       return bkey_cmp(bkey_start_pos(&ul),
-                       bkey_start_pos(&ur)) ?:
-               cmp_int((unsigned long) r, (unsigned long) l);
-}
-
 static void extent_sort_advance_prev(struct bkey_format *f,
                                     struct btree_nr_keys *nr,
                                     struct bkey_packed *start,
        bkey_reassemble((void *) *prev, k.s_c);
 }
 
-struct btree_nr_keys
-bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
-                                struct sort_iter *iter)
-{
-       struct btree *b = iter->b;
-       struct bkey_format *f = &b->format;
-       struct sort_iter_set *_l = iter->data, *_r = iter->data + 1;
-       struct bkey_packed *prev = NULL;
-       struct bkey l_unpacked, r_unpacked;
-       struct bkey_s l, r;
-       struct btree_nr_keys nr;
-       struct bkey_on_stack split;
-
-       memset(&nr, 0, sizeof(nr));
-       bkey_on_stack_init(&split);
-
-       sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
-
-       while (!sort_iter_end(iter)) {
-               l = __bkey_disassemble(b, _l->k, &l_unpacked);
-
-               if (iter->used == 1) {
-                       extent_sort_append(c, f, &nr, dst->start, &prev, l);
-                       sort_iter_advance(iter,
-                                         extent_sort_fix_overlapping_cmp);
-                       continue;
-               }
-
-               r = __bkey_disassemble(b, _r->k, &r_unpacked);
-
-               /* If current key and next key don't overlap, just append */
-               if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
-                       extent_sort_append(c, f, &nr, dst->start, &prev, l);
-                       sort_iter_advance(iter,
-                                         extent_sort_fix_overlapping_cmp);
-                       continue;
-               }
-
-               /* Skip 0 size keys */
-               if (!r.k->size) {
-                       __sort_iter_advance(iter, 1,
-                                           extent_sort_fix_overlapping_cmp);
-                       continue;
-               }
-
-               /*
-                * overlap: keep the newer key and trim the older key so they
-                * don't overlap. comparing pointers tells us which one is
-                * newer, since the bsets are appended one after the other.
-                */
-
-               /* can't happen because of comparison func */
-               BUG_ON(_l->k < _r->k &&
-                      !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
-
-               if (_l->k > _r->k) {
-                       /* l wins, trim r */
-                       if (bkey_cmp(l.k->p, r.k->p) >= 0) {
-                               __sort_iter_advance(iter, 1,
-                                        extent_sort_fix_overlapping_cmp);
-                       } else {
-                               bch2_cut_front_s(l.k->p, r);
-                               extent_save(b, _r->k, r.k);
-                               __sort_iter_sift(iter, 1,
-                                        extent_sort_fix_overlapping_cmp);
-                       }
-               } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
-
-                       /*
-                        * r wins, but it overlaps in the middle of l - split l:
-                        */
-                       bkey_on_stack_reassemble(&split, c, l.s_c);
-                       bch2_cut_back(bkey_start_pos(r.k), split.k);
-
-                       bch2_cut_front_s(r.k->p, l);
-                       extent_save(b, _l->k, l.k);
-
-                       __sort_iter_sift(iter, 0,
-                                        extent_sort_fix_overlapping_cmp);
-
-                       extent_sort_append(c, f, &nr, dst->start,
-                                          &prev, bkey_i_to_s(split.k));
-               } else {
-                       bch2_cut_back_s(bkey_start_pos(r.k), l);
-                       extent_save(b, _l->k, l.k);
-               }
-       }
-
-       extent_sort_advance_prev(f, &nr, dst->start, &prev);
-
-       dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
-
-       bkey_on_stack_exit(&split, c);
-       return nr;
-}
-
 /* Sort + repack in a new format: */
 struct btree_nr_keys
 bch2_sort_repack(struct bset *dst, struct btree *src,
                                struct bkey_packed *r)
 {
        return bkey_cmp_packed(b, l, r) ?:
-               (int) bkey_whiteout(r) - (int) bkey_whiteout(l) ?:
+               (int) bkey_deleted(r) - (int) bkey_deleted(l) ?:
                (int) l->needs_whiteout - (int) r->needs_whiteout;
 }
 
        return (u64 *) out - (u64 *) dst;
 }
 
+/* Compat code for btree_node_old_extent_overwrite: */
+
+/*
+ * If keys compare equal, compare by pointer order:
+ *
+ * Necessary for sort_fix_overlapping() - if there are multiple keys that
+ * compare equal in different sets, we have to process them newest to oldest.
+ */
+static inline int extent_sort_fix_overlapping_cmp(struct btree *b,
+                                                 struct bkey_packed *l,
+                                                 struct bkey_packed *r)
+{
+       struct bkey ul = bkey_unpack_key(b, l);
+       struct bkey ur = bkey_unpack_key(b, r);
+
+       return bkey_cmp(bkey_start_pos(&ul),
+                       bkey_start_pos(&ur)) ?:
+               cmp_int((unsigned long) r, (unsigned long) l);
+}
+
+struct btree_nr_keys
+bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
+                                struct sort_iter *iter)
+{
+       struct btree *b = iter->b;
+       struct bkey_format *f = &b->format;
+       struct sort_iter_set *_l = iter->data, *_r = iter->data + 1;
+       struct bkey_packed *prev = NULL;
+       struct bkey l_unpacked, r_unpacked;
+       struct bkey_s l, r;
+       struct btree_nr_keys nr;
+       struct bkey_on_stack split;
+
+       memset(&nr, 0, sizeof(nr));
+       bkey_on_stack_init(&split);
+
+       sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
+
+       while (!sort_iter_end(iter)) {
+               l = __bkey_disassemble(b, _l->k, &l_unpacked);
+
+               if (iter->used == 1) {
+                       extent_sort_append(c, f, &nr, dst->start, &prev, l);
+                       sort_iter_advance(iter,
+                                         extent_sort_fix_overlapping_cmp);
+                       continue;
+               }
+
+               r = __bkey_disassemble(b, _r->k, &r_unpacked);
+
+               /* If current key and next key don't overlap, just append */
+               if (bkey_cmp(l.k->p, bkey_start_pos(r.k)) <= 0) {
+                       extent_sort_append(c, f, &nr, dst->start, &prev, l);
+                       sort_iter_advance(iter,
+                                         extent_sort_fix_overlapping_cmp);
+                       continue;
+               }
+
+               /* Skip 0 size keys */
+               if (!r.k->size) {
+                       __sort_iter_advance(iter, 1,
+                                           extent_sort_fix_overlapping_cmp);
+                       continue;
+               }
+
+               /*
+                * overlap: keep the newer key and trim the older key so they
+                * don't overlap. comparing pointers tells us which one is
+                * newer, since the bsets are appended one after the other.
+                */
+
+               /* can't happen because of comparison func */
+               BUG_ON(_l->k < _r->k &&
+                      !bkey_cmp(bkey_start_pos(l.k), bkey_start_pos(r.k)));
+
+               if (_l->k > _r->k) {
+                       /* l wins, trim r */
+                       if (bkey_cmp(l.k->p, r.k->p) >= 0) {
+                               __sort_iter_advance(iter, 1,
+                                        extent_sort_fix_overlapping_cmp);
+                       } else {
+                               bch2_cut_front_s(l.k->p, r);
+                               extent_save(b, _r->k, r.k);
+                               __sort_iter_sift(iter, 1,
+                                        extent_sort_fix_overlapping_cmp);
+                       }
+               } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
+
+                       /*
+                        * r wins, but it overlaps in the middle of l - split l:
+                        */
+                       bkey_on_stack_reassemble(&split, c, l.s_c);
+                       bch2_cut_back(bkey_start_pos(r.k), split.k);
+
+                       bch2_cut_front_s(r.k->p, l);
+                       extent_save(b, _l->k, l.k);
+
+                       __sort_iter_sift(iter, 0,
+                                        extent_sort_fix_overlapping_cmp);
+
+                       extent_sort_append(c, f, &nr, dst->start,
+                                          &prev, bkey_i_to_s(split.k));
+               } else {
+                       bch2_cut_back_s(bkey_start_pos(r.k), l);
+                       extent_save(b, _l->k, l.k);
+               }
+       }
+
+       extent_sort_advance_prev(f, &nr, dst->start, &prev);
+
+       dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
+
+       bkey_on_stack_exit(&split, c);
+       return nr;
+}
+
 static inline int sort_extents_cmp(struct btree *b,
                                   struct bkey_packed *l,
                                   struct bkey_packed *r)
 
 
 static void verify_no_dups(struct btree *b,
                           struct bkey_packed *start,
-                          struct bkey_packed *end)
+                          struct bkey_packed *end,
+                          bool extents)
 {
 #ifdef CONFIG_BCACHEFS_DEBUG
        struct bkey_packed *k, *p;
                struct bkey l = bkey_unpack_key(b, p);
                struct bkey r = bkey_unpack_key(b, k);
 
-               BUG_ON(btree_node_is_extents(b)
+               BUG_ON(extents
                       ? bkey_cmp(l.p, bkey_start_pos(&r)) > 0
                       : bkey_cmp(l.p, bkey_start_pos(&r)) >= 0);
                //BUG_ON(bkey_cmp_packed(&b->format, p, k) >= 0);
        }
 
        verify_no_dups(b, new_whiteouts,
-                      (void *) ((u64 *) new_whiteouts + b->whiteout_u64s));
+                      (void *) ((u64 *) new_whiteouts + b->whiteout_u64s),
+                      btree_node_old_extent_overwrite(b));
 
        memcpy_u64s(unwritten_whiteouts_start(c, b),
                    new_whiteouts, b->whiteout_u64s);
 
        verify_no_dups(b,
                       unwritten_whiteouts_start(c, b),
-                      unwritten_whiteouts_end(c, b));
+                      unwritten_whiteouts_end(c, b),
+                      true);
 
        btree_bounce_free(c, order, used_mempool, whiteouts);
 
 bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
                            enum compact_mode mode)
 {
-       return !btree_node_is_extents(b)
+       return !btree_node_old_extent_overwrite(b)
                ? bch2_drop_whiteouts(b, mode)
                : bch2_compact_extent_whiteouts(c, b, mode);
 }
 
        start_time = local_clock();
 
-       if (btree_node_is_extents(b))
+       if (btree_node_old_extent_overwrite(b))
                filter_whiteouts = bset_written(b, start_bset);
 
-       u64s = (btree_node_is_extents(b)
+       u64s = (btree_node_old_extent_overwrite(b)
                ? bch2_sort_extents
                : bch2_sort_keys)(out->keys.start,
                                  &sort_iter,
                         bool have_retry)
 {
        struct bkey_packed *k, *prev = NULL;
-       struct bpos prev_pos = POS_MIN;
+       struct bpos prev_pos    = POS_MIN;
+       struct bpos prev_data   = POS_MIN;
        bool seen_non_whiteout = false;
        unsigned version;
        const char *err;
                     (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0))) {
                        *whiteout_u64s = k->_data - i->_data;
                        seen_non_whiteout = true;
-               } else if (bkey_cmp(prev_pos, bkey_start_pos(u.k)) > 0) {
+               } else if (bkey_cmp(prev_data, bkey_start_pos(u.k)) > 0 ||
+                          bkey_cmp(prev_pos, u.k->p) > 0) {
                        btree_err(BTREE_ERR_FATAL, c, b, i,
                                  "keys out of order: %llu:%llu > %llu:%llu",
                                  prev_pos.inode,
                        /* XXX: repair this */
                }
 
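+               /*
+                * prev_pos tracks every key's position; prev_data only the
+                * positions of non-deleted keys, since with the new extent
+                * scheme an extent may start before a preceding whiteout's
+                * position - but live extents must never overlap each other:
+                */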
+               if (!bkey_deleted(u.k))
+                       prev_data = u.k->p;
                prev_pos = u.k->p;
+
                prev = k;
                k = bkey_next_skip_noops(k, vstruct_last(i));
        }
 
                        bset_encrypt(c, i, b->written << 9);
 
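+                       /*
+                        * Extent nodes written before the new extent
+                        * overwrite feature may contain overlapping
+                        * extents - mark them so the old sort and
+                        * insert paths are used:
+                        */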
+                       if (btree_node_is_extents(b) &&
+                           !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data))
+                               set_btree_node_old_extent_overwrite(b);
+
                        sectors = vstruct_sectors(b->data, c->block_bits);
 
                        btree_node_set_format(b, b->data->format);
 
        set_btree_bset(b, b->set, &b->data->keys);
 
-       b->nr = (btree_node_is_extents(b)
+       b->nr = (btree_node_old_extent_overwrite(b)
                 ? bch2_extent_sort_fix_overlapping
                 : bch2_key_sort_fix_overlapping)(c, &sorted->keys, iter);
 
        i->journal_seq  = cpu_to_le64(seq);
        i->u64s         = 0;
 
-       if (!btree_node_is_extents(b)) {
+       if (!btree_node_old_extent_overwrite(b)) {
                sort_iter_add(&sort_iter,
                              unwritten_whiteouts_start(c, b),
                              unwritten_whiteouts_end(c, b));
 
        b->whiteout_u64s = 0;
 
-       u64s = btree_node_is_extents(b)
+       u64s = btree_node_old_extent_overwrite(b)
                ? bch2_sort_extents(vstruct_last(i), &sort_iter, false)
                : bch2_sort_keys(i->start, &sort_iter, false);
        le16_add_cpu(&i->u64s, u64s);
 
        BTREE_NODE_just_written,
        BTREE_NODE_dying,
        BTREE_NODE_fake,
+       BTREE_NODE_old_extent_overwrite,
 };
 
 BTREE_FLAG(read_in_flight);
 BTREE_FLAG(just_written);
 BTREE_FLAG(dying);
 BTREE_FLAG(fake);
+BTREE_FLAG(old_extent_overwrite);
 
 static inline struct btree_write *btree_current_write(struct btree *b)
 {
 
        SET_BTREE_NODE_LEVEL(b->data, level);
        b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0];
 
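+       /*
+        * With the new extent overwrite feature enabled, all new nodes are
+        * written in the new format; extent nodes lacking the flag still need
+        * the old overlapping-extents handling:
+        */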
+       if (c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))
+               SET_BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data, true);
+
+       if (btree_node_is_extents(b) &&
+           !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data))
+               set_btree_node_old_extent_overwrite(b);
+
        bch2_btree_build_aux_trees(b);
 
        btree_node_will_make_reachable(as, b);
 
        int old_live_u64s = b->nr.live_u64s;
        int live_u64s_added, u64s_added;
 
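+       /*
+        * The key we're inserting hasn't been written out to disk yet, so it
+        * can't need a whiteout - clear any stale needs_whiteout bit:
+        */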
+       insert->k->k.needs_whiteout = false;
+
        if (!btree_node_is_extents(b))
                bch2_insert_fixup_key(trans, insert);
        else
 
 
                overlap = bch2_extent_overlap(&insert->k->k, k.k);
 
+               /*
+                * If we're overwriting an existing extent, we may need to emit
+                * a whiteout - unless we're inserting a new extent at the same
+                * position:
+                */
+               if (k.k->needs_whiteout &&
+                   (!bkey_whiteout(&insert->k->k) ||
+                    bkey_cmp(k.k->p, insert->k->k.p)))
+                       *u64s += BKEY_U64s;
+
+               /*
+                * If we're partially overwriting an existing extent which has
+                * been written out to disk, we'll need to emit a new version of
+                * that extent:
+                */
                if (bkey_written(l->b, _k) &&
                    overlap != BCH_EXTENT_OVERLAP_ALL)
                        *u64s += _k->u64s;
 
-               /* account for having to split existing extent: */
+               /* And we may be splitting an existing extent: */
                if (overlap == BCH_EXTENT_OVERLAP_MIDDLE)
                        *u64s += _k->u64s;
 
        bch2_btree_node_iter_fix(iter, l->b, &l->iter, k, 0, k->u64s);
 }
 
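+/*
+ * Add a whiteout at @pos to the unwritten whiteouts area: pack @pos into the
+ * btree node's key format if possible, falling back to an unpacked deleted
+ * key if it doesn't pack:
+ */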
+static void pack_push_whiteout(struct bch_fs *c, struct btree *b,
+                              struct bpos pos)
+{
+       struct bkey_packed k;
+
+       if (!bkey_pack_pos(&k, pos, b)) {
+               struct bkey_i tmp;
+
+               bkey_init(&tmp.k);
+               tmp.k.p = pos;
+               bkey_copy(&k, &tmp);
+       }
+
+       k.needs_whiteout = true;
+       push_whiteout(c, b, &k);
+}
+
 static void
 extent_drop(struct bch_fs *c, struct btree_iter *iter,
            struct bkey_packed *_k, struct bkey_s k)
 
        k.k->size = 0;
        k.k->type = KEY_TYPE_deleted;
-       k.k->needs_whiteout = false;
+
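+       /*
+        * If the key being dropped was written out to disk, it needs a
+        * whiteout: with the new scheme, push one to the whiteouts area and
+        * clear needs_whiteout; with the old scheme the deleted key itself
+        * keeps needs_whiteout set:
+        */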
+       if (!btree_node_old_extent_overwrite(l->b) &&
+           k.k->needs_whiteout) {
+               pack_push_whiteout(c, l->b, k.k->p);
+               k.k->needs_whiteout = false;
+       }
 
        if (_k >= btree_bset_last(l->b)->start) {
                unsigned u64s = _k->u64s;
        bkey_on_stack_init(&tmp);
        bkey_on_stack_init(&split);
 
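+       /*
+        * With the new scheme, a live key inserted exactly on top of @k takes
+        * over @k's needs_whiteout rather than a whiteout being emitted; the
+        * old scheme just propagates needs_whiteout to the key being inserted:
+        */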
+       if (!btree_node_old_extent_overwrite(l->b)) {
+               if (!bkey_whiteout(&insert->k) &&
+                   !bkey_cmp(k.k->p, insert->k.p)) {
+                       insert->k.needs_whiteout = k.k->needs_whiteout;
+                       k.k->needs_whiteout = false;
+               }
+       } else {
+               insert->k.needs_whiteout |= k.k->needs_whiteout;
+       }
+
        switch (overlap) {
        case BCH_EXTENT_OVERLAP_FRONT:
                if (bkey_written(l->b, _k)) {
                        bkey_on_stack_reassemble(&tmp, c, k.s_c);
                        bch2_cut_front(insert->k.p, tmp.k);
 
+                       /*
+                        * needs_whiteout was propagated to the new version of
+                        * @k, @tmp:
+                        */
+                       if (!btree_node_old_extent_overwrite(l->b))
+                               k.k->needs_whiteout = false;
+
                        extent_drop(c, iter, _k, k);
                        extent_bset_insert(c, iter, tmp.k);
                } else {
                        bkey_on_stack_reassemble(&tmp, c, k.s_c);
                        bch2_cut_back(bkey_start_pos(&insert->k), tmp.k);
 
+                       /*
+                        * @tmp has a different position than @k, so
+                        * needs_whiteout should not be propagated:
+                        */
+                       if (!btree_node_old_extent_overwrite(l->b))
+                               tmp.k->k.needs_whiteout = false;
+
                        extent_drop(c, iter, _k, k);
                        extent_bset_insert(c, iter, tmp.k);
                } else {
+                       /*
+                        * the position of @k is changing; emit a whiteout if
+                        * needs_whiteout is set:
+                        */
+                       if (!btree_node_old_extent_overwrite(l->b) &&
+                           k.k->needs_whiteout) {
+                               pack_push_whiteout(c, l->b, k.k->p);
+                               k.k->needs_whiteout = false;
+                       }
+
                        btree_keys_account_val_delta(l->b, _k,
                                bch2_cut_back_s(bkey_start_pos(&insert->k), k));
                        extent_save(l->b, _k, k.k);
                bkey_on_stack_reassemble(&split, c, k.s_c);
                bch2_cut_back(bkey_start_pos(&insert->k), split.k);
 
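+               /*
+                * @split ends at a different position than @k, so it can't
+                * take over @k's needs_whiteout:
+                */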
+               if (!btree_node_old_extent_overwrite(l->b))
+                       split.k->k.needs_whiteout = false;
+
+               /* this is identical to BCH_EXTENT_OVERLAP_FRONT: */
                if (bkey_written(l->b, _k)) {
                        bkey_on_stack_reassemble(&tmp, c, k.s_c);
                        bch2_cut_front(insert->k.p, tmp.k);
 
+                       if (!btree_node_old_extent_overwrite(l->b))
+                               k.k->needs_whiteout = false;
+
                        extent_drop(c, iter, _k, k);
                        extent_bset_insert(c, iter, tmp.k);
                } else {
                        bch2_cut_front(cur_end, insert);
                        bch2_btree_iter_set_pos_same_leaf(iter, cur_end);
                } else {
-                       insert->k.needs_whiteout |= k.k->needs_whiteout;
                        extent_squash(c, iter, insert, _k, k, overlap);
                }
 
                if (insert->k.type == KEY_TYPE_deleted)
                        insert->k.type = KEY_TYPE_discard;
 
-               extent_bset_insert(c, iter, insert);
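+               /*
+                * With the new scheme, whiteouts are never inserted into the
+                * main keyspace - any whiteouts needed were already pushed by
+                * extent_drop()/extent_squash():
+                */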
+               if (!bkey_whiteout(&insert->k) ||
+                   btree_node_old_extent_overwrite(l->b))
+                       extent_bset_insert(c, iter, insert);
+
                bch2_btree_journal_key(trans, iter, insert);
        }
 
 
                                le16_to_cpu(bcachefs_metadata_version_min);
                c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
                c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
+               c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
                write_sb = true;
        }
 
                le16_to_cpu(bcachefs_metadata_version_current);
        c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_atomic_nlink;
        c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_siphash;
+       c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
 
        SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
        SET_BCH_SB_CLEAN(c->disk_sb.sb, false);