bcachefs: Improved extent merging
author Kent Overstreet <kent.overstreet@gmail.com>
Sat, 15 May 2021 04:37:37 +0000 (00:37 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:06 +0000 (17:09 -0400)
Previously, checksummed extents could only be merged when the checksum
covered only the currently live data.

xfstest generic/064 creates a test file, then uses finsert calls to
split the extent, then collapse calls to see if they get merged. But
without any reads to trigger the narrow_crcs path, each of the split
extents will still have a checksum for the entire original extent.

This patch improves the extent merge path so that if either of the
extents we're attempting to merge has a checksum that covers the entire
merged extent, we just use that checksum.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/extents.c

index 2ced3557e13b8872c6e96a5a37fc9783fb7e50fc..abb15688a6642349a63e32da24ce71b744190b1c 100644 (file)
@@ -233,102 +233,121 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s _l, struct bkey_s_c _r)
 {
        struct bkey_s_extent l = bkey_s_to_extent(_l);
        struct bkey_s_c_extent r = bkey_s_c_to_extent(_r);
-       union bch_extent_entry *en_l = l.v->start;
-       const union bch_extent_entry *en_r = r.v->start;
-       struct bch_extent_crc_unpacked crc_l, crc_r;
+       union bch_extent_entry *en_l;
+       const union bch_extent_entry *en_r;
+       struct extent_ptr_decoded lp, rp;
+       bool use_right_ptr;
+       struct bch_dev *ca;
 
        if (bkey_val_u64s(l.k) != bkey_val_u64s(r.k))
                return false;
 
-       crc_l = bch2_extent_crc_unpack(l.k, NULL);
-
        extent_for_each_entry(l, en_l) {
                en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
 
                if (extent_entry_type(en_l) != extent_entry_type(en_r))
                        return false;
+       }
 
-               switch (extent_entry_type(en_l)) {
-               case BCH_EXTENT_ENTRY_ptr: {
-                       const struct bch_extent_ptr *lp = &en_l->ptr;
-                       const struct bch_extent_ptr *rp = &en_r->ptr;
-                       struct bch_dev *ca;
-
-                       if (lp->offset + crc_l.compressed_size != rp->offset ||
-                           lp->dev                     != rp->dev ||
-                           lp->gen                     != rp->gen)
-                               return false;
-
-                       /* We don't allow extents to straddle buckets: */
-                       ca = bch_dev_bkey_exists(c, lp->dev);
-
-                       if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp))
-                               return false;
+       en_l = l.v->start;
+       en_r = r.v->start;
+       lp.crc = bch2_extent_crc_unpack(l.k, NULL);
+       rp.crc = bch2_extent_crc_unpack(r.k, NULL);
+
+       while (__bkey_ptr_next_decode(l.k, extent_entry_last(l), lp, en_l) &&
+              __bkey_ptr_next_decode(r.k, extent_entry_last(r), rp, en_r)) {
+               if (lp.ptr.offset + lp.crc.offset + lp.crc.live_size !=
+                   rp.ptr.offset + rp.crc.offset ||
+                   lp.ptr.dev                  != rp.ptr.dev ||
+                   lp.ptr.gen                  != rp.ptr.gen ||
+                   lp.has_ec                   != rp.has_ec)
+                       return false;
 
-                       break;
-               }
-               case BCH_EXTENT_ENTRY_stripe_ptr:
-                       if (en_l->stripe_ptr.block      != en_r->stripe_ptr.block ||
-                           en_l->stripe_ptr.idx        != en_r->stripe_ptr.idx)
-                               return false;
-                       break;
-               case BCH_EXTENT_ENTRY_crc32:
-               case BCH_EXTENT_ENTRY_crc64:
-               case BCH_EXTENT_ENTRY_crc128:
-                       crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
-                       crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
+               /* Extents may not straddle buckets: */
+               ca = bch_dev_bkey_exists(c, lp.ptr.dev);
+               if (PTR_BUCKET_NR(ca, &lp.ptr) != PTR_BUCKET_NR(ca, &rp.ptr))
+                       return false;
 
-                       if (crc_l.csum_type             != crc_r.csum_type ||
-                           crc_l.compression_type      != crc_r.compression_type ||
-                           crc_l.nonce                 != crc_r.nonce)
-                               return false;
+               if (lp.has_ec                   != rp.has_ec ||
+                   (lp.has_ec &&
+                    (lp.ec.block               != rp.ec.block ||
+                     lp.ec.redundancy          != rp.ec.redundancy ||
+                     lp.ec.idx                 != rp.ec.idx)))
+                       return false;
 
-                       if (crc_l.offset + crc_l.live_size != crc_l.compressed_size ||
-                           crc_r.offset)
-                               return false;
+               if (lp.crc.compression_type     != rp.crc.compression_type ||
+                   lp.crc.nonce                != rp.crc.nonce)
+                       return false;
 
-                       if (!bch2_checksum_mergeable(crc_l.csum_type))
+               if (lp.crc.offset + lp.crc.live_size + rp.crc.live_size <=
+                   lp.crc.uncompressed_size) {
+                       /* can use left extent's crc entry */
+               } else if (lp.crc.live_size <= rp.crc.offset ) {
+                       /* can use right extent's crc entry */
+               } else {
+                       /* check if checksums can be merged: */
+                       if (lp.crc.csum_type            != rp.crc.csum_type ||
+                           lp.crc.nonce                != rp.crc.nonce ||
+                           crc_is_compressed(lp.crc) ||
+                           !bch2_checksum_mergeable(lp.crc.csum_type))
                                return false;
 
-                       if (crc_is_compressed(crc_l))
+                       if (lp.crc.offset + lp.crc.live_size != lp.crc.compressed_size ||
+                           rp.crc.offset)
                                return false;
 
-                       if (crc_l.csum_type &&
-                           crc_l.uncompressed_size +
-                           crc_r.uncompressed_size > c->sb.encoded_extent_max)
+                       if (lp.crc.csum_type &&
+                           lp.crc.uncompressed_size +
+                           rp.crc.uncompressed_size > c->sb.encoded_extent_max)
                                return false;
 
-                       if (crc_l.uncompressed_size + crc_r.uncompressed_size >
+                       if (lp.crc.uncompressed_size + rp.crc.uncompressed_size >
                            bch2_crc_field_size_max[extent_entry_type(en_l)])
                                return false;
-
-                       break;
-               default:
-                       return false;
                }
+
+               en_l = extent_entry_next(en_l);
+               en_r = extent_entry_next(en_r);
        }
 
+       use_right_ptr = false;
        extent_for_each_entry(l, en_l) {
                struct bch_extent_crc_unpacked crc_l, crc_r;
 
                en_r = vstruct_idx(r.v, (u64 *) en_l - l.v->_data);
 
+               if (extent_entry_type(en_l) == BCH_EXTENT_ENTRY_ptr &&
+                   use_right_ptr)
+                       en_l->ptr = en_r->ptr;
+
                if (!extent_entry_is_crc(en_l))
                        continue;
 
+               use_right_ptr = false;
+
                crc_l = bch2_extent_crc_unpack(l.k, entry_to_crc(en_l));
                crc_r = bch2_extent_crc_unpack(r.k, entry_to_crc(en_r));
 
-               crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
-                                                crc_l.csum,
-                                                crc_r.csum,
-                                                crc_r.uncompressed_size << 9);
-
-               crc_l.uncompressed_size += crc_r.uncompressed_size;
-               crc_l.compressed_size   += crc_r.compressed_size;
-
-               bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
-                                    extent_entry_type(en_l));
+               if (crc_l.offset + crc_l.live_size + crc_r.live_size <=
+                   crc_l.uncompressed_size) {
+                       /* can use left extent's crc entry */
+               } else if (crc_l.live_size <= crc_r.offset ) {
+                       /* can use right extent's crc entry */
+                       crc_r.offset -= crc_l.live_size;
+                       bch2_extent_crc_pack(entry_to_crc(en_l), crc_r,
+                                            extent_entry_type(en_l));
+                       use_right_ptr = true;
+               } else {
+                       crc_l.csum = bch2_checksum_merge(crc_l.csum_type,
+                                                        crc_l.csum,
+                                                        crc_r.csum,
+                                                        crc_r.uncompressed_size << 9);
+
+                       crc_l.uncompressed_size += crc_r.uncompressed_size;
+                       crc_l.compressed_size   += crc_r.compressed_size;
+                       bch2_extent_crc_pack(entry_to_crc(en_l), crc_l,
+                                            extent_entry_type(en_l));
+               }
        }
 
        bch2_key_resize(l.k, l.k->size + r.k->size);