bcachefs: Reflink
Author:     Kent Overstreet <kent.overstreet@gmail.com>
AuthorDate: Fri, 16 Aug 2019 13:59:56 +0000 (09:59 -0400)
Commit:     Kent Overstreet <kent.overstreet@linux.dev>
CommitDate: Sun, 22 Oct 2023 21:08:25 +0000 (17:08 -0400)
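
Add reflink support: a new KEY_TYPE_reflink_p key in the extents btree
points, by index, at a refcounted KEY_TYPE_reflink_v indirect extent
living in the new BTREE_ID_REFLINK btree. Transactional triggers keep
the refcounts correct as reflink pointers are created and overwritten,
and bch2_remap_file_range() wires it all up to the VFS
.remap_file_range interface (FICLONE/FICLONERANGE).
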
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
23 files changed:
fs/bcachefs/Makefile
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey.h
fs/bcachefs/bkey_methods.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/buckets.c
fs/bcachefs/extents.c
fs/bcachefs/extents.h
fs/bcachefs/fs-io.c
fs/bcachefs/fs-io.h
fs/bcachefs/fs.c
fs/bcachefs/fs.h
fs/bcachefs/io.c
fs/bcachefs/io.h
fs/bcachefs/migrate.c
fs/bcachefs/move.c
fs/bcachefs/move.h
fs/bcachefs/recovery.c
fs/bcachefs/reflink.c [new file with mode: 0644]
fs/bcachefs/reflink.h [new file with mode: 0644]
fs/bcachefs/replicas.c

diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
index c29ccdb459659d1ded2a3e22e5a59f2d6f921bd7..4c26084091449ff3abf4bcfab23cf81fcb249f4e 100644
@@ -44,6 +44,7 @@ bcachefs-y            :=      \
        quota.o                 \
        rebalance.o             \
        recovery.o              \
+       reflink.o               \
        replicas.o              \
        siphash.o               \
        six.o                   \
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 68e2d3b1a9a6824510b47e03755d0c9b59cbe8cc..410fce3ed8d4d3f753853e54e7c54b2cf000dd7e 100644
@@ -361,6 +361,7 @@ enum gc_phase {
        GC_PHASE_BTREE_XATTRS,
        GC_PHASE_BTREE_ALLOC,
        GC_PHASE_BTREE_QUOTAS,
+       GC_PHASE_BTREE_REFLINK,
 
        GC_PHASE_PENDING_DELETE,
        GC_PHASE_ALLOC,
@@ -750,6 +751,9 @@ struct bch_fs {
        struct work_struct      ec_stripe_delete_work;
        struct llist_head       ec_stripe_delete_list;
 
+       /* REFLINK */
+       u64                     reflink_hint;
+
        /* VFS IO PATH - fs-io.c */
        struct bio_set          writepage_bioset;
        struct bio_set          dio_write_bioset;
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index b8aafd2e283a0c33bbeb5eb49978a7dfbffbe1e0..62afea1e7ec3e90d573ddda3f18f8ce0d3b6a53b 100644
@@ -340,7 +340,9 @@ static inline void bkey_init(struct bkey *k)
        x(xattr,                11)                     \
        x(alloc,                12)                     \
        x(quota,                13)                     \
-       x(stripe,               14)
+       x(stripe,               14)                     \
+       x(reflink_p,            15)                     \
+       x(reflink_v,            16)
 
 enum bch_bkey_type {
 #define x(name, nr) KEY_TYPE_##name    = nr,
@@ -895,6 +897,24 @@ struct bch_stripe {
        struct bch_extent_ptr   ptrs[0];
 } __attribute__((packed, aligned(8)));
 
+/* Reflink: */
+
+struct bch_reflink_p {
+       struct bch_val          v;
+       __le64                  idx;
+
+       __le32                  reservation_generation;
+       __u8                    nr_replicas;
+       __u8                    pad[3];
+};
+
+struct bch_reflink_v {
+       struct bch_val          v;
+       __le64                  refcount;
+       union bch_extent_entry  start[0];
+       __u64                   _data[0];
+};
+
 /* Optional/variable size superblock sections: */
 
 struct bch_sb_field {
@@ -1297,6 +1317,7 @@ enum bch_sb_features {
        BCH_FEATURE_ATOMIC_NLINK        = 3, /* should have gone under compat */
        BCH_FEATURE_EC                  = 4,
        BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
+       BCH_FEATURE_REFLINK             = 6,
        BCH_FEATURE_NR,
 };
 
@@ -1487,7 +1508,8 @@ LE32_BITMASK(JSET_BIG_ENDIAN,     struct jset, flags, 4, 5);
        x(XATTRS,       3, "xattrs")                    \
        x(ALLOC,        4, "alloc")                     \
        x(QUOTAS,       5, "quotas")                    \
-       x(EC,           6, "erasure_coding")
+       x(EC,           6, "erasure_coding")            \
+       x(REFLINK,      7, "reflink")
 
 enum btree_id {
 #define x(kwd, val, name) BTREE_ID_##kwd = val,
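
The two structs above are the whole on-disk story: a reflink_p in a
file's extents btree carries only an index (v.idx) into the REFLINK
btree, where a reflink_v holds the real data pointers plus a 64-bit
refcount. A minimal userspace model of the resolution step —
hypothetical names, a flat table standing in for the btree, not the
kernel code:

#include <stdint.h>
#include <stdio.h>

/* Model of a refcounted indirect extent in the REFLINK btree. */
struct reflink_v_model {
	uint64_t start;		/* bkey_start_offset() */
	uint64_t end;		/* k.p.offset */
	uint64_t refcount;
};

/* Model of a reflink_p in a file's extents btree. */
struct reflink_p_model {
	uint64_t file_offset;	/* where in the file this extent lives */
	uint64_t size;		/* sectors */
	uint64_t idx;		/* start offset in the REFLINK btree */
};

/* Resolve a read `off` sectors into the reflink_p: the data lives at
 * REFLINK offset p->idx + off. */
static const struct reflink_v_model *
resolve(const struct reflink_p_model *p, uint64_t off,
	const struct reflink_v_model *tbl, int n, uint64_t *v_off)
{
	uint64_t reflink_offset = p->idx + off;
	int i;

	for (i = 0; i < n; i++)
		if (tbl[i].start <= reflink_offset &&
		    reflink_offset < tbl[i].end) {
			*v_off = reflink_offset - tbl[i].start;
			return &tbl[i];
		}
	return NULL;	/* the kernel treats this as inconsistency (-EIO) */
}

int main(void)
{
	struct reflink_v_model tbl[] = { { 0, 128, 2 } };
	struct reflink_p_model p = { .file_offset = 4096, .size = 64, .idx = 32 };
	uint64_t v_off;
	const struct reflink_v_model *v = resolve(&p, 8, tbl, 1, &v_off);

	if (v)
		printf("hit indirect extent [%llu,%llu) at offset %llu, refcount %llu\n",
		       (unsigned long long)v->start,
		       (unsigned long long)v->end,
		       (unsigned long long)v_off,
		       (unsigned long long)v->refcount);
	return 0;
}

In the kernel the same resolution is done by bch2_read_indirect_extent()
in io.c, further down in this patch.
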
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index b3a08e52e6b3a3e9e16d919564a88bc491dc14d8..321fe6fe0b55f7d25390d70982af5f9702200f55 100644
@@ -560,6 +560,8 @@ BKEY_VAL_ACCESSORS(xattr);
 BKEY_VAL_ACCESSORS(alloc);
 BKEY_VAL_ACCESSORS(quota);
 BKEY_VAL_ACCESSORS(stripe);
+BKEY_VAL_ACCESSORS(reflink_p);
+BKEY_VAL_ACCESSORS(reflink_v);
 
 /* byte order helpers */
 
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 8af16ca994e08619cc199cef0c08c27ee71aa7ea..6fa6ac1fadc13494c826c0175a5588220558862f 100644
@@ -10,6 +10,7 @@
 #include "extents.h"
 #include "inode.h"
 #include "quota.h"
+#include "reflink.h"
 #include "xattr.h"
 
 const char * const bch2_bkey_types[] = {
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index ec14e2deecb7d5d77b97e0d33f009c740e35466f..621cbfa22fc9e3ab839cc9108c449656d4d0005a 100644
@@ -464,7 +464,13 @@ static inline enum btree_node_type btree_node_type(struct btree *b)
 
 static inline bool btree_node_type_is_extents(enum btree_node_type type)
 {
-       return type == BKEY_TYPE_EXTENTS;
+       switch (type) {
+       case BKEY_TYPE_EXTENTS:
+       case BKEY_TYPE_REFLINK:
+               return true;
+       default:
+               return false;
+       }
 }
 
 static inline bool btree_node_is_extents(struct btree *b)
@@ -480,6 +486,7 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type)
        case BKEY_TYPE_EXTENTS:
        case BKEY_TYPE_INODES:
        case BKEY_TYPE_EC:
+       case BKEY_TYPE_REFLINK:
                return true;
        default:
                return false;
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 5f94b6e9cf2865a692c7a195f7c4e3c8226d5eb2..443ffb5c709d2bda0fb2444bb594b8468b1d3727 100644
@@ -521,7 +521,8 @@ static inline bool update_triggers_transactional(struct btree_trans *trans,
 {
        return likely(!(trans->flags & BTREE_INSERT_MARK_INMEM)) &&
                (i->iter->btree_id == BTREE_ID_EXTENTS ||
-                i->iter->btree_id == BTREE_ID_INODES);
+                i->iter->btree_id == BTREE_ID_INODES ||
+                i->iter->btree_id == BTREE_ID_REFLINK);
 }
 
 static inline bool update_has_triggers(struct btree_trans *trans,
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index baf9642d21caad9dbec8e48d43a11c26898b2080..3d243f2d10952181a071eead6f811004f914b9d4 100644
@@ -972,7 +972,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
                spin_unlock(&c->ec_stripes_heap_lock);
                bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
                                    (u64) p.idx);
-               return -1;
+               return -EIO;
        }
 
        BUG_ON(m->r.e.data_type != data_type);
@@ -1144,6 +1144,7 @@ int bch2_mark_key_locked(struct bch_fs *c,
                                fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
                ret = bch2_mark_extent(c, k, offset, sectors, BCH_DATA_USER,
                                fs_usage, journal_seq, flags);
                break;
@@ -1304,7 +1305,8 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
            xchg(&warned_disk_usage, 1))
                return;
 
-       pr_err("disk usage increased more than %llu sectors reserved", disk_res_sectors);
+       bch_err(c, "disk usage increased more than %llu sectors reserved",
+               disk_res_sectors);
 
        trans_for_each_update_iter(trans, i) {
                struct btree_iter       *iter = i->iter;
@@ -1319,7 +1321,7 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
 
                node_iter = iter->l[0].iter;
                while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
-                                                             KEY_TYPE_discard))) {
+                                                       KEY_TYPE_discard))) {
                        struct bkey             unpacked;
                        struct bkey_s_c         k;
 
@@ -1471,6 +1473,7 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                        struct bch_extent_stripe_ptr p,
                        s64 sectors, enum bch_data_type data_type)
 {
+       struct bch_fs *c = trans->c;
        struct bch_replicas_padded r;
        struct btree_iter *iter;
        struct bkey_i *new_k;
@@ -1487,10 +1490,10 @@ static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
                return ret;
 
        if (k.k->type != KEY_TYPE_stripe) {
-               bch_err_ratelimited(trans->c,
-                                   "pointer to nonexistent stripe %llu",
-                                   (u64) p.idx);
-               ret = -1;
+               bch2_fs_inconsistent(c,
+                       "pointer to nonexistent stripe %llu",
+                       (u64) p.idx);
+               ret = -EIO;
                goto out;
        }
 
@@ -1578,6 +1581,84 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
        return 0;
 }
 
+static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
+                       struct bkey_s_c_reflink_p p,
+                       u64 idx, unsigned sectors,
+                       unsigned flags)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *iter;
+       struct bkey_i *new_k;
+       struct bkey_s_c k;
+       struct bkey_i_reflink_v *r_v;
+       s64 ret;
+
+       ret = trans_get_key(trans, BTREE_ID_REFLINK,
+                           POS(0, idx), &iter, &k);
+       if (ret)
+               return ret;
+
+       if (k.k->type != KEY_TYPE_reflink_v) {
+               bch2_fs_inconsistent(c,
+                       "%llu:%llu len %u points to nonexistent indirect extent %llu",
+                       p.k->p.inode, p.k->p.offset, p.k->size, idx);
+               ret = -EIO;
+               goto err;
+       }
+
+       if ((flags & BCH_BUCKET_MARK_OVERWRITE) &&
+           (bkey_start_offset(k.k) < idx ||
+            k.k->p.offset > idx + sectors))
+               goto out;
+
+       bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
+       BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+
+       new_k = trans_update_key(trans, iter, k.k->u64s);
+       ret = PTR_ERR_OR_ZERO(new_k);
+       if (ret)
+               goto err;
+
+       bkey_reassemble(new_k, k);
+       r_v = bkey_i_to_reflink_v(new_k);
+
+       le64_add_cpu(&r_v->v.refcount,
+                    !(flags & BCH_BUCKET_MARK_OVERWRITE) ? 1 : -1);
+
+       if (!r_v->v.refcount) {
+               r_v->k.type = KEY_TYPE_deleted;
+               set_bkey_val_u64s(&r_v->k, 0);
+       }
+out:
+       ret = k.k->p.offset - idx;
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
+                       struct bkey_s_c_reflink_p p, unsigned offset,
+                       s64 sectors, unsigned flags)
+{
+       u64 idx = le64_to_cpu(p.v->idx) + offset;
+       s64 ret = 0;
+
+       sectors = abs(sectors);
+       BUG_ON(offset + sectors > p.k->size);
+
+       while (sectors) {
+               ret = __bch2_trans_mark_reflink_p(trans, p, idx, sectors, flags);
+               if (ret < 0)
+                       break;
+
+               idx += ret;
+               sectors = max_t(s64, 0LL, sectors - ret);
+               ret = 0;
+       }
+
+       return ret;
+}
+
 int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
                        unsigned offset, s64 sectors, unsigned flags)
 {
@@ -1593,6 +1674,7 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
                return bch2_trans_mark_extent(trans, k, offset, sectors,
                                              flags, BCH_DATA_BTREE);
        case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
                return bch2_trans_mark_extent(trans, k, offset, sectors,
                                              flags, BCH_DATA_USER);
        case KEY_TYPE_inode:
@@ -1616,6 +1698,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
                d->fs_usage.persistent_reserved[replicas - 1]   += sectors;
                return 0;
        }
+       case KEY_TYPE_reflink_p:
+               return bch2_trans_mark_reflink_p(trans,
+                                       bkey_s_c_to_reflink_p(k),
+                                       offset, sectors, flags);
        default:
                return 0;
        }
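
bch2_trans_mark_reflink_p() above is the transactional trigger that
keeps the refcounts correct: inserting a reflink_p increments the
refcount of every reflink_v it covers, overwriting one decrements it.
Since one reflink_p can span several reflink_v keys, the trigger loops,
advancing idx by however many sectors each indirect extent accounted
for. A small userspace model of that loop — illustrative names, not the
kernel API:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct reflink_v_model {
	uint64_t start, end;
	uint64_t refcount;	/* 0 == deleted */
};

/* Model of __bch2_trans_mark_reflink_p(): adjust the refcount of the
 * indirect extent containing idx, return how many sectors of the
 * range it accounted for (end - idx). */
static int64_t mark_one(struct reflink_v_model *tbl, int n,
			uint64_t idx, bool overwrite)
{
	int i;

	for (i = 0; i < n; i++)
		if (tbl[i].refcount &&
		    tbl[i].start <= idx && idx < tbl[i].end) {
			tbl[i].refcount += overwrite ? -1 : 1;
			return tbl[i].end - idx;
		}
	return -5;	/* -EIO: pointer to nonexistent indirect extent */
}

/* Model of bch2_trans_mark_reflink_p(): walk the whole range. */
static int mark_range(struct reflink_v_model *tbl, int n,
		      uint64_t idx, int64_t sectors, bool overwrite)
{
	while (sectors > 0) {
		int64_t ret = mark_one(tbl, n, idx, overwrite);
		if (ret < 0)
			return (int) ret;
		idx += ret;
		sectors -= ret;
	}
	return 0;
}

int main(void)
{
	/* Two adjacent indirect extents; one reflink_p spans both. */
	struct reflink_v_model tbl[] = { { 0, 64, 1 }, { 64, 128, 1 } };

	mark_range(tbl, 2, 32, 96, false);	/* insert: both -> 2 */
	mark_range(tbl, 2, 32, 96, true);	/* overwrite: both -> 1 */
	printf("refcounts: %llu %llu\n",
	       (unsigned long long)tbl[0].refcount,
	       (unsigned long long)tbl[1].refcount);
	return 0;
}

The real trigger additionally handles the partial-overwrite early-out
(the BCH_BUCKET_MARK_OVERWRITE check) and rewrites a zero-refcount
reflink_v as a deleted key; the model skips both.
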
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 11defa3d99a546a1682af163c59615660bccf408..81ec55526ce9716df5cb2f4c1737a49164a3aa76 100644
@@ -744,7 +744,8 @@ void __bch2_cut_front(struct bpos where, struct bkey_s k)
        case KEY_TYPE_error:
        case KEY_TYPE_cookie:
                break;
-       case KEY_TYPE_extent: {
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v: {
                struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
                union bch_extent_entry *entry;
                bool seen_crc = false;
@@ -774,6 +775,12 @@ void __bch2_cut_front(struct bpos where, struct bkey_s k)
 
                break;
        }
+       case KEY_TYPE_reflink_p: {
+               struct bkey_s_reflink_p p = bkey_s_to_reflink_p(k);
+
+               le64_add_cpu(&p.v->idx, sub);
+               break;
+       }
        case KEY_TYPE_reservation:
                break;
        default:
@@ -968,6 +975,33 @@ static int __bch2_extent_atomic_end(struct btree_trans *trans,
                }
 
                break;
+       case KEY_TYPE_reflink_p: {
+               struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+               u64 idx = le64_to_cpu(p.v->idx);
+               unsigned sectors = end->offset - bkey_start_offset(p.k);
+               struct btree_iter *iter;
+               struct bkey_s_c r_k;
+
+               for_each_btree_key(trans, iter,
+                                  BTREE_ID_REFLINK, POS(0, idx + offset),
+                                  BTREE_ITER_SLOTS, r_k, ret) {
+                       if (bkey_cmp(bkey_start_pos(r_k.k),
+                                    POS(0, idx + sectors)) >= 0)
+                               break;
+
+                       *nr_iters += 1;
+                       if (*nr_iters >= max_iters) {
+                               struct bpos pos = bkey_start_pos(k.k);
+                               pos.offset += r_k.k->p.offset - idx;
+
+                               *end = bpos_min(*end, pos);
+                               break;
+                       }
+               }
+
+               bch2_trans_iter_put(trans, iter);
+               break;
+       }
        }
 
        return ret;
@@ -1561,17 +1595,17 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
        return false;
 }
 
-void bch2_extent_mark_replicas_cached(struct bch_fs *c,
-                                     struct bkey_s_extent e,
-                                     unsigned target,
-                                     unsigned nr_desired_replicas)
+void bch2_bkey_mark_replicas_cached(struct bch_fs *c, struct bkey_s k,
+                                   unsigned target,
+                                   unsigned nr_desired_replicas)
 {
+       struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
        union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
-       int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas;
+       int extra = bch2_bkey_durability(c, k.s_c) - nr_desired_replicas;
 
        if (target && extra > 0)
-               extent_for_each_ptr_decode(e, p, entry) {
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                        int n = bch2_extent_ptr_durability(c, p);
 
                        if (n && n <= extra &&
@@ -1582,7 +1616,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
                }
 
        if (extra > 0)
-               extent_for_each_ptr_decode(e, p, entry) {
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                        int n = bch2_extent_ptr_durability(c, p);
 
                        if (n && n <= extra) {
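
The reflink_p case in __bch2_cut_front() is easy to gloss over but
load-bearing: when `sub` sectors are trimmed off the front of the key,
v.idx must advance by the same amount so every remaining file offset
still maps to the same REFLINK offset. A standalone worked example of
the bookkeeping (made-up numbers, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* A reflink_p covering file sectors [100, 200) with idx = 1000:
	 * file sector 100 + n maps to REFLINK offset 1000 + n. */
	uint64_t start = 100, idx = 1000;

	/* bch2_cut_front(POS(inode, 140), k) trims 40 sectors: */
	uint64_t sub = 140 - start;

	start += sub;
	idx += sub;	/* le64_add_cpu(&p.v->idx, sub) */

	/* File sector 140 still maps to REFLINK offset 1040,
	 * the same data as before the split. */
	printf("start %llu -> idx %llu\n",
	       (unsigned long long)start, (unsigned long long)idx);
	return 0;
}
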
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 156d8e37045ad078279525c019f84f243c09bf9d..cef93af258589c4b55d16650650690023848b046 100644
@@ -306,6 +306,14 @@ static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k)
                        to_entry(&s.v->ptrs[s.v->nr_blocks]),
                };
        }
+       case KEY_TYPE_reflink_v: {
+               struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+               return (struct bkey_ptrs_c) {
+                       r.v->start,
+                       bkey_val_end(r),
+               };
+       }
        default:
                return (struct bkey_ptrs_c) { NULL, NULL };
        }
@@ -436,8 +444,8 @@ bch2_extent_can_insert(struct btree_trans *, struct btree_insert_entry *,
 void bch2_insert_fixup_extent(struct btree_trans *,
                              struct btree_insert_entry *);
 
-void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent,
-                                     unsigned, unsigned);
+void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
+                                   unsigned, unsigned);
 
 const struct bch_extent_ptr *
 bch2_extent_has_device(struct bkey_s_c_extent, unsigned);
@@ -452,17 +460,24 @@ static inline bool bkey_extent_is_data(const struct bkey *k)
        switch (k->type) {
        case KEY_TYPE_btree_ptr:
        case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_p:
+       case KEY_TYPE_reflink_v:
                return true;
        default:
                return false;
        }
 }
 
+/*
+ * Should extent be counted under inode->i_sectors?
+ */
 static inline bool bkey_extent_is_allocation(const struct bkey *k)
 {
        switch (k->type) {
        case KEY_TYPE_extent:
        case KEY_TYPE_reservation:
+       case KEY_TYPE_reflink_p:
+       case KEY_TYPE_reflink_v:
                return true;
        default:
                return false;
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index ef94aecaa7cbd9f02a83f587982c6c3753ce74c3..771fb111550d1e231554711b8cb6a67ae07b00c0 100644
@@ -16,6 +16,7 @@
 #include "io.h"
 #include "keylist.h"
 #include "quota.h"
+#include "reflink.h"
 #include "trace.h"
 
 #include <linux/aio.h>
@@ -201,9 +202,9 @@ static int inode_set_size(struct bch_inode_info *inode,
        return 0;
 }
 
-static int __must_check bch2_write_inode_size(struct bch_fs *c,
-                                             struct bch_inode_info *inode,
-                                             loff_t new_size, unsigned fields)
+int __must_check bch2_write_inode_size(struct bch_fs *c,
+                                      struct bch_inode_info *inode,
+                                      loff_t new_size, unsigned fields)
 {
        struct inode_new_size s = {
                .new_size       = new_size,
@@ -936,15 +937,12 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
 {
        struct bvec_iter iter;
        struct bio_vec bv;
-       unsigned nr_ptrs = bch2_bkey_nr_ptrs_allocated(k);
+       unsigned nr_ptrs = k.k->type == KEY_TYPE_reflink_v
+               ? 0 : bch2_bkey_nr_ptrs_allocated(k);
        unsigned state = k.k->type == KEY_TYPE_reservation
                ? SECTOR_RESERVED
                : SECTOR_ALLOCATED;
 
-       BUG_ON(bio->bi_iter.bi_sector   < bkey_start_offset(k.k));
-       BUG_ON(bio_end_sector(bio)      > k.k->p.offset);
-
-
        bio_for_each_segment(bv, bio, iter) {
                struct bch_page_state *s = bch2_page_state(bv.bv_page);
                unsigned i;
@@ -959,10 +957,11 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
 }
 
 static void readpage_bio_extend(struct readpages_iter *iter,
-                               struct bio *bio, u64 offset,
+                               struct bio *bio,
+                               unsigned sectors_this_extent,
                                bool get_more)
 {
-       while (bio_end_sector(bio) < offset &&
+       while (bio_sectors(bio) < sectors_this_extent &&
               bio->bi_vcnt < bio->bi_max_vecs) {
                pgoff_t page_offset = bio_end_sector(bio) >> PAGE_SECTOR_SHIFT;
                struct page *page = readpage_iter_next(iter);
@@ -1012,35 +1011,39 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
        struct bch_fs *c = trans->c;
        int flags = BCH_READ_RETRY_IF_STALE|
                BCH_READ_MAY_PROMOTE;
+       int ret = 0;
 
        rbio->c = c;
        rbio->start_time = local_clock();
-
+retry:
        while (1) {
                BKEY_PADDED(k) tmp;
                struct bkey_s_c k;
-               unsigned bytes, offset_into_extent;
+               unsigned bytes, sectors, offset_into_extent;
 
                bch2_btree_iter_set_pos(iter,
                                POS(inum, rbio->bio.bi_iter.bi_sector));
 
                k = bch2_btree_iter_peek_slot(iter);
-               BUG_ON(!k.k);
-
-               if (IS_ERR(k.k)) {
-                       int ret = btree_iter_err(iter);
-                       BUG_ON(!ret);
-                       bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
-                       bio_endio(&rbio->bio);
-                       return;
-               }
+               ret = bkey_err(k);
+               if (ret)
+                       break;
 
                bkey_reassemble(&tmp.k, k);
-               bch2_trans_unlock(trans);
                k = bkey_i_to_s_c(&tmp.k);
 
                offset_into_extent = iter->pos.offset -
                        bkey_start_offset(k.k);
+               sectors = k.k->size - offset_into_extent;
+
+               ret = bch2_read_indirect_extent(trans, iter,
+                                       &offset_into_extent, &tmp.k);
+               if (ret)
+                       break;
+
+               sectors = min(sectors, k.k->size - offset_into_extent);
+
+               bch2_trans_unlock(trans);
 
                if (readpages_iter) {
                        bool want_full_extent = false;
@@ -1055,13 +1058,11 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
                                                             (p.crc.compression_type != 0));
                        }
 
-                       readpage_bio_extend(readpages_iter,
-                                           &rbio->bio, k.k->p.offset,
-                                           want_full_extent);
+                       readpage_bio_extend(readpages_iter, &rbio->bio,
+                                           sectors, want_full_extent);
                }
 
-               bytes = min_t(unsigned, bio_sectors(&rbio->bio),
-                             (k.k->size - offset_into_extent)) << 9;
+               bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
                swap(rbio->bio.bi_iter.bi_size, bytes);
 
                if (rbio->bio.bi_iter.bi_size == bytes)
@@ -1078,6 +1079,12 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
                swap(rbio->bio.bi_iter.bi_size, bytes);
                bio_advance(&rbio->bio, bytes);
        }
+
+       if (ret == -EINTR)
+               goto retry;
+
+       bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
+       bio_endio(&rbio->bio);
 }
 
 void bch2_readahead(struct readahead_control *ractl)
@@ -2256,29 +2263,25 @@ out:
 
 /* truncate: */
 
-static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
-                        u64 start_offset, u64 end_offset, u64 *journal_seq)
+int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
+                  struct bpos end, struct bch_inode_info *inode,
+                  u64 new_i_size)
 {
-       struct bpos start       = POS(inode->v.i_ino, start_offset);
-       struct bpos end         = POS(inode->v.i_ino, end_offset);
+       struct bch_fs *c        = trans->c;
        unsigned max_sectors    = KEY_SIZE_MAX & (~0 << c->block_bits);
-       struct btree_trans trans;
-       struct btree_iter *iter;
        struct bkey_s_c k;
-       int ret = 0;
-
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
-
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start,
-                                  BTREE_ITER_INTENT);
+       int ret = 0, ret2 = 0;
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
-              !(ret = bkey_err(k)) &&
               bkey_cmp(iter->pos, end) < 0) {
                struct disk_reservation disk_res =
                        bch2_disk_reservation_init(c, 0);
                struct bkey_i delete;
 
+               ret = bkey_err(k);
+               if (ret)
+                       goto btree_err;
+
                bkey_init(&delete.k);
                delete.k.p = iter->pos;
 
@@ -2286,23 +2289,51 @@ static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
                bch2_key_resize(&delete.k, max_sectors);
                bch2_cut_back(end, &delete.k);
 
-               bch2_trans_begin_updates(&trans);
+               bch2_trans_begin_updates(trans);
 
-               ret = bch2_extent_update(&trans, inode,
+               ret = bch2_extent_update(trans, inode,
                                &disk_res, NULL, iter, &delete,
-                               0, true, true, NULL);
+                               new_i_size, false, true, NULL);
                bch2_disk_reservation_put(c, &disk_res);
-
-               if (ret == -EINTR)
+btree_err:
+               if (ret == -EINTR) {
+                       ret2 = ret;
                        ret = 0;
+               }
                if (ret)
                        break;
+       }
 
-               bch2_trans_cond_resched(&trans);
+       if (bkey_cmp(iter->pos, end) > 0) {
+               bch2_btree_iter_set_pos(iter, end);
+               ret = bch2_btree_iter_traverse(iter);
        }
 
+       return ret ?: ret2;
+}
+
+static int __bch2_fpunch(struct bch_fs *c, struct bch_inode_info *inode,
+                        u64 start_offset, u64 end_offset)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       int ret = 0;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  POS(inode->v.i_ino, start_offset),
+                                  BTREE_ITER_INTENT);
+
+       ret = bch2_fpunch_at(&trans, iter,
+                            POS(inode->v.i_ino, end_offset),
+                            inode, 0);
+
        bch2_trans_exit(&trans);
 
+       if (ret == -EINTR)
+               ret = 0;
+
        return ret;
 }
 
@@ -2510,7 +2541,7 @@ int bch2_truncate(struct bch_inode_info *inode, struct iattr *iattr)
 
        ret = __bch2_fpunch(c, inode,
                        round_up(iattr->ia_size, block_bytes(c)) >> 9,
-                       U64_MAX, &inode->ei_journal_seq);
+                       U64_MAX);
        if (unlikely(ret))
                goto err;
 
@@ -2557,8 +2588,7 @@ static long bch2_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
        truncate_pagecache_range(&inode->v, offset, offset + len - 1);
 
        if (discard_start < discard_end)
-               ret = __bch2_fpunch(c, inode, discard_start, discard_end,
-                                   &inode->ei_journal_seq);
+               ret = __bch2_fpunch(c, inode, discard_start, discard_end);
 err:
        bch2_pagecache_block_put(&inode->ei_pagecache_lock);
        inode_unlock(&inode->v);
@@ -2670,7 +2700,7 @@ bkey_err:
 
        ret = __bch2_fpunch(c, inode,
                        round_up(new_size, block_bytes(c)) >> 9,
-                       U64_MAX, &inode->ei_journal_seq);
+                       U64_MAX);
        if (ret)
                goto err;
 
@@ -2853,6 +2883,94 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
        return -EOPNOTSUPP;
 }
 
+static void mark_range_unallocated(struct bch_inode_info *inode,
+                                  loff_t start, loff_t end)
+{
+       pgoff_t index = start >> PAGE_SHIFT;
+       pgoff_t end_index = (end - 1) >> PAGE_SHIFT;
+       struct folio_batch fbatch;
+       unsigned i, j;
+
+       folio_batch_init(&fbatch);
+
+       while (filemap_get_folios(inode->v.i_mapping,
+                                 &index, end_index, &fbatch)) {
+               for (i = 0; i < folio_batch_count(&fbatch); i++) {
+                       struct folio *folio = fbatch.folios[i];
+                       struct bch_page_state *s;
+
+                       folio_lock(folio);
+                       s = bch2_page_state(&folio->page);
+
+                       if (s)
+                               for (j = 0; j < PAGE_SECTORS; j++)
+                                       s->s[j].nr_replicas = 0;
+
+                       folio_unlock(folio);
+               }
+               folio_batch_release(&fbatch);
+               cond_resched();
+       }
+}
+
+loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
+                            struct file *file_dst, loff_t pos_dst,
+                            loff_t len, unsigned remap_flags)
+{
+       struct bch_inode_info *src = file_bch_inode(file_src);
+       struct bch_inode_info *dst = file_bch_inode(file_dst);
+       struct bch_fs *c = src->v.i_sb->s_fs_info;
+       loff_t ret = 0;
+       loff_t aligned_len;
+
+       if (remap_flags & ~(REMAP_FILE_DEDUP|REMAP_FILE_ADVISORY))
+               return -EINVAL;
+
+       if (remap_flags & REMAP_FILE_DEDUP)
+               return -EOPNOTSUPP;
+
+       if ((pos_src & (block_bytes(c) - 1)) ||
+           (pos_dst & (block_bytes(c) - 1)))
+               return -EINVAL;
+
+       if (src == dst &&
+           abs(pos_src - pos_dst) < len)
+               return -EINVAL;
+
+       bch2_lock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
+
+       inode_dio_wait(&src->v);
+       inode_dio_wait(&dst->v);
+
+       ret = generic_remap_file_range_prep(file_src, pos_src,
+                                           file_dst, pos_dst,
+                                           &len, remap_flags);
+       if (ret < 0 || len == 0)
+               goto out_unlock;
+
+       aligned_len = round_up(len, block_bytes(c));
+
+       ret = write_invalidate_inode_pages_range(dst->v.i_mapping,
+                               pos_dst, pos_dst + aligned_len);
+       if (ret)
+               goto out_unlock;
+
+       mark_range_unallocated(src, pos_src, pos_src + aligned_len);
+
+       ret = bch2_remap_range(c, dst,
+                              POS(dst->v.i_ino, pos_dst >> 9),
+                              POS(src->v.i_ino, pos_src >> 9),
+                              aligned_len >> 9,
+                              pos_dst + len);
+       if (ret > 0)
+               ret = min(ret << 9, len);
+
+out_unlock:
+       bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
+
+       return ret;
+}
+
 /* fseek: */
 
 static int folio_data_offset(struct folio *folio, unsigned offset)
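
bch2_remap_file_range() above is what the VFS calls to service the
FICLONE/FICLONERANGE ioctls, so with this patch a reflink copy can be
made from userspace the same way as on btrfs or XFS. A minimal usage
example:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>	/* FICLONE */

int main(int argc, char **argv)
{
	int src, dst;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
		return 1;
	}

	src = open(argv[1], O_RDONLY);
	dst = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);

	/* Clone all of src into dst by reference -- no data copied. */
	if (src < 0 || dst < 0 || ioctl(dst, FICLONE, src)) {
		perror("reflink copy");
		return 1;
	}
	return 0;
}

Note that this patch rejects REMAP_FILE_DEDUP, so FIDEDUPERANGE would
still fail with EOPNOTSUPP; a whole-file FICLONE trivially satisfies
the block-alignment checks at the top of bch2_remap_file_range().
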
diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h
index e263b515e9019046709454d5cd7f6f105920faec..861ec25ab9efcb67b380a4a00d7a3f6a124f4de8 100644
@@ -9,6 +9,22 @@
 
 #include <linux/uio.h>
 
+struct quota_res;
+
+int bch2_extent_update(struct btree_trans *,
+                      struct bch_inode_info *,
+                      struct disk_reservation *,
+                      struct quota_res *,
+                      struct btree_iter *,
+                      struct bkey_i *,
+                      u64, bool, bool, s64 *);
+int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
+                  struct bpos, struct bch_inode_info *, u64);
+
+int __must_check bch2_write_inode_size(struct bch_fs *,
+                                      struct bch_inode_info *,
+                                      loff_t, unsigned);
+
 int bch2_writepage(struct page *, struct writeback_control *);
 int bch2_read_folio(struct file *, struct folio *);
 
@@ -28,6 +44,9 @@ int bch2_fsync(struct file *, loff_t, loff_t, int);
 int bch2_truncate(struct bch_inode_info *, struct iattr *);
 long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
 
+loff_t bch2_remap_file_range(struct file *, loff_t, struct file *,
+                            loff_t, loff_t, unsigned);
+
 loff_t bch2_llseek(struct file *, loff_t, int);
 
 vm_fault_t bch2_page_fault(struct vm_fault *);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 54e555fb4d5d80488be837f52833f5316d473cd9..fad019d3c3f583d78a9d69e920a970ff1de9e569 100644
@@ -1157,6 +1157,9 @@ static int bch2_fill_extent(struct bch_fs *c,
                struct extent_ptr_decoded p;
                int ret;
 
+               if (k.k->type == KEY_TYPE_reflink_v)
+                       flags |= FIEMAP_EXTENT_SHARED;
+
                bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                        int flags2 = 0;
                        u64 offset = p.ptr.offset;
@@ -1200,6 +1203,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        struct btree_iter *iter;
        struct bkey_s_c k;
        BKEY_PADDED(k) cur, prev;
+       unsigned offset_into_extent, sectors;
        bool have_extent = false;
        int ret = 0;
 
@@ -1212,15 +1216,36 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
 
        bch2_trans_init(&trans, c, 0, 0);
 
-       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
-                          POS(ei->v.i_ino, start >> 9), 0, k, ret) {
-               if (bkey_cmp(bkey_start_pos(k.k),
-                            POS(ei->v.i_ino, (start + len) >> 9)) >= 0)
-                       break;
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  POS(ei->v.i_ino, start >> 9),
+                                  BTREE_ITER_SLOTS);
+
+       while (bkey_cmp(iter->pos, POS(ei->v.i_ino, (start + len) >> 9)) < 0) {
+               k = bch2_btree_iter_peek_slot(iter);
+               ret = bkey_err(k);
+               if (ret)
+                       goto err;
 
                bkey_reassemble(&cur.k, k);
                k = bkey_i_to_s_c(&cur.k);
 
+               offset_into_extent      = iter->pos.offset -
+                       bkey_start_offset(k.k);
+               sectors                 = k.k->size - offset_into_extent;
+
+               ret = bch2_read_indirect_extent(&trans, iter,
+                                       &offset_into_extent, &cur.k);
+               if (ret)
+                       break;
+
+               sectors = min(sectors, k.k->size - offset_into_extent);
+
+               bch2_cut_front(POS(k.k->p.inode,
+                                  bkey_start_offset(k.k) + offset_into_extent),
+                              &cur.k);
+               bch2_key_resize(&cur.k.k, sectors);
+               cur.k.k.p.offset = iter->pos.offset + cur.k.k.size;
+
                if (bkey_extent_is_data(k.k) ||
                    k.k->type == KEY_TYPE_reservation) {
                        if (have_extent) {
@@ -1233,12 +1258,16 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
                        bkey_copy(&prev.k, &cur.k);
                        have_extent = true;
                }
+
+               bch2_btree_iter_set_pos(iter,
+                               POS(iter->pos.inode,
+                                   iter->pos.offset + sectors));
        }
 
        if (!ret && have_extent)
                ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k),
                                       FIEMAP_EXTENT_LAST);
-
+err:
        ret = bch2_trans_exit(&trans) ?: ret;
        return ret < 0 ? ret : 0;
 }
@@ -1286,6 +1315,7 @@ static const struct file_operations bch_file_operations = {
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = bch2_compat_fs_ioctl,
 #endif
+       .remap_file_range = bch2_remap_file_range,
 };
 
 static const struct inode_operations bch_file_inode_operations = {
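
On the reporting side, bch2_fill_extent() now tags reflink_v extents
with FIEMAP_EXTENT_SHARED, so userspace can see which ranges are
reflinked via the standard fiemap ioctl. A sketch of checking that:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	char buf[sizeof(struct fiemap) + 32 * sizeof(struct fiemap_extent)];
	struct fiemap *fm = (struct fiemap *) buf;
	unsigned i;
	int fd;

	if (argc != 2)
		return 1;

	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	memset(buf, 0, sizeof(buf));
	fm->fm_length		= ~0ULL;	/* map the whole file */
	fm->fm_extent_count	= 32;

	if (ioctl(fd, FS_IOC_FIEMAP, fm))
		return 1;

	for (i = 0; i < fm->fm_mapped_extents; i++)
		printf("%llu+%llu%s\n",
		       (unsigned long long)fm->fm_extents[i].fe_logical,
		       (unsigned long long)fm->fm_extents[i].fe_length,
		       fm->fm_extents[i].fe_flags & FIEMAP_EXTENT_SHARED
		       ? " (shared)" : "");
	return 0;
}
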
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index de07f0f1dd510b08f61c599a8bb5f5d50399a946..6edf5dd803f08c2fbc0cef5e510448fd81a47aeb 100644
@@ -59,7 +59,8 @@ static inline int ptrcmp(void *l, void *r)
 
 enum bch_inode_lock_op {
        INODE_LOCK              = (1U << 0),
-       INODE_UPDATE_LOCK       = (1U << 1),
+       INODE_PAGECACHE_BLOCK   = (1U << 1),
+       INODE_UPDATE_LOCK       = (1U << 2),
 };
 
 #define bch2_lock_inodes(_locks, ...)                                  \
@@ -71,9 +72,11 @@ do {                                                                 \
                                                                        \
        for (i = 1; i < ARRAY_SIZE(a); i++)                             \
                if (a[i] != a[i - 1]) {                                 \
-                       if (_locks & INODE_LOCK)                        \
+                       if ((_locks) & INODE_LOCK)                      \
                                down_write_nested(&a[i]->v.i_rwsem, i); \
-                       if (_locks & INODE_UPDATE_LOCK)                 \
+                       if ((_locks) & INODE_PAGECACHE_BLOCK)           \
+                               bch2_pagecache_block_get(&a[i]->ei_pagecache_lock);\
+                       if ((_locks) & INODE_UPDATE_LOCK)                       \
                                mutex_lock_nested(&a[i]->ei_update_lock, i);\
                }                                                       \
 } while (0)
@@ -87,9 +90,11 @@ do {                                                                 \
                                                                        \
        for (i = 1; i < ARRAY_SIZE(a); i++)                             \
                if (a[i] != a[i - 1]) {                                 \
-                       if (_locks & INODE_LOCK)                        \
+                       if ((_locks) & INODE_LOCK)                      \
                                up_write(&a[i]->v.i_rwsem);             \
-                       if (_locks & INODE_UPDATE_LOCK)                 \
+                       if ((_locks) & INODE_PAGECACHE_BLOCK)           \
+                               bch2_pagecache_block_put(&a[i]->ei_pagecache_lock);\
+                       if ((_locks) & INODE_UPDATE_LOCK)                       \
                                mutex_unlock(&a[i]->ei_update_lock);    \
                }                                                       \
 } while (0)
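
The bch2_lock_inodes() changes extend the existing pattern: the inode
array is sorted by pointer (ptrcmp(), above), so two tasks remapping
between the same pair of files always take INODE_LOCK and the new
INODE_PAGECACHE_BLOCK lock in the same order. That is what makes the
two-inode locking in bch2_remap_file_range() deadlock-free. The general
technique, in miniature — plain pthreads, not the kernel macros:

#include <pthread.h>

struct inode_model {
	pthread_mutex_t lock;
};

/* Lock two objects in address order so that concurrent lock_pair(a, b)
 * and lock_pair(b, a) callers cannot deadlock -- the same idea as
 * sorting by ptrcmp() in bch2_lock_inodes(). */
static void lock_pair(struct inode_model *a, struct inode_model *b)
{
	if (a > b) {
		struct inode_model *t = a;
		a = b;
		b = t;
	}

	pthread_mutex_lock(&a->lock);
	if (b != a)
		pthread_mutex_lock(&b->lock);
}

static void unlock_pair(struct inode_model *a, struct inode_model *b)
{
	pthread_mutex_unlock(&a->lock);
	if (b != a)
		pthread_mutex_unlock(&b->lock);
}
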
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index ed84572a9e676029bb25daef05ea1b1aa17cdcda..4d359931edb3090d6c3e1113de371e9d8096c5bd 100644
@@ -1041,6 +1041,7 @@ static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
 
 noinline
 static struct promote_op *__promote_alloc(struct bch_fs *c,
+                                         enum btree_id btree_id,
                                          struct bpos pos,
                                          struct extent_ptr_decoded *pick,
                                          struct bch_io_opts opts,
@@ -1097,6 +1098,7 @@ static struct promote_op *__promote_alloc(struct bch_fs *c,
                        (struct data_opts) {
                                .target = opts.promote_target
                        },
+                       btree_id,
                        bkey_s_c_null);
        BUG_ON(ret);
 
@@ -1134,7 +1136,11 @@ static inline struct promote_op *promote_alloc(struct bch_fs *c,
        if (!should_promote(c, k, pos, opts, flags))
                return NULL;
 
-       promote = __promote_alloc(c, pos, pick, opts, sectors, rbio);
+       promote = __promote_alloc(c,
+                                 k.k->type == KEY_TYPE_reflink_v
+                                 ? BTREE_ID_REFLINK
+                                 : BTREE_ID_EXTENTS,
+                                 pos, pick, opts, sectors, rbio);
        if (!promote)
                return NULL;
 
@@ -1278,18 +1284,25 @@ retry:
                           POS(inode, bvec_iter.bi_sector),
                           BTREE_ITER_SLOTS, k, ret) {
                BKEY_PADDED(k) tmp;
-               unsigned bytes, offset_into_extent;
+               unsigned bytes, sectors, offset_into_extent;
 
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
 
-               bch2_trans_unlock(&trans);
-
                offset_into_extent = iter->pos.offset -
                        bkey_start_offset(k.k);
+               sectors = k.k->size - offset_into_extent;
+
+               ret = bch2_read_indirect_extent(&trans, iter,
+                                       &offset_into_extent, &tmp.k);
+               if (ret)
+                       break;
 
-               bytes = min_t(unsigned, bvec_iter_sectors(bvec_iter),
-                             (k.k->size - offset_into_extent)) << 9;
+               sectors = min(sectors, k.k->size - offset_into_extent);
+
+               bch2_trans_unlock(&trans);
+
+               bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
                swap(bvec_iter.bi_size, bytes);
 
                ret = __bch2_read_extent(c, rbio, bvec_iter, k,
@@ -1569,6 +1582,48 @@ static void bch2_read_endio(struct bio *bio)
        bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
 }
 
+int bch2_read_indirect_extent(struct btree_trans *trans,
+                             struct btree_iter *extent_iter,
+                             unsigned *offset_into_extent,
+                             struct bkey_i *orig_k)
+{
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       u64 reflink_offset;
+       int ret;
+
+       if (orig_k->k.type != KEY_TYPE_reflink_p)
+               return 0;
+
+       reflink_offset = le64_to_cpu(bkey_i_to_reflink_p(orig_k)->v.idx) +
+               *offset_into_extent;
+
+       iter = __bch2_trans_get_iter(trans, BTREE_ID_REFLINK,
+                                    POS(0, reflink_offset),
+                                    BTREE_ITER_SLOTS, 1);
+       ret = PTR_ERR_OR_ZERO(iter);
+       if (ret)
+               return ret;
+
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
+
+       if (k.k->type != KEY_TYPE_reflink_v) {
+               __bcache_io_error(trans->c,
+                               "pointer to nonexistent indirect extent");
+               ret = -EIO;
+               goto err;
+       }
+
+       *offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
+       bkey_reassemble(orig_k, k);
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
 int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
                       struct bvec_iter iter, struct bkey_s_c k,
                       unsigned offset_into_extent,
@@ -1644,6 +1699,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig,
                pos.offset += offset_into_extent;
                pick.ptr.offset += pick.crc.offset +
                        offset_into_extent;
+               offset_into_extent              = 0;
                pick.crc.compressed_size        = bvec_iter_sectors(iter);
                pick.crc.uncompressed_size      = bvec_iter_sectors(iter);
                pick.crc.offset                 = 0;
@@ -1829,25 +1885,47 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
        rbio->c = c;
        rbio->start_time = local_clock();
 
-       for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
-                          POS(inode, rbio->bio.bi_iter.bi_sector),
-                          BTREE_ITER_SLOTS, k, ret) {
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+                                  POS(inode, rbio->bio.bi_iter.bi_sector),
+                                  BTREE_ITER_SLOTS);
+
+       while (1) {
                BKEY_PADDED(k) tmp;
-               unsigned bytes, offset_into_extent;
+               unsigned bytes, sectors, offset_into_extent;
+
+               bch2_btree_iter_set_pos(iter,
+                               POS(inode, rbio->bio.bi_iter.bi_sector));
+
+               k = bch2_btree_iter_peek_slot(iter);
+               ret = bkey_err(k);
+               if (ret)
+                       goto err;
 
-               /*
-                * Unlock the iterator while the btree node's lock is still in
-                * cache, before doing the IO:
-                */
                bkey_reassemble(&tmp.k, k);
                k = bkey_i_to_s_c(&tmp.k);
-               bch2_trans_unlock(&trans);
 
                offset_into_extent = iter->pos.offset -
                        bkey_start_offset(k.k);
+               sectors = k.k->size - offset_into_extent;
+
+               ret = bch2_read_indirect_extent(&trans, iter,
+                                       &offset_into_extent, &tmp.k);
+               if (ret)
+                       goto err;
+
+               /*
+                * With indirect extents, the amount of data to read is the min
+                * of the original extent and the indirect extent:
+                */
+               sectors = min(sectors, k.k->size - offset_into_extent);
+
+               /*
+                * Unlock the iterator while the btree node's lock is still in
+                * cache, before doing the IO:
+                */
+               bch2_trans_unlock(&trans);
 
-               bytes = min_t(unsigned, bio_sectors(&rbio->bio),
-                             (k.k->size - offset_into_extent)) << 9;
+               bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
                swap(rbio->bio.bi_iter.bi_size, bytes);
 
                if (rbio->bio.bi_iter.bi_size == bytes)
@@ -1856,21 +1934,18 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
                bch2_read_extent(c, rbio, k, offset_into_extent, flags);
 
                if (flags & BCH_READ_LAST_FRAGMENT)
-                       return;
+                       break;
 
                swap(rbio->bio.bi_iter.bi_size, bytes);
                bio_advance(&rbio->bio, bytes);
        }
-
-       /*
-        * If we get here, it better have been because there was an error
-        * reading a btree node
-        */
-       BUG_ON(!ret);
-       bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
-
+out:
        bch2_trans_exit(&trans);
+       return;
+err:
+       bcache_io_error(c, &rbio->bio, "btree IO error: %i", ret);
        bch2_rbio_done(rbio);
+       goto out;
 }
 
 void bch2_fs_io_exit(struct bch_fs *c)
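
In the read paths, the pattern after bch2_read_indirect_extent() is
always the same: `sectors` is first computed from the original extent,
then clamped against the reflink_v the lookup returned, because a
single indirect extent may cover less than the reflink_p does. A worked
example of the arithmetic, standalone and with made-up numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* reflink_p: file extent of 100 sectors; the read starts
	 * 10 sectors into it. */
	uint64_t p_size = 100, offset_into_extent = 10;
	uint64_t sectors = p_size - offset_into_extent;		/* 90 */

	/* bch2_read_indirect_extent() swaps in the reflink_v at
	 * idx + offset_into_extent; suppose that indirect extent is
	 * 40 sectors and the lookup landed 10 sectors into it. */
	uint64_t v_size = 40, v_offset_into_extent = 10;

	/* min of the original extent and the indirect extent: */
	if (sectors > v_size - v_offset_into_extent)
		sectors = v_size - v_offset_into_extent;	/* 30 */

	printf("this loop iteration reads %llu sectors\n",
	       (unsigned long long)sectors);
	return 0;
}

The same clamp appears three times in this patch — bchfs_read(), the
read-retry path, and bch2_read() — hence the repeated comment about
taking the min of the original and indirect extents.
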
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index aa437cb05fe7c0a6cd731022df7f415cfa220c93..a768ccc90f1ffea2c0ba09065fac75ba3a833a22 100644
@@ -99,6 +99,9 @@ struct bch_devs_mask;
 struct cache_promote_op;
 struct extent_ptr_decoded;
 
+int bch2_read_indirect_extent(struct btree_trans *, struct btree_iter *,
+                             unsigned *, struct bkey_i *);
+
 enum bch_read_flags {
        BCH_READ_RETRY_IF_STALE         = 1 << 0,
        BCH_READ_MAY_PROMOTE            = 1 << 1,
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 301cb72bd3e475b46e6d5e11cc9102bb6651218f..dc3b03d6e627cb1aac2359a580176dc62482894d 100644
@@ -34,7 +34,8 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
        return 0;
 }
 
-static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags,
+                                  enum btree_id btree_id)
 {
        struct btree_trans trans;
        struct btree_iter *iter;
@@ -44,8 +45,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
-                                  POS_MIN, BTREE_ITER_PREFETCH);
+       iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
+                                  BTREE_ITER_PREFETCH);
 
        while ((k = bch2_btree_iter_peek(iter)).k &&
               !(ret = bkey_err(k))) {
@@ -98,6 +99,12 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
        return ret;
 }
 
+static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+{
+       return  __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_EXTENTS) ?:
+               __bch2_dev_usrdata_drop(c, dev_idx, flags, BTREE_ID_REFLINK);
+}
+
 static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
 {
        struct btree_trans trans;
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index ffa0c2bbe290fa36224aef7d9bb10a280311341b..05bb74a36230b419630f8707d77aa58bbbc94f2d 100644
@@ -63,13 +63,14 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+       iter = bch2_trans_get_iter(&trans, m->btree_id,
                                   bkey_start_pos(&bch2_keylist_front(keys)->k),
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
        while (1) {
                struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
-               struct bkey_i_extent *insert, *new =
+               struct bkey_i *insert;
+               struct bkey_i_extent *new =
                        bkey_i_to_extent(bch2_keylist_front(keys));
                BKEY_PADDED(k) _new, _insert;
                const union bch_extent_entry *entry;
@@ -86,26 +87,25 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                        goto nomatch;
 
                if (m->data_cmd == DATA_REWRITE &&
-                   !bch2_extent_has_device(bkey_s_c_to_extent(k),
-                                           m->data_opts.rewrite_dev))
+                   !bch2_bkey_has_device(k, m->data_opts.rewrite_dev))
                        goto nomatch;
 
                bkey_reassemble(&_insert.k, k);
-               insert = bkey_i_to_extent(&_insert.k);
+               insert = &_insert.k;
 
                bkey_copy(&_new.k, bch2_keylist_front(keys));
                new = bkey_i_to_extent(&_new.k);
 
-               bch2_cut_front(iter->pos, &insert->k_i);
+               bch2_cut_front(iter->pos, insert);
                bch2_cut_back(new->k.p, &insert->k);
                bch2_cut_back(insert->k.p, &new->k);
 
                if (m->data_cmd == DATA_REWRITE)
-                       bch2_bkey_drop_device(extent_i_to_s(insert).s,
+                       bch2_bkey_drop_device(bkey_i_to_s(insert),
                                              m->data_opts.rewrite_dev);
 
                extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) {
-                       if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) {
+                       if (bch2_bkey_has_device(bkey_i_to_s_c(insert), p.ptr.dev)) {
                                /*
                                 * raced with another move op? extent already
                                 * has a pointer to the device we just wrote
@@ -114,25 +114,25 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                                continue;
                        }
 
-                       bch2_extent_ptr_decoded_append(&insert->k_i, &p);
+                       bch2_extent_ptr_decoded_append(insert, &p);
                        did_work = true;
                }
 
                if (!did_work)
                        goto nomatch;
 
-               bch2_bkey_narrow_crcs(&insert->k_i,
+               bch2_bkey_narrow_crcs(insert,
                                (struct bch_extent_crc_unpacked) { 0 });
-               bch2_extent_normalize(c, extent_i_to_s(insert).s);
-               bch2_extent_mark_replicas_cached(c, extent_i_to_s(insert),
-                                                op->opts.background_target,
-                                                op->opts.data_replicas);
+               bch2_extent_normalize(c, bkey_i_to_s(insert));
+               bch2_bkey_mark_replicas_cached(c, bkey_i_to_s(insert),
+                                              op->opts.background_target,
+                                              op->opts.data_replicas);
 
                /*
                 * If we're not fully overwriting @k, and it's compressed, we
                 * need a reservation for all the pointers in @insert
                 */
-               nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) -
+               nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(insert)) -
                         m->nr_ptrs_reserved;
 
                if (insert->k.size < k.k->size &&
@@ -148,7 +148,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                }
 
                bch2_trans_update(&trans,
-                               BTREE_INSERT_ENTRY(iter, &insert->k_i));
+                               BTREE_INSERT_ENTRY(iter, insert));
 
                ret = bch2_trans_commit(&trans, &op->res,
                                op_journal_seq(op),
@@ -213,10 +213,12 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
                            struct bch_io_opts io_opts,
                            enum data_cmd data_cmd,
                            struct data_opts data_opts,
+                           enum btree_id btree_id,
                            struct bkey_s_c k)
 {
        int ret;
 
+       m->btree_id     = btree_id;
        m->data_cmd     = data_cmd;
        m->data_opts    = data_opts;
        m->nr_ptrs_reserved = 0;
@@ -264,11 +266,12 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
                break;
        }
        case DATA_REWRITE: {
+               struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
                const union bch_extent_entry *entry;
                struct extent_ptr_decoded p;
                unsigned compressed_sectors = 0;
 
-               extent_for_each_ptr_decode(bkey_s_c_to_extent(k), p, entry)
+               bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
                        if (!p.ptr.cached &&
                            p.crc.compression_type != BCH_COMPRESSION_NONE &&
                            bch2_dev_in_target(c, p.ptr.dev, data_opts.target))
@@ -391,6 +394,7 @@ static int bch2_move_extent(struct bch_fs *c,
                            struct moving_context *ctxt,
                            struct write_point_specifier wp,
                            struct bch_io_opts io_opts,
+                           enum btree_id btree_id,
                            struct bkey_s_c k,
                            enum data_cmd data_cmd,
                            struct data_opts data_opts)
@@ -443,7 +447,7 @@ static int bch2_move_extent(struct bch_fs *c,
        io->rbio.bio.bi_end_io          = move_read_endio;
 
        ret = bch2_migrate_write_init(c, &io->write, wp, io_opts,
-                                     data_cmd, data_opts, k);
+                                     data_cmd, data_opts, btree_id, k);
        if (ret)
                goto err_free_pages;
 
@@ -473,16 +477,17 @@ err:
        return ret;
 }
 
-int bch2_move_data(struct bch_fs *c,
-                  struct bch_ratelimit *rate,
-                  struct write_point_specifier wp,
-                  struct bpos start,
-                  struct bpos end,
-                  move_pred_fn pred, void *arg,
-                  struct bch_move_stats *stats)
+static int __bch2_move_data(struct bch_fs *c,
+               struct moving_context *ctxt,
+               struct bch_ratelimit *rate,
+               struct write_point_specifier wp,
+               struct bpos start,
+               struct bpos end,
+               move_pred_fn pred, void *arg,
+               struct bch_move_stats *stats,
+               enum btree_id btree_id)
 {
        bool kthread = (current->flags & PF_KTHREAD) != 0;
-       struct moving_context ctxt = { .stats = stats };
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        BKEY_PADDED(k) tmp;
        struct btree_trans trans;
@@ -493,17 +498,13 @@ int bch2_move_data(struct bch_fs *c,
        u64 delay, cur_inum = U64_MAX;
        int ret = 0, ret2;
 
-       closure_init_stack(&ctxt.cl);
-       INIT_LIST_HEAD(&ctxt.reads);
-       init_waitqueue_head(&ctxt.wait);
-
        bch2_trans_init(&trans, c, 0, 0);
 
        stats->data_type = BCH_DATA_USER;
-       stats->btree_id = BTREE_ID_EXTENTS;
+       stats->btree_id = btree_id;
        stats->pos      = POS_MIN;
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, start,
+       iter = bch2_trans_get_iter(&trans, btree_id, start,
                                   BTREE_ITER_PREFETCH);
 
        if (rate)
@@ -528,7 +529,7 @@ int bch2_move_data(struct bch_fs *c,
 
                        if (unlikely(freezing(current))) {
                                bch2_trans_unlock(&trans);
-                               move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
+                               move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
                                try_to_freeze();
                        }
                } while (delay);
@@ -579,12 +580,12 @@ peek:
                k = bkey_i_to_s_c(&tmp.k);
                bch2_trans_unlock(&trans);
 
-               ret2 = bch2_move_extent(c, &ctxt, wp, io_opts, k,
+               ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k,
                                        data_cmd, data_opts);
                if (ret2) {
                        if (ret2 == -ENOMEM) {
                                /* memory allocation failure, wait for some IO to finish */
-                               bch2_move_ctxt_wait_for_io(&ctxt);
+                               bch2_move_ctxt_wait_for_io(ctxt);
                                continue;
                        }
 
@@ -602,7 +603,32 @@ next_nondata:
                bch2_trans_cond_resched(&trans);
        }
 out:
-       bch2_trans_exit(&trans);
+       ret = bch2_trans_exit(&trans) ?: ret;
+
+       return ret;
+}
+
+int bch2_move_data(struct bch_fs *c,
+                  struct bch_ratelimit *rate,
+                  struct write_point_specifier wp,
+                  struct bpos start,
+                  struct bpos end,
+                  move_pred_fn pred, void *arg,
+                  struct bch_move_stats *stats)
+{
+       struct moving_context ctxt = { .stats = stats };
+       int ret;
+
+       closure_init_stack(&ctxt.cl);
+       INIT_LIST_HEAD(&ctxt.reads);
+       init_waitqueue_head(&ctxt.wait);
+
+       stats->data_type = BCH_DATA_USER;
+
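+       /* user data now lives in both the extents and reflink btrees: */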
+       ret =   __bch2_move_data(c, &ctxt, rate, wp, start, end,
+                                pred, arg, stats, BTREE_ID_EXTENTS) ?:
+               __bch2_move_data(c, &ctxt, rate, wp, start, end,
+                                pred, arg, stats, BTREE_ID_REFLINK);
 
        move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
        closure_sync(&ctxt.cl);
index 71b3d2b2ddb6ddbcc1ef744a5e00676578563336..0acd1720d4f8571b3f7cf6f3e883be575c952d2b 100644 (file)
@@ -25,6 +25,7 @@ struct data_opts {
 };
 
 struct migrate_write {
+       enum btree_id           btree_id;
        enum data_cmd           data_cmd;
        struct data_opts        data_opts;
 
@@ -44,7 +45,7 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *,
                            struct write_point_specifier,
                            struct bch_io_opts,
                            enum data_cmd, struct data_opts,
-                           struct bkey_s_c);
+                           enum btree_id, struct bkey_s_c);
 
 typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
                                struct bkey_s_c,
index 3742b241807c3e47a67dd96980781f06901daa18..f2899ba9ad43822e33d58aa7e4c5088b849db823 100644 (file)
@@ -236,7 +236,8 @@ static void replay_now_at(struct journal *j, u64 seq)
                bch2_journal_pin_put(j, j->replay_journal_seq++);
 }
 
-static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
+static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
+                                 struct bkey_i *k)
 {
        struct btree_trans trans;
        struct btree_iter *iter, *split_iter;
@@ -255,7 +256,7 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
 retry:
        bch2_trans_begin(&trans);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
+       iter = bch2_trans_get_iter(&trans, btree_id,
                                   bkey_start_pos(&k->k),
                                   BTREE_ITER_INTENT);
 
@@ -341,22 +342,17 @@ static int bch2_journal_replay(struct bch_fs *c,
        for_each_journal_key(keys, i) {
                replay_now_at(j, keys.journal_seq_base + i->journal_seq);
 
-               switch (i->btree_id) {
-               case BTREE_ID_ALLOC:
+               if (i->btree_id == BTREE_ID_ALLOC)
                        ret = bch2_alloc_replay_key(c, i->k);
-                       break;
-               case BTREE_ID_EXTENTS:
-                       ret = bch2_extent_replay_key(c, i->k);
-                       break;
-               default:
+               else if (btree_node_type_is_extents(i->btree_id))
+                       ret = bch2_extent_replay_key(c, i->btree_id, i->k);
+               else
                        ret = bch2_btree_insert(c, i->btree_id, i->k,
                                                NULL, NULL,
                                                BTREE_INSERT_NOFAIL|
                                                BTREE_INSERT_LAZY_RW|
                                                BTREE_INSERT_JOURNAL_REPLAY|
                                                BTREE_INSERT_NOMARK);
-                       break;
-               }
 
                if (ret) {
                        bch_err(c, "journal replay: error %d while replaying key",
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
new file mode 100644 (file)
index 0000000..dcca9c1
--- /dev/null
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "btree_update.h"
+#include "extents.h"
+#include "fs.h"
+#include "fs-io.h"
+#include "reflink.h"
+
+#include <linux/sched/signal.h>
+
+/* reflink pointers */
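+
+/*
+ * A reflink pointer, KEY_TYPE_reflink_p, lives in the extents btree and
+ * points via v->idx at an indirect extent in the reflink btree:
+ */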
+
+const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+
+       if (bkey_val_bytes(p.k) != sizeof(*p.v))
+               return "incorrect value size";
+
+       return NULL;
+}
+
+void bch2_reflink_p_to_text(struct printbuf *out, struct bch_fs *c,
+                           struct bkey_s_c k)
+{
+       struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
+
+       pr_buf(out, "idx %llu", le64_to_cpu(p.v->idx));
+}
+
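+/*
+ * Adjacent reflink pointers merge only when the right key's idx picks up
+ * exactly where the left key's range ends; if the merged key would exceed
+ * KEY_SIZE_MAX, we merge as much as we can:
+ */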
+enum merge_result bch2_reflink_p_merge(struct bch_fs *c,
+                                      struct bkey_s _l, struct bkey_s _r)
+{
+       struct bkey_s_reflink_p l = bkey_s_to_reflink_p(_l);
+       struct bkey_s_reflink_p r = bkey_s_to_reflink_p(_r);
+
+       if (le64_to_cpu(l.v->idx) + l.k->size != le64_to_cpu(r.v->idx))
+               return BCH_MERGE_NOMERGE;
+
+       if ((u64) l.k->size + r.k->size > KEY_SIZE_MAX) {
+               bch2_key_resize(l.k, KEY_SIZE_MAX);
+               __bch2_cut_front(l.k->p, _r);
+               return BCH_MERGE_PARTIAL;
+       }
+
+       bch2_key_resize(l.k, l.k->size + r.k->size);
+
+       return BCH_MERGE_MERGE;
+}
+
+/* indirect extents */
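+
+/*
+ * An indirect extent, KEY_TYPE_reflink_v, holds the actual data pointers
+ * plus a count of the reflink pointers that reference it:
+ */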
+
+const char *bch2_reflink_v_invalid(const struct bch_fs *c, struct bkey_s_c k)
+{
+       struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+       if (bkey_val_bytes(r.k) < sizeof(*r.v))
+               return "incorrect value size";
+
+       return bch2_bkey_ptrs_invalid(c, k);
+}
+
+void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c,
+                           struct bkey_s_c k)
+{
+       struct bkey_s_c_reflink_v r = bkey_s_c_to_reflink_v(k);
+
+       pr_buf(out, "refcount: %llu ", le64_to_cpu(r.v->refcount));
+
+       bch2_bkey_ptrs_to_text(out, c, k);
+}
+
+/*
+ * bch2_remap_range() depends on bch2_extent_update(), which for now depends
+ * on various things tied to the Linux VFS for inode updates:
+ */
+#ifndef NO_BCACHEFS_FS
+
+static int bch2_make_extent_indirect(struct btree_trans *trans,
+                                    struct btree_iter *extent_iter,
+                                    struct bkey_i_extent *e)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *reflink_iter;
+       struct bkey_s_c k;
+       struct bkey_i_reflink_v *r_v;
+       struct bkey_i_reflink_p *r_p;
+       int ret;
+
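+       /*
+        * Scan the reflink btree for a deleted slot big enough for the new
+        * indirect extent, starting from the hint left by the previous
+        * allocation:
+        */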
+       for_each_btree_key(trans, reflink_iter, BTREE_ID_REFLINK,
+                          POS(0, c->reflink_hint),
+                          BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
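+               /* the reflink btree lives entirely at inode 0; wrap around: */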
+               if (reflink_iter->pos.inode) {
+                       bch2_btree_iter_set_pos(reflink_iter, POS_MIN);
+                       continue;
+               }
+
+               if (bkey_deleted(k.k) && e->k.size <= k.k->size)
+                       break;
+       }
+
+       if (ret)
+               goto err;
+
+       /* rewind iter to start of hole, if necessary: */
+       bch2_btree_iter_set_pos(reflink_iter, bkey_start_pos(k.k));
+
+       r_v = bch2_trans_kmalloc(trans, sizeof(*r_v) + bkey_val_bytes(&e->k));
+       ret = PTR_ERR_OR_ZERO(r_v);
+       if (ret)
+               goto err;
+
+       bkey_reflink_v_init(&r_v->k_i);
+       r_v->k.p        = reflink_iter->pos;
+       bch2_key_resize(&r_v->k, e->k.size);
+       r_v->k.version  = e->k.version;
+
+       set_bkey_val_u64s(&r_v->k, bkey_val_u64s(&r_v->k) +
+                         bkey_val_u64s(&e->k));
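+       /*
+        * refcount starts at zero; it should be bumped when the reflink_p
+        * key referencing this indirect extent is marked:
+        */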
+       r_v->v.refcount = 0;
+       memcpy(r_v->v.start, e->v.start, bkey_val_bytes(&e->k));
+
+       bch2_trans_update(trans, BTREE_INSERT_ENTRY(reflink_iter, &r_v->k_i));
+
+       r_p = bch2_trans_kmalloc(trans, sizeof(*r_p));
+       ret = PTR_ERR_OR_ZERO(r_p);
+       if (ret)
+               goto err;
+
+       e->k.type = KEY_TYPE_reflink_p;
+       r_p = bkey_i_to_reflink_p(&e->k_i);
+       set_bkey_val_bytes(&r_p->k, sizeof(r_p->v));
+       r_p->v.idx = cpu_to_le64(bkey_start_offset(&r_v->k));
+
+       bch2_trans_update(trans, BTREE_INSERT_ENTRY(extent_iter, &r_p->k_i));
+err:
+       if (!IS_ERR(reflink_iter)) {
+               c->reflink_hint = reflink_iter->pos.offset;
+               bch2_trans_iter_put(trans, reflink_iter);
+       }
+
+       return ret;
+}
+
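+/*
+ * Advance @iter to the next key we can remap (an extent or an existing
+ * reflink pointer), returning bkey_s_c_null once we pass @end:
+ */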
+static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
+{
+       struct bkey_s_c k = bch2_btree_iter_peek(iter);
+
+       while (1) {
+               if (bkey_err(k))
+                       return k;
+
+               if (bkey_cmp(iter->pos, end) >= 0)
+                       return bkey_s_c_null;
+
+               if (k.k->type == KEY_TYPE_extent ||
+                   k.k->type == KEY_TYPE_reflink_p)
+                       return k;
+
+               k = bch2_btree_iter_next(iter);
+       }
+}
+
+s64 bch2_remap_range(struct bch_fs *c,
+                    struct bch_inode_info *dst_inode,
+                    struct bpos dst_start, struct bpos src_start,
+                    u64 remap_sectors, u64 new_i_size)
+{
+       struct btree_trans trans;
+       struct btree_iter *dst_iter, *src_iter;
+       struct bkey_s_c src_k;
+       BKEY_PADDED(k) new_dst, new_src;
+       struct bpos dst_end = dst_start, src_end = src_start;
+       struct bpos dst_want, src_want;
+       u64 src_done, dst_done;
+       int ret = 0;
+
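+       /* set the superblock feature bit before creating any reflink keys: */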
+       if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) {
+               mutex_lock(&c->sb_lock);
+               if (!(c->sb.features & (1ULL << BCH_FEATURE_REFLINK))) {
+                       c->disk_sb.sb->features[0] |=
+                               cpu_to_le64(1ULL << BCH_FEATURE_REFLINK);
+
+                       bch2_write_super(c);
+               }
+               mutex_unlock(&c->sb_lock);
+       }
+
+       dst_end.offset += remap_sectors;
+       src_end.offset += remap_sectors;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
+
+       src_iter = __bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start,
+                                        BTREE_ITER_INTENT, 1);
+       dst_iter = __bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, dst_start,
+                                        BTREE_ITER_INTENT, 2);
+
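+       /*
+        * Walk the source range: holes are punched through to the
+        * destination, plain extents are first made indirect, and the
+        * destination then gets reflink pointers to the indirect extents:
+        */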
+       while (1) {
+               bch2_trans_begin_updates(&trans);
+               trans.mem_top = 0;
+
+               if (fatal_signal_pending(current)) {
+                       ret = -EINTR;
+                       goto err;
+               }
+
+               src_k = get_next_src(src_iter, src_end);
+               ret = bkey_err(src_k);
+               if (ret)
+                       goto btree_err;
+
+               src_done = bpos_min(src_iter->pos, src_end).offset -
+                       src_start.offset;
+               dst_want = POS(dst_start.inode, dst_start.offset + src_done);
+
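+               /* hole in the source? punch the matching range in the dst: */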
+               if (bkey_cmp(dst_iter->pos, dst_want) < 0) {
+                       ret = bch2_fpunch_at(&trans, dst_iter, dst_want,
+                                            dst_inode, new_i_size);
+                       if (ret)
+                               goto btree_err;
+                       continue;
+               }
+
+               BUG_ON(bkey_cmp(dst_iter->pos, dst_want));
+
+               if (!bkey_cmp(dst_iter->pos, dst_end))
+                       break;
+
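+               /* convert a plain source extent to an indirect extent: */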
+               if (src_k.k->type == KEY_TYPE_extent) {
+                       bkey_reassemble(&new_src.k, src_k);
+                       src_k = bkey_i_to_s_c(&new_src.k);
+
+                       bch2_cut_front(src_iter->pos,   &new_src.k);
+                       bch2_cut_back(src_end,          &new_src.k.k);
+
+                       ret = bch2_make_extent_indirect(&trans, src_iter,
+                                               bkey_i_to_extent(&new_src.k));
+                       if (ret)
+                               goto btree_err;
+
+                       BUG_ON(src_k.k->type != KEY_TYPE_reflink_p);
+               }
+
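+               /*
+                * Copy the reflink pointer, with idx adjusted for how far
+                * into the source key we currently are:
+                */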
+               if (src_k.k->type == KEY_TYPE_reflink_p) {
+                       struct bkey_s_c_reflink_p src_p =
+                               bkey_s_c_to_reflink_p(src_k);
+                       struct bkey_i_reflink_p *dst_p =
+                               bkey_reflink_p_init(&new_dst.k);
+
+                       u64 offset = le64_to_cpu(src_p.v->idx) +
+                               (src_iter->pos.offset -
+                                bkey_start_offset(src_k.k));
+
+                       dst_p->v.idx = cpu_to_le64(offset);
+               } else {
+                       BUG();
+               }
+
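+               /* size the new dst key by whichever range runs out first: */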
+               new_dst.k.k.p = dst_iter->pos;
+               bch2_key_resize(&new_dst.k.k,
+                               min(src_k.k->p.offset - src_iter->pos.offset,
+                                   dst_end.offset - dst_iter->pos.offset));
+
+               ret = bch2_extent_update(&trans, dst_inode, NULL, NULL,
+                                        dst_iter, &new_dst.k,
+                                        new_i_size, false, true, NULL);
+               if (ret)
+                       goto btree_err;
+
+               dst_done = dst_iter->pos.offset - dst_start.offset;
+               src_want = POS(src_start.inode, src_start.offset + dst_done);
+               bch2_btree_iter_set_pos(src_iter, src_want);
+btree_err:
+               if (ret == -EINTR)
+                       ret = 0;
+               if (ret)
+                       goto err;
+       }
+
+       BUG_ON(bkey_cmp(dst_iter->pos, dst_end));
+err:
+       BUG_ON(bkey_cmp(dst_iter->pos, dst_end) > 0);
+
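+       /* iterator positions are in 512 byte sectors, hence the shift by 9: */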
+       dst_done = dst_iter->pos.offset - dst_start.offset;
+       new_i_size = min(dst_iter->pos.offset << 9, new_i_size);
+
+       ret = bch2_trans_exit(&trans) ?: ret;
+
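+       /* if the remap extended the file, update i_size under the lock: */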
+       mutex_lock(&dst_inode->ei_update_lock);
+       if (dst_inode->v.i_size < new_i_size) {
+               i_size_write(&dst_inode->v, new_i_size);
+               ret = bch2_write_inode_size(c, dst_inode, new_i_size,
+                                           ATTR_MTIME|ATTR_CTIME);
+       }
+       mutex_unlock(&dst_inode->ei_update_lock);
+
+       return dst_done ?: ret;
+}
+
+#endif /* NO_BCACHEFS_FS */
diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h
new file mode 100644 (file)
index 0000000..327618c
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_REFLINK_H
+#define _BCACHEFS_REFLINK_H
+
+const char *bch2_reflink_p_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+enum merge_result bch2_reflink_p_merge(struct bch_fs *,
+                                      struct bkey_s, struct bkey_s);
+
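+/*
+ * These ops should get picked up by the bkey_ops table in bkey_methods.c
+ * via the bch2_bkey_ops_##name naming convention:
+ */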
+#define bch2_bkey_ops_reflink_p (struct bkey_ops) {            \
+       .key_invalid    = bch2_reflink_p_invalid,               \
+       .val_to_text    = bch2_reflink_p_to_text,               \
+       .key_merge      = bch2_reflink_p_merge,                 \
+}
+
+const char *bch2_reflink_v_invalid(const struct bch_fs *, struct bkey_s_c);
+void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *,
+                           struct bkey_s_c);
+
+#define bch2_bkey_ops_reflink_v (struct bkey_ops) {            \
+       .key_invalid    = bch2_reflink_v_invalid,               \
+       .val_to_text    = bch2_reflink_v_to_text,               \
+}
+
+#ifndef NO_BCACHEFS_FS
+s64 bch2_remap_range(struct bch_fs *, struct bch_inode_info *,
+                    struct bpos, struct bpos, u64, u64);
+#endif /* NO_BCACHEFS_FS */
+
+#endif /* _BCACHEFS_REFLINK_H */
index 7a9a7ec26c93d3285c7b10cdef3f2172ef9e9852..4fb142f3d39c9af1f90037e17c7bf876669dacd5 100644 (file)
@@ -113,6 +113,7 @@ void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
                extent_to_replicas(k, e);
                break;
        case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
                e->data_type = BCH_DATA_USER;
                extent_to_replicas(k, e);
                break;