bcachefs: bkey_on_stack
author     Kent Overstreet <kent.overstreet@gmail.com>
Sat, 9 Nov 2019 21:01:15 +0000 (16:01 -0500)
committer  Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:32 +0000 (17:08 -0400)
This implements a bkey_on_stack type: small bkeys are kept in an
on-stack buffer, with a fallback to mempool allocation when a key is
too big to fit.
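
For illustration, the call-site pattern this introduces looks roughly
like the following sketch (helpers as defined in the new
bkey_on_stack.h; c is the filesystem, k the key being copied, as in
the hunks below; transaction setup and error handling omitted):

    struct bkey_on_stack sk;

    bkey_on_stack_init(&sk);

    /* falls back to c->large_bkey_pool if k needs more than the 12 on-stack u64s */
    bkey_on_stack_realloc(&sk, c, k.k->u64s);
    bkey_reassemble(sk.k, k);

    /* ... use sk.k where a BKEY_PADDED(k) temporary was used before ... */

    bkey_on_stack_exit(&sk, c);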

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
12 files changed:
fs/bcachefs/bcachefs.h
fs/bcachefs/bkey_on_stack.h [new file with mode: 0644]
fs/bcachefs/bkey_sort.c
fs/bcachefs/ec.c
fs/bcachefs/extents.c
fs/bcachefs/fs-io.c
fs/bcachefs/fs.c
fs/bcachefs/io.c
fs/bcachefs/migrate.c
fs/bcachefs/move.c
fs/bcachefs/reflink.c
fs/bcachefs/super.c

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f8a040115fd1cbc9301d52372957b4aa046a8b85..344cf982124fabc9cb4610bb4ff855a296d5be55 100644
@@ -729,6 +729,8 @@ struct bch_fs {
 
        atomic64_t              key_version;
 
+       mempool_t               large_bkey_pool;
+
        /* REBALANCE */
        struct bch_fs_rebalance rebalance;
 
diff --git a/fs/bcachefs/bkey_on_stack.h b/fs/bcachefs/bkey_on_stack.h
new file mode 100644
index 0000000..d473903
--- /dev/null
+++ b/fs/bcachefs/bkey_on_stack.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_ON_STACK_H
+#define _BCACHEFS_BKEY_ON_STACK_H
+
+#include "bcachefs.h"
+
+struct bkey_on_stack {
+       struct bkey_i   *k;
+       u64             onstack[12];
+};
+
+static inline void bkey_on_stack_realloc(struct bkey_on_stack *s,
+                                        struct bch_fs *c, unsigned u64s)
+{
+       if (s->k == (void *) s->onstack &&
+           u64s > ARRAY_SIZE(s->onstack)) {
+               s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
+               memcpy(s->k, s->onstack, sizeof(s->onstack));
+       }
+}
+
+static inline void bkey_on_stack_init(struct bkey_on_stack *s)
+{
+       s->k = (void *) s->onstack;
+}
+
+static inline void bkey_on_stack_exit(struct bkey_on_stack *s,
+                                     struct bch_fs *c)
+{
+       if (s->k != (void *) s->onstack)
+               mempool_free(s->k, &c->large_bkey_pool);
+       s->k = NULL;
+}
+
+#endif /* _BCACHEFS_BKEY_ON_STACK_H */
diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c
index 2cac269b386f96ec9774b8dab64eb9823fd19336..f5c0507ad79da34df01f241f35dfc4f43b9cd151 100644
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
+#include "bkey_on_stack.h"
 #include "bkey_sort.h"
 #include "bset.h"
 #include "extents.h"
@@ -292,8 +293,10 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
        struct bkey l_unpacked, r_unpacked;
        struct bkey_s l, r;
        struct btree_nr_keys nr;
+       struct bkey_on_stack split;
 
        memset(&nr, 0, sizeof(nr));
+       bkey_on_stack_init(&split);
 
        heap_resort(iter, extent_sort_cmp, NULL);
 
@@ -349,13 +352,13 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 
                        extent_sort_sift(iter, b, _r - iter->data);
                } else if (bkey_cmp(l.k->p, r.k->p) > 0) {
-                       BKEY_PADDED(k) tmp;
+                       bkey_on_stack_realloc(&split, c, l.k->u64s);
 
                        /*
                         * r wins, but it overlaps in the middle of l - split l:
                         */
-                       bkey_reassemble(&tmp.k, l.s_c);
-                       bch2_cut_back(bkey_start_pos(r.k), &tmp.k.k);
+                       bkey_reassemble(split.k, l.s_c);
+                       bch2_cut_back(bkey_start_pos(r.k), &split.k->k);
 
                        __bch2_cut_front(r.k->p, l);
                        extent_save(b, lk, l.k);
@@ -363,7 +366,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
                        extent_sort_sift(iter, b, 0);
 
                        extent_sort_append(c, f, &nr, dst->start,
-                                          &prev, bkey_i_to_s(&tmp.k));
+                                          &prev, bkey_i_to_s(split.k));
                } else {
                        bch2_cut_back(bkey_start_pos(r.k), l.k);
                        extent_save(b, lk, l.k);
@@ -373,6 +376,8 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
        extent_sort_advance_prev(f, &nr, dst->start, &prev);
 
        dst->u64s = cpu_to_le16((u64 *) prev - dst->_data);
+
+       bkey_on_stack_exit(&split, c);
        return nr;
 }
 
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index f32b8e6bf2ce7c1c7ac0d157ec6ab874133455d9..b24f867520c3a318fb766edd1d22db1d6724342b 100644
@@ -4,6 +4,7 @@
 
 #include "bcachefs.h"
 #include "alloc_foreground.h"
+#include "bkey_on_stack.h"
 #include "bset.h"
 #include "btree_gc.h"
 #include "btree_update.h"
@@ -777,9 +778,10 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
        struct bkey_s_c k;
        struct bkey_s_extent e;
        struct bch_extent_ptr *ptr;
-       BKEY_PADDED(k) tmp;
+       struct bkey_on_stack sk;
        int ret = 0, dev, idx;
 
+       bkey_on_stack_init(&sk);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -804,8 +806,9 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
 
                dev = s->key.v.ptrs[idx].dev;
 
-               bkey_reassemble(&tmp.k, k);
-               e = bkey_i_to_s_extent(&tmp.k);
+               bkey_on_stack_realloc(&sk, c, k.k->u64s);
+               bkey_reassemble(sk.k, k);
+               e = bkey_i_to_s_extent(sk.k);
 
                extent_for_each_ptr(e, ptr)
                        if (ptr->dev != dev)
@@ -816,7 +819,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
 
                extent_stripe_ptr_add(e, s, ptr, idx);
 
-               bch2_trans_update(&trans, iter, &tmp.k);
+               bch2_trans_update(&trans, iter, sk.k);
 
                ret = bch2_trans_commit(&trans, NULL, NULL,
                                        BTREE_INSERT_ATOMIC|
@@ -829,6 +832,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
        }
 
        bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
 
        return ret;
 }
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index b12798103763090cafd64c80bfd356071254a8b8..46eeaa574e860a0f9b5f4e49a662ad3f604e8b7d 100644
@@ -8,6 +8,7 @@
 
 #include "bcachefs.h"
 #include "bkey_methods.h"
+#include "bkey_on_stack.h"
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
@@ -1132,7 +1133,11 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
                break;
        }
        case BCH_EXTENT_OVERLAP_MIDDLE: {
-               BKEY_PADDED(k) split;
+               struct bkey_on_stack split;
+
+               bkey_on_stack_init(&split);
+               bkey_on_stack_realloc(&split, c, k.k->u64s);
+
                /*
                 * The insert key falls 'in the middle' of k
                 * The insert key splits k in 3:
@@ -1147,18 +1152,19 @@ extent_squash(struct bch_fs *c, struct btree_iter *iter,
                 * modify k _before_ doing the insert (which will move
                 * what k points to)
                 */
-               bkey_reassemble(&split.k, k.s_c);
-               split.k.k.needs_whiteout |= bkey_written(l->b, _k);
+               bkey_reassemble(split.k, k.s_c);
+               split.k->k.needs_whiteout |= bkey_written(l->b, _k);
 
-               bch2_cut_back(bkey_start_pos(&insert->k), &split.k.k);
-               BUG_ON(bkey_deleted(&split.k.k));
+               bch2_cut_back(bkey_start_pos(&insert->k), &split.k->k);
+               BUG_ON(bkey_deleted(&split.k->k));
 
                __bch2_cut_front(insert->k.p, k);
                BUG_ON(bkey_deleted(k.k));
                extent_save(l->b, _k, k.k);
                bch2_btree_iter_fix_key_modified(iter, l->b, _k);
 
-               extent_bset_insert(c, iter, &split.k);
+               extent_bset_insert(c, iter, split.k);
+               bkey_on_stack_exit(&split, c);
                break;
        }
        }
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 657559c2db14799eda842d9ee9dd2115fa45db1c..478630fdf643924077f537c62c6eb757db91502b 100644
@@ -3,6 +3,7 @@
 
 #include "bcachefs.h"
 #include "alloc_foreground.h"
+#include "bkey_on_stack.h"
 #include "btree_update.h"
 #include "buckets.h"
 #include "clock.h"
@@ -691,6 +692,18 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k)
        }
 }
 
+static bool extent_partial_reads_expensive(struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       struct bch_extent_crc_unpacked crc;
+       const union bch_extent_entry *i;
+
+       bkey_for_each_crc(k.k, ptrs, crc, i)
+               if (crc.csum_type || crc.compression_type)
+                       return true;
+       return false;
+}
+
 static void readpage_bio_extend(struct readpages_iter *iter,
                                struct bio *bio,
                                unsigned sectors_this_extent,
@@ -744,15 +757,17 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
                       struct readpages_iter *readpages_iter)
 {
        struct bch_fs *c = trans->c;
+       struct bkey_on_stack sk;
        int flags = BCH_READ_RETRY_IF_STALE|
                BCH_READ_MAY_PROMOTE;
        int ret = 0;
 
        rbio->c = c;
        rbio->start_time = local_clock();
+
+       bkey_on_stack_init(&sk);
 retry:
        while (1) {
-               BKEY_PADDED(k) tmp;
                struct bkey_s_c k;
                unsigned bytes, sectors, offset_into_extent;
 
@@ -764,15 +779,16 @@ retry:
                if (ret)
                        break;
 
-               bkey_reassemble(&tmp.k, k);
-               k = bkey_i_to_s_c(&tmp.k);
+               bkey_on_stack_realloc(&sk, c, k.k->u64s);
+               bkey_reassemble(sk.k, k);
+               k = bkey_i_to_s_c(sk.k);
 
                offset_into_extent = iter->pos.offset -
                        bkey_start_offset(k.k);
                sectors = k.k->size - offset_into_extent;
 
                ret = bch2_read_indirect_extent(trans,
-                                       &offset_into_extent, &tmp.k);
+                                       &offset_into_extent, sk.k);
                if (ret)
                        break;
 
@@ -780,22 +796,9 @@ retry:
 
                bch2_trans_unlock(trans);
 
-               if (readpages_iter) {
-                       bool want_full_extent = false;
-
-                       if (bkey_extent_is_data(k.k)) {
-                               struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-                               const union bch_extent_entry *i;
-                               struct extent_ptr_decoded p;
-
-                               bkey_for_each_ptr_decode(k.k, ptrs, p, i)
-                                       want_full_extent |= ((p.crc.csum_type != 0) |
-                                                            (p.crc.compression_type != 0));
-                       }
-
-                       readpage_bio_extend(readpages_iter, &rbio->bio,
-                                           sectors, want_full_extent);
-               }
+               if (readpages_iter)
+                       readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
+                                           extent_partial_reads_expensive(k));
 
                bytes = min(sectors, bio_sectors(&rbio->bio)) << 9;
                swap(rbio->bio.bi_iter.bi_size, bytes);
@@ -809,7 +812,7 @@ retry:
                bch2_read_extent(c, rbio, k, offset_into_extent, flags);
 
                if (flags & BCH_READ_LAST_FRAGMENT)
-                       return;
+                       break;
 
                swap(rbio->bio.bi_iter.bi_size, bytes);
                bio_advance(&rbio->bio, bytes);
@@ -818,8 +821,12 @@ retry:
        if (ret == -EINTR)
                goto retry;
 
-       bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
-       bio_endio(&rbio->bio);
+       if (ret) {
+               bcache_io_error(c, &rbio->bio, "btree IO error %i", ret);
+               bio_endio(&rbio->bio);
+       }
+
+       bkey_on_stack_exit(&sk, c);
 }
 
 void bch2_readahead(struct readahead_control *ractl)
@@ -2353,6 +2360,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct address_space *mapping = inode->v.i_mapping;
+       struct bkey_on_stack copy;
        struct btree_trans trans;
        struct btree_iter *src, *dst, *del = NULL;
        loff_t shift, new_size;
@@ -2362,6 +2370,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
        if ((offset | len) & (block_bytes(c) - 1))
                return -EINVAL;
 
+       bkey_on_stack_init(&copy);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
 
        /*
@@ -2430,7 +2439,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
        while (1) {
                struct disk_reservation disk_res =
                        bch2_disk_reservation_init(c, 0);
-               BKEY_PADDED(k) copy;
                struct bkey_i delete;
                struct bkey_s_c k;
                struct bpos next_pos;
@@ -2455,34 +2463,35 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
                    bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
                        break;
 reassemble:
-               bkey_reassemble(&copy.k, k);
+               bkey_on_stack_realloc(&copy, c, k.k->u64s);
+               bkey_reassemble(copy.k, k);
 
                if (insert &&
                    bkey_cmp(bkey_start_pos(k.k), move_pos) < 0) {
-                       bch2_cut_front(move_pos, &copy.k);
-                       bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k.k));
+                       bch2_cut_front(move_pos, copy.k);
+                       bch2_btree_iter_set_pos(src, bkey_start_pos(&copy.k->k));
                }
 
-               copy.k.k.p.offset += shift >> 9;
-               bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k.k));
+               copy.k->k.p.offset += shift >> 9;
+               bch2_btree_iter_set_pos(dst, bkey_start_pos(&copy.k->k));
 
-               ret = bch2_extent_atomic_end(dst, &copy.k, &atomic_end);
+               ret = bch2_extent_atomic_end(dst, copy.k, &atomic_end);
                if (ret)
                        goto bkey_err;
 
-               if (bkey_cmp(atomic_end, copy.k.k.p)) {
+               if (bkey_cmp(atomic_end, copy.k->k.p)) {
                        if (insert) {
                                move_pos = atomic_end;
                                move_pos.offset -= shift >> 9;
                                goto reassemble;
                        } else {
-                               bch2_cut_back(atomic_end, &copy.k.k);
+                               bch2_cut_back(atomic_end, &copy.k->k);
                        }
                }
 
                bkey_init(&delete.k);
                delete.k.p = src->pos;
-               bch2_key_resize(&delete.k, copy.k.k.size);
+               bch2_key_resize(&delete.k, copy.k->k.size);
 
                next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
 
@@ -2495,12 +2504,12 @@ reassemble:
                 * by the triggers machinery:
                 */
                if (insert &&
-                   bkey_cmp(bkey_start_pos(&copy.k.k), delete.k.p) < 0) {
-                       bch2_cut_back(bkey_start_pos(&copy.k.k), &delete.k);
+                   bkey_cmp(bkey_start_pos(&copy.k->k), delete.k.p) < 0) {
+                       bch2_cut_back(bkey_start_pos(&copy.k->k), &delete.k);
                } else if (!insert &&
-                          bkey_cmp(copy.k.k.p,
+                          bkey_cmp(copy.k->k.p,
                                    bkey_start_pos(&delete.k)) > 0) {
-                       bch2_cut_front(copy.k.k.p, &delete);
+                       bch2_cut_front(copy.k->k.p, &delete);
 
                        del = bch2_trans_copy_iter(&trans, src);
                        BUG_ON(IS_ERR_OR_NULL(del));
@@ -2509,10 +2518,10 @@ reassemble:
                                bkey_start_pos(&delete.k));
                }
 
-               bch2_trans_update(&trans, dst, &copy.k);
+               bch2_trans_update(&trans, dst, copy.k);
                bch2_trans_update(&trans, del ?: src, &delete);
 
-               if (copy.k.k.size == k.k->size) {
+               if (copy.k->k.size == k.k->size) {
                        /*
                         * If we're moving the entire extent, we can skip
                         * running triggers:
@@ -2521,10 +2530,10 @@ reassemble:
                } else {
                        /* We might end up splitting compressed extents: */
                        unsigned nr_ptrs =
-                               bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&copy.k));
+                               bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(copy.k));
 
                        ret = bch2_disk_reservation_get(c, &disk_res,
-                                       copy.k.k.size, nr_ptrs,
+                                       copy.k->k.size, nr_ptrs,
                                        BCH_DISK_RESERVATION_NOFAIL);
                        BUG_ON(ret);
                }
@@ -2559,6 +2568,7 @@ bkey_err:
        }
 err:
        bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&copy, c);
        bch2_pagecache_block_put(&inode->ei_pagecache_lock);
        inode_unlock(&inode->v);
        return ret;
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index b241164f6f7e154f35cd04db8cd162a355f2e04e..e8cdae3c114b89066de1bca6cba8036a94df4be3 100644
@@ -3,6 +3,7 @@
 
 #include "bcachefs.h"
 #include "acl.h"
+#include "bkey_on_stack.h"
 #include "btree_update.h"
 #include "buckets.h"
 #include "chardev.h"
@@ -875,7 +876,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        struct btree_trans trans;
        struct btree_iter *iter;
        struct bkey_s_c k;
-       BKEY_PADDED(k) cur, prev;
+       struct bkey_on_stack cur, prev;
        struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
        unsigned offset_into_extent, sectors;
        bool have_extent = false;
@@ -888,6 +889,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
        if (start + len < start)
                return -EINVAL;
 
+       bkey_on_stack_init(&cur);
+       bkey_on_stack_init(&prev);
        bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -902,15 +905,17 @@ retry:
                        continue;
                }
 
-               bkey_reassemble(&cur.k, k);
-               k = bkey_i_to_s_c(&cur.k);
+               bkey_on_stack_realloc(&cur, c, k.k->u64s);
+               bkey_on_stack_realloc(&prev, c, k.k->u64s);
+               bkey_reassemble(cur.k, k);
+               k = bkey_i_to_s_c(cur.k);
 
                offset_into_extent      = iter->pos.offset -
                        bkey_start_offset(k.k);
                sectors                 = k.k->size - offset_into_extent;
 
                ret = bch2_read_indirect_extent(&trans,
-                                       &offset_into_extent, &cur.k);
+                                       &offset_into_extent, cur.k);
                if (ret)
                        break;
 
@@ -920,19 +925,19 @@ retry:
                        bch2_cut_front(POS(k.k->p.inode,
                                           bkey_start_offset(k.k) +
                                           offset_into_extent),
-                                      &cur.k);
-               bch2_key_resize(&cur.k.k, sectors);
-               cur.k.k.p = iter->pos;
-               cur.k.k.p.offset += cur.k.k.size;
+                                      cur.k);
+               bch2_key_resize(&cur.k->k, sectors);
+               cur.k->k.p = iter->pos;
+               cur.k->k.p.offset += cur.k->k.size;
 
                if (have_extent) {
                        ret = bch2_fill_extent(c, info,
-                                       bkey_i_to_s_c(&prev.k), 0);
+                                       bkey_i_to_s_c(prev.k), 0);
                        if (ret)
                                break;
                }
 
-               bkey_copy(&prev.k, &cur.k);
+               bkey_copy(prev.k, cur.k);
                have_extent = true;
 
                if (k.k->type == KEY_TYPE_reflink_v)
@@ -945,10 +950,12 @@ retry:
                goto retry;
 
        if (!ret && have_extent)
-               ret = bch2_fill_extent(c, info, bkey_i_to_s_c(&prev.k),
+               ret = bch2_fill_extent(c, info, bkey_i_to_s_c(prev.k),
                                       FIEMAP_EXTENT_LAST);
 
        ret = bch2_trans_exit(&trans) ?: ret;
+       bkey_on_stack_exit(&cur, c);
+       bkey_on_stack_exit(&prev, c);
        return ret < 0 ? ret : 0;
 }
 
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index f53eee7accc8953405b047f05bd345f86d294b43..4fe61705ae75b54f65c73516474cb88a086d3813 100644
@@ -8,6 +8,7 @@
 
 #include "bcachefs.h"
 #include "alloc_foreground.h"
+#include "bkey_on_stack.h"
 #include "bset.h"
 #include "btree_update.h"
 #include "buckets.h"
@@ -394,12 +395,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
 int bch2_write_index_default(struct bch_write_op *op)
 {
        struct bch_fs *c = op->c;
+       struct bkey_on_stack sk;
        struct keylist *keys = &op->insert_keys;
        struct bkey_i *k = bch2_keylist_front(keys);
        struct btree_trans trans;
        struct btree_iter *iter;
        int ret;
 
+       bkey_on_stack_init(&sk);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -407,13 +410,14 @@ int bch2_write_index_default(struct bch_write_op *op)
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
        do {
-               BKEY_PADDED(k) tmp;
+               k = bch2_keylist_front(keys);
 
-               bkey_copy(&tmp.k, bch2_keylist_front(keys));
+               bkey_on_stack_realloc(&sk, c, k->k.u64s);
+               bkey_copy(sk.k, k);
 
                bch2_trans_begin_updates(&trans);
 
-               ret = bch2_extent_update(&trans, iter, &tmp.k,
+               ret = bch2_extent_update(&trans, iter, sk.k,
                                         &op->res, op_journal_seq(op),
                                         op->new_i_size, &op->i_sectors_delta);
                if (ret == -EINTR)
@@ -421,13 +425,14 @@ int bch2_write_index_default(struct bch_write_op *op)
                if (ret)
                        break;
 
-               if (bkey_cmp(iter->pos, bch2_keylist_front(keys)->k.p) < 0)
-                       bch2_cut_front(iter->pos, bch2_keylist_front(keys));
+               if (bkey_cmp(iter->pos, k->k.p) < 0)
+                       bch2_cut_front(iter->pos, k);
                else
                        bch2_keylist_pop_front(keys);
        } while (!bch2_keylist_empty(keys));
 
        bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
 
        return ret;
 }
@@ -1463,13 +1468,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
 {
        struct btree_trans trans;
        struct btree_iter *iter;
-       BKEY_PADDED(k) tmp;
+       struct bkey_on_stack sk;
        struct bkey_s_c k;
        int ret;
 
        flags &= ~BCH_READ_LAST_FRAGMENT;
        flags |= BCH_READ_MUST_CLONE;
 
+       bkey_on_stack_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -1481,11 +1487,12 @@ retry:
        if (bkey_err(k))
                goto err;
 
-       bkey_reassemble(&tmp.k, k);
-       k = bkey_i_to_s_c(&tmp.k);
+       bkey_on_stack_realloc(&sk, c, k.k->u64s);
+       bkey_reassemble(sk.k, k);
+       k = bkey_i_to_s_c(sk.k);
        bch2_trans_unlock(&trans);
 
-       if (!bch2_bkey_matches_ptr(c, bkey_i_to_s_c(&tmp.k),
+       if (!bch2_bkey_matches_ptr(c, k,
                                   rbio->pick.ptr,
                                   rbio->pos.offset -
                                   rbio->pick.crc.offset)) {
@@ -1502,6 +1509,7 @@ retry:
 out:
        bch2_rbio_done(rbio);
        bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
        return;
 err:
        rbio->bio.bi_status = BLK_STS_IOERR;
@@ -1514,12 +1522,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
 {
        struct btree_trans trans;
        struct btree_iter *iter;
+       struct bkey_on_stack sk;
        struct bkey_s_c k;
        int ret;
 
        flags &= ~BCH_READ_LAST_FRAGMENT;
        flags |= BCH_READ_MUST_CLONE;
 
+       bkey_on_stack_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
@@ -1527,18 +1537,18 @@ retry:
        for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS,
                           POS(inode, bvec_iter.bi_sector),
                           BTREE_ITER_SLOTS, k, ret) {
-               BKEY_PADDED(k) tmp;
                unsigned bytes, sectors, offset_into_extent;
 
-               bkey_reassemble(&tmp.k, k);
-               k = bkey_i_to_s_c(&tmp.k);
+               bkey_on_stack_realloc(&sk, c, k.k->u64s);
+               bkey_reassemble(sk.k, k);
+               k = bkey_i_to_s_c(sk.k);
 
                offset_into_extent = iter->pos.offset -
                        bkey_start_offset(k.k);
                sectors = k.k->size - offset_into_extent;
 
                ret = bch2_read_indirect_extent(&trans,
-                                       &offset_into_extent, &tmp.k);
+                                       &offset_into_extent, sk.k);
                if (ret)
                        break;
 
@@ -1577,6 +1587,7 @@ err:
        rbio->bio.bi_status = BLK_STS_IOERR;
 out:
        bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
        bch2_rbio_done(rbio);
 }
 
@@ -1633,7 +1644,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
        struct btree_trans trans;
        struct btree_iter *iter;
        struct bkey_s_c k;
-       BKEY_PADDED(k) new;
+       struct bkey_on_stack new;
        struct bch_extent_crc_unpacked new_crc;
        u64 data_offset = rbio->pos.offset - rbio->pick.crc.offset;
        int ret;
@@ -1641,6 +1652,7 @@ static void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
        if (rbio->pick.crc.compression_type)
                return;
 
+       bkey_on_stack_init(&new);
        bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
@@ -1651,8 +1663,9 @@ retry:
        if (IS_ERR_OR_NULL(k.k))
                goto out;
 
-       bkey_reassemble(&new.k, k);
-       k = bkey_i_to_s_c(&new.k);
+       bkey_on_stack_realloc(&new, c, k.k->u64s);
+       bkey_reassemble(new.k, k);
+       k = bkey_i_to_s_c(new.k);
 
        if (bversion_cmp(k.k->version, rbio->version) ||
            !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
@@ -1671,10 +1684,10 @@ retry:
                goto out;
        }
 
-       if (!bch2_bkey_narrow_crcs(&new.k, new_crc))
+       if (!bch2_bkey_narrow_crcs(new.k, new_crc))
                goto out;
 
-       bch2_trans_update(&trans, iter, &new.k);
+       bch2_trans_update(&trans, iter, new.k);
        ret = bch2_trans_commit(&trans, NULL, NULL,
                                BTREE_INSERT_ATOMIC|
                                BTREE_INSERT_NOFAIL|
@@ -1683,6 +1696,7 @@ retry:
                goto retry;
 out:
        bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&new, c);
 }
 
 /* Inner part that may run in process context */
@@ -2114,6 +2128,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
 {
        struct btree_trans trans;
        struct btree_iter *iter;
+       struct bkey_on_stack sk;
        struct bkey_s_c k;
        unsigned flags = BCH_READ_RETRY_IF_STALE|
                BCH_READ_MAY_PROMOTE|
@@ -2127,6 +2142,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
        rbio->c = c;
        rbio->start_time = local_clock();
 
+       bkey_on_stack_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 retry:
        bch2_trans_begin(&trans);
@@ -2135,7 +2151,6 @@ retry:
                                   POS(inode, rbio->bio.bi_iter.bi_sector),
                                   BTREE_ITER_SLOTS);
        while (1) {
-               BKEY_PADDED(k) tmp;
                unsigned bytes, sectors, offset_into_extent;
 
                bch2_btree_iter_set_pos(iter,
@@ -2146,15 +2161,16 @@ retry:
                if (ret)
                        goto err;
 
-               bkey_reassemble(&tmp.k, k);
-               k = bkey_i_to_s_c(&tmp.k);
-
                offset_into_extent = iter->pos.offset -
                        bkey_start_offset(k.k);
                sectors = k.k->size - offset_into_extent;
 
+               bkey_on_stack_realloc(&sk, c, k.k->u64s);
+               bkey_reassemble(sk.k, k);
+               k = bkey_i_to_s_c(sk.k);
+
                ret = bch2_read_indirect_extent(&trans,
-                                       &offset_into_extent, &tmp.k);
+                                       &offset_into_extent, sk.k);
                if (ret)
                        goto err;
 
@@ -2186,6 +2202,7 @@ retry:
        }
 out:
        bch2_trans_exit(&trans);
+       bkey_on_stack_exit(&sk, c);
        return;
 err:
        if (ret == -EINTR)
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index de8522f754e284acc986d115ac8f5f099b4d9bd5..4dacbd637d021b5121877f23311989d7b9d19b0c 100644
@@ -4,6 +4,7 @@
  */
 
 #include "bcachefs.h"
+#include "bkey_on_stack.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "buckets.h"
@@ -40,9 +41,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
        struct btree_trans trans;
        struct btree_iter *iter;
        struct bkey_s_c k;
-       BKEY_PADDED(key) tmp;
+       struct bkey_on_stack sk;
        int ret = 0;
 
+       bkey_on_stack_init(&sk);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
        iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
@@ -58,9 +60,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
                        continue;
                }
 
-               bkey_reassemble(&tmp.key, k);
+               bkey_on_stack_realloc(&sk, c, k.k->u64s);
+               bkey_reassemble(sk.k, k);
 
-               ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key),
+               ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k),
                                    dev_idx, flags, false);
                if (ret)
                        break;
@@ -70,11 +73,11 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
                 * will do the appropriate thing with it (turning it into a
                 * KEY_TYPE_error key, or just a discard if it was a cached extent)
                 */
-               bch2_extent_normalize(c, bkey_i_to_s(&tmp.key));
+               bch2_extent_normalize(c, bkey_i_to_s(sk.k));
 
-               bch2_btree_iter_set_pos(iter, bkey_start_pos(&tmp.key.k));
+               bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
 
-               bch2_trans_update(&trans, iter, &tmp.key);
+               bch2_trans_update(&trans, iter, sk.k);
 
                ret = bch2_trans_commit(&trans, NULL, NULL,
                                        BTREE_INSERT_ATOMIC|
@@ -92,6 +95,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
        }
 
        ret = bch2_trans_exit(&trans) ?: ret;
+       bkey_on_stack_exit(&sk, c);
 
        BUG_ON(ret == -EINTR);
 
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index c5d3375882d7c50f2daefc90125013cc124e6d7c..dbe35d16e7ddc9e8496699771f9d66aa86ec7d09 100644
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "alloc_foreground.h"
+#include "bkey_on_stack.h"
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
@@ -489,7 +490,7 @@ static int __bch2_move_data(struct bch_fs *c,
 {
        bool kthread = (current->flags & PF_KTHREAD) != 0;
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
-       BKEY_PADDED(k) tmp;
+       struct bkey_on_stack sk;
        struct btree_trans trans;
        struct btree_iter *iter;
        struct bkey_s_c k;
@@ -498,6 +499,7 @@ static int __bch2_move_data(struct bch_fs *c,
        u64 delay, cur_inum = U64_MAX;
        int ret = 0, ret2;
 
+       bkey_on_stack_init(&sk);
        bch2_trans_init(&trans, c, 0, 0);
 
        stats->data_type = BCH_DATA_USER;
@@ -577,8 +579,9 @@ peek:
                }
 
                /* unlock before doing IO: */
-               bkey_reassemble(&tmp.k, k);
-               k = bkey_i_to_s_c(&tmp.k);
+               bkey_on_stack_realloc(&sk, c, k.k->u64s);
+               bkey_reassemble(sk.k, k);
+               k = bkey_i_to_s_c(sk.k);
                bch2_trans_unlock(&trans);
 
                ret2 = bch2_move_extent(c, ctxt, wp, io_opts, btree_id, k,
@@ -605,6 +608,7 @@ next_nondata:
        }
 out:
        ret = bch2_trans_exit(&trans) ?: ret;
+       bkey_on_stack_exit(&sk, c);
 
        return ret;
 }
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 6e71c5e8f9a20d078423ce615b169aca4419e8ef..6d21086c3254ef693074d0e8ed6bff57f3efde00 100644
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
+#include "bkey_on_stack.h"
 #include "btree_update.h"
 #include "extents.h"
 #include "inode.h"
@@ -160,7 +161,8 @@ s64 bch2_remap_range(struct bch_fs *c,
        struct btree_trans trans;
        struct btree_iter *dst_iter, *src_iter;
        struct bkey_s_c src_k;
-       BKEY_PADDED(k) new_dst, new_src;
+       BKEY_PADDED(k) new_dst;
+       struct bkey_on_stack new_src;
        struct bpos dst_end = dst_start, src_end = src_start;
        struct bpos dst_want, src_want;
        u64 src_done, dst_done;
@@ -183,6 +185,7 @@ s64 bch2_remap_range(struct bch_fs *c,
        dst_end.offset += remap_sectors;
        src_end.offset += remap_sectors;
 
+       bkey_on_stack_init(&new_src);
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
 
        src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start,
@@ -222,14 +225,15 @@ s64 bch2_remap_range(struct bch_fs *c,
                        break;
 
                if (src_k.k->type == KEY_TYPE_extent) {
-                       bkey_reassemble(&new_src.k, src_k);
-                       src_k = bkey_i_to_s_c(&new_src.k);
+                       bkey_on_stack_realloc(&new_src, c, src_k.k->u64s);
+                       bkey_reassemble(new_src.k, src_k);
+                       src_k = bkey_i_to_s_c(new_src.k);
 
-                       bch2_cut_front(src_iter->pos,   &new_src.k);
-                       bch2_cut_back(src_end,          &new_src.k.k);
+                       bch2_cut_front(src_iter->pos,   new_src.k);
+                       bch2_cut_back(src_end,          &new_src.k->k);
 
                        ret = bch2_make_extent_indirect(&trans, src_iter,
-                                               bkey_i_to_extent(&new_src.k));
+                                               bkey_i_to_extent(new_src.k));
                        if (ret)
                                goto btree_err;
 
@@ -299,6 +303,7 @@ err:
        } while (ret2 == -EINTR);
 
        ret = bch2_trans_exit(&trans) ?: ret;
+       bkey_on_stack_exit(&new_src, c);
 
        percpu_ref_put(&c->writes);
 
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 14e2f6828cc6f6b8a1899845fd59b935f68d78cc..8c7b56a95f4b4a041d15aad567ff87d3c9588c7a 100644
@@ -475,6 +475,7 @@ static void bch2_fs_free(struct bch_fs *c)
        free_percpu(c->usage[0]);
        kfree(c->usage_base);
        free_percpu(c->pcpu);
+       mempool_exit(&c->large_bkey_pool);
        mempool_exit(&c->btree_bounce_pool);
        bioset_exit(&c->btree_bio);
        mempool_exit(&c->btree_interior_update_pool);
@@ -729,6 +730,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
            !(c->online_reserved = alloc_percpu(u64)) ||
            mempool_init_kvpmalloc_pool(&c->btree_bounce_pool, 1,
                                        btree_bytes(c)) ||
+           mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
            bch2_io_clock_init(&c->io_clock[READ]) ||
            bch2_io_clock_init(&c->io_clock[WRITE]) ||
            bch2_fs_journal_init(&c->journal) ||