erofs: rename per-CPU buffers to global buffer pool and make it configurable
authorChunhai Guo <guochunhai@vivo.com>
Tue, 2 Apr 2024 10:00:36 +0000 (04:00 -0600)
committerGao Xiang <hsiangkao@linux.alibaba.com>
Wed, 8 May 2024 09:12:49 +0000 (17:12 +0800)
It will cost more time if compressed buffers are allocated on demand for
low-latency algorithms (like lz4), so EROFS uses per-CPU buffers to keep
compressed data if in-place decompression is unfulfilled.  However, this is
wasteful of memory on a device with hundreds of CPUs, since only a small
number of CPUs decompress concurrently most of the time.

This patch renames it as 'global buffer pool' and makes it configurable.
This allows two or more CPUs to share a common buffer to reduce memory
occupation.

Suggested-by: Gao Xiang <xiang@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Signed-off-by: Chunhai Guo <guochunhai@vivo.com>
Link: https://lore.kernel.org/r/20240402100036.2673604-1-guochunhai@vivo.com
Signed-off-by: Sandeep Dhavale <dhavale@google.com>
Link: https://lore.kernel.org/r/20240408215231.3376659-1-dhavale@google.com
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
fs/erofs/Makefile
fs/erofs/decompressor.c
fs/erofs/internal.h
fs/erofs/pcpubuf.c [deleted file]
fs/erofs/super.c
fs/erofs/zutil.c

index 845eafdcee4a1ed07a3001264d2d8a2caa2022d8..20d1ec42244354820fe50416690df62d8f3c332c 100644 (file)
@@ -3,7 +3,7 @@
 obj-$(CONFIG_EROFS_FS) += erofs.o
 erofs-objs := super.o inode.o data.o namei.o dir.o sysfs.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o zutil.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
 erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
 erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
 erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
index 2ec9b2bb628d6b03bdf454c3fc6457b4065aabc8..e1239d88698444d19ca6f782f4bcae69a327d900 100644 (file)
@@ -54,7 +54,7 @@ static int z_erofs_load_lz4_config(struct super_block *sb,
        sbi->lz4.max_distance_pages = distance ?
                                        DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
                                        LZ4_MAX_DISTANCE_PAGES;
-       return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
+       return z_erofs_gbuf_growsize(sbi->lz4.max_pclusterblks);
 }
 
 /*
@@ -159,7 +159,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
 docopy:
        /* Or copy compressed data which can be overlapped to per-CPU buffer */
        in = rq->in;
-       src = erofs_get_pcpubuf(ctx->inpages);
+       src = z_erofs_get_gbuf(ctx->inpages);
        if (!src) {
                DBG_BUGON(1);
                kunmap_local(inpage);
@@ -260,7 +260,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
        } else if (maptype == 1) {
                vm_unmap_ram(src, ctx->inpages);
        } else if (maptype == 2) {
-               erofs_put_pcpubuf(src);
+               z_erofs_put_gbuf(src);
        } else if (maptype != 3) {
                DBG_BUGON(1);
                return -EFAULT;
index d28ccfc0352b1ae4728fb25fdf8672bd0ee81789..ee080d042ab34e48c16a582c0c9fe4422ef4f823 100644 (file)
@@ -463,11 +463,11 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
                                        struct erofs_workgroup *egrp);
 int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
                            int flags);
-void *erofs_get_pcpubuf(unsigned int requiredpages);
-void erofs_put_pcpubuf(void *ptr);
-int erofs_pcpubuf_growsize(unsigned int nrpages);
-void __init erofs_pcpubuf_init(void);
-void erofs_pcpubuf_exit(void);
+void *z_erofs_get_gbuf(unsigned int requiredpages);
+void z_erofs_put_gbuf(void *ptr);
+int z_erofs_gbuf_growsize(unsigned int nrpages);
+int __init z_erofs_gbuf_init(void);
+void z_erofs_gbuf_exit(void);
 int erofs_init_managed_cache(struct super_block *sb);
 int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb);
 #else
@@ -477,8 +477,8 @@ static inline int erofs_init_shrinker(void) { return 0; }
 static inline void erofs_exit_shrinker(void) {}
 static inline int z_erofs_init_zip_subsystem(void) { return 0; }
 static inline void z_erofs_exit_zip_subsystem(void) {}
-static inline void erofs_pcpubuf_init(void) {}
-static inline void erofs_pcpubuf_exit(void) {}
+static inline int z_erofs_gbuf_init(void) { return 0; }
+static inline void z_erofs_gbuf_exit(void) {}
 static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
 #endif /* !CONFIG_EROFS_FS_ZIP */
 
diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c
deleted file mode 100644 (file)
index c7a4b1d..0000000
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) Gao Xiang <xiang@kernel.org>
- *
- * For low-latency decompression algorithms (e.g. lz4), reserve consecutive
- * per-CPU virtual memory (in pages) in advance to store such inplace I/O
- * data if inplace decompression is failed (due to unmet inplace margin for
- * example).
- */
-#include "internal.h"
-
-struct erofs_pcpubuf {
-       raw_spinlock_t lock;
-       void *ptr;
-       struct page **pages;
-       unsigned int nrpages;
-};
-
-static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb);
-
-void *erofs_get_pcpubuf(unsigned int requiredpages)
-       __acquires(pcb->lock)
-{
-       struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb);
-
-       raw_spin_lock(&pcb->lock);
-       /* check if the per-CPU buffer is too small */
-       if (requiredpages > pcb->nrpages) {
-               raw_spin_unlock(&pcb->lock);
-               put_cpu_var(erofs_pcb);
-               /* (for sparse checker) pretend pcb->lock is still taken */
-               __acquire(pcb->lock);
-               return NULL;
-       }
-       return pcb->ptr;
-}
-
-void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock)
-{
-       struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id());
-
-       DBG_BUGON(pcb->ptr != ptr);
-       raw_spin_unlock(&pcb->lock);
-       put_cpu_var(erofs_pcb);
-}
-
-/* the next step: support per-CPU page buffers hotplug */
-int erofs_pcpubuf_growsize(unsigned int nrpages)
-{
-       static DEFINE_MUTEX(pcb_resize_mutex);
-       static unsigned int pcb_nrpages;
-       struct page *pagepool = NULL;
-       int delta, cpu, ret, i;
-
-       mutex_lock(&pcb_resize_mutex);
-       delta = nrpages - pcb_nrpages;
-       ret = 0;
-       /* avoid shrinking pcpubuf, since no idea how many fses rely on */
-       if (delta <= 0)
-               goto out;
-
-       for_each_possible_cpu(cpu) {
-               struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
-               struct page **pages, **oldpages;
-               void *ptr, *old_ptr;
-
-               pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL);
-               if (!pages) {
-                       ret = -ENOMEM;
-                       break;
-               }
-
-               for (i = 0; i < nrpages; ++i) {
-                       pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL);
-                       if (!pages[i]) {
-                               ret = -ENOMEM;
-                               oldpages = pages;
-                               goto free_pagearray;
-                       }
-               }
-               ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL);
-               if (!ptr) {
-                       ret = -ENOMEM;
-                       oldpages = pages;
-                       goto free_pagearray;
-               }
-               raw_spin_lock(&pcb->lock);
-               old_ptr = pcb->ptr;
-               pcb->ptr = ptr;
-               oldpages = pcb->pages;
-               pcb->pages = pages;
-               i = pcb->nrpages;
-               pcb->nrpages = nrpages;
-               raw_spin_unlock(&pcb->lock);
-
-               if (!oldpages) {
-                       DBG_BUGON(old_ptr);
-                       continue;
-               }
-
-               if (old_ptr)
-                       vunmap(old_ptr);
-free_pagearray:
-               while (i)
-                       erofs_pagepool_add(&pagepool, oldpages[--i]);
-               kfree(oldpages);
-               if (ret)
-                       break;
-       }
-       pcb_nrpages = nrpages;
-       erofs_release_pages(&pagepool);
-out:
-       mutex_unlock(&pcb_resize_mutex);
-       return ret;
-}
-
-void __init erofs_pcpubuf_init(void)
-{
-       int cpu;
-
-       for_each_possible_cpu(cpu) {
-               struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
-
-               raw_spin_lock_init(&pcb->lock);
-       }
-}
-
-void erofs_pcpubuf_exit(void)
-{
-       int cpu, i;
-
-       for_each_possible_cpu(cpu) {
-               struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu);
-
-               if (pcb->ptr) {
-                       vunmap(pcb->ptr);
-                       pcb->ptr = NULL;
-               }
-               if (!pcb->pages)
-                       continue;
-
-               for (i = 0; i < pcb->nrpages; ++i)
-                       if (pcb->pages[i])
-                               put_page(pcb->pages[i]);
-               kfree(pcb->pages);
-               pcb->pages = NULL;
-       }
-}
index 30b49b2eee53409a0b4b293a4ff579eceb2d5087..c1dae1fb949b2d3caa6a9b67fecde90bd761c3e1 100644 (file)
@@ -859,7 +859,10 @@ static int __init erofs_module_init(void)
        if (err)
                goto deflate_err;
 
-       erofs_pcpubuf_init();
+       err = z_erofs_gbuf_init();
+       if (err)
+               goto gbuf_err;
+
        err = z_erofs_init_zip_subsystem();
        if (err)
                goto zip_err;
@@ -879,6 +882,8 @@ fs_err:
 sysfs_err:
        z_erofs_exit_zip_subsystem();
 zip_err:
+       z_erofs_gbuf_exit();
+gbuf_err:
        z_erofs_deflate_exit();
 deflate_err:
        z_erofs_lzma_exit();
@@ -902,7 +907,7 @@ static void __exit erofs_module_exit(void)
        z_erofs_lzma_exit();
        erofs_exit_shrinker();
        kmem_cache_destroy(erofs_inode_cachep);
-       erofs_pcpubuf_exit();
+       z_erofs_gbuf_exit();
 }
 
 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
index 8cd30ac2091fea6c90e9a413d8f3c34df9ea5c67..2fa90b10b985c076468cbc04e308448d5fae378d 100644 (file)
@@ -5,6 +5,18 @@
  */
 #include "internal.h"
 
+struct z_erofs_gbuf {
+       spinlock_t lock;        /* held while the buffer is in use or being swapped */
+       void *ptr;              /* vmap() address covering pages[] */
+       struct page **pages;    /* pages backing the buffer */
+       unsigned int nrpages;   /* number of entries in pages[] */
+};
+
+static struct z_erofs_gbuf *z_erofs_gbufpool;  /* array of z_erofs_gbuf_count buffers */
+static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages;
+
+module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444);    /* 0 (default) is replaced by the possible-CPU count at init */
+
 static atomic_long_t erofs_global_shrink_cnt;  /* for all mounted instances */
 /* protected by 'erofs_sb_list_lock' */
 static unsigned int shrinker_run_no;
@@ -14,6 +26,142 @@ static DEFINE_SPINLOCK(erofs_sb_list_lock);
 static LIST_HEAD(erofs_sb_list);
 static struct shrinker *erofs_shrinker_info;
 
+static unsigned int z_erofs_gbuf_id(void)
+{
+       return raw_smp_processor_id() % z_erofs_gbuf_count;     /* CPU ids wrap onto the pool, so CPUs may share a slot */
+}
+
+void *z_erofs_get_gbuf(unsigned int requiredpages)
+       __acquires(gbuf->lock)
+{
+       struct z_erofs_gbuf *gbuf;
+
+       gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];
+       spin_lock(&gbuf->lock);
+       /* buffer smaller than requested: drop the lock and fail with NULL */
+       if (requiredpages > gbuf->nrpages) {
+               spin_unlock(&gbuf->lock);
+               /* (for sparse checker) pretend gbuf->lock is still taken */
+               __acquire(gbuf->lock);
+               return NULL;
+       }
+       return gbuf->ptr;       /* gbuf->lock stays held; released by z_erofs_put_gbuf() */
+}
+
+void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock)
+{
+       struct z_erofs_gbuf *gbuf;
+
+       gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()];    /* slot is re-derived from the current CPU id, not from ptr */
+       DBG_BUGON(gbuf->ptr != ptr);
+       spin_unlock(&gbuf->lock);
+}
+
+/*
+ * Grow every buffer of the global pool to @nrpages pages.
+ *
+ * A request that is not larger than the currently recorded size is a no-op,
+ * buffers are never shrunk.  Each buffer gets a freshly allocated and
+ * vmap()ed page array which is swapped in under the buffer lock; the old
+ * mapping and pages are released afterwards.
+ *
+ * Returns 0 on success or if nothing had to be done, -ENOMEM on allocation
+ * or mapping failure.
+ */
+int z_erofs_gbuf_growsize(unsigned int nrpages)
+{
+       static DEFINE_MUTEX(gbuf_resize_mutex);
+       struct page *pagepool = NULL;
+       int delta, ret, i, j;
+
+       mutex_lock(&gbuf_resize_mutex);
+       delta = nrpages - z_erofs_gbuf_nrpages;
+       ret = 0;
+       /* avoid shrinking gbufs, since no idea how many fses rely on */
+       if (delta <= 0)
+               goto out;
+
+       for (i = 0; i < z_erofs_gbuf_count; ++i) {
+               struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
+               struct page **pages, **tmp_pages;
+               void *ptr, *old_ptr = NULL;
+
+               /* assume failure until the new buffer has been installed */
+               ret = -ENOMEM;
+               tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL);
+               if (!tmp_pages)
+                       break;
+               for (j = 0; j < nrpages; ++j) {
+                       tmp_pages[j] = erofs_allocpage(&pagepool, GFP_KERNEL);
+                       if (!tmp_pages[j])
+                               goto free_pagearray;
+               }
+               ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL);
+               if (!ptr)
+                       goto free_pagearray;
+
+               /*
+                * Swap in the new buffer; from here on tmp_pages/j describe
+                * the old page array that has to be released.
+                */
+               pages = tmp_pages;
+               spin_lock(&gbuf->lock);
+               old_ptr = gbuf->ptr;
+               gbuf->ptr = ptr;
+               tmp_pages = gbuf->pages;
+               gbuf->pages = pages;
+               j = gbuf->nrpages;
+               gbuf->nrpages = nrpages;
+               spin_unlock(&gbuf->lock);
+               ret = 0;
+               if (!tmp_pages) {
+                       DBG_BUGON(old_ptr);
+                       continue;
+               }
+
+               if (old_ptr)
+                       vunmap(old_ptr);
+free_pagearray:
+               /* j pages of tmp_pages are valid: old buffer, or partial new one */
+               while (j)
+                       erofs_pagepool_add(&pagepool, tmp_pages[--j]);
+               kfree(tmp_pages);
+               if (ret)
+                       break;
+       }
+       /*
+        * Record the new size only if every buffer was grown; otherwise a
+        * later call with the same @nrpages would be treated as a no-op and
+        * leave some buffers too small.
+        */
+       if (!ret)
+               z_erofs_gbuf_nrpages = nrpages;
+       erofs_release_pages(&pagepool);
+out:
+       mutex_unlock(&gbuf_resize_mutex);
+       return ret;
+}
+
+int __init z_erofs_gbuf_init(void)
+{
+       unsigned int i = num_possible_cpus();
+
+       if (!z_erofs_gbuf_count)        /* unset: one buffer per possible CPU */
+               z_erofs_gbuf_count = i;
+       else                            /* cap the requested count at the possible-CPU count */
+               z_erofs_gbuf_count = min(z_erofs_gbuf_count, i);
+
+       z_erofs_gbufpool = kcalloc(z_erofs_gbuf_count,
+                       sizeof(*z_erofs_gbufpool), GFP_KERNEL);
+       if (!z_erofs_gbufpool)
+               return -ENOMEM;
+
+       for (i = 0; i < z_erofs_gbuf_count; ++i)
+               spin_lock_init(&z_erofs_gbufpool[i].lock);
+       return 0;       /* only zeroed descriptors exist here; no pages are attached yet */
+}
+
+/*
+ * Tear down the global buffer pool: unmap every buffer, drop its pages and
+ * free the pool array itself.
+ *
+ * The page loop uses its own index; sharing the outer loop's counter would
+ * clobber the pool position and skip (leak) or revisit buffers.
+ */
+void z_erofs_gbuf_exit(void)
+{
+       int i, j;
+
+       for (i = 0; i < z_erofs_gbuf_count; ++i) {
+               struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i];
+
+               if (gbuf->ptr) {
+                       vunmap(gbuf->ptr);
+                       gbuf->ptr = NULL;
+               }
+
+               if (!gbuf->pages)
+                       continue;
+
+               for (j = 0; j < gbuf->nrpages; ++j)
+                       if (gbuf->pages[j])
+                               put_page(gbuf->pages[j]);
+               kfree(gbuf->pages);
+               gbuf->pages = NULL;
+       }
+       kfree(z_erofs_gbufpool);
+}
+
 struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp)
 {
        struct page *page = *pagepool;