Reallocate fuse_session buffer transparently for extended max writes
authorJoanne Koong <joannelkoong@gmail.com>
Fri, 15 Nov 2024 04:55:48 +0000 (20:55 -0800)
committerBernd Schubert <bernd.schubert@fastmail.fm>
Thu, 21 Nov 2024 18:18:03 +0000 (19:18 +0100)
A previous PR supported extended max writes (eg write requests larger than 1 MB)
by initializing the fuse session buffer size to use the max_pages_limit set in
/proc/sys/fs/fuse. However, this is a huge problem for machines where multiple
fuse servers may be running but only one server needs large writes. In this case,
a lot of memory will be wasted and will lead to OOM issues.

This PR does a reallocation of the session buffer transparently if the server set
 "se->conn.max_write" to a value larger than 1 MiB. This is only for buffers that
are "owned" by libfuse - if the server wishes to provide its own allocated buffer
for receiving/processing requests, then it should ensure that buffer is allocated
to the proper size from the start.

Local testing showed:
echo 65535 | sudo tee /proc/sys/fs/fuse/max_pages_limit
dd if=/dev/urandom of=hello_file bs=6M count=2

write requests:
write request size is 5242880
write request size is 1048576
write request size is 5242880
write request size is 1048576

include/fuse_common.h
lib/fuse_i.h
lib/fuse_lowlevel.c

index 85f0b23044750b3333ce9bd60c3791663ce6b2b5..e6c3b5d4465a65839201e6db1af3e832346efbcf 100644 (file)
@@ -868,6 +868,14 @@ struct fuse_buf {
         * Used if FUSE_BUF_FD_SEEK flag is set.
         */
        off_t pos;
+
+       /**
+        * Size of memory pointer
+        *
+        * Used only if mem was internally allocated.
+        * Not used if mem was user-provided.
+        */
+       size_t mem_size;
 };
 
 /**
@@ -924,6 +932,7 @@ struct libfuse_version
                        /* .mem =   */ NULL,                    \
                        /* .fd =    */ -1,                      \
                        /* .pos =   */ 0,                       \
+                       /* .mem_size = */ 0,                    \
                } }                                             \
        } )
 
index f7924ebcac6c5d073df2c373f608e926b496fbe7..d4421f2db0d3646e6b09d91d61a579c7aa095cfd 100644 (file)
@@ -9,6 +9,8 @@
 #include "fuse.h"
 #include "fuse_lowlevel.h"
 
+#include <stdbool.h>
+
 #define MIN(a, b) \
 ({                                                                     \
        typeof(a) _a = (a);                                             \
@@ -77,6 +79,7 @@ struct fuse_session {
         * a later version, to 'fix' it at run time.
         */
        struct libfuse_version version;
+       bool buf_reallocable;
 };
 
 struct fuse_chan {
index 35dc3ca509e64160a8f7d9d7ec7bff8bcd1cc3ac..aa1de8726922980aeb98ccb77f3a76854f91cb40 100644 (file)
@@ -1990,6 +1990,7 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
        size_t outargsize = sizeof(outarg);
        uint64_t inargflags = 0;
        uint64_t outargflags = 0;
+       bool buf_reallocable = se->buf_reallocable;
        (void) nodeid;
        if (se->debug) {
                fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
@@ -2074,6 +2075,7 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
                        if (bufsize > max_bufsize) {
                                bufsize = max_bufsize;
                        }
+                       buf_reallocable = false;
                }
                if (inargflags & FUSE_DIRECT_IO_ALLOW_MMAP)
                        se->conn.capable |= FUSE_CAP_DIRECT_IO_ALLOW_MMAP;
@@ -2161,6 +2163,8 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
                bufsize = FUSE_MIN_READ_BUFFER;
        }
 
+       if (buf_reallocable)
+           bufsize = UINT_MAX;
        se->conn.max_write = MIN(se->conn.max_write, bufsize - FUSE_BUFFER_HEADER_SIZE);
        se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
 
@@ -2921,30 +2925,6 @@ static void fuse_ll_pipe_destructor(void *data)
        fuse_ll_pipe_free(llp);
 }
 
-static unsigned int get_max_pages(void)
-{
-       char buf[32];
-       long res;
-       int fd;
-       int err;
-
-       fd = open("/proc/sys/fs/fuse/max_pages_limit", O_RDONLY);
-       if (fd < 0)
-               return FUSE_DEFAULT_MAX_PAGES_LIMIT;
-
-       res = read(fd, buf, sizeof(buf) - 1);
-
-       close(fd);
-
-       if (res < 0)
-               return FUSE_DEFAULT_MAX_PAGES_LIMIT;
-
-       buf[res] = '\0';
-
-       err = libfuse_strtol(buf, &res);
-       return err ? FUSE_DEFAULT_MAX_PAGES_LIMIT : res;
-}
-
 int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf)
 {
        return fuse_session_receive_buf_int(se, buf, NULL);
@@ -2955,8 +2935,8 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
 {
        int err;
        ssize_t res;
-#ifdef HAVE_SPLICE
        size_t bufsize = se->bufsize;
+#ifdef HAVE_SPLICE
        struct fuse_ll_pipe *llp;
        struct fuse_buf tmpbuf;
 
@@ -3036,6 +3016,8 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
                                        "fuse: failed to allocate read buffer\n");
                                return -ENOMEM;
                        }
+                       buf->mem_size = se->bufsize;
+                       se->buf_reallocable = true;
                }
                buf->size = se->bufsize;
                buf->flags = 0;
@@ -3073,22 +3055,40 @@ fallback:
                                "fuse: failed to allocate read buffer\n");
                        return -ENOMEM;
                }
+               buf->mem_size = se->bufsize;
+               se->buf_reallocable = true;
        }
 
 restart:
+       if (se->buf_reallocable)
+           bufsize = buf->mem_size;
        if (se->io != NULL) {
                /* se->io->read is never NULL if se->io is not NULL as
                specified by fuse_session_custom_io()*/
-               res = se->io->read(ch ? ch->fd : se->fd, buf->mem, se->bufsize,
+               res = se->io->read(ch ? ch->fd : se->fd, buf->mem, bufsize,
                                         se->userdata);
        } else {
-               res = read(ch ? ch->fd : se->fd, buf->mem, se->bufsize);
+               res = read(ch ? ch->fd : se->fd, buf->mem, bufsize);
        }
        err = errno;
 
        if (fuse_session_exited(se))
                return 0;
        if (res == -1) {
+               if (err == EINVAL && se->buf_reallocable && se->bufsize > buf->mem_size)  {
+                   void *newbuf  = malloc(se->bufsize);
+                   if (!newbuf) {
+                       fuse_log(FUSE_LOG_ERR,
+                               "fuse: failed to (re)allocate read buffer\n");
+                       return -ENOMEM;
+                   }
+                   free(buf->mem);
+                   buf->mem = newbuf;
+                   buf->mem_size = se->bufsize;
+                   se->buf_reallocable = true;
+                   goto restart;
+               }
+
                /* ENOENT means the operation was interrupted, it's safe
                   to restart */
                if (err == ENOENT)
@@ -3144,7 +3144,8 @@ struct fuse_session *_fuse_session_new_317(struct fuse_args *args,
                goto out1;
        }
        se->fd = -1;
-       se->conn.max_write = UINT_MAX;
+       se->conn.max_write = FUSE_DEFAULT_MAX_PAGES_LIMIT * getpagesize();
+       se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
        se->conn.max_readahead = UINT_MAX;
 
        /* Parse options */
@@ -3180,9 +3181,6 @@ struct fuse_session *_fuse_session_new_317(struct fuse_args *args,
        if (se->debug)
                fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION);
 
-       se->bufsize = get_max_pages() * getpagesize() +
-               FUSE_BUFFER_HEADER_SIZE;
-
        list_init_req(&se->list);
        list_init_req(&se->interrupts);
        list_init_nreq(&se->notify_list);