Allow to have page aligned writes
authorBernd Schubert <bschubert@ddn.com>
Mon, 16 Dec 2024 22:37:29 +0000 (23:37 +0100)
committerBernd Schubert <bernd.schubert@fastmail.fm>
Wed, 18 Dec 2024 08:28:18 +0000 (09:28 +0100)
Read/writes IOs should be page aligned as fuse server
might need to copy data to another buffer otherwise in
order to fulfill network or device storage requirements.

Simple reproducer is example/passthrough*
and opening a file with O_DIRECT - without this change
writing to that file failed with -EINVAL if the underlying
file system was using ext4 (for passthrough_hp the
'passthrough' feature has to be disabled).

The mis-alignment from fuse kernel is not ideal, but we can handle
it by allocation one page more than needed and then using a buffer
that is set up to compensate for kernel misalignment.

This also only set se->buf_reallocable to true when called
by a libfuse internal caller - we do not know what
external callers are doing with the buffer - update to
commit 0e0f43b79b9b

ChangeLog.rst
lib/fuse.c
lib/fuse_i.h
lib/fuse_loop.c
lib/fuse_loop_mt.c
lib/fuse_lowlevel.c
lib/util.h

index 0ed7d782d48437fb9eea5c06821f9005b8c942ad..8fd8cd34ec206ce3acef8cef3b32a27617b3e8cb 100644 (file)
@@ -5,6 +5,7 @@ libfuse 3.17 (unreleased)
   New public function: fuse_set_fail_signal_handlers()
 * Allows fuse_log() messages to be send to syslog instead of stderr
   New public functions: fuse_log_enable_syslog() and fuse_log_close_syslog()
+* Handle buffer misalignment for FUSE_WRITE
 
 libfuse 3.16.2 (2023-10-10)
 ===========================
index b6407eb4c1f52f9ac7a1d7d46639a8b02b769fa4..3f08d7047ef9d6322959a12378d55a9e916ed043 100644 (file)
@@ -4550,14 +4550,13 @@ static int fuse_session_loop_remember(struct fuse *f)
                        else
                                break;
                } else if (res > 0) {
-                       res = fuse_session_receive_buf(se, &fbuf);
-
+                       res = fuse_session_receive_buf_int(se, &fbuf, NULL);
                        if (res == -EINTR)
                                continue;
                        if (res <= 0)
                                break;
 
-                       fuse_session_process_buf(se, &fbuf);
+                       fuse_session_process_buf_int(se, &fbuf, NULL);
                } else {
                        timeout = fuse_clean_cache(f);
                        curr_time(&now);
index d4421f2db0d3646e6b09d91d61a579c7aa095cfd..6713ab844164375d2e66a6de0dbf70c51b1cd5a7 100644 (file)
@@ -188,6 +188,8 @@ void cuse_lowlevel_init(fuse_req_t req, fuse_ino_t nodeide, const void *inarg);
 
 int fuse_start_thread(pthread_t *thread_id, void *(*func)(void *), void *arg);
 
+void fuse_buf_free(struct fuse_buf *buf);
+
 int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
                                 struct fuse_chan *ch);
 void fuse_session_process_buf_int(struct fuse_session *se,
index cad3a5ca81f9296303d530e4ba723a8f5dc15197..742d16907dee82f09520d8bfec969934b0d9dba2 100644 (file)
@@ -24,7 +24,7 @@ int fuse_session_loop(struct fuse_session *se)
        };
 
        while (!fuse_session_exited(se)) {
-               res = fuse_session_receive_buf(se, &fbuf);
+               res = fuse_session_receive_buf_int(se, &fbuf, NULL);
 
                if (res == -EINTR)
                        continue;
@@ -34,7 +34,7 @@ int fuse_session_loop(struct fuse_session *se)
                fuse_session_process_buf(se, &fbuf);
        }
 
-       free(fbuf.mem);
+       fuse_buf_free(&fbuf);
        if(res > 0)
                /* No error, just the length of the most recently read
                   request */
index 075ac2e02d7b945c189d33beda58237e9fc0178a..c66a5034c0e8b71e61682833324419bb7a850442 100644 (file)
@@ -194,7 +194,7 @@ static void *fuse_do_work(void *data)
                        pthread_mutex_unlock(&mt->lock);
 
                        pthread_detach(w->thread_id);
-                       free(w->fbuf.mem);
+                       fuse_buf_free(&w->fbuf);
                        fuse_chan_put(w->ch);
                        free(w);
                        return NULL;
@@ -350,7 +350,7 @@ static void fuse_join_worker(struct fuse_mt *mt, struct fuse_worker *w)
        pthread_mutex_lock(&mt->lock);
        list_del_worker(w);
        pthread_mutex_unlock(&mt->lock);
-       free(w->fbuf.mem);
+       fuse_buf_free(&w->fbuf);
        fuse_chan_put(w->ch);
        free(w);
 }
index b233e55b90c255f2c4cd1b308553a6b7740f709c..4b45f735ac224f739e93b226ffb8bd0821687142 100644 (file)
@@ -2945,13 +2945,52 @@ static void fuse_ll_pipe_destructor(void *data)
        fuse_ll_pipe_free(llp);
 }
 
-int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf)
+void fuse_buf_free(struct fuse_buf *buf)
 {
-       return fuse_session_receive_buf_int(se, buf, NULL);
+       if (buf->mem == NULL)
+               return;
+
+       size_t write_header_sz =
+               sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in);
+
+       char *ptr = (char *)buf->mem - pagesize + write_header_sz;
+       free(ptr);
+       buf->mem = NULL;
 }
 
-int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
-                                struct fuse_chan *ch)
+/*
+ * This is used to allocate buffers that hold fuse requests
+ */
+static void *buf_alloc(size_t size, bool internal)
+{
+       /*
+        * For libfuse internal caller add in alignment. That cannot be done
+        * for an external caller, as it is not guaranteed that the external
+        * caller frees the raw pointer.
+        */
+       if (internal) {
+               size_t write_header_sz = sizeof(struct fuse_in_header) +
+                                        sizeof(struct fuse_write_in);
+               size_t new_size = ROUND_UP(size + write_header_sz, pagesize);
+
+               char *buf = aligned_alloc(pagesize, new_size);
+               if (buf == NULL)
+                       return NULL;
+
+               buf += pagesize - write_header_sz;
+
+               return buf;
+       } else {
+               return malloc(size);
+       }
+}
+
+/*
+ *@param internal true if called from libfuse internal code
+ */
+static int _fuse_session_receive_buf(struct fuse_session *se,
+                                    struct fuse_buf *buf, struct fuse_chan *ch,
+                                    bool internal)
 {
        int err;
        ssize_t res;
@@ -2960,7 +2999,8 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
        struct fuse_ll_pipe *llp;
        struct fuse_buf tmpbuf;
 
-       if (se->conn.proto_minor < 14 || !(se->conn.want & FUSE_CAP_SPLICE_READ))
+       if (se->conn.proto_minor < 14 ||
+           !(se->conn.want & FUSE_CAP_SPLICE_READ))
                goto fallback;
 
        llp = fuse_ll_get_pipe(se);
@@ -2985,11 +3025,11 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
 
        if (se->io != NULL && se->io->splice_receive != NULL) {
                res = se->io->splice_receive(ch ? ch->fd : se->fd, NULL,
-                                                    llp->pipe[1], NULL, bufsize, 0,
-                                                    se->userdata);
+                                            llp->pipe[1], NULL, bufsize, 0,
+                                            se->userdata);
        } else {
                res = splice(ch ? ch->fd : se->fd, NULL, llp->pipe[1], NULL,
-                                bufsize, 0);
+                            bufsize, 0);
        }
        err = errno;
 
@@ -3013,7 +3053,7 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
                return -EIO;
        }
 
-       tmpbuf = (struct fuse_buf) {
+       tmpbuf = (struct fuse_buf){
                .size = res,
                .flags = FUSE_BUF_IS_FD,
                .fd = llp->pipe[0],
@@ -3024,20 +3064,22 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
         * fuse_loop_mt() needs to check for FORGET so this more than
         * just an optimization.
         */
-       if (res < sizeof(struct fuse_in_header) +
-           sizeof(struct fuse_write_in) + pagesize) {
+       if (res < sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in) +
+                         pagesize) {
                struct fuse_bufvec src = { .buf[0] = tmpbuf, .count = 1 };
                struct fuse_bufvec dst = { .count = 1 };
 
                if (!buf->mem) {
-                       buf->mem = malloc(se->bufsize);
+                       buf->mem = buf_alloc(se->bufsize, internal);
                        if (!buf->mem) {
-                               fuse_log(FUSE_LOG_ERR,
+                               fuse_log(
+                                       FUSE_LOG_ERR,
                                        "fuse: failed to allocate read buffer\n");
                                return -ENOMEM;
                        }
                        buf->mem_size = se->bufsize;
-                       se->buf_reallocable = true;
+                       if (internal)
+                               se->buf_reallocable = true;
                }
                buf->size = se->bufsize;
                buf->flags = 0;
@@ -3046,12 +3088,13 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
                res = fuse_buf_copy(&dst, &src, 0);
                if (res < 0) {
                        fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: %s\n",
-                               strerror(-res));
+                                strerror(-res));
                        fuse_ll_clear_pipe(se);
                        return res;
                }
                if (res < tmpbuf.size) {
-                       fuse_log(FUSE_LOG_ERR, "fuse: copy from pipe: short read\n");
+                       fuse_log(FUSE_LOG_ERR,
+                                "fuse: copy from pipe: short read\n");
                        fuse_ll_clear_pipe(se);
                        return -EIO;
                }
@@ -3069,24 +3112,25 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
 fallback:
 #endif
        if (!buf->mem) {
-               buf->mem = malloc(se->bufsize);
+               buf->mem = buf_alloc(se->bufsize, internal);
                if (!buf->mem) {
                        fuse_log(FUSE_LOG_ERR,
-                               "fuse: failed to allocate read buffer\n");
+                                "fuse: failed to allocate read buffer\n");
                        return -ENOMEM;
                }
                buf->mem_size = se->bufsize;
-               se->buf_reallocable = true;
+               if (internal)
+                       se->buf_reallocable = true;
        }
 
 restart:
        if (se->buf_reallocable)
-           bufsize = buf->mem_size;
+               bufsize = buf->mem_size;
        if (se->io != NULL) {
                /* se->io->read is never NULL if se->io is not NULL as
                specified by fuse_session_custom_io()*/
                res = se->io->read(ch ? ch->fd : se->fd, buf->mem, bufsize,
-                                        se->userdata);
+                                  se->userdata);
        } else {
                res = read(ch ? ch->fd : se->fd, buf->mem, bufsize);
        }
@@ -3095,18 +3139,20 @@ restart:
        if (fuse_session_exited(se))
                return 0;
        if (res == -1) {
-               if (err == EINVAL && se->buf_reallocable && se->bufsize > buf->mem_size)  {
-                   void *newbuf  = malloc(se->bufsize);
-                   if (!newbuf) {
-                       fuse_log(FUSE_LOG_ERR,
-                               "fuse: failed to (re)allocate read buffer\n");
-                       return -ENOMEM;
-                   }
-                   free(buf->mem);
-                   buf->mem = newbuf;
-                   buf->mem_size = se->bufsize;
-                   se->buf_reallocable = true;
-                   goto restart;
+               if (err == EINVAL && se->buf_reallocable &&
+                   se->bufsize > buf->mem_size) {
+                       void *newbuf = buf_alloc(se->bufsize, internal);
+                       if (!newbuf) {
+                               fuse_log(
+                                       FUSE_LOG_ERR,
+                                       "fuse: failed to (re)allocate read buffer\n");
+                               return -ENOMEM;
+                       }
+                       fuse_buf_free(buf);
+                       buf->mem = newbuf;
+                       buf->mem_size = se->bufsize;
+                       se->buf_reallocable = true;
+                       goto restart;
                }
 
                /* ENOENT means the operation was interrupted, it's safe
@@ -3127,7 +3173,7 @@ restart:
                        perror("fuse: reading device");
                return -err;
        }
-       if ((size_t) res < sizeof(struct fuse_in_header)) {
+       if ((size_t)res < sizeof(struct fuse_in_header)) {
                fuse_log(FUSE_LOG_ERR, "short read on fuse device\n");
                return -EIO;
        }
@@ -3137,6 +3183,18 @@ restart:
        return res;
 }
 
+int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf)
+{
+       return _fuse_session_receive_buf(se, buf, NULL, false);
+}
+
+/* libfuse internal handler */
+int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
+                                struct fuse_chan *ch)
+{
+       return _fuse_session_receive_buf(se, buf, ch, true);
+}
+
 FUSE_SYMVER("_fuse_session_new_317", "_fuse_session_new@@FUSE_3.17")
 struct fuse_session *_fuse_session_new_317(struct fuse_args *args,
                                          const struct fuse_lowlevel_ops *op,
index 0a01711d8ac92c94b980771481854b5c2054be0b..74ce74845854e022b2332bf3d2b8bcd1d212d0ce 100644 (file)
@@ -1 +1,3 @@
+#define ROUND_UP(val, round_to) (((val) + (round_to - 1)) & ~(round_to - 1))
+
 int libfuse_strtol(const char *str, long *res);