Add unprivileged option in `mount.fuse3`
authorMattias Nissler <mnissler@chromium.org>
Fri, 31 Aug 2018 07:44:04 +0000 (09:44 +0200)
committerNikolaus Rath <Nikolaus@rath.org>
Tue, 9 Oct 2018 19:36:22 +0000 (20:36 +0100)
The unprivileged option allows running the FUSE file system process
without privileges by dropping capabilities and preventing them from
being re-acquired via setuid / fscaps etc. To accomplish this,
mount.fuse sets up the `/dev/fuse` file descriptor and mount itself
and passes the file descriptor via the `/dev/fd/%u` mountpoint syntax
to the FUSE file system.

ChangeLog.rst
doc/mount.fuse3.8
include/fuse.h
lib/fuse_versionscript
lib/helper.c
lib/meson.build
test/test_examples.py
test/travis-build.sh
test/util.py
util/meson.build
util/mount.fuse.c

index 65f57d79d64d5eea03cf17a44bee9c6f2bcfc81b..24b4d9c9d456e926945e19392024da4af8c40f52 100644 (file)
@@ -12,6 +12,11 @@ Unreleased Changes
   special format `/dev/fd/%u`. This allows mounting to be handled by the parent
   so the FUSE filesystem process can run fully unprivileged.
 
+* Add a `drop_privileges` option to mount.fuse3 which causes it to open
+  `/dev/fuse` and mount the file system itself, then run the FUSE file
+  system fully unprivileged and unable to re-acquire privilege via setuid,
+  fscaps, etc.
+
 libfuse 3.2.6 (2018-08-31)
 ==========================
 
index 8020c46cd98d2ea2801889c37f3f584e07f1d31e..e291703ea509153f0d3aceed3ce0bdc2597d6cac 100644 (file)
@@ -199,6 +199,16 @@ inode numbers.
 .TP
 \fBmodules=M1[:M2...]\fP
 Add modules to the filesystem stack.  Modules are pushed in the order they are specified, with the original filesystem being on the bottom of the stack.
+
+.SS "\fBmount.fuse3\fP options:"
+These options are interpreted by \fBmount.fuse3\fP and are thus only available when mounting a file system via \fBmount.fuse3\fP (such as when mounting via the generic \fBmount\fP(1) command or \fI/etc/fstab\fP). Supported options are:
+.TP
+\fBsetuid=USER\fP
+Switch to \fBUSER\fP and its primary group before launching the FUSE file system process. mount.fuse3 must be run as root or with \fBCAP_SETUID\fP and \fBCAP_SETGID\fP for this to work.
+.TP
+\fBdrop_privileges\fP
+Perform setup of the FUSE file descriptor and mounting of the file system before launching the FUSE file system process. \fBmount.fuse3\fP requires privilege to do so, i.e. must be run as root or at least with \fBCAP_SYS_ADMIN\fP and \fBCAP_SETPCAP\fP. It will launch the file system process fully unprivileged, i.e. without \fBcapabilities\fP(7) and with \fBprctl\fP(2) flags set up such that privileges can't be reacquired (e.g. via setuid or fscaps binaries). This reduces risk in the event of the FUSE file system process getting compromised by malicious file system data.
+
 .SH FUSE MODULES (STACKING)
 Modules are filesystem stacking support to high level API. Filesystem modules can be built into libfuse or loaded from shared object
 .SS "iconv"
index 7b63c4211be94dfdb8ce3b453a82f86dbe1eaa56..24e04bc69bd2e98cfd813570394bf5d3dcafb95b 100644 (file)
@@ -1219,6 +1219,16 @@ typedef struct fuse_fs *(*fuse_module_factory_t)(struct fuse_args *args,
 /** Get session from fuse object */
 struct fuse_session *fuse_get_session(struct fuse *f);
 
+/**
+ * Open a FUSE file descriptor and set up the mount for the given
+ * mountpoint and flags.
+ *
+ * @param mountpoint reference to the mount in the file system
+ * @param options mount options
+ * @return the FUSE file descriptor or -1 upon error
+ */
+int fuse_open_channel(const char *mountpoint, const char *options);
+
 #ifdef __cplusplus
 }
 #endif
index e52dd86b936922f3f8b7f7b3759cb906bd76146f..2802bb4026090452a1db9bb1306fb39518d41629 100644 (file)
@@ -148,6 +148,11 @@ FUSE_3.2 {
                fuse_loop_mt_31;
 } FUSE_3.1;
 
+FUSE_3.3 {
+       global:
+               fuse_open_channel;
+} FUSE_3.2;
+
 # Local Variables:
 # indent-tabs-mode: t
 # End:
index e1de362ee87eda09a8d4950836847f25fee2e011..5b80c6ea11d97609f4c878e904142bdbd8eed0fb 100644 (file)
@@ -420,3 +420,21 @@ struct fuse_conn_info_opts* fuse_parse_conn_info_opts(struct fuse_args *args)
        }
        return opts;
 }
+
+/*
+ * Mount a FUSE file system at `mountpoint` using the mount option string
+ * `options` and hand back the resulting FUSE file descriptor.
+ *
+ * The options are wrapped in a synthetic "-o <options>" argument vector
+ * so that they go through parse_mount_opts(). Returns whatever
+ * fuse_kern_mount() returns, or -1 if the options cannot be parsed.
+ */
+int fuse_open_channel(const char *mountpoint, const char *options)
+{
+       const char *fake_argv[] = { "", "-o", options };
+       struct fuse_args args = FUSE_ARGS_INIT(
+               (int) (sizeof(fake_argv) / sizeof(fake_argv[0])),
+               (char **) fake_argv);
+       struct mount_opts *mo;
+       int fuse_fd;
+
+       mo = parse_mount_opts(&args);
+       if (!mo)
+               return -1;
+
+       /* The parsed options are only needed for the mount call itself. */
+       fuse_fd = fuse_kern_mount(mountpoint, mo);
+       destroy_mount_opts(mo);
+       return fuse_fd;
+}
index 5dd84508200446411429878ca5931deb9c50569c..492abf7d49832cf47565e13eae0f1bc0817887e3 100644 (file)
@@ -32,7 +32,7 @@ libfuse = library('fuse3', libfuse_sources, version: meson.project_version(),
                   soversion: '3', include_directories: include_dirs,
                   dependencies: deps, install: true,
                   link_depends: 'fuse_versionscript',
-                  c_args: [ '-DFUSE_USE_VERSION=32',
+                  c_args: [ '-DFUSE_USE_VERSION=33',
                             '-DFUSERMOUNT_DIR="@0@"'.format(fusermount_path) ],
                   link_args: ['-Wl,--version-script,' + meson.current_source_dir()
                               + '/fuse_versionscript' ])
index 12fe6d7accdd41e8b12d49096aaad9fc0b825387..0224bac41b31e5fe8d1cd5e74612179b58f72848 100755 (executable)
@@ -19,7 +19,7 @@ from tempfile import NamedTemporaryFile
 from contextlib import contextmanager
 from util import (wait_for_mount, umount, cleanup, base_cmdline,
                   safe_sleep, basename, fuse_test_marker, test_printcap,
-                  fuse_proto)
+                  fuse_proto, powerset)
 from os.path import join as pjoin
 
 pytestmark = fuse_test_marker()
@@ -33,20 +33,36 @@ def name_generator(__ctr=[0]):
     __ctr[0] += 1
     return 'testfile_%d' % __ctr[0]
 
-options = [ [] ]
+options = []
 if sys.platform == 'linux':
-    options.append(['-o', 'clone_fd'])
-@pytest.mark.parametrize("options", options)
-@pytest.mark.parametrize("name", ('hello', 'hello_ll'))
-def test_hello(tmpdir, name, options):
-    mnt_dir = str(tmpdir)
-    cmdline = base_cmdline + \
-              [ pjoin(basename, 'example', name),
-                '-f', mnt_dir ] + options
+    options.append('clone_fd')
+
+def invoke_directly(mnt_dir, name, options):
+    cmdline = base_cmdline + [ pjoin(basename, 'example', name),
+                               '-f', mnt_dir, '-o', ','.join(options) ]
     if name == 'hello_ll':
         # supports single-threading only
         cmdline.append('-s')
-    mount_process = subprocess.Popen(cmdline)
+
+    return cmdline
+
+def invoke_mount_fuse(mnt_dir, name, options):
+    return base_cmdline + [ pjoin(basename, 'util', 'mount.fuse3'),
+                            name, mnt_dir, '-o', ','.join(options) ]
+
+def invoke_mount_fuse_drop_privileges(mnt_dir, name, options):
+    if os.getuid() != 0:
+        pytest.skip('drop_privileges requires root, skipping.')
+
+    return invoke_mount_fuse(mnt_dir, name, options + ('drop_privileges',))
+
+@pytest.mark.parametrize("cmdline_builder", (invoke_directly, invoke_mount_fuse,
+                                             invoke_mount_fuse_drop_privileges))
+@pytest.mark.parametrize("options", powerset(options))
+@pytest.mark.parametrize("name", ('hello', 'hello_ll'))
+def test_hello(tmpdir, name, options, cmdline_builder):
+    mnt_dir = str(tmpdir)
+    mount_process = subprocess.Popen(cmdline_builder(mnt_dir, name, options))
     try:
         wait_for_mount(mount_process, mnt_dir)
         assert os.listdir(mnt_dir) == [ 'hello' ]
index dae1a10021caddb6e0edd8e44914bbf7126dde77..6232e747df9494f906e6006475376c10987600a3 100755 (executable)
@@ -11,6 +11,19 @@ export CC
 
 TEST_CMD="python3 -m pytest --maxfail=99 test/"
 
+# Make sure binaries can be accessed when invoked by root.
+umask 0022
+
+# There are tests that run as root but without CAP_DAC_OVERRIDE. To allow these
+# to launch built binaries, the directory tree must be accessible to the root
+# user. Since the source directory isn't necessarily accessible to root, we
+# build and run tests in a temporary directory that we can set up to be world
+# readable/executable.
+SOURCE_DIR="$(readlink -f .)"
+TEST_DIR="$(mktemp -dt libfuse-build-XXXXXX)"
+chmod 0755 "${TEST_DIR}"
+cd "${TEST_DIR}"
+
 # Standard build
 for CC in gcc gcc-6 clang; do
     mkdir build-${CC}; cd build-${CC}
@@ -19,7 +32,7 @@ for CC in gcc gcc-6 clang; do
     else
         build_opts=''
     fi
-    meson -D werror=true ${build_opts} ../
+    meson -D werror=true ${build_opts} "${SOURCE_DIR}"
     ninja
 
     sudo chown root:root util/fusermount3
@@ -35,7 +48,7 @@ for san in undefined address; do
     mkdir build-${san}; cd build-${san}
     # b_lundef=false is required to work around clang
     # bug, cf. https://groups.google.com/forum/#!topic/mesonbuild/tgEdAXIIdC4
-    meson -D b_sanitize=${san} -D b_lundef=false -D werror=true ..
+    meson -D b_sanitize=${san} -D b_lundef=false -D werror=true "${SOURCE_DIR}"
     ninja
 
     # Test as root and regular user
@@ -43,12 +56,14 @@ for san in undefined address; do
     sudo chown root:root util/fusermount3
     sudo chmod 4755 util/fusermount3
     # Cleanup temporary files (since they're now owned by root)
-    sudo rm -rf test/.pytest_cache/
+    sudo rm -rf test/.pytest_cache/ test/__pycache__
 
     ${TEST_CMD}
     cd ..
 done
 
-# Documentation
-doxygen doc/Doxyfile
+# Documentation.
+(cd "${SOURCE_DIR}"; doxygen doc/Doxyfile)
 
+# Clean up.
+rm -rf "${TEST_DIR}"
index b9c1b0cb179120b252e54934bf569c9d5dd8660b..ba02b9fdb27b6415ddb55420486caa23c3d82c0d 100644 (file)
@@ -7,6 +7,7 @@ import time
 from os.path import join as pjoin
 import sys
 import re
+import itertools
 
 basename = pjoin(os.path.dirname(__file__), '..')
 
@@ -138,6 +139,12 @@ def fuse_test_marker():
 
     return pytest.mark.uses_fuse()
 
+def powerset(iterable):
+  s = list(iterable)
+  return itertools.chain.from_iterable(
+      itertools.combinations(s, r) for r in range(len(s)+1))
+
+
 # Use valgrind if requested
 if os.environ.get('TEST_WITH_VALGRIND', 'no').lower().strip() \
    not in ('no', 'false', '0'):
@@ -147,6 +154,8 @@ else:
 
 # Try to use local fusermount3
 os.environ['PATH'] = '%s:%s' % (pjoin(basename, 'util'), os.environ['PATH'])
+# Put example binaries on PATH
+os.environ['PATH'] = '%s:%s' % (pjoin(basename, 'example'), os.environ['PATH'])
 
 try:
     (fuse_proto, fuse_caps) = test_printcap()
index 674fd779794495b5c8a37168bc0e932360184913..e8105bd8251f2e20c6e57942475800893501dee5 100644 (file)
@@ -6,10 +6,12 @@ executable('fusermount3', ['fusermount.c', '../lib/mount_util.c'],
            install_dir: get_option('bindir'),
            c_args: '-DFUSE_CONF="@0@"'.format(fuseconf_path))
 
-executable('mount.fuse3', ['mount.fuse.c'], 
+executable('mount.fuse3', ['mount.fuse.c'],
            include_directories: include_dirs,
+           link_with: [ libfuse ],
            install: true,
-           install_dir: get_option('sbindir'))
+           install_dir: get_option('sbindir'),
+           c_args: '-DFUSE_USE_VERSION=33')
 
 
 udevrulesdir = get_option('udevrulesdir')
index 169fe537f1f167a55258bb0d1c74c207131a1bb6..84e680b89fcdbfc853f490ce9377e901983cdf8d 100644 (file)
 #include <unistd.h>
 #include <errno.h>
 #include <stdint.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <sys/wait.h>
+
+#ifdef linux
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <linux/capability.h>
+#include <linux/securebits.h>
+#endif
+
+#include "fuse.h"
 
 static char *progname;
 
@@ -80,6 +92,124 @@ static char *add_option(const char *opt, char *options)
        return options;
 }
 
+/*
+ * Mount the file system at `mountpoint` from within mount.fuse3 itself and
+ * return the FUSE file descriptor, ready to be inherited by the file
+ * system process.
+ *
+ * The option string handed to fuse_open_channel() is "subtype=<subtype>"
+ * combined with `options` through add_option(). FD_CLOEXEC is cleared on
+ * the returned descriptor so that it stays open across exec(). Any failure
+ * terminates the process with exit status 1.
+ */
+static int prepare_fuse_fd(const char *mountpoint, const char* subtype,
+                          const char *options)
+{
+       int fuse_fd = -1;
+       int flags = -1;
+       /* strlen("subtype=") is 8, plus one byte for the terminating NUL. */
+       int subtype_len = strlen(subtype) + 9;
+       char* options_copy = xrealloc(NULL, subtype_len);
+
+       snprintf(options_copy, subtype_len, "subtype=%s", subtype);
+       /*
+        * NOTE(review): options is presumably NULL when no option is left
+        * to pass on; skip the append then rather than handing a NULL
+        * string to add_option() -- confirm against main().
+        */
+       if (options != NULL) {
+               options_copy = add_option(options, options_copy);
+       }
+       fuse_fd = fuse_open_channel(mountpoint, options_copy);
+       if (fuse_fd == -1) {
+               exit(1);
+       }
+
+       flags = fcntl(fuse_fd, F_GETFD);
+       /* fcntl() reports failure with -1, for F_GETFD and F_SETFD alike. */
+       if (flags == -1 ||
+           fcntl(fuse_fd, F_SETFD, flags & ~FD_CLOEXEC) == -1) {
+               fprintf(stderr, "%s: Failed to clear CLOEXEC: %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+
+       return fuse_fd;
+}
+
+#ifdef linux
+/*
+ * Return the effective capability set of the caller as a 64-bit mask:
+ * bits 0-31 are taken from data[0], bits 32-63 from data[1]. Exits with
+ * status 1 if the capabilities cannot be read.
+ */
+static uint64_t get_capabilities(void)
+{
+       /*
+        * This invokes the capget syscall directly to avoid the libcap
+        * dependency, which isn't really justified just for this.
+        */
+       struct __user_cap_header_struct header = {
+               .version = _LINUX_CAPABILITY_VERSION_3,
+               .pid = 0,
+       };
+       struct __user_cap_data_struct data[2];
+       memset(data, 0, sizeof(data));
+       if (syscall(SYS_capget, &header, data) == -1) {
+               fprintf(stderr, "%s: Failed to get capabilities: %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+
+       /* Widen before shifting so the upper 32 bits are not lost. */
+       return data[0].effective | ((uint64_t) data[1].effective << 32);
+}
+
+/*
+ * Replace the effective and permitted capability sets of the caller with
+ * the 64-bit mask `caps`; the inheritable set is left empty. Exits with
+ * status 1 if the capset call fails.
+ */
+static void set_capabilities(uint64_t caps)
+{
+       /*
+        * capset is called through syscall() so that mount.fuse3 doesn't
+        * have to depend on libcap for this one operation.
+        */
+       struct __user_cap_header_struct hdr = {
+               .version = _LINUX_CAPABILITY_VERSION_3,
+               .pid = 0,
+       };
+       struct __user_cap_data_struct sets[2];
+       int word;
+
+       memset(sets, 0, sizeof(sets));
+       /* Each array element carries 32 bits of the mask, low word first. */
+       for (word = 0; word < 2; word++) {
+               sets[word].effective = caps >> (32 * word);
+               sets[word].permitted = sets[word].effective;
+       }
+
+       if (syscall(SYS_capset, &hdr, sets) != -1)
+               return;
+
+       fprintf(stderr, "%s: Failed to set capabilities: %s\n",
+               progname, strerror(errno));
+       exit(1);
+}
+
+/*
+ * Strip the calling process of privilege before the file system is
+ * exec()ed: set and lock the securebits, empty the capability bounding
+ * set, clear the effective and permitted capability sets, and finally set
+ * no_new_privs. Any failure terminates the process with exit status 1,
+ * so the file system is never launched in a partially privileged state.
+ */
+static void drop_and_lock_capabilities(void)
+{
+       /* Set and lock securebits. */
+       if (prctl(PR_SET_SECUREBITS,
+                 SECBIT_KEEP_CAPS_LOCKED |
+                 SECBIT_NO_SETUID_FIXUP |
+                 SECBIT_NO_SETUID_FIXUP_LOCKED |
+                 SECBIT_NOROOT |
+                 SECBIT_NOROOT_LOCKED) == -1) {
+               fprintf(stderr, "%s: Failed to set securebits %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+
+       /* Clear the capability bounding set. */
+       int cap;
+       for (cap = 0; ; cap++) {
+               int cap_status = prctl(PR_CAPBSET_READ, cap);
+               if (cap_status == 0) {
+                       /* Not in the bounding set, nothing to drop. */
+                       continue;
+               }
+               if (cap_status == -1 && errno == EINVAL) {
+                       /* Ran past the last capability number. */
+                       break;
+               }
+
+               if (cap_status != 1) {
+                       fprintf(stderr,
+                               "%s: Failed to get capability %d: %s\n",
+                               progname, cap, strerror(errno));
+                       exit(1);
+               }
+               if (prctl(PR_CAPBSET_DROP, cap) == -1) {
+                       /*
+                        * Treat this like every other failure here: do
+                        * not carry on with a capability left behind in
+                        * the bounding set.
+                        */
+                       fprintf(stderr,
+                               "%s: Failed to drop capability %d: %s\n",
+                               progname, cap, strerror(errno));
+                       exit(1);
+               }
+       }
+
+       /* Drop capabilities. */
+       set_capabilities(0);
+
+       /* Prevent re-acquisition of privileges. */
+       if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
+               fprintf(stderr, "%s: Failed to set no_new_privs: %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+}
+#endif
+
 int main(int argc, char *argv[])
 {
        char *type = NULL;
@@ -88,10 +218,12 @@ int main(int argc, char *argv[])
        char *basename;
        char *options = NULL;
        char *command = NULL;
-       char *setuid = NULL;
+       char *setuid_name = NULL;
        int i;
        int dev = 1;
        int suid = 1;
+       int pass_fuse_fd = 0;
+       int drop_privileges = 0;
 
        progname = argv[0];
        basename = strrchr(argv[0], '/');
@@ -167,7 +299,12 @@ int main(int argc, char *argv[])
                                                              "_netdev",
                                                              NULL};
                                if (strncmp(opt, "setuid=", 7) == 0) {
-                                       setuid = xstrdup(opt + 7);
+                                       setuid_name = xstrdup(opt + 7);
+                                       ignore = 1;
+                               } else if (strcmp(opt,
+                                                 "drop_privileges") == 0) {
+                                       pass_fuse_fd = 1;
+                                       drop_privileges = 1;
                                        ignore = 1;
                                }
                                for (j = 0; ignore_opts[j]; j++)
@@ -187,6 +324,16 @@ int main(int argc, char *argv[])
                }
        }
 
+       if (drop_privileges) {
+               uint64_t required_caps = CAP_TO_MASK(CAP_SETPCAP) |
+                               CAP_TO_MASK(CAP_SYS_ADMIN);
+               if ((get_capabilities() & required_caps) != required_caps) {
+                       fprintf(stderr, "%s: drop_privileges was requested, which launches the FUSE file system fully unprivileged. In order to do so %s must be run with privileges, please invoke with CAP_SYS_ADMIN and CAP_SETPCAP (e.g. as root).\n",
+                       progname, progname);
+                       exit(1);
+               }
+       }
+
        if (dev)
                options = add_option("dev", options);
        if (suid)
@@ -209,6 +356,51 @@ int main(int argc, char *argv[])
                }
        }
 
+       if (setuid_name && setuid_name[0]) {
+#ifdef linux
+               if (drop_privileges) {
+                       /*
+                        * Make securebits more permissive before calling
+                        * setuid(). Specifically, if SECBIT_KEEP_CAPS and
+                        * SECBIT_NO_SETUID_FIXUP weren't set, setuid() would
+                        * have the side effect of dropping all capabilities,
+                        * and we need to retain CAP_SETPCAP in order to drop
+                        * all privileges before exec().
+                        */
+                       if (prctl(PR_SET_SECUREBITS,
+                                 SECBIT_KEEP_CAPS |
+                                 SECBIT_NO_SETUID_FIXUP) == -1) {
+                               fprintf(stderr,
+                                       "%s: Failed to set securebits %s\n",
+                                       progname, strerror(errno));
+                               exit(1);
+                       }
+               }
+#endif
+
+               struct passwd *pwd = getpwnam(setuid_name);
+               if (setgid(pwd->pw_gid) == -1 || setuid(pwd->pw_uid) == -1) {
+                       fprintf(stderr, "%s: Failed to setuid to %s: %s\n",
+                               progname, setuid_name, strerror(errno));
+                       exit(1);
+               }
+       } else if (!getenv("HOME")) {
+               /* Hack to make filesystems work in the boot environment */
+               setenv("HOME", "/root", 0);
+       }
+
+       if (pass_fuse_fd)  {
+               int fuse_fd = prepare_fuse_fd(mountpoint, type, options);
+               char *dev_fd_mountpoint = xrealloc(NULL, 20);
+               snprintf(dev_fd_mountpoint, 20, "/dev/fd/%u", fuse_fd);
+               mountpoint = dev_fd_mountpoint;
+       }
+
+#ifdef linux
+       if (drop_privileges) {
+               drop_and_lock_capabilities();
+       }
+#endif
        add_arg(&command, type);
        if (source)
                add_arg(&command, source);
@@ -218,19 +410,6 @@ int main(int argc, char *argv[])
                add_arg(&command, options);
        }
 
-       if (setuid && setuid[0]) {
-               char *sucommand = command;
-               command = NULL;
-               add_arg(&command, "su");
-               add_arg(&command, "-");
-               add_arg(&command, setuid);
-               add_arg(&command, "-c");
-               add_arg(&command, sucommand);
-       } else if (!getenv("HOME")) {
-               /* Hack to make filesystems work in the boot environment */
-               setenv("HOME", "/root", 0);
-       }
-
        execl("/bin/sh", "/bin/sh", "-c", command, NULL);
        fprintf(stderr, "%s: failed to execute /bin/sh: %s\n", progname,
                strerror(errno));