special format `/dev/fd/%u`. This allows mounting to be handled by the parent
   so the FUSE filesystem process can run fully unprivileged.
 
+* Add a `drop_privileges` option to mount.fuse3 which causes it to open
+  `/dev/fuse` and mount the file system itself, then run the FUSE file
+  system fully unprivileged and unable to re-acquire privilege via setuid,
+  fscaps, etc.
+
 libfuse 3.2.6 (2018-08-31)
 ==========================
 
 
 .TP
 \fBmodules=M1[:M2...]\fP
 Add modules to the filesystem stack.  Modules are pushed in the order they are specified, with the original filesystem being on the bottom of the stack.
+
+.SS "\fBmount.fuse3\fP options:"
+These options are interpreted by \fBmount.fuse3\fP and are thus only available when mounting a file system via \fBmount.fuse3\fP (such as when mounting via the generic \fBmount\fP(1) command or \fI/etc/fstab\fP). Supported options are:
+.TP
+\fBsetuid=USER\fP
+Switch to \fBUSER\fP and its primary group before launching the FUSE file system process. mount.fuse3 must be run as root or with \fBCAP_SETUID\fP and \fBCAP_SETGID\fP for this to work.
+.TP
+\fBdrop_privileges\fP
+Set up the FUSE file descriptor and mount the file system before launching the FUSE file system process. \fBmount.fuse3\fP requires privilege to do so, i.e. must be run as root or at least with \fBCAP_SYS_ADMIN\fP and \fBCAP_SETPCAP\fP. It will launch the file system process fully unprivileged, i.e. without \fBcapabilities\fP(7) and with \fBprctl\fP(2) flags set up such that privileges can't be reacquired (e.g. via setuid or fscaps binaries). This reduces risk in the event of the FUSE file system process getting compromised by malicious file system data.
+
 .SH FUSE MODULES (STACKING)
 Modules are filesystem stacking support to high level API. Filesystem modules can be built into libfuse or loaded from shared object
 .SS "iconv"
 
 /** Get session from fuse object */
 struct fuse_session *fuse_get_session(struct fuse *f);
 
+/**
+ * Open a FUSE file descriptor and set up the mount for the given
+ * mountpoint and flags.
+ *
+ * The options string is handed to the mount option parser as the
+ * argument of a single `-o` switch.
+ *
+ * @param mountpoint reference to the mount in the file system
+ * @param options mount options, in the form accepted after `-o`
+ * @return the FUSE file descriptor or -1 upon error (including
+ *         failure to parse the options)
+ */
+int fuse_open_channel(const char *mountpoint, const char *options);
+
 #ifdef __cplusplus
 }
 #endif
 
                fuse_loop_mt_31;
 } FUSE_3.1;
 
+FUSE_3.3 {
+       global:
+               fuse_open_channel;
+} FUSE_3.2;
+
 # Local Variables:
 # indent-tabs-mode: t
 # End:
 
        }
        return opts;
 }
+
+int fuse_open_channel(const char *mountpoint, const char* options)
+{
+       /*
+        * Present the option string to the mount option parser the way it
+        * would appear on a command line: an empty program name followed by
+        * "-o <options>".
+        */
+       const char *fake_argv[] = { "", "-o", options };
+       struct fuse_args args = FUSE_ARGS_INIT(
+               (int) (sizeof(fake_argv) / sizeof(fake_argv[0])),
+               (char**) fake_argv);
+       struct mount_opts *mo = parse_mount_opts(&args);
+       int fd;
+
+       if (!mo)
+               return -1;
+
+       /* The parsed options are only needed for the mount call itself. */
+       fd = fuse_kern_mount(mountpoint, mo);
+       destroy_mount_opts(mo);
+       return fd;
+}
 
                   soversion: '3', include_directories: include_dirs,
                   dependencies: deps, install: true,
                   link_depends: 'fuse_versionscript',
-                  c_args: [ '-DFUSE_USE_VERSION=32',
+                  c_args: [ '-DFUSE_USE_VERSION=33',
                             '-DFUSERMOUNT_DIR="@0@"'.format(fusermount_path) ],
                   link_args: ['-Wl,--version-script,' + meson.current_source_dir()
                               + '/fuse_versionscript' ])
 
 from contextlib import contextmanager
 from util import (wait_for_mount, umount, cleanup, base_cmdline,
                   safe_sleep, basename, fuse_test_marker, test_printcap,
-                  fuse_proto)
+                  fuse_proto, powerset)
 from os.path import join as pjoin
 
 pytestmark = fuse_test_marker()
     __ctr[0] += 1
     return 'testfile_%d' % __ctr[0]
 
-options = [ [] ]
+options = []
 if sys.platform == 'linux':
-    options.append(['-o', 'clone_fd'])
-@pytest.mark.parametrize("options", options)
-@pytest.mark.parametrize("name", ('hello', 'hello_ll'))
-def test_hello(tmpdir, name, options):
-    mnt_dir = str(tmpdir)
-    cmdline = base_cmdline + \
-              [ pjoin(basename, 'example', name),
-                '-f', mnt_dir ] + options
+    options.append('clone_fd')
+
+def invoke_directly(mnt_dir, name, options):
+    cmdline = base_cmdline + [ pjoin(basename, 'example', name),
+                               '-f', mnt_dir, '-o', ','.join(options) ]
     if name == 'hello_ll':
         # supports single-threading only
         cmdline.append('-s')
-    mount_process = subprocess.Popen(cmdline)
+
+    return cmdline
+
+def invoke_mount_fuse(mnt_dir, name, options):
+    return base_cmdline + [ pjoin(basename, 'util', 'mount.fuse3'),
+                            name, mnt_dir, '-o', ','.join(options) ]
+
+def invoke_mount_fuse_drop_privileges(mnt_dir, name, options):
+    if os.getuid() != 0:
+        pytest.skip('drop_privileges requires root, skipping.')
+
+    return invoke_mount_fuse(mnt_dir, name, options + ('drop_privileges',))
+
+@pytest.mark.parametrize("cmdline_builder", (invoke_directly, invoke_mount_fuse,
+                                             invoke_mount_fuse_drop_privileges))
+@pytest.mark.parametrize("options", powerset(options))
+@pytest.mark.parametrize("name", ('hello', 'hello_ll'))
+def test_hello(tmpdir, name, options, cmdline_builder):
+    mnt_dir = str(tmpdir)
+    mount_process = subprocess.Popen(cmdline_builder(mnt_dir, name, options))
     try:
         wait_for_mount(mount_process, mnt_dir)
         assert os.listdir(mnt_dir) == [ 'hello' ]
 
 
 TEST_CMD="python3 -m pytest --maxfail=99 test/"
 
+# Make sure binaries can be accessed when invoked by root.
+umask 0022
+
+# There are tests that run as root but without CAP_DAC_OVERRIDE. To allow these
+# to launch built binaries, the directory tree must be accessible to the root
+# user. Since the source directory isn't necessarily accessible to root, we
+# build and run tests in a temporary directory that we can set up to be world
+# readable/executable.
+SOURCE_DIR="$(readlink -f .)"
+TEST_DIR="$(mktemp -dt libfuse-build-XXXXXX)"
+chmod 0755 "${TEST_DIR}"
+cd "${TEST_DIR}"
+
 # Standard build
 for CC in gcc gcc-6 clang; do
     mkdir build-${CC}; cd build-${CC}
     else
         build_opts=''
     fi
-    meson -D werror=true ${build_opts} ../
+    meson -D werror=true ${build_opts} "${SOURCE_DIR}"
     ninja
 
     sudo chown root:root util/fusermount3
     mkdir build-${san}; cd build-${san}
     # b_lundef=false is required to work around clang
     # bug, cf. https://groups.google.com/forum/#!topic/mesonbuild/tgEdAXIIdC4
-    meson -D b_sanitize=${san} -D b_lundef=false -D werror=true ..
+    meson -D b_sanitize=${san} -D b_lundef=false -D werror=true "${SOURCE_DIR}"
     ninja
 
     # Test as root and regular user
     sudo chown root:root util/fusermount3
     sudo chmod 4755 util/fusermount3
     # Cleanup temporary files (since they're now owned by root)
-    sudo rm -rf test/.pytest_cache/
+    sudo rm -rf test/.pytest_cache/ test/__pycache__
 
     ${TEST_CMD}
     cd ..
 done
 
-# Documentation
-doxygen doc/Doxyfile
+# Documentation.
+(cd "${SOURCE_DIR}"; doxygen doc/Doxyfile)
 
+# Clean up.
+rm -rf "${TEST_DIR}"
 
 from os.path import join as pjoin
 import sys
 import re
+import itertools
 
 basename = pjoin(os.path.dirname(__file__), '..')
 
 
     return pytest.mark.uses_fuse()
 
+def powerset(iterable):
+  s = list(iterable)
+  return itertools.chain.from_iterable(
+      itertools.combinations(s, r) for r in range(len(s)+1))
+
+
 # Use valgrind if requested
 if os.environ.get('TEST_WITH_VALGRIND', 'no').lower().strip() \
    not in ('no', 'false', '0'):
 
 # Try to use local fusermount3
 os.environ['PATH'] = '%s:%s' % (pjoin(basename, 'util'), os.environ['PATH'])
+# Put example binaries on PATH
+os.environ['PATH'] = '%s:%s' % (pjoin(basename, 'example'), os.environ['PATH'])
 
 try:
     (fuse_proto, fuse_caps) = test_printcap()
 
            install_dir: get_option('bindir'),
            c_args: '-DFUSE_CONF="@0@"'.format(fuseconf_path))
 
-executable('mount.fuse3', ['mount.fuse.c'], 
+executable('mount.fuse3', ['mount.fuse.c'],
            include_directories: include_dirs,
+           link_with: [ libfuse ],
            install: true,
-           install_dir: get_option('sbindir'))
+           install_dir: get_option('sbindir'),
+           c_args: '-DFUSE_USE_VERSION=33')
 
 
 udevrulesdir = get_option('udevrulesdir')
 
 #include <unistd.h>
 #include <errno.h>
 #include <stdint.h>
+#include <fcntl.h>
+#include <pwd.h>
+#include <sys/wait.h>
+
+#ifdef linux
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <linux/capability.h>
+#include <linux/securebits.h>
+#endif
+
+#include "fuse.h"
 
 static char *progname;
 
        return options;
 }
 
+/*
+ * Open the FUSE channel for `mountpoint` on behalf of the file system
+ * process that is launched afterwards.
+ *
+ * The option string given to fuse_open_channel() is "subtype=<subtype>"
+ * combined with `options` through add_option().  Close-on-exec is cleared
+ * on the resulting descriptor so that it stays open across exec.
+ *
+ * Returns the descriptor.  Any failure terminates the process with exit
+ * status 1.
+ */
+static int prepare_fuse_fd(const char *mountpoint, const char* subtype,
+                          const char *options)
+{
+       int fuse_fd = -1;
+       int flags = -1;
+       int subtype_len = strlen(subtype) + 9;  /* "subtype=" plus NUL */
+       char* options_copy = xrealloc(NULL, subtype_len);
+
+       snprintf(options_copy, subtype_len, "subtype=%s", subtype);
+       options_copy = add_option(options, options_copy);
+       fuse_fd = fuse_open_channel(mountpoint, options_copy);
+       if (fuse_fd == -1) {
+               exit(1);
+       }
+
+       /*
+        * fcntl() signals failure with -1 for F_GETFD and F_SETFD alike;
+        * comparing the F_SETFD result against 1 would never detect an error.
+        */
+       flags = fcntl(fuse_fd, F_GETFD);
+       if (flags == -1 || fcntl(fuse_fd, F_SETFD, flags & ~FD_CLOEXEC) == -1) {
+               fprintf(stderr, "%s: Failed to clear CLOEXEC: %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+
+       return fuse_fd;
+}
+
+#ifdef linux
+/*
+ * Return the effective capability set reported by capget for pid 0 as a
+ * 64-bit mask.  Exits with status 1 if the syscall fails.
+ */
+static uint64_t get_capabilities(void)
+{
+       /*
+        * The capget syscall is invoked directly so that libcap is not
+        * needed; that dependency isn't really justified just for this.
+        */
+       struct __user_cap_header_struct hdr = {
+               .version = _LINUX_CAPABILITY_VERSION_3,
+               .pid = 0,
+       };
+       struct __user_cap_data_struct sets[2];
+       uint64_t low;
+       uint64_t high;
+
+       memset(sets, 0, sizeof(sets));
+       if (syscall(SYS_capget, &hdr, sets) == -1) {
+               fprintf(stderr, "%s: Failed to get capabilities: %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+
+       /* Each array element carries 32 bits of the 64-bit mask. */
+       low = sets[0].effective;
+       high = sets[1].effective;
+       return low | (high << 32);
+}
+
+/*
+ * Install `caps` as both the effective and the permitted capability set;
+ * the inheritable set is left zeroed.  Exits with status 1 if the syscall
+ * fails.
+ */
+static void set_capabilities(uint64_t caps)
+{
+       /*
+        * The capset syscall is invoked directly so that libcap is not
+        * needed; that dependency isn't really justified just for this.
+        */
+       struct __user_cap_header_struct hdr = {
+               .version = _LINUX_CAPABILITY_VERSION_3,
+               .pid = 0,
+       };
+       struct __user_cap_data_struct sets[2];
+
+       memset(sets, 0, sizeof(sets));
+
+       /* Split the 64-bit mask into its low and high 32-bit halves. */
+       sets[0].permitted = (uint32_t) caps;
+       sets[0].effective = sets[0].permitted;
+       sets[1].permitted = (uint32_t) (caps >> 32);
+       sets[1].effective = sets[1].permitted;
+
+       if (syscall(SYS_capset, &hdr, sets) == -1) {
+               fprintf(stderr, "%s: Failed to set capabilities: %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+}
+
+/*
+ * Strip the calling process of its capabilities and lock that state in:
+ * set and lock the securebits, empty the capability bounding set, clear
+ * the effective and permitted sets, and finally set no_new_privs.
+ *
+ * Every step is mandatory; if any of them fails the process exits with
+ * status 1 instead of continuing with leftover privilege.
+ */
+static void drop_and_lock_capabilities(void)
+{
+       int cap;
+
+       /* Set and lock securebits. */
+       if (prctl(PR_SET_SECUREBITS,
+                 SECBIT_KEEP_CAPS_LOCKED |
+                 SECBIT_NO_SETUID_FIXUP |
+                 SECBIT_NO_SETUID_FIXUP_LOCKED |
+                 SECBIT_NOROOT |
+                 SECBIT_NOROOT_LOCKED) == -1) {
+               fprintf(stderr, "%s: Failed to set securebits %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+
+       /*
+        * Clear the capability bounding set.  Capability numbers are probed
+        * in ascending order; the loop ends at the first number for which
+        * PR_CAPBSET_READ fails with EINVAL.
+        */
+       for (cap = 0; ; cap++) {
+               int cap_status = prctl(PR_CAPBSET_READ, cap);
+               if (cap_status == 0) {
+                       /* Not in the bounding set, nothing to drop. */
+                       continue;
+               }
+               if (cap_status == -1 && errno == EINVAL) {
+                       break;
+               }
+
+               if (cap_status != 1) {
+                       fprintf(stderr,
+                               "%s: Failed to get capability %d: %s\n",
+                               progname, cap, strerror(errno));
+                       exit(1);
+               }
+               if (prctl(PR_CAPBSET_DROP, cap) == -1) {
+                       /*
+                        * A capability that stays in the bounding set
+                        * defeats the purpose of this function, so treat
+                        * the failure as fatal like all other steps.
+                        */
+                       fprintf(stderr,
+                               "%s: Failed to drop capability %d: %s\n",
+                               progname, cap, strerror(errno));
+                       exit(1);
+               }
+       }
+
+       /* Drop capabilities. */
+       set_capabilities(0);
+
+       /* Prevent re-acquisition of privileges. */
+       if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
+               fprintf(stderr, "%s: Failed to set no_new_privs: %s\n",
+                       progname, strerror(errno));
+               exit(1);
+       }
+}
+#endif
+
 int main(int argc, char *argv[])
 {
        char *type = NULL;
        char *basename;
        char *options = NULL;
        char *command = NULL;
-       char *setuid = NULL;
+       char *setuid_name = NULL;
        int i;
        int dev = 1;
        int suid = 1;
+       int pass_fuse_fd = 0;
+       int drop_privileges = 0;
 
        progname = argv[0];
        basename = strrchr(argv[0], '/');
                                                              "_netdev",
                                                              NULL};
                                if (strncmp(opt, "setuid=", 7) == 0) {
-                                       setuid = xstrdup(opt + 7);
+                                       setuid_name = xstrdup(opt + 7);
+                                       ignore = 1;
+                               } else if (strcmp(opt,
+                                                 "drop_privileges") == 0) {
+                                       pass_fuse_fd = 1;
+                                       drop_privileges = 1;
                                        ignore = 1;
                                }
                                for (j = 0; ignore_opts[j]; j++)
                }
        }
 
+       if (drop_privileges) {
+               uint64_t required_caps = CAP_TO_MASK(CAP_SETPCAP) |
+                               CAP_TO_MASK(CAP_SYS_ADMIN);
+               if ((get_capabilities() & required_caps) != required_caps) {
+                       fprintf(stderr, "%s: drop_privileges was requested, which launches the FUSE file system fully unprivileged. In order to do so %s must be run with privileges, please invoke with CAP_SYS_ADMIN and CAP_SETPCAP (e.g. as root).\n",
+                       progname, progname);
+                       exit(1);
+               }
+       }
+
        if (dev)
                options = add_option("dev", options);
        if (suid)
                }
        }
 
+       if (setuid_name && setuid_name[0]) {
+#ifdef linux
+               if (drop_privileges) {
+                       /*
+                        * Make securebits more permissive before calling
+                        * setuid(). Specifically, if SECBIT_KEEP_CAPS and
+                        * SECBIT_NO_SETUID_FIXUP weren't set, setuid() would
+                        * have the side effect of dropping all capabilities,
+                        * and we need to retain CAP_SETPCAP in order to drop
+                        * all privileges before exec().
+                        */
+                       if (prctl(PR_SET_SECUREBITS,
+                                 SECBIT_KEEP_CAPS |
+                                 SECBIT_NO_SETUID_FIXUP) == -1) {
+                               fprintf(stderr,
+                                       "%s: Failed to set securebits %s\n",
+                                       progname, strerror(errno));
+                               exit(1);
+                       }
+               }
+#endif
+
+               struct passwd *pwd = getpwnam(setuid_name);
+               if (pwd == NULL) {
+                       /*
+                        * getpwnam() returns NULL when the user does not
+                        * exist or the lookup fails; it must not be
+                        * dereferenced in that case.
+                        */
+                       fprintf(stderr, "%s: Failed to look up user %s\n",
+                               progname, setuid_name);
+                       exit(1);
+               }
+               /* Switch the group first, then the user. */
+               if (setgid(pwd->pw_gid) == -1 || setuid(pwd->pw_uid) == -1) {
+                       fprintf(stderr, "%s: Failed to setuid to %s: %s\n",
+                               progname, setuid_name, strerror(errno));
+                       exit(1);
+               }
+       } else if (!getenv("HOME")) {
+               /* Hack to make filesystems work in the boot environment */
+               setenv("HOME", "/root", 0);
+       }
+
+       if (pass_fuse_fd)  {
+               int fuse_fd = prepare_fuse_fd(mountpoint, type, options);
+               char *dev_fd_mountpoint = xrealloc(NULL, 20);
+               snprintf(dev_fd_mountpoint, 20, "/dev/fd/%u", fuse_fd);
+               mountpoint = dev_fd_mountpoint;
+       }
+
+#ifdef linux
+       if (drop_privileges) {
+               drop_and_lock_capabilities();
+       }
+#endif
        add_arg(&command, type);
        if (source)
                add_arg(&command, source);
                add_arg(&command, options);
        }
 
-       if (setuid && setuid[0]) {
-               char *sucommand = command;
-               command = NULL;
-               add_arg(&command, "su");
-               add_arg(&command, "-");
-               add_arg(&command, setuid);
-               add_arg(&command, "-c");
-               add_arg(&command, sucommand);
-       } else if (!getenv("HOME")) {
-               /* Hack to make filesystems work in the boot environment */
-               setenv("HOME", "/root", 0);
-       }
-
        execl("/bin/sh", "/bin/sh", "-c", command, NULL);
        fprintf(stderr, "%s: failed to execute /bin/sh: %s\n", progname,
                strerror(errno));