2006-09-30 Miklos Szeredi <miklos@szeredi.hu>
+ * Add support for block device backed filesystems. This mode is
+ selected with the 'blkdev' option, which is privileged.
+
+ * Add support for the bmap (FIBMAP ioctl) operation on block
+ device backed filesystems. This allows swapon and lilo to work on
+ such filesystems.
+
* kernel changes:
* Drop support for kernels earlier than 2.6.9. Kernel module from
gid=N
Override the 'st_gid' field set by the filesystem.
+
+blkdev
+
+ Mount a filesystem backed by a block device. This is a privileged
+ option. The device must be specified with the 'fsname=NAME' option.
http://fuse.sourceforge.net/
+Filesystem type
+~~~~~~~~~~~~~~~
+
+The filesystem type given to mount(2) can be one of the following:
+
+'fuse'
+
+ This is the usual way to mount a FUSE filesystem. The first
+ argument of the mount system call may contain an arbitrary string,
+ which is not interpreted by the kernel.
+
+'fuseblk'
+
+ The filesystem is block device based. The first argument of the
+ mount system call is interpreted as the name of the device.
+
Mount options
~~~~~~~~~~~~~
The default is infinite. Note that the size of read requests is
limited anyway to 32 pages (which is 128kbyte on i386).
+'blksize=N'
+
+ Set the block size for the filesystem. The default is 512. This
+ option is only valid for 'fuseblk' type mounts.
+
Control filesystem
~~~~~~~~~~~~~~~~~~
* Introduced in version 2.6
*/
int (*utimens) (const char *, const struct timespec tv[2]);
+
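+ /**
+ * Map block index within file to block index within device
+ *
+ * On entry '*idx' holds the block index within the file; on success
+ * the operation stores the block index within the device in '*idx'
+ * and returns zero.
+ *
+ * Note: This makes sense only for block device backed filesystems
+ * mounted with the 'blkdev' option
+ *
+ * Introduced in version 2.6
+ */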
+ int (*bmap) (const char *, size_t blocksize, uint64_t *idx);
};
/** Extra context that may be needed by some filesystems
/**
* Test for a POSIX file lock
*
+ * Introduced in version 2.6
+ *
* Valid replies:
* fuse_reply_lock
* fuse_reply_err
* will still allow file locking to work locally. Hence these are
* only interesting for network filesystems and similar.
*
+ * Introduced in version 2.6
+ *
* Valid replies:
* fuse_reply_err
*
*/
void (*setlk) (fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
struct flock *lock, uint64_t owner, int sleep);
+
+ /**
+ * Map block index within file to block index within device
+ *
+ * Note: This makes sense only for block device backed filesystems
+ * mounted with the 'blkdev' option
+ *
+ * Introduced in version 2.6
+ *
+ * Valid replies:
+ * fuse_reply_bmap
+ * fuse_reply_err
+ *
+ * @param req request handle
+ * @param ino the inode number
+ * @param blocksize unit of block index
+ * @param idx block index within file
+ */
+ void (*bmap) (fuse_req_t req, fuse_ino_t ino, size_t blocksize,
+ uint64_t idx);
};
/**
*/
int fuse_reply_lock(fuse_req_t req, struct flock *lock);
+/**
+ * Reply with block index
+ *
+ * Possible requests:
+ * bmap
+ *
+ * @param req request handle
+ * @param idx block index within device
+ * @return zero for success, -errno for failure to send reply
+ */
+int fuse_reply_bmap(fuse_req_t req, uint64_t idx);
+
/* ----------------------------------------------------------- *
* Filling a buffer in readdir *
* ----------------------------------------------------------- */
return err;
}
+static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
+{ /* Sends a FUSE_BMAP request for 'block'; returns the block from the reply, or 0 on any failure */
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_req *req;
+ struct fuse_bmap_in inarg;
+ struct fuse_bmap_out outarg;
+ int err;
+ /* No request is sent when the superblock has no block device or an earlier request got -ENOSYS */
+ if (!inode->i_sb->s_bdev || fc->no_bmap)
+ return 0;
+ /* Failure to obtain a request is reported as 0 too */
+ req = fuse_get_req(fc);
+ if (IS_ERR(req))
+ return 0;
+ /* One input argument (block index plus the superblock block size), one output argument */
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.block = block;
+ inarg.blocksize = inode->i_sb->s_blocksize;
+ req->in.h.opcode = FUSE_BMAP;
+ req->in.h.nodeid = get_node_id(inode);
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(inarg);
+ req->in.args[0].value = &inarg;
+ req->out.numargs = 1;
+ req->out.args[0].size = sizeof(outarg);
+ req->out.args[0].value = &outarg;
+ request_send(fc, req);
+ err = req->out.h.error; /* read before fuse_put_request() is called on the request */
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS)
+ fc->no_bmap = 1; /* remembered so that later calls return early */
+ /* Any error in the reply header maps to 0 */
+ return err ? 0 : outarg.block;
+}
+
static struct file_operations fuse_file_operations = {
.llseek = generic_file_llseek,
.read = generic_file_read,
.commit_write = fuse_commit_write,
.readpages = fuse_readpages,
.set_page_dirty = fuse_set_page_dirty,
+ .bmap = fuse_bmap,
};
void fuse_init_file_inode(struct inode *inode)
/** Is interrupt not implemented by fs? */
unsigned no_interrupt : 1;
+ /** Is bmap not implemented by fs? */
+ unsigned no_bmap : 1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
FUSE_ACCESS = 34,
FUSE_CREATE = 35,
FUSE_INTERRUPT = 36,
+ FUSE_BMAP = 37,
};
/* The read buffer is required to be at least 8k, but may be much larger */
__u64 unique;
};
+struct fuse_bmap_in { /* Input argument of a FUSE_BMAP request */
+ __u64 block; /* block index being looked up */
+ __u32 blocksize; /* unit of the block index */
+ __u32 padding; /* not assigned individually by the visible code; presumably alignment filler - TODO confirm */
+};
+
+struct fuse_bmap_out { /* Output argument of a FUSE_BMAP reply */
+ __u64 block; /* block index produced by the filesystem */
+};
+
struct fuse_in_header {
__u32 len;
__u32 opcode;
unsigned group_id_present : 1;
unsigned flags;
unsigned max_read;
+ unsigned blksize;
};
static struct inode *fuse_alloc_inode(struct super_block *sb)
OPT_DEFAULT_PERMISSIONS,
OPT_ALLOW_OTHER,
OPT_MAX_READ,
+ OPT_BLKSIZE,
OPT_ERR
};
{OPT_DEFAULT_PERMISSIONS, "default_permissions"},
{OPT_ALLOW_OTHER, "allow_other"},
{OPT_MAX_READ, "max_read=%u"},
+ {OPT_BLKSIZE, "blksize=%u"},
{OPT_ERR, NULL}
};
-static int parse_fuse_opt(char *opt, struct fuse_mount_data *d)
+static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
{
char *p;
memset(d, 0, sizeof(struct fuse_mount_data));
d->max_read = ~0;
+ d->blksize = 512;
while ((p = strsep(&opt, ",")) != NULL) {
int token;
d->max_read = value;
break;
+ case OPT_BLKSIZE:
+ if (!is_bdev || match_int(&args[0], &value))
+ return 0;
+ d->blksize = value;
+ break;
+
default:
return 0;
}
struct dentry *root_dentry;
struct fuse_req *init_req;
int err;
+ int is_bdev = sb->s_bdev != NULL;
if (sb->s_flags & MS_MANDLOCK)
return -EINVAL;
- if (!parse_fuse_opt((char *) data, &d))
+ if (!parse_fuse_opt((char *) data, &d, is_bdev))
return -EINVAL;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ if (is_bdev) {
+ if (!sb_set_blocksize(sb, d.blksize))
+ return -EINVAL;
+ } else {
+ sb->s_blocksize = PAGE_CACHE_SIZE;
+ sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ }
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
sb->s_maxbytes = MAX_LFS_FILESIZE;
{
return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
}
+
+static int fuse_get_sb_blk(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *raw_data, struct vfsmount *mnt)
+{ /* get_sb hook of "fuseblk" (variant taking a vfsmount): forwards to get_sb_bdev() with fuse_fill_super as callback */
+ return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super,
+ mnt);
+}
#else
static struct super_block *fuse_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name,
{
return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super);
}
+
+static struct super_block *fuse_get_sb_blk(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *raw_data)
+{ /* get_sb hook of "fuseblk" (variant returning the super_block): forwards to get_sb_bdev() with fuse_fill_super as callback */
+ return get_sb_bdev(fs_type, flags, dev_name, raw_data,
+ fuse_fill_super);
+}
#endif
static struct file_system_type fuse_fs_type = {
.kill_sb = kill_anon_super,
};
+static struct file_system_type fuseblk_fs_type = { /* Filesystem type "fuseblk", registered next to fuse_fs_type */
+ .owner = THIS_MODULE,
+ .name = "fuseblk",
+ .get_sb = fuse_get_sb_blk, /* superblock is obtained through get_sb_bdev() */
+ .kill_sb = kill_block_super,
+ .fs_flags = FS_REQUIRES_DEV,
+};
+
#ifndef HAVE_FS_SUBSYS
static decl_subsys(fs, NULL, NULL);
#endif
err = register_filesystem(&fuse_fs_type);
if (err)
- printk("fuse: failed to register filesystem\n");
- else {
- fuse_inode_cachep = kmem_cache_create("fuse_inode",
- sizeof(struct fuse_inode),
- 0, SLAB_HWCACHE_ALIGN,
- fuse_inode_init_once, NULL);
- if (!fuse_inode_cachep) {
- unregister_filesystem(&fuse_fs_type);
- err = -ENOMEM;
- }
- }
+ goto out;
+
+ err = register_filesystem(&fuseblk_fs_type);
+ if (err)
+ goto out_unreg;
+ fuse_inode_cachep = kmem_cache_create("fuse_inode",
+ sizeof(struct fuse_inode),
+ 0, SLAB_HWCACHE_ALIGN,
+ fuse_inode_init_once, NULL);
+ err = -ENOMEM;
+ if (!fuse_inode_cachep)
+ goto out_unreg2;
+
+ return 0;
+
+ out_unreg2:
+ unregister_filesystem(&fuseblk_fs_type);
+ out_unreg:
+ unregister_filesystem(&fuse_fs_type);
+ out:
return err;
}
static void fuse_fs_cleanup(void)
{
unregister_filesystem(&fuse_fs_type);
+ unregister_filesystem(&fuseblk_fs_type); /* "fuseblk" is unregistered together with "fuse" */
kmem_cache_destroy(fuse_inode_cachep);
}
reply_err(req, err);
}
+static void fuse_bmap(fuse_req_t req, fuse_ino_t ino, size_t blocksize,
+ uint64_t idx)
+{ /* .bmap entry of fuse_path_ops: looks up the path of 'ino' and calls the filesystem's bmap operation */
+ struct fuse *f = req_fuse_prepare(req);
+ char *path;
+ int err;
+ /* -ENOENT is what gets reported when get_path() returns NULL */
+ err = -ENOENT;
+ pthread_rwlock_rdlock(&f->tree_lock);
+ path = get_path(f, ino);
+ if (path != NULL) {
+ err = -ENOSYS; /* stays in effect when the filesystem has no bmap operation */
+ if (f->op.bmap)
+ err = f->op.bmap(path, blocksize, &idx); /* idx is passed by address, so the operation can replace it */
+ free(path);
+ }
+ pthread_rwlock_unlock(&f->tree_lock);
+ if (!err)
+ fuse_reply_bmap(req, idx); /* replies with the value left in idx by the operation */
+ else
+ reply_err(req, err);
+}
+
static struct fuse_lowlevel_ops fuse_path_ops = {
.init = fuse_data_init,
.destroy = fuse_data_destroy,
.removexattr = fuse_removexattr,
.getlk = fuse_getlk,
.setlk = fuse_setlk,
+ .bmap = fuse_bmap,
};
static void free_cmd(struct fuse_cmd *cmd)
return send_reply_ok(req, &arg, sizeof(arg));
}
+int fuse_reply_bmap(fuse_req_t req, uint64_t idx)
+{ /* Replies to 'req' with a struct fuse_bmap_out whose block field is 'idx'; returns the result of send_reply_ok() */
+ struct fuse_bmap_out arg;
+
+ memset(&arg, 0, sizeof(arg));
+ arg.block = idx;
+
+ return send_reply_ok(req, &arg, sizeof(arg));
+}
+
static void do_lookup(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
{
char *name = (char *) inarg;
return NULL;
}
+static void do_bmap(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
+{ /* Handler listed for FUSE_BMAP in fuse_ll_ops: 'inarg' is read as a struct fuse_bmap_in */
+ struct fuse_bmap_in *arg = (struct fuse_bmap_in *) inarg;
+ /* Forward to the low-level bmap operation if one is set, otherwise answer ENOSYS */
+ if (req->f->op.bmap)
+ req->f->op.bmap(req, nodeid, arg->blocksize, arg->block);
+ else
+ fuse_reply_err(req, ENOSYS);
+}
+
static void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
{
struct fuse_init_in *arg = (struct fuse_init_in *) inarg;
[FUSE_ACCESS] = { do_access, "ACCESS" },
[FUSE_CREATE] = { do_create, "CREATE" },
[FUSE_INTERRUPT] = { do_interrupt, "INTERRUPT" },
+ [FUSE_BMAP] = { do_bmap, "BMAP" },
};
#define FUSE_MAXOP (sizeof(fuse_ll_ops) / sizeof(fuse_ll_ops[0]))
FUSE_OPT_KEY("allow_other", KEY_KERN),
FUSE_OPT_KEY("allow_root", KEY_ALLOW_ROOT),
FUSE_OPT_KEY("nonempty", KEY_KERN),
+ FUSE_OPT_KEY("blkdev", KEY_KERN),
+ FUSE_OPT_KEY("blksize=", KEY_KERN),
FUSE_OPT_KEY("default_permissions", KEY_KERN),
FUSE_OPT_KEY("fsname=", KEY_KERN),
FUSE_OPT_KEY("large_read", KEY_KERN),
while ((entp = getmntent(fp)) != NULL) {
int removed = 0;
if (!found && strcmp(entp->mnt_dir, mnt) == 0 &&
- strcmp(entp->mnt_type, "fuse") == 0) {
+ (strcmp(entp->mnt_type, "fuse") == 0 ||
+ strcmp(entp->mnt_type, "fuseblk") == 0)) {
if (user == NULL)
removed = 1;
else {
return 0;
}
-static int do_mount(const char *mnt, const char *type, mode_t rootmode,
+static int do_mount(const char *mnt, const char **type, mode_t rootmode,
int fd, const char *opts, const char *dev, char **fsnamep,
char **mnt_optsp, off_t rootsize)
{
char *d;
char *fsname = NULL;
int check_empty = 1;
+ int blkdev = 0;
optbuf = (char *) malloc(strlen(opts) + 128);
if (!optbuf) {
}
memcpy(fsname, s + fsname_str_len, len - fsname_str_len);
fsname[len - fsname_str_len] = '\0';
+ } else if (opt_eq(s, len, "blkdev")) {
+ if (getuid() != 0) {
+ fprintf(stderr, "%s: option blkdev is privileged\n", progname);
+ goto err;
+ }
+ blkdev = 1;
} else if (opt_eq(s, len, "nonempty")) {
check_empty = 0;
} else if (!begins_with(s, "fd=") &&
if (check_empty && check_mountpoint_empty(mnt, rootmode, rootsize) == -1)
goto err;
- res = mount(fsname, mnt, type, flags, optbuf);
+ if (blkdev)
+ *type = "fuseblk";
+ res = mount(fsname, mnt, *type, flags, optbuf);
if (res == -1 && errno == EINVAL) {
/* It could be an old version not supporting group_id */
sprintf(d, "fd=%i,rootmode=%o,user_id=%i", fd, rootmode, getuid());
- res = mount(fsname, mnt, type, flags, optbuf);
+ res = mount(fsname, mnt, *type, flags, optbuf);
}
if (res == -1) {
fprintf(stderr, "%s: mount failed: %s\n", progname, strerror(errno));
res = check_perm(&real_mnt, &stbuf, &currdir_fd, &mountpoint_fd);
restore_privs();
if (res != -1)
- res = do_mount(real_mnt, type, stbuf.st_mode & S_IFMT, fd, opts,
+ res = do_mount(real_mnt, &type, stbuf.st_mode & S_IFMT, fd, opts,
dev, &fsname, &mnt_opts, stbuf.st_size);
} else
restore_privs();