From 91762cd331d955f1378dd90af08b56aa15861ab4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 29 Jun 2006 14:38:35 +0000 Subject: [PATCH] *** empty log message *** --- ChangeLog | 11 +- configure.in | 2 +- doc/kernel.txt | 118 +++++++----- include/fuse_lowlevel.h | 7 +- kernel/Makefile.in | 7 +- kernel/configure.ac | 2 +- kernel/control.c | 217 +++++++++++++++++++++ kernel/dev.c | 418 ++++++++++++++++++++++++++-------------- kernel/dir.c | 101 ++++++---- kernel/file.c | 224 +++++++++++++++++---- kernel/fuse_i.h | 170 ++++++++++------ kernel/inode.c | 213 ++++++++------------ 12 files changed, 1016 insertions(+), 474 deletions(-) create mode 100644 kernel/control.c diff --git a/ChangeLog b/ChangeLog index e3cec01..bd6d44f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,18 @@ +2006-06-29 Miklos Szeredi + + * Released 2.6.0-pre3 + +2006-06-29 Miklos Szeredi + + * Support in kernel module for file locking and interruption. The + same functionality is available in official kernels >= 2.6.18 + 2006-06-28 Miklos Szeredi * Add POSIX file locking support * Add request interruption - * The above need 2.6.17-git9 or later to be usable - 2006-06-06 Miklos Szeredi * Add missing pthread_rwlock_destroy(). Patch from Remy Blank diff --git a/configure.in b/configure.in index 8262670..91b65b1 100644 --- a/configure.in +++ b/configure.in @@ -1,4 +1,4 @@ -AC_INIT(fuse, 2.6.0-pre2) +AC_INIT(fuse, 2.6.0-pre3) AC_CANONICAL_TARGET AM_INIT_AUTOMAKE AM_CONFIG_HEADER(include/config.h) diff --git a/doc/kernel.txt b/doc/kernel.txt index 33f7431..a584f05 100644 --- a/doc/kernel.txt +++ b/doc/kernel.txt @@ -18,6 +18,14 @@ Non-privileged mount (or user mount): user. NOTE: this is not the same as mounts allowed with the "user" option in /etc/fstab, which is not discussed here. +Filesystem connection: + + A connection between the filesystem daemon and the kernel. The + connection exists until either the daemon dies, or the filesystem is + umounted. Note that detaching (or lazy umounting) the filesystem + does _not_ break the connection, in this case it will exist until + the last reference to the filesystem is released. + Mount owner: The user who does the mounting. @@ -86,16 +94,20 @@ Mount options The default is infinite. Note that the size of read requests is limited anyway to 32 pages (which is 128kbyte on i386). -Sysfs -~~~~~ +Control filesystem +~~~~~~~~~~~~~~~~~~ + +There's a control filesystem for FUSE, which can be mounted by: -FUSE sets up the following hierarchy in sysfs: + mount -t fusectl none /sys/fs/fuse/connections - /sys/fs/fuse/connections/N/ +Mounting it under the '/sys/fs/fuse/connections' directory makes it +backwards compatible with earlier versions. -where N is an increasing number allocated to each new connection. +Under the fuse control filesystem each connection has a directory +named by a unique number. -For each connection the following attributes are defined: +For each connection the following files exist within this directory: 'waiting' @@ -110,7 +122,47 @@ For each connection the following attributes are defined: connection. This means that all waiting requests will be aborted an error returned for all aborted and new requests. -Only a privileged user may read or write these attributes. +Only the owner of the mount may read or write these files. + +Interrupting filesystem operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a process issuing a FUSE filesystem request is interrupted, the +following will happen: + + 1) If the request is not yet sent to userspace AND the signal is + fatal (SIGKILL or unhandled fatal signal), then the request is + dequeued and returns immediately. + + 2) If the request is not yet sent to userspace AND the signal is not + fatal, then an 'interrupted' flag is set for the request. When + the request has been successfully transfered to userspace and + this flag is set, an INTERRUPT request is queued. + + 3) If the request is already sent to userspace, then an INTERRUPT + request is queued. + +INTERRUPT requests take precedence over other requests, so the +userspace filesystem will receive queued INTERRUPTs before any others. + +The userspace filesystem may ignore the INTERRUPT requests entirely, +or may honor them by sending a reply to the _original_ request, with +the error set to EINTR. + +It is also possible that there's a race between processing the +original request and it's INTERRUPT request. There are two possibilities: + + 1) The INTERRUPT request is processed before the original request is + processed + + 2) The INTERRUPT request is processed after the original request has + been answered + +If the filesystem cannot find the original request, it should wait for +some timeout and/or a number of new requests to arrive, after which it +should reply to the INTERRUPT request with an EAGAIN error. In case +1) the INTERRUPT request will be requeued. In case 2) the INTERRUPT +reply will be ignored. Aborting a filesystem connection ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -139,8 +191,8 @@ the filesystem. There are several ways to do this: - Use forced umount (umount -f). Works in all cases but only if filesystem is still attached (it hasn't been lazy unmounted) - - Abort filesystem through the sysfs interface. Most powerful - method, always works. + - Abort filesystem through the FUSE control filesystem. Most + powerful method, always works. How do non-privileged mounts work? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -304,25 +356,7 @@ Scenario 1 - Simple deadlock | | for "file"] | | *DEADLOCK* -The solution for this is to allow requests to be interrupted while -they are in userspace: - - | [interrupted by signal] | - | fuse_unlink() - | | [queue req on fc->pending] - | | [wake up fc->waitq] - | | [sleep on req->waitq] - -If the filesystem daemon was single threaded, this will stop here, -since there's no other thread to dequeue and execute the request. -In this case the solution is to kill the FUSE daemon as well. If -there are multiple serving threads, you just have to kill them as -long as any remain. - -Moral: a filesystem which deadlocks, can soon find itself dead. +The solution for this is to allow the filesystem to be aborted. Scenario 2 - Tricky deadlock ---------------------------- @@ -355,24 +389,14 @@ but is caused by a pagefault. | | [lock page] | | * DEADLOCK * -Solution is again to let the the request be interrupted (not -elaborated further). - -An additional problem is that while the write buffer is being -copied to the request, the request must not be interrupted. This -is because the destination address of the copy may not be valid -after the request is interrupted. - -This is solved with doing the copy atomically, and allowing -interruption while the page(s) belonging to the write buffer are -faulted with get_user_pages(). The 'req->locked' flag indicates -when the copy is taking place, and interruption is delayed until -this flag is unset. +Solution is basically the same as above. -Scenario 3 - Tricky deadlock with asynchronous read ---------------------------------------------------- +An additional problem is that while the write buffer is being copied +to the request, the request must not be interrupted/aborted. This is +because the destination address of the copy may not be valid after the +request has returned. -The same situation as above, except thread-1 will wait on page lock -and hence it will be uninterruptible as well. The solution is to -abort the connection with forced umount (if mount is attached) or -through the abort attribute in sysfs. +This is solved with doing the copy atomically, and allowing abort +while the page(s) belonging to the write buffer are faulted with +get_user_pages(). The 'req->locked' flag indicates when the copy is +taking place, and abort is delayed until this flag is unset. diff --git a/include/fuse_lowlevel.h b/include/fuse_lowlevel.h index cf1fe0c..ede1307 100644 --- a/include/fuse_lowlevel.h +++ b/include/fuse_lowlevel.h @@ -749,9 +749,10 @@ struct fuse_lowlevel_ops { * Acquire, modify or release a POSIX file lock * * For POSIX threads (NPTL) there's a 1-1 relation between pid and - * owner, but this is not always the case. For checking lock - * ownership, 'owner' must be used. The l_pid field in 'struct - * flock' should only be used to fill in this field in getlk(). + * owner, but otherwise this is not always the case. For checking + * lock ownership, 'owner' must be used. The l_pid field in + * 'struct flock' should only be used to fill in this field in + * getlk(). * * Valid replies: * fuse_reply_err diff --git a/kernel/Makefile.in b/kernel/Makefile.in index 34d376d..a8361ac 100644 --- a/kernel/Makefile.in +++ b/kernel/Makefile.in @@ -7,7 +7,7 @@ majver = @majver@ VERSION = @PACKAGE_VERSION@ DISTFILES = Makefile.in configure.ac configure config.h.in ../install-sh \ - dev.c dir.c file.c inode.c fuse_i.h fuse_kernel.h + dev.c dir.c file.c inode.c fuse_i.h fuse_kernel.h control.c COMPATDISTFILES = compat/parser.c compat/parser.h fusemoduledir = @kmoduledir@/kernel/fs/fuse @@ -59,7 +59,7 @@ LD = ld CFLAGS = -O2 -Wall -Wstrict-prototypes -fno-strict-aliasing -pipe @KERNELCFLAGS@ CPPFLAGS = -I@kernelsrc@/include -I. -D__KERNEL__ -DMODULE -D_LOOSE_KERNEL_NAMES -DFUSE_VERSION=\"$(VERSION)\" @KERNELCPPFLAGS@ -fuse_objs = dev.o dir.o file.o inode.o compat/parser.o +fuse_objs = dev.o dir.o file.o inode.o compat/parser.o control.o SUFFIXES = .c .o .s @@ -77,13 +77,14 @@ dev.o: $(fuse_headers) dir.o: $(fuse_headers) file.o: $(fuse_headers) inode.o: $(fuse_headers) +control.o: $(fuse_headers) else EXTRA_CFLAGS += -DFUSE_VERSION=\"$(VERSION)\" obj-m := fuse.o -fuse-objs := dev.o dir.o file.o inode.o +fuse-objs := dev.o dir.o file.o inode.o control.o all-spec: $(MAKE) -C @kernelsrc@ SUBDIRS=$(PWD) @KERNELMAKE_PARAMS@ modules diff --git a/kernel/configure.ac b/kernel/configure.ac index 2aa7dee..21dc036 100644 --- a/kernel/configure.ac +++ b/kernel/configure.ac @@ -1,4 +1,4 @@ -AC_INIT(fuse-kernel, 2.6.0-pre2) +AC_INIT(fuse-kernel, 2.6.0-pre3) AC_CONFIG_HEADERS([config.h]) AC_PROG_INSTALL diff --git a/kernel/control.c b/kernel/control.c new file mode 100644 index 0000000..aff8b64 --- /dev/null +++ b/kernel/control.c @@ -0,0 +1,217 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2006 Miklos Szeredi + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +#include "fuse_i.h" + +#include +#include + +#define FUSE_CTL_SUPER_MAGIC 0x65735543 + +/* + * This is non-NULL when the single instance of the control filesystem + * exists. Protected by fuse_mutex + */ +static struct super_block *fuse_control_sb; + +static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file) +{ + struct fuse_conn *fc; + mutex_lock(&fuse_mutex); + fc = file->f_dentry->d_inode->u.generic_ip; + if (fc) + fc = fuse_conn_get(fc); + mutex_unlock(&fuse_mutex); + return fc; +} + +static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct fuse_conn *fc = fuse_ctl_file_conn_get(file); + if (fc) { + fuse_abort_conn(fc); + fuse_conn_put(fc); + } + return count; +} + +static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, + size_t len, loff_t *ppos) +{ + char tmp[32]; + size_t size; + + if (!*ppos) { + struct fuse_conn *fc = fuse_ctl_file_conn_get(file); + if (!fc) + return 0; + + file->private_data=(void *)(long)atomic_read(&fc->num_waiting); + fuse_conn_put(fc); + } + size = sprintf(tmp, "%ld\n", (long)file->private_data); + return simple_read_from_buffer(buf, len, ppos, tmp, size); +} + +static struct file_operations fuse_ctl_abort_ops = { + .open = nonseekable_open, + .write = fuse_conn_abort_write, +}; + +static struct file_operations fuse_ctl_waiting_ops = { + .open = nonseekable_open, + .read = fuse_conn_waiting_read, +}; + +static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, + struct fuse_conn *fc, + const char *name, + int mode, int nlink, + struct inode_operations *iop, + struct file_operations *fop) +{ + struct dentry *dentry; + struct inode *inode; + + BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES); + dentry = d_alloc_name(parent, name); + if (!dentry) + return NULL; + + fc->ctl_dentry[fc->ctl_ndents++] = dentry; + inode = new_inode(fuse_control_sb); + if (!inode) + return NULL; + + inode->i_mode = mode; + inode->i_uid = fc->user_id; + inode->i_gid = fc->group_id; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + /* setting ->i_op to NULL is not allowed */ + if (iop) + inode->i_op = iop; + inode->i_fop = fop; + inode->i_nlink = nlink; + inode->u.generic_ip = fc; + d_add(dentry, inode); + return dentry; +} + +/* + * Add a connection to the control filesystem (if it exists). Caller + * must host fuse_mutex + */ +int fuse_ctl_add_conn(struct fuse_conn *fc) +{ + struct dentry *parent; + char name[32]; + + if (!fuse_control_sb) + return 0; + + parent = fuse_control_sb->s_root; + parent->d_inode->i_nlink++; + sprintf(name, "%llu", (unsigned long long) fc->id); + parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, + &simple_dir_inode_operations, + &simple_dir_operations); + if (!parent) + goto err; + + if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, + NULL, &fuse_ctl_waiting_ops) || + !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, + NULL, &fuse_ctl_abort_ops)) + goto err; + + return 0; + + err: + fuse_ctl_remove_conn(fc); + return -ENOMEM; +} + +/* + * Remove a connection from the control filesystem (if it exists). + * Caller must host fuse_mutex + */ +void fuse_ctl_remove_conn(struct fuse_conn *fc) +{ + int i; + + if (!fuse_control_sb) + return; + + for (i = fc->ctl_ndents - 1; i >= 0; i--) { + struct dentry *dentry = fc->ctl_dentry[i]; + dentry->d_inode->u.generic_ip = NULL; + d_drop(dentry); + dput(dentry); + } + fuse_control_sb->s_root->d_inode->i_nlink--; +} + +static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent) +{ + struct tree_descr empty_descr = {""}; + struct fuse_conn *fc; + int err; + + err = simple_fill_super(sb, FUSE_CTL_SUPER_MAGIC, &empty_descr); + if (err) + return err; + + mutex_lock(&fuse_mutex); + BUG_ON(fuse_control_sb); + fuse_control_sb = sb; + list_for_each_entry(fc, &fuse_conn_list, entry) { + err = fuse_ctl_add_conn(fc); + if (err) { + fuse_control_sb = NULL; + mutex_unlock(&fuse_mutex); + return err; + } + } + mutex_unlock(&fuse_mutex); + + return 0; +} + +static struct super_block *fuse_ctl_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data) +{ + return get_sb_single(fs_type, flags, raw_data, fuse_ctl_fill_super); +} + +static void fuse_ctl_kill_sb(struct super_block *sb) +{ + mutex_lock(&fuse_mutex); + fuse_control_sb = NULL; + mutex_unlock(&fuse_mutex); + + kill_litter_super(sb); +} + +static struct file_system_type fuse_ctl_fs_type = { + .owner = THIS_MODULE, + .name = "fusectl", + .get_sb = fuse_ctl_get_sb, + .kill_sb = fuse_ctl_kill_sb, +}; + +int __init fuse_ctl_init(void) +{ + return register_filesystem(&fuse_ctl_fs_type); +} + +void fuse_ctl_cleanup(void) +{ + unregister_filesystem(&fuse_ctl_fs_type); +} diff --git a/kernel/dev.c b/kernel/dev.c index 80f662f..0231caa 100644 --- a/kernel/dev.c +++ b/kernel/dev.c @@ -36,6 +36,7 @@ static void fuse_request_init(struct fuse_req *req) { memset(req, 0, sizeof(*req)); INIT_LIST_HEAD(&req->list); + INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); atomic_set(&req->count, 1); } @@ -104,18 +105,6 @@ static void restore_sigs(sigset_t *oldset) #endif #endif -/* - * Reset request, so that it can be reused - * - * The caller must be _very_ careful to make sure, that it is holding - * the only reference to req - */ -void fuse_reset_request(struct fuse_req *req) -{ - BUG_ON(atomic_read(&req->count) != 1); - fuse_request_init(req); -} - static void __fuse_get_request(struct fuse_req *req) { atomic_inc(&req->count); @@ -128,6 +117,13 @@ static void __fuse_put_request(struct fuse_req *req) atomic_dec(&req->count); } +static void fuse_req_init_context(struct fuse_req *req) +{ + req->in.h.uid = current->fsuid; + req->in.h.gid = current->fsgid; + req->in.h.pid = current->pid; +} + struct fuse_req *fuse_get_req(struct fuse_conn *fc) { struct fuse_req *req; @@ -143,14 +139,16 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) if (intr) goto out; + err = -ENOTCONN; + if (!fc->connected) + goto out; + req = fuse_request_alloc(); err = -ENOMEM; if (!req) goto out; - req->in.h.uid = current->fsuid; - req->in.h.gid = current->fsgid; - req->in.h.pid = current->pid; + fuse_req_init_context(req); req->waiting = 1; return req; @@ -159,142 +157,183 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc) return ERR_PTR(err); } -void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) +/* + * Return request in fuse_file->reserved_req. However that may + * currently be in use. If that is the case, wait for it to become + * available. + */ +static struct fuse_req *get_reserved_req(struct fuse_conn *fc, + struct file *file) { - if (atomic_dec_and_test(&req->count)) { - if (req->waiting) - atomic_dec(&fc->num_waiting); - fuse_request_free(req); - } + struct fuse_req *req = NULL; + struct fuse_file *ff = file->private_data; + + do { + wait_event(fc->blocked_waitq, ff->reserved_req); + spin_lock(&fc->lock); + if (ff->reserved_req) { + req = ff->reserved_req; + ff->reserved_req = NULL; + get_file(file); + req->stolen_file = file; + } + spin_unlock(&fc->lock); + } while (!req); + + return req; } /* - * Called with sbput_sem held for read (request_end) or write - * (fuse_put_super). By the time fuse_put_super() is finished, all - * inodes belonging to background requests must be released, so the - * iputs have to be done within the locked region. + * Put stolen request back into fuse_file->reserved_req */ -void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req) +static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req) { - iput(req->inode); - iput(req->inode2); + struct file *file = req->stolen_file; + struct fuse_file *ff = file->private_data; + spin_lock(&fc->lock); - list_del(&req->bg_entry); - if (fc->num_background == FUSE_MAX_BACKGROUND) { - fc->blocked = 0; - wake_up_all(&fc->blocked_waitq); - } - fc->num_background--; + fuse_request_init(req); + BUG_ON(ff->reserved_req); + ff->reserved_req = req; + wake_up(&fc->blocked_waitq); spin_unlock(&fc->lock); + fput(file); } /* - * This function is called when a request is finished. Either a reply - * has arrived or it was interrupted (and not yet sent) or some error - * occurred during communication with userspace, or the device file - * was closed. In case of a background request the reference to the - * stored objects are released. The requester thread is woken up (if - * still waiting), the 'end' callback is called if given, else the - * reference to the request is released + * Gets a requests for a file operation, always succeeds * - * Releasing extra reference for foreground requests must be done - * within the same locked region as setting state to finished. This - * is because fuse_reset_request() may be called after request is - * finished and it must be the sole possessor. If request is - * interrupted and put in the background, it will return with an error - * and hence never be reset and reused. + * This is used for sending the FLUSH request, which must get to + * userspace, due to POSIX locks which may need to be unlocked. * - * Called with fc->lock, unlocks it + * If allocation fails due to OOM, use the reserved request in + * fuse_file. + * + * This is very unlikely to deadlock accidentally, since the + * filesystem should not have it's own file open. If deadlock is + * intentional, it can still be broken by "aborting" the filesystem. */ -static void request_end(struct fuse_conn *fc, struct fuse_req *req) +struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file) { - list_del(&req->list); - req->state = FUSE_REQ_FINISHED; - if (!req->background) { - spin_unlock(&fc->lock); - wake_up(&req->waitq); - fuse_put_request(fc, req); - } else { - void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; - req->end = NULL; - spin_unlock(&fc->lock); - down_read(&fc->sbput_sem); - if (fc->mounted) - fuse_release_background(fc, req); - up_read(&fc->sbput_sem); + struct fuse_req *req; - /* fput must go outside sbput_sem, otherwise it can deadlock */ - if (req->file) - fput(req->file); + atomic_inc(&fc->num_waiting); + wait_event(fc->blocked_waitq, !fc->blocked); + req = fuse_request_alloc(); + if (!req) + req = get_reserved_req(fc, file); - if (end) - end(fc, req); + fuse_req_init_context(req); + req->waiting = 1; + return req; +} + +void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req) +{ + if (atomic_dec_and_test(&req->count)) { + if (req->waiting) + atomic_dec(&fc->num_waiting); + + if (req->stolen_file) + put_reserved_req(fc, req); else - fuse_put_request(fc, req); + fuse_request_free(req); } } /* - * Unfortunately request interruption not just solves the deadlock - * problem, it causes problems too. These stem from the fact, that an - * interrupted request is continued to be processed in userspace, - * while all the locks and object references (inode and file) held - * during the operation are released. - * - * To release the locks is exactly why there's a need to interrupt the - * request, so there's not a lot that can be done about this, except - * introduce additional locking in userspace. - * - * More important is to keep inode and file references until userspace - * has replied, otherwise FORGET and RELEASE could be sent while the - * inode/file is still used by the filesystem. - * - * For this reason the concept of "background" request is introduced. - * An interrupted request is backgrounded if it has been already sent - * to userspace. Backgrounding involves getting an extra reference to - * inode(s) or file used in the request, and adding the request to - * fc->background list. When a reply is received for a background - * request, the object references are released, and the request is - * removed from the list. If the filesystem is unmounted while there - * are still background requests, the list is walked and references - * are released as if a reply was received. + * This function is called when a request is finished. Either a reply + * has arrived or it was aborted (and not yet sent) or some error + * occurred during communication with userspace, or the device file + * was closed. The requester thread is woken up (if still waiting), + * the 'end' callback is called if given, else the reference to the + * request is released * - * There's one more use for a background request. The RELEASE message is - * always sent as background, since it doesn't return an error or - * data. + * Called with fc->lock, unlocks it */ -static void background_request(struct fuse_conn *fc, struct fuse_req *req) -{ - req->background = 1; - list_add(&req->bg_entry, &fc->background); - fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) - fc->blocked = 1; - if (req->inode) - req->inode = igrab(req->inode); - if (req->inode2) - req->inode2 = igrab(req->inode2); +static void request_end(struct fuse_conn *fc, struct fuse_req *req) +{ + void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; + req->end = NULL; + list_del(&req->list); + list_del(&req->intr_entry); + req->state = FUSE_REQ_FINISHED; + if (req->background) { + if (fc->num_background == FUSE_MAX_BACKGROUND) { + fc->blocked = 0; + wake_up_all(&fc->blocked_waitq); + } + fc->num_background--; + } + spin_unlock(&fc->lock); + dput(req->dentry); + mntput(req->vfsmount); if (req->file) - get_file(req->file); + fput(req->file); + wake_up(&req->waitq); + if (end) + end(fc, req); + else + fuse_put_request(fc, req); } -/* Called with fc->lock held. Releases, and then reacquires it. */ -static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) +static void wait_answer_interruptible(struct fuse_conn *fc, + struct fuse_req *req) { - sigset_t oldset; + if (signal_pending(current)) + return; spin_unlock(&fc->lock); - block_sigs(&oldset); wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); - restore_sigs(&oldset); spin_lock(&fc->lock); - if (req->state == FUSE_REQ_FINISHED && !req->interrupted) - return; +} + +static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req) +{ + list_add_tail(&req->intr_entry, &fc->interrupts); + wake_up(&fc->waitq); + kill_fasync(&fc->fasync, SIGIO, POLL_IN); +} + +/* Called with fc->lock held. Releases, and then reacquires it. */ +static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) +{ + if (!fc->no_interrupt) { + /* Any signal may interrupt this */ + wait_answer_interruptible(fc, req); + + if (req->aborted) + goto aborted; + if (req->state == FUSE_REQ_FINISHED) + return; - if (!req->interrupted) { - req->out.h.error = -EINTR; req->interrupted = 1; + if (req->state == FUSE_REQ_SENT) + queue_interrupt(fc, req); + } + + if (req->force) { + spin_unlock(&fc->lock); + wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); + spin_lock(&fc->lock); + } else { + sigset_t oldset; + + /* Only fatal signals may interrupt this */ + block_sigs(&oldset); + wait_answer_interruptible(fc, req); + restore_sigs(&oldset); } + + if (req->aborted) + goto aborted; + if (req->state == FUSE_REQ_FINISHED) + return; + + req->out.h.error = -EINTR; + req->aborted = 1; + + aborted: if (req->locked) { /* This is uninterruptible sleep, because data is being copied to/from the buffers of req. During @@ -308,8 +347,11 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) if (req->state == FUSE_REQ_PENDING) { list_del(&req->list); __fuse_put_request(req); - } else if (req->state == FUSE_REQ_SENT) - background_request(fc, req); + } else if (req->state == FUSE_REQ_SENT) { + spin_unlock(&fc->lock); + wait_event(req->waitq, req->state == FUSE_REQ_FINISHED); + spin_lock(&fc->lock); + } } static unsigned len_args(unsigned numargs, struct fuse_arg *args) @@ -323,13 +365,19 @@ static unsigned len_args(unsigned numargs, struct fuse_arg *args) return nbytes; } +static u64 fuse_get_unique(struct fuse_conn *fc) + { + fc->reqctr++; + /* zero is special */ + if (fc->reqctr == 0) + fc->reqctr = 1; + + return fc->reqctr; +} + static void queue_request(struct fuse_conn *fc, struct fuse_req *req) { - fc->reqctr++; - /* zero is special */ - if (fc->reqctr == 0) - fc->reqctr = 1; - req->in.h.unique = fc->reqctr; + req->in.h.unique = fuse_get_unique(fc); req->in.h.len = sizeof(struct fuse_in_header) + len_args(req->in.numargs, (struct fuse_arg *) req->in.args); list_add_tail(&req->list, &fc->pending); @@ -342,9 +390,6 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) kill_fasync(&fc->fasync, SIGIO, POLL_IN); } -/* - * This can only be interrupted by a SIGKILL - */ void request_send(struct fuse_conn *fc, struct fuse_req *req) { req->isreply = 1; @@ -367,8 +412,12 @@ void request_send(struct fuse_conn *fc, struct fuse_req *req) static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req) { spin_lock(&fc->lock); - background_request(fc, req); if (fc->connected) { + req->background = 1; + fc->num_background++; + if (fc->num_background == FUSE_MAX_BACKGROUND) + fc->blocked = 1; + queue_request(fc, req); spin_unlock(&fc->lock); } else { @@ -392,14 +441,14 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req) /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already - * interrupted bail out. + * aborted bail out. */ static int lock_request(struct fuse_conn *fc, struct fuse_req *req) { int err = 0; if (req) { spin_lock(&fc->lock); - if (req->interrupted) + if (req->aborted) err = -ENOENT; else req->locked = 1; @@ -409,7 +458,7 @@ static int lock_request(struct fuse_conn *fc, struct fuse_req *req) } /* - * Unlock request. If it was interrupted during being locked, the + * Unlock request. If it was aborted during being locked, the * requester thread is currently waiting for it to be unlocked, so * wake it up. */ @@ -418,7 +467,7 @@ static void unlock_request(struct fuse_conn *fc, struct fuse_req *req) if (req) { spin_lock(&fc->lock); req->locked = 0; - if (req->interrupted) + if (req->aborted) wake_up(&req->waitq); spin_unlock(&fc->lock); } @@ -597,13 +646,18 @@ static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs, return err; } +static int request_pending(struct fuse_conn *fc) +{ + return !list_empty(&fc->pending) || !list_empty(&fc->interrupts); +} + /* Wait until a request is available on the pending list */ static void request_wait(struct fuse_conn *fc) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&fc->waitq, &wait); - while (fc->connected && list_empty(&fc->pending)) { + while (fc->connected && !request_pending(fc)) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) break; @@ -627,12 +681,51 @@ static size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } #endif +/* + * Transfer an interrupt request to userspace + * + * Unlike other requests this is assembled on demand, without a need + * to allocate a separate fuse_req structure. + * + * Called with fc->lock held, releases it + */ +static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req, + const struct iovec *iov, unsigned long nr_segs) +{ + struct fuse_copy_state cs; + struct fuse_in_header ih; + struct fuse_interrupt_in arg; + unsigned reqsize = sizeof(ih) + sizeof(arg); + int err; + + list_del_init(&req->intr_entry); + req->intr_unique = fuse_get_unique(fc); + memset(&ih, 0, sizeof(ih)); + memset(&arg, 0, sizeof(arg)); + ih.len = reqsize; + ih.opcode = FUSE_INTERRUPT; + ih.unique = req->intr_unique; + arg.unique = req->in.h.unique; + + spin_unlock(&fc->lock); + if (iov_length(iov, nr_segs) < reqsize) + return -EINVAL; + + fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs); + err = fuse_copy_one(&cs, &ih, sizeof(ih)); + if (!err) + err = fuse_copy_one(&cs, &arg, sizeof(arg)); + fuse_copy_finish(&cs); + + return err ? err : reqsize; +} + /* * Read a single request into the userspace filesystem's buffer. This * function waits until a request is available, then removes it from * the pending list and copies request data to userspace buffer. If - * no reply is needed (FORGET) or request has been interrupted or - * there was an error during the copying then it's finished by calling + * no reply is needed (FORGET) or request has been aborted or there + * was an error during the copying then it's finished by calling * request_end(). Otherwise add it to the processing list, and set * the 'sent' flag. */ @@ -652,7 +745,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, spin_lock(&fc->lock); err = -EAGAIN; if ((file->f_flags & O_NONBLOCK) && fc->connected && - list_empty(&fc->pending)) + !request_pending(fc)) goto err_unlock; request_wait(fc); @@ -660,9 +753,15 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, if (!fc->connected) goto err_unlock; err = -ERESTARTSYS; - if (list_empty(&fc->pending)) + if (!request_pending(fc)) goto err_unlock; + if (!list_empty(&fc->interrupts)) { + req = list_entry(fc->interrupts.next, struct fuse_req, + intr_entry); + return fuse_read_interrupt(fc, req, iov, nr_segs); + } + req = list_entry(fc->pending.next, struct fuse_req, list); req->state = FUSE_REQ_READING; list_move(&req->list, &fc->io); @@ -687,10 +786,10 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, fuse_copy_finish(&cs); spin_lock(&fc->lock); req->locked = 0; - if (!err && req->interrupted) + if (!err && req->aborted) err = -ENOENT; if (err) { - if (!req->interrupted) + if (!req->aborted) req->out.h.error = -EIO; request_end(fc, req); return err; @@ -700,6 +799,8 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, else { req->state = FUSE_REQ_SENT; list_move_tail(&req->list, &fc->processing); + if (req->interrupted) + queue_interrupt(fc, req); spin_unlock(&fc->lock); } return reqsize; @@ -726,7 +827,7 @@ static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique) list_for_each(entry, &fc->processing) { struct fuse_req *req; req = list_entry(entry, struct fuse_req, list); - if (req->in.h.unique == unique) + if (req->in.h.unique == unique || req->intr_unique == unique) return req; } return NULL; @@ -792,17 +893,33 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, goto err_unlock; req = request_find(fc, oh.unique); - err = -EINVAL; if (!req) goto err_unlock; - if (req->interrupted) { + if (req->aborted) { spin_unlock(&fc->lock); fuse_copy_finish(&cs); spin_lock(&fc->lock); request_end(fc, req); return -ENOENT; } + /* Is it an interrupt reply? */ + if (req->intr_unique == oh.unique) { + err = -EINVAL; + if (nbytes != sizeof(struct fuse_out_header)) + goto err_unlock; + + if (oh.error == -ENOSYS) + fc->no_interrupt = 1; + else if (oh.error == -EAGAIN) + queue_interrupt(fc, req); + + spin_unlock(&fc->lock); + fuse_copy_finish(&cs); + return nbytes; + } + + req->state = FUSE_REQ_WRITING; list_move(&req->list, &fc->io); req->out.h = oh; req->locked = 1; @@ -815,9 +932,9 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, spin_lock(&fc->lock); req->locked = 0; if (!err) { - if (req->interrupted) + if (req->aborted) err = -ENOENT; - } else if (!req->interrupted) + } else if (!req->aborted) req->out.h.error = -EIO; request_end(fc, req); @@ -851,7 +968,7 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) spin_lock(&fc->lock); if (!fc->connected) mask = POLLERR; - else if (!list_empty(&fc->pending)) + else if (request_pending(fc)) mask |= POLLIN | POLLRDNORM; spin_unlock(&fc->lock); @@ -877,7 +994,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head) /* * Abort requests under I/O * - * The requests are set to interrupted and finished, and the request + * The requests are set to aborted and finished, and the request * waiter is woken up. This will make request_wait_answer() wait * until the request is unlocked and then return. * @@ -892,7 +1009,7 @@ static void end_io_requests(struct fuse_conn *fc) list_entry(fc->io.next, struct fuse_req, list); void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; - req->interrupted = 1; + req->aborted = 1; req->out.h.error = -ECONNABORTED; req->state = FUSE_REQ_FINISHED; list_del_init(&req->list); @@ -925,19 +1042,20 @@ static void end_io_requests(struct fuse_conn *fc) * onto the pending list is prevented by req->connected being false. * * Progression of requests under I/O to the processing list is - * prevented by the req->interrupted flag being true for these - * requests. For this reason requests on the io list must be aborted - * first. + * prevented by the req->aborted flag being true for these requests. + * For this reason requests on the io list must be aborted first. */ void fuse_abort_conn(struct fuse_conn *fc) { spin_lock(&fc->lock); if (fc->connected) { fc->connected = 0; + fc->blocked = 0; end_io_requests(fc); end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); wake_up_all(&fc->waitq); + wake_up_all(&fc->blocked_waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); } spin_unlock(&fc->lock); @@ -953,7 +1071,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file) end_requests(fc, &fc->processing); spin_unlock(&fc->lock); fasync_helper(-1, file, 0, &fc->fasync); - kobject_put(&fc->kobj); + fuse_conn_put(fc); } return 0; diff --git a/kernel/dir.c b/kernel/dir.c index a2887b6..66dac79 100644 --- a/kernel/dir.c +++ b/kernel/dir.c @@ -20,6 +20,33 @@ #include #endif +#if BITS_PER_LONG >= 64 +static inline void fuse_dentry_settime(struct dentry *entry, u64 time) +{ + entry->d_time = time; +} + +static inline u64 fuse_dentry_time(struct dentry *entry) +{ + return entry->d_time; +} +#else +/* + * On 32 bit archs store the high 32 bits of time in d_fsdata + */ +static void fuse_dentry_settime(struct dentry *entry, u64 time) +{ + entry->d_time = time; + entry->d_fsdata = (void *) (unsigned long) (time >> 32); +} + +static u64 fuse_dentry_time(struct dentry *entry) +{ + return (u64) entry->d_time + + ((u64) (unsigned long) entry->d_fsdata << 32); +} +#endif + /* * FUSE caches dentries and attributes with separate timeout. The * time in jiffies until the dentry/attributes are valid is stored in @@ -29,10 +56,13 @@ /* * Calculate the time in jiffies until a dentry/attributes are valid */ -static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) +static u64 time_to_jiffies(unsigned long sec, unsigned long nsec) { - struct timespec ts = {sec, nsec}; - return jiffies + timespec_to_jiffies(&ts); + if (sec || nsec) { + struct timespec ts = {sec, nsec}; + return get_jiffies_64() + timespec_to_jiffies(&ts); + } else + return 0; } /* @@ -41,7 +71,8 @@ static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) */ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) { - entry->d_time = time_to_jiffies(o->entry_valid, o->entry_valid_nsec); + fuse_dentry_settime(entry, + time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); if (entry->d_inode) get_fuse_inode(entry->d_inode)->i_time = time_to_jiffies(o->attr_valid, o->attr_valid_nsec); @@ -53,7 +84,7 @@ static void fuse_change_timeout(struct dentry *entry, struct fuse_entry_out *o) */ void fuse_invalidate_attr(struct inode *inode) { - get_fuse_inode(inode)->i_time = jiffies - 1; + get_fuse_inode(inode)->i_time = 0; } /* @@ -66,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode) */ static void fuse_invalidate_entry_cache(struct dentry *entry) { - entry->d_time = jiffies - 1; + fuse_dentry_settime(entry, 0); } /* @@ -85,7 +116,6 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, { req->in.h.opcode = FUSE_LOOKUP; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; @@ -109,7 +139,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (inode && is_bad_inode(inode)) return 0; - else if (time_after(jiffies, entry->d_time)) { + else if (fuse_dentry_time(entry) < get_jiffies_64()) { int err; struct fuse_entry_out outarg; struct fuse_conn *fc; @@ -261,6 +291,20 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, } #ifdef HAVE_LOOKUP_INSTANTIATE_FILP +/* + * Synchronous release for the case when something goes wrong in CREATE_OPEN + */ +static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff, + u64 nodeid, int flags) +{ + struct fuse_req *req; + + req = fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE); + req->force = 1; + request_send(fc, req); + fuse_put_request(fc, req); +} + /* * Atomic create+open operation * @@ -274,6 +318,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct inode *inode; struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; + struct fuse_req *forget_req; struct fuse_open_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; @@ -284,9 +329,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (fc->no_create) return -ENOSYS; + forget_req = fuse_get_req(fc); + if (IS_ERR(forget_req)) + return PTR_ERR(forget_req); + req = fuse_get_req(fc); + err = PTR_ERR(req); if (IS_ERR(req)) - return PTR_ERR(req); + goto out_put_forget_req; err = -ENOMEM; ff = fuse_file_alloc(); @@ -299,7 +349,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, inarg.mode = mode; req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -322,25 +371,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) goto out_free_ff; + fuse_put_request(fc, req); inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, &outentry.attr); - err = -ENOMEM; if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); ff->fh = outopen.fh; - /* Special release, with inode = NULL, this will - trigger a 'forget' request when the release is - complete */ - fuse_send_release(fc, ff, outentry.nodeid, NULL, flags, 0); - goto out_put_request; + fuse_sync_release(fc, ff, outentry.nodeid, flags); + fuse_send_forget(fc, forget_req, outentry.nodeid, 1); + return -ENOMEM; } - fuse_put_request(fc, req); + fuse_put_request(fc, forget_req); d_instantiate(entry, inode); fuse_change_timeout(entry, &outentry); file = lookup_instantiate_filp(nd, entry, generic_file_open); if (IS_ERR(file)) { ff->fh = outopen.fh; - fuse_send_release(fc, ff, outentry.nodeid, inode, flags, 0); + fuse_sync_release(fc, ff, outentry.nodeid, flags); return PTR_ERR(file); } fuse_finish_open(inode, file, ff, &outopen); @@ -350,6 +397,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, fuse_file_free(ff); out_put_request: fuse_put_request(fc, req); + out_put_forget_req: + fuse_put_request(fc, forget_req); return err; } #endif @@ -366,7 +415,6 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, int err; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->out.numargs = 1; req->out.args[0].size = sizeof(outarg); req->out.args[0].value = &outarg; @@ -488,7 +536,6 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) req->in.h.opcode = FUSE_UNLINK; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; @@ -520,7 +567,6 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) req->in.h.opcode = FUSE_RMDIR; req->in.h.nodeid = get_node_id(dir); - req->inode = dir; req->in.numargs = 1; req->in.args[0].size = entry->d_name.len + 1; req->in.args[0].value = entry->d_name.name; @@ -550,8 +596,6 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, inarg.newdir = get_node_id(newdir); req->in.h.opcode = FUSE_RENAME; req->in.h.nodeid = get_node_id(olddir); - req->inode = olddir; - req->inode2 = newdir; req->in.numargs = 3; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -598,7 +642,6 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); req->in.h.opcode = FUSE_LINK; - req->inode2 = inode; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -627,7 +670,6 @@ int fuse_do_getattr(struct inode *inode) req->in.h.opcode = FUSE_GETATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->out.numargs = 1; req->out.args[0].size = sizeof(arg); req->out.args[0].value = &arg; @@ -700,7 +742,7 @@ static int fuse_revalidate(struct dentry *entry) if (!fuse_allow_task(fc, current)) return -EACCES; if (get_node_id(inode) != FUSE_ROOT_ID && - time_before_eq(jiffies, fi->i_time)) + fi->i_time >= get_jiffies_64()) return 0; return fuse_do_getattr(inode); @@ -725,7 +767,6 @@ static int fuse_access(struct inode *inode, int mask) inarg.mask = mask; req->in.h.opcode = FUSE_ACCESS; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -882,7 +923,6 @@ static char *read_link(struct dentry *dentry) } req->in.h.opcode = FUSE_READLINK; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->out.argvar = 1; req->out.numargs = 1; req->out.args[0].size = PAGE_SIZE - 1; @@ -1048,7 +1088,6 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) iattr_to_fattr(attr, &inarg); req->in.h.opcode = FUSE_SETATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1145,7 +1184,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name, inarg.flags = flags; req->in.h.opcode = FUSE_SETXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 3; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1184,7 +1222,6 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name, inarg.size = size; req->in.h.opcode = FUSE_GETXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1234,7 +1271,6 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) inarg.size = size; req->in.h.opcode = FUSE_LISTXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -1278,7 +1314,6 @@ static int fuse_removexattr(struct dentry *entry, const char *name) req->in.h.opcode = FUSE_REMOVEXATTR; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = strlen(name) + 1; req->in.args[0].value = name; diff --git a/kernel/file.c b/kernel/file.c index dd9758c..42e56f0 100644 --- a/kernel/file.c +++ b/kernel/file.c @@ -40,7 +40,6 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir, inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -59,8 +58,8 @@ struct fuse_file *fuse_file_alloc(void) struct fuse_file *ff; ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL); if (ff) { - ff->release_req = fuse_request_alloc(); - if (!ff->release_req) { + ff->reserved_req = fuse_request_alloc(); + if (!ff->reserved_req) { kfree(ff); ff = NULL; } @@ -70,7 +69,7 @@ struct fuse_file *fuse_file_alloc(void) void fuse_file_free(struct fuse_file *ff) { - fuse_request_free(ff->release_req); + fuse_request_free(ff->reserved_req); kfree(ff); } @@ -127,37 +126,22 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) return err; } -/* Special case for failed iget in CREATE */ -static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) +struct fuse_req *fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, + int opcode) { - /* If called from end_io_requests(), req has more than one - reference and fuse_reset_request() cannot work */ - if (fc->connected) { - u64 nodeid = req->in.h.nodeid; - fuse_reset_request(req); - fuse_send_forget(fc, req, nodeid, 1); - } else - fuse_put_request(fc, req); -} - -void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, - u64 nodeid, struct inode *inode, int flags, int isdir) -{ - struct fuse_req * req = ff->release_req; + struct fuse_req *req = ff->reserved_req; struct fuse_release_in *inarg = &req->misc.release_in; inarg->fh = ff->fh; inarg->flags = flags; - req->in.h.opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE; + req->in.h.opcode = opcode; req->in.h.nodeid = nodeid; - req->inode = inode; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_release_in); req->in.args[0].value = inarg; - request_send_background(fc, req); - if (!inode) - req->end = fuse_release_end; kfree(ff); + + return req; } int fuse_release_common(struct inode *inode, struct file *file, int isdir) @@ -165,8 +149,15 @@ int fuse_release_common(struct inode *inode, struct file *file, int isdir) struct fuse_file *ff = file->private_data; if (ff) { struct fuse_conn *fc = get_fuse_conn(inode); - u64 nodeid = get_node_id(inode); - fuse_send_release(fc, ff, nodeid, inode, file->f_flags, isdir); + struct fuse_req *req; + + req = fuse_release_fill(ff, get_node_id(inode), file->f_flags, + isdir ? FUSE_RELEASEDIR : FUSE_RELEASE); + + /* Hold vfsmount and dentry until release is finished */ + req->vfsmount = mntget(file->f_vfsmnt); + req->dentry = dget(file->f_dentry); + request_send_background(fc, req); } /* Return value is ignored by VFS */ @@ -183,7 +174,33 @@ static int fuse_release(struct inode *inode, struct file *file) return fuse_release_common(inode, file, 0); } +/* + * Scramble the ID space with XTEA, so that the value of the files_struct + * pointer is not exposed to userspace. + */ +static u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) +{ + u32 *k = fc->scramble_key; + u64 v = (unsigned long) id; + u32 v0 = v; + u32 v1 = v >> 32; + u32 sum = 0; + int i; + + for (i = 0; i < 32; i++) { + v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]); + sum += 0x9E3779B9; + v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]); + } + + return (u64) v0 + ((u64) v1 << 32); +} + +#ifdef KERNEL_2_6_18_PLUS +static int fuse_flush(struct file *file, fl_owner_t id) +#else static int fuse_flush(struct file *file) +#endif { struct inode *inode = file->f_dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); @@ -198,19 +215,20 @@ static int fuse_flush(struct file *file) if (fc->no_flush) return 0; - req = fuse_get_req(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - + req = fuse_get_req_nofail(fc, file); memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; +#ifdef KERNEL_2_6_18_PLUS + inarg.lock_owner = fuse_lock_owner_id(fc, id); +#else + inarg.lock_owner = fuse_lock_owner_id(fc, NULL); +#endif req->in.h.opcode = FUSE_FLUSH; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->file = file; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; + req->force = 1; request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); @@ -246,8 +264,6 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync, inarg.fsync_flags = datasync ? 1 : 0; req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->file = file; req->in.numargs = 1; req->in.args[0].size = sizeof(inarg); req->in.args[0].value = &inarg; @@ -280,8 +296,6 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, inarg->size = count; req->in.h.opcode = opcode; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->file = file; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_read_in); req->in.args[0].value = inarg; @@ -357,6 +371,8 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file, req->out.page_zeroing = 1; fuse_read_fill(req, file, inode, pos, count, FUSE_READ); if (fc->async_read) { + get_file(file); + req->file = file; req->end = fuse_readpages_end; request_send_background(fc, req); } else { @@ -534,8 +550,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, inarg.size = count; req->in.h.opcode = FUSE_WRITE; req->in.h.nodeid = get_node_id(inode); - req->inode = inode; - req->file = file; req->in.argpages = 1; req->in.numargs = 2; req->in.args[0].size = sizeof(struct fuse_write_in); @@ -763,6 +777,136 @@ static int fuse_set_page_dirty(struct page *page) } #endif +static int convert_fuse_file_lock(const struct fuse_file_lock *ffl, + struct file_lock *fl) +{ + switch (ffl->type) { + case F_UNLCK: + break; + + case F_RDLCK: + case F_WRLCK: + if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX || + ffl->end < ffl->start) + return -EIO; + + fl->fl_start = ffl->start; + fl->fl_end = ffl->end; + fl->fl_pid = ffl->pid; + break; + + default: + return -EIO; + } + fl->fl_type = ffl->type; + return 0; +} + +static void fuse_lk_fill(struct fuse_req *req, struct file *file, + const struct file_lock *fl, int opcode, pid_t pid) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_file *ff = file->private_data; + struct fuse_lk_in *arg = &req->misc.lk_in; + + arg->fh = ff->fh; + arg->owner = fuse_lock_owner_id(fc, fl->fl_owner); + arg->lk.start = fl->fl_start; + arg->lk.end = fl->fl_end; + arg->lk.type = fl->fl_type; + arg->lk.pid = pid; + req->in.h.opcode = opcode; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(*arg); + req->in.args[0].value = arg; +} + +static int fuse_getlk(struct file *file, struct file_lock *fl) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + struct fuse_lk_out outarg; + int err; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + fuse_lk_fill(req, file, fl, FUSE_GETLK, 0); + req->out.numargs = 1; + req->out.args[0].size = sizeof(outarg); + req->out.args[0].value = &outarg; + request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + if (!err) + err = convert_fuse_file_lock(&outarg.lk, fl); + + return err; +} + +static int fuse_setlk(struct file *file, struct file_lock *fl) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req; + int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; + pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; + int err; + +#ifdef KERNEL_2_6_18_PLUS + /* Unlock on close is handled by the flush method */ + if (fl->fl_flags & FL_CLOSE) + return 0; +#endif + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + fuse_lk_fill(req, file, fl, opcode, pid); + request_send(fc, req); + err = req->out.h.error; + /* locking is restartable */ + if (err == -EINTR) + err = -ERESTARTSYS; + fuse_put_request(fc, req); + return err; +} + +static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) +{ + struct inode *inode = file->f_dentry->d_inode; + struct fuse_conn *fc = get_fuse_conn(inode); + int err; + + if (cmd == F_GETLK) { + if (fc->no_lock) { +#ifdef KERNEL_2_6_17_PLUS + if (!posix_test_lock(file, fl, fl)) + fl->fl_type = F_UNLCK; +#else + struct file_lock *cfl = posix_test_lock(file, fl); + if (!cfl) + fl->fl_type = F_UNLCK; + else + *fl = *cfl; +#endif + err = 0; + } else + err = fuse_getlk(file, fl); + } else { + if (fc->no_lock) + err = posix_lock_file_wait(file, fl); + else + err = fuse_setlk(file, fl); + } + return err; +} + static struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, #ifdef KERNEL_2_6 @@ -777,6 +921,7 @@ static struct file_operations fuse_file_operations = { .flush = fuse_flush, .release = fuse_release, .fsync = fuse_fsync, + .lock = fuse_file_lock, #ifdef KERNEL_2_6 .sendfile = generic_file_sendfile, #endif @@ -790,6 +935,7 @@ static struct file_operations fuse_direct_io_file_operations = { .flush = fuse_flush, .release = fuse_release, .fsync = fuse_fsync, + .lock = fuse_file_lock, /* no mmap and sendfile */ }; diff --git a/kernel/fuse_i.h b/kernel/fuse_i.h index 1693b05..5f718a2 100644 --- a/kernel/fuse_i.h +++ b/kernel/fuse_i.h @@ -43,6 +43,12 @@ # if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) # define KERNEL_2_6_16_PLUS # endif +# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17) +# define KERNEL_2_6_17_PLUS +# endif +# if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,18) +# define KERNEL_2_6_18_PLUS +# endif #endif #include "config.h" @@ -62,6 +68,7 @@ #endif /* KERNEL_2_6 */ #endif /* FUSE_MAINLINE */ #include +#include #include #include #include @@ -69,7 +76,14 @@ #include #include #endif +#ifdef KERNEL_2_6_17_PLUS +#include +#else #include +#define DEFINE_MUTEX(m) DECLARE_MUTEX(m) +#define mutex_lock(m) down(m) +#define mutex_unlock(m) up(m) +#endif #ifndef BUG_ON #define BUG_ON(x) @@ -90,22 +104,6 @@ static inline void set_page_dirty_lock(struct page *page) set_page_dirty(page); unlock_page(page); } - -struct kobject { - atomic_t count; - void (*release) (struct kobject *); -}; - -static inline void kobject_get(struct kobject *kobj) -{ - atomic_inc(&kobj->count); -} - -static inline void kobject_put(struct kobject *kobj) -{ - if (atomic_dec_and_test(&kobj->count)) - kobj->release(kobj); -} #endif /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -116,6 +114,9 @@ static inline void kobject_put(struct kobject *kobj) /** It could be as large as PATH_MAX, but would that have any uses? */ #define FUSE_NAME_MAX 1024 +/** Number of dentries for each connection in the control filesystem */ +#define FUSE_CTL_NUM_DENTRIES 3 + /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem module will check permissions based on the file mode. Otherwise no permission checking is done in the kernel */ @@ -125,6 +126,15 @@ static inline void kobject_put(struct kobject *kobj) doing the mount will be allowed to access the filesystem */ #define FUSE_ALLOW_OTHER (1 << 1) +/** List of active connections */ +extern struct list_head fuse_conn_list; + +/** Global mutex protecting fuse_conn_list and the control filesystem */ +#ifdef KERNEL_2_6_17_PLUS +extern struct mutex fuse_mutex; +#else +extern struct semaphore fuse_mutex; +#endif #ifndef KERNEL_2_6 /** Allow FUSE to combine reads into 64k chunks. This is useful if the filesystem is better at handling large chunks */ @@ -147,13 +157,13 @@ struct fuse_inode { struct fuse_req *forget_req; /** Time in jiffies until the file attributes are valid */ - unsigned long i_time; + u64 i_time; }; /** FUSE specific file data */ struct fuse_file { /** Request reserved for flush and release */ - struct fuse_req *release_req; + struct fuse_req *reserved_req; /** File handle used by userspace */ u64 fh; @@ -219,6 +229,7 @@ enum fuse_req_state { FUSE_REQ_PENDING, FUSE_REQ_READING, FUSE_REQ_SENT, + FUSE_REQ_WRITING, FUSE_REQ_FINISHED }; @@ -232,12 +243,15 @@ struct fuse_req { fuse_conn */ struct list_head list; - /** Entry on the background list */ - struct list_head bg_entry; + /** Entry on the interrupts list */ + struct list_head intr_entry; /** refcount */ atomic_t count; + /** Unique ID for the interrupt request */ + u64 intr_unique; + /* * The following bitfields are either set once before the * request is queued or setting/clearing them is protected by @@ -247,12 +261,18 @@ struct fuse_req { /** True if the request has reply */ unsigned isreply:1; - /** The request was interrupted */ - unsigned interrupted:1; + /** Force sending of the request even if interrupted */ + unsigned force:1; + + /** The request was aborted */ + unsigned aborted:1; /** Request is sent in the background */ unsigned background:1; + /** The request has been interrupted */ + unsigned interrupted:1; + /** Data is being copied to/from the request */ unsigned locked:1; @@ -278,6 +298,7 @@ struct fuse_req { struct fuse_init_in init_in; struct fuse_init_out init_out; struct fuse_read_in read_in; + struct fuse_lk_in lk_in; } misc; /** page vector */ @@ -289,17 +310,20 @@ struct fuse_req { /** offset of data on first page */ unsigned page_offset; - /** Inode used in the request */ - struct inode *inode; - - /** Second inode used in the request (or NULL) */ - struct inode *inode2; - /** File used in the request (or NULL) */ struct file *file; + /** vfsmount used in release */ + struct vfsmount *vfsmount; + + /** dentry used in release */ + struct dentry *dentry; + /** Request completion callback */ void (*end)(struct fuse_conn *, struct fuse_req *); + + /** Request is stolen from fuse_file->reserved_req */ + struct file *stolen_file; }; /** @@ -313,6 +337,9 @@ struct fuse_conn { /** Lock protecting accessess to members of this structure */ spinlock_t lock; + /** Refcount */ + atomic_t count; + /** The user id for this mount */ uid_t user_id; @@ -340,13 +367,12 @@ struct fuse_conn { /** The list of requests under I/O */ struct list_head io; - /** Requests put in the background (RELEASE or any other - interrupted request) */ - struct list_head background; - /** Number of requests currently in the background */ unsigned num_background; + /** Pending interrupts */ + struct list_head interrupts; + /** Flag indicating if connection is blocked. This will be the case before the INIT reply is received, and if there are too many outstading backgrounds requests */ @@ -355,15 +381,9 @@ struct fuse_conn { /** waitq for blocked connection */ wait_queue_head_t blocked_waitq; - /** RW semaphore for exclusion with fuse_put_super() */ - struct rw_semaphore sbput_sem; - /** The next unique request id */ u64 reqctr; - /** Mount is active */ - unsigned mounted; - /** Connection established, cleared on umount, connection abort and device release */ unsigned connected; @@ -402,12 +422,18 @@ struct fuse_conn { /** Is removexattr not implemented by fs? */ unsigned no_removexattr : 1; + /** Are file locking primitives not implemented by fs? */ + unsigned no_lock : 1; + /** Is access not implemented by fs? */ unsigned no_access : 1; /** Is create not implemented by fs? */ unsigned no_create : 1; + /** Is interrupt not implemented by fs? */ + unsigned no_interrupt : 1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -419,11 +445,23 @@ struct fuse_conn { struct backing_dev_info bdi; #endif - /** kobject */ - struct kobject kobj; + /** Entry on the fuse_conn_list */ + struct list_head entry; + + /** Unique ID */ + u64 id; + + /** Dentries in the control filesystem */ + struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; + + /** number of dentries used in the above array */ + int ctl_ndents; /** O_ASYNC requests */ struct fasync_struct *fasync; + + /** Key for lock owner ID scrambling */ + u32 scramble_key[4]; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -436,11 +474,6 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode) return get_fuse_conn_super(inode->i_sb); } -static inline struct fuse_conn *get_fuse_conn_kobj(struct kobject *obj) -{ - return container_of(obj, struct fuse_conn, kobj); -} - static inline struct fuse_inode *get_fuse_inode(struct inode *inode) { return container_of(inode, struct fuse_inode, inode); @@ -482,12 +515,9 @@ void fuse_file_free(struct fuse_file *ff); void fuse_finish_open(struct inode *inode, struct file *file, struct fuse_file *ff, struct fuse_open_out *outarg); -/** - * Send a RELEASE request - */ -void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, - u64 nodeid, struct inode *inode, int flags, int isdir); - +/** */ +struct fuse_req *fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, + int opcode); /** * Send RELEASE or RELEASEDIR request */ @@ -534,6 +564,9 @@ int fuse_dev_init(void); */ void fuse_dev_cleanup(void); +int fuse_ctl_init(void); +void fuse_ctl_cleanup(void); + /** * Allocate a request */ @@ -545,14 +578,14 @@ struct fuse_req *fuse_request_alloc(void); void fuse_request_free(struct fuse_req *req); /** - * Reinitialize a request, the preallocated flag is left unmodified + * Get a request, may fail with -ENOMEM */ -void fuse_reset_request(struct fuse_req *req); +struct fuse_req *fuse_get_req(struct fuse_conn *fc); /** - * Reserve a preallocated request + * Gets a requests for a file operation, always succeeds */ -struct fuse_req *fuse_get_req(struct fuse_conn *fc); +struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file); /** * Decrement reference count of a request. If count goes to zero free @@ -575,12 +608,7 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req); */ void request_send_background(struct fuse_conn *fc, struct fuse_req *req); -/** - * Release inodes and file associated with background request - */ -void fuse_release_background(struct fuse_conn *fc, struct fuse_req *req); - -/** Abort all requests */ +/* Abort all requests */ void fuse_abort_conn(struct fuse_conn *fc); /** @@ -592,3 +620,23 @@ int fuse_do_getattr(struct inode *inode); * Invalidate inode attributes */ void fuse_invalidate_attr(struct inode *inode); + +/** + * Acquire reference to fuse_conn + */ +struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); + +/** + * Release reference to fuse_conn + */ +void fuse_conn_put(struct fuse_conn *fc); + +/** + * Add connection to control filesystem + */ +int fuse_ctl_add_conn(struct fuse_conn *fc); + +/** + * Remove connection from control filesystem + */ +void fuse_ctl_remove_conn(struct fuse_conn *fc); diff --git a/kernel/inode.c b/kernel/inode.c index a2f144c..0eb8ced 100644 --- a/kernel/inode.c +++ b/kernel/inode.c @@ -11,13 +11,13 @@ #include #include #include -#include #include #include #include #ifdef KERNEL_2_6 #include #include +#include #else #include "compat/parser.h" #endif @@ -29,15 +29,8 @@ MODULE_LICENSE("GPL"); #endif static kmem_cache_t *fuse_inode_cachep; -#ifdef KERNEL_2_6 -static struct subsystem connections_subsys; - -struct fuse_conn_attr { - struct attribute attr; - ssize_t (*show)(struct fuse_conn *, char *); - ssize_t (*store)(struct fuse_conn *, const char *, size_t); -}; -#endif +struct list_head fuse_conn_list; +DEFINE_MUTEX(fuse_mutex); #define FUSE_SUPER_MAGIC 0x65735546 @@ -73,7 +66,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) inode->u.generic_ip = NULL; #endif fi = get_fuse_inode(inode); - fi->i_time = jiffies - 1; + fi->i_time = 0; fi->nodeid = 0; fi->nlookup = 0; fi->forget_req = fuse_request_alloc(); @@ -121,6 +114,14 @@ static void fuse_clear_inode(struct inode *inode) } } +static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) +{ + if (*flags & MS_MANDLOCK) + return -EINVAL; + + return 0; +} + void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr) { if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size) @@ -278,24 +279,19 @@ static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); - down_write(&fc->sbput_sem); - while (!list_empty(&fc->background)) - fuse_release_background(fc, - list_entry(fc->background.next, - struct fuse_req, bg_entry)); - spin_lock(&fc->lock); - fc->mounted = 0; fc->connected = 0; + fc->blocked = 0; spin_unlock(&fc->lock); - up_write(&fc->sbput_sem); /* Flush all readers on this fs */ kill_fasync(&fc->fasync, SIGIO, POLL_IN); wake_up_all(&fc->waitq); -#ifdef KERNEL_2_6 - kobject_del(&fc->kobj); -#endif - kobject_put(&fc->kobj); + wake_up_all(&fc->blocked_waitq); + mutex_lock(&fuse_mutex); + list_del(&fc->entry); + fuse_ctl_remove_conn(fc); + mutex_unlock(&fuse_mutex); + fuse_conn_put(fc); } static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) @@ -314,8 +310,16 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr /* fsid is left zero */ } +#ifdef KERNEL_2_6_18_PLUS +static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) +{ +#else static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) +#endif { +#ifdef KERNEL_2_6_18_PLUS + struct super_block *sb = dentry->d_sb; +#endif struct fuse_conn *fc = get_fuse_conn_super(sb); struct fuse_req *req; struct fuse_statfs_out outarg; @@ -328,6 +332,9 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf) memset(&outarg, 0, sizeof(outarg)); req->in.numargs = 0; req->in.h.opcode = FUSE_STATFS; +#ifdef KERNEL_2_6_18_PLUS + req->in.h.nodeid = get_node_id(dentry->d_inode); +#endif req->out.numargs = 1; req->out.args[0].size = fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg); @@ -461,11 +468,6 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -static void fuse_conn_release(struct kobject *kobj) -{ - kfree(get_fuse_conn_kobj(kobj)); -} - #ifndef HAVE_KZALLOC static void *kzalloc(size_t size, int flags) { @@ -482,20 +484,13 @@ static struct fuse_conn *new_conn(void) fc = kzalloc(sizeof(*fc), GFP_KERNEL); if (fc) { spin_lock_init(&fc->lock); + atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); INIT_LIST_HEAD(&fc->pending); INIT_LIST_HEAD(&fc->processing); INIT_LIST_HEAD(&fc->io); - INIT_LIST_HEAD(&fc->background); - init_rwsem(&fc->sbput_sem); -#ifdef KERNEL_2_6 - kobj_set_kset_s(fc, connections_subsys); - kobject_init(&fc->kobj); -#else - atomic_set(&fc->kobj.count, 1); - fc->kobj.release = fuse_conn_release; -#endif + INIT_LIST_HEAD(&fc->interrupts); atomic_set(&fc->num_waiting, 0); #ifdef KERNEL_2_6_6_PLUS fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; @@ -503,10 +498,23 @@ static struct fuse_conn *new_conn(void) #endif fc->reqctr = 0; fc->blocked = 1; + get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); } return fc; } +void fuse_conn_put(struct fuse_conn *fc) +{ + if (atomic_dec_and_test(&fc->count)) + kfree(fc); +} + +struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) +{ + atomic_inc(&fc->count); + return fc; +} + static struct inode *get_root_inode(struct super_block *sb, unsigned mode) { struct fuse_attr attr; @@ -586,6 +594,7 @@ static struct super_operations fuse_super_operations = { .destroy_inode = fuse_destroy_inode, .read_inode = fuse_read_inode, .clear_inode = fuse_clear_inode, + .remount_fs = fuse_remount_fs, .put_super = fuse_put_super, .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, @@ -606,8 +615,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) fc->async_read = 1; - } else + if (!(arg->flags & FUSE_POSIX_LOCKS)) + fc->no_lock = 1; + } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; + fc->no_lock = 1; + } fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); #endif @@ -627,7 +640,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; #ifdef KERNEL_2_6 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; - arg->flags |= FUSE_ASYNC_READ; + arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS; #endif req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; @@ -644,14 +657,11 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) request_send_background(fc, req); } -#ifdef KERNEL_2_6 -static unsigned long long conn_id(void) +static u64 conn_id(void) { - /* BKL is held for ->get_sb() */ - static unsigned long long ctr = 1; + static u64 ctr = 1; return ctr++; } -#endif static int fuse_fill_super(struct super_block *sb, void *data, int silent) { @@ -663,6 +673,9 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) struct fuse_req *init_req; int err; + if (sb->s_flags & MS_MANDLOCK) + return -EINVAL; + if (!parse_fuse_opt((char *) data, &d)) return -EINVAL; @@ -711,27 +724,21 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!init_req) goto err_put_root; -#ifdef KERNEL_2_6 - err = kobject_set_name(&fc->kobj, "%llu", conn_id()); - if (err) - goto err_free_req; - - err = kobject_add(&fc->kobj); - if (err) - goto err_free_req; -#endif - - /* Setting file->private_data can't race with other mount() - instances, since BKL is held for ->get_sb() */ + mutex_lock(&fuse_mutex); err = -EINVAL; if (file->private_data) - goto err_kobject_del; + goto err_unlock; + + fc->id = conn_id(); + err = fuse_ctl_add_conn(fc); + if (err) + goto err_unlock; + list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - fc->mounted = 1; fc->connected = 1; - kobject_get(&fc->kobj); - file->private_data = fc; + file->private_data = fuse_conn_get(fc); + mutex_unlock(&fuse_mutex); /* * atomic_dec_and_test() in fput() provides the necessary * memory barrier for file->private_data to be visible on all @@ -743,17 +750,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) return 0; - err_kobject_del: -#ifdef KERNEL_2_6 - kobject_del(&fc->kobj); - err_free_req: + err_unlock: + mutex_unlock(&fuse_mutex); fuse_request_free(init_req); -#endif err_put_root: dput(root_dentry); err: fput(file); - kobject_put(&fc->kobj); + fuse_conn_put(fc); return err; } @@ -786,78 +790,11 @@ static DECLARE_FSTYPE(fuse_fs_type, "fuse", fuse_read_super_compat, 0); #endif #ifdef KERNEL_2_6 -static ssize_t fuse_conn_waiting_show(struct fuse_conn *fc, char *page) -{ - return sprintf(page, "%i\n", atomic_read(&fc->num_waiting)); -} - -static ssize_t fuse_conn_abort_store(struct fuse_conn *fc, const char *page, - size_t count) -{ - fuse_abort_conn(fc); - return count; -} - -#ifndef __ATTR -#define __ATTR(_name,_mode,_show,_store) { \ - .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ - .show = _show, \ - .store = _store, \ -} -#endif -static struct fuse_conn_attr fuse_conn_waiting = - __ATTR(waiting, 0400, fuse_conn_waiting_show, NULL); -static struct fuse_conn_attr fuse_conn_abort = - __ATTR(abort, 0600, NULL, fuse_conn_abort_store); - -static struct attribute *fuse_conn_attrs[] = { - &fuse_conn_waiting.attr, - &fuse_conn_abort.attr, - NULL, -}; - -static ssize_t fuse_conn_attr_show(struct kobject *kobj, - struct attribute *attr, - char *page) -{ - struct fuse_conn_attr *fca = - container_of(attr, struct fuse_conn_attr, attr); - - if (fca->show) - return fca->show(get_fuse_conn_kobj(kobj), page); - else - return -EACCES; -} - -static ssize_t fuse_conn_attr_store(struct kobject *kobj, - struct attribute *attr, - const char *page, size_t count) -{ - struct fuse_conn_attr *fca = - container_of(attr, struct fuse_conn_attr, attr); - - if (fca->store) - return fca->store(get_fuse_conn_kobj(kobj), page, count); - else - return -EACCES; -} - -static struct sysfs_ops fuse_conn_sysfs_ops = { - .show = &fuse_conn_attr_show, - .store = &fuse_conn_attr_store, -}; - -static struct kobj_type ktype_fuse_conn = { - .release = fuse_conn_release, - .sysfs_ops = &fuse_conn_sysfs_ops, - .default_attrs = fuse_conn_attrs, -}; - #ifndef HAVE_FS_SUBSYS static decl_subsys(fs, NULL, NULL); #endif static decl_subsys(fuse, NULL, NULL); -static decl_subsys(connections, &ktype_fuse_conn, NULL); +static decl_subsys(connections, NULL, NULL); #endif /* KERNEL_2_6 */ static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, @@ -956,6 +893,7 @@ static int __init fuse_init(void) printk("fuse distribution version: %s\n", FUSE_VERSION); #endif + INIT_LIST_HEAD(&fuse_conn_list); res = fuse_fs_init(); if (res) goto err; @@ -968,8 +906,14 @@ static int __init fuse_init(void) if (res) goto err_dev_cleanup; + res = fuse_ctl_init(); + if (res) + goto err_sysfs_cleanup; + return 0; + err_sysfs_cleanup: + fuse_sysfs_cleanup(); err_dev_cleanup: fuse_dev_cleanup(); err_fs_cleanup: @@ -982,6 +926,7 @@ static void __exit fuse_exit(void) { printk(KERN_DEBUG "fuse exit\n"); + fuse_ctl_cleanup(); fuse_sysfs_cleanup(); fuse_fs_cleanup(); fuse_dev_cleanup(); -- 2.30.2