From: Miklos Szeredi Date: Mon, 16 Jan 2006 17:52:24 +0000 (+0000) Subject: fix X-Git-Tag: fuse_2_6_0_pre1~24 X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=15af1b81cfcc81b9f25d4155d191c071e35dabac;p=qemu-gpiodev%2Flibfuse.git fix --- diff --git a/ChangeLog b/ChangeLog index 8b76422..efd5a3e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,17 @@ +2006-01-16 Miklos Szeredi + + * Added (again) asynchronous readpages support + + * Each connection now shows up under /sys/fs/fuse/connections + + * Connection attributes exported to sysfs: 'waiting' number of + waiting requests; 'abort' abort the connection + + * Connection may be aborted through either the sysfs interface or + with 'umount -f mountpoint' + + * + 2006-01-14 Miklos Szeredi * Released 2.5.0 @@ -12,7 +26,7 @@ * Clearing ->connected or ->mounted connection flags could race with setting other bitfields not protected with a lock - + 2006-01-10 Miklos Szeredi * kernel: add necessary compile flags for 2.4.X/x86_64. diff --git a/doc/kernel.txt b/doc/kernel.txt index 6b5741e..33f7431 100644 --- a/doc/kernel.txt +++ b/doc/kernel.txt @@ -86,6 +86,62 @@ Mount options The default is infinite. Note that the size of read requests is limited anyway to 32 pages (which is 128kbyte on i386). +Sysfs +~~~~~ + +FUSE sets up the following hierarchy in sysfs: + + /sys/fs/fuse/connections/N/ + +where N is an increasing number allocated to each new connection. + +For each connection the following attributes are defined: + + 'waiting' + + The number of requests which are waiting to be transfered to + userspace or being processed by the filesystem daemon. If there is + no filesystem activity and 'waiting' is non-zero, then the + filesystem is hung or deadlocked. + + 'abort' + + Writing anything into this file will abort the filesystem + connection. This means that all waiting requests will be aborted an + error returned for all aborted and new requests. + +Only a privileged user may read or write these attributes. + +Aborting a filesystem connection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +It is possible to get into certain situations where the filesystem is +not responding. Reasons for this may be: + + a) Broken userspace filesystem implementation + + b) Network connection down + + c) Accidental deadlock + + d) Malicious deadlock + +(For more on c) and d) see later sections) + +In either of these cases it may be useful to abort the connection to +the filesystem. There are several ways to do this: + + - Kill the filesystem daemon. Works in case of a) and b) + + - Kill the filesystem daemon and all users of the filesystem. Works + in all cases except some malicious deadlocks + + - Use forced umount (umount -f). Works in all cases but only if + filesystem is still attached (it hasn't been lazy unmounted) + + - Abort filesystem through the sysfs interface. Most powerful + method, always works. + How do non-privileged mounts work? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -313,3 +369,10 @@ faulted with get_user_pages(). The 'req->locked' flag indicates when the copy is taking place, and interruption is delayed until this flag is unset. +Scenario 3 - Tricky deadlock with asynchronous read +--------------------------------------------------- + +The same situation as above, except thread-1 will wait on page lock +and hence it will be uninterruptible as well. The solution is to +abort the connection with forced umount (if mount is attached) or +through the abort attribute in sysfs. diff --git a/kernel/configure.ac b/kernel/configure.ac index 12d224a..0ef06f8 100644 --- a/kernel/configure.ac +++ b/kernel/configure.ac @@ -37,7 +37,9 @@ if test -z "$enable_kernel_module" -a -z "$kernelbuild" && echo "$runver" | grep fi if test "$checkmodule" = yes; then AC_MSG_CHECKING([if FUSE module is from official kernel]) - if fgrep -q "fuse distribution version: " /lib/modules/${runver}/kernel/fs/fuse/fuse.ko 2> /dev/null; then + if test ! -f /lib/modules/${runver}/kernel/fs/fuse/fuse.ko; then + AC_MSG_RESULT([no]) + elif fgrep -q "fuse distribution version: " /lib/modules/${runver}/kernel/fs/fuse/fuse.ko 2> /dev/null; then AC_MSG_RESULT([no]) else AC_MSG_RESULT([yes]) @@ -152,6 +154,21 @@ if test "$ENABLE_FUSE_MODULE" = y; then else AC_MSG_RESULT([no]) fi + AC_MSG_CHECKING([if kernel defines kzalloc function]) + if egrep -qw "kzalloc" $kernelsrc/include/linux/slab.h; then + AC_DEFINE(HAVE_KZALLOC, 1, [kzalloc() is defined]) + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + + AC_MSG_CHECKING([if kernel defines fs_subsys]) + if egrep -qw "fs_subsys" $kernelsrc/include/linux/fs.h; then + AC_DEFINE(HAVE_FS_SUBSYS, 1, [fs_subsys is defined]) + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi AC_MSG_CHECKING([whether lookup_instantiate_filp is defined]) if test -f $kernelsrc/include/linux/namei.h && egrep -q "lookup_instantiate_filp" $kernelsrc/include/linux/namei.h; then diff --git a/kernel/dev.c b/kernel/dev.c index 3880f13..2fc773b 100644 --- a/kernel/dev.c +++ b/kernel/dev.c @@ -23,18 +23,18 @@ MODULE_ALIAS_MISCDEV(FUSE_MINOR); static kmem_cache_t *fuse_req_cachep; -static inline struct fuse_conn *fuse_get_conn(struct file *file) +static struct fuse_conn *fuse_get_conn(struct file *file) { struct fuse_conn *fc; spin_lock(&fuse_lock); fc = file->private_data; - if (fc && !fc->mounted) + if (fc && !fc->connected) fc = NULL; spin_unlock(&fuse_lock); return fc; } -static inline void fuse_request_init(struct fuse_req *req) +static void fuse_request_init(struct fuse_req *req) { memset(req, 0, sizeof(*req)); INIT_LIST_HEAD(&req->list); @@ -56,7 +56,7 @@ void fuse_request_free(struct fuse_req *req) } #ifdef KERNEL_2_6 -static inline void block_sigs(sigset_t *oldset) +static void block_sigs(sigset_t *oldset) { sigset_t mask; @@ -64,13 +64,13 @@ static inline void block_sigs(sigset_t *oldset) sigprocmask(SIG_BLOCK, &mask, oldset); } -static inline void restore_sigs(sigset_t *oldset) +static void restore_sigs(sigset_t *oldset) { sigprocmask(SIG_SETMASK, oldset, NULL); } #else #ifdef HAVE_RECALC_SIGPENDING_TSK -static inline void block_sigs(sigset_t *oldset) +static void block_sigs(sigset_t *oldset) { spin_lock_irq(¤t->sighand->siglock); *oldset = current->blocked; @@ -79,7 +79,7 @@ static inline void block_sigs(sigset_t *oldset) spin_unlock_irq(¤t->sighand->siglock); } -static inline void restore_sigs(sigset_t *oldset) +static void restore_sigs(sigset_t *oldset) { spin_lock_irq(¤t->sighand->siglock); current->blocked = *oldset; @@ -87,7 +87,7 @@ static inline void restore_sigs(sigset_t *oldset) spin_unlock_irq(¤t->sighand->siglock); } #else -static inline void block_sigs(sigset_t *oldset) +static void block_sigs(sigset_t *oldset) { spin_lock_irq(¤t->sigmask_lock); *oldset = current->blocked; @@ -96,7 +96,7 @@ static inline void block_sigs(sigset_t *oldset) spin_unlock_irq(¤t->sigmask_lock); } -static inline void restore_sigs(sigset_t *oldset) +static void restore_sigs(sigset_t *oldset) { spin_lock_irq(¤t->sigmask_lock); current->blocked = *oldset; @@ -149,18 +149,24 @@ struct fuse_req *fuse_get_request(struct fuse_conn *fc) int intr; sigset_t oldset; + atomic_inc(&fc->num_waiting); block_sigs(&oldset); intr = down_interruptible(&fc->outstanding_sem); restore_sigs(&oldset); - return intr ? NULL : do_get_request(fc); + if (intr) { + atomic_dec(&fc->num_waiting); + return NULL; + } + return do_get_request(fc); } static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req) { spin_lock(&fuse_lock); - if (req->preallocated) + if (req->preallocated) { + atomic_dec(&fc->num_waiting); list_add(&req->list, &fc->unused_list); - else + } else fuse_request_free(req); /* If we are in debt decrease that first */ @@ -188,40 +194,23 @@ void fuse_release_background(struct fuse_req *req) spin_unlock(&fuse_lock); } -static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) -{ - int i; - struct fuse_init_out *arg = &req->misc.init_out; - - if (arg->major != FUSE_KERNEL_VERSION) - fc->conn_error = 1; - else { - fc->minor = arg->minor; - fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; - } - - /* After INIT reply is received other requests can go - out. So do (FUSE_MAX_OUTSTANDING - 1) number of - up()s on outstanding_sem. The last up() is done in - fuse_putback_request() */ - for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) - up(&fc->outstanding_sem); -} - /* * This function is called when a request is finished. Either a reply * has arrived or it was interrupted (and not yet sent) or some error - * occurred during communication with userspace, or the device file was - * closed. It decreases the reference count for the request. In case - * of a background request the reference to the stored objects are - * released. The requester thread is woken up (if still waiting), and - * finally the request is either freed or put on the unused_list + * occurred during communication with userspace, or the device file + * was closed. In case of a background request the reference to the + * stored objects are released. The requester thread is woken up (if + * still waiting), the 'end' callback is called if given, else the + * reference to the request is released * * Called with fuse_lock, unlocks it */ static void request_end(struct fuse_conn *fc, struct fuse_req *req) { - req->finished = 1; + void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; + req->end = NULL; + list_del(&req->list); + req->state = FUSE_REQ_FINISHED; spin_unlock(&fuse_lock); if (req->background) { down_read(&fc->sbput_sem); @@ -230,16 +219,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) up_read(&fc->sbput_sem); } wake_up(&req->waitq); - if (req->in.h.opcode == FUSE_INIT) - process_init_reply(fc, req); - else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) { - /* Special case for failed iget in CREATE */ - u64 nodeid = req->in.h.nodeid; - fuse_reset_request(req); - fuse_send_forget(fc, req, nodeid, 1); - return; - } - fuse_put_request(fc, req); + if (end) + end(fc, req); + else + fuse_put_request(fc, req); } /* @@ -290,14 +273,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) spin_unlock(&fuse_lock); block_sigs(&oldset); - wait_event_interruptible(req->waitq, req->finished); + wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED); restore_sigs(&oldset); spin_lock(&fuse_lock); - if (req->finished) + if (req->state == FUSE_REQ_FINISHED && !req->interrupted) return; - req->out.h.error = -EINTR; - req->interrupted = 1; + if (!req->interrupted) { + req->out.h.error = -EINTR; + req->interrupted = 1; + } if (req->locked) { /* This is uninterruptible sleep, because data is being copied to/from the buffers of req. During @@ -308,10 +293,10 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) wait_event(req->waitq, !req->locked); spin_lock(&fuse_lock); } - if (!req->sent && !list_empty(&req->list)) { + if (req->state == FUSE_REQ_PENDING) { list_del(&req->list); __fuse_put_request(req); - } else if (!req->finished && req->sent) + } else if (req->state == FUSE_REQ_SENT) background_request(fc, req); } @@ -346,6 +331,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) fc->outstanding_debt++; } list_add_tail(&req->list, &fc->pending); + req->state = FUSE_REQ_PENDING; wake_up(&fc->waitq); } @@ -398,34 +384,12 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req) request_send_nowait(fc, req); } -void fuse_send_init(struct fuse_conn *fc) -{ - /* This is called from fuse_read_super() so there's guaranteed - to be a request available */ - struct fuse_req *req = do_get_request(fc); - struct fuse_init_in *arg = &req->misc.init_in; - arg->major = FUSE_KERNEL_VERSION; - arg->minor = FUSE_KERNEL_MINOR_VERSION; - req->in.h.opcode = FUSE_INIT; - req->in.numargs = 1; - req->in.args[0].size = sizeof(*arg); - req->in.args[0].value = arg; - req->out.numargs = 1; - /* Variable length arguement used for backward compatibility - with interface version < 7.5. Rest of init_out is zeroed - by do_get_request(), so a short reply is not a problem */ - req->out.argvar = 1; - req->out.args[0].size = sizeof(struct fuse_init_out); - req->out.args[0].value = &req->misc.init_out; - request_send_background(fc, req); -} - /* * Lock the request. Up to the next unlock_request() there mustn't be * anything that could cause a page-fault. If the request was already * interrupted bail out. */ -static inline int lock_request(struct fuse_req *req) +static int lock_request(struct fuse_req *req) { int err = 0; if (req) { @@ -444,7 +408,7 @@ static inline int lock_request(struct fuse_req *req) * requester thread is currently waiting for it to be unlocked, so * wake it up. */ -static inline void unlock_request(struct fuse_req *req) +static void unlock_request(struct fuse_req *req) { if (req) { spin_lock(&fuse_lock); @@ -480,7 +444,7 @@ static void fuse_copy_init(struct fuse_copy_state *cs, int write, } /* Unmap and put previous page of userspace buffer */ -static inline void fuse_copy_finish(struct fuse_copy_state *cs) +static void fuse_copy_finish(struct fuse_copy_state *cs) { if (cs->mapaddr) { kunmap_atomic(cs->mapaddr, KM_USER0); @@ -529,8 +493,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) } /* Do as much copy to/from userspace buffer as we can */ -static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val, - unsigned *size) +static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size) { unsigned ncpy = min(*size, cs->len); if (val) { @@ -550,8 +513,8 @@ static inline int fuse_copy_do(struct fuse_copy_state *cs, void **val, * Copy a page in the request to/from the userspace buffer. Must be * done atomically */ -static inline int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, - unsigned offset, unsigned count, int zeroing) +static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page, + unsigned offset, unsigned count, int zeroing) { if (page && zeroing && count < PAGE_SIZE) { void *mapaddr = kmap_atomic(page, KM_USER1); @@ -633,7 +596,7 @@ static void request_wait(struct fuse_conn *fc) DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&fc->waitq, &wait); - while (fc->mounted && list_empty(&fc->pending)) { + while (fc->connected && list_empty(&fc->pending)) { set_current_state(TASK_INTERRUPTIBLE); if (signal_pending(current)) break; @@ -647,7 +610,7 @@ static void request_wait(struct fuse_conn *fc) } #ifndef KERNEL_2_6 -static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) +static size_t iov_length(const struct iovec *iov, unsigned long nr_segs) { unsigned long seg; size_t ret = 0; @@ -684,14 +647,15 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, goto err_unlock; request_wait(fc); err = -ENODEV; - if (!fc->mounted) + if (!fc->connected) goto err_unlock; err = -ERESTARTSYS; if (list_empty(&fc->pending)) goto err_unlock; req = list_entry(fc->pending.next, struct fuse_req, list); - list_del_init(&req->list); + req->state = FUSE_REQ_READING; + list_move(&req->list, &fc->io); in = &req->in; reqsize = in->h.len; @@ -724,8 +688,8 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov, if (!req->isreply) request_end(fc, req); else { - req->sent = 1; - list_add_tail(&req->list, &fc->processing); + req->state = FUSE_REQ_SENT; + list_move_tail(&req->list, &fc->processing); spin_unlock(&fuse_lock); } return reqsize; @@ -813,12 +777,15 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, goto err_finish; spin_lock(&fuse_lock); + err = -ENOENT; + if (!fc->connected) + goto err_unlock; + req = request_find(fc, oh.unique); err = -EINVAL; if (!req) goto err_unlock; - list_del_init(&req->list); if (req->interrupted) { spin_unlock(&fuse_lock); fuse_copy_finish(&cs); @@ -826,6 +793,7 @@ static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov, request_end(fc, req); return -ENOENT; } + list_move(&req->list, &fc->io); req->out.h = oh; req->locked = 1; cs.req = req; @@ -879,19 +847,90 @@ static unsigned fuse_dev_poll(struct file *file, poll_table *wait) return mask; } -/* Abort all requests on the given list (pending or processing) */ +/* + * Abort all requests on the given list (pending or processing) + * + * This function releases and reacquires fuse_lock + */ static void end_requests(struct fuse_conn *fc, struct list_head *head) { while (!list_empty(head)) { struct fuse_req *req; req = list_entry(head->next, struct fuse_req, list); - list_del_init(&req->list); req->out.h.error = -ECONNABORTED; request_end(fc, req); spin_lock(&fuse_lock); } } +/* + * Abort requests under I/O + * + * The requests are set to interrupted and finished, and the request + * waiter is woken up. This will make request_wait_answer() wait + * until the request is unlocked and then return. + * + * If the request is asynchronous, then the end function needs to be + * called after waiting for the request to be unlocked (if it was + * locked). + */ +static void end_io_requests(struct fuse_conn *fc) +{ + while (!list_empty(&fc->io)) { + struct fuse_req *req = + list_entry(fc->io.next, struct fuse_req, list); + void (*end) (struct fuse_conn *, struct fuse_req *) = req->end; + + req->interrupted = 1; + req->out.h.error = -ECONNABORTED; + req->state = FUSE_REQ_FINISHED; + list_del_init(&req->list); + wake_up(&req->waitq); + if (end) { + req->end = NULL; + /* The end function will consume this reference */ + __fuse_get_request(req); + spin_unlock(&fuse_lock); + wait_event(req->waitq, !req->locked); + end(fc, req); + spin_lock(&fuse_lock); + } + } +} + +/* + * Abort all requests. + * + * Emergency exit in case of a malicious or accidental deadlock, or + * just a hung filesystem. + * + * The same effect is usually achievable through killing the + * filesystem daemon and all users of the filesystem. The exception + * is the combination of an asynchronous request and the tricky + * deadlock (see Documentation/filesystems/fuse.txt). + * + * During the aborting, progression of requests from the pending and + * processing lists onto the io list, and progression of new requests + * onto the pending list is prevented by req->connected being false. + * + * Progression of requests under I/O to the processing list is + * prevented by the req->interrupted flag being true for these + * requests. For this reason requests on the io list must be aborted + * first. + */ +void fuse_abort_conn(struct fuse_conn *fc) +{ + spin_lock(&fuse_lock); + if (fc->connected) { + fc->connected = 0; + end_io_requests(fc); + end_requests(fc, &fc->pending); + end_requests(fc, &fc->processing); + wake_up_all(&fc->waitq); + } + spin_unlock(&fuse_lock); +} + static int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_conn *fc; @@ -902,9 +941,11 @@ static int fuse_dev_release(struct inode *inode, struct file *file) fc->connected = 0; end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); - fuse_release_conn(fc); } spin_unlock(&fuse_lock); + if (fc) + kobject_put(&fc->kobj); + return 0; } diff --git a/kernel/dir.c b/kernel/dir.c index 084946d..478f576 100644 --- a/kernel/dir.c +++ b/kernel/dir.c @@ -27,10 +27,9 @@ */ /* - * Calcualte the time in jiffies until a dentry/attributes are valid + * Calculate the time in jiffies until a dentry/attributes are valid */ -static inline unsigned long time_to_jiffies(unsigned long sec, - unsigned long nsec) +static unsigned long time_to_jiffies(unsigned long sec, unsigned long nsec) { struct timespec ts = {sec, nsec}; return jiffies + timespec_to_jiffies(&ts); @@ -171,7 +170,7 @@ static int dir_alias(struct inode *inode) return 0; } -static inline int invalid_nodeid(u64 nodeid) +static int invalid_nodeid(u64 nodeid) { return !nodeid || nodeid == FUSE_ROOT_ID; } @@ -190,7 +189,7 @@ static struct dentry_operations fuse_dentry_operations = { #endif }; -static inline int valid_mode(int m) +static int valid_mode(int m) { return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) || S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); @@ -825,13 +824,6 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, return 0; } -static inline size_t fuse_send_readdir(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, - size_t count) -{ - return fuse_send_read_common(req, file, inode, pos, count, 1); -} - static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) { int err; @@ -855,7 +847,9 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) } req->num_pages = 1; req->pages[0] = page; - nbytes = fuse_send_readdir(req, file, inode, file->f_pos, PAGE_SIZE); + fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR); + request_send(fc, req); + nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); if (!err) diff --git a/kernel/file.c b/kernel/file.c index 14c4f7c..42ab037 100644 --- a/kernel/file.c +++ b/kernel/file.c @@ -127,6 +127,14 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir) return err; } +/* Special case for failed iget in CREATE */ +static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req) +{ + u64 nodeid = req->in.h.nodeid; + fuse_reset_request(req); + fuse_send_forget(fc, req, nodeid, 1); +} + void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, u64 nodeid, struct inode *inode, int flags, int isdir) { @@ -142,6 +150,8 @@ void fuse_send_release(struct fuse_conn *fc, struct fuse_file *ff, req->in.args[0].size = sizeof(struct fuse_release_in); req->in.args[0].value = inarg; request_send_background(fc, req); + if (!inode) + req->end = fuse_release_end; kfree(ff); } @@ -254,38 +264,35 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync) return fuse_fsync_common(file, de, datasync, 0); } -size_t fuse_send_read_common(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - int isdir) +void fuse_read_fill(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, size_t count, int opcode) { - struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_file *ff = file->private_data; - struct fuse_read_in inarg; + struct fuse_read_in *inarg = &req->misc.read_in; - memset(&inarg, 0, sizeof(struct fuse_read_in)); - inarg.fh = ff->fh; - inarg.offset = pos; - inarg.size = count; - req->in.h.opcode = isdir ? FUSE_READDIR : FUSE_READ; + inarg->fh = ff->fh; + inarg->offset = pos; + inarg->size = count; + req->in.h.opcode = opcode; req->in.h.nodeid = get_node_id(inode); req->inode = inode; req->file = file; req->in.numargs = 1; req->in.args[0].size = sizeof(struct fuse_read_in); - req->in.args[0].value = &inarg; + req->in.args[0].value = inarg; req->out.argpages = 1; req->out.argvar = 1; req->out.numargs = 1; req->out.args[0].size = count; - request_send(fc, req); - return req->out.args[0].size; } -static inline size_t fuse_send_read(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, - size_t count) +static size_t fuse_send_read(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, size_t count) { - return fuse_send_read_common(req, file, inode, pos, count, 0); + struct fuse_conn *fc = get_fuse_conn(inode); + fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + request_send(fc, req); + return req->out.args[0].size; } static int fuse_readpage(struct file *file, struct page *page) @@ -319,21 +326,33 @@ static int fuse_readpage(struct file *file, struct page *page) } #ifdef KERNEL_2_6 -static int fuse_send_readpages(struct fuse_req *req, struct file *file, - struct inode *inode) +static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) { - loff_t pos = page_offset(req->pages[0]); - size_t count = req->num_pages << PAGE_CACHE_SHIFT; - unsigned i; - req->out.page_zeroing = 1; - fuse_send_read(req, file, inode, pos, count); + int i; + + fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */ + for (i = 0; i < req->num_pages; i++) { struct page *page = req->pages[i]; if (!req->out.h.error) SetPageUptodate(page); + else + SetPageError(page); unlock_page(page); } - return req->out.h.error; + fuse_put_request(fc, req); +} + +static void fuse_send_readpages(struct fuse_req *req, struct file *file, + struct inode *inode) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + loff_t pos = page_offset(req->pages[0]); + size_t count = req->num_pages << PAGE_CACHE_SHIFT; + req->out.page_zeroing = 1; + req->end = fuse_readpages_end; + fuse_read_fill(req, file, inode, pos, count, FUSE_READ); + request_send_background(fc, req); } struct fuse_readpages_data { @@ -353,12 +372,12 @@ static int fuse_readpages_fill(void *_data, struct page *page) (req->num_pages == FUSE_MAX_PAGES_PER_REQ || (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read || req->pages[req->num_pages - 1]->index + 1 != page->index)) { - int err = fuse_send_readpages(req, data->file, inode); - if (err) { + fuse_send_readpages(req, data->file, inode); + data->req = req = fuse_get_request(fc); + if (!req) { unlock_page(page); - return err; + return -EINTR; } - fuse_reset_request(req); } req->pages[req->num_pages] = page; req->num_pages ++; @@ -383,10 +402,8 @@ static int fuse_readpages(struct file *file, struct address_space *mapping, return -EINTR; err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); - if (!err && data.req->num_pages) - err = fuse_send_readpages(data.req, file, inode); - fuse_put_request(fc, data.req); - fuse_invalidate_attr(inode); /* atime changed */ + if (!err) + fuse_send_readpages(data.req, file, inode); return err; } #else /* KERNEL_2_6 */ @@ -725,12 +742,8 @@ static int fuse_set_page_dirty(struct page *page) static struct file_operations fuse_file_operations = { .llseek = generic_file_llseek, #ifdef KERNEL_2_6 - .read = do_sync_read, - .write = do_sync_write, - .readv = generic_file_readv, - .writev = generic_file_writev, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = generic_file_read, + .write = generic_file_write, #else .read = fuse_file_read, .write = generic_file_write, diff --git a/kernel/fuse_i.h b/kernel/fuse_i.h index 84e17dc..b68b9e3 100644 --- a/kernel/fuse_i.h +++ b/kernel/fuse_i.h @@ -55,6 +55,7 @@ # endif # define new_decode_dev(x) (x) # define new_encode_dev(x) (x) +# define s_fs_info u.generic_sbp #endif /* KERNEL_2_6 */ #endif /* FUSE_MAINLINE */ #include @@ -86,6 +87,22 @@ static inline void set_page_dirty_lock(struct page *page) set_page_dirty(page); unlock_page(page); } + +struct kobject { + atomic_t count; + void (*release) (struct kobject *); +}; + +static inline void kobject_get(struct kobject *kobj) +{ + atomic_inc(&kobj->count); +} + +static inline void kobject_put(struct kobject *kobj) +{ + if (atomic_dec_and_test(&kobj->count)) + kobj->release(kobj); +} #endif /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 @@ -171,6 +188,11 @@ struct fuse_out { /** Header returned from userspace */ struct fuse_out_header h; + /* + * The following bitfields are not changed during the request + * processing + */ + /** Last argument is variable length (can be shorter than arg->size) */ unsigned argvar:1; @@ -188,12 +210,23 @@ struct fuse_out { struct fuse_arg args[3]; }; +/** The request state */ +enum fuse_req_state { + FUSE_REQ_INIT = 0, + FUSE_REQ_PENDING, + FUSE_REQ_READING, + FUSE_REQ_SENT, + FUSE_REQ_FINISHED +}; + +struct fuse_conn; + /** * A request to the client */ struct fuse_req { - /** This can be on either unused_list, pending or processing - lists in fuse_conn */ + /** This can be on either unused_list, pending processing or + io lists in fuse_conn */ struct list_head list; /** Entry on the background list */ @@ -202,6 +235,12 @@ struct fuse_req { /** refcount */ atomic_t count; + /* + * The following bitfields are either set once before the + * request is queued or setting/clearing them is protected by + * fuse_lock + */ + /** True if the request has reply */ unsigned isreply:1; @@ -217,11 +256,8 @@ struct fuse_req { /** Data is being copied to/from the request */ unsigned locked:1; - /** Request has been sent to userspace */ - unsigned sent:1; - - /** The request is finished */ - unsigned finished:1; + /** State of the request */ + enum fuse_req_state state; /** The request input */ struct fuse_in in; @@ -238,6 +274,7 @@ struct fuse_req { struct fuse_release_in release_in; struct fuse_init_in init_in; struct fuse_init_out init_out; + struct fuse_read_in read_in; } misc; /** page vector */ @@ -257,6 +294,9 @@ struct fuse_req { /** File used in the request (or NULL) */ struct file *file; + + /** Request completion callback */ + void (*end)(struct fuse_conn *, struct fuse_req *); }; /** @@ -267,9 +307,6 @@ struct fuse_req { * unmounted. */ struct fuse_conn { - /** Reference count */ - int count; - /** The user id for this mount */ uid_t user_id; @@ -294,6 +331,9 @@ struct fuse_conn { /** The list of requests being processed */ struct list_head processing; + /** The list of requests under I/O */ + struct list_head io; + /** Requests put in the background (RELEASE or any other interrupted request) */ struct list_head background; @@ -317,12 +357,20 @@ struct fuse_conn { /** Mount is active */ unsigned mounted; - /** Connection established */ + /** Connection established, cleared on umount, connection + abort and device release */ unsigned connected; - /** Connection failed (version mismatch) */ + /** Connection failed (version mismatch). Cannot race with + setting other bitfields since it is only set once in INIT + reply, before any other request, and never cleared */ unsigned conn_error : 1; + /* + * The following bitfields are only for optimization purposes + * and hence races in setting them will not cause malfunction + */ + /** Is fsync not implemented by fs? */ unsigned no_fsync : 1; @@ -350,6 +398,9 @@ struct fuse_conn { /** Is create not implemented by fs? */ unsigned no_create : 1; + /** The number of requests waiting for completion */ + atomic_t num_waiting; + /** Negotiated minor version */ unsigned minor; @@ -357,20 +408,14 @@ struct fuse_conn { /** Backing dev info */ struct backing_dev_info bdi; #endif -}; -static inline struct fuse_conn **get_fuse_conn_super_p(struct super_block *sb) -{ -#ifdef KERNEL_2_6 - return (struct fuse_conn **) &sb->s_fs_info; -#else - return (struct fuse_conn **) &sb->u.generic_sbp; -#endif -} + /** kobject */ + struct kobject kobj; +}; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) { - return *get_fuse_conn_super_p(sb); + return sb->s_fs_info; } static inline struct fuse_conn *get_fuse_conn(struct inode *inode) @@ -378,6 +423,11 @@ static inline struct fuse_conn *get_fuse_conn(struct inode *inode) return get_fuse_conn_super(inode->i_sb); } +static inline struct fuse_conn *get_fuse_conn_kobj(struct kobject *obj) +{ + return container_of(obj, struct fuse_conn, kobj); +} + static inline struct fuse_inode *get_fuse_inode(struct inode *inode) { return container_of(inode, struct fuse_inode, inode); @@ -419,11 +469,10 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, unsigned long nodeid, u64 nlookup); /** - * Send READ or READDIR request + * Initialize READ or READDIR request */ -size_t fuse_send_read_common(struct fuse_req *req, struct file *file, - struct inode *inode, loff_t pos, size_t count, - int isdir); +void fuse_read_fill(struct fuse_req *req, struct file *file, + struct inode *inode, loff_t pos, size_t count, int opcode); /** * Send OPEN or OPENDIR request @@ -477,12 +526,6 @@ void fuse_init_symlink(struct inode *inode); */ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr); -/** - * Check if the connection can be released, and if yes, then free the - * connection structure - */ -void fuse_release_conn(struct fuse_conn *fc); - /** * Initialize the client device */ @@ -539,6 +582,9 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req); */ void fuse_release_background(struct fuse_req *req); +/* Abort all requests */ +void fuse_abort_conn(struct fuse_conn *fc); + /** * Get the attributes of a file */ @@ -548,8 +594,3 @@ int fuse_do_getattr(struct inode *inode); * Invalidate inode attributes */ void fuse_invalidate_attr(struct inode *inode); - -/** - * Send the INIT message - */ -void fuse_send_init(struct fuse_conn *fc); diff --git a/kernel/inode.c b/kernel/inode.c index af374c8..f10990f 100644 --- a/kernel/inode.c +++ b/kernel/inode.c @@ -30,6 +30,15 @@ MODULE_LICENSE("GPL"); spinlock_t fuse_lock; static kmem_cache_t *fuse_inode_cachep; +#ifdef KERNEL_2_6 +static struct subsystem connections_subsys; + +struct fuse_conn_attr { + struct attribute attr; + ssize_t (*show)(struct fuse_conn *, char *); + ssize_t (*store)(struct fuse_conn *, const char *, size_t); +}; +#endif #define FUSE_SUPER_MAGIC 0x65735546 @@ -261,6 +270,11 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid, } #endif +static void fuse_umount_begin(struct super_block *sb) +{ + fuse_abort_conn(get_fuse_conn_super(sb)); +} + static void fuse_put_super(struct super_block *sb) { struct fuse_conn *fc = get_fuse_conn_super(sb); @@ -272,14 +286,15 @@ static void fuse_put_super(struct super_block *sb) spin_lock(&fuse_lock); fc->mounted = 0; - fc->user_id = 0; - fc->group_id = 0; - fc->flags = 0; + fc->connected = 0; + spin_unlock(&fuse_lock); + up_write(&fc->sbput_sem); /* Flush all readers on this fs */ wake_up_all(&fc->waitq); - up_write(&fc->sbput_sem); - fuse_release_conn(fc); - spin_unlock(&fuse_lock); +#ifdef KERNEL_2_6 + kobject_del(&fc->kobj); +#endif + kobject_put(&fc->kobj); } static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) @@ -445,8 +460,10 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } -static void free_conn(struct fuse_conn *fc) +static void fuse_conn_release(struct kobject *kobj) { + struct fuse_conn *fc = get_fuse_conn_kobj(kobj); + while (!list_empty(&fc->unused_list)) { struct fuse_req *req; req = list_entry(fc->unused_list.next, struct fuse_req, list); @@ -456,33 +473,42 @@ static void free_conn(struct fuse_conn *fc) kfree(fc); } -/* Must be called with the fuse lock held */ -void fuse_release_conn(struct fuse_conn *fc) +#ifndef HAVE_KZALLOC +static void *kzalloc(size_t size, int flags) { - fc->count--; - if (!fc->count) - free_conn(fc); + void *ret = kmalloc(size, flags); + if (ret) + memset(ret, 0, size); + return ret; } - +#endif static struct fuse_conn *new_conn(void) { struct fuse_conn *fc; - fc = kmalloc(sizeof(*fc), GFP_KERNEL); - if (fc != NULL) { + fc = kzalloc(sizeof(*fc), GFP_KERNEL); + if (fc) { int i; - memset(fc, 0, sizeof(*fc)); init_waitqueue_head(&fc->waitq); INIT_LIST_HEAD(&fc->pending); INIT_LIST_HEAD(&fc->processing); + INIT_LIST_HEAD(&fc->io); INIT_LIST_HEAD(&fc->unused_list); INIT_LIST_HEAD(&fc->background); - sema_init(&fc->outstanding_sem, 0); + sema_init(&fc->outstanding_sem, 1); /* One for INIT */ init_rwsem(&fc->sbput_sem); +#ifdef KERNEL_2_6 + kobj_set_kset_s(fc, connections_subsys); + kobject_init(&fc->kobj); +#else + atomic_set(&fc->kobj.count, 1); + fc->kobj.release = fuse_conn_release; +#endif + atomic_set(&fc->num_waiting, 0); for (i = 0; i < FUSE_MAX_OUTSTANDING; i++) { struct fuse_req *req = fuse_request_alloc(); if (!req) { - free_conn(fc); + kobject_put(&fc->kobj); return NULL; } list_add(&req->list, &fc->unused_list); @@ -499,25 +525,32 @@ static struct fuse_conn *new_conn(void) static struct fuse_conn *get_conn(struct file *file, struct super_block *sb) { struct fuse_conn *fc; + int err; + err = -EINVAL; if (file->f_op != &fuse_dev_operations) - return ERR_PTR(-EINVAL); + goto out_err; + + err = -ENOMEM; fc = new_conn(); - if (fc == NULL) - return ERR_PTR(-ENOMEM); + if (!fc) + goto out_err; + spin_lock(&fuse_lock); - if (file->private_data) { - free_conn(fc); - fc = ERR_PTR(-EINVAL); - } else { - file->private_data = fc; - *get_fuse_conn_super_p(sb) = fc; - fc->mounted = 1; - fc->connected = 1; - fc->count = 2; - } + err = -EINVAL; + if (file->private_data) + goto out_unlock; + + kobject_get(&fc->kobj); + file->private_data = fc; spin_unlock(&fuse_lock); return fc; + + out_unlock: + spin_unlock(&fuse_lock); + kobject_put(&fc->kobj); + out_err: + return ERR_PTR(err); } static struct inode *get_root_inode(struct super_block *sb, unsigned mode) @@ -600,16 +633,76 @@ static struct super_operations fuse_super_operations = { .read_inode = fuse_read_inode, .clear_inode = fuse_clear_inode, .put_super = fuse_put_super, + .umount_begin = fuse_umount_begin, .statfs = fuse_statfs, .show_options = fuse_show_options, }; +static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) +{ + int i; + struct fuse_init_out *arg = &req->misc.init_out; + + if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION) + fc->conn_error = 1; + else { + fc->minor = arg->minor; + fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; + } + + /* After INIT reply is received other requests can go + out. So do (FUSE_MAX_OUTSTANDING - 1) number of + up()s on outstanding_sem. The last up() is done in + fuse_putback_request() */ + for (i = 1; i < FUSE_MAX_OUTSTANDING; i++) + up(&fc->outstanding_sem); + + fuse_put_request(fc, req); +} + +static void fuse_send_init(struct fuse_conn *fc) +{ + /* This is called from fuse_read_super() so there's guaranteed + to be exactly one request available */ + struct fuse_req *req = fuse_get_request(fc); + struct fuse_init_in *arg = &req->misc.init_in; + + arg->major = FUSE_KERNEL_VERSION; + arg->minor = FUSE_KERNEL_MINOR_VERSION; + req->in.h.opcode = FUSE_INIT; + req->in.numargs = 1; + req->in.args[0].size = sizeof(*arg); + req->in.args[0].value = arg; + req->out.numargs = 1; + /* Variable length arguement used for backward compatibility + with interface version < 7.5. Rest of init_out is zeroed + by do_get_request(), so a short reply is not a problem */ + req->out.argvar = 1; + req->out.args[0].size = sizeof(struct fuse_init_out); + req->out.args[0].value = &req->misc.init_out; + req->end = process_init_reply; + request_send_background(fc, req); +} + +#ifdef KERNEL_2_6 +static unsigned long long conn_id(void) +{ + static unsigned long long ctr = 1; + unsigned long long val; + spin_lock(&fuse_lock); + val = ctr++; + spin_unlock(&fuse_lock); + return val; +} +#endif + static int fuse_fill_super(struct super_block *sb, void *data, int silent) { struct fuse_conn *fc; struct inode *root; struct fuse_mount_data d; struct file *file; + struct dentry *root_dentry; int err; if (!parse_fuse_opt((char *) data, &d)) @@ -644,23 +737,46 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE; #endif + /* Used by get_root_inode() */ + sb->s_fs_info = fc; + err = -ENOMEM; root = get_root_inode(sb, d.rootmode); - if (root == NULL) + if (!root) goto err; - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { + root_dentry = d_alloc_root(root); + if (!root_dentry) { iput(root); goto err; } + +#ifdef KERNEL_2_6 + err = kobject_set_name(&fc->kobj, "%llu", conn_id()); + if (err) + goto err_put_root; + + err = kobject_add(&fc->kobj); + if (err) + goto err_put_root; +#endif + + sb->s_root = root_dentry; + spin_lock(&fuse_lock); + fc->mounted = 1; + fc->connected = 1; + spin_unlock(&fuse_lock); + fuse_send_init(fc); + return 0; +#ifdef KERNEL_2_6 + err_put_root: + dput(root_dentry); +#endif err: - spin_lock(&fuse_lock); - fuse_release_conn(fc); - spin_unlock(&fuse_lock); + kobject_put(&fc->kobj); return err; } @@ -692,6 +808,81 @@ static struct super_block *fuse_read_super_compat(struct super_block *sb, static DECLARE_FSTYPE(fuse_fs_type, "fuse", fuse_read_super_compat, 0); #endif +#ifdef KERNEL_2_6 +static ssize_t fuse_conn_waiting_show(struct fuse_conn *fc, char *page) +{ + return sprintf(page, "%i\n", atomic_read(&fc->num_waiting)); +} + +static ssize_t fuse_conn_abort_store(struct fuse_conn *fc, const char *page, + size_t count) +{ + fuse_abort_conn(fc); + return count; +} + +#ifndef __ATTR +#define __ATTR(_name,_mode,_show,_store) { \ + .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ + .show = _show, \ + .store = _store, \ +} +#endif +static struct fuse_conn_attr fuse_conn_waiting = + __ATTR(waiting, 0400, fuse_conn_waiting_show, NULL); +static struct fuse_conn_attr fuse_conn_abort = + __ATTR(abort, 0600, NULL, fuse_conn_abort_store); + +static struct attribute *fuse_conn_attrs[] = { + &fuse_conn_waiting.attr, + &fuse_conn_abort.attr, + NULL, +}; + +static ssize_t fuse_conn_attr_show(struct kobject *kobj, + struct attribute *attr, + char *page) +{ + struct fuse_conn_attr *fca = + container_of(attr, struct fuse_conn_attr, attr); + + if (fca->show) + return fca->show(get_fuse_conn_kobj(kobj), page); + else + return -EACCES; +} + +static ssize_t fuse_conn_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *page, size_t count) +{ + struct fuse_conn_attr *fca = + container_of(attr, struct fuse_conn_attr, attr); + + if (fca->store) + return fca->store(get_fuse_conn_kobj(kobj), page, count); + else + return -EACCES; +} + +static struct sysfs_ops fuse_conn_sysfs_ops = { + .show = &fuse_conn_attr_show, + .store = &fuse_conn_attr_store, +}; + +static struct kobj_type ktype_fuse_conn = { + .release = fuse_conn_release, + .sysfs_ops = &fuse_conn_sysfs_ops, + .default_attrs = fuse_conn_attrs, +}; + +#ifndef HAVE_FS_SUBSYS +static decl_subsys(fs, NULL, NULL); +#endif +static decl_subsys(fuse, NULL, NULL); +static decl_subsys(connections, &ktype_fuse_conn, NULL); +#endif /* KERNEL_2_6 */ + static void fuse_inode_init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) { @@ -729,6 +920,55 @@ static void fuse_fs_cleanup(void) kmem_cache_destroy(fuse_inode_cachep); } +#ifdef KERNEL_2_6 +static int fuse_sysfs_init(void) +{ + int err; + +#ifndef HAVE_FS_SUBSYS + err = subsystem_register(&fs_subsys); + if (err) + return err; +#endif + kset_set_kset_s(&fuse_subsys, fs_subsys); + err = subsystem_register(&fuse_subsys); + if (err) + goto out_err; + + kset_set_kset_s(&connections_subsys, fuse_subsys); + err = subsystem_register(&connections_subsys); + if (err) + goto out_fuse_unregister; + + return 0; + + out_fuse_unregister: + subsystem_unregister(&fuse_subsys); + out_err: +#ifndef HAVE_FS_SUBSYS + subsystem_unregister(&fs_subsys); +#endif + return err; +} + +static void fuse_sysfs_cleanup(void) +{ + subsystem_unregister(&connections_subsys); + subsystem_unregister(&fuse_subsys); +#ifndef HAVE_FS_SUBSYS + subsystem_unregister(&fs_subsys); +#endif +} +#else /* KERNEL_2_6 */ +static int fuse_sysfs_init(void) +{ + return 0; +} +static void fuse_sysfs_cleanup(void) +{ +} +#endif /* KERNEL_2_6 */ + static int __init fuse_init(void) { int res; @@ -748,8 +988,14 @@ static int __init fuse_init(void) if (res) goto err_fs_cleanup; + res = fuse_sysfs_init(); + if (res) + goto err_dev_cleanup; + return 0; + err_dev_cleanup: + fuse_dev_cleanup(); err_fs_cleanup: fuse_fs_cleanup(); err: @@ -760,6 +1006,7 @@ static void __exit fuse_exit(void) { printk(KERN_DEBUG "fuse exit\n"); + fuse_sysfs_cleanup(); fuse_fs_cleanup(); fuse_dev_cleanup(); }