From 6159d2c16abd4d0e23a595e0786f7f59bcc56f7a Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Sat, 22 Feb 2025 08:16:33 +0300 Subject: [PATCH] WIP: first guse Signed-off-by: Nikita Shubin --- Kbuild | 4 + Makefile | 10 + fuse_i.h | 1483 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ guse.c | 767 ++++++++++++++++++++++++++++ guse.h | 0 5 files changed, 2264 insertions(+) create mode 100644 Kbuild create mode 100644 Makefile create mode 100644 fuse_i.h create mode 100644 guse.c create mode 100644 guse.h diff --git a/Kbuild b/Kbuild new file mode 100644 index 0000000..0681fdf --- /dev/null +++ b/Kbuild @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-m := guse.o +CFLAGS_guse.o = -I$(src) \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f43c125 --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +KDIR ?= /lib/modules/`uname -r`/build + +default: + $(MAKE) -C $(KDIR) M=$$PWD + +modules: + $(MAKE) -C $(KDIR) M=$$PWD modules + +modules_install: + $(MAKE) -C $(KDIR) M=$$PWD INSTALL_MOD_PATH=$(INSTALL_MOD_PATH) modules_install \ No newline at end of file diff --git a/fuse_i.h b/fuse_i.h new file mode 100644 index 0000000..e6cc3d5 --- /dev/null +++ b/fuse_i.h @@ -0,0 +1,1483 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2008 Miklos Szeredi + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. 
+*/ + +#ifndef _FS_FUSE_I_H +#define _FS_FUSE_I_H + +#ifndef pr_fmt +# define pr_fmt(fmt) "fuse: " fmt +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** Default max number of pages that can be used in a single read request */ +#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 + +/** Maximum of max_pages received in init_out */ +#define FUSE_MAX_MAX_PAGES 256 + +/** Bias for fi->writectr, meaning new writepages must not be sent */ +#define FUSE_NOWRITE INT_MIN + +/** It could be as large as PATH_MAX, but would that have any uses? */ +#define FUSE_NAME_MAX 1024 + +/** Number of dentries for each connection in the control filesystem */ +#define FUSE_CTL_NUM_DENTRIES 5 + +/** List of active connections */ +extern struct list_head fuse_conn_list; + +/** Global mutex protecting fuse_conn_list and the control filesystem */ +extern struct mutex fuse_mutex; + +/** Module parameters */ +extern unsigned max_user_bgreq; +extern unsigned max_user_congthresh; + +/* One forget request */ +struct fuse_forget_link { + struct fuse_forget_one forget_one; + struct fuse_forget_link *next; +}; + +/* Submount lookup tracking */ +struct fuse_submount_lookup { + /** Refcount */ + refcount_t count; + + /** Unique ID, which identifies the inode between userspace + * and kernel */ + u64 nodeid; + + /** The request used for sending the FORGET message */ + struct fuse_forget_link *forget; +}; + +/** Container for data related to mapping to backing file */ +struct fuse_backing { + struct file *file; + struct cred *cred; + + /** refcount */ + refcount_t count; + struct rcu_head rcu; +}; + +/** FUSE inode */ +struct fuse_inode { + /** Inode data */ + struct inode inode; + + /** Unique ID, which identifies the inode between userspace + * and kernel */ + u64 nodeid; + + /** Number of lookups on this inode */ + u64 nlookup; + + /** The request used for sending the 
FORGET message */ + struct fuse_forget_link *forget; + + /** Time in jiffies until the file attributes are valid */ + u64 i_time; + + /* Which attributes are invalid */ + u32 inval_mask; + + /** The sticky bit in inode->i_mode may have been removed, so + preserve the original mode */ + umode_t orig_i_mode; + + /* Cache birthtime */ + struct timespec64 i_btime; + + /** 64 bit inode number */ + u64 orig_ino; + + /** Version of last attribute change */ + u64 attr_version; + + union { + /* read/write io cache (regular file only) */ + struct { + /* Files usable in writepage. Protected by fi->lock */ + struct list_head write_files; + + /* Writepages pending on truncate or fsync */ + struct list_head queued_writes; + + /* Number of sent writes, a negative bias + * (FUSE_NOWRITE) means more writes are blocked */ + int writectr; + + /** Number of files/maps using page cache */ + int iocachectr; + + /* Waitq for writepage completion */ + wait_queue_head_t page_waitq; + + /* waitq for direct-io completion */ + wait_queue_head_t direct_io_waitq; + + /* List of writepage requests (pending or sent) */ + struct rb_root writepages; + }; + + /* readdir cache (directory only) */ + struct { + /* true if fully cached */ + bool cached; + + /* size of cache */ + loff_t size; + + /* position at end of cache (position of next entry) */ + loff_t pos; + + /* version of the cache */ + u64 version; + + /* modification time of directory when cache was + * started */ + struct timespec64 mtime; + + /* iversion of directory when cache was started */ + u64 iversion; + + /* protects above fields */ + spinlock_t lock; + } rdc; + }; + + /** Miscellaneous bits describing inode state */ + unsigned long state; + + /** Lock for serializing lookup and readdir for back compatibility*/ + struct mutex mutex; + + /** Lock to protect write related fields */ + spinlock_t lock; + +#ifdef CONFIG_FUSE_DAX + /* + * Dax specific inode data + */ + struct fuse_inode_dax *dax; +#endif + /** Submount specific lookup 
tracking */ + struct fuse_submount_lookup *submount_lookup; +#ifdef CONFIG_FUSE_PASSTHROUGH + /** Reference to backing file in passthrough mode */ + struct fuse_backing *fb; +#endif +}; + +/** FUSE inode state bits */ +enum { + /** Advise readdirplus */ + FUSE_I_ADVISE_RDPLUS, + /** Initialized with readdirplus */ + FUSE_I_INIT_RDPLUS, + /** An operation changing file size is in progress */ + FUSE_I_SIZE_UNSTABLE, + /* Bad inode */ + FUSE_I_BAD, + /* Has btime */ + FUSE_I_BTIME, + /* Wants or already has page cache IO */ + FUSE_I_CACHE_IO_MODE, +}; + +struct fuse_conn; +struct fuse_mount; +union fuse_file_args; + +/** FUSE specific file data */ +struct fuse_file { + /** Fuse connection for this file */ + struct fuse_mount *fm; + + /* Argument space reserved for open/release */ + union fuse_file_args *args; + + /** Kernel file handle guaranteed to be unique */ + u64 kh; + + /** File handle used by userspace */ + u64 fh; + + /** Node id of this file */ + u64 nodeid; + + /** Refcount */ + refcount_t count; + + /** FOPEN_* flags returned by open */ + u32 open_flags; + + /** Entry on inode's write_files list */ + struct list_head write_entry; + + /* Readdir related */ + struct { + /* Dir stream position */ + loff_t pos; + + /* Offset in cache */ + loff_t cache_off; + + /* Version of cache we are reading */ + u64 version; + + } readdir; + + /** RB node to be linked on fuse_conn->polled_files */ + struct rb_node polled_node; + + /** Wait queue head for poll */ + wait_queue_head_t poll_wait; + + /** Does file hold a fi->iocachectr refcount? */ + enum { IOM_NONE, IOM_CACHED, IOM_UNCACHED } iomode; + +#ifdef CONFIG_FUSE_PASSTHROUGH + /** Reference to backing file in passthrough mode */ + struct file *passthrough; + const struct cred *cred; +#endif + + /** Has flock been performed on this file? 
*/ + bool flock:1; +}; + +/** One input argument of a request */ +struct fuse_in_arg { + unsigned size; + const void *value; +}; + +/** One output argument of a request */ +struct fuse_arg { + unsigned size; + void *value; +}; + +/** FUSE page descriptor */ +struct fuse_page_desc { + unsigned int length; + unsigned int offset; +}; + +struct fuse_args { + uint64_t nodeid; + uint32_t opcode; + uint8_t in_numargs; + uint8_t out_numargs; + uint8_t ext_idx; + bool force:1; + bool noreply:1; + bool nocreds:1; + bool in_pages:1; + bool out_pages:1; + bool user_pages:1; + bool out_argvar:1; + bool page_zeroing:1; + bool page_replace:1; + bool may_block:1; + bool is_ext:1; + bool is_pinned:1; + struct fuse_in_arg in_args[3]; + struct fuse_arg out_args[2]; + void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); +}; + +struct fuse_args_pages { + struct fuse_args args; + struct page **pages; + struct fuse_page_desc *descs; + unsigned int num_pages; +}; + +struct fuse_release_args { + struct fuse_args args; + struct fuse_release_in inarg; + struct inode *inode; +}; + +union fuse_file_args { + /* Used during open() */ + struct fuse_open_out open_outarg; + /* Used during release() */ + struct fuse_release_args release_args; +}; + +#define FUSE_ARGS(args) struct fuse_args args = {} + +/** The request IO state (for asynchronous processing) */ +struct fuse_io_priv { + struct kref refcnt; + int async; + spinlock_t lock; + unsigned reqs; + ssize_t bytes; + size_t size; + __u64 offset; + bool write; + bool should_dirty; + int err; + struct kiocb *iocb; + struct completion *done; + bool blocking; +}; + +#define FUSE_IO_PRIV_SYNC(i) \ +{ \ + .refcnt = KREF_INIT(1), \ + .async = 0, \ + .iocb = i, \ +} + +/** + * Request flags + * + * FR_ISREPLY: set if the request has reply + * FR_FORCE: force sending of the request even if interrupted + * FR_BACKGROUND: request is sent in the background + * FR_WAITING: request is counted as "waiting" + * FR_ABORTED: the request was 
aborted + * FR_INTERRUPTED: the request has been interrupted + * FR_LOCKED: data is being copied to/from the request + * FR_PENDING: request is not yet in userspace + * FR_SENT: request is in userspace, waiting for an answer + * FR_FINISHED: request is finished + * FR_PRIVATE: request is on private list + * FR_ASYNC: request is asynchronous + */ +enum fuse_req_flag { + FR_ISREPLY, + FR_FORCE, + FR_BACKGROUND, + FR_WAITING, + FR_ABORTED, + FR_INTERRUPTED, + FR_LOCKED, + FR_PENDING, + FR_SENT, + FR_FINISHED, + FR_PRIVATE, + FR_ASYNC, +}; + +/** + * A request to the client + * + * .waitq.lock protects the following fields: + * - FR_ABORTED + * - FR_LOCKED (may also be modified under fc->lock, tested under both) + */ +struct fuse_req { + /** This can be on either pending processing or io lists in + fuse_conn */ + struct list_head list; + + /** Entry on the interrupts list */ + struct list_head intr_entry; + + /* Input/output arguments */ + struct fuse_args *args; + + /** refcount */ + refcount_t count; + + /* Request flags, updated with test/set/clear_bit() */ + unsigned long flags; + + /* The request input header */ + struct { + struct fuse_in_header h; + } in; + + /* The request output header */ + struct { + struct fuse_out_header h; + } out; + + /** Used to wake up the task waiting for completion of request*/ + wait_queue_head_t waitq; + +#if IS_ENABLED(CONFIG_VIRTIO_FS) + /** virtio-fs's physically contiguous buffer for in and out args */ + void *argbuf; +#endif + + /** fuse_mount this request belongs to */ + struct fuse_mount *fm; +}; + +struct fuse_iqueue; + +/** + * Input queue callbacks + * + * Input queue signalling is device-specific. For example, the /dev/fuse file + * uses fiq->waitq and fasync to wake processes that are waiting on queue + * readiness. These callbacks allow other device types to respond to input + * queue activity. 
+ */ +struct fuse_iqueue_ops { + /** + * Send one forget + */ + void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link); + + /** + * Send interrupt for request + */ + void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req); + + /** + * Send one request + */ + void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req); + + /** + * Clean up when fuse_iqueue is destroyed + */ + void (*release)(struct fuse_iqueue *fiq); +}; + +/** /dev/fuse input queue operations */ +extern const struct fuse_iqueue_ops fuse_dev_fiq_ops; + +struct fuse_iqueue { + /** Connection established */ + unsigned connected; + + /** Lock protecting accesses to members of this structure */ + spinlock_t lock; + + /** Readers of the connection are waiting on this */ + wait_queue_head_t waitq; + + /** The next unique request id */ + u64 reqctr; + + /** The list of pending requests */ + struct list_head pending; + + /** Pending interrupts */ + struct list_head interrupts; + + /** Queue of pending forgets */ + struct fuse_forget_link forget_list_head; + struct fuse_forget_link *forget_list_tail; + + /** Batching of FORGET requests (positive indicates FORGET batch) */ + int forget_batch; + + /** O_ASYNC requests */ + struct fasync_struct *fasync; + + /** Device-specific callbacks */ + const struct fuse_iqueue_ops *ops; + + /** Device-specific state */ + void *priv; +}; + +#define FUSE_PQ_HASH_BITS 8 +#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS) + +struct fuse_pqueue { + /** Connection established */ + unsigned connected; + + /** Lock protecting accesses to members of this structure */ + spinlock_t lock; + + /** Hash table of requests being processed */ + struct list_head *processing; + + /** The list of requests under I/O */ + struct list_head io; +}; + +/** + * Fuse device instance + */ +struct fuse_dev { + /** Fuse connection for this device */ + struct fuse_conn *fc; + + /** Processing queue */ + struct fuse_pqueue pq; + + /** list entry on fc->devices */ 
+ struct list_head entry; +}; + +enum fuse_dax_mode { + FUSE_DAX_INODE_DEFAULT, /* default */ + FUSE_DAX_ALWAYS, /* "-o dax=always" */ + FUSE_DAX_NEVER, /* "-o dax=never" */ + FUSE_DAX_INODE_USER, /* "-o dax=inode" */ +}; + +static inline bool fuse_is_inode_dax_mode(enum fuse_dax_mode mode) +{ + return mode == FUSE_DAX_INODE_DEFAULT || mode == FUSE_DAX_INODE_USER; +} + +struct fuse_fs_context { + int fd; + struct file *file; + unsigned int rootmode; + kuid_t user_id; + kgid_t group_id; + bool is_bdev:1; + bool fd_present:1; + bool rootmode_present:1; + bool user_id_present:1; + bool group_id_present:1; + bool default_permissions:1; + bool allow_other:1; + bool destroy:1; + bool no_control:1; + bool no_force_umount:1; + bool legacy_opts_show:1; + enum fuse_dax_mode dax_mode; + unsigned int max_read; + unsigned int blksize; + const char *subtype; + + /* DAX device, may be NULL */ + struct dax_device *dax_dev; + + /* fuse_dev pointer to fill in, should contain NULL on entry */ + void **fudptr; +}; + +struct fuse_sync_bucket { + /* count is a possible scalability bottleneck */ + atomic_t count; + wait_queue_head_t waitq; + struct rcu_head rcu; +}; + +/** + * A Fuse connection. + * + * This structure is created, when the root filesystem is mounted, and + * is destroyed, when the client device is closed and the last + * fuse_mount is destroyed. 
+ */ +struct fuse_conn { + /** Lock protecting accesses to members of this structure */ + spinlock_t lock; + + /** Refcount */ + refcount_t count; + + /** Number of fuse_dev's */ + atomic_t dev_count; + + struct rcu_head rcu; + + /** The user id for this mount */ + kuid_t user_id; + + /** The group id for this mount */ + kgid_t group_id; + + /** The pid namespace for this mount */ + struct pid_namespace *pid_ns; + + /** The user namespace for this mount */ + struct user_namespace *user_ns; + + /** Maximum read size */ + unsigned max_read; + + /** Maximum write size */ + unsigned max_write; + + /** Maximum number of pages that can be used in a single request */ + unsigned int max_pages; + + /** Constrain ->max_pages to this value during feature negotiation */ + unsigned int max_pages_limit; + + /** Input queue */ + struct fuse_iqueue iq; + + /** The next unique kernel file handle */ + atomic64_t khctr; + + /** rbtree of fuse_files waiting for poll events indexed by ph */ + struct rb_root polled_files; + + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of background requests at which congestion starts */ + unsigned congestion_threshold; + + /** Number of requests currently in the background */ + unsigned num_background; + + /** Number of background requests currently queued for userspace */ + unsigned active_background; + + /** The list of background requests set aside for later queuing */ + struct list_head bg_queue; + + /** Protects: max_background, congestion_threshold, num_background, + * active_background, bg_queue, blocked */ + spinlock_t bg_lock; + + /** Flag indicating that INIT reply has been received. Allocating + * any fuse request will be suspended until the flag is set */ + int initialized; + + /** Flag indicating if connection is blocked. 
This will be + the case before the INIT reply is received, and if there + are too many outstanding background requests */ + int blocked; + + /** waitq for blocked connection */ + wait_queue_head_t blocked_waitq; + + /** Connection established, cleared on umount, connection + abort and device release */ + unsigned connected; + + /** Connection aborted via sysfs */ + bool aborted; + + /** Connection failed (version mismatch). Cannot race with + setting other bitfields since it is only set once in INIT + reply, before any other request, and never cleared */ + unsigned conn_error:1; + + /** Connection successful. Only set in INIT */ + unsigned conn_init:1; + + /** Do readahead asynchronously? Only set in INIT */ + unsigned async_read:1; + + /** Return a unique read error after abort. Only set in INIT */ + unsigned abort_err:1; + + /** Do not send separate SETATTR request before open(O_TRUNC) */ + unsigned atomic_o_trunc:1; + + /** Filesystem supports NFS exporting. Only set in INIT */ + unsigned export_support:1; + + /** write-back cache policy (default is write-through) */ + unsigned writeback_cache:1; + + /** allow parallel lookups and readdir (default is serialized) */ + unsigned parallel_dirops:1; + + /** handle fs handles killing suid/sgid/cap on write/chown/trunc */ + unsigned handle_killpriv:1; + + /** cache READLINK responses in page cache */ + unsigned cache_symlinks:1; + + /* show legacy mount options */ + unsigned int legacy_opts_show:1; + + /* + * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on + * write/trunc only if caller did not have CAP_FSETID. sgid is killed + * on write/truncate only if caller did not have CAP_FSETID as well as + * file has group execute permission. + */ + unsigned handle_killpriv_v2:1; + + /* + * The following bitfields are only for optimization purposes + * and hence races in setting them will not cause malfunction + */ + + /** Is open/release not implemented by fs? 
*/ + unsigned no_open:1; + + /** Is opendir/releasedir not implemented by fs? */ + unsigned no_opendir:1; + + /** Is fsync not implemented by fs? */ + unsigned no_fsync:1; + + /** Is fsyncdir not implemented by fs? */ + unsigned no_fsyncdir:1; + + /** Is flush not implemented by fs? */ + unsigned no_flush:1; + + /** Is setxattr not implemented by fs? */ + unsigned no_setxattr:1; + + /** Does file server support extended setxattr */ + unsigned setxattr_ext:1; + + /** Is getxattr not implemented by fs? */ + unsigned no_getxattr:1; + + /** Is listxattr not implemented by fs? */ + unsigned no_listxattr:1; + + /** Is removexattr not implemented by fs? */ + unsigned no_removexattr:1; + + /** Are posix file locking primitives not implemented by fs? */ + unsigned no_lock:1; + + /** Is access not implemented by fs? */ + unsigned no_access:1; + + /** Is create not implemented by fs? */ + unsigned no_create:1; + + /** Is interrupt not implemented by fs? */ + unsigned no_interrupt:1; + + /** Is bmap not implemented by fs? */ + unsigned no_bmap:1; + + /** Is poll not implemented by fs? */ + unsigned no_poll:1; + + /** Do multi-page cached writes */ + unsigned big_writes:1; + + /** Don't apply umask to creation modes */ + unsigned dont_mask:1; + + /** Are BSD file locking primitives not implemented by fs? */ + unsigned no_flock:1; + + /** Is fallocate not implemented by fs? */ + unsigned no_fallocate:1; + + /** Is rename with flags not implemented by fs? */ + unsigned no_rename2:1; + + /** Use enhanced/automatic page cache invalidation. */ + unsigned auto_inval_data:1; + + /** Filesystem is fully responsible for page cache invalidation. */ + unsigned explicit_inval_data:1; + + /** Does the filesystem support readdirplus? */ + unsigned do_readdirplus:1; + + /** Does the filesystem want adaptive readdirplus? */ + unsigned readdirplus_auto:1; + + /** Does the filesystem support asynchronous direct-IO submission? */ + unsigned async_dio:1; + + /** Is lseek not implemented by fs? 
*/ + unsigned no_lseek:1; + + /** Does the filesystem support posix acls? */ + unsigned posix_acl:1; + + /** Check permissions based on the file mode or not? */ + unsigned default_permissions:1; + + /** Allow other than the mounter user to access the filesystem ? */ + unsigned allow_other:1; + + /** Is copy_file_range not implemented by fs? */ + unsigned no_copy_file_range:1; + + /* Send DESTROY request */ + unsigned int destroy:1; + + /* Delete dentries that have gone stale */ + unsigned int delete_stale:1; + + /** Do not create entry in fusectl fs */ + unsigned int no_control:1; + + /** Do not allow MNT_FORCE umount */ + unsigned int no_force_umount:1; + + /* Auto-mount submounts announced by the server */ + unsigned int auto_submounts:1; + + /* Propagate syncfs() to server */ + unsigned int sync_fs:1; + + /* Initialize security xattrs when creating a new inode */ + unsigned int init_security:1; + + /* Add supplementary group info when creating a new inode */ + unsigned int create_supp_group:1; + + /* Does the filesystem support per inode DAX? */ + unsigned int inode_dax:1; + + /* Is tmpfile not implemented by fs? */ + unsigned int no_tmpfile:1; + + /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */ + unsigned int direct_io_allow_mmap:1; + + /* Is statx not implemented by fs? 
*/ + unsigned int no_statx:1; + + /** Passthrough support for read/write IO */ + unsigned int passthrough:1; + + /** Maximum stack depth for passthrough backing files */ + int max_stack_depth; + + /** The number of requests waiting for completion */ + atomic_t num_waiting; + + /** Negotiated minor version */ + unsigned minor; + + /** Entry on the fuse_conn_list */ + struct list_head entry; + + /** Device ID from the root super block */ + dev_t dev; + + /** Dentries in the control filesystem */ + struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES]; + + /** number of dentries used in the above array */ + int ctl_ndents; + + /** Key for lock owner ID scrambling */ + u32 scramble_key[4]; + + /** Version counter for attribute changes */ + atomic64_t attr_version; + + /** Called on final put */ + void (*release)(struct fuse_conn *); + + /** + * Read/write semaphore to hold when accessing the sb of any + * fuse_mount belonging to this connection + */ + struct rw_semaphore killsb; + + /** List of device instances belonging to this connection */ + struct list_head devices; + +#ifdef CONFIG_FUSE_DAX + /* Dax mode */ + enum fuse_dax_mode dax_mode; + + /* Dax specific conn data, non-NULL if DAX is enabled */ + struct fuse_conn_dax *dax; +#endif + + /** List of filesystems using this connection */ + struct list_head mounts; + + /* New writepages go into this bucket */ + struct fuse_sync_bucket __rcu *curr_bucket; + +#ifdef CONFIG_FUSE_PASSTHROUGH + /** IDR for backing files ids */ + struct idr backing_files_map; +#endif +}; + +/* + * Represents a mounted filesystem, potentially a submount. + * + * This object allows sharing a fuse_conn between separate mounts to + * allow submounts with dedicated superblocks and thus separate device + * IDs. + */ +struct fuse_mount { + /* Underlying (potentially shared) connection to the FUSE server */ + struct fuse_conn *fc; + + /* + * Super block for this connection (fc->killsb must be held when + * accessing this). 
+ */ + struct super_block *sb; + + /* Entry on fc->mounts */ + struct list_head fc_entry; + struct rcu_head rcu; +}; + +static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) +{ + return get_fuse_mount_super(sb)->fc; +} + +static inline struct fuse_mount *get_fuse_mount(struct inode *inode) +{ + return get_fuse_mount_super(inode->i_sb); +} + +static inline struct fuse_conn *get_fuse_conn(struct inode *inode) +{ + return get_fuse_mount_super(inode->i_sb)->fc; +} + +static inline struct fuse_inode *get_fuse_inode(struct inode *inode) +{ + return container_of(inode, struct fuse_inode, inode); +} + +static inline u64 get_node_id(struct inode *inode) +{ + return get_fuse_inode(inode)->nodeid; +} + +static inline int invalid_nodeid(u64 nodeid) +{ + return !nodeid || nodeid == FUSE_ROOT_ID; +} + +static inline u64 fuse_get_attr_version(struct fuse_conn *fc) +{ + return atomic64_read(&fc->attr_version); +} + +static inline bool fuse_stale_inode(const struct inode *inode, int generation, + struct fuse_attr *attr) +{ + return inode->i_generation != generation || + inode_wrong_type(inode, attr->mode); +} + +static inline void fuse_make_bad(struct inode *inode) +{ + set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); +} + +static inline bool fuse_is_bad(struct inode *inode) +{ + return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); +} + +static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags, + struct fuse_page_desc **desc) +{ + struct page **pages; + + pages = kzalloc(npages * (sizeof(struct page *) + + sizeof(struct fuse_page_desc)), flags); + *desc = (void *) (pages + npages); + + return pages; +} + +static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs, + unsigned int index, + unsigned int nr_pages) +{ + int i; + + for (i = index; i < index + nr_pages; i++) + descs[i].length = 
PAGE_SIZE - descs[i].offset; +} + +static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) +{ + /* Need RCU protection to prevent use after free after the decrement */ + rcu_read_lock(); + if (atomic_dec_and_test(&bucket->count)) + wake_up(&bucket->waitq); + rcu_read_unlock(); +} + +/** Device operations */ +extern const struct file_operations fuse_dev_operations; + +extern const struct dentry_operations fuse_dentry_operations; +extern const struct dentry_operations fuse_root_dentry_operations; + +/** + * Get a filled in inode + */ +struct inode *fuse_iget(struct super_block *sb, u64 nodeid, + int generation, struct fuse_attr *attr, + u64 attr_valid, u64 attr_version); + +int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, + struct fuse_entry_out *outarg, struct inode **inode); + +/** + * Send FORGET command + */ +void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup); + +struct fuse_forget_link *fuse_alloc_forget(void); + +/* + * Initialize READ or READDIR request + */ +struct fuse_io_args { + union { + struct { + struct fuse_read_in in; + u64 attr_ver; + } read; + struct { + struct fuse_write_in in; + struct fuse_write_out out; + bool page_locked; + } write; + }; + struct fuse_args_pages ap; + struct fuse_io_priv *io; + struct fuse_file *ff; +}; + +void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos, + size_t count, int opcode); + + +struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release); +void fuse_file_free(struct fuse_file *ff); +int fuse_finish_open(struct inode *inode, struct file *file); + +void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, + unsigned int flags); + +/** + * Send RELEASE or RELEASEDIR request + */ +void fuse_release_common(struct file *file, bool isdir); + +/** + * Send FSYNC or FSYNCDIR request + */ +int fuse_fsync_common(struct file *file, loff_t start, loff_t end, + int datasync, int 
opcode); + +/** + * Notify poll wakeup + */ +int fuse_notify_poll_wakeup(struct fuse_conn *fc, + struct fuse_notify_poll_wakeup_out *outarg); + +/** + * Initialize file operations on a regular file + */ +void fuse_init_file_inode(struct inode *inode, unsigned int flags); + +/** + * Initialize inode operations on regular files and special files + */ +void fuse_init_common(struct inode *inode); + +/** + * Initialize inode and file operations on a directory + */ +void fuse_init_dir(struct inode *inode); + +/** + * Initialize inode operations on a symlink + */ +void fuse_init_symlink(struct inode *inode); + +/** + * Change attributes of an inode + */ +void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, + u64 attr_valid, u64 attr_version); + +void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, + struct fuse_statx *sx, + u64 attr_valid, u32 cache_mask); + +u32 fuse_get_cache_mask(struct inode *inode); + +/** + * Initialize the client device + */ +int fuse_dev_init(void); + +/** + * Cleanup the client device + */ +void fuse_dev_cleanup(void); + +int fuse_ctl_init(void); +void __exit fuse_ctl_cleanup(void); + +/** + * Simple request sending that does request allocation and freeing + */ +ssize_t __fuse_simple_request(struct mnt_idmap *idmap, + struct fuse_mount *fm, + struct fuse_args *args); + +static inline ssize_t fuse_simple_request(struct fuse_mount *fm, struct fuse_args *args) +{ + return __fuse_simple_request(&invalid_mnt_idmap, fm, args); +} + +static inline ssize_t fuse_simple_idmap_request(struct mnt_idmap *idmap, + struct fuse_mount *fm, + struct fuse_args *args) +{ + return __fuse_simple_request(idmap, fm, args); +} + +int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, + gfp_t gfp_flags); + +/** + * End a finished request + */ +void fuse_request_end(struct fuse_req *req); + +/* Abort all requests */ +void fuse_abort_conn(struct fuse_conn *fc); +void 
fuse_wait_aborted(struct fuse_conn *fc); + +/** + * Invalidate inode attributes + */ + +/* Attributes possibly changed on data modification */ +#define FUSE_STATX_MODIFY (STATX_MTIME | STATX_CTIME | STATX_BLOCKS) + +/* Attributes possibly changed on data and/or size modification */ +#define FUSE_STATX_MODSIZE (FUSE_STATX_MODIFY | STATX_SIZE) + +void fuse_invalidate_attr(struct inode *inode); +void fuse_invalidate_attr_mask(struct inode *inode, u32 mask); + +void fuse_invalidate_entry_cache(struct dentry *entry); + +void fuse_invalidate_atime(struct inode *inode); + +u64 fuse_time_to_jiffies(u64 sec, u32 nsec); +#define ATTR_TIMEOUT(o) \ + fuse_time_to_jiffies((o)->attr_valid, (o)->attr_valid_nsec) + +void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o); + +/** + * Acquire reference to fuse_conn + */ +struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); + +/** + * Initialize fuse_conn + */ +void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, + struct user_namespace *user_ns, + const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); + +/** + * Release reference to fuse_conn + */ +void fuse_conn_put(struct fuse_conn *fc); + +struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); +struct fuse_dev *fuse_dev_alloc(void); +void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); +void fuse_dev_free(struct fuse_dev *fud); +void fuse_send_init(struct fuse_mount *fm); + +/** + * Fill in superblock and initialize fuse connection + * @sb: partially-initialized superblock to fill in + * @ctx: mount context + */ +int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); + +/* + * Remove the mount from the connection + * + * Returns whether this was the last mount + */ +bool fuse_mount_remove(struct fuse_mount *fm); + +/* + * Setup context ops for submounts + */ +int fuse_init_fs_context_submount(struct fs_context *fsc); + +/* + * Shut down the connection (possibly sending DESTROY request). 
+ */ +void fuse_conn_destroy(struct fuse_mount *fm); + +/* Drop the connection and free the fuse mount */ +void fuse_mount_destroy(struct fuse_mount *fm); + +/** + * Add connection to control filesystem + */ +int fuse_ctl_add_conn(struct fuse_conn *fc); + +/** + * Remove connection from control filesystem + */ +void fuse_ctl_remove_conn(struct fuse_conn *fc); + +/** + * Is file type valid? + */ +int fuse_valid_type(int m); + +bool fuse_invalid_attr(struct fuse_attr *attr); + +/** + * Is current process allowed to perform filesystem operation? + */ +bool fuse_allow_current_process(struct fuse_conn *fc); + +u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); + +void fuse_flush_time_update(struct inode *inode); +void fuse_update_ctime(struct inode *inode); + +int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask); + +void fuse_flush_writepages(struct inode *inode); + +void fuse_set_nowrite(struct inode *inode); +void fuse_release_nowrite(struct inode *inode); + +/** + * Scan all fuse_mounts belonging to fc to find the first where + * ilookup5() returns a result. Return that result and the + * respective fuse_mount in *fm (unless fm is NULL). + * + * The caller must hold fc->killsb. + */ +struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, + struct fuse_mount **fm); + +/** + * File-system tells the kernel to invalidate cache for the given node id. + */ +int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, + loff_t offset, loff_t len); + +/** + * File-system tells the kernel to invalidate parent attributes and + * the dentry matching parent/name. + * + * If the child_nodeid is non-zero and: + * - matches the inode number for the dentry matching parent/name, + * - is not a mount point + * - is a file or an empty directory + * then the dentry is unhashed (d_delete()). 
+ */ +int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, + u64 child_nodeid, struct qstr *name, u32 flags); + +int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, + bool isdir); + +/** + * fuse_direct_io() flags + */ + +/** If set, it is WRITE; otherwise - READ */ +#define FUSE_DIO_WRITE (1 << 0) + +/** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */ +#define FUSE_DIO_CUSE (1 << 1) + +ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, + loff_t *ppos, int flags); +long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, + unsigned int flags); +long fuse_ioctl_common(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags); +__poll_t fuse_file_poll(struct file *file, poll_table *wait); +int fuse_dev_release(struct inode *inode, struct file *file); + +bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written); + +int fuse_flush_times(struct inode *inode, struct fuse_file *ff); +int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); + +int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr, struct file *file); + +void fuse_set_initialized(struct fuse_conn *fc); + +void fuse_unlock_inode(struct inode *inode, bool locked); +bool fuse_lock_inode(struct inode *inode); + +int fuse_setxattr(struct inode *inode, const char *name, const void *value, + size_t size, int flags, unsigned int extra_flags); +ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, + size_t size); +ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size); +int fuse_removexattr(struct inode *inode, const char *name); +extern const struct xattr_handler * const fuse_xattr_handlers[]; + +struct posix_acl; +struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu); +struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap, + struct dentry *dentry, int type); +int 
fuse_set_acl(struct mnt_idmap *, struct dentry *dentry, + struct posix_acl *acl, int type); + +/* readdir.c */ +int fuse_readdir(struct file *file, struct dir_context *ctx); + +/** + * Return the number of bytes in an arguments list + */ +unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); + +/** + * Get the next unique ID for a request + */ +u64 fuse_get_unique(struct fuse_iqueue *fiq); +void fuse_free_conn(struct fuse_conn *fc); + +/* dax.c */ + +#define FUSE_IS_DAX(inode) (IS_ENABLED(CONFIG_FUSE_DAX) && IS_DAX(inode)) + +ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to); +ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from); +int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma); +int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end); +int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode mode, + struct dax_device *dax_dev); +void fuse_dax_conn_free(struct fuse_conn *fc); +bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi); +void fuse_dax_inode_init(struct inode *inode, unsigned int flags); +void fuse_dax_inode_cleanup(struct inode *inode); +void fuse_dax_dontcache(struct inode *inode, unsigned int flags); +bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment); +void fuse_dax_cancel_work(struct fuse_conn *fc); + +/* ioctl.c */ +long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); +int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int fuse_fileattr_set(struct mnt_idmap *idmap, + struct dentry *dentry, struct fileattr *fa); + +/* iomode.c */ +int fuse_file_cached_io_open(struct inode *inode, struct fuse_file *ff); +int fuse_inode_uncached_io_start(struct fuse_inode *fi, + struct fuse_backing *fb); +void fuse_inode_uncached_io_end(struct fuse_inode *fi); + +int fuse_file_io_open(struct 
file *file, struct inode *inode); +void fuse_file_io_release(struct fuse_file *ff, struct inode *inode); + +/* file.c */ +struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, + unsigned int open_flags, bool isdir); +void fuse_file_release(struct inode *inode, struct fuse_file *ff, + unsigned int open_flags, fl_owner_t id, bool isdir); + +/* passthrough.c */ +static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi) +{ +#ifdef CONFIG_FUSE_PASSTHROUGH + return READ_ONCE(fi->fb); +#else + return NULL; +#endif +} + +static inline struct fuse_backing *fuse_inode_backing_set(struct fuse_inode *fi, + struct fuse_backing *fb) +{ +#ifdef CONFIG_FUSE_PASSTHROUGH + return xchg(&fi->fb, fb); +#else + return NULL; +#endif +} + +#ifdef CONFIG_FUSE_PASSTHROUGH +struct fuse_backing *fuse_backing_get(struct fuse_backing *fb); +void fuse_backing_put(struct fuse_backing *fb); +#else + +static inline struct fuse_backing *fuse_backing_get(struct fuse_backing *fb) +{ + return NULL; +} + +static inline void fuse_backing_put(struct fuse_backing *fb) +{ +} +#endif + +void fuse_backing_files_init(struct fuse_conn *fc); +void fuse_backing_files_free(struct fuse_conn *fc); +int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); +int fuse_backing_close(struct fuse_conn *fc, int backing_id); + +struct fuse_backing *fuse_passthrough_open(struct file *file, + struct inode *inode, + int backing_id); +void fuse_passthrough_release(struct fuse_file *ff, struct fuse_backing *fb); + +static inline struct file *fuse_file_passthrough(struct fuse_file *ff) +{ +#ifdef CONFIG_FUSE_PASSTHROUGH + return ff->passthrough; +#else + return NULL; +#endif +} + +ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *iter); +ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *iter); +ssize_t fuse_passthrough_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t len, unsigned int flags); +ssize_t 
fuse_passthrough_splice_write(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, + size_t len, unsigned int flags); +ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma); + +#endif /* _FS_FUSE_I_H */ diff --git a/guse.c b/guse.c new file mode 100644 index 0000000..510c52f --- /dev/null +++ b/guse.c @@ -0,0 +1,767 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * GUSE: gpio device in Userspace + * + * Copyright (C) 2024 Nikita Shubin + * + */ + +#define pr_fmt(fmt) "GUSE: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +extern struct mnt_idmap invalid_mnt_idmap; +#include "fuse_i.h" + +/* GUSE specific operations */ +#define GUSE_INIT 4097 + +#define GUSE_CONNTBL_LEN 64 + +struct guse_conn { + struct list_head list; /* linked on guse_conntbl */ + struct fuse_mount fm; /* Dummy mount referencing fc */ + struct fuse_conn fc; /* fuse connection */ + struct cdev *cdev; /* associated character device */ + struct device *dev; /* device representing @cdev */ + + /* + * Split inode into two parts: + * - device_inode is required for GPIO_V2_GET_LINEINFO_WATCH_IOCTL + * - linereq_inode required for individual GPIO_V2_GET_LINE_IOCTL + */ + u32 device_inode; + u32 linereq_inode; + + /* init parameters, set once during initialization */ + bool unrestricted_ioctl; +}; + +static DEFINE_MUTEX(guse_lock); /* protects registration */ +static struct list_head guse_conntbl[GUSE_CONNTBL_LEN]; +static struct class *guse_class; + +struct guse_init_in { + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; +}; + +struct guse_init_out { + uint32_t major; + uint32_t minor; + uint32_t unused; + uint32_t flags; + uint32_t max_read; + uint32_t max_write; + uint32_t dev_major; /* chardev major */ + uint32_t dev_minor; /* chardev minor */ + uint32_t spare[10]; +}; + +static struct 
guse_conn *fc_to_gc(struct fuse_conn *fc)
+{
+	/* @fc is embedded in struct guse_conn, so step back to the container */
+	return container_of(fc, struct guse_conn, fc);
+}
+
+/* Pick the guse_conntbl bucket for @devt: (major + minor) modulo table size */
+static struct list_head *guse_conntbl_head(dev_t devt)
+{
+	return &guse_conntbl[(MAJOR(devt) + MINOR(devt)) % GUSE_CONNTBL_LEN];
+}
+
+
+/**************************************************************************
+ * GUSE frontend operations
+ *
+ * These are file operations for the character device.
+ *
+ * On open, GUSE opens a file from the FUSE mnt and stores it to
+ * private_data of the open file.  All other ops call FUSE ops on the
+ * FUSE file.
+ */
+
+/* Forward a read to the server; the position handed over is always 0 */
+static ssize_t guse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
+{
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb);
+	loff_t pos = 0;
+
+	return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE);
+}
+
+/* Forward a write to the server; the position handed over is always 0 */
+static ssize_t guse_write_iter(struct kiocb *kiocb, struct iov_iter *from)
+{
+	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb);
+	loff_t pos = 0;
+	/*
+	 * No locking or generic_write_checks(), the server is
+	 * responsible for locking and sanity checks.
+	 */
+	return fuse_direct_io(&io, from, &pos,
+			      FUSE_DIO_WRITE | FUSE_DIO_CUSE);
+}
+
+/*
+ * Open method of the GUSE character device.
+ *
+ * Looks up the guse_conn registered for the chardev's devt, takes a
+ * connection reference for the lifetime of the file and opens a FUSE
+ * file whose nodeid carries a fresh device index in its upper 32 bits.
+ *
+ * Returns 0 on success, -ENODEV if no connection owns the devt, or the
+ * error reported by fuse_do_open().
+ */
+static int guse_open(struct inode *inode, struct file *file)
+{
+	dev_t devt = inode->i_cdev->dev;
+	struct guse_conn *cc = NULL, *pos;
+	u64 u_inode = 0;
+	int rc;
+
+	/* look up and get the connection */
+	scoped_guard(mutex, &guse_lock) {
+		list_for_each_entry(pos, guse_conntbl_head(devt), list) {
+			if (pos->dev->devt != devt)
+				continue;
+
+			fuse_conn_get(&pos->fc);
+			/*
+			 * Bump the counter while guse_lock is still held so
+			 * that concurrent opens never end up with the same
+			 * nodeid.
+			 */
+			u_inode = (u64)(++pos->device_inode) << 32;
+			cc = pos;
+			break;
+		}
+	}
+
+	/* dead? */
+	if (!cc)
+		return -ENODEV;
+
+	/*
+	 * Generic permission check is already done against the chrdev
+	 * file, proceed to open.
+	 */
+	rc = fuse_do_open(&cc->fm, u_inode, file, 0);
+	if (rc)
+		fuse_conn_put(&cc->fc);
+
+	return rc;
+}
+
+/* Release method of the chardev: close the FUSE file, drop the open() ref */
+static int guse_release(struct inode *inode, struct file *file)
+{
+	struct fuse_file *ff = file->private_data;
+	/* read fm before the release; ff must not be touched afterwards */
+	struct fuse_mount *fm = ff->fm;
+
+	fuse_sync_release(NULL, ff, file->f_flags);
+	fuse_conn_put(fm->fc);
+
+	return 0;
+}
+
+/*
+ * Release method of a line-request file.
+ *
+ * The file may be torn down before fuse_do_open() ever succeeded (error
+ * path of guse_create_line_request()); private_data is NULL in that case
+ * and there is neither a FUSE file nor a connection reference to drop.
+ */
+static int guse_line_release(struct inode *inode, struct file *file)
+{
+	struct fuse_file *ff = file->private_data;
+	struct fuse_mount *fm;
+
+	if (!ff)
+		return 0;
+
+	fm = ff->fm;
+	fuse_sync_release(NULL, ff, file->f_flags);
+	/* pairs with fuse_conn_get() in guse_create_line_request() */
+	fuse_conn_put(fm->fc);
+
+	return 0;
+}
+
+/* ioctl method of a line-request file: only get/set values is forwarded */
+static long guse_line_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	switch (cmd) {
+	case GPIO_V2_LINE_GET_VALUES_IOCTL:
+	case GPIO_V2_LINE_SET_VALUES_IOCTL:
+		pr_debug("GPIO_V2_LINE_GET_VALUES_IOCTL/GPIO_V2_LINE_SET_VALUES_IOCTL\n");
+		return fuse_do_ioctl(file, cmd, arg, 0);
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct file_operations guse_line_fops = {
+	.owner		= THIS_MODULE,
+	.read_iter	= guse_read_iter,
+	.release	= guse_line_release,
+	.unlocked_ioctl	= guse_line_ioctl,
+	.poll		= fuse_file_poll,
+	.llseek		= noop_llseek,
+};
+
+/*
+ * Handle GPIO_V2_GET_LINE_IOCTL on the chardev.
+ *
+ * Creates an anonymous "[guse_line]" file backed by its own FUSE file,
+ * forwards the request to the server through that file and, on success,
+ * hands the new fd back in @arg's gpio_v2_line_request.fd.
+ *
+ * @cc:  connection the request was issued on
+ * @arg: user pointer to struct gpio_v2_line_request
+ *
+ * Returns 0 on success or a negative errno; on failure neither the fd
+ * nor the file stays allocated.
+ */
+static long guse_create_line_request(struct guse_conn *cc, unsigned long arg)
+{
+	struct gpio_v2_line_request __user *uarg = (void __user *)arg;
+	struct gpio_v2_line_request request;
+	struct file *file;
+	unsigned int i;
+	u64 nodeid;
+	int rc, fd;
+
+	if (copy_from_user(&request, uarg, sizeof(request)))
+		return -EFAULT;
+
+	/* num_lines comes from userspace and indexes request.offsets[] */
+	if (!request.num_lines || request.num_lines > GPIO_V2_LINES_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < request.num_lines; i++)
+		pr_debug("%s: %u\n", __func__, request.offsets[i]);
+
+	fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	file = anon_inode_getfile("[guse_line]", &guse_line_fops,
+				  NULL, O_RDONLY | O_CLOEXEC);
+	if (IS_ERR(file)) {
+		rc = PTR_ERR(file);
+		goto out_put_fd;
+	}
+
+	/* serialize the counter against concurrent line requests */
+	scoped_guard(mutex, &guse_lock)
+		nodeid = ++cc->linereq_inode;
+
+	/*
+	 * The line file can outlive the chardev file it was requested
+	 * through, and its fuse_file points at cc->fm, so it has to pin
+	 * the connection on its own.
+	 */
+	fuse_conn_get(&cc->fc);
+	rc = fuse_do_open(&cc->fm, nodeid, file, 0);
+	if (rc) {
+		/* private_data is still NULL: release() will not drop it */
+		fuse_conn_put(&cc->fc);
+		goto out_fput;
+	}
+
+	rc = fuse_do_ioctl(file, GPIO_V2_GET_LINE_IOCTL, arg, 0);
+	if (rc)
+		goto out_fput;
+
+	request.fd = fd;
+	if (copy_to_user(uarg, &request, sizeof(request))) {
+		rc = -EFAULT;
+		goto out_fput;
+	}
+
+	fd_install(fd, file);
+
+	return 0;
+
+out_fput:
+	/*
+	 * fput() triggers guse_line_release(), which closes the FUSE file
+	 * and drops the connection reference if the open had succeeded.
+	 */
+	fput(file);
+out_put_fd:
+	put_unused_fd(fd);
+	return rc;
+}
+
+/* TODO: rename all to guse_device_* for device */
+/*
+ * ioctl method of the chardev.  GPIO_V2_GET_LINE_IOCTL is served locally
+ * by creating a line-request file; everything else goes to the server.
+ */
+static long guse_file_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct guse_conn *cc = fc_to_gc(ff->fm->fc);
+	unsigned int flags = 0;
+
+	if (cmd == GPIO_V2_GET_LINE_IOCTL) {
+		/* create new file */
+		pr_debug("GPIO_V2_GET_LINE_IOCTL\n");
+		return guse_create_line_request(cc, arg);
+	}
+
+	/* honour the init-time flag the same way the compat path does */
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+/*
+ * compat ioctl method of the chardev.
+ *
+ * NOTE(review): GPIO_V2_GET_LINE_IOCTL is not intercepted here, so compat
+ * callers never reach guse_create_line_request() - confirm whether that
+ * is intended.
+ */
+static long guse_file_compat_ioctl(struct file *file, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct fuse_file *ff = file->private_data;
+	struct guse_conn *cc = fc_to_gc(ff->fm->fc);
+	unsigned int flags = FUSE_IOCTL_COMPAT;
+
+	if (cc->unrestricted_ioctl)
+		flags |= FUSE_IOCTL_UNRESTRICTED;
+
+	return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static const struct file_operations guse_frontend_fops = {
+	.owner		= THIS_MODULE,
+	.read_iter	= guse_read_iter,
+	.write_iter	= guse_write_iter,
+	.open		= guse_open,
+	.release	= guse_release,
+	.unlocked_ioctl	= guse_file_ioctl,
+	.compat_ioctl	= guse_file_compat_ioctl,
+	.poll		= fuse_file_poll,
+	.llseek		= noop_llseek,
+};
+
+
+/**************************************************************************
+ * GUSE channel initialization and destruction
+ */
+
+struct guse_devinfo {
+	const char	*name;
+};
+
+/**
+ * guse_parse_one - parse one key=value pair
+ * @pp: i/o parameter for the current position
+ * @end: points to one past the end of the packed string
+ * @keyp: out parameter for key
+ * @valp: out parameter for value
+ *
+ * *@pp points to packed strings -
"key0=val0\0key1=val1\0" which ends
+ * at @end - 1.  This function parses one pair and set *@keyp to the
+ * start of the key and *@valp to the start of the value.  Note that
+ * the original string is modified such that the key string is
+ * terminated with '\0'.  *@pp is updated to point to the next string.
+ *
+ * RETURNS:
+ * 1 on successful parse, 0 on EOF, -errno on failure.
+ */
+static int guse_parse_one(char **pp, char *end, char **keyp, char **valp)
+{
+	char *p = *pp;
+	char *key, *val;
+
+	/* skip the NUL separators left between packed strings */
+	while (p < end && *p == '\0')
+		p++;
+	if (p == end)
+		return 0;
+
+	/* strlen()/strsep() below rely on the buffer being NUL terminated */
+	if (end[-1] != '\0') {
+		pr_err("info not properly terminated\n");
+		return -EINVAL;
+	}
+
+	key = val = p;
+	p += strlen(p);
+
+	if (valp) {
+		/* cut at the first '='; a missing '=' yields an empty value */
+		strsep(&val, "=");
+		if (!val)
+			val = key + strlen(key);
+		key = strstrip(key);
+		val = strstrip(val);
+	} else
+		key = strstrip(key);
+
+	if (!strlen(key)) {
+		pr_err("zero length info key specified\n");
+		return -EINVAL;
+	}
+
+	*pp = p;
+	*keyp = key;
+	if (valp)
+		*valp = val;
+
+	return 1;
+}
+
+/**
+ * guse_parse_devinfo - parse device info
+ * @p: device info string
+ * @len: length of device info string
+ * @devinfo: out parameter for parsed device info
+ *
+ * Parse @p to extract device info and store it into @devinfo.  String
+ * pointed to by @p is modified by parsing and @devinfo points into
+ * them, so @p shouldn't be freed while @devinfo is in use.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int guse_parse_devinfo(char *p, size_t len, struct guse_devinfo *devinfo)
+{
+	char *end = p + len;
+	char *key, *val;
+	int rc;
+
+	/* consume pairs until guse_parse_one() reports EOF (0) or an error */
+	while ((rc = guse_parse_one(&p, end, &key, &val)) > 0) {
+		if (!strcmp(key, "DEVNAME"))
+			devinfo->name = val;
+		else
+			pr_warn("unknown device info \"%s\"\n", key);
+	}
+	if (rc < 0)
+		return rc;
+
+	/* DEVNAME is the one mandatory key and must not be empty */
+	if (!devinfo->name || devinfo->name[0] == '\0') {
+		pr_err("DEVNAME unspecified\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* struct device release hook: the device was kzalloc()ed by this module */
+static void guse_gendev_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+/* Everything one GUSE_INIT request carries, allocated as a single object */
+struct guse_init_args {
+	struct fuse_args_pages ap;	/* request descriptor handed to FUSE */
+	struct guse_init_in in;		/* in arg: protocol version of the kernel */
+	struct guse_init_out out;	/* out arg 0: fixed-size part of the reply */
+	struct page *page;		/* out arg 1: packed device info strings */
+	struct fuse_page_desc desc;	/* descriptor for @page */
+};
+
+/**
+ * guse_process_init_reply - finish initializing GUSE channel
+ *
+ * @fm: The fuse mount information containing the GUSE connection.
+ * @args: The arguments passed to the init reply.
+ * @error: The error code signifying if any error occurred during the process.
+ *
+ * Completion callback of the init request.  It creates the character
+ * device and sets up all the data structures required for it.
+ */
+static void guse_process_init_reply(struct fuse_mount *fm,
+				    struct fuse_args *args, int error)
+{
+	struct fuse_conn *fc = fm->fc;
+	struct guse_init_args *ia = container_of(args, typeof(*ia), ap.args);
+	struct fuse_args_pages *ap = &ia->ap;
+	struct guse_conn *cc = fc_to_gc(fc), *pos;
+	struct guse_init_out *arg = &ia->out;
+	struct page *page = ap->pages[0];
+	struct guse_devinfo devinfo = { };
+	struct device *dev;
+	struct cdev *cdev;
+	dev_t devt;
+	int rc, i;
+
+	if (error || arg->major != FUSE_KERNEL_VERSION || arg->minor < 11)
+		goto err;
+
+	fc->minor = arg->minor;
+	fc->max_read = max_t(unsigned, arg->max_read, 4096);
+	fc->max_write = max_t(unsigned, arg->max_write, 4096);
+
+	/* parse init reply */
+	cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
+
+	rc = guse_parse_devinfo(page_address(page), ap->args.out_args[1].size,
+				&devinfo);
+	if (rc)
+		goto err;
+
+	/*
+	 * Determine and reserve devt.  A zero major asks for a dynamically
+	 * allocated one; otherwise claim exactly the number the server
+	 * requested.  The reservation is what keeps two GUSE devices from
+	 * sharing a devt, and it is what the err_region path below and
+	 * guse_channel_release() hand back.
+	 */
+	devt = MKDEV(arg->dev_major, arg->dev_minor);
+	if (!MAJOR(devt))
+		rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
+	else
+		rc = register_chrdev_region(devt, 1, devinfo.name);
+	if (rc) {
+		pr_err("failed to register chrdev region\n");
+		goto err;
+	}
+
+	/* devt determined, create device */
+	rc = -ENOMEM;
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		goto err_region;
+
+	device_initialize(dev);
+	dev_set_uevent_suppress(dev, 1);
+	dev->class = guse_class;
+	dev->devt = devt;
+	dev->release = guse_gendev_release;
+	dev_set_drvdata(dev, cc);
+	dev_set_name(dev, "%s", devinfo.name);
+
+	scoped_guard(mutex, &guse_lock) {
+		/* make sure the device-name is unique */
+		for (i = 0; i < GUSE_CONNTBL_LEN; ++i) {
+			list_for_each_entry(pos, &guse_conntbl[i], list)
+				if (!strcmp(dev_name(pos->dev), dev_name(dev)))
+					goto err_put_dev;
+		}
+
+		rc = device_add(dev);
+		if (rc)
+			goto err_put_dev;
+
+		/* register cdev */
+		rc = -ENOMEM;
+		cdev = cdev_alloc();
+		if (!cdev)
+			goto err_del_dev;
+
+		cdev->owner = THIS_MODULE;
+		cdev->ops = &guse_frontend_fops;
+
+		rc = cdev_add(cdev, devt, 1);
+		if (rc)
+			goto err_cdev;
+
+		cc->dev = dev;
+		cc->cdev = cdev;
+
+		/* make the device available */
+		list_add(&cc->list, guse_conntbl_head(devt));
+	}
+
+	/* announce device availability */
+	dev_set_uevent_suppress(dev, 0);
+	kobject_uevent(&dev->kobj, KOBJ_ADD);
+out:
+	/* the request buffers are owned by this callback on every path */
+	kfree(ia);
+	__free_page(page);
+	return;
+
+	/*
+	 * Error unwinding.  Jumping out of the scoped_guard() block drops
+	 * guse_lock before any of the labels below run.
+	 */
+err_cdev:
+	cdev_del(cdev);
+err_del_dev:
+	/* undo device_add() before the final reference goes away */
+	device_del(dev);
+err_put_dev:
+	put_device(dev);
+err_region:
+	unregister_chrdev_region(devt, 1);
+err:
+	fuse_abort_conn(fc);
+	goto out;
+}
+
+/*
+ * We require:
+ * - number of lines
+ * - chipname
+ * - name/label
+ */
+/*
+ * Queue the GUSE_INIT request on @cc.
+ *
+ * The reply is consumed by guse_process_init_reply(), which also frees
+ * the request buffers; they are freed here only if queueing fails.
+ *
+ * Returns 0 once the request is queued, -errno otherwise.
+ */
+static int guse_send_init(struct guse_conn *cc)
+{
+	int rc;
+	struct page *page;
+	struct fuse_mount *fm = &cc->fm;
+	struct guse_init_args *ia;
+	struct fuse_args_pages *ap;
+
+	/* the variable-size part of the reply has to fit the single page */
+	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
+
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		return -ENOMEM;
+
+	ia = kzalloc(sizeof(*ia), GFP_KERNEL);
+	if (!ia) {
+		rc = -ENOMEM;
+		goto err_free_page;
+	}
+
+	ap = &ia->ap;
+	ia->in.major = FUSE_KERNEL_VERSION;
+	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
+	ap->args.opcode = GUSE_INIT;
+	ap->args.in_numargs = 1;
+	ap->args.in_args[0].size = sizeof(ia->in);
+	ap->args.in_args[0].value = &ia->in;
+	ap->args.out_numargs = 2;
+	ap->args.out_args[0].size = sizeof(ia->out);
+	ap->args.out_args[0].value = &ia->out;
+	ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
+	ap->args.out_argvar = true;
+	ap->args.out_pages = true;
+	ap->num_pages = 1;
+	ap->pages = &ia->page;
+	ap->descs = &ia->desc;
+	ia->page = page;
+	ia->desc.length = ap->args.out_args[1].size;
+	ap->args.end = guse_process_init_reply;
+
+	rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
+	if (rc)
+		goto err_free_ia;
+
+	return 0;
+
+err_free_ia:
+	kfree(ia);
+err_free_page:
+	__free_page(page);
+	return rc;
+}
+
+/* fuse_conn release hook: @fc is embedded in the guse_conn, free the lot */
+static void guse_fc_release(struct fuse_conn *fc)
+{
+	kfree(fc_to_gc(fc));
+}
+
+/**
+ * guse_channel_open -
open method for /dev/guse
+ * @inode: inode for /dev/guse
+ * @file: file struct being opened
+ *
+ * A userland GUSE server creates a GUSE device by opening /dev/guse
+ * and answering the initialization request the kernel sends.  The fd
+ * opened here is the one that exchange runs over, so this function
+ * only sets up the guse_conn and queues the init request; the device
+ * itself is created later, when the reply is processed.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int guse_channel_open(struct inode *inode, struct file *file)
+{
+	struct guse_conn *gc;
+	struct fuse_dev *fud;
+	int err;
+
+	/* set up guse_conn */
+	gc = kzalloc(sizeof(*gc), GFP_KERNEL);
+	if (!gc)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&gc->list);
+
+	/*
+	 * Limit the guse channel to requests that can
+	 * be represented in file->f_cred->user_ns.
+	 */
+	fuse_conn_init(&gc->fc, &gc->fm, file->f_cred->user_ns,
+		       &fuse_dev_fiq_ops, NULL);
+	gc->fc.release = guse_fc_release;
+
+	/*
+	 * Drop our own reference right after the install attempt.
+	 * NOTE(review): presumably the fuse_dev now holds the reference
+	 * that keeps @gc alive, and on failure this put frees @gc through
+	 * guse_fc_release() - confirm against fuse_dev_alloc_install().
+	 */
+	fud = fuse_dev_alloc_install(&gc->fc);
+	fuse_conn_put(&gc->fc);
+	if (!fud)
+		return -ENOMEM;
+
+	gc->fc.initialized = 1;
+	err = guse_send_init(gc);
+	if (!err) {
+		file->private_data = fud;
+		return 0;
+	}
+
+	fuse_dev_free(fud);
+	return err;
+}
+
+/**
+ * guse_channel_release - release method for /dev/guse
+ * @inode: inode for /dev/guse
+ * @file: file struct being closed
+ *
+ * Disconnect the channel, deregister the GUSE device and hand the
+ * rest of the teardown to fuse_dev_release().
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int guse_channel_release(struct inode *inode, struct file *file)
+{
+	struct fuse_dev *fud = file->private_data;
+	struct guse_conn *cc = fc_to_gc(fud->fc);
+
+	/* remove from the conntbl, no more access from this point on */
+	scoped_guard(mutex, &guse_lock)
+		list_del_init(&cc->list);
+
+	/* remove device; both pointers stay NULL if init never completed */
+	if (cc->dev)
+		device_unregister(cc->dev);
+
+	if (cc->cdev) {
+		/*
+		 * Expects the devt region to have been reserved when the
+		 * device was created - TODO confirm the init path does so.
+		 */
+		unregister_chrdev_region(cc->cdev->dev, 1);
+		cdev_del(cc->cdev);
+	}
+
+	return fuse_dev_release(inode, file);
+}
+
+static struct file_operations guse_channel_fops; /* initialized during init */
+
+
+/**************************************************************************
+ * Misc stuff and module initialization
+ *
+ * GUSE exports the "waiting" and "abort" attributes to sysfs for every
+ * device of its class.
+ */
+
+/* sysfs "waiting": number of requests currently waiting on the connection */
+static ssize_t guse_class_waiting_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct guse_conn *cc = dev_get_drvdata(dev);
+
+	/* sysfs_emit() is the bounds-checked helper for show() callbacks */
+	return sysfs_emit(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
+}
+static DEVICE_ATTR(waiting, 0400, guse_class_waiting_show, NULL);
+
+/* sysfs "abort": any write aborts the connection; the data is ignored */
+static ssize_t guse_class_abort_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct guse_conn *cc = dev_get_drvdata(dev);
+
+	fuse_abort_conn(&cc->fc);
+	return count;
+}
+static DEVICE_ATTR(abort, 0200, NULL, guse_class_abort_store);
+
+static struct attribute *guse_class_dev_attrs[] = {
+	&dev_attr_waiting.attr,
+	&dev_attr_abort.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(guse_class_dev);
+
+static struct miscdevice guse_miscdev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "guse",
+	.fops		= &guse_channel_fops,
+};
+
+MODULE_ALIAS("devname:guse");
+
+/*
+ * Module init: set up the connection table, derive the /dev/guse file
+ * operations from fuse_dev_operations, then create the device class and
+ * register the misc device.  The class is destroyed again if the misc
+ * device cannot be registered.
+ */
+static int __init guse_init(void)
+{
+	int i, rc;
+
+	/* init conntbl */
+	for (i = 0; i < GUSE_CONNTBL_LEN; i++)
+		INIT_LIST_HEAD(&guse_conntbl[i]);
+
+	/* inherit and extend fuse_dev_operations */
+	guse_channel_fops		= fuse_dev_operations;
+	guse_channel_fops.owner		= THIS_MODULE;
+	guse_channel_fops.open		=
guse_channel_open;
+	guse_channel_fops.release	= guse_channel_release;
+
+	/* GUSE is not prepared for FUSE_DEV_IOC_CLONE */
+	guse_channel_fops.unlocked_ioctl	= NULL;
+
+	guse_class = class_create("guse");
+	if (IS_ERR(guse_class))
+		return PTR_ERR(guse_class);
+
+	guse_class->dev_groups = guse_class_dev_groups;
+
+	rc = misc_register(&guse_miscdev);
+	if (rc) {
+		class_destroy(guse_class);
+		return rc;
+	}
+
+	return 0;
+}
+
+/* Module exit: undo guse_init() in reverse order */
+static void __exit guse_exit(void)
+{
+	misc_deregister(&guse_miscdev);
+	class_destroy(guse_class);
+}
+
+module_init(guse_init);
+module_exit(guse_exit);
+
+MODULE_AUTHOR("Nikita Shubin ");
+MODULE_DESCRIPTION("Gpio device in Userspace");
+MODULE_LICENSE("GPL");
diff --git a/guse.h b/guse.h
new file mode 100644
index 0000000..e69de29
-- 
2.30.2