From: Linus Torvalds
Date: Mon, 26 Jun 2023 18:52:12 +0000 (-0700)
Subject: Merge tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=3eccc0c886b1796f95a289c9d127c8ca1a254bd5;p=linux.git

Merge tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux

Pull splice updates from Jens Axboe:
 "This kills off ITER_PIPE to avoid a race between truncate,
  iov_iter_revert() on the pipe and an as-yet incomplete DMA to a bio
  with unpinned/unref'ed pages from an O_DIRECT splice read. This causes
  memory corruption.

  Instead, we either use (a) filemap_splice_read(), which invokes the
  buffered file reading code and splices from the pagecache into the
  pipe; (b) copy_splice_read(), which bulk-allocates a buffer, reads
  into it and then pushes the filled pages into the pipe; or (c) handle
  it in filesystem-specific code.

  Summary:

   - Rename direct_splice_read() to copy_splice_read()

   - Simplify the calculations for the number of pages to be reclaimed
     in copy_splice_read()

   - Turn do_splice_to() into a helper, vfs_splice_read(), so that it
     can be used by overlayfs and coda to perform the checks on the
     lower fs

   - Make vfs_splice_read() jump to copy_splice_read() to handle
     direct-I/O and DAX

   - Provide shmem with its own splice_read to handle non-existent pages
     in the pagecache. We don't want a ->read_folio() as we don't want
     to populate holes, but filemap_get_pages() requires it

   - Provide overlayfs with its own splice_read to call down to a lower
     layer as overlayfs doesn't provide ->read_folio()

   - Provide coda with its own splice_read to call down to a lower layer
     as coda doesn't provide ->read_folio()

   - Direct ->splice_read to copy_splice_read() in tty, procfs, kernfs
     and random files as they just copy to the output buffer and don't
     splice pages

   - Provide wrappers for afs, ceph, ecryptfs, ext4, f2fs, nfs, ntfs3,
     ocfs2, orangefs, xfs and zonefs to do locking and/or revalidation

   - Make cifs use filemap_splice_read()

   - Replace pointers to generic_file_splice_read() with pointers to
     filemap_splice_read() as DIO and DAX are handled in the caller;
     filesystems can still provide their own alternate ->splice_read()
     op

   - Remove generic_file_splice_read()

   - Remove ITER_PIPE and its paraphernalia as generic_file_splice_read
     was the only user"

* tag 'for-6.5/splice-2023-06-23' of git://git.kernel.dk/linux: (31 commits)
  splice: kdoc for filemap_splice_read() and copy_splice_read()
  iov_iter: Kill ITER_PIPE
  splice: Remove generic_file_splice_read()
  splice: Use filemap_splice_read() instead of generic_file_splice_read()
  cifs: Use filemap_splice_read()
  trace: Convert trace/seq to use copy_splice_read()
  zonefs: Provide a splice-read wrapper
  xfs: Provide a splice-read wrapper
  orangefs: Provide a splice-read wrapper
  ocfs2: Provide a splice-read wrapper
  ntfs3: Provide a splice-read wrapper
  nfs: Provide a splice-read wrapper
  f2fs: Provide a splice-read wrapper
  ext4: Provide a splice-read wrapper
  ecryptfs: Provide a splice-read wrapper
  ceph: Provide a splice-read wrapper
  afs: Provide a splice-read wrapper
  9p: Add splice_read wrapper
  net: Make sock_splice_read() use copy_splice_read() by default
  tty, proc, kernfs, random: Use copy_splice_read()
  ...
--- 3eccc0c886b1796f95a289c9d127c8ca1a254bd5 diff --cc fs/smb/client/cifsfs.c index 43a4d8603db34,0000000000000..4f4492eb975fd mode 100644,000000..100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@@ -1,1854 -1,0 +1,1854 @@@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * + * Copyright (C) International Business Machines Corp., 2002,2008 + * Author(s): Steve French (sfrench@us.ibm.com) + * + * Common Internet FileSystem (CIFS) client + * + */ + +/* Note that BB means BUGBUG (ie something to fix eventually) */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cifsfs.h" +#include "cifspdu.h" +#define DECLARE_GLOBALS_HERE +#include "cifsglob.h" +#include "cifsproto.h" +#include "cifs_debug.h" +#include "cifs_fs_sb.h" +#include +#include +#include "cifs_spnego.h" +#include "fscache.h" +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif +#ifdef CONFIG_CIFS_SWN_UPCALL +#include "netlink.h" +#endif +#include "fs_context.h" +#include "cached_dir.h" + +/* + * DOS dates from 1980/1/1 through 2107/12/31 + * Protocol specifications indicate the range should be to 119, which + * limits maximum year to 2099. But this range has not been checked. 
+ */ +#define SMB_DATE_MAX (127<<9 | 12<<5 | 31) +#define SMB_DATE_MIN (0<<9 | 1<<5 | 1) +#define SMB_TIME_MAX (23<<11 | 59<<5 | 29) + +int cifsFYI = 0; +bool traceSMB; +bool enable_oplocks = true; +bool linuxExtEnabled = true; +bool lookupCacheEnabled = true; +bool disable_legacy_dialects; /* false by default */ +bool enable_gcm_256 = true; +bool require_gcm_256; /* false by default */ +bool enable_negotiate_signing; /* false by default */ +unsigned int global_secflags = CIFSSEC_DEF; +/* unsigned int ntlmv2_support = 0; */ +unsigned int sign_CIFS_PDUs = 1; + +/* + * Global transaction id (XID) information + */ +unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ +unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ +unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ +spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ + +/* + * Global counters, updated atomically + */ +atomic_t sesInfoAllocCount; +atomic_t tconInfoAllocCount; +atomic_t tcpSesNextId; +atomic_t tcpSesAllocCount; +atomic_t tcpSesReconnectCount; +atomic_t tconInfoReconnectCount; + +atomic_t mid_count; +atomic_t buf_alloc_count; +atomic_t small_buf_alloc_count; +#ifdef CONFIG_CIFS_STATS2 +atomic_t total_buf_alloc_count; +atomic_t total_small_buf_alloc_count; +#endif/* STATS2 */ +struct list_head cifs_tcp_ses_list; +spinlock_t cifs_tcp_ses_lock; +static const struct super_operations cifs_super_ops; +unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE; +module_param(CIFSMaxBufSize, uint, 0444); +MODULE_PARM_DESC(CIFSMaxBufSize, "Network buffer size (not including header) " + "for CIFS requests. " + "Default: 16384 Range: 8192 to 130048"); +unsigned int cifs_min_rcv = CIFS_MIN_RCV_POOL; +module_param(cifs_min_rcv, uint, 0444); +MODULE_PARM_DESC(cifs_min_rcv, "Network buffers in pool. 
Default: 4 Range: " + "1 to 64"); +unsigned int cifs_min_small = 30; +module_param(cifs_min_small, uint, 0444); +MODULE_PARM_DESC(cifs_min_small, "Small network buffers in pool. Default: 30 " + "Range: 2 to 256"); +unsigned int cifs_max_pending = CIFS_MAX_REQ; +module_param(cifs_max_pending, uint, 0444); +MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for " + "CIFS/SMB1 dialect (N/A for SMB3) " + "Default: 32767 Range: 2 to 32767."); +#ifdef CONFIG_CIFS_STATS2 +unsigned int slow_rsp_threshold = 1; +module_param(slow_rsp_threshold, uint, 0644); +MODULE_PARM_DESC(slow_rsp_threshold, "Amount of time (in seconds) to wait " + "before logging that a response is delayed. " + "Default: 1 (if set to 0 disables msg)."); +#endif /* STATS2 */ + +module_param(enable_oplocks, bool, 0644); +MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1"); + +module_param(enable_gcm_256, bool, 0644); +MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: n/N/0"); + +module_param(require_gcm_256, bool, 0644); +MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0"); + +module_param(enable_negotiate_signing, bool, 0644); +MODULE_PARM_DESC(enable_negotiate_signing, "Enable negotiating packet signing algorithm with server. Default: n/N/0"); + +module_param(disable_legacy_dialects, bool, 0644); +MODULE_PARM_DESC(disable_legacy_dialects, "To improve security it may be " + "helpful to restrict the ability to " + "override the default dialects (SMB2.1, " + "SMB3 and SMB3.02) on mount with old " + "dialects (CIFS/SMB1 and SMB2) since " + "vers=1.0 (CIFS/SMB1) and vers=2.0 are weaker" + " and less secure. 
Default: n/N/0"); + +extern mempool_t *cifs_sm_req_poolp; +extern mempool_t *cifs_req_poolp; +extern mempool_t *cifs_mid_poolp; + +struct workqueue_struct *cifsiod_wq; +struct workqueue_struct *decrypt_wq; +struct workqueue_struct *fileinfo_put_wq; +struct workqueue_struct *cifsoplockd_wq; +struct workqueue_struct *deferredclose_wq; +__u32 cifs_lock_secret; + +/* + * Bumps refcount for cifs super block. + * Note that it should be only called if a referece to VFS super block is + * already held, e.g. in open-type syscalls context. Otherwise it can race with + * atomic_dec_and_test in deactivate_locked_super. + */ +void +cifs_sb_active(struct super_block *sb) +{ + struct cifs_sb_info *server = CIFS_SB(sb); + + if (atomic_inc_return(&server->active) == 1) + atomic_inc(&sb->s_active); +} + +void +cifs_sb_deactive(struct super_block *sb) +{ + struct cifs_sb_info *server = CIFS_SB(sb); + + if (atomic_dec_and_test(&server->active)) + deactivate_super(sb); +} + +static int +cifs_read_super(struct super_block *sb) +{ + struct inode *inode; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct timespec64 ts; + int rc = 0; + + cifs_sb = CIFS_SB(sb); + tcon = cifs_sb_master_tcon(cifs_sb); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIXACL) + sb->s_flags |= SB_POSIXACL; + + if (tcon->snapshot_time) + sb->s_flags |= SB_RDONLY; + + if (tcon->ses->capabilities & tcon->ses->server->vals->cap_large_files) + sb->s_maxbytes = MAX_LFS_FILESIZE; + else + sb->s_maxbytes = MAX_NON_LFS; + + /* + * Some very old servers like DOS and OS/2 used 2 second granularity + * (while all current servers use 100ns granularity - see MS-DTYP) + * but 1 second is the maximum allowed granularity for the VFS + * so for old servers set time granularity to 1 second while for + * everything else (current servers) set it to 100ns. 
+ */ + if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) && + ((tcon->ses->capabilities & + tcon->ses->server->vals->cap_nt_find) == 0) && + !tcon->unix_ext) { + sb->s_time_gran = 1000000000; /* 1 second is max allowed gran */ + ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0); + sb->s_time_min = ts.tv_sec; + ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX), + cpu_to_le16(SMB_TIME_MAX), 0); + sb->s_time_max = ts.tv_sec; + } else { + /* + * Almost every server, including all SMB2+, uses DCE TIME + * ie 100 nanosecond units, since 1601. See MS-DTYP and MS-FSCC + */ + sb->s_time_gran = 100; + ts = cifs_NTtimeToUnix(0); + sb->s_time_min = ts.tv_sec; + ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX)); + sb->s_time_max = ts.tv_sec; + } + + sb->s_magic = CIFS_SUPER_MAGIC; + sb->s_op = &cifs_super_ops; + sb->s_xattr = cifs_xattr_handlers; + rc = super_setup_bdi(sb); + if (rc) + goto out_no_root; + /* tune readahead according to rsize if readahead size not set on mount */ + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + tcon->ses->server->ops->negotiate_rsize(tcon, cifs_sb->ctx); + if (cifs_sb->ctx->rasize) + sb->s_bdi->ra_pages = cifs_sb->ctx->rasize / PAGE_SIZE; + else + sb->s_bdi->ra_pages = 2 * (cifs_sb->ctx->rsize / PAGE_SIZE); + + sb->s_blocksize = CIFS_MAX_MSGSIZE; + sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ + inode = cifs_root_iget(sb); + + if (IS_ERR(inode)) { + rc = PTR_ERR(inode); + goto out_no_root; + } + + if (tcon->nocase) + sb->s_d_op = &cifs_ci_dentry_ops; + else + sb->s_d_op = &cifs_dentry_ops; + + sb->s_root = d_make_root(inode); + if (!sb->s_root) { + rc = -ENOMEM; + goto out_no_root; + } + +#ifdef CONFIG_CIFS_NFSD_EXPORT + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { + cifs_dbg(FYI, "export ops supported\n"); + sb->s_export_op = &cifs_export_ops; + } +#endif /* CONFIG_CIFS_NFSD_EXPORT */ + + return 0; + +out_no_root: + cifs_dbg(VFS, "%s: get root inode failed\n", __func__); + return rc; +} + +static void 
cifs_kill_sb(struct super_block *sb) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + + /* + * We ned to release all dentries for the cached directories + * before we kill the sb. + */ + if (cifs_sb->root) { + close_all_cached_dirs(cifs_sb); + + /* finally release root dentry */ + dput(cifs_sb->root); + cifs_sb->root = NULL; + } + + kill_anon_super(sb); + cifs_umount(cifs_sb); +} + +static int +cifs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int xid; + int rc = 0; + + xid = get_xid(); + + if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) > 0) + buf->f_namelen = + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength); + else + buf->f_namelen = PATH_MAX; + + buf->f_fsid.val[0] = tcon->vol_serial_number; + /* are using part of create time for more randomness, see man statfs */ + buf->f_fsid.val[1] = (int)le64_to_cpu(tcon->vol_create_time); + + buf->f_files = 0; /* undefined */ + buf->f_ffree = 0; /* unlimited */ + + if (server->ops->queryfs) + rc = server->ops->queryfs(xid, tcon, cifs_sb, buf); + + free_xid(xid); + return rc; +} + +static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) +{ + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct TCP_Server_Info *server = tcon->ses->server; + + if (server->ops->fallocate) + return server->ops->fallocate(file, tcon, mode, off, len); + + return -EOPNOTSUPP; +} + +static int cifs_permission(struct mnt_idmap *idmap, + struct inode *inode, int mask) +{ + struct cifs_sb_info *cifs_sb; + + cifs_sb = CIFS_SB(inode->i_sb); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { + if ((mask & MAY_EXEC) && !execute_ok(inode)) + return -EACCES; + else + return 0; + } else /* file mode might have been restricted at 
mount time + on the client (above and beyond ACL on servers) for + servers which do not support setting and viewing mode bits, + so allowing client to check permissions is useful */ + return generic_permission(&nop_mnt_idmap, inode, mask); +} + +static struct kmem_cache *cifs_inode_cachep; +static struct kmem_cache *cifs_req_cachep; +static struct kmem_cache *cifs_mid_cachep; +static struct kmem_cache *cifs_sm_req_cachep; +mempool_t *cifs_sm_req_poolp; +mempool_t *cifs_req_poolp; +mempool_t *cifs_mid_poolp; + +static struct inode * +cifs_alloc_inode(struct super_block *sb) +{ + struct cifsInodeInfo *cifs_inode; + cifs_inode = alloc_inode_sb(sb, cifs_inode_cachep, GFP_KERNEL); + if (!cifs_inode) + return NULL; + cifs_inode->cifsAttrs = 0x20; /* default */ + cifs_inode->time = 0; + /* + * Until the file is open and we have gotten oplock info back from the + * server, can not assume caching of file data or metadata. + */ + cifs_set_oplock_level(cifs_inode, 0); + cifs_inode->flags = 0; + spin_lock_init(&cifs_inode->writers_lock); + cifs_inode->writers = 0; + cifs_inode->netfs.inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ + cifs_inode->server_eof = 0; + cifs_inode->uniqueid = 0; + cifs_inode->createtime = 0; + cifs_inode->epoch = 0; + spin_lock_init(&cifs_inode->open_file_lock); + generate_random_uuid(cifs_inode->lease_key); + cifs_inode->symlink_target = NULL; + + /* + * Can not set i_flags here - they get immediately overwritten to zero + * by the VFS. 
+ */ + /* cifs_inode->netfs.inode.i_flags = S_NOATIME | S_NOCMTIME; */ + INIT_LIST_HEAD(&cifs_inode->openFileList); + INIT_LIST_HEAD(&cifs_inode->llist); + INIT_LIST_HEAD(&cifs_inode->deferred_closes); + spin_lock_init(&cifs_inode->deferred_lock); + return &cifs_inode->netfs.inode; +} + +static void +cifs_free_inode(struct inode *inode) +{ + struct cifsInodeInfo *cinode = CIFS_I(inode); + + if (S_ISLNK(inode->i_mode)) + kfree(cinode->symlink_target); + kmem_cache_free(cifs_inode_cachep, cinode); +} + +static void +cifs_evict_inode(struct inode *inode) +{ + truncate_inode_pages_final(&inode->i_data); + if (inode->i_state & I_PINNING_FSCACHE_WB) + cifs_fscache_unuse_inode_cookie(inode, true); + cifs_fscache_release_inode_cookie(inode); + clear_inode(inode); +} + +static void +cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) +{ + struct sockaddr_in *sa = (struct sockaddr_in *) &server->dstaddr; + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) &server->dstaddr; + + seq_puts(s, ",addr="); + + switch (server->dstaddr.ss_family) { + case AF_INET: + seq_printf(s, "%pI4", &sa->sin_addr.s_addr); + break; + case AF_INET6: + seq_printf(s, "%pI6", &sa6->sin6_addr.s6_addr); + if (sa6->sin6_scope_id) + seq_printf(s, "%%%u", sa6->sin6_scope_id); + break; + default: + seq_puts(s, "(unknown)"); + } + if (server->rdma) + seq_puts(s, ",rdma"); +} + +static void +cifs_show_security(struct seq_file *s, struct cifs_ses *ses) +{ + if (ses->sectype == Unspecified) { + if (ses->user_name == NULL) + seq_puts(s, ",sec=none"); + return; + } + + seq_puts(s, ",sec="); + + switch (ses->sectype) { + case NTLMv2: + seq_puts(s, "ntlmv2"); + break; + case Kerberos: + seq_puts(s, "krb5"); + break; + case RawNTLMSSP: + seq_puts(s, "ntlmssp"); + break; + default: + /* shouldn't ever happen */ + seq_puts(s, "unknown"); + break; + } + + if (ses->sign) + seq_puts(s, "i"); + + if (ses->sectype == Kerberos) + seq_printf(s, ",cruid=%u", + from_kuid_munged(&init_user_ns, 
ses->cred_uid)); +} + +static void +cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb) +{ + seq_puts(s, ",cache="); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) + seq_puts(s, "strict"); + else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + seq_puts(s, "none"); + else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE) + seq_puts(s, "singleclient"); /* assume only one client access */ + else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) + seq_puts(s, "ro"); /* read only caching assumed */ + else + seq_puts(s, "loose"); +} + +/* + * cifs_show_devname() is used so we show the mount device name with correct + * format (e.g. forward slashes vs. back slashes) in /proc/mounts + */ +static int cifs_show_devname(struct seq_file *m, struct dentry *root) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); + char *devname = kstrdup(cifs_sb->ctx->source, GFP_KERNEL); + + if (devname == NULL) + seq_puts(m, "none"); + else { + convert_delimiter(devname, '/'); + /* escape all spaces in share names */ + seq_escape(m, devname, " \t"); + kfree(devname); + } + return 0; +} + +/* + * cifs_show_options() is for displaying mount options in /proc/mounts. + * Not all settable options are displayed but most of the important + * ones are. 
+ */ +static int +cifs_show_options(struct seq_file *s, struct dentry *root) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct sockaddr *srcaddr; + srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; + + seq_show_option(s, "vers", tcon->ses->server->vals->version_string); + cifs_show_security(s, tcon->ses); + cifs_show_cache_flavor(s, cifs_sb); + + if (tcon->no_lease) + seq_puts(s, ",nolease"); + if (cifs_sb->ctx->multiuser) + seq_puts(s, ",multiuser"); + else if (tcon->ses->user_name) + seq_show_option(s, "username", tcon->ses->user_name); + + if (tcon->ses->domainName && tcon->ses->domainName[0] != 0) + seq_show_option(s, "domain", tcon->ses->domainName); + + if (srcaddr->sa_family != AF_UNSPEC) { + struct sockaddr_in *saddr4; + struct sockaddr_in6 *saddr6; + saddr4 = (struct sockaddr_in *)srcaddr; + saddr6 = (struct sockaddr_in6 *)srcaddr; + if (srcaddr->sa_family == AF_INET6) + seq_printf(s, ",srcaddr=%pI6c", + &saddr6->sin6_addr); + else if (srcaddr->sa_family == AF_INET) + seq_printf(s, ",srcaddr=%pI4", + &saddr4->sin_addr.s_addr); + else + seq_printf(s, ",srcaddr=BAD-AF:%i", + (int)(srcaddr->sa_family)); + } + + seq_printf(s, ",uid=%u", + from_kuid_munged(&init_user_ns, cifs_sb->ctx->linux_uid)); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + seq_puts(s, ",forceuid"); + else + seq_puts(s, ",noforceuid"); + + seq_printf(s, ",gid=%u", + from_kgid_munged(&init_user_ns, cifs_sb->ctx->linux_gid)); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + seq_puts(s, ",forcegid"); + else + seq_puts(s, ",noforcegid"); + + cifs_show_address(s, tcon->ses->server); + + if (!tcon->unix_ext) + seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho", + cifs_sb->ctx->file_mode, + cifs_sb->ctx->dir_mode); + if (cifs_sb->ctx->iocharset) + seq_printf(s, ",iocharset=%s", cifs_sb->ctx->iocharset); + if (tcon->seal) + seq_puts(s, ",seal"); + else if (tcon->ses->server->ignore_signature) + seq_puts(s, 
",signloosely"); + if (tcon->nocase) + seq_puts(s, ",nocase"); + if (tcon->nodelete) + seq_puts(s, ",nodelete"); + if (cifs_sb->ctx->no_sparse) + seq_puts(s, ",nosparse"); + if (tcon->local_lease) + seq_puts(s, ",locallease"); + if (tcon->retry) + seq_puts(s, ",hard"); + else + seq_puts(s, ",soft"); + if (tcon->use_persistent) + seq_puts(s, ",persistenthandles"); + else if (tcon->use_resilient) + seq_puts(s, ",resilienthandles"); + if (tcon->posix_extensions) + seq_puts(s, ",posix"); + else if (tcon->unix_ext) + seq_puts(s, ",unix"); + else + seq_puts(s, ",nounix"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) + seq_puts(s, ",nodfs"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + seq_puts(s, ",posixpaths"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) + seq_puts(s, ",setuids"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL) + seq_puts(s, ",idsfromsid"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + seq_puts(s, ",serverino"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + seq_puts(s, ",rwpidforward"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) + seq_puts(s, ",forcemand"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + seq_puts(s, ",nouser_xattr"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + seq_puts(s, ",mapchars"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + seq_puts(s, ",mapposix"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + seq_puts(s, ",sfu"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + seq_puts(s, ",nobrl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_HANDLE_CACHE) + seq_puts(s, ",nohandlecache"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID) + seq_puts(s, ",modefromsid"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + seq_puts(s, ",cifsacl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + seq_puts(s, ",dynperm"); + if (root->d_sb->s_flags & SB_POSIXACL) + seq_puts(s, ",acl"); + if 
(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) + seq_puts(s, ",mfsymlinks"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) + seq_puts(s, ",fsc"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC) + seq_puts(s, ",nostrictsync"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) + seq_puts(s, ",noperm"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) + seq_printf(s, ",backupuid=%u", + from_kuid_munged(&init_user_ns, + cifs_sb->ctx->backupuid)); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) + seq_printf(s, ",backupgid=%u", + from_kgid_munged(&init_user_ns, + cifs_sb->ctx->backupgid)); + + seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize); + seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize); + seq_printf(s, ",bsize=%u", cifs_sb->ctx->bsize); + if (cifs_sb->ctx->rasize) + seq_printf(s, ",rasize=%u", cifs_sb->ctx->rasize); + if (tcon->ses->server->min_offload) + seq_printf(s, ",esize=%u", tcon->ses->server->min_offload); + seq_printf(s, ",echo_interval=%lu", + tcon->ses->server->echo_interval / HZ); + + /* Only display the following if overridden on mount */ + if (tcon->ses->server->max_credits != SMB2_MAX_CREDITS_AVAILABLE) + seq_printf(s, ",max_credits=%u", tcon->ses->server->max_credits); + if (tcon->ses->server->tcp_nodelay) + seq_puts(s, ",tcpnodelay"); + if (tcon->ses->server->noautotune) + seq_puts(s, ",noautotune"); + if (tcon->ses->server->noblocksnd) + seq_puts(s, ",noblocksend"); + + if (tcon->snapshot_time) + seq_printf(s, ",snapshot=%llu", tcon->snapshot_time); + if (tcon->handle_timeout) + seq_printf(s, ",handletimeout=%u", tcon->handle_timeout); + + /* + * Display file and directory attribute timeout in seconds. 
+ * If file and directory attribute timeout the same then actimeo + * was likely specified on mount + */ + if (cifs_sb->ctx->acdirmax == cifs_sb->ctx->acregmax) + seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->acregmax / HZ); + else { + seq_printf(s, ",acdirmax=%lu", cifs_sb->ctx->acdirmax / HZ); + seq_printf(s, ",acregmax=%lu", cifs_sb->ctx->acregmax / HZ); + } + seq_printf(s, ",closetimeo=%lu", cifs_sb->ctx->closetimeo / HZ); + + if (tcon->ses->chan_max > 1) + seq_printf(s, ",multichannel,max_channels=%zu", + tcon->ses->chan_max); + + if (tcon->use_witness) + seq_puts(s, ",witness"); + + return 0; +} + +static void cifs_umount_begin(struct super_block *sb) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_tcon *tcon; + + if (cifs_sb == NULL) + return; + + tcon = cifs_sb_master_tcon(cifs_sb); + + spin_lock(&cifs_tcp_ses_lock); + spin_lock(&tcon->tc_lock); + if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) { + /* we have other mounts to same share or we have + already tried to umount this and woken up + all waiting network requests, nothing to do */ + spin_unlock(&tcon->tc_lock); + spin_unlock(&cifs_tcp_ses_lock); + return; + } + /* + * can not set tcon->status to TID_EXITING yet since we don't know if umount -f will + * fail later (e.g. due to open files). 
TID_EXITING will be set just before tdis req sent + */ + spin_unlock(&tcon->tc_lock); + spin_unlock(&cifs_tcp_ses_lock); + + cifs_close_all_deferred_files(tcon); + /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ + /* cancel_notify_requests(tcon); */ + if (tcon->ses && tcon->ses->server) { + cifs_dbg(FYI, "wake up tasks now - umount begin not complete\n"); + wake_up_all(&tcon->ses->server->request_q); + wake_up_all(&tcon->ses->server->response_q); + msleep(1); /* yield */ + /* we have to kick the requests once more */ + wake_up_all(&tcon->ses->server->response_q); + msleep(1); + } + + return; +} + +static int cifs_freeze(struct super_block *sb) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_tcon *tcon; + + if (cifs_sb == NULL) + return 0; + + tcon = cifs_sb_master_tcon(cifs_sb); + + cifs_close_all_deferred_files(tcon); + return 0; +} + +#ifdef CONFIG_CIFS_STATS2 +static int cifs_show_stats(struct seq_file *s, struct dentry *root) +{ + /* BB FIXME */ + return 0; +} +#endif + +static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + fscache_unpin_writeback(wbc, cifs_inode_cookie(inode)); + return 0; +} + +static int cifs_drop_inode(struct inode *inode) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + + /* no serverino => unconditional eviction */ + return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) || + generic_drop_inode(inode); +} + +static const struct super_operations cifs_super_ops = { + .statfs = cifs_statfs, + .alloc_inode = cifs_alloc_inode, + .write_inode = cifs_write_inode, + .free_inode = cifs_free_inode, + .drop_inode = cifs_drop_inode, + .evict_inode = cifs_evict_inode, +/* .show_path = cifs_show_path, */ /* Would we ever need show path? 
*/ + .show_devname = cifs_show_devname, +/* .delete_inode = cifs_delete_inode, */ /* Do not need above + function unless later we add lazy close of inodes or unless the + kernel forgets to call us with the same number of releases (closes) + as opens */ + .show_options = cifs_show_options, + .umount_begin = cifs_umount_begin, + .freeze_fs = cifs_freeze, +#ifdef CONFIG_CIFS_STATS2 + .show_stats = cifs_show_stats, +#endif +}; + +/* + * Get root dentry from superblock according to prefix path mount option. + * Return dentry with refcount + 1 on success and NULL otherwise. + */ +static struct dentry * +cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb) +{ + struct dentry *dentry; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + char *full_path = NULL; + char *s, *p; + char sep; + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) + return dget(sb->s_root); + + full_path = cifs_build_path_to_root(ctx, cifs_sb, + cifs_sb_master_tcon(cifs_sb), 0); + if (full_path == NULL) + return ERR_PTR(-ENOMEM); + + cifs_dbg(FYI, "Get root dentry for %s\n", full_path); + + sep = CIFS_DIR_SEP(cifs_sb); + dentry = dget(sb->s_root); + s = full_path; + + do { + struct inode *dir = d_inode(dentry); + struct dentry *child; + + if (!S_ISDIR(dir->i_mode)) { + dput(dentry); + dentry = ERR_PTR(-ENOTDIR); + break; + } + + /* skip separators */ + while (*s == sep) + s++; + if (!*s) + break; + p = s++; + /* next separator */ + while (*s && *s != sep) + s++; + + child = lookup_positive_unlocked(p, dentry, s - p); + dput(dentry); + dentry = child; + } while (!IS_ERR(dentry)); + kfree(full_path); + return dentry; +} + +static int cifs_set_super(struct super_block *sb, void *data) +{ + struct cifs_mnt_data *mnt_data = data; + sb->s_fs_info = mnt_data->cifs_sb; + return set_anon_super(sb, NULL); +} + +struct dentry * +cifs_smb3_do_mount(struct file_system_type *fs_type, + int flags, struct smb3_fs_context *old_ctx) +{ + int rc; + struct super_block *sb = NULL; + struct 
cifs_sb_info *cifs_sb = NULL; + struct cifs_mnt_data mnt_data; + struct dentry *root; + + if (cifsFYI) { + cifs_dbg(FYI, "%s: devname=%s flags=0x%x\n", __func__, + old_ctx->source, flags); + } else { + cifs_info("Attempting to mount %s\n", old_ctx->source); + } + + cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); + if (cifs_sb == NULL) { + root = ERR_PTR(-ENOMEM); + goto out; + } + + cifs_sb->ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL); + if (!cifs_sb->ctx) { + root = ERR_PTR(-ENOMEM); + goto out; + } + rc = smb3_fs_context_dup(cifs_sb->ctx, old_ctx); + if (rc) { + root = ERR_PTR(rc); + goto out; + } + + rc = cifs_setup_cifs_sb(cifs_sb); + if (rc) { + root = ERR_PTR(rc); + goto out; + } + + rc = cifs_mount(cifs_sb, cifs_sb->ctx); + if (rc) { + if (!(flags & SB_SILENT)) + cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n", + rc); + root = ERR_PTR(rc); + goto out; + } + + mnt_data.ctx = cifs_sb->ctx; + mnt_data.cifs_sb = cifs_sb; + mnt_data.flags = flags; + + /* BB should we make this contingent on mount parm? */ + flags |= SB_NODIRATIME | SB_NOATIME; + + sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data); + if (IS_ERR(sb)) { + root = ERR_CAST(sb); + cifs_umount(cifs_sb); + cifs_sb = NULL; + goto out; + } + + if (sb->s_root) { + cifs_dbg(FYI, "Use existing superblock\n"); + cifs_umount(cifs_sb); + cifs_sb = NULL; + } else { + rc = cifs_read_super(sb); + if (rc) { + root = ERR_PTR(rc); + goto out_super; + } + + sb->s_flags |= SB_ACTIVE; + } + + root = cifs_get_root(cifs_sb ? 
cifs_sb->ctx : old_ctx, sb); + if (IS_ERR(root)) + goto out_super; + + if (cifs_sb) + cifs_sb->root = dget(root); + + cifs_dbg(FYI, "dentry root is: %p\n", root); + return root; + +out_super: + deactivate_locked_super(sb); + return root; +out: + if (cifs_sb) { + if (!sb || IS_ERR(sb)) { /* otherwise kill_sb will handle */ + kfree(cifs_sb->prepath); + smb3_cleanup_fs_context(cifs_sb->ctx); + kfree(cifs_sb); + } + } + return root; +} + + +static ssize_t +cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + ssize_t rc; + struct inode *inode = file_inode(iocb->ki_filp); + + if (iocb->ki_flags & IOCB_DIRECT) + return cifs_user_readv(iocb, iter); + + rc = cifs_revalidate_mapping(inode); + if (rc) + return rc; + + return generic_file_read_iter(iocb, iter); +} + +static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct cifsInodeInfo *cinode = CIFS_I(inode); + ssize_t written; + int rc; + + if (iocb->ki_filp->f_flags & O_DIRECT) { + written = cifs_user_writev(iocb, from); + if (written > 0 && CIFS_CACHE_READ(cinode)) { + cifs_zap_mapping(inode); + cifs_dbg(FYI, + "Set no oplock for inode=%p after a write operation\n", + inode); + cinode->oplock = 0; + } + return written; + } + + written = cifs_get_writer(cinode); + if (written) + return written; + + written = generic_file_write_iter(iocb, from); + + if (CIFS_CACHE_WRITE(CIFS_I(inode))) + goto out; + + rc = filemap_fdatawrite(inode->i_mapping); + if (rc) + cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n", + rc, inode); + +out: + cifs_put_writer(cinode); + return written; +} + +static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) +{ + struct cifsFileInfo *cfile = file->private_data; + struct cifs_tcon *tcon; + + /* + * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate + * the cached file length + */ + if (whence != SEEK_SET && whence != SEEK_CUR) { + int rc; + struct inode *inode = 
file_inode(file); + + /* + * We need to be sure that all dirty pages are written and the + * server has the newest file length. + */ + if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping && + inode->i_mapping->nrpages != 0) { + rc = filemap_fdatawait(inode->i_mapping); + if (rc) { + mapping_set_error(inode->i_mapping, rc); + return rc; + } + } + /* + * Some applications poll for the file length in this strange + * way so we must seek to end on non-oplocked files by + * setting the revalidate time to zero. + */ + CIFS_I(inode)->time = 0; + + rc = cifs_revalidate_file_attr(file); + if (rc < 0) + return (loff_t)rc; + } + if (cfile && cfile->tlink) { + tcon = tlink_tcon(cfile->tlink); + if (tcon->ses->server->ops->llseek) + return tcon->ses->server->ops->llseek(file, tcon, + offset, whence); + } + return generic_file_llseek(file, offset, whence); +} + +static int +cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv) +{ + /* + * Note that this is called by vfs setlease with i_lock held to + * protect *lease from going away. + */ + struct inode *inode = file_inode(file); + struct cifsFileInfo *cfile = file->private_data; + + if (!(S_ISREG(inode->i_mode))) + return -EINVAL; + + /* Check if file is oplocked if this is request for new lease */ + if (arg == F_UNLCK || + ((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) || + ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode)))) + return generic_setlease(file, arg, lease, priv); + else if (tlink_tcon(cfile->tlink)->local_lease && + !CIFS_CACHE_READ(CIFS_I(inode))) + /* + * If the server claims to support oplock on this file, then we + * still need to check oplock even if the local_lease mount + * option is set, but there are servers which do not support + * oplock for which this mount option may be useful if the user + * knows that the file won't be changed on the server by anyone + * else. 
+ */ + return generic_setlease(file, arg, lease, priv); + else + return -EAGAIN; +} + +struct file_system_type cifs_fs_type = { + .owner = THIS_MODULE, + .name = "cifs", + .init_fs_context = smb3_init_fs_context, + .parameters = smb3_fs_parameters, + .kill_sb = cifs_kill_sb, + .fs_flags = FS_RENAME_DOES_D_MOVE, +}; +MODULE_ALIAS_FS("cifs"); + +struct file_system_type smb3_fs_type = { + .owner = THIS_MODULE, + .name = "smb3", + .init_fs_context = smb3_init_fs_context, + .parameters = smb3_fs_parameters, + .kill_sb = cifs_kill_sb, + .fs_flags = FS_RENAME_DOES_D_MOVE, +}; +MODULE_ALIAS_FS("smb3"); +MODULE_ALIAS("smb3"); + +const struct inode_operations cifs_dir_inode_ops = { + .create = cifs_create, + .atomic_open = cifs_atomic_open, + .lookup = cifs_lookup, + .getattr = cifs_getattr, + .unlink = cifs_unlink, + .link = cifs_hardlink, + .mkdir = cifs_mkdir, + .rmdir = cifs_rmdir, + .rename = cifs_rename2, + .permission = cifs_permission, + .setattr = cifs_setattr, + .symlink = cifs_symlink, + .mknod = cifs_mknod, + .listxattr = cifs_listxattr, + .get_acl = cifs_get_acl, + .set_acl = cifs_set_acl, +}; + +const struct inode_operations cifs_file_inode_ops = { + .setattr = cifs_setattr, + .getattr = cifs_getattr, + .permission = cifs_permission, + .listxattr = cifs_listxattr, + .fiemap = cifs_fiemap, + .get_acl = cifs_get_acl, + .set_acl = cifs_set_acl, +}; + +const char *cifs_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *done) +{ + char *target_path; + + target_path = kmalloc(PATH_MAX, GFP_KERNEL); + if (!target_path) + return ERR_PTR(-ENOMEM); + + spin_lock(&inode->i_lock); + if (likely(CIFS_I(inode)->symlink_target)) { + strscpy(target_path, CIFS_I(inode)->symlink_target, PATH_MAX); + } else { + kfree(target_path); + target_path = ERR_PTR(-EOPNOTSUPP); + } + spin_unlock(&inode->i_lock); + + if (!IS_ERR(target_path)) + set_delayed_call(done, kfree_link, target_path); + + return target_path; +} + +const struct inode_operations 
cifs_symlink_inode_ops = { + .get_link = cifs_get_link, + .permission = cifs_permission, + .listxattr = cifs_listxattr, +}; + +static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, loff_t len, + unsigned int remap_flags) +{ + struct inode *src_inode = file_inode(src_file); + struct inode *target_inode = file_inode(dst_file); + struct cifsFileInfo *smb_file_src = src_file->private_data; + struct cifsFileInfo *smb_file_target; + struct cifs_tcon *target_tcon; + unsigned int xid; + int rc; + + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) + return -EINVAL; + + cifs_dbg(FYI, "clone range\n"); + + xid = get_xid(); + + if (!src_file->private_data || !dst_file->private_data) { + rc = -EBADF; + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); + goto out; + } + + smb_file_target = dst_file->private_data; + target_tcon = tlink_tcon(smb_file_target->tlink); + + /* + * Note: cifs case is easier than btrfs since server responsible for + * checks for proper open modes and file type and if it wants + * server could even support copy of range where source = target + */ + lock_two_nondirectories(target_inode, src_inode); + + if (len == 0) + len = src_inode->i_size - off; + + cifs_dbg(FYI, "about to flush pages\n"); + /* should we flush first and last page first */ + truncate_inode_pages_range(&target_inode->i_data, destoff, + PAGE_ALIGN(destoff + len)-1); + + if (target_tcon->ses->server->ops->duplicate_extents) + rc = target_tcon->ses->server->ops->duplicate_extents(xid, + smb_file_src, smb_file_target, off, len, destoff); + else + rc = -EOPNOTSUPP; + + /* force revalidate of size and timestamps of target file now + that target is updated on the server */ + CIFS_I(target_inode)->time = 0; + /* although unlocking in the reverse order from locking is not + strictly necessary here it is a little cleaner to be consistent */ + unlock_two_nondirectories(src_inode, target_inode); +out: + 
free_xid(xid); + return rc < 0 ? rc : len; +} + +ssize_t cifs_file_copychunk_range(unsigned int xid, + struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags) +{ + struct inode *src_inode = file_inode(src_file); + struct inode *target_inode = file_inode(dst_file); + struct cifsFileInfo *smb_file_src; + struct cifsFileInfo *smb_file_target; + struct cifs_tcon *src_tcon; + struct cifs_tcon *target_tcon; + ssize_t rc; + + cifs_dbg(FYI, "copychunk range\n"); + + if (!src_file->private_data || !dst_file->private_data) { + rc = -EBADF; + cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n"); + goto out; + } + + rc = -EXDEV; + smb_file_target = dst_file->private_data; + smb_file_src = src_file->private_data; + src_tcon = tlink_tcon(smb_file_src->tlink); + target_tcon = tlink_tcon(smb_file_target->tlink); + + if (src_tcon->ses != target_tcon->ses) { + cifs_dbg(VFS, "source and target of copy not on same server\n"); + goto out; + } + + rc = -EOPNOTSUPP; + if (!target_tcon->ses->server->ops->copychunk_range) + goto out; + + /* + * Note: cifs case is easier than btrfs since server responsible for + * checks for proper open modes and file type and if it wants + * server could even support copy of range where source = target + */ + lock_two_nondirectories(target_inode, src_inode); + + cifs_dbg(FYI, "about to flush pages\n"); + + rc = filemap_write_and_wait_range(src_inode->i_mapping, off, + off + len - 1); + if (rc) + goto unlock; + + /* should we flush first and last page first */ + truncate_inode_pages(&target_inode->i_data, 0); + + rc = file_modified(dst_file); + if (!rc) + rc = target_tcon->ses->server->ops->copychunk_range(xid, + smb_file_src, smb_file_target, off, len, destoff); + + file_accessed(src_file); + + /* force revalidate of size and timestamps of target file now + * that target is updated on the server + */ + CIFS_I(target_inode)->time = 0; + +unlock: + /* although unlocking in the reverse order from 
locking is not + * strictly necessary here it is a little cleaner to be consistent + */ + unlock_two_nondirectories(src_inode, target_inode); + +out: + return rc; +} + +/* + * Directory operations under CIFS/SMB2/SMB3 are synchronous, so fsync() + * is a dummy operation. + */ +static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n", + file, datasync); + + return 0; +} + +static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags) +{ + unsigned int xid = get_xid(); + ssize_t rc; + struct cifsFileInfo *cfile = dst_file->private_data; + + if (cfile->swapfile) { + rc = -EOPNOTSUPP; + free_xid(xid); + return rc; + } + + rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff, + len, flags); + free_xid(xid); + + if (rc == -EOPNOTSUPP || rc == -EXDEV) + rc = generic_copy_file_range(src_file, off, dst_file, + destoff, len, flags); + return rc; +} + +const struct file_operations cifs_file_ops = { + .read_iter = cifs_loose_read_iter, + .write_iter = cifs_file_write_iter, + .open = cifs_open, + .release = cifs_close, + .lock = cifs_lock, + .flock = cifs_flock, + .fsync = cifs_fsync, + .flush = cifs_flush, + .mmap = cifs_file_mmap, - .splice_read = cifs_splice_read, ++ .splice_read = filemap_splice_read, + .splice_write = iter_file_splice_write, + .llseek = cifs_llseek, + .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, + .remap_file_range = cifs_remap_file_range, + .setlease = cifs_setlease, + .fallocate = cifs_fallocate, +}; + +const struct file_operations cifs_file_strict_ops = { + .read_iter = cifs_strict_readv, + .write_iter = cifs_strict_writev, + .open = cifs_open, + .release = cifs_close, + .lock = cifs_lock, + .flock = cifs_flock, + .fsync = cifs_strict_fsync, + .flush = cifs_flush, + .mmap = cifs_file_strict_mmap, - .splice_read = cifs_splice_read, ++ 
.splice_read = filemap_splice_read, + .splice_write = iter_file_splice_write, + .llseek = cifs_llseek, + .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, + .remap_file_range = cifs_remap_file_range, + .setlease = cifs_setlease, + .fallocate = cifs_fallocate, +}; + +const struct file_operations cifs_file_direct_ops = { + .read_iter = cifs_direct_readv, + .write_iter = cifs_direct_writev, + .open = cifs_open, + .release = cifs_close, + .lock = cifs_lock, + .flock = cifs_flock, + .fsync = cifs_fsync, + .flush = cifs_flush, + .mmap = cifs_file_mmap, - .splice_read = direct_splice_read, ++ .splice_read = copy_splice_read, + .splice_write = iter_file_splice_write, + .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, + .remap_file_range = cifs_remap_file_range, + .llseek = cifs_llseek, + .setlease = cifs_setlease, + .fallocate = cifs_fallocate, +}; + +const struct file_operations cifs_file_nobrl_ops = { + .read_iter = cifs_loose_read_iter, + .write_iter = cifs_file_write_iter, + .open = cifs_open, + .release = cifs_close, + .fsync = cifs_fsync, + .flush = cifs_flush, + .mmap = cifs_file_mmap, - .splice_read = cifs_splice_read, ++ .splice_read = filemap_splice_read, + .splice_write = iter_file_splice_write, + .llseek = cifs_llseek, + .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, + .remap_file_range = cifs_remap_file_range, + .setlease = cifs_setlease, + .fallocate = cifs_fallocate, +}; + +const struct file_operations cifs_file_strict_nobrl_ops = { + .read_iter = cifs_strict_readv, + .write_iter = cifs_strict_writev, + .open = cifs_open, + .release = cifs_close, + .fsync = cifs_strict_fsync, + .flush = cifs_flush, + .mmap = cifs_file_strict_mmap, - .splice_read = cifs_splice_read, ++ .splice_read = filemap_splice_read, + .splice_write = iter_file_splice_write, + .llseek = cifs_llseek, + .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, + .remap_file_range = cifs_remap_file_range, 
+ .setlease = cifs_setlease, + .fallocate = cifs_fallocate, +}; + +const struct file_operations cifs_file_direct_nobrl_ops = { + .read_iter = cifs_direct_readv, + .write_iter = cifs_direct_writev, + .open = cifs_open, + .release = cifs_close, + .fsync = cifs_fsync, + .flush = cifs_flush, + .mmap = cifs_file_mmap, - .splice_read = direct_splice_read, ++ .splice_read = copy_splice_read, + .splice_write = iter_file_splice_write, + .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, + .remap_file_range = cifs_remap_file_range, + .llseek = cifs_llseek, + .setlease = cifs_setlease, + .fallocate = cifs_fallocate, +}; + +const struct file_operations cifs_dir_ops = { + .iterate_shared = cifs_readdir, + .release = cifs_closedir, + .read = generic_read_dir, + .unlocked_ioctl = cifs_ioctl, + .copy_file_range = cifs_copy_file_range, + .remap_file_range = cifs_remap_file_range, + .llseek = generic_file_llseek, + .fsync = cifs_dir_fsync, +}; + +static void +cifs_init_once(void *inode) +{ + struct cifsInodeInfo *cifsi = inode; + + inode_init_once(&cifsi->netfs.inode); + init_rwsem(&cifsi->lock_sem); +} + +static int __init +cifs_init_inodecache(void) +{ + cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", + sizeof(struct cifsInodeInfo), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD|SLAB_ACCOUNT), + cifs_init_once); + if (cifs_inode_cachep == NULL) + return -ENOMEM; + + return 0; +} + +static void +cifs_destroy_inodecache(void) +{ + /* + * Make sure all delayed rcu free inodes are flushed before we + * destroy cache. + */ + rcu_barrier(); + kmem_cache_destroy(cifs_inode_cachep); +} + +static int +cifs_init_request_bufs(void) +{ + /* + * SMB2 maximum header size is bigger than CIFS one - no problems to + * allocate some more bytes for CIFS. 
+ */ + size_t max_hdr_size = MAX_SMB2_HDR_SIZE; + + if (CIFSMaxBufSize < 8192) { + /* Buffer size can not be smaller than 2 * PATH_MAX since maximum + Unicode path name has to fit in any SMB/CIFS path based frames */ + CIFSMaxBufSize = 8192; + } else if (CIFSMaxBufSize > 1024*127) { + CIFSMaxBufSize = 1024 * 127; + } else { + CIFSMaxBufSize &= 0x1FE00; /* Round size to even 512 byte mult*/ + } +/* + cifs_dbg(VFS, "CIFSMaxBufSize %d 0x%x\n", + CIFSMaxBufSize, CIFSMaxBufSize); +*/ + cifs_req_cachep = kmem_cache_create_usercopy("cifs_request", + CIFSMaxBufSize + max_hdr_size, 0, + SLAB_HWCACHE_ALIGN, 0, + CIFSMaxBufSize + max_hdr_size, + NULL); + if (cifs_req_cachep == NULL) + return -ENOMEM; + + if (cifs_min_rcv < 1) + cifs_min_rcv = 1; + else if (cifs_min_rcv > 64) { + cifs_min_rcv = 64; + cifs_dbg(VFS, "cifs_min_rcv set to maximum (64)\n"); + } + + cifs_req_poolp = mempool_create_slab_pool(cifs_min_rcv, + cifs_req_cachep); + + if (cifs_req_poolp == NULL) { + kmem_cache_destroy(cifs_req_cachep); + return -ENOMEM; + } + /* MAX_CIFS_SMALL_BUFFER_SIZE bytes is enough for most SMB responses and + almost all handle based requests (but not write response, nor is it + sufficient for path based requests). 
A smaller size would have + been more efficient (compacting multiple slab items on one 4k page) + for the case in which debug was on, but this larger size allows + more SMBs to use small buffer alloc and is still much more + efficient to alloc 1 per page off the slab compared to 17K (5page) + alloc of large cifs buffers even when page debugging is on */ + cifs_sm_req_cachep = kmem_cache_create_usercopy("cifs_small_rq", + MAX_CIFS_SMALL_BUFFER_SIZE, 0, SLAB_HWCACHE_ALIGN, + 0, MAX_CIFS_SMALL_BUFFER_SIZE, NULL); + if (cifs_sm_req_cachep == NULL) { + mempool_destroy(cifs_req_poolp); + kmem_cache_destroy(cifs_req_cachep); + return -ENOMEM; + } + + if (cifs_min_small < 2) + cifs_min_small = 2; + else if (cifs_min_small > 256) { + cifs_min_small = 256; + cifs_dbg(FYI, "cifs_min_small set to maximum (256)\n"); + } + + cifs_sm_req_poolp = mempool_create_slab_pool(cifs_min_small, + cifs_sm_req_cachep); + + if (cifs_sm_req_poolp == NULL) { + mempool_destroy(cifs_req_poolp); + kmem_cache_destroy(cifs_req_cachep); + kmem_cache_destroy(cifs_sm_req_cachep); + return -ENOMEM; + } + + return 0; +} + +static void +cifs_destroy_request_bufs(void) +{ + mempool_destroy(cifs_req_poolp); + kmem_cache_destroy(cifs_req_cachep); + mempool_destroy(cifs_sm_req_poolp); + kmem_cache_destroy(cifs_sm_req_cachep); +} + +static int init_mids(void) +{ + cifs_mid_cachep = kmem_cache_create("cifs_mpx_ids", + sizeof(struct mid_q_entry), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (cifs_mid_cachep == NULL) + return -ENOMEM; + + /* 3 is a reasonable minimum number of simultaneous operations */ + cifs_mid_poolp = mempool_create_slab_pool(3, cifs_mid_cachep); + if (cifs_mid_poolp == NULL) { + kmem_cache_destroy(cifs_mid_cachep); + return -ENOMEM; + } + + return 0; +} + +static void destroy_mids(void) +{ + mempool_destroy(cifs_mid_poolp); + kmem_cache_destroy(cifs_mid_cachep); +} + +static int __init +init_cifs(void) +{ + int rc = 0; + cifs_proc_init(); + INIT_LIST_HEAD(&cifs_tcp_ses_list); +/* + * Initialize 
Global counters + */ + atomic_set(&sesInfoAllocCount, 0); + atomic_set(&tconInfoAllocCount, 0); + atomic_set(&tcpSesNextId, 0); + atomic_set(&tcpSesAllocCount, 0); + atomic_set(&tcpSesReconnectCount, 0); + atomic_set(&tconInfoReconnectCount, 0); + + atomic_set(&buf_alloc_count, 0); + atomic_set(&small_buf_alloc_count, 0); +#ifdef CONFIG_CIFS_STATS2 + atomic_set(&total_buf_alloc_count, 0); + atomic_set(&total_small_buf_alloc_count, 0); + if (slow_rsp_threshold < 1) + cifs_dbg(FYI, "slow_response_threshold msgs disabled\n"); + else if (slow_rsp_threshold > 32767) + cifs_dbg(VFS, + "slow response threshold set higher than recommended (0 to 32767)\n"); +#endif /* CONFIG_CIFS_STATS2 */ + + atomic_set(&mid_count, 0); + GlobalCurrentXid = 0; + GlobalTotalActiveXid = 0; + GlobalMaxActiveXid = 0; + spin_lock_init(&cifs_tcp_ses_lock); + spin_lock_init(&GlobalMid_Lock); + + cifs_lock_secret = get_random_u32(); + + if (cifs_max_pending < 2) { + cifs_max_pending = 2; + cifs_dbg(FYI, "cifs_max_pending set to min of 2\n"); + } else if (cifs_max_pending > CIFS_MAX_REQ) { + cifs_max_pending = CIFS_MAX_REQ; + cifs_dbg(FYI, "cifs_max_pending set to max of %u\n", + CIFS_MAX_REQ); + } + + cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!cifsiod_wq) { + rc = -ENOMEM; + goto out_clean_proc; + } + + /* + * Consider in future setting limit!=0 maybe to min(num_of_cores - 1, 3) + * so that we don't launch too many worker threads but + * Documentation/core-api/workqueue.rst recommends setting it to 0 + */ + + /* WQ_UNBOUND allows decrypt tasks to run on any CPU */ + decrypt_wq = alloc_workqueue("smb3decryptd", + WQ_UNBOUND|WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!decrypt_wq) { + rc = -ENOMEM; + goto out_destroy_cifsiod_wq; + } + + fileinfo_put_wq = alloc_workqueue("cifsfileinfoput", + WQ_UNBOUND|WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!fileinfo_put_wq) { + rc = -ENOMEM; + goto out_destroy_decrypt_wq; + } + + cifsoplockd_wq = alloc_workqueue("cifsoplockd", + 
WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!cifsoplockd_wq) { + rc = -ENOMEM; + goto out_destroy_fileinfo_put_wq; + } + + deferredclose_wq = alloc_workqueue("deferredclose", + WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + if (!deferredclose_wq) { + rc = -ENOMEM; + goto out_destroy_cifsoplockd_wq; + } + + rc = cifs_init_inodecache(); + if (rc) + goto out_destroy_deferredclose_wq; + + rc = init_mids(); + if (rc) + goto out_destroy_inodecache; + + rc = cifs_init_request_bufs(); + if (rc) + goto out_destroy_mids; + +#ifdef CONFIG_CIFS_DFS_UPCALL + rc = dfs_cache_init(); + if (rc) + goto out_destroy_request_bufs; +#endif /* CONFIG_CIFS_DFS_UPCALL */ +#ifdef CONFIG_CIFS_UPCALL + rc = init_cifs_spnego(); + if (rc) + goto out_destroy_dfs_cache; +#endif /* CONFIG_CIFS_UPCALL */ +#ifdef CONFIG_CIFS_SWN_UPCALL + rc = cifs_genl_init(); + if (rc) + goto out_register_key_type; +#endif /* CONFIG_CIFS_SWN_UPCALL */ + + rc = init_cifs_idmap(); + if (rc) + goto out_cifs_swn_init; + + rc = register_filesystem(&cifs_fs_type); + if (rc) + goto out_init_cifs_idmap; + + rc = register_filesystem(&smb3_fs_type); + if (rc) { + unregister_filesystem(&cifs_fs_type); + goto out_init_cifs_idmap; + } + + return 0; + +out_init_cifs_idmap: + exit_cifs_idmap(); +out_cifs_swn_init: +#ifdef CONFIG_CIFS_SWN_UPCALL + cifs_genl_exit(); +out_register_key_type: +#endif +#ifdef CONFIG_CIFS_UPCALL + exit_cifs_spnego(); +out_destroy_dfs_cache: +#endif +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_cache_destroy(); +out_destroy_request_bufs: +#endif + cifs_destroy_request_bufs(); +out_destroy_mids: + destroy_mids(); +out_destroy_inodecache: + cifs_destroy_inodecache(); +out_destroy_deferredclose_wq: + destroy_workqueue(deferredclose_wq); +out_destroy_cifsoplockd_wq: + destroy_workqueue(cifsoplockd_wq); +out_destroy_fileinfo_put_wq: + destroy_workqueue(fileinfo_put_wq); +out_destroy_decrypt_wq: + destroy_workqueue(decrypt_wq); +out_destroy_cifsiod_wq: + destroy_workqueue(cifsiod_wq); +out_clean_proc: + cifs_proc_clean(); + return rc; 
+} + +static void __exit +exit_cifs(void) +{ + cifs_dbg(NOISY, "exit_smb3\n"); + unregister_filesystem(&cifs_fs_type); + unregister_filesystem(&smb3_fs_type); + cifs_dfs_release_automount_timer(); + exit_cifs_idmap(); +#ifdef CONFIG_CIFS_SWN_UPCALL + cifs_genl_exit(); +#endif +#ifdef CONFIG_CIFS_UPCALL + exit_cifs_spnego(); +#endif +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_cache_destroy(); +#endif + cifs_destroy_request_bufs(); + destroy_mids(); + cifs_destroy_inodecache(); + destroy_workqueue(deferredclose_wq); + destroy_workqueue(cifsoplockd_wq); + destroy_workqueue(decrypt_wq); + destroy_workqueue(fileinfo_put_wq); + destroy_workqueue(cifsiod_wq); + cifs_proc_clean(); +} + +MODULE_AUTHOR("Steve French"); +MODULE_LICENSE("GPL"); /* combination of LGPL + GPL source behaves as GPL */ +MODULE_DESCRIPTION + ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and " + "also older servers complying with the SNIA CIFS Specification)"); +MODULE_VERSION(CIFS_VERSION); +MODULE_SOFTDEP("ecb"); +MODULE_SOFTDEP("hmac"); +MODULE_SOFTDEP("md5"); +MODULE_SOFTDEP("nls"); +MODULE_SOFTDEP("aes"); +MODULE_SOFTDEP("cmac"); +MODULE_SOFTDEP("sha256"); +MODULE_SOFTDEP("sha512"); +MODULE_SOFTDEP("aead2"); +MODULE_SOFTDEP("ccm"); +MODULE_SOFTDEP("gcm"); +module_init(init_cifs) +module_exit(exit_cifs) diff --cc fs/smb/client/cifsfs.h index 74cd6fafb33e8,0000000000000..d7274eefc666a mode 100644,000000..100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@@ -1,167 -1,0 +1,164 @@@ +/* SPDX-License-Identifier: LGPL-2.1 */ +/* + * + * Copyright (c) International Business Machines Corp., 2002, 2007 + * Author(s): Steve French (sfrench@us.ibm.com) + * + */ + +#ifndef _CIFSFS_H +#define _CIFSFS_H + +#include + +#define ROOT_I 2 + +/* + * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down + * so that it will fit. We use hash_64 to convert the value to 31 bits, and + * then add 1, to ensure that we don't end up with a 0 as the value. 
+ */ +static inline ino_t +cifs_uniqueid_to_ino_t(u64 fileid) +{ + if ((sizeof(ino_t)) < (sizeof(u64))) + return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1; + + return (ino_t)fileid; + +} + +static inline void cifs_set_time(struct dentry *dentry, unsigned long time) +{ + dentry->d_fsdata = (void *) time; +} + +static inline unsigned long cifs_get_time(struct dentry *dentry) +{ + return (unsigned long) dentry->d_fsdata; +} + +extern struct file_system_type cifs_fs_type, smb3_fs_type; +extern const struct address_space_operations cifs_addr_ops; +extern const struct address_space_operations cifs_addr_ops_smallbuf; + +/* Functions related to super block operations */ +extern void cifs_sb_active(struct super_block *sb); +extern void cifs_sb_deactive(struct super_block *sb); + +/* Functions related to inodes */ +extern const struct inode_operations cifs_dir_inode_ops; +extern struct inode *cifs_root_iget(struct super_block *); +extern int cifs_create(struct mnt_idmap *, struct inode *, + struct dentry *, umode_t, bool excl); +extern int cifs_atomic_open(struct inode *, struct dentry *, + struct file *, unsigned, umode_t); +extern struct dentry *cifs_lookup(struct inode *, struct dentry *, + unsigned int); +extern int cifs_unlink(struct inode *dir, struct dentry *dentry); +extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); +extern int cifs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, + umode_t, dev_t); +extern int cifs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *, + umode_t); +extern int cifs_rmdir(struct inode *, struct dentry *); +extern int cifs_rename2(struct mnt_idmap *, struct inode *, + struct dentry *, struct inode *, struct dentry *, + unsigned int); +extern int cifs_revalidate_file_attr(struct file *filp); +extern int cifs_revalidate_dentry_attr(struct dentry *); +extern int cifs_revalidate_file(struct file *filp); +extern int cifs_revalidate_dentry(struct dentry *); +extern int 
cifs_invalidate_mapping(struct inode *inode); +extern int cifs_revalidate_mapping(struct inode *inode); +extern int cifs_zap_mapping(struct inode *inode); +extern int cifs_getattr(struct mnt_idmap *, const struct path *, + struct kstat *, u32, unsigned int); +extern int cifs_setattr(struct mnt_idmap *, struct dentry *, + struct iattr *); +extern int cifs_fiemap(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); + +extern const struct inode_operations cifs_file_inode_ops; +extern const struct inode_operations cifs_symlink_inode_ops; +extern const struct inode_operations cifs_dfs_referral_inode_operations; + + +/* Functions related to files and directories */ +extern const struct file_operations cifs_file_ops; +extern const struct file_operations cifs_file_direct_ops; /* if directio mnt */ +extern const struct file_operations cifs_file_strict_ops; /* if strictio mnt */ +extern const struct file_operations cifs_file_nobrl_ops; /* no brlocks */ +extern const struct file_operations cifs_file_direct_nobrl_ops; +extern const struct file_operations cifs_file_strict_nobrl_ops; +extern int cifs_open(struct inode *inode, struct file *file); +extern int cifs_close(struct inode *inode, struct file *file); +extern int cifs_closedir(struct inode *inode, struct file *file); +extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to); +extern ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to); +extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); +extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); +extern ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from); +extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); - extern ssize_t cifs_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags); +extern int cifs_flock(struct file *pfile, int cmd, struct file_lock *plock); +extern int cifs_lock(struct 
file *, int, struct file_lock *); +extern int cifs_fsync(struct file *, loff_t, loff_t, int); +extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int); +extern int cifs_flush(struct file *, fl_owner_t id); +extern int cifs_file_mmap(struct file *file, struct vm_area_struct *vma); +extern int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma); +extern const struct file_operations cifs_dir_ops; +extern int cifs_dir_open(struct inode *inode, struct file *file); +extern int cifs_readdir(struct file *file, struct dir_context *ctx); +extern void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len); +extern void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len); +extern void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len); + +/* Functions related to dir entries */ +extern const struct dentry_operations cifs_dentry_ops; +extern const struct dentry_operations cifs_ci_dentry_ops; + +#ifdef CONFIG_CIFS_DFS_UPCALL +extern struct vfsmount *cifs_dfs_d_automount(struct path *path); +#else +static inline struct vfsmount *cifs_dfs_d_automount(struct path *path) +{ + return ERR_PTR(-EREMOTE); +} +#endif + +/* Functions related to symlinks */ +extern const char *cifs_get_link(struct dentry *, struct inode *, + struct delayed_call *); +extern int cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, + struct dentry *direntry, const char *symname); + +#ifdef CONFIG_CIFS_XATTR +extern const struct xattr_handler *cifs_xattr_handlers[]; +extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); +#else +# define cifs_xattr_handlers NULL +# define cifs_listxattr NULL +#endif + +extern ssize_t cifs_file_copychunk_range(unsigned int xid, + struct file *src_file, loff_t off, + struct file *dst_file, loff_t destoff, + size_t len, unsigned int flags); + +extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); +extern void cifs_setsize(struct inode 
*inode, loff_t offset); +extern int cifs_truncate_page(struct address_space *mapping, loff_t from); + +struct smb3_fs_context; +extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type, + int flags, struct smb3_fs_context *ctx); + +#ifdef CONFIG_CIFS_NFSD_EXPORT +extern const struct export_operations cifs_export_ops; +#endif /* CONFIG_CIFS_NFSD_EXPORT */ + +/* when changing internal version - update following two lines at same time */ +#define SMB3_PRODUCT_BUILD 43 +#define CIFS_VERSION "2.43" +#endif /* _CIFSFS_H */ diff --cc fs/smb/client/file.c index 051283386e229,0000000000000..f30f6ddc4b816 mode 100644,000000..100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@@ -1,5101 -1,0 +1,5085 @@@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * + * vfs operations that deal with files + * + * Copyright (C) International Business Machines Corp., 2002,2010 + * Author(s): Steve French (sfrench@us.ibm.com) + * Jeremy Allison (jra@samba.org) + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cifsfs.h" +#include "cifspdu.h" +#include "cifsglob.h" +#include "cifsproto.h" +#include "smb2proto.h" +#include "cifs_unicode.h" +#include "cifs_debug.h" +#include "cifs_fs_sb.h" +#include "fscache.h" +#include "smbdirect.h" +#include "fs_context.h" +#include "cifs_ioctl.h" +#include "cached_dir.h" + +/* + * Remove the dirty flags from a span of pages. 
+ */ +static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) { + if (xas_retry(&xas, folio)) + continue; + xas_pause(&xas); + rcu_read_unlock(); + folio_lock(folio); + folio_clear_dirty_for_io(folio); + folio_unlock(folio); + rcu_read_lock(); + } + + rcu_read_unlock(); +} + +/* + * Completion of write to server. + */ +void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + if (!len) + return; + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each(&xas, folio, end) { + if (xas_retry(&xas, folio)) + continue; + if (!folio_test_writeback(folio)) { + WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", + len, start, folio_index(folio), end); + continue; + } + + folio_detach_private(folio); + folio_end_writeback(folio); + } + + rcu_read_unlock(); +} + +/* + * Failure of write to server. + */ +void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + if (!len) + return; + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each(&xas, folio, end) { + if (xas_retry(&xas, folio)) + continue; + if (!folio_test_writeback(folio)) { + WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", + len, start, folio_index(folio), end); + continue; + } + + folio_set_error(folio); + folio_end_writeback(folio); + } + + rcu_read_unlock(); +} + +/* + * Redirty pages after a temporary failure. 
+ */ +void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len) +{ + struct address_space *mapping = inode->i_mapping; + struct folio *folio; + pgoff_t end; + + XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE); + + if (!len) + return; + + rcu_read_lock(); + + end = (start + len - 1) / PAGE_SIZE; + xas_for_each(&xas, folio, end) { + if (!folio_test_writeback(folio)) { + WARN_ONCE(1, "bad %x @%llx page %lx %lx\n", + len, start, folio_index(folio), end); + continue; + } + + filemap_dirty_folio(folio->mapping, folio); + folio_end_writeback(folio); + } + + rcu_read_unlock(); +} + +/* + * Mark as invalid, all open files on tree connections since they + * were closed when session to server was lost. + */ +void +cifs_mark_open_files_invalid(struct cifs_tcon *tcon) +{ + struct cifsFileInfo *open_file = NULL; + struct list_head *tmp; + struct list_head *tmp1; + + /* only send once per connect */ + spin_lock(&tcon->tc_lock); + if (tcon->status != TID_NEED_RECON) { + spin_unlock(&tcon->tc_lock); + return; + } + tcon->status = TID_IN_FILES_INVALIDATE; + spin_unlock(&tcon->tc_lock); + + /* list all files open on tree connection and mark them invalid */ + spin_lock(&tcon->open_file_lock); + list_for_each_safe(tmp, tmp1, &tcon->openFileList) { + open_file = list_entry(tmp, struct cifsFileInfo, tlist); + open_file->invalidHandle = true; + open_file->oplock_break_cancelled = true; + } + spin_unlock(&tcon->open_file_lock); + + invalidate_all_cached_dirs(tcon); + spin_lock(&tcon->tc_lock); + if (tcon->status == TID_IN_FILES_INVALIDATE) + tcon->status = TID_NEED_TCON; + spin_unlock(&tcon->tc_lock); + + /* + * BB Add call to invalidate_inodes(sb) for all superblocks mounted + * to this tcon. 
+ */ +} + +static inline int cifs_convert_flags(unsigned int flags) +{ + if ((flags & O_ACCMODE) == O_RDONLY) + return GENERIC_READ; + else if ((flags & O_ACCMODE) == O_WRONLY) + return GENERIC_WRITE; + else if ((flags & O_ACCMODE) == O_RDWR) { + /* GENERIC_ALL is too much permission to request + can cause unnecessary access denied on create */ + /* return GENERIC_ALL; */ + return (GENERIC_READ | GENERIC_WRITE); + } + + return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES | + FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA | + FILE_READ_DATA); +} + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY +static u32 cifs_posix_convert_flags(unsigned int flags) +{ + u32 posix_flags = 0; + + if ((flags & O_ACCMODE) == O_RDONLY) + posix_flags = SMB_O_RDONLY; + else if ((flags & O_ACCMODE) == O_WRONLY) + posix_flags = SMB_O_WRONLY; + else if ((flags & O_ACCMODE) == O_RDWR) + posix_flags = SMB_O_RDWR; + + if (flags & O_CREAT) { + posix_flags |= SMB_O_CREAT; + if (flags & O_EXCL) + posix_flags |= SMB_O_EXCL; + } else if (flags & O_EXCL) + cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. 
Ignoring O_EXCL\n", + current->comm, current->tgid); + + if (flags & O_TRUNC) + posix_flags |= SMB_O_TRUNC; + /* be safe and imply O_SYNC for O_DSYNC */ + if (flags & O_DSYNC) + posix_flags |= SMB_O_SYNC; + if (flags & O_DIRECTORY) + posix_flags |= SMB_O_DIRECTORY; + if (flags & O_NOFOLLOW) + posix_flags |= SMB_O_NOFOLLOW; + if (flags & O_DIRECT) + posix_flags |= SMB_O_DIRECT; + + return posix_flags; +} +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +static inline int cifs_get_disposition(unsigned int flags) +{ + if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + return FILE_CREATE; + else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + return FILE_OVERWRITE_IF; + else if ((flags & O_CREAT) == O_CREAT) + return FILE_OPEN_IF; + else if ((flags & O_TRUNC) == O_TRUNC) + return FILE_OVERWRITE; + else + return FILE_OPEN; +} + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY +int cifs_posix_open(const char *full_path, struct inode **pinode, + struct super_block *sb, int mode, unsigned int f_flags, + __u32 *poplock, __u16 *pnetfid, unsigned int xid) +{ + int rc; + FILE_UNIX_BASIC_INFO *presp_data; + __u32 posix_flags = 0; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_fattr fattr; + struct tcon_link *tlink; + struct cifs_tcon *tcon; + + cifs_dbg(FYI, "posix open %s\n", full_path); + + presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); + if (presp_data == NULL) + return -ENOMEM; + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + rc = PTR_ERR(tlink); + goto posix_open_ret; + } + + tcon = tlink_tcon(tlink); + mode &= ~current_umask(); + + posix_flags = cifs_posix_convert_flags(f_flags); + rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, + poplock, full_path, cifs_sb->local_nls, + cifs_remap(cifs_sb)); + cifs_put_tlink(tlink); + + if (rc) + goto posix_open_ret; + + if (presp_data->Type == cpu_to_le32(-1)) + goto posix_open_ret; /* open ok, caller does qpathinfo */ + + if (!pinode) + goto posix_open_ret; 
/* caller does not need info */ + + cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb); + + /* get new inode and set it up */ + if (*pinode == NULL) { + cifs_fill_uniqueid(sb, &fattr); + *pinode = cifs_iget(sb, &fattr); + if (!*pinode) { + rc = -ENOMEM; + goto posix_open_ret; + } + } else { + cifs_revalidate_mapping(*pinode); + rc = cifs_fattr_to_inode(*pinode, &fattr); + } + +posix_open_ret: + kfree(presp_data); + return rc; +} +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, + struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock, + struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf) +{ + int rc; + int desired_access; + int disposition; + int create_options = CREATE_NOT_DIR; + struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_open_parms oparms; + + if (!server->ops->open) + return -ENOSYS; + + desired_access = cifs_convert_flags(f_flags); + +/********************************************************************* + * open flag mapping table: + * + * POSIX Flag CIFS Disposition + * ---------- ---------------- + * O_CREAT FILE_OPEN_IF + * O_CREAT | O_EXCL FILE_CREATE + * O_CREAT | O_TRUNC FILE_OVERWRITE_IF + * O_TRUNC FILE_OVERWRITE + * none of the above FILE_OPEN + * + * Note that there is not a direct match between disposition + * FILE_SUPERSEDE (ie create whether or not file exists although + * O_CREAT | O_TRUNC is similar but truncates the existing + * file rather than creating a new file as FILE_SUPERSEDE does + * (which uses the attributes / metadata passed in on open call) + *? + *? O_SYNC is a reasonable match to CIFS writethrough flag + *? and the read write flags match reasonably. O_LARGEFILE + *? is irrelevant because largefile support is always used + *? by this client. 
Flags O_APPEND, O_DIRECT, O_DIRECTORY, + * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation + *********************************************************************/ + + disposition = cifs_get_disposition(f_flags); + + /* BB pass O_SYNC flag through on file attributes .. BB */ + + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (f_flags & O_SYNC) + create_options |= CREATE_WRITE_THROUGH; + + if (f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, + .desired_access = desired_access, + .create_options = cifs_create_options(cifs_sb, create_options), + .disposition = disposition, + .path = full_path, + .fid = fid, + }; + + rc = server->ops->open(xid, &oparms, oplock, buf); + if (rc) + return rc; + + /* TODO: Add support for calling posix query info but with passing in fid */ + if (tcon->unix_ext) + rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb, + xid); + else + rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, + xid, fid); + + if (rc) { + server->ops->close(xid, tcon, fid); + if (rc == -ESTALE) + rc = -EOPENSTALE; + } + + return rc; +} + +static bool +cifs_has_mand_locks(struct cifsInodeInfo *cinode) +{ + struct cifs_fid_locks *cur; + bool has_locks = false; + + down_read(&cinode->lock_sem); + list_for_each_entry(cur, &cinode->llist, llist) { + if (!list_empty(&cur->locks)) { + has_locks = true; + break; + } + } + up_read(&cinode->lock_sem); + return has_locks; +} + +void +cifs_down_write(struct rw_semaphore *sem) +{ + while (!down_write_trylock(sem)) + msleep(10); +} + +static void cifsFileInfo_put_work(struct work_struct *work); + +struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, + struct tcon_link *tlink, __u32 oplock, + const char *symlink_target) +{ + struct dentry *dentry = file_dentry(file); + struct inode *inode = d_inode(dentry); + struct cifsInodeInfo *cinode = CIFS_I(inode); + 
struct cifsFileInfo *cfile; + struct cifs_fid_locks *fdlocks; + struct cifs_tcon *tcon = tlink_tcon(tlink); + struct TCP_Server_Info *server = tcon->ses->server; + + cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); + if (cfile == NULL) + return cfile; + + fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL); + if (!fdlocks) { + kfree(cfile); + return NULL; + } + + if (symlink_target) { + cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL); + if (!cfile->symlink_target) { + kfree(fdlocks); + kfree(cfile); + return NULL; + } + } + + INIT_LIST_HEAD(&fdlocks->locks); + fdlocks->cfile = cfile; + cfile->llist = fdlocks; + + cfile->count = 1; + cfile->pid = current->tgid; + cfile->uid = current_fsuid(); + cfile->dentry = dget(dentry); + cfile->f_flags = file->f_flags; + cfile->invalidHandle = false; + cfile->deferred_close_scheduled = false; + cfile->tlink = cifs_get_tlink(tlink); + INIT_WORK(&cfile->oplock_break, cifs_oplock_break); + INIT_WORK(&cfile->put, cifsFileInfo_put_work); + INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close); + mutex_init(&cfile->fh_mutex); + spin_lock_init(&cfile->file_info_lock); + + cifs_sb_active(inode->i_sb); + + /* + * If the server returned a read oplock and we have mandatory brlocks, + * set oplock level to None. 
+ */ + if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { + cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); + oplock = 0; + } + + cifs_down_write(&cinode->lock_sem); + list_add(&fdlocks->llist, &cinode->llist); + up_write(&cinode->lock_sem); + + spin_lock(&tcon->open_file_lock); + if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) + oplock = fid->pending_open->oplock; + list_del(&fid->pending_open->olist); + + fid->purge_cache = false; + server->ops->set_fid(cfile, fid, oplock); + + list_add(&cfile->tlist, &tcon->openFileList); + atomic_inc(&tcon->num_local_opens); + + /* if readable file instance put first in list*/ + spin_lock(&cinode->open_file_lock); + if (file->f_mode & FMODE_READ) + list_add(&cfile->flist, &cinode->openFileList); + else + list_add_tail(&cfile->flist, &cinode->openFileList); + spin_unlock(&cinode->open_file_lock); + spin_unlock(&tcon->open_file_lock); + + if (fid->purge_cache) + cifs_zap_mapping(inode); + + file->private_data = cfile; + return cfile; +} + +struct cifsFileInfo * +cifsFileInfo_get(struct cifsFileInfo *cifs_file) +{ + spin_lock(&cifs_file->file_info_lock); + cifsFileInfo_get_locked(cifs_file); + spin_unlock(&cifs_file->file_info_lock); + return cifs_file; +} + +static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) +{ + struct inode *inode = d_inode(cifs_file->dentry); + struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct cifsLockInfo *li, *tmp; + struct super_block *sb = inode->i_sb; + + /* + * Delete any outstanding lock records. We'll lose them when the file + * is closed anyway. 
+ */ + cifs_down_write(&cifsi->lock_sem); + list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { + list_del(&li->llist); + cifs_del_lock_waiters(li); + kfree(li); + } + list_del(&cifs_file->llist->llist); + kfree(cifs_file->llist); + up_write(&cifsi->lock_sem); + + cifs_put_tlink(cifs_file->tlink); + dput(cifs_file->dentry); + cifs_sb_deactive(sb); + kfree(cifs_file->symlink_target); + kfree(cifs_file); +} + +static void cifsFileInfo_put_work(struct work_struct *work) +{ + struct cifsFileInfo *cifs_file = container_of(work, + struct cifsFileInfo, put); + + cifsFileInfo_put_final(cifs_file); +} + +/** + * cifsFileInfo_put - release a reference of file priv data + * + * Always potentially wait for oplock handler. See _cifsFileInfo_put(). + * + * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file + */ +void cifsFileInfo_put(struct cifsFileInfo *cifs_file) +{ + _cifsFileInfo_put(cifs_file, true, true); +} + +/** + * _cifsFileInfo_put - release a reference of file priv data + * + * This may involve closing the filehandle @cifs_file out on the + * server. Must be called without holding tcon->open_file_lock, + * cinode->open_file_lock and cifs_file->file_info_lock. + * + * If @wait_for_oplock_handler is true and we are releasing the last + * reference, wait for any running oplock break handler of the file + * and cancel any pending one. 
+ * + * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file + * @wait_oplock_handler: must be false if called from oplock_break_handler + * @offload: not offloaded on close and oplock breaks + * + */ +void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, + bool wait_oplock_handler, bool offload) +{ + struct inode *inode = d_inode(cifs_file->dentry); + struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink); + struct TCP_Server_Info *server = tcon->ses->server; + struct cifsInodeInfo *cifsi = CIFS_I(inode); + struct super_block *sb = inode->i_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_fid fid = {}; + struct cifs_pending_open open; + bool oplock_break_cancelled; + + spin_lock(&tcon->open_file_lock); + spin_lock(&cifsi->open_file_lock); + spin_lock(&cifs_file->file_info_lock); + if (--cifs_file->count > 0) { + spin_unlock(&cifs_file->file_info_lock); + spin_unlock(&cifsi->open_file_lock); + spin_unlock(&tcon->open_file_lock); + return; + } + spin_unlock(&cifs_file->file_info_lock); + + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + + /* store open in pending opens to make sure we don't miss lease break */ + cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open); + + /* remove it from the lists */ + list_del(&cifs_file->flist); + list_del(&cifs_file->tlist); + atomic_dec(&tcon->num_local_opens); + + if (list_empty(&cifsi->openFileList)) { + cifs_dbg(FYI, "closing last open instance for inode %p\n", + d_inode(cifs_file->dentry)); + /* + * In strict cache mode we need invalidate mapping on the last + * close because it may cause a error when we open this file + * again and get at least level II oplock. + */ + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) + set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags); + cifs_set_oplock_level(cifsi, 0); + } + + spin_unlock(&cifsi->open_file_lock); + spin_unlock(&tcon->open_file_lock); + + oplock_break_cancelled = wait_oplock_handler ? 
+ cancel_work_sync(&cifs_file->oplock_break) : false; + + if (!tcon->need_reconnect && !cifs_file->invalidHandle) { + struct TCP_Server_Info *server = tcon->ses->server; + unsigned int xid; + + xid = get_xid(); + if (server->ops->close_getattr) + server->ops->close_getattr(xid, tcon, cifs_file); + else if (server->ops->close) + server->ops->close(xid, tcon, &cifs_file->fid); + _free_xid(xid); + } + + if (oplock_break_cancelled) + cifs_done_oplock_break(cifsi); + + cifs_del_pending_open(&open); + + if (offload) + queue_work(fileinfo_put_wq, &cifs_file->put); + else + cifsFileInfo_put_final(cifs_file); +} + +int cifs_open(struct inode *inode, struct file *file) + +{ + int rc = -EACCES; + unsigned int xid; + __u32 oplock; + struct cifs_sb_info *cifs_sb; + struct TCP_Server_Info *server; + struct cifs_tcon *tcon; + struct tcon_link *tlink; + struct cifsFileInfo *cfile = NULL; + void *page; + const char *full_path; + bool posix_open_ok = false; + struct cifs_fid fid = {}; + struct cifs_pending_open open; + struct cifs_open_info_data data = {}; + + xid = get_xid(); + + cifs_sb = CIFS_SB(inode->i_sb); + if (unlikely(cifs_forced_shutdown(cifs_sb))) { + free_xid(xid); + return -EIO; + } + + tlink = cifs_sb_tlink(cifs_sb); + if (IS_ERR(tlink)) { + free_xid(xid); + return PTR_ERR(tlink); + } + tcon = tlink_tcon(tlink); + server = tcon->ses->server; + + page = alloc_dentry_path(); + full_path = build_path_from_dentry(file_dentry(file), page); + if (IS_ERR(full_path)) { + rc = PTR_ERR(full_path); + goto out; + } + + cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", + inode, file->f_flags, full_path); + + if (file->f_flags & O_DIRECT && + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + file->f_op = &cifs_file_direct_nobrl_ops; + else + file->f_op = &cifs_file_direct_ops; + } + + /* Get the cached handle as SMB2 close is deferred */ + rc = cifs_get_readable_path(tcon, full_path, &cfile); + if (rc == 0) { + if 
(file->f_flags == cfile->f_flags) { + file->private_data = cfile; + spin_lock(&CIFS_I(inode)->deferred_lock); + cifs_del_deferred_close(cfile); + spin_unlock(&CIFS_I(inode)->deferred_lock); + goto use_cache; + } else { + _cifsFileInfo_put(cfile, true, false); + } + } + + if (server->oplocks) + oplock = REQ_OPLOCK; + else + oplock = 0; + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (!tcon->broken_posix_open && tcon->unix_ext && + cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(tcon->fsUnixInfo.Capability))) { + /* can not refresh inode info since size could be stale */ + rc = cifs_posix_open(full_path, &inode, inode->i_sb, + cifs_sb->ctx->file_mode /* ignored */, + file->f_flags, &oplock, &fid.netfid, xid); + if (rc == 0) { + cifs_dbg(FYI, "posix open succeeded\n"); + posix_open_ok = true; + } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { + if (tcon->ses->serverNOS) + cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n", + tcon->ses->ip_addr, + tcon->ses->serverNOS); + tcon->broken_posix_open = true; + } else if ((rc != -EIO) && (rc != -EREMOTE) && + (rc != -EOPNOTSUPP)) /* path not found or net err */ + goto out; + /* + * Else fallthrough to retry open the old way on network i/o + * or DFS errors. 
+ */ + } +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + + cifs_add_pending_open(&fid, tlink, &open); + + if (!posix_open_ok) { + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &fid); + + rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid, + xid, &data); + if (rc) { + cifs_del_pending_open(&open); + goto out; + } + } + + cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target); + if (cfile == NULL) { + if (server->ops->close) + server->ops->close(xid, tcon, &fid); + cifs_del_pending_open(&open); + rc = -ENOMEM; + goto out; + } + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { + /* + * Time to set mode which we can not set earlier due to + * problems creating new read-only files. + */ + struct cifs_unix_set_info_args args = { + .mode = inode->i_mode, + .uid = INVALID_UID, /* no change */ + .gid = INVALID_GID, /* no change */ + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; + CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid, + cfile->pid); + } +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +use_cache: + fscache_use_cookie(cifs_inode_cookie(file_inode(file)), + file->f_mode & FMODE_WRITE); + if (file->f_flags & O_DIRECT && + (!((file->f_flags & O_ACCMODE) != O_RDONLY) || + file->f_flags & O_APPEND)) + cifs_invalidate_cache(file_inode(file), + FSCACHE_INVAL_DIO_WRITE); + +out: + free_dentry_path(page); + free_xid(xid); + cifs_put_tlink(tlink); + cifs_free_open_info(&data); + return rc; +} + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY +static int cifs_push_posix_locks(struct cifsFileInfo *cfile); +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +/* + * Try to reacquire byte range locks that were released when session + * to server was lost. 
+ */ +static int +cifs_relock_file(struct cifsFileInfo *cfile) +{ + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + int rc = 0; +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + + down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); + if (cinode->can_cache_brlcks) { + /* can cache locks - no need to relock */ + up_read(&cinode->lock_sem); + return rc; + } + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + rc = cifs_push_posix_locks(cfile); + else +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + rc = tcon->ses->server->ops->push_mand_locks(cfile); + + up_read(&cinode->lock_sem); + return rc; +} + +static int +cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) +{ + int rc = -EACCES; + unsigned int xid; + __u32 oplock; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + struct cifsInodeInfo *cinode; + struct inode *inode; + void *page; + const char *full_path; + int desired_access; + int disposition = FILE_OPEN; + int create_options = CREATE_NOT_DIR; + struct cifs_open_parms oparms; + + xid = get_xid(); + mutex_lock(&cfile->fh_mutex); + if (!cfile->invalidHandle) { + mutex_unlock(&cfile->fh_mutex); + free_xid(xid); + return 0; + } + + inode = d_inode(cfile->dentry); + cifs_sb = CIFS_SB(inode->i_sb); + tcon = tlink_tcon(cfile->tlink); + server = tcon->ses->server; + + /* + * Can not grab rename sem here because various ops, including those + * that already have the rename sem can end up causing writepage to get + * called and if the server was down that means we end up here, and we + * can never tell if the caller already has the rename_sem. 
+ */ + page = alloc_dentry_path(); + full_path = build_path_from_dentry(cfile->dentry, page); + if (IS_ERR(full_path)) { + mutex_unlock(&cfile->fh_mutex); + free_dentry_path(page); + free_xid(xid); + return PTR_ERR(full_path); + } + + cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n", + inode, cfile->f_flags, full_path); + + if (tcon->ses->server->oplocks) + oplock = REQ_OPLOCK; + else + oplock = 0; + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (tcon->unix_ext && cap_unix(tcon->ses) && + (CIFS_UNIX_POSIX_PATH_OPS_CAP & + le64_to_cpu(tcon->fsUnixInfo.Capability))) { + /* + * O_CREAT, O_EXCL and O_TRUNC already had their effect on the + * original open. Must mask them off for a reopen. + */ + unsigned int oflags = cfile->f_flags & + ~(O_CREAT | O_EXCL | O_TRUNC); + + rc = cifs_posix_open(full_path, NULL, inode->i_sb, + cifs_sb->ctx->file_mode /* ignored */, + oflags, &oplock, &cfile->fid.netfid, xid); + if (rc == 0) { + cifs_dbg(FYI, "posix reopen succeeded\n"); + oparms.reconnect = true; + goto reopen_success; + } + /* + * fallthrough to retry open the old way on errors, especially + * in the reconnect path it is important to retry hard + */ + } +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + + desired_access = cifs_convert_flags(cfile->f_flags); + + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (cfile->f_flags & O_SYNC) + create_options |= CREATE_WRITE_THROUGH; + + if (cfile->f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + + if (server->ops->get_lease_key) + server->ops->get_lease_key(inode, &cfile->fid); + + oparms = (struct cifs_open_parms) { + .tcon = tcon, + .cifs_sb = cifs_sb, + .desired_access = desired_access, + .create_options = cifs_create_options(cifs_sb, create_options), + .disposition = disposition, + .path = full_path, + .fid = &cfile->fid, + .reconnect = true, + }; + + /* + * Can not refresh inode by passing in file_info buf to be returned by + * ops->open and then calling get_inode_info with 
returned buf since + * file might have write behind data that needs to be flushed and server + * version of file size can be stale. If we knew for sure that inode was + * not dirty locally we could do this. + */ + rc = server->ops->open(xid, &oparms, &oplock, NULL); + if (rc == -ENOENT && oparms.reconnect == false) { + /* durable handle timeout is expired - open the file again */ + rc = server->ops->open(xid, &oparms, &oplock, NULL); + /* indicate that we need to relock the file */ + oparms.reconnect = true; + } + + if (rc) { + mutex_unlock(&cfile->fh_mutex); + cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); + cifs_dbg(FYI, "oplock: %d\n", oplock); + goto reopen_error_exit; + } + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY +reopen_success: +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + cfile->invalidHandle = false; + mutex_unlock(&cfile->fh_mutex); + cinode = CIFS_I(inode); + + if (can_flush) { + rc = filemap_write_and_wait(inode->i_mapping); + if (!is_interrupt_error(rc)) + mapping_set_error(inode->i_mapping, rc); + + if (tcon->posix_extensions) + rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid); + else if (tcon->unix_ext) + rc = cifs_get_inode_info_unix(&inode, full_path, + inode->i_sb, xid); + else + rc = cifs_get_inode_info(&inode, full_path, NULL, + inode->i_sb, xid, NULL); + } + /* + * Else we are writing out data to server already and could deadlock if + * we tried to flush data, and since we do not know if we have data that + * would invalidate the current end of file on the server we can not go + * to the server to get the new inode info. + */ + + /* + * If the server returned a read oplock and we have mandatory brlocks, + * set oplock level to None. 
+ */ + if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) { + cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n"); + oplock = 0; + } + + server->ops->set_fid(cfile, &cfile->fid, oplock); + if (oparms.reconnect) + cifs_relock_file(cfile); + +reopen_error_exit: + free_dentry_path(page); + free_xid(xid); + return rc; +} + +void smb2_deferred_work_close(struct work_struct *work) +{ + struct cifsFileInfo *cfile = container_of(work, + struct cifsFileInfo, deferred.work); + + spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); + cifs_del_deferred_close(cfile); + cfile->deferred_close_scheduled = false; + spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock); + _cifsFileInfo_put(cfile, true, false); +} + +int cifs_close(struct inode *inode, struct file *file) +{ + struct cifsFileInfo *cfile; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_deferred_close *dclose; + + cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE); + + if (file->private_data != NULL) { + cfile = file->private_data; + file->private_data = NULL; + dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); + if ((cinode->oplock == CIFS_CACHE_RHW_FLG) && + cinode->lease_granted && + !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && + dclose) { + if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { + inode->i_ctime = inode->i_mtime = current_time(inode); + } + spin_lock(&cinode->deferred_lock); + cifs_add_deferred_close(cfile, dclose); + if (cfile->deferred_close_scheduled && + delayed_work_pending(&cfile->deferred)) { + /* + * If there is no pending work, mod_delayed_work queues new work. + * So, Increase the ref count to avoid use-after-free. 
+ */ + if (!mod_delayed_work(deferredclose_wq, + &cfile->deferred, cifs_sb->ctx->closetimeo)) + cifsFileInfo_get(cfile); + } else { + /* Deferred close for files */ + queue_delayed_work(deferredclose_wq, + &cfile->deferred, cifs_sb->ctx->closetimeo); + cfile->deferred_close_scheduled = true; + spin_unlock(&cinode->deferred_lock); + return 0; + } + spin_unlock(&cinode->deferred_lock); + _cifsFileInfo_put(cfile, true, false); + } else { + _cifsFileInfo_put(cfile, true, false); + kfree(dclose); + } + } + + /* return code from the ->release op is always ignored */ + return 0; +} + +void +cifs_reopen_persistent_handles(struct cifs_tcon *tcon) +{ + struct cifsFileInfo *open_file, *tmp; + struct list_head tmp_list; + + if (!tcon->use_persistent || !tcon->need_reopen_files) + return; + + tcon->need_reopen_files = false; + + cifs_dbg(FYI, "Reopen persistent handles\n"); + INIT_LIST_HEAD(&tmp_list); + + /* list all files open on tree connection, reopen resilient handles */ + spin_lock(&tcon->open_file_lock); + list_for_each_entry(open_file, &tcon->openFileList, tlist) { + if (!open_file->invalidHandle) + continue; + cifsFileInfo_get(open_file); + list_add_tail(&open_file->rlist, &tmp_list); + } + spin_unlock(&tcon->open_file_lock); + + list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) { + if (cifs_reopen_file(open_file, false /* do not flush */)) + tcon->need_reopen_files = true; + list_del_init(&open_file->rlist); + cifsFileInfo_put(open_file); + } +} + +int cifs_closedir(struct inode *inode, struct file *file) +{ + int rc = 0; + unsigned int xid; + struct cifsFileInfo *cfile = file->private_data; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + char *buf; + + cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode); + + if (cfile == NULL) + return rc; + + xid = get_xid(); + tcon = tlink_tcon(cfile->tlink); + server = tcon->ses->server; + + cifs_dbg(FYI, "Freeing private data in close dir\n"); + spin_lock(&cfile->file_info_lock); + if 
(server->ops->dir_needs_close(cfile)) { + cfile->invalidHandle = true; + spin_unlock(&cfile->file_info_lock); + if (server->ops->close_dir) + rc = server->ops->close_dir(xid, tcon, &cfile->fid); + else + rc = -ENOSYS; + cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc); + /* not much we can do if it fails anyway, ignore rc */ + rc = 0; + } else + spin_unlock(&cfile->file_info_lock); + + buf = cfile->srch_inf.ntwrk_buf_start; + if (buf) { + cifs_dbg(FYI, "closedir free smb buf in srch struct\n"); + cfile->srch_inf.ntwrk_buf_start = NULL; + if (cfile->srch_inf.smallBuf) + cifs_small_buf_release(buf); + else + cifs_buf_release(buf); + } + + cifs_put_tlink(cfile->tlink); + kfree(file->private_data); + file->private_data = NULL; + /* BB can we lock the filestruct while this is going on? */ + free_xid(xid); + return rc; +} + +static struct cifsLockInfo * +cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags) +{ + struct cifsLockInfo *lock = + kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL); + if (!lock) + return lock; + lock->offset = offset; + lock->length = length; + lock->type = type; + lock->pid = current->tgid; + lock->flags = flags; + INIT_LIST_HEAD(&lock->blist); + init_waitqueue_head(&lock->block_q); + return lock; +} + +void +cifs_del_lock_waiters(struct cifsLockInfo *lock) +{ + struct cifsLockInfo *li, *tmp; + list_for_each_entry_safe(li, tmp, &lock->blist, blist) { + list_del_init(&li->blist); + wake_up(&li->block_q); + } +} + +#define CIFS_LOCK_OP 0 +#define CIFS_READ_OP 1 +#define CIFS_WRITE_OP 2 + +/* @rw_check : 0 - no op, 1 - read, 2 - write */ +static bool +cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset, + __u64 length, __u8 type, __u16 flags, + struct cifsFileInfo *cfile, + struct cifsLockInfo **conf_lock, int rw_check) +{ + struct cifsLockInfo *li; + struct cifsFileInfo *cur_cfile = fdlocks->cfile; + struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + + list_for_each_entry(li, 
&fdlocks->locks, llist) { + if (offset + length <= li->offset || + offset >= li->offset + li->length) + continue; + if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid && + server->ops->compare_fids(cfile, cur_cfile)) { + /* shared lock prevents write op through the same fid */ + if (!(li->type & server->vals->shared_lock_type) || + rw_check != CIFS_WRITE_OP) + continue; + } + if ((type & server->vals->shared_lock_type) && + ((server->ops->compare_fids(cfile, cur_cfile) && + current->tgid == li->pid) || type == li->type)) + continue; + if (rw_check == CIFS_LOCK_OP && + (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) && + server->ops->compare_fids(cfile, cur_cfile)) + continue; + if (conf_lock) + *conf_lock = li; + return true; + } + return false; +} + +bool +cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length, + __u8 type, __u16 flags, + struct cifsLockInfo **conf_lock, int rw_check) +{ + bool rc = false; + struct cifs_fid_locks *cur; + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); + + list_for_each_entry(cur, &cinode->llist, llist) { + rc = cifs_find_fid_lock_conflict(cur, offset, length, type, + flags, cfile, conf_lock, + rw_check); + if (rc) + break; + } + + return rc; +} + +/* + * Check if there is another lock that prevents us to set the lock (mandatory + * style). If such a lock exists, update the flock structure with its + * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks + * or leave it the same if we can't. Returns 0 if we don't need to request to + * the server or 1 otherwise. 
+ */
+static int
+cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
+	       __u8 type, struct file_lock *flock)
+{
+	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
+	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
+	struct cifsLockInfo *conflict;
+	int need_server = 0;
+
+	down_read(&cinode->lock_sem);
+
+	if (cifs_find_lock_conflict(cfile, offset, length, type,
+				    flock->fl_flags, &conflict,
+				    CIFS_LOCK_OP)) {
+		/* describe the conflicting lock to the caller */
+		flock->fl_start = conflict->offset;
+		flock->fl_end = conflict->offset + conflict->length - 1;
+		flock->fl_pid = conflict->pid;
+		flock->fl_type =
+			(conflict->type & server->vals->shared_lock_type) ?
+			F_RDLCK : F_WRLCK;
+	} else if (cinode->can_cache_brlcks) {
+		/* brlocks are cached and nothing conflicts locally */
+		flock->fl_type = F_UNLCK;
+	} else {
+		/* no local conflict found, but the server has to be asked */
+		need_server = 1;
+	}
+
+	up_read(&cinode->lock_sem);
+	return need_server;
+}
+
+/*
+ * Append @lock to the lock list of @cfile. The inode's lock_sem is taken
+ * for writing around the list update.
+ */
+static void
+cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
+{
+	struct inode *inode = d_inode(cfile->dentry);
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+
+	cifs_down_write(&cinode->lock_sem);
+	list_add_tail(&lock->llist, &cfile->llist->locks);
+	up_write(&cinode->lock_sem);
+}
+
+/*
+ * Set the byte-range lock (mandatory style). Returns:
+ * 1) 0, if we set the lock and don't need to request to the server;
+ * 2) 1, if no locks prevent us but we need to request to the server;
+ * 3) -EACCES, if there is a lock that prevents us and wait is false.
+ */ +static int +cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, + bool wait) +{ + struct cifsLockInfo *conf_lock; + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); + bool exist; + int rc = 0; + +try_again: + exist = false; + cifs_down_write(&cinode->lock_sem); + + exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, + lock->type, lock->flags, &conf_lock, + CIFS_LOCK_OP); + if (!exist && cinode->can_cache_brlcks) { + list_add_tail(&lock->llist, &cfile->llist->locks); + up_write(&cinode->lock_sem); + return rc; + } + + if (!exist) + rc = 1; + else if (!wait) + rc = -EACCES; + else { + list_add_tail(&lock->blist, &conf_lock->blist); + up_write(&cinode->lock_sem); + rc = wait_event_interruptible(lock->block_q, + (lock->blist.prev == &lock->blist) && + (lock->blist.next == &lock->blist)); + if (!rc) + goto try_again; + cifs_down_write(&cinode->lock_sem); + list_del_init(&lock->blist); + } + + up_write(&cinode->lock_sem); + return rc; +} + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY +/* + * Check if there is another lock that prevents us to set the lock (posix + * style). If such a lock exists, update the flock structure with its + * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks + * or leave it the same if we can't. Returns 0 if we don't need to request to + * the server or 1 otherwise. + */ +static int +cifs_posix_lock_test(struct file *file, struct file_lock *flock) +{ + int rc = 0; + struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); + unsigned char saved_type = flock->fl_type; + + if ((flock->fl_flags & FL_POSIX) == 0) + return 1; + + down_read(&cinode->lock_sem); + posix_test_lock(file, flock); + + if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) { + flock->fl_type = saved_type; + rc = 1; + } + + up_read(&cinode->lock_sem); + return rc; +} + +/* + * Set the byte-range lock (posix style). 
Returns: + * 1) <0, if the error occurs while setting the lock; + * 2) 0, if we set the lock and don't need to request to the server; + * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock; + * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server. + */ +static int +cifs_posix_lock_set(struct file *file, struct file_lock *flock) +{ + struct cifsInodeInfo *cinode = CIFS_I(file_inode(file)); + int rc = FILE_LOCK_DEFERRED + 1; + + if ((flock->fl_flags & FL_POSIX) == 0) + return rc; + + cifs_down_write(&cinode->lock_sem); + if (!cinode->can_cache_brlcks) { + up_write(&cinode->lock_sem); + return rc; + } + + rc = posix_lock_file(file, flock, NULL); + up_write(&cinode->lock_sem); + return rc; +} + +int +cifs_push_mandatory_locks(struct cifsFileInfo *cfile) +{ + unsigned int xid; + int rc = 0, stored_rc; + struct cifsLockInfo *li, *tmp; + struct cifs_tcon *tcon; + unsigned int num, max_num, max_buf; + LOCKING_ANDX_RANGE *buf, *cur; + static const int types[] = { + LOCKING_ANDX_LARGE_FILES, + LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES + }; + int i; + + xid = get_xid(); + tcon = tlink_tcon(cfile->tlink); + + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it before using. 
+ */ + max_buf = tcon->ses->server->maxBuf; + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) { + free_xid(xid); + return -EINVAL; + } + + BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > + PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), + PAGE_SIZE); + max_num = (max_buf - sizeof(struct smb_hdr)) / + sizeof(LOCKING_ANDX_RANGE); + buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); + if (!buf) { + free_xid(xid); + return -ENOMEM; + } + + for (i = 0; i < 2; i++) { + cur = buf; + num = 0; + list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { + if (li->type != types[i]) + continue; + cur->Pid = cpu_to_le16(li->pid); + cur->LengthLow = cpu_to_le32((u32)li->length); + cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); + cur->OffsetLow = cpu_to_le32((u32)li->offset); + cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); + if (++num == max_num) { + stored_rc = cifs_lockv(xid, tcon, + cfile->fid.netfid, + (__u8)li->type, 0, num, + buf); + if (stored_rc) + rc = stored_rc; + cur = buf; + num = 0; + } else + cur++; + } + + if (num) { + stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, + (__u8)types[i], 0, num, buf); + if (stored_rc) + rc = stored_rc; + } + } + + kfree(buf); + free_xid(xid); + return rc; +} + +static __u32 +hash_lockowner(fl_owner_t owner) +{ + return cifs_lock_secret ^ hash32_ptr((const void *)owner); +} +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +struct lock_to_push { + struct list_head llist; + __u64 offset; + __u64 length; + __u32 pid; + __u16 netfid; + __u8 type; +}; + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY +static int +cifs_push_posix_locks(struct cifsFileInfo *cfile) +{ + struct inode *inode = d_inode(cfile->dentry); + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct file_lock *flock; + struct file_lock_context *flctx = locks_inode_context(inode); + unsigned int count = 0, i; + int rc = 0, xid, type; + struct list_head 
locks_to_send, *el; + struct lock_to_push *lck, *tmp; + __u64 length; + + xid = get_xid(); + + if (!flctx) + goto out; + + spin_lock(&flctx->flc_lock); + list_for_each(el, &flctx->flc_posix) { + count++; + } + spin_unlock(&flctx->flc_lock); + + INIT_LIST_HEAD(&locks_to_send); + + /* + * Allocating count locks is enough because no FL_POSIX locks can be + * added to the list while we are holding cinode->lock_sem that + * protects locking operations of this inode. + */ + for (i = 0; i < count; i++) { + lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL); + if (!lck) { + rc = -ENOMEM; + goto err_out; + } + list_add_tail(&lck->llist, &locks_to_send); + } + + el = locks_to_send.next; + spin_lock(&flctx->flc_lock); + list_for_each_entry(flock, &flctx->flc_posix, fl_list) { + if (el == &locks_to_send) { + /* + * The list ended. We don't have enough allocated + * structures - something is really wrong. + */ + cifs_dbg(VFS, "Can't push all brlocks!\n"); + break; + } + length = cifs_flock_len(flock); + if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK) + type = CIFS_RDLCK; + else + type = CIFS_WRLCK; + lck = list_entry(el, struct lock_to_push, llist); + lck->pid = hash_lockowner(flock->fl_owner); + lck->netfid = cfile->fid.netfid; + lck->length = length; + lck->type = type; + lck->offset = flock->fl_start; + } + spin_unlock(&flctx->flc_lock); + + list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { + int stored_rc; + + stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid, + lck->offset, lck->length, NULL, + lck->type, 0); + if (stored_rc) + rc = stored_rc; + list_del(&lck->llist); + kfree(lck); + } + +out: + free_xid(xid); + return rc; +err_out: + list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { + list_del(&lck->llist); + kfree(lck); + } + goto out; +} +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +static int +cifs_push_locks(struct cifsFileInfo *cfile) +{ + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); + struct 
cifs_tcon *tcon = tlink_tcon(cfile->tlink); + int rc = 0; +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + + /* we are going to update can_cache_brlcks here - need a write access */ + cifs_down_write(&cinode->lock_sem); + if (!cinode->can_cache_brlcks) { + up_write(&cinode->lock_sem); + return rc; + } + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + rc = cifs_push_posix_locks(cfile); + else +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + rc = tcon->ses->server->ops->push_mand_locks(cfile); + + cinode->can_cache_brlcks = false; + up_write(&cinode->lock_sem); + return rc; +} + +static void +cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock, + bool *wait_flag, struct TCP_Server_Info *server) +{ + if (flock->fl_flags & FL_POSIX) + cifs_dbg(FYI, "Posix\n"); + if (flock->fl_flags & FL_FLOCK) + cifs_dbg(FYI, "Flock\n"); + if (flock->fl_flags & FL_SLEEP) { + cifs_dbg(FYI, "Blocking lock\n"); + *wait_flag = true; + } + if (flock->fl_flags & FL_ACCESS) + cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n"); + if (flock->fl_flags & FL_LEASE) + cifs_dbg(FYI, "Lease on file - not implemented yet\n"); + if (flock->fl_flags & + (~(FL_POSIX | FL_FLOCK | FL_SLEEP | + FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK))) + cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags); + + *type = server->vals->large_lock_type; + if (flock->fl_type == F_WRLCK) { + cifs_dbg(FYI, "F_WRLCK\n"); + *type |= server->vals->exclusive_lock_type; + *lock = 1; + } else if (flock->fl_type == F_UNLCK) { + cifs_dbg(FYI, "F_UNLCK\n"); + *type |= server->vals->unlock_lock_type; + *unlock = 1; + /* Check if unlock includes more than one lock range */ + } else if (flock->fl_type == F_RDLCK) { + 
cifs_dbg(FYI, "F_RDLCK\n"); + *type |= server->vals->shared_lock_type; + *lock = 1; + } else if (flock->fl_type == F_EXLCK) { + cifs_dbg(FYI, "F_EXLCK\n"); + *type |= server->vals->exclusive_lock_type; + *lock = 1; + } else if (flock->fl_type == F_SHLCK) { + cifs_dbg(FYI, "F_SHLCK\n"); + *type |= server->vals->shared_lock_type; + *lock = 1; + } else + cifs_dbg(FYI, "Unknown type of lock\n"); +} + +static int +cifs_getlk(struct file *file, struct file_lock *flock, __u32 type, + bool wait_flag, bool posix_lck, unsigned int xid) +{ + int rc = 0; + __u64 length = cifs_flock_len(flock); + struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + __u16 netfid = cfile->fid.netfid; + + if (posix_lck) { + int posix_lock_type; + + rc = cifs_posix_lock_test(file, flock); + if (!rc) + return rc; + + if (type & server->vals->shared_lock_type) + posix_lock_type = CIFS_RDLCK; + else + posix_lock_type = CIFS_WRLCK; + rc = CIFSSMBPosixLock(xid, tcon, netfid, + hash_lockowner(flock->fl_owner), + flock->fl_start, length, flock, + posix_lock_type, wait_flag); + return rc; + } +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + + rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock); + if (!rc) + return rc; + + /* BB we could chain these into one lock request BB */ + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type, + 1, 0, false); + if (rc == 0) { + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type, 0, 1, false); + flock->fl_type = F_UNLCK; + if (rc != 0) + cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", + rc); + return 0; + } + + if (type & server->vals->shared_lock_type) { + flock->fl_type = F_WRLCK; + return 0; + } + + type &= ~server->vals->exclusive_lock_type; + + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + 
type | server->vals->shared_lock_type, + 1, 0, false); + if (rc == 0) { + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type | server->vals->shared_lock_type, 0, 1, false); + flock->fl_type = F_RDLCK; + if (rc != 0) + cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n", + rc); + } else + flock->fl_type = F_WRLCK; + + return 0; +} + +void +cifs_move_llist(struct list_head *source, struct list_head *dest) +{ + struct list_head *li, *tmp; + list_for_each_safe(li, tmp, source) + list_move(li, dest); +} + +void +cifs_free_llist(struct list_head *llist) +{ + struct cifsLockInfo *li, *tmp; + list_for_each_entry_safe(li, tmp, llist, llist) { + cifs_del_lock_waiters(li); + list_del(&li->llist); + kfree(li); + } +} + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY +int +cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, + unsigned int xid) +{ + int rc = 0, stored_rc; + static const int types[] = { + LOCKING_ANDX_LARGE_FILES, + LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES + }; + unsigned int i; + unsigned int max_num, num, max_buf; + LOCKING_ANDX_RANGE *buf, *cur; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); + struct cifsLockInfo *li, *tmp; + __u64 length = cifs_flock_len(flock); + struct list_head tmp_llist; + + INIT_LIST_HEAD(&tmp_llist); + + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it before using. 
+ */ + max_buf = tcon->ses->server->maxBuf; + if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) + return -EINVAL; + + BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) > + PAGE_SIZE); + max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr), + PAGE_SIZE); + max_num = (max_buf - sizeof(struct smb_hdr)) / + sizeof(LOCKING_ANDX_RANGE); + buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); + if (!buf) + return -ENOMEM; + + cifs_down_write(&cinode->lock_sem); + for (i = 0; i < 2; i++) { + cur = buf; + num = 0; + list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { + if (flock->fl_start > li->offset || + (flock->fl_start + length) < + (li->offset + li->length)) + continue; + if (current->tgid != li->pid) + continue; + if (types[i] != li->type) + continue; + if (cinode->can_cache_brlcks) { + /* + * We can cache brlock requests - simply remove + * a lock from the file's list. + */ + list_del(&li->llist); + cifs_del_lock_waiters(li); + kfree(li); + continue; + } + cur->Pid = cpu_to_le16(li->pid); + cur->LengthLow = cpu_to_le32((u32)li->length); + cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); + cur->OffsetLow = cpu_to_le32((u32)li->offset); + cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); + /* + * We need to save a lock here to let us add it again to + * the file's list if the unlock range request fails on + * the server. + */ + list_move(&li->llist, &tmp_llist); + if (++num == max_num) { + stored_rc = cifs_lockv(xid, tcon, + cfile->fid.netfid, + li->type, num, 0, buf); + if (stored_rc) { + /* + * We failed on the unlock range + * request - add all locks from the tmp + * list to the head of the file's list. + */ + cifs_move_llist(&tmp_llist, + &cfile->llist->locks); + rc = stored_rc; + } else + /* + * The unlock range request succeed - + * free the tmp list. 
+ */ + cifs_free_llist(&tmp_llist); + cur = buf; + num = 0; + } else + cur++; + } + if (num) { + stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid, + types[i], num, 0, buf); + if (stored_rc) { + cifs_move_llist(&tmp_llist, + &cfile->llist->locks); + rc = stored_rc; + } else + cifs_free_llist(&tmp_llist); + } + } + + up_write(&cinode->lock_sem); + kfree(buf); + return rc; +} +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + +static int +cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, + bool wait_flag, bool posix_lck, int lock, int unlock, + unsigned int xid) +{ + int rc = 0; + __u64 length = cifs_flock_len(flock); + struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; + struct inode *inode = d_inode(cfile->dentry); + +#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY + if (posix_lck) { + int posix_lock_type; + + rc = cifs_posix_lock_set(file, flock); + if (rc <= FILE_LOCK_DEFERRED) + return rc; + + if (type & server->vals->shared_lock_type) + posix_lock_type = CIFS_RDLCK; + else + posix_lock_type = CIFS_WRLCK; + + if (unlock == 1) + posix_lock_type = CIFS_UNLCK; + + rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid, + hash_lockowner(flock->fl_owner), + flock->fl_start, length, + NULL, posix_lock_type, wait_flag); + goto out; + } +#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ + if (lock) { + struct cifsLockInfo *lock; + + lock = cifs_lock_init(flock->fl_start, length, type, + flock->fl_flags); + if (!lock) + return -ENOMEM; + + rc = cifs_lock_add_if(cfile, lock, wait_flag); + if (rc < 0) { + kfree(lock); + return rc; + } + if (!rc) + goto out; + + /* + * Windows 7 server can delay breaking lease from read to None + * if we set a byte-range lock on a file - break it explicitly + * before sending the lock to the server to be sure the next + * read won't conflict with non-overlapted locks due to + * pagereading. 
+ */ + if (!CIFS_CACHE_WRITE(CIFS_I(inode)) && + CIFS_CACHE_READ(CIFS_I(inode))) { + cifs_zap_mapping(inode); + cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n", + inode); + CIFS_I(inode)->oplock = 0; + } + + rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, + type, 1, 0, wait_flag); + if (rc) { + kfree(lock); + return rc; + } + + cifs_lock_add(cfile, lock); + } else if (unlock) + rc = server->ops->mand_unlock_range(cfile, flock, xid); + +out: + if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) { + /* + * If this is a request to remove all locks because we + * are closing the file, it doesn't matter if the + * unlocking failed as both cifs.ko and the SMB server + * remove the lock on file close + */ + if (rc) { + cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc); + if (!(flock->fl_flags & FL_CLOSE)) + return rc; + } + rc = locks_lock_file_wait(file, flock); + } + return rc; +} + +int cifs_flock(struct file *file, int cmd, struct file_lock *fl) +{ + int rc, xid; + int lock = 0, unlock = 0; + bool wait_flag = false; + bool posix_lck = false; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct cifsFileInfo *cfile; + __u32 type; + + xid = get_xid(); + + if (!(fl->fl_flags & FL_FLOCK)) { + rc = -ENOLCK; + free_xid(xid); + return rc; + } + + cfile = (struct cifsFileInfo *)file->private_data; + tcon = tlink_tcon(cfile->tlink); + + cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag, + tcon->ses->server); + cifs_sb = CIFS_FILE_SB(file); + + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + posix_lck = true; + + if (!lock && !unlock) { + /* + * if no lock or unlock then nothing to do since we do not + * know what it is + */ + rc = -EOPNOTSUPP; + free_xid(xid); + return rc; + } + + rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock, + xid); + free_xid(xid); + return rc; + + +} + +int 
cifs_lock(struct file *file, int cmd, struct file_lock *flock) +{ + int rc, xid; + int lock = 0, unlock = 0; + bool wait_flag = false; + bool posix_lck = false; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct cifsFileInfo *cfile; + __u32 type; + + rc = -EACCES; + xid = get_xid(); + + cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd, + flock->fl_flags, flock->fl_type, (long long)flock->fl_start, + (long long)flock->fl_end); + + cfile = (struct cifsFileInfo *)file->private_data; + tcon = tlink_tcon(cfile->tlink); + + cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, + tcon->ses->server); + cifs_sb = CIFS_FILE_SB(file); + set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags); + + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + posix_lck = true; + /* + * BB add code here to normalize offset and length to account for + * negative length which we can not accept over the wire. + */ + if (IS_GETLK(cmd)) { + rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid); + free_xid(xid); + return rc; + } + + if (!lock && !unlock) { + /* + * if no lock or unlock then nothing to do since we do not + * know what it is + */ + free_xid(xid); + return -EOPNOTSUPP; + } + + rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock, + xid); + free_xid(xid); + return rc; +} + +/* + * update the file size (if needed) after a write. 
Should be called with
+ * the inode->i_lock held
+ *
+ * The cached server EOF is only ever raised here, never lowered.
+ */
+void
+cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
+		      unsigned int bytes_written)
+{
+	loff_t end_of_write = offset + bytes_written;
+
+	if (end_of_write > cifsi->server_eof)
+		cifsi->server_eof = end_of_write;
+}
+/*
+ * Write @write_size bytes from @write_data to the server through @open_file,
+ * starting at *@offset, using the server's sync_write op (-ENOSYS if the
+ * server has none). Each request carries at most wp_retry_size() bytes and
+ * is reissued while it returns -EAGAIN, reopening the handle first when it
+ * is marked invalid. *@offset and the cached server EOF advance after every
+ * successful request; i_size is raised at the end if *@offset went past it.
+ *
+ * Returns the number of bytes written. If nothing was written, the rc of
+ * the failing request is returned instead (0 when the server accepted zero
+ * bytes).
+ */
+static ssize_t
+cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
+	   size_t write_size, loff_t *offset)
+{
+	int rc = 0;
+	unsigned int bytes_written = 0;
+	unsigned int total_written;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	unsigned int xid;
+	struct dentry *dentry = open_file->dentry;
+	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
+	struct cifs_io_parms io_parms = {0};
+
+	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
+		 write_size, *offset, dentry);
+
+	tcon = tlink_tcon(open_file->tlink);
+	server = tcon->ses->server;
+
+	if (!server->ops->sync_write)
+		return -ENOSYS;
+
+	xid = get_xid();
+
+	for (total_written = 0; write_size > total_written;
+	     total_written += bytes_written) {
+		rc = -EAGAIN;	/* forces at least one pass of the retry loop */
+		while (rc == -EAGAIN) {
+			struct kvec iov[2];
+			unsigned int len;
+
+			if (open_file->invalidHandle) {
+				/* we could deadlock if we called
+				   filemap_fdatawait from here so tell
+				   reopen_file not to flush data to
+				   server now */
+				rc = cifs_reopen_file(open_file, false);
+				if (rc != 0)
+					break;
+			}
+
+			len = min(server->ops->wp_retry_size(d_inode(dentry)),
+				  (unsigned int)write_size - total_written);
+			/* iov[0] is reserved for smb header */
+			iov[1].iov_base = (char *)write_data + total_written;
+			iov[1].iov_len = len;
+			io_parms.pid = pid;
+			io_parms.tcon = tcon;
+			io_parms.offset = *offset;
+			io_parms.length = len;
+			rc = server->ops->sync_write(xid, &open_file->fid,
+					&io_parms, &bytes_written, iov, 1);
+		}
+		if (rc || (bytes_written == 0)) {
+			if (total_written)
+				break;	/* report the partial write instead */
+			else {
+				free_xid(xid);
+				return rc;
+			}
+		} else {
+			spin_lock(&d_inode(dentry)->i_lock);
+			cifs_update_eof(cifsi, *offset, bytes_written);
+			spin_unlock(&d_inode(dentry)->i_lock);
+			*offset += bytes_written;
+		}
+	}
+
+	cifs_stats_bytes_written(tcon, total_written);
+
+	if (total_written > 0) {
+		spin_lock(&d_inode(dentry)->i_lock);
+		if (*offset > d_inode(dentry)->i_size) {
+			i_size_write(d_inode(dentry), *offset);
+			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;	/* size rounded up to 512-byte units */
+		}
+		spin_unlock(&d_inode(dentry)->i_lock);
+	}
+	mark_inode_dirty_sync(d_inode(dentry));
+	free_xid(xid);
+	return total_written;
+}
+/*
+ * Find a handle on @cifs_inode that was opened for reading and is not
+ * marked invalid. It is returned with an extra reference taken by
+ * cifsFileInfo_get(); NULL is returned if there is none.
+ * @fsuid_only limits the search to handles whose uid equals
+ * current_fsuid(); it is ignored unless the mount has CIFS_MOUNT_MULTIUSER.
+ */
+struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
+					bool fsuid_only)
+{
+	struct cifsFileInfo *open_file = NULL;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
+
+	/* only filter by fsuid on multiuser mounts */
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
+		fsuid_only = false;
+
+	spin_lock(&cifs_inode->open_file_lock);
+	/* we could simply get the first_list_entry since write-only entries
+	   are always at the end of the list but since the first entry might
+	   have a close pending, we go through the whole list */
+	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
+		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
+			continue;
+		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
+			if ((!open_file->invalidHandle)) {
+				/* found a good file */
+				/* lock it so it will not be closed on us */
+				cifsFileInfo_get(open_file);
+				spin_unlock(&cifs_inode->open_file_lock);
+				return open_file;
+			} /* else might as well continue, and look for
+			     another, or simply have the caller reopen it
+			     again rather than trying to fix this handle */
+		} else /* write only file */
+			break; /* write only files are last so must be done */
+	}
+	spin_unlock(&cifs_inode->open_file_lock);
+	return NULL;
+}
+
+/*
+ * Return -EBADF if no handle is found and general rc otherwise
+ *
+ * On success (0) *@ret_file is a writable handle carrying an extra
+ * reference. Handles whose pid equals current->tgid are tried first, then
+ * any handle. In @flags, FIND_WR_FSUID_ONLY restricts the search to handles
+ * of current_fsuid() (multiuser mounts only) and FIND_WR_WITH_DELETE
+ * requires DELETE access on the handle. If only an invalidated handle
+ * matches, it is reopened; when the reopen fails the handle is moved to the
+ * tail of the list and the search is repeated, bounded by MAX_REOPEN_ATT.
+ */
+int
+cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
+		       struct cifsFileInfo **ret_file)
+{
+	struct cifsFileInfo *open_file, *inv_file = NULL;
+	struct cifs_sb_info *cifs_sb;
+	bool any_available = false;
+	int rc = -EBADF;
+	unsigned int refind = 0;
+	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
+	bool with_delete = flags & FIND_WR_WITH_DELETE;
+	*ret_file = NULL;
+
+	/*
+	 * Having a null inode here (because mapping->host was set to zero by
+	 * the VFS or MM) should not happen but we had reports of an oops (due
+	 * to it being zero) during stress testcases so we need to check for it
+	 */
+
+	if (cifs_inode == NULL) {
+		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
+		dump_stack();
+		return rc;
+	}
+
+	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
+
+	/* only filter by fsuid on multiuser mounts */
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
+		fsuid_only = false;
+
+	spin_lock(&cifs_inode->open_file_lock);
+refind_writable:
+	if (refind > MAX_REOPEN_ATT) {
+		spin_unlock(&cifs_inode->open_file_lock);
+		return rc;
+	}
+	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
+		if (!any_available && open_file->pid != current->tgid)
+			continue;
+		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
+			continue;
+		if (with_delete && !(open_file->fid.access & DELETE))
+			continue;
+		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
+			if (!open_file->invalidHandle) {
+				/* found a good writable file */
+				cifsFileInfo_get(open_file);
+				spin_unlock(&cifs_inode->open_file_lock);
+				*ret_file = open_file;
+				return 0;
+			} else {
+				if (!inv_file)
+					inv_file = open_file;	/* remember the first invalid match */
+			}
+		}
+	}
+	/* couldn't find useable FH with same pid, try any available */
+	if (!any_available) {
+		any_available = true;
+		goto refind_writable;
+	}
+
+	if (inv_file) {
+		any_available = false;
+		cifsFileInfo_get(inv_file);	/* hold it across the unlocked reopen */
+	}
+
+	spin_unlock(&cifs_inode->open_file_lock);
+
+	if (inv_file) {
+		rc = cifs_reopen_file(inv_file, false);
+		if (!rc) {
+			*ret_file = inv_file;
+			return 0;
+		}
+
+		spin_lock(&cifs_inode->open_file_lock);
+		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
+		spin_unlock(&cifs_inode->open_file_lock);
+		cifsFileInfo_put(inv_file);
+		++refind;
+		inv_file = NULL;
+		spin_lock(&cifs_inode->open_file_lock);
+		goto refind_writable;
+	}
+
+	return rc;
+}
+/*
+ * Wrapper around cifs_get_writable_file() that returns the handle itself
+ * (NULL when none was found) and logs a failure at FYI level.
+ */
+struct cifsFileInfo *
+find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
+{
+	struct cifsFileInfo *cfile;
+	int rc;
+
+	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
+	if (rc)
+		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
+
+	return cfile;
+}
+/*
+ * Walk the handles open on @tcon for one whose dentry path equals @name and
+ * pass its inode to cifs_get_writable_file() with @flags. Returns that
+ * call's result, -ENOENT when no open handle matches, or the error from
+ * build_path_from_dentry().
+ */
+int
+cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
+		       int flags,
+		       struct cifsFileInfo **ret_file)
+{
+	struct cifsFileInfo *cfile;
+	void *page = alloc_dentry_path();
+
+	*ret_file = NULL;
+
+	spin_lock(&tcon->open_file_lock);
+	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
+		struct cifsInodeInfo *cinode;
+		const char *full_path = build_path_from_dentry(cfile->dentry, page);
+		if (IS_ERR(full_path)) {
+			spin_unlock(&tcon->open_file_lock);
+			free_dentry_path(page);
+			return PTR_ERR(full_path);
+		}
+		if (strcmp(full_path, name))
+			continue;
+
+		cinode = CIFS_I(d_inode(cfile->dentry));
+		spin_unlock(&tcon->open_file_lock);
+		free_dentry_path(page);
+		return cifs_get_writable_file(cinode, flags, ret_file);
+	}
+
+	spin_unlock(&tcon->open_file_lock);
+	free_dentry_path(page);
+	return -ENOENT;
+}
+/*
+ * Walk the handles open on @tcon for one whose dentry path equals @name and
+ * look up a readable handle on its inode with find_readable_file(). Returns
+ * 0 with *@ret_file set, -ENOENT when nothing suitable is open, or the
+ * error from build_path_from_dentry().
+ */
+int
+cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
+		       struct cifsFileInfo **ret_file)
+{
+	struct cifsFileInfo *cfile;
+	void *page = alloc_dentry_path();
+
+	*ret_file = NULL;
+
+	spin_lock(&tcon->open_file_lock);
+	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
+		struct cifsInodeInfo *cinode;
+		const char *full_path = build_path_from_dentry(cfile->dentry, page);
+		if (IS_ERR(full_path)) {
+			spin_unlock(&tcon->open_file_lock);
+			free_dentry_path(page);
+			return PTR_ERR(full_path);
+		}
+		if (strcmp(full_path, name))
+			continue;
+
+		cinode = CIFS_I(d_inode(cfile->dentry));
+		spin_unlock(&tcon->open_file_lock);
+		free_dentry_path(page);
+		*ret_file = find_readable_file(cinode, 0);
+		return *ret_file ? 0 : -ENOENT;
+	}
+
+	spin_unlock(&tcon->open_file_lock);
+	free_dentry_path(page);
+	return -ENOENT;
+}
+/*
+ * kref release callback for struct cifs_writedata: deregisters the memory
+ * registration when CONFIG_CIFS_SMB_DIRECT is enabled and one is set, drops
+ * the reference on the file handle if one is attached, and frees the
+ * structure.
+ */
+void
+cifs_writedata_release(struct kref *refcount)
+{
+	struct cifs_writedata *wdata = container_of(refcount,
+					struct cifs_writedata, refcount);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+	if (wdata->mr) {
+		smbd_deregister_mr(wdata->mr);
+		wdata->mr = NULL;
+	}
+#endif
+
+	if (wdata->cfile)
+		cifsFileInfo_put(wdata->cfile);
+
+	kfree(wdata);
+}
+
+/*
+ * Write failed with a retryable error. Resend the write request. It's also
+ * possible that the page was redirtied so re-clean the page.
+ *
+ * The data is resent in pieces of at most wp_retry_size() bytes, each with
+ * its own cifs_writedata. A piece that fails with a retryable error is
+ * attempted again. The caller's reference on @wdata is dropped before
+ * returning.
+ */
+static void
+cifs_writev_requeue(struct cifs_writedata *wdata)
+{
+	int rc = 0;
+	struct inode *inode = d_inode(wdata->cfile->dentry);
+	struct TCP_Server_Info *server;
+	unsigned int rest_len = wdata->bytes;
+	loff_t fpos = wdata->offset;
+
+	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+	do {
+		struct cifs_writedata *wdata2;
+		unsigned int wsize, cur_len;
+
+		wsize = server->ops->wp_retry_size(inode);
+		if (wsize < rest_len) {
+			if (wsize < PAGE_SIZE) {
+				rc = -EOPNOTSUPP;
+				break;
+			}
+			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
+		} else {
+			cur_len = rest_len;
+		}
+
+		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
+		if (!wdata2) {
+			rc = -ENOMEM;
+			break;
+		}
+
+		wdata2->sync_mode = wdata->sync_mode;
+		wdata2->offset = fpos;
+		wdata2->bytes = cur_len;
+		wdata2->iter = wdata->iter;
+
+		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
+		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
+
+		if (iov_iter_is_xarray(&wdata2->iter))
+			/* Check for pages having been redirtied and clean
+			 * them. We can do this by walking the xarray. If
+			 * it's not an xarray, then it's a DIO and we shouldn't
+			 * be mucking around with the page bits.
+			 */
+			cifs_undirty_folios(inode, fpos, cur_len);
+
+		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
+					    &wdata2->cfile);
+		if (!wdata2->cfile) {
+			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
+				 rc);
+			if (!is_retryable_error(rc))
+				rc = -EBADF;
+		} else {
+			wdata2->pid = wdata2->cfile->pid;
+			rc = server->ops->async_writev(wdata2,
+						       cifs_writedata_release);
+		}
+
+		kref_put(&wdata2->refcount, cifs_writedata_release);
+		if (rc) {
+			if (is_retryable_error(rc))
+				continue;	/* fpos/rest_len unchanged: same piece again */
+			fpos += cur_len;
+			rest_len -= cur_len;
+			break;
+		}
+
+		fpos += cur_len;
+		rest_len -= cur_len;
+	} while (rest_len > 0);
+
+	/* Clean up remaining pages from the original wdata */
+	if (iov_iter_is_xarray(&wdata->iter))
+		cifs_pages_write_failed(inode, fpos, rest_len);
+
+	if (rc != 0 && !is_retryable_error(rc))
+		mapping_set_error(inode->i_mapping, rc);
+	kref_put(&wdata->refcount, cifs_writedata_release);
+}
+/*
+ * Work function run for a finished asynchronous write. On success the
+ * cached server EOF and the write statistics are updated. A WB_SYNC_ALL
+ * write that ended with -EAGAIN is handed to cifs_writev_requeue(), which
+ * also drops the wdata reference. Otherwise the pages are redirtied, failed
+ * or marked written back according to wdata->result, and the reference is
+ * dropped here.
+ */
+void
+cifs_writev_complete(struct work_struct *work)
+{
+	struct cifs_writedata *wdata = container_of(work,
+						struct cifs_writedata, work);
+	struct inode *inode = d_inode(wdata->cfile->dentry);
+
+	if (wdata->result == 0) {
+		spin_lock(&inode->i_lock);
+		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
+		spin_unlock(&inode->i_lock);
+		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
+					 wdata->bytes);
+	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
+		return cifs_writev_requeue(wdata);
+
+	if (wdata->result == -EAGAIN)
+		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
+	else if (wdata->result < 0)
+		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
+	else
+		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
+
+	if (wdata->result != -EAGAIN)
+		mapping_set_error(inode->i_mapping, wdata->result);
+	kref_put(&wdata->refcount, cifs_writedata_release);
+}
+/*
+ * Allocate a zeroed cifs_writedata with GFP_NOFS and initialise its
+ * refcount, list head, completion and work item; @complete becomes the work
+ * function. Returns NULL if the allocation fails.
+ */
+struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
+{
+	struct cifs_writedata *wdata;
+
+	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
+	if (wdata != NULL) {
+		kref_init(&wdata->refcount);
+		INIT_LIST_HEAD(&wdata->list);
+		init_completion(&wdata->done);
+		INIT_WORK(&wdata->work, complete);
+	}
+	return wdata;
+}
+/*
+ * Synchronously write the bytes of @page between offsets @from and @to to
+ * the server with cifs_write(), using any writable handle of the inode.
+ * A range that starts beyond i_size is skipped and reported as success.
+ *
+ * Returns 0 on success, -EIO for an invalid range, -EFAULT when the page
+ * has no mapping/host or nothing was written, or another negative errno.
+ */
+static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
+{
+	struct address_space *mapping = page->mapping;
+	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
+	char *write_data;
+	int rc = -EFAULT;
+	int bytes_written = 0;
+	struct inode *inode;
+	struct cifsFileInfo *open_file;
+
+	if (!mapping || !mapping->host)
+		return -EFAULT;
+
+	inode = page->mapping->host;
+
+	offset += (loff_t)from;
+	write_data = kmap(page);	/* every return below must kunmap() */
+	write_data += from;
+
+	if ((to > PAGE_SIZE) || (from > to)) {
+		kunmap(page);
+		return -EIO;
+	}
+
+	/* racing with truncate? */
+	if (offset > mapping->host->i_size) {
+		kunmap(page);
+		return 0; /* don't care */
+	}
+
+	/* check to make sure that we are not extending the file */
+	if (mapping->host->i_size - offset < (loff_t)to)
+		to = (unsigned)(mapping->host->i_size - offset);
+
+	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
+				    &open_file);
+	if (!rc) {
+		bytes_written = cifs_write(open_file, open_file->pid,
+					   write_data, to - from, &offset);
+		cifsFileInfo_put(open_file);
+		/* Does mm or vfs already set times? */
+		inode->i_atime = inode->i_mtime = current_time(inode);
+		if ((bytes_written > 0) && (offset))
+			rc = 0;
+		else if (bytes_written < 0)
+			rc = bytes_written;
+		else
+			rc = -EFAULT;
+	} else {
+		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
+		if (!is_retryable_error(rc))
+			rc = -EIO;
+	}
+
+	kunmap(page);
+	return rc;
+}
+
+/*
+ * Extend the region to be written back to include subsequent contiguously
+ * dirty pages if possible, but don't sleep while doing so.
+ */ +static void cifs_extend_writeback(struct address_space *mapping, + long *_count, + loff_t start, + int max_pages, + size_t max_len, + unsigned int *_len) +{ + struct folio_batch batch; + struct folio *folio; + unsigned int psize, nr_pages; + size_t len = *_len; + pgoff_t index = (start + len) / PAGE_SIZE; + bool stop = true; + unsigned int i; + XA_STATE(xas, &mapping->i_pages, index); + + folio_batch_init(&batch); + + do { + /* Firstly, we gather up a batch of contiguous dirty pages + * under the RCU read lock - but we can't clear the dirty flags + * there if any of those pages are mapped. + */ + rcu_read_lock(); + + xas_for_each(&xas, folio, ULONG_MAX) { + stop = true; + if (xas_retry(&xas, folio)) + continue; + if (xa_is_value(folio)) + break; + if (folio_index(folio) != index) + break; + if (!folio_try_get_rcu(folio)) { + xas_reset(&xas); + continue; + } + /* From here on we hold a reference on the folio; every + * bail-out below must drop it with folio_put(). + */ + nr_pages = folio_nr_pages(folio); + if (nr_pages > max_pages) { + folio_put(folio); + break; + } + + /* Has the page moved or been split? */ + if (unlikely(folio != xas_reload(&xas))) { + folio_put(folio); + break; + } + + if (!folio_trylock(folio)) { + folio_put(folio); + break; + } + if (!folio_test_dirty(folio) || folio_test_writeback(folio)) { + folio_unlock(folio); + folio_put(folio); + break; + } + + max_pages -= nr_pages; + psize = folio_size(folio); + len += psize; + stop = false; + if (max_pages <= 0 || len >= max_len || *_count <= 0) + stop = true; + + index += nr_pages; + if (!folio_batch_add(&batch, folio)) + break; + if (stop) + break; + } + + if (!stop) + xas_pause(&xas); + rcu_read_unlock(); + + /* Now, if we obtained any pages, we can shift them to being + * writable and mark them for caching. + */ + if (!folio_batch_count(&batch)) + break; + + for (i = 0; i < folio_batch_count(&batch); i++) { + folio = batch.folios[i]; + /* The folio should be locked, dirty and not undergoing + * writeback from the loop above. 
+ */ + if (!folio_clear_dirty_for_io(folio)) + WARN_ON(1); + if (folio_start_writeback(folio)) + WARN_ON(1); + + *_count -= folio_nr_pages(folio); + folio_unlock(folio); + } + + folio_batch_release(&batch); + cond_resched(); + } while (!stop); + + *_len = len; +} + +/* + * Write back the locked page and any subsequent non-locked dirty pages. + */ +static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping, + struct writeback_control *wbc, + struct folio *folio, + loff_t start, loff_t end) +{ + struct inode *inode = mapping->host; + struct TCP_Server_Info *server; + struct cifs_writedata *wdata; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + struct cifsFileInfo *cfile = NULL; + unsigned int xid, wsize, len; + loff_t i_size = i_size_read(inode); + size_t max_len; + long count = wbc->nr_to_write; + int rc; + + /* The folio should be locked, dirty and not undergoing writeback. */ + if (folio_start_writeback(folio)) + WARN_ON(1); + + count -= folio_nr_pages(folio); + len = folio_size(folio); + + xid = get_xid(); + server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses); + + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); + if (rc) { + cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc); + goto err_xid; + } + + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, + &wsize, credits); + if (rc != 0) + goto err_close; + + wdata = cifs_writedata_alloc(cifs_writev_complete); + if (!wdata) { + rc = -ENOMEM; + goto err_uncredit; + } + + wdata->sync_mode = wbc->sync_mode; + wdata->offset = folio_pos(folio); + wdata->pid = cfile->pid; + wdata->credits = credits_on_stack; + wdata->cfile = cfile; + wdata->server = server; + cfile = NULL; + + /* Find all consecutive lockable dirty pages, stopping when we find a + * page that is not immediately lockable, is not dirty or is missing, + * or we reach the end of the range. 
+ */ + if (start < i_size) { + /* Trim the write to the EOF; the extra data is ignored. Also + * put an upper limit on the size of a single storedata op. + */ + max_len = wsize; + max_len = min_t(unsigned long long, max_len, end - start + 1); + max_len = min_t(unsigned long long, max_len, i_size - start); + + if (len < max_len) { + int max_pages = INT_MAX; + +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->smbd_conn) + max_pages = server->smbd_conn->max_frmr_depth; +#endif + max_pages -= folio_nr_pages(folio); + + if (max_pages > 0) + cifs_extend_writeback(mapping, &count, start, + max_pages, max_len, &len); + } + len = min_t(loff_t, len, max_len); + } + + wdata->bytes = len; + + /* We now have a contiguous set of dirty pages, each with writeback + * set; the first page is still locked at this point, but all the rest + * have been unlocked. + */ + folio_unlock(folio); + + if (start < i_size) { + iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages, + start, len); + + rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes); + if (rc) + goto err_wdata; + + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = wdata->server->ops->async_writev(wdata, + cifs_writedata_release); + if (rc >= 0) { + kref_put(&wdata->refcount, cifs_writedata_release); + goto err_close; + } + } else { + /* The dirty region was entirely beyond the EOF. 
*/ + cifs_pages_written_back(inode, start, len); + rc = 0; + } + +err_wdata: + kref_put(&wdata->refcount, cifs_writedata_release); +err_uncredit: + add_credits_and_wake_if(server, credits, 0); +err_close: + if (cfile) + cifsFileInfo_put(cfile); +err_xid: + free_xid(xid); + if (rc == 0) { + wbc->nr_to_write = count; + rc = len; + } else if (is_retryable_error(rc)) { + cifs_pages_write_redirty(inode, start, len); + } else { + cifs_pages_write_failed(inode, start, len); + mapping_set_error(mapping, rc); + } + /* Indication to update ctime and mtime as close is deferred */ + set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); + return rc; +} + +/* + * write a region of pages back to the server + */ +static int cifs_writepages_region(struct address_space *mapping, + struct writeback_control *wbc, + loff_t start, loff_t end, loff_t *_next) +{ + struct folio_batch fbatch; + int skips = 0; + + folio_batch_init(&fbatch); + do { + int nr; + pgoff_t index = start / PAGE_SIZE; + + nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE, + PAGECACHE_TAG_DIRTY, &fbatch); + if (!nr) + break; + + for (int i = 0; i < nr; i++) { + ssize_t ret; + struct folio *folio = fbatch.folios[i]; + +redo_folio: + start = folio_pos(folio); /* May regress with THPs */ + + /* At this point we hold neither the i_pages lock nor the + * page lock: the page may be truncated or invalidated + * (changing page->mapping to NULL), or even swizzled + * back from swapper_space to tmpfs file mapping + */ + if (wbc->sync_mode != WB_SYNC_NONE) { + ret = folio_lock_killable(folio); + if (ret < 0) + goto write_error; + } else { + if (!folio_trylock(folio)) + goto skip_write; + } + + if (folio_mapping(folio) != mapping || + !folio_test_dirty(folio)) { + start += folio_size(folio); + folio_unlock(folio); + continue; + } + + if (folio_test_writeback(folio) || + folio_test_fscache(folio)) { + folio_unlock(folio); + if (wbc->sync_mode == WB_SYNC_NONE) + goto skip_write; + + folio_wait_writeback(folio); 
+#ifdef CONFIG_CIFS_FSCACHE + folio_wait_fscache(folio); +#endif + goto redo_folio; + } + + if (!folio_clear_dirty_for_io(folio)) + /* We hold the page lock - it should've been dirty. */ + WARN_ON(1); + + ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end); + if (ret < 0) + goto write_error; + + start += ret; + continue; + +write_error: + folio_batch_release(&fbatch); + *_next = start; + return ret; + +skip_write: + /* + * Too many skipped writes, or need to reschedule? + * Treat it as a write error without an error code. + */ + if (skips >= 5 || need_resched()) { + ret = 0; + goto write_error; + } + + /* Otherwise, just skip that folio and go on to the next */ + skips++; + start += folio_size(folio); + continue; + } + + folio_batch_release(&fbatch); + cond_resched(); + } while (wbc->nr_to_write > 0); + + *_next = start; + return 0; +} + +/* + * Write some of the pending data back to the server + */ +static int cifs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + loff_t start, next; + int ret; + + /* We have to be careful as we can end up racing with setattr() + * truncating the pagecache since the caller doesn't take a lock here + * to prevent it. 
+ */ + + if (wbc->range_cyclic) { + start = mapping->writeback_index * PAGE_SIZE; + ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next); + if (ret == 0) { + mapping->writeback_index = next / PAGE_SIZE; + if (start > 0 && wbc->nr_to_write > 0) { + ret = cifs_writepages_region(mapping, wbc, 0, + start, &next); + if (ret == 0) + mapping->writeback_index = + next / PAGE_SIZE; + } + } + } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { + ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next); + if (wbc->nr_to_write > 0 && ret == 0) + mapping->writeback_index = next / PAGE_SIZE; + } else { + ret = cifs_writepages_region(mapping, wbc, + wbc->range_start, wbc->range_end, &next); + } + + return ret; +} + +static int +cifs_writepage_locked(struct page *page, struct writeback_control *wbc) +{ + int rc; + unsigned int xid; + + xid = get_xid(); +/* BB add check for wbc flags */ + get_page(page); + if (!PageUptodate(page)) + cifs_dbg(FYI, "ppw - page not up to date\n"); + + /* + * Set the "writeback" flag, and clear "dirty" in the radix tree. + * + * A writepage() implementation always needs to do either this, + * or re-dirty the page with "redirty_page_for_writepage()" in + * the case of a failure. + * + * Just unlocking the page will cause the radix tree tag-bits + * to fail to update with the state of the page correctly. 
+ */ + set_page_writeback(page); +retry_write: + rc = cifs_partialpagewrite(page, 0, PAGE_SIZE); + if (is_retryable_error(rc)) { + if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) + goto retry_write; + redirty_page_for_writepage(wbc, page); + } else if (rc != 0) { + SetPageError(page); + mapping_set_error(page->mapping, rc); + } else { + SetPageUptodate(page); + } + end_page_writeback(page); + put_page(page); + free_xid(xid); + return rc; +} + +static int cifs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + int rc; + struct inode *inode = mapping->host; + struct cifsFileInfo *cfile = file->private_data; + struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb); + struct folio *folio = page_folio(page); + __u32 pid; + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + pid = cfile->pid; + else + pid = current->tgid; + + cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n", + page, pos, copied); + + if (folio_test_checked(folio)) { + if (copied == len) + folio_mark_uptodate(folio); + folio_clear_checked(folio); + } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE) + folio_mark_uptodate(folio); + + if (!folio_test_uptodate(folio)) { + char *page_data; + unsigned offset = pos & (PAGE_SIZE - 1); + unsigned int xid; + + xid = get_xid(); + /* this is probably better than directly calling + partialpage_write since in this function the file handle is + known which we might as well leverage */ + /* BB check if anything else missing out of ppw + such as updating last write time */ + page_data = kmap(page); + rc = cifs_write(cfile, pid, page_data + offset, copied, &pos); + /* if (rc < 0) should we set writebehind rc? 
*/ + kunmap(page); + + free_xid(xid); + } else { + rc = copied; + pos += copied; + set_page_dirty(page); + } + + if (rc > 0) { + spin_lock(&inode->i_lock); + if (pos > inode->i_size) { + i_size_write(inode, pos); + inode->i_blocks = (512 - 1 + pos) >> 9; + } + spin_unlock(&inode->i_lock); + } + + unlock_page(page); + put_page(page); + /* Indication to update ctime and mtime as close is deferred */ + set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags); + + return rc; +} + +int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + unsigned int xid; + int rc = 0; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + struct cifsFileInfo *smbfile = file->private_data; + struct inode *inode = file_inode(file); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + + rc = file_write_and_wait_range(file, start, end); + if (rc) { + trace_cifs_fsync_err(inode->i_ino, rc); + return rc; + } + + xid = get_xid(); + + cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", + file, datasync); + + if (!CIFS_CACHE_READ(CIFS_I(inode))) { + rc = cifs_zap_mapping(inode); + if (rc) { + cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); + rc = 0; /* don't care about it in fsync */ + } + } + + tcon = tlink_tcon(smbfile->tlink); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { + server = tcon->ses->server; + if (server->ops->flush == NULL) { + rc = -ENOSYS; + goto strict_fsync_exit; + } + + if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { + smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + if (smbfile) { + rc = server->ops->flush(xid, tcon, &smbfile->fid); + cifsFileInfo_put(smbfile); + } else + cifs_dbg(FYI, "ignore fsync for file not open for write\n"); + } else + rc = server->ops->flush(xid, tcon, &smbfile->fid); + } + +strict_fsync_exit: + free_xid(xid); + return rc; +} + +int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + unsigned int xid; + int rc = 0; + struct cifs_tcon *tcon; + 
struct TCP_Server_Info *server; + struct cifsFileInfo *smbfile = file->private_data; + struct inode *inode = file_inode(file); + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + + rc = file_write_and_wait_range(file, start, end); + if (rc) { + trace_cifs_fsync_err(file_inode(file)->i_ino, rc); + return rc; + } + + xid = get_xid(); + + cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", + file, datasync); + + tcon = tlink_tcon(smbfile->tlink); + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { + server = tcon->ses->server; + if (server->ops->flush == NULL) { + rc = -ENOSYS; + goto fsync_exit; + } + + if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { + smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + if (smbfile) { + rc = server->ops->flush(xid, tcon, &smbfile->fid); + cifsFileInfo_put(smbfile); + } else + cifs_dbg(FYI, "ignore fsync for file not open for write\n"); + } else + rc = server->ops->flush(xid, tcon, &smbfile->fid); + } + +fsync_exit: + free_xid(xid); + return rc; +} + +/* + * As file closes, flush all cached write data for this inode checking + * for write behind errors. 
+ */ +int cifs_flush(struct file *file, fl_owner_t id) +{ + struct inode *inode = file_inode(file); + int rc = 0; + + if (file->f_mode & FMODE_WRITE) + rc = filemap_write_and_wait(inode->i_mapping); + + cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc); + if (rc) { + /* get more nuanced writeback errors */ + rc = filemap_check_wb_err(file->f_mapping, 0); + trace_cifs_flush_err(inode->i_ino, rc); + } + return rc; +} + +static void +cifs_uncached_writedata_release(struct kref *refcount) +{ + struct cifs_writedata *wdata = container_of(refcount, + struct cifs_writedata, refcount); + + kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release); + cifs_writedata_release(refcount); +} + +static void collect_uncached_write_data(struct cifs_aio_ctx *ctx); + +static void +cifs_uncached_writev_complete(struct work_struct *work) +{ + struct cifs_writedata *wdata = container_of(work, + struct cifs_writedata, work); + struct inode *inode = d_inode(wdata->cfile->dentry); + struct cifsInodeInfo *cifsi = CIFS_I(inode); + + spin_lock(&inode->i_lock); + cifs_update_eof(cifsi, wdata->offset, wdata->bytes); + if (cifsi->server_eof > inode->i_size) + i_size_write(inode, cifsi->server_eof); + spin_unlock(&inode->i_lock); + + complete(&wdata->done); + collect_uncached_write_data(wdata->ctx); + /* the below call can possibly free the last ref to aio ctx */ + kref_put(&wdata->refcount, cifs_uncached_writedata_release); +} + +static int +cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, + struct cifs_aio_ctx *ctx) +{ + unsigned int wsize; + struct cifs_credits credits; + int rc; + struct TCP_Server_Info *server = wdata->server; + + do { + if (wdata->cfile->invalidHandle) { + rc = cifs_reopen_file(wdata->cfile, false); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + + /* + * Wait for credits to resend this wdata. 
+ * Note: we are attempting to resend the whole wdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, wdata->bytes, + &wsize, &credits); + if (rc) + goto fail; + + if (wsize < wdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (wsize < wdata->bytes); + wdata->credits = credits; + + rc = adjust_credits(server, &wdata->credits, wdata->bytes); + + if (!rc) { + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else { +#ifdef CONFIG_CIFS_SMB_DIRECT + if (wdata->mr) { + wdata->mr->need_invalidate = true; + smbd_deregister_mr(wdata->mr); + wdata->mr = NULL; + } +#endif + rc = server->ops->async_writev(wdata, + cifs_uncached_writedata_release); + } + } + + /* If the write was successfully sent, we are done */ + if (!rc) { + list_add_tail(&wdata->list, wdata_list); + return 0; + } + + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &wdata->credits, 0); + } while (rc == -EAGAIN); + +fail: + kref_put(&wdata->refcount, cifs_uncached_writedata_release); + return rc; +} + +/* + * Select span of a bvec iterator we're going to use. Limit it by both maximum + * size and maximum number of segments. 
+ */ +static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size, + size_t max_segs, unsigned int *_nsegs) +{ + const struct bio_vec *bvecs = iter->bvec; + unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0; + size_t len, span = 0, n = iter->count; + size_t skip = iter->iov_offset; + + if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0) + return 0; + + while (n && ix < nbv && skip) { + len = bvecs[ix].bv_len; + if (skip < len) + break; + skip -= len; + n -= len; + ix++; + } + + while (n && ix < nbv) { + len = min3(n, bvecs[ix].bv_len - skip, max_size); + span += len; + max_size -= len; + nsegs++; + ix++; + if (max_size == 0 || nsegs >= max_segs) + break; + skip = 0; + n -= len; + } + + *_nsegs = nsegs; + return span; +} + +static int +cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from, + struct cifsFileInfo *open_file, + struct cifs_sb_info *cifs_sb, struct list_head *wdata_list, + struct cifs_aio_ctx *ctx) +{ + int rc = 0; + size_t cur_len, max_len; + struct cifs_writedata *wdata; + pid_t pid; + struct TCP_Server_Info *server; + unsigned int xid, max_segs = INT_MAX; + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + pid = open_file->pid; + else + pid = current->tgid; + + server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); + xid = get_xid(); + +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->smbd_conn) + max_segs = server->smbd_conn->max_frmr_depth; +#endif + + do { + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + unsigned int wsize, nsegs = 0; + + if (signal_pending(current)) { + rc = -EINTR; + break; + } + + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, false); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize, + &wsize, credits); + if (rc) + break; + + max_len = min_t(const size_t, len, wsize); + if (!max_len) { + rc = -EAGAIN; + add_credits_and_wake_if(server, 
credits, 0); + break; + } + + cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs); + cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n", + cur_len, max_len, nsegs, from->nr_segs, max_segs); + if (cur_len == 0) { + rc = -EIO; + add_credits_and_wake_if(server, credits, 0); + break; + } + + wdata = cifs_writedata_alloc(cifs_uncached_writev_complete); + if (!wdata) { + rc = -ENOMEM; + add_credits_and_wake_if(server, credits, 0); + break; + } + + wdata->sync_mode = WB_SYNC_ALL; + wdata->offset = (__u64)fpos; + wdata->cfile = cifsFileInfo_get(open_file); + wdata->server = server; + wdata->pid = pid; + wdata->bytes = cur_len; + wdata->credits = credits_on_stack; + wdata->iter = *from; + wdata->ctx = ctx; + kref_get(&ctx->refcount); + + iov_iter_truncate(&wdata->iter, cur_len); + + rc = adjust_credits(server, &wdata->credits, wdata->bytes); + + if (!rc) { + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_writev(wdata, + cifs_uncached_writedata_release); + } + + if (rc) { + add_credits_and_wake_if(server, &wdata->credits, 0); + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); + if (rc == -EAGAIN) + continue; + break; + } + + list_add_tail(&wdata->list, wdata_list); + iov_iter_advance(from, cur_len); + fpos += cur_len; + len -= cur_len; + } while (len > 0); + + free_xid(xid); + return rc; +} + +static void collect_uncached_write_data(struct cifs_aio_ctx *ctx) +{ + struct cifs_writedata *wdata, *tmp; + struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; + struct dentry *dentry = ctx->cfile->dentry; + ssize_t rc; + + tcon = tlink_tcon(ctx->cfile->tlink); + cifs_sb = CIFS_SB(dentry->d_sb); + + mutex_lock(&ctx->aio_mutex); + + if (list_empty(&ctx->list)) { + mutex_unlock(&ctx->aio_mutex); + return; + } + + rc = ctx->rc; + /* + * Wait for and collect replies for any successful sends in order of + * increasing offset. Once an error is hit, then return without waiting + * for any more replies. 
+ */ +restart_loop: + list_for_each_entry_safe(wdata, tmp, &ctx->list, list) { + if (!rc) { + if (!try_wait_for_completion(&wdata->done)) { + mutex_unlock(&ctx->aio_mutex); + return; + } + + if (wdata->result) + rc = wdata->result; + else + ctx->total_len += wdata->bytes; + + /* resend call if it's a retryable error */ + if (rc == -EAGAIN) { + struct list_head tmp_list; + struct iov_iter tmp_from = ctx->iter; + + INIT_LIST_HEAD(&tmp_list); + list_del_init(&wdata->list); + + if (ctx->direct_io) + rc = cifs_resend_wdata( + wdata, &tmp_list, ctx); + else { + iov_iter_advance(&tmp_from, + wdata->offset - ctx->pos); + + rc = cifs_write_from_iter(wdata->offset, + wdata->bytes, &tmp_from, + ctx->cfile, cifs_sb, &tmp_list, + ctx); + + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); + } + + list_splice(&tmp_list, &ctx->list); + goto restart_loop; + } + } + list_del_init(&wdata->list); + kref_put(&wdata->refcount, cifs_uncached_writedata_release); + } + + cifs_stats_bytes_written(tcon, ctx->total_len); + set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags); + + ctx->rc = (rc == 0) ? 
ctx->total_len : rc; + + mutex_unlock(&ctx->aio_mutex); + + if (ctx->iocb && ctx->iocb->ki_complete) + ctx->iocb->ki_complete(ctx->iocb, ctx->rc); + else + complete(&ctx->done); +} + +static ssize_t __cifs_writev( + struct kiocb *iocb, struct iov_iter *from, bool direct) +{ + struct file *file = iocb->ki_filp; + ssize_t total_written = 0; + struct cifsFileInfo *cfile; + struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; + struct cifs_aio_ctx *ctx; + int rc; + + rc = generic_write_checks(iocb, from); + if (rc <= 0) + return rc; + + cifs_sb = CIFS_FILE_SB(file); + cfile = file->private_data; + tcon = tlink_tcon(cfile->tlink); + + if (!tcon->ses->server->ops->async_writev) + return -ENOSYS; + + ctx = cifs_aio_ctx_alloc(); + if (!ctx) + return -ENOMEM; + + ctx->cfile = cifsFileInfo_get(cfile); + + if (!is_sync_kiocb(iocb)) + ctx->iocb = iocb; + + ctx->pos = iocb->ki_pos; + ctx->direct_io = direct; + ctx->nr_pinned_pages = 0; + + if (user_backed_iter(from)) { + /* + * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as + * they contain references to the calling process's virtual + * memory layout which won't be available in an async worker + * thread. This also takes a pin on every folio involved. + */ + rc = netfs_extract_user_iter(from, iov_iter_count(from), + &ctx->iter, 0); + if (rc < 0) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + + ctx->nr_pinned_pages = rc; + ctx->bv = (void *)ctx->iter.bvec; + ctx->bv_need_unpin = iov_iter_extract_will_pin(from); + } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) && + !is_sync_kiocb(iocb)) { + /* + * If the op is asynchronous, we need to copy the list attached + * to a BVEC/KVEC-type iterator, but we assume that the storage + * will be pinned by the caller; in any case, we may or may not + * be able to pin the pages, so we don't try. 
+ */ + ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL); + if (!ctx->bv) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -ENOMEM; + } + } else { + /* + * Otherwise, we just pass the iterator down as-is and rely on + * the caller to make sure the pages referred to by the + * iterator don't evaporate. + */ + ctx->iter = *from; + } + + ctx->len = iov_iter_count(&ctx->iter); + + /* grab the lock here because the write response handlers can also access ctx */ + mutex_lock(&ctx->aio_mutex); + + rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter, + cfile, cifs_sb, &ctx->list, ctx); + + /* + * If at least one write was successfully sent, then discard any rc + * value from the later writes. If the other write succeeds, then + * we'll end up returning whatever was written. If it fails, then + * we'll get a new rc value from that. + */ + if (!list_empty(&ctx->list)) + rc = 0; + + mutex_unlock(&ctx->aio_mutex); + + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + + if (!is_sync_kiocb(iocb)) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -EIOCBQUEUED; + } + + rc = wait_for_completion_killable(&ctx->done); + if (rc) { + mutex_lock(&ctx->aio_mutex); + ctx->rc = rc = -EINTR; + total_written = ctx->total_len; + mutex_unlock(&ctx->aio_mutex); + } else { + rc = ctx->rc; + total_written = ctx->total_len; + } + + kref_put(&ctx->refcount, cifs_aio_ctx_release); + + if (unlikely(!total_written)) + return rc; + + iocb->ki_pos += total_written; + return total_written; +} + +ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + + cifs_revalidate_mapping(file->f_inode); + return __cifs_writev(iocb, from, true); +} + +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) +{ + return __cifs_writev(iocb, from, false); +} + +static ssize_t +cifs_writev(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct cifsFileInfo *cfile = 
(struct cifsFileInfo *)file->private_data; + struct inode *inode = file->f_mapping->host; + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + ssize_t rc; + + inode_lock(inode); + /* + * We need to hold the sem to be sure nobody modifies the lock list + * with a brlock that prevents writing. + */ + down_read(&cinode->lock_sem); + + rc = generic_write_checks(iocb, from); + if (rc <= 0) + goto out; + + if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), + server->vals->exclusive_lock_type, 0, + NULL, CIFS_WRITE_OP)) + rc = __generic_file_write_iter(iocb, from); + else + rc = -EACCES; +out: + up_read(&cinode->lock_sem); + inode_unlock(inode); + + if (rc > 0) + rc = generic_write_sync(iocb, rc); + return rc; +} + +ssize_t +cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsFileInfo *cfile = (struct cifsFileInfo *) + iocb->ki_filp->private_data; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + ssize_t written; + + written = cifs_get_writer(cinode); + if (written) + return written; + + if (CIFS_CACHE_WRITE(cinode)) { + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) + && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { + written = generic_file_write_iter(iocb, from); + goto out; + } + written = cifs_writev(iocb, from); + goto out; + } + /* + * For non-oplocked files in strict cache mode we need to write the data + * to the server exactly from the pos to pos+len-1 rather than flush all + * affected pages because it may cause an error with mandatory locks on + * these pages but not on the region from pos to pos+len-1. 
+ */ + written = cifs_user_writev(iocb, from); + if (CIFS_CACHE_READ(cinode)) { + /* + * We have read level caching and we have just sent a write + * request to the server thus making data in the cache stale. + * Zap the cache and set oplock/lease level to NONE to avoid + * reading stale data from the cache. All subsequent read + * operations will read new data from the server. + */ + cifs_zap_mapping(inode); + cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n", + inode); + cinode->oplock = 0; + } +out: + cifs_put_writer(cinode); + return written; +} + +static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete) +{ + struct cifs_readdata *rdata; + + rdata = kzalloc(sizeof(*rdata), GFP_KERNEL); + if (rdata) { + kref_init(&rdata->refcount); + INIT_LIST_HEAD(&rdata->list); + init_completion(&rdata->done); + INIT_WORK(&rdata->work, complete); + } + + return rdata; +} + +void +cifs_readdata_release(struct kref *refcount) +{ + struct cifs_readdata *rdata = container_of(refcount, + struct cifs_readdata, refcount); + + if (rdata->ctx) + kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release); +#ifdef CONFIG_CIFS_SMB_DIRECT + if (rdata->mr) { + smbd_deregister_mr(rdata->mr); + rdata->mr = NULL; + } +#endif + if (rdata->cfile) + cifsFileInfo_put(rdata->cfile); + + kfree(rdata); +} + +static void collect_uncached_read_data(struct cifs_aio_ctx *ctx); + +static void +cifs_uncached_readv_complete(struct work_struct *work) +{ + struct cifs_readdata *rdata = container_of(work, + struct cifs_readdata, work); + + complete(&rdata->done); + collect_uncached_read_data(rdata->ctx); + /* the below call can possibly free the last ref to aio ctx */ + kref_put(&rdata->refcount, cifs_readdata_release); +} + +static int cifs_resend_rdata(struct cifs_readdata *rdata, + struct list_head *rdata_list, + struct cifs_aio_ctx *ctx) +{ + unsigned int rsize; + struct cifs_credits credits; + int rc; + struct TCP_Server_Info *server; + + /* XXX: should we pick a new channel 
here? */ + server = rdata->server; + + do { + if (rdata->cfile->invalidHandle) { + rc = cifs_reopen_file(rdata->cfile, true); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + /* + * Wait for credits to resend this rdata. + * Note: we are attempting to resend the whole rdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, rdata->bytes, + &rsize, &credits); + + if (rc) + goto fail; + + if (rsize < rdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (rsize < rdata->bytes); + rdata->credits = credits; + + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else { +#ifdef CONFIG_CIFS_SMB_DIRECT + if (rdata->mr) { + rdata->mr->need_invalidate = true; + smbd_deregister_mr(rdata->mr); + rdata->mr = NULL; + } +#endif + rc = server->ops->async_readv(rdata); + } + } + + /* If the read was successfully sent, we are done */ + if (!rc) { + /* Add to aio pending list */ + list_add_tail(&rdata->list, rdata_list); + return 0; + } + + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &rdata->credits, 0); + } while (rc == -EAGAIN); + +fail: + kref_put(&rdata->refcount, cifs_readdata_release); + return rc; +} + +static int +cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file, + struct cifs_sb_info *cifs_sb, struct list_head *rdata_list, + struct cifs_aio_ctx *ctx) +{ + struct cifs_readdata *rdata; + unsigned int rsize, nsegs, max_segs = INT_MAX; + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + size_t cur_len, max_len; + int rc; + pid_t pid; + struct TCP_Server_Info *server; + + server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); + +#ifdef CONFIG_CIFS_SMB_DIRECT + if (server->smbd_conn) + max_segs = server->smbd_conn->max_frmr_depth; +#endif + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + pid = 
open_file->pid; + else + pid = current->tgid; + + do { + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, true); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), + cifs_sb->ctx); + + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, + &rsize, credits); + if (rc) + break; + + max_len = min_t(size_t, len, rsize); + + cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len, + max_segs, &nsegs); + cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n", + cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs); + if (cur_len == 0) { + rc = -EIO; + add_credits_and_wake_if(server, credits, 0); + break; + } + + rdata = cifs_readdata_alloc(cifs_uncached_readv_complete); + if (!rdata) { + add_credits_and_wake_if(server, credits, 0); + rc = -ENOMEM; + break; + } + + rdata->server = server; + rdata->cfile = cifsFileInfo_get(open_file); + rdata->offset = fpos; + rdata->bytes = cur_len; + rdata->pid = pid; + rdata->credits = credits_on_stack; + rdata->ctx = ctx; + kref_get(&ctx->refcount); + + rdata->iter = ctx->iter; + iov_iter_truncate(&rdata->iter, cur_len); + + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_readv(rdata); + } + + if (rc) { + add_credits_and_wake_if(server, &rdata->credits, 0); + kref_put(&rdata->refcount, cifs_readdata_release); + if (rc == -EAGAIN) + continue; + break; + } + + list_add_tail(&rdata->list, rdata_list); + iov_iter_advance(&ctx->iter, cur_len); + fpos += cur_len; + len -= cur_len; + } while (len > 0); + + return rc; +} + +static void +collect_uncached_read_data(struct cifs_aio_ctx *ctx) +{ + struct cifs_readdata *rdata, *tmp; + struct cifs_sb_info *cifs_sb; + int rc; + + cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb); + + mutex_lock(&ctx->aio_mutex); + + if 
(list_empty(&ctx->list)) { + mutex_unlock(&ctx->aio_mutex); + return; + } + + rc = ctx->rc; + /* the loop below should proceed in the order of increasing offsets */ +again: + list_for_each_entry_safe(rdata, tmp, &ctx->list, list) { + if (!rc) { + if (!try_wait_for_completion(&rdata->done)) { + mutex_unlock(&ctx->aio_mutex); + return; + } + + if (rdata->result == -EAGAIN) { + /* resend call if it's a retryable error */ + struct list_head tmp_list; + unsigned int got_bytes = rdata->got_bytes; + + list_del_init(&rdata->list); + INIT_LIST_HEAD(&tmp_list); + + if (ctx->direct_io) { + /* + * Re-use rdata as this is a + * direct I/O + */ + rc = cifs_resend_rdata( + rdata, + &tmp_list, ctx); + } else { + rc = cifs_send_async_read( + rdata->offset + got_bytes, + rdata->bytes - got_bytes, + rdata->cfile, cifs_sb, + &tmp_list, ctx); + + kref_put(&rdata->refcount, + cifs_readdata_release); + } + + list_splice(&tmp_list, &ctx->list); + + goto again; + } else if (rdata->result) + rc = rdata->result; + + /* if there was a short read -- discard anything left */ + if (rdata->got_bytes && rdata->got_bytes < rdata->bytes) + rc = -ENODATA; + + ctx->total_len += rdata->got_bytes; + } + list_del_init(&rdata->list); + kref_put(&rdata->refcount, cifs_readdata_release); + } + + /* mask nodata case */ + if (rc == -ENODATA) + rc = 0; + + ctx->rc = (rc == 0) ? 
(ssize_t)ctx->total_len : rc; + + mutex_unlock(&ctx->aio_mutex); + + if (ctx->iocb && ctx->iocb->ki_complete) + ctx->iocb->ki_complete(ctx->iocb, ctx->rc); + else + complete(&ctx->done); +} + +static ssize_t __cifs_readv( + struct kiocb *iocb, struct iov_iter *to, bool direct) +{ + size_t len; + struct file *file = iocb->ki_filp; + struct cifs_sb_info *cifs_sb; + struct cifsFileInfo *cfile; + struct cifs_tcon *tcon; + ssize_t rc, total_read = 0; + loff_t offset = iocb->ki_pos; + struct cifs_aio_ctx *ctx; + + len = iov_iter_count(to); + if (!len) + return 0; + + cifs_sb = CIFS_FILE_SB(file); + cfile = file->private_data; + tcon = tlink_tcon(cfile->tlink); + + if (!tcon->ses->server->ops->async_readv) + return -ENOSYS; + + if ((file->f_flags & O_ACCMODE) == O_WRONLY) + cifs_dbg(FYI, "attempting read on write only file instance\n"); + + ctx = cifs_aio_ctx_alloc(); + if (!ctx) + return -ENOMEM; + + ctx->pos = offset; + ctx->direct_io = direct; + ctx->len = len; + ctx->cfile = cifsFileInfo_get(cfile); + ctx->nr_pinned_pages = 0; + + if (!is_sync_kiocb(iocb)) + ctx->iocb = iocb; + + if (user_backed_iter(to)) { + /* + * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as + * they contain references to the calling process's virtual + * memory layout which won't be available in an async worker + * thread. This also takes a pin on every folio involved. 
+ */ + rc = netfs_extract_user_iter(to, iov_iter_count(to), + &ctx->iter, 0); + if (rc < 0) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + + ctx->nr_pinned_pages = rc; + ctx->bv = (void *)ctx->iter.bvec; + ctx->bv_need_unpin = iov_iter_extract_will_pin(to); + ctx->should_dirty = true; + } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) && + !is_sync_kiocb(iocb)) { + /* + * If the op is asynchronous, we need to copy the list attached + * to a BVEC/KVEC-type iterator, but we assume that the storage + * will be retained by the caller; in any case, we may or may + * not be able to pin the pages, so we don't try. + */ + ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL); + if (!ctx->bv) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -ENOMEM; + } + } else { + /* + * Otherwise, we just pass the iterator down as-is and rely on + * the caller to make sure the pages referred to by the + * iterator don't evaporate. + */ + ctx->iter = *to; + } + + if (direct) { + rc = filemap_write_and_wait_range(file->f_inode->i_mapping, + offset, offset + len - 1); + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -EAGAIN; + } + } + + /* grab a lock here due to read response handlers can access ctx */ + mutex_lock(&ctx->aio_mutex); + + rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx); + + /* if at least one read request send succeeded, then reset rc */ + if (!list_empty(&ctx->list)) + rc = 0; + + mutex_unlock(&ctx->aio_mutex); + + if (rc) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return rc; + } + + if (!is_sync_kiocb(iocb)) { + kref_put(&ctx->refcount, cifs_aio_ctx_release); + return -EIOCBQUEUED; + } + + rc = wait_for_completion_killable(&ctx->done); + if (rc) { + mutex_lock(&ctx->aio_mutex); + ctx->rc = rc = -EINTR; + total_read = ctx->total_len; + mutex_unlock(&ctx->aio_mutex); + } else { + rc = ctx->rc; + total_read = ctx->total_len; + } + + kref_put(&ctx->refcount, 
cifs_aio_ctx_release); + + if (total_read) { + iocb->ki_pos += total_read; + return total_read; + } + return rc; +} + +ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to) +{ + return __cifs_readv(iocb, to, true); +} + +ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) +{ + return __cifs_readv(iocb, to, false); +} + +ssize_t +cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsFileInfo *cfile = (struct cifsFileInfo *) + iocb->ki_filp->private_data; + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + int rc = -EACCES; + + /* + * In strict cache mode we need to read from the server all the time + * if we don't have level II oplock because the server can delay mtime + * change - so we can't make a decision about inode invalidating. + * And we can also fail with pagereading if there are mandatory locks + * on pages affected by this read but not on the region from pos to + * pos+len-1. + */ + if (!CIFS_CACHE_READ(cinode)) + return cifs_user_readv(iocb, to); + + if (cap_unix(tcon->ses) && + (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && + ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) + return generic_file_read_iter(iocb, to); + + /* + * We need to hold the sem to be sure nobody modifies lock list + * with a brlock that prevents reading. 
+ */ + down_read(&cinode->lock_sem); + if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), + tcon->ses->server->vals->shared_lock_type, + 0, NULL, CIFS_READ_OP)) + rc = generic_file_read_iter(iocb, to); + up_read(&cinode->lock_sem); + return rc; +} + +static ssize_t +cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset) +{ + int rc = -EACCES; + unsigned int bytes_read = 0; + unsigned int total_read; + unsigned int current_read_size; + unsigned int rsize; + struct cifs_sb_info *cifs_sb; + struct cifs_tcon *tcon; + struct TCP_Server_Info *server; + unsigned int xid; + char *cur_offset; + struct cifsFileInfo *open_file; + struct cifs_io_parms io_parms = {0}; + int buf_type = CIFS_NO_BUFFER; + __u32 pid; + + xid = get_xid(); + cifs_sb = CIFS_FILE_SB(file); + + /* FIXME: set up handlers for larger reads and/or convert to async */ + rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize); + + if (file->private_data == NULL) { + rc = -EBADF; + free_xid(xid); + return rc; + } + open_file = file->private_data; + tcon = tlink_tcon(open_file->tlink); + server = cifs_pick_channel(tcon->ses); + + if (!server->ops->sync_read) { + free_xid(xid); + return -ENOSYS; + } + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + pid = open_file->pid; + else + pid = current->tgid; + + if ((file->f_flags & O_ACCMODE) == O_WRONLY) + cifs_dbg(FYI, "attempting read on write only file instance\n"); + + for (total_read = 0, cur_offset = read_data; read_size > total_read; + total_read += bytes_read, cur_offset += bytes_read) { + do { + current_read_size = min_t(uint, read_size - total_read, + rsize); + /* + * For windows me and 9x we do not want to request more + * than it negotiated since it will refuse the read + * then. 
+ */ + if (!(tcon->ses->capabilities & + tcon->ses->server->vals->cap_large_files)) { + current_read_size = min_t(uint, + current_read_size, CIFSMaxBufSize); + } + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, true); + if (rc != 0) + break; + } + io_parms.pid = pid; + io_parms.tcon = tcon; + io_parms.offset = *offset; + io_parms.length = current_read_size; + io_parms.server = server; + rc = server->ops->sync_read(xid, &open_file->fid, &io_parms, + &bytes_read, &cur_offset, + &buf_type); + } while (rc == -EAGAIN); + + if (rc || (bytes_read == 0)) { + if (total_read) { + break; + } else { + free_xid(xid); + return rc; + } + } else { + cifs_stats_bytes_read(tcon, total_read); + *offset += bytes_read; + } + } + free_xid(xid); + return total_read; +} + +/* + * If the page is mmap'ed into a process' page tables, then we need to make + * sure that it doesn't change while being written back. + */ +static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) +{ + struct folio *folio = page_folio(vmf->page); + + /* Wait for the folio to be written to the cache before we allow it to + * be modified. We then assume the entire folio will need writing back. 
+ */ +#ifdef CONFIG_CIFS_FSCACHE + if (folio_test_fscache(folio) && + folio_wait_fscache_killable(folio) < 0) + return VM_FAULT_RETRY; +#endif + + folio_wait_writeback(folio); + + if (folio_lock_killable(folio) < 0) + return VM_FAULT_RETRY; + return VM_FAULT_LOCKED; +} + +static const struct vm_operations_struct cifs_file_vm_ops = { + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = cifs_page_mkwrite, +}; + +int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma) +{ + int xid, rc = 0; + struct inode *inode = file_inode(file); + + xid = get_xid(); + + if (!CIFS_CACHE_READ(CIFS_I(inode))) + rc = cifs_zap_mapping(inode); + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) + vma->vm_ops = &cifs_file_vm_ops; + + free_xid(xid); + return rc; +} + +int cifs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + int rc, xid; + + xid = get_xid(); + + rc = cifs_revalidate_file(file); + if (rc) + cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n", + rc); + if (!rc) + rc = generic_file_mmap(file, vma); + if (!rc) + vma->vm_ops = &cifs_file_vm_ops; + + free_xid(xid); + return rc; +} + +/* + * Unlock a bunch of folios in the pagecache. 
+ */ +static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last) +{ + struct folio *folio; + XA_STATE(xas, &mapping->i_pages, first); + + rcu_read_lock(); + xas_for_each(&xas, folio, last) { + folio_unlock(folio); + } + rcu_read_unlock(); +} + +static void cifs_readahead_complete(struct work_struct *work) +{ + struct cifs_readdata *rdata = container_of(work, + struct cifs_readdata, work); + struct folio *folio; + pgoff_t last; + bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes); + + XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE); + + if (good) + cifs_readahead_to_fscache(rdata->mapping->host, + rdata->offset, rdata->bytes); + + if (iov_iter_count(&rdata->iter) > 0) + iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter); + + last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE; + + rcu_read_lock(); + xas_for_each(&xas, folio, last) { + if (good) { + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + folio_unlock(folio); + } + rcu_read_unlock(); + + kref_put(&rdata->refcount, cifs_readdata_release); +} + +static void cifs_readahead(struct readahead_control *ractl) +{ + struct cifsFileInfo *open_file = ractl->file->private_data; + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); + struct TCP_Server_Info *server; + unsigned int xid, nr_pages, cache_nr_pages = 0; + unsigned int ra_pages; + pgoff_t next_cached = ULONG_MAX, ra_index; + bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && + cifs_inode_cookie(ractl->mapping->host)->cache_priv; + bool check_cache = caching; + pid_t pid; + int rc = 0; + + /* Note that readahead_count() lags behind our dequeuing of pages from + * the ractl, so we have to keep track for ourselves. 
+ */ + ra_pages = readahead_count(ractl); + ra_index = readahead_index(ractl); + + xid = get_xid(); + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) + pid = open_file->pid; + else + pid = current->tgid; + + server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); + + cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", + __func__, ractl->file, ractl->mapping, ra_pages); + + /* + * Chop the readahead request up into rsize-sized read requests. + */ + while ((nr_pages = ra_pages)) { + unsigned int i, rsize; + struct cifs_readdata *rdata; + struct cifs_credits credits_on_stack; + struct cifs_credits *credits = &credits_on_stack; + struct folio *folio; + pgoff_t fsize; + + /* + * Find out if we have anything cached in the range of + * interest, and if so, where the next chunk of cached data is. + */ + if (caching) { + if (check_cache) { + rc = cifs_fscache_query_occupancy( + ractl->mapping->host, ra_index, nr_pages, + &next_cached, &cache_nr_pages); + if (rc < 0) + caching = false; + check_cache = false; + } + + if (ra_index == next_cached) { + /* + * TODO: Send a whole batch of pages to be read + * by the cache. + */ + folio = readahead_folio(ractl); + fsize = folio_nr_pages(folio); + ra_pages -= fsize; + ra_index += fsize; + if (cifs_readpage_from_fscache(ractl->mapping->host, + &folio->page) < 0) { + /* + * TODO: Deal with cache read failure + * here, but for the moment, delegate + * that to readpage. 
+ */ + caching = false; + } + folio_unlock(folio); + next_cached += fsize; + cache_nr_pages -= fsize; + if (cache_nr_pages == 0) + check_cache = true; + continue; + } + } + + if (open_file->invalidHandle) { + rc = cifs_reopen_file(open_file, true); + if (rc) { + if (rc == -EAGAIN) + continue; + break; + } + } + + if (cifs_sb->ctx->rsize == 0) + cifs_sb->ctx->rsize = + server->ops->negotiate_rsize(tlink_tcon(open_file->tlink), + cifs_sb->ctx); + + rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, + &rsize, credits); + if (rc) + break; + nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages); + if (next_cached != ULONG_MAX) + nr_pages = min_t(size_t, nr_pages, next_cached - ra_index); + + /* + * Give up immediately if rsize is too small to read an entire + * page. The VFS will fall back to readpage. We should never + * reach this point however since we set ra_pages to 0 when the + * rsize is smaller than a cache page. + */ + if (unlikely(!nr_pages)) { + add_credits_and_wake_if(server, credits, 0); + break; + } + + rdata = cifs_readdata_alloc(cifs_readahead_complete); + if (!rdata) { + /* best to give up if we're out of mem */ + add_credits_and_wake_if(server, credits, 0); + break; + } + + rdata->offset = ra_index * PAGE_SIZE; + rdata->bytes = nr_pages * PAGE_SIZE; + rdata->cfile = cifsFileInfo_get(open_file); + rdata->server = server; + rdata->mapping = ractl->mapping; + rdata->pid = pid; + rdata->credits = credits_on_stack; + + for (i = 0; i < nr_pages; i++) { + if (!readahead_folio(ractl)) + WARN_ON(1); + } + ra_pages -= nr_pages; + ra_index += nr_pages; + + iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages, + rdata->offset, rdata->bytes); + + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_readv(rdata); + } + + if (rc) { + add_credits_and_wake_if(server, &rdata->credits, 0); + cifs_unlock_folios(rdata->mapping, + rdata->offset / 
PAGE_SIZE, + (rdata->offset + rdata->bytes - 1) / PAGE_SIZE); + /* Fallback to the readpage in error/reconnect cases */ + kref_put(&rdata->refcount, cifs_readdata_release); + break; + } + + kref_put(&rdata->refcount, cifs_readdata_release); + } + + free_xid(xid); +} + +/* + * cifs_readpage_worker must be called with the page pinned + */ +static int cifs_readpage_worker(struct file *file, struct page *page, + loff_t *poffset) +{ + char *read_data; + int rc; + + /* Is the page cached? */ + rc = cifs_readpage_from_fscache(file_inode(file), page); + if (rc == 0) + goto read_complete; + + read_data = kmap(page); + /* for reads over a certain size could initiate async read ahead */ + + rc = cifs_read(file, read_data, PAGE_SIZE, poffset); + + if (rc < 0) + goto io_error; + else + cifs_dbg(FYI, "Bytes read %d\n", rc); + + /* we do not want atime to be less than mtime, it broke some apps */ + file_inode(file)->i_atime = current_time(file_inode(file)); + if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime))) + file_inode(file)->i_atime = file_inode(file)->i_mtime; + else + file_inode(file)->i_atime = current_time(file_inode(file)); + + if (PAGE_SIZE > rc) + memset(read_data + rc, 0, PAGE_SIZE - rc); + + flush_dcache_page(page); + SetPageUptodate(page); + rc = 0; + +io_error: + kunmap(page); + unlock_page(page); + +read_complete: + return rc; +} + +static int cifs_read_folio(struct file *file, struct folio *folio) +{ + struct page *page = &folio->page; + loff_t offset = page_file_offset(page); + int rc = -EACCES; + unsigned int xid; + + xid = get_xid(); + + if (file->private_data == NULL) { + rc = -EBADF; + free_xid(xid); + return rc; + } + + cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n", + page, (int)offset, (int)offset); + + rc = cifs_readpage_worker(file, page, &offset); + + free_xid(xid); + return rc; +} + +static int is_inode_writable(struct cifsInodeInfo *cifs_inode) +{ + struct cifsFileInfo *open_file; + + 
spin_lock(&cifs_inode->open_file_lock); + list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { + if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { + spin_unlock(&cifs_inode->open_file_lock); + return 1; + } + } + spin_unlock(&cifs_inode->open_file_lock); + return 0; +} + +/* We do not want to update the file size from server for inodes + open for write - to avoid races with writepage extending + the file - in the future we could consider allowing + refreshing the inode only on increases in the file size + but this is tricky to do without racing with writebehind + page caching in the current Linux kernel design */ +bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) +{ + if (!cifsInode) + return true; + + if (is_inode_writable(cifsInode)) { + /* This inode is open for write at least once */ + struct cifs_sb_info *cifs_sb; + + cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) { + /* since no page cache to corrupt on directio + we can change size safely */ + return true; + } + + if (i_size_read(&cifsInode->netfs.inode) < end_of_file) + return true; + + return false; + } else + return true; +} + +static int cifs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, + struct page **pagep, void **fsdata) +{ + int oncethru = 0; + pgoff_t index = pos >> PAGE_SHIFT; + loff_t offset = pos & (PAGE_SIZE - 1); + loff_t page_start = pos & PAGE_MASK; + loff_t i_size; + struct page *page; + int rc = 0; + + cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len); + +start: + page = grab_cache_page_write_begin(mapping, index); + if (!page) { + rc = -ENOMEM; + goto out; + } + + if (PageUptodate(page)) + goto out; + + /* + * If we write a full page it will be up to date, no need to read from + * the server. If the write is short, we'll end up doing a sync write + * instead. 
+ */ + if (len == PAGE_SIZE) + goto out; + + /* + * optimize away the read when we have an oplock, and we're not + * expecting to use any of the data we'd be reading in. That + * is, when the page lies beyond the EOF, or straddles the EOF + * and the write will cover all of the existing data. + */ + if (CIFS_CACHE_READ(CIFS_I(mapping->host))) { + i_size = i_size_read(mapping->host); + if (page_start >= i_size || + (offset == 0 && (pos + len) >= i_size)) { + zero_user_segments(page, 0, offset, + offset + len, + PAGE_SIZE); + /* + * PageChecked means that the parts of the page + * to which we're not writing are considered up + * to date. Once the data is copied to the + * page, it can be set uptodate. + */ + SetPageChecked(page); + goto out; + } + } + + if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) { + /* + * might as well read a page, it is fast enough. If we get + * an error, we don't need to return it. cifs_write_end will + * do a sync write instead since PG_uptodate isn't set. + */ + cifs_readpage_worker(file, page, &page_start); + put_page(page); + oncethru = 1; + goto start; + } else { + /* we could try using another file handle if there is one - + but how would we lock it to prevent close of that handle + racing with this read? 
In any case + this will be written out by write_end so is fine */ + } +out: + *pagep = page; + return rc; +} + +static bool cifs_release_folio(struct folio *folio, gfp_t gfp) +{ + if (folio_test_private(folio)) + return 0; + if (folio_test_fscache(folio)) { + if (current_is_kswapd() || !(gfp & __GFP_FS)) + return false; + folio_wait_fscache(folio); + } + fscache_note_page_release(cifs_inode_cookie(folio->mapping->host)); + return true; +} + +static void cifs_invalidate_folio(struct folio *folio, size_t offset, + size_t length) +{ + folio_wait_fscache(folio); +} + +static int cifs_launder_folio(struct folio *folio) +{ + int rc = 0; + loff_t range_start = folio_pos(folio); + loff_t range_end = range_start + folio_size(folio); + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, + .range_start = range_start, + .range_end = range_end, + }; + + cifs_dbg(FYI, "Launder page: %lu\n", folio->index); + + if (folio_clear_dirty_for_io(folio)) + rc = cifs_writepage_locked(&folio->page, &wbc); + + folio_wait_fscache(folio); + return rc; +} + +void cifs_oplock_break(struct work_struct *work) +{ + struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, + oplock_break); + struct inode *inode = d_inode(cfile->dentry); + struct cifsInodeInfo *cinode = CIFS_I(inode); + struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); + struct TCP_Server_Info *server = tcon->ses->server; + int rc = 0; + bool purge_cache = false, oplock_break_cancelled; + __u64 persistent_fid, volatile_fid; + __u16 net_fid; + + wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, + TASK_UNINTERRUPTIBLE); + + server->ops->downgrade_oplock(server, cinode, cfile->oplock_level, + cfile->oplock_epoch, &purge_cache); + + if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) && + cifs_has_mand_locks(cinode)) { + cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n", + inode); + cinode->oplock = 0; + } + + if (inode && S_ISREG(inode->i_mode)) { + if 
(CIFS_CACHE_READ(cinode)) + break_lease(inode, O_RDONLY); + else + break_lease(inode, O_WRONLY); + rc = filemap_fdatawrite(inode->i_mapping); + if (!CIFS_CACHE_READ(cinode) || purge_cache) { + rc = filemap_fdatawait(inode->i_mapping); + mapping_set_error(inode->i_mapping, rc); + cifs_zap_mapping(inode); + } + cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc); + if (CIFS_CACHE_WRITE(cinode)) + goto oplock_break_ack; + } + + rc = cifs_push_locks(cfile); + if (rc) + cifs_dbg(VFS, "Push locks rc = %d\n", rc); + +oplock_break_ack: + /* + * When oplock break is received and there are no active + * file handles but cached, then schedule deferred close immediately. + * So, new open will not use cached handle. + */ + + if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes)) + cifs_close_deferred_file(cinode); + + persistent_fid = cfile->fid.persistent_fid; + volatile_fid = cfile->fid.volatile_fid; + net_fid = cfile->fid.netfid; + oplock_break_cancelled = cfile->oplock_break_cancelled; + + _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); + /* + * releasing stale oplock after recent reconnect of smb session using + * a now incorrect file handle is not a data integrity issue but do + * not bother sending an oplock release if session to server still is + * disconnected since oplock already released by the server + */ + if (!oplock_break_cancelled) { + /* check for server null since can race with kill_sb calling tree disconnect */ + if (tcon->ses && tcon->ses->server) { + rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid, + volatile_fid, net_fid, cinode); + cifs_dbg(FYI, "Oplock release rc = %d\n", rc); + } else + pr_warn_once("lease break not sent for unmounted share\n"); + } + + cifs_done_oplock_break(cinode); +} + +/* + * The presence of cifs_direct_io() in the address space ops vector + * allows open() O_DIRECT flags which would have failed otherwise. 
+ * + * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests + * so this method should never be called. + * + * Direct IO is not yet supported in the cached mode. + */ +static ssize_t +cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) +{ + /* + * FIXME + * Eventually need to support direct IO for non forcedirectio mounts + */ + return -EINVAL; +} + +static int cifs_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) +{ + struct cifsFileInfo *cfile = swap_file->private_data; + struct inode *inode = swap_file->f_mapping->host; + unsigned long blocks; + long long isize; + + cifs_dbg(FYI, "swap activate\n"); + + if (!swap_file->f_mapping->a_ops->swap_rw) + /* Cannot support swap */ + return -EINVAL; + + spin_lock(&inode->i_lock); + blocks = inode->i_blocks; + isize = inode->i_size; + spin_unlock(&inode->i_lock); + if (blocks*512 < isize) { + pr_warn("swap activate: swapfile has holes\n"); + return -EINVAL; + } + *span = sis->pages; + + pr_warn_once("Swap support over SMB3 is experimental\n"); + + /* + * TODO: consider adding ACL (or documenting how) to prevent other + * users (on this or other systems) from reading it + */ + + + /* TODO: add sk_set_memalloc(inet) or similar */ + + if (cfile) + cfile->swapfile = true; + /* + * TODO: Since file already open, we can't open with DENY_ALL here + * but we could add call to grab a byte range lock to prevent others + * from reading or writing the file + */ + + sis->flags |= SWP_FS_OPS; + return add_swap_extent(sis, 0, sis->max, 0); +} + +static void cifs_swap_deactivate(struct file *file) +{ + struct cifsFileInfo *cfile = file->private_data; + + cifs_dbg(FYI, "swap deactivate\n"); + + /* TODO: undo sk_set_memalloc(inet) will eventually be needed */ + + if (cfile) + cfile->swapfile = false; + + /* do we need to unpin (or unlock) the file */ +} + +/* + * Mark a page as having been made dirty and thus needing writeback. 
We also + * need to pin the cache object to write back to. + */ +#ifdef CONFIG_CIFS_FSCACHE +static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio) +{ + return fscache_dirty_folio(mapping, folio, + cifs_inode_cookie(mapping->host)); +} +#else +#define cifs_dirty_folio filemap_dirty_folio +#endif + +const struct address_space_operations cifs_addr_ops = { + .read_folio = cifs_read_folio, + .readahead = cifs_readahead, + .writepages = cifs_writepages, + .write_begin = cifs_write_begin, + .write_end = cifs_write_end, + .dirty_folio = cifs_dirty_folio, + .release_folio = cifs_release_folio, + .direct_IO = cifs_direct_io, + .invalidate_folio = cifs_invalidate_folio, + .launder_folio = cifs_launder_folio, + .migrate_folio = filemap_migrate_folio, + /* + * TODO: investigate and if useful we could add an is_dirty_writeback + * helper if needed + */ + .swap_activate = cifs_swap_activate, + .swap_deactivate = cifs_swap_deactivate, +}; + +/* + * cifs_readahead requires the server to support a buffer large enough to + * contain the header plus one complete page of data. Otherwise, we need + * to leave cifs_readahead out of the address space operations. + */ +const struct address_space_operations cifs_addr_ops_smallbuf = { + .read_folio = cifs_read_folio, + .writepages = cifs_writepages, + .write_begin = cifs_write_begin, + .write_end = cifs_write_end, + .dirty_folio = cifs_dirty_folio, + .release_folio = cifs_release_folio, + .invalidate_folio = cifs_invalidate_folio, + .launder_folio = cifs_launder_folio, + .migrate_folio = filemap_migrate_folio, +}; - - /* - * Splice data from a file into a pipe. 
- */ - ssize_t cifs_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) - { - if (unlikely(*ppos >= file_inode(in)->i_sb->s_maxbytes)) - return 0; - if (unlikely(!len)) - return 0; - if (in->f_flags & O_DIRECT) - return direct_splice_read(in, ppos, pipe, len, flags); - return filemap_splice_read(in, ppos, pipe, len, flags); - }