erofs: change to use asynchronous io for fscache readpage/readahead
authorXin Yin <yinxin.x@bytedance.com>
Mon, 9 May 2022 07:40:28 +0000 (15:40 +0800)
committerGao Xiang <hsiangkao@linux.alibaba.com>
Tue, 17 May 2022 16:11:21 +0000 (00:11 +0800)
Using asynchronous I/O to read data from fscache may greatly improve I/O
bandwidth for sequential buffered read scenarios.

Change erofs_fscache_read_folios to erofs_fscache_read_folios_async,
which reads data from fscache asynchronously, and make
.readpage()/.readahead() use this new helper.

Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
Reviewed-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Link: https://lore.kernel.org/r/20220509074028.74954-23-jefflexu@linux.alibaba.com
Acked-by: Chao Yu <chao@kernel.org>
[ Gao Xiang: minor styling changes. ]
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
fs/erofs/fscache.c

index a402d8f0a0635f5bc4e7f05ec479d1bc8200f668..7e4417167d0b412f2289046c7bd951ec618d4928 100644 (file)
 #include <linux/fscache.h>
 #include "internal.h"
 
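+/*
+ * Allocate a netfs_io_request to track a single read request; only the
+ * fields erofs actually uses are initialized.
+ */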
+static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
+                                            loff_t start, size_t len)
+{
+       struct netfs_io_request *rreq;
+
+       rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
+       if (!rreq)
+               return ERR_PTR(-ENOMEM);
+
+       rreq->start     = start;
+       rreq->len       = len;
+       rreq->mapping   = mapping;
+       INIT_LIST_HEAD(&rreq->subrequests);
+       refcount_set(&rreq->ref, 1);
+       return rreq;
+}
+
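+/* Drop a reference; the final put ends the cache operation and frees it */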
+static void erofs_fscache_put_request(struct netfs_io_request *rreq)
+{
+       if (!refcount_dec_and_test(&rreq->ref))
+               return;
+       if (rreq->cache_resources.ops)
+               rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
+       kfree(rreq);
+}
+
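+/* The final put of a subrequest also drops its reference on the parent rreq */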
+static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
+{
+       if (!refcount_dec_and_test(&subreq->ref))
+               return;
+       erofs_fscache_put_request(subreq->rreq);
+       kfree(subreq);
+}
+
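+/* Tear down all subrequests once the request as a whole has completed */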
+static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
+{
+       struct netfs_io_subrequest *subreq;
+
+       while (!list_empty(&rreq->subrequests)) {
+               subreq = list_first_entry(&rreq->subrequests,
+                               struct netfs_io_subrequest, rreq_link);
+               list_del(&subreq->rreq_link);
+               erofs_fscache_put_subrequest(subreq);
+       }
+}
+
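+/*
+ * Walk every folio covered by @rreq and unlock it, marking it uptodate only
+ * if all subrequests overlapping that folio completed without error.
+ */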
+static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
+{
+       struct netfs_io_subrequest *subreq;
+       struct folio *folio;
+       unsigned int iopos = 0;
+       pgoff_t start_page = rreq->start / PAGE_SIZE;
+       pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
+       bool subreq_failed = false;
+
+       XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+
+       subreq = list_first_entry(&rreq->subrequests,
+                                 struct netfs_io_subrequest, rreq_link);
+       subreq_failed = (subreq->error < 0);
+
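+       /* Walk the folios in the range, advancing through the subreq list in step */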
+       rcu_read_lock();
+       xas_for_each(&xas, folio, last_page) {
+               unsigned int pgpos =
+                       (folio_index(folio) - start_page) * PAGE_SIZE;
+               unsigned int pgend = pgpos + folio_size(folio);
+               bool pg_failed = false;
+
+               for (;;) {
+                       if (!subreq) {
+                               pg_failed = true;
+                               break;
+                       }
+
+                       pg_failed |= subreq_failed;
+                       if (pgend < iopos + subreq->len)
+                               break;
+
+                       iopos += subreq->len;
+                       if (!list_is_last(&subreq->rreq_link,
+                                         &rreq->subrequests)) {
+                               subreq = list_next_entry(subreq, rreq_link);
+                               subreq_failed = (subreq->error < 0);
+                       } else {
+                               subreq = NULL;
+                               subreq_failed = false;
+                       }
+                       if (pgend == iopos)
+                               break;
+               }
+
+               if (!pg_failed)
+                       folio_mark_uptodate(folio);
+
+               folio_unlock(folio);
+       }
+       rcu_read_unlock();
+}
+
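+/* Runs once nr_outstanding reaches zero: unlock folios and free everything */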
+static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
+{
+       erofs_fscache_rreq_unlock_folios(rreq);
+       erofs_fscache_clear_subrequests(rreq);
+       erofs_fscache_put_request(rreq);
+}
+
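+/* Completion callback passed to fscache_read() for each subrequest */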
+static void erofs_fscache_subreq_complete(void *priv,
+               ssize_t transferred_or_error, bool was_async)
+{
+       struct netfs_io_subrequest *subreq = priv;
+       struct netfs_io_request *rreq = subreq->rreq;
+
+       if (IS_ERR_VALUE(transferred_or_error))
+               subreq->error = transferred_or_error;
+
+       if (atomic_dec_and_test(&rreq->nr_outstanding))
+               erofs_fscache_rreq_complete(rreq);
+
+       erofs_fscache_put_subrequest(subreq);
+}
+
 /*
  * Read data from fscache and fill the read data into page cache described by
- * @start/len, which shall be both aligned with PAGE_SIZE. @pstart describes
+ * @rreq, where the start offset and length shall both be aligned with
+ * PAGE_SIZE. @pstart describes
  * the start physical address in the cache file.
  */
-static int erofs_fscache_read_folios(struct fscache_cookie *cookie,
-                                    struct address_space *mapping,
-                                    loff_t start, size_t len,
-                                    loff_t pstart)
+static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
+                               struct netfs_io_request *rreq, loff_t pstart)
 {
        enum netfs_io_source source;
-       struct netfs_io_request rreq = {};
-       struct netfs_io_subrequest subreq = { .rreq = &rreq, };
-       struct netfs_cache_resources *cres = &rreq.cache_resources;
-       struct super_block *sb = mapping->host->i_sb;
+       struct super_block *sb = rreq->mapping->host->i_sb;
+       struct netfs_io_subrequest *subreq;
+       struct netfs_cache_resources *cres = &rreq->cache_resources;
        struct iov_iter iter;
+       loff_t start = rreq->start;
+       size_t len = rreq->len;
        size_t done = 0;
        int ret;
 
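+       /* One count for the submission loop itself, dropped at "out" below */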
+       atomic_set(&rreq->nr_outstanding, 1);
+
        ret = fscache_begin_read_operation(cres, cookie);
        if (ret)
-               return ret;
+               goto out;
 
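+       /* Carve the range into subrequests, sized by the cache's prepare_read() */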
        while (done < len) {
-               subreq.start = pstart + done;
-               subreq.len = len - done;
-               subreq.flags = 1 << NETFS_SREQ_ONDEMAND;
+               subreq = kzalloc(sizeof(struct netfs_io_subrequest),
+                                GFP_KERNEL);
+               if (subreq) {
+                       INIT_LIST_HEAD(&subreq->rreq_link);
+                       refcount_set(&subreq->ref, 2);
+                       subreq->rreq = rreq;
+                       refcount_inc(&rreq->ref);
+               } else {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               subreq->start = pstart + done;
+               subreq->len = len - done;
+               subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
 
-               source = cres->ops->prepare_read(&subreq, LLONG_MAX);
-               if (WARN_ON(subreq.len == 0))
+               list_add_tail(&subreq->rreq_link, &rreq->subrequests);
+
+               source = cres->ops->prepare_read(subreq, LLONG_MAX);
+               if (WARN_ON(subreq->len == 0))
                        source = NETFS_INVALID_READ;
                if (source != NETFS_READ_FROM_CACHE) {
                        erofs_err(sb, "failed to fscache prepare_read (source %d)",
                                  source);
                        ret = -EIO;
+                       subreq->error = ret;
+                       erofs_fscache_put_subrequest(subreq);
                        goto out;
                }
 
-               iov_iter_xarray(&iter, READ, &mapping->i_pages,
-                               start + done, subreq.len);
-               ret = fscache_read(cres, subreq.start, &iter,
-                                  NETFS_READ_HOLE_FAIL, NULL, NULL);
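+               /* Account for this subrequest before it is submitted */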
+               atomic_inc(&rreq->nr_outstanding);
+
+               iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
+                               start + done, subreq->len);
+
+               ret = fscache_read(cres, subreq->start, &iter,
+                                  NETFS_READ_HOLE_FAIL,
+                                  erofs_fscache_subreq_complete, subreq);
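+               /* -EIOCBQUEUED: the read was queued and completes asynchronously */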
+               if (ret == -EIOCBQUEUED)
+                       ret = 0;
                if (ret) {
                        erofs_err(sb, "failed to fscache_read (ret %d)", ret);
                        goto out;
                }
 
-               done += subreq.len;
+               done += subreq->len;
        }
 out:
-       fscache_end_operation(cres);
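+       /* Drop the submission count; completes the rreq once all subreqs finish */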
+       if (atomic_dec_and_test(&rreq->nr_outstanding))
+               erofs_fscache_rreq_complete(rreq);
+
        return ret;
 }
 
@@ -64,6 +210,7 @@ static int erofs_fscache_meta_readpage(struct file *data, struct page *page)
        int ret;
        struct folio *folio = page_folio(page);
        struct super_block *sb = folio_mapping(folio)->host->i_sb;
+       struct netfs_io_request *rreq;
        struct erofs_map_dev mdev = {
                .m_deviceid = 0,
                .m_pa = folio_pos(folio),
@@ -73,11 +220,13 @@ static int erofs_fscache_meta_readpage(struct file *data, struct page *page)
        if (ret)
                goto out;
 
-       ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
-                       folio_mapping(folio), folio_pos(folio),
-                       folio_size(folio), mdev.m_pa);
-       if (!ret)
-               folio_mark_uptodate(folio);
+       rreq = erofs_fscache_alloc_request(folio_mapping(folio),
+                               folio_pos(folio), folio_size(folio));
+       if (IS_ERR(rreq)) {
+               ret = PTR_ERR(rreq);
+               goto out;
+       }
+
+       return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
+                               rreq, mdev.m_pa);
 out:
        folio_unlock(folio);
        return ret;
@@ -117,6 +266,7 @@ static int erofs_fscache_readpage(struct file *file, struct page *page)
        struct super_block *sb = inode->i_sb;
        struct erofs_map_blocks map;
        struct erofs_map_dev mdev;
+       struct netfs_io_request *rreq;
        erofs_off_t pos;
        loff_t pstart;
        int ret;
@@ -149,10 +299,15 @@ static int erofs_fscache_readpage(struct file *file, struct page *page)
        if (ret)
                goto out_unlock;
 
+       rreq = erofs_fscache_alloc_request(folio_mapping(folio),
+                               folio_pos(folio), folio_size(folio));
+       if (IS_ERR(rreq)) {
+               ret = PTR_ERR(rreq);
+               goto out_unlock;
+       }
+
        pstart = mdev.m_pa + (pos - map.m_la);
-       ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
-                       folio_mapping(folio), folio_pos(folio),
-                       folio_size(folio), pstart);
+       return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
+                               rreq, pstart);
 
 out_uptodate:
        if (!ret)
@@ -162,15 +317,16 @@ out_unlock:
        return ret;
 }
 
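+/*
+ * Consume @len bytes of folios from the readahead, marking them uptodate
+ * and unlocking them immediately if @unlock is set.
+ */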
-static void erofs_fscache_unlock_folios(struct readahead_control *rac,
-                                       size_t len)
+static void erofs_fscache_advance_folios(struct readahead_control *rac,
+                                        size_t len, bool unlock)
 {
        while (len) {
                struct folio *folio = readahead_folio(rac);
-
                len -= folio_size(folio);
-               folio_mark_uptodate(folio);
-               folio_unlock(folio);
+               if (unlock) {
+                       folio_mark_uptodate(folio);
+                       folio_unlock(folio);
+               }
        }
 }
 
@@ -192,6 +348,7 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
        do {
                struct erofs_map_blocks map;
                struct erofs_map_dev mdev;
+               struct netfs_io_request *rreq;
 
                pos = start + done;
                map.m_la = pos;
@@ -211,7 +368,7 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
                                        offset, count);
                        iov_iter_zero(count, &iter);
 
-                       erofs_fscache_unlock_folios(rac, count);
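+                       /* The hole was zeroed synchronously, so unlock at once */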
+                       erofs_fscache_advance_folios(rac, count, true);
                        ret = count;
                        continue;
                }
@@ -237,17 +394,18 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
                if (ret)
                        return;
 
-               ret = erofs_fscache_read_folios(mdev.m_fscache->cookie,
-                               rac->mapping, offset, count,
-                               mdev.m_pa + (pos - map.m_la));
+               rreq = erofs_fscache_alloc_request(rac->mapping, offset, count);
+               if (IS_ERR(rreq))
+                       return;
                /*
-                * For the error cases, the folios will be unlocked when
-                * .readahead() returns.
+                * Drop the folio refs here; they are unlocked in
+                * erofs_fscache_rreq_unlock_folios() when the rreq completes.
                 */
-               if (!ret) {
-                       erofs_fscache_unlock_folios(rac, count);
+               erofs_fscache_advance_folios(rac, count, false);
+               ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
+                                       rreq, mdev.m_pa + (pos - map.m_la));
+               if (!ret)
                        ret = count;
-               }
        } while (ret > 0 && ((done += ret) < len));
 }