mm: cachestat: fix folio read-after-free in cache walk
authorNhat Pham <nphamcs@gmail.com>
Tue, 20 Feb 2024 03:01:21 +0000 (19:01 -0800)
committerAndrew Morton <akpm@linux-foundation.org>
Sat, 24 Feb 2024 01:27:13 +0000 (17:27 -0800)
In cachestat, we access the folio from the page cache's xarray to compute
its page offset, and check for its dirty and writeback flags.  However, we
do not hold a reference to the folio before performing these actions,
which means the folio can concurrently be released and reused as another
folio/page/slab.

Get around this altogether by just using xarray's existing machinery for
the folio page offsets and dirty/writeback states.

This changes behavior for tmpfs files to now always report zeroes in their
dirty and writeback counters.  This is okay as tmpfs doesn't follow
conventional writeback cache behavior: its pages get "cleaned" during
swapout, after which they're no longer resident etc.

Link: https://lkml.kernel.org/r/20240220153409.GA216065@cmpxchg.org
Fixes: cf264e1329fb ("cachestat: implement cachestat syscall")
Reported-by: Jann Horn <jannh@google.com>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Jann Horn <jannh@google.com>
Cc: <stable@vger.kernel.org> [6.4+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/filemap.c

index 750e779c23db74730fa7743c2307d1b996729d62..4a30de98a8c75daec31d1d79d15a9d9514e9fd1d 100644 (file)
@@ -4111,28 +4111,40 @@ static void filemap_cachestat(struct address_space *mapping,
 
        rcu_read_lock();
        xas_for_each(&xas, folio, last_index) {
+               int order;
                unsigned long nr_pages;
                pgoff_t folio_first_index, folio_last_index;
 
+               /*
+                * Don't deref the folio. It is not pinned, and might
+                * get freed (and reused) underneath us.
+                *
+                * We *could* pin it, but that would be expensive for
+                * what should be a fast and lightweight syscall.
+                *
+                * Instead, derive all information of interest from
+                * the rcu-protected xarray.
+                */
+
                if (xas_retry(&xas, folio))
                        continue;
 
+               order = xa_get_order(xas.xa, xas.xa_index);
+               nr_pages = 1 << order;
+               folio_first_index = round_down(xas.xa_index, 1 << order);
+               folio_last_index = folio_first_index + nr_pages - 1;
+
+               /* Folios might straddle the range boundaries, only count covered pages */
+               if (folio_first_index < first_index)
+                       nr_pages -= first_index - folio_first_index;
+
+               if (folio_last_index > last_index)
+                       nr_pages -= folio_last_index - last_index;
+
                if (xa_is_value(folio)) {
                        /* page is evicted */
                        void *shadow = (void *)folio;
                        bool workingset; /* not used */
-                       int order = xa_get_order(xas.xa, xas.xa_index);
-
-                       nr_pages = 1 << order;
-                       folio_first_index = round_down(xas.xa_index, 1 << order);
-                       folio_last_index = folio_first_index + nr_pages - 1;
-
-                       /* Folios might straddle the range boundaries, only count covered pages */
-                       if (folio_first_index < first_index)
-                               nr_pages -= first_index - folio_first_index;
-
-                       if (folio_last_index > last_index)
-                               nr_pages -= folio_last_index - last_index;
 
                        cs->nr_evicted += nr_pages;
 
@@ -4150,24 +4162,13 @@ static void filemap_cachestat(struct address_space *mapping,
                        goto resched;
                }
 
-               nr_pages = folio_nr_pages(folio);
-               folio_first_index = folio_pgoff(folio);
-               folio_last_index = folio_first_index + nr_pages - 1;
-
-               /* Folios might straddle the range boundaries, only count covered pages */
-               if (folio_first_index < first_index)
-                       nr_pages -= first_index - folio_first_index;
-
-               if (folio_last_index > last_index)
-                       nr_pages -= folio_last_index - last_index;
-
                /* page is in cache */
                cs->nr_cache += nr_pages;
 
-               if (folio_test_dirty(folio))
+               if (xas_get_mark(&xas, PAGECACHE_TAG_DIRTY))
                        cs->nr_dirty += nr_pages;
 
-               if (folio_test_writeback(folio))
+               if (xas_get_mark(&xas, PAGECACHE_TAG_WRITEBACK))
                        cs->nr_writeback += nr_pages;
 
 resched: