mm: use pte markers for swap errors
author Peter Xu <peterx@redhat.com>
Sun, 30 Oct 2022 21:41:51 +0000 (17:41 -0400)
committer Andrew Morton <akpm@linux-foundation.org>
Wed, 30 Nov 2022 23:58:46 +0000 (15:58 -0800)
PTE markers are an ideal mechanism for things like SWP_SWAPIN_ERROR.  Using a
whole swap entry type for this purpose is overkill, especially since
we already have PTE markers.  Define a new PTE marker bit for swapin errors and
use it in place of SWP_SWAPIN_ERROR.  Then we can safely drop SWP_SWAPIN_ERROR and
give one device slot back to swap.

We used to have SWP_SWAPIN_ERROR taking the page pfn as part of the swap
entry, but it was never used.  Nor do I see how it could be useful, because
a swapin failure is normally caused by a bad swap device rather than a bad
page.  Drop the pfn as well.

Link: https://lkml.kernel.org/r/20221030214151.402274-3-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Huang Ying <ying.huang@intel.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/swap.h
include/linux/swapops.h
mm/memory.c
mm/shmem.c
mm/swapfile.c

index 211aeca9bfa7f1cdb1a4e53cbfba90b832e9a9f1..fec6647a289af71aace55de0ebc2292bec89f8ce 100644 (file)
@@ -55,10 +55,6 @@ static inline int current_is_kswapd(void)
  * actions on faults.
  */
 
-#define SWP_SWAPIN_ERROR_NUM 1
-#define SWP_SWAPIN_ERROR     (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
-                            SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
-                            SWP_PTE_MARKER_NUM)
 /*
  * PTE markers are used to persist information onto PTEs that otherwise
  * should be a none pte.  As its name "PTE" hints, it should only be
@@ -121,7 +117,7 @@ static inline int current_is_kswapd(void)
 #define MAX_SWAPFILES \
        ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
        SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
-       SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM)
+       SWP_PTE_MARKER_NUM)
 
 /*
  * Magic header for a swap area. The first part of the union is
index 35c1fe62d2e1aedb21604c0c9778c151fa68a659..27ade4f22abb56f622a2aa11b92e7b1c8348cfd1 100644 (file)
@@ -162,16 +162,6 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
        return xa_mk_value(entry.val);
 }
 
-static inline swp_entry_t make_swapin_error_entry(struct page *page)
-{
-       return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page));
-}
-
-static inline int is_swapin_error_entry(swp_entry_t entry)
-{
-       return swp_type(entry) == SWP_SWAPIN_ERROR;
-}
-
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
 static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
 {
@@ -409,8 +399,9 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
 
 typedef unsigned long pte_marker;
 
-#define  PTE_MARKER_UFFD_WP  BIT(0)
-#define  PTE_MARKER_MASK     (PTE_MARKER_UFFD_WP)
+#define  PTE_MARKER_UFFD_WP                    BIT(0)
+#define  PTE_MARKER_SWAPIN_ERROR               BIT(1)
+#define  PTE_MARKER_MASK                       (BIT(2) - 1)
 
 static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
 {
@@ -437,6 +428,17 @@ static inline pte_t make_pte_marker(pte_marker marker)
        return swp_entry_to_pte(make_pte_marker_entry(marker));
 }
 
+static inline swp_entry_t make_swapin_error_entry(void)
+{
+       return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR);
+}
+
+static inline int is_swapin_error_entry(swp_entry_t entry)
+{
+       return is_pte_marker_entry(entry) &&
+           (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR);
+}
+
 /*
  * This is a special version to check pte_none() just to cover the case when
  * the pte is a pte marker.  It existed because in many cases the pte marker
index b79d275337223a7ae4e9056093362a8b6ac790ea..142c4229549b061561dda3bcb7e376ba2a371888 100644 (file)
@@ -3668,6 +3668,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
        if (WARN_ON_ONCE(!marker))
                return VM_FAULT_SIGBUS;
 
+       /* Higher priority than uffd-wp when data corrupted */
+       if (marker & PTE_MARKER_SWAPIN_ERROR)
+               return VM_FAULT_SIGBUS;
+
        if (pte_marker_entry_uffd_wp(entry))
                return pte_marker_handle_uffd_wp(vmf);
 
@@ -3727,8 +3731,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
                        put_page(vmf->page);
                } else if (is_hwpoison_entry(entry)) {
                        ret = VM_FAULT_HWPOISON;
-               } else if (is_swapin_error_entry(entry)) {
-                       ret = VM_FAULT_SIGBUS;
                } else if (is_pte_marker_entry(entry)) {
                        ret = handle_pte_marker(vmf);
                } else {
index 0a7c4a748811d60946894c4b714308a68f614da7..7428ae3fa4b9e70a466ccddc89b7ebc6322361eb 100644 (file)
@@ -1682,7 +1682,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
        swp_entry_t swapin_error;
        void *old;
 
-       swapin_error = make_swapin_error_entry(&folio->page);
+       swapin_error = make_swapin_error_entry();
        old = xa_cmpxchg_irq(&mapping->i_pages, index,
                             swp_to_radix_entry(swap),
                             swp_to_radix_entry(swapin_error), 0);
index 72e481aacd5dfc76b1d091e84bac82835f302d6e..03fe0949f6b2f2cba619b1c76543ce9559c938e6 100644 (file)
@@ -1781,7 +1781,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
                pte_t pteval;
 
                dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-               pteval = swp_entry_to_pte(make_swapin_error_entry(page));
+               pteval = swp_entry_to_pte(make_swapin_error_entry());
                set_pte_at(vma->vm_mm, addr, pte, pteval);
                swap_free(entry);
                ret = 0;