From: Linus Torvalds Date: Tue, 15 Dec 2020 20:53:37 +0000 (-0800) Subject: Merge branch 'akpm' (patches from Andrew) X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=ac73e3dc8acd0a3be292755db30388c3580f5674;p=linux.git Merge branch 'akpm' (patches from Andrew) Merge misc updates from Andrew Morton: - a few random little subsystems - almost all of the MM patches which are staged ahead of linux-next material. I'll trickle to post-linux-next work in as the dependents get merged up. Subsystems affected by this patch series: kthread, kbuild, ide, ntfs, ocfs2, arch, and mm (slab-generic, slab, slub, dax, debug, pagecache, gup, swap, shmem, memcg, pagemap, mremap, hmm, vmalloc, documentation, kasan, pagealloc, memory-failure, hugetlb, vmscan, z3fold, compaction, oom-kill, migration, cma, page-poison, userfaultfd, zswap, zsmalloc, uaccess, zram, and cleanups). * emailed patches from Andrew Morton : (200 commits) mm: cleanup kstrto*() usage mm: fix fall-through warnings for Clang mm: slub: convert sysfs sprintf family to sysfs_emit/sysfs_emit_at mm: shmem: convert shmem_enabled_show to use sysfs_emit_at mm:backing-dev: use sysfs_emit in macro defining functions mm: huge_memory: convert remaining use of sprintf to sysfs_emit and neatening mm: use sysfs_emit for struct kobject * uses mm: fix kernel-doc markups zram: break the strict dependency from lzo zram: add stat to gather incompressible pages since zram set up zram: support page writeback mm/process_vm_access: remove redundant initialization of iov_r mm/zsmalloc.c: rework the list_add code in insert_zspage() mm/zswap: move to use crypto_acomp API for hardware acceleration mm/zswap: fix passing zero to 'PTR_ERR' warning mm/zswap: make struct kernel_param_ops definitions const userfaultfd/selftests: hint the test runner on required privilege userfaultfd/selftests: fix retval check for userfaultfd_open() userfaultfd/selftests: always dump something in modes userfaultfd: selftests: make __{s,u}64 format specifiers portable ... --- ac73e3dc8acd0a3be292755db30388c3580f5674 diff --cc arch/arc/Kconfig index d8804001d5507,c874f8ab0341a..6a821e13a98f7 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@@ -506,8 -507,7 +507,8 @@@ config LINUX_RAM_BAS config HIGHMEM bool "High Memory Support" - select ARCH_DISCONTIGMEM_ENABLE + select HAVE_ARCH_PFN_VALID + select KMAP_LOCAL help With ARC 2G:2G address split, only upper 2G is directly addressable by kernel. Enable this to potentially allow access to rest of 2G and PAE diff --cc arch/x86/Kconfig index eeb87fce9c6f6,384c91b057c53..00746dd364db0 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@@ -92,8 -91,8 +92,9 @@@ config X8 select ARCH_STACKWALK select ARCH_SUPPORTS_ACPI select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEBUG_PAGEALLOC select ARCH_SUPPORTS_NUMA_BALANCING if X86_64 + select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP if NR_CPUS <= 4096 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS diff --cc include/linux/mm.h index 1813fa86b9811,3e1fe8ca97206..e189509323f87 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@@ -557,15 -557,9 +557,16 @@@ enum page_entry_size struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); - int (*split)(struct vm_area_struct * area, unsigned long addr); - int (*mremap)(struct vm_area_struct * area); + /* Called any time before splitting to check if it's allowed */ + int (*may_split)(struct vm_area_struct *area, unsigned long addr); + int (*mremap)(struct vm_area_struct *area, unsigned long flags); + /* + * Called by mprotect() to make driver-specific permission + * checks before mprotect() is finalised. The VMA must not + * be modified. Returns 0 if eprotect() can proceed. + */ + int (*mprotect)(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags); vm_fault_t (*fault)(struct vm_fault *vmf); vm_fault_t (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); diff --cc mm/gup.c index 44b0c6b896029,b3d852b4a60c4..e4c224cd9661f --- a/mm/gup.c +++ b/mm/gup.c @@@ -2062,29 -1977,62 +1977,7 @@@ EXPORT_SYMBOL(get_user_pages_unlocked) * This code is based heavily on the PowerPC implementation by Nick Piggin. */ #ifdef CONFIG_HAVE_FAST_GUP -#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH - -/* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking any - * locks. For this we would like to load the pointers atomically, but sometimes - * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What - * we do have is the guarantee that a PTE will only either go from not present - * to present, or present to not present or both -- it will not switch to a - * completely different present page without a TLB flush in between; something - * that we are blocking by holding interrupts off. - * - * Setting ptes from not present to present goes: - * - * ptep->pte_high = h; - * smp_wmb(); - * ptep->pte_low = l; - * - * And present to not present goes: - * - * ptep->pte_low = 0; - * smp_wmb(); - * ptep->pte_high = 0; - * - * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'. - * We load pte_high *after* loading pte_low, which ensures we don't see an older - * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't - * picked up a changed pte high. We might have gotten rubbish values from - * pte_low and pte_high, but we are guaranteed that pte_low will not have the - * present bit set *unless* it is 'l'. Because get_user_pages_fast() only - * operates on present ptes we're safe. - */ -static inline pte_t gup_get_pte(pte_t *ptep) -{ - pte_t pte; - - do { - pte.pte_low = ptep->pte_low; - smp_rmb(); - pte.pte_high = ptep->pte_high; - smp_rmb(); - } while (unlikely(pte.pte_low != ptep->pte_low)); - - return pte; -} -#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ -/* - * We require that the PTE can be read atomically. - */ -static inline pte_t gup_get_pte(pte_t *ptep) -{ - return ptep_get(ptep); -} -#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ - static void put_compound_head(struct page *page, int refs, unsigned int flags) - { - if (flags & FOLL_PIN) { - mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED, - refs); - - if (hpage_pincount_available(page)) - hpage_pincount_sub(page, refs); - else - refs *= GUP_PIN_COUNTING_BIAS; - } - - VM_BUG_ON_PAGE(page_ref_count(page) < refs, page); - /* - * Calling put_page() for each ref is unnecessarily slow. Only the last - * ref needs a put_page(). - */ - if (refs > 1) - page_ref_sub(page, refs - 1); - put_page(page); - } - static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start, unsigned int flags, struct page **pages) diff --cc mm/highmem.c index 83f9660f168f5,0ee87a9e0cbf2..c3a9ea7875ef8 --- a/mm/highmem.c +++ b/mm/highmem.c @@@ -358,260 -367,61 +358,312 @@@ void kunmap_high(struct page *page if (need_wakeup) wake_up(pkmap_map_wait); } - EXPORT_SYMBOL(kunmap_high); + + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + void zero_user_segments(struct page *page, unsigned start1, unsigned end1, + unsigned start2, unsigned end2) + { + unsigned int i; + + BUG_ON(end1 > page_size(page) || end2 > page_size(page)); + + for (i = 0; i < compound_nr(page); i++) { + void *kaddr = NULL; + + if (start1 < PAGE_SIZE || start2 < PAGE_SIZE) + kaddr = kmap_atomic(page + i); + + if (start1 >= PAGE_SIZE) { + start1 -= PAGE_SIZE; + end1 -= PAGE_SIZE; + } else { + unsigned this_end = min_t(unsigned, end1, PAGE_SIZE); + + if (end1 > start1) + memset(kaddr + start1, 0, this_end - start1); + end1 -= this_end; + start1 = 0; + } + + if (start2 >= PAGE_SIZE) { + start2 -= PAGE_SIZE; + end2 -= PAGE_SIZE; + } else { + unsigned this_end = min_t(unsigned, end2, PAGE_SIZE); + + if (end2 > start2) + memset(kaddr + start2, 0, this_end - start2); + end2 -= this_end; + start2 = 0; + } + + if (kaddr) { + kunmap_atomic(kaddr); + flush_dcache_page(page + i); + } + + if (!end1 && !end2) + break; + } + + BUG_ON((start1 | start2 | end1 | end2) != 0); + } + EXPORT_SYMBOL(zero_user_segments); + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#endif /* CONFIG_HIGHMEM */ +#endif /* CONFIG_HIGHMEM */ + +#ifdef CONFIG_KMAP_LOCAL + +#include + +/* + * With DEBUG_KMAP_LOCAL the stack depth is doubled and every second + * slot is unused which acts as a guard page + */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL +# define KM_INCR 2 +#else +# define KM_INCR 1 +#endif + +static inline int kmap_local_idx_push(void) +{ + WARN_ON_ONCE(in_irq() && !irqs_disabled()); + current->kmap_ctrl.idx += KM_INCR; + BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX); + return current->kmap_ctrl.idx - 1; +} + +static inline int kmap_local_idx(void) +{ + return current->kmap_ctrl.idx - 1; +} + +static inline void kmap_local_idx_pop(void) +{ + current->kmap_ctrl.idx -= KM_INCR; + BUG_ON(current->kmap_ctrl.idx < 0); +} + +#ifndef arch_kmap_local_post_map +# define arch_kmap_local_post_map(vaddr, pteval) do { } while (0) +#endif + +#ifndef arch_kmap_local_pre_unmap +# define arch_kmap_local_pre_unmap(vaddr) do { } while (0) +#endif + +#ifndef arch_kmap_local_post_unmap +# define arch_kmap_local_post_unmap(vaddr) do { } while (0) +#endif + +#ifndef arch_kmap_local_map_idx +#define arch_kmap_local_map_idx(idx, pfn) kmap_local_calc_idx(idx) +#endif + +#ifndef arch_kmap_local_unmap_idx +#define arch_kmap_local_unmap_idx(idx, vaddr) kmap_local_calc_idx(idx) +#endif + +#ifndef arch_kmap_local_high_get +static inline void *arch_kmap_local_high_get(struct page *page) +{ + return NULL; +} +#endif + +/* Unmap a local mapping which was obtained by kmap_high_get() */ +static inline bool kmap_high_unmap_local(unsigned long vaddr) +{ +#ifdef ARCH_NEEDS_KMAP_HIGH_GET + if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { + kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); + return true; + } +#endif + return false; +} + +static inline int kmap_local_calc_idx(int idx) +{ + return idx + KM_MAX_IDX * smp_processor_id(); +} + +static pte_t *__kmap_pte; + +static pte_t *kmap_get_pte(void) +{ + if (!__kmap_pte) + __kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); + return __kmap_pte; +} + +void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot) +{ + pte_t pteval, *kmap_pte = kmap_get_pte(); + unsigned long vaddr; + int idx; + + /* + * Disable migration so resulting virtual address is stable + * accross preemption. + */ + migrate_disable(); + preempt_disable(); + idx = arch_kmap_local_map_idx(kmap_local_idx_push(), pfn); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + BUG_ON(!pte_none(*(kmap_pte - idx))); + pteval = pfn_pte(pfn, prot); + set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval); + arch_kmap_local_post_map(vaddr, pteval); + current->kmap_ctrl.pteval[kmap_local_idx()] = pteval; + preempt_enable(); + + return (void *)vaddr; +} +EXPORT_SYMBOL_GPL(__kmap_local_pfn_prot); + +void *__kmap_local_page_prot(struct page *page, pgprot_t prot) +{ + void *kmap; + + /* + * To broaden the usage of the actual kmap_local() machinery always map + * pages when debugging is enabled and the architecture has no problems + * with alias mappings. + */ + if (!IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) && !PageHighMem(page)) + return page_address(page); + + /* Try kmap_high_get() if architecture has it enabled */ + kmap = arch_kmap_local_high_get(page); + if (kmap) + return kmap; + + return __kmap_local_pfn_prot(page_to_pfn(page), prot); +} +EXPORT_SYMBOL(__kmap_local_page_prot); + +void kunmap_local_indexed(void *vaddr) +{ + unsigned long addr = (unsigned long) vaddr & PAGE_MASK; + pte_t *kmap_pte = kmap_get_pte(); + int idx; + + if (addr < __fix_to_virt(FIX_KMAP_END) || + addr > __fix_to_virt(FIX_KMAP_BEGIN)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP)) { + /* This _should_ never happen! See above. */ + WARN_ON_ONCE(1); + return; + } + /* + * Handle mappings which were obtained by kmap_high_get() + * first as the virtual address of such mappings is below + * PAGE_OFFSET. Warn for all other addresses which are in + * the user space part of the virtual address space. + */ + if (!kmap_high_unmap_local(addr)) + WARN_ON_ONCE(addr < PAGE_OFFSET); + return; + } + + preempt_disable(); + idx = arch_kmap_local_unmap_idx(kmap_local_idx(), addr); + WARN_ON_ONCE(addr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); + + arch_kmap_local_pre_unmap(addr); + pte_clear(&init_mm, addr, kmap_pte - idx); + arch_kmap_local_post_unmap(addr); + current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0); + kmap_local_idx_pop(); + preempt_enable(); + migrate_enable(); +} +EXPORT_SYMBOL(kunmap_local_indexed); + +/* + * Invoked before switch_to(). This is safe even when during or after + * clearing the maps an interrupt which needs a kmap_local happens because + * the task::kmap_ctrl.idx is not modified by the unmapping code so a + * nested kmap_local will use the next unused index and restore the index + * on unmap. The already cleared kmaps of the outgoing task are irrelevant + * because the interrupt context does not know about them. The same applies + * when scheduling back in for an interrupt which happens before the + * restore is complete. + */ +void __kmap_local_sched_out(void) +{ + struct task_struct *tsk = current; + pte_t *kmap_pte = kmap_get_pte(); + int i; + + /* Clear kmaps */ + for (i = 0; i < tsk->kmap_ctrl.idx; i++) { + pte_t pteval = tsk->kmap_ctrl.pteval[i]; + unsigned long addr; + int idx; + + /* With debug all even slots are unmapped and act as guard */ + if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + WARN_ON_ONCE(!pte_none(pteval)); + continue; + } + if (WARN_ON_ONCE(pte_none(pteval))) + continue; + + /* + * This is a horrible hack for XTENSA to calculate the + * coloured PTE index. Uses the PFN encoded into the pteval + * and the map index calculation because the actual mapped + * virtual address is not stored in task::kmap_ctrl. + * For any sane architecture this is optimized out. + */ + idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); + + addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + arch_kmap_local_pre_unmap(addr); + pte_clear(&init_mm, addr, kmap_pte - idx); + arch_kmap_local_post_unmap(addr); + } +} + +void __kmap_local_sched_in(void) +{ + struct task_struct *tsk = current; + pte_t *kmap_pte = kmap_get_pte(); + int i; + + /* Restore kmaps */ + for (i = 0; i < tsk->kmap_ctrl.idx; i++) { + pte_t pteval = tsk->kmap_ctrl.pteval[i]; + unsigned long addr; + int idx; + + /* With debug all even slots are unmapped and act as guard */ + if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + WARN_ON_ONCE(!pte_none(pteval)); + continue; + } + if (WARN_ON_ONCE(pte_none(pteval))) + continue; + + /* See comment in __kmap_local_sched_out() */ + idx = arch_kmap_local_map_idx(i, pte_pfn(pteval)); + addr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte_at(&init_mm, addr, kmap_pte - idx, pteval); + arch_kmap_local_post_map(addr, pteval); + } +} + +void kmap_local_fork(struct task_struct *tsk) +{ + if (WARN_ON_ONCE(tsk->kmap_ctrl.idx)) + memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl)); +} + +#endif #if defined(HASHED_PAGE_VIRTUAL)