author     Alex Shi <alex.shi@linaro.org>  2018-03-02 12:02:08 +0800
committer  Alex Shi <alex.shi@linaro.org>  2018-03-02 12:02:08 +0800
commit     ed6c8f83c24719274e1af5c6915ec98f5f9c8be0
tree       3d767b3c84a55d6410aa943bb559050997b1cfc5 /mm
parent     1e9516bdab3d922eddeddbe17f2643d0df9ef12c
parent     c426a717c3c633c743bfa84af902012aa84063f4
Merge tag 'v4.9.85' into linux-linaro-lsk-v4.9
This is the 4.9.85 stable release
Diffstat (limited to 'mm')
 mm/frame_vector.c | 12
 mm/gup.c          | 64
 mm/memory.c       | 40
 3 files changed, 106 insertions(+), 10 deletions(-)
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index db77dcb38afd..375a103d7a56 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -52,6 +52,18 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 		ret = -EFAULT;
 		goto out;
 	}
+
+	/*
+	 * While get_vaddr_frames() could be used for transient (kernel
+	 * controlled lifetime) pinning of memory pages all current
+	 * users establish long term (userspace controlled lifetime)
+	 * page pinning. Treat get_vaddr_frames() like
+	 * get_user_pages_longterm() and disallow it for filesystem-dax
+	 * mappings.
+	 */
+	if (vma_is_fsdax(vma))
+		return -EOPNOTSUPP;
+
 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
 		vec->got_ref = true;
 		vec->is_pfns = false;
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -982,6 +982,70 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#ifdef CONFIG_FS_DAX
+/*
+ * This is the same as get_user_pages() in that it assumes we are
+ * operating on the current task's mm, but it goes further to validate
+ * that the vmas associated with the address range are suitable for
+ * longterm elevated page reference counts. For example, filesystem-dax
+ * mappings are subject to the lifetime enforced by the filesystem and
+ * we need guarantees that longterm users like RDMA and V4L2 only
+ * establish mappings that have a kernel enforced revocation mechanism.
+ *
+ * "longterm" == userspace controlled elevated page count lifetime.
+ * Contrast this to iov_iter_get_pages() usages which are transient.
+ */
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+		unsigned int gup_flags, struct page **pages,
+		struct vm_area_struct **vmas_arg)
+{
+	struct vm_area_struct **vmas = vmas_arg;
+	struct vm_area_struct *vma_prev = NULL;
+	long rc, i;
+
+	if (!pages)
+		return -EINVAL;
+
+	if (!vmas) {
+		vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
+			       GFP_KERNEL);
+		if (!vmas)
+			return -ENOMEM;
+	}
+
+	rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+
+	for (i = 0; i < rc; i++) {
+		struct vm_area_struct *vma = vmas[i];
+
+		if (vma == vma_prev)
+			continue;
+
+		vma_prev = vma;
+
+		if (vma_is_fsdax(vma))
+			break;
+	}
+
+	/*
+	 * Either get_user_pages() failed, or the vma validation
+	 * succeeded, in either case we don't need to put_page() before
+	 * returning.
+	 */
+	if (i >= rc)
+		goto out;
+
+	for (i = 0; i < rc; i++)
+		put_page(pages[i]);
+	rc = -EOPNOTSUPP;
+out:
+	if (vmas != vmas_arg)
+		kfree(vmas);
+	return rc;
+}
+EXPORT_SYMBOL(get_user_pages_longterm);
+#endif /* CONFIG_FS_DAX */
+
 /**
  * populate_vma_page_range() - populate a range of pages in the vma.
  * @vma: target vma
diff --git a/mm/memory.c b/mm/memory.c
index e2e68767a373..d2db2c4eb0a4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2848,6 +2848,17 @@ static int __do_fault(struct fault_env *fe, pgoff_t pgoff,
 	return ret;
 }
 
+/*
+ * The ordering of these checks is important for pmds with _PAGE_DEVMAP set.
+ * If we check pmd_trans_unstable() first we will trip the bad_pmd() check
+ * inside of pmd_none_or_trans_huge_or_clear_bad(). This will end up correctly
+ * returning 1 but not before it spams dmesg with the pmd_clear_bad() output.
+ */
+static int pmd_devmap_trans_unstable(pmd_t *pmd)
+{
+	return pmd_devmap(*pmd) || pmd_trans_unstable(pmd);
+}
+
 static int pte_alloc_one_map(struct fault_env *fe)
 {
 	struct vm_area_struct *vma = fe->vma;
@@ -2871,18 +2882,27 @@ static int pte_alloc_one_map(struct fault_env *fe)
 map_pte:
 	/*
 	 * If a huge pmd materialized under us just retry later.  Use
-	 * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
-	 * didn't become pmd_trans_huge under us and then back to pmd_none, as
-	 * a result of MADV_DONTNEED running immediately after a huge pmd fault
-	 * in a different thread of this mm, in turn leading to a misleading
-	 * pmd_trans_huge() retval. All we have to ensure is that it is a
-	 * regular pmd that we can walk with pte_offset_map() and we can do that
-	 * through an atomic read in C, which is what pmd_trans_unstable()
-	 * provides.
+	 * pmd_trans_unstable() via pmd_devmap_trans_unstable() instead of
+	 * pmd_trans_huge() to ensure the pmd didn't become pmd_trans_huge
+	 * under us and then back to pmd_none, as a result of MADV_DONTNEED
+	 * running immediately after a huge pmd fault in a different thread of
+	 * this mm, in turn leading to a misleading pmd_trans_huge() retval.
+	 * All we have to ensure is that it is a regular pmd that we can walk
+	 * with pte_offset_map() and we can do that through an atomic read in
+	 * C, which is what pmd_trans_unstable() provides.
 	 */
-	if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd))
+	if (pmd_devmap_trans_unstable(fe->pmd))
 		return VM_FAULT_NOPAGE;
 
+	/*
+	 * At this point we know that our vmf->pmd points to a page of ptes
+	 * and it cannot become pmd_none(), pmd_devmap() or pmd_trans_huge()
+	 * for the duration of the fault.  If a racing MADV_DONTNEED runs and
+	 * we zap the ptes pointed to by our vmf->pmd, the vmf->ptl will still
+	 * be valid and we will re-check to make sure the vmf->pte isn't
+	 * pte_none() under vmf->ptl protection when we return to
+	 * alloc_set_pte().
+	 */
 	fe->pte = pte_offset_map_lock(vma->vm_mm, fe->pmd, fe->address,
 			&fe->ptl);
 	return 0;
@@ -3456,7 +3476,7 @@ static int handle_pte_fault(struct fault_env *fe)
 		fe->pte = NULL;
 	} else {
 		/* See comment in pte_alloc_one_map() */
-		if (pmd_trans_unstable(fe->pmd) || pmd_devmap(*fe->pmd))
+		if (pmd_devmap_trans_unstable(fe->pmd))
 			return 0;
 		/*
 		 * A regular pmd is established and it can't morph into a huge
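
Usage note on the mm/gup.c change: the sketch below is a hypothetical caller, not part of this merge, showing how a long-term pinning user such as an RDMA or V4L2 driver would call get_user_pages_longterm() on a v4.9 base so that filesystem-dax mappings fail with -EOPNOTSUPP instead of being pinned indefinitely. The helper names pin_user_buffer()/unpin_user_buffer() are invented for illustration, and the declaration of get_user_pages_longterm() in include/linux/mm.h (with a fallback for !CONFIG_FS_DAX builds) is assumed to come from the same patch series.

/*
 * Hypothetical caller sketch (not part of this merge): long-term
 * pinning of a user buffer via get_user_pages_longterm(), which
 * rejects filesystem-dax vmas instead of pinning pages whose
 * lifetime the filesystem controls.
 */
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/sched.h>

static long pin_user_buffer(unsigned long start, unsigned long nr_pages,
			    struct page **pages)
{
	long pinned;

	/* gup on current->mm requires mmap_sem held for read on v4.9 */
	down_read(&current->mm->mmap_sem);
	pinned = get_user_pages_longterm(start, nr_pages, FOLL_WRITE,
					 pages, NULL);
	up_read(&current->mm->mmap_sem);

	/* -EOPNOTSUPP means the range included a filesystem-dax mapping */
	return pinned;
}

static void unpin_user_buffer(struct page **pages, long nr_pinned)
{
	long i;

	/* drop the elevated reference taken on each pinned page */
	for (i = 0; i < nr_pinned; i++)
		put_page(pages[i]);
}

Passing NULL for the vmas argument lets the helper allocate its own vma array for the fsdax check, as the new code above does with kcalloc(); that keeps callers simple at the cost of one allocation per call.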