Diffstat (limited to 'mm')
-rw-r--r--  mm/gup.c        2
-rw-r--r--  mm/hugetlb.c   66
-rw-r--r--  mm/mempolicy.c 32
-rw-r--r--  mm/mmap.c      13
-rw-r--r--  mm/rmap.c      56
-rw-r--r--  mm/shmem.c      4
6 files changed, 155 insertions(+), 18 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index b599526db9f7..018144c4b9ec 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -940,8 +940,6 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
int locked = 0;
long ret = 0;
- VM_BUG_ON(start & ~PAGE_MASK);
- VM_BUG_ON(len != PAGE_ALIGN(len));
end = start + len;
for (nstart = start; nstart < end; nstart = nend) {
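For illustration only, a minimal userspace sketch of the page-alignment invariant the two removed VM_BUG_ON()s asserted (the PAGE_* macros below are simplified stand-ins assuming 4K pages, not the kernel's definitions); with this series the validation effectively happens in the vm_brk() caller shown in the mm/mmap.c hunk below:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT      12                      /* assumption: 4K pages */
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define PAGE_MASK       (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & PAGE_MASK)

/* start must sit on a page boundary and len must be a whole number of
 * pages -- exactly what the dropped assertions checked. */
static bool populate_args_valid(unsigned long start, unsigned long len)
{
        return !(start & ~PAGE_MASK) && len == PAGE_ALIGN(len);
}

int main(void)
{
        printf("%d %d\n", populate_args_valid(0x1000, 0x3000),  /* 1 */
                          populate_args_valid(0x1001, 0x3000)); /* 0 */
        return 0;
}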
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a813b03021b7..6f99a0f906bb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3103,7 +3103,7 @@ static int is_hugetlb_entry_hwpoisoned(pte_t pte)
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
struct vm_area_struct *vma)
{
- pte_t *src_pte, *dst_pte, entry;
+ pte_t *src_pte, *dst_pte, entry, dst_entry;
struct page *ptepage;
unsigned long addr;
int cow;
@@ -3131,15 +3131,30 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
break;
}
- /* If the pagetables are shared don't copy or take references */
- if (dst_pte == src_pte)
+ /*
+ * If the pagetables are shared don't copy or take references.
+ * dst_pte == src_pte is the common case of src/dest sharing.
+ *
+ * However, src could have 'unshared' and dst shares with
+ * another vma. If dst_pte !none, this implies sharing.
+ * Check here before taking page table lock, and once again
+ * after taking the lock below.
+ */
+ dst_entry = huge_ptep_get(dst_pte);
+ if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
continue;
dst_ptl = huge_pte_lock(h, dst, dst_pte);
src_ptl = huge_pte_lockptr(h, src, src_pte);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
entry = huge_ptep_get(src_pte);
- if (huge_pte_none(entry)) { /* skip none entry */
+ dst_entry = huge_ptep_get(dst_pte);
+ if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
+ /*
+ * Skip if src entry none. Also, skip in the
+ * unlikely case dst entry !none as this implies
+ * sharing with another vma.
+ */
;
} else if (unlikely(is_hugetlb_entry_migration(entry) ||
is_hugetlb_entry_hwpoisoned(entry))) {
@@ -3537,6 +3552,12 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
return err;
ClearPagePrivate(page);
+ /*
+ * set page dirty so that it will not be removed from cache/file
+ * by non-hugetlbfs specific code paths.
+ */
+ set_page_dirty(page);
+
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
@@ -4195,13 +4216,41 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
/*
* check on proper vm_flags and page table alignment
*/
- if (vma->vm_flags & VM_MAYSHARE &&
- vma->vm_start <= base && end <= vma->vm_end)
+ if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
return true;
return false;
}
/*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+ unsigned long check_addr = *start;
+
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ return;
+
+ for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+ unsigned long a_start = check_addr & PUD_MASK;
+ unsigned long a_end = a_start + PUD_SIZE;
+
+ /*
+ * If sharing is possible, adjust start/end if necessary.
+ */
+ if (range_in_vma(vma, a_start, a_end)) {
+ if (a_start < *start)
+ *start = a_start;
+ if (a_end > *end)
+ *end = a_end;
+ }
+ }
+}
+
+/*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the
* !shared pmd case because we can allocate the pmd later as well, it makes the
@@ -4297,6 +4346,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
return 0;
}
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+ unsigned long *start, unsigned long *end)
+{
+}
#define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
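As a rough userspace model (not kernel code; the PUD size and the simplified vma struct below are assumptions), the new adjust_range_if_pmd_sharing_possible() amounts to rounding any PUD-aligned window that lies fully inside the vma out to PUD boundaries. The rmap change further down relies on exactly this widening to size its cache/TLB flush and mmu-notifier range.

#include <stdbool.h>
#include <stdio.h>

#define PUD_SHIFT       30                      /* assumption: 1G PUDs (x86_64) */
#define PUD_SIZE        (1UL << PUD_SHIFT)
#define PUD_MASK        (~(PUD_SIZE - 1))

struct vma_model { unsigned long vm_start, vm_end; bool may_share; };

static bool range_in_vma_model(struct vma_model *v, unsigned long s, unsigned long e)
{
        return v->vm_start <= s && e <= v->vm_end;
}

/* Mirrors adjust_range_if_pmd_sharing_possible(): widen [*start, *end) to
 * cover every PUD-sized, PUD-aligned window of the vma it overlaps. */
static void widen_for_pmd_sharing(struct vma_model *v,
                                  unsigned long *start, unsigned long *end)
{
        unsigned long addr;

        if (!v->may_share)
                return;

        for (addr = *start; addr < *end; addr += PUD_SIZE) {
                unsigned long a_start = addr & PUD_MASK;
                unsigned long a_end = a_start + PUD_SIZE;

                if (range_in_vma_model(v, a_start, a_end)) {
                        if (a_start < *start)
                                *start = a_start;
                        if (a_end > *end)
                                *end = a_end;
                }
        }
}

int main(void)
{
        struct vma_model v = { .vm_start = 0, .vm_end = 4UL * PUD_SIZE, .may_share = true };
        unsigned long s = PUD_SIZE + 0x1000, e = PUD_SIZE + 0x2000;

        widen_for_pmd_sharing(&v, &s, &e);
        printf("widened to [%#lx, %#lx)\n", s, e);      /* one full PUD */
        return 0;
}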
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b9b2e25342d4..e48fa18e1828 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2011,8 +2011,36 @@ retry_cpuset:
nmask = policy_nodemask(gfp, pol);
if (!nmask || node_isset(hpage_node, *nmask)) {
mpol_cond_put(pol);
- page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE, order);
+ /*
+ * We cannot invoke reclaim if __GFP_THISNODE
+ * is set. Invoking reclaim with
+ * __GFP_THISNODE set, would cause THP
+ * allocations to trigger heavy swapping
+ * even though there may be tons of free memory
+ * (including potentially plenty of THP
+ * already available in the buddy) on all the
+ * other NUMA nodes.
+ *
+ * At most we could invoke compaction when
+ * __GFP_THISNODE is set (but we would need to
+ * refrain from invoking reclaim even if
+ * compaction returned COMPACT_SKIPPED because
+ * there wasn't enough memory to succeed
+ * compaction). For now just avoid
+ * __GFP_THISNODE instead of limiting the
+ * allocation path to a strict and single
+ * compaction invocation.
+ *
+ * Supposedly if direct reclaim was enabled by
+ * the caller, the app prefers THP regardless
+ * of the node it comes from so this would be
+ * more desirable behavior than only
+ * providing THP originated from the local
+ * node in such case.
+ */
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ gfp |= __GFP_THISNODE;
+ page = __alloc_pages_node(hpage_node, gfp, order);
goto out;
}
}
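A stripped-down model of the flag decision above: the node pinning is applied only when the caller did not allow direct reclaim, so a THP request that may reclaim is free to fall back to other nodes instead of swapping the local node. The bit values below are placeholders, not the kernel's gfp.h definitions.

#include <stdio.h>

#define GFP_DIRECT_RECLAIM      0x1u    /* placeholder value */
#define GFP_THISNODE            0x2u    /* placeholder value */

/* Mirrors the hunk above: only non-reclaiming THP allocations stay pinned
 * to the preferred node. */
static unsigned int thp_gfp_for_preferred_node(unsigned int gfp)
{
        if (!(gfp & GFP_DIRECT_RECLAIM))
                gfp |= GFP_THISNODE;
        return gfp;
}

int main(void)
{
        printf("no reclaim : %#x\n", thp_gfp_for_preferred_node(0));
        printf("may reclaim: %#x\n", thp_gfp_for_preferred_node(GFP_DIRECT_RECLAIM));
        return 0;
}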
diff --git a/mm/mmap.c b/mm/mmap.c
index c9d2061257ac..0617c12aebe4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2854,10 +2854,6 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
pgoff_t pgoff = addr >> PAGE_SHIFT;
int error;
- len = PAGE_ALIGN(len);
- if (!len)
- return addr;
-
flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
@@ -2925,12 +2921,19 @@ out:
return addr;
}
-unsigned long vm_brk(unsigned long addr, unsigned long len)
+unsigned long vm_brk(unsigned long addr, unsigned long request)
{
struct mm_struct *mm = current->mm;
+ unsigned long len;
unsigned long ret;
bool populate;
+ len = PAGE_ALIGN(request);
+ if (len < request)
+ return -ENOMEM;
+ if (!len)
+ return addr;
+
down_write(&mm->mmap_sem);
ret = do_brk(addr, len);
populate = ((mm->def_flags & VM_LOCKED) != 0);
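The new length handling in vm_brk() uses a common wraparound idiom: if rounding the request up to a page boundary overflows, the aligned value comes out smaller than the request. A minimal standalone illustration (4K page size assumed; the helper name is made up for this sketch):

#include <stdio.h>

#define PAGE_SIZE       4096UL
#define PAGE_MASK       (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x)   (((x) + PAGE_SIZE - 1) & PAGE_MASK)

/* Returns 0 and the rounded length on success; -1 models the -ENOMEM case. */
static int check_brk_len(unsigned long request, unsigned long *len)
{
        *len = PAGE_ALIGN(request);
        if (*len < request)             /* PAGE_ALIGN wrapped past ULONG_MAX */
                return -1;
        return 0;                       /* *len may legitimately be 0 */
}

int main(void)
{
        unsigned long len;

        printf("%d\n", check_brk_len(12345, &len));             /* 0, len = 16384 */
        printf("%d\n", check_brk_len(~0UL - 100, &len));        /* -1: overflow */
        return 0;
}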
diff --git a/mm/rmap.c b/mm/rmap.c
index effcea83ac4e..58f0f26bdccf 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1324,12 +1324,41 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
pte_t pteval;
spinlock_t *ptl;
int ret = SWAP_AGAIN;
+ unsigned long sh_address;
+ bool pmd_sharing_possible = false;
+ unsigned long spmd_start, spmd_end;
enum ttu_flags flags = (enum ttu_flags)arg;
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
goto out;
+ /*
+ * Only use the range_start/end mmu notifiers if huge pmd sharing
+ * is possible. In the normal case, mmu_notifier_invalidate_page
+ * is sufficient as we only unmap a page. However, if we unshare
+ * a pmd, we will unmap a PUD_SIZE range.
+ */
+ if (PageHuge(page)) {
+ spmd_start = address;
+ spmd_end = spmd_start + vma_mmu_pagesize(vma);
+
+ /*
+ * Check if pmd sharing is possible. If possible, we could
+ * unmap a PUD_SIZE range. spmd_start/spmd_end will be
+ * modified if sharing is possible.
+ */
+ adjust_range_if_pmd_sharing_possible(vma, &spmd_start,
+ &spmd_end);
+ if (spmd_end - spmd_start != vma_mmu_pagesize(vma)) {
+ sh_address = address;
+
+ pmd_sharing_possible = true;
+ mmu_notifier_invalidate_range_start(vma->vm_mm,
+ spmd_start, spmd_end);
+ }
+ }
+
pte = page_check_address(page, mm, address, &ptl, 0);
if (!pte)
goto out;
@@ -1356,6 +1385,30 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
}
}
+ /*
+ * Call huge_pmd_unshare to potentially unshare a huge pmd. Pass
+ * sh_address as it will be modified if unsharing is successful.
+ */
+ if (PageHuge(page) && huge_pmd_unshare(mm, &sh_address, pte)) {
+ /*
+ * huge_pmd_unshare unmapped an entire PMD page. There is
+ * no way of knowing exactly which PMDs may be cached for
+ * this mm, so flush them all. spmd_start/spmd_end cover
+ * this PUD_SIZE range.
+ */
+ flush_cache_range(vma, spmd_start, spmd_end);
+ flush_tlb_range(vma, spmd_start, spmd_end);
+
+ /*
+ * The ref count of the PMD page was dropped which is part
+ * of the way map counting is done for shared PMDs. When
+ * there is no other sharing, huge_pmd_unshare returns false
+ * and we will unmap the actual page and drop map count
+ * to zero.
+ */
+ goto out_unmap;
+ }
+
/* Nuke the page table entry. */
flush_cache_page(vma, address, page_to_pfn(page));
if (should_defer_flush(mm, flags)) {
@@ -1450,6 +1503,9 @@ out_unmap:
if (ret != SWAP_FAIL && ret != SWAP_MLOCK && !(flags & TTU_MUNLOCK))
mmu_notifier_invalidate_page(mm, address);
out:
+ if (pmd_sharing_possible)
+ mmu_notifier_invalidate_range_end(vma->vm_mm,
+ spmd_start, spmd_end);
return ret;
}
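In outline, the rmap change brackets the (possibly PUD-sized) unmap with range notifiers only when pmd sharing could have widened the affected range. A hedged sketch of that control flow with stubbed-out notifier calls (the stubs, page sizes, and the always-widen helper are assumptions for illustration, not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

#define HUGE_PAGE_SIZE  (2UL << 20)     /* assumption: 2M hugetlb pages */
#define PUD_SIZE        (1UL << 30)     /* assumption: 1G PUDs */

/* Stand-ins for mmu_notifier_invalidate_range_start()/_end(). */
static void notify_range_start(unsigned long s, unsigned long e)
{
        printf("invalidate_range_start [%#lx, %#lx)\n", s, e);
}
static void notify_range_end(unsigned long s, unsigned long e)
{
        printf("invalidate_range_end   [%#lx, %#lx)\n", s, e);
}

/* Pretend sharing is always possible: widen to the enclosing PUD. */
static void maybe_widen(unsigned long *s, unsigned long *e)
{
        *s &= ~(PUD_SIZE - 1);
        *e = *s + PUD_SIZE;
}

static void unmap_one_huge_page(unsigned long address)
{
        unsigned long spmd_start = address;
        unsigned long spmd_end = address + HUGE_PAGE_SIZE;
        bool pmd_sharing_possible = false;

        maybe_widen(&spmd_start, &spmd_end);
        if (spmd_end - spmd_start != HUGE_PAGE_SIZE) {
                /* A shared pmd may be torn down: cover the whole PUD. */
                pmd_sharing_possible = true;
                notify_range_start(spmd_start, spmd_end);
        }

        /* ... page table walk, possible huge_pmd_unshare(), pte clear ... */

        if (pmd_sharing_possible)
                notify_range_end(spmd_start, spmd_end);
}

int main(void)
{
        unmap_one_huge_page(0x40200000UL);
        return 0;
}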
diff --git a/mm/shmem.c b/mm/shmem.c
index 6318fc7883c4..6397dc2e15bc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1818,9 +1818,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
mutex_lock(&inode->i_mutex);
/* We're holding i_mutex so we can access i_size directly */
- if (offset < 0)
- offset = -EINVAL;
- else if (offset >= inode->i_size)
+ if (offset < 0 || offset >= inode->i_size)
offset = -ENXIO;
else {
start = offset >> PAGE_CACHE_SHIFT;
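The shmem hunk folds the negative-offset case into the same -ENXIO return as an offset at or beyond i_size, so both out-of-range cases are handled identically. A trivial model of the consolidated bounds check (the long long signature and helper name are illustrative only):

#include <errno.h>
#include <stdio.h>

/* Any offset outside [0, i_size) yields -ENXIO; in-range offsets proceed. */
static long long check_seek_offset(long long offset, long long i_size)
{
        if (offset < 0 || offset >= i_size)
                return -ENXIO;
        return offset;          /* real code goes on to walk the page cache */
}

int main(void)
{
        printf("%lld\n", check_seek_offset(-1, 4096));  /* -ENXIO */
        printf("%lld\n", check_seek_offset(100, 4096)); /* 100 */
        return 0;
}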