aboutsummaryrefslogtreecommitdiff
path: root/mm/khugepaged.c
diff options
context:
space:
mode:
author Greg Kroah-Hartman <gregkh@google.com> 2018-12-05 20:11:17 +0100
committer Greg Kroah-Hartman <gregkh@google.com> 2018-12-05 20:11:17 +0100
commit b7ec0d723484ef9c99508a3b77ec142351800cca (patch)
tree f449c229b711cad3b2816ecd359de26cddabbb65 /mm/khugepaged.c
parent 79461484eed9b183dab99faaeab92e1be88e3cd2 (diff)
parent b136f0e9e9d79b8449d99ea701ade1e17a971826 (diff)
Merge 4.9.143 into android-4.9-oASB-2018-12-05_4.9-o
Changes in 4.9.143 mm/huge_memory: rename freeze_page() to unmap_page() mm/huge_memory.c: reorder operations in __split_huge_page_tail() mm/huge_memory: splitting set mapping+index before unfreeze mm/huge_memory: fix lockdep complaint on 32-bit i_size_read() mm/khugepaged: collapse_shmem() stop if punched or truncated shmem: shmem_charge: verify max_block is not exceeded before inode update shmem: introduce shmem_inode_acct_block mm/khugepaged: fix crashes due to misaccounted holes mm/khugepaged: collapse_shmem() remember to clear holes mm/khugepaged: minor reorderings in collapse_shmem() mm/khugepaged: collapse_shmem() without freezing new_page mm/khugepaged: collapse_shmem() do not crash on Compound media: em28xx: Fix use-after-free when disconnecting Revert "wlcore: Add missing PM call for wlcore_cmd_wait_for_event_or_timeout()" net: skb_scrub_packet(): Scrub offload_fwd_mark rapidio/rionet: do not free skb before reading its length s390/qeth: fix length check in SNMP processing usbnet: ipheth: fix potential recvmsg bug and recvmsg bug 2 kvm: mmu: Fix race in emulated page table writes kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb KVM: X86: Fix scan ioapic use-before-initialization xtensa: enable coprocessors that are being flushed xtensa: fix coprocessor context offset definitions Btrfs: ensure path name is null terminated at btrfs_control_ioctl perf/x86/intel: Move branch tracing setup to the Intel-specific source file perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts() fs: fix lost error code in dio_complete ALSA: wss: Fix invalid snd_free_pages() at error path ALSA: ac97: Fix incorrect bit shift at AC97-SPSA control write ALSA: control: Fix race between adding and removing a user element ALSA: sparc: Fix invalid snd_free_pages() at error path ext2: fix potential use after free dmaengine: at_hdmac: fix memory leak in at_dma_xlate() dmaengine: at_hdmac: fix module unloading btrfs: release metadata before running delayed 
refs USB: usb-storage: Add new IDs to ums-realtek usb: core: quirks: add RESET_RESUME quirk for Cherry G230 Stream series Revert "usb: dwc3: gadget: skip Set/Clear Halt when invalid" iio:st_magn: Fix enable device after trigger mm: use swp_offset as key in shmem_replace_page() Drivers: hv: vmbus: check the creation_status in vmbus_establish_gpadl() misc: mic/scif: fix copy-paste error in scif_create_remote_lookup efi/libstub: arm: support building with clang ARM: 8766/1: drop no-thumb-interwork in EABI mode ARM: 8767/1: add support for building ARM kernel with clang bus: arm-cci: remove unnecessary unreachable() ARM: trusted_foundations: do not use naked function workqueue: avoid clang warning efi/libstub: Make file I/O chunking x86-specific kbuild: Set KBUILD_CFLAGS before incl. arch Makefile Linux 4.9.143 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r-- mm/khugepaged.c 129
1 files changed, 73 insertions, 56 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 1df37ee996d5..e0cfc3a54b6a 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1286,7 +1286,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* collapse_shmem - collapse small tmpfs/shmem pages into huge one.
*
* Basic scheme is simple, details are more complex:
- * - allocate and freeze a new huge page;
+ * - allocate and lock a new huge page;
* - scan over radix tree replacing old pages the new one
* + swap in pages if necessary;
* + fill in gaps;
@@ -1294,11 +1294,11 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* - if replacing succeed:
* + copy data over;
* + free old pages;
- * + unfreeze huge page;
+ * + unlock huge page;
* - if replacing failed;
* + put all pages back and unfreeze them;
* + restore gaps in the radix-tree;
- * + free huge page;
+ * + unlock and free huge page;
*/
static void collapse_shmem(struct mm_struct *mm,
struct address_space *mapping, pgoff_t start,
@@ -1332,18 +1332,15 @@ static void collapse_shmem(struct mm_struct *mm,
goto out;
}
+ __SetPageLocked(new_page);
+ __SetPageSwapBacked(new_page);
new_page->index = start;
new_page->mapping = mapping;
- __SetPageSwapBacked(new_page);
- __SetPageLocked(new_page);
- BUG_ON(!page_ref_freeze(new_page, 1));
-
/*
- * At this point the new_page is 'frozen' (page_count() is zero), locked
- * and not up-to-date. It's safe to insert it into radix tree, because
- * nobody would be able to map it or use it in other way until we
- * unfreeze it.
+ * At this point the new_page is locked and not up-to-date.
+ * It's safe to insert it into the page cache, because nobody would
+ * be able to map it or use it in another way until we unlock it.
*/
index = start;
@@ -1352,18 +1349,28 @@ static void collapse_shmem(struct mm_struct *mm,
int n = min(iter.index, end) - index;
/*
+ * Stop if extent has been hole-punched, and is now completely
+ * empty (the more obvious i_size_read() check would take an
+ * irq-unsafe seqlock on 32-bit).
+ */
+ if (n >= HPAGE_PMD_NR) {
+ result = SCAN_TRUNCATED;
+ goto tree_locked;
+ }
+
+ /*
* Handle holes in the radix tree: charge it from shmem and
* insert relevant subpage of new_page into the radix-tree.
*/
if (n && !shmem_charge(mapping->host, n)) {
result = SCAN_FAIL;
- break;
+ goto tree_locked;
}
- nr_none += n;
for (; index < min(iter.index, end); index++) {
radix_tree_insert(&mapping->page_tree, index,
new_page + (index % HPAGE_PMD_NR));
}
+ nr_none += n;
/* We are done. */
if (index >= end)
@@ -1379,12 +1386,12 @@ static void collapse_shmem(struct mm_struct *mm,
result = SCAN_FAIL;
goto tree_unlocked;
}
- spin_lock_irq(&mapping->tree_lock);
} else if (trylock_page(page)) {
get_page(page);
+ spin_unlock_irq(&mapping->tree_lock);
} else {
result = SCAN_PAGE_LOCK;
- break;
+ goto tree_locked;
}
/*
@@ -1393,17 +1400,24 @@ static void collapse_shmem(struct mm_struct *mm,
*/
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageUptodate(page), page);
- VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+ /*
+ * If file was truncated then extended, or hole-punched, before
+ * we locked the first page, then a THP might be there already.
+ */
+ if (PageTransCompound(page)) {
+ result = SCAN_PAGE_COMPOUND;
+ goto out_unlock;
+ }
if (page_mapping(page) != mapping) {
result = SCAN_TRUNCATED;
goto out_unlock;
}
- spin_unlock_irq(&mapping->tree_lock);
if (isolate_lru_page(page)) {
result = SCAN_DEL_PAGE_LRU;
- goto out_isolate_failed;
+ goto out_unlock;
}
if (page_mapped(page))
@@ -1425,7 +1439,9 @@ static void collapse_shmem(struct mm_struct *mm,
*/
if (!page_ref_freeze(page, 3)) {
result = SCAN_PAGE_COUNT;
- goto out_lru;
+ spin_unlock_irq(&mapping->tree_lock);
+ putback_lru_page(page);
+ goto out_unlock;
}
/*
@@ -1441,17 +1457,10 @@ static void collapse_shmem(struct mm_struct *mm,
slot = radix_tree_iter_next(&iter);
index++;
continue;
-out_lru:
- spin_unlock_irq(&mapping->tree_lock);
- putback_lru_page(page);
-out_isolate_failed:
- unlock_page(page);
- put_page(page);
- goto tree_unlocked;
out_unlock:
unlock_page(page);
put_page(page);
- break;
+ goto tree_unlocked;
}
/*
@@ -1459,14 +1468,18 @@ out_unlock:
* This code only triggers if there's nothing in radix tree
* beyond 'end'.
*/
- if (result == SCAN_SUCCEED && index < end) {
+ if (index < end) {
int n = end - index;
+ /* Stop if extent has been truncated, and is now empty */
+ if (n >= HPAGE_PMD_NR) {
+ result = SCAN_TRUNCATED;
+ goto tree_locked;
+ }
if (!shmem_charge(mapping->host, n)) {
result = SCAN_FAIL;
goto tree_locked;
}
-
for (; index < end; index++) {
radix_tree_insert(&mapping->page_tree, index,
new_page + (index % HPAGE_PMD_NR));
@@ -1474,57 +1487,62 @@ out_unlock:
nr_none += n;
}
+ __inc_node_page_state(new_page, NR_SHMEM_THPS);
+ if (nr_none) {
+ struct zone *zone = page_zone(new_page);
+
+ __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
+ __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+ }
+
tree_locked:
spin_unlock_irq(&mapping->tree_lock);
tree_unlocked:
if (result == SCAN_SUCCEED) {
- unsigned long flags;
- struct zone *zone = page_zone(new_page);
-
/*
* Replacing old pages with new one has succeed, now we need to
* copy the content and free old pages.
*/
+ index = start;
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
+ while (index < page->index) {
+ clear_highpage(new_page + (index % HPAGE_PMD_NR));
+ index++;
+ }
copy_highpage(new_page + (page->index % HPAGE_PMD_NR),
page);
list_del(&page->lru);
- unlock_page(page);
- page_ref_unfreeze(page, 1);
page->mapping = NULL;
+ page_ref_unfreeze(page, 1);
ClearPageActive(page);
ClearPageUnevictable(page);
+ unlock_page(page);
put_page(page);
+ index++;
}
-
- local_irq_save(flags);
- __inc_node_page_state(new_page, NR_SHMEM_THPS);
- if (nr_none) {
- __mod_node_page_state(zone->zone_pgdat, NR_FILE_PAGES, nr_none);
- __mod_node_page_state(zone->zone_pgdat, NR_SHMEM, nr_none);
+ while (index < end) {
+ clear_highpage(new_page + (index % HPAGE_PMD_NR));
+ index++;
}
- local_irq_restore(flags);
- /*
- * Remove pte page tables, so we can re-faulti
- * the page as huge.
- */
- retract_page_tables(mapping, start);
-
- /* Everything is ready, let's unfreeze the new_page */
- set_page_dirty(new_page);
SetPageUptodate(new_page);
- page_ref_unfreeze(new_page, HPAGE_PMD_NR);
+ page_ref_add(new_page, HPAGE_PMD_NR - 1);
+ set_page_dirty(new_page);
mem_cgroup_commit_charge(new_page, memcg, false, true);
lru_cache_add_anon(new_page);
- unlock_page(new_page);
+ /*
+ * Remove pte page tables, so we can re-fault the page as huge.
+ */
+ retract_page_tables(mapping, start);
*hpage = NULL;
} else {
/* Something went wrong: rollback changes to the radix-tree */
- shmem_uncharge(mapping->host, nr_none);
spin_lock_irq(&mapping->tree_lock);
+ mapping->nrpages -= nr_none;
+ shmem_uncharge(mapping->host, nr_none);
+
radix_tree_for_each_slot(slot, &mapping->page_tree, &iter,
start) {
if (iter.index >= end)
@@ -1549,20 +1567,19 @@ tree_unlocked:
page_ref_unfreeze(page, 2);
radix_tree_replace_slot(slot, page);
spin_unlock_irq(&mapping->tree_lock);
- putback_lru_page(page);
unlock_page(page);
+ putback_lru_page(page);
spin_lock_irq(&mapping->tree_lock);
slot = radix_tree_iter_next(&iter);
}
VM_BUG_ON(nr_none);
spin_unlock_irq(&mapping->tree_lock);
- /* Unfreeze new_page, caller would take care about freeing it */
- page_ref_unfreeze(new_page, 1);
mem_cgroup_cancel_charge(new_page, memcg, true);
- unlock_page(new_page);
new_page->mapping = NULL;
}
+
+ unlock_page(new_page);
out:
VM_BUG_ON(!list_empty(&pagelist));
/* TODO: tracepoints */