about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- MAINTAINERS 1
-rw-r--r-- arch/mips/mm/dma-default.c 2
-rw-r--r-- arch/sh/include/uapi/asm/unistd_64.h 2
-rw-r--r-- drivers/base/memory.c 4
-rw-r--r-- fs/exofs/inode.c 5
-rw-r--r-- fs/nfs/objlayout/objio_osd.c 5
-rw-r--r-- fs/ocfs2/namei.c 4
-rw-r--r-- include/linux/kmemleak.h 2
-rw-r--r-- include/linux/stop_machine.h 6
-rw-r--r-- init/Kconfig 7
-rw-r--r-- kernel/stop_machine.c 4
-rw-r--r-- mm/backing-dev.c 19
-rw-r--r-- mm/hugetlb.c 27
-rw-r--r-- mm/memcontrol.c 4
-rw-r--r-- mm/oom_kill.c 2
-rw-r--r-- mm/page_alloc.c 3
-rw-r--r-- mm/shmem.c 34
-rw-r--r-- mm/vmstat.c 8
18 files changed, 77 insertions, 62 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 38df53f828e1..9bff63cf326e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2975,6 +2975,7 @@ F: kernel/cpuset.c
CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
M: Johannes Weiner <hannes@cmpxchg.org>
M: Michal Hocko <mhocko@kernel.org>
+M: Vladimir Davydov <vdavydov@virtuozzo.com>
L: cgroups@vger.kernel.org
L: linux-mm@kvack.org
S: Maintained
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index d8117be729a2..730d394ce5f0 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -145,7 +145,7 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size,
gfp = massage_gfp_flags(dev, gfp);
- if (IS_ENABLED(CONFIG_DMA_CMA) && !(gfp & GFP_ATOMIC))
+ if (IS_ENABLED(CONFIG_DMA_CMA) && gfpflags_allow_blocking(gfp))
page = dma_alloc_from_contiguous(dev,
count, get_order(size));
if (!page)
diff --git a/arch/sh/include/uapi/asm/unistd_64.h b/arch/sh/include/uapi/asm/unistd_64.h
index e6820c86e8c7..47ebd5b5ed55 100644
--- a/arch/sh/include/uapi/asm/unistd_64.h
+++ b/arch/sh/include/uapi/asm/unistd_64.h
@@ -278,7 +278,7 @@
#define __NR_fsetxattr 256
#define __NR_getxattr 257
#define __NR_lgetxattr 258
-#define __NR_fgetxattr 269
+#define __NR_fgetxattr 259
#define __NR_listxattr 260
#define __NR_llistxattr 261
#define __NR_flistxattr 262
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 2804aed3f416..25425d3f2575 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -303,6 +303,10 @@ static int memory_subsys_offline(struct device *dev)
if (mem->state == MEM_OFFLINE)
return 0;
+ /* Can't offline block with non-present sections */
+ if (mem->section_count != sections_per_block)
+ return -EINVAL;
+
return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 73c64daa0f55..60f03b78914e 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -592,10 +592,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
}
unlock_page(page);
}
- if (PageDirty(page) || PageWriteback(page))
- *uptodate = true;
- else
- *uptodate = PageUptodate(page);
+ *uptodate = PageUptodate(page);
EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate);
return page;
} else {
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5c0c6b58157f..9aebffb40505 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -476,10 +476,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
}
unlock_page(page);
}
- if (PageDirty(page) || PageWriteback(page))
- *uptodate = true;
- else
- *uptodate = PageUptodate(page);
+ *uptodate = PageUptodate(page);
dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
return page;
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a03f6f433075..3123408da935 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -367,13 +367,11 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
- status = posix_acl_create(dir, &mode, &default_acl, &acl);
+ status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
if (status) {
mlog_errno(status);
goto leave;
}
- /* update inode->i_mode after mask with "umask". */
- inode->i_mode = mode;
handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
S_ISDIR(mode),
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index d0a1f99e24e3..4894c6888bc6 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -25,7 +25,7 @@
#ifdef CONFIG_DEBUG_KMEMLEAK
-extern void kmemleak_init(void) __ref;
+extern void kmemleak_init(void) __init;
extern void kmemleak_alloc(const void *ptr, size_t size, int min_count,
gfp_t gfp) __ref;
extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 0adedca24c5b..0e1b1540597a 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -99,7 +99,7 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
* grabbing every spinlock (and more). So the "read" side to such a
* lock is anything which disables preemption.
*/
-#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
/**
* stop_machine: freeze the machine on all CPUs and run this function
@@ -118,7 +118,7 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
-#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
static inline int stop_machine(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
@@ -137,5 +137,5 @@ static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
return stop_machine(fn, data, cpus);
}
-#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
#endif /* _LINUX_STOP_MACHINE */
diff --git a/init/Kconfig b/init/Kconfig
index c24b6f767bf0..235c7a2c0d20 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -2030,13 +2030,6 @@ config INIT_ALL_POSSIBLE
it was better to provide this option than to break all the archs
and have several arch maintainers pursuing me down dark alleys.
-config STOP_MACHINE
- bool
- default y
- depends on (SMP && MODULE_UNLOAD) || HOTPLUG_CPU
- help
- Need stop_machine() primitive.
-
source "block/Kconfig"
config PREEMPT_NOTIFIERS
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 867bc20e1ef1..a3bbaee77c58 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -531,7 +531,7 @@ static int __init cpu_stop_init(void)
}
early_initcall(cpu_stop_init);
-#ifdef CONFIG_STOP_MACHINE
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
@@ -631,4 +631,4 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
return ret ?: done.ret;
}
-#endif /* CONFIG_STOP_MACHINE */
+#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 8ed2ffd963c5..7340353f8aea 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -957,8 +957,9 @@ EXPORT_SYMBOL(congestion_wait);
* jiffies for either a BDI to exit congestion of the given @sync queue
* or a write to complete.
*
- * In the absence of zone congestion, cond_resched() is called to yield
- * the processor if necessary but otherwise does not sleep.
+ * In the absence of zone congestion, a short sleep or a cond_resched() is
+ * performed to yield the processor and to allow other subsystems to make
+ * forward progress.
*
* The return value is 0 if the sleep is for the full timeout. Otherwise,
* it is the number of jiffies that were still remaining when the function
@@ -978,7 +979,19 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
*/
if (atomic_read(&nr_wb_congested[sync]) == 0 ||
!test_bit(ZONE_CONGESTED, &zone->flags)) {
- cond_resched();
+
+ /*
+ * Memory allocation/reclaim might be called from a WQ
+ * context and the current implementation of the WQ
+ * concurrency control doesn't recognize that a particular
+ * WQ is congested if the worker thread is looping without
+ * ever sleeping. Therefore we have to do a short sleep
+ * here rather than calling cond_resched().
+ */
+ if (current->flags & PF_WQ_WORKER)
+ schedule_timeout(1);
+ else
+ cond_resched();
/* In case we scheduled, work out time remaining */
ret = timeout - (jiffies - start);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 827bb02a43a4..ef6963b577fd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -372,8 +372,10 @@ retry_locked:
spin_unlock(&resv->lock);
trg = kmalloc(sizeof(*trg), GFP_KERNEL);
- if (!trg)
+ if (!trg) {
+ kfree(nrg);
return -ENOMEM;
+ }
spin_lock(&resv->lock);
list_add(&trg->link, &resv->region_cache);
@@ -483,8 +485,16 @@ static long region_del(struct resv_map *resv, long f, long t)
retry:
spin_lock(&resv->lock);
list_for_each_entry_safe(rg, trg, head, link) {
- if (rg->to <= f)
+ /*
+ * Skip regions before the range to be deleted. file_region
+ * ranges are normally of the form [from, to). However, there
+ * may be a "placeholder" entry in the map which is of the form
+ * (from, to) with from == to. Check for placeholder entries
+ * at the beginning of the range to be deleted.
+ */
+ if (rg->to <= f && (rg->to != rg->from || rg->to != f))
continue;
+
if (rg->from >= t)
break;
@@ -1886,7 +1896,10 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
page = __alloc_buddy_huge_page_with_mpol(h, vma, addr);
if (!page)
goto out_uncharge_cgroup;
-
+ if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
+ SetPagePrivate(page);
+ h->resv_huge_pages--;
+ }
spin_lock(&hugetlb_lock);
list_move(&page->lru, &h->hugepage_activelist);
/* Fall through */
@@ -3693,12 +3706,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
+ } else {
+ ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+ if (!ptep)
+ return VM_FAULT_OOM;
}
- ptep = huge_pte_alloc(mm, address, huge_page_size(h));
- if (!ptep)
- return VM_FAULT_OOM;
-
mapping = vma->vm_file->f_mapping;
idx = vma_hugecache_offset(h, vma, address);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c92a65b2b4ab..e234c21a5e6c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2128,7 +2128,7 @@ done_restock:
*/
do {
if (page_counter_read(&memcg->memory) > memcg->high) {
- current->memcg_nr_pages_over_high += nr_pages;
+ current->memcg_nr_pages_over_high += batch;
set_notify_resume(current);
break;
}
@@ -5512,11 +5512,11 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
* mem_cgroup_replace_page - migrate a charge to another page
* @oldpage: currently charged page
* @newpage: page to transfer the charge to
- * @lrucare: either or both pages might be on the LRU already
*
* Migrate the charge from @oldpage to @newpage.
*
* Both pages must be locked, @newpage->mapping must be set up.
+ * Either or both pages might be on the LRU already.
*/
void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
{
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d13a33918fa2..c12680993ff3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -608,6 +608,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
continue;
if (unlikely(p->flags & PF_KTHREAD))
continue;
+ if (is_global_init(p))
+ continue;
if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
continue;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 17a3c66639a9..9d666df5ef95 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3647,8 +3647,9 @@ static void show_migration_types(unsigned char type)
{
static const char types[MIGRATE_TYPES] = {
[MIGRATE_UNMOVABLE] = 'U',
- [MIGRATE_RECLAIMABLE] = 'E',
[MIGRATE_MOVABLE] = 'M',
+ [MIGRATE_RECLAIMABLE] = 'E',
+ [MIGRATE_HIGHATOMIC] = 'H',
#ifdef CONFIG_CMA
[MIGRATE_CMA] = 'C',
#endif
diff --git a/mm/shmem.c b/mm/shmem.c
index 9187eee4128b..2afcdbbdb685 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -843,14 +843,14 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
list_add_tail(&info->swaplist, &shmem_swaplist);
if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
- swap_shmem_alloc(swap);
- shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
-
spin_lock(&info->lock);
- info->swapped++;
shmem_recalc_inode(inode);
+ info->swapped++;
spin_unlock(&info->lock);
+ swap_shmem_alloc(swap);
+ shmem_delete_from_page_cache(page, swp_to_radix_entry(swap));
+
mutex_unlock(&shmem_swaplist_mutex);
BUG_ON(page_mapped(page));
swap_writepage(page, wbc);
@@ -1078,7 +1078,7 @@ repeat:
if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
error = -EINVAL;
- goto failed;
+ goto unlock;
}
if (page && sgp == SGP_WRITE)
@@ -1246,11 +1246,15 @@ clear:
/* Perhaps the file has been truncated since we checked */
if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
+ if (alloced) {
+ ClearPageDirty(page);
+ delete_from_page_cache(page);
+ spin_lock(&info->lock);
+ shmem_recalc_inode(inode);
+ spin_unlock(&info->lock);
+ }
error = -EINVAL;
- if (alloced)
- goto trunc;
- else
- goto failed;
+ goto unlock;
}
*pagep = page;
return 0;
@@ -1258,23 +1262,13 @@ clear:
/*
* Error recovery.
*/
-trunc:
- info = SHMEM_I(inode);
- ClearPageDirty(page);
- delete_from_page_cache(page);
- spin_lock(&info->lock);
- info->alloced--;
- inode->i_blocks -= BLOCKS_PER_PAGE;
- spin_unlock(&info->lock);
decused:
- sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks)
percpu_counter_add(&sbinfo->used_blocks, -1);
unacct:
shmem_unacct_blocks(info->flags, 1);
failed:
- if (swap.val && error != -EINVAL &&
- !shmem_confirm_swap(mapping, index, swap))
+ if (swap.val && !shmem_confirm_swap(mapping, index, swap))
error = -EEXIST;
unlock:
if (page) {
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 879a2be23325..0d5712b0206c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -921,8 +921,8 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
#ifdef CONFIG_PROC_FS
static char * const migratetype_names[MIGRATE_TYPES] = {
"Unmovable",
- "Reclaimable",
"Movable",
+ "Reclaimable",
"HighAtomic",
#ifdef CONFIG_CMA
"CMA",
@@ -1379,6 +1379,7 @@ static const struct file_operations proc_vmstat_file_operations = {
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_SMP
+static struct workqueue_struct *vmstat_wq;
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
int sysctl_stat_interval __read_mostly = HZ;
static cpumask_var_t cpu_stat_off;
@@ -1391,7 +1392,7 @@ static void vmstat_update(struct work_struct *w)
* to occur in the future. Keep on running the
* update worker thread.
*/
- schedule_delayed_work_on(smp_processor_id(),
+ queue_delayed_work_on(smp_processor_id(), vmstat_wq,
this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
} else {
@@ -1460,7 +1461,7 @@ static void vmstat_shepherd(struct work_struct *w)
if (need_update(cpu) &&
cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
- schedule_delayed_work_on(cpu,
+ queue_delayed_work_on(cpu, vmstat_wq,
&per_cpu(vmstat_work, cpu), 0);
put_online_cpus();
@@ -1549,6 +1550,7 @@ static int __init setup_vmstat(void)
start_shepherd_timer();
cpu_notifier_register_done();
+ vmstat_wq = alloc_workqueue("vmstat", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
#endif
#ifdef CONFIG_PROC_FS
proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations);