locking/lockdep: Rework FS_RECLAIM annotation
A while ago someone, and I cannot find the email just now, asked if we
could not implement the RECLAIM_FS inversion stuff with a 'fake' lock
like we use for other things like workqueues etc. I think this should
be possible, which allows reducing the 'irq' states and will reduce the
amount of __bfs() lookups we do.
Removing the 1 IRQ state results in 4 fewer __bfs() walks per
dependency, improving lockdep performance. And by moving this
annotation out of the lockdep code it becomes easier for the mm people
to extend.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Nikolay Borisov <nborisov@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: boqun.feng@gmail.com
Cc: iamjoonsoo.kim@lge.com
Cc: kernel-team@lge.com
Cc: kirill@shutemov.name
Cc: npiggin@gmail.com
Cc: walken@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fc32aa8..c20d896 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
+#include <linux/lockdep.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -3490,6 +3491,47 @@
}
#endif /* CONFIG_COMPACTION */
+#ifdef CONFIG_LOCKDEP
+/*
+ * Lockdep map registered under the name "fs_reclaim". It is taken by
+ * fs_reclaim_acquire() and dropped by fs_reclaim_release().
+ */
+struct lockdep_map __fs_reclaim_map =
+ STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
+
+/*
+ * Decide whether an allocation described by @gfp_mask should take part in
+ * the fs_reclaim annotation.
+ *
+ * The mask is first passed through current_gfp_context(). Returns true only
+ * if every check below passes, false otherwise.
+ */
+static bool __need_fs_reclaim(gfp_t gfp_mask)
+{
+	const gfp_t mask = current_gfp_context(gfp_mask);
+
+	/*
+	 * Without __GFP_DIRECT_RECLAIM nothing waits on reclaim, and for now
+	 * we are only interested in __GFP_FS allocations.
+	 */
+	if (!(mask & __GFP_DIRECT_RECLAIM) || !(mask & __GFP_FS))
+		return false;
+
+	/* __GFP_NOLOCKDEP allocations are never annotated */
+	if (mask & __GFP_NOLOCKDEP)
+		return false;
+
+	/*
+	 * A PF_MEMALLOC task won't enter reclaim, unless the allocation
+	 * carries __GFP_NOMEMALLOC.
+	 */
+	if (mask & __GFP_NOMEMALLOC)
+		return true;
+
+	return !(current->flags & PF_MEMALLOC);
+}
+
+/*
+ * Acquire the "fs_reclaim" lockdep map when __need_fs_reclaim() says the
+ * allocation described by @gfp_mask should be annotated; otherwise do
+ * nothing.
+ */
+void fs_reclaim_acquire(gfp_t gfp_mask)
+{
+	if (!__need_fs_reclaim(gfp_mask))
+		return;
+
+	lock_map_acquire(&__fs_reclaim_map);
+}
+EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
+
+/*
+ * Release the "fs_reclaim" lockdep map when __need_fs_reclaim() says the
+ * allocation described by @gfp_mask should be annotated; otherwise do
+ * nothing.
+ */
+void fs_reclaim_release(gfp_t gfp_mask)
+{
+	if (!__need_fs_reclaim(gfp_mask))
+		return;
+
+	lock_map_release(&__fs_reclaim_map);
+}
+EXPORT_SYMBOL_GPL(fs_reclaim_release);
+#endif
+
/* Perform direct synchronous page reclaim */
static int
__perform_reclaim(gfp_t gfp_mask, unsigned int order,
@@ -3504,7 +3546,7 @@
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
noreclaim_flag = memalloc_noreclaim_save();
- lockdep_set_current_reclaim_state(gfp_mask);
+ fs_reclaim_acquire(gfp_mask);
reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state;
@@ -3512,7 +3554,7 @@
ac->nodemask);
current->reclaim_state = NULL;
- lockdep_clear_current_reclaim_state();
+ fs_reclaim_release(gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag);
cond_resched();
@@ -4041,7 +4083,8 @@
*alloc_flags |= ALLOC_CPUSET;
}
- lockdep_trace_alloc(gfp_mask);
+ fs_reclaim_acquire(gfp_mask);
+ fs_reclaim_release(gfp_mask);
might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
diff --git a/mm/slab.h b/mm/slab.h
index 6885e11..0733628 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -43,6 +43,7 @@
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/random.h>
+#include <linux/sched/mm.h>
/*
* State of the slab allocator.
@@ -412,7 +413,10 @@
gfp_t flags)
{
flags &= gfp_allowed_mask;
- lockdep_trace_alloc(flags);
+
+ fs_reclaim_acquire(flags);
+ fs_reclaim_release(flags);
+
might_sleep_if(gfpflags_allow_blocking(flags));
if (should_failslab(s, flags))
diff --git a/mm/slob.c b/mm/slob.c
index 1bae78d..a8bd6fa 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -432,7 +432,8 @@
gfp &= gfp_allowed_mask;
- lockdep_trace_alloc(gfp);
+ fs_reclaim_acquire(gfp);
+ fs_reclaim_release(gfp);
if (size < PAGE_SIZE - align) {
if (!size)
@@ -538,7 +539,8 @@
flags &= gfp_allowed_mask;
- lockdep_trace_alloc(flags);
+ fs_reclaim_acquire(flags);
+ fs_reclaim_release(flags);
if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1af041..f957afe 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3525,8 +3525,6 @@
};
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
- lockdep_set_current_reclaim_state(GFP_KERNEL);
-
if (!cpumask_empty(cpumask))
set_cpus_allowed_ptr(tsk, cpumask);
current->reclaim_state = &reclaim_state;
@@ -3585,14 +3583,15 @@
*/
trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
alloc_order);
+ fs_reclaim_acquire(GFP_KERNEL);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
+ fs_reclaim_release(GFP_KERNEL);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
}
tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
current->reclaim_state = NULL;
- lockdep_clear_current_reclaim_state();
return 0;
}
@@ -3655,14 +3654,14 @@
unsigned int noreclaim_flag;
noreclaim_flag = memalloc_noreclaim_save();
- lockdep_set_current_reclaim_state(sc.gfp_mask);
+ fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
p->reclaim_state = NULL;
- lockdep_clear_current_reclaim_state();
+ fs_reclaim_release(sc.gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag);
return nr_reclaimed;
@@ -3847,7 +3846,7 @@
*/
noreclaim_flag = memalloc_noreclaim_save();
p->flags |= PF_SWAPWRITE;
- lockdep_set_current_reclaim_state(sc.gfp_mask);
+ fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@@ -3862,9 +3861,9 @@
}
p->reclaim_state = NULL;
+ fs_reclaim_release(sc.gfp_mask);
current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag);
- lockdep_clear_current_reclaim_state();
return sc.nr_reclaimed >= nr_pages;
}