diff options
Diffstat (limited to 'arch')
139 files changed, 940 insertions, 493 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index ecd8c3f5ea1b..9772b600175e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -6,6 +6,7 @@ config OPROFILE tristate "OProfile system profiling" depends on PROFILING depends on HAVE_OPROFILE + depends on !PREEMPT_RT_FULL select RING_BUFFER select RING_BUFFER_ALLOW_SWAP help @@ -49,6 +50,7 @@ config KPROBES config JUMP_LABEL bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL + depends on (!INTERRUPT_OFF_HIST && !PREEMPT_OFF_HIST && !WAKEUP_LATENCY_HIST && !MISSED_TIMER_OFFSETS_HIST) help This option enables a transparent branch optimization that makes certain almost-always-true or almost-always-false branch diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 9d0ac091a52a..4a905bd667e2 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -23,8 +23,7 @@ #include <linux/smp.h> #include <linux/interrupt.h> #include <linux/module.h> - -#include <asm/uaccess.h> +#include <linux/uaccess.h> extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *); @@ -107,7 +106,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, /* If we're in an interrupt context, or have no user context, we must not take the fault. */ - if (!mm || in_atomic()) + if (!mm || faulthandler_disabled()) goto no_context; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/arc/include/asm/futex.h b/arch/arc/include/asm/futex.h index 4dc64ddebece..05b5aaf5b0f9 100644 --- a/arch/arc/include/asm/futex.h +++ b/arch/arc/include/asm/futex.h @@ -53,7 +53,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -75,7 +75,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) ret = -ENOSYS; } - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); if (!ret) { switch (cmp) { @@ -104,7 +104,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) return ret; } -/* Compare-xchg with preemption disabled. +/* Compare-xchg with pagefaults disabled. * Notes: * -Best-Effort: Exchg happens only if compare succeeds. * If compare fails, returns; leaving retry/looping to upper layers @@ -121,7 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ + pagefault_disable(); /* TBD : can use llock/scond */ __asm__ __volatile__( @@ -142,7 +142,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, : "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT) : "cc", "memory"); - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); *uval = val; return val; diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 6a2e006cbcce..d948e4e9d89c 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -86,7 +86,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 32909b4ef959..f7f5b057e505 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -31,7 +31,7 @@ config ARM select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 - select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL + select HAVE_ARCH_JUMP_LABEL if (!XIP_KERNEL && !PREEMPT_RT_BASE) select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK @@ -67,6 +67,7 @@ config ARM select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY select HAVE_RCU_TABLE_FREE if (SMP && ARM_LPAE) select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index abb2c3769b01..2386e9745ba4 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -129,6 +129,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size #else /* min ARCH >= ARMv6 */ +#define __HAVE_ARCH_CMPXCHG 1 + extern void __bad_cmpxchg(volatile void *ptr, int size); /* diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 4e78065a16aa..5eed82809d82 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -93,6 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + preempt_disable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" @@ -104,6 +105,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = val; + preempt_enable(); + return ret; } @@ -124,7 +127,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ +#ifndef CONFIG_SMP + preempt_disable(); +#endif + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -146,7 +152,10 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) ret = -ENOSYS; } - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); +#ifndef CONFIG_SMP + preempt_enable(); +#endif if (!ret) { switch (cmp) { diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index c99e259469f7..f3e3d800c407 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -3,6 +3,13 @@ #include <linux/thread_info.h> +#if defined CONFIG_PREEMPT_RT_FULL && defined CONFIG_HIGHMEM +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p); +#else +static inline void +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } +#endif + /* * For v7 SMP cores running a preemptible kernel we may be pre-empted * during a TLB maintenance operation, so execute an inner-shareable dsb @@ -22,6 +29,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ + switch_kmaps(prev, next); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32eded3e50..b5a616376f60 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -50,6 +50,7 @@ struct cpu_context_save { struct thread_info { unsigned long flags; /* low level flags */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ __u32 cpu; /* cpu */ @@ -147,6 +148,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ +#define TIF_NEED_RESCHED_LAZY 3 #define TIF_UPROBE 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 @@ -160,6 +162,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp __user *, #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 871b8267d211..4dbe70de7318 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -65,6 +65,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 570306c49406..797a13d959b7 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -208,11 +208,18 @@ __irq_svc: #ifdef CONFIG_PREEMPT get_thread_info tsk ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 + bne 1f @ return from exeption + ldr r0, [tsk, #TI_FLAGS] @ get flags + tst r0, #_TIF_NEED_RESCHED @ if NEED_RESCHED is set + blne svc_preempt @ preempt! + + ldr r8, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r8, #0 @ if preempt lazy count != 0 movne r0, #0 @ force flags to 0 - tst r0, #_TIF_NEED_RESCHED + tst r0, #_TIF_NEED_RESCHED_LAZY blne svc_preempt +1: #endif svc_exit r5, irq = 1 @ return from exception @@ -227,8 +234,14 @@ svc_preempt: 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED + bne 1b + tst r0, #_TIF_NEED_RESCHED_LAZY reteq r8 @ go again - b 1b + ldr r0, [tsk, #TI_PREEMPT_LAZY] @ get preempt lazy count + teq r0, #0 @ if preempt lazy count != 0 + beq 1b + ret r8 @ go again + #endif __und_fault: diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f192a2a41719..649247ac00e6 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -290,6 +290,30 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) } #ifdef CONFIG_MMU +/* + * CONFIG_SPLIT_PTLOCK_CPUS results in a page->ptl lock. If the lock is not + * initialized by pgtable_page_ctor() then a coredump of the vector page will + * fail. + */ +static int __init vectors_user_mapping_init_page(void) +{ + struct page *page; + unsigned long addr = 0xffff0000; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + page = pmd_page(*(pmd)); + + pgtable_page_ctor(page); + + return 0; +} +late_initcall(vectors_user_mapping_init_page); + #ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 586eef26203d..25bd12ef0b36 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -568,7 +568,8 @@ asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) { do { - if (likely(thread_flags & _TIF_NEED_RESCHED)) { + if (likely(thread_flags & (_TIF_NEED_RESCHED | + _TIF_NEED_RESCHED_LAZY))) { schedule(); } else { if (unlikely(!user_mode(regs))) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index f11d82527076..e561aef093c7 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -213,8 +213,6 @@ int __cpu_disable(void) flush_cache_louis(); local_flush_tlb_all(); - clear_tasks_mm_cpumask(cpu); - return 0; } @@ -230,6 +228,9 @@ void __cpu_die(unsigned int cpu) pr_err("CPU%u: cpu didn't die\n", cpu); return; } + + clear_tasks_mm_cpumask(cpu); + pr_notice("CPU%u: shutdown\n", cpu); /* diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index 0bee233fef9a..314cfb232a63 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -93,7 +93,7 @@ extern const struct unwind_idx __start_unwind_idx[]; static const struct unwind_idx *__origin_unwind_idx; extern const struct unwind_idx __stop_unwind_idx[]; -static DEFINE_SPINLOCK(unwind_lock); +static DEFINE_RAW_SPINLOCK(unwind_lock); static LIST_HEAD(unwind_tables); /* Convert a prel31 symbol to an absolute address */ @@ -201,7 +201,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) /* module unwind tables */ struct unwind_table *table; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_for_each_entry(table, &unwind_tables, list) { if (addr >= table->begin_addr && addr < table->end_addr) { @@ -213,7 +213,7 @@ static const struct unwind_idx *unwind_find_idx(unsigned long addr) break; } } - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); } pr_debug("%s: idx = %p\n", __func__, idx); @@ -529,9 +529,9 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, tab->begin_addr = text_addr; tab->end_addr = text_addr + text_size; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_add_tail(&tab->list, &unwind_tables); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); return tab; } @@ -543,9 +543,9 @@ void unwind_table_del(struct unwind_table *tab) if (!tab) return; - spin_lock_irqsave(&unwind_lock, flags); + raw_spin_lock_irqsave(&unwind_lock, flags); list_del(&tab->list); - spin_unlock_irqrestore(&unwind_lock, flags); + raw_spin_unlock_irqrestore(&unwind_lock, flags); kfree(tab); } diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 69dbe8679c5f..1b8fcb9d75dc 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -477,9 +477,9 @@ bool kvm_arch_intc_initialized(struct kvm *kvm) static void vcpu_pause(struct kvm_vcpu *vcpu) { - wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); + struct swait_head *wq = kvm_arch_vcpu_wq(vcpu); - wait_event_interruptible(*wq, !vcpu->arch.pause); + swait_event_interruptible(*wq, !vcpu->arch.pause); } static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index ad6f6424f1d1..ddc0defe5548 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -70,7 +70,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct kvm *kvm = source_vcpu->kvm; struct kvm_vcpu *vcpu = NULL; - wait_queue_head_t *wq; + struct swait_head *wq; unsigned long cpu_id; unsigned long context_id; phys_addr_t target_pc; @@ -119,7 +119,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) smp_mb(); /* Make sure the above is visible */ wq = kvm_arch_vcpu_wq(vcpu); - wake_up_interruptible(wq); + swait_wake_interruptible(wq); return PSCI_RET_SUCCESS; } diff --git a/arch/arm/mach-at91/at91rm9200.c b/arch/arm/mach-at91/at91rm9200.c index eaf58f88ef5d..8d3cb458a99c 100644 --- a/arch/arm/mach-at91/at91rm9200.c +++ b/arch/arm/mach-at91/at91rm9200.c @@ -13,7 +13,6 @@ #include <linux/of_platform.h> #include <asm/mach/arch.h> -#include <asm/system_misc.h> #include "generic.h" #include "soc.h" @@ -34,7 +33,6 @@ static void __init at91rm9200_dt_device_init(void) of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev); - arm_pm_idle = at91rm9200_idle; at91rm9200_pm_init(); } diff --git a/arch/arm/mach-at91/at91sam9.c b/arch/arm/mach-at91/at91sam9.c index e47a2093a0e7..d2bede665a1b 100644 --- a/arch/arm/mach-at91/at91sam9.c +++ b/arch/arm/mach-at91/at91sam9.c @@ -62,8 +62,6 @@ static void __init at91sam9_common_init(void) soc_dev = soc_device_to_device(soc); of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev); - - arm_pm_idle = at91sam9_idle; } static void __init at91sam9_dt_device_init(void) diff --git a/arch/arm/mach-at91/generic.h b/arch/arm/mach-at91/generic.h index b0fa7dc7286d..28ca57a2060f 100644 --- a/arch/arm/mach-at91/generic.h +++ b/arch/arm/mach-at91/generic.h @@ -11,27 +11,18 @@ #ifndef _AT91_GENERIC_H #define _AT91_GENERIC_H -#include <linux/of.h> -#include <linux/reboot.h> - - /* Map io */ -extern void __init at91_map_io(void); -extern void __init at91_alt_map_io(void); - -/* idle */ -extern void at91rm9200_idle(void); -extern void at91sam9_idle(void); - #ifdef CONFIG_PM extern void __init at91rm9200_pm_init(void); extern void __init at91sam9260_pm_init(void); extern void __init at91sam9g45_pm_init(void); extern void __init at91sam9x5_pm_init(void); +extern void __init sama5_pm_init(void); #else static inline void __init at91rm9200_pm_init(void) { } static inline void __init at91sam9260_pm_init(void) { } static inline void __init at91sam9g45_pm_init(void) { } static inline void __init at91sam9x5_pm_init(void) { } +static inline void __init sama5_pm_init(void) { } #endif #endif /* _AT91_GENERIC_H */ diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 5062699cbb12..3be82cf983dd 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -31,10 +31,13 @@ #include <asm/mach/irq.h> #include <asm/fncpy.h> #include <asm/cacheflush.h> +#include <asm/system_misc.h> #include "generic.h" #include "pm.h" +static void __iomem *pmc; + /* * FIXME: this is needed to communicate between the pinctrl driver and * the PM implementation in the machine. Possibly part of the PM @@ -85,7 +88,7 @@ static int at91_pm_verify_clocks(void) unsigned long scsr; int i; - scsr = at91_pmc_read(AT91_PMC_SCSR); + scsr = readl(pmc + AT91_PMC_SCSR); /* USB must not be using PLLB */ if ((scsr & at91_pm_data.uhp_udp_mask) != 0) { @@ -99,8 +102,7 @@ static int at91_pm_verify_clocks(void) if ((scsr & (AT91_PMC_PCK0 << i)) == 0) continue; - - css = at91_pmc_read(AT91_PMC_PCKR(i)) & AT91_PMC_CSS; + css = readl(pmc + AT91_PMC_PCKR(i)) & AT91_PMC_CSS; if (css != AT91_PMC_CSS_SLOW) { pr_err("AT91: PM - Suspend-to-RAM with PCK%d src %d\n", i, css); return 0; @@ -143,8 +145,8 @@ static void at91_pm_suspend(suspend_state_t state) flush_cache_all(); outer_disable(); - at91_suspend_sram_fn(at91_pmc_base, at91_ramc_base[0], - at91_ramc_base[1], pm_data); + at91_suspend_sram_fn(pmc, at91_ramc_base[0], + at91_ramc_base[1], pm_data); outer_resume(); } @@ -348,6 +350,21 @@ static __init void at91_dt_ramc(void) at91_pm_set_standby(standby); } +void at91rm9200_idle(void) +{ + /* + * Disable the processor clock. The processor will be automatically + * re-enabled by an interrupt or by a reset. + */ + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR); +} + +void at91sam9_idle(void) +{ + writel(AT91_PMC_PCK, pmc + AT91_PMC_SCDR); + cpu_do_idle(); +} + static void __init at91_pm_sram_init(void) { struct gen_pool *sram_pool; @@ -394,13 +411,36 @@ static void __init at91_pm_sram_init(void) &at91_pm_suspend_in_sram, at91_pm_suspend_in_sram_sz); } -static void __init at91_pm_init(void) +static const struct of_device_id atmel_pmc_ids[] __initconst = { + { .compatible = "atmel,at91rm9200-pmc" }, + { .compatible = "atmel,at91sam9260-pmc" }, + { .compatible = "atmel,at91sam9g45-pmc" }, + { .compatible = "atmel,at91sam9n12-pmc" }, + { .compatible = "atmel,at91sam9x5-pmc" }, + { .compatible = "atmel,sama5d3-pmc" }, + { .compatible = "atmel,sama5d2-pmc" }, + { /* sentinel */ }, +}; + +static void __init at91_pm_init(void (*pm_idle)(void)) { - at91_pm_sram_init(); + struct device_node *pmc_np; if (at91_cpuidle_device.dev.platform_data) platform_device_register(&at91_cpuidle_device); + pmc_np = of_find_matching_node(NULL, atmel_pmc_ids); + pmc = of_iomap(pmc_np, 0); + if (!pmc) { + pr_err("AT91: PM not supported, PMC not found\n"); + return; + } + + if (pm_idle) + arm_pm_idle = pm_idle; + + at91_pm_sram_init(); + if (at91_suspend_sram_fn) suspend_set_ops(&at91_pm_ops); else @@ -419,7 +459,7 @@ void __init at91rm9200_pm_init(void) at91_pm_data.uhp_udp_mask = AT91RM9200_PMC_UHP | AT91RM9200_PMC_UDP; at91_pm_data.memctrl = AT91_MEMCTRL_MC; - at91_pm_init(); + at91_pm_init(at91rm9200_idle); } void __init at91sam9260_pm_init(void) @@ -427,7 +467,7 @@ void __init at91sam9260_pm_init(void) at91_dt_ramc(); at91_pm_data.memctrl = AT91_MEMCTRL_SDRAMC; at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP; - return at91_pm_init(); + at91_pm_init(at91sam9_idle); } void __init at91sam9g45_pm_init(void) @@ -435,7 +475,7 @@ void __init at91sam9g45_pm_init(void) at91_dt_ramc(); at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP; at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR; - return at91_pm_init(); + at91_pm_init(at91sam9_idle); } void __init at91sam9x5_pm_init(void) @@ -443,5 +483,13 @@ void __init at91sam9x5_pm_init(void) at91_dt_ramc(); at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP; at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR; - return at91_pm_init(); + at91_pm_init(at91sam9_idle); +} + +void __init sama5_pm_init(void) +{ + at91_dt_ramc(); + at91_pm_data.uhp_udp_mask = AT91SAM926x_PMC_UHP | AT91SAM926x_PMC_UDP; + at91_pm_data.memctrl = AT91_MEMCTRL_DDRSDR; + at91_pm_init(NULL); } diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c index 41d829d8e7d5..3755da6decf5 100644 --- a/arch/arm/mach-at91/sama5.c +++ b/arch/arm/mach-at91/sama5.c @@ -49,7 +49,7 @@ static void __init sama5_dt_device_init(void) soc_dev = soc_device_to_device(soc); of_platform_populate(NULL, of_default_bus_match_table, NULL, soc_dev); - at91sam9x5_pm_init(); + sama5_pm_init(); } static const char *sama5_dt_board_compat[] __initconst = { diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index a825bca2a2b6..4619e228df41 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -231,7 +231,7 @@ static void __iomem *scu_base_addr(void) return (void __iomem *)(S5P_VA_SCU); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void exynos_secondary_init(unsigned int cpu) { @@ -244,8 +244,8 @@ static void exynos_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -259,7 +259,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -286,7 +286,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) if (timeout == 0) { printk(KERN_ERR "cpu1 power enable failed"); - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return -ETIMEDOUT; } } @@ -342,7 +342,7 @@ static int exynos_boot_secondary(unsigned int cpu, struct task_struct *idle) * calibrations, then wait for it to finish */ fail: - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? ret : 0; } diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c index 280f3f14f77c..bc2ed95c0e62 100644 --- a/arch/arm/mach-hisi/platmcpm.c +++ b/arch/arm/mach-hisi/platmcpm.c @@ -57,7 +57,7 @@ static void __iomem *sysctrl, *fabric; static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER]; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static u32 fabric_phys_addr; /* * [0]: bootwrapper physical address @@ -104,7 +104,7 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster) if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER) return -EINVAL; - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); if (hip04_cpu_table[cluster][cpu]) goto out; @@ -133,7 +133,7 @@ static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster) udelay(20); out: hip04_cpu_table[cluster][cpu]++; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 0; } @@ -149,7 +149,7 @@ static void hip04_mcpm_power_down(void) __mcpm_cpu_going_down(cpu, cluster); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP); hip04_cpu_table[cluster][cpu]--; if (hip04_cpu_table[cluster][cpu] == 1) { @@ -162,7 +162,7 @@ static void hip04_mcpm_power_down(void) last_man = hip04_cluster_is_down(cluster); if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); /* Since it's Cortex A15, disable L2 prefetching. */ asm volatile( "mcr p15, 1, %0, c15, c0, 3 \n\t" @@ -173,7 +173,7 @@ static void hip04_mcpm_power_down(void) hip04_set_snoop_filter(cluster, 0); __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); } else { - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); v7_exit_coherency_flush(louis); } @@ -192,7 +192,7 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) cpu >= HIP04_MAX_CPUS_PER_CLUSTER); count = TIMEOUT_MSEC / POLL_MSEC; - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); for (tries = 0; tries < count; tries++) { if (hip04_cpu_table[cluster][cpu]) { ret = -EBUSY; @@ -202,10 +202,10 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster)); if (data & CORE_WFI_STATUS(cpu)) break; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); /* Wait for clean L2 when the whole cluster is down. */ msleep(POLL_MSEC); - spin_lock_irq(&boot_lock); + raw_spin_lock_irq(&boot_lock); } if (tries >= count) goto err; @@ -220,10 +220,10 @@ static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster) } if (tries >= count) goto err; - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return 0; err: - spin_unlock_irq(&boot_lock); + raw_spin_unlock_irq(&boot_lock); return ret; } @@ -235,10 +235,10 @@ static void hip04_mcpm_powered_up(void) cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); if (!hip04_cpu_table[cluster][cpu]) hip04_cpu_table[cluster][cpu] = 1; - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); } static void __naked hip04_mcpm_power_up_setup(unsigned int affinity_level) diff --git a/arch/arm/mach-omap2/gpio.c b/arch/arm/mach-omap2/gpio.c index 7a577145b68b..689a1af47c80 100644 --- a/arch/arm/mach-omap2/gpio.c +++ b/arch/arm/mach-omap2/gpio.c @@ -130,7 +130,6 @@ static int __init omap2_gpio_dev_init(struct omap_hwmod *oh, void *unused) } pwrdm = omap_hwmod_get_pwrdm(oh); - pdata->loses_context = pwrdm_can_ever_lose_context(pwrdm); pdev = omap_device_build(name, id - 1, oh, pdata, sizeof(*pdata)); kfree(pdata); diff --git a/arch/arm/mach-omap2/omap-smp.c b/arch/arm/mach-omap2/omap-smp.c index 5305ec7341ec..19732b56088b 100644 --- a/arch/arm/mach-omap2/omap-smp.c +++ b/arch/arm/mach-omap2/omap-smp.c @@ -43,7 +43,7 @@ /* SCU base address */ static void __iomem *scu_base; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void __iomem *omap4_get_scu_base(void) { @@ -74,8 +74,8 @@ static void omap4_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -89,7 +89,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Update the AuxCoreBoot0 with boot state for secondary core. @@ -166,7 +166,7 @@ static int omap4_boot_secondary(unsigned int cpu, struct task_struct *idle) * Now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return 0; } diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 78af6d8cf2e2..ef4227ffa3b6 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -1166,43 +1166,3 @@ int pwrdm_get_context_loss_count(struct powerdomain *pwrdm) return count; } -/** - * pwrdm_can_ever_lose_context - can this powerdomain ever lose context? - * @pwrdm: struct powerdomain * - * - * Given a struct powerdomain * @pwrdm, returns 1 if the powerdomain - * can lose either memory or logic context or if @pwrdm is invalid, or - * returns 0 otherwise. This function is not concerned with how the - * powerdomain registers are programmed (i.e., to go off or not); it's - * concerned with whether it's ever possible for this powerdomain to - * go off while some other part of the chip is active. This function - * assumes that every powerdomain can go to either ON or INACTIVE. - */ -bool pwrdm_can_ever_lose_context(struct powerdomain *pwrdm) -{ - int i; - - if (!pwrdm) { - pr_debug("powerdomain: %s: invalid powerdomain pointer\n", - __func__); - return 1; - } - - if (pwrdm->pwrsts & PWRSTS_OFF) - return 1; - - if (pwrdm->pwrsts & PWRSTS_RET) { - if (pwrdm->pwrsts_logic_ret & PWRSTS_OFF) - return 1; - - for (i = 0; i < pwrdm->banks; i++) - if (pwrdm->pwrsts_mem_ret[i] & PWRSTS_OFF) - return 1; - } - - for (i = 0; i < pwrdm->banks; i++) - if (pwrdm->pwrsts_mem_on[i] & PWRSTS_OFF) - return 1; - - return 0; -} diff --git a/arch/arm/mach-omap2/powerdomain.h b/arch/arm/mach-omap2/powerdomain.h index 28a796ce07d7..5e0c033a21db 100644 --- a/arch/arm/mach-omap2/powerdomain.h +++ b/arch/arm/mach-omap2/powerdomain.h @@ -244,7 +244,6 @@ int pwrdm_state_switch(struct powerdomain *pwrdm); int pwrdm_pre_transition(struct powerdomain *pwrdm); int pwrdm_post_transition(struct powerdomain *pwrdm); int pwrdm_get_context_loss_count(struct powerdomain *pwrdm); -bool pwrdm_can_ever_lose_context(struct powerdomain *pwrdm); extern int omap_set_pwrdm_state(struct powerdomain *pwrdm, u8 state); diff --git a/arch/arm/mach-prima2/platsmp.c b/arch/arm/mach-prima2/platsmp.c index e46c91094dde..dcb3ed0c26da 100644 --- a/arch/arm/mach-prima2/platsmp.c +++ b/arch/arm/mach-prima2/platsmp.c @@ -22,7 +22,7 @@ static void __iomem *clk_base; -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void sirfsoc_secondary_init(unsigned int cpu) { @@ -36,8 +36,8 @@ static void sirfsoc_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static const struct of_device_id clk_ids[] = { @@ -75,7 +75,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) /* make sure write buffer is drained */ mb(); - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -107,7 +107,7 @@ static int sirfsoc_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c index 9b00123a315d..0a49fe1bc8cf 100644 --- a/arch/arm/mach-qcom/platsmp.c +++ b/arch/arm/mach-qcom/platsmp.c @@ -46,7 +46,7 @@ extern void secondary_startup_arm(void); -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); #ifdef CONFIG_HOTPLUG_CPU static void qcom_cpu_die(unsigned int cpu) @@ -60,8 +60,8 @@ static void qcom_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int scss_release_secondary(unsigned int cpu) @@ -284,7 +284,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * Send the secondary CPU a soft interrupt, thereby causing @@ -297,7 +297,7 @@ static int qcom_boot_secondary(unsigned int cpu, int (*func)(unsigned int)) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return ret; } diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c index fd4297713d67..b0553b2c2d53 100644 --- a/arch/arm/mach-spear/platsmp.c +++ b/arch/arm/mach-spear/platsmp.c @@ -32,7 +32,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void __iomem *scu_base = IOMEM(VA_SCU_BASE); @@ -47,8 +47,8 @@ static void spear13xx_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -59,7 +59,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -84,7 +84,7 @@ static int spear13xx_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-sti/platsmp.c b/arch/arm/mach-sti/platsmp.c index d4b624f8dfcb..56d4028122f5 100644 --- a/arch/arm/mach-sti/platsmp.c +++ b/arch/arm/mach-sti/platsmp.c @@ -34,7 +34,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void sti_secondary_init(unsigned int cpu) { @@ -49,8 +49,8 @@ static void sti_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -61,7 +61,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -92,7 +92,7 @@ static int sti_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mach-ux500/platsmp.c b/arch/arm/mach-ux500/platsmp.c index a44967f3168c..3af22a4836bf 100644 --- a/arch/arm/mach-ux500/platsmp.c +++ b/arch/arm/mach-ux500/platsmp.c @@ -51,7 +51,7 @@ static void __iomem *scu_base_addr(void) return NULL; } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); static void ux500_secondary_init(unsigned int cpu) { @@ -64,8 +64,8 @@ static void ux500_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -76,7 +76,7 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) * set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * The secondary processor is waiting to be released from @@ -97,7 +97,7 @@ static int ux500_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 6333d9c17875..62016e3e4a9c 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -276,7 +276,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) @@ -430,6 +430,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); + if (interrupts_enabled(regs)) + local_irq_enable(); + if (user_mode(regs)) goto bad_area; @@ -497,6 +500,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + if (interrupts_enabled(regs)) + local_irq_enable(); + do_bad_area(addr, fsr, regs); return 0; } diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index b98895d9fe57..4050e9d99d6b 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -54,11 +54,13 @@ EXPORT_SYMBOL(kunmap); void *kmap_atomic(struct page *page) { + pte_t pte = mk_pte(page, kmap_prot); unsigned int idx; unsigned long vaddr; void *kmap; int type; + preempt_disable_nort(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -92,7 +94,10 @@ void *kmap_atomic(struct page *page) * in place, so the contained TLB flush ensures the TLB is updated * with the new mapping. */ - set_fixmap_pte(idx, mk_pte(page, kmap_prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_fixmap_pte(idx, pte); return (void *)vaddr; } @@ -109,27 +114,33 @@ void __kunmap_atomic(void *kvaddr) if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(idx)); - set_fixmap_pte(idx, __pte(0)); #else (void) idx; /* to kill a warning */ #endif + set_fixmap_pte(idx, __pte(0)); kmap_atomic_idx_pop(); } else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) { /* this address was obtained through kmap_high_get() */ kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)])); } pagefault_enable(); + preempt_enable_nort(); } EXPORT_SYMBOL(__kunmap_atomic); void *kmap_atomic_pfn(unsigned long pfn) { + pte_t pte = pfn_pte(pfn, kmap_prot); unsigned long vaddr; int idx, type; struct page *page = pfn_to_page(pfn); + preempt_disable_nort(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -140,7 +151,10 @@ void *kmap_atomic_pfn(unsigned long pfn) #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_fixmap_pte(vaddr))); #endif - set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_fixmap_pte(idx, pte); return (void *)vaddr; } @@ -154,3 +168,28 @@ struct page *kmap_atomic_to_page(const void *ptr) return pte_page(get_fixmap_pte(vaddr)); } + +#if defined CONFIG_PREEMPT_RT_FULL +void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) +{ + int i; + + /* + * Clear @prev's kmap_atomic mappings + */ + for (i = 0; i < prev_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + set_fixmap_pte(idx, __pte(0)); + } + /* + * Restore @next_p's kmap_atomic mappings + */ + for (i = 0; i < next_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + if (!pte_none(next_p->kmap_pte[i])) + set_fixmap_pte(idx, next_p->kmap_pte[i]); + } +} +#endif diff --git a/arch/arm/plat-versatile/platsmp.c b/arch/arm/plat-versatile/platsmp.c index 53feb90c840c..b4a8d54fc3f3 100644 --- a/arch/arm/plat-versatile/platsmp.c +++ b/arch/arm/plat-versatile/platsmp.c @@ -30,7 +30,7 @@ static void write_pen_release(int val) sync_cache_w(&pen_release); } -static DEFINE_SPINLOCK(boot_lock); +static DEFINE_RAW_SPINLOCK(boot_lock); void versatile_secondary_init(unsigned int cpu) { @@ -43,8 +43,8 @@ void versatile_secondary_init(unsigned int cpu) /* * Synchronise with the boot thread. */ - spin_lock(&boot_lock); - spin_unlock(&boot_lock); + raw_spin_lock(&boot_lock); + raw_spin_unlock(&boot_lock); } int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) @@ -55,7 +55,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) * Set synchronisation state between this boot processor * and the secondary one */ - spin_lock(&boot_lock); + raw_spin_lock(&boot_lock); /* * This is really belt and braces; we hold unintended secondary @@ -85,7 +85,7 @@ int versatile_boot_secondary(unsigned int cpu, struct task_struct *idle) * now the secondary core is starting up let it run its * calibrations, then wait for it to finish */ - spin_unlock(&boot_lock); + raw_spin_unlock(&boot_lock); return pen_release != -1 ? -ENOSYS : 0; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 2b5b0c5ac670..d65535871f96 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -72,9 +72,11 @@ config ARM64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP select HAVE_RCU_TABLE_FREE + select HAVE_PREEMPT_LAZY select HAVE_SYSCALL_TRACEPOINTS select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN + select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA select NO_BOOTMEM select OF @@ -607,7 +609,7 @@ config XEN_DOM0 config XEN bool "Xen guest support on ARM64" - depends on ARM64 && OF + depends on ARM64 && OF && !PREEMPT_RT_FULL select SWIOTLB_XEN help Say Y if you want to run Linux in a Virtual Machine on Xen on ARM64. diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 667346273d9b..775e85b9d1f2 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -66,7 +66,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - pagefault_disable(); /* implies preempt_disable() */ + pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -93,7 +93,7 @@ futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) ret = -ENOSYS; } - pagefault_enable(); /* subsumes preempt_enable() */ + pagefault_enable(); if (!ret) { switch (cmp) { diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 7b2f2ecadfd1..b81944822374 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -47,6 +47,7 @@ struct thread_info { mm_segment_t addr_limit; /* address limit */ struct task_struct *task; /* main task structure */ int preempt_count; /* 0 => preemptable, <0 => bug */ + int preempt_lazy_count; /* 0 => preemptable, <0 => bug */ int cpu; /* cpu */ }; @@ -107,6 +108,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +#define TIF_NEED_RESCHED_LAZY 4 #define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 @@ -122,6 +124,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index dc02084b36aa..259335ebeccf 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -37,6 +37,7 @@ int main(void) BLANK(); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_ADDR_LIMIT, offsetof(struct thread_info, addr_limit)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b400bad28052..81e8467c0971 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -387,11 +387,16 @@ el1_irq: #ifdef CONFIG_PREEMPT get_thread_info tsk ldr w24, [tsk, #TI_PREEMPT] // get preempt count - cbnz w24, 1f // preempt count != 0 + cbnz w24, 2f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags - tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? - bl el1_preempt + tbnz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? + + ldr w24, [tsk, #TI_PREEMPT_LAZY] // get preempt lazy count + cbnz w24, 2f // preempt lazy count != 0 + tbz x0, #TIF_NEED_RESCHED_LAZY, 2f // needs rescheduling? 1: + bl el1_preempt +2: #endif #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on @@ -405,6 +410,7 @@ el1_preempt: 1: bl preempt_schedule_irq // irq en/disable is done inside ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? + tbnz x0, #TIF_NEED_RESCHED_LAZY, 1b // needs rescheduling? ret x24 #endif @@ -644,6 +650,7 @@ fast_work_pending: str x0, [sp, #S_X0] // returned x0 work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched + tbnz x1, #TIF_NEED_RESCHED_LAZY, work_resched /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 924902083e47..30eb88e5b896 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -77,7 +77,7 @@ bool __kprobes aarch64_insn_is_nop(u32 insn) } } -static DEFINE_SPINLOCK(patch_lock); +static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap) { @@ -124,13 +124,13 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn) unsigned long flags = 0; int ret; - spin_lock_irqsave(&patch_lock, flags); + raw_spin_lock_irqsave(&patch_lock, flags); waddr = patch_map(addr, FIX_TEXT_POKE0); ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); patch_unmap(FIX_TEXT_POKE0); - spin_unlock_irqrestore(&patch_lock, flags); + raw_spin_unlock_irqrestore(&patch_lock, flags); return ret; } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b67b01cb5109..c4cb2596ede6 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -488,7 +488,7 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) } err = request_irq(irq, armpmu->handle_irq, - IRQF_NOBALANCING, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", armpmu); if (err) { pr_err("unable to request IRQ%d for ARM PMU counters\n", diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 10a1fc5004dc..c7c0471e384f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -213,7 +213,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, * If we're in an interrupt or have no user context, we must not take * the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h index 20b52c40bcd2..b1ec1fa06463 100644 --- a/arch/avr32/include/asm/uaccess.h +++ b/arch/avr32/include/asm/uaccess.h @@ -106,7 +106,8 @@ static inline __kernel_size_t copy_from_user(void *to, * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -125,7 +126,8 @@ static inline __kernel_size_t copy_from_user(void *to, * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -145,7 +147,8 @@ static inline __kernel_size_t copy_from_user(void *to, * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -167,7 +170,8 @@ static inline __kernel_size_t copy_from_user(void *to, * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c index d223a8b57c1e..c03533937a9f 100644 --- a/arch/avr32/mm/fault.c +++ b/arch/avr32/mm/fault.c @@ -14,11 +14,11 @@ #include <linux/pagemap.h> #include <linux/kdebug.h> #include <linux/kprobes.h> +#include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/sysreg.h> #include <asm/tlb.h> -#include <asm/uaccess.h> #ifdef CONFIG_KPROBES static inline int notify_page_fault(struct pt_regs *regs, int trap) @@ -81,7 +81,7 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs) * If we're in an interrupt or have no user context, we must * not take the fault... */ - if (in_atomic() || !mm || regs->sr & SYSREG_BIT(GM)) + if (faulthandler_disabled() || !mm || regs->sr & SYSREG_BIT(GM)) goto no_context; local_irq_enable(); diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 83f12f2ed9e3..3066d40a6db1 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -8,7 +8,7 @@ #include <linux/interrupt.h> #include <linux/module.h> #include <linux/wait.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <arch/system.h> extern int find_fixup_code(struct pt_regs *); @@ -109,11 +109,11 @@ do_page_fault(unsigned long address, struct pt_regs *regs, info.si_code = SEGV_MAPERR; /* - * If we're in an interrupt or "atomic" operation or have no + * If we're in an interrupt, have pagefaults disabled or have no * user context, we must not take the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/frv/mm/fault.c b/arch/frv/mm/fault.c index ec4917ddf678..61d99767fe16 100644 --- a/arch/frv/mm/fault.c +++ b/arch/frv/mm/fault.c @@ -19,9 +19,9 @@ #include <linux/kernel.h> #include <linux/ptrace.h> #include <linux/hardirq.h> +#include <linux/uaccess.h> #include <asm/pgtable.h> -#include <asm/uaccess.h> #include <asm/gdb-stub.h> /*****************************************************************************/ @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(__frame)) diff --git a/arch/frv/mm/highmem.c b/arch/frv/mm/highmem.c index bed9a9bd3c10..785344bbdc07 100644 --- a/arch/frv/mm/highmem.c +++ b/arch/frv/mm/highmem.c @@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page) unsigned long paddr; int type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); paddr = page_to_phys(page); @@ -85,5 +86,6 @@ void __kunmap_atomic(void *kvaddr) } kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h index 25fc9049db8a..f61cfb28e9f2 100644 --- a/arch/hexagon/include/asm/uaccess.h +++ b/arch/hexagon/include/asm/uaccess.h @@ -36,7 +36,8 @@ * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index ba5ba7accd0d..70b40d1205a6 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -11,10 +11,10 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/prefetch.h> +#include <linux/uaccess.h> #include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/uaccess.h> extern int die(char *, struct pt_regs *, long); @@ -96,7 +96,7 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re /* * If we're in an interrupt or have no user context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; #ifdef CONFIG_VIRTUAL_MEM_MAP diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h index c66a38d0a895..6f8982157a75 100644 --- a/arch/m32r/include/asm/uaccess.h +++ b/arch/m32r/include/asm/uaccess.h @@ -91,7 +91,8 @@ static inline void set_fs(mm_segment_t s) * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -155,7 +156,8 @@ extern int fixup_exception(struct pt_regs *regs); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -175,7 +177,8 @@ extern int fixup_exception(struct pt_regs *regs); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -194,7 +197,8 @@ extern int fixup_exception(struct pt_regs *regs); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -274,7 +278,8 @@ do { \ * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -568,7 +573,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -588,7 +594,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -606,7 +613,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -626,7 +634,8 @@ unsigned long __generic_copy_from_user(void *, const void __user *, unsigned lon * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * @@ -677,7 +686,8 @@ unsigned long clear_user(void __user *mem, unsigned long len); * strlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * diff --git a/arch/m32r/mm/fault.c b/arch/m32r/mm/fault.c index e3d4d4890104..8f9875b7933d 100644 --- a/arch/m32r/mm/fault.c +++ b/arch/m32r/mm/fault.c @@ -24,9 +24,9 @@ #include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/highmem.h> #include <linux/module.h> +#include <linux/uaccess.h> #include <asm/m32r.h> -#include <asm/uaccess.h> #include <asm/hardirq.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> @@ -111,10 +111,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code, mm = tsk->mm; /* - * If we're in an interrupt or have no user context or are running in an - * atomic region then we must not take the fault.. + * If we're in an interrupt or have no user context or have pagefaults + * disabled then we must not take the fault. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (error_code & ACE_USERMODE) diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index b2f04aee46ec..6a94cdd0c830 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -10,10 +10,10 @@ #include <linux/ptrace.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/uaccess.h> #include <asm/setup.h> #include <asm/traps.h> -#include <asm/uaccess.h> #include <asm/pgalloc.h> extern void die_if_kernel(char *, struct pt_regs *, long); @@ -81,7 +81,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index 2de5dc695a87..f57edca63609 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -105,7 +105,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, mm = tsk->mm; - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c index d71f621a2c0b..807f1b1c4e65 100644 --- a/arch/metag/mm/highmem.c +++ b/arch/metag/mm/highmem.c @@ -43,7 +43,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; int type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -82,6 +82,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -95,6 +96,7 @@ void *kmap_atomic_pfn(unsigned long pfn) unsigned long vaddr; int type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 0c0a5cfbf79a..826676778094 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h @@ -178,7 +178,8 @@ extern long __user_bad(void); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -290,7 +291,8 @@ extern long __user_bad(void); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index d46a5ebb7570..177dfc003643 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -107,14 +107,14 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, if ((error_code & 0x13) == 0x13 || (error_code & 0x11) == 0x11) is_write = 0; - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { if (kernel_mode(regs)) goto bad_area_nosemaphore; - /* in_atomic() in user mode is really bad, + /* faulthandler_disabled() in user mode is really bad, as is current->mm == NULL. */ - pr_emerg("Page fault in user mode with in_atomic(), mm = %p\n", - mm); + pr_emerg("Page fault in user mode with faulthandler_disabled(), mm = %p\n", + mm); pr_emerg("r15 = %lx MSR = %lx\n", regs->r15, regs->msr); die("Weird page fault", regs, SIGSEGV); diff --git a/arch/microblaze/mm/highmem.c b/arch/microblaze/mm/highmem.c index 5a92576fad92..2fcc5a52d84d 100644 --- a/arch/microblaze/mm/highmem.c +++ b/arch/microblaze/mm/highmem.c @@ -37,7 +37,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -63,6 +63,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); + preempt_enable(); return; } @@ -84,5 +85,6 @@ void __kunmap_atomic(void *kvaddr) #endif kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index c99e8a32bea4..7e6ab18c488a 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2367,7 +2367,7 @@ config CPU_R4400_WORKAROUNDS # config HIGHMEM bool "High Memory Support" - depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA + depends on 32BIT && CPU_SUPPORTS_HIGHMEM && SYS_SUPPORTS_HIGHMEM && !CPU_MIPS32_3_5_EVA && !PREEMPT_RT_FULL config CPU_SUPPORTS_HIGHMEM bool diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h index bc2f5164ce51..6dc7f5130d49 100644 --- a/arch/mips/include/asm/uaccess.h +++ b/arch/mips/include/asm/uaccess.h @@ -104,7 +104,8 @@ extern u64 __ua_limit; * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -139,7 +140,8 @@ extern u64 __ua_limit; * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -158,7 +160,8 @@ extern u64 __ua_limit; * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -178,7 +181,8 @@ extern u64 __ua_limit; * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -200,7 +204,8 @@ extern u64 __ua_limit; * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -499,7 +504,8 @@ extern void __put_user_unknown(void); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -518,7 +524,8 @@ extern void __put_user_unknown(void); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -538,7 +545,8 @@ extern void __put_user_unknown(void); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -560,7 +568,8 @@ extern void __put_user_unknown(void); * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -816,7 +825,8 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -889,7 +899,8 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -1076,7 +1087,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -1108,7 +1120,8 @@ extern size_t __copy_in_user_eva(void *__to, const void *__from, size_t __n); * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * @@ -1332,7 +1345,8 @@ strncpy_from_user(char *__to, const char __user *__from, long __len) * strlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * @@ -1401,7 +1415,8 @@ static inline long __strnlen_user(const char __user *s, long n) * strnlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h index 06805e09bcd3..0b85f827cd18 100644 --- a/arch/mips/kernel/signal-common.h +++ b/arch/mips/kernel/signal-common.h @@ -28,12 +28,7 @@ extern void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, extern int fpcsr_pending(unsigned int __user *fpcsr); /* Make sure we will not lose FPU ownership */ -#ifdef CONFIG_PREEMPT -#define lock_fpu_owner() preempt_disable() -#define unlock_fpu_owner() preempt_enable() -#else -#define lock_fpu_owner() pagefault_disable() -#define unlock_fpu_owner() pagefault_enable() -#endif +#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); }) +#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); }) #endif /* __SIGNAL_COMMON_H */ diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 7ff8637e530d..36c0f26fac6b 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -21,10 +21,10 @@ #include <linux/module.h> #include <linux/kprobes.h> #include <linux/perf_event.h> +#include <linux/uaccess.h> #include <asm/branch.h> #include <asm/mmu_context.h> -#include <asm/uaccess.h> #include <asm/ptrace.h> #include <asm/highmem.h> /* For VMALLOC_END */ #include <linux/kdebug.h> @@ -94,7 +94,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c index da815d295239..11661cbc11a8 100644 --- a/arch/mips/mm/highmem.c +++ b/arch/mips/mm/highmem.c @@ -47,7 +47,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -72,6 +72,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -92,6 +93,7 @@ void __kunmap_atomic(void *kvaddr) #endif kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); @@ -104,6 +106,7 @@ void *kmap_atomic_pfn(unsigned long pfn) unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index faa5c9822ecc..198a3147dd7d 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -90,6 +90,7 @@ static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot) BUG_ON(Page_dcache_dirty(page)); + preempt_disable(); pagefault_disable(); idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); idx += in_interrupt() ? FIX_N_COLOURS : 0; @@ -152,6 +153,7 @@ void kunmap_coherent(void) write_c0_entryhi(old_ctx); local_irq_restore(flags); pagefault_enable(); + preempt_enable(); } void copy_user_highpage(struct page *to, struct page *from, diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h index 2fbbe4d920aa..1ddea5afba09 100644 --- a/arch/mn10300/include/asm/highmem.h +++ b/arch/mn10300/include/asm/highmem.h @@ -75,6 +75,7 @@ static inline void *kmap_atomic(struct page *page) unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); if (page < highmem_start_page) return page_address(page); @@ -98,6 +99,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) if (vaddr < FIXADDR_START) { /* FIXME */ pagefault_enable(); + preempt_enable(); return; } @@ -122,6 +124,7 @@ static inline void __kunmap_atomic(unsigned long vaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } #endif /* __KERNEL__ */ diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index 0c2cc5d39c8e..4a1d181ed32f 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -23,8 +23,8 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/vt_kern.h> /* For unblank_screen() */ +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/pgalloc.h> #include <asm/hardirq.h> #include <asm/cpu-regs.h> @@ -168,7 +168,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR) diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 0c9b6afe69e9..b51878b0c6b8 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -77,7 +77,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index b188d1f26027..326f043ec1f9 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -138,6 +138,7 @@ static inline void kunmap(struct page *page) static inline void *kmap_atomic(struct page *page) { + preempt_disable(); pagefault_disable(); return page_address(page); } @@ -146,6 +147,7 @@ static inline void __kunmap_atomic(void *addr) { flush_kernel_dcache_page_addr(addr); pagefault_enable(); + preempt_enable(); } #define kmap_atomic_prot(page, prot) kmap_atomic(page) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index bbf22658d1a3..341966889a51 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -26,9 +26,9 @@ #include <linux/console.h> #include <linux/bug.h> #include <linux/ratelimit.h> +#include <linux/uaccess.h> #include <asm/assembly.h> -#include <asm/uaccess.h> #include <asm/io.h> #include <asm/irq.h> #include <asm/traps.h> @@ -796,7 +796,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) * unless pagefault_disable() was called before. */ - if (fault_space == 0 && !in_atomic()) + if (fault_space == 0 && !faulthandler_disabled()) { /* Clean up and return if in exception table. */ if (fixup_exception(regs)) diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 50d64a7fc672..3bc9db1ad19a 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -15,8 +15,8 @@ #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/traps.h> /* Various important other fields */ @@ -208,7 +208,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, int fault; unsigned int flags; - if (in_atomic()) + if (pagefault_disabled()) goto no_context; tsk = current; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 190cc48abc0c..7b70a5754e34 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -60,10 +60,11 @@ config LOCKDEP_SUPPORT config RWSEM_GENERIC_SPINLOCK bool + default y if PREEMPT_RT_FULL config RWSEM_XCHGADD_ALGORITHM bool - default y + default y if !PREEMPT_RT_FULL config GENERIC_LOCKBREAK bool @@ -138,6 +139,7 @@ config PPC select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER + select HAVE_PREEMPT_LAZY select HAVE_MOD_ARCH_SPECIFIC select MODULES_USE_ELF_RELA select CLONE_BACKWARDS @@ -312,7 +314,7 @@ menu "Kernel options" config HIGHMEM bool "High memory support" - depends on PPC32 + depends on PPC32 && !PREEMPT_RT_FULL source kernel/Kconfig.hz source kernel/Kconfig.preempt diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7fe65af0035d..22312f79dac7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -280,7 +280,7 @@ struct kvmppc_vcore { u8 in_guest; struct list_head runnable_threads; spinlock_t lock; - wait_queue_head_t wq; + struct swait_head wq; spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; @@ -614,7 +614,7 @@ struct kvm_vcpu_arch { u8 prodded; u32 last_inst; - wait_queue_head_t *wqp; + struct swait_head *wqp; struct kvmppc_vcore *vcore; int ret; int trap; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 7efee4a3240b..40e6fa1b85b2 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -42,6 +42,8 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_lazy_count; /* 0 => preemptable, + <0 => BUG */ unsigned long local_flags; /* private flags for thread */ /* low level flags - has atomic operations done on it */ @@ -82,8 +84,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ #define TIF_SIGPENDING 1 /* signal pending */ #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling - TIF_NEED_RESCHED */ +#define TIF_NEED_RESCHED_LAZY 3 /* lazy rescheduling necessary */ #define TIF_32BIT 4 /* 32 bit binary */ #define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ @@ -101,6 +102,8 @@ static inline struct thread_info *current_thread_info(void) #if defined(CONFIG_PPC64) #define TIF_ELF2ABI 18 /* function descriptors must die! */ #endif +#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -119,14 +122,16 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT) #define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE) #define _TIF_NOHZ (1<<TIF_NOHZ) +#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_NOHZ) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ - _TIF_RESTORE_TM) + _TIF_RESTORE_TM | _TIF_NEED_RESCHED_LAZY) #define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) /* Bits in local_flags */ /* Don't move TLF_NAPPING without adjusting the code in entry_32.S */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d8d332e65078..3c8d51c224f9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -160,6 +160,7 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_PREEMPT_LAZY, offsetof(struct thread_info, preempt_lazy_count)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 46fc0f4d8982..3d390ac490d9 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -813,7 +813,14 @@ resume_kernel: cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ bne restore andi. r8,r8,_TIF_NEED_RESCHED + bne+ 1f + lwz r0,TI_PREEMPT_LAZY(r9) + cmpwi 0,r0,0 /* if non-zero, just restore regs and return */ + bne restore + lwz r0,TI_FLAGS(r9) + andi. r0,r0,_TIF_NEED_RESCHED_LAZY beq+ restore +1: lwz r3,_MSR(r1) andi. r0,r3,MSR_EE /* interrupts off? */ beq restore /* don't schedule if so */ @@ -824,11 +831,11 @@ resume_kernel: */ bl trace_hardirqs_off #endif -1: bl preempt_schedule_irq +2: bl preempt_schedule_irq CURRENT_THREAD_INFO(r9, r1) lwz r3,TI_FLAGS(r9) - andi. r0,r3,_TIF_NEED_RESCHED - bne- 1b + andi. r0,r3,_TIF_NEED_RESCHED_MASK + bne- 2b #ifdef CONFIG_TRACE_IRQFLAGS /* And now, to properly rebalance the above, we tell lockdep they * are being turned back on, which will happen when we return @@ -1149,7 +1156,7 @@ global_dbcr0: #endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ do_work: /* r10 contains MSR_KERNEL here */ - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,_TIF_NEED_RESCHED_MASK beq do_user_signal do_resched: /* r10 contains MSR_KERNEL here */ @@ -1170,7 +1177,7 @@ recheck: MTMSRD(r10) /* disable interrupts */ CURRENT_THREAD_INFO(r9, r1) lwz r9,TI_FLAGS(r9) - andi. r0,r9,_TIF_NEED_RESCHED + andi. r0,r9,_TIF_NEED_RESCHED_MASK bne- do_resched andi. r0,r9,_TIF_USER_WORK_MASK beq restore_user diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index afbc20019c2e..5e2d2645d1e0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -636,7 +636,7 @@ _GLOBAL(ret_from_except_lite) #else beq restore #endif -1: andi. r0,r4,_TIF_NEED_RESCHED +1: andi. r0,r4,_TIF_NEED_RESCHED_MASK beq 2f bl restore_interrupts SCHEDULE_USER @@ -698,10 +698,18 @@ resume_kernel: #ifdef CONFIG_PREEMPT /* Check if we need to preempt */ + lwz r8,TI_PREEMPT(r9) + cmpwi 0,r8,0 /* if non-zero, just restore regs and return */ + bne restore andi. r0,r4,_TIF_NEED_RESCHED + bne+ check_count + + andi. r0,r4,_TIF_NEED_RESCHED_LAZY beq+ restore + lwz r8,TI_PREEMPT_LAZY(r9) + /* Check that preempt_count() == 0 and interrupts are enabled */ - lwz r8,TI_PREEMPT(r9) +check_count: cmpwi cr1,r8,0 ld r0,SOFTE(r1) cmpdi r0,0 @@ -718,7 +726,7 @@ resume_kernel: /* Re-test flags and eventually loop */ CURRENT_THREAD_INFO(r9, r1) ld r4,TI_FLAGS(r9) - andi. r0,r4,_TIF_NEED_RESCHED + andi. r0,r4,_TIF_NEED_RESCHED_MASK bne 1b /* diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 45096033d37b..6a8e55a17683 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -614,6 +614,7 @@ void irq_ctx_init(void) } } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { struct thread_info *curtp, *irqtp; @@ -631,6 +632,7 @@ void do_softirq_own_stack(void) if (irqtp->flags) set_bits(irqtp->flags, &curtp->flags); } +#endif irq_hw_number_t virq_to_hw(unsigned int virq) { diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 7c6bb4b17b49..e9dfe2270e93 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -40,6 +40,7 @@ * We store the saved ksp_limit in the unused part * of the STACK_FRAME_OVERHEAD */ +#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 stw r0,4(r1) @@ -56,6 +57,7 @@ _GLOBAL(call_do_softirq) stw r10,THREAD+KSP_LIMIT(r2) mtlr r0 blr +#endif /* * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 4e314b90c75d..8a7238dd2f4b 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -29,6 +29,7 @@ .text +#ifndef CONFIG_PREEMPT_RT_FULL _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) @@ -39,6 +40,7 @@ _GLOBAL(call_do_softirq) ld r0,16(r1) mtlr r0 blr +#endif _GLOBAL(call_do_irq) mflr r0 diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 3caec2c42105..d4c48506ea1b 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -172,6 +172,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 + depends on !PREEMPT_RT_FULL select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 63c37fd2b7a6..0482f1e4356f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -115,11 +115,11 @@ static bool kvmppc_ipi_thread(int cpu) static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) { int cpu = vcpu->cpu; - wait_queue_head_t *wqp; + struct swait_head *wqp; wqp = kvm_arch_vcpu_wq(vcpu); - if (waitqueue_active(wqp)) { - wake_up_interruptible(wqp); + if (swaitqueue_active(wqp)) { + swait_wake_interruptible(wqp); ++vcpu->stat.halt_wakeup; } @@ -692,8 +692,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) tvcpu->arch.prodded = 1; smp_mb(); if (vcpu->arch.ceded) { - if (waitqueue_active(&vcpu->wq)) { - wake_up_interruptible(&vcpu->wq); + if (swaitqueue_active(&vcpu->wq)) { + swait_wake_interruptible(&vcpu->wq); vcpu->stat.halt_wakeup++; } } @@ -1438,7 +1438,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) INIT_LIST_HEAD(&vcore->runnable_threads); spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); - init_waitqueue_head(&vcore->wq); + init_swait_head(&vcore->wq); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; vcore->first_vcpuid = core * threads_per_subcore; @@ -2085,10 +2085,9 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) { struct kvm_vcpu *vcpu; int do_sleep = 1; + DEFINE_SWAITER(wait); - DEFINE_WAIT(wait); - - prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE); + swait_prepare(&vc->wq, &wait, TASK_INTERRUPTIBLE); /* * Check one last time for pending exceptions and ceded state after @@ -2102,7 +2101,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) } if (!do_sleep) { - finish_wait(&vc->wq, &wait); + swait_finish(&vc->wq, &wait); return; } @@ -2110,7 +2109,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); - finish_wait(&vc->wq, &wait); + swait_finish(&vc->wq, &wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); @@ -2154,7 +2153,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_start_thread(vcpu); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { - wake_up(&vc->wq); + swait_wake(&vc->wq); } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index b396868d2aa7..6d535973b200 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -33,13 +33,13 @@ #include <linux/ratelimit.h> #include <linux/context_tracking.h> #include <linux/hugetlb.h> +#include <linux/uaccess.h> #include <asm/firmware.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/mmu.h> #include <asm/mmu_context.h> -#include <asm/uaccess.h> #include <asm/tlbflush.h> #include <asm/siginfo.h> #include <asm/debug.h> @@ -272,15 +272,16 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (in_atomic() || mm == NULL) { + if (faulthandler_disabled() || mm == NULL) { if (!user_mode(regs)) { rc = SIGSEGV; goto bail; } - /* in_atomic() in user mode is really bad, + /* faulthandler_disabled() in user mode is really bad, as is current->mm == NULL. */ printk(KERN_EMERG "Page fault in user mode with " - "in_atomic() = %d mm = %p\n", in_atomic(), mm); + "faulthandler_disabled() = %d mm = %p\n", + faulthandler_disabled(), mm); printk(KERN_EMERG "NIP = %lx MSR = %lx\n", regs->nip, regs->msr); die("Weird page fault", regs, SIGSEGV); diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c index e7450bdbe83a..e292c8a60952 100644 --- a/arch/powerpc/mm/highmem.c +++ b/arch/powerpc/mm/highmem.c @@ -34,7 +34,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -59,6 +59,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < __fix_to_virt(FIX_KMAP_END)) { pagefault_enable(); + preempt_enable(); return; } @@ -82,5 +83,6 @@ void __kunmap_atomic(void *kvaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index 3f175e8aedb4..c4c02f91904c 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -752,7 +752,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, } pr_debug("%s:%u: notification %s issued\n", __func__, __LINE__, op); - res = wait_event_interruptible(dev->done.wait, + res = swait_event_interruptible(dev->done.wait, dev->done.done || kthread_should_stop()); if (kthread_should_stop()) res = -EINTR; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d01fc588b5c3..905007eead88 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -419,7 +419,7 @@ struct kvm_s390_irq_payload { struct kvm_s390_local_interrupt { spinlock_t lock; struct kvm_s390_float_interrupt *float_int; - wait_queue_head_t *wq; + struct swait_head *wq; atomic_t *cpuflags; DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS); struct kvm_s390_irq_payload irq; diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f6ac1d7e7ed8..5c7381c5ad7f 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -98,7 +98,8 @@ static inline unsigned long extable_fixup(const struct exception_table_entry *x) * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -118,7 +119,8 @@ unsigned long __must_check __copy_from_user(void *to, const void __user *from, * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -264,7 +266,8 @@ int __get_user_bad(void) __attribute__((noreturn)); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -290,7 +293,8 @@ __compiletime_warning("copy_from_user() buffer size is not provably correct") * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * @@ -348,7 +352,8 @@ static inline unsigned long strnlen_user(const char __user *src, unsigned long n * strlen_user: - Get the size of a string in user space. * @str: The string to measure. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Get the size of a NUL-terminated string in user space. * diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 3dbba9a2bb0f..15016703b4bf 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -875,13 +875,13 @@ no_timer: void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) { - if (waitqueue_active(&vcpu->wq)) { + if (swaitqueue_active(&vcpu->wq)) { /* * The vcpu gave up the cpu voluntarily, mark it as a good * yield-candidate. */ vcpu->preempted = true; - wake_up_interruptible(&vcpu->wq); + swait_wake_interruptible(&vcpu->wq); vcpu->stat.halt_wakeup++; } } @@ -987,7 +987,7 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) spin_lock(&li->lock); irq.u.pgm.code = code; __inject_prog(vcpu, &irq); - BUG_ON(waitqueue_active(li->wq)); + BUG_ON(swaitqueue_active(li->wq)); spin_unlock(&li->lock); return 0; } @@ -1006,7 +1006,7 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu, spin_lock(&li->lock); irq.u.pgm = *pgm_info; rc = __inject_prog(vcpu, &irq); - BUG_ON(waitqueue_active(li->wq)); + BUG_ON(swaitqueue_active(li->wq)); spin_unlock(&li->lock); return rc; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 76515bcea2f1..4c8f5d7f9c23 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -399,7 +399,7 @@ static inline int do_exception(struct pt_regs *regs, int access) * user context. */ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(regs) || in_atomic() || !mm)) + if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) goto out; address = trans_exc_code & __FAIL_ADDR_MASK; diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h index 69326dfb894d..01aec8ccde83 100644 --- a/arch/score/include/asm/uaccess.h +++ b/arch/score/include/asm/uaccess.h @@ -36,7 +36,8 @@ * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -61,7 +62,8 @@ * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -79,7 +81,8 @@ * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -98,7 +101,8 @@ * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -119,7 +123,8 @@ * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger diff --git a/arch/score/mm/fault.c b/arch/score/mm/fault.c index 6860beb2a280..37a6c2e0e969 100644 --- a/arch/score/mm/fault.c +++ b/arch/score/mm/fault.c @@ -34,6 +34,7 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/ptrace.h> +#include <linux/uaccess.h> /* * This routine handles page faults. It determines the address, @@ -73,7 +74,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (pagefault_disabled() || !mm) goto bad_area_nosemaphore; if (user_mode(regs)) diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index eb10ff84015c..6fe8089e63fa 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -147,6 +147,7 @@ void irq_ctx_exit(int cpu) hardirq_ctx[cpu] = NULL; } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { struct thread_info *curctx; @@ -174,6 +175,7 @@ void do_softirq_own_stack(void) "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" ); } +#endif #else static inline void handle_one_irq(unsigned int irq) { diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index a58fec9b55e0..79d8276377d1 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -17,6 +17,7 @@ #include <linux/kprobes.h> #include <linux/perf_event.h> #include <linux/kdebug.h> +#include <linux/uaccess.h> #include <asm/io_trapped.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> @@ -438,9 +439,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, /* * If we're in an interrupt, have no user context or are running - * in an atomic region then we must not take the fault: + * with pagefaults disabled then we must not take the fault: */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index e49502acbab4..85cb0c621283 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -189,12 +189,10 @@ config NR_CPUS source kernel/Kconfig.hz config RWSEM_GENERIC_SPINLOCK - bool - default y if SPARC32 + def_bool PREEMPT_RT_FULL config RWSEM_XCHGADD_ALGORITHM - bool - default y if SPARC64 + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL config GENERIC_HWEIGHT bool diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 4033c23bdfa6..763cd88b4e92 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -849,6 +849,7 @@ void __irq_entry handler_irq(int pil, struct pt_regs *regs) set_irq_regs(old_regs); } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { void *orig_sp, *sp = softirq_stack[smp_processor_id()]; @@ -863,6 +864,7 @@ void do_softirq_own_stack(void) __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); } +#endif #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(void) diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index 70d817154fe8..c399e7b3b035 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -21,6 +21,7 @@ #include <linux/perf_event.h> #include <linux/interrupt.h> #include <linux/kdebug.h> +#include <linux/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> @@ -29,7 +30,6 @@ #include <asm/setup.h> #include <asm/smp.h> #include <asm/traps.h> -#include <asm/uaccess.h> #include "mm_32.h" @@ -196,7 +196,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (pagefault_disabled() || !mm) goto no_context; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 479823249429..e9268ea1a68d 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -22,12 +22,12 @@ #include <linux/kdebug.h> #include <linux/percpu.h> #include <linux/context_tracking.h> +#include <linux/uaccess.h> #include <asm/page.h> #include <asm/pgtable.h> #include <asm/openprom.h> #include <asm/oplib.h> -#include <asm/uaccess.h> #include <asm/asi.h> #include <asm/lsu.h> #include <asm/sections.h> @@ -330,7 +330,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto intr_or_no_mm; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); diff --git a/arch/sparc/mm/highmem.c b/arch/sparc/mm/highmem.c index 449f864f0cef..a454ec5ff07a 100644 --- a/arch/sparc/mm/highmem.c +++ b/arch/sparc/mm/highmem.c @@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page) unsigned long vaddr; long idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -91,6 +91,7 @@ void __kunmap_atomic(void *kvaddr) if (vaddr < FIXADDR_START) { // FIXME pagefault_enable(); + preempt_enable(); return; } @@ -126,5 +127,6 @@ void __kunmap_atomic(void *kvaddr) kmap_atomic_idx_pop(); pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 1d71181dcc04..1a55c8481272 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2738,7 +2738,7 @@ void hugetlb_setup(struct pt_regs *regs) struct mm_struct *mm = current->mm; struct tsb_config *tp; - if (in_atomic() || !mm) { + if (faulthandler_disabled() || !mm) { const struct exception_table_entry *entry; entry = search_exception_tables(regs->tpc); diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h index f41cb53cf645..a33276bf5ca1 100644 --- a/arch/tile/include/asm/uaccess.h +++ b/arch/tile/include/asm/uaccess.h @@ -78,7 +78,8 @@ int __range_ok(unsigned long addr, unsigned long size); * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -192,7 +193,8 @@ extern int __get_user_bad(void) * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -274,7 +276,8 @@ extern int __put_user_bad(void) * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -330,7 +333,8 @@ extern int __put_user_bad(void) * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -366,7 +370,8 @@ copy_to_user(void __user *to, const void *from, unsigned long n) * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. @@ -437,7 +442,8 @@ static inline unsigned long __must_check copy_from_user(void *to, * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to user space. Caller must check * the specified blocks with access_ok() before calling this function. diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index e83cc999da02..3f4f58d34a92 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -354,9 +354,9 @@ static int handle_page_fault(struct pt_regs *regs, /* * If we're in an interrupt, have no user context or are running in an - * atomic region then we must not take the fault. + * region with pagefaults disabled then we must not take the fault. */ - if (in_atomic() || !mm) { + if (pagefault_disabled() || !mm) { vma = NULL; /* happy compiler */ goto bad_area_nosemaphore; } diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 6aa2f2625447..fcd545014e79 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -201,7 +201,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) int idx, type; pte_t *pte; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable(); pagefault_disable(); /* Avoid icache flushes by disallowing atomic executable mappings. */ @@ -259,6 +259,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 8e4daf44e980..f9c9e5a6beba 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -35,10 +35,10 @@ int handle_page_fault(unsigned long address, unsigned long ip, *code_out = SEGV_MAPERR; /* - * If the fault was during atomic operation, don't take the fault, just + * If the fault was with pagefaults disabled, don't take the fault, just * fail. */ - if (in_atomic()) + if (faulthandler_disabled()) goto out_nosemaphore; if (is_user) diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 0dc922dba915..afccef5529cc 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -218,7 +218,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) * If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) + if (faulthandler_disabled() || !mm) goto no_context; if (user_mode(regs)) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 983983c390ea..7c23372b5615 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -22,6 +22,7 @@ config X86_64 ### Arch settings config X86 def_bool y + select HAVE_PREEMPT_LAZY select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS @@ -203,8 +204,11 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y depends on ISA_DMA_API +config RWSEM_GENERIC_SPINLOCK + def_bool PREEMPT_RT_FULL + config RWSEM_XCHGADD_ALGORITHM - def_bool y + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL config GENERIC_CALIBRATE_DELAY def_bool y @@ -846,7 +850,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL - select CPUMASK_OFFSTACK + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL ---help--- Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 112cefacf2af..3fd3b16349ae 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -382,14 +382,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK); + nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -406,14 +406,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -430,14 +430,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -454,14 +454,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -513,18 +513,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + kernel_fpu_begin(); aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } if (walk.nbytes) { + kernel_fpu_begin(); ctr_crypt_final(ctx, &walk); + kernel_fpu_end(); err = blkcipher_walk_done(desc, &walk, 0); } - kernel_fpu_end(); return err; } diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 236c80974457..f799ec36bfa7 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -60,7 +60,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, bool enc) { - bool fpu_enabled = false; + bool fpu_enabled; struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; @@ -76,7 +76,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, u8 *wsrc = walk->src.virt.addr; u8 *wdst = walk->dst.virt.addr; - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { @@ -104,10 +104,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } - - cast5_fpu_end(fpu_enabled); return err; } @@ -228,7 +227,7 @@ done: static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -237,12 +236,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); nbytes = __cbc_decrypt(desc, &walk); + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - - cast5_fpu_end(fpu_enabled); return err; } @@ -312,7 +310,7 @@ done: static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -321,13 +319,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); nbytes = __ctr_crypt(desc, &walk); + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - cast5_fpu_end(fpu_enabled); - if (walk.nbytes) { ctr_crypt_final(desc, &walk); err = blkcipher_walk_done(desc, &walk, 0); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 6a85598931b5..3a506ce7ed93 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, void *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = 128 / 8; unsigned int nbytes, i, func_bytes; - bool fpu_enabled = false; + bool fpu_enabled; int err; err = blkcipher_walk_virt(desc, walk); @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, u8 *wdst = walk->dst.virt.addr; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, } done: + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } - glue_fpu_end(fpu_enabled); return err; } @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, while ((nbytes = walk.nbytes)) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - glue_fpu_end(fpu_enabled); return err; } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); @@ -277,7 +277,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -286,13 +286,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, while ((nbytes = walk.nbytes) >= bsize) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - glue_fpu_end(fpu_enabled); - if (walk.nbytes) { glue_ctr_crypt_final_128bit( gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); @@ -347,7 +346,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, void *tweak_ctx, void *crypt_ctx) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -360,21 +359,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, /* set minimum length to bsize, for tweak_fn */ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, + desc, false, nbytes < bsize ? bsize : nbytes); - /* calculate first value of T */ tweak_fn(tweak_ctx, walk.iv, walk.iv); + glue_fpu_end(fpu_enabled); while (nbytes) { + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, false, nbytes); nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); nbytes = walk.nbytes; } - - glue_fpu_end(fpu_enabled); - return err; } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 67b6cd00a44f..c08949b0314d 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -82,17 +82,33 @@ static __always_inline void __preempt_count_sub(int val) * a decrement which hits zero means we have no preempt_count and should * reschedule. */ -static __always_inline bool __preempt_count_dec_and_test(void) +static __always_inline bool ____preempt_count_dec_and_test(void) { GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); } +static __always_inline bool __preempt_count_dec_and_test(void) +{ + if (____preempt_count_dec_and_test()) + return true; +#ifdef CONFIG_PREEMPT_LAZY + return test_thread_flag(TIF_NEED_RESCHED_LAZY); +#else + return false; +#endif +} + /* * Returns true when we need to resched and can (barring IRQ state). */ static __always_inline bool should_resched(int preempt_offset) { +#ifdef CONFIG_PREEMPT_LAZY + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset || + test_thread_flag(TIF_NEED_RESCHED_LAZY)); +#else return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); +#endif } #ifdef CONFIG_PREEMPT diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 31eab867e6d3..0e7bfe98e1d1 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -23,6 +23,19 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +/* + * Because some traps use the IST stack, we must keep preemption + * disabled while calling do_trap(), but do_trap() may call + * force_sig_info() which will grab the signal spin_locks for the + * task, which in PREEMPT_RT_FULL are mutexes. By defining + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the + * trap. + */ +#if defined(CONFIG_PREEMPT_RT_FULL) +#define ARCH_RT_DELAYS_SIGNAL_SEND +#endif + #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 6a998598f172..64fb5cbe54fa 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -57,7 +57,7 @@ */ static __always_inline void boot_init_stack_canary(void) { - u64 canary; + u64 uninitialized_var(canary); u64 tsc; #ifdef CONFIG_X86_64 @@ -68,8 +68,16 @@ static __always_inline void boot_init_stack_canary(void) * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. + * + * For preempt-rt we need to weaken the randomness a bit, as + * we can't call into the random generator from atomic context + * due to locking constraints. We just leave canary + * uninitialized and use the TSC based randomness on top of + * it. */ +#ifndef CONFIG_PREEMPT_RT_FULL get_random_bytes(&canary, sizeof(canary)); +#endif tsc = __native_read_tsc(); canary += tsc + (tsc << 32UL); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b4bdec3e9523..606144afb990 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -55,6 +55,8 @@ struct thread_info { __u32 status; /* thread synchronous flags */ __u32 cpu; /* current CPU */ int saved_preempt_count; + int preempt_lazy_count; /* 0 => lazy preemptable + <0 => BUG */ mm_segment_t addr_limit; void __user *sysenter_return; unsigned int sig_on_uaccess_error:1; @@ -95,6 +97,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ @@ -119,6 +122,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_NOTSC (1 << TIF_NOTSC) @@ -168,6 +172,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) + #define STACK_WARN (THREAD_SIZE/8) /* diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 81782dc54193..c64568fd41d5 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -74,7 +74,8 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un * @addr: User space pointer to start of block to check * @size: Size of block to check * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Checks if a pointer to a block of memory in user space is valid. * @@ -145,7 +146,8 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -240,7 +242,8 @@ extern void __put_user_8(void); * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger @@ -459,7 +462,8 @@ struct __large_struct { unsigned long buf[100]; }; * @x: Variable to store result. * @ptr: Source address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple variable from user space to kernel * space. It supports simple types like char and int, but not larger @@ -483,7 +487,8 @@ struct __large_struct { unsigned long buf[100]; }; * @x: Value to copy to user space. * @ptr: Destination address, in user space. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * This macro copies a single simple value from kernel space to user * space. It supports simple types like char and int, but not larger diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 3c03a5de64d3..7c8ad3451988 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -70,7 +70,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. @@ -117,7 +118,8 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. Caller must check * the specified block with access_ok() before calling this function. diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index fc808b83fccb..ebb40118abf5 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -615,9 +615,9 @@ struct bau_control { cycles_t send_message; cycles_t period_end; cycles_t period_time; - spinlock_t uvhub_lock; - spinlock_t queue_lock; - spinlock_t disable_lock; + raw_spinlock_t uvhub_lock; + raw_spinlock_t queue_lock; + raw_spinlock_t disable_lock; /* tunables */ int max_concurr; int max_concurr_const; @@ -776,15 +776,15 @@ static inline int atom_asr(short i, struct atomic_short *v) * to be lowered below the current 'v'. atomic_add_unless can only stop * on equal. */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) { - spin_lock(lock); + raw_spin_lock(lock); if (atomic_read(v) >= u) { - spin_unlock(lock); + raw_spin_unlock(lock); return 0; } atomic_inc(v); - spin_unlock(lock); + raw_spin_unlock(lock); return 1; } diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a00ad8f2a657..c2729abe02bc 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -492,7 +492,7 @@ struct uv_blade_info { unsigned short nr_online_cpus; unsigned short pnode; short memory_nid; - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */ + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */ unsigned long nmi_count; /* obsolete, see uv_hub_nmi */ }; extern struct uv_blade_info *uv_blade_info; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f4dc2462a1ac..07c6aba75aa0 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1891,7 +1891,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) { /* If we are moving the irq we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { + if (unlikely(irqd_is_setaffinity_pending(data) && + !irqd_irq_inprogress(data))) { mask_ioapic(cfg); return true; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c8d92950bc04..3d2fbca33b73 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -949,7 +949,7 @@ void __init uv_system_init(void) uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; - spin_lock_init(&uv_blade_info[blade].nmi_lock); + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock); min_pnode = min(pnode, min_pnode); max_pnode = max(pnode, max_pnode); blade++; diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 9f6b9341950f..5701b507510b 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -32,6 +32,7 @@ void common(void) { OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); @@ -71,4 +72,5 @@ void common(void) { BLANK(); DEFINE(PTREGS_SIZE, sizeof(struct pt_regs)); + DEFINE(_PREEMPT_ENABLED, PREEMPT_ENABLED); } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 20190bdac9d5..9d46f9a133e1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -41,6 +41,8 @@ #include <linux/debugfs.h> #include <linux/irq_work.h> #include <linux/export.h> +#include <linux/jiffies.h> +#include <linux/work-simple.h> #include <asm/processor.h> #include <asm/traps.h> @@ -1267,7 +1269,7 @@ void mce_log_therm_throt_event(__u64 status) static unsigned long check_interval = INITIAL_CHECK_INTERVAL; static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); +static DEFINE_PER_CPU(struct hrtimer, mce_timer); static unsigned long mce_adjust_timer_default(unsigned long interval) { @@ -1276,32 +1278,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; -static void __restart_timer(struct timer_list *t, unsigned long interval) +static enum hrtimer_restart __restart_timer(struct hrtimer *timer, unsigned long interval) { - unsigned long when = jiffies + interval; - unsigned long flags; - - local_irq_save(flags); - - if (timer_pending(t)) { - if (time_before(when, t->expires)) - mod_timer_pinned(t, when); - } else { - t->expires = round_jiffies(when); - add_timer_on(t, smp_processor_id()); - } - - local_irq_restore(flags); + if (!interval) + return HRTIMER_NORESTART; + hrtimer_forward_now(timer, ns_to_ktime(jiffies_to_nsecs(interval))); + return HRTIMER_RESTART; } -static void mce_timer_fn(unsigned long data) +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) { - struct timer_list *t = this_cpu_ptr(&mce_timer); - int cpu = smp_processor_id(); unsigned long iv; - WARN_ON(cpu != data); - iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { @@ -1324,7 +1312,7 @@ static void mce_timer_fn(unsigned long data) done: __this_cpu_write(mce_next_interval, iv); - __restart_timer(t, iv); + return __restart_timer(timer, iv); } /* @@ -1332,7 +1320,7 @@ done: */ void mce_timer_kick(unsigned long interval) { - struct timer_list *t = this_cpu_ptr(&mce_timer); + struct hrtimer *t = this_cpu_ptr(&mce_timer); unsigned long iv = __this_cpu_read(mce_next_interval); __restart_timer(t, interval); @@ -1347,7 +1335,7 @@ static void mce_timer_delete_all(void) int cpu; for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); + hrtimer_cancel(&per_cpu(mce_timer, cpu)); } static void mce_do_trigger(struct work_struct *work) @@ -1357,6 +1345,56 @@ static void mce_do_trigger(struct work_struct *work) static DECLARE_WORK(mce_trigger_work, mce_do_trigger); +static void __mce_notify_work(struct swork_event *event) +{ + /* Not more than two messages every minute */ + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); + + /* wake processes polling /dev/mcelog */ + wake_up_interruptible(&mce_chrdev_wait); + + /* + * There is no risk of missing notifications because + * work_pending is always cleared before the function is + * executed. + */ + if (mce_helper[0] && !work_pending(&mce_trigger_work)) + schedule_work(&mce_trigger_work); + + if (__ratelimit(&ratelimit)) + pr_info(HW_ERR "Machine check events logged\n"); +} + +#ifdef CONFIG_PREEMPT_RT_FULL +static bool notify_work_ready __read_mostly; +static struct swork_event notify_work; + +static int mce_notify_work_init(void) +{ + int err; + + err = swork_get(); + if (err) + return err; + + INIT_SWORK(¬ify_work, __mce_notify_work); + notify_work_ready = true; + return 0; +} + +static void mce_notify_work(void) +{ + if (notify_work_ready) + swork_queue(¬ify_work); +} +#else +static void mce_notify_work(void) +{ + __mce_notify_work(NULL); +} +static inline int mce_notify_work_init(void) { return 0; } +#endif + /* * Notify the user(s) about new machine check events. * Can be called from interrupt context, but not from machine check/NMI @@ -1364,19 +1402,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); */ int mce_notify_irq(void) { - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - if (test_and_clear_bit(0, &mce_need_notify)) { - /* wake processes polling /dev/mcelog */ - wake_up_interruptible(&mce_chrdev_wait); - - if (mce_helper[0]) - schedule_work(&mce_trigger_work); - - if (__ratelimit(&ratelimit)) - pr_info(HW_ERR "Machine check events logged\n"); - + mce_notify_work(); return 1; } return 0; @@ -1649,7 +1676,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) } } -static void mce_start_timer(unsigned int cpu, struct timer_list *t) +static void mce_start_timer(unsigned int cpu, struct hrtimer *t) { unsigned long iv = check_interval * HZ; @@ -1658,16 +1685,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) per_cpu(mce_next_interval, cpu) = iv; - t->expires = round_jiffies(jiffies + iv); - add_timer_on(t, cpu); + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), + 0, HRTIMER_MODE_REL_PINNED); } static void __mcheck_cpu_init_timer(void) { - struct timer_list *t = this_cpu_ptr(&mce_timer); + struct hrtimer *t = this_cpu_ptr(&mce_timer); unsigned int cpu = smp_processor_id(); - setup_timer(t, mce_timer_fn, cpu); + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = mce_timer_fn; mce_start_timer(cpu, t); } @@ -2345,6 +2373,8 @@ static void mce_disable_cpu(void *h) if (!mce_available(raw_cpu_ptr(&cpu_info))) return; + hrtimer_cancel(this_cpu_ptr(&mce_timer)); + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < mca_cfg.banks; i++) { @@ -2371,6 +2401,7 @@ static void mce_reenable_cpu(void *h) if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); } + __mcheck_cpu_init_timer(); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -2378,7 +2409,6 @@ static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: @@ -2398,11 +2428,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_PREPARE: smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - del_timer_sync(t); break; case CPU_DOWN_FAILED: smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - mce_start_timer(cpu, t); break; } @@ -2441,6 +2469,10 @@ static __init int mcheck_init_device(void) goto err_out; } + err = mce_notify_work_init(); + if (err) + goto err_out; + if (!zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL)) { err = -ENOMEM; goto err_out; diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 358c54ad20d4..94689f19ad92 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -119,7 +119,7 @@ static struct perf_pmu_events_attr event_attr_##v = { \ }; struct rapl_pmu { - spinlock_t lock; + raw_spinlock_t lock; int n_active; /* number of active events */ struct list_head active_list; struct pmu *pmu; /* pointer to rapl_pmu_class */ @@ -223,13 +223,13 @@ static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) if (!pmu->n_active) return HRTIMER_NORESTART; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); list_for_each_entry(event, &pmu->active_list, active_entry) { rapl_event_update(event); } - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); hrtimer_forward_now(hrtimer, pmu->timer_interval); @@ -266,9 +266,9 @@ static void rapl_pmu_event_start(struct perf_event *event, int mode) struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); __rapl_pmu_event_start(pmu, event); - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); } static void rapl_pmu_event_stop(struct perf_event *event, int mode) @@ -277,7 +277,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) struct hw_perf_event *hwc = &event->hw; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); /* mark event as deactivated and stopped */ if (!(hwc->state & PERF_HES_STOPPED)) { @@ -302,7 +302,7 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); } static int rapl_pmu_event_add(struct perf_event *event, int mode) @@ -311,14 +311,14 @@ static int rapl_pmu_event_add(struct perf_event *event, int mode) struct hw_perf_event *hwc = &event->hw; unsigned long flags; - spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&pmu->lock, flags); hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (mode & PERF_EF_START) __rapl_pmu_event_start(pmu, event); - spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&pmu->lock, flags); return 0; } @@ -594,7 +594,7 @@ static int rapl_cpu_prepare(int cpu) pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); if (!pmu) return -1; - spin_lock_init(&pmu->lock); + raw_spin_lock_init(&pmu->lock); INIT_LIST_HEAD(&pmu->active_list); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 464ffd69b92e..00db1aad1548 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -42,7 +42,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { - const unsigned cpu = get_cpu(); + const unsigned cpu = get_cpu_light(); int graph = 0; u32 *prev_esp; @@ -86,7 +86,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, break; touch_nmi_watchdog(); } - put_cpu(); + put_cpu_light(); } EXPORT_SYMBOL(dump_trace); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 5f1c6266eb30..c331e3fef465 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -152,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { - const unsigned cpu = get_cpu(); + const unsigned cpu = get_cpu_light(); struct thread_info *tinfo; unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); unsigned long dummy; @@ -241,7 +241,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * This handles the process stack: */ bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); - put_cpu(); + put_cpu_light(); } EXPORT_SYMBOL(dump_trace); @@ -255,7 +255,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, int cpu; int i; - preempt_disable(); + migrate_disable(); cpu = smp_processor_id(); irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); @@ -291,7 +291,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, pr_cont(" %016lx", *stack++); touch_nmi_watchdog(); } - preempt_enable(); + migrate_enable(); pr_cont("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 1c309763e321..8612b314af92 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -359,8 +359,24 @@ END(ret_from_exception) ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) need_resched: + # preempt count == 0 + NEED_RS set? cmpl $0,PER_CPU_VAR(__preempt_count) +#ifndef CONFIG_PREEMPT_LAZY jnz restore_all +#else + jz test_int_off + + # atleast preempt count == 0 ? + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) + jne restore_all + + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? + jnz restore_all + + testl $_TIF_NEED_RESCHED_LAZY, TI_flags(%ebp) + jz restore_all +test_int_off: +#endif testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq @@ -594,7 +610,7 @@ ENDPROC(system_call) ALIGN RING0_PTREGS_FRAME # can't unwind into user space anyway work_pending: - testb $_TIF_NEED_RESCHED, %cl + testl $_TIF_NEED_RESCHED_MASK, %ecx jz work_notifysig work_resched: call schedule @@ -607,7 +623,7 @@ work_resched: andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testl $_TIF_NEED_RESCHED_MASK, %ecx jnz work_resched work_notifysig: # deal with pending signals and diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 6c9cb6073832..db2a15c91a65 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -370,8 +370,8 @@ GLOBAL(int_with_check) /* First do a reschedule test. */ /* edx: work, edi: workmask */ int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful + testl $_TIF_NEED_RESCHED_MASK,%edx + jz int_very_careful TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi @@ -776,7 +776,23 @@ retint_kernel: bt $9,EFLAGS(%rsp) /* interrupts were off? */ jnc 1f 0: cmpl $0,PER_CPU_VAR(__preempt_count) +#ifndef CONFIG_PREEMPT_LAZY jnz 1f +#else + jz do_preempt_schedule_irq + + # atleast preempt count == 0 ? + cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count) + jnz 1f + + GET_THREAD_INFO(%rcx) + cmpl $0, TI_preempt_lazy_count(%rcx) + jnz 1f + + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx) + jnc 1f +do_preempt_schedule_irq: +#endif call preempt_schedule_irq jmp 0b 1: @@ -844,8 +860,8 @@ native_irq_return_ldt: /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal + testl $_TIF_NEED_RESCHED_MASK,%edx + jz retint_signal TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi @@ -1118,6 +1134,7 @@ bad_gs: jmp 2b .previous +#ifndef CONFIG_PREEMPT_RT_FULL /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(do_softirq_own_stack) CFI_STARTPROC @@ -1137,6 +1154,7 @@ ENTRY(do_softirq_own_stack) ret CFI_ENDPROC END(do_softirq_own_stack) +#endif #ifdef CONFIG_XEN idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0 diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index f9fd86a7fcc7..521ef3cc8066 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -135,6 +135,7 @@ void irq_ctx_init(int cpu) cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); } +#ifndef CONFIG_PREEMPT_RT_FULL void do_softirq_own_stack(void) { struct thread_info *curstk; @@ -153,6 +154,7 @@ void do_softirq_own_stack(void) call_on_stack(__do_softirq, isp); } +#endif bool handle_irq(unsigned irq, struct pt_regs *regs) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9435620062df..ba97b5b45879 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -36,6 +36,7 @@ #include <linux/kprobes.h> #include <linux/debugfs.h> #include <linux/nmi.h> +#include <linux/wait-simple.h> #include <asm/timer.h> #include <asm/cpu.h> #include <asm/traps.h> @@ -91,14 +92,14 @@ static void kvm_io_delay(void) struct kvm_task_sleep_node { struct hlist_node link; - wait_queue_head_t wq; + struct swait_head wq; u32 token; int cpu; bool halted; }; static struct kvm_task_sleep_head { - spinlock_t lock; + raw_spinlock_t lock; struct hlist_head list; } async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; @@ -122,17 +123,17 @@ void kvm_async_pf_task_wait(u32 token) u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; struct kvm_task_sleep_node n, *e; - DEFINE_WAIT(wait); + DEFINE_SWAITER(wait); rcu_irq_enter(); - spin_lock(&b->lock); + raw_spin_lock(&b->lock); e = _find_apf_task(b, token); if (e) { /* dummy entry exist -> wake up was delivered ahead of PF */ hlist_del(&e->link); kfree(e); - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); rcu_irq_exit(); return; @@ -141,13 +142,13 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); n.halted = is_idle_task(current) || preempt_count() > 1; - init_waitqueue_head(&n.wq); + init_swait_head(&n.wq); hlist_add_head(&n.link, &b->list); - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); for (;;) { if (!n.halted) - prepare_to_wait(&n.wq, &wait, TASK_UNINTERRUPTIBLE); + swait_prepare(&n.wq, &wait, TASK_UNINTERRUPTIBLE); if (hlist_unhashed(&n.link)) break; @@ -166,7 +167,7 @@ void kvm_async_pf_task_wait(u32 token) } } if (!n.halted) - finish_wait(&n.wq, &wait); + swait_finish(&n.wq, &wait); rcu_irq_exit(); return; @@ -178,8 +179,8 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n) hlist_del_init(&n->link); if (n->halted) smp_send_reschedule(n->cpu); - else if (waitqueue_active(&n->wq)) - wake_up(&n->wq); + else if (swaitqueue_active(&n->wq)) + swait_wake(&n->wq); } static void apf_task_wake_all(void) @@ -189,14 +190,14 @@ static void apf_task_wake_all(void) for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { struct hlist_node *p, *next; struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; - spin_lock(&b->lock); + raw_spin_lock(&b->lock); hlist_for_each_safe(p, next, &b->list) { struct kvm_task_sleep_node *n = hlist_entry(p, typeof(*n), link); if (n->cpu == smp_processor_id()) apf_task_wake_one(n); } - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); } } @@ -212,7 +213,7 @@ void kvm_async_pf_task_wake(u32 token) } again: - spin_lock(&b->lock); + raw_spin_lock(&b->lock); n = _find_apf_task(b, token); if (!n) { /* @@ -225,17 +226,17 @@ again: * Allocation failed! Busy wait while other cpu * handles async PF. */ - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); cpu_relax(); goto again; } n->token = token; n->cpu = smp_processor_id(); - init_waitqueue_head(&n->wq); + init_swait_head(&n->wq); hlist_add_head(&n->link, &b->list); } else apf_task_wake_one(n); - spin_unlock(&b->lock); + raw_spin_unlock(&b->lock); return; } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); @@ -486,7 +487,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(&kvm_pv_reboot_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) - spin_lock_init(&async_pf_sleepers[i].lock); + raw_spin_lock_init(&async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) x86_init.irqs.trap_init = kvm_apf_trap_init; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..3a70713079c5 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -35,6 +35,7 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> +#include <linux/highmem.h> #include <asm/pgtable.h> #include <asm/ldt.h> @@ -210,6 +211,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); +#ifdef CONFIG_PREEMPT_RT_FULL +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) +{ + int i; + + /* + * Clear @prev's kmap_atomic mappings + */ + for (i = 0; i < prev_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + pte_t *ptep = kmap_pte - idx; + + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); + } + /* + * Restore @next_p's kmap_atomic mappings + */ + for (i = 0; i < next_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + if (!pte_none(next_p->kmap_pte[i])) + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); + } +} +#else +static inline void +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } +#endif + /* * switch_to(x,y) should switch tasks from x to y. @@ -292,6 +322,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); + switch_kmaps(prev_p, next_p); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 5d2e2e9af1c4..1c9cc74ba99b 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -726,6 +726,14 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) { user_exit(); +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND + if (unlikely(current->forced_info.si_signo)) { + struct task_struct *t = current; + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); + t->forced_info.si_signo = 0; + } +#endif + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7dd9a8d3911a..192de5908083 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1106,7 +1106,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic) static void apic_timer_expired(struct kvm_lapic *apic) { struct kvm_vcpu *vcpu = apic->vcpu; - wait_queue_head_t *q = &vcpu->wq; + struct swait_head *q = &vcpu->wq; struct kvm_timer *ktimer = &apic->lapic_timer; if (atomic_read(&apic->lapic_timer.pending)) @@ -1115,8 +1115,8 @@ static void apic_timer_expired(struct kvm_lapic *apic) atomic_inc(&apic->lapic_timer.pending); kvm_set_pending_timer(vcpu); - if (waitqueue_active(q)) - wake_up_interruptible(q); + if (swaitqueue_active(q)) + swait_wake_interruptible(q); if (apic_lvtt_tscdeadline(apic)) ktimer->expired_tscdeadline = ktimer->tscdeadline; @@ -1169,8 +1169,36 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu) __delay(tsc_deadline - guest_tsc); } +static enum hrtimer_restart apic_timer_fn(struct hrtimer *data); + +static void __apic_timer_expired(struct hrtimer *data) +{ + int ret, i = 0; + enum hrtimer_restart r; + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); + + r = apic_timer_fn(data); + + if (r == HRTIMER_RESTART) { + do { + ret = hrtimer_start_expires(data, HRTIMER_MODE_ABS); + if (ret == -ETIME) + hrtimer_add_expires_ns(&ktimer->timer, + ktimer->period); + i++; + } while (ret == -ETIME && i < 10); + + if (ret == -ETIME) { + printk_once(KERN_ERR "%s: failed to reprogram timer\n", + __func__); + WARN_ON_ONCE(1); + } + } +} + static void start_apic_timer(struct kvm_lapic *apic) { + int ret; ktime_t now; atomic_set(&apic->lapic_timer.pending, 0); @@ -1201,9 +1229,11 @@ static void start_apic_timer(struct kvm_lapic *apic) } } - hrtimer_start(&apic->lapic_timer.timer, + ret = hrtimer_start(&apic->lapic_timer.timer, ktime_add_ns(now, apic->lapic_timer.period), HRTIMER_MODE_ABS); + if (ret == -ETIME) + __apic_timer_expired(&apic->lapic_timer.timer); apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" PRIx64 ", " @@ -1235,8 +1265,10 @@ static void start_apic_timer(struct kvm_lapic *apic) do_div(ns, this_tsc_khz); expire = ktime_add_ns(now, ns); expire = ktime_sub_ns(expire, lapic_timer_advance_ns); - hrtimer_start(&apic->lapic_timer.timer, + ret = hrtimer_start(&apic->lapic_timer.timer, expire, HRTIMER_MODE_ABS); + if (ret == -ETIME) + __apic_timer_expired(&apic->lapic_timer.timer); } else apic_timer_expired(apic); @@ -1709,6 +1741,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); apic->lapic_timer.timer.function = apic_timer_fn; + apic->lapic_timer.timer.irqsafe = 1; /* * APIC is created enabled. This will prevent kvm_lapic_set_base from @@ -1836,7 +1869,8 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) timer = &vcpu->arch.apic->lapic_timer.timer; if (hrtimer_cancel(timer)) - hrtimer_start_expires(timer, HRTIMER_MODE_ABS); + if (hrtimer_start_expires(timer, HRTIMER_MODE_ABS) == -ETIME) + __apic_timer_expired(timer); } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c6c09e241885..a740d91b2a69 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5815,6 +5815,13 @@ int kvm_arch_init(void *opaque) goto out; } +#ifdef CONFIG_PREEMPT_RT_FULL + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); + return -EOPNOTSUPP; + } +#endif + r = kvm_mmu_module_init(); if (r) goto out_free_percpu; diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index e2f5e21c03b3..91d93b95bd86 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -647,7 +647,8 @@ EXPORT_SYMBOL(__copy_from_user_ll_nocache_nozero); * @from: Source address, in kernel space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from kernel space to user space. * @@ -668,7 +669,8 @@ EXPORT_SYMBOL(_copy_to_user); * @from: Source address, in user space. * @n: Number of bytes to copy. * - * Context: User context only. This function may sleep. + * Context: User context only. This function may sleep if pagefaults are + * enabled. * * Copy data from user space to kernel space. * diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 27bc31f0da52..4eb6b9c6648a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -13,6 +13,7 @@ #include <linux/hugetlb.h> /* hstate_index_to_shift */ #include <linux/prefetch.h> /* prefetchw */ #include <linux/context_tracking.h> /* exception_enter(), ... */ +#include <linux/uaccess.h> /* faulthandler_disabled() */ #include <asm/traps.h> /* dotraplinkage, ... */ #include <asm/pgalloc.h> /* pgd_*(), ... */ @@ -1133,9 +1134,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, /* * If we're in an interrupt, have no user context or are running - * in an atomic region then we must not take the fault: + * in a region with pagefaults disabled then we must not take the fault */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(faulthandler_disabled() || !mm)) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 4500142bc4aa..0d1cbcf47f80 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -32,10 +32,11 @@ EXPORT_SYMBOL(kunmap); */ void *kmap_atomic_prot(struct page *page, pgprot_t prot) { + pte_t pte = mk_pte(page, prot); unsigned long vaddr; int idx, type; - /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ + preempt_disable_nort(); pagefault_disable(); if (!PageHighMem(page)) @@ -45,7 +46,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); - set_pte(kmap_pte-idx, mk_pte(page, prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_pte(kmap_pte-idx, pte); arch_flush_lazy_mmu_mode(); return (void *)vaddr; @@ -88,6 +92,9 @@ void __kunmap_atomic(void *kvaddr) * is a bad idea also, in case the page changes cacheability * attributes or becomes a protected page in a hypervisor. */ +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); arch_flush_lazy_mmu_mode(); @@ -100,6 +107,7 @@ void __kunmap_atomic(void *kvaddr) #endif pagefault_enable(); + preempt_enable_nort(); } EXPORT_SYMBOL(__kunmap_atomic); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 9ca35fc60cfe..b2ffa5c7d3d3 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -56,15 +56,22 @@ EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { + pte_t pte = pfn_pte(pfn, prot); unsigned long vaddr; int idx, type; + preempt_disable(); pagefault_disable(); type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + WARN_ON(!pte_none(*(kmap_pte - idx))); + +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_pte(kmap_pte - idx, pte); arch_flush_lazy_mmu_mode(); return (void *)vaddr; @@ -112,10 +119,14 @@ iounmap_atomic(void __iomem *kvaddr) * is a bad idea also, in case the page changes cacheability * attributes or becomes a protected page in a hypervisor. */ +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3b6ec42718e4..7871083de089 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -714,9 +714,9 @@ static void destination_plugged(struct bau_desc *bau_desc, quiesce_local_uvhub(hmaster); - spin_lock(&hmaster->queue_lock); + raw_spin_lock(&hmaster->queue_lock); reset_with_ipi(&bau_desc->distribution, bcp); - spin_unlock(&hmaster->queue_lock); + raw_spin_unlock(&hmaster->queue_lock); end_uvhub_quiesce(hmaster); @@ -736,9 +736,9 @@ static void destination_timeout(struct bau_desc *bau_desc, quiesce_local_uvhub(hmaster); - spin_lock(&hmaster->queue_lock); + raw_spin_lock(&hmaster->queue_lock); reset_with_ipi(&bau_desc->distribution, bcp); - spin_unlock(&hmaster->queue_lock); + raw_spin_unlock(&hmaster->queue_lock); end_uvhub_quiesce(hmaster); @@ -759,7 +759,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) cycles_t tm1; hmaster = bcp->uvhub_master; - spin_lock(&hmaster->disable_lock); + raw_spin_lock(&hmaster->disable_lock); if (!bcp->baudisabled) { stat->s_bau_disabled++; tm1 = get_cycles(); @@ -772,7 +772,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) } } } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); } static void count_max_concurr(int stat, struct bau_control *bcp, @@ -835,7 +835,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, */ static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) { - spinlock_t *lock = &hmaster->uvhub_lock; + raw_spinlock_t *lock = &hmaster->uvhub_lock; atomic_t *v; v = &hmaster->active_descriptor_count; @@ -968,7 +968,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) struct bau_control *hmaster; hmaster = bcp->uvhub_master; - spin_lock(&hmaster->disable_lock); + raw_spin_lock(&hmaster->disable_lock); if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { stat->s_bau_reenabled++; for_each_present_cpu(tcpu) { @@ -980,10 +980,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) tbcp->period_giveups = 0; } } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); return 0; } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); return -1; } @@ -1901,9 +1901,9 @@ static void __init init_per_cpu_tunables(void) bcp->cong_reps = congested_reps; bcp->disabled_period = sec_2_cycles(disabled_period); bcp->giveup_limit = giveup_limit; - spin_lock_init(&bcp->queue_lock); - spin_lock_init(&bcp->uvhub_lock); - spin_lock_init(&bcp->disable_lock); + raw_spin_lock_init(&bcp->queue_lock); + raw_spin_lock_init(&bcp->uvhub_lock); + raw_spin_lock_init(&bcp->disable_lock); } } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index a244237f3cfa..a718fe0d2e73 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); /* There is one of these allocated per node */ struct uv_rtc_timer_head { - spinlock_t lock; + raw_spinlock_t lock; /* next cpu waiting for timer, local node relative: */ int next_cpu; /* number of cpus on this node: */ @@ -178,7 +178,7 @@ static __init int uv_rtc_allocate_timers(void) uv_rtc_deallocate_timers(); return -ENOMEM; } - spin_lock_init(&head->lock); + raw_spin_lock_init(&head->lock); head->ncpus = uv_blade_nr_possible_cpus(bid); head->next_cpu = -1; blade_info[bid] = head; @@ -232,7 +232,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) unsigned long flags; int next_cpu; - spin_lock_irqsave(&head->lock, flags); + raw_spin_lock_irqsave(&head->lock, flags); next_cpu = head->next_cpu; *t = expires; @@ -244,12 +244,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires) if (uv_setup_intr(cpu, expires)) { *t = ULLONG_MAX; uv_rtc_find_next_timer(head, pnode); - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return -ETIME; } } - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return 0; } @@ -268,7 +268,7 @@ static int uv_rtc_unset_timer(int cpu, int force) unsigned long flags; int rc = 0; - spin_lock_irqsave(&head->lock, flags); + raw_spin_lock_irqsave(&head->lock, flags); if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) rc = 1; @@ -280,7 +280,7 @@ static int uv_rtc_unset_timer(int cpu, int force) uv_rtc_find_next_timer(head, pnode); } - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return rc; } @@ -300,13 +300,18 @@ static int uv_rtc_unset_timer(int cpu, int force) static cycle_t uv_read_rtc(struct clocksource *cs) { unsigned long offset; + cycle_t cycles; + preempt_disable(); if (uv_get_min_hub_revision_id() == 1) offset = 0; else offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset); + preempt_enable(); + + return cycles; } /* diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 9e3571a6535c..83a44a33cfa1 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -15,10 +15,10 @@ #include <linux/mm.h> #include <linux/module.h> #include <linux/hardirq.h> +#include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> #include <asm/hardirq.h> -#include <asm/uaccess.h> #include <asm/pgalloc.h> DEFINE_PER_CPU(unsigned long, asid_cache) = ASID_USER_FIRST; @@ -57,7 +57,7 @@ void do_page_fault(struct pt_regs *regs) /* If we're in an interrupt or have no user * context, we must not take the fault.. */ - if (in_atomic() || !mm) { + if (faulthandler_disabled() || !mm) { bad_page_fault(regs, address, SIGSEGV); return; } diff --git a/arch/xtensa/mm/highmem.c b/arch/xtensa/mm/highmem.c index 8cfb71ec0937..184ceadccc1a 100644 --- a/arch/xtensa/mm/highmem.c +++ b/arch/xtensa/mm/highmem.c @@ -42,6 +42,7 @@ void *kmap_atomic(struct page *page) enum fixed_addresses idx; unsigned long vaddr; + preempt_disable(); pagefault_disable(); if (!PageHighMem(page)) return page_address(page); @@ -79,6 +80,7 @@ void __kunmap_atomic(void *kvaddr) } pagefault_enable(); + preempt_enable(); } EXPORT_SYMBOL(__kunmap_atomic); |