diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 10 | ||||
-rw-r--r-- | arch/x86/crypto/aesni-intel_glue.c | 24 | ||||
-rw-r--r-- | arch/x86/crypto/cast5_avx_glue.c | 21 | ||||
-rw-r--r-- | arch/x86/crypto/glue_helper.c | 31 | ||||
-rw-r--r-- | arch/x86/include/asm/signal.h | 13 | ||||
-rw-r--r-- | arch/x86/include/asm/stackprotector.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/thread_info.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/asm-offsets.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 131 | ||||
-rw-r--r-- | arch/x86/kernel/entry_32.S | 18 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 26 | ||||
-rw-r--r-- | arch/x86/kernel/irq_32.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/irq_64.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/irq_work.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 32 | ||||
-rw-r--r-- | arch/x86/kernel/signal.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 32 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/highmem_32.c | 9 | ||||
-rw-r--r-- | arch/x86/mm/iomap_32.c | 11 |
22 files changed, 295 insertions, 107 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 78592b17a259..c212286025b6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -121,6 +121,7 @@ config X86 select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION select RTC_LIB + select HAVE_PREEMPT_LAZY select ARCH_SUPPORTS_ATOMIC_RMW config INSTRUCTION_DECODER @@ -178,8 +179,11 @@ config ARCH_MAY_HAVE_PC_FDC def_bool y depends on ISA_DMA_API +config RWSEM_GENERIC_SPINLOCK + def_bool PREEMPT_RT_FULL + config RWSEM_XCHGADD_ALGORITHM - def_bool y + def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL config GENERIC_CALIBRATE_DELAY def_bool y @@ -467,7 +471,7 @@ config X86_MDFLD select MFD_INTEL_MSIC ---help--- Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin - Internet Device(MID) platform. + Internet Device(MID) platform. Unlike standard x86 PCs, Medfield does not have many legacy devices nor standard legacy replacement devices/features. e.g. Medfield does not contain i8259, i8254, HPET, legacy BIOS, most of the io ports. @@ -803,7 +807,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL - select CPUMASK_OFFSTACK + select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL ---help--- Enable maximum number of CPUS and NUMA Nodes for this architecture. If unsure, say N. diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index f80e668785c0..3fbe870b5c97 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -252,14 +252,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, - nbytes & AES_BLOCK_MASK); + nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -276,14 +276,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -300,14 +300,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -324,14 +324,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt(desc, &walk); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes)) { + kernel_fpu_begin(); aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } - kernel_fpu_end(); return err; } @@ -364,18 +364,20 @@ static int ctr_crypt(struct blkcipher_desc *desc, err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE); desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; - kernel_fpu_begin(); while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { + kernel_fpu_begin(); aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, nbytes & AES_BLOCK_MASK, walk.iv); + kernel_fpu_end(); nbytes &= AES_BLOCK_SIZE - 1; err = blkcipher_walk_done(desc, &walk, nbytes); } if (walk.nbytes) { + kernel_fpu_begin(); ctr_crypt_final(ctx, &walk); + kernel_fpu_end(); err = blkcipher_walk_done(desc, &walk, 0); } - kernel_fpu_end(); return err; } diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index c6631813dc11..2d48e83f00d3 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -60,7 +60,7 @@ static inline void cast5_fpu_end(bool fpu_enabled) static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, bool enc) { - bool fpu_enabled = false; + bool fpu_enabled; struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = CAST5_BLOCK_SIZE; unsigned int nbytes; @@ -76,7 +76,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, u8 *wsrc = walk->src.virt.addr; u8 *wdst = walk->dst.virt.addr; - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); /* Process multi-block batch */ if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) { @@ -104,10 +104,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk, } while (nbytes >= bsize); done: + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } - - cast5_fpu_end(fpu_enabled); return err; } @@ -231,7 +230,7 @@ done: static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -240,12 +239,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes)) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); nbytes = __cbc_decrypt(desc, &walk); + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - - cast5_fpu_end(fpu_enabled); return err; } @@ -315,7 +313,7 @@ done: static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -324,13 +322,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) { - fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes); + fpu_enabled = cast5_fpu_begin(false, nbytes); nbytes = __ctr_crypt(desc, &walk); + cast5_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - cast5_fpu_end(fpu_enabled); - if (walk.nbytes) { ctr_crypt_final(desc, &walk); err = blkcipher_walk_done(desc, &walk, 0); diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c index 432f1d76ceb8..4a2bd21c2137 100644 --- a/arch/x86/crypto/glue_helper.c +++ b/arch/x86/crypto/glue_helper.c @@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, void *ctx = crypto_blkcipher_ctx(desc->tfm); const unsigned int bsize = 128 / 8; unsigned int nbytes, i, func_bytes; - bool fpu_enabled = false; + bool fpu_enabled; int err; err = blkcipher_walk_virt(desc, walk); @@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, u8 *wdst = walk->dst.virt.addr; fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); for (i = 0; i < gctx->num_funcs; i++) { func_bytes = bsize * gctx->funcs[i].num_blocks; @@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx, } done: + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, walk, nbytes); } - glue_fpu_end(fpu_enabled); return err; } @@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx, while ((nbytes = walk.nbytes)) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - glue_fpu_end(fpu_enabled); return err; } EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit); @@ -278,7 +278,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, struct scatterlist *src, unsigned int nbytes) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -287,13 +287,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx, while ((nbytes = walk.nbytes) >= bsize) { fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, nbytes); + desc, false, nbytes); nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); } - glue_fpu_end(fpu_enabled); - if (walk.nbytes) { glue_ctr_crypt_final_128bit( gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk); @@ -348,7 +347,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, void *tweak_ctx, void *crypt_ctx) { const unsigned int bsize = 128 / 8; - bool fpu_enabled = false; + bool fpu_enabled; struct blkcipher_walk walk; int err; @@ -361,21 +360,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx, /* set minimum length to bsize, for tweak_fn */ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, - desc, fpu_enabled, + desc, false, nbytes < bsize ? bsize : nbytes); - /* calculate first value of T */ tweak_fn(tweak_ctx, walk.iv, walk.iv); + glue_fpu_end(fpu_enabled); while (nbytes) { + fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit, + desc, false, nbytes); nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk); + glue_fpu_end(fpu_enabled); err = blkcipher_walk_done(desc, &walk, nbytes); nbytes = walk.nbytes; } - - glue_fpu_end(fpu_enabled); - return err; } EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit); diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 35e67a457182..6ec0792b3b9f 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -23,6 +23,19 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; +/* + * Because some traps use the IST stack, we must keep preemption + * disabled while calling do_trap(), but do_trap() may call + * force_sig_info() which will grab the signal spin_locks for the + * task, which in PREEMPT_RT_FULL are mutexes. By defining + * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set + * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the + * trap. + */ +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64) +#define ARCH_RT_DELAYS_SIGNAL_SEND +#endif + #ifndef CONFIG_COMPAT typedef sigset_t compat_sigset_t; #endif diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 6a998598f172..64fb5cbe54fa 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -57,7 +57,7 @@ */ static __always_inline void boot_init_stack_canary(void) { - u64 canary; + u64 uninitialized_var(canary); u64 tsc; #ifdef CONFIG_X86_64 @@ -68,8 +68,16 @@ static __always_inline void boot_init_stack_canary(void) * of randomness. The TSC only matters for very early init, * there it already has some randomness on most systems. Later * on during the bootup the random pool has true entropy too. + * + * For preempt-rt we need to weaken the randomness a bit, as + * we can't call into the random generator from atomic context + * due to locking constraints. We just leave canary + * uninitialized and use the TSC based randomness on top of + * it. */ +#ifndef CONFIG_PREEMPT_RT_FULL get_random_bytes(&canary, sizeof(canary)); +#endif tsc = __native_read_tsc(); canary += tsc + (tsc << 32UL); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a1df6e84691f..0365caf31d49 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -30,6 +30,8 @@ struct thread_info { __u32 cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ + int preempt_lazy_count; /* 0 => lazy preemptable, + <0 => BUG */ mm_segment_t addr_limit; struct restart_block restart_block; void __user *sysenter_return; @@ -81,6 +83,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ @@ -106,6 +109,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) @@ -156,6 +160,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) +#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY) + #define PREEMPT_ACTIVE 0x10000000 #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9ed796ccc32c..7f116f21c3bf 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2391,7 +2391,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg) { /* If we are moving the irq we need to mask it */ - if (unlikely(irqd_is_setaffinity_pending(data))) { + if (unlikely(irqd_is_setaffinity_pending(data) && + !irqd_irq_inprogress(data))) { mask_ioapic(cfg); return true; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 28610822fb3c..a36d9cfb71ea 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -33,6 +33,7 @@ void common(void) { OFFSET(TI_status, thread_info, status); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_preempt_count, thread_info, preempt_count); + OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count); BLANK(); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9239504b41cb..aaf4b9b94f38 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -18,6 +18,7 @@ #include <linux/rcupdate.h> #include <linux/kobject.h> #include <linux/uaccess.h> +#include <linux/kthread.h> #include <linux/kdebug.h> #include <linux/kernel.h> #include <linux/percpu.h> @@ -41,6 +42,7 @@ #include <linux/debugfs.h> #include <linux/irq_work.h> #include <linux/export.h> +#include <linux/jiffies.h> #include <asm/processor.h> #include <asm/mce.h> @@ -1256,7 +1258,7 @@ void mce_log_therm_throt_event(__u64 status) static unsigned long check_interval = 5 * 60; /* 5 minutes */ static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */ -static DEFINE_PER_CPU(struct timer_list, mce_timer); +static DEFINE_PER_CPU(struct hrtimer, mce_timer); static unsigned long mce_adjust_timer_default(unsigned long interval) { @@ -1266,13 +1268,10 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; -static void mce_timer_fn(unsigned long data) +static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer) { - struct timer_list *t = &__get_cpu_var(mce_timer); unsigned long iv; - WARN_ON(smp_processor_id() != data); - if (mce_available(__this_cpu_ptr(&cpu_info))) { machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_poll_banks)); @@ -1293,9 +1292,11 @@ static void mce_timer_fn(unsigned long data) __this_cpu_write(mce_next_interval, iv); /* Might have become 0 after CMCI storm subsided */ if (iv) { - t->expires = jiffies + iv; - add_timer_on(t, smp_processor_id()); + hrtimer_forward_now(timer, ns_to_ktime( + jiffies_to_usecs(iv) * 1000ULL)); + return HRTIMER_RESTART; } + return HRTIMER_NORESTART; } /* @@ -1303,28 +1304,37 @@ static void mce_timer_fn(unsigned long data) */ void mce_timer_kick(unsigned long interval) { - struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long when = jiffies + interval; + struct hrtimer *t = &__get_cpu_var(mce_timer); unsigned long iv = __this_cpu_read(mce_next_interval); - if (timer_pending(t)) { - if (time_before(when, t->expires)) - mod_timer_pinned(t, when); + if (hrtimer_active(t)) { + s64 exp; + s64 intv_us; + + intv_us = jiffies_to_usecs(interval); + exp = ktime_to_us(hrtimer_expires_remaining(t)); + if (intv_us < exp) { + hrtimer_cancel(t); + hrtimer_start_range_ns(t, + ns_to_ktime(intv_us * 1000), + 0, HRTIMER_MODE_REL_PINNED); + } } else { - t->expires = round_jiffies(when); - add_timer_on(t, smp_processor_id()); + hrtimer_start_range_ns(t, + ns_to_ktime(jiffies_to_usecs(interval) * 1000ULL), + 0, HRTIMER_MODE_REL_PINNED); } if (interval < iv) __this_cpu_write(mce_next_interval, interval); } -/* Must not be called in IRQ context where del_timer_sync() can deadlock */ +/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */ static void mce_timer_delete_all(void) { int cpu; for_each_online_cpu(cpu) - del_timer_sync(&per_cpu(mce_timer, cpu)); + hrtimer_cancel(&per_cpu(mce_timer, cpu)); } static void mce_do_trigger(struct work_struct *work) @@ -1334,6 +1344,63 @@ static void mce_do_trigger(struct work_struct *work) static DECLARE_WORK(mce_trigger_work, mce_do_trigger); +static void __mce_notify_work(void) +{ + /* Not more than two messages every minute */ + static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); + + /* wake processes polling /dev/mcelog */ + wake_up_interruptible(&mce_chrdev_wait); + + /* + * There is no risk of missing notifications because + * work_pending is always cleared before the function is + * executed. + */ + if (mce_helper[0] && !work_pending(&mce_trigger_work)) + schedule_work(&mce_trigger_work); + + if (__ratelimit(&ratelimit)) + pr_info(HW_ERR "Machine check events logged\n"); +} + +#ifdef CONFIG_PREEMPT_RT_FULL +struct task_struct *mce_notify_helper; + +static int mce_notify_helper_thread(void *unused) +{ + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + if (kthread_should_stop()) + break; + __mce_notify_work(); + } + return 0; +} + +static int mce_notify_work_init(void) +{ + mce_notify_helper = kthread_run(mce_notify_helper_thread, NULL, + "mce-notify"); + if (!mce_notify_helper) + return -ENOMEM; + + return 0; +} + +static void mce_notify_work(void) +{ + wake_up_process(mce_notify_helper); +} +#else +static void mce_notify_work(void) +{ + __mce_notify_work(); +} +static inline int mce_notify_work_init(void) { return 0; } +#endif + /* * Notify the user(s) about new machine check events. * Can be called from interrupt context, but not from machine check/NMI @@ -1341,19 +1408,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger); */ int mce_notify_irq(void) { - /* Not more than two messages every minute */ - static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); - if (test_and_clear_bit(0, &mce_need_notify)) { - /* wake processes polling /dev/mcelog */ - wake_up_interruptible(&mce_chrdev_wait); - - if (mce_helper[0]) - schedule_work(&mce_trigger_work); - - if (__ratelimit(&ratelimit)) - pr_info(HW_ERR "Machine check events logged\n"); - + mce_notify_work(); return 1; } return 0; @@ -1624,7 +1680,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) } } -static void mce_start_timer(unsigned int cpu, struct timer_list *t) +static void mce_start_timer(unsigned int cpu, struct hrtimer *t) { unsigned long iv = mce_adjust_timer(check_interval * HZ); @@ -1633,16 +1689,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) if (mca_cfg.ignore_ce || !iv) return; - t->expires = round_jiffies(jiffies + iv); - add_timer_on(t, smp_processor_id()); + hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL), + 0, HRTIMER_MODE_REL_PINNED); } static void __mcheck_cpu_init_timer(void) { - struct timer_list *t = &__get_cpu_var(mce_timer); + struct hrtimer *t = &__get_cpu_var(mce_timer); unsigned int cpu = smp_processor_id(); - setup_timer(t, mce_timer_fn, cpu); + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = mce_timer_fn; mce_start_timer(cpu, t); } @@ -2299,6 +2356,8 @@ static void __cpuinit mce_disable_cpu(void *h) if (!mce_available(__this_cpu_ptr(&cpu_info))) return; + hrtimer_cancel(&__get_cpu_var(mce_timer)); + if (!(action & CPU_TASKS_FROZEN)) cmci_clear(); for (i = 0; i < mca_cfg.banks; i++) { @@ -2325,6 +2384,7 @@ static void __cpuinit mce_reenable_cpu(void *h) if (b->init) wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); } + __mcheck_cpu_init_timer(); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -2332,7 +2392,6 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct timer_list *t = &per_cpu(mce_timer, cpu); switch (action & ~CPU_TASKS_FROZEN) { case CPU_ONLINE: @@ -2348,11 +2407,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DOWN_PREPARE: smp_call_function_single(cpu, mce_disable_cpu, &action, 1); - del_timer_sync(t); break; case CPU_DOWN_FAILED: smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); - mce_start_timer(cpu, t); break; } @@ -2414,6 +2471,8 @@ static __init int mcheck_init_device(void) /* register character device /dev/mcelog */ misc_register(&mce_chrdev_device); + err = mce_notify_work_init(); + return err; } device_initcall_sync(mcheck_init_device); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 5c38e2b298cd..599dcc1175d2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -364,14 +364,22 @@ ENTRY(resume_kernel) DISABLE_INTERRUPTS(CLBR_ANY) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_all -need_resched: movl TI_flags(%ebp), %ecx # need_resched set ? testb $_TIF_NEED_RESCHED, %cl + jnz 1f + + cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ? + jnz restore_all + testl $_TIF_NEED_RESCHED_LAZY, %ecx jz restore_all - testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? + +1: testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? jz restore_all call preempt_schedule_irq - jmp need_resched + movl TI_flags(%ebp), %ecx # need_resched set ? + testl $_TIF_NEED_RESCHED_MASK, %ecx + jnz 1b + jmp restore_all END(resume_kernel) #endif CFI_ENDPROC @@ -608,7 +616,7 @@ ENDPROC(system_call) ALIGN RING0_PTREGS_FRAME # can't unwind into user space anyway work_pending: - testb $_TIF_NEED_RESCHED, %cl + testl $_TIF_NEED_RESCHED_MASK, %ecx jz work_notifysig work_resched: call schedule @@ -621,7 +629,7 @@ work_resched: andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? jz restore_all - testb $_TIF_NEED_RESCHED, %cl + testl $_TIF_NEED_RESCHED_MASK, %ecx jnz work_resched work_notifysig: # deal with pending signals and diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 39ba6914bbc6..429ac48db857 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -674,8 +674,8 @@ sysret_check: /* Handle reschedules */ /* edx: work, edi: workmask */ sysret_careful: - bt $TIF_NEED_RESCHED,%edx - jnc sysret_signal + testl $_TIF_NEED_RESCHED_MASK,%edx + jz sysret_signal TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi @@ -787,8 +787,8 @@ GLOBAL(int_with_check) /* First do a reschedule test. */ /* edx: work, edi: workmask */ int_careful: - bt $TIF_NEED_RESCHED,%edx - jnc int_very_careful + testl $_TIF_NEED_RESCHED_MASK,%edx + jz int_very_careful TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi @@ -1120,8 +1120,8 @@ bad_iret: /* edi: workmask, edx: work */ retint_careful: CFI_RESTORE_STATE - bt $TIF_NEED_RESCHED,%edx - jnc retint_signal + testl $_TIF_NEED_RESCHED_MASK,%edx + jz retint_signal TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) pushq_cfi %rdi @@ -1154,9 +1154,15 @@ retint_signal: ENTRY(retint_kernel) cmpl $0,TI_preempt_count(%rcx) jnz retint_restore_args - bt $TIF_NEED_RESCHED,TI_flags(%rcx) + bt $TIF_NEED_RESCHED,TI_flags(%rcx) + jc 1f + + cmpl $0,TI_preempt_lazy_count(%rcx) + jnz retint_restore_args + bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx) jnc retint_restore_args - bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ + +1: bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ jnc retint_restore_args call preempt_schedule_irq jmp exit_intr @@ -1399,6 +1405,7 @@ bad_gs: jmp 2b .previous +#ifndef CONFIG_PREEMPT_RT_FULL /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC @@ -1418,6 +1425,7 @@ ENTRY(call_softirq) ret CFI_ENDPROC END(call_softirq) +#endif #ifdef CONFIG_XEN zeroentry xen_hypervisor_callback xen_do_hypervisor_callback @@ -1587,7 +1595,7 @@ paranoid_userspace: movq %rsp,%rdi /* &pt_regs */ call sync_regs movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx + testl $_TIF_NEED_RESCHED_MASK,%ebx jnz paranoid_schedule movl %ebx,%edx /* arg3: thread flags */ TRACE_IRQS_ON diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 344faf8d0d62..f60ecc0d4db4 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -149,6 +149,7 @@ void __cpuinit irq_ctx_init(int cpu) cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); } +#ifndef CONFIG_PREEMPT_RT_FULL asmlinkage void do_softirq(void) { unsigned long flags; @@ -179,6 +180,7 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } +#endif bool handle_irq(unsigned irq, struct pt_regs *regs) { diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index d04d3ecded62..831f247b5798 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -88,7 +88,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) return true; } - +#ifndef CONFIG_PREEMPT_RT_FULL extern void call_softirq(void); asmlinkage void do_softirq(void) @@ -108,3 +108,4 @@ asmlinkage void do_softirq(void) } local_irq_restore(flags); } +#endif diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index ca8f703a1e70..129b8bb73de2 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -18,6 +18,7 @@ void smp_irq_work_interrupt(struct pt_regs *regs) irq_exit(); } +#ifndef CONFIG_PREEMPT_RT_FULL void arch_irq_work_raise(void) { #ifdef CONFIG_X86_LOCAL_APIC @@ -28,3 +29,4 @@ void arch_irq_work_raise(void) apic_wait_icr_idle(); #endif } +#endif diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7305f7dfc7ab..ee29e6c11f40 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -36,6 +36,7 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/kdebug.h> +#include <linux/highmem.h> #include <asm/pgtable.h> #include <asm/ldt.h> @@ -214,6 +215,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) } EXPORT_SYMBOL_GPL(start_thread); +#ifdef CONFIG_PREEMPT_RT_FULL +static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) +{ + int i; + + /* + * Clear @prev's kmap_atomic mappings + */ + for (i = 0; i < prev_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + pte_t *ptep = kmap_pte - idx; + + kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx)); + } + /* + * Restore @next_p's kmap_atomic mappings + */ + for (i = 0; i < next_p->kmap_idx; i++) { + int idx = i + KM_TYPE_NR * smp_processor_id(); + + if (!pte_none(next_p->kmap_pte[i])) + set_pte(kmap_pte - idx, next_p->kmap_pte[i]); + } +} +#else +static inline void +switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { } +#endif + /* * switch_to(x,y) should switch tasks from x to y. @@ -293,6 +323,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) __switch_to_xtra(prev_p, next_p, tss); + switch_kmaps(prev_p, next_p); + /* * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 087ab2af381a..d827bf98fd60 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -743,6 +743,14 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) mce_notify_process(); #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ +#ifdef ARCH_RT_DELAYS_SIGNAL_SEND + if (unlikely(current->forced_info.si_signo)) { + struct task_struct *t = current; + force_sig_info(t->forced_info.si_signo, &t->forced_info, t); + t->forced_info.si_signo = 0; + } +#endif + if (thread_info_flags & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 772e2a846dec..cbd25b2caf6e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -85,9 +85,21 @@ static inline void conditional_sti(struct pt_regs *regs) local_irq_enable(); } -static inline void preempt_conditional_sti(struct pt_regs *regs) +static inline void conditional_sti_ist(struct pt_regs *regs) { +#ifdef CONFIG_X86_64 + /* + * X86_64 uses a per CPU stack on the IST for certain traps + * like int3. The task can not be preempted when using one + * of these stacks, thus preemption must be disabled, otherwise + * the stack can be corrupted if the task is scheduled out, + * and another task comes in and uses this stack. + * + * On x86_32 the task keeps its own stack and it is OK if the + * task schedules out. + */ inc_preempt_count(); +#endif if (regs->flags & X86_EFLAGS_IF) local_irq_enable(); } @@ -98,11 +110,13 @@ static inline void conditional_cli(struct pt_regs *regs) local_irq_disable(); } -static inline void preempt_conditional_cli(struct pt_regs *regs) +static inline void conditional_cli_ist(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) local_irq_disable(); +#ifdef CONFIG_X86_64 dec_preempt_count(); +#endif } static int __kprobes @@ -235,9 +249,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code) prev_state = exception_enter(); if (notify_die(DIE_TRAP, "stack segment", regs, error_code, X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) { - preempt_conditional_sti(regs); + conditional_sti_ist(regs); do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL); - preempt_conditional_cli(regs); + conditional_cli_ist(regs); } exception_exit(prev_state); } @@ -340,9 +354,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co * as we may switch to the interrupt stack. */ debug_stack_usage_inc(); - preempt_conditional_sti(regs); + conditional_sti_ist(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); - preempt_conditional_cli(regs); + conditional_cli_ist(regs); debug_stack_usage_dec(); exit: exception_exit(prev_state); @@ -448,12 +462,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_inc(); /* It's safe to allow irq's after DR6 has been saved */ - preempt_conditional_sti(regs); + conditional_sti_ist(regs); if (regs->flags & X86_VM_MASK) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); - preempt_conditional_cli(regs); + conditional_cli_ist(regs); debug_stack_usage_dec(); goto exit; } @@ -473,7 +487,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) si_code = get_si_code(tsk->thread.debugreg6); if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); - preempt_conditional_cli(regs); + conditional_cli_ist(regs); debug_stack_usage_dec(); exit: diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1be0a9e75d1f..98b2fa39e295 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5328,6 +5328,13 @@ int kvm_arch_init(void *opaque) goto out; } +#ifdef CONFIG_PREEMPT_RT_FULL + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n"); + return -EOPNOTSUPP; + } +#endif + r = kvm_mmu_module_init(); if (r) goto out_free_percpu; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c1e9e4cbbd76..c8312502ddf7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1108,7 +1108,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: */ - if (unlikely(in_atomic() || !mm)) { + if (unlikely(!mm || pagefault_disabled())) { bad_area_nosemaphore(regs, error_code, address); return; } diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 252b8f5489ba..0f00d9746604 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL(kunmap); */ void *kmap_atomic_prot(struct page *page, pgprot_t prot) { + pte_t pte = mk_pte(page, prot); unsigned long vaddr; int idx, type; @@ -44,7 +45,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot) idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); BUG_ON(!pte_none(*(kmap_pte-idx))); - set_pte(kmap_pte-idx, mk_pte(page, prot)); +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_pte(kmap_pte-idx, pte); arch_flush_lazy_mmu_mode(); return (void *)vaddr; @@ -87,6 +91,9 @@ void __kunmap_atomic(void *kvaddr) * is a bad idea also, in case the page changes cacheability * attributes or becomes a protected page in a hypervisor. */ +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); arch_flush_lazy_mmu_mode(); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 7b179b499fa3..62377d67ab07 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { + pte_t pte = pfn_pte(pfn, prot); unsigned long vaddr; int idx, type; @@ -64,7 +65,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); + WARN_ON(!pte_none(*(kmap_pte - idx))); + +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = pte; +#endif + set_pte(kmap_pte - idx, pte); arch_flush_lazy_mmu_mode(); return (void *)vaddr; @@ -110,6 +116,9 @@ iounmap_atomic(void __iomem *kvaddr) * is a bad idea also, in case the page changes cacheability * attributes or becomes a protected page in a hypervisor. */ +#ifdef CONFIG_PREEMPT_RT_FULL + current->kmap_pte[type] = __pte(0); +#endif kpte_clear_flush(kmap_pte-idx, vaddr); kmap_atomic_idx_pop(); } |