aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig10
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c24
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c21
-rw-r--r--arch/x86/crypto/glue_helper.c31
-rw-r--r--arch/x86/include/asm/signal.h13
-rw-r--r--arch/x86/include/asm/stackprotector.h10
-rw-r--r--arch/x86/include/asm/thread_info.h6
-rw-r--r--arch/x86/kernel/apic/io_apic.c3
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c131
-rw-r--r--arch/x86/kernel/entry_32.S18
-rw-r--r--arch/x86/kernel/entry_64.S26
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c3
-rw-r--r--arch/x86/kernel/irq_work.c2
-rw-r--r--arch/x86/kernel/process_32.c32
-rw-r--r--arch/x86/kernel/signal.c8
-rw-r--r--arch/x86/kernel/traps.c32
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/highmem_32.c9
-rw-r--r--arch/x86/mm/iomap_32.c11
22 files changed, 295 insertions, 107 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 78592b17a259..c212286025b6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -121,6 +121,7 @@ config X86
select OLD_SIGACTION if X86_32
select COMPAT_OLD_SIGACTION if IA32_EMULATION
select RTC_LIB
+ select HAVE_PREEMPT_LAZY
select ARCH_SUPPORTS_ATOMIC_RMW
config INSTRUCTION_DECODER
@@ -178,8 +179,11 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
+config RWSEM_GENERIC_SPINLOCK
+ def_bool PREEMPT_RT_FULL
+
config RWSEM_XCHGADD_ALGORITHM
- def_bool y
+ def_bool !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT_FULL
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -467,7 +471,7 @@ config X86_MDFLD
select MFD_INTEL_MSIC
---help---
Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
- Internet Device(MID) platform.
+ Internet Device(MID) platform.
Unlike standard x86 PCs, Medfield does not have many legacy devices
nor standard legacy replacement devices/features. e.g. Medfield does
not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
@@ -803,7 +807,7 @@ config IOMMU_HELPER
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL
- select CPUMASK_OFFSTACK
+ select CPUMASK_OFFSTACK if !PREEMPT_RT_FULL
---help---
Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index f80e668785c0..3fbe870b5c97 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -252,14 +252,14 @@ static int ecb_encrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK);
+ nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -276,14 +276,14 @@ static int ecb_decrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -300,14 +300,14 @@ static int cbc_encrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -324,14 +324,14 @@ static int cbc_decrypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -364,18 +364,20 @@ static int ctr_crypt(struct blkcipher_desc *desc,
err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- kernel_fpu_begin();
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
+ kernel_fpu_begin();
aesni_ctr_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
if (walk.nbytes) {
+ kernel_fpu_begin();
ctr_crypt_final(ctx, &walk);
+ kernel_fpu_end();
err = blkcipher_walk_done(desc, &walk, 0);
}
- kernel_fpu_end();
return err;
}
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index c6631813dc11..2d48e83f00d3 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -60,7 +60,7 @@ static inline void cast5_fpu_end(bool fpu_enabled)
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
bool enc)
{
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes;
@@ -76,7 +76,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
- fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, nbytes);
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
@@ -104,10 +104,9 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
} while (nbytes >= bsize);
done:
+ cast5_fpu_end(fpu_enabled);
err = blkcipher_walk_done(desc, walk, nbytes);
}
-
- cast5_fpu_end(fpu_enabled);
return err;
}
@@ -231,7 +230,7 @@ done:
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct blkcipher_walk walk;
int err;
@@ -240,12 +239,11 @@ static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes)) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, nbytes);
nbytes = __cbc_decrypt(desc, &walk);
+ cast5_fpu_end(fpu_enabled);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
-
- cast5_fpu_end(fpu_enabled);
return err;
}
@@ -315,7 +313,7 @@ done:
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct blkcipher_walk walk;
int err;
@@ -324,13 +322,12 @@ static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+ fpu_enabled = cast5_fpu_begin(false, nbytes);
nbytes = __ctr_crypt(desc, &walk);
+ cast5_fpu_end(fpu_enabled);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- cast5_fpu_end(fpu_enabled);
-
if (walk.nbytes) {
ctr_crypt_final(desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 432f1d76ceb8..4a2bd21c2137 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -39,7 +39,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
void *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = 128 / 8;
unsigned int nbytes, i, func_bytes;
- bool fpu_enabled = false;
+ bool fpu_enabled;
int err;
err = blkcipher_walk_virt(desc, walk);
@@ -49,7 +49,7 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
u8 *wdst = walk->dst.virt.addr;
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- desc, fpu_enabled, nbytes);
+ desc, false, nbytes);
for (i = 0; i < gctx->num_funcs; i++) {
func_bytes = bsize * gctx->funcs[i].num_blocks;
@@ -71,10 +71,10 @@ static int __glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
}
done:
+ glue_fpu_end(fpu_enabled);
err = blkcipher_walk_done(desc, walk, nbytes);
}
- glue_fpu_end(fpu_enabled);
return err;
}
@@ -194,7 +194,7 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
struct scatterlist *src, unsigned int nbytes)
{
const unsigned int bsize = 128 / 8;
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct blkcipher_walk walk;
int err;
@@ -203,12 +203,12 @@ int glue_cbc_decrypt_128bit(const struct common_glue_ctx *gctx,
while ((nbytes = walk.nbytes)) {
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- desc, fpu_enabled, nbytes);
+ desc, false, nbytes);
nbytes = __glue_cbc_decrypt_128bit(gctx, desc, &walk);
+ glue_fpu_end(fpu_enabled);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- glue_fpu_end(fpu_enabled);
return err;
}
EXPORT_SYMBOL_GPL(glue_cbc_decrypt_128bit);
@@ -278,7 +278,7 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
struct scatterlist *src, unsigned int nbytes)
{
const unsigned int bsize = 128 / 8;
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct blkcipher_walk walk;
int err;
@@ -287,13 +287,12 @@ int glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
while ((nbytes = walk.nbytes) >= bsize) {
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- desc, fpu_enabled, nbytes);
+ desc, false, nbytes);
nbytes = __glue_ctr_crypt_128bit(gctx, desc, &walk);
+ glue_fpu_end(fpu_enabled);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
- glue_fpu_end(fpu_enabled);
-
if (walk.nbytes) {
glue_ctr_crypt_final_128bit(
gctx->funcs[gctx->num_funcs - 1].fn_u.ctr, desc, &walk);
@@ -348,7 +347,7 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
void *tweak_ctx, void *crypt_ctx)
{
const unsigned int bsize = 128 / 8;
- bool fpu_enabled = false;
+ bool fpu_enabled;
struct blkcipher_walk walk;
int err;
@@ -361,21 +360,21 @@ int glue_xts_crypt_128bit(const struct common_glue_ctx *gctx,
/* set minimum length to bsize, for tweak_fn */
fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- desc, fpu_enabled,
+ desc, false,
nbytes < bsize ? bsize : nbytes);
-
/* calculate first value of T */
tweak_fn(tweak_ctx, walk.iv, walk.iv);
+ glue_fpu_end(fpu_enabled);
while (nbytes) {
+ fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
+ desc, false, nbytes);
nbytes = __glue_xts_crypt_128bit(gctx, crypt_ctx, desc, &walk);
+ glue_fpu_end(fpu_enabled);
err = blkcipher_walk_done(desc, &walk, nbytes);
nbytes = walk.nbytes;
}
-
- glue_fpu_end(fpu_enabled);
-
return err;
}
EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit);
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 35e67a457182..6ec0792b3b9f 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -23,6 +23,19 @@ typedef struct {
unsigned long sig[_NSIG_WORDS];
} sigset_t;
+/*
+ * Because some traps use the IST stack, we must keep preemption
+ * disabled while calling do_trap(), but do_trap() may call
+ * force_sig_info() which will grab the signal spin_locks for the
+ * task, which in PREEMPT_RT_FULL are mutexes. By defining
+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
+ * trap.
+ */
+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
+#define ARCH_RT_DELAYS_SIGNAL_SEND
+#endif
+
#ifndef CONFIG_COMPAT
typedef sigset_t compat_sigset_t;
#endif
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 6a998598f172..64fb5cbe54fa 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -57,7 +57,7 @@
*/
static __always_inline void boot_init_stack_canary(void)
{
- u64 canary;
+ u64 uninitialized_var(canary);
u64 tsc;
#ifdef CONFIG_X86_64
@@ -68,8 +68,16 @@ static __always_inline void boot_init_stack_canary(void)
* of randomness. The TSC only matters for very early init,
* there it already has some randomness on most systems. Later
* on during the bootup the random pool has true entropy too.
+ *
+ * For preempt-rt we need to weaken the randomness a bit, as
+ * we can't call into the random generator from atomic context
+ * due to locking constraints. We just leave canary
+ * uninitialized and use the TSC based randomness on top of
+ * it.
*/
+#ifndef CONFIG_PREEMPT_RT_FULL
get_random_bytes(&canary, sizeof(canary));
+#endif
tsc = __native_read_tsc();
canary += tsc + (tsc << 32UL);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a1df6e84691f..0365caf31d49 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -30,6 +30,8 @@ struct thread_info {
__u32 cpu; /* current CPU */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
+ int preempt_lazy_count; /* 0 => lazy preemptable,
+ <0 => BUG */
mm_segment_t addr_limit;
struct restart_block restart_block;
void __user *sysenter_return;
@@ -81,6 +83,7 @@ struct thread_info {
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
+#define TIF_NEED_RESCHED_LAZY 9 /* lazy rescheduling necessary */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
@@ -106,6 +109,7 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
@@ -156,6 +160,8 @@ struct thread_info {
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
+#define _TIF_NEED_RESCHED_MASK (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)
+
#define PREEMPT_ACTIVE 0x10000000
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 9ed796ccc32c..7f116f21c3bf 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2391,7 +2391,8 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
{
/* If we are moving the irq we need to mask it */
- if (unlikely(irqd_is_setaffinity_pending(data))) {
+ if (unlikely(irqd_is_setaffinity_pending(data) &&
+ !irqd_irq_inprogress(data))) {
mask_ioapic(cfg);
return true;
}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 28610822fb3c..a36d9cfb71ea 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -33,6 +33,7 @@ void common(void) {
OFFSET(TI_status, thread_info, status);
OFFSET(TI_addr_limit, thread_info, addr_limit);
OFFSET(TI_preempt_count, thread_info, preempt_count);
+ OFFSET(TI_preempt_lazy_count, thread_info, preempt_lazy_count);
BLANK();
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9239504b41cb..aaf4b9b94f38 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -18,6 +18,7 @@
#include <linux/rcupdate.h>
#include <linux/kobject.h>
#include <linux/uaccess.h>
+#include <linux/kthread.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
@@ -41,6 +42,7 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
+#include <linux/jiffies.h>
#include <asm/processor.h>
#include <asm/mce.h>
@@ -1256,7 +1258,7 @@ void mce_log_therm_throt_event(__u64 status)
static unsigned long check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
-static DEFINE_PER_CPU(struct timer_list, mce_timer);
+static DEFINE_PER_CPU(struct hrtimer, mce_timer);
static unsigned long mce_adjust_timer_default(unsigned long interval)
{
@@ -1266,13 +1268,10 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
static unsigned long (*mce_adjust_timer)(unsigned long interval) =
mce_adjust_timer_default;
-static void mce_timer_fn(unsigned long data)
+static enum hrtimer_restart mce_timer_fn(struct hrtimer *timer)
{
- struct timer_list *t = &__get_cpu_var(mce_timer);
unsigned long iv;
- WARN_ON(smp_processor_id() != data);
-
if (mce_available(__this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
@@ -1293,9 +1292,11 @@ static void mce_timer_fn(unsigned long data)
__this_cpu_write(mce_next_interval, iv);
/* Might have become 0 after CMCI storm subsided */
if (iv) {
- t->expires = jiffies + iv;
- add_timer_on(t, smp_processor_id());
+ hrtimer_forward_now(timer, ns_to_ktime(
+ jiffies_to_usecs(iv) * 1000ULL));
+ return HRTIMER_RESTART;
}
+ return HRTIMER_NORESTART;
}
/*
@@ -1303,28 +1304,37 @@ static void mce_timer_fn(unsigned long data)
*/
void mce_timer_kick(unsigned long interval)
{
- struct timer_list *t = &__get_cpu_var(mce_timer);
- unsigned long when = jiffies + interval;
+ struct hrtimer *t = &__get_cpu_var(mce_timer);
unsigned long iv = __this_cpu_read(mce_next_interval);
- if (timer_pending(t)) {
- if (time_before(when, t->expires))
- mod_timer_pinned(t, when);
+ if (hrtimer_active(t)) {
+ s64 exp;
+ s64 intv_us;
+
+ intv_us = jiffies_to_usecs(interval);
+ exp = ktime_to_us(hrtimer_expires_remaining(t));
+ if (intv_us < exp) {
+ hrtimer_cancel(t);
+ hrtimer_start_range_ns(t,
+ ns_to_ktime(intv_us * 1000),
+ 0, HRTIMER_MODE_REL_PINNED);
+ }
} else {
- t->expires = round_jiffies(when);
- add_timer_on(t, smp_processor_id());
+ hrtimer_start_range_ns(t,
+ ns_to_ktime(jiffies_to_usecs(interval) * 1000ULL),
+ 0, HRTIMER_MODE_REL_PINNED);
}
if (interval < iv)
__this_cpu_write(mce_next_interval, interval);
}
-/* Must not be called in IRQ context where del_timer_sync() can deadlock */
+/* Must not be called in IRQ context where hrtimer_cancel() can deadlock */
static void mce_timer_delete_all(void)
{
int cpu;
for_each_online_cpu(cpu)
- del_timer_sync(&per_cpu(mce_timer, cpu));
+ hrtimer_cancel(&per_cpu(mce_timer, cpu));
}
static void mce_do_trigger(struct work_struct *work)
@@ -1334,6 +1344,63 @@ static void mce_do_trigger(struct work_struct *work)
static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
+static void __mce_notify_work(void)
+{
+ /* Not more than two messages every minute */
+ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
+
+ /* wake processes polling /dev/mcelog */
+ wake_up_interruptible(&mce_chrdev_wait);
+
+ /*
+ * There is no risk of missing notifications because
+ * work_pending is always cleared before the function is
+ * executed.
+ */
+ if (mce_helper[0] && !work_pending(&mce_trigger_work))
+ schedule_work(&mce_trigger_work);
+
+ if (__ratelimit(&ratelimit))
+ pr_info(HW_ERR "Machine check events logged\n");
+}
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+struct task_struct *mce_notify_helper;
+
+static int mce_notify_helper_thread(void *unused)
+{
+ while (1) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ if (kthread_should_stop())
+ break;
+ __mce_notify_work();
+ }
+ return 0;
+}
+
+static int mce_notify_work_init(void)
+{
+ mce_notify_helper = kthread_run(mce_notify_helper_thread, NULL,
+ "mce-notify");
+ if (!mce_notify_helper)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void mce_notify_work(void)
+{
+ wake_up_process(mce_notify_helper);
+}
+#else
+static void mce_notify_work(void)
+{
+ __mce_notify_work();
+}
+static inline int mce_notify_work_init(void) { return 0; }
+#endif
+
/*
* Notify the user(s) about new machine check events.
* Can be called from interrupt context, but not from machine check/NMI
@@ -1341,19 +1408,8 @@ static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
*/
int mce_notify_irq(void)
{
- /* Not more than two messages every minute */
- static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
-
if (test_and_clear_bit(0, &mce_need_notify)) {
- /* wake processes polling /dev/mcelog */
- wake_up_interruptible(&mce_chrdev_wait);
-
- if (mce_helper[0])
- schedule_work(&mce_trigger_work);
-
- if (__ratelimit(&ratelimit))
- pr_info(HW_ERR "Machine check events logged\n");
-
+ mce_notify_work();
return 1;
}
return 0;
@@ -1624,7 +1680,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
}
}
-static void mce_start_timer(unsigned int cpu, struct timer_list *t)
+static void mce_start_timer(unsigned int cpu, struct hrtimer *t)
{
unsigned long iv = mce_adjust_timer(check_interval * HZ);
@@ -1633,16 +1689,17 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
if (mca_cfg.ignore_ce || !iv)
return;
- t->expires = round_jiffies(jiffies + iv);
- add_timer_on(t, smp_processor_id());
+ hrtimer_start_range_ns(t, ns_to_ktime(jiffies_to_usecs(iv) * 1000ULL),
+ 0, HRTIMER_MODE_REL_PINNED);
}
static void __mcheck_cpu_init_timer(void)
{
- struct timer_list *t = &__get_cpu_var(mce_timer);
+ struct hrtimer *t = &__get_cpu_var(mce_timer);
unsigned int cpu = smp_processor_id();
- setup_timer(t, mce_timer_fn, cpu);
+ hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ t->function = mce_timer_fn;
mce_start_timer(cpu, t);
}
@@ -2299,6 +2356,8 @@ static void __cpuinit mce_disable_cpu(void *h)
if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
+ hrtimer_cancel(&__get_cpu_var(mce_timer));
+
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < mca_cfg.banks; i++) {
@@ -2325,6 +2384,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
}
+ __mcheck_cpu_init_timer();
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -2332,7 +2392,6 @@ static int __cpuinit
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
- struct timer_list *t = &per_cpu(mce_timer, cpu);
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_ONLINE:
@@ -2348,11 +2407,9 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
- del_timer_sync(t);
break;
case CPU_DOWN_FAILED:
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
- mce_start_timer(cpu, t);
break;
}
@@ -2414,6 +2471,8 @@ static __init int mcheck_init_device(void)
/* register character device /dev/mcelog */
misc_register(&mce_chrdev_device);
+ err = mce_notify_work_init();
+
return err;
}
device_initcall_sync(mcheck_init_device);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c38e2b298cd..599dcc1175d2 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -364,14 +364,22 @@ ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
jnz restore_all
-need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
+ jnz 1f
+
+ cmpl $0,TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
+ jnz restore_all
+ testl $_TIF_NEED_RESCHED_LAZY, %ecx
jz restore_all
- testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
+
+1: testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all
call preempt_schedule_irq
- jmp need_resched
+ movl TI_flags(%ebp), %ecx # need_resched set ?
+ testl $_TIF_NEED_RESCHED_MASK, %ecx
+ jnz 1b
+ jmp restore_all
END(resume_kernel)
#endif
CFI_ENDPROC
@@ -608,7 +616,7 @@ ENDPROC(system_call)
ALIGN
RING0_PTREGS_FRAME # can't unwind into user space anyway
work_pending:
- testb $_TIF_NEED_RESCHED, %cl
+ testl $_TIF_NEED_RESCHED_MASK, %ecx
jz work_notifysig
work_resched:
call schedule
@@ -621,7 +629,7 @@ work_resched:
andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
# than syscall tracing?
jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
+ testl $_TIF_NEED_RESCHED_MASK, %ecx
jnz work_resched
work_notifysig: # deal with pending signals and
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 39ba6914bbc6..429ac48db857 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -674,8 +674,8 @@ sysret_check:
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
- bt $TIF_NEED_RESCHED,%edx
- jnc sysret_signal
+ testl $_TIF_NEED_RESCHED_MASK,%edx
+ jz sysret_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
@@ -787,8 +787,8 @@ GLOBAL(int_with_check)
/* First do a reschedule test. */
/* edx: work, edi: workmask */
int_careful:
- bt $TIF_NEED_RESCHED,%edx
- jnc int_very_careful
+ testl $_TIF_NEED_RESCHED_MASK,%edx
+ jz int_very_careful
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
@@ -1120,8 +1120,8 @@ bad_iret:
/* edi: workmask, edx: work */
retint_careful:
CFI_RESTORE_STATE
- bt $TIF_NEED_RESCHED,%edx
- jnc retint_signal
+ testl $_TIF_NEED_RESCHED_MASK,%edx
+ jz retint_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
@@ -1154,9 +1154,15 @@ retint_signal:
ENTRY(retint_kernel)
cmpl $0,TI_preempt_count(%rcx)
jnz retint_restore_args
- bt $TIF_NEED_RESCHED,TI_flags(%rcx)
+ bt $TIF_NEED_RESCHED,TI_flags(%rcx)
+ jc 1f
+
+ cmpl $0,TI_preempt_lazy_count(%rcx)
+ jnz retint_restore_args
+ bt $TIF_NEED_RESCHED_LAZY,TI_flags(%rcx)
jnc retint_restore_args
- bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
+
+1: bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jnc retint_restore_args
call preempt_schedule_irq
jmp exit_intr
@@ -1399,6 +1405,7 @@ bad_gs:
jmp 2b
.previous
+#ifndef CONFIG_PREEMPT_RT_FULL
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
@@ -1418,6 +1425,7 @@ ENTRY(call_softirq)
ret
CFI_ENDPROC
END(call_softirq)
+#endif
#ifdef CONFIG_XEN
zeroentry xen_hypervisor_callback xen_do_hypervisor_callback
@@ -1587,7 +1595,7 @@ paranoid_userspace:
movq %rsp,%rdi /* &pt_regs */
call sync_regs
movq %rax,%rsp /* switch stack for scheduling */
- testl $_TIF_NEED_RESCHED,%ebx
+ testl $_TIF_NEED_RESCHED_MASK,%ebx
jnz paranoid_schedule
movl %ebx,%edx /* arg3: thread flags */
TRACE_IRQS_ON
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 344faf8d0d62..f60ecc0d4db4 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -149,6 +149,7 @@ void __cpuinit irq_ctx_init(int cpu)
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
}
+#ifndef CONFIG_PREEMPT_RT_FULL
asmlinkage void do_softirq(void)
{
unsigned long flags;
@@ -179,6 +180,7 @@ asmlinkage void do_softirq(void)
local_irq_restore(flags);
}
+#endif
bool handle_irq(unsigned irq, struct pt_regs *regs)
{
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index d04d3ecded62..831f247b5798 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -88,7 +88,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
return true;
}
-
+#ifndef CONFIG_PREEMPT_RT_FULL
extern void call_softirq(void);
asmlinkage void do_softirq(void)
@@ -108,3 +108,4 @@ asmlinkage void do_softirq(void)
}
local_irq_restore(flags);
}
+#endif
diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c
index ca8f703a1e70..129b8bb73de2 100644
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -18,6 +18,7 @@ void smp_irq_work_interrupt(struct pt_regs *regs)
irq_exit();
}
+#ifndef CONFIG_PREEMPT_RT_FULL
void arch_irq_work_raise(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
@@ -28,3 +29,4 @@ void arch_irq_work_raise(void)
apic_wait_icr_idle();
#endif
}
+#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7305f7dfc7ab..ee29e6c11f40 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -36,6 +36,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
#include <asm/ldt.h>
@@ -214,6 +215,35 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
}
EXPORT_SYMBOL_GPL(start_thread);
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p)
+{
+ int i;
+
+ /*
+ * Clear @prev's kmap_atomic mappings
+ */
+ for (i = 0; i < prev_p->kmap_idx; i++) {
+ int idx = i + KM_TYPE_NR * smp_processor_id();
+ pte_t *ptep = kmap_pte - idx;
+
+ kpte_clear_flush(ptep, __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ }
+ /*
+ * Restore @next_p's kmap_atomic mappings
+ */
+ for (i = 0; i < next_p->kmap_idx; i++) {
+ int idx = i + KM_TYPE_NR * smp_processor_id();
+
+ if (!pte_none(next_p->kmap_pte[i]))
+ set_pte(kmap_pte - idx, next_p->kmap_pte[i]);
+ }
+}
+#else
+static inline void
+switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { }
+#endif
+
/*
* switch_to(x,y) should switch tasks from x to y.
@@ -293,6 +323,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
__switch_to_xtra(prev_p, next_p, tss);
+ switch_kmaps(prev_p, next_p);
+
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 087ab2af381a..d827bf98fd60 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -743,6 +743,14 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
mce_notify_process();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
+ if (unlikely(current->forced_info.si_signo)) {
+ struct task_struct *t = current;
+ force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
+ t->forced_info.si_signo = 0;
+ }
+#endif
+
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 772e2a846dec..cbd25b2caf6e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -85,9 +85,21 @@ static inline void conditional_sti(struct pt_regs *regs)
local_irq_enable();
}
-static inline void preempt_conditional_sti(struct pt_regs *regs)
+static inline void conditional_sti_ist(struct pt_regs *regs)
{
+#ifdef CONFIG_X86_64
+ /*
+ * X86_64 uses a per CPU stack on the IST for certain traps
+ * like int3. The task can not be preempted when using one
+ * of these stacks, thus preemption must be disabled, otherwise
+ * the stack can be corrupted if the task is scheduled out,
+ * and another task comes in and uses this stack.
+ *
+ * On x86_32 the task keeps its own stack and it is OK if the
+ * task schedules out.
+ */
inc_preempt_count();
+#endif
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
@@ -98,11 +110,13 @@ static inline void conditional_cli(struct pt_regs *regs)
local_irq_disable();
}
-static inline void preempt_conditional_cli(struct pt_regs *regs)
+static inline void conditional_cli_ist(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
local_irq_disable();
+#ifdef CONFIG_X86_64
dec_preempt_count();
+#endif
}
static int __kprobes
@@ -235,9 +249,9 @@ dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
prev_state = exception_enter();
if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
- preempt_conditional_sti(regs);
+ conditional_sti_ist(regs);
do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ conditional_cli_ist(regs);
}
exception_exit(prev_state);
}
@@ -340,9 +354,9 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
* as we may switch to the interrupt stack.
*/
debug_stack_usage_inc();
- preempt_conditional_sti(regs);
+ conditional_sti_ist(regs);
do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
- preempt_conditional_cli(regs);
+ conditional_cli_ist(regs);
debug_stack_usage_dec();
exit:
exception_exit(prev_state);
@@ -448,12 +462,12 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_inc();
/* It's safe to allow irq's after DR6 has been saved */
- preempt_conditional_sti(regs);
+ conditional_sti_ist(regs);
if (regs->flags & X86_VM_MASK) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
- preempt_conditional_cli(regs);
+ conditional_cli_ist(regs);
debug_stack_usage_dec();
goto exit;
}
@@ -473,7 +487,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
si_code = get_si_code(tsk->thread.debugreg6);
if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
send_sigtrap(tsk, regs, error_code, si_code);
- preempt_conditional_cli(regs);
+ conditional_cli_ist(regs);
debug_stack_usage_dec();
exit:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1be0a9e75d1f..98b2fa39e295 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5328,6 +5328,13 @@ int kvm_arch_init(void *opaque)
goto out;
}
+#ifdef CONFIG_PREEMPT_RT_FULL
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+ printk(KERN_ERR "RT requires X86_FEATURE_CONSTANT_TSC\n");
+ return -EOPNOTSUPP;
+ }
+#endif
+
r = kvm_mmu_module_init();
if (r)
goto out_free_percpu;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index c1e9e4cbbd76..c8312502ddf7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1108,7 +1108,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
*/
- if (unlikely(in_atomic() || !mm)) {
+ if (unlikely(!mm || pagefault_disabled())) {
bad_area_nosemaphore(regs, error_code, address);
return;
}
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 252b8f5489ba..0f00d9746604 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -31,6 +31,7 @@ EXPORT_SYMBOL(kunmap);
*/
void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
+ pte_t pte = mk_pte(page, prot);
unsigned long vaddr;
int idx, type;
@@ -44,7 +45,10 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
- set_pte(kmap_pte-idx, mk_pte(page, prot));
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_pte(kmap_pte-idx, pte);
arch_flush_lazy_mmu_mode();
return (void *)vaddr;
@@ -87,6 +91,9 @@ void __kunmap_atomic(void *kvaddr)
* is a bad idea also, in case the page changes cacheability
* attributes or becomes a protected page in a hypervisor.
*/
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = __pte(0);
+#endif
kpte_clear_flush(kmap_pte-idx, vaddr);
kmap_atomic_idx_pop();
arch_flush_lazy_mmu_mode();
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index 7b179b499fa3..62377d67ab07 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -56,6 +56,7 @@ EXPORT_SYMBOL_GPL(iomap_free);
void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
{
+ pte_t pte = pfn_pte(pfn, prot);
unsigned long vaddr;
int idx, type;
@@ -64,7 +65,12 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
- set_pte(kmap_pte - idx, pfn_pte(pfn, prot));
+ WARN_ON(!pte_none(*(kmap_pte - idx)));
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = pte;
+#endif
+ set_pte(kmap_pte - idx, pte);
arch_flush_lazy_mmu_mode();
return (void *)vaddr;
@@ -110,6 +116,9 @@ iounmap_atomic(void __iomem *kvaddr)
* is a bad idea also, in case the page changes cacheability
* attributes or becomes a protected page in a hypervisor.
*/
+#ifdef CONFIG_PREEMPT_RT_FULL
+ current->kmap_pte[type] = __pte(0);
+#endif
kpte_clear_flush(kmap_pte-idx, vaddr);
kmap_atomic_idx_pop();
}