diff options
Diffstat (limited to 'arch')
57 files changed, 309 insertions, 163 deletions
diff --git a/arch/arm/boot/dts/armada-xp-axpwifiap.dts b/arch/arm/boot/dts/armada-xp-axpwifiap.dts index 23fc670c0427..5c21b236721f 100644 --- a/arch/arm/boot/dts/armada-xp-axpwifiap.dts +++ b/arch/arm/boot/dts/armada-xp-axpwifiap.dts @@ -70,8 +70,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts index f774101416a5..ebe1d267406d 100644 --- a/arch/arm/boot/dts/armada-xp-db.dts +++ b/arch/arm/boot/dts/armada-xp-db.dts @@ -76,8 +76,8 @@ ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; devbus-bootcs { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts index 4878d7353069..5730b875c4f5 100644 --- a/arch/arm/boot/dts/armada-xp-gp.dts +++ b/arch/arm/boot/dts/armada-xp-gp.dts @@ -95,8 +95,8 @@ ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; devbus-bootcs { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts index 58b500873bfd..d960fef77ca1 100644 --- a/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts +++ b/arch/arm/boot/dts/armada-xp-lenovo-ix4-300d.dts @@ -65,8 +65,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts index 6e9820e141f8..b89e6cf1271a 100644 --- a/arch/arm/boot/dts/armada-xp-linksys-mamba.dts +++ b/arch/arm/boot/dts/armada-xp-linksys-mamba.dts @@ -70,8 +70,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-matrix.dts b/arch/arm/boot/dts/armada-xp-matrix.dts index 6ab33837a2b6..6522b04f4a8e 100644 --- a/arch/arm/boot/dts/armada-xp-matrix.dts +++ b/arch/arm/boot/dts/armada-xp-matrix.dts @@ -68,8 +68,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; internal-regs { serial@12000 { diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts index 6fe8972de0a2..db54c7158a36 100644 --- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts +++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts @@ -64,8 +64,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index a5db17782e08..853bd392a4fe 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -65,9 +65,9 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x01, 0x2f) 0 0 0xe8000000 0x8000000 + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; devbus-bootcs { status = "okay"; diff --git a/arch/arm/boot/dts/armada-xp-synology-ds414.dts b/arch/arm/boot/dts/armada-xp-synology-ds414.dts index 2391b11dc546..d17dab0a6f51 100644 --- a/arch/arm/boot/dts/armada-xp-synology-ds414.dts +++ b/arch/arm/boot/dts/armada-xp-synology-ds414.dts @@ -78,8 +78,8 @@ soc { ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xf1000000 0x100000 MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000 - MBUS_ID(0x09, 0x09) 0 0 0xf8100000 0x10000 - MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000>; + MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000 + MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>; pcie-controller { status = "okay"; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index fe99231cbde5..c2a03c740e79 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1497,6 +1497,16 @@ 0x48485200 0x2E00>; #address-cells = <1>; #size-cells = <1>; + + /* + * Do not allow gating of cpsw clock as workaround + * for errata i877. Keeping internal clock disabled + * causes the device switching characteristics + * to degrade over time and eventually fail to meet + * the data manual delay time/skew specs. + */ + ti,no-idle; + /* * rx_thresh_pend * rx_pend diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index d5525bfc7e3e..9156fc303afd 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -491,7 +491,6 @@ static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } #endif #ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); void set_kernel_text_rw(void); void set_kernel_text_ro(void); #else diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 96e935bbc38c..3705fc2921c2 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -155,7 +155,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } static unsigned long num_core_regs(void) diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 48495ad82aba..8e0bd5939e5a 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2200,6 +2200,11 @@ static int _enable(struct omap_hwmod *oh) */ static int _idle(struct omap_hwmod *oh) { + if (oh->flags & HWMOD_NO_IDLE) { + oh->_int_flags |= _HWMOD_SKIP_ENABLE; + return 0; + } + pr_debug("omap_hwmod: %s: idling\n", oh->name); if (oh->_state != _HWMOD_STATE_ENABLED) { @@ -2504,6 +2509,8 @@ static int __init _init(struct omap_hwmod *oh, void *data) oh->flags |= HWMOD_INIT_NO_RESET; if (of_find_property(np, "ti,no-idle-on-init", NULL)) oh->flags |= HWMOD_INIT_NO_IDLE; + if (of_find_property(np, "ti,no-idle", NULL)) + oh->flags |= HWMOD_NO_IDLE; } oh->_state = _HWMOD_STATE_INITIALIZED; @@ -2630,7 +2637,7 @@ static void __init _setup_postsetup(struct omap_hwmod *oh) * XXX HWMOD_INIT_NO_IDLE does not belong in hwmod data - * it should be set by the core code as a runtime flag during startup */ - if ((oh->flags & HWMOD_INIT_NO_IDLE) && + if ((oh->flags & (HWMOD_INIT_NO_IDLE | HWMOD_NO_IDLE)) && (postsetup_state == _HWMOD_STATE_IDLE)) { oh->_int_flags |= _HWMOD_SKIP_ENABLE; postsetup_state = _HWMOD_STATE_ENABLED; diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index 76bce11c85a4..7c7a31169475 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -525,6 +525,8 @@ struct omap_hwmod_omap4_prcm { * or idled. * HWMOD_OPT_CLKS_NEEDED: The optional clocks are needed for the module to * operate and they need to be handled at the same time as the main_clk. + * HWMOD_NO_IDLE: Do not idle the hwmod at all. Useful to handle certain + * IPs like CPSW on DRA7, where clocks to this module cannot be disabled. */ #define HWMOD_SWSUP_SIDLE (1 << 0) #define HWMOD_SWSUP_MSTANDBY (1 << 1) @@ -541,6 +543,7 @@ struct omap_hwmod_omap4_prcm { #define HWMOD_SWSUP_SIDLE_ACT (1 << 12) #define HWMOD_RECONFIG_IO_CHAIN (1 << 13) #define HWMOD_OPT_CLKS_NEEDED (1 << 14) +#define HWMOD_NO_IDLE (1 << 15) /* * omap_hwmod._int_flags definitions diff --git a/arch/arm/vdso/vdso.S b/arch/arm/vdso/vdso.S index b2b97e3e7bab..a62a7b64f49c 100644 --- a/arch/arm/vdso/vdso.S +++ b/arch/arm/vdso/vdso.S @@ -23,9 +23,8 @@ #include <linux/const.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA - .globl vdso_start, vdso_end + .section .data..ro_after_init .balign PAGE_SIZE vdso_start: .incbin "arch/arm/vdso/vdso.so" diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 54efedaf331f..ca3b7841e1c6 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -155,8 +155,4 @@ int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 63f52b55defe..eaa9cabf4066 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -34,7 +34,7 @@ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. * - * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array + * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array * (rounded up to PUD_SIZE). * VMALLOC_START: beginning of the kernel VA space * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, @@ -51,7 +51,9 @@ #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) -#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) +#define VMEMMAP_START (VMALLOC_END + SZ_64K) +#define vmemmap ((struct page *)VMEMMAP_START - \ + SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT)) #define FIRST_USER_ADDRESS 0UL diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d250160d32bc..3039f080e2d5 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -186,7 +186,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) u64 val; val = kvm_arm_timer_get_reg(vcpu, reg->id); - return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0; } /** diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 17bf39ac83ba..4cb98aa8c27b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -319,8 +319,8 @@ void __init mem_init(void) #endif MLG(VMALLOC_START, VMALLOC_END), #ifdef CONFIG_SPARSEMEM_VMEMMAP - MLG((unsigned long)vmemmap, - (unsigned long)vmemmap + VMEMMAP_SIZE), + MLG(VMEMMAP_START, + VMEMMAP_START + VMEMMAP_SIZE), MLM((unsigned long)virt_to_page(PAGE_OFFSET), (unsigned long)virt_to_page(high_memory)), #endif diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 0c0d06a4ae11..bd8be6a0e745 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2155,7 +2155,7 @@ config MIPS_MT_SMP select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select SYNC_R4K - select MIPS_GIC_IPI + select MIPS_GIC_IPI if MIPS_GIC select MIPS_MT select SMP select SMP_UP @@ -2253,7 +2253,7 @@ config MIPS_VPE_APSP_API_MT config MIPS_CMP bool "MIPS CMP framework support (DEPRECATED)" depends on SYS_SUPPORTS_MIPS_CMP && !CPU_MIPSR6 - select MIPS_GIC_IPI + select MIPS_GIC_IPI if MIPS_GIC select SMP select SYNC_R4K select SYS_SUPPORTS_SMP @@ -2273,7 +2273,7 @@ config MIPS_CPS select MIPS_CM select MIPS_CPC select MIPS_CPS_PM if HOTPLUG_CPU - select MIPS_GIC_IPI + select MIPS_GIC_IPI if MIPS_GIC select SMP select SYNC_R4K if (CEVT_R4K || CSRC_R4K) select SYS_SUPPORTS_HOTPLUG_CPU @@ -2292,6 +2292,7 @@ config MIPS_CPS_PM bool config MIPS_GIC_IPI + depends on MIPS_GIC bool config MIPS_CM diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index bd4385a8e6e8..2b521e07b860 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -121,6 +121,7 @@ static inline void calculate_cpu_foreign_map(void) cpumask_t temp_foreign_map; /* Re-calculate the mask */ + cpumask_clear(&temp_foreign_map); for_each_online_cpu(i) { core_present = 0; for_each_cpu(k, &temp_foreign_map) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 886cb1976e90..ca9a81007489 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -690,15 +690,15 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) asmlinkage void do_ov(struct pt_regs *regs) { enum ctx_state prev_state; - siginfo_t info; + siginfo_t info = { + .si_signo = SIGFPE, + .si_code = FPE_INTOVF, + .si_addr = (void __user *)regs->cp0_epc, + }; prev_state = exception_enter(); die_if_kernel("Integer overflow", regs); - info.si_code = FPE_INTOVF; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); exception_exit(prev_state); } @@ -874,7 +874,7 @@ out: void do_trap_or_bp(struct pt_regs *regs, unsigned int code, const char *str) { - siginfo_t info; + siginfo_t info = { 0 }; char b[40]; #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP @@ -903,7 +903,6 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, else info.si_code = FPE_INTOVF; info.si_signo = SIGFPE; - info.si_errno = 0; info.si_addr = (void __user *) regs->cp0_epc; force_sig_info(SIGFPE, &info, current); break; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 28e9acbde4eb..8a113298b6cc 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -702,7 +702,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_to_user(uaddr, vs, 16); + return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0; } else { return -EINVAL; } @@ -732,7 +732,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { void __user *uaddr = (void __user *)(long)reg->addr; - return copy_from_user(vs, uaddr, 16); + return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0; } else { return -EINVAL; } diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index 3bd0597d9c3d..ddb8154610cc 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -164,11 +164,13 @@ static int __init mips_sc_probe_cm3(void) sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE_MSK; sets >>= CM_GCR_L2_CONFIG_SET_SIZE_SHF; - c->scache.sets = 64 << sets; + if (sets) + c->scache.sets = 64 << sets; line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE_MSK; line_sz >>= CM_GCR_L2_CONFIG_LINE_SIZE_SHF; - c->scache.linesz = 2 << line_sz; + if (line_sz) + c->scache.linesz = 2 << line_sz; assoc = cfg & CM_GCR_L2_CONFIG_ASSOC_MSK; assoc >>= CM_GCR_L2_CONFIG_ASSOC_SHF; @@ -176,9 +178,12 @@ static int __init mips_sc_probe_cm3(void) c->scache.waysize = c->scache.sets * c->scache.linesz; c->scache.waybit = __ffs(c->scache.waysize); - c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + if (c->scache.linesz) { + c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + return 1; + } - return 1; + return 0; } void __weak platform_early_l2_init(void) diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 3d0e17bcc8e9..df0f52bd18b4 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -22,6 +22,9 @@ #define __read_mostly __attribute__((__section__(".data..read_mostly"))) +/* Read-only memory is marked before mark_rodata_ro() is called. */ +#define __ro_after_init __read_mostly + void parisc_cache_init(void); /* initializes cache-flushing */ void disable_sr_hashing_asm(int); /* low level support for above */ void disable_sr_hashing(void); /* turns off space register hashing */ diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 845272ce9cc5..7bd69bd43a01 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -121,10 +121,6 @@ flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vma } } -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); -#endif - #include <asm/kmap_types.h> #define ARCH_HAS_KMAP diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 9585c81f755f..ce0b2b4075c7 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -269,14 +269,19 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { - long ret = 0; - /* Do the secure computing check first. */ secure_computing_strict(regs->gr[20]); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) - ret = -1L; + tracehook_report_syscall_entry(regs)) { + /* + * Tracing decided this syscall should not happen or the + * debugger stored an invalid system call number. Skip + * the system call and the system call restart handling. + */ + regs->gr[20] = -1UL; + goto out; + } #ifdef CONFIG_64BIT if (!is_compat_task()) @@ -290,7 +295,8 @@ long do_syscall_trace_enter(struct pt_regs *regs) regs->gr[24] & 0xffffffff, regs->gr[23] & 0xffffffff); - return ret ? : regs->gr[20]; +out: + return regs->gr[20]; } void do_syscall_trace_exit(struct pt_regs *regs) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 3fbd7252a4b2..fbafa0d0e2bf 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -343,7 +343,7 @@ tracesys_next: #endif comiclr,>>= __NR_Linux_syscalls, %r20, %r0 - b,n .Lsyscall_nosys + b,n .Ltracesys_nosys LDREGX %r20(%r19), %r19 @@ -359,6 +359,9 @@ tracesys_next: be 0(%sr7,%r19) ldo R%tracesys_exit(%r2),%r2 +.Ltracesys_nosys: + ldo -ENOSYS(%r0),%r28 /* set errno */ + /* Do *not* call this function on the gateway page, because it makes a direct call to syscall_trace. */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 8374afed9d0a..f8faaaeeca1e 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -157,7 +157,8 @@ #define OPAL_LEDS_GET_INDICATOR 114 #define OPAL_LEDS_SET_INDICATOR 115 #define OPAL_CEC_REBOOT2 116 -#define OPAL_LAST 116 +#define OPAL_CONSOLE_FLUSH 117 +#define OPAL_LAST 117 /* Device tree flags */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 800115910e43..07a99e638449 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -35,6 +35,7 @@ int64_t opal_console_read(int64_t term_number, __be64 *length, uint8_t *buffer); int64_t opal_console_write_buffer_space(int64_t term_number, __be64 *length); +int64_t opal_console_flush(int64_t term_number); int64_t opal_rtc_read(__be32 *year_month_day, __be64 *hour_minute_second_millisecond); int64_t opal_rtc_write(uint32_t year_month_day, @@ -262,6 +263,8 @@ extern int opal_resync_timebase(void); extern void opal_lpc_init(void); +extern void opal_kmsg_init(void); + extern int opal_event_request(unsigned int opal_event_nr); struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr, diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 59663af9315f..e4f7d4eed20c 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -335,7 +335,7 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab) if (syms[i].st_shndx == SHN_UNDEF) { char *name = strtab + syms[i].st_name; if (name[0] == '.') - memmove(name, name+1, strlen(name)); + syms[i].st_name++; } } } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e57cc383e5da..463af88c95a2 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1370,6 +1370,20 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) std r6, VCPU_ACOP(r9) stw r7, VCPU_GUEST_PID(r9) std r8, VCPU_WORT(r9) + /* + * Restore various registers to 0, where non-zero values + * set by the guest could disrupt the host. + */ + li r0, 0 + mtspr SPRN_IAMR, r0 + mtspr SPRN_CIABR, r0 + mtspr SPRN_DAWRX, r0 + mtspr SPRN_TCSCR, r0 + mtspr SPRN_WORT, r0 + /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */ + li r0, 1 + sldi r0, r0, 31 + mtspr SPRN_MMCRS, r0 8: /* Save and reset AMR and UAMOR before turning on the MMU */ diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 1c8cdb6250e7..b9de7ef48849 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -2,6 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o +obj-y += opal-kmsg.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c new file mode 100644 index 000000000000..6f1214d4de92 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -0,0 +1,75 @@ +/* + * kmsg dumper that ensures the OPAL console fully flushes panic messages + * + * Author: Russell Currey <ruscur@russell.cc> + * + * Copyright 2015 IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/kmsg_dump.h> + +#include <asm/opal.h> +#include <asm/opal-api.h> + +/* + * Console output is controlled by OPAL firmware. The kernel regularly calls + * OPAL_POLL_EVENTS, which flushes some console output. In a panic state, + * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message + * may not be completely printed. This function does not actually dump the + * message, it just ensures that OPAL completely flushes the console buffer. + */ +static void force_opal_console_flush(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + int i; + int64_t ret; + + /* + * Outside of a panic context the pollers will continue to run, + * so we don't need to do any special flushing. + */ + if (reason != KMSG_DUMP_PANIC) + return; + + if (opal_check_token(OPAL_CONSOLE_FLUSH)) { + ret = opal_console_flush(0); + + if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER) + return; + + /* Incrementally flush until there's nothing left */ + while (opal_console_flush(0) != OPAL_SUCCESS); + } else { + /* + * If OPAL_CONSOLE_FLUSH is not implemented in the firmware, + * the console can still be flushed by calling the polling + * function enough times to flush the buffer. We don't know + * how much output still needs to be flushed, but we can be + * generous since the kernel is in panic and doesn't need + * to do much else. + */ + printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n"); + for (i = 0; i < 1024; i++) { + opal_poll_events(NULL); + } + } +} + +static struct kmsg_dumper opal_kmsg_dumper = { + .dump = force_opal_console_flush +}; + +void __init opal_kmsg_init(void) +{ + int rc; + + /* Add our dumper to the list */ + rc = kmsg_dump_register(&opal_kmsg_dumper); + if (rc != 0) + pr_err("opal: kmsg_dump_register failed; returned %d\n", rc); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index b7a464fef7a7..e45b88a5d7e0 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -301,3 +301,4 @@ OPAL_CALL(opal_flash_erase, OPAL_FLASH_ERASE); OPAL_CALL(opal_prd_msg, OPAL_PRD_MSG); OPAL_CALL(opal_leds_get_ind, OPAL_LEDS_GET_INDICATOR); OPAL_CALL(opal_leds_set_ind, OPAL_LEDS_SET_INDICATOR); +OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 57cffb80bc36..ae29eaf85e9e 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -758,6 +758,9 @@ static int __init opal_init(void) opal_pdev_init(opal_node, "ibm,opal-flash"); opal_pdev_init(opal_node, "ibm,opal-prd"); + /* Initialise OPAL kmsg dumper for flushing console on panic */ + opal_kmsg_init(); + return 0; } machine_subsys_initcall(powernv, opal_init); diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index fb1b93ea3e3f..e485817f7b1a 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -15,17 +15,25 @@ static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { + spin_lock_init(&mm->context.list_lock); + INIT_LIST_HEAD(&mm->context.pgtable_list); + INIT_LIST_HEAD(&mm->context.gmap_list); cpumask_clear(&mm->context.cpu_attach_mask); atomic_set(&mm->context.attach_count, 0); mm->context.flush_mm = 0; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | _ASCE_USER_BITS; - mm->context.asce_bits |= _ASCE_TYPE_REGION3; #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste; mm->context.has_pgste = 0; mm->context.use_skey = 0; #endif - mm->context.asce_limit = STACK_TOP_MAX; + if (mm->context.asce_limit == 0) { + /* context created by exec, set asce limit to 4TB */ + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | _ASCE_TYPE_REGION3; + mm->context.asce_limit = STACK_TOP_MAX; + } else if (mm->context.asce_limit == (1UL << 31)) { + mm_inc_nr_pmds(mm); + } crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm)); return 0; } @@ -111,8 +119,6 @@ static inline void activate_mm(struct mm_struct *prev, static inline void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { - if (oldmm->context.asce_limit < mm->context.asce_limit) - crst_table_downgrade(mm, oldmm->context.asce_limit); } static inline void arch_exit_mmap(struct mm_struct *mm) diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 7b7858f158b4..d7cc79fb6191 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -100,12 +100,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - spin_lock_init(&mm->context.list_lock); - INIT_LIST_HEAD(&mm->context.pgtable_list); - INIT_LIST_HEAD(&mm->context.gmap_list); - return (pgd_t *) crst_table_alloc(mm); + unsigned long *table = crst_table_alloc(mm); + + if (!table) + return NULL; + if (mm->context.asce_limit == (1UL << 31)) { + /* Forking a compat process with 2 page table levels */ + if (!pgtable_pmd_page_ctor(virt_to_page(table))) { + crst_table_free(mm, table); + return NULL; + } + } + return (pgd_t *) table; +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + if (mm->context.asce_limit == (1UL << 31)) + pgtable_pmd_page_dtor(virt_to_page(pgd)); + crst_table_free(mm, (unsigned long *) pgd); } -#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd) static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a08d0afd5ff6..575dc123bda2 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2249,7 +2249,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa) /* manually convert vector registers if necessary */ if (MACHINE_HAS_VX) { - convert_vx_to_fp(fprs, current->thread.fpu.vxrs); + convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs); rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA, fprs, 128); } else { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d8a1650a0f67..021ae1987cdc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -293,6 +293,9 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config DEBUG_RODATA + def_bool y + config PGTABLE_LEVELS int default 4 if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 137dfa96aa14..1f6c306a9a00 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -91,28 +91,16 @@ config EFI_PGT_DUMP issues with the mapping of the EFI runtime regions into that table. -config DEBUG_RODATA - bool "Write protect kernel read-only data structures" - default y - depends on DEBUG_KERNEL - ---help--- - Mark the kernel read-only data as write-protected in the pagetables, - in order to catch accidental (and incorrect) writes to such const - data. This is recommended so that we can catch kernel bugs sooner. - If in doubt, say "Y". - config DEBUG_RODATA_TEST - bool "Testcase for the DEBUG_RODATA feature" - depends on DEBUG_RODATA + bool "Testcase for the marking rodata read-only" default y ---help--- - This option enables a testcase for the DEBUG_RODATA - feature as well as for the change_page_attr() infrastructure. + This option enables a testcase for the setting rodata read-only + as well as for the change_page_attr() infrastructure. If in doubt, say "N" config DEBUG_WX bool "Warn on W+X mappings at boot" - depends on DEBUG_RODATA select X86_PTDUMP_CORE ---help--- Generate a warning if any W+X mappings are found at boot. diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 0224987556ce..3f69326ed545 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -140,7 +140,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, fprintf(outfile, "#include <asm/vdso.h>\n"); fprintf(outfile, "\n"); fprintf(outfile, - "static unsigned char raw_data[%lu] __page_aligned_data = {", + "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {", mapping_size); for (j = 0; j < stripped_len; j++) { if (j % 10 == 0) diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index e63aa38e85fb..61518cf79437 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -91,16 +91,10 @@ void clflush_cache_range(void *addr, unsigned int size); #define mmio_flush_range(addr, size) clflush_cache_range(addr, size) -#ifdef CONFIG_DEBUG_RODATA -void mark_rodata_ro(void); extern const int rodata_test_data; extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); -#else -static inline void set_kernel_text_rw(void) { } -static inline void set_kernel_text_ro(void) { } -#endif #ifdef CONFIG_DEBUG_RODATA_TEST int rodata_test(void); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index c1adf33fdd0d..bc62e7cbf1b1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -17,15 +17,8 @@ static inline bool kvm_check_and_clear_guest_paused(void) } #endif /* CONFIG_KVM_GUEST */ -#ifdef CONFIG_DEBUG_RODATA #define KVM_HYPERCALL \ ALTERNATIVE(".byte 0x0f,0x01,0xc1", ".byte 0x0f,0x01,0xd9", X86_FEATURE_VMMCALL) -#else -/* On AMD processors, vmcall will generate a trap that we will - * then rewrite to the appropriate instruction. - */ -#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1" -#endif /* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall * instruction. The hypervisor may replace it with something else but only the diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index 0a5242428659..13b6cdd0af57 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -7,7 +7,7 @@ extern char __brk_base[], __brk_limit[]; extern struct exception_table_entry __stop___ex_table[]; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; #endif diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index d1daead5fcdd..adb3eaf8fe2a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -16,6 +16,7 @@ #include <asm/cacheflush.h> #include <asm/realmode.h> +#include <linux/ftrace.h> #include "../../realmode/rm/wakeup.h" #include "sleep.h" @@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void) saved_magic = 0x123456789abcdef0L; #endif /* CONFIG_64BIT */ + /* + * Pause/unpause graph tracing around do_suspend_lowlevel as it has + * inconsistent call/return info after it jumps to the wakeup vector. + */ + pause_graph_tracing(); do_suspend_lowlevel(); + unpause_graph_tracing(); return 0; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 311bcf338f07..eb6bd34582c6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -81,9 +81,9 @@ within(unsigned long addr, unsigned long start, unsigned long end) static unsigned long text_ip_addr(unsigned long ip) { /* - * On x86_64, kernel text mappings are mapped read-only with - * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead - * of the kernel text mapping to modify the kernel text. + * On x86_64, kernel text mappings are mapped read-only, so we use + * the kernel identity mapping instead of the kernel text mapping + * to modify the kernel text. * * For 32bit kernels, these mappings are same and we can use * kernel identity mapping to modify code. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 44256a62702b..ed15cd486d06 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -750,9 +750,7 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) { int err; -#ifdef CONFIG_DEBUG_RODATA char opc[BREAK_INSTR_SIZE]; -#endif /* CONFIG_DEBUG_RODATA */ bpt->type = BP_BREAKPOINT; err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, @@ -761,7 +759,6 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) return err; err = probe_kernel_write((char *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); -#ifdef CONFIG_DEBUG_RODATA if (!err) return err; /* @@ -778,13 +775,12 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) return -EINVAL; bpt->type = BP_POKE_BREAKPOINT; -#endif /* CONFIG_DEBUG_RODATA */ + return err; } int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) { -#ifdef CONFIG_DEBUG_RODATA int err; char opc[BREAK_INSTR_SIZE]; @@ -801,8 +797,8 @@ int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) goto knl_write; return err; + knl_write: -#endif /* CONFIG_DEBUG_RODATA */ return probe_kernel_write((char *)bpt->bpt_addr, (char *)bpt->saved_instr, BREAK_INSTR_SIZE); } diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 3f92ce07e525..27538f183c3b 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -142,7 +142,6 @@ static int test_NX(void) * by the error message */ -#ifdef CONFIG_DEBUG_RODATA /* Test 3: Check if the .rodata section is executable */ if (rodata_test_data != 0xC3) { printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); @@ -151,7 +150,6 @@ static int test_NX(void) printk(KERN_ERR "test_nx: .rodata section is executable\n"); ret = -ENODEV; } -#endif #if 0 /* Test 4: Check if the .data section of a module is executable */ diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c index 5ecbfe5099da..cb4a01b41e27 100644 --- a/arch/x86/kernel/test_rodata.c +++ b/arch/x86/kernel/test_rodata.c @@ -76,5 +76,5 @@ int rodata_test(void) } MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the DEBUG_RODATA infrastructure"); +MODULE_DESCRIPTION("Testcase for marking rodata as read-only"); MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 74e4bf11f562..fe133b710bef 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -41,29 +41,28 @@ ENTRY(phys_startup_64) jiffies_64 = jiffies; #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* - * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA - * we retain large page mappings for boundaries spanning kernel text, rodata - * and data sections. + * On 64-bit, align RODATA to 2MB so we retain large page mappings for + * boundaries spanning kernel text, rodata and data sections. * * However, kernel identity mappings will have different RWX permissions * to the pages mapping to text and to the pages padding (which are freed) the * text section. Hence kernel identity mappings will be broken to smaller * pages. For 64-bit, kernel text and kernel identity mappings are different, - * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, - * as well as retain 2MB large page mappings for kernel text. + * so we can enable protection checks as well as retain 2MB large page + * mappings for kernel text. */ -#define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); +#define X64_ALIGN_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); -#define X64_ALIGN_DEBUG_RODATA_END \ +#define X64_ALIGN_RODATA_END \ . = ALIGN(HPAGE_SIZE); \ __end_rodata_hpage_align = .; #else -#define X64_ALIGN_DEBUG_RODATA_BEGIN -#define X64_ALIGN_DEBUG_RODATA_END +#define X64_ALIGN_RODATA_BEGIN +#define X64_ALIGN_RODATA_END #endif @@ -112,13 +111,11 @@ SECTIONS EXCEPTION_TABLE(16) :text = 0x9090 -#if defined(CONFIG_DEBUG_RODATA) /* .text should occupy whole number of pages */ . = ALIGN(PAGE_SIZE); -#endif - X64_ALIGN_DEBUG_RODATA_BEGIN + X64_ALIGN_RODATA_BEGIN RO_DATA(PAGE_SIZE) - X64_ALIGN_DEBUG_RODATA_END + X64_ALIGN_RODATA_END /* Data */ .data : AT(ADDR(.data) - LOAD_OFFSET) { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e7c2c1428a69..8eb8a934b531 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3754,13 +3754,15 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { + bool uses_nx = context->nx || context->base_role.smep_andnot_wp; + /* * Passing "true" to the last argument is okay; it adds a check * on bit 8 of the SPTEs which KVM doesn't use anyway. */ __reset_rsvds_bits_mask(vcpu, &context->shadow_zero_check, boot_cpu_data.x86_phys_bits, - context->shadow_root_level, context->nx, + context->shadow_root_level, uses_nx, guest_cpuid_has_gbpages(vcpu), is_pse(vcpu), true); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10e7693b3540..0958fa2b7cb7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -595,6 +595,8 @@ struct vcpu_vmx { /* Support for PML */ #define PML_ENTITY_NUM 512 struct page *pml_pg; + + u64 current_tsc_ratio; }; enum segment_cache_field { @@ -1746,6 +1748,13 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, return; } break; + case MSR_IA32_PEBS_ENABLE: + /* PEBS needs a quiescent period after being disabled (to write + * a record). Disabling PEBS through VMX MSR swapping doesn't + * provide that period, so a CPU could write host's record into + * guest's memory. + */ + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } for (i = 0; i < m->nr; ++i) @@ -1783,26 +1792,31 @@ static void reload_tss(void) static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { - u64 guest_efer; - u64 ignore_bits; + u64 guest_efer = vmx->vcpu.arch.efer; + u64 ignore_bits = 0; - guest_efer = vmx->vcpu.arch.efer; + if (!enable_ept) { + /* + * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing + * host CPUID is more efficient than testing guest CPUID + * or CR4. Host SMEP is anyway a requirement for guest SMEP. + */ + if (boot_cpu_has(X86_FEATURE_SMEP)) + guest_efer |= EFER_NX; + else if (!(guest_efer & EFER_NX)) + ignore_bits |= EFER_NX; + } /* - * NX is emulated; LMA and LME handled by hardware; SCE meaningless - * outside long mode + * LMA and LME handled by hardware; SCE meaningless outside long mode. */ - ignore_bits = EFER_NX | EFER_SCE; + ignore_bits |= EFER_SCE; #ifdef CONFIG_X86_64 ignore_bits |= EFER_LMA | EFER_LME; /* SCE is meaningful only in long mode on Intel */ if (guest_efer & EFER_LMA) ignore_bits &= ~(u64)EFER_SCE; #endif - guest_efer &= ~ignore_bits; - guest_efer |= host_efer & ignore_bits; - vmx->guest_msrs[efer_offset].data = guest_efer; - vmx->guest_msrs[efer_offset].mask = ~ignore_bits; clear_atomic_switch_msr(vmx, MSR_EFER); @@ -1813,16 +1827,21 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) */ if (cpu_has_load_ia32_efer || (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) { - guest_efer = vmx->vcpu.arch.efer; if (!(guest_efer & EFER_LMA)) guest_efer &= ~EFER_LME; if (guest_efer != host_efer) add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer); return false; - } + } else { + guest_efer &= ~ignore_bits; + guest_efer |= host_efer & ignore_bits; - return true; + vmx->guest_msrs[efer_offset].data = guest_efer; + vmx->guest_msrs[efer_offset].mask = ~ignore_bits; + + return true; + } } static unsigned long segment_base(u16 selector) @@ -2062,14 +2081,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ - /* Setup TSC multiplier */ - if (cpu_has_vmx_tsc_scaling()) - vmcs_write64(TSC_MULTIPLIER, - vcpu->arch.tsc_scaling_ratio); - vmx->loaded_vmcs->cpu = cpu; } + /* Setup TSC multiplier */ + if (kvm_has_tsc_control && + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) { + vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio; + vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); + } + vmx_vcpu_pi_load(vcpu, cpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e79162c0a26d..27419ba5af26 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2736,7 +2736,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) @@ -6552,12 +6551,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * KVM_DEBUGREG_WONT_EXIT again. */ if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { - int i; - WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); kvm_x86_ops->sync_dirty_debug_regs(vcpu); - for (i = 0; i < KVM_NR_DB_REGS; i++) - vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + kvm_update_dr0123(vcpu); + kvm_update_dr6(vcpu); + kvm_update_dr7(vcpu); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } /* diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index cb4ef3de61f9..2ebfbaf61142 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -871,7 +871,6 @@ static noinline int do_test_wp_bit(void) return flag; } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -960,5 +959,3 @@ void mark_rodata_ro(void) if (__supported_pte_mask & _PAGE_NX) debug_checkwx(); } -#endif - diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec081fe0ce2c..e08d141844ee 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1062,7 +1062,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -#ifdef CONFIG_DEBUG_RODATA const int rodata_test_data = 0xC3; EXPORT_SYMBOL_GPL(rodata_test_data); @@ -1154,8 +1153,6 @@ void mark_rodata_ro(void) debug_checkwx(); } -#endif - int kern_addr_valid(unsigned long addr) { unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index db20ee9a413a..4540e8880cd9 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -278,7 +278,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, __pa_symbol(__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; -#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) +#if defined(CONFIG_X86_64) /* * Once the kernel maps the text as RO (kernel_set_to_readonly is set), * kernel text mappings for the large page aligned text, rodata sections @@ -414,24 +414,30 @@ pmd_t *lookup_pmd_address(unsigned long address) phys_addr_t slow_virt_to_phys(void *__virt_addr) { unsigned long virt_addr = (unsigned long)__virt_addr; - unsigned long phys_addr, offset; + phys_addr_t phys_addr; + unsigned long offset; enum pg_level level; pte_t *pte; pte = lookup_address(virt_addr, &level); BUG_ON(!pte); + /* + * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t + * before being left-shifted PAGE_SHIFT bits -- this trick is to + * make 32-PAE kernel work correctly. + */ switch (level) { case PG_LEVEL_1G: - phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT; offset = virt_addr & ~PUD_PAGE_MASK; break; case PG_LEVEL_2M: - phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT; offset = virt_addr & ~PMD_PAGE_MASK; break; default: - phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; offset = virt_addr & ~PAGE_MASK; } |