summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/Makefile3
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c9
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/ia32/sys_ia32.c15
-rw-r--r--arch/x86/include/asm/efi.h6
-rw-r--r--arch/x86/include/asm/msr-index.h5
-rw-r--r--arch/x86/include/asm/processor.h5
-rw-r--r--arch/x86/include/asm/sys_ia32.h2
-rw-r--r--arch/x86/include/asm/syscalls.h2
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/vgtod.h4
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h1
-rw-r--r--arch/x86/include/asm/xen/interface.h4
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/apic/io_apic.c3
-rw-r--r--arch/x86/kernel/asm-offsets.c3
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c10
-rw-r--r--arch/x86/kernel/cpu/perf_event.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c61
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c48
-rw-r--r--arch/x86/kernel/cpu/perf_event_knc.c319
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c127
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/e820.c3
-rw-r--r--arch/x86/kernel/entry_32.S54
-rw-r--r--arch/x86/kernel/entry_64.S62
-rw-r--r--arch/x86/kernel/kgdb.c2
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/process.c65
-rw-r--r--arch/x86/kernel/process_32.c37
-rw-r--r--arch/x86/kernel/process_64.c35
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c33
-rw-r--r--arch/x86/kernel/signal.c8
-rw-r--r--arch/x86/kernel/sys_i386_32.c40
-rw-r--r--arch/x86/kernel/uprobes.c16
-rw-r--r--arch/x86/kernel/vm86_32.c6
-rw-r--r--arch/x86/kernel/vsyscall_64.c49
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c3
-rw-r--r--arch/x86/kvm/x86.c60
-rw-r--r--arch/x86/mm/init.c58
-rw-r--r--arch/x86/mm/init_64.c7
-rw-r--r--arch/x86/oprofile/nmi_int.c2
-rw-r--r--arch/x86/platform/efi/efi.c47
-rw-r--r--arch/x86/platform/efi/efi_64.c7
-rw-r--r--arch/x86/realmode/rm/wakeup_asm.S15
-rw-r--r--arch/x86/syscalls/syscall_32.tbl2
-rw-r--r--arch/x86/um/Kconfig4
-rw-r--r--arch/x86/um/asm/checksum.h144
-rw-r--r--arch/x86/um/asm/checksum_32.h140
-rw-r--r--arch/x86/um/asm/checksum_64.h125
-rw-r--r--arch/x86/um/asm/elf.h2
-rw-r--r--arch/x86/um/asm/ptrace.h58
-rw-r--r--arch/x86/um/asm/ptrace_32.h28
-rw-r--r--arch/x86/um/asm/ptrace_64.h46
-rw-r--r--arch/x86/um/bugs_32.c6
-rw-r--r--arch/x86/um/bugs_64.c2
-rw-r--r--arch/x86/um/fault.c2
-rw-r--r--arch/x86/um/ldt.c10
-rw-r--r--arch/x86/um/mem_64.c6
-rw-r--r--arch/x86/um/os-Linux/registers.c4
-rw-r--r--arch/x86/um/os-Linux/task_size.c2
-rw-r--r--arch/x86/um/os-Linux/tls.c2
-rw-r--r--arch/x86/um/ptrace_32.c8
-rw-r--r--arch/x86/um/ptrace_user.c2
-rw-r--r--arch/x86/um/shared/sysdep/ptrace.h2
-rw-r--r--arch/x86/um/shared/sysdep/stub.h4
-rw-r--r--arch/x86/um/shared/sysdep/syscalls_32.h4
-rw-r--r--arch/x86/um/signal.c4
-rw-r--r--arch/x86/um/stub_32.S2
-rw-r--r--arch/x86/um/stub_64.S2
-rw-r--r--arch/x86/um/stub_segv.c6
-rw-r--r--arch/x86/um/sys_call_table_32.c1
-rw-r--r--arch/x86/um/sysrq_32.c12
-rw-r--r--arch/x86/um/sysrq_64.c2
-rw-r--r--arch/x86/um/tls_32.c12
-rw-r--r--arch/x86/um/tls_64.c2
-rw-r--r--arch/x86/vdso/vclock_gettime.c22
-rw-r--r--arch/x86/xen/enlighten.c20
-rw-r--r--arch/x86/xen/mmu.c62
89 files changed, 1192 insertions, 854 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1ae94bcae5d..46c3bff3ced 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -108,6 +108,10 @@ config X86
select GENERIC_STRNLEN_USER
select HAVE_RCU_USER_QS if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
+ select GENERIC_KERNEL_THREAD
+ select GENERIC_KERNEL_EXECVE
+ select MODULES_USE_ELF_REL if X86_32
+ select MODULES_USE_ELF_RELA if X86_64
config INSTRUCTION_DECODER
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 58790bd85c1..05afcca66de 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -142,7 +142,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
-archscripts:
+archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
###
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index ce03476d8c8..ccce0ed67dd 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,8 @@ setup-y += video-bios.o
targets += $(setup-y)
hostprogs-y := mkcpustr tools/build
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
+ -include include/generated/autoconf.h \
-D__EXPORTED_HEADERS__
$(obj)/cpu.o: $(obj)/cpustr.h
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 7c04d0da709..1b9c22bea8a 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -515,6 +515,11 @@ static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
}
+static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
+{
+ aesni_enc(ctx, out, in);
+}
+
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
@@ -525,7 +530,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
.tbuflen = sizeof(buf),
.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
- .tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+ .tweak_fn = aesni_xts_tweak,
.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
.crypt_fn = lrw_xts_encrypt_callback,
};
@@ -550,7 +555,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
.tbuflen = sizeof(buf),
.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
- .tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+ .tweak_fn = aesni_xts_tweak,
.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
.crypt_fn = lrw_xts_decrypt_callback,
};
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 9c289504e68..076745fc804 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -465,7 +465,7 @@ GLOBAL(\label)
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
- PTREGSCALL stub32_execve, sys32_execve, %rcx
+ PTREGSCALL stub32_execve, compat_sys_execve, %rcx
PTREGSCALL stub32_fork, sys_fork, %rdi
PTREGSCALL stub32_clone, sys32_clone, %rdx
PTREGSCALL stub32_vfork, sys_vfork, %rdi
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index c5b938d92ea..86d68d1c880 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -385,21 +385,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
return ret;
}
-asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
- compat_uptr_t __user *envp, struct pt_regs *regs)
-{
- long error;
- char *filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = compat_do_execve(filename, argv, envp, regs);
- putname(filename);
- return error;
-}
-
asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
struct pt_regs *regs)
{
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index c9dcc181d4d..6e8fdf5ad11 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -35,7 +35,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...);
#define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \
efi_call_virt(f, a1, a2, a3, a4, a5, a6)
-#define efi_ioremap(addr, size, type) ioremap_cache(addr, size)
+#define efi_ioremap(addr, size, type, attr) ioremap_cache(addr, size)
#else /* !CONFIG_X86_32 */
@@ -89,7 +89,7 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
(u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
- u32 type);
+ u32 type, u64 attribute);
#endif /* CONFIG_X86_32 */
@@ -98,6 +98,8 @@ extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
extern void efi_call_phys_epilog(void);
+extern void efi_unmap_memmap(void);
+extern void efi_memory_uc(u64 addr, unsigned long size);
#ifndef CONFIG_EFI
/*
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index fbee9714d9a..7f0edceb756 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,6 +121,11 @@
#define MSR_P6_EVNTSEL0 0x00000186
#define MSR_P6_EVNTSEL1 0x00000187
+#define MSR_KNC_PERFCTR0 0x00000020
+#define MSR_KNC_PERFCTR1 0x00000021
+#define MSR_KNC_EVNTSEL0 0x00000028
+#define MSR_KNC_EVNTSEL1 0x00000029
+
/* AMD64 MSRs. Not complete. See the architecture manual for a more
complete list. */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b98c0d958eb..ad1fc851167 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -588,11 +588,6 @@ typedef struct {
} mm_segment_t;
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 4ca1c611b55..a9a8cf3da49 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -54,8 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
asmlinkage long sys32_personality(unsigned long);
asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
-asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *,
- compat_uptr_t __user *, struct pt_regs *);
asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
long sys32_lseek(unsigned int, int, unsigned int);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index f1d8b441fc7..2be0b880417 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -25,7 +25,7 @@ int sys_fork(struct pt_regs *);
int sys_vfork(struct pt_regs *);
long sys_execve(const char __user *,
const char __user *const __user *,
- const char __user *const __user *, struct pt_regs *);
+ const char __user *const __user *);
long sys_clone(unsigned long, unsigned long, void __user *,
void __user *, struct pt_regs *);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index c535d847e3b..2d946e63ee8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,7 +79,6 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_IRET 5 /* force IRET */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
@@ -105,7 +104,6 @@ struct thread_info {
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_IRET (1 << TIF_IRET)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 0d9776e9e2d..16f3fc6ebf2 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -50,6 +50,7 @@
# define __ARCH_WANT_SYS_TIME
# define __ARCH_WANT_SYS_UTIME
# define __ARCH_WANT_SYS_WAITPID
+# define __ARCH_WANT_SYS_EXECVE
/*
* "Conditional" syscalls
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 8b38be2de9e..46e24d36b7d 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -17,8 +17,8 @@ struct vsyscall_gtod_data {
/* open coded 'struct timespec' */
time_t wall_time_sec;
- u32 wall_time_nsec;
- u32 monotonic_time_nsec;
+ u64 wall_time_snsec;
+ u64 monotonic_time_snsec;
time_t monotonic_time_sec;
struct timezone sys_tz;
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 66d0fff1ee8..125f344f06a 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -33,7 +33,6 @@
#ifndef _ASM_X86_XEN_HYPERVISOR_H
#define _ASM_X86_XEN_HYPERVISOR_H
-/* arch/i386/kernel/setup.c */
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 6d2f75a82a1..54d52ff1304 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -51,14 +51,14 @@
* with Xen so that on ARM we can have one ABI that works for 32 and 64
* bit guests. */
typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
typedef unsigned long xen_ulong_t;
+#define PRI_xen_ulong "lx"
/* Guest handles for primitive C types. */
__DEFINE_GUEST_HANDLE(uchar, unsigned char);
__DEFINE_GUEST_HANDLE(uint, unsigned int);
-__DEFINE_GUEST_HANDLE(ulong, unsigned long);
DEFINE_GUEST_HANDLE(char);
DEFINE_GUEST_HANDLE(int);
-DEFINE_GUEST_HANDLE(long);
DEFINE_GUEST_HANDLE(void);
DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a48ea05157d..91ce48f05f9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
-obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-y += syscall_$(BITS).o
obj-$(CONFIG_X86_64) += vsyscall_64.o
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index c265593ec2c..1817fa91102 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2257,6 +2257,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
continue;
cfg = irq_cfg(irq);
+ if (!cfg)
+ continue;
+
raw_spin_lock(&desc->lock);
/*
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 68de2dc962e..28610822fb3 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -69,4 +69,7 @@ void common(void) {
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
OFFSET(BP_pref_address, boot_params, hdr.pref_address);
OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+
+ BLANK();
+ DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
}
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d30a6a9a012..a0e067d3d96 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 9a7c90d80bc..93c5451bdd5 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -991,7 +991,7 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
if (attrs)
return attrs;
- n = sizeof (default_attrs) / sizeof (struct attribute *);
+ n = ARRAY_SIZE(default_attrs);
if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
n += 2;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 29e87d3b284..46cbf868969 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2209,11 +2209,6 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
&mce_cmci_disabled
};
-static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
- __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
- &mce_bios_cmci_threshold
-};
-
static struct device_attribute *mce_device_attrs[] = {
&dev_attr_tolerant.attr,
&dev_attr_check_interval.attr,
@@ -2222,7 +2217,6 @@ static struct device_attribute *mce_device_attrs[] = {
&dev_attr_dont_log_ce.attr,
&dev_attr_ignore_ce.attr,
&dev_attr_cmci_disabled.attr,
- &dev_attr_bios_cmci_threshold.attr,
NULL
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index c4e916d7737..698b6ec12e0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -576,12 +576,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int err = 0;
if (shared_bank[bank]) {
-
nb = node_to_amd_nb(amd_get_nb_id(cpu));
- WARN_ON(!nb);
/* threshold descriptor already initialized on this node? */
- if (nb->bank4) {
+ if (nb && nb->bank4) {
/* yes, use it */
b = nb->bank4;
err = kobject_add(b->kobj, &dev->kobj, name);
@@ -615,8 +613,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
atomic_set(&b->cpus, 1);
/* nb is already initialized, see above */
- WARN_ON(nb->bank4);
- nb->bank4 = b;
+ if (nb) {
+ WARN_ON(nb->bank4);
+ nb->bank4 = b;
+ }
}
err = allocate_threshold_blocks(cpu, bank, 0,
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 915b876edd1..4a3374e61a9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -208,12 +208,14 @@ static bool check_hw_exists(void)
}
/*
- * Now write a value and read it back to see if it matches,
- * this is needed to detect certain hardware emulators (qemu/kvm)
- * that don't trap on the MSR access and always return 0s.
+ * Read the current value, change it and read it back to see if it
+ * matches, this is needed to detect certain hardware emulators
+ * (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
- val = 0xabcdUL;
reg = x86_pmu_event_addr(0);
+ if (rdmsrl_safe(reg, &val))
+ goto msr_fail;
+ val ^= 0xffffUL;
ret = wrmsrl_safe(reg, val);
ret |= rdmsrl_safe(reg, &val_new);
if (ret || val != val_new)
@@ -338,6 +340,9 @@ int x86_setup_perfctr(struct perf_event *event)
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;
+
+ if (!attr->exclude_guest)
+ return -EOPNOTSUPP;
}
hwc->config |= config;
@@ -380,6 +385,9 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip) {
int precise = 0;
+ if (!event->attr.exclude_guest)
+ return -EOPNOTSUPP;
+
/* Support for constant skid */
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
precise++;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8b6defe7eef..271d2570029 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -626,6 +626,8 @@ int p4_pmu_init(void);
int p6_pmu_init(void);
+int knc_pmu_init(void);
+
#else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index eebd5ffe1bb..6336bcbd061 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -41,17 +41,22 @@ struct cpu_perf_ibs {
};
struct perf_ibs {
- struct pmu pmu;
- unsigned int msr;
- u64 config_mask;
- u64 cnt_mask;
- u64 enable_mask;
- u64 valid_mask;
- u64 max_period;
- unsigned long offset_mask[1];
- int offset_max;
- struct cpu_perf_ibs __percpu *pcpu;
- u64 (*get_count)(u64 config);
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+ u64 valid_mask;
+ u64 max_period;
+ unsigned long offset_mask[1];
+ int offset_max;
+ struct cpu_perf_ibs __percpu *pcpu;
+
+ struct attribute **format_attrs;
+ struct attribute_group format_group;
+ const struct attribute_group *attr_groups[2];
+
+ u64 (*get_count)(u64 config);
};
struct perf_ibs_data {
@@ -446,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags)
static void perf_ibs_read(struct perf_event *event) { }
+PMU_FORMAT_ATTR(rand_en, "config:57");
+PMU_FORMAT_ATTR(cnt_ctl, "config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+ &format_attr_rand_en.attr,
+ NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+ NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+ NULL,
+};
+
static struct perf_ibs perf_ibs_fetch = {
.pmu = {
.task_ctx_nr = perf_invalid_context,
@@ -465,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = {
.max_period = IBS_FETCH_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
+ .format_attrs = ibs_fetch_format_attrs,
.get_count = get_ibs_fetch_count,
};
@@ -488,6 +507,7 @@ static struct perf_ibs perf_ibs_op = {
.max_period = IBS_OP_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
+ .format_attrs = ibs_op_format_attrs,
.get_count = get_ibs_op_count,
};
@@ -597,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
perf_ibs->pcpu = pcpu;
+ /* register attributes */
+ if (perf_ibs->format_attrs[0]) {
+ memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+ perf_ibs->format_group.name = "format";
+ perf_ibs->format_group.attrs = perf_ibs->format_attrs;
+
+ memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+ perf_ibs->attr_groups[0] = &perf_ibs->format_group;
+ perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
+ }
+
ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
if (ret) {
perf_ibs->pcpu = NULL;
@@ -608,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
static __init int perf_event_ibs_init(void)
{
+ struct attribute **attr = ibs_op_format_attrs;
+
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
- if (ibs_caps & IBS_CAPS_OPCNT)
+
+ if (ibs_caps & IBS_CAPS_OPCNT) {
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+ *attr++ = &format_attr_cnt_ctl.attr;
+ }
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6bca492b854..324bb523d9d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
switch (boot_cpu_data.x86) {
case 0x6:
return p6_pmu_init();
+ case 0xb:
+ return knc_pmu_init();
case 0xf:
return p4_pmu_init();
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 99d96a4978b..3cf3d97cce3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -118,22 +118,24 @@ static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
- u32 config;
+ u32 config = 0;
- pci_read_config_dword(pdev, box_ctl, &config);
- config |= SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
}
static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
- u32 config;
+ u32 config = 0;
- pci_read_config_dword(pdev, box_ctl, &config);
- config &= ~SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
}
static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
@@ -156,7 +158,7 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
- u64 count;
+ u64 count = 0;
pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
@@ -603,11 +605,12 @@ static struct pci_driver snbep_uncore_pci_driver = {
/*
* build pci bus to socket mapping
*/
-static void snbep_pci2phy_map_init(void)
+static int snbep_pci2phy_map_init(void)
{
struct pci_dev *ubox_dev = NULL;
int i, bus, nodeid;
- u32 config;
+ int err = 0;
+ u32 config = 0;
while (1) {
/* find the UBOX device */
@@ -618,10 +621,14 @@ static void snbep_pci2phy_map_init(void)
break;
bus = ubox_dev->bus->number;
/* get the Node ID of the local register */
- pci_read_config_dword(ubox_dev, 0x40, &config);
+ err = pci_read_config_dword(ubox_dev, 0x40, &config);
+ if (err)
+ break;
nodeid = config;
/* get the Node ID mapping */
- pci_read_config_dword(ubox_dev, 0x54, &config);
+ err = pci_read_config_dword(ubox_dev, 0x54, &config);
+ if (err)
+ break;
/*
* every three bits in the Node ID mapping register maps
* to a particular node.
@@ -633,7 +640,11 @@ static void snbep_pci2phy_map_init(void)
}
}
};
- return;
+
+ if (ubox_dev)
+ pci_dev_put(ubox_dev);
+
+ return err ? pcibios_err_to_errno(err) : 0;
}
/* end of Sandy Bridge-EP uncore support */
@@ -1547,7 +1558,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int port;
/* adjust the main event selector and extra register index */
if (reg1->idx % 2) {
@@ -1559,7 +1569,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
}
/* adjust extra register config */
- port = reg1->idx / 6 + box->pmu->pmu_idx * 4;
switch (reg1->idx % 6) {
case 2:
/* shift the 8~15 bits to the 0~7 bits */
@@ -2578,9 +2587,11 @@ static int __init uncore_pci_init(void)
switch (boot_cpu_data.x86_model) {
case 45: /* Sandy Bridge-EP */
+ ret = snbep_pci2phy_map_init();
+ if (ret)
+ return ret;
pci_uncores = snbep_pci_uncores;
uncore_pci_driver = &snbep_uncore_pci_driver;
- snbep_pci2phy_map_init();
break;
default:
return 0;
@@ -2926,6 +2937,9 @@ static int __init intel_uncore_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
+ if (cpu_has_hypervisor)
+ return -ENODEV;
+
ret = uncore_pci_init();
if (ret)
goto fail;
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
new file mode 100644
index 00000000000..4b7731bf23a
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -0,0 +1,319 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/hardirq.h>
+
+#include "perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
+};
+
+static __initconst u64 knc_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ /* On Xeon Phi event "0" is a valid DATA_READ */
+ /* (L1 Data Cache Reads) Instruction. */
+ /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
+ /* bit will always be set in x86_pmu_hw_config(). */
+ [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+ /* DATA_READ */
+ [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
+ [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
+ [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
+ [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
+ [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+ /* DATA_READ */
+ /* see note on L1 OP_READ */
+ [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
+ [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
+ [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
+ [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+ return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
+ INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
+ INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
+ INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
+ INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
+ INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+ INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+ INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
+ INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
+ INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
+ INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
+ INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
+ INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
+ INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
+ INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
+ INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
+ EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f
+
+#define KNC_ENABLE_COUNTER0 0x00000001
+#define KNC_ENABLE_COUNTER1 0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+ u64 val;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+ u64 val;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 val;
+
+ val = hwc->config;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 val;
+
+ val = hwc->config;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static inline u64 knc_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int handled = 0;
+ int bit, loops;
+ u64 status;
+
+ cpuc = &__get_cpu_var(cpu_hw_events);
+
+ knc_pmu_disable_all();
+
+ status = knc_pmu_get_status();
+ if (!status) {
+ knc_pmu_enable_all(0);
+ return handled;
+ }
+
+ loops = 0;
+again:
+ knc_pmu_ack_status(status);
+ if (++loops > 100) {
+ WARN_ONCE(1, "perf: irq loop stuck!\n");
+ perf_event_print_debug();
+ goto done;
+ }
+
+ inc_irq_stat(apic_perf_irqs);
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ if (!intel_pmu_save_and_restart(event))
+ continue;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = knc_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ knc_pmu_enable_all(0);
+
+ return handled;
+}
+
+
+PMU_FORMAT_ATTR(event, "config:0-7" );
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+
+static struct attribute *intel_knc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
+static __initconst struct x86_pmu knc_pmu = {
+ .name = "knc",
+ .handle_irq = knc_pmu_handle_irq,
+ .disable_all = knc_pmu_disable_all,
+ .enable_all = knc_pmu_enable_all,
+ .enable = knc_pmu_enable_event,
+ .disable = knc_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_KNC_EVNTSEL0,
+ .perfctr = MSR_KNC_PERFCTR0,
+ .event_map = knc_pmu_event_map,
+ .max_events = ARRAY_SIZE(knc_perfmon_event_map),
+ .apic = 1,
+ .max_period = (1ULL << 39) - 1,
+ .version = 0,
+ .num_counters = 2,
+ .cntval_bits = 40,
+ .cntval_mask = (1ULL << 40) - 1,
+ .get_event_constraints = x86_get_event_constraints,
+ .event_constraints = knc_event_constraints,
+ .format_attrs = intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+ x86_pmu = knc_pmu;
+
+ memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ return 0;
+}
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index e4dd0f7a045..7d0270bd793 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -8,13 +8,106 @@
*/
static const u64 p6_perfmon_event_map[] =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
- [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */
+
+};
+
+static __initconst u64 p6_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
+ [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
+ [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
+ [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
};
static u64 p6_pmu_event_map(int hw_event)
@@ -34,7 +127,7 @@ static struct event_constraint p6_event_constraints[] =
{
INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
@@ -64,25 +157,25 @@ static void p6_pmu_enable_all(int added)
static inline void
p6_pmu_disable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val = P6_NOP_EVENT;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-
(void)wrmsrl_safe(hwc->config_base, val);
}
static void p6_pmu_enable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ /*
+ * p6 only has a global event enable, set on PerfEvtSel0
+ * We "disable" events by programming P6_NOP_EVENT
+ * and we rely on p6_pmu_enable_all() being called
+ * to actually enable the events.
+ */
(void)wrmsrl_safe(hwc->config_base, val);
}
@@ -158,5 +251,9 @@ __init int p6_pmu_init(void)
x86_pmu = p6_pmu;
+ memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 966512b2cac..2e8caf03f59 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
switch (boot_cpu_data.x86) {
case 6:
return msr - MSR_P6_PERFCTR0;
+ case 11:
+ return msr - MSR_KNC_PERFCTR0;
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
switch (boot_cpu_data.x86) {
case 6:
return msr - MSR_P6_EVNTSEL0;
+ case 11:
+ return msr - MSR_KNC_EVNTSEL0;
case 15:
return msr - MSR_P4_BSU_ESCR0;
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ed858e9e9a7..df06ade26be 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1077,6 +1077,9 @@ void __init memblock_x86_fill(void)
memblock_add(ei->addr, ei->size);
}
+ /* throw away partial pages */
+ memblock_trim_memory(PAGE_SIZE);
+
memblock_dump_all();
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 0750e3ba87c..88b725aa1d5 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -299,6 +299,21 @@ ENTRY(ret_from_fork)
CFI_ENDPROC
END(ret_from_fork)
+ENTRY(ret_from_kernel_thread)
+ CFI_STARTPROC
+ pushl_cfi %eax
+ call schedule_tail
+ GET_THREAD_INFO(%ebp)
+ popl_cfi %eax
+ pushl_cfi $0x0202 # Reset kernel eflags
+ popfl_cfi
+ movl PT_EBP(%esp),%eax
+ call *PT_EBX(%esp)
+ movl $0,PT_EAX(%esp)
+ jmp syscall_exit
+ CFI_ENDPROC
+ENDPROC(ret_from_kernel_thread)
+
/*
* Interrupt exit functions should be protected against kprobes
*/
@@ -323,8 +338,7 @@ ret_from_intr:
andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
/*
- * We can be coming here from a syscall done in the kernel space,
- * e.g. a failed kernel_execve().
+ * We can be coming here from child spawned by kernel_thread().
*/
movl PT_CS(%esp), %eax
andl $SEGMENT_RPL_MASK, %eax
@@ -616,6 +630,10 @@ work_notifysig: # deal with pending signals and
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
+1:
+#else
+ movl %esp, %eax
+#endif
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
movb PT_CS(%esp), %bl
@@ -626,24 +644,15 @@ work_notifysig: # deal with pending signals and
call do_notify_resume
jmp resume_userspace
+#ifdef CONFIG_VM86
ALIGN
work_notifysig_v86:
pushl_cfi %ecx # save ti_flags for do_notify_resume
call save_v86_state # %eax contains pt_regs pointer
popl_cfi %ecx
movl %eax, %esp
-#else
- movl %esp, %eax
+ jmp 1b
#endif
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- movb PT_CS(%esp), %bl
- andb $SEGMENT_RPL_MASK, %bl
- cmpb $USER_RPL, %bl
- jb resume_kernel
- xorl %edx, %edx
- call do_notify_resume
- jmp resume_userspace
END(work_pending)
# perform syscall exit tracing
@@ -732,7 +741,6 @@ ENDPROC(ptregs_##name)
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
-PTREGSCALL3(execve)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
@@ -1015,16 +1023,6 @@ END(spurious_interrupt_bug)
*/
.popsection
-ENTRY(kernel_thread_helper)
- pushl $0 # fake return address for unwinder
- CFI_STARTPROC
- movl %edi,%eax
- call *%esi
- call do_exit
- ud2 # padding for call trace
- CFI_ENDPROC
-ENDPROC(kernel_thread_helper)
-
#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
entrypoint expects, so fix it up before using the normal path. */
@@ -1037,7 +1035,7 @@ ENTRY(xen_sysenter_target)
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
- pushl_cfi $0
+ pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
TRACE_IRQS_OFF
@@ -1079,14 +1077,16 @@ ENTRY(xen_failsafe_callback)
2: mov 8(%esp),%es
3: mov 12(%esp),%fs
4: mov 16(%esp),%gs
+ /* EAX == 0 => Category 1 (Bad segment)
+ EAX != 0 => Category 2 (Bad IRET) */
testl %eax,%eax
popl_cfi %eax
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
addl $16,%esp
- jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
-5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment)
+ jmp iret_exc
+5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
jmp ret_from_exception
CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 44531acd9a8..b51b2c7ee51 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -554,7 +554,7 @@ ENTRY(ret_from_fork)
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- jz retint_restore_args
+ jz 1f
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
@@ -562,6 +562,14 @@ ENTRY(ret_from_fork)
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call # go to the SYSRET fastpath
+1:
+ subq $REST_SKIP, %rsp # leave space for volatiles
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
+ movq %rbp, %rdi
+ call *%rbx
+ movl $0, RAX(%rsp)
+ RESTORE_REST
+ jmp int_ret_from_sys_call
CFI_ENDPROC
END(ret_from_fork)
@@ -862,7 +870,6 @@ ENTRY(stub_execve)
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
- movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
@@ -912,8 +919,7 @@ ENTRY(stub_x32_execve)
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
- movq %rsp, %rcx
- call sys32_execve
+ call compat_sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
@@ -1318,52 +1324,6 @@ bad_gs:
jmp 2b
.previous
-ENTRY(kernel_thread_helper)
- pushq $0 # fake return address
- CFI_STARTPROC
- /*
- * Here we are in the child and the registers are set as they were
- * at kernel_thread() invocation in the parent.
- */
- call *%rsi
- # exit
- mov %eax, %edi
- call do_exit
- ud2 # padding for call trace
- CFI_ENDPROC
-END(kernel_thread_helper)
-
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- * extern long execve(const char *name, char **argv, char **envp)
- *
- * asm input arguments:
- * rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
- *
- * do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
- */
-ENTRY(kernel_execve)
- CFI_STARTPROC
- FAKE_STACK_FRAME $0
- SAVE_ALL
- movq %rsp,%rcx
- call sys_execve
- movq %rax, RAX(%rsp)
- RESTORE_REST
- testq %rax,%rax
- je int_ret_from_sys_call
- RESTORE_ARGS
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-END(kernel_execve)
-
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
@@ -1475,7 +1435,7 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq_cfi $0
+ pushq_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
jmp error_exit
CFI_ENDPROC
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 3f61904365c..836f8322960 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
+#ifdef CONFIG_DEBUG_RODATA
char opc[BREAK_INSTR_SIZE];
+#endif /* CONFIG_DEBUG_RODATA */
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b3e5e51bc90..4180a874c76 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -247,7 +247,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
+ rcu_irq_enter();
+ exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
+ rcu_irq_exit();
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index dc3567e083f..b644e1c765d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,71 +293,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
}
/*
- * This gets run with %si containing the
- * function to call, and %di containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
- struct pt_regs regs;
-
- memset(&regs, 0, sizeof(regs));
-
- regs.si = (unsigned long) fn;
- regs.di = (unsigned long) arg;
-
-#ifdef CONFIG_X86_32
- regs.ds = __USER_DS;
- regs.es = __USER_DS;
- regs.fs = __KERNEL_PERCPU;
- regs.gs = __KERNEL_STACK_CANARY;
-#else
- regs.ss = __KERNEL_DS;
-#endif
-
- regs.orig_ax = -1;
- regs.ip = (unsigned long) kernel_thread_helper;
- regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
- * sys_execve() executes a new program.
- */
-long sys_execve(const char __user *name,
- const char __user *const __user *argv,
- const char __user *const __user *envp, struct pt_regs *regs)
-{
- long error;
- char *filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, regs);
-
-#ifdef CONFIG_X86_32
- if (error == 0) {
- /* Make sure we don't return using sysenter.. */
- set_thread_flag(TIF_IRET);
- }
-#endif
-
- putname(filename);
- return error;
-}
-
-/*
* Idle related variables and functions
*/
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b9ff83c7135..44e0bff38e7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
#include <asm/switch_to.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
/*
* Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
- struct pt_regs *childregs;
+ struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
int err;
- childregs = task_pt_regs(p);
+ p->thread.sp = (unsigned long) childregs;
+ p->thread.sp0 = (unsigned long) (childregs+1);
+
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ p->thread.ip = (unsigned long) ret_from_kernel_thread;
+ task_user_gs(p) = __KERNEL_STACK_CANARY;
+ childregs->ds = __USER_DS;
+ childregs->es = __USER_DS;
+ childregs->fs = __KERNEL_PERCPU;
+ childregs->bx = sp; /* function */
+ childregs->bp = arg;
+ childregs->orig_ax = -1;
+ childregs->cs = __KERNEL_CS | get_kernel_rpl();
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ p->fpu_counter = 0;
+ p->thread.io_bitmap_ptr = NULL;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+ return 0;
+ }
*childregs = *regs;
childregs->ax = 0;
childregs->sp = sp;
- p->thread.sp = (unsigned long) childregs;
- p->thread.sp0 = (unsigned long) (childregs+1);
-
p->thread.ip = (unsigned long) ret_from_fork;
-
task_user_gs(p) = get_user_gs(regs);
p->fpu_counter = 0;
@@ -190,6 +207,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
+ regs->flags = X86_EFLAGS_IF;
+ /*
+ * force it to the iret return path by making it look as if there was
+ * some work pending.
+ */
+ set_thread_flag(TIF_NOTIFY_RESUME);
}
EXPORT_SYMBOL_GPL(start_thread);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8a6d20ce197..16c6365e2b8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
int err;
struct pt_regs *childregs;
struct task_struct *me = current;
- childregs = ((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(p))) - 1;
- *childregs = *regs;
-
- childregs->ax = 0;
- if (user_mode(regs))
- childregs->sp = sp;
- else
- childregs->sp = (unsigned long)childregs;
-
+ p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+ childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- p->thread.sp0 = (unsigned long) (childregs+1);
p->thread.usersp = me->thread.usersp;
-
set_tsk_thread_flag(p, TIF_FORK);
-
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
@@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ childregs->sp = (unsigned long)childregs;
+ childregs->ss = __KERNEL_DS;
+ childregs->bx = sp; /* function */
+ childregs->bp = arg;
+ childregs->orig_ax = -1;
+ childregs->cs = __KERNEL_CS | get_kernel_rpl();
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ return 0;
+ }
+ *childregs = *regs;
+
+ childregs->ax = 0;
+ childregs->sp = sp;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 52190a938b4..4e8ba39eaf0 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -358,14 +358,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
},
},
- { /* Handle problems with rebooting on CompuLab SBC-FITPC2 */
- .callback = set_bios_reboot,
- .ident = "CompuLab SBC-FITPC2",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "CompuLab"),
- DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
- },
- },
{ /* Handle problems with rebooting on ASUS P4S800 */
.callback = set_bios_reboot,
.ident = "ASUS P4S800",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d609be046b5..ca45696f30f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -68,6 +68,7 @@
#include <linux/percpu.h>
#include <linux/crash_dump.h>
#include <linux/tboot.h>
+#include <linux/jiffies.h>
#include <video/edid.h>
@@ -919,8 +920,22 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
+ int i;
+ unsigned long start, end;
+ unsigned long start_pfn, end_pfn;
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
+ NULL) {
+
+ end = PFN_PHYS(end_pfn);
+ if (end <= (1UL<<32))
+ continue;
+
+ start = PFN_PHYS(start_pfn);
+ max_pfn_mapped = init_memory_mapping(
+ max((1UL<<32), start), end);
+ }
+
/* can we preseve max_low_pfn ?*/
max_low_pfn = max_pfn;
}
@@ -1032,6 +1047,20 @@ void __init setup_arch(char **cmdline_p)
mcheck_init();
arch_init_ideal_nops();
+
+ register_refined_jiffies(CLOCK_TICK_RATE);
+
+#ifdef CONFIG_EFI
+ /* Once setup is done above, disable efi_enabled on mismatched
+ * firmware/kernel archtectures since there is no support for
+ * runtime services.
+ */
+ if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+ pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
+ efi_unmap_memmap();
+ efi_enabled = 0;
+ }
+#endif
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b33144c8b30..70b27ee6118 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -824,10 +824,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
mce_notify_process();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
- if (thread_info_flags & _TIF_UPROBE) {
- clear_thread_flag(TIF_UPROBE);
+ if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
- }
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
@@ -840,10 +838,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
-#ifdef CONFIG_X86_32
- clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
-
rcu_user_enter();
}
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
deleted file mode 100644
index 0b0cb5fede1..00000000000
--- a/arch/x86/kernel/sys_i386_32.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/i386
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/ipc.h>
-
-#include <linux/uaccess.h>
-#include <linux/unistd.h>
-
-#include <asm/syscalls.h>
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
- const char *const argv[],
- const char *const envp[])
-{
- long __res;
- asm volatile ("int $0x80"
- : "=a" (__res)
- : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
- return __res;
-}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 9538f00827a..aafa5557b39 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -651,31 +651,19 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
/*
* Skip these instructions as per the currently known x86 ISA.
- * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
+ * rep=0x66*; nop=0x90
*/
static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
int i;
for (i = 0; i < MAX_UINSN_BYTES; i++) {
- if ((auprobe->insn[i] == 0x66))
+ if (auprobe->insn[i] == 0x66)
continue;
if (auprobe->insn[i] == 0x90)
return true;
- if (i == (MAX_UINSN_BYTES - 1))
- break;
-
- if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f))
- return true;
-
- if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19))
- return true;
-
- if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0))
- return true;
-
break;
}
return false;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 54abcc0baf2..5c9687b1bde 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
if ((trapno == 3) || (trapno == 1)) {
KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
/* setting this flag forces the code in entry_32.S to
- call save_v86_state() and change the stack pointer
- to KVM86->regs32 */
- set_thread_flag(TIF_IRET);
+ the path where we call save_v86_state() and change
+ the stack pointer to KVM86->regs32 */
+ set_thread_flag(TIF_NOTIFY_RESUME);
return 0;
}
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8d141b30904..3a3e8c9e280 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -28,7 +28,7 @@
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/topology.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
#include <linux/getcpu.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -82,32 +82,41 @@ void update_vsyscall_tz(void)
vsyscall_gtod_data.sys_tz = sys_tz;
}
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
- struct clocksource *clock, u32 mult)
+void update_vsyscall(struct timekeeper *tk)
{
- struct timespec monotonic;
+ struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
- write_seqcount_begin(&vsyscall_gtod_data.seq);
+ write_seqcount_begin(&vdata->seq);
/* copy vsyscall data */
- vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
- vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
- vsyscall_gtod_data.clock.mask = clock->mask;
- vsyscall_gtod_data.clock.mult = mult;
- vsyscall_gtod_data.clock.shift = clock->shift;
-
- vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
- vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
+ vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
+ vdata->clock.cycle_last = tk->clock->cycle_last;
+ vdata->clock.mask = tk->clock->mask;
+ vdata->clock.mult = tk->mult;
+ vdata->clock.shift = tk->shift;
+
+ vdata->wall_time_sec = tk->xtime_sec;
+ vdata->wall_time_snsec = tk->xtime_nsec;
+
+ vdata->monotonic_time_sec = tk->xtime_sec
+ + tk->wall_to_monotonic.tv_sec;
+ vdata->monotonic_time_snsec = tk->xtime_nsec
+ + (tk->wall_to_monotonic.tv_nsec
+ << tk->shift);
+ while (vdata->monotonic_time_snsec >=
+ (((u64)NSEC_PER_SEC) << tk->shift)) {
+ vdata->monotonic_time_snsec -=
+ ((u64)NSEC_PER_SEC) << tk->shift;
+ vdata->monotonic_time_sec++;
+ }
- monotonic = timespec_add(*wall_time, *wtm);
- vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec;
- vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec;
+ vdata->wall_time_coarse.tv_sec = tk->xtime_sec;
+ vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
- vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
- vsyscall_gtod_data.monotonic_time_coarse =
- timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
+ vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
+ tk->wall_to_monotonic);
- write_seqcount_end(&vsyscall_gtod_data.seq);
+ write_seqcount_end(&vdata->seq);
}
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c6e6b721b6e..43e9fadca5d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1311,7 +1311,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
vcpu->arch.apic_base = value;
if (apic_x2apic_mode(apic)) {
u32 id = kvm_apic_id(apic);
- u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
+ u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
kvm_apic_set_ldr(apic, ldr);
}
apic->base_address = apic->vcpu->arch.apic_base &
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d289fee1ffb..6f85fe0bf95 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2497,8 +2497,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
}
- if (!is_error_pfn(pfn))
- kvm_release_pfn_clean(pfn);
+ kvm_release_pfn_clean(pfn);
}
static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1eefebe5d72..224a7e78cb6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3779,7 +3779,7 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
{
struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
- memcpy(vcpu->run->mmio.data, frag->data, frag->len);
+ memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len));
return X86EMUL_CONTINUE;
}
@@ -3832,18 +3832,11 @@ mmio:
bytes -= handled;
val += handled;
- while (bytes) {
- unsigned now = min(bytes, 8U);
-
- frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
- frag->gpa = gpa;
- frag->data = val;
- frag->len = now;
-
- gpa += now;
- val += now;
- bytes -= now;
- }
+ WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS);
+ frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
+ frag->gpa = gpa;
+ frag->data = val;
+ frag->len = bytes;
return X86EMUL_CONTINUE;
}
@@ -3890,7 +3883,7 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
vcpu->mmio_needed = 1;
vcpu->mmio_cur_fragment = 0;
- vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
+ vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len);
vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
vcpu->run->exit_reason = KVM_EXIT_MMIO;
vcpu->run->mmio.phys_addr = gpa;
@@ -5522,28 +5515,44 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu)
*
* read:
* for each fragment
- * write gpa, len
- * exit
- * copy data
+ * for each mmio piece in the fragment
+ * write gpa, len
+ * exit
+ * copy data
* execute insn
*
* write:
* for each fragment
- * write gpa, len
- * copy data
- * exit
+ * for each mmio piece in the fragment
+ * write gpa, len
+ * copy data
+ * exit
*/
static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
struct kvm_mmio_fragment *frag;
+ unsigned len;
BUG_ON(!vcpu->mmio_needed);
/* Complete previous fragment */
- frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+ frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment];
+ len = min(8u, frag->len);
if (!vcpu->mmio_is_write)
- memcpy(frag->data, run->mmio.data, frag->len);
+ memcpy(frag->data, run->mmio.data, len);
+
+ if (frag->len <= 8) {
+ /* Switch to the next fragment. */
+ frag++;
+ vcpu->mmio_cur_fragment++;
+ } else {
+ /* Go forward to the next mmio piece. */
+ frag->data += len;
+ frag->gpa += len;
+ frag->len -= len;
+ }
+
if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
vcpu->mmio_needed = 0;
if (vcpu->mmio_is_write)
@@ -5551,13 +5560,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
vcpu->mmio_read_completed = 1;
return complete_emulated_io(vcpu);
}
- /* Initiate next fragment */
- ++frag;
+
run->exit_reason = KVM_EXIT_MMIO;
run->mmio.phys_addr = frag->gpa;
if (vcpu->mmio_is_write)
- memcpy(run->mmio.data, frag->data, frag->len);
- run->mmio.len = frag->len;
+ memcpy(run->mmio.data, frag->data, min(8u, frag->len));
+ run->mmio.len = min(8u, frag->len);
run->mmio.is_write = vcpu->mmio_is_write;
vcpu->arch.complete_userspace_io = complete_emulated_mmio;
return 0;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index ab1f6a93b52..d7aea41563b 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -35,40 +35,44 @@ struct map_range {
unsigned page_size_mask;
};
-static void __init find_early_table_space(struct map_range *mr, unsigned long end,
- int use_pse, int use_gbpages)
+/*
+ * First calculate space needed for kernel direct mapping page tables to cover
+ * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB
+ * pages. Then find enough contiguous space for those page tables.
+ */
+static void __init find_early_table_space(struct map_range *mr, int nr_range)
{
- unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
+ int i;
+ unsigned long puds = 0, pmds = 0, ptes = 0, tables;
+ unsigned long start = 0, good_end;
phys_addr_t base;
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
-
- if (use_gbpages) {
- unsigned long extra;
-
- extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
- pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ for (i = 0; i < nr_range; i++) {
+ unsigned long range, extra;
- tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
+ range = mr[i].end - mr[i].start;
+ puds += (range + PUD_SIZE - 1) >> PUD_SHIFT;
- if (use_pse) {
- unsigned long extra;
+ if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) {
+ extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT);
+ pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT;
+ } else {
+ pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+ if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) {
+ extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT);
#ifdef CONFIG_X86_32
- extra += PMD_SIZE;
+ extra += PMD_SIZE;
#endif
- /* The first 2/4M doesn't use large pages. */
- if (mr->start < PMD_SIZE)
- extra += mr->end - mr->start;
-
- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
- } else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ } else {
+ ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ }
+ }
+ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
#ifdef CONFIG_X86_32
@@ -86,7 +90,7 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n",
- end - 1, pgt_buf_start << PAGE_SHIFT,
+ mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT,
(pgt_buf_top << PAGE_SHIFT) - 1);
}
@@ -267,7 +271,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
* nodes are discovered.
*/
if (!after_bootmem)
- find_early_table_space(&mr[0], end, use_pse, use_gbpages);
+ find_early_table_space(mr, nr_range);
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2b6b4a3c8be..3baff255ada 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -386,7 +386,8 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
* these mappings are more intelligent.
*/
if (pte_val(*pte)) {
- pages++;
+ if (!after_bootmem)
+ pages++;
continue;
}
@@ -451,6 +452,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
* attributes.
*/
if (page_size_mask & (1 << PG_LEVEL_2M)) {
+ if (!after_bootmem)
+ pages++;
last_map_addr = next;
continue;
}
@@ -526,6 +529,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
* attributes.
*/
if (page_size_mask & (1 << PG_LEVEL_1G)) {
+ if (!after_bootmem)
+ pages++;
last_map_addr = next;
continue;
}
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 26b8a8514ee..48768df2471 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -55,7 +55,7 @@ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
val |= counter_config->extra;
event &= model->event_mask ? model->event_mask : 0xFF;
val |= event & 0xFF;
- val |= (event & 0x0F00) << 24;
+ val |= (u64)(event & 0x0F00) << 24;
return val;
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index aded2a91162..ad4439145f8 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,11 +70,15 @@ EXPORT_SYMBOL(efi);
struct efi_memory_map memmap;
bool efi_64bit;
-static bool efi_native;
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
+static inline bool efi_is_native(void)
+{
+ return IS_ENABLED(CONFIG_X86_64) == efi_64bit;
+}
+
static int __init setup_noefi(char *arg)
{
efi_enabled = 0;
@@ -420,7 +424,7 @@ void __init efi_reserve_boot_services(void)
}
}
-static void __init efi_unmap_memmap(void)
+void __init efi_unmap_memmap(void)
{
if (memmap.map) {
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
@@ -432,7 +436,7 @@ void __init efi_free_boot_services(void)
{
void *p;
- if (!efi_native)
+ if (!efi_is_native())
return;
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -684,12 +688,10 @@ void __init efi_init(void)
return;
}
efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
- efi_native = !efi_64bit;
#else
efi_phys.systab = (efi_system_table_t *)
(boot_params.efi_info.efi_systab |
((__u64)boot_params.efi_info.efi_systab_hi<<32));
- efi_native = efi_64bit;
#endif
if (efi_systab_init(efi_phys.systab)) {
@@ -723,7 +725,7 @@ void __init efi_init(void)
* that doesn't match the kernel 32/64-bit mode.
*/
- if (!efi_native)
+ if (!efi_is_native())
pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
else if (efi_runtime_init()) {
efi_enabled = 0;
@@ -735,7 +737,7 @@ void __init efi_init(void)
return;
}
#ifdef CONFIG_X86_32
- if (efi_native) {
+ if (efi_is_native()) {
x86_platform.get_wallclock = efi_get_time;
x86_platform.set_wallclock = efi_set_rtc_mmss;
}
@@ -810,6 +812,16 @@ void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
return NULL;
}
+void efi_memory_uc(u64 addr, unsigned long size)
+{
+ unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
+ u64 npages;
+
+ npages = round_up(size, page_shift) / page_shift;
+ memrange_efi_to_native(&addr, &npages);
+ set_memory_uc(addr, npages);
+}
+
/*
* This function will switch the EFI runtime services to virtual mode.
* Essentially, look through the EFI memmap and map every region that
@@ -823,7 +835,7 @@ void __init efi_enter_virtual_mode(void)
efi_memory_desc_t *md, *prev_md = NULL;
efi_status_t status;
unsigned long size;
- u64 end, systab, addr, npages, end_pfn;
+ u64 end, systab, end_pfn;
void *p, *va, *new_memmap = NULL;
int count = 0;
@@ -834,7 +846,7 @@ void __init efi_enter_virtual_mode(void)
* non-native EFI
*/
- if (!efi_native) {
+ if (!efi_is_native()) {
efi_unmap_memmap();
return;
}
@@ -879,10 +891,14 @@ void __init efi_enter_virtual_mode(void)
end_pfn = PFN_UP(end);
if (end_pfn <= max_low_pfn_mapped
|| (end_pfn > (1UL << (32 - PAGE_SHIFT))
- && end_pfn <= max_pfn_mapped))
+ && end_pfn <= max_pfn_mapped)) {
va = __va(md->phys_addr);
- else
- va = efi_ioremap(md->phys_addr, size, md->type);
+
+ if (!(md->attribute & EFI_MEMORY_WB))
+ efi_memory_uc((u64)(unsigned long)va, size);
+ } else
+ va = efi_ioremap(md->phys_addr, size,
+ md->type, md->attribute);
md->virt_addr = (u64) (unsigned long) va;
@@ -892,13 +908,6 @@ void __init efi_enter_virtual_mode(void)
continue;
}
- if (!(md->attribute & EFI_MEMORY_WB)) {
- addr = md->virt_addr;
- npages = md->num_pages;
- memrange_efi_to_native(&addr, &npages);
- set_memory_uc(addr, npages);
- }
-
systab = (u64) (unsigned long) efi_phys.systab;
if (md->phys_addr <= systab && systab < end) {
systab += md->virt_addr - md->phys_addr;
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index ac3aa54e265..95fd505dfeb 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -82,7 +82,7 @@ void __init efi_call_phys_epilog(void)
}
void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
- u32 type)
+ u32 type, u64 attribute)
{
unsigned long last_map_pfn;
@@ -92,8 +92,11 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size,
last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size);
if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) {
unsigned long top = last_map_pfn << PAGE_SHIFT;
- efi_ioremap(top, size - (top - phys_addr), type);
+ efi_ioremap(top, size - (top - phys_addr), type, attribute);
}
+ if (!(attribute & EFI_MEMORY_WB))
+ efi_memory_uc((u64)(unsigned long)__va(phys_addr), size);
+
return (void __iomem *)__va(phys_addr);
}
diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S
index e56479e5805..9e7e14797a7 100644
--- a/arch/x86/realmode/rm/wakeup_asm.S
+++ b/arch/x86/realmode/rm/wakeup_asm.S
@@ -74,18 +74,9 @@ ENTRY(wakeup_start)
lidtl wakeup_idt
- /* Clear the EFLAGS but remember if we have EFLAGS.ID */
- movl $X86_EFLAGS_ID, %ecx
- pushl %ecx
- popfl
- pushfl
- popl %edi
+ /* Clear the EFLAGS */
pushl $0
popfl
- pushfl
- popl %edx
- xorl %edx, %edi
- andl %ecx, %edi /* %edi is zero iff CPUID & %cr4 are missing */
/* Check header signature... */
movl signature, %eax
@@ -120,12 +111,12 @@ ENTRY(wakeup_start)
movl %eax, %cr3
btl $WAKEUP_BEHAVIOR_RESTORE_CR4, %edi
- jz 1f
+ jnc 1f
movl pmode_cr4, %eax
movl %eax, %cr4
1:
btl $WAKEUP_BEHAVIOR_RESTORE_EFER, %edi
- jz 1f
+ jnc 1f
movl pmode_efer, %eax
movl pmode_efer + 4, %edx
movl $MSR_EFER, %ecx
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 7a35a6e71d4..a47103fbc69 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -17,7 +17,7 @@
8 i386 creat sys_creat
9 i386 link sys_link
10 i386 unlink sys_unlink
-11 i386 execve ptregs_execve stub32_execve
+11 i386 execve sys_execve stub32_execve
12 i386 chdir sys_chdir
13 i386 time sys_time compat_sys_time
14 i386 mknod sys_mknod
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index aeaff8bef2f..07611759ce3 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,6 +13,8 @@ endmenu
config UML_X86
def_bool y
select GENERIC_FIND_FIRST_BIT
+ select GENERIC_KERNEL_THREAD
+ select GENERIC_KERNEL_EXECVE
config 64BIT
bool "64-bit kernel" if SUBARCH = "x86"
@@ -22,9 +24,11 @@ config X86_32
def_bool !64BIT
select HAVE_AOUT
select ARCH_WANT_IPC_PARSE_VERSION
+ select MODULES_USE_ELF_REL
config X86_64
def_bool 64BIT
+ select MODULES_USE_ELF_RELA
config RWSEM_XCHGADD_ALGORITHM
def_bool X86_XADD && 64BIT
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index b6efe2381b5..4b181b74454 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -1,6 +1,150 @@
#ifndef __UM_CHECKSUM_H
#define __UM_CHECKSUM_H
+#include <linux/string.h>
+#include <linux/in6.h>
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * Note: when you get a NULL pointer exception here this means someone
+ * passed in an incorrect kernel address to one of these functions.
+ *
+ * If you use these functions directly please don't forget the
+ * access_ok().
+ */
+
+static __inline__
+__wsum csum_partial_copy_nocheck(const void *src, void *dst,
+ int len, __wsum sum)
+{
+ memcpy(dst, src, len);
+ return csum_partial(dst, len, sum);
+}
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums, and handles user-space pointer exceptions correctly, when needed.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+
+static __inline__
+__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+ int len, __wsum sum, int *err_ptr)
+{
+ if (copy_from_user(dst, src, len)) {
+ *err_ptr = -EFAULT;
+ return (__force __wsum)-1;
+ }
+
+ return csum_partial(dst, len, sum);
+}
+
+/**
+ * csum_fold - Fold and invert a 32bit checksum.
+ * sum: 32bit unfolded sum
+ *
+ * Fold a 32bit running checksum to 16bit and invert it. This is usually
+ * the last step before putting a checksum into a packet.
+ * Make sure not to mix with 64bit checksums.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+ __asm__(
+ " addl %1,%0\n"
+ " adcl $0xffff,%0"
+ : "=r" (sum)
+ : "r" ((__force u32)sum << 16),
+ "0" ((__force u32)sum & 0xffff0000)
+ );
+ return (__force __sum16)(~(__force u32)sum >> 16);
+}
+
+/**
+ * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum.
+ * @saddr: source address
+ * @daddr: destination address
+ * @len: length of packet
+ * @proto: ip protocol of packet
+ * @sum: initial sum to be added in (32bit unfolded)
+ *
+ * Returns the pseudo header checksum the input data. Result is
+ * 32bit unfolded.
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+ unsigned short proto, __wsum sum)
+{
+ asm(" addl %1, %0\n"
+ " adcl %2, %0\n"
+ " adcl %3, %0\n"
+ " adcl $0, %0\n"
+ : "=r" (sum)
+ : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
+ return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+ unsigned short len,
+ unsigned short proto,
+ __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/**
+ * ip_fast_csum - Compute the IPv4 header checksum efficiently.
+ * iph: ipv4 header
+ * ihl: length of header / 4
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ unsigned int sum;
+
+ asm( " movl (%1), %0\n"
+ " subl $4, %2\n"
+ " jbe 2f\n"
+ " addl 4(%1), %0\n"
+ " adcl 8(%1), %0\n"
+ " adcl 12(%1), %0\n"
+ "1: adcl 16(%1), %0\n"
+ " lea 4(%1), %1\n"
+ " decl %2\n"
+ " jne 1b\n"
+ " adcl $0, %0\n"
+ " movl %0, %2\n"
+ " shrl $16, %0\n"
+ " addw %w2, %w0\n"
+ " adcl $0, %0\n"
+ " notl %0\n"
+ "2:"
+ /* Since the input registers which are loaded with iph and ipl
+ are modified, we must also specify them as outputs, or gcc
+ will assume they contain their original values. */
+ : "=r" (sum), "=r" (iph), "=r" (ihl)
+ : "1" (iph), "2" (ihl)
+ : "memory");
+ return (__force __sum16)sum;
+}
+
#ifdef CONFIG_X86_32
# include "checksum_32.h"
#else
diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h
index caab74252e2..ab77b6f9a4b 100644
--- a/arch/x86/um/asm/checksum_32.h
+++ b/arch/x86/um/asm/checksum_32.h
@@ -5,145 +5,6 @@
#ifndef __UM_SYSDEP_CHECKSUM_H
#define __UM_SYSDEP_CHECKSUM_H
-#include "linux/in6.h"
-#include "linux/string.h"
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- * Note: when you get a NULL pointer exception here this means someone
- * passed in an incorrect kernel address to one of these functions.
- *
- * If you use these functions directly please don't forget the
- * access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
- int len, __wsum sum)
-{
- memcpy(dst, src, len);
- return csum_partial(dst, len, sum);
-}
-
-/*
- * the same as csum_partial, but copies from src while it
- * checksums, and handles user-space pointer exceptions correctly, when needed.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum, int *err_ptr)
-{
- if (copy_from_user(dst, src, len)) {
- *err_ptr = -EFAULT;
- return (__force __wsum)-1;
- }
-
- return csum_partial(dst, len, sum);
-}
-
-/*
- * This is a version of ip_compute_csum() optimized for IP headers,
- * which always checksum on 4 octet boundaries.
- *
- * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
- * Arnt Gulbrandsen.
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
- unsigned int sum;
-
- __asm__ __volatile__(
- "movl (%1), %0 ;\n"
- "subl $4, %2 ;\n"
- "jbe 2f ;\n"
- "addl 4(%1), %0 ;\n"
- "adcl 8(%1), %0 ;\n"
- "adcl 12(%1), %0 ;\n"
-"1: adcl 16(%1), %0 ;\n"
- "lea 4(%1), %1 ;\n"
- "decl %2 ;\n"
- "jne 1b ;\n"
- "adcl $0, %0 ;\n"
- "movl %0, %2 ;\n"
- "shrl $16, %0 ;\n"
- "addw %w2, %w0 ;\n"
- "adcl $0, %0 ;\n"
- "notl %0 ;\n"
-"2: ;\n"
- /* Since the input registers which are loaded with iph and ipl
- are modified, we must also specify them as outputs, or gcc
- will assume they contain their original values. */
- : "=r" (sum), "=r" (iph), "=r" (ihl)
- : "1" (iph), "2" (ihl)
- : "memory");
- return (__force __sum16)sum;
-}
-
-/*
- * Fold a partial checksum
- */
-
-static inline __sum16 csum_fold(__wsum sum)
-{
- __asm__(
- "addl %1, %0 ;\n"
- "adcl $0xffff, %0 ;\n"
- : "=r" (sum)
- : "r" ((__force u32)sum << 16),
- "0" ((__force u32)sum & 0xffff0000)
- );
- return (__force __sum16)(~(__force u32)sum >> 16);
-}
-
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
- unsigned short len,
- unsigned short proto,
- __wsum sum)
-{
- __asm__(
- "addl %1, %0 ;\n"
- "adcl %2, %0 ;\n"
- "adcl %3, %0 ;\n"
- "adcl $0, %0 ;\n"
- : "=r" (sum)
- : "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum));
- return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
- unsigned short len,
- unsigned short proto,
- __wsum sum)
-{
- return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-
static inline __sum16 ip_compute_csum(const void *buff, int len)
{
return csum_fold (csum_partial(buff, len, 0));
@@ -198,4 +59,3 @@ static __inline__ __wsum csum_and_copy_to_user(const void *src,
}
#endif
-
diff --git a/arch/x86/um/asm/checksum_64.h b/arch/x86/um/asm/checksum_64.h
index a5be9031ea8..7b6cd192157 100644
--- a/arch/x86/um/asm/checksum_64.h
+++ b/arch/x86/um/asm/checksum_64.h
@@ -5,131 +5,6 @@
#ifndef __UM_SYSDEP_CHECKSUM_H
#define __UM_SYSDEP_CHECKSUM_H
-#include "linux/string.h"
-#include "linux/in6.h"
-#include "asm/uaccess.h"
-
-extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- * Note: when you get a NULL pointer exception here this means someone
- * passed in an incorrect kernel address to one of these functions.
- *
- * If you use these functions directly please don't forget the
- * access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
- int len, __wsum sum)
-{
- memcpy(dst, src, len);
- return(csum_partial(dst, len, sum));
-}
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src,
- void *dst, int len, __wsum sum,
- int *err_ptr)
-{
- if (copy_from_user(dst, src, len)) {
- *err_ptr = -EFAULT;
- return (__force __wsum)-1;
- }
- return csum_partial(dst, len, sum);
-}
-
-/**
- * csum_fold - Fold and invert a 32bit checksum.
- * sum: 32bit unfolded sum
- *
- * Fold a 32bit running checksum to 16bit and invert it. This is usually
- * the last step before putting a checksum into a packet.
- * Make sure not to mix with 64bit checksums.
- */
-static inline __sum16 csum_fold(__wsum sum)
-{
- __asm__(
- " addl %1,%0\n"
- " adcl $0xffff,%0"
- : "=r" (sum)
- : "r" ((__force u32)sum << 16),
- "0" ((__force u32)sum & 0xffff0000)
- );
- return (__force __sum16)(~(__force u32)sum >> 16);
-}
-
-/**
- * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum.
- * @saddr: source address
- * @daddr: destination address
- * @len: length of packet
- * @proto: ip protocol of packet
- * @sum: initial sum to be added in (32bit unfolded)
- *
- * Returns the pseudo header checksum the input data. Result is
- * 32bit unfolded.
- */
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
- unsigned short proto, __wsum sum)
-{
- asm(" addl %1, %0\n"
- " adcl %2, %0\n"
- " adcl %3, %0\n"
- " adcl $0, %0\n"
- : "=r" (sum)
- : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
- return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
- unsigned short len,
- unsigned short proto,
- __wsum sum)
-{
- return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/**
- * ip_fast_csum - Compute the IPv4 header checksum efficiently.
- * iph: ipv4 header
- * ihl: length of header / 4
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
- unsigned int sum;
-
- asm( " movl (%1), %0\n"
- " subl $4, %2\n"
- " jbe 2f\n"
- " addl 4(%1), %0\n"
- " adcl 8(%1), %0\n"
- " adcl 12(%1), %0\n"
- "1: adcl 16(%1), %0\n"
- " lea 4(%1), %1\n"
- " decl %2\n"
- " jne 1b\n"
- " adcl $0, %0\n"
- " movl %0, %2\n"
- " shrl $16, %0\n"
- " addw %w2, %w0\n"
- " adcl $0, %0\n"
- " notl %0\n"
- "2:"
- /* Since the input registers which are loaded with iph and ipl
- are modified, we must also specify them as outputs, or gcc
- will assume they contain their original values. */
- : "=r" (sum), "=r" (iph), "=r" (ihl)
- : "1" (iph), "2" (ihl)
- : "memory");
- return (__force __sum16)sum;
-}
-
static inline unsigned add32_with_carry(unsigned a, unsigned b)
{
asm("addl %2,%0\n\t"
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 0e07adc8cbe..0feee2fd507 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -6,7 +6,7 @@
#define __UM_ELF_X86_H
#include <asm/user.h>
-#include "skas.h"
+#include <skas.h>
#ifdef CONFIG_X86_32
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index e72cd0df5ba..755133258c4 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -1,11 +1,13 @@
#ifndef __UM_X86_PTRACE_H
#define __UM_X86_PTRACE_H
-#ifdef CONFIG_X86_32
-# include "ptrace_32.h"
-#else
-# include "ptrace_64.h"
+#include <linux/compiler.h>
+#ifndef CONFIG_X86_32
+#define __FRAME_OFFSETS /* Needed to get the R* macros */
#endif
+#include <asm/ptrace-generic.h>
+
+#define user_mode(r) UPT_IS_USER(&(r)->regs)
#define PT_REGS_AX(r) UPT_AX(&(r)->regs)
#define PT_REGS_BX(r) UPT_BX(&(r)->regs)
@@ -36,4 +38,52 @@ static inline long regs_return_value(struct pt_regs *regs)
{
return PT_REGS_AX(regs);
}
+
+/*
+ * Forward declaration to avoid including sysdep/tls.h, which causes a
+ * circular include, and compilation failures.
+ */
+struct user_desc;
+
+#ifdef CONFIG_X86_32
+
+#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
+
+extern int ptrace_get_thread_area(struct task_struct *child, int idx,
+ struct user_desc __user *user_desc);
+
+extern int ptrace_set_thread_area(struct task_struct *child, int idx,
+ struct user_desc __user *user_desc);
+
+#else
+
+#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
+
+#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
+#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
+#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
+#define PT_REGS_R11(r) UPT_R11(&(r)->regs)
+#define PT_REGS_R12(r) UPT_R12(&(r)->regs)
+#define PT_REGS_R13(r) UPT_R13(&(r)->regs)
+#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
+#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
+
+#include <asm/errno.h>
+
+static inline int ptrace_get_thread_area(struct task_struct *child, int idx,
+ struct user_desc __user *user_desc)
+{
+ return -ENOSYS;
+}
+
+static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
+ struct user_desc __user *user_desc)
+{
+ return -ENOSYS;
+}
+
+extern long arch_prctl(struct task_struct *task, int code,
+ unsigned long __user *addr);
+
+#endif
#endif /* __UM_X86_PTRACE_H */
diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h
deleted file mode 100644
index 2cf225351b6..00000000000
--- a/arch/x86/um/asm/ptrace_32.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_PTRACE_I386_H
-#define __UM_PTRACE_I386_H
-
-#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
-
-#include "linux/compiler.h"
-#include "asm/ptrace-generic.h"
-
-#define user_mode(r) UPT_IS_USER(&(r)->regs)
-
-/*
- * Forward declaration to avoid including sysdep/tls.h, which causes a
- * circular include, and compilation failures.
- */
-struct user_desc;
-
-extern int ptrace_get_thread_area(struct task_struct *child, int idx,
- struct user_desc __user *user_desc);
-
-extern int ptrace_set_thread_area(struct task_struct *child, int idx,
- struct user_desc __user *user_desc);
-
-#endif
diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h
deleted file mode 100644
index ea7bff39432..00000000000
--- a/arch/x86/um/asm/ptrace_64.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_PTRACE_X86_64_H
-#define __UM_PTRACE_X86_64_H
-
-#include "linux/compiler.h"
-#include "asm/errno.h"
-
-#define __FRAME_OFFSETS /* Needed to get the R* macros */
-#include "asm/ptrace-generic.h"
-
-#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
-
-#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
-#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
-#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
-#define PT_REGS_R11(r) UPT_R11(&(r)->regs)
-#define PT_REGS_R12(r) UPT_R12(&(r)->regs)
-#define PT_REGS_R13(r) UPT_R13(&(r)->regs)
-#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
-#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
-
-/* XXX */
-#define user_mode(r) UPT_IS_USER(&(r)->regs)
-
-struct user_desc;
-
-static inline int ptrace_get_thread_area(struct task_struct *child, int idx,
- struct user_desc __user *user_desc)
-{
- return -ENOSYS;
-}
-
-static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
- struct user_desc __user *user_desc)
-{
- return -ENOSYS;
-}
-
-extern long arch_prctl(struct task_struct *task, int code,
- unsigned long __user *addr);
-#endif
diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c
index 17d88cf2c6c..33daff4dade 100644
--- a/arch/x86/um/bugs_32.c
+++ b/arch/x86/um/bugs_32.c
@@ -4,9 +4,9 @@
*/
#include <signal.h>
-#include "kern_util.h"
-#include "longjmp.h"
-#include "sysdep/ptrace.h"
+#include <kern_util.h>
+#include <longjmp.h>
+#include <sysdep/ptrace.h>
#include <generated/asm-offsets.h>
/* Set during early boot */
diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c
index 44e02ba2a26..8cc8256c698 100644
--- a/arch/x86/um/bugs_64.c
+++ b/arch/x86/um/bugs_64.c
@@ -4,7 +4,7 @@
* Licensed under the GPL
*/
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
void arch_check_bugs(void)
{
diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c
index d670f68532f..8784ab30d91 100644
--- a/arch/x86/um/fault.c
+++ b/arch/x86/um/fault.c
@@ -3,7 +3,7 @@
* Licensed under the GPL
*/
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
/* These two are from asm-um/uaccess.h and linux/module.h, check them. */
struct exception_table_entry
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 26b0e39d2ce..8e08176f0bc 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -7,11 +7,11 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <asm/unistd.h>
-#include "os.h"
-#include "proc_mm.h"
-#include "skas.h"
-#include "skas_ptrace.h"
-#include "sysdep/tls.h"
+#include <os.h>
+#include <proc_mm.h>
+#include <skas.h>
+#include <skas_ptrace.h>
+#include <sysdep/tls.h>
extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c
index 546518727a7..c6492e75797 100644
--- a/arch/x86/um/mem_64.c
+++ b/arch/x86/um/mem_64.c
@@ -1,6 +1,6 @@
-#include "linux/mm.h"
-#include "asm/page.h"
-#include "asm/mman.h"
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/mman.h>
const char *arch_vma_name(struct vm_area_struct *vma)
{
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index 0cdbb86b012..41bfe84e11a 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -9,8 +9,8 @@
#ifdef __i386__
#include <sys/user.h>
#endif
-#include "longjmp.h"
-#include "sysdep/ptrace_user.h"
+#include <longjmp.h>
+#include <sysdep/ptrace_user.h>
int save_fp_registers(int pid, unsigned long *fp_regs)
{
diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c
index efb16c5c9bc..8502ad30e61 100644
--- a/arch/x86/um/os-Linux/task_size.c
+++ b/arch/x86/um/os-Linux/task_size.c
@@ -2,7 +2,7 @@
#include <stdlib.h>
#include <signal.h>
#include <sys/mman.h>
-#include "longjmp.h"
+#include <longjmp.h>
#ifdef __i386__
diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c
index 82276b6071a..9d94b3b76c7 100644
--- a/arch/x86/um/os-Linux/tls.c
+++ b/arch/x86/um/os-Linux/tls.c
@@ -5,7 +5,7 @@
#include <sys/syscall.h>
#include <unistd.h>
-#include "sysdep/tls.h"
+#include <sysdep/tls.h>
#ifndef PTRACE_GET_THREAD_AREA
#define PTRACE_GET_THREAD_AREA 25
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 3b949daa095..ce3dd4f36f3 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -3,10 +3,10 @@
* Licensed under the GPL
*/
-#include "linux/mm.h"
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-#include "skas.h"
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <skas.h>
extern int arch_switch_tls(struct task_struct *to);
diff --git a/arch/x86/um/ptrace_user.c b/arch/x86/um/ptrace_user.c
index 3960ca1dd35..617885b1899 100644
--- a/arch/x86/um/ptrace_user.c
+++ b/arch/x86/um/ptrace_user.c
@@ -4,7 +4,7 @@
*/
#include <errno.h>
-#include "ptrace_user.h"
+#include <ptrace_user.h>
int ptrace_getregs(long pid, unsigned long *regs_out)
{
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 6ce2d76eb90..eb9356904ad 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -2,7 +2,7 @@
#define __SYSDEP_X86_PTRACE_H
#include <generated/user_constants.h>
-#include "sysdep/faultinfo.h"
+#include <sysdep/faultinfo.h>
#define MAX_REG_OFFSET (UM_FRAME_SIZE)
#define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index bd161e30010..3f55e5bd3ce 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -1,8 +1,8 @@
#include <asm/unistd.h>
#include <sys/mman.h>
#include <signal.h>
-#include "as-layout.h"
-#include "stub-data.h"
+#include <as-layout.h>
+#include <stub-data.h>
#ifdef __i386__
#include "stub_32.h"
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 05cb796aecb..8436079be91 100644
--- a/arch/x86/um/shared/sysdep/syscalls_32.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
@@ -3,8 +3,8 @@
* Licensed under the GPL
*/
-#include "asm/unistd.h"
-#include "sysdep/ptrace.h"
+#include <asm/unistd.h>
+#include <sysdep/ptrace.h>
typedef long syscall_handler_t(struct pt_regs);
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index ba7363ecf89..bdaa08cfbcf 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -11,8 +11,8 @@
#include <asm/unistd.h>
#include <asm/uaccess.h>
#include <asm/ucontext.h>
-#include "frame_kern.h"
-#include "skas.h"
+#include <frame_kern.h>
+#include <skas.h>
#ifdef CONFIG_X86_32
diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S
index 54a36ec20cb..b972649d3a1 100644
--- a/arch/x86/um/stub_32.S
+++ b/arch/x86/um/stub_32.S
@@ -1,4 +1,4 @@
-#include "as-layout.h"
+#include <as-layout.h>
.globl syscall_stub
.section .__syscall_stub, "ax"
diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S
index 20e4a96a6dc..7160b20172d 100644
--- a/arch/x86/um/stub_64.S
+++ b/arch/x86/um/stub_64.S
@@ -1,4 +1,4 @@
-#include "as-layout.h"
+#include <as-layout.h>
.globl syscall_stub
.section .__syscall_stub, "ax"
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
index b7450bd22e7..1518d2805ae 100644
--- a/arch/x86/um/stub_segv.c
+++ b/arch/x86/um/stub_segv.c
@@ -3,9 +3,9 @@
* Licensed under the GPL
*/
-#include "sysdep/stub.h"
-#include "sysdep/faultinfo.h"
-#include "sysdep/mcontext.h"
+#include <sysdep/stub.h>
+#include <sysdep/faultinfo.h>
+#include <sysdep/mcontext.h>
void __attribute__ ((__section__ (".__syscall_stub")))
stub_segv_handler(int sig, siginfo_t *info, void *p)
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index b5408cecac6..232e60504b3 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -25,7 +25,6 @@
#define old_mmap sys_old_mmap
#define ptregs_fork sys_fork
-#define ptregs_execve sys_execve
#define ptregs_iopl sys_iopl
#define ptregs_vm86old sys_vm86old
#define ptregs_clone i386_clone
diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c
index 2d5cc51e9be..c9bee5b8c0d 100644
--- a/arch/x86/um/sysrq_32.c
+++ b/arch/x86/um/sysrq_32.c
@@ -3,12 +3,12 @@
* Licensed under the GPL
*/
-#include "linux/kernel.h"
-#include "linux/smp.h"
-#include "linux/sched.h"
-#include "linux/kallsyms.h"
-#include "asm/ptrace.h"
-#include "sysrq.h"
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <asm/ptrace.h>
+#include <asm/sysrq.h>
/* This is declared by <linux/sched.h> */
void show_regs(struct pt_regs *regs)
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
index 08258f17996..a0e7fb1134a 100644
--- a/arch/x86/um/sysrq_64.c
+++ b/arch/x86/um/sysrq_64.c
@@ -10,7 +10,7 @@
#include <linux/utsname.h>
#include <asm/current.h>
#include <asm/ptrace.h>
-#include "sysrq.h"
+#include <asm/sysrq.h>
void __show_regs(struct pt_regs *regs)
{
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index baba84f8ecb..5f5feff3d24 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -3,12 +3,12 @@
* Licensed under the GPL
*/
-#include "linux/percpu.h"
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-#include "os.h"
-#include "skas.h"
-#include "sysdep/tls.h"
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <os.h>
+#include <skas.h>
+#include <sysdep/tls.h>
/*
* If needed we can detect when it's uninitialized.
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c
index f7ba46200ec..d22363cb854 100644
--- a/arch/x86/um/tls_64.c
+++ b/arch/x86/um/tls_64.c
@@ -1,4 +1,4 @@
-#include "linux/sched.h"
+#include <linux/sched.h>
void clear_flushed_tls(struct task_struct *task)
{
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 885eff49d6a..4df6c373421 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -80,7 +80,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
}
-notrace static inline long vgetns(void)
+notrace static inline u64 vgetsns(void)
{
long v;
cycles_t cycles;
@@ -91,21 +91,24 @@ notrace static inline long vgetns(void)
else
return 0;
v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
- return (v * gtod->clock.mult) >> gtod->clock.shift;
+ return v * gtod->clock.mult;
}
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
- unsigned long seq, ns;
+ unsigned long seq;
+ u64 ns;
int mode;
+ ts->tv_nsec = 0;
do {
seq = read_seqcount_begin(&gtod->seq);
mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
- ts->tv_nsec = gtod->wall_time_nsec;
- ns = vgetns();
+ ns = gtod->wall_time_snsec;
+ ns += vgetsns();
+ ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
timespec_add_ns(ts, ns);
@@ -114,15 +117,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
notrace static int do_monotonic(struct timespec *ts)
{
- unsigned long seq, ns;
+ unsigned long seq;
+ u64 ns;
int mode;
+ ts->tv_nsec = 0;
do {
seq = read_seqcount_begin(&gtod->seq);
mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->monotonic_time_sec;
- ts->tv_nsec = gtod->monotonic_time_nsec;
- ns = vgetns();
+ ns = gtod->monotonic_time_snsec;
+ ns += vgetsns();
+ ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
timespec_add_ns(ts, ns);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bf788d34530..586d83812b6 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -81,8 +81,6 @@
#include "smp.h"
#include "multicalls.h"
-#include <xen/events.h>
-
EXPORT_SYMBOL_GPL(hypercall_page);
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
@@ -987,7 +985,16 @@ static void xen_write_cr4(unsigned long cr4)
native_write_cr4(cr4);
}
-
+#ifdef CONFIG_X86_64
+static inline unsigned long xen_read_cr8(void)
+{
+ return 0;
+}
+static inline void xen_write_cr8(unsigned long val)
+{
+ BUG_ON(val);
+}
+#endif
static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
int ret;
@@ -1156,6 +1163,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_cr4_safe = native_read_cr4_safe,
.write_cr4 = xen_write_cr4,
+#ifdef CONFIG_X86_64
+ .read_cr8 = xen_read_cr8,
+ .write_cr8 = xen_write_cr8,
+#endif
+
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
@@ -1164,6 +1176,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
+ .read_tscp = native_read_tscp,
+
.iret = xen_iret,
.irq_enable_sysexit = xen_sysexit,
#ifdef CONFIG_X86_64
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index fd28d86fe3d..dcf5f2dd91e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
#include <trace/events/xen.h>
@@ -1287,6 +1288,25 @@ unsigned long xen_read_cr2_direct(void)
return this_cpu_read(xen_vcpu_info.arch.cr2);
}
+void xen_flush_tlb_all(void)
+{
+ struct mmuext_op *op;
+ struct multicall_space mcs;
+
+ trace_xen_mmu_flush_tlb_all(0);
+
+ preempt_disable();
+
+ mcs = xen_mc_entry(sizeof(*op));
+
+ op = mcs.args;
+ op->cmd = MMUEXT_TLB_FLUSH_ALL;
+ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+ xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+ preempt_enable();
+}
static void xen_flush_tlb(void)
{
struct mmuext_op *op;
@@ -2381,6 +2401,43 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
#ifdef CONFIG_XEN_PVHVM
+#ifdef CONFIG_PROC_VMCORE
+/*
+ * This function is used in two contexts:
+ * - the kdump kernel has to check whether a pfn of the crashed kernel
+ * was a ballooned page. vmcore is using this function to decide
+ * whether to access a pfn of the crashed kernel.
+ * - the kexec kernel has to check whether a pfn was ballooned by the
+ * previous kernel. If the pfn is ballooned, handle it properly.
+ * Returns 0 if the pfn is not backed by a RAM page, the caller may
+ * handle the pfn special in this case.
+ */
+static int xen_oldmem_pfn_is_ram(unsigned long pfn)
+{
+ struct xen_hvm_get_mem_type a = {
+ .domid = DOMID_SELF,
+ .pfn = pfn,
+ };
+ int ram;
+
+ if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a))
+ return -ENXIO;
+
+ switch (a.mem_type) {
+ case HVMMEM_mmio_dm:
+ ram = 0;
+ break;
+ case HVMMEM_ram_rw:
+ case HVMMEM_ram_ro:
+ default:
+ ram = 1;
+ break;
+ }
+
+ return ram;
+}
+#endif
+
static void xen_hvm_exit_mmap(struct mm_struct *mm)
{
struct xen_hvm_pagetable_dying a;
@@ -2411,6 +2468,9 @@ void __init xen_hvm_init_mmu_ops(void)
{
if (is_pagetable_dying_supported())
pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+#ifdef CONFIG_PROC_VMCORE
+ register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram);
+#endif
}
#endif
@@ -2477,7 +2537,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
err = 0;
out:
- flush_tlb_all();
+ xen_flush_tlb_all();
return err;
}