aboutsummaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/Makefile4
-rw-r--r--arch/x86/boot/compressed/head_32.S5
-rw-r--r--arch/x86/boot/compressed/head_64.S5
-rw-r--r--arch/x86/boot/compressed/misc.c13
-rw-r--r--arch/x86/boot/compressed/mkpiggy.c9
-rw-r--r--arch/x86/kernel/cpu/microcode/amd_early.c33
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c3
-rw-r--r--arch/x86/kernel/ptrace.c11
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/mm/pgtable.c21
-rw-r--r--arch/x86/tools/calc_run_size.pl30
11 files changed, 97 insertions, 39 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 0fcd9133790c..14fe7cba21d1 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -75,8 +75,10 @@ suffix-$(CONFIG_KERNEL_XZ) := xz
suffix-$(CONFIG_KERNEL_LZO) := lzo
suffix-$(CONFIG_KERNEL_LZ4) := lz4
+RUN_SIZE = $(shell objdump -h vmlinux | \
+ perl $(srctree)/arch/x86/tools/calc_run_size.pl)
quiet_cmd_mkpiggy = MKPIGGY $@
- cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
+ cmd_mkpiggy = $(obj)/mkpiggy $< $(RUN_SIZE) > $@ || ( rm -f $@ ; false )
targets += piggy.S
$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index f45ab7a36fb6..c5b56ed10aff 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -186,7 +186,8 @@ relocated:
* Do the decompression, and jump to the new kernel..
*/
/* push arguments for decompress_kernel: */
- pushl $z_output_len /* decompressed length */
+ pushl $z_run_size /* size of kernel with .bss and .brk */
+ pushl $z_output_len /* decompressed length, end of relocs */
leal z_extract_offset_negative(%ebx), %ebp
pushl %ebp /* output address */
pushl $z_input_len /* input_len */
@@ -196,7 +197,7 @@ relocated:
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
call decompress_kernel /* returns kernel location in %eax */
- addl $24, %esp
+ addl $28, %esp
/*
* Jump to the decompressed kernel.
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index b10fa66a2540..34bbc0911b7c 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -334,13 +334,16 @@ relocated:
* Do the decompression, and jump to the new kernel..
*/
pushq %rsi /* Save the real mode argument */
+ movq $z_run_size, %r9 /* size of kernel with .bss and .brk */
+ pushq %r9
movq %rsi, %rdi /* real mode address */
leaq boot_heap(%rip), %rsi /* malloc area for uncompression */
leaq input_data(%rip), %rdx /* input_data */
movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */
- movq $z_output_len, %r9 /* decompressed length */
+ movq $z_output_len, %r9 /* decompressed length, end of relocs */
call decompress_kernel /* returns kernel location in %rax */
+ popq %r9
popq %rsi
/*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 196eaf373a06..eb25ca1eb6da 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -393,7 +393,8 @@ asmlinkage void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
- unsigned long output_len)
+ unsigned long output_len,
+ unsigned long run_size)
{
real_mode = rmode;
@@ -416,8 +417,14 @@ asmlinkage void *decompress_kernel(void *rmode, memptr heap,
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
- output = choose_kernel_location(input_data, input_len,
- output, output_len);
+ /*
+ * The memory hole needed for the kernel is the larger of either
+ * the entire decompressed kernel plus relocation table, or the
+ * entire decompressed kernel plus .bss and .brk sections.
+ */
+ output = choose_kernel_location(input_data, input_len, output,
+ output_len > run_size ? output_len
+ : run_size);
/* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
index b669ab65bf6c..d8222f213182 100644
--- a/arch/x86/boot/compressed/mkpiggy.c
+++ b/arch/x86/boot/compressed/mkpiggy.c
@@ -36,11 +36,13 @@ int main(int argc, char *argv[])
uint32_t olen;
long ilen;
unsigned long offs;
+ unsigned long run_size;
FILE *f = NULL;
int retval = 1;
- if (argc < 2) {
- fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s compressed_file run_size\n",
+ argv[0]);
goto bail;
}
@@ -74,6 +76,7 @@ int main(int argc, char *argv[])
offs += olen >> 12; /* Add 8 bytes for each 32K block */
offs += 64*1024 + 128; /* Add 64K + 128 bytes slack */
offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
+ run_size = atoi(argv[2]);
printf(".section \".rodata..compressed\",\"a\",@progbits\n");
printf(".globl z_input_len\n");
@@ -85,6 +88,8 @@ int main(int argc, char *argv[])
/* z_extract_offset_negative allows simplification of head_32.S */
printf(".globl z_extract_offset_negative\n");
printf("z_extract_offset_negative = -0x%lx\n", offs);
+ printf(".globl z_run_size\n");
+ printf("z_run_size = %lu\n", run_size);
printf(".globl input_data, input_data_end\n");
printf("input_data:\n");
diff --git a/arch/x86/kernel/cpu/microcode/amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c
index 617a9e284245..b63773ba1646 100644
--- a/arch/x86/kernel/cpu/microcode/amd_early.c
+++ b/arch/x86/kernel/cpu/microcode/amd_early.c
@@ -108,12 +108,13 @@ static size_t compute_container_size(u8 *data, u32 total_size)
* load_microcode_amd() to save equivalent cpu table and microcode patches in
* kernel heap memory.
*/
-static void apply_ucode_in_initrd(void *ucode, size_t size)
+static void apply_ucode_in_initrd(void *ucode, size_t size, bool save_patch)
{
struct equiv_cpu_entry *eq;
size_t *cont_sz;
u32 *header;
u8 *data, **cont;
+ u8 (*patch)[PATCH_MAX_SIZE];
u16 eq_id = 0;
int offset, left;
u32 rev, eax, ebx, ecx, edx;
@@ -123,10 +124,12 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
cont_sz = (size_t *)__pa_nodebug(&container_size);
cont = (u8 **)__pa_nodebug(&container);
+ patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
#else
new_rev = &ucode_new_rev;
cont_sz = &container_size;
cont = &container;
+ patch = &amd_ucode_patch;
#endif
data = ucode;
@@ -213,9 +216,9 @@ static void apply_ucode_in_initrd(void *ucode, size_t size)
rev = mc->hdr.patch_id;
*new_rev = rev;
- /* save ucode patch */
- memcpy(amd_ucode_patch, mc,
- min_t(u32, header[1], PATCH_MAX_SIZE));
+ if (save_patch)
+ memcpy(patch, mc,
+ min_t(u32, header[1], PATCH_MAX_SIZE));
}
}
@@ -246,7 +249,7 @@ void __init load_ucode_amd_bsp(void)
*data = cp.data;
*size = cp.size;
- apply_ucode_in_initrd(cp.data, cp.size);
+ apply_ucode_in_initrd(cp.data, cp.size, true);
}
#ifdef CONFIG_X86_32
@@ -263,7 +266,7 @@ void load_ucode_amd_ap(void)
size_t *usize;
void **ucode;
- mc = (struct microcode_amd *)__pa(amd_ucode_patch);
+ mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
if (mc->hdr.patch_id && mc->hdr.processor_rev_id) {
__apply_microcode_amd(mc);
return;
@@ -275,7 +278,7 @@ void load_ucode_amd_ap(void)
if (!*ucode || !*usize)
return;
- apply_ucode_in_initrd(*ucode, *usize);
+ apply_ucode_in_initrd(*ucode, *usize, false);
}
static void __init collect_cpu_sig_on_bsp(void *arg)
@@ -339,7 +342,7 @@ void load_ucode_amd_ap(void)
* AP has a different equivalence ID than BSP, looks like
* mixed-steppings silicon so go through the ucode blob anew.
*/
- apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size);
+ apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size, false);
}
}
#endif
@@ -347,7 +350,9 @@ void load_ucode_amd_ap(void)
int __init save_microcode_in_initrd_amd(void)
{
unsigned long cont;
+ int retval = 0;
enum ucode_state ret;
+ u8 *cont_va;
u32 eax;
if (!container)
@@ -355,13 +360,15 @@ int __init save_microcode_in_initrd_amd(void)
#ifdef CONFIG_X86_32
get_bsp_sig();
- cont = (unsigned long)container;
+ cont = (unsigned long)container;
+ cont_va = __va(container);
#else
/*
* We need the physical address of the container for both bitness since
* boot_params.hdr.ramdisk_image is a physical address.
*/
- cont = __pa(container);
+ cont = __pa(container);
+ cont_va = container;
#endif
/*
@@ -372,6 +379,8 @@ int __init save_microcode_in_initrd_amd(void)
if (relocated_ramdisk)
container = (u8 *)(__va(relocated_ramdisk) +
(cont - boot_params.hdr.ramdisk_image));
+ else
+ container = cont_va;
if (ucode_new_rev)
pr_info("microcode: updated early to new patch_level=0x%08x\n",
@@ -382,7 +391,7 @@ int __init save_microcode_in_initrd_amd(void)
ret = load_microcode_amd(eax, container, container_size);
if (ret != UCODE_OK)
- return -EINVAL;
+ retval = -EINVAL;
/*
* This will be freed any msec now, stash patches for the current
@@ -391,5 +400,5 @@ int __init save_microcode_in_initrd_amd(void)
container = NULL;
container_size = 0;
- return 0;
+ return retval;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 1340ebfcb467..5ee8064bd1d2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2475,6 +2475,9 @@ __init int intel_pmu_init(void)
case 62: /* IvyBridge EP */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
+ /* dTLB-load-misses on IVB is different than SNB */
+ hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
+
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 7461f50d5bb1..0686fe313b3b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1441,15 +1441,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
force_sig_info(SIGTRAP, &info, tsk);
}
-
-#ifdef CONFIG_X86_32
-# define IS_IA32 1
-#elif defined CONFIG_IA32_EMULATION
-# define IS_IA32 is_compat_task()
-#else
-# define IS_IA32 0
-#endif
-
/*
* We must return the syscall number to actually look up in the table.
* This can be -1L to skip running any syscall at all.
@@ -1487,7 +1478,7 @@ long syscall_trace_enter(struct pt_regs *regs)
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->orig_ax);
- if (IS_IA32)
+ if (is_ia32_task())
audit_syscall_entry(AUDIT_ARCH_I386,
regs->orig_ax,
regs->bx, regs->cx,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aa21c23e010e..f127b83b5e0c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4911,7 +4911,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
++vcpu->stat.insn_emulation_fail;
trace_kvm_emulate_insn_failed(vcpu);
- if (!is_guest_mode(vcpu)) {
+ if (!is_guest_mode(vcpu) && kvm_x86_ops->get_cpl(vcpu) == 0) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index c96314abd144..0004ac72dbdd 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -399,13 +399,20 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma,
int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
- int young;
-
- young = ptep_test_and_clear_young(vma, address, ptep);
- if (young)
- flush_tlb_page(vma, address);
-
- return young;
+ /*
+ * On x86 CPUs, clearing the accessed bit without a TLB flush
+ * doesn't cause data corruption. [ It could cause incorrect
+ * page aging and the (mistaken) reclaim of hot pages, but the
+ * chance of that should be relatively low. ]
+ *
+ * So as a performance optimization don't flush the TLB when
+ * clearing the accessed bit, it will eventually be flushed by
+ * a context switch or a VM operation anyway. [ In the rare
+ * event of it not getting flushed for a long time the delay
+ * shouldn't really matter because there's no real memory
+ * pressure for swapout to react to. ]
+ */
+ return ptep_test_and_clear_young(vma, address, ptep);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/arch/x86/tools/calc_run_size.pl b/arch/x86/tools/calc_run_size.pl
new file mode 100644
index 000000000000..0b0b124d3ece
--- /dev/null
+++ b/arch/x86/tools/calc_run_size.pl
@@ -0,0 +1,30 @@
+#!/usr/bin/perl
+#
+# Calculate the amount of space needed to run the kernel, including room for
+# the .bss and .brk sections.
+#
+# Usage:
+# objdump -h a.out | perl calc_run_size.pl
+use strict;
+
+my $mem_size = 0;
+my $file_offset = 0;
+
+my $sections=" *[0-9]+ \.(?:bss|brk) +";
+while (<>) {
+ if (/^$sections([0-9a-f]+) +(?:[0-9a-f]+ +){2}([0-9a-f]+)/) {
+ my $size = hex($1);
+ my $offset = hex($2);
+ $mem_size += $size;
+ if ($file_offset == 0) {
+ $file_offset = $offset;
+ } elsif ($file_offset != $offset) {
+ die ".bss and .brk lack common file offset\n";
+ }
+ }
+}
+
+if ($file_offset == 0) {
+ die "Never found .bss or .brk file offset\n";
+}
+printf("%d\n", $mem_size + $file_offset);