diff options
Diffstat (limited to 'linux-user/i386')
-rw-r--r-- | linux-user/i386/Makefile.vdso | 11 | ||||
-rw-r--r-- | linux-user/i386/cpu_loop.c | 175 | ||||
-rw-r--r-- | linux-user/i386/meson.build | 7 | ||||
-rw-r--r-- | linux-user/i386/signal.c | 319 | ||||
-rw-r--r-- | linux-user/i386/target_elf.h | 2 | ||||
-rw-r--r-- | linux-user/i386/target_mman.h | 17 | ||||
-rw-r--r-- | linux-user/i386/target_prctl.h | 1 | ||||
-rw-r--r-- | linux-user/i386/target_proc.h | 1 | ||||
-rw-r--r-- | linux-user/i386/target_resource.h | 1 | ||||
-rw-r--r-- | linux-user/i386/target_signal.h | 20 | ||||
-rw-r--r-- | linux-user/i386/target_structs.h | 59 | ||||
-rw-r--r-- | linux-user/i386/target_syscall.h | 1 | ||||
-rw-r--r-- | linux-user/i386/vdso-asmoffset.h | 6 | ||||
-rw-r--r-- | linux-user/i386/vdso.S | 143 | ||||
-rw-r--r-- | linux-user/i386/vdso.ld | 76 | ||||
-rwxr-xr-x | linux-user/i386/vdso.so | bin | 0 -> 2672 bytes |
16 files changed, 582 insertions, 257 deletions
diff --git a/linux-user/i386/Makefile.vdso b/linux-user/i386/Makefile.vdso new file mode 100644 index 0000000000..95bc616f6d --- /dev/null +++ b/linux-user/i386/Makefile.vdso @@ -0,0 +1,11 @@ +include $(BUILD_DIR)/tests/tcg/i386-linux-user/config-target.mak + +SUBDIR = $(SRC_PATH)/linux-user/i386 +VPATH += $(SUBDIR) + +all: $(SUBDIR)/vdso.so + +$(SUBDIR)/vdso.so: vdso.S vdso.ld vdso-asmoffset.h + $(CC) -o $@ -m32 -nostdlib -shared -Wl,-h,linux-gate.so.1 \ + -Wl,--build-id=sha1 -Wl,--hash-style=both \ + -Wl,-T,$(SUBDIR)/vdso.ld $< diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c index f6a1cc632b..92beb6830c 100644 --- a/linux-user/i386/cpu_loop.c +++ b/linux-user/i386/cpu_loop.c @@ -18,8 +18,8 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "qemu.h" +#include "qemu/timer.h" #include "user-internals.h" #include "cpu_loop-common.h" #include "signal-common.h" @@ -47,7 +47,7 @@ static void write_dt(void *ptr, unsigned long addr, unsigned long limit, } static uint64_t *idt_table; -#ifdef TARGET_X86_64 + static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, uint64_t addr, unsigned int sel) { @@ -60,8 +60,10 @@ static void set_gate64(void *ptr, unsigned int type, unsigned int dpl, p[2] = tswap32(addr >> 32); p[3] = 0; } + +#ifdef TARGET_X86_64 /* only dpl matters as we do only user space emulation */ -static void set_idt(int n, unsigned int dpl) +static void set_idt(int n, unsigned int dpl, bool is64) { set_gate64(idt_table + n * 2, 0, dpl, 0, 0); } @@ -78,23 +80,16 @@ static void set_gate(void *ptr, unsigned int type, unsigned int dpl, } /* only dpl matters as we do only user space emulation */ -static void set_idt(int n, unsigned int dpl) +static void set_idt(int n, unsigned int dpl, bool is64) { - set_gate(idt_table + n, 0, dpl, 0, 0); + if (is64) { + set_gate64(idt_table + n * 2, 0, dpl, 0, 0); + } else { + set_gate(idt_table + n, 0, dpl, 0, 0); + } } #endif -static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr) -{ - target_siginfo_t info = { - .si_signo = sig, - .si_code = code, - ._sifields._sigfault._addr = addr - }; - - queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); -} - #ifdef TARGET_X86_64 static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) { @@ -107,7 +102,7 @@ static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) } env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; - gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); + force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); return false; } @@ -148,7 +143,7 @@ static void emulate_vsyscall(CPUX86State *env) } /* - * Validate the the pointer arguments. + * Validate the pointer arguments. */ switch (syscall) { case TARGET_NR_gettimeofday: @@ -180,8 +175,8 @@ static void emulate_vsyscall(CPUX86State *env) ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], env->regs[R_EDX], env->regs[10], env->regs[8], env->regs[9], 0, 0); - g_assert(ret != -TARGET_ERESTARTSYS); - g_assert(ret != -TARGET_QEMU_ESIGRETURN); + g_assert(ret != -QEMU_ERESTARTSYS); + g_assert(ret != -QEMU_ESIGRETURN); if (ret == -TARGET_EFAULT) { goto sigsegv; } @@ -193,16 +188,25 @@ static void emulate_vsyscall(CPUX86State *env) return; sigsegv: - /* Like force_sig(SIGSEGV). */ - gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); + force_sig(TARGET_SIGSEGV); } #endif +static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr) +{ +#ifndef TARGET_X86_64 + if (env->eflags & VM_MASK) { + handle_vm86_trap(env, trapnr); + return true; + } +#endif + return false; +} + void cpu_loop(CPUX86State *env) { CPUState *cs = env_cpu(env); int trapnr; - abi_ulong pc; abi_ulong ret; for(;;) { @@ -213,6 +217,9 @@ void cpu_loop(CPUX86State *env) switch(trapnr) { case 0x80: +#ifndef TARGET_X86_64 + case EXCP_SYSCALL: +#endif /* linux syscall from int $0x80 */ ret = do_syscall(env, env->regs[R_EAX], @@ -223,15 +230,15 @@ void cpu_loop(CPUX86State *env) env->regs[R_EDI], env->regs[R_EBP], 0, 0); - if (ret == -TARGET_ERESTARTSYS) { + if (ret == -QEMU_ERESTARTSYS) { env->eip -= 2; - } else if (ret != -TARGET_QEMU_ESIGRETURN) { + } else if (ret != -QEMU_ESIGRETURN) { env->regs[R_EAX] = ret; } break; -#ifndef TARGET_ABI32 +#ifdef TARGET_X86_64 case EXCP_SYSCALL: - /* linux syscall from syscall instruction */ + /* linux syscall from syscall instruction. */ ret = do_syscall(env, env->regs[R_EAX], env->regs[R_EDI], @@ -241,110 +248,111 @@ void cpu_loop(CPUX86State *env) env->regs[8], env->regs[9], 0, 0); - if (ret == -TARGET_ERESTARTSYS) { + if (ret == -QEMU_ERESTARTSYS) { env->eip -= 2; - } else if (ret != -TARGET_QEMU_ESIGRETURN) { + } else if (ret != -QEMU_ESIGRETURN) { env->regs[R_EAX] = ret; } break; -#endif -#ifdef TARGET_X86_64 case EXCP_VSYSCALL: emulate_vsyscall(env); break; #endif case EXCP0B_NOSEG: case EXCP0C_STACK: - gen_signal(env, TARGET_SIGBUS, TARGET_SI_KERNEL, 0); + force_sig(TARGET_SIGBUS); break; case EXCP0D_GPF: /* XXX: potential problem if ABI32 */ -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_fault(env); + if (maybe_handle_vm86_trap(env, trapnr)) { break; } -#endif - gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); + force_sig(TARGET_SIGSEGV); break; case EXCP0E_PAGE: - gen_signal(env, TARGET_SIGSEGV, - (env->error_code & 1 ? - TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), - env->cr[2]); + force_sig_fault(TARGET_SIGSEGV, + (env->error_code & PG_ERROR_P_MASK ? + TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), + env->cr[2]); break; case EXCP00_DIVZ: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); + if (maybe_handle_vm86_trap(env, trapnr)) { break; } -#endif - gen_signal(env, TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); + force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); break; case EXCP01_DB: - case EXCP03_INT3: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); + if (maybe_handle_vm86_trap(env, trapnr)) { break; } -#endif - if (trapnr == EXCP01_DB) { - gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); - } else { - gen_signal(env, TARGET_SIGTRAP, TARGET_SI_KERNEL, 0); + force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); + break; + case EXCP03_INT3: + if (maybe_handle_vm86_trap(env, trapnr)) { + break; } + force_sig(TARGET_SIGTRAP); break; case EXCP04_INTO: case EXCP05_BOUND: -#ifndef TARGET_X86_64 - if (env->eflags & VM_MASK) { - handle_vm86_trap(env, trapnr); + if (maybe_handle_vm86_trap(env, trapnr)) { break; } -#endif - gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); + force_sig(TARGET_SIGSEGV); break; case EXCP06_ILLOP: - gen_signal(env, TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); + force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); break; case EXCP_INTERRUPT: /* just indicate that signals should be handled asap */ break; case EXCP_DEBUG: - gen_signal(env, TARGET_SIGTRAP, TARGET_TRAP_BRKPT, 0); + force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); break; case EXCP_ATOMIC: cpu_exec_step_atomic(cs); break; default: - pc = env->segs[R_CS].base + env->eip; - EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", - (long)pc, trapnr); + EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", + trapnr); abort(); } process_pending_signals(env); } } +static void target_cpu_free(void *obj) +{ + target_munmap(cpu_env(obj)->gdt.base, + sizeof(uint64_t) * TARGET_GDT_ENTRIES); + g_free(obj); +} + void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) { + CPUState *cpu = env_cpu(env); + bool is64 = (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) != 0; + int i; + + OBJECT(cpu)->free = target_cpu_free; env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; env->hflags |= HF_PE_MASK | HF_CPL_MASK; if (env->features[FEAT_1_EDX] & CPUID_SSE) { env->cr[4] |= CR4_OSFXSR_MASK; env->hflags |= HF_OSFXSR_MASK; } -#ifndef TARGET_ABI32 + /* enable 64 bit mode if possible */ - if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { + if (is64) { + env->cr[4] |= CR4_PAE_MASK; + env->efer |= MSR_EFER_LMA | MSR_EFER_LME; + env->hflags |= HF_LMA_MASK; + } +#ifndef TARGET_ABI32 + else { fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); exit(EXIT_FAILURE); } - env->cr[4] |= CR4_PAE_MASK; - env->efer |= MSR_EFER_LMA | MSR_EFER_LME; - env->hflags |= HF_LMA_MASK; #endif /* flags setup : we activate the IRQs by default as in user mode */ @@ -383,27 +391,12 @@ void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); idt_table = g2h_untagged(env->idt.base); - set_idt(0, 0); - set_idt(1, 0); - set_idt(2, 0); - set_idt(3, 3); - set_idt(4, 3); - set_idt(5, 0); - set_idt(6, 0); - set_idt(7, 0); - set_idt(8, 0); - set_idt(9, 0); - set_idt(10, 0); - set_idt(11, 0); - set_idt(12, 0); - set_idt(13, 0); - set_idt(14, 0); - set_idt(15, 0); - set_idt(16, 0); - set_idt(17, 0); - set_idt(18, 0); - set_idt(19, 0); - set_idt(0x80, 3); + for (i = 0; i < 20; i++) { + set_idt(i, 0, is64); + } + set_idt(3, 3, is64); + set_idt(4, 3, is64); + set_idt(0x80, 3, is64); /* linux segment setup */ { diff --git a/linux-user/i386/meson.build b/linux-user/i386/meson.build index ee523019a5..d42fc6cbc9 100644 --- a/linux-user/i386/meson.build +++ b/linux-user/i386/meson.build @@ -3,3 +3,10 @@ syscall_nr_generators += { arguments: [ meson.current_source_dir() / 'syscallhdr.sh', '@INPUT@', '@OUTPUT@', '@EXTRA_ARGS@' ], output: '@BASENAME@_nr.h') } + +vdso_inc = gen_vdso.process('vdso.so', extra_args: [ + '-s', '__kernel_sigreturn', + '-r', '__kernel_rt_sigreturn' + ]) + +linux_user_ss.add(when: 'TARGET_I386', if_true: vdso_inc) diff --git a/linux-user/i386/signal.c b/linux-user/i386/signal.c index 3b4b55fc0a..cfe70fc5cf 100644 --- a/linux-user/i386/signal.c +++ b/linux-user/i386/signal.c @@ -24,6 +24,10 @@ /* from the Linux kernel - /arch/x86/include/uapi/asm/sigcontext.h */ +#define TARGET_FP_XSTATE_MAGIC1 0x46505853U /* FPXS */ +#define TARGET_FP_XSTATE_MAGIC2 0x46505845U /* FPXE */ +#define TARGET_FP_XSTATE_MAGIC2_SIZE 4 + struct target_fpreg { uint16_t significand[4]; uint16_t exponent; @@ -39,29 +43,16 @@ struct target_xmmreg { uint32_t element[4]; }; -struct target_fpstate_32 { - /* Regular FPU environment */ - uint32_t cw; - uint32_t sw; - uint32_t tag; - uint32_t ipoff; - uint32_t cssel; - uint32_t dataoff; - uint32_t datasel; - struct target_fpreg st[8]; - uint16_t status; - uint16_t magic; /* 0xffff = regular FPU data only */ - - /* FXSR FPU environment */ - uint32_t _fxsr_env[6]; /* FXSR FPU env is ignored */ - uint32_t mxcsr; - uint32_t reserved; - struct target_fpxreg fxsr_st[8]; /* FXSR FPU reg data is ignored */ - struct target_xmmreg xmm[8]; - uint32_t padding[56]; +struct target_fpx_sw_bytes { + uint32_t magic1; + uint32_t extended_size; + uint64_t xfeatures; + uint32_t xstate_size; + uint32_t reserved[7]; }; +QEMU_BUILD_BUG_ON(sizeof(struct target_fpx_sw_bytes) != 12*4); -struct target_fpstate_64 { +struct target_fpstate_fxsave { /* FXSAVE format */ uint16_t cw; uint16_t sw; @@ -73,13 +64,41 @@ struct target_fpstate_64 { uint32_t mxcsr_mask; uint32_t st_space[32]; uint32_t xmm_space[64]; - uint32_t reserved[24]; + uint32_t hw_reserved[12]; + struct target_fpx_sw_bytes sw_reserved; + uint8_t xfeatures[]; +}; +#define TARGET_FXSAVE_SIZE sizeof(struct target_fpstate_fxsave) +QEMU_BUILD_BUG_ON(TARGET_FXSAVE_SIZE != 512); +QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_fxsave, sw_reserved) != 464); + +struct target_fpstate_32 { + /* Regular FPU environment */ + uint32_t cw; + uint32_t sw; + uint32_t tag; + uint32_t ipoff; + uint32_t cssel; + uint32_t dataoff; + uint32_t datasel; + struct target_fpreg st[8]; + uint16_t status; + uint16_t magic; /* 0xffff = regular FPU data only */ + struct target_fpstate_fxsave fxsave; }; +/* + * For simplicity, setup_frame aligns struct target_fpstate_32 to + * 16 bytes, so ensure that the FXSAVE area is also aligned. + */ +QEMU_BUILD_BUG_ON(offsetof(struct target_fpstate_32, fxsave) & 15); + #ifndef TARGET_X86_64 # define target_fpstate target_fpstate_32 +# define TARGET_FPSTATE_FXSAVE_OFFSET offsetof(struct target_fpstate_32, fxsave) #else -# define target_fpstate target_fpstate_64 +# define target_fpstate target_fpstate_fxsave +# define TARGET_FPSTATE_FXSAVE_OFFSET 0 #endif struct target_sigcontext_32 { @@ -163,10 +182,25 @@ struct sigframe { abi_ulong pretcode; int sig; struct target_sigcontext sc; - struct target_fpstate fpstate; + /* + * The actual fpstate is placed after retcode[] below, to make + * room for the variable-sized xsave data. The older unused fpstate + * has to be kept to avoid changing the offset of extramask[], which + * is part of the ABI. + */ + struct target_fpstate fpstate_unused; abi_ulong extramask[TARGET_NSIG_WORDS-1]; char retcode[8]; + + /* + * This field will be 16-byte aligned in memory. Applying QEMU_ALIGNED + * to it ensures that the base of the frame has an appropriate alignment + * too. + */ + struct target_fpstate fpstate QEMU_ALIGNED(8); }; +#define TARGET_SIGFRAME_FXSAVE_OFFSET ( \ + offsetof(struct sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) struct rt_sigframe { abi_ulong pretcode; @@ -175,9 +209,21 @@ struct rt_sigframe { abi_ulong puc; struct target_siginfo info; struct target_ucontext uc; - struct target_fpstate fpstate; char retcode[8]; + struct target_fpstate fpstate QEMU_ALIGNED(8); }; +#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ + offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) + +/* + * Verify that vdso-asmoffset.h constants match. + */ +#include "i386/vdso-asmoffset.h" + +QEMU_BUILD_BUG_ON(offsetof(struct sigframe, sc.eip) + != SIGFRAME_SIGCONTEXT_eip); +QEMU_BUILD_BUG_ON(offsetof(struct rt_sigframe, uc.tuc_mcontext.eip) + != RT_SIGFRAME_SIGCONTEXT_eip); #else @@ -185,16 +231,51 @@ struct rt_sigframe { abi_ulong pretcode; struct target_ucontext uc; struct target_siginfo info; - struct target_fpstate fpstate; + struct target_fpstate fpstate QEMU_ALIGNED(16); }; - +#define TARGET_RT_SIGFRAME_FXSAVE_OFFSET ( \ + offsetof(struct rt_sigframe, fpstate) + TARGET_FPSTATE_FXSAVE_OFFSET) #endif /* * Set up a signal frame. */ -/* XXX: save x87 state */ +static void xsave_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave, + abi_ulong fxsave_addr) +{ + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + /* fxsave_addr must be 16 byte aligned for fxsave */ + assert(!(fxsave_addr & 0xf)); + + cpu_x86_fxsave(env, fxsave_addr); + __put_user(0, &fxsave->sw_reserved.magic1); + } else { + uint32_t xstate_size = xsave_area_size(env->xcr0, false); + uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE; + + /* + * extended_size is the offset from fpstate_addr to right after the end + * of the extended save states. On 32-bit that includes the legacy + * FSAVE area. + */ + uint32_t extended_size = TARGET_FPSTATE_FXSAVE_OFFSET + + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE; + + /* fxsave_addr must be 64 byte aligned for xsave */ + assert(!(fxsave_addr & 0x3f)); + + /* Zero the header, XSAVE *adds* features to an existing save state. */ + memset(fxsave->xfeatures, 0, 64); + cpu_x86_xsave(env, fxsave_addr); + __put_user(TARGET_FP_XSTATE_MAGIC1, &fxsave->sw_reserved.magic1); + __put_user(extended_size, &fxsave->sw_reserved.extended_size); + __put_user(env->xcr0, &fxsave->sw_reserved.xfeatures); + __put_user(xstate_size, &fxsave->sw_reserved.xstate_size); + __put_user(TARGET_FP_XSTATE_MAGIC2, (uint32_t *) &fxsave->xfeatures[xfeatures_size]); + } +} + static void setup_sigcontext(struct target_sigcontext *sc, struct target_fpstate *fpstate, CPUX86State *env, abi_ulong mask, abi_ulong fpstate_addr) @@ -226,13 +307,14 @@ static void setup_sigcontext(struct target_sigcontext *sc, cpu_x86_fsave(env, fpstate_addr, 1); fpstate->status = fpstate->sw; - magic = 0xffff; + if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { + magic = 0xffff; + } else { + xsave_sigcontext(env, &fpstate->fxsave, + fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); + magic = 0; + } __put_user(magic, &fpstate->magic); - __put_user(fpstate_addr, &sc->fpstate); - - /* non-iBCS2 extensions.. */ - __put_user(mask, &sc->oldmask); - __put_user(env->cr[2], &sc->cr2); #else __put_user(env->regs[R_EDI], &sc->rdi); __put_user(env->regs[R_ESI], &sc->rsi); @@ -262,15 +344,14 @@ static void setup_sigcontext(struct target_sigcontext *sc, __put_user((uint16_t)0, &sc->fs); __put_user(env->segs[R_SS].selector, &sc->ss); - __put_user(mask, &sc->oldmask); - __put_user(env->cr[2], &sc->cr2); - - /* fpstate_addr must be 16 byte aligned for fxsave */ - assert(!(fpstate_addr & 0xf)); + xsave_sigcontext(env, fpstate, fpstate_addr); +#endif - cpu_x86_fxsave(env, fpstate_addr); __put_user(fpstate_addr, &sc->fpstate); -#endif + + /* non-iBCS2 extensions.. */ + __put_user(mask, &sc->oldmask); + __put_user(env->cr[2], &sc->cr2); } /* @@ -278,7 +359,7 @@ static void setup_sigcontext(struct target_sigcontext *sc, */ static inline abi_ulong -get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t frame_size) +get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t fxsave_offset) { unsigned long esp; @@ -302,14 +383,34 @@ get_sigframe(struct target_sigaction *ka, CPUX86State *env, size_t frame_size) #endif } -#ifndef TARGET_X86_64 - return (esp - frame_size) & -8ul; -#else - return ((esp - frame_size) & (~15ul)) - 8; -#endif + if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { + return (esp - (fxsave_offset + TARGET_FXSAVE_SIZE)) & -8ul; + } else if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + return ((esp - TARGET_FXSAVE_SIZE) & -16ul) - fxsave_offset; + } else { + size_t xstate_size = + xsave_area_size(env->xcr0, false) + TARGET_FP_XSTATE_MAGIC2_SIZE; + return ((esp - xstate_size) & -64ul) - fxsave_offset; + } } #ifndef TARGET_X86_64 +static void install_sigtramp(void *tramp) +{ + /* This is popl %eax ; movl $syscall,%eax ; int $0x80 */ + __put_user(0xb858, (uint16_t *)(tramp + 0)); + __put_user(TARGET_NR_sigreturn, (int32_t *)(tramp + 2)); + __put_user(0x80cd, (uint16_t *)(tramp + 6)); +} + +static void install_rt_sigtramp(void *tramp) +{ + /* This is movl $syscall,%eax ; int $0x80 */ + __put_user(0xb8, (uint8_t *)(tramp + 0)); + __put_user(TARGET_NR_rt_sigreturn, (int32_t *)(tramp + 1)); + __put_user(0x80cd, (uint16_t *)(tramp + 5)); +} + /* compare linux/arch/i386/kernel/signal.c:setup_frame() */ void setup_frame(int sig, struct target_sigaction *ka, target_sigset_t *set, CPUX86State *env) @@ -318,7 +419,7 @@ void setup_frame(int sig, struct target_sigaction *ka, struct sigframe *frame; int i; - frame_addr = get_sigframe(ka, env, sizeof(*frame)); + frame_addr = get_sigframe(ka, env, TARGET_SIGFRAME_FXSAVE_OFFSET); trace_user_setup_frame(env, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) @@ -329,7 +430,7 @@ void setup_frame(int sig, struct target_sigaction *ka, setup_sigcontext(&frame->sc, &frame->fpstate, env, set->sig[0], frame_addr + offsetof(struct sigframe, fpstate)); - for(i = 1; i < TARGET_NSIG_WORDS; i++) { + for (i = 1; i < TARGET_NSIG_WORDS; i++) { __put_user(set->sig[i], &frame->extramask[i - 1]); } @@ -338,16 +439,9 @@ void setup_frame(int sig, struct target_sigaction *ka, if (ka->sa_flags & TARGET_SA_RESTORER) { __put_user(ka->sa_restorer, &frame->pretcode); } else { - uint16_t val16; - abi_ulong retcode_addr; - retcode_addr = frame_addr + offsetof(struct sigframe, retcode); - __put_user(retcode_addr, &frame->pretcode); - /* This is popl %eax ; movl $,%eax ; int $0x80 */ - val16 = 0xb858; - __put_user(val16, (uint16_t *)(frame->retcode+0)); - __put_user(TARGET_NR_sigreturn, (int *)(frame->retcode+2)); - val16 = 0x80cd; - __put_user(val16, (uint16_t *)(frame->retcode+6)); + /* This is no longer used, but is retained for ABI compatibility. */ + install_sigtramp(frame->retcode); + __put_user(default_sigreturn, &frame->pretcode); } /* Set up registers for signal handler */ @@ -381,7 +475,7 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, struct rt_sigframe *frame; int i; - frame_addr = get_sigframe(ka, env, sizeof(*frame)); + frame_addr = get_sigframe(ka, env, TARGET_RT_SIGFRAME_FXSAVE_OFFSET); trace_user_setup_rt_frame(env, frame_addr); if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 0)) @@ -396,40 +490,38 @@ void setup_rt_frame(int sig, struct target_sigaction *ka, __put_user(addr, &frame->puc); #endif if (ka->sa_flags & TARGET_SA_SIGINFO) { - tswap_siginfo(&frame->info, info); + frame->info = *info; } /* Create the ucontext. */ - __put_user(0, &frame->uc.tuc_flags); + if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { + __put_user(1, &frame->uc.tuc_flags); + } else { + __put_user(0, &frame->uc.tuc_flags); + } __put_user(0, &frame->uc.tuc_link); target_save_altstack(&frame->uc.tuc_stack, env); setup_sigcontext(&frame->uc.tuc_mcontext, &frame->fpstate, env, set->sig[0], frame_addr + offsetof(struct rt_sigframe, fpstate)); - for(i = 0; i < TARGET_NSIG_WORDS; i++) { + for (i = 0; i < TARGET_NSIG_WORDS; i++) { __put_user(set->sig[i], &frame->uc.tuc_sigmask.sig[i]); } /* Set up to return from userspace. If provided, use a stub already in userspace. */ -#ifndef TARGET_X86_64 if (ka->sa_flags & TARGET_SA_RESTORER) { __put_user(ka->sa_restorer, &frame->pretcode); } else { - uint16_t val16; - addr = frame_addr + offsetof(struct rt_sigframe, retcode); - __put_user(addr, &frame->pretcode); - /* This is movl $,%eax ; int $0x80 */ - __put_user(0xb8, (char *)(frame->retcode+0)); - __put_user(TARGET_NR_rt_sigreturn, (int *)(frame->retcode+1)); - val16 = 0x80cd; - __put_user(val16, (uint16_t *)(frame->retcode+5)); - } +#ifdef TARGET_X86_64 + /* For x86_64, SA_RESTORER is required ABI. */ + goto give_sigsegv; #else - /* XXX: Would be slightly better to return -EFAULT here if test fails - assert(ka->sa_flags & TARGET_SA_RESTORER); */ - __put_user(ka->sa_restorer, &frame->pretcode); + /* This is no longer used, but is retained for ABI compatibility. */ + install_rt_sigtramp(frame->retcode); + __put_user(default_rt_sigreturn, &frame->pretcode); #endif + } /* Set up registers for signal handler */ env->regs[R_ESP] = frame_addr; @@ -460,10 +552,37 @@ give_sigsegv: force_sigsegv(sig); } +static int xrstor_sigcontext(CPUX86State *env, struct target_fpstate_fxsave *fxsave, + abi_ulong fxsave_addr) +{ + if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) { + uint32_t extended_size = tswapl(fxsave->sw_reserved.extended_size); + uint32_t xstate_size = tswapl(fxsave->sw_reserved.xstate_size); + uint32_t xfeatures_size = xstate_size - TARGET_FXSAVE_SIZE; + + /* Linux checks MAGIC2 using xstate_size, not extended_size. */ + if (tswapl(fxsave->sw_reserved.magic1) == TARGET_FP_XSTATE_MAGIC1 && + extended_size >= TARGET_FPSTATE_FXSAVE_OFFSET + xstate_size + TARGET_FP_XSTATE_MAGIC2_SIZE) { + if (!access_ok(env_cpu(env), VERIFY_READ, fxsave_addr, + extended_size - TARGET_FPSTATE_FXSAVE_OFFSET)) { + return 1; + } + if (tswapl(*(uint32_t *) &fxsave->xfeatures[xfeatures_size]) == TARGET_FP_XSTATE_MAGIC2) { + cpu_x86_xrstor(env, fxsave_addr); + return 0; + } + } + /* fall through to fxrstor */ + } + + cpu_x86_fxrstor(env, fxsave_addr); + return 0; +} + static int restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) { - unsigned int err = 0; + int err = 1; abi_ulong fpstate_addr; unsigned int tmpflags; @@ -514,20 +633,28 @@ restore_sigcontext(CPUX86State *env, struct target_sigcontext *sc) fpstate_addr = tswapl(sc->fpstate); if (fpstate_addr != 0) { - if (!access_ok(env_cpu(env), VERIFY_READ, fpstate_addr, - sizeof(struct target_fpstate))) { - goto badframe; + struct target_fpstate *fpstate; + if (!lock_user_struct(VERIFY_READ, fpstate, fpstate_addr, + sizeof(struct target_fpstate))) { + return err; } #ifndef TARGET_X86_64 - cpu_x86_frstor(env, fpstate_addr, 1); + if (!(env->features[FEAT_1_EDX] & CPUID_FXSR)) { + cpu_x86_frstor(env, fpstate_addr, 1); + err = 0; + } else { + err = xrstor_sigcontext(env, &fpstate->fxsave, + fpstate_addr + TARGET_FPSTATE_FXSAVE_OFFSET); + } #else - cpu_x86_fxrstor(env, fpstate_addr); + err = xrstor_sigcontext(env, fpstate, fpstate_addr); #endif + unlock_user_struct(fpstate, fpstate_addr, 0); + } else { + err = 0; } return err; -badframe: - return 1; } /* Note: there is no sigreturn on x86_64, there is only rt_sigreturn */ @@ -556,12 +683,12 @@ long do_sigreturn(CPUX86State *env) if (restore_sigcontext(env, &frame->sc)) goto badframe; unlock_user_struct(frame, frame_addr, 0); - return -TARGET_QEMU_ESIGRETURN; + return -QEMU_ESIGRETURN; badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return -TARGET_QEMU_ESIGRETURN; + return -QEMU_ESIGRETURN; } #endif @@ -585,10 +712,26 @@ long do_rt_sigreturn(CPUX86State *env) target_restore_altstack(&frame->uc.tuc_stack, env); unlock_user_struct(frame, frame_addr, 0); - return -TARGET_QEMU_ESIGRETURN; + return -QEMU_ESIGRETURN; badframe: unlock_user_struct(frame, frame_addr, 0); force_sig(TARGET_SIGSEGV); - return -TARGET_QEMU_ESIGRETURN; + return -QEMU_ESIGRETURN; +} + +#ifndef TARGET_X86_64 +void setup_sigtramp(abi_ulong sigtramp_page) +{ + uint16_t *tramp = lock_user(VERIFY_WRITE, sigtramp_page, 2 * 8, 0); + assert(tramp != NULL); + + default_sigreturn = sigtramp_page; + install_sigtramp(tramp); + + default_rt_sigreturn = sigtramp_page + 8; + install_rt_sigtramp(tramp + 8); + + unlock_user(tramp, sigtramp_page, 2 * 8); } +#endif diff --git a/linux-user/i386/target_elf.h b/linux-user/i386/target_elf.h index 1c6142e7da..238a9aba73 100644 --- a/linux-user/i386/target_elf.h +++ b/linux-user/i386/target_elf.h @@ -9,6 +9,6 @@ #define I386_TARGET_ELF_H static inline const char *cpu_get_model(uint32_t eflags) { - return "qemu32"; + return "max"; } #endif diff --git a/linux-user/i386/target_mman.h b/linux-user/i386/target_mman.h new file mode 100644 index 0000000000..e3b8e1eaa6 --- /dev/null +++ b/linux-user/i386/target_mman.h @@ -0,0 +1,17 @@ +/* + * arch/x86/include/asm/processor.h: + * TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW) + * __TASK_UNMAPPED_BASE(S) PAGE_ALIGN(S / 3) + * + * arch/x86/include/asm/page_32_types.h: + * TASK_SIZE_LOW TASK_SIZE + * TASK_SIZE __PAGE_OFFSET + * __PAGE_OFFSET CONFIG_PAGE_OFFSET + * CONFIG_PAGE_OFFSET 0xc0000000 (default in Kconfig) + */ +#define TASK_UNMAPPED_BASE 0x40000000 + +/* arch/x86/include/asm/elf.h */ +#define ELF_ET_DYN_BASE 0x00400000 + +#include "../generic/target_mman.h" diff --git a/linux-user/i386/target_prctl.h b/linux-user/i386/target_prctl.h new file mode 100644 index 0000000000..eb53b31ad5 --- /dev/null +++ b/linux-user/i386/target_prctl.h @@ -0,0 +1 @@ +/* No special prctl support required. */ diff --git a/linux-user/i386/target_proc.h b/linux-user/i386/target_proc.h new file mode 100644 index 0000000000..43fe29ca72 --- /dev/null +++ b/linux-user/i386/target_proc.h @@ -0,0 +1 @@ +/* No target-specific /proc support */ diff --git a/linux-user/i386/target_resource.h b/linux-user/i386/target_resource.h new file mode 100644 index 0000000000..227259594c --- /dev/null +++ b/linux-user/i386/target_resource.h @@ -0,0 +1 @@ +#include "../generic/target_resource.h" diff --git a/linux-user/i386/target_signal.h b/linux-user/i386/target_signal.h index 50361af874..9315cba241 100644 --- a/linux-user/i386/target_signal.h +++ b/linux-user/i386/target_signal.h @@ -1,25 +1,9 @@ #ifndef I386_TARGET_SIGNAL_H #define I386_TARGET_SIGNAL_H -/* this struct defines a stack used during syscall handling */ - -typedef struct target_sigaltstack { - abi_ulong ss_sp; - abi_int ss_flags; - abi_ulong ss_size; -} target_stack_t; - - -/* - * sigaltstack controls - */ -#define TARGET_SS_ONSTACK 1 -#define TARGET_SS_DISABLE 2 - -#define TARGET_MINSIGSTKSZ 2048 -#define TARGET_SIGSTKSZ 8192 - #include "../generic/signal.h" #define TARGET_ARCH_HAS_SETUP_FRAME +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 1 + #endif /* I386_TARGET_SIGNAL_H */ diff --git a/linux-user/i386/target_structs.h b/linux-user/i386/target_structs.h index e22847fd20..3a06f373c3 100644 --- a/linux-user/i386/target_structs.h +++ b/linux-user/i386/target_structs.h @@ -1,58 +1 @@ -/* - * i386 specific structures for linux-user - * - * Copyright (c) 2013 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ -#ifndef I386_TARGET_STRUCTS_H -#define I386_TARGET_STRUCTS_H - -struct target_ipc_perm { - abi_int __key; /* Key. */ - abi_uint uid; /* Owner's user ID. */ - abi_uint gid; /* Owner's group ID. */ - abi_uint cuid; /* Creator's user ID. */ - abi_uint cgid; /* Creator's group ID. */ - abi_ushort mode; /* Read/write permission. */ - abi_ushort __pad1; - abi_ushort __seq; /* Sequence number. */ - abi_ushort __pad2; - abi_ulong __unused1; - abi_ulong __unused2; -}; - -struct target_shmid_ds { - struct target_ipc_perm shm_perm; /* operation permission struct */ - abi_long shm_segsz; /* size of segment in bytes */ - abi_ulong shm_atime; /* time of last shmat() */ -#if TARGET_ABI_BITS == 32 - abi_ulong __unused1; -#endif - abi_ulong shm_dtime; /* time of last shmdt() */ -#if TARGET_ABI_BITS == 32 - abi_ulong __unused2; -#endif - abi_ulong shm_ctime; /* time of last change by shmctl() */ -#if TARGET_ABI_BITS == 32 - abi_ulong __unused3; -#endif - abi_int shm_cpid; /* pid of creator */ - abi_int shm_lpid; /* pid of last shmop */ - abi_ulong shm_nattch; /* number of current attaches */ - abi_ulong __unused4; - abi_ulong __unused5; -}; - -#endif +#include "../generic/target_structs.h" diff --git a/linux-user/i386/target_syscall.h b/linux-user/i386/target_syscall.h index ed356b3908..aaade06b13 100644 --- a/linux-user/i386/target_syscall.h +++ b/linux-user/i386/target_syscall.h @@ -150,7 +150,6 @@ struct target_vm86plus_struct { #define UNAME_MINIMUM_RELEASE "2.6.32" #define TARGET_CLONE_BACKWARDS -#define TARGET_MINSIGSTKSZ 2048 #define TARGET_MCL_CURRENT 1 #define TARGET_MCL_FUTURE 2 #define TARGET_MCL_ONFAULT 4 diff --git a/linux-user/i386/vdso-asmoffset.h b/linux-user/i386/vdso-asmoffset.h new file mode 100644 index 0000000000..4e5ee0dd49 --- /dev/null +++ b/linux-user/i386/vdso-asmoffset.h @@ -0,0 +1,6 @@ +/* + * offsetof(struct sigframe, sc.eip) + * offsetof(struct rt_sigframe, uc.tuc_mcontext.eip) + */ +#define SIGFRAME_SIGCONTEXT_eip 64 +#define RT_SIGFRAME_SIGCONTEXT_eip 220 diff --git a/linux-user/i386/vdso.S b/linux-user/i386/vdso.S new file mode 100644 index 0000000000..e7a1f333a1 --- /dev/null +++ b/linux-user/i386/vdso.S @@ -0,0 +1,143 @@ +/* + * i386 linux replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <asm/unistd.h> +#include "vdso-asmoffset.h" + +.macro endf name + .globl \name + .type \name, @function + .size \name, . - \name +.endm + +.macro vdso_syscall1 name, nr +\name: + .cfi_startproc + mov %ebx, %edx + .cfi_register %ebx, %edx + mov 4(%esp), %ebx + mov $\nr, %eax + int $0x80 + mov %edx, %ebx + ret + .cfi_endproc +endf \name +.endm + +.macro vdso_syscall2 name, nr +\name: + .cfi_startproc + mov %ebx, %edx + .cfi_register %ebx, %edx + mov 4(%esp), %ebx + mov 8(%esp), %ecx + mov $\nr, %eax + int $0x80 + mov %edx, %ebx + ret + .cfi_endproc +endf \name +.endm + +.macro vdso_syscall3 name, nr +\name: + .cfi_startproc + push %ebx + .cfi_adjust_cfa_offset 4 + .cfi_rel_offset %ebx, 0 + mov 8(%esp), %ebx + mov 12(%esp), %ecx + mov 16(%esp), %edx + mov $\nr, %eax + int $0x80 + pop %ebx + .cfi_adjust_cfa_offset -4 + .cfi_restore %ebx + ret + .cfi_endproc +endf \name +.endm + +__kernel_vsyscall: + .cfi_startproc + int $0x80 + ret + .cfi_endproc +endf __kernel_vsyscall + +vdso_syscall2 __vdso_clock_gettime, __NR_clock_gettime +vdso_syscall2 __vdso_clock_gettime64, __NR_clock_gettime64 +vdso_syscall2 __vdso_clock_getres, __NR_clock_getres +vdso_syscall2 __vdso_gettimeofday, __NR_gettimeofday +vdso_syscall1 __vdso_time, __NR_time +vdso_syscall3 __vdso_getcpu, __NR_gettimeofday + +/* + * Signal return handlers. + */ + + .cfi_startproc simple + .cfi_signal_frame + +/* + * For convenience, put the cfa just above eip in sigcontext, and count + * offsets backward from there. Re-compute the cfa in the two contexts + * we have for signal unwinding. This is far simpler than the + * DW_CFA_expression form that the kernel uses, and is equally correct. + */ + + .cfi_def_cfa %esp, SIGFRAME_SIGCONTEXT_eip + 4 + + .cfi_offset %eip, -4 + /* err, -8 */ + /* trapno, -12 */ + .cfi_offset %eax, -16 + .cfi_offset %ecx, -20 + .cfi_offset %edx, -24 + .cfi_offset %ebx, -28 + .cfi_offset %esp, -32 + .cfi_offset %ebp, -36 + .cfi_offset %esi, -40 + .cfi_offset %edi, -44 + +/* + * While this frame is marked as a signal frame, that only applies to how + * the return address is handled for the outer frame. The return address + * that arrived here, from the inner frame, is not marked as a signal frame + * and so the unwinder still tries to subtract 1 to examine the presumed + * call insn. Thus we must extend the unwind info to a nop before the start. + */ + nop + +__kernel_sigreturn: + popl %eax /* pop sig */ + .cfi_adjust_cfa_offset -4 + movl $__NR_sigreturn, %eax + int $0x80 +endf __kernel_sigreturn + + .cfi_def_cfa_offset RT_SIGFRAME_SIGCONTEXT_eip + 4 + nop + +__kernel_rt_sigreturn: + movl $__NR_rt_sigreturn, %eax + int $0x80 +endf __kernel_rt_sigreturn + + .cfi_endproc + +/* + * TODO: Add elf notes. E.g. + * + * #include <linux/elfnote.h> + * ELFNOTE_START(Linux, 0, "a") + * .long LINUX_VERSION_CODE + * ELFNOTE_END + * + * but what version number would we set for QEMU? + */ diff --git a/linux-user/i386/vdso.ld b/linux-user/i386/vdso.ld new file mode 100644 index 0000000000..326b7a8f98 --- /dev/null +++ b/linux-user/i386/vdso.ld @@ -0,0 +1,76 @@ +/* + * Linker script for linux i386 replacement vdso. + * + * Copyright 2023 Linaro, Ltd. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +ENTRY(__kernel_vsyscall) + +VERSION { + LINUX_2.6 { + global: + __vdso_clock_gettime; + __vdso_gettimeofday; + __vdso_time; + __vdso_clock_getres; + __vdso_clock_gettime64; + __vdso_getcpu; + }; + + LINUX_2.5 { + global: + __kernel_vsyscall; + __kernel_sigreturn; + __kernel_rt_sigreturn; + local: *; + }; +} + +PHDRS { + phdr PT_PHDR FLAGS(4) PHDRS; + load PT_LOAD FLAGS(7) FILEHDR PHDRS; /* FLAGS=RWX */ + dynamic PT_DYNAMIC FLAGS(4); + eh_frame_hdr PT_GNU_EH_FRAME; + note PT_NOTE FLAGS(4); +} + +SECTIONS { + . = SIZEOF_HEADERS; + + /* + * The following, including the FILEHDRS and PHDRS, are modified + * when we relocate the binary. We want them to be initially + * writable for the relocation; we'll force them read-only after. + */ + .note : { *(.note*) } :load :note + .dynamic : { *(.dynamic) } :load :dynamic + .dynsym : { *(.dynsym) } :load + .data : { + /* + * There ought not be any real read-write data. + * But since we manipulated the segment layout, + * we have to put these sections somewhere. + */ + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + } + + .rodata : { *(.rodata*) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .eh_frame_hdr : { *(.eh_frame_hdr) } :load :eh_frame_hdr + .eh_frame : { *(.eh_frame) } :load + + .text : { *(.text*) } :load =0x90909090 +} diff --git a/linux-user/i386/vdso.so b/linux-user/i386/vdso.so Binary files differnew file mode 100755 index 0000000000..bdece5dfcf --- /dev/null +++ b/linux-user/i386/vdso.so |