Diffstat (limited to 'linux-user/elfload.c')
-rw-r--r-- | linux-user/elfload.c | 3329
1 file changed, 2088 insertions, 1241 deletions
diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 4cff9e1a31..f9461d2844 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2,11 +2,33 @@ #include "qemu/osdep.h" #include <sys/param.h> +#include <sys/prctl.h> #include <sys/resource.h> +#include <sys/shm.h> #include "qemu.h" +#include "user/tswap-target.h" +#include "user/guest-base.h" +#include "user-internals.h" +#include "signal-common.h" +#include "loader.h" +#include "user-mmap.h" #include "disas/disas.h" +#include "qemu/bitops.h" #include "qemu/path.h" +#include "qemu/queue.h" +#include "qemu/guest-random.h" +#include "qemu/units.h" +#include "qemu/selfmap.h" +#include "qemu/lockable.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "target_signal.h" +#include "tcg/debuginfo.h" + +#ifdef TARGET_ARM +#include "target/arm/cpu-features.h" +#endif #ifdef _ARCH_PPC64 #undef ARCH_DLINFO @@ -18,6 +40,19 @@ #undef ELF_ARCH #endif +#ifndef TARGET_ARCH_HAS_SIGTRAMP_PAGE +#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0 +#endif + +typedef struct { + const uint8_t *image; + const uint32_t *relocs; + unsigned image_size; + unsigned reloc_count; + unsigned sigreturn_ofs; + unsigned rt_sigreturn_ofs; +} VdsoImageInfo; + #define ELF_OSABI ELFOSABI_SYSV /* from personality.h */ @@ -93,7 +128,7 @@ int info_is_fdpic(struct image_info *info) #define ELIBBAD 80 #endif -#ifdef TARGET_WORDS_BIGENDIAN +#if TARGET_BIG_ENDIAN #define ELF_DATA ELFDATA2MSB #else #define ELF_DATA ELFDATA2LSB @@ -118,19 +153,6 @@ typedef abi_int target_pid_t; #ifdef TARGET_I386 -#define ELF_PLATFORM get_elf_platform() - -static const char *get_elf_platform(void) -{ - static char elf_platform[] = "i386"; - int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL); - if (family > 6) - family = 6; - if (family >= 3) - elf_platform[1] = '0' + family; - return elf_platform; -} - #define ELF_HWCAP get_elf_hwcap() static uint32_t get_elf_hwcap(void) @@ -141,11 +163,11 @@ static uint32_t get_elf_hwcap(void) } #ifdef TARGET_X86_64 -#define ELF_START_MMAP 0x2aaaaab000ULL - #define ELF_CLASS ELFCLASS64 #define ELF_ARCH EM_X86_64 +#define ELF_PLATFORM "x86_64" + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { regs->rax = 0; @@ -165,39 +187,58 @@ typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; */ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env) { - (*regs)[0] = env->regs[15]; - (*regs)[1] = env->regs[14]; - (*regs)[2] = env->regs[13]; - (*regs)[3] = env->regs[12]; - (*regs)[4] = env->regs[R_EBP]; - (*regs)[5] = env->regs[R_EBX]; - (*regs)[6] = env->regs[11]; - (*regs)[7] = env->regs[10]; - (*regs)[8] = env->regs[9]; - (*regs)[9] = env->regs[8]; - (*regs)[10] = env->regs[R_EAX]; - (*regs)[11] = env->regs[R_ECX]; - (*regs)[12] = env->regs[R_EDX]; - (*regs)[13] = env->regs[R_ESI]; - (*regs)[14] = env->regs[R_EDI]; - (*regs)[15] = env->regs[R_EAX]; /* XXX */ - (*regs)[16] = env->eip; - (*regs)[17] = env->segs[R_CS].selector & 0xffff; - (*regs)[18] = env->eflags; - (*regs)[19] = env->regs[R_ESP]; - (*regs)[20] = env->segs[R_SS].selector & 0xffff; - (*regs)[21] = env->segs[R_FS].selector & 0xffff; - (*regs)[22] = env->segs[R_GS].selector & 0xffff; - (*regs)[23] = env->segs[R_DS].selector & 0xffff; - (*regs)[24] = env->segs[R_ES].selector & 0xffff; - (*regs)[25] = env->segs[R_FS].selector & 0xffff; - (*regs)[26] = env->segs[R_GS].selector & 0xffff; + (*regs)[0] = tswapreg(env->regs[15]); + (*regs)[1] = tswapreg(env->regs[14]); + (*regs)[2] = tswapreg(env->regs[13]); 
+ (*regs)[3] = tswapreg(env->regs[12]); + (*regs)[4] = tswapreg(env->regs[R_EBP]); + (*regs)[5] = tswapreg(env->regs[R_EBX]); + (*regs)[6] = tswapreg(env->regs[11]); + (*regs)[7] = tswapreg(env->regs[10]); + (*regs)[8] = tswapreg(env->regs[9]); + (*regs)[9] = tswapreg(env->regs[8]); + (*regs)[10] = tswapreg(env->regs[R_EAX]); + (*regs)[11] = tswapreg(env->regs[R_ECX]); + (*regs)[12] = tswapreg(env->regs[R_EDX]); + (*regs)[13] = tswapreg(env->regs[R_ESI]); + (*regs)[14] = tswapreg(env->regs[R_EDI]); + (*regs)[15] = tswapreg(env->regs[R_EAX]); /* XXX */ + (*regs)[16] = tswapreg(env->eip); + (*regs)[17] = tswapreg(env->segs[R_CS].selector & 0xffff); + (*regs)[18] = tswapreg(env->eflags); + (*regs)[19] = tswapreg(env->regs[R_ESP]); + (*regs)[20] = tswapreg(env->segs[R_SS].selector & 0xffff); + (*regs)[21] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[22] = tswapreg(env->segs[R_GS].selector & 0xffff); + (*regs)[23] = tswapreg(env->segs[R_DS].selector & 0xffff); + (*regs)[24] = tswapreg(env->segs[R_ES].selector & 0xffff); + (*regs)[25] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[26] = tswapreg(env->segs[R_GS].selector & 0xffff); } +#if ULONG_MAX > UINT32_MAX +#define INIT_GUEST_COMMPAGE +static bool init_guest_commpage(void) +{ + /* + * The vsyscall page is at a high negative address aka kernel space, + * which means that we cannot actually allocate it with target_mmap. + * We still should be able to use page_set_flags, unless the user + * has specified -R reserved_va, which would trigger an assert(). + */ + if (reserved_va != 0 && + TARGET_VSYSCALL_PAGE + TARGET_PAGE_SIZE - 1 > reserved_va) { + error_report("Cannot allocate vsyscall page"); + exit(EXIT_FAILURE); + } + page_set_flags(TARGET_VSYSCALL_PAGE, + TARGET_VSYSCALL_PAGE | ~TARGET_PAGE_MASK, + PAGE_EXEC | PAGE_VALID); + return true; +} +#endif #else -#define ELF_START_MMAP 0x80000000 - /* * This is used to ensure we don't load something for the wrong architecture. 
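 *
 * A guest-eye sketch of what the vsyscall handling above enables
 * (illustrative only, not part of this patch). The Linux x86_64 ABI
 * fixes legacy entry points in the vsyscall page at 0xffffffffff600000,
 * which very old static binaries still call directly:
 *
 *     struct timeval tv;
 *     int (*vgtod)(struct timeval *, struct timezone *);
 *     vgtod = (void *)0xffffffffff600000ull;  // vsyscall gettimeofday
 *     vgtod(&tv, NULL);                       // qemu traps and emulates
 *
 * Since the page lies in (negative) kernel address space it can never
 * come from target_mmap; marking it PAGE_EXEC with page_set_flags is
 * what lets the translator intercept execution there.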
*/ @@ -209,6 +250,22 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *en #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_386 +#define ELF_PLATFORM get_elf_platform() +#define EXSTACK_DEFAULT true + +static const char *get_elf_platform(void) +{ + static char elf_platform[] = "i386"; + int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL); + if (family > 6) { + family = 6; + } + if (family >= 3) { + elf_platform[1] = '0' + family; + } + return elf_platform; +} + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { @@ -237,40 +294,54 @@ typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; */ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUX86State *env) { - (*regs)[0] = env->regs[R_EBX]; - (*regs)[1] = env->regs[R_ECX]; - (*regs)[2] = env->regs[R_EDX]; - (*regs)[3] = env->regs[R_ESI]; - (*regs)[4] = env->regs[R_EDI]; - (*regs)[5] = env->regs[R_EBP]; - (*regs)[6] = env->regs[R_EAX]; - (*regs)[7] = env->segs[R_DS].selector & 0xffff; - (*regs)[8] = env->segs[R_ES].selector & 0xffff; - (*regs)[9] = env->segs[R_FS].selector & 0xffff; - (*regs)[10] = env->segs[R_GS].selector & 0xffff; - (*regs)[11] = env->regs[R_EAX]; /* XXX */ - (*regs)[12] = env->eip; - (*regs)[13] = env->segs[R_CS].selector & 0xffff; - (*regs)[14] = env->eflags; - (*regs)[15] = env->regs[R_ESP]; - (*regs)[16] = env->segs[R_SS].selector & 0xffff; + (*regs)[0] = tswapreg(env->regs[R_EBX]); + (*regs)[1] = tswapreg(env->regs[R_ECX]); + (*regs)[2] = tswapreg(env->regs[R_EDX]); + (*regs)[3] = tswapreg(env->regs[R_ESI]); + (*regs)[4] = tswapreg(env->regs[R_EDI]); + (*regs)[5] = tswapreg(env->regs[R_EBP]); + (*regs)[6] = tswapreg(env->regs[R_EAX]); + (*regs)[7] = tswapreg(env->segs[R_DS].selector & 0xffff); + (*regs)[8] = tswapreg(env->segs[R_ES].selector & 0xffff); + (*regs)[9] = tswapreg(env->segs[R_FS].selector & 0xffff); + (*regs)[10] = tswapreg(env->segs[R_GS].selector & 0xffff); + (*regs)[11] = tswapreg(env->regs[R_EAX]); /* XXX */ + (*regs)[12] = tswapreg(env->eip); + (*regs)[13] = tswapreg(env->segs[R_CS].selector & 0xffff); + (*regs)[14] = tswapreg(env->eflags); + (*regs)[15] = tswapreg(env->regs[R_ESP]); + (*regs)[16] = tswapreg(env->segs[R_SS].selector & 0xffff); } -#endif + +/* + * i386 is the only target which supplies AT_SYSINFO for the vdso. + * All others only supply AT_SYSINFO_EHDR. 
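 *
 * Guest-side illustration (hedged, not part of this patch): both tags
 * are visible through getauxval(3), and AT_SYSINFO is what i386 libc
 * uses as its fast syscall entry:
 *
 *     #include <sys/auxv.h>
 *     #include <stdio.h>
 *
 *     int main(void)
 *     {
 *         printf("vdso ehdr: %#lx\n", getauxval(AT_SYSINFO_EHDR));
 *         printf("syscall entry: %#lx\n", getauxval(AT_SYSINFO));
 *         return 0;
 *     }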
+ */ +#define DLINFO_ARCH_ITEMS (vdso_info != NULL) +#define ARCH_DLINFO \ + do { \ + if (vdso_info) { \ + NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry); \ + } \ + } while (0) + +#endif /* TARGET_X86_64 */ + +#define VDSO_HEADER "vdso.c.inc" #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 -#endif +#endif /* TARGET_I386 */ #ifdef TARGET_ARM #ifndef TARGET_AARCH64 /* 32 bit ARM definitions */ -#define ELF_START_MMAP 0x80000000 - #define ELF_ARCH EM_ARM #define ELF_CLASS ELFCLASS32 +#define EXSTACK_DEFAULT true static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) @@ -366,6 +437,12 @@ enum ARM_HWCAP_ARM_VFPD32 = 1 << 19, ARM_HWCAP_ARM_LPAE = 1 << 20, ARM_HWCAP_ARM_EVTSTRM = 1 << 21, + ARM_HWCAP_ARM_FPHP = 1 << 22, + ARM_HWCAP_ARM_ASIMDHP = 1 << 23, + ARM_HWCAP_ARM_ASIMDDP = 1 << 24, + ARM_HWCAP_ARM_ASIMDFHM = 1 << 25, + ARM_HWCAP_ARM_ASIMDBF16 = 1 << 26, + ARM_HWCAP_ARM_I8MM = 1 << 27, }; enum { @@ -374,78 +451,62 @@ enum { ARM_HWCAP2_ARM_SHA1 = 1 << 2, ARM_HWCAP2_ARM_SHA2 = 1 << 3, ARM_HWCAP2_ARM_CRC32 = 1 << 4, + ARM_HWCAP2_ARM_SB = 1 << 5, + ARM_HWCAP2_ARM_SSBS = 1 << 6, }; /* The commpage only exists for 32 bit kernels */ -/* Return 1 if the proposed guest space is suitable for the guest. - * Return 0 if the proposed guest space isn't suitable, but another - * address space should be tried. - * Return -1 if there is no way the proposed guest space can be - * valid regardless of the base. - * The guest code may leave a page mapped and populate it if the - * address is suitable. - */ -static int init_guest_commpage(unsigned long guest_base, - unsigned long guest_size) -{ - unsigned long real_start, test_page_addr; +#define HI_COMMPAGE (intptr_t)0xffff0f00u - /* We need to check that we can force a fault on access to the - * commpage at 0xffff0fxx - */ - test_page_addr = guest_base + (0xffff0f00 & qemu_host_page_mask); +static bool init_guest_commpage(void) +{ + ARMCPU *cpu = ARM_CPU(thread_cpu); + int host_page_size = qemu_real_host_page_size(); + abi_ptr commpage; + void *want; + void *addr; - /* If the commpage lies within the already allocated guest space, - * then there is no way we can allocate it. - * - * You may be thinking that that this check is redundant because - * we already validated the guest size against MAX_RESERVED_VA; - * but if qemu_host_page_mask is unusually large, then - * test_page_addr may be lower. + /* + * M-profile allocates maximum of 2GB address space, so can never + * allocate the commpage. Skip it. */ - if (test_page_addr >= guest_base - && test_page_addr < (guest_base + guest_size)) { - return -1; + if (arm_feature(&cpu->env, ARM_FEATURE_M)) { + return true; } - /* Note it needs to be writeable to let us initialise it */ - real_start = (unsigned long) - mmap((void *)test_page_addr, qemu_host_page_size, - PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + commpage = HI_COMMPAGE & -host_page_size; + want = g2h_untagged(commpage); + addr = mmap(want, host_page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | + (commpage < reserved_va ? 
MAP_FIXED : MAP_FIXED_NOREPLACE), + -1, 0); - /* If we can't map it then try another address */ - if (real_start == -1ul) { - return 0; + if (addr == MAP_FAILED) { + perror("Allocating guest commpage"); + exit(EXIT_FAILURE); } - - if (real_start != test_page_addr) { - /* OS didn't put the page where we asked - unmap and reject */ - munmap((void *)real_start, qemu_host_page_size); - return 0; + if (addr != want) { + return false; } - /* Leave the page mapped - * Populate it (mmap should have left it all 0'd) - */ - - /* Kernel helper versions */ - __put_user(5, (uint32_t *)g2h(0xffff0ffcul)); + /* Set kernel helper versions; rest of page is 0. */ + __put_user(5, (uint32_t *)g2h_untagged(0xffff0ffcu)); - /* Now it's populated make it RO */ - if (mprotect((void *)test_page_addr, qemu_host_page_size, PROT_READ)) { + if (mprotect(addr, host_page_size, PROT_READ)) { perror("Protecting guest commpage"); - exit(-1); + exit(EXIT_FAILURE); } - return 1; /* All good */ + page_set_flags(commpage, commpage | (host_page_size - 1), + PAGE_READ | PAGE_EXEC | PAGE_VALID); + return true; } #define ELF_HWCAP get_elf_hwcap() #define ELF_HWCAP2 get_elf_hwcap2() -static uint32_t get_elf_hwcap(void) +uint32_t get_elf_hwcap(void) { ARMCPU *cpu = ARM_CPU(thread_cpu); uint32_t hwcaps = 0; @@ -464,49 +525,149 @@ static uint32_t get_elf_hwcap(void) /* EDSP is in v5TE and above, but all our v5 CPUs are v5TE */ GET_FEATURE(ARM_FEATURE_V5, ARM_HWCAP_ARM_EDSP); - GET_FEATURE(ARM_FEATURE_VFP, ARM_HWCAP_ARM_VFP); GET_FEATURE(ARM_FEATURE_IWMMXT, ARM_HWCAP_ARM_IWMMXT); GET_FEATURE(ARM_FEATURE_THUMB2EE, ARM_HWCAP_ARM_THUMBEE); GET_FEATURE(ARM_FEATURE_NEON, ARM_HWCAP_ARM_NEON); - GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPv3); GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS); - GET_FEATURE(ARM_FEATURE_VFP4, ARM_HWCAP_ARM_VFPv4); - GET_FEATURE_ID(arm_div, ARM_HWCAP_ARM_IDIVA); - GET_FEATURE_ID(thumb_div, ARM_HWCAP_ARM_IDIVT); - /* All QEMU's VFPv3 CPUs have 32 registers, see VFP_DREG in translate.c. - * Note that the ARM_HWCAP_ARM_VFPv3D16 bit is always the inverse of - * ARM_HWCAP_ARM_VFPD32 (and so always clear for QEMU); it is unrelated - * to our VFP_FP16 feature bit. - */ - GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPD32); GET_FEATURE(ARM_FEATURE_LPAE, ARM_HWCAP_ARM_LPAE); + GET_FEATURE_ID(aa32_arm_div, ARM_HWCAP_ARM_IDIVA); + GET_FEATURE_ID(aa32_thumb_div, ARM_HWCAP_ARM_IDIVT); + GET_FEATURE_ID(aa32_vfp, ARM_HWCAP_ARM_VFP); + + if (cpu_isar_feature(aa32_fpsp_v3, cpu) || + cpu_isar_feature(aa32_fpdp_v3, cpu)) { + hwcaps |= ARM_HWCAP_ARM_VFPv3; + if (cpu_isar_feature(aa32_simd_r32, cpu)) { + hwcaps |= ARM_HWCAP_ARM_VFPD32; + } else { + hwcaps |= ARM_HWCAP_ARM_VFPv3D16; + } + } + GET_FEATURE_ID(aa32_simdfmac, ARM_HWCAP_ARM_VFPv4); + /* + * MVFR1.FPHP and .SIMDHP must be in sync, and QEMU uses the same + * isar_feature function for both. The kernel reports them as two hwcaps. 
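 *
 * Consumer-side sketch (illustrative, not part of this patch), using
 * the kernel's names for the same bits from <asm/hwcap.h>:
 *
 *     #include <sys/auxv.h>
 *     #include <asm/hwcap.h>
 *
 *     unsigned long hw = getauxval(AT_HWCAP);
 *     int have_fp16 = (hw & HWCAP_FPHP) && (hw & HWCAP_ASIMDHP);
 *
 * Because both bits derive from the same aa32_fp16_arith test here, a
 * qemu guest will always observe them set or clear together.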
+ */ + GET_FEATURE_ID(aa32_fp16_arith, ARM_HWCAP_ARM_FPHP); + GET_FEATURE_ID(aa32_fp16_arith, ARM_HWCAP_ARM_ASIMDHP); + GET_FEATURE_ID(aa32_dp, ARM_HWCAP_ARM_ASIMDDP); + GET_FEATURE_ID(aa32_fhm, ARM_HWCAP_ARM_ASIMDFHM); + GET_FEATURE_ID(aa32_bf16, ARM_HWCAP_ARM_ASIMDBF16); + GET_FEATURE_ID(aa32_i8mm, ARM_HWCAP_ARM_I8MM); return hwcaps; } -static uint32_t get_elf_hwcap2(void) +uint64_t get_elf_hwcap2(void) { ARMCPU *cpu = ARM_CPU(thread_cpu); - uint32_t hwcaps = 0; + uint64_t hwcaps = 0; GET_FEATURE_ID(aa32_aes, ARM_HWCAP2_ARM_AES); GET_FEATURE_ID(aa32_pmull, ARM_HWCAP2_ARM_PMULL); GET_FEATURE_ID(aa32_sha1, ARM_HWCAP2_ARM_SHA1); GET_FEATURE_ID(aa32_sha2, ARM_HWCAP2_ARM_SHA2); GET_FEATURE_ID(aa32_crc32, ARM_HWCAP2_ARM_CRC32); + GET_FEATURE_ID(aa32_sb, ARM_HWCAP2_ARM_SB); + GET_FEATURE_ID(aa32_ssbs, ARM_HWCAP2_ARM_SSBS); return hwcaps; } +const char *elf_hwcap_str(uint32_t bit) +{ + static const char *hwcap_str[] = { + [__builtin_ctz(ARM_HWCAP_ARM_SWP )] = "swp", + [__builtin_ctz(ARM_HWCAP_ARM_HALF )] = "half", + [__builtin_ctz(ARM_HWCAP_ARM_THUMB )] = "thumb", + [__builtin_ctz(ARM_HWCAP_ARM_26BIT )] = "26bit", + [__builtin_ctz(ARM_HWCAP_ARM_FAST_MULT)] = "fast_mult", + [__builtin_ctz(ARM_HWCAP_ARM_FPA )] = "fpa", + [__builtin_ctz(ARM_HWCAP_ARM_VFP )] = "vfp", + [__builtin_ctz(ARM_HWCAP_ARM_EDSP )] = "edsp", + [__builtin_ctz(ARM_HWCAP_ARM_JAVA )] = "java", + [__builtin_ctz(ARM_HWCAP_ARM_IWMMXT )] = "iwmmxt", + [__builtin_ctz(ARM_HWCAP_ARM_CRUNCH )] = "crunch", + [__builtin_ctz(ARM_HWCAP_ARM_THUMBEE )] = "thumbee", + [__builtin_ctz(ARM_HWCAP_ARM_NEON )] = "neon", + [__builtin_ctz(ARM_HWCAP_ARM_VFPv3 )] = "vfpv3", + [__builtin_ctz(ARM_HWCAP_ARM_VFPv3D16 )] = "vfpv3d16", + [__builtin_ctz(ARM_HWCAP_ARM_TLS )] = "tls", + [__builtin_ctz(ARM_HWCAP_ARM_VFPv4 )] = "vfpv4", + [__builtin_ctz(ARM_HWCAP_ARM_IDIVA )] = "idiva", + [__builtin_ctz(ARM_HWCAP_ARM_IDIVT )] = "idivt", + [__builtin_ctz(ARM_HWCAP_ARM_VFPD32 )] = "vfpd32", + [__builtin_ctz(ARM_HWCAP_ARM_LPAE )] = "lpae", + [__builtin_ctz(ARM_HWCAP_ARM_EVTSTRM )] = "evtstrm", + [__builtin_ctz(ARM_HWCAP_ARM_FPHP )] = "fphp", + [__builtin_ctz(ARM_HWCAP_ARM_ASIMDHP )] = "asimdhp", + [__builtin_ctz(ARM_HWCAP_ARM_ASIMDDP )] = "asimddp", + [__builtin_ctz(ARM_HWCAP_ARM_ASIMDFHM )] = "asimdfhm", + [__builtin_ctz(ARM_HWCAP_ARM_ASIMDBF16)] = "asimdbf16", + [__builtin_ctz(ARM_HWCAP_ARM_I8MM )] = "i8mm", + }; + + return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL; +} + +const char *elf_hwcap2_str(uint32_t bit) +{ + static const char *hwcap_str[] = { + [__builtin_ctz(ARM_HWCAP2_ARM_AES )] = "aes", + [__builtin_ctz(ARM_HWCAP2_ARM_PMULL)] = "pmull", + [__builtin_ctz(ARM_HWCAP2_ARM_SHA1 )] = "sha1", + [__builtin_ctz(ARM_HWCAP2_ARM_SHA2 )] = "sha2", + [__builtin_ctz(ARM_HWCAP2_ARM_CRC32)] = "crc32", + [__builtin_ctz(ARM_HWCAP2_ARM_SB )] = "sb", + [__builtin_ctz(ARM_HWCAP2_ARM_SSBS )] = "ssbs", + }; + + return bit < ARRAY_SIZE(hwcap_str) ? 
hwcap_str[bit] : NULL; +} + #undef GET_FEATURE #undef GET_FEATURE_ID +#define ELF_PLATFORM get_elf_platform() + +static const char *get_elf_platform(void) +{ + CPUARMState *env = cpu_env(thread_cpu); + +#if TARGET_BIG_ENDIAN +# define END "b" +#else +# define END "l" +#endif + + if (arm_feature(env, ARM_FEATURE_V8)) { + return "v8" END; + } else if (arm_feature(env, ARM_FEATURE_V7)) { + if (arm_feature(env, ARM_FEATURE_M)) { + return "v7m" END; + } else { + return "v7" END; + } + } else if (arm_feature(env, ARM_FEATURE_V6)) { + return "v6" END; + } else if (arm_feature(env, ARM_FEATURE_V5)) { + return "v5" END; + } else { + return "v4" END; + } + +#undef END +} + #else /* 64 bit ARM definitions */ -#define ELF_START_MMAP 0x80000000 #define ELF_ARCH EM_AARCH64 #define ELF_CLASS ELFCLASS64 -#define ELF_PLATFORM "aarch64" +#if TARGET_BIG_ENDIAN +# define ELF_PLATFORM "aarch64_be" +#else +# define ELF_PLATFORM "aarch64" +#endif static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) @@ -560,21 +721,79 @@ enum { ARM_HWCAP_A64_ASIMDDP = 1 << 20, ARM_HWCAP_A64_SHA512 = 1 << 21, ARM_HWCAP_A64_SVE = 1 << 22, + ARM_HWCAP_A64_ASIMDFHM = 1 << 23, + ARM_HWCAP_A64_DIT = 1 << 24, + ARM_HWCAP_A64_USCAT = 1 << 25, + ARM_HWCAP_A64_ILRCPC = 1 << 26, + ARM_HWCAP_A64_FLAGM = 1 << 27, + ARM_HWCAP_A64_SSBS = 1 << 28, + ARM_HWCAP_A64_SB = 1 << 29, + ARM_HWCAP_A64_PACA = 1 << 30, + ARM_HWCAP_A64_PACG = 1UL << 31, + + ARM_HWCAP2_A64_DCPODP = 1 << 0, + ARM_HWCAP2_A64_SVE2 = 1 << 1, + ARM_HWCAP2_A64_SVEAES = 1 << 2, + ARM_HWCAP2_A64_SVEPMULL = 1 << 3, + ARM_HWCAP2_A64_SVEBITPERM = 1 << 4, + ARM_HWCAP2_A64_SVESHA3 = 1 << 5, + ARM_HWCAP2_A64_SVESM4 = 1 << 6, + ARM_HWCAP2_A64_FLAGM2 = 1 << 7, + ARM_HWCAP2_A64_FRINT = 1 << 8, + ARM_HWCAP2_A64_SVEI8MM = 1 << 9, + ARM_HWCAP2_A64_SVEF32MM = 1 << 10, + ARM_HWCAP2_A64_SVEF64MM = 1 << 11, + ARM_HWCAP2_A64_SVEBF16 = 1 << 12, + ARM_HWCAP2_A64_I8MM = 1 << 13, + ARM_HWCAP2_A64_BF16 = 1 << 14, + ARM_HWCAP2_A64_DGH = 1 << 15, + ARM_HWCAP2_A64_RNG = 1 << 16, + ARM_HWCAP2_A64_BTI = 1 << 17, + ARM_HWCAP2_A64_MTE = 1 << 18, + ARM_HWCAP2_A64_ECV = 1 << 19, + ARM_HWCAP2_A64_AFP = 1 << 20, + ARM_HWCAP2_A64_RPRES = 1 << 21, + ARM_HWCAP2_A64_MTE3 = 1 << 22, + ARM_HWCAP2_A64_SME = 1 << 23, + ARM_HWCAP2_A64_SME_I16I64 = 1 << 24, + ARM_HWCAP2_A64_SME_F64F64 = 1 << 25, + ARM_HWCAP2_A64_SME_I8I32 = 1 << 26, + ARM_HWCAP2_A64_SME_F16F32 = 1 << 27, + ARM_HWCAP2_A64_SME_B16F32 = 1 << 28, + ARM_HWCAP2_A64_SME_F32F32 = 1 << 29, + ARM_HWCAP2_A64_SME_FA64 = 1 << 30, + ARM_HWCAP2_A64_WFXT = 1ULL << 31, + ARM_HWCAP2_A64_EBF16 = 1ULL << 32, + ARM_HWCAP2_A64_SVE_EBF16 = 1ULL << 33, + ARM_HWCAP2_A64_CSSC = 1ULL << 34, + ARM_HWCAP2_A64_RPRFM = 1ULL << 35, + ARM_HWCAP2_A64_SVE2P1 = 1ULL << 36, + ARM_HWCAP2_A64_SME2 = 1ULL << 37, + ARM_HWCAP2_A64_SME2P1 = 1ULL << 38, + ARM_HWCAP2_A64_SME_I16I32 = 1ULL << 39, + ARM_HWCAP2_A64_SME_BI32I32 = 1ULL << 40, + ARM_HWCAP2_A64_SME_B16B16 = 1ULL << 41, + ARM_HWCAP2_A64_SME_F16F16 = 1ULL << 42, + ARM_HWCAP2_A64_MOPS = 1ULL << 43, + ARM_HWCAP2_A64_HBC = 1ULL << 44, }; -#define ELF_HWCAP get_elf_hwcap() +#define ELF_HWCAP get_elf_hwcap() +#define ELF_HWCAP2 get_elf_hwcap2() -static uint32_t get_elf_hwcap(void) +#define GET_FEATURE_ID(feat, hwcap) \ + do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0) + +uint32_t get_elf_hwcap(void) { ARMCPU *cpu = ARM_CPU(thread_cpu); uint32_t hwcaps = 0; hwcaps |= ARM_HWCAP_A64_FP; hwcaps |= ARM_HWCAP_A64_ASIMD; + hwcaps |= ARM_HWCAP_A64_CPUID; /* probe for the extra features */ -#define 
GET_FEATURE_ID(feat, hwcap) \ - do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0) GET_FEATURE_ID(aa64_aes, ARM_HWCAP_A64_AES); GET_FEATURE_ID(aa64_pmull, ARM_HWCAP_A64_PMULL); @@ -587,23 +806,170 @@ static uint32_t get_elf_hwcap(void) GET_FEATURE_ID(aa64_sm4, ARM_HWCAP_A64_SM4); GET_FEATURE_ID(aa64_fp16, ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP); GET_FEATURE_ID(aa64_atomics, ARM_HWCAP_A64_ATOMICS); + GET_FEATURE_ID(aa64_lse2, ARM_HWCAP_A64_USCAT); GET_FEATURE_ID(aa64_rdm, ARM_HWCAP_A64_ASIMDRDM); GET_FEATURE_ID(aa64_dp, ARM_HWCAP_A64_ASIMDDP); GET_FEATURE_ID(aa64_fcma, ARM_HWCAP_A64_FCMA); GET_FEATURE_ID(aa64_sve, ARM_HWCAP_A64_SVE); + GET_FEATURE_ID(aa64_pauth, ARM_HWCAP_A64_PACA | ARM_HWCAP_A64_PACG); + GET_FEATURE_ID(aa64_fhm, ARM_HWCAP_A64_ASIMDFHM); + GET_FEATURE_ID(aa64_dit, ARM_HWCAP_A64_DIT); + GET_FEATURE_ID(aa64_jscvt, ARM_HWCAP_A64_JSCVT); + GET_FEATURE_ID(aa64_sb, ARM_HWCAP_A64_SB); + GET_FEATURE_ID(aa64_condm_4, ARM_HWCAP_A64_FLAGM); + GET_FEATURE_ID(aa64_dcpop, ARM_HWCAP_A64_DCPOP); + GET_FEATURE_ID(aa64_rcpc_8_3, ARM_HWCAP_A64_LRCPC); + GET_FEATURE_ID(aa64_rcpc_8_4, ARM_HWCAP_A64_ILRCPC); -#undef GET_FEATURE_ID + return hwcaps; +} + +uint64_t get_elf_hwcap2(void) +{ + ARMCPU *cpu = ARM_CPU(thread_cpu); + uint64_t hwcaps = 0; + + GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP); + GET_FEATURE_ID(aa64_sve2, ARM_HWCAP2_A64_SVE2); + GET_FEATURE_ID(aa64_sve2_aes, ARM_HWCAP2_A64_SVEAES); + GET_FEATURE_ID(aa64_sve2_pmull128, ARM_HWCAP2_A64_SVEPMULL); + GET_FEATURE_ID(aa64_sve2_bitperm, ARM_HWCAP2_A64_SVEBITPERM); + GET_FEATURE_ID(aa64_sve2_sha3, ARM_HWCAP2_A64_SVESHA3); + GET_FEATURE_ID(aa64_sve2_sm4, ARM_HWCAP2_A64_SVESM4); + GET_FEATURE_ID(aa64_condm_5, ARM_HWCAP2_A64_FLAGM2); + GET_FEATURE_ID(aa64_frint, ARM_HWCAP2_A64_FRINT); + GET_FEATURE_ID(aa64_sve_i8mm, ARM_HWCAP2_A64_SVEI8MM); + GET_FEATURE_ID(aa64_sve_f32mm, ARM_HWCAP2_A64_SVEF32MM); + GET_FEATURE_ID(aa64_sve_f64mm, ARM_HWCAP2_A64_SVEF64MM); + GET_FEATURE_ID(aa64_sve_bf16, ARM_HWCAP2_A64_SVEBF16); + GET_FEATURE_ID(aa64_i8mm, ARM_HWCAP2_A64_I8MM); + GET_FEATURE_ID(aa64_bf16, ARM_HWCAP2_A64_BF16); + GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG); + GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI); + GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE); + GET_FEATURE_ID(aa64_mte3, ARM_HWCAP2_A64_MTE3); + GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME | + ARM_HWCAP2_A64_SME_F32F32 | + ARM_HWCAP2_A64_SME_B16F32 | + ARM_HWCAP2_A64_SME_F16F32 | + ARM_HWCAP2_A64_SME_I8I32)); + GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64); + GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64); + GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64); + GET_FEATURE_ID(aa64_hbc, ARM_HWCAP2_A64_HBC); + GET_FEATURE_ID(aa64_mops, ARM_HWCAP2_A64_MOPS); return hwcaps; } +const char *elf_hwcap_str(uint32_t bit) +{ + static const char *hwcap_str[] = { + [__builtin_ctz(ARM_HWCAP_A64_FP )] = "fp", + [__builtin_ctz(ARM_HWCAP_A64_ASIMD )] = "asimd", + [__builtin_ctz(ARM_HWCAP_A64_EVTSTRM )] = "evtstrm", + [__builtin_ctz(ARM_HWCAP_A64_AES )] = "aes", + [__builtin_ctz(ARM_HWCAP_A64_PMULL )] = "pmull", + [__builtin_ctz(ARM_HWCAP_A64_SHA1 )] = "sha1", + [__builtin_ctz(ARM_HWCAP_A64_SHA2 )] = "sha2", + [__builtin_ctz(ARM_HWCAP_A64_CRC32 )] = "crc32", + [__builtin_ctz(ARM_HWCAP_A64_ATOMICS )] = "atomics", + [__builtin_ctz(ARM_HWCAP_A64_FPHP )] = "fphp", + [__builtin_ctz(ARM_HWCAP_A64_ASIMDHP )] = "asimdhp", + [__builtin_ctz(ARM_HWCAP_A64_CPUID )] = "cpuid", + [__builtin_ctz(ARM_HWCAP_A64_ASIMDRDM)] = "asimdrdm", + 
[__builtin_ctz(ARM_HWCAP_A64_JSCVT )] = "jscvt", + [__builtin_ctz(ARM_HWCAP_A64_FCMA )] = "fcma", + [__builtin_ctz(ARM_HWCAP_A64_LRCPC )] = "lrcpc", + [__builtin_ctz(ARM_HWCAP_A64_DCPOP )] = "dcpop", + [__builtin_ctz(ARM_HWCAP_A64_SHA3 )] = "sha3", + [__builtin_ctz(ARM_HWCAP_A64_SM3 )] = "sm3", + [__builtin_ctz(ARM_HWCAP_A64_SM4 )] = "sm4", + [__builtin_ctz(ARM_HWCAP_A64_ASIMDDP )] = "asimddp", + [__builtin_ctz(ARM_HWCAP_A64_SHA512 )] = "sha512", + [__builtin_ctz(ARM_HWCAP_A64_SVE )] = "sve", + [__builtin_ctz(ARM_HWCAP_A64_ASIMDFHM)] = "asimdfhm", + [__builtin_ctz(ARM_HWCAP_A64_DIT )] = "dit", + [__builtin_ctz(ARM_HWCAP_A64_USCAT )] = "uscat", + [__builtin_ctz(ARM_HWCAP_A64_ILRCPC )] = "ilrcpc", + [__builtin_ctz(ARM_HWCAP_A64_FLAGM )] = "flagm", + [__builtin_ctz(ARM_HWCAP_A64_SSBS )] = "ssbs", + [__builtin_ctz(ARM_HWCAP_A64_SB )] = "sb", + [__builtin_ctz(ARM_HWCAP_A64_PACA )] = "paca", + [__builtin_ctz(ARM_HWCAP_A64_PACG )] = "pacg", + }; + + return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL; +} + +const char *elf_hwcap2_str(uint32_t bit) +{ + static const char *hwcap_str[] = { + [__builtin_ctz(ARM_HWCAP2_A64_DCPODP )] = "dcpodp", + [__builtin_ctz(ARM_HWCAP2_A64_SVE2 )] = "sve2", + [__builtin_ctz(ARM_HWCAP2_A64_SVEAES )] = "sveaes", + [__builtin_ctz(ARM_HWCAP2_A64_SVEPMULL )] = "svepmull", + [__builtin_ctz(ARM_HWCAP2_A64_SVEBITPERM )] = "svebitperm", + [__builtin_ctz(ARM_HWCAP2_A64_SVESHA3 )] = "svesha3", + [__builtin_ctz(ARM_HWCAP2_A64_SVESM4 )] = "svesm4", + [__builtin_ctz(ARM_HWCAP2_A64_FLAGM2 )] = "flagm2", + [__builtin_ctz(ARM_HWCAP2_A64_FRINT )] = "frint", + [__builtin_ctz(ARM_HWCAP2_A64_SVEI8MM )] = "svei8mm", + [__builtin_ctz(ARM_HWCAP2_A64_SVEF32MM )] = "svef32mm", + [__builtin_ctz(ARM_HWCAP2_A64_SVEF64MM )] = "svef64mm", + [__builtin_ctz(ARM_HWCAP2_A64_SVEBF16 )] = "svebf16", + [__builtin_ctz(ARM_HWCAP2_A64_I8MM )] = "i8mm", + [__builtin_ctz(ARM_HWCAP2_A64_BF16 )] = "bf16", + [__builtin_ctz(ARM_HWCAP2_A64_DGH )] = "dgh", + [__builtin_ctz(ARM_HWCAP2_A64_RNG )] = "rng", + [__builtin_ctz(ARM_HWCAP2_A64_BTI )] = "bti", + [__builtin_ctz(ARM_HWCAP2_A64_MTE )] = "mte", + [__builtin_ctz(ARM_HWCAP2_A64_ECV )] = "ecv", + [__builtin_ctz(ARM_HWCAP2_A64_AFP )] = "afp", + [__builtin_ctz(ARM_HWCAP2_A64_RPRES )] = "rpres", + [__builtin_ctz(ARM_HWCAP2_A64_MTE3 )] = "mte3", + [__builtin_ctz(ARM_HWCAP2_A64_SME )] = "sme", + [__builtin_ctz(ARM_HWCAP2_A64_SME_I16I64 )] = "smei16i64", + [__builtin_ctz(ARM_HWCAP2_A64_SME_F64F64 )] = "smef64f64", + [__builtin_ctz(ARM_HWCAP2_A64_SME_I8I32 )] = "smei8i32", + [__builtin_ctz(ARM_HWCAP2_A64_SME_F16F32 )] = "smef16f32", + [__builtin_ctz(ARM_HWCAP2_A64_SME_B16F32 )] = "smeb16f32", + [__builtin_ctz(ARM_HWCAP2_A64_SME_F32F32 )] = "smef32f32", + [__builtin_ctz(ARM_HWCAP2_A64_SME_FA64 )] = "smefa64", + [__builtin_ctz(ARM_HWCAP2_A64_WFXT )] = "wfxt", + [__builtin_ctzll(ARM_HWCAP2_A64_EBF16 )] = "ebf16", + [__builtin_ctzll(ARM_HWCAP2_A64_SVE_EBF16 )] = "sveebf16", + [__builtin_ctzll(ARM_HWCAP2_A64_CSSC )] = "cssc", + [__builtin_ctzll(ARM_HWCAP2_A64_RPRFM )] = "rprfm", + [__builtin_ctzll(ARM_HWCAP2_A64_SVE2P1 )] = "sve2p1", + [__builtin_ctzll(ARM_HWCAP2_A64_SME2 )] = "sme2", + [__builtin_ctzll(ARM_HWCAP2_A64_SME2P1 )] = "sme2p1", + [__builtin_ctzll(ARM_HWCAP2_A64_SME_I16I32 )] = "smei16i32", + [__builtin_ctzll(ARM_HWCAP2_A64_SME_BI32I32)] = "smebi32i32", + [__builtin_ctzll(ARM_HWCAP2_A64_SME_B16B16 )] = "smeb16b16", + [__builtin_ctzll(ARM_HWCAP2_A64_SME_F16F16 )] = "smef16f16", + [__builtin_ctzll(ARM_HWCAP2_A64_MOPS )] = "mops", + 
[__builtin_ctzll(ARM_HWCAP2_A64_HBC )] = "hbc", + }; + + return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL; +} + +#undef GET_FEATURE_ID + #endif /* not TARGET_AARCH64 */ + +#if TARGET_BIG_ENDIAN +# define VDSO_HEADER "vdso-be.c.inc" +#else +# define VDSO_HEADER "vdso-le.c.inc" +#endif + #endif /* TARGET_ARM */ #ifdef TARGET_SPARC #ifdef TARGET_SPARC64 -#define ELF_START_MMAP 0x80000000 #define ELF_HWCAP (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \ | HWCAP_SPARC_MULDIV | HWCAP_SPARC_V9) #ifndef TARGET_ABI32 @@ -614,55 +980,30 @@ static uint32_t get_elf_hwcap(void) #define ELF_CLASS ELFCLASS64 #define ELF_ARCH EM_SPARCV9 - -#define STACK_BIAS 2047 - -static inline void init_thread(struct target_pt_regs *regs, - struct image_info *infop) -{ -#ifndef TARGET_ABI32 - regs->tstate = 0; -#endif - regs->pc = infop->entry; - regs->npc = regs->pc + 4; - regs->y = 0; -#ifdef TARGET_ABI32 - regs->u_regs[14] = infop->start_stack - 16 * 4; #else - if (personality(infop->personality) == PER_LINUX32) - regs->u_regs[14] = infop->start_stack - 16 * 4; - else - regs->u_regs[14] = infop->start_stack - 16 * 8 - STACK_BIAS; -#endif -} - -#else -#define ELF_START_MMAP 0x80000000 #define ELF_HWCAP (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \ | HWCAP_SPARC_MULDIV) - #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_SPARC +#endif /* TARGET_SPARC64 */ static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { - regs->psr = 0; + /* Note that target_cpu_copy_regs does not read psr/tstate. */ regs->pc = infop->entry; regs->npc = regs->pc + 4; regs->y = 0; - regs->u_regs[14] = infop->start_stack - 16 * 4; + regs->u_regs[14] = (infop->start_stack - 16 * sizeof(abi_ulong) + - TARGET_STACK_BIAS); } - -#endif -#endif +#endif /* TARGET_SPARC */ #ifdef TARGET_PPC #define ELF_MACHINE PPC_ELF_MACHINE -#define ELF_START_MMAP 0x80000000 -#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) +#if defined(TARGET_PPC64) #define elf_check_arch(x) ( (x) == EM_PPC64 ) @@ -671,6 +1012,7 @@ static inline void init_thread(struct target_pt_regs *regs, #else #define ELF_CLASS ELFCLASS32 +#define EXSTACK_DEFAULT true #endif @@ -716,7 +1058,15 @@ enum { QEMU_PPC_FEATURE2_HAS_EBB = 0x10000000, /* Event Base Branching */ QEMU_PPC_FEATURE2_HAS_ISEL = 0x08000000, /* Integer Select */ QEMU_PPC_FEATURE2_HAS_TAR = 0x04000000, /* Target Address Register */ + QEMU_PPC_FEATURE2_VEC_CRYPTO = 0x02000000, + QEMU_PPC_FEATURE2_HTM_NOSC = 0x01000000, QEMU_PPC_FEATURE2_ARCH_3_00 = 0x00800000, /* ISA 3.00 */ + QEMU_PPC_FEATURE2_HAS_IEEE128 = 0x00400000, /* VSX IEEE Bin Float 128-bit */ + QEMU_PPC_FEATURE2_DARN = 0x00200000, /* darn random number insn */ + QEMU_PPC_FEATURE2_SCV = 0x00100000, /* scv syscall */ + QEMU_PPC_FEATURE2_HTM_NO_SUSPEND = 0x00080000, /* TM w/o suspended state */ + QEMU_PPC_FEATURE2_ARCH_3_1 = 0x00040000, /* ISA 3.1 */ + QEMU_PPC_FEATURE2_MMA = 0x00020000, /* Matrix-Multiply Assist */ }; #define ELF_HWCAP get_elf_hwcap() @@ -770,8 +1120,12 @@ static uint32_t get_elf_hwcap2(void) GET_FEATURE(PPC_ISEL, QEMU_PPC_FEATURE2_HAS_ISEL); GET_FEATURE2(PPC2_BCTAR_ISA207, QEMU_PPC_FEATURE2_HAS_TAR); GET_FEATURE2((PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | - PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07); - GET_FEATURE2(PPC2_ISA300, QEMU_PPC_FEATURE2_ARCH_3_00); + PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07 | + QEMU_PPC_FEATURE2_VEC_CRYPTO); + GET_FEATURE2(PPC2_ISA300, QEMU_PPC_FEATURE2_ARCH_3_00 | + QEMU_PPC_FEATURE2_DARN | QEMU_PPC_FEATURE2_HAS_IEEE128); + 
GET_FEATURE2(PPC2_ISA310, QEMU_PPC_FEATURE2_ARCH_3_1 | + QEMU_PPC_FEATURE2_MMA); #undef GET_FEATURE #undef GET_FEATURE2 @@ -806,7 +1160,7 @@ static uint32_t get_elf_hwcap2(void) static inline void init_thread(struct target_pt_regs *_regs, struct image_info *infop) { _regs->gpr[1] = infop->start_stack; -#if defined(TARGET_PPC64) && !defined(TARGET_ABI32) +#if defined(TARGET_PPC64) if (get_ppc64_abi(infop) < 2) { uint64_t val; get_user_u64(val, infop->entry + 8); @@ -837,22 +1191,126 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en (*regs)[33] = tswapreg(env->msr); (*regs)[35] = tswapreg(env->ctr); (*regs)[36] = tswapreg(env->lr); - (*regs)[37] = tswapreg(env->xer); + (*regs)[37] = tswapreg(cpu_read_xer(env)); - for (i = 0; i < ARRAY_SIZE(env->crf); i++) { - ccr |= env->crf[i] << (32 - ((i + 1) * 4)); - } + ccr = ppc_get_cr(env); (*regs)[38] = tswapreg(ccr); } #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 +#ifndef TARGET_PPC64 +# define VDSO_HEADER "vdso-32.c.inc" +#elif TARGET_BIG_ENDIAN +# define VDSO_HEADER "vdso-64.c.inc" +#else +# define VDSO_HEADER "vdso-64le.c.inc" #endif -#ifdef TARGET_MIPS +#endif + +#ifdef TARGET_LOONGARCH64 + +#define ELF_CLASS ELFCLASS64 +#define ELF_ARCH EM_LOONGARCH +#define EXSTACK_DEFAULT true + +#define elf_check_arch(x) ((x) == EM_LOONGARCH) + +#define VDSO_HEADER "vdso.c.inc" + +static inline void init_thread(struct target_pt_regs *regs, + struct image_info *infop) +{ + /*Set crmd PG,DA = 1,0 */ + regs->csr.crmd = 2 << 3; + regs->csr.era = infop->entry; + regs->regs[3] = infop->start_stack; +} + +/* See linux kernel: arch/loongarch/include/asm/elf.h */ +#define ELF_NREG 45 +typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; + +enum { + TARGET_EF_R0 = 0, + TARGET_EF_CSR_ERA = TARGET_EF_R0 + 33, + TARGET_EF_CSR_BADV = TARGET_EF_R0 + 34, +}; -#define ELF_START_MMAP 0x80000000 +static void elf_core_copy_regs(target_elf_gregset_t *regs, + const CPULoongArchState *env) +{ + int i; + + (*regs)[TARGET_EF_R0] = 0; + + for (i = 1; i < ARRAY_SIZE(env->gpr); i++) { + (*regs)[TARGET_EF_R0 + i] = tswapreg(env->gpr[i]); + } + + (*regs)[TARGET_EF_CSR_ERA] = tswapreg(env->pc); + (*regs)[TARGET_EF_CSR_BADV] = tswapreg(env->CSR_BADV); +} + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +#define ELF_HWCAP get_elf_hwcap() + +/* See arch/loongarch/include/uapi/asm/hwcap.h */ +enum { + HWCAP_LOONGARCH_CPUCFG = (1 << 0), + HWCAP_LOONGARCH_LAM = (1 << 1), + HWCAP_LOONGARCH_UAL = (1 << 2), + HWCAP_LOONGARCH_FPU = (1 << 3), + HWCAP_LOONGARCH_LSX = (1 << 4), + HWCAP_LOONGARCH_LASX = (1 << 5), + HWCAP_LOONGARCH_CRC32 = (1 << 6), + HWCAP_LOONGARCH_COMPLEX = (1 << 7), + HWCAP_LOONGARCH_CRYPTO = (1 << 8), + HWCAP_LOONGARCH_LVZ = (1 << 9), + HWCAP_LOONGARCH_LBT_X86 = (1 << 10), + HWCAP_LOONGARCH_LBT_ARM = (1 << 11), + HWCAP_LOONGARCH_LBT_MIPS = (1 << 12), +}; + +static uint32_t get_elf_hwcap(void) +{ + LoongArchCPU *cpu = LOONGARCH_CPU(thread_cpu); + uint32_t hwcaps = 0; + + hwcaps |= HWCAP_LOONGARCH_CRC32; + + if (FIELD_EX32(cpu->env.cpucfg[1], CPUCFG1, UAL)) { + hwcaps |= HWCAP_LOONGARCH_UAL; + } + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, FP)) { + hwcaps |= HWCAP_LOONGARCH_FPU; + } + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LAM)) { + hwcaps |= HWCAP_LOONGARCH_LAM; + } + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + hwcaps |= HWCAP_LOONGARCH_LSX; + } + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { + hwcaps |= HWCAP_LOONGARCH_LASX; + } + + return hwcaps; +} + +#define ELF_PLATFORM "loongarch" + 
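/*
 * Illustrative guest-side check (not part of this patch); the bit
 * names mirror arch/loongarch/include/uapi/asm/hwcap.h:
 *
 *     #include <sys/auxv.h>
 *     #include <asm/hwcap.h>
 *
 *     unsigned long hw = getauxval(AT_HWCAP);
 *     if (hw & HWCAP_LOONGARCH_LSX) {
 *         // safe to use 128-bit LSX vector instructions
 *     }
 */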
+#endif /* TARGET_LOONGARCH64 */ + +#ifdef TARGET_MIPS #ifdef TARGET_MIPS64 #define ELF_CLASS ELFCLASS64 @@ -860,8 +1318,44 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUPPCState *en #define ELF_CLASS ELFCLASS32 #endif #define ELF_ARCH EM_MIPS +#define EXSTACK_DEFAULT true + +#ifdef TARGET_ABI_MIPSN32 +#define elf_check_abi(x) ((x) & EF_MIPS_ABI2) +#else +#define elf_check_abi(x) (!((x) & EF_MIPS_ABI2)) +#endif + +#define ELF_BASE_PLATFORM get_elf_base_platform() -#define elf_check_arch(x) ((x) == EM_MIPS || (x) == EM_NANOMIPS) +#define MATCH_PLATFORM_INSN(_flags, _base_platform) \ + do { if ((cpu->env.insn_flags & (_flags)) == _flags) \ + { return _base_platform; } } while (0) + +static const char *get_elf_base_platform(void) +{ + MIPSCPU *cpu = MIPS_CPU(thread_cpu); + + /* 64 bit ISAs goes first */ + MATCH_PLATFORM_INSN(CPU_MIPS64R6, "mips64r6"); + MATCH_PLATFORM_INSN(CPU_MIPS64R5, "mips64r5"); + MATCH_PLATFORM_INSN(CPU_MIPS64R2, "mips64r2"); + MATCH_PLATFORM_INSN(CPU_MIPS64R1, "mips64"); + MATCH_PLATFORM_INSN(CPU_MIPS5, "mips5"); + MATCH_PLATFORM_INSN(CPU_MIPS4, "mips4"); + MATCH_PLATFORM_INSN(CPU_MIPS3, "mips3"); + + /* 32 bit ISAs */ + MATCH_PLATFORM_INSN(CPU_MIPS32R6, "mips32r6"); + MATCH_PLATFORM_INSN(CPU_MIPS32R5, "mips32r5"); + MATCH_PLATFORM_INSN(CPU_MIPS32R2, "mips32r2"); + MATCH_PLATFORM_INSN(CPU_MIPS32R1, "mips32"); + MATCH_PLATFORM_INSN(CPU_MIPS2, "mips2"); + + /* Fallback */ + return "mips"; +} +#undef MATCH_PLATFORM_INSN static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) @@ -923,32 +1417,58 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUMIPSState *e enum { HWCAP_MIPS_R6 = (1 << 0), HWCAP_MIPS_MSA = (1 << 1), + HWCAP_MIPS_CRC32 = (1 << 2), + HWCAP_MIPS_MIPS16 = (1 << 3), + HWCAP_MIPS_MDMX = (1 << 4), + HWCAP_MIPS_MIPS3D = (1 << 5), + HWCAP_MIPS_SMARTMIPS = (1 << 6), + HWCAP_MIPS_DSP = (1 << 7), + HWCAP_MIPS_DSP2 = (1 << 8), + HWCAP_MIPS_DSP3 = (1 << 9), + HWCAP_MIPS_MIPS16E2 = (1 << 10), + HWCAP_LOONGSON_MMI = (1 << 11), + HWCAP_LOONGSON_EXT = (1 << 12), + HWCAP_LOONGSON_EXT2 = (1 << 13), + HWCAP_LOONGSON_CPUCFG = (1 << 14), }; #define ELF_HWCAP get_elf_hwcap() +#define GET_FEATURE_INSN(_flag, _hwcap) \ + do { if (cpu->env.insn_flags & (_flag)) { hwcaps |= _hwcap; } } while (0) + +#define GET_FEATURE_REG_SET(_reg, _mask, _hwcap) \ + do { if (cpu->env._reg & (_mask)) { hwcaps |= _hwcap; } } while (0) + +#define GET_FEATURE_REG_EQU(_reg, _start, _length, _val, _hwcap) \ + do { \ + if (extract32(cpu->env._reg, (_start), (_length)) == (_val)) { \ + hwcaps |= _hwcap; \ + } \ + } while (0) + static uint32_t get_elf_hwcap(void) { MIPSCPU *cpu = MIPS_CPU(thread_cpu); uint32_t hwcaps = 0; -#define GET_FEATURE(flag, hwcap) \ - do { if (cpu->env.insn_flags & (flag)) { hwcaps |= hwcap; } } while (0) - - GET_FEATURE(ISA_MIPS32R6 | ISA_MIPS64R6, HWCAP_MIPS_R6); - GET_FEATURE(ASE_MSA, HWCAP_MIPS_MSA); - -#undef GET_FEATURE + GET_FEATURE_REG_EQU(CP0_Config0, CP0C0_AR, CP0C0_AR_LENGTH, + 2, HWCAP_MIPS_R6); + GET_FEATURE_REG_SET(CP0_Config3, 1 << CP0C3_MSAP, HWCAP_MIPS_MSA); + GET_FEATURE_INSN(ASE_LMMI, HWCAP_LOONGSON_MMI); + GET_FEATURE_INSN(ASE_LEXT, HWCAP_LOONGSON_EXT); return hwcaps; } +#undef GET_FEATURE_REG_EQU +#undef GET_FEATURE_REG_SET +#undef GET_FEATURE_INSN + #endif /* TARGET_MIPS */ #ifdef TARGET_MICROBLAZE -#define ELF_START_MMAP 0x80000000 - #define elf_check_arch(x) ( (x) == EM_MICROBLAZE || (x) == EM_MICROBLAZE_OLD) #define ELF_CLASS ELFCLASS32 @@ -977,74 +1497,18 @@ static void 
elf_core_copy_regs(target_elf_gregset_t *regs, const CPUMBState *env (*regs)[pos++] = tswapreg(env->regs[i]); } - for (i = 0; i < 6; i++) { - (*regs)[pos++] = tswapreg(env->sregs[i]); - } + (*regs)[pos++] = tswapreg(env->pc); + (*regs)[pos++] = tswapreg(mb_cpu_read_msr(env)); + (*regs)[pos++] = 0; + (*regs)[pos++] = tswapreg(env->ear); + (*regs)[pos++] = 0; + (*regs)[pos++] = tswapreg(env->esr); } #endif /* TARGET_MICROBLAZE */ -#ifdef TARGET_NIOS2 - -#define ELF_START_MMAP 0x80000000 - -#define elf_check_arch(x) ((x) == EM_ALTERA_NIOS2) - -#define ELF_CLASS ELFCLASS32 -#define ELF_ARCH EM_ALTERA_NIOS2 - -static void init_thread(struct target_pt_regs *regs, struct image_info *infop) -{ - regs->ea = infop->entry; - regs->sp = infop->start_stack; - regs->estatus = 0x3; -} - -#define ELF_EXEC_PAGESIZE 4096 - -#define USE_ELF_CORE_DUMP -#define ELF_NREG 49 -typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; - -/* See linux kernel: arch/mips/kernel/process.c:elf_dump_regs. */ -static void elf_core_copy_regs(target_elf_gregset_t *regs, - const CPUNios2State *env) -{ - int i; - - (*regs)[0] = -1; - for (i = 1; i < 8; i++) /* r0-r7 */ - (*regs)[i] = tswapreg(env->regs[i + 7]); - - for (i = 8; i < 16; i++) /* r8-r15 */ - (*regs)[i] = tswapreg(env->regs[i - 8]); - - for (i = 16; i < 24; i++) /* r16-r23 */ - (*regs)[i] = tswapreg(env->regs[i + 7]); - (*regs)[24] = -1; /* R_ET */ - (*regs)[25] = -1; /* R_BT */ - (*regs)[26] = tswapreg(env->regs[R_GP]); - (*regs)[27] = tswapreg(env->regs[R_SP]); - (*regs)[28] = tswapreg(env->regs[R_FP]); - (*regs)[29] = tswapreg(env->regs[R_EA]); - (*regs)[30] = -1; /* R_SSTATUS */ - (*regs)[31] = tswapreg(env->regs[R_RA]); - - (*regs)[32] = tswapreg(env->regs[R_PC]); - - (*regs)[33] = -1; /* R_STATUS */ - (*regs)[34] = tswapreg(env->regs[CR_ESTATUS]); - - for (i = 35; i < 49; i++) /* ... */ - (*regs)[i] = -1; -} - -#endif /* TARGET_NIOS2 */ - #ifdef TARGET_OPENRISC -#define ELF_START_MMAP 0x08000000 - #define ELF_ARCH EM_OPENRISC #define ELF_CLASS ELFCLASS32 #define ELF_DATA ELFDATA2MSB @@ -1081,8 +1545,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, #ifdef TARGET_SH4 -#define ELF_START_MMAP 0x80000000 - #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_SH @@ -1163,8 +1625,6 @@ static uint32_t get_elf_hwcap(void) #ifdef TARGET_CRIS -#define ELF_START_MMAP 0x80000000 - #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_CRIS @@ -1180,8 +1640,6 @@ static inline void init_thread(struct target_pt_regs *regs, #ifdef TARGET_M68K -#define ELF_START_MMAP 0x80000000 - #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_68K @@ -1231,8 +1689,6 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUM68KState *e #ifdef TARGET_ALPHA -#define ELF_START_MMAP (0x30000000000ULL) - #define ELF_CLASS ELFCLASS64 #define ELF_ARCH EM_ALPHA @@ -1250,55 +1706,142 @@ static inline void init_thread(struct target_pt_regs *regs, #ifdef TARGET_S390X -#define ELF_START_MMAP (0x20000000000ULL) - #define ELF_CLASS ELFCLASS64 #define ELF_DATA ELFDATA2MSB #define ELF_ARCH EM_S390 +#include "elf.h" + +#define ELF_HWCAP get_elf_hwcap() + +#define GET_FEATURE(_feat, _hwcap) \ + do { if (s390_has_feat(_feat)) { hwcap |= _hwcap; } } while (0) + +uint32_t get_elf_hwcap(void) +{ + /* + * Let's assume we always have esan3 and zarch. + * 31-bit processes can use 64-bit registers (high gprs). 
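 *
 * Sketch of the intended consumer (hedged: the real caller is qemu's
 * /proc/cpuinfo emulation). It walks the set bits and prints the
 * names returned by the elf_hwcap_str() helper added below:
 *
 *     for (int bit = 0; bit < 32; bit++) {
 *         const char *name = elf_hwcap_str(bit);
 *         if (name && (hwcap & (1u << bit))) {
 *             printf(" %s", name);
 *         }
 *     }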
+ */ + uint32_t hwcap = HWCAP_S390_ESAN3 | HWCAP_S390_ZARCH | HWCAP_S390_HIGH_GPRS; + + GET_FEATURE(S390_FEAT_STFLE, HWCAP_S390_STFLE); + GET_FEATURE(S390_FEAT_MSA, HWCAP_S390_MSA); + GET_FEATURE(S390_FEAT_LONG_DISPLACEMENT, HWCAP_S390_LDISP); + GET_FEATURE(S390_FEAT_EXTENDED_IMMEDIATE, HWCAP_S390_EIMM); + if (s390_has_feat(S390_FEAT_EXTENDED_TRANSLATION_3) && + s390_has_feat(S390_FEAT_ETF3_ENH)) { + hwcap |= HWCAP_S390_ETF3EH; + } + GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS); + GET_FEATURE(S390_FEAT_VECTOR_ENH, HWCAP_S390_VXRS_EXT); + GET_FEATURE(S390_FEAT_VECTOR_ENH2, HWCAP_S390_VXRS_EXT2); + + return hwcap; +} + +const char *elf_hwcap_str(uint32_t bit) +{ + static const char *hwcap_str[] = { + [HWCAP_S390_NR_ESAN3] = "esan3", + [HWCAP_S390_NR_ZARCH] = "zarch", + [HWCAP_S390_NR_STFLE] = "stfle", + [HWCAP_S390_NR_MSA] = "msa", + [HWCAP_S390_NR_LDISP] = "ldisp", + [HWCAP_S390_NR_EIMM] = "eimm", + [HWCAP_S390_NR_DFP] = "dfp", + [HWCAP_S390_NR_HPAGE] = "edat", + [HWCAP_S390_NR_ETF3EH] = "etf3eh", + [HWCAP_S390_NR_HIGH_GPRS] = "highgprs", + [HWCAP_S390_NR_TE] = "te", + [HWCAP_S390_NR_VXRS] = "vx", + [HWCAP_S390_NR_VXRS_BCD] = "vxd", + [HWCAP_S390_NR_VXRS_EXT] = "vxe", + [HWCAP_S390_NR_GS] = "gs", + [HWCAP_S390_NR_VXRS_EXT2] = "vxe2", + [HWCAP_S390_NR_VXRS_PDE] = "vxp", + [HWCAP_S390_NR_SORT] = "sort", + [HWCAP_S390_NR_DFLT] = "dflt", + [HWCAP_S390_NR_NNPA] = "nnpa", + [HWCAP_S390_NR_PCI_MIO] = "pcimio", + [HWCAP_S390_NR_SIE] = "sie", + }; + + return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL; +} + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { regs->psw.addr = infop->entry; - regs->psw.mask = PSW_MASK_64 | PSW_MASK_32; + regs->psw.mask = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \ + PSW_MASK_MCHECK | PSW_MASK_PSTATE | PSW_MASK_64 | \ + PSW_MASK_32; regs->gprs[15] = infop->start_stack; } -#endif /* TARGET_S390X */ - -#ifdef TARGET_TILEGX - -/* 42 bits real used address, a half for user mode */ -#define ELF_START_MMAP (0x00000020000000000ULL) - -#define elf_check_arch(x) ((x) == EM_TILEGX) +/* See linux kernel: arch/s390/include/uapi/asm/ptrace.h (s390_regs). 
*/ +#define ELF_NREG 27 +typedef target_elf_greg_t target_elf_gregset_t[ELF_NREG]; -#define ELF_CLASS ELFCLASS64 -#define ELF_DATA ELFDATA2LSB -#define ELF_ARCH EM_TILEGX +enum { + TARGET_REG_PSWM = 0, + TARGET_REG_PSWA = 1, + TARGET_REG_GPRS = 2, + TARGET_REG_ARS = 18, + TARGET_REG_ORIG_R2 = 26, +}; -static inline void init_thread(struct target_pt_regs *regs, - struct image_info *infop) +static void elf_core_copy_regs(target_elf_gregset_t *regs, + const CPUS390XState *env) { - regs->pc = infop->entry; - regs->sp = infop->start_stack; + int i; + uint32_t *aregs; + (*regs)[TARGET_REG_PSWM] = tswapreg(env->psw.mask); + (*regs)[TARGET_REG_PSWA] = tswapreg(env->psw.addr); + for (i = 0; i < 16; i++) { + (*regs)[TARGET_REG_GPRS + i] = tswapreg(env->regs[i]); + } + aregs = (uint32_t *)&((*regs)[TARGET_REG_ARS]); + for (i = 0; i < 16; i++) { + aregs[i] = tswap32(env->aregs[i]); + } + (*regs)[TARGET_REG_ORIG_R2] = 0; } -#define ELF_EXEC_PAGESIZE 65536 /* TILE-Gx page size is 64KB */ +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +#define VDSO_HEADER "vdso.c.inc" -#endif /* TARGET_TILEGX */ +#endif /* TARGET_S390X */ #ifdef TARGET_RISCV -#define ELF_START_MMAP 0x80000000 #define ELF_ARCH EM_RISCV #ifdef TARGET_RISCV32 #define ELF_CLASS ELFCLASS32 +#define VDSO_HEADER "vdso-32.c.inc" #else #define ELF_CLASS ELFCLASS64 +#define VDSO_HEADER "vdso-64.c.inc" #endif +#define ELF_HWCAP get_elf_hwcap() + +static uint32_t get_elf_hwcap(void) +{ +#define MISA_BIT(EXT) (1 << (EXT - 'A')) + RISCVCPU *cpu = RISCV_CPU(thread_cpu); + uint32_t mask = MISA_BIT('I') | MISA_BIT('M') | MISA_BIT('A') + | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C') + | MISA_BIT('V'); + + return cpu->env.misa_ext & mask; +#undef MISA_BIT +} + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { @@ -1312,32 +1855,63 @@ static inline void init_thread(struct target_pt_regs *regs, #ifdef TARGET_HPPA -#define ELF_START_MMAP 0x80000000 #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_PARISC #define ELF_PLATFORM "PARISC" #define STACK_GROWS_DOWN 0 #define STACK_ALIGNMENT 64 +#define VDSO_HEADER "vdso.c.inc" + static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop) { regs->iaoq[0] = infop->entry; regs->iaoq[1] = infop->entry + 4; regs->gr[23] = 0; - regs->gr[24] = infop->arg_start; - regs->gr[25] = (infop->arg_end - infop->arg_start) / sizeof(abi_ulong); + regs->gr[24] = infop->argv; + regs->gr[25] = infop->argc; /* The top-of-stack contains a linkage buffer. */ regs->gr[30] = infop->start_stack + 64; regs->gr[31] = infop->entry; } +#define LO_COMMPAGE 0 + +static bool init_guest_commpage(void) +{ + /* If reserved_va, then we have already mapped 0 page on the host. */ + if (!reserved_va) { + void *want, *addr; + + want = g2h_untagged(LO_COMMPAGE); + addr = mmap(want, TARGET_PAGE_SIZE, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0); + if (addr == MAP_FAILED) { + perror("Allocating guest commpage"); + exit(EXIT_FAILURE); + } + if (addr != want) { + return false; + } + } + + /* + * On Linux, page zero is normally marked execute only + gateway. + * Normal read or write is supposed to fail (thus PROT_NONE above), + * but specific offsets have kernel code mapped to raise permissions + * and implement syscalls. Here, simply mark the page executable. + * Special case the entry points during translation (see do_page_zero). 
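 *
 * For orientation (hedged; do_page_zero has the authoritative list),
 * a parisc guest enters the kernel through fixed offsets in this page,
 * e.g. the syscall gateway at 0x100:
 *
 *     ble  0x100(%sr2, %r0)    ; branch to the gateway
 *     ldi  __NR_getpid, %r20   ; syscall number set in the delay slot
 *
 * plus the light-weight syscall entry at 0xb0 used for atomic ops.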
+ */ + page_set_flags(LO_COMMPAGE, LO_COMMPAGE | ~TARGET_PAGE_MASK, + PAGE_EXEC | PAGE_VALID); + return true; +} + #endif /* TARGET_HPPA */ #ifdef TARGET_XTENSA -#define ELF_START_MMAP 0x20000000 - #define ELF_CLASS ELFCLASS32 #define ELF_ARCH EM_XTENSA @@ -1348,6 +1922,15 @@ static inline void init_thread(struct target_pt_regs *regs, regs->windowstart = 1; regs->areg[1] = infop->start_stack; regs->pc = infop->entry; + if (info_is_fdpic(infop)) { + regs->areg[4] = infop->loadmap_addr; + regs->areg[5] = infop->interpreter_loadmap_addr; + if (infop->interpreter_loadmap_addr) { + regs->areg[6] = infop->interpreter_pt_dynamic_addr; + } else { + regs->areg[6] = infop->pt_dynamic_addr; + } + } } /* See linux kernel: arch/xtensa/include/asm/elf.h. */ @@ -1392,6 +1975,24 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, #endif /* TARGET_XTENSA */ +#ifdef TARGET_HEXAGON + +#define ELF_CLASS ELFCLASS32 +#define ELF_ARCH EM_HEXAGON + +static inline void init_thread(struct target_pt_regs *regs, + struct image_info *infop) +{ + regs->sepc = infop->entry; + regs->sp = infop->start_stack; +} + +#endif /* TARGET_HEXAGON */ + +#ifndef ELF_BASE_PLATFORM +#define ELF_BASE_PLATFORM (NULL) +#endif + #ifndef ELF_PLATFORM #define ELF_PLATFORM (NULL) #endif @@ -1404,6 +2005,10 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, #define elf_check_arch(x) ((x) == ELF_ARCH) #endif +#ifndef elf_check_abi +#define elf_check_abi(x) (1) +#endif + #ifndef ELF_HWCAP #define ELF_HWCAP 0 #endif @@ -1423,8 +2028,45 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, #define bswaptls(ptr) bswap32s(ptr) #endif +#ifndef EXSTACK_DEFAULT +#define EXSTACK_DEFAULT false +#endif + #include "elf.h" +/* We must delay the following stanzas until after "elf.h". */ +#if defined(TARGET_AARCH64) + +static bool arch_parse_elf_property(uint32_t pr_type, uint32_t pr_datasz, + const uint32_t *data, + struct image_info *info, + Error **errp) +{ + if (pr_type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) { + if (pr_datasz != sizeof(uint32_t)) { + error_setg(errp, "Ill-formed GNU_PROPERTY_AARCH64_FEATURE_1_AND"); + return false; + } + /* We will extract GNU_PROPERTY_AARCH64_FEATURE_1_BTI later. */ + info->note_flags = *data; + } + return true; +} +#define ARCH_USE_GNU_PROPERTY 1 + +#else + +static bool arch_parse_elf_property(uint32_t pr_type, uint32_t pr_datasz, + const uint32_t *data, + struct image_info *info, + Error **errp) +{ + g_assert_not_reached(); +} +#define ARCH_USE_GNU_PROPERTY 0 + +#endif + struct exec { unsigned int a_info; /* Use macros N_MAGIC, etc for access */ @@ -1444,16 +2086,7 @@ struct exec #define ZMAGIC 0413 #define QMAGIC 0314 -/* Necessary parameters */ -#define TARGET_ELF_EXEC_PAGESIZE \ - (((eppnt->p_align & ~qemu_host_page_mask) != 0) ? 
\ - TARGET_PAGE_SIZE : MAX(qemu_host_page_size, TARGET_PAGE_SIZE)) -#define TARGET_ELF_PAGELENGTH(_v) ROUND_UP((_v), TARGET_ELF_EXEC_PAGESIZE) -#define TARGET_ELF_PAGESTART(_v) ((_v) & \ - ~(abi_ulong)(TARGET_ELF_EXEC_PAGESIZE-1)) -#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1)) - -#define DLINFO_ITEMS 15 +#define DLINFO_ITEMS 16 static inline void memcpy_fromfs(void * to, const void * from, unsigned long n) { @@ -1541,7 +2174,8 @@ static inline void bswap_mips_abiflags(Mips_elf_abiflags_v0 *abiflags) { } #ifdef USE_ELF_CORE_DUMP static int elf_core_dump(int, const CPUArchState *); #endif /* USE_ELF_CORE_DUMP */ -static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias); +static void load_symbols(struct elfhdr *hdr, const ImageSource *src, + abi_ulong load_bias); /* Verify the portions of EHDR within E_IDENT for the target. This can be performed before bswapping the entire header. */ @@ -1561,6 +2195,7 @@ static bool elf_check_ident(struct elfhdr *ehdr) static bool elf_check_ehdr(struct elfhdr *ehdr) { return (elf_check_arch(ehdr->e_machine) + && elf_check_abi(ehdr->e_flags) && ehdr->e_ehsize == sizeof(struct elfhdr) && ehdr->e_phentsize == sizeof(struct elf_phdr) && (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN)); @@ -1664,17 +2299,28 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, struct image_info *info) { abi_ulong size, error, guard; + int prot; size = guest_stack_size; if (size < STACK_LOWER_LIMIT) { size = STACK_LOWER_LIMIT; } - guard = TARGET_PAGE_SIZE; - if (guard < qemu_real_host_page_size) { - guard = qemu_real_host_page_size; + + if (STACK_GROWS_DOWN) { + guard = TARGET_PAGE_SIZE; + if (guard < qemu_real_host_page_size()) { + guard = qemu_real_host_page_size(); + } + } else { + /* no guard page for hppa target where stack grows upwards. */ + guard = 0; } - error = target_mmap(0, size + guard, PROT_READ | PROT_WRITE, + prot = PROT_READ | PROT_WRITE; + if (info->exec_stack) { + prot |= PROT_EXEC; + } + error = target_mmap(0, size + guard, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (error == -1) { perror("mmap stack"); @@ -1687,59 +2333,81 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, info->stack_limit = error + guard; return info->stack_limit + size - sizeof(void *); } else { - target_mprotect(error + size, guard, PROT_NONE); info->stack_limit = error + size; return error; } } -/* Map and zero the bss. We need to explicitly zero any fractional pages - after the data section (i.e. bss). */ -static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot) +/** + * zero_bss: + * + * Map and zero the bss. We need to explicitly zero any fractional pages + * after the data section (i.e. bss). Return false on mapping failure. + */ +static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, + int prot, Error **errp) { - uintptr_t host_start, host_map_start, host_end; + abi_ulong align_bss; - last_bss = TARGET_PAGE_ALIGN(last_bss); + /* We only expect writable bss; the code segment shouldn't need this. */ + if (!(prot & PROT_WRITE)) { + error_setg(errp, "PT_LOAD with non-writable bss"); + return false; + } - /* ??? There is confusion between qemu_real_host_page_size and - qemu_host_page_size here and elsewhere in target_mmap, which - may lead to the end of the data section mapping from the file - not being mapped. At least there was an explicit test and - comment for that here, suggesting that "the file size must - be known". 
The comment probably pre-dates the introduction - of the fstat system call in target_mmap which does in fact - find out the size. What isn't clear is if the workaround - here is still actually needed. For now, continue with it, - but merge it with the "normal" mmap that would allocate the bss. */ + align_bss = TARGET_PAGE_ALIGN(start_bss); + end_bss = TARGET_PAGE_ALIGN(end_bss); - host_start = (uintptr_t) g2h(elf_bss); - host_end = (uintptr_t) g2h(last_bss); - host_map_start = REAL_HOST_PAGE_ALIGN(host_start); + if (start_bss < align_bss) { + int flags = page_get_flags(start_bss); - if (host_map_start < host_end) { - void *p = mmap((void *)host_map_start, host_end - host_map_start, - prot, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (p == MAP_FAILED) { - perror("cannot mmap brk"); - exit(-1); - } - } + if (!(flags & PAGE_BITS)) { + /* + * The whole address space of the executable was reserved + * at the start, therefore all pages will be VALID. + * But assuming there are no PROT_NONE PT_LOAD segments, + * a PROT_NONE page means no data all bss, and we can + * simply extend the new anon mapping back to the start + * of the page of bss. + */ + align_bss -= TARGET_PAGE_SIZE; + } else { + /* + * The start of the bss shares a page with something. + * The only thing that we expect is the data section, + * which would already be marked writable. + * Overlapping the RX code segment seems malformed. + */ + if (!(flags & PAGE_WRITE)) { + error_setg(errp, "PT_LOAD with bss overlapping " + "non-writable page"); + return false; + } - /* Ensure that the bss page(s) are valid */ - if ((page_get_flags(last_bss-1) & prot) != prot) { - page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss, prot | PAGE_VALID); + /* The page is already mapped and writable. */ + memset(g2h_untagged(start_bss), 0, align_bss - start_bss); + } } - if (host_start < host_map_start) { - memset((void *)host_start, 0, host_map_start - host_start); + if (align_bss < end_bss && + target_mmap(align_bss, end_bss - align_bss, prot, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) { + error_setg_errno(errp, errno, "Error mapping bss"); + return false; } + return true; } -#ifdef TARGET_ARM +#if defined(TARGET_ARM) static int elf_is_fdpic(struct elfhdr *exec) { return exec->e_ident[EI_OSABI] == ELFOSABI_ARM_FDPIC; } +#elif defined(TARGET_XTENSA) +static int elf_is_fdpic(struct elfhdr *exec) +{ + return exec->e_ident[EI_OSABI] == ELFOSABI_XTENSA_FDPIC; +} #else /* Default implementation, always false. 
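 *
 * For context (hedged): an FDPIC binary receives a load map describing
 * each independently placed segment; the uapi layout in
 * <linux/elf-fdpic.h> is roughly:
 *
 *     struct elf32_fdpic_loadseg {
 *         Elf32_Addr addr;     // where the segment actually landed
 *         Elf32_Addr p_vaddr;  // VMA recorded in the program header
 *         Elf32_Word p_memsz;  // size of this segment
 *     };
 *     struct elf32_fdpic_loadmap {
 *         Elf32_Half version;
 *         Elf32_Half nsegs;    // number of segments
 *         struct elf32_fdpic_loadseg segs[];
 *     };
 *
 * This is what loader_build_fdpic_loadmap() below fills in, and what
 * the xtensa/arm init_thread() hands to the guest in registers.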
*/ static int elf_is_fdpic(struct elfhdr *exec) @@ -1776,7 +2444,8 @@ static abi_ulong loader_build_fdpic_loadmap(struct image_info *info, abi_ulong s static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, struct elfhdr *exec, struct image_info *info, - struct image_info *interp_info) + struct image_info *interp_info, + struct image_info *vdso_info) { abi_ulong sp; abi_ulong u_argc, u_argv, u_envp, u_auxv; @@ -1784,8 +2453,8 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, int i; abi_ulong u_rand_bytes; uint8_t k_rand_bytes[16]; - abi_ulong u_platform; - const char *k_platform; + abi_ulong u_platform, u_base_platform; + const char *k_platform, *k_base_platform; const int n = sizeof(elf_addr_t); sp = p; @@ -1807,6 +2476,22 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, } } + u_base_platform = 0; + k_base_platform = ELF_BASE_PLATFORM; + if (k_base_platform) { + size_t len = strlen(k_base_platform) + 1; + if (STACK_GROWS_DOWN) { + sp -= (len + n - 1) & ~(n - 1); + u_base_platform = sp; + /* FIXME - check return value of memcpy_to_target() for failure */ + memcpy_to_target(sp, k_base_platform, len); + } else { + memcpy_to_target(sp, k_base_platform, len); + u_base_platform = sp; + sp += len + 1; + } + } + u_platform = 0; k_platform = ELF_PLATFORM; if (k_platform) { @@ -1833,12 +2518,9 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, } /* - * Generate 16 random bytes for userspace PRNG seeding (not - * cryptically secure but it's not the aim of QEMU). + * Generate 16 random bytes for userspace PRNG seeding. */ - for (i = 0; i < 16; i++) { - k_rand_bytes[i] = rand(); - } + qemu_guest_getrandom_nofail(k_rand_bytes, sizeof(k_rand_bytes)); if (STACK_GROWS_DOWN) { sp -= 16; u_rand_bytes = sp; @@ -1851,8 +2533,15 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, } size = (DLINFO_ITEMS + 1) * 2; - if (k_platform) + if (k_base_platform) { + size += 2; + } + if (k_platform) { size += 2; + } + if (vdso_info) { + size += 2; + } #ifdef DLINFO_ARCH_ITEMS size += DLINFO_ARCH_ITEMS * 2; #endif @@ -1878,8 +2567,10 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, u_envp = u_argv + (argc + 1) * n; u_auxv = u_envp + (envc + 1) * n; info->saved_auxv = u_auxv; - info->arg_start = u_argv; - info->arg_end = u_argv + argc * n; + info->argc = argc; + info->envc = envc; + info->argv = u_argv; + info->envp = u_envp; /* This is correct because Linux defines * elf_addr_t as Elf32_Off / Elf64_Off @@ -1902,13 +2593,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, NEW_AUX_ENT(AT_PHDR, (abi_ulong)(info->load_addr + exec->e_phoff)); NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr))); NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); - if ((info->alignment & ~qemu_host_page_mask) != 0) { - /* Target doesn't support host page size alignment */ - NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); - } else { - NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(MAX(TARGET_PAGE_SIZE, - qemu_host_page_size))); - } + NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info ? 
interp_info->load_addr : 0)); NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); NEW_AUX_ENT(AT_ENTRY, info->entry); @@ -1920,14 +2605,21 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK)); NEW_AUX_ENT(AT_RANDOM, (abi_ulong) u_rand_bytes); NEW_AUX_ENT(AT_SECURE, (abi_ulong) qemu_getauxval(AT_SECURE)); + NEW_AUX_ENT(AT_EXECFN, info->file_string); #ifdef ELF_HWCAP2 NEW_AUX_ENT(AT_HWCAP2, (abi_ulong) ELF_HWCAP2); #endif + if (u_base_platform) { + NEW_AUX_ENT(AT_BASE_PLATFORM, u_base_platform); + } if (u_platform) { NEW_AUX_ENT(AT_PLATFORM, u_platform); } + if (vdso_info) { + NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr); + } NEW_AUX_ENT (AT_NULL, 0); #undef NEW_AUX_ENT @@ -1957,273 +2649,554 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, return sp; } -unsigned long init_guest_space(unsigned long host_start, - unsigned long host_size, - unsigned long guest_start, - bool fixed) +#if defined(HI_COMMPAGE) +#define LO_COMMPAGE -1 +#elif defined(LO_COMMPAGE) +#define HI_COMMPAGE 0 +#else +#define HI_COMMPAGE 0 +#define LO_COMMPAGE -1 +#ifndef INIT_GUEST_COMMPAGE +#define init_guest_commpage() true +#endif +#endif + +/** + * pgb_try_mmap: + * @addr: host start address + * @addr_last: host last address + * @keep: do not unmap the probe region + * + * Return 1 if [@addr, @addr_last] is not mapped in the host, + * return 0 if it is not available to map, and -1 on mmap error. + * If @keep, the region is left mapped on success, otherwise unmapped. + */ +static int pgb_try_mmap(uintptr_t addr, uintptr_t addr_last, bool keep) { - unsigned long current_start, aligned_start; - int flags; + size_t size = addr_last - addr + 1; + void *p = mmap((void *)addr, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | + MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0); + int ret; - assert(host_start || host_size); + if (p == MAP_FAILED) { + return errno == EEXIST ? 0 : -1; + } + ret = p == (void *)addr; + if (!keep || !ret) { + munmap(p, size); + } + return ret; +} - /* If just a starting address is given, then just verify that - * address. */ - if (host_start && !host_size) { -#if defined(TARGET_ARM) && !defined(TARGET_AARCH64) - if (init_guest_commpage(host_start, host_size) != 1) { - return (unsigned long)-1; - } -#endif - return host_start; +/** + * pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t size, uintptr_t brk) + * @addr: host address + * @addr_last: host last address + * @brk: host brk + * + * Like pgb_try_mmap, but additionally reserve some memory following brk. + */ +static int pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t addr_last, + uintptr_t brk, bool keep) +{ + uintptr_t brk_last = brk + 16 * MiB - 1; + + /* Do not map anything close to the host brk. */ + if (addr <= brk_last && brk <= addr_last) { + return 0; } + return pgb_try_mmap(addr, addr_last, keep); +} - /* Setup the initial flags and start address. */ - current_start = host_start & qemu_host_page_mask; - flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE; - if (fixed) { - flags |= MAP_FIXED; +/** + * pgb_try_mmap_set: + * @ga: set of guest addrs + * @base: guest_base + * @brk: host brk + * + * Return true if all @ga can be mapped by the host at @base. + * On success, retain the mapping at index 0 for reserved_va. 
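
For readers unfamiliar with MAP_FIXED_NOREPLACE, the probe idiom used by pgb_try_mmap above can be exercised in isolation. A minimal standalone sketch, assuming a Linux host with kernel 4.17 or newer (illustrative only, not part of the patch):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <stdio.h>
    #include <sys/mman.h>

    /* Return 1 if [addr, addr + size) was free, 0 if occupied, -1 on error. */
    static int try_reserve(void *addr, size_t size)
    {
        void *p = mmap(addr, size, PROT_NONE,
                       MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE |
                       MAP_FIXED_NOREPLACE, -1, 0);
        if (p == MAP_FAILED) {
            return errno == EEXIST ? 0 : -1;
        }
        /* Kernels without MAP_FIXED_NOREPLACE ignore it and map elsewhere. */
        int ok = (p == addr);
        munmap(p, size);
        return ok;
    }

    int main(void)
    {
        printf("free: %d\n", try_reserve((void *)0x40000000ul, 1 << 20));
        return 0;
    }

Probing with PROT_NONE and MAP_NORESERVE costs neither backing store nor commit accounting, which is why whole guest-sized spans can be tested cheaply.
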
+ */
+
+typedef struct PGBAddrs {
+    uintptr_t bounds[3][2]; /* start/last pairs */
+    int nbounds;
+} PGBAddrs;
+
+static bool pgb_try_mmap_set(const PGBAddrs *ga, uintptr_t base, uintptr_t brk)
+{
+    for (int i = ga->nbounds - 1; i >= 0; --i) {
+        if (pgb_try_mmap_skip_brk(ga->bounds[i][0] + base,
+                                  ga->bounds[i][1] + base,
+                                  brk, i == 0 && reserved_va) <= 0) {
+            return false;
+        }
+    }
+    return true;
+}
 
-    /* Otherwise, a non-zero size region of memory needs to be mapped
-     * and validated. */
+/**
+ * pgb_addr_set:
+ * @ga: output set of guest addrs
+ * @guest_loaddr: guest image low address
+ * @guest_hiaddr: guest image high address
+ * @try_identity: create for identity mapping
+ *
+ * Fill in @ga with the image, COMMPAGE and NULL page.
+ */
+static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr,
+                         abi_ulong guest_hiaddr, bool try_identity)
+{
+    int n;
 
-#if defined(TARGET_ARM) && !defined(TARGET_AARCH64)
-    /* On 32-bit ARM, we need to map not just the usable memory, but
-     * also the commpage. Try to find a suitable place by allocating
-     * a big chunk for all of it. If host_start, then the naive
-     * strategy probably does good enough.
+    /*
+     * With a low commpage, or a guest mapped very low,
+     * we may not be able to use the identity map.
      */
-    if (!host_start) {
-        unsigned long guest_full_size, host_full_size, real_start;
-
-        guest_full_size =
-            (0xffff0f00 & qemu_host_page_mask) + qemu_host_page_size;
-        host_full_size = guest_full_size - guest_start;
-        real_start = (unsigned long)
-            mmap(NULL, host_full_size, PROT_NONE, flags, -1, 0);
-        if (real_start == (unsigned long)-1) {
-            if (host_size < host_full_size - qemu_host_page_size) {
-                /* We failed to map a continous segment, but we're
-                 * allowed to have a gap between the usable memory and
-                 * the commpage where other things can be mapped.
-                 * This sparseness gives us more flexibility to find
-                 * an address range.
-                 */
-                goto naive;
-            }
-            return (unsigned long)-1;
+    if (try_identity) {
+        if (LO_COMMPAGE != -1 && LO_COMMPAGE < mmap_min_addr) {
+            return false;
         }
-        munmap((void *)real_start, host_full_size);
-        if (real_start & ~qemu_host_page_mask) {
-            /* The same thing again, but with an extra qemu_host_page_size
-             * so that we can shift around alignment.
-             */
-            unsigned long real_size = host_full_size + qemu_host_page_size;
-            real_start = (unsigned long)
-                mmap(NULL, real_size, PROT_NONE, flags, -1, 0);
-            if (real_start == (unsigned long)-1) {
-                if (host_size < host_full_size - qemu_host_page_size) {
-                    goto naive;
-                }
-                return (unsigned long)-1;
-            }
-            munmap((void *)real_start, real_size);
-            real_start = HOST_PAGE_ALIGN(real_start);
+        if (guest_loaddr != 0 && guest_loaddr < mmap_min_addr) {
+            return false;
         }
-        current_start = real_start;
     }
- naive:
-#endif
 
-    while (1) {
-        unsigned long real_start, real_size, aligned_size;
-        aligned_size = real_size = host_size;
+    memset(ga, 0, sizeof(*ga));
+    n = 0;
 
-        /* Do not use mmap_find_vma here because that is limited to the
-         * guest address space. We are going to make the
-         * guest address space fit whatever we're given.
-         */
-        real_start = (unsigned long)
-            mmap((void *)current_start, host_size, PROT_NONE, flags, -1, 0);
-        if (real_start == (unsigned long)-1) {
-            return (unsigned long)-1;
+    if (reserved_va) {
+        ga->bounds[n][0] = try_identity ? mmap_min_addr : 0;
+        ga->bounds[n][1] = reserved_va;
+        n++;
+        /* LO_COMMPAGE and NULL handled by reserving from 0. */
+    } else {
+        /* Add any LO_COMMPAGE or NULL page.
*/ + if (LO_COMMPAGE != -1) { + ga->bounds[n][0] = 0; + ga->bounds[n][1] = LO_COMMPAGE + TARGET_PAGE_SIZE - 1; + n++; + } else if (!try_identity) { + ga->bounds[n][0] = 0; + ga->bounds[n][1] = TARGET_PAGE_SIZE - 1; + n++; } - /* Check to see if the address is valid. */ - if (host_start && real_start != current_start) { - goto try_again; + /* Add the guest image for ET_EXEC. */ + if (guest_loaddr) { + ga->bounds[n][0] = guest_loaddr; + ga->bounds[n][1] = guest_hiaddr; + n++; } + } - /* Ensure the address is properly aligned. */ - if (real_start & ~qemu_host_page_mask) { - /* Ideally, we adjust like - * - * pages: [ ][ ][ ][ ][ ] - * old: [ real ] - * [ aligned ] - * new: [ real ] - * [ aligned ] - * - * But if there is something else mapped right after it, - * then obviously it won't have room to grow, and the - * kernel will put the new larger real someplace else with - * unknown alignment (if we made it to here, then - * fixed=false). Which is why we grow real by a full page - * size, instead of by part of one; so that even if we get - * moved, we can still guarantee alignment. But this does - * mean that there is a padding of < 1 page both before - * and after the aligned range; the "after" could could - * cause problems for ARM emulation where it could butt in - * to where we need to put the commpage. - */ - munmap((void *)real_start, host_size); - real_size = aligned_size + qemu_host_page_size; - real_start = (unsigned long) - mmap((void *)real_start, real_size, PROT_NONE, flags, -1, 0); - if (real_start == (unsigned long)-1) { - return (unsigned long)-1; - } - aligned_start = HOST_PAGE_ALIGN(real_start); - } else { - aligned_start = real_start; + /* + * Temporarily disable + * "comparison is always false due to limited range of data type" + * due to comparison between unsigned and (possible) 0. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wtype-limits" + + /* Add any HI_COMMPAGE not covered by reserved_va. */ + if (reserved_va < HI_COMMPAGE) { + ga->bounds[n][0] = HI_COMMPAGE & qemu_real_host_page_mask(); + ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1; + n++; + } + +#pragma GCC diagnostic pop + + ga->nbounds = n; + return true; +} + +static void pgb_fail_in_use(const char *image_name) +{ + error_report("%s: requires virtual address space that is in use " + "(omit the -B option or choose a different value)", + image_name); + exit(EXIT_FAILURE); +} + +static void pgb_fixed(const char *image_name, uintptr_t guest_loaddr, + uintptr_t guest_hiaddr, uintptr_t align) +{ + PGBAddrs ga; + uintptr_t brk = (uintptr_t)sbrk(0); + + if (!QEMU_IS_ALIGNED(guest_base, align)) { + fprintf(stderr, "Requested guest base %p does not satisfy " + "host minimum alignment (0x%" PRIxPTR ")\n", + (void *)guest_base, align); + exit(EXIT_FAILURE); + } + + if (!pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, !guest_base) + || !pgb_try_mmap_set(&ga, guest_base, brk)) { + pgb_fail_in_use(image_name); + } +} + +/** + * pgb_find_fallback: + * + * This is a fallback method for finding holes in the host address space + * if we don't have the benefit of being able to access /proc/self/map. + * It can potentially take a very long time as we can only dumbly iterate + * up the host address space seeing if the allocation would work. + */ +static uintptr_t pgb_find_fallback(const PGBAddrs *ga, uintptr_t align, + uintptr_t brk) +{ + /* TODO: come up with a better estimate of how much to skip. */ + uintptr_t skip = sizeof(uintptr_t) == 4 ? 
MiB : GiB; + + for (uintptr_t base = skip; ; base += skip) { + base = ROUND_UP(base, align); + if (pgb_try_mmap_set(ga, base, brk)) { + return base; + } + if (base >= -skip) { + return -1; } + } +} -#if defined(TARGET_ARM) && !defined(TARGET_AARCH64) - /* On 32-bit ARM, we need to also be able to map the commpage. */ - int valid = init_guest_commpage(aligned_start - guest_start, - aligned_size + guest_start); - if (valid == -1) { - munmap((void *)real_start, real_size); - return (unsigned long)-1; - } else if (valid == 0) { - goto try_again; +static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base, + IntervalTreeRoot *root) +{ + for (int i = ga->nbounds - 1; i >= 0; --i) { + uintptr_t s = base + ga->bounds[i][0]; + uintptr_t l = base + ga->bounds[i][1]; + IntervalTreeNode *n; + + if (l < s) { + /* Wraparound. Skip to advance S to mmap_min_addr. */ + return mmap_min_addr - s; } -#endif - /* If nothing has said `return -1` or `goto try_again` yet, - * then the address we have is good. - */ - break; - - try_again: - /* That address didn't work. Unmap and try a different one. - * The address the host picked because is typically right at - * the top of the host address space and leaves the guest with - * no usable address space. Resort to a linear search. We - * already compensated for mmap_min_addr, so this should not - * happen often. Probably means we got unlucky and host - * address space randomization put a shared library somewhere - * inconvenient. - * - * This is probably a good strategy if host_start, but is - * probably a bad strategy if not, which means we got here - * because of trouble with ARM commpage setup. - */ - munmap((void *)real_start, real_size); - current_start += qemu_host_page_size; - if (host_start == current_start) { - /* Theoretically possible if host doesn't have any suitably - * aligned areas. Normally the first mmap will fail. - */ - return (unsigned long)-1; + n = interval_tree_iter_first(root, s, l); + if (n != NULL) { + /* Conflict. Skip to advance S to LAST + 1. */ + return n->last - s + 1; } } + return 0; /* success */ +} + +static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root, + uintptr_t align, uintptr_t brk) +{ + uintptr_t last = mmap_min_addr; + uintptr_t base, skip; - qemu_log_mask(CPU_LOG_PAGE, "Reserved 0x%lx bytes of guest address space\n", host_size); + while (true) { + base = ROUND_UP(last, align); + if (base < last) { + return -1; + } + + skip = pgb_try_itree(ga, base, root); + if (skip == 0) { + break; + } - return aligned_start; + last = base + skip; + if (last < base) { + return -1; + } + } + + /* + * We've chosen 'base' based on holes in the interval tree, + * but we don't yet know if it is a valid host address. + * Because it is the first matching hole, if the host addresses + * are invalid we know there are no further matches. + */ + return pgb_try_mmap_set(ga, base, brk) ? base : -1; } -static void probe_guest_base(const char *image_name, - abi_ulong loaddr, abi_ulong hiaddr) +static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr, + uintptr_t guest_hiaddr, uintptr_t align) { - /* Probe for a suitable guest base address, if the user has not set - * it explicitly, and set guest_base appropriately. - * In case of error we will print a suitable message and exit. + IntervalTreeRoot *root; + uintptr_t brk, ret; + PGBAddrs ga; + + /* Try the identity map first. 
*/ + if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) { + brk = (uintptr_t)sbrk(0); + if (pgb_try_mmap_set(&ga, 0, brk)) { + guest_base = 0; + return; + } + } + + /* + * Rebuild the address set for non-identity map. + * This differs in the mapping of the guest NULL page. */ - const char *errmsg; - if (!have_guest_base && !reserved_va) { - unsigned long host_start, real_start, host_size; + pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, false); - /* Round addresses to page boundaries. */ - loaddr &= qemu_host_page_mask; - hiaddr = HOST_PAGE_ALIGN(hiaddr); + root = read_self_maps(); - if (loaddr < mmap_min_addr) { - host_start = HOST_PAGE_ALIGN(mmap_min_addr); - } else { - host_start = loaddr; - if (host_start != loaddr) { - errmsg = "Address overflow loading ELF binary"; - goto exit_errmsg; - } - } - host_size = hiaddr - loaddr; + /* Read brk after we've read the maps, which will malloc. */ + brk = (uintptr_t)sbrk(0); - /* Setup the initial guest memory space with ranges gleaned from - * the ELF image that is being loaded. + if (!root) { + ret = pgb_find_fallback(&ga, align, brk); + } else { + /* + * Reserve the area close to the host brk. + * This will be freed with the rest of the tree. */ - real_start = init_guest_space(host_start, host_size, loaddr, false); - if (real_start == (unsigned long)-1) { - errmsg = "Unable to find space for application"; - goto exit_errmsg; + IntervalTreeNode *b = g_new0(IntervalTreeNode, 1); + b->start = brk; + b->last = brk + 16 * MiB - 1; + interval_tree_insert(b, root); + + ret = pgb_find_itree(&ga, root, align, brk); + free_self_maps(root); + } + + if (ret == -1) { + int w = TARGET_LONG_BITS / 4; + + error_report("%s: Unable to find a guest_base to satisfy all " + "guest address mapping requirements", image_name); + + for (int i = 0; i < ga.nbounds; ++i) { + error_printf(" %0*" PRIx64 "-%0*" PRIx64 "\n", + w, (uint64_t)ga.bounds[i][0], + w, (uint64_t)ga.bounds[i][1]); } - guest_base = real_start - loaddr; + exit(EXIT_FAILURE); + } + guest_base = ret; +} - qemu_log_mask(CPU_LOG_PAGE, "Relocating guest address space from 0x" - TARGET_ABI_FMT_lx " to 0x%lx\n", - loaddr, real_start); +void probe_guest_base(const char *image_name, abi_ulong guest_loaddr, + abi_ulong guest_hiaddr) +{ + /* In order to use host shmat, we must be able to honor SHMLBA. */ + uintptr_t align = MAX(SHMLBA, TARGET_PAGE_SIZE); + + /* Sanity check the guest binary. */ + if (reserved_va) { + if (guest_hiaddr > reserved_va) { + error_report("%s: requires more than reserved virtual " + "address space (0x%" PRIx64 " > 0x%lx)", + image_name, (uint64_t)guest_hiaddr, reserved_va); + exit(EXIT_FAILURE); + } + } else { + if (guest_hiaddr != (uintptr_t)guest_hiaddr) { + error_report("%s: requires more virtual address space " + "than the host can provide (0x%" PRIx64 ")", + image_name, (uint64_t)guest_hiaddr + 1); + exit(EXIT_FAILURE); + } } - return; -exit_errmsg: - fprintf(stderr, "%s: %s\n", image_name, errmsg); - exit(-1); + if (have_guest_base) { + pgb_fixed(image_name, guest_loaddr, guest_hiaddr, align); + } else { + pgb_dynamic(image_name, guest_loaddr, guest_hiaddr, align); + } + + /* Reserve and initialize the commpage. */ + if (!init_guest_commpage()) { + /* We have already probed for the commpage being free. */ + g_assert_not_reached(); + } + + assert(QEMU_IS_ALIGNED(guest_base, align)); + qemu_log_mask(CPU_LOG_PAGE, "Locating guest address space " + "@ 0x%" PRIx64 "\n", (uint64_t)guest_base); } +enum { + /* The string "GNU\0" as a magic number. 
*/ + GNU0_MAGIC = const_le32('G' | 'N' << 8 | 'U' << 16), + NOTE_DATA_SZ = 1 * KiB, + NOTE_NAME_SZ = 4, + ELF_GNU_PROPERTY_ALIGN = ELF_CLASS == ELFCLASS32 ? 4 : 8, +}; -/* Load an ELF image into the address space. +/* + * Process a single gnu_property entry. + * Return false for error. + */ +static bool parse_elf_property(const uint32_t *data, int *off, int datasz, + struct image_info *info, bool have_prev_type, + uint32_t *prev_type, Error **errp) +{ + uint32_t pr_type, pr_datasz, step; - IMAGE_NAME is the filename of the image, to use in error messages. - IMAGE_FD is the open file descriptor for the image. + if (*off > datasz || !QEMU_IS_ALIGNED(*off, ELF_GNU_PROPERTY_ALIGN)) { + goto error_data; + } + datasz -= *off; + data += *off / sizeof(uint32_t); - BPRM_BUF is a copy of the beginning of the file; this of course - contains the elf file header at offset 0. It is assumed that this - buffer is sufficiently aligned to present no problems to the host - in accessing data at aligned offsets within the buffer. + if (datasz < 2 * sizeof(uint32_t)) { + goto error_data; + } + pr_type = data[0]; + pr_datasz = data[1]; + data += 2; + datasz -= 2 * sizeof(uint32_t); + step = ROUND_UP(pr_datasz, ELF_GNU_PROPERTY_ALIGN); + if (step > datasz) { + goto error_data; + } - On return: INFO values will be filled in, as necessary or available. */ + /* Properties are supposed to be unique and sorted on pr_type. */ + if (have_prev_type && pr_type <= *prev_type) { + if (pr_type == *prev_type) { + error_setg(errp, "Duplicate property in PT_GNU_PROPERTY"); + } else { + error_setg(errp, "Unsorted property in PT_GNU_PROPERTY"); + } + return false; + } + *prev_type = pr_type; + + if (!arch_parse_elf_property(pr_type, pr_datasz, data, info, errp)) { + return false; + } + + *off += 2 * sizeof(uint32_t) + step; + return true; -static void load_elf_image(const char *image_name, int image_fd, - struct image_info *info, char **pinterp_name, - char bprm_buf[BPRM_BUF_SIZE]) + error_data: + error_setg(errp, "Ill-formed property in PT_GNU_PROPERTY"); + return false; +} + +/* Process NT_GNU_PROPERTY_TYPE_0. */ +static bool parse_elf_properties(const ImageSource *src, + struct image_info *info, + const struct elf_phdr *phdr, + Error **errp) { - struct elfhdr *ehdr = (struct elfhdr *)bprm_buf; - struct elf_phdr *phdr; + union { + struct elf_note nhdr; + uint32_t data[NOTE_DATA_SZ / sizeof(uint32_t)]; + } note; + + int n, off, datasz; + bool have_prev_type; + uint32_t prev_type; + + /* Unless the arch requires properties, ignore them. */ + if (!ARCH_USE_GNU_PROPERTY) { + return true; + } + + /* If the properties are crazy large, that's too bad. */ + n = phdr->p_filesz; + if (n > sizeof(note)) { + error_setg(errp, "PT_GNU_PROPERTY too large"); + return false; + } + if (n < sizeof(note.nhdr)) { + error_setg(errp, "PT_GNU_PROPERTY too small"); + return false; + } + + if (!imgsrc_read(¬e, phdr->p_offset, n, src, errp)) { + return false; + } + + /* + * The contents of a valid PT_GNU_PROPERTY is a sequence + * of uint32_t -- swap them all now. + */ +#ifdef BSWAP_NEEDED + for (int i = 0; i < n / 4; i++) { + bswap32s(note.data + i); + } +#endif + + /* + * Note that nhdr is 3 words, and that the "name" described by namesz + * immediately follows nhdr and is thus at the 4th word. Further, all + * of the inputs to the kernel's round_up are multiples of 4. 
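
To make the offset arithmetic in that comment concrete, here is a small host-side illustration (not QEMU code) of the note header that parse_elf_properties() validates; the struct mirrors the three-word layout of struct elf_note:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct note_hdr {          /* three 32-bit words, like struct elf_note */
        uint32_t n_namesz;     /* 4: strlen("GNU") + 1 */
        uint32_t n_descsz;     /* size of the property payload */
        uint32_t n_type;       /* NT_GNU_PROPERTY_TYPE_0 */
    };

    int main(void)
    {
        const unsigned char name[4] = "GNU";   /* NUL included, 4 bytes */
        uint32_t magic;

        memcpy(&magic, name, sizeof(magic));
        /* On a little-endian host this prints 0x00554e47: 'G','N','U',0. */
        printf("GNU0_MAGIC     = 0x%08x\n", magic);
        /* The name occupies the 4th word; properties begin at the 5th. */
        printf("payload offset = %zu\n", sizeof(struct note_hdr) + 4);
        return 0;
    }
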
+ */ + if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 || + note.nhdr.n_namesz != NOTE_NAME_SZ || + note.data[3] != GNU0_MAGIC) { + error_setg(errp, "Invalid note in PT_GNU_PROPERTY"); + return false; + } + off = sizeof(note.nhdr) + NOTE_NAME_SZ; + + datasz = note.nhdr.n_descsz + off; + if (datasz > n) { + error_setg(errp, "Invalid note size in PT_GNU_PROPERTY"); + return false; + } + + have_prev_type = false; + prev_type = 0; + while (1) { + if (off == datasz) { + return true; /* end, exit ok */ + } + if (!parse_elf_property(note.data, &off, datasz, info, + have_prev_type, &prev_type, errp)) { + return false; + } + have_prev_type = true; + } +} + +/** + * load_elf_image: Load an ELF image into the address space. + * @image_name: the filename of the image, to use in error messages. + * @src: the ImageSource from which to read. + * @info: info collected from the loaded image. + * @ehdr: the ELF header, not yet bswapped. + * @pinterp_name: record any PT_INTERP string found. + * + * On return: @info values will be filled in, as necessary or available. + */ + +static void load_elf_image(const char *image_name, const ImageSource *src, + struct image_info *info, struct elfhdr *ehdr, + char **pinterp_name) +{ + g_autofree struct elf_phdr *phdr = NULL; abi_ulong load_addr, load_bias, loaddr, hiaddr, error; - int i, retval; - const char *errmsg; + int i, prot_exec; + Error *err = NULL; - /* First of all, some simple consistency checks */ - errmsg = "Invalid ELF image for this architecture"; + /* + * First of all, some simple consistency checks. + * Note that we rely on the bswapped ehdr staying in bprm_buf, + * for later use by load_elf_binary and create_elf_tables. + */ + if (!imgsrc_read(ehdr, 0, sizeof(*ehdr), src, &err)) { + goto exit_errmsg; + } if (!elf_check_ident(ehdr)) { + error_setg(&err, "Invalid ELF image for this architecture"); goto exit_errmsg; } bswap_ehdr(ehdr); if (!elf_check_ehdr(ehdr)) { + error_setg(&err, "Invalid ELF image for this architecture"); goto exit_errmsg; } - i = ehdr->e_phnum * sizeof(struct elf_phdr); - if (ehdr->e_phoff + i <= BPRM_BUF_SIZE) { - phdr = (struct elf_phdr *)(bprm_buf + ehdr->e_phoff); - } else { - phdr = (struct elf_phdr *) alloca(i); - retval = pread(image_fd, phdr, i, ehdr->e_phoff); - if (retval != i) { - goto exit_read; - } + phdr = imgsrc_read_alloc(ehdr->e_phoff, + ehdr->e_phnum * sizeof(struct elf_phdr), + src, &err); + if (phdr == NULL) { + goto exit_errmsg; } bswap_phdr(phdr, ehdr->e_phnum); @@ -2232,43 +3205,115 @@ static void load_elf_image(const char *image_name, int image_fd, mmap_lock(); - /* Find the maximum size of the image and allocate an appropriate - amount of memory to handle that. */ + /* + * Find the maximum size of the image and allocate an appropriate + * amount of memory to handle that. Locate the interpreter, if any. 
+ */ loaddr = -1, hiaddr = 0; info->alignment = 0; + info->exec_stack = EXSTACK_DEFAULT; for (i = 0; i < ehdr->e_phnum; ++i) { - if (phdr[i].p_type == PT_LOAD) { - abi_ulong a = phdr[i].p_vaddr - phdr[i].p_offset; + struct elf_phdr *eppnt = phdr + i; + if (eppnt->p_type == PT_LOAD) { + abi_ulong a = eppnt->p_vaddr & TARGET_PAGE_MASK; if (a < loaddr) { loaddr = a; } - a = phdr[i].p_vaddr + phdr[i].p_memsz; + a = eppnt->p_vaddr + eppnt->p_memsz - 1; if (a > hiaddr) { hiaddr = a; } ++info->nsegs; - info->alignment |= phdr[i].p_align; + info->alignment |= eppnt->p_align; + } else if (eppnt->p_type == PT_INTERP && pinterp_name) { + g_autofree char *interp_name = NULL; + + if (*pinterp_name) { + error_setg(&err, "Multiple PT_INTERP entries"); + goto exit_errmsg; + } + + interp_name = imgsrc_read_alloc(eppnt->p_offset, eppnt->p_filesz, + src, &err); + if (interp_name == NULL) { + goto exit_errmsg; + } + if (interp_name[eppnt->p_filesz - 1] != 0) { + error_setg(&err, "Invalid PT_INTERP entry"); + goto exit_errmsg; + } + *pinterp_name = g_steal_pointer(&interp_name); + } else if (eppnt->p_type == PT_GNU_PROPERTY) { + if (!parse_elf_properties(src, info, eppnt, &err)) { + goto exit_errmsg; + } + } else if (eppnt->p_type == PT_GNU_STACK) { + info->exec_stack = eppnt->p_flags & PF_X; } } load_addr = loaddr; - if (ehdr->e_type == ET_DYN) { - /* The image indicates that it can be loaded anywhere. Find a - location that can hold the memory space required. If the - image is pre-linked, LOADDR will be non-zero. Since we do - not supply MAP_FIXED here we'll use that address if and - only if it remains available. */ - load_addr = target_mmap(loaddr, hiaddr - loaddr, PROT_NONE, - MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, - -1, 0); - if (load_addr == -1) { - goto exit_perror; + + if (pinterp_name != NULL) { + if (ehdr->e_type == ET_EXEC) { + /* + * Make sure that the low address does not conflict with + * MMAP_MIN_ADDR or the QEMU application itself. + */ + probe_guest_base(image_name, loaddr, hiaddr); + } else { + abi_ulong align; + + /* + * The binary is dynamic, but we still need to + * select guest_base. In this case we pass a size. + */ + probe_guest_base(image_name, 0, hiaddr - loaddr); + + /* + * Avoid collision with the loader by providing a different + * default load address. + */ + load_addr += elf_et_dyn_base; + + /* + * TODO: Better support for mmap alignment is desirable. + * Since we do not have complete control over the guest + * address space, we prefer the kernel to choose some address + * rather than force the use of LOAD_ADDR via MAP_FIXED. + * But without MAP_FIXED we cannot guarantee alignment, + * only suggest it. + */ + align = pow2ceil(info->alignment); + if (align) { + load_addr &= -align; + } } - } else if (pinterp_name != NULL) { - /* This is the main executable. Make sure that the low - address does not conflict with MMAP_MIN_ADDR or the - QEMU application itself. */ - probe_guest_base(image_name, loaddr, hiaddr); + } + + /* + * Reserve address space for all of this. + * + * In the case of ET_EXEC, we supply MAP_FIXED_NOREPLACE so that we get + * exactly the address range that is required. Without reserved_va, + * the guest address space is not isolated. We have attempted to avoid + * conflict with the host program itself via probe_guest_base, but using + * MAP_FIXED_NOREPLACE instead of MAP_FIXED provides an extra check. + * + * Otherwise this is ET_DYN, and we are searching for a location + * that can hold the memory space required. 
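
The loaddr/hiaddr bookkeeping above reduces to taking the page-aligned union of all PT_LOAD segments, so that a single reservation covers the whole image. A toy version with hardcoded headers and an assumed 4 KiB page size (illustrative only):

    #include <elf.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        Elf64_Phdr phdr[] = {
            { .p_type = PT_LOAD, .p_vaddr = 0x400000, .p_memsz = 0x1000 },
            { .p_type = PT_LOAD, .p_vaddr = 0x600000, .p_memsz = 0x2345 },
        };
        Elf64_Addr loaddr = -1, hiaddr = 0;

        for (size_t i = 0; i < sizeof(phdr) / sizeof(phdr[0]); i++) {
            if (phdr[i].p_type != PT_LOAD) {
                continue;
            }
            Elf64_Addr lo = phdr[i].p_vaddr & ~(Elf64_Addr)0xfff;
            Elf64_Addr hi = phdr[i].p_vaddr + phdr[i].p_memsz - 1;
            if (lo < loaddr) {
                loaddr = lo;
            }
            if (hi > hiaddr) {
                hiaddr = hi;
            }
        }
        printf("reserve [0x%jx, 0x%jx], 0x%jx bytes\n",
               (uintmax_t)loaddr, (uintmax_t)hiaddr,
               (uintmax_t)(hiaddr - loaddr + 1));
        return 0;
    }

Tracking the last byte (an inclusive hiaddr) rather than one-past-the-end avoids overflow when a segment ends exactly at the top of the address space, hence the hiaddr - loaddr + 1 in the reservation.
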
If the image is + * pre-linked, LOAD_ADDR will be non-zero, and the kernel should + * honor that address if it happens to be free. + * + * In both cases, we will overwrite pages in this range with mappings + * from the executable. + */ + load_addr = target_mmap(load_addr, (size_t)hiaddr - loaddr + 1, PROT_NONE, + MAP_PRIVATE | MAP_ANON | MAP_NORESERVE | + (ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0), + -1, 0); + if (load_addr == -1) { + goto exit_mmap; } load_bias = load_addr - loaddr; @@ -2292,43 +3337,78 @@ static void load_elf_image(const char *image_name, int image_fd, } info->load_bias = load_bias; + info->code_offset = load_bias; + info->data_offset = load_bias; info->load_addr = load_addr; info->entry = ehdr->e_entry + load_bias; info->start_code = -1; info->end_code = 0; info->start_data = -1; info->end_data = 0; - info->brk = 0; + /* Usual start for brk is after all sections of the main executable. */ + info->brk = TARGET_PAGE_ALIGN(hiaddr + load_bias); info->elf_flags = ehdr->e_flags; + prot_exec = PROT_EXEC; +#ifdef TARGET_AARCH64 + /* + * If the BTI feature is present, this indicates that the executable + * pages of the startup binary should be mapped with PROT_BTI, so that + * branch targets are enforced. + * + * The startup binary is either the interpreter or the static executable. + * The interpreter is responsible for all pages of a dynamic executable. + * + * Elf notes are backward compatible to older cpus. + * Do not enable BTI unless it is supported. + */ + if ((info->note_flags & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) + && (pinterp_name == NULL || *pinterp_name == 0) + && cpu_isar_feature(aa64_bti, ARM_CPU(thread_cpu))) { + prot_exec |= TARGET_PROT_BTI; + } +#endif + for (i = 0; i < ehdr->e_phnum; i++) { struct elf_phdr *eppnt = phdr + i; if (eppnt->p_type == PT_LOAD) { - abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em, vaddr_len; + abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em; int elf_prot = 0; - if (eppnt->p_flags & PF_R) elf_prot = PROT_READ; - if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE; - if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC; + if (eppnt->p_flags & PF_R) { + elf_prot |= PROT_READ; + } + if (eppnt->p_flags & PF_W) { + elf_prot |= PROT_WRITE; + } + if (eppnt->p_flags & PF_X) { + elf_prot |= prot_exec; + } vaddr = load_bias + eppnt->p_vaddr; - vaddr_po = TARGET_ELF_PAGEOFFSET(vaddr); - vaddr_ps = TARGET_ELF_PAGESTART(vaddr); - vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po); - - error = target_mmap(vaddr_ps, vaddr_len, - elf_prot, MAP_PRIVATE | MAP_FIXED, - image_fd, eppnt->p_offset - vaddr_po); - if (error == -1) { - goto exit_perror; - } + vaddr_po = vaddr & ~TARGET_PAGE_MASK; + vaddr_ps = vaddr & TARGET_PAGE_MASK; vaddr_ef = vaddr + eppnt->p_filesz; vaddr_em = vaddr + eppnt->p_memsz; - /* If the load segment requests extra zeros (e.g. bss), map it. */ - if (vaddr_ef < vaddr_em) { - zero_bss(vaddr_ef, vaddr_em, elf_prot); + /* + * Some segments may be completely empty, with a non-zero p_memsz + * but no backing file segment. + */ + if (eppnt->p_filesz != 0) { + error = imgsrc_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po, + elf_prot, MAP_PRIVATE | MAP_FIXED, + src, eppnt->p_offset - vaddr_po); + if (error == -1) { + goto exit_mmap; + } + } + + /* If the load segment requests extra zeros (e.g. bss), map it. */ + if (vaddr_ef < vaddr_em && + !zero_bss(vaddr_ef, vaddr_em, elf_prot, &err)) { + goto exit_errmsg; } /* Find the full program boundaries. 
*/ @@ -2347,54 +3427,15 @@ static void load_elf_image(const char *image_name, int image_fd, if (vaddr_ef > info->end_data) { info->end_data = vaddr_ef; } - if (vaddr_em > info->brk) { - info->brk = vaddr_em; - } } - } else if (eppnt->p_type == PT_INTERP && pinterp_name) { - char *interp_name; - - if (*pinterp_name) { - errmsg = "Multiple PT_INTERP entries"; - goto exit_errmsg; - } - interp_name = malloc(eppnt->p_filesz); - if (!interp_name) { - goto exit_perror; - } - - if (eppnt->p_offset + eppnt->p_filesz <= BPRM_BUF_SIZE) { - memcpy(interp_name, bprm_buf + eppnt->p_offset, - eppnt->p_filesz); - } else { - retval = pread(image_fd, interp_name, eppnt->p_filesz, - eppnt->p_offset); - if (retval != eppnt->p_filesz) { - goto exit_perror; - } - } - if (interp_name[eppnt->p_filesz - 1] != 0) { - errmsg = "Invalid PT_INTERP entry"; - goto exit_errmsg; - } - *pinterp_name = interp_name; #ifdef TARGET_MIPS } else if (eppnt->p_type == PT_MIPS_ABIFLAGS) { Mips_elf_abiflags_v0 abiflags; - if (eppnt->p_filesz < sizeof(Mips_elf_abiflags_v0)) { - errmsg = "Invalid PT_MIPS_ABIFLAGS entry"; + + if (!imgsrc_read(&abiflags, eppnt->p_offset, sizeof(abiflags), + src, &err)) { goto exit_errmsg; } - if (eppnt->p_offset + eppnt->p_filesz <= BPRM_BUF_SIZE) { - memcpy(&abiflags, bprm_buf + eppnt->p_offset, - sizeof(Mips_elf_abiflags_v0)); - } else { - retval = pread(image_fd, &abiflags, sizeof(Mips_elf_abiflags_v0), - eppnt->p_offset); - if (retval != sizeof(Mips_elf_abiflags_v0)) { - goto exit_perror; - } - } bswap_mips_abiflags(&abiflags); info->fp_abi = abiflags.fp_abi; #endif @@ -2404,61 +3445,108 @@ static void load_elf_image(const char *image_name, int image_fd, if (info->end_data == 0) { info->start_data = info->end_code; info->end_data = info->end_code; - info->brk = info->end_code; } if (qemu_log_enabled()) { - load_symbols(ehdr, image_fd, load_bias); + load_symbols(ehdr, src, load_bias); } + debuginfo_report_elf(image_name, src->fd, load_bias); + mmap_unlock(); - close(image_fd); + close(src->fd); return; - exit_read: - if (retval >= 0) { - errmsg = "Incomplete read of file header"; - goto exit_errmsg; - } - exit_perror: - errmsg = strerror(errno); + exit_mmap: + error_setg_errno(&err, errno, "Error mapping file"); + goto exit_errmsg; exit_errmsg: - fprintf(stderr, "%s: %s\n", image_name, errmsg); + error_reportf_err(err, "%s: ", image_name); exit(-1); } static void load_elf_interp(const char *filename, struct image_info *info, char bprm_buf[BPRM_BUF_SIZE]) { + struct elfhdr ehdr; + ImageSource src; int fd, retval; + Error *err = NULL; fd = open(path(filename), O_RDONLY); if (fd < 0) { - goto exit_perror; + error_setg_file_open(&err, errno, filename); + error_report_err(err); + exit(-1); } retval = read(fd, bprm_buf, BPRM_BUF_SIZE); if (retval < 0) { - goto exit_perror; + error_setg_errno(&err, errno, "Error reading file header"); + error_reportf_err(err, "%s: ", filename); + exit(-1); } - if (retval < BPRM_BUF_SIZE) { - memset(bprm_buf + retval, 0, BPRM_BUF_SIZE - retval); + + src.fd = fd; + src.cache = bprm_buf; + src.cache_size = retval; + + load_elf_image(filename, &src, info, &ehdr, NULL); +} + +#ifdef VDSO_HEADER +#include VDSO_HEADER +#define vdso_image_info() &vdso_image_info +#else +#define vdso_image_info() NULL +#endif + +static void load_elf_vdso(struct image_info *info, const VdsoImageInfo *vdso) +{ + ImageSource src; + struct elfhdr ehdr; + abi_ulong load_bias, load_addr; + + src.fd = -1; + src.cache = vdso->image; + src.cache_size = vdso->image_size; + + load_elf_image("<internal-vdso>", 
&src, info, &ehdr, NULL); + load_addr = info->load_addr; + load_bias = info->load_bias; + + /* + * We need to relocate the VDSO image. The one built into the kernel + * is built for a fixed address. The one built for QEMU is not, since + * that requires close control of the guest address space. + * We pre-processed the image to locate all of the addresses that need + * to be updated. + */ + for (unsigned i = 0, n = vdso->reloc_count; i < n; i++) { + abi_ulong *addr = g2h_untagged(load_addr + vdso->relocs[i]); + *addr = tswapal(tswapal(*addr) + load_bias); } - load_elf_image(filename, fd, info, NULL, bprm_buf); - return; + /* Install signal trampolines, if present. */ + if (vdso->sigreturn_ofs) { + default_sigreturn = load_addr + vdso->sigreturn_ofs; + } + if (vdso->rt_sigreturn_ofs) { + default_rt_sigreturn = load_addr + vdso->rt_sigreturn_ofs; + } - exit_perror: - fprintf(stderr, "%s: %s\n", filename, strerror(errno)); - exit(-1); + /* Remove write from VDSO segment. */ + target_mprotect(info->start_data, info->end_data - info->start_data, + PROT_READ | PROT_EXEC); } static int symfind(const void *s0, const void *s1) { - target_ulong addr = *(target_ulong *)s0; struct elf_sym *sym = (struct elf_sym *)s1; + __typeof(sym->st_value) addr = *(uint64_t *)s0; int result = 0; + if (addr < sym->st_value) { result = -1; } else if (addr >= sym->st_value + sym->st_size) { @@ -2467,7 +3555,7 @@ static int symfind(const void *s0, const void *s1) return result; } -static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) +static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr) { #if ELF_CLASS == ELFCLASS32 struct elf_sym *syms = s->disas_symtab.elf32; @@ -2486,7 +3574,7 @@ static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr) return ""; } -/* FIXME: This should use elf_ops.h */ +/* FIXME: This should use elf_ops.h.inc */ static int symcmp(const void *s0, const void *s1) { struct elf_sym *sym0 = (struct elf_sym *)s0; @@ -2497,19 +3585,20 @@ static int symcmp(const void *s0, const void *s1) } /* Best attempt to load symbols from this ELF object. */ -static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) +static void load_symbols(struct elfhdr *hdr, const ImageSource *src, + abi_ulong load_bias) { int i, shnum, nsyms, sym_idx = 0, str_idx = 0; - uint64_t segsz; - struct elf_shdr *shdr; + g_autofree struct elf_shdr *shdr = NULL; char *strings = NULL; - struct syminfo *s = NULL; - struct elf_sym *new_syms, *syms = NULL; + struct elf_sym *syms = NULL; + struct elf_sym *new_syms; + uint64_t segsz; shnum = hdr->e_shnum; - i = shnum * sizeof(struct elf_shdr); - shdr = (struct elf_shdr *)alloca(i); - if (pread(fd, shdr, i, hdr->e_shoff) != i) { + shdr = imgsrc_read_alloc(hdr->e_shoff, shnum * sizeof(struct elf_shdr), + src, NULL); + if (shdr == NULL) { return; } @@ -2527,31 +3616,33 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) found: /* Now know where the strtab and symtab are. Snarf them. 
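
The vdso relocation pass above is simple enough to model on plain host memory. A sketch with made-up offsets, eliding the guest/host translation and the tswapal() byte swapping of the real loop:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint8_t image[16] = { 0 };            /* stand-in for the vdso image */
        const uint32_t relocs[] = { 0, 8 };   /* offsets of absolute addresses */
        const uint64_t load_bias = 0x7f0000000000;

        for (size_t i = 0; i < sizeof(relocs) / sizeof(relocs[0]); i++) {
            uint64_t v;
            memcpy(&v, image + relocs[i], sizeof(v));   /* read the slot */
            v += load_bias;                             /* rebase the address */
            memcpy(image + relocs[i], &v, sizeof(v));   /* write it back */
        }

        uint64_t first;
        memcpy(&first, image, sizeof(first));
        printf("first patched word: 0x%jx\n", (uintmax_t)first);
        return 0;
    }
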
*/ - s = g_try_new(struct syminfo, 1); - if (!s) { - goto give_up; - } segsz = shdr[str_idx].sh_size; - s->disas_strtab = strings = g_try_malloc(segsz); - if (!strings || - pread(fd, strings, segsz, shdr[str_idx].sh_offset) != segsz) { + strings = g_try_malloc(segsz); + if (!strings) { goto give_up; } - - segsz = shdr[sym_idx].sh_size; - syms = g_try_malloc(segsz); - if (!syms || pread(fd, syms, segsz, shdr[sym_idx].sh_offset) != segsz) { + if (!imgsrc_read(strings, shdr[str_idx].sh_offset, segsz, src, NULL)) { goto give_up; } + segsz = shdr[sym_idx].sh_size; if (segsz / sizeof(struct elf_sym) > INT_MAX) { - /* Implausibly large symbol table: give up rather than ploughing - * on with the number of symbols calculation overflowing + /* + * Implausibly large symbol table: give up rather than ploughing + * on with the number of symbols calculation overflowing. */ goto give_up; } nsyms = segsz / sizeof(struct elf_sym); + syms = g_try_malloc(segsz); + if (!syms) { + goto give_up; + } + if (!imgsrc_read(syms, shdr[sym_idx].sh_offset, segsz, src, NULL)) { + goto give_up; + } + for (i = 0; i < nsyms; ) { bswap_sym(syms + i); /* Throw away entries which we do not need. */ @@ -2576,10 +3667,12 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) goto give_up; } - /* Attempt to free the storage associated with the local symbols - that we threw away. Whether or not this has any effect on the - memory allocation depends on the malloc implementation and how - many symbols we managed to discard. */ + /* + * Attempt to free the storage associated with the local symbols + * that we threw away. Whether or not this has any effect on the + * memory allocation depends on the malloc implementation and how + * many symbols we managed to discard. + */ new_syms = g_try_renew(struct elf_sym, syms, nsyms); if (new_syms == NULL) { goto give_up; @@ -2588,20 +3681,23 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) qsort(syms, nsyms, sizeof(*syms), symcmp); - s->disas_num_syms = nsyms; + { + struct syminfo *s = g_new(struct syminfo, 1); + + s->disas_strtab = strings; + s->disas_num_syms = nsyms; #if ELF_CLASS == ELFCLASS32 - s->disas_symtab.elf32 = syms; + s->disas_symtab.elf32 = syms; #else - s->disas_symtab.elf64 = syms; + s->disas_symtab.elf64 = syms; #endif - s->lookup_symbol = lookup_symbolxx; - s->next = syminfos; - syminfos = s; - + s->lookup_symbol = lookup_symbolxx; + s->next = syminfos; + syminfos = s; + } return; -give_up: - g_free(s); + give_up: g_free(strings); g_free(syms); } @@ -2643,20 +3739,23 @@ uint32_t get_elf_eflags(int fd) int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) { - struct image_info interp_info; - struct elfhdr elf_ex; + /* + * We need a copy of the elf header for passing to create_elf_tables. + * We will have overwritten the original when we re-use bprm->buf + * while loading the interpreter. Allocate the storage for this now + * and let elf_load_image do any swapping that may be required. + */ + struct elfhdr ehdr; + struct image_info interp_info, vdso_info; char *elf_interpreter = NULL; char *scratch; - info->start_mmap = (abi_ulong)ELF_START_MMAP; - - load_elf_image(bprm->filename, bprm->fd, info, - &elf_interpreter, bprm->buf); + memset(&interp_info, 0, sizeof(interp_info)); +#ifdef TARGET_MIPS + interp_info.fp_abi = MIPS_ABI_FP_UNKNOWN; +#endif - /* ??? We need a copy of the elf header for passing to create_elf_tables. 
- If we do nothing, we'll have overwritten this when we re-use bprm->buf - when we load the interpreter. */ - elf_ex = *(struct elfhdr *)bprm->buf; + load_elf_image(bprm->filename, &bprm->src, info, &ehdr, &elf_interpreter); /* Do this so that we can load the interpreter, if need be. We will change some of these later */ @@ -2695,6 +3794,19 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) if (elf_interpreter) { load_elf_interp(elf_interpreter, &interp_info, bprm->buf); + /* + * While unusual because of ELF_ET_DYN_BASE, if we are unlucky + * with the mappings the interpreter can be loaded above but + * near the main executable, which can leave very little room + * for the heap. + * If the current brk has less than 16MB, use the end of the + * interpreter. + */ + if (interp_info.brk > info->brk && + interp_info.load_bias - info->brk < 16 * MiB) { + info->brk = interp_info.brk; + } + /* If the program interpreter is one of these two, then assume an iBCS2 image. Otherwise assume a native linux image. */ @@ -2706,16 +3818,38 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ - target_mmap(0, qemu_host_page_size, PROT_READ | PROT_EXEC, - MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + target_mmap(0, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC, + MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); } #ifdef TARGET_MIPS info->interp_fp_abi = interp_info.fp_abi; #endif } - bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex, - info, (elf_interpreter ? &interp_info : NULL)); + /* + * Load a vdso if available, which will amongst other things contain the + * signal trampolines. Otherwise, allocate a separate page for them. + */ + const VdsoImageInfo *vdso = vdso_image_info(); + if (vdso) { + load_elf_vdso(&vdso_info, vdso); + info->vdso = vdso_info.load_bias; + } else if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) { + abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (tramp_page == -1) { + return -errno; + } + + setup_sigtramp(tramp_page); + target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC); + } + + bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &ehdr, info, + elf_interpreter ? &interp_info : NULL, + vdso ? &vdso_info : NULL); info->start_stack = bprm->p; /* If we have an interpreter, set that as the program's entry point. @@ -2725,7 +3859,7 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) if (elf_interpreter) { info->load_bias = interp_info.load_bias; info->entry = interp_info.entry; - free(elf_interpreter); + g_free(elf_interpreter); } #ifdef USE_ELF_CORE_DUMP @@ -2736,6 +3870,8 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) } #ifdef USE_ELF_CORE_DUMP +#include "exec/translate-all.h" + /* * Definitions to generate Intel SVR4-like core files. * These mostly have the same names as the SVR4 types with "target_elf_" @@ -2775,18 +3911,6 @@ int load_elf_binary(struct linux_binprm *bprm, struct image_info *info) * Example for ARM target is provided in this file. 
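
For orientation, the notes written by this dump code follow the kernel's convention: one NT_PRPSINFO and one NT_AUXV for the process, plus one NT_PRSTATUS per thread, all packed into a single PT_NOTE segment. A hypothetical reader-side walk over such a buffer, using the same 4-byte padding rule the writer applies:

    #include <elf.h>
    #include <stdio.h>
    #include <string.h>

    static void walk_notes(const unsigned char *p, size_t len)
    {
        Elf64_Nhdr n;

        while (len >= sizeof(n)) {
            memcpy(&n, p, sizeof(n));       /* header may be unaligned */
            size_t sz = sizeof(n)
                      + ((n.n_namesz + 3) & ~3u)
                      + ((n.n_descsz + 3) & ~3u);
            if (sz > len) {
                break;                      /* truncated note */
            }
            printf("type=%u namesz=%u descsz=%u\n",
                   n.n_type, n.n_namesz, n.n_descsz);
            p += sz;
            len -= sz;
        }
    }

    int main(void)
    {
        unsigned char buf[32] = { 0 };
        Elf64_Nhdr n = { .n_namesz = 5, .n_descsz = 4, .n_type = NT_PRSTATUS };

        memcpy(buf, &n, sizeof(n));
        memcpy(buf + sizeof(n), "CORE", 5);
        walk_notes(buf, sizeof(n) + 8 + 4);
        return 0;
    }
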
*/ -/* An ELF note in memory */ -struct memelfnote { - const char *name; - size_t namesz; - size_t namesz_rounded; - int type; - size_t datasz; - size_t datasz_rounded; - void *data; - size_t notesz; -}; - struct target_elf_siginfo { abi_int si_signo; /* signal number */ abi_int si_code; /* extra code */ @@ -2822,82 +3946,10 @@ struct target_elf_prpsinfo { target_gid_t pr_gid; target_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; /* Lots missing */ - char pr_fname[16]; /* filename of executable */ + char pr_fname[16] QEMU_NONSTRING; /* filename of executable */ char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */ }; -/* Here is the structure in which status of each thread is captured. */ -struct elf_thread_status { - QTAILQ_ENTRY(elf_thread_status) ets_link; - struct target_elf_prstatus prstatus; /* NT_PRSTATUS */ -#if 0 - elf_fpregset_t fpu; /* NT_PRFPREG */ - struct task_struct *thread; - elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */ -#endif - struct memelfnote notes[1]; - int num_notes; -}; - -struct elf_note_info { - struct memelfnote *notes; - struct target_elf_prstatus *prstatus; /* NT_PRSTATUS */ - struct target_elf_prpsinfo *psinfo; /* NT_PRPSINFO */ - - QTAILQ_HEAD(, elf_thread_status) thread_list; -#if 0 - /* - * Current version of ELF coredump doesn't support - * dumping fp regs etc. - */ - elf_fpregset_t *fpu; - elf_fpxregset_t *xfpu; - int thread_status_size; -#endif - int notes_size; - int numnote; -}; - -struct vm_area_struct { - target_ulong vma_start; /* start vaddr of memory region */ - target_ulong vma_end; /* end vaddr of memory region */ - abi_ulong vma_flags; /* protection etc. flags for the region */ - QTAILQ_ENTRY(vm_area_struct) vma_link; -}; - -struct mm_struct { - QTAILQ_HEAD(, vm_area_struct) mm_mmap; - int mm_count; /* number of mappings */ -}; - -static struct mm_struct *vma_init(void); -static void vma_delete(struct mm_struct *); -static int vma_add_mapping(struct mm_struct *, target_ulong, - target_ulong, abi_ulong); -static int vma_get_mapping_count(const struct mm_struct *); -static struct vm_area_struct *vma_first(const struct mm_struct *); -static struct vm_area_struct *vma_next(struct vm_area_struct *); -static abi_ulong vma_dump_size(const struct vm_area_struct *); -static int vma_walker(void *priv, target_ulong start, target_ulong end, - unsigned long flags); - -static void fill_elf_header(struct elfhdr *, int, uint16_t, uint32_t); -static void fill_note(struct memelfnote *, const char *, int, - unsigned int, void *); -static void fill_prstatus(struct target_elf_prstatus *, const TaskState *, int); -static int fill_psinfo(struct target_elf_prpsinfo *, const TaskState *); -static void fill_auxv_note(struct memelfnote *, const TaskState *); -static void fill_elf_note_phdr(struct elf_phdr *, int, off_t); -static size_t note_size(const struct memelfnote *); -static void free_note_info(struct elf_note_info *); -static int fill_note_info(struct elf_note_info *, long, const CPUArchState *); -static void fill_thread_info(struct elf_note_info *, const CPUArchState *); -static int core_dump_filename(const TaskState *, char *, size_t); - -static int dump_write(int, const void *, size_t); -static int write_note(struct memelfnote *, int); -static int write_note_info(struct elf_note_info *, int); - #ifdef BSWAP_NEEDED static void bswap_prstatus(struct target_elf_prstatus *prstatus) { @@ -2940,143 +3992,66 @@ static inline void bswap_note(struct elf_note *en) { } #endif /* BSWAP_NEEDED */ /* - * Minimal support for linux memory regions. 
These are needed - * when we are finding out what memory exactly belongs to - * emulated process. No locks needed here, as long as - * thread that received the signal is stopped. - */ - -static struct mm_struct *vma_init(void) -{ - struct mm_struct *mm; - - if ((mm = g_malloc(sizeof (*mm))) == NULL) - return (NULL); - - mm->mm_count = 0; - QTAILQ_INIT(&mm->mm_mmap); - - return (mm); -} - -static void vma_delete(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - - while ((vma = vma_first(mm)) != NULL) { - QTAILQ_REMOVE(&mm->mm_mmap, vma, vma_link); - g_free(vma); - } - g_free(mm); -} - -static int vma_add_mapping(struct mm_struct *mm, target_ulong start, - target_ulong end, abi_ulong flags) -{ - struct vm_area_struct *vma; - - if ((vma = g_malloc0(sizeof (*vma))) == NULL) - return (-1); - - vma->vma_start = start; - vma->vma_end = end; - vma->vma_flags = flags; - - QTAILQ_INSERT_TAIL(&mm->mm_mmap, vma, vma_link); - mm->mm_count++; - - return (0); -} - -static struct vm_area_struct *vma_first(const struct mm_struct *mm) -{ - return (QTAILQ_FIRST(&mm->mm_mmap)); -} - -static struct vm_area_struct *vma_next(struct vm_area_struct *vma) -{ - return (QTAILQ_NEXT(vma, vma_link)); -} - -static int vma_get_mapping_count(const struct mm_struct *mm) -{ - return (mm->mm_count); -} - -/* * Calculate file (dump) size of given memory region. */ -static abi_ulong vma_dump_size(const struct vm_area_struct *vma) +static size_t vma_dump_size(target_ulong start, target_ulong end, + unsigned long flags) { - /* if we cannot even read the first page, skip it */ - if (!access_ok(VERIFY_READ, vma->vma_start, TARGET_PAGE_SIZE)) - return (0); + /* The area must be readable. */ + if (!(flags & PAGE_READ)) { + return 0; + } /* * Usually we don't dump executable pages as they contain * non-writable code that debugger can read directly from - * target library etc. However, thread stacks are marked - * also executable so we read in first page of given region - * and check whether it contains elf header. If there is - * no elf header, we dump it. + * target library etc. If there is no elf header, we dump it. */ - if (vma->vma_flags & PROT_EXEC) { - char page[TARGET_PAGE_SIZE]; - - copy_from_user(page, vma->vma_start, sizeof (page)); - if ((page[EI_MAG0] == ELFMAG0) && - (page[EI_MAG1] == ELFMAG1) && - (page[EI_MAG2] == ELFMAG2) && - (page[EI_MAG3] == ELFMAG3)) { - /* - * Mappings are possibly from ELF binary. Don't dump - * them. 
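
The dump-size heuristic above can be rendered standalone; the MY_* flag bits are hypothetical stand-ins for QEMU's PAGE_READ, PAGE_WRITE_ORG and PAGE_EXEC:

    #include <elf.h>
    #include <stdio.h>
    #include <string.h>

    enum { MY_READ = 1, MY_WRITE = 2, MY_EXEC = 4 };   /* hypothetical flags */

    static size_t dump_size(const unsigned char *start, size_t len, int flags)
    {
        if (!(flags & MY_READ)) {
            return 0;                       /* unreadable: nothing to dump */
        }
        /* Executable, never written, still starts with \177ELF: skip it. */
        if (!(flags & MY_WRITE) && (flags & MY_EXEC) &&
            len >= SELFMAG && memcmp(start, ELFMAG, SELFMAG) == 0) {
            return 0;
        }
        return len;
    }

    int main(void)
    {
        unsigned char code[4096] = { 0x7f, 'E', 'L', 'F' };
        unsigned char data[4096] = { 0 };

        printf("code: %zu\n", dump_size(code, sizeof(code), MY_READ | MY_EXEC));
        printf("data: %zu\n", dump_size(data, sizeof(data), MY_READ | MY_WRITE));
        return 0;
    }
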
- */ - return (0); - } + if (!(flags & PAGE_WRITE_ORG) && + (flags & PAGE_EXEC) && + memcmp(g2h_untagged(start), ELFMAG, SELFMAG) == 0) { + return 0; } - return (vma->vma_end - vma->vma_start); + return end - start; } -static int vma_walker(void *priv, target_ulong start, target_ulong end, - unsigned long flags) +static size_t size_note(const char *name, size_t datasz) { - struct mm_struct *mm = (struct mm_struct *)priv; + size_t namesz = strlen(name) + 1; - vma_add_mapping(mm, start, end, flags); - return (0); + namesz = ROUND_UP(namesz, 4); + datasz = ROUND_UP(datasz, 4); + + return sizeof(struct elf_note) + namesz + datasz; } -static void fill_note(struct memelfnote *note, const char *name, int type, - unsigned int sz, void *data) +static void *fill_note(void **pptr, int type, const char *name, size_t datasz) { - unsigned int namesz; + void *ptr = *pptr; + struct elf_note *n = ptr; + size_t namesz = strlen(name) + 1; - namesz = strlen(name) + 1; - note->name = name; - note->namesz = namesz; - note->namesz_rounded = roundup(namesz, sizeof (int32_t)); - note->type = type; - note->datasz = sz; - note->datasz_rounded = roundup(sz, sizeof (int32_t)); + n->n_namesz = namesz; + n->n_descsz = datasz; + n->n_type = type; + bswap_note(n); - note->data = data; + ptr += sizeof(*n); + memcpy(ptr, name, namesz); - /* - * We calculate rounded up note size here as specified by - * ELF document. - */ - note->notesz = sizeof (struct elf_note) + - note->namesz_rounded + note->datasz_rounded; + namesz = ROUND_UP(namesz, 4); + datasz = ROUND_UP(datasz, 4); + + *pptr = ptr + namesz + datasz; + return ptr + namesz; } static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine, uint32_t flags) { - (void) memset(elf, 0, sizeof(*elf)); + memcpy(elf->e_ident, ELFMAG, SELFMAG); - (void) memcpy(elf->e_ident, ELFMAG, SELFMAG); elf->e_ident[EI_CLASS] = ELF_CLASS; elf->e_ident[EI_DATA] = ELF_DATA; elf->e_ident[EI_VERSION] = EV_CURRENT; @@ -3094,94 +4069,79 @@ static void fill_elf_header(struct elfhdr *elf, int segs, uint16_t machine, bswap_ehdr(elf); } -static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, off_t offset) +static void fill_elf_note_phdr(struct elf_phdr *phdr, size_t sz, off_t offset) { phdr->p_type = PT_NOTE; phdr->p_offset = offset; - phdr->p_vaddr = 0; - phdr->p_paddr = 0; phdr->p_filesz = sz; - phdr->p_memsz = 0; - phdr->p_flags = 0; - phdr->p_align = 0; bswap_phdr(phdr, 1); } -static size_t note_size(const struct memelfnote *note) +static void fill_prstatus_note(void *data, const TaskState *ts, + CPUState *cpu, int signr) { - return (note->notesz); -} - -static void fill_prstatus(struct target_elf_prstatus *prstatus, - const TaskState *ts, int signr) -{ - (void) memset(prstatus, 0, sizeof (*prstatus)); - prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; - prstatus->pr_pid = ts->ts_tid; - prstatus->pr_ppid = getppid(); - prstatus->pr_pgrp = getpgrp(); - prstatus->pr_sid = getsid(0); - - bswap_prstatus(prstatus); + /* + * Because note memory is only aligned to 4, and target_elf_prstatus + * may well have higher alignment requirements, fill locally and + * memcpy to the destination afterward. 
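
A worked example of the size_note() arithmetic above (payload size hypothetical): a note named "CORE" carrying a 330-byte payload takes 12 bytes of header, 8 bytes of padded name and 332 bytes of padded payload:

    #include <stdint.h>
    #include <stdio.h>

    #define ROUND_UP_4(x) (((x) + 3u) & ~3u)

    int main(void)
    {
        uint32_t hdr = 3 * sizeof(uint32_t);   /* n_namesz, n_descsz, n_type */
        uint32_t namesz = sizeof("CORE");      /* 5, padded to 8 */
        uint32_t datasz = 330;                 /* hypothetical payload */

        /* 12 + 8 + 332 = 352 */
        printf("note size = %u\n", hdr + ROUND_UP_4(namesz) + ROUND_UP_4(datasz));
        return 0;
    }
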
+ */ + struct target_elf_prstatus prstatus = { + .pr_info.si_signo = signr, + .pr_cursig = signr, + .pr_pid = ts->ts_tid, + .pr_ppid = getppid(), + .pr_pgrp = getpgrp(), + .pr_sid = getsid(0), + }; + + elf_core_copy_regs(&prstatus.pr_reg, cpu_env(cpu)); + bswap_prstatus(&prstatus); + memcpy(data, &prstatus, sizeof(prstatus)); } -static int fill_psinfo(struct target_elf_prpsinfo *psinfo, const TaskState *ts) +static void fill_prpsinfo_note(void *data, const TaskState *ts) { + /* + * Because note memory is only aligned to 4, and target_elf_prpsinfo + * may well have higher alignment requirements, fill locally and + * memcpy to the destination afterward. + */ + struct target_elf_prpsinfo psinfo = { + .pr_pid = getpid(), + .pr_ppid = getppid(), + .pr_pgrp = getpgrp(), + .pr_sid = getsid(0), + .pr_uid = getuid(), + .pr_gid = getgid(), + }; char *base_filename; - unsigned int i, len; - - (void) memset(psinfo, 0, sizeof (*psinfo)); - - len = ts->info->arg_end - ts->info->arg_start; - if (len >= ELF_PRARGSZ) - len = ELF_PRARGSZ - 1; - if (copy_from_user(&psinfo->pr_psargs, ts->info->arg_start, len)) - return -EFAULT; - for (i = 0; i < len; i++) - if (psinfo->pr_psargs[i] == 0) - psinfo->pr_psargs[i] = ' '; - psinfo->pr_psargs[len] = 0; - - psinfo->pr_pid = getpid(); - psinfo->pr_ppid = getppid(); - psinfo->pr_pgrp = getpgrp(); - psinfo->pr_sid = getsid(0); - psinfo->pr_uid = getuid(); - psinfo->pr_gid = getgid(); + size_t len; + + len = ts->info->env_strings - ts->info->arg_strings; + len = MIN(len, ELF_PRARGSZ); + memcpy(&psinfo.pr_psargs, g2h_untagged(ts->info->arg_strings), len); + for (size_t i = 0; i < len; i++) { + if (psinfo.pr_psargs[i] == 0) { + psinfo.pr_psargs[i] = ' '; + } + } base_filename = g_path_get_basename(ts->bprm->filename); /* * Using strncpy here is fine: at max-length, * this field is not NUL-terminated. */ - (void) strncpy(psinfo->pr_fname, base_filename, - sizeof(psinfo->pr_fname)); - + strncpy(psinfo.pr_fname, base_filename, sizeof(psinfo.pr_fname)); g_free(base_filename); - bswap_psinfo(psinfo); - return (0); + + bswap_psinfo(&psinfo); + memcpy(data, &psinfo, sizeof(psinfo)); } -static void fill_auxv_note(struct memelfnote *note, const TaskState *ts) +static void fill_auxv_note(void *data, const TaskState *ts) { - elf_addr_t auxv = (elf_addr_t)ts->info->saved_auxv; - elf_addr_t orig_auxv = auxv; - void *ptr; - int len = ts->info->auxv_len; - - /* - * Auxiliary vector is stored in target process stack. It contains - * {type, value} pairs that we need to dump into note. This is not - * strictly necessary but we do it here for sake of completeness. - */ - - /* read in whole auxv vector and copy it to memelfnote */ - ptr = lock_user(VERIFY_READ, orig_auxv, len, 0); - if (ptr != NULL) { - fill_note(note, "CORE", NT_AUXV, len, ptr); - unlock_user(ptr, auxv, len); - } + memcpy(data, g2h_untagged(ts->info->saved_auxv), ts->info->auxv_len); } /* @@ -3189,59 +4149,25 @@ static void fill_auxv_note(struct memelfnote *note, const TaskState *ts) * for the name: * qemu_<basename-of-target-binary>_<date>-<time>_<pid>.core * - * Returns 0 in case of success, -1 otherwise (errno is set). 
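
fill_prpsinfo_note() above joins the packed argv strings, which sit NUL-separated on the guest stack, into one space-separated command line. A standalone sketch of that conversion with a made-up argument block:

    #include <stdio.h>
    #include <string.h>

    #define PRARGSZ 80   /* stand-in for ELF_PRARGSZ */

    int main(void)
    {
        /* argv strings as they sit in memory: NUL-separated. */
        const char packed[] = "ls\0-l\0/tmp";
        char psargs[PRARGSZ];
        size_t len = sizeof(packed);           /* includes the final NUL */

        len = len < sizeof(psargs) ? len : sizeof(psargs);
        memcpy(psargs, packed, len);
        for (size_t i = 0; i < len; i++) {
            if (psargs[i] == '\0') {
                psargs[i] = ' ';               /* join arguments with spaces */
            }
        }
        /* Prints "ls -l /tmp " (the last NUL becomes a trailing space). */
        printf("%.*s\n", (int)len, psargs);
        return 0;
    }
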
+ * Returns the filename */ -static int core_dump_filename(const TaskState *ts, char *buf, - size_t bufsize) +static char *core_dump_filename(const TaskState *ts) { - char timestamp[64]; - char *base_filename = NULL; - struct timeval tv; - struct tm tm; - - assert(bufsize >= PATH_MAX); + g_autoptr(GDateTime) now = g_date_time_new_now_local(); + g_autofree char *nowstr = g_date_time_format(now, "%Y%m%d-%H%M%S"); + g_autofree char *base_filename = g_path_get_basename(ts->bprm->filename); - if (gettimeofday(&tv, NULL) < 0) { - (void) fprintf(stderr, "unable to get current timestamp: %s", - strerror(errno)); - return (-1); - } - - base_filename = g_path_get_basename(ts->bprm->filename); - (void) strftime(timestamp, sizeof (timestamp), "%Y%m%d-%H%M%S", - localtime_r(&tv.tv_sec, &tm)); - (void) snprintf(buf, bufsize, "qemu_%s_%s_%d.core", - base_filename, timestamp, (int)getpid()); - g_free(base_filename); - - return (0); + return g_strdup_printf("qemu_%s_%s_%d.core", + base_filename, nowstr, (int)getpid()); } static int dump_write(int fd, const void *ptr, size_t size) { const char *bufp = (const char *)ptr; ssize_t bytes_written, bytes_left; - struct rlimit dumpsize; - off_t pos; bytes_written = 0; - getrlimit(RLIMIT_CORE, &dumpsize); - if ((pos = lseek(fd, 0, SEEK_CUR))==-1) { - if (errno == ESPIPE) { /* not a seekable stream */ - bytes_left = size; - } else { - return pos; - } - } else { - if (dumpsize.rlim_cur <= pos) { - return -1; - } else if (dumpsize.rlim_cur == RLIM_INFINITY) { - bytes_left = size; - } else { - size_t limit_left=dumpsize.rlim_cur - pos; - bytes_left = limit_left >= size ? size : limit_left ; - } - } + bytes_left = size; /* * In normal conditions, single write(2) should do but @@ -3263,135 +4189,76 @@ static int dump_write(int fd, const void *ptr, size_t size) return (0); } -static int write_note(struct memelfnote *men, int fd) +static int wmr_page_unprotect_regions(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) { - struct elf_note en; + if ((flags & (PAGE_WRITE | PAGE_WRITE_ORG)) == PAGE_WRITE_ORG) { + size_t step = MAX(TARGET_PAGE_SIZE, qemu_real_host_page_size()); - en.n_namesz = men->namesz; - en.n_type = men->type; - en.n_descsz = men->datasz; - - bswap_note(&en); - - if (dump_write(fd, &en, sizeof(en)) != 0) - return (-1); - if (dump_write(fd, men->name, men->namesz_rounded) != 0) - return (-1); - if (dump_write(fd, men->data, men->datasz_rounded) != 0) - return (-1); - - return (0); -} - -static void fill_thread_info(struct elf_note_info *info, const CPUArchState *env) -{ - CPUState *cpu = ENV_GET_CPU((CPUArchState *)env); - TaskState *ts = (TaskState *)cpu->opaque; - struct elf_thread_status *ets; - - ets = g_malloc0(sizeof (*ets)); - ets->num_notes = 1; /* only prstatus is dumped */ - fill_prstatus(&ets->prstatus, ts, 0); - elf_core_copy_regs(&ets->prstatus.pr_reg, env); - fill_note(&ets->notes[0], "CORE", NT_PRSTATUS, sizeof (ets->prstatus), - &ets->prstatus); - - QTAILQ_INSERT_TAIL(&info->thread_list, ets, ets_link); - - info->notes_size += note_size(&ets->notes[0]); + while (1) { + page_unprotect(start, 0); + if (end - start <= step) { + break; + } + start += step; + } + } + return 0; } -static void init_note_info(struct elf_note_info *info) -{ - /* Initialize the elf_note_info structure so that it is at - * least safe to call free_note_info() on it. Must be - * called before calling fill_note_info(). 
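
With the RLIMIT_CORE bookkeeping hoisted out to the caller, dump_write() is essentially the classic short-write loop. A minimal standalone equivalent (EINTR handling added here for illustration):

    #include <errno.h>
    #include <unistd.h>

    /* Keep writing until everything is out or a real error occurs. */
    static int write_all(int fd, const void *ptr, size_t size)
    {
        const char *p = ptr;

        while (size > 0) {
            ssize_t n = write(fd, p, size);
            if (n < 0) {
                if (errno == EINTR) {
                    continue;          /* interrupted: retry */
                }
                return -1;
            }
            p += n;                    /* step past the partial write */
            size -= n;
        }
        return 0;
    }

    int main(void)
    {
        return write_all(STDOUT_FILENO, "hello\n", 6) ? 1 : 0;
    }
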
- */ - memset(info, 0, sizeof (*info)); - QTAILQ_INIT(&info->thread_list); -} +typedef struct { + unsigned count; + size_t size; +} CountAndSizeRegions; -static int fill_note_info(struct elf_note_info *info, - long signr, const CPUArchState *env) +static int wmr_count_and_size_regions(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) { -#define NUMNOTES 3 - CPUState *cpu = ENV_GET_CPU((CPUArchState *)env); - TaskState *ts = (TaskState *)cpu->opaque; - int i; - - info->notes = g_new0(struct memelfnote, NUMNOTES); - if (info->notes == NULL) - return (-ENOMEM); - info->prstatus = g_malloc0(sizeof (*info->prstatus)); - if (info->prstatus == NULL) - return (-ENOMEM); - info->psinfo = g_malloc0(sizeof (*info->psinfo)); - if (info->prstatus == NULL) - return (-ENOMEM); - - /* - * First fill in status (and registers) of current thread - * including process info & aux vector. - */ - fill_prstatus(info->prstatus, ts, signr); - elf_core_copy_regs(&info->prstatus->pr_reg, env); - fill_note(&info->notes[0], "CORE", NT_PRSTATUS, - sizeof (*info->prstatus), info->prstatus); - fill_psinfo(info->psinfo, ts); - fill_note(&info->notes[1], "CORE", NT_PRPSINFO, - sizeof (*info->psinfo), info->psinfo); - fill_auxv_note(&info->notes[2], ts); - info->numnote = 3; - - info->notes_size = 0; - for (i = 0; i < info->numnote; i++) - info->notes_size += note_size(&info->notes[i]); - - /* read and fill status of all threads */ - cpu_list_lock(); - CPU_FOREACH(cpu) { - if (cpu == thread_cpu) { - continue; - } - fill_thread_info(info, (CPUArchState *)cpu->env_ptr); - } - cpu_list_unlock(); + CountAndSizeRegions *css = opaque; - return (0); + css->count++; + css->size += vma_dump_size(start, end, flags); + return 0; } -static void free_note_info(struct elf_note_info *info) +typedef struct { + struct elf_phdr *phdr; + off_t offset; +} FillRegionPhdr; + +static int wmr_fill_region_phdr(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) { - struct elf_thread_status *ets; + FillRegionPhdr *d = opaque; + struct elf_phdr *phdr = d->phdr; - while (!QTAILQ_EMPTY(&info->thread_list)) { - ets = QTAILQ_FIRST(&info->thread_list); - QTAILQ_REMOVE(&info->thread_list, ets, ets_link); - g_free(ets); - } + phdr->p_type = PT_LOAD; + phdr->p_vaddr = start; + phdr->p_paddr = 0; + phdr->p_filesz = vma_dump_size(start, end, flags); + phdr->p_offset = d->offset; + d->offset += phdr->p_filesz; + phdr->p_memsz = end - start; + phdr->p_flags = (flags & PAGE_READ ? PF_R : 0) + | (flags & PAGE_WRITE_ORG ? PF_W : 0) + | (flags & PAGE_EXEC ? 
PF_X : 0); + phdr->p_align = ELF_EXEC_PAGESIZE; - g_free(info->prstatus); - g_free(info->psinfo); - g_free(info->notes); + bswap_phdr(phdr, 1); + d->phdr = phdr + 1; + return 0; } -static int write_note_info(struct elf_note_info *info, int fd) +static int wmr_write_region(void *opaque, target_ulong start, + target_ulong end, unsigned long flags) { - struct elf_thread_status *ets; - int i, error = 0; - - /* write prstatus, psinfo and auxv for current thread */ - for (i = 0; i < info->numnote; i++) - if ((error = write_note(&info->notes[i], fd)) != 0) - return (error); + int fd = *(int *)opaque; + size_t size = vma_dump_size(start, end, flags); - /* write prstatus for each thread */ - QTAILQ_FOREACH(ets, &info->thread_list, ets_link) { - if ((error = write_note(&ets->notes[0], fd)) != 0) - return (error); + if (!size) { + return 0; } - - return (0); + return dump_write(fd, g2h_untagged(start), size); } /* @@ -3439,149 +4306,129 @@ static int write_note_info(struct elf_note_info *info, int fd) */ static int elf_core_dump(int signr, const CPUArchState *env) { - const CPUState *cpu = ENV_GET_CPU((CPUArchState *)env); - const TaskState *ts = (const TaskState *)cpu->opaque; - struct vm_area_struct *vma = NULL; - char corefile[PATH_MAX]; - struct elf_note_info info; - struct elfhdr elf; - struct elf_phdr phdr; + const CPUState *cpu = env_cpu((CPUArchState *)env); + const TaskState *ts = (const TaskState *)get_task_state((CPUState *)cpu); struct rlimit dumpsize; - struct mm_struct *mm = NULL; - off_t offset = 0, data_offset = 0; - int segs = 0; + CountAndSizeRegions css; + off_t offset, note_offset, data_offset; + size_t note_size; + int cpus, ret; int fd = -1; + CPUState *cpu_iter; - init_note_info(&info); + if (prctl(PR_GET_DUMPABLE) == 0) { + return 0; + } - errno = 0; - getrlimit(RLIMIT_CORE, &dumpsize); - if (dumpsize.rlim_cur == 0) + if (getrlimit(RLIMIT_CORE, &dumpsize) < 0 || dumpsize.rlim_cur == 0) { return 0; + } - if (core_dump_filename(ts, corefile, sizeof (corefile)) < 0) - return (-errno); + cpu_list_lock(); + mmap_lock(); - if ((fd = open(corefile, O_WRONLY | O_CREAT, - S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0) - return (-errno); + /* By unprotecting, we merge vmas that might be split. */ + walk_memory_regions(NULL, wmr_page_unprotect_regions); /* * Walk through target process memory mappings and - * set up structure containing this information. After - * this point vma_xxx functions can be used. + * set up structure containing this information. */ - if ((mm = vma_init()) == NULL) - goto out; + memset(&css, 0, sizeof(css)); + walk_memory_regions(&css, wmr_count_and_size_regions); - walk_memory_regions(mm, vma_walker); - segs = vma_get_mapping_count(mm); - - /* - * Construct valid coredump ELF header. We also - * add one more segment for notes. 
- */
- fill_elf_header(&elf, segs + 1, ELF_MACHINE, 0);
- if (dump_write(fd, &elf, sizeof (elf)) != 0)
- goto out;
+ cpus = 0;
+ CPU_FOREACH(cpu_iter) {
+ cpus++;
+ }

- /* fill in the in-memory version of notes */
- if (fill_note_info(&info, signr, env) < 0)
- goto out;
+ offset = sizeof(struct elfhdr);
+ offset += (css.count + 1) * sizeof(struct elf_phdr);
+ note_offset = offset;

- offset += sizeof (elf); /* elf header */
- offset += (segs + 1) * sizeof (struct elf_phdr); /* program headers */
+ offset += size_note("CORE", ts->info->auxv_len);
+ offset += size_note("CORE", sizeof(struct target_elf_prpsinfo));
+ offset += size_note("CORE", sizeof(struct target_elf_prstatus)) * cpus;
+ note_size = offset - note_offset;
+ data_offset = ROUND_UP(offset, ELF_EXEC_PAGESIZE);

- /* write out notes program header */
- fill_elf_note_phdr(&phdr, info.notes_size, offset);
+ /* Do not dump if the corefile size exceeds the limit. */
+ if (dumpsize.rlim_cur != RLIM_INFINITY
+ && dumpsize.rlim_cur < data_offset + css.size) {
+ errno = 0;
+ goto out;
+ }

- offset += info.notes_size;
- if (dump_write(fd, &phdr, sizeof (phdr)) != 0)
+ {
+ g_autofree char *corefile = core_dump_filename(ts);
+ fd = open(corefile, O_WRONLY | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ }
+ if (fd < 0) {
goto out;
+ }

/*
- * ELF specification wants data to start at page boundary so
- * we align it here.
+ * There is a fair amount of alignment padding within the notes
+ * as well as preceding the process memory. Allocate a zeroed
+ * block to hold it all. Write all of the headers directly into
+ * this buffer and then write it out as a block.
*/
- data_offset = offset = roundup(offset, ELF_EXEC_PAGESIZE);
+ {
+ g_autofree void *header = g_malloc0(data_offset);
+ FillRegionPhdr frp;
+ void *hptr, *dptr;
+
+ /* Create elf file header. */
+ hptr = header;
+ fill_elf_header(hptr, css.count + 1, ELF_MACHINE, 0);
+ hptr += sizeof(struct elfhdr);
+
+ /* Create elf program headers. */
+ fill_elf_note_phdr(hptr, note_size, note_offset);
+ hptr += sizeof(struct elf_phdr);
+
+ frp.phdr = hptr;
+ frp.offset = data_offset;
+ walk_memory_regions(&frp, wmr_fill_region_phdr);
+ hptr = frp.phdr;
+
+ /* Create the notes. */
+ dptr = fill_note(&hptr, NT_AUXV, "CORE", ts->info->auxv_len);
+ fill_auxv_note(dptr, ts);
+
+ dptr = fill_note(&hptr, NT_PRPSINFO, "CORE",
+ sizeof(struct target_elf_prpsinfo));
+ fill_prpsinfo_note(dptr, ts);
+
+ CPU_FOREACH(cpu_iter) {
+ dptr = fill_note(&hptr, NT_PRSTATUS, "CORE",
+ sizeof(struct target_elf_prstatus));
+ fill_prstatus_note(dptr, ts, cpu_iter,
+ cpu_iter == cpu ? signr : 0);
+ }

- /*
- * Write program headers for memory regions mapped in
- * the target process.
- */
- for (vma = vma_first(mm); vma != NULL; vma = vma_next(vma)) {
- (void) memset(&phdr, 0, sizeof (phdr));
-
- phdr.p_type = PT_LOAD;
- phdr.p_offset = offset;
- phdr.p_vaddr = vma->vma_start;
- phdr.p_paddr = 0;
- phdr.p_filesz = vma_dump_size(vma);
- offset += phdr.p_filesz;
- phdr.p_memsz = vma->vma_end - vma->vma_start;
- phdr.p_flags = vma->vma_flags & PROT_READ ? PF_R : 0;
- if (vma->vma_flags & PROT_WRITE)
- phdr.p_flags |= PF_W;
- if (vma->vma_flags & PROT_EXEC)
- phdr.p_flags |= PF_X;
- phdr.p_align = ELF_EXEC_PAGESIZE;
-
- bswap_phdr(&phdr, 1);
- if (dump_write(fd, &phdr, sizeof(phdr)) != 0) {
+ if (dump_write(fd, header, data_offset) < 0) {
goto out;
}
}

/*
- * Next we write notes just after program headers. No
- * alignment needed here.
+ * Finally write process memory into the corefile as well.
 */
- if (write_note_info(&info, fd) < 0)
- goto out;
-
- /* align data to page boundary */
- if (lseek(fd, data_offset, SEEK_SET) != data_offset)
+ if (walk_memory_regions(&fd, wmr_write_region) < 0) {
goto out;
-
- /*
- * Finally we can dump process memory into corefile as well.
- */
- for (vma = vma_first(mm); vma != NULL; vma = vma_next(vma)) {
- abi_ulong addr;
- abi_ulong end;
-
- end = vma->vma_start + vma_dump_size(vma);
-
- for (addr = vma->vma_start; addr < end;
- addr += TARGET_PAGE_SIZE) {
- char page[TARGET_PAGE_SIZE];
- int error;
-
- /*
- * Read in page from target process memory and
- * write it to coredump file.
- */
- error = copy_from_user(page, addr, sizeof (page));
- if (error != 0) {
- (void) fprintf(stderr, "unable to dump " TARGET_ABI_FMT_lx "\n",
- addr);
- errno = -error;
- goto out;
- }
- if (dump_write(fd, page, TARGET_PAGE_SIZE) < 0)
- goto out;
- }
}
+ errno = 0;

out:
- free_note_info(&info);
- if (mm != NULL)
- vma_delete(mm);
- (void) close(fd);
-
- if (errno != 0)
- return (-errno);
- return (0);
+ ret = -errno;
+ mmap_unlock();
+ cpu_list_unlock();
+ if (fd >= 0) {
+ close(fd);
+ }
+ return ret;
}
#endif /* USE_ELF_CORE_DUMP */
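
A note on the offset arithmetic in elf_core_dump() above: an ELF note is an Elf_Nhdr followed by its name and its descriptor, each padded to a 4-byte boundary, which is what size_note() has to account for when it sizes the PT_NOTE segment. A minimal standalone sketch of that calculation, assuming the usual <elf.h> types (note_total and this ROUND_UP are illustrative stand-ins, not the patch's helpers):

#include <elf.h>       /* Elf64_Nhdr */
#include <stdio.h>
#include <string.h>

/* Round x up to the next multiple of the power-of-two a. */
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

/* Bytes one note occupies inside the PT_NOTE segment. */
static size_t note_total(const char *name, size_t descsz)
{
    return sizeof(Elf64_Nhdr)               /* n_namesz, n_descsz, n_type */
         + ROUND_UP(strlen(name) + 1, 4)    /* name incl. NUL, padded */
         + ROUND_UP(descsz, 4);             /* descriptor, padded */
}

int main(void)
{
    /* "CORE" is 5 bytes with its NUL, padded to 8; the header is 12. */
    printf("%zu\n", note_total("CORE", 100));   /* prints 120 */
    return 0;
}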
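
The new dump path makes three passes over the guest memory map via walk_memory_regions(): count the regions and sum their dumpable sizes (wmr_count_and_size_regions), assign each region a PT_LOAD header with a running file offset (wmr_fill_region_phdr), then write the data itself (wmr_write_region). Reduced to a self-contained sketch with hypothetical stand-in types (Region, walk_regions, and the callbacks below are not QEMU's):

#include <stdio.h>
#include <stddef.h>

/* Hypothetical stand-ins for QEMU's guest memory regions. */
typedef struct { unsigned long start, end; int dumped; } Region;
typedef int (*RegionFn)(void *opaque, const Region *r);

static int walk_regions(const Region *rs, size_t n, void *opaque, RegionFn fn)
{
    for (size_t i = 0; i < n; i++) {
        int ret = fn(opaque, &rs[i]);
        if (ret) {
            return ret;   /* a nonzero return aborts the walk */
        }
    }
    return 0;
}

/* Pass 1: count regions and total the bytes that will be dumped. */
typedef struct { unsigned count; size_t size; } CountAndSize;

static int count_and_size(void *opaque, const Region *r)
{
    CountAndSize *cs = opaque;
    cs->count++;
    cs->size += r->dumped ? r->end - r->start : 0;
    return 0;
}

/* Pass 2: give each region a file offset, as wmr_fill_region_phdr does. */
static int fill_offsets(void *opaque, const Region *r)
{
    size_t *offset = opaque;
    size_t filesz = r->dumped ? r->end - r->start : 0;

    printf("vaddr 0x%lx -> file offset %zu, %zu bytes\n",
           r->start, *offset, filesz);
    *offset += filesz;
    return 0;
}

int main(void)
{
    Region rs[] = { { 0x10000, 0x20000, 1 }, { 0x20000, 0x30000, 0 } };
    CountAndSize cs = { 0, 0 };
    size_t offset = 4096;   /* headers and notes occupy the first page */

    walk_regions(rs, 2, &cs, count_and_size);
    printf("%u regions, %zu data bytes\n", cs.count, cs.size);
    walk_regions(rs, 2, &offset, fill_offsets);
    return 0;
}

Sizing everything up front is what lets the patch allocate one zeroed buffer for all headers and notes and issue a single dump_write() for them, instead of interleaving many small writes and lseek() calls as the old code did.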
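
dump_write() keeps its retry loop because write(2) may accept fewer bytes than requested when the corefile descriptor is a pipe or socket; the exact loop body falls outside the hunks shown here, so the sketch below is only the generic pattern (it additionally retries EINTR):

#include <errno.h>
#include <unistd.h>

/* Write all of buf, retrying short writes; returns -1 on a hard error. */
static int write_all(int fd, const void *buf, size_t size)
{
    const char *p = buf;

    while (size > 0) {
        ssize_t n = write(fd, p, size);
        if (n < 0) {
            if (errno == EINTR) {
                continue;   /* interrupted before anything was written */
            }
            return -1;
        }
        p += n;
        size -= n;
    }
    return 0;
}

int main(void)
{
    static const char msg[] = "hello\n";
    return write_all(STDOUT_FILENO, msg, sizeof(msg) - 1);
}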